This brings in a Yahoo coredump patch from Paul, with additional mods by
me (addition of vn_rdwr_inchunks).  The problem Yahoo is solving is that
if you have large process images core dumping, or you have a large number of
forked processes all core dumping at the same time, the original coredump code
would leave the vnode locked throughout.  This can cause the directory vnode
to get locked up, which can cause the parent directory vnode to get locked
up, and so on all the way to the root node, locking the entire machine up
for extremely long periods of time.

This patch solves the problem in two ways.  First, it uses an advisory
non-blocking lock to abort multiple processes trying to core to the same
file.  Second (my contribution), it chunks up the writes and uses bwillwrite()
to avoid holding the vnode locked while blocking in the buffer cache.
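For illustration only, here is a minimal userland sketch of the same two ideas,
using fcntl(2) advisory locks and plain write(2) as stand-ins for the kernel's
VOP_ADVLOCK and vn_rdwr_inchunks.  The file name, buffer, and CHUNK_SIZE are
invented for the example, and bwillwrite() has no userland analogue, so only
the non-blocking lock and the bounded-chunk loop are shown; the real changes
are in the diff below.

/*
 * Userland sketch only, not the kernel code: take a non-blocking advisory
 * write lock so concurrent dumpers of the same file give up instead of
 * piling up, then write the image in bounded chunks instead of one huge
 * request (the kernel chunks by MAXBSIZE and calls bwillwrite() between
 * chunks; CHUNK_SIZE here is an arbitrary stand-in).
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#define CHUNK_SIZE	(64 * 1024)

static int
write_in_chunks(int fd, const char *base, size_t len)
{
	while (len > 0) {
		size_t chunk = (len > CHUNK_SIZE) ? CHUNK_SIZE : len;
		ssize_t n = write(fd, base, chunk);

		if (n < 0)
			return (-1);
		base += n;
		len -= n;
	}
	return (0);
}

int
main(void)
{
	static char image[1024 * 1024];	/* stand-in for the process image */
	struct flock lf;
	int fd;

	fd = open("demo.core", O_WRONLY | O_CREAT | O_TRUNC, 0600);
	if (fd < 0)
		return (1);

	/* Non-blocking advisory lock: bail out if another dump is running. */
	memset(&lf, 0, sizeof(lf));
	lf.l_whence = SEEK_SET;
	lf.l_start = 0;
	lf.l_len = 0;
	lf.l_type = F_WRLCK;
	if (fcntl(fd, F_SETLK, &lf) == -1) {
		fprintf(stderr, "core file is busy, aborting\n");
		close(fd);
		return (1);
	}

	if (write_in_chunks(fd, image, sizeof(image)) == -1)
		perror("write");

	lf.l_type = F_UNLCK;
	(void)fcntl(fd, F_SETLK, &lf);
	close(fd);
	return (0);
}

Closing the descriptor also drops the fcntl lock, just as vn_close() runs
after the advisory lock is released in the kernel path.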

Submitted by:	ps
Reviewed by:	dillon
MFC after:	2 weeks
Matthew Dillon 2001-09-08 20:02:33 +00:00
parent 5002a60f9b
commit 06ae1e91c4
Notes (svn2git, 2020-12-20 02:59:44 +00:00): svn path=/head/; revision=83222
6 changed files with 74 additions and 16 deletions


@@ -187,15 +187,15 @@ pecoff_coredump(register struct proc * p, register struct vnode * vp,
 #endif
 	error = cpu_coredump(p, vp, cred);
 	if (error == 0)
-		error = vn_rdwr(UIO_WRITE, vp, vm->vm_daddr,
+		error = vn_rdwr_inchunks(UIO_WRITE, vp, vm->vm_daddr,
 		    (int) ctob(vm->vm_dsize), (off_t) ctob(UPAGES), UIO_USERSPACE,
-		    IO_NODELOCKED | IO_UNIT, cred, (int *) NULL, p);
+		    IO_UNIT, cred, (int *) NULL, p);
 	if (error == 0)
-		error = vn_rdwr(UIO_WRITE, vp,
+		error = vn_rdwr_inchunks(UIO_WRITE, vp,
 		    (caddr_t) trunc_page(USRSTACK - ctob(vm->vm_ssize)),
 		    round_page(ctob(vm->vm_ssize)),
 		    (off_t) ctob(UPAGES) + ctob(vm->vm_dsize), UIO_USERSPACE,
-		    IO_NODELOCKED | IO_UNIT, cred, (int *) NULL, p);
+		    IO_UNIT, cred, (int *) NULL, p);
 	return (error);
 }


@@ -264,15 +264,15 @@ aout_coredump(p, vp, limit)
 	fill_kinfo_proc(p, &p->p_addr->u_kproc);
 	error = cpu_coredump(p, vp, cred);
 	if (error == 0)
-		error = vn_rdwr(UIO_WRITE, vp, vm->vm_daddr,
+		error = vn_rdwr_inchunks(UIO_WRITE, vp, vm->vm_daddr,
 		    (int)ctob(vm->vm_dsize), (off_t)ctob(UPAGES), UIO_USERSPACE,
-		    IO_NODELOCKED|IO_UNIT, cred, (int *) NULL, p);
+		    IO_UNIT, cred, (int *) NULL, p);
 	if (error == 0)
-		error = vn_rdwr(UIO_WRITE, vp,
+		error = vn_rdwr_inchunks(UIO_WRITE, vp,
 		    (caddr_t) trunc_page(USRSTACK - ctob(vm->vm_ssize)),
 		    round_page(ctob(vm->vm_ssize)),
 		    (off_t)ctob(UPAGES) + ctob(vm->vm_dsize), UIO_USERSPACE,
-		    IO_NODELOCKED|IO_UNIT, cred, (int *) NULL, p);
+		    IO_UNIT, cred, (int *) NULL, p);
 	return (error);
 }


@@ -794,9 +794,10 @@ elf_coredump(p, vp, limit)
 	php = (Elf_Phdr *)((char *)hdr + sizeof(Elf_Ehdr)) + 1;
 	offset = hdrsize;
 	for (i = 0; i < seginfo.count; i++) {
-		error = vn_rdwr(UIO_WRITE, vp, (caddr_t)php->p_vaddr,
+		error = vn_rdwr_inchunks(UIO_WRITE, vp,
+		    (caddr_t)php->p_vaddr,
 		    php->p_filesz, offset, UIO_USERSPACE,
-		    IO_NODELOCKED|IO_UNIT, cred, (int *)NULL, p);
+		    IO_UNIT, cred, (int *)NULL, p);
 		if (error != 0)
 			break;
 		offset += php->p_filesz;
@@ -958,8 +959,8 @@ elf_corehdr(p, vp, cred, numsegs, hdr, hdrsize)
 	free(tempdata, M_TEMP);
 	/* Write it to the core file. */
-	return vn_rdwr(UIO_WRITE, vp, hdr, hdrsize, (off_t)0,
-	    UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, cred, NULL, p);
+	return vn_rdwr_inchunks(UIO_WRITE, vp, hdr, hdrsize, (off_t)0,
+	    UIO_SYSSPACE, IO_UNIT, cred, NULL, p);
 }

 static void
static void


@@ -68,6 +68,7 @@
 #include <sys/sysent.h>
 #include <sys/sysctl.h>
 #include <sys/malloc.h>
+#include <sys/unistd.h>
 #include <machine/cpu.h>
@@ -1853,6 +1854,7 @@ coredump(p)
 {
 	register struct vnode *vp;
 	register struct ucred *cred = p->p_ucred;
+	struct flock lf;
 	struct nameidata nd;
 	struct vattr vattr;
 	int error, error1, flags;
@@ -1895,8 +1897,19 @@ coredump(p)
 		return (error);
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	vp = nd.ni_vp;
+	VOP_UNLOCK(vp, 0, p);
+	lf.l_whence = SEEK_SET;
+	lf.l_start = 0;
+	lf.l_len = 0;
+	lf.l_type = F_WRLCK;
+	error = VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &lf, F_FLOCK);
+	if (error)
+		goto out2;
 	if (vn_start_write(vp, &mp, V_NOWAIT) != 0) {
-		VOP_UNLOCK(vp, 0, p);
+		lf.l_type = F_UNLCK;
+		VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_FLOCK);
 		if ((error = vn_close(vp, FWRITE, cred, p)) != 0)
 			return (error);
 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
@@ -1908,7 +1921,7 @@ coredump(p)
 	if (vp->v_type != VREG ||
 	    VOP_GETATTR(vp, &vattr, cred, p) || vattr.va_nlink != 1) {
 		error = EFAULT;
-		goto out;
+		goto out1;
 	}
 	VATTR_NULL(&vattr);
 	vattr.va_size = 0;
@@ -1922,9 +1935,11 @@ coredump(p)
 	error = p->p_sysent->sv_coredump ?
 	    p->p_sysent->sv_coredump(p, vp, limit) :
 	    ENOSYS;
-out:
-	VOP_UNLOCK(vp, 0, p);
+out1:
+	lf.l_type = F_UNLCK;
+	VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_FLOCK);
 	vn_finished_write(mp);
+out2:
 	error1 = vn_close(vp, FWRITE, cred, p);
 	if (error == 0)
 		error = error1;


@@ -334,6 +334,45 @@ vn_rdwr(rw, vp, base, len, offset, segflg, ioflg, cred, aresid, p)
 	return (error);
 }

+/*
+ * Package up an I/O request on a vnode into a uio and do it.  The I/O
+ * request is split up into smaller chunks and we try to avoid saturating
+ * the buffer cache while potentially holding a vnode locked, so we
+ * check bwillwrite() before calling vn_rdwr().
+ */
+int
+vn_rdwr_inchunks(rw, vp, base, len, offset, segflg, ioflg, cred, aresid, p)
+	enum uio_rw rw;
+	struct vnode *vp;
+	caddr_t base;
+	int len;
+	off_t offset;
+	enum uio_seg segflg;
+	int ioflg;
+	struct ucred *cred;
+	int *aresid;
+	struct proc *p;
+{
+	int error = 0;
+
+	do {
+		int chunk = (len > MAXBSIZE) ? MAXBSIZE : len;
+
+		if (rw != UIO_READ && vp->v_type == VREG)
+			bwillwrite();
+		error = vn_rdwr(rw, vp, base, chunk, offset, segflg,
+		    ioflg, cred, aresid, p);
+		len -= chunk;	/* aresid calc already includes length */
+		if (error)
+			break;
+		offset += chunk;
+		base += chunk;
+	} while (len);
+	if (aresid)
+		*aresid += len;
+	return (error);
+}
+
 /*
  * File table vnode read routine.
  */


@@ -608,6 +608,9 @@ int	vn_pollrecord __P((struct vnode *vp, struct proc *p, int events));
 int	vn_rdwr __P((enum uio_rw rw, struct vnode *vp, caddr_t base,
 	    int len, off_t offset, enum uio_seg segflg, int ioflg,
 	    struct ucred *cred, int *aresid, struct proc *p));
+int	vn_rdwr_inchunks __P((enum uio_rw rw, struct vnode *vp, caddr_t base,
+	    int len, off_t offset, enum uio_seg segflg, int ioflg,
+	    struct ucred *cred, int *aresid, struct proc *p));
 int	vn_stat __P((struct vnode *vp, struct stat *sb, struct proc *p));
 int	vn_start_write __P((struct vnode *vp, struct mount **mpp, int flags));
 dev_t	vn_todev __P((struct vnode *vp));