Implement sparse core dumps

Currently we allocate and map zero-filled anonymous pages when dumping
core, which can result in a lot of needless disk I/O and page
allocations.  This change teaches the core dumper to represent unbacked
ranges of virtual memory as holes in the core dump file instead.
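
For illustration only (not part of this change): the sparse-file
behaviour the dumper relies on is visible from userland.  The sketch
below uses a made-up scratch file name; it writes one page and then
extends the file with ftruncate(), and the extension reads back as
zeros while, on a filesystem with hole support, allocating no blocks.

#include <sys/stat.h>

#include <err.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
        char buf[4096];
        struct stat sb;
        int fd;

        /* "sparse.demo" is a made-up scratch file name. */
        fd = open("sparse.demo", O_RDWR | O_CREAT | O_TRUNC, 0600);
        if (fd == -1)
                err(1, "open");

        /* Write one page of data, then extend the file by 1MB. */
        memset(buf, 0xaa, sizeof(buf));
        if (pwrite(fd, buf, sizeof(buf), 0) == -1 ||
            ftruncate(fd, (off_t)sizeof(buf) + 1024 * 1024) == -1)
                err(1, "pwrite/ftruncate");

        /*
         * The extended range reads back as zeros but, on a filesystem
         * with hole support, allocates no blocks.
         */
        if (fstat(fd, &sb) == -1)
                err(1, "fstat");
        printf("apparent size %jd, allocated %jd bytes\n",
            (intmax_t)sb.st_size, (intmax_t)sb.st_blocks * 512);
        close(fd);
        return (0);
}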

Add a new page fault type, VM_FAULT_NOFILL, which causes vm_fault() to
clean up and return an error when it would otherwise map a zero-filled
page.  Then, in the core dumper code, prefault all user pages and handle
failures by simply extending the size of the core file, which leaves a
hole in place of the zero-filled data.  This also fixes a bug stemming
from the fact that vn_io_fault1() does not attempt partial I/O when
vm_fault_quick_hold_pages() fails: if a truncated file is mapped into a
user process, an attempt to dump beyond the end of the file results in
an error, and valid pages immediately preceding the end of the file
might therefore not have been dumped either.
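
The partial-I/O problem can be reproduced from userland with an
ordinary write(2) whose source buffer is a mapping of a since-truncated
file.  The following is a hypothetical sketch, not part of the change:
the file names are made up, and the exact errno and partial-write
behaviour depend on the kernel.

#include <sys/mman.h>

#include <err.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
        char *p;
        ssize_t n;
        long pgsz;
        int fd, out;

        pgsz = sysconf(_SC_PAGESIZE);

        /* Back a two-page read-only mapping with a two-page file. */
        fd = open("backing.tmp", O_RDWR | O_CREAT | O_TRUNC, 0600);
        if (fd == -1 || ftruncate(fd, 2 * pgsz) == -1)
                err(1, "backing file");
        p = mmap(NULL, 2 * pgsz, PROT_READ, MAP_SHARED, fd, 0);
        if (p == MAP_FAILED)
                err(1, "mmap");

        /* Truncate the file so the second page is no longer backed. */
        if (ftruncate(fd, pgsz) == -1)
                err(1, "ftruncate");

        /*
         * Writing the whole mapping now faults on the second page.  The
         * first page is still valid, but if the kernel gives up without
         * doing partial I/O, none of it reaches the output file.
         */
        out = open("out.tmp", O_WRONLY | O_CREAT | O_TRUNC, 0600);
        if (out == -1)
                err(1, "out.tmp");
        n = write(out, p, 2 * pgsz);
        printf("write returned %zd\n", n);      /* typically -1 (EFAULT) */
        return (0);
}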

The change reduces the core dump size of trivial programs by a factor of
ten simply by excluding unaccessed libc.so pages.
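
One way to observe the effect is to walk the resulting core file with
SEEK_DATA/SEEK_HOLE and compare the data extents against the apparent
size.  A rough sketch, not part of the change, assuming the filesystem
reports holes:

#include <sys/stat.h>

#include <err.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int
main(int argc, char **argv)
{
        struct stat sb;
        off_t data, hole, total;
        int fd;

        if (argc != 2)
                errx(1, "usage: %s <corefile>", argv[0]);
        fd = open(argv[1], O_RDONLY);
        if (fd == -1 || fstat(fd, &sb) == -1)
                err(1, "%s", argv[1]);

        /* Sum the data extents; the rest of the file is holes. */
        total = 0;
        for (hole = 0; (data = lseek(fd, hole, SEEK_DATA)) != -1;) {
                hole = lseek(fd, data, SEEK_HOLE);
                if (hole == -1)
                        hole = sb.st_size;
                total += hole - data;
        }
        printf("%jd of %jd bytes are data\n",
            (intmax_t)total, (intmax_t)sb.st_size);
        return (0);
}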

PR:		249067
Reviewed by:	kib
Tested by:	pho
MFC after:	1 month
Sponsored by:	The FreeBSD Foundation
Differential Revision:	https://reviews.freebsd.org/D26590
Mark Johnston 2020-10-02 17:50:22 +00:00
parent fec41f0751
commit f31695cc64
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=366368
3 changed files with 75 additions and 26 deletions

--- a/sys/kern/imgact_elf.c
+++ b/sys/kern/imgact_elf.c
@@ -1459,7 +1459,7 @@ extern int compress_user_cores_level;
 static void cb_put_phdr(vm_map_entry_t, void *);
 static void cb_size_segment(vm_map_entry_t, void *);
 static int core_write(struct coredump_params *, const void *, size_t, off_t,
-    enum uio_seg);
+    enum uio_seg, size_t *);
 static void each_dumpable_segment(struct thread *, segment_callback, void *);
 static int __elfN(corehdr)(struct coredump_params *, int, void *, size_t,
     struct note_info_list *, size_t);
@@ -1519,46 +1519,88 @@ core_compressed_write(void *base, size_t len, off_t offset, void *arg)
 {
 
         return (core_write((struct coredump_params *)arg, base, len, offset,
-            UIO_SYSSPACE));
+            UIO_SYSSPACE, NULL));
 }
 
 static int
 core_write(struct coredump_params *p, const void *base, size_t len,
-    off_t offset, enum uio_seg seg)
+    off_t offset, enum uio_seg seg, size_t *resid)
 {
 
         return (vn_rdwr_inchunks(UIO_WRITE, p->vp, __DECONST(void *, base),
             len, offset, seg, IO_UNIT | IO_DIRECT | IO_RANGELOCKED,
-            p->active_cred, p->file_cred, NULL, p->td));
+            p->active_cred, p->file_cred, resid, p->td));
 }
 
 static int
-core_output(void *base, size_t len, off_t offset, struct coredump_params *p,
+core_output(char *base, size_t len, off_t offset, struct coredump_params *p,
     void *tmpbuf)
 {
+        vm_map_t map;
+        struct mount *mp;
+        size_t resid, runlen;
         int error;
+        bool success;
+
+        KASSERT((uintptr_t)base % PAGE_SIZE == 0,
+            ("%s: user address %#lx is not page-aligned",
+            __func__, (uintptr_t)base));
 
         if (p->comp != NULL)
                 return (compress_chunk(p, base, tmpbuf, len));
 
-        /*
-         * EFAULT is a non-fatal error that we can get, for example,
-         * if the segment is backed by a file but extends beyond its
-         * end.
-         */
-        error = core_write(p, base, len, offset, UIO_USERSPACE);
-        if (error == EFAULT) {
-                log(LOG_WARNING, "Failed to fully fault in a core file segment "
-                    "at VA %p with size 0x%zx to be written at offset 0x%jx "
-                    "for process %s\n", base, len, offset, curproc->p_comm);
-
+        map = &p->td->td_proc->p_vmspace->vm_map;
+        for (; len > 0; base += runlen, offset += runlen, len -= runlen) {
                 /*
-                 * Write a "real" zero byte at the end of the target region
-                 * in the case this is the last segment.
-                 * The intermediate space will be implicitly zero-filled.
+                 * Attempt to page in all virtual pages in the range.  If a
+                 * virtual page is not backed by the pager, it is represented as
+                 * a hole in the file.  This can occur with zero-filled
+                 * anonymous memory or truncated files, for example.
                  */
-                error = core_write(p, zero_region, 1, offset + len - 1,
-                    UIO_SYSSPACE);
+                for (runlen = 0; runlen < len; runlen += PAGE_SIZE) {
+                        error = vm_fault(map, (uintptr_t)base + runlen,
+                            VM_PROT_READ, VM_FAULT_NOFILL, NULL);
+                        if (runlen == 0)
+                                success = error == KERN_SUCCESS;
+                        else if ((error == KERN_SUCCESS) != success)
+                                break;
+                }
+
+                if (success) {
+                        error = core_write(p, base, runlen, offset,
+                            UIO_USERSPACE, &resid);
+                        if (error != 0) {
+                                if (error != EFAULT)
+                                        break;
+                                /*
+                                 * EFAULT may be returned if the user mapping
+                                 * could not be accessed, e.g., because a mapped
+                                 * file has been truncated.  Skip the page if no
+                                 * progress was made, to protect against a
+                                 * hypothetical scenario where vm_fault() was
+                                 * successful but core_write() returns EFAULT
+                                 * anyway.
+                                 */
+                                runlen -= resid;
+                                if (runlen == 0) {
+                                        success = false;
+                                        runlen = PAGE_SIZE;
+                                }
+                        }
+                }
+
+                if (!success) {
+                        error = vn_start_write(p->vp, &mp, V_WAIT);
+                        if (error != 0)
+                                break;
+                        vn_lock(p->vp, LK_EXCLUSIVE | LK_RETRY);
+                        error = vn_truncate_locked(p->vp, offset + runlen,
+                            false, p->td->td_ucred);
+                        VOP_UNLOCK(p->vp);
+                        vn_finished_write(mp);
+                        if (error != 0)
+                                break;
+                }
         }
         return (error);
 }
@@ -1589,7 +1631,7 @@ sbuf_drain_core_output(void *arg, const char *data, int len)
                 error = compressor_write(p->comp, __DECONST(char *, data), len);
         else
                 error = core_write(p, __DECONST(void *, data), len, p->offset,
-                    UIO_SYSSPACE);
+                    UIO_SYSSPACE, NULL);
         if (locked)
                 PROC_LOCK(p->td->td_proc);
         if (error != 0)
@@ -1681,7 +1723,7 @@ __elfN(coredump)(struct thread *td, struct vnode *vp, off_t limit, int flags)
         php = (Elf_Phdr *)((char *)hdr + sizeof(Elf_Ehdr)) + 1;
         offset = round_page(hdrsize + notesz);
         for (i = 0; i < seginfo.count; i++) {
-                error = core_output((caddr_t)(uintptr_t)php->p_vaddr,
+                error = core_output((char *)(uintptr_t)php->p_vaddr,
                     php->p_filesz, offset, &params, tmpbuf);
                 if (error != 0)
                         break;

--- a/sys/vm/vm_fault.c
+++ b/sys/vm/vm_fault.c
@@ -1476,6 +1476,12 @@ vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
                  */
                 if (vm_fault_next(&fs))
                         continue;
+                if ((fs.fault_flags & VM_FAULT_NOFILL) != 0) {
+                        if (fs.first_object == fs.object)
+                                fault_page_free(&fs.first_m);
+                        unlock_and_deallocate(&fs);
+                        return (KERN_OUT_OF_BOUNDS);
+                }
                 VM_OBJECT_WUNLOCK(fs.object);
                 vm_fault_zerofill(&fs);
                 /* Don't try to prefault neighboring pages. */

--- a/sys/vm/vm_map.h
+++ b/sys/vm/vm_map.h
@@ -384,9 +384,10 @@ long vmspace_resident_count(struct vmspace *vmspace);
 /*
  * vm_fault option flags
  */
-#define VM_FAULT_NORMAL 0       /* Nothing special */
-#define VM_FAULT_WIRE   1       /* Wire the mapped page */
-#define VM_FAULT_DIRTY  2       /* Dirty the page; use w/VM_PROT_COPY */
+#define VM_FAULT_NORMAL 0x00    /* Nothing special */
+#define VM_FAULT_WIRE   0x01    /* Wire the mapped page */
+#define VM_FAULT_DIRTY  0x02    /* Dirty the page; use w/VM_PROT_COPY */
+#define VM_FAULT_NOFILL 0x04    /* Fail if the pager doesn't have a copy */
 
 /*
  * Initially, mappings are slightly sequential.  The maximum window size must