Detect badly behaved coredump note helpers

Coredump notes depend on being able to invoke dump routines twice; once
in a dry-run mode to get the size of the note, and another to actually
emit the note to the corefile.

When a note helper emits a different length section the second time
around than the length it requested the first time, the kernel produces
a corrupt coredump.

NT_PROCSTAT_FILES output length, when packing kinfo structs, is tied to
the length of filenames corresponding to vnodes in the process' fd table
via vn_fullpath.  As vnodes may move around during dump, this is racy.

So:

 - Detect badly behaved notes in putnote() and pad underfilled notes.

 - Add a fail point, debug.fail_point.fill_kinfo_vnode__random_path to
   exercise the NT_PROCSTAT_FILES corruption.  It simply picks random
   lengths to expand or truncate paths to in fo_fill_kinfo_vnode().

 - Add a sysctl, kern.coredump_pack_fileinfo, to allow users to
   disable kinfo packing for PROCSTAT_FILES notes.  This should avoid
   both FILES note corruption and truncation, even if filenames change,
   at the cost of about 1 kiB in padding bloat per open fd.  Document
   the new sysctl in core.5.

 - Fix note_procstat_files to self-limit in the 2nd pass.  Since
   sometimes this will result in a short write, pad up to our advertised
   size.  This addresses note corruption, at the risk of sometimes
   truncating the last several fd info entries.

 - Fix NT_PROCSTAT_FILES consumers libutil and libprocstat to grok the
   zero padding.

With suggestions from:	bjk, jhb, kib, wblock
Approved by:	markj (mentor)
Relnotes:	yes
Sponsored by:	EMC / Isilon Storage Division
Differential Revision:	https://reviews.freebsd.org/D3548
This commit is contained in:
Conrad Meyer 2015-09-03 20:32:10 +00:00
parent 188458ea7c
commit 14bdbaf2e4
7 changed files with 129 additions and 19 deletions

View File

@ -767,6 +767,8 @@ kinfo_getfile_core(struct procstat_core *core, int *cntp)
eb = buf + len;
while (bp < eb) {
kf = (struct kinfo_file *)(uintptr_t)bp;
if (kf->kf_structsize == 0)
break;
bp += kf->kf_structsize;
cnt++;
}
@ -782,6 +784,8 @@ kinfo_getfile_core(struct procstat_core *core, int *cntp)
/* Pass 2: unpack */
while (bp < eb) {
kf = (struct kinfo_file *)(uintptr_t)bp;
if (kf->kf_structsize == 0)
break;
/* Copy/expand into pre-zeroed buffer */
memcpy(kp, kf, kf->kf_structsize);
/* Advance to next packed record */

View File

@ -44,6 +44,8 @@ kinfo_getfile(pid_t pid, int *cntp)
eb = buf + len;
while (bp < eb) {
kf = (struct kinfo_file *)(uintptr_t)bp;
if (kf->kf_structsize == 0)
break;
bp += kf->kf_structsize;
cnt++;
}
@ -59,6 +61,8 @@ kinfo_getfile(pid_t pid, int *cntp)
/* Pass 2: unpack */
while (bp < eb) {
kf = (struct kinfo_file *)(uintptr_t)bp;
if (kf->kf_structsize == 0)
break;
/* Copy/expand into pre-zeroed buffer */
memcpy(kp, kf, kf->kf_structsize);
/* Advance to next packed record */

View File

@ -28,7 +28,7 @@
.\" @(#)core.5 8.3 (Berkeley) 12/11/93
.\" $FreeBSD$
.\"
.Dd March 8, 2015
.Dd September 2, 2015
.Dt CORE 5
.Os
.Sh NAME
@ -120,6 +120,16 @@ Compressed core files will have a suffix of
.Ql .gz
appended to them.
.El
.Sh NOTES
Corefiles are written with open file descriptor information as an ELF note.
By default, file paths are packed to only use as much space as needed.
However, file paths can change at any time, including during core dump,
and this can result in truncated file descriptor data.
.Pp
All file descriptor information can be preserved by disabling packing.
This potentially wastes up to PATH_MAX bytes per open fd.
Packing is disabled with
.Dl sysctl kern.coredump_pack_fileinfo=0 .
.Sh EXAMPLES
In order to store all core images in per-user private areas under
.Pa /var/coredumps ,

View File

@ -1677,7 +1677,8 @@ static void
__elfN(putnote)(struct note_info *ninfo, struct sbuf *sb)
{
Elf_Note note;
ssize_t old_len;
ssize_t old_len, sect_len;
size_t new_len, descsz, i;
if (ninfo->type == -1) {
ninfo->outfunc(ninfo->outarg, sb, &ninfo->outsize);
@ -1696,7 +1697,33 @@ __elfN(putnote)(struct note_info *ninfo, struct sbuf *sb)
return;
sbuf_start_section(sb, &old_len);
ninfo->outfunc(ninfo->outarg, sb, &ninfo->outsize);
sbuf_end_section(sb, old_len, ELF_NOTE_ROUNDSIZE, 0);
sect_len = sbuf_end_section(sb, old_len, ELF_NOTE_ROUNDSIZE, 0);
if (sect_len < 0)
return;
new_len = (size_t)sect_len;
descsz = roundup(note.n_descsz, ELF_NOTE_ROUNDSIZE);
if (new_len < descsz) {
/*
* It is expected that individual note emitters will correctly
* predict their expected output size and fill up to that size
* themselves, padding in a format-specific way if needed.
* However, in case they don't, just do it here with zeros.
*/
for (i = 0; i < descsz - new_len; i++)
sbuf_putc(sb, 0);
} else if (new_len > descsz) {
/*
* We can't always truncate sb -- we may have drained some
* of it already.
*/
KASSERT(new_len == descsz, ("%s: Note type %u changed as we "
"read it (%zu > %zu). Since it is longer than "
"expected, this coredump's notes are corrupt. THIS "
"IS A BUG in the note_procstat routine for type %u.\n",
__func__, (unsigned)note.n_type, new_len, descsz,
(unsigned)note.n_type));
}
}
/*
@ -1875,29 +1902,56 @@ __elfN(note_procstat_proc)(void *arg, struct sbuf *sb, size_t *sizep)
CTASSERT(sizeof(struct kinfo_file) == KINFO_FILE_SIZE);
#endif
static int pack_fileinfo = 1;
SYSCTL_INT(_kern, OID_AUTO, coredump_pack_fileinfo, CTLFLAG_RWTUN,
&pack_fileinfo, 0,
"Enable file path packing in 'procstat -f' coredump notes");
static void
note_procstat_files(void *arg, struct sbuf *sb, size_t *sizep)
{
struct proc *p;
size_t size;
int structsize;
size_t size, sect_sz, i;
ssize_t start_len, sect_len;
int structsize, filedesc_flags;
if (pack_fileinfo)
filedesc_flags = KERN_FILEDESC_PACK_KINFO;
else
filedesc_flags = 0;
p = (struct proc *)arg;
structsize = sizeof(struct kinfo_file);
if (sb == NULL) {
size = 0;
sb = sbuf_new(NULL, NULL, 128, SBUF_FIXEDLEN);
sbuf_set_drain(sb, sbuf_drain_count, &size);
sbuf_bcat(sb, &structsize, sizeof(structsize));
PROC_LOCK(p);
kern_proc_filedesc_out(p, sb, -1);
kern_proc_filedesc_out(p, sb, -1, filedesc_flags);
sbuf_finish(sb);
sbuf_delete(sb);
*sizep = size;
} else {
structsize = sizeof(struct kinfo_file);
sbuf_start_section(sb, &start_len);
sbuf_bcat(sb, &structsize, sizeof(structsize));
PROC_LOCK(p);
kern_proc_filedesc_out(p, sb, -1);
kern_proc_filedesc_out(p, sb, *sizep - sizeof(structsize),
filedesc_flags);
sect_len = sbuf_end_section(sb, start_len, 0, 0);
if (sect_len < 0)
return;
sect_sz = sect_len;
KASSERT(sect_sz <= *sizep,
("kern_proc_filedesc_out did not respect maxlen; "
"requested %zu, got %zu", *sizep - sizeof(structsize),
sect_sz - sizeof(structsize)));
for (i = 0; i < *sizep - sect_sz && sb->s_error == 0; i++)
sbuf_putc(sb, 0);
}
}

View File

@ -3283,7 +3283,7 @@ pack_kinfo(struct kinfo_file *kif)
static void
export_file_to_kinfo(struct file *fp, int fd, cap_rights_t *rightsp,
struct kinfo_file *kif, struct filedesc *fdp)
struct kinfo_file *kif, struct filedesc *fdp, int flags)
{
int error;
@ -3307,12 +3307,15 @@ export_file_to_kinfo(struct file *fp, int fd, cap_rights_t *rightsp,
error = fo_fill_kinfo(fp, kif, fdp);
if (error == 0)
kif->kf_status |= KF_ATTR_VALID;
pack_kinfo(kif);
if ((flags & KERN_FILEDESC_PACK_KINFO) != 0)
pack_kinfo(kif);
else
kif->kf_structsize = roundup2(sizeof(*kif), sizeof(uint64_t));
}
static void
export_vnode_to_kinfo(struct vnode *vp, int fd, int fflags,
struct kinfo_file *kif)
struct kinfo_file *kif, int flags)
{
int error;
@ -3327,7 +3330,10 @@ export_vnode_to_kinfo(struct vnode *vp, int fd, int fflags,
kif->kf_fd = fd;
kif->kf_ref_count = -1;
kif->kf_offset = -1;
pack_kinfo(kif);
if ((flags & KERN_FILEDESC_PACK_KINFO) != 0)
pack_kinfo(kif);
else
kif->kf_structsize = roundup2(sizeof(*kif), sizeof(uint64_t));
vrele(vp);
}
@ -3336,6 +3342,7 @@ struct export_fd_buf {
struct sbuf *sb;
ssize_t remainder;
struct kinfo_file kif;
int flags;
};
static int
@ -3363,7 +3370,8 @@ export_file_to_sb(struct file *fp, int fd, cap_rights_t *rightsp,
if (efbuf->remainder == 0)
return (0);
export_file_to_kinfo(fp, fd, rightsp, &efbuf->kif, efbuf->fdp);
export_file_to_kinfo(fp, fd, rightsp, &efbuf->kif, efbuf->fdp,
efbuf->flags);
FILEDESC_SUNLOCK(efbuf->fdp);
error = export_kinfo_to_sb(efbuf);
FILEDESC_SLOCK(efbuf->fdp);
@ -3380,7 +3388,7 @@ export_vnode_to_sb(struct vnode *vp, int fd, int fflags,
return (0);
if (efbuf->fdp != NULL)
FILEDESC_SUNLOCK(efbuf->fdp);
export_vnode_to_kinfo(vp, fd, fflags, &efbuf->kif);
export_vnode_to_kinfo(vp, fd, fflags, &efbuf->kif, efbuf->flags);
error = export_kinfo_to_sb(efbuf);
if (efbuf->fdp != NULL)
FILEDESC_SLOCK(efbuf->fdp);
@ -3393,7 +3401,8 @@ export_vnode_to_sb(struct vnode *vp, int fd, int fflags,
* Takes a locked proc as argument, and returns with the proc unlocked.
*/
int
kern_proc_filedesc_out(struct proc *p, struct sbuf *sb, ssize_t maxlen)
kern_proc_filedesc_out(struct proc *p, struct sbuf *sb, ssize_t maxlen,
int flags)
{
struct file *fp;
struct filedesc *fdp;
@ -3425,6 +3434,7 @@ kern_proc_filedesc_out(struct proc *p, struct sbuf *sb, ssize_t maxlen)
efbuf->fdp = NULL;
efbuf->sb = sb;
efbuf->remainder = maxlen;
efbuf->flags = flags;
if (tracevp != NULL)
export_vnode_to_sb(tracevp, KF_FD_TYPE_TRACE, FREAD | FWRITE,
efbuf);
@ -3501,7 +3511,8 @@ sysctl_kern_proc_filedesc(SYSCTL_HANDLER_ARGS)
return (error);
}
maxlen = req->oldptr != NULL ? req->oldlen : -1;
error = kern_proc_filedesc_out(p, &sb, maxlen);
error = kern_proc_filedesc_out(p, &sb, maxlen,
KERN_FILEDESC_PACK_KINFO);
error2 = sbuf_finish(&sb);
sbuf_delete(&sb);
return (error != 0 ? error : error2);
@ -3541,7 +3552,7 @@ export_vnode_for_osysctl(struct vnode *vp, int type, struct kinfo_file *kif,
vref(vp);
FILEDESC_SUNLOCK(fdp);
export_vnode_to_kinfo(vp, type, 0, kif);
export_vnode_to_kinfo(vp, type, 0, kif, KERN_FILEDESC_PACK_KINFO);
kinfo_to_okinfo(kif, okif);
error = SYSCTL_OUT(req, okif, sizeof(*okif));
FILEDESC_SLOCK(fdp);
@ -3584,7 +3595,8 @@ sysctl_kern_proc_ofiledesc(SYSCTL_HANDLER_ARGS)
for (i = 0; fdp->fd_refcnt > 0 && i <= fdp->fd_lastfile; i++) {
if ((fp = fdp->fd_ofiles[i].fde_file) == NULL)
continue;
export_file_to_kinfo(fp, i, NULL, kif, fdp);
export_file_to_kinfo(fp, i, NULL, kif, fdp,
KERN_FILEDESC_PACK_KINFO);
FILEDESC_SUNLOCK(fdp);
kinfo_to_okinfo(kif, okif);
error = SYSCTL_OUT(req, okif, sizeof(*okif));

View File

@ -46,6 +46,7 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/disk.h>
#include <sys/fail.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/kdb.h>
@ -2379,6 +2380,24 @@ vn_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp)
return (error);
}
static inline void
vn_fill_junk(struct kinfo_file *kif)
{
size_t len, olen;
/*
* Simulate vn_fullpath returning changing values for a given
* vp during e.g. coredump.
*/
len = (arc4random() % (sizeof(kif->kf_path) - 2)) + 1;
olen = strlen(kif->kf_path);
if (len < olen)
strcpy(&kif->kf_path[len - 1], "$");
else
for (; olen < len; olen++)
strcpy(&kif->kf_path[olen], "A");
}
int
vn_fill_kinfo_vnode(struct vnode *vp, struct kinfo_file *kif)
{
@ -2396,6 +2415,10 @@ vn_fill_kinfo_vnode(struct vnode *vp, struct kinfo_file *kif)
if (freepath != NULL)
free(freepath, M_TEMP);
KFAIL_POINT_CODE(DEBUG_FP, fill_kinfo_vnode__random_path,
vn_fill_junk(kif);
);
/*
* Retrieve vnode attributes.
*/

View File

@ -539,6 +539,8 @@ struct kinfo_sigtramp {
#define KERN_PROC_NOTHREADS 0x1
#define KERN_PROC_MASK32 0x2
/* Flags for kern_proc_filedesc_out. */
#define KERN_FILEDESC_PACK_KINFO 0x00000001U
struct sbuf;
/*
@ -550,7 +552,8 @@ struct sbuf;
* to be locked on enter. On return the process is unlocked.
*/
int kern_proc_filedesc_out(struct proc *p, struct sbuf *sb, ssize_t maxlen);
int kern_proc_filedesc_out(struct proc *p, struct sbuf *sb, ssize_t maxlen,
int flags);
int kern_proc_cwd_out(struct proc *p, struct sbuf *sb, ssize_t maxlen);
int kern_proc_out(struct proc *p, struct sbuf *sb, int flags);
int kern_proc_vmmap_out(struct proc *p, struct sbuf *sb);