Fix core corruption caused by race in note_procstat_vmmap

This fix is spiritually similar to r287442 and was discovered thanks to
the KASSERT added in that revision.

NT_PROCSTAT_VMMAP output length, when packing kinfo structs, is tied to
the length of filenames corresponding to vnodes in the process' vm map
via vn_fullpath.  As vnodes may move during coredump, this is racy.

We do not remove the race, only prevent it from causing coredump
corruption.

- Add a sysctl, kern.coredump_pack_vmmapinfo, to allow users to disable
  kinfo packing for PROCSTAT_VMMAP notes.  This avoids VMMAP corruption
  and truncation, even if names change, at the cost of up to PATH_MAX
  bytes per mapped object.  The new sysctl is documented in core.5.

- Fix note_procstat_vmmap to self-limit in the second pass.  This
  addresses corruption, at the cost of sometimes producing a truncated
  result.

- Fix PROCSTAT_VMMAP consumers libutil (and libprocstat, via copy-paste)
  to grok the new zero padding.

Reported by:	pho (https://people.freebsd.org/~pho/stress/log/datamove4-2.txt)
Relnotes:	yes
Sponsored by:	EMC / Isilon Storage Division
Differential Revision:	https://reviews.freebsd.org/D3824
This commit is contained in:
Conrad Meyer 2015-10-06 18:07:00 +00:00
parent 4f4bbad316
commit e6b95927f3
8 changed files with 61 additions and 11 deletions

View File

@ -1867,6 +1867,8 @@ kinfo_getvmmap_core(struct procstat_core *core, int *cntp)
eb = buf + len;
while (bp < eb) {
kv = (struct kinfo_vmentry *)(uintptr_t)bp;
if (kv->kve_structsize == 0)
break;
bp += kv->kve_structsize;
cnt++;
}
@ -1882,6 +1884,8 @@ kinfo_getvmmap_core(struct procstat_core *core, int *cntp)
/* Pass 2: unpack */
while (bp < eb) {
kv = (struct kinfo_vmentry *)(uintptr_t)bp;
if (kv->kve_structsize == 0)
break;
/* Copy/expand into pre-zeroed buffer */
memcpy(kp, kv, kv->kve_structsize);
/* Advance to next packed record */

View File

@ -44,6 +44,8 @@ kinfo_getvmmap(pid_t pid, int *cntp)
eb = buf + len;
while (bp < eb) {
kv = (struct kinfo_vmentry *)(uintptr_t)bp;
if (kv->kve_structsize == 0)
break;
bp += kv->kve_structsize;
cnt++;
}
@ -59,6 +61,8 @@ kinfo_getvmmap(pid_t pid, int *cntp)
/* Pass 2: unpack */
while (bp < eb) {
kv = (struct kinfo_vmentry *)(uintptr_t)bp;
if (kv->kve_structsize == 0)
break;
/* Copy/expand into pre-zeroed buffer */
memcpy(kp, kv, kv->kve_structsize);
/* Advance to next packed record */

View File

@ -28,7 +28,7 @@
.\" @(#)core.5 8.3 (Berkeley) 12/11/93
.\" $FreeBSD$
.\"
.Dd September 2, 2015
.Dd October 5, 2015
.Dt CORE 5
.Os
.Sh NAME
@ -130,6 +130,19 @@ All file descriptor information can be preserved by disabling packing.
This potentially wastes up to PATH_MAX bytes per open fd.
Packing is disabled with
.Dl sysctl kern.coredump_pack_fileinfo=0 .
.Pp
Similarly, corefiles are written with vmmap information as an ELF note, which
contains file paths.
By default, they are packed to only use as much space as
needed.
By the same mechanism as for the open files note, these paths can also
change at any time and result in a truncated note.
.Pp
All vmmap information can be preserved by disabling packing.
Like the file information, this potentially wastes up to PATH_MAX bytes per
mapped object.
Packing is disabled with
.Dl sysctl kern.coredump_pack_vmmapinfo=0 .
.Sh EXAMPLES
In order to store all core images in per-user private areas under
.Pa /var/coredumps ,

View File

@ -1959,24 +1959,30 @@ note_procstat_vmmap(void *arg, struct sbuf *sb, size_t *sizep)
{
struct proc *p;
size_t size;
int structsize;
int structsize, vmmap_flags;
if (coredump_pack_vmmapinfo)
vmmap_flags = KERN_VMMAP_PACK_KINFO;
else
vmmap_flags = 0;
p = (struct proc *)arg;
structsize = sizeof(struct kinfo_vmentry);
if (sb == NULL) {
size = 0;
sb = sbuf_new(NULL, NULL, 128, SBUF_FIXEDLEN);
sbuf_set_drain(sb, sbuf_drain_count, &size);
sbuf_bcat(sb, &structsize, sizeof(structsize));
PROC_LOCK(p);
kern_proc_vmmap_out(p, sb);
kern_proc_vmmap_out(p, sb, -1, vmmap_flags);
sbuf_finish(sb);
sbuf_delete(sb);
*sizep = size;
} else {
structsize = sizeof(struct kinfo_vmentry);
sbuf_bcat(sb, &structsize, sizeof(structsize));
PROC_LOCK(p);
kern_proc_vmmap_out(p, sb);
kern_proc_vmmap_out(p, sb, *sizep - sizeof(structsize),
vmmap_flags);
}
}

View File

@ -105,6 +105,11 @@ SYSCTL_INT(_kern, OID_AUTO, coredump_pack_fileinfo, CTLFLAG_RWTUN,
&coredump_pack_fileinfo, 0,
"Enable file path packing in 'procstat -f' coredump notes");
int coredump_pack_vmmapinfo = 1;
SYSCTL_INT(_kern, OID_AUTO, coredump_pack_vmmapinfo, CTLFLAG_RWTUN,
&coredump_pack_vmmapinfo, 0,
"Enable file path packing in 'procstat -v' coredump notes");
static int sysctl_kern_ps_strings(SYSCTL_HANDLER_ARGS);
static int sysctl_kern_usrstack(SYSCTL_HANDLER_ARGS);
static int sysctl_kern_stackprot(SYSCTL_HANDLER_ARGS);

View File

@ -2252,7 +2252,7 @@ next:;
* Must be called with the process locked and will return unlocked.
*/
int
kern_proc_vmmap_out(struct proc *p, struct sbuf *sb)
kern_proc_vmmap_out(struct proc *p, struct sbuf *sb, ssize_t maxlen, int flags)
{
vm_map_entry_t entry, tmp_entry;
struct vattr va;
@ -2276,7 +2276,7 @@ kern_proc_vmmap_out(struct proc *p, struct sbuf *sb)
PRELE(p);
return (ESRCH);
}
kve = malloc(sizeof(*kve), M_TEMP, M_WAITOK);
kve = malloc(sizeof(*kve), M_TEMP, M_WAITOK | M_ZERO);
error = 0;
map = &vm->vm_map;
@ -2411,10 +2411,23 @@ kern_proc_vmmap_out(struct proc *p, struct sbuf *sb)
free(freepath, M_TEMP);
/* Pack record size down */
kve->kve_structsize = offsetof(struct kinfo_vmentry, kve_path) +
strlen(kve->kve_path) + 1;
if ((flags & KERN_VMMAP_PACK_KINFO) != 0)
kve->kve_structsize =
offsetof(struct kinfo_vmentry, kve_path) +
strlen(kve->kve_path) + 1;
else
kve->kve_structsize = sizeof(*kve);
kve->kve_structsize = roundup(kve->kve_structsize,
sizeof(uint64_t));
/* Halt filling and truncate rather than exceeding maxlen */
if (maxlen != -1 && maxlen < kve->kve_structsize) {
error = 0;
vm_map_lock_read(map);
break;
} else if (maxlen != -1)
maxlen -= kve->kve_structsize;
if (sbuf_bcat(sb, kve, kve->kve_structsize) != 0)
error = ENOMEM;
vm_map_lock_read(map);
@ -2447,7 +2460,7 @@ sysctl_kern_proc_vmmap(SYSCTL_HANDLER_ARGS)
sbuf_delete(&sb);
return (error);
}
error = kern_proc_vmmap_out(p, &sb);
error = kern_proc_vmmap_out(p, &sb, -1, KERN_VMMAP_PACK_KINFO);
error2 = sbuf_finish(&sb);
sbuf_delete(&sb);
return (error != 0 ? error : error2);

View File

@ -84,6 +84,7 @@ int exec_register(const struct execsw *);
int exec_unregister(const struct execsw *);
extern int coredump_pack_fileinfo;
extern int coredump_pack_vmmapinfo;
/*
* note: name##_mod cannot be const storage because the

View File

@ -541,6 +541,9 @@ struct kinfo_sigtramp {
/* Flags for kern_proc_filedesc_out. */
#define KERN_FILEDESC_PACK_KINFO 0x00000001U
/* Flags for kern_proc_vmmap_out. */
#define KERN_VMMAP_PACK_KINFO 0x00000001U
struct sbuf;
/*
@ -556,7 +559,8 @@ int kern_proc_filedesc_out(struct proc *p, struct sbuf *sb, ssize_t maxlen,
int flags);
int kern_proc_cwd_out(struct proc *p, struct sbuf *sb, ssize_t maxlen);
int kern_proc_out(struct proc *p, struct sbuf *sb, int flags);
int kern_proc_vmmap_out(struct proc *p, struct sbuf *sb);
int kern_proc_vmmap_out(struct proc *p, struct sbuf *sb, ssize_t maxlen,
int flags);
int vntype_to_kinfo(int vtype);
#endif /* !_KERNEL */