Change the way ELF coredumps are handled. Instead of unconditionally
skipping read-only pages, which can result in valuable non-text-related
data not getting dumped, the ELF loader and the dynamic loader now mark
read-only text pages NOCORE, and the coredump code checks (primarily)
only for complete inaccessibility of the page or for NOCORE being set.
Certain applications which map large amounts of read-only data will now
produce much larger cores. A new sysctl, debug.elf_legacy_coredump, has
been added to revert to the old behavior.

This commit represents collaborative work by all parties involved.
The PR contains a program demonstrating the problem.

PR:		kern/45994
Submitted by:	"Peter Edwards" <pmedwards@eircom.net>,
		Archie Cobbs <archie@dellroad.org>
Reviewed by:	jdp, dillon
MFC after:	7 days
commit fa7dd9c5bc
parent 47770b6fd5
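For context, here is a minimal userland sketch (an illustration, not the
demonstration program attached to the PR) of the mechanism this change
builds on: FreeBSD's mmap(2) MAP_NOCORE flag and madvise(2) MADV_NOCORE
both exclude a mapping from core dumps. The loaders now apply the same
marking to read-only text pages automatically.

/*
 * Sketch: mark mappings as "do not dump". FreeBSD-specific flags;
 * the sizes are arbitrary. Run it, then inspect the core left by
 * abort(); neither region should appear in it.
 */
#include <sys/mman.h>
#include <err.h>
#include <stdlib.h>

int
main(void)
{
	size_t len = 1024 * 1024;

	/* Excluded from any core dump at creation time. */
	void *quiet = mmap(NULL, len, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE | MAP_NOCORE, -1, 0);
	if (quiet == MAP_FAILED)
		err(1, "mmap");

	/* Or excluded after the fact. */
	void *later = mmap(NULL, len, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE, -1, 0);
	if (later == MAP_FAILED)
		err(1, "mmap");
	if (madvise(later, len, MADV_NOCORE) == -1)
		err(1, "madvise");

	abort();
}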
--- libexec/rtld-elf/map_object.c
+++ libexec/rtld-elf/map_object.c
@@ -38,7 +38,8 @@
 #include "debug.h"
 #include "rtld.h"
 
-static int protflags(int);	/* Elf flags -> mmap protection */
+static int convert_prot(int);	/* Elf flags -> mmap protection */
+static int convert_flags(int);	/* Elf flags -> mmap flags */
 
 /*
  * Map a shared object into memory. The "fd" argument is a file descriptor,
@@ -75,6 +76,7 @@ map_object(int fd, const char *path, const struct stat *sb)
     Elf_Addr data_vlimit;
     caddr_t data_addr;
     int data_prot;
+    int data_flags;
     Elf_Addr clear_vaddr;
     caddr_t clear_addr;
     caddr_t clear_page;
@@ -189,8 +191,8 @@ map_object(int fd, const char *path, const struct stat *sb)
     mapsize = base_vlimit - base_vaddr;
     base_addr = u.hdr.e_type == ET_EXEC ? (caddr_t) base_vaddr : NULL;
 
-    mapbase = mmap(base_addr, mapsize, protflags(segs[0]->p_flags),
-      MAP_PRIVATE, fd, base_offset);
+    mapbase = mmap(base_addr, mapsize, convert_prot(segs[0]->p_flags),
+      convert_flags(segs[0]->p_flags), fd, base_offset);
     if (mapbase == (caddr_t) -1) {
 	_rtld_error("%s: mmap of entire address space failed: %s",
 	  path, strerror(errno));
@@ -209,10 +211,11 @@ map_object(int fd, const char *path, const struct stat *sb)
 	data_vaddr = trunc_page(segs[i]->p_vaddr);
 	data_vlimit = round_page(segs[i]->p_vaddr + segs[i]->p_filesz);
 	data_addr = mapbase + (data_vaddr - base_vaddr);
-	data_prot = protflags(segs[i]->p_flags);
+	data_prot = convert_prot(segs[i]->p_flags);
+	data_flags = convert_flags(segs[i]->p_flags) | MAP_FIXED;
 	/* Do not call mmap on the first segment - this is redundant */
 	if (i && mmap(data_addr, data_vlimit - data_vaddr, data_prot,
-	  MAP_PRIVATE|MAP_FIXED, fd, data_offset) == (caddr_t) -1) {
+	  data_flags, fd, data_offset) == (caddr_t) -1) {
 	    _rtld_error("%s: mmap of data failed: %s", path, strerror(errno));
 	    return NULL;
 	}
@@ -315,7 +318,7 @@ obj_new(void)
  * flags for MMAP.
  */
 static int
-protflags(int elfflags)
+convert_prot(int elfflags)
 {
     int prot = 0;
     if (elfflags & PF_R)
@@ -326,3 +329,17 @@ protflags(int elfflags)
 	prot |= PROT_EXEC;
     return prot;
 }
+
+static int
+convert_flags(int elfflags)
+{
+    int flags = MAP_PRIVATE; /* All mappings are private */
+
+    /*
+     * Readonly mappings are marked "MAP_NOCORE", because they can be
+     * reconstructed by a debugger.
+     */
+    if (!(elfflags & PF_W))
+	flags |= MAP_NOCORE;
+    return flags;
+}
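Taken together, the two converters turn ELF segment permission bits into
mmap(2) protection and flags. Below is a standalone sketch of their
effect (the PF_* values are the standard ELF ones; MAP_NOCORE is
FreeBSD-specific): a typical read-only text segment, PF_R|PF_X, now maps
with MAP_PRIVATE|MAP_NOCORE and is therefore left out of cores.

#include <sys/mman.h>
#include <assert.h>

/* Standard ELF p_flags permission bits. */
#define PF_X	0x1
#define PF_W	0x2
#define PF_R	0x4

static int
convert_prot(int elfflags)
{
	int prot = 0;

	if (elfflags & PF_R)
		prot |= PROT_READ;
	if (elfflags & PF_W)
		prot |= PROT_WRITE;
	if (elfflags & PF_X)
		prot |= PROT_EXEC;
	return prot;
}

static int
convert_flags(int elfflags)
{
	int flags = MAP_PRIVATE;	/* all mappings are private */

	if (!(elfflags & PF_W))		/* read-only: skip in coredumps */
		flags |= MAP_NOCORE;
	return flags;
}

int
main(void)
{
	/* Text segment: readable, executable, not writable. */
	assert(convert_prot(PF_R | PF_X) == (PROT_READ | PROT_EXEC));
	assert(convert_flags(PF_R | PF_X) == (MAP_PRIVATE | MAP_NOCORE));
	/* Data segment: writable, so it still gets dumped. */
	assert(convert_flags(PF_R | PF_W) == MAP_PRIVATE);
	return 0;
}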
--- sys/kern/imgact_elf.c
+++ sys/kern/imgact_elf.c
@@ -87,6 +87,9 @@ SYSCTL_INT(_debug, OID_AUTO, elf32_trace, CTLFLAG_RW, &elf_trace, 0, "");
 #else
 SYSCTL_INT(_debug, OID_AUTO, elf64_trace, CTLFLAG_RW, &elf_trace, 0, "");
 #endif
+static int elf_legacy_coredump = 0;
+SYSCTL_INT(_debug, OID_AUTO, elf_legacy_coredump, CTLFLAG_RW,
+    &elf_legacy_coredump, 0, "");
 
 static Elf_Brandinfo *elf_brand_list[MAX_BRANDS];
 extern int fallback_elf_brand;
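The new knob can be flipped at runtime with sysctl(8), i.e.
"sysctl debug.elf_legacy_coredump=1", or programmatically through
sysctlbyname(3), as in this small sketch (setting it requires root):

#include <sys/types.h>
#include <sys/sysctl.h>
#include <err.h>
#include <stdio.h>

int
main(void)
{
	int legacy, on = 1;
	size_t len = sizeof(legacy);

	/* Read the current mode: 0 = new behavior, 1 = legacy. */
	if (sysctlbyname("debug.elf_legacy_coredump", &legacy, &len,
	    NULL, 0) == -1)
		err(1, "sysctlbyname");
	printf("elf_legacy_coredump: %d\n", legacy);

	/* Revert to the pre-change dumping rules. */
	if (sysctlbyname("debug.elf_legacy_coredump", NULL, NULL,
	    &on, sizeof(on)) == -1)
		err(1, "sysctlbyname(set)");
	return 0;
}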
@@ -349,7 +352,7 @@ __elfN(load_section)(struct proc *p, struct vmspace *vmspace,
 {
 	size_t map_len;
 	vm_offset_t map_addr;
-	int error, rv;
+	int error, rv, cow;
 	size_t copy_len;
 	vm_offset_t file_addr;
 	vm_offset_t data_buf = 0;
@@ -392,6 +395,11 @@ __elfN(load_section)(struct proc *p, struct vmspace *vmspace,
 
 	if (map_len != 0) {
 	    vm_object_reference(object);
+
+	    /* cow flags: don't dump readonly sections in core */
+	    cow = MAP_COPY_ON_WRITE | MAP_PREFAULT |
+		(prot & VM_PROT_WRITE ? 0 : MAP_DISABLE_COREDUMP);
+
 	    rv = __elfN(map_insert)(&vmspace->vm_map,
 				    object,
 				    file_addr,	/* file offset */
@@ -399,7 +407,7 @@ __elfN(load_section)(struct proc *p, struct vmspace *vmspace,
 				    map_addr + map_len,/* virtual end */
 				    prot,
 				    VM_PROT_ALL,
-				    MAP_COPY_ON_WRITE | MAP_PREFAULT);
+				    cow);
 	    if (rv != KERN_SUCCESS) {
 		vm_object_deallocate(object);
 		return (EINVAL);
@@ -1042,17 +1050,29 @@ each_writable_segment(p, func, closure)
 	    entry = entry->next) {
 		vm_object_t obj;
 
-		if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) ||
-		    (entry->protection & (VM_PROT_READ|VM_PROT_WRITE)) !=
-		    (VM_PROT_READ|VM_PROT_WRITE))
-			continue;
+		/*
+		 * Don't dump inaccessible mappings, deal with legacy
+		 * coredump mode.
+		 *
+		 * Note that read-only segments related to the elf binary
+		 * are marked MAP_ENTRY_NOCOREDUMP now so we no longer
+		 * need to arbitrarily ignore such segments.
+		 */
+		if (elf_legacy_coredump) {
+			if ((entry->protection & VM_PROT_RW) != VM_PROT_RW)
+				continue;
+		} else {
+			if ((entry->protection & VM_PROT_ALL) == 0)
+				continue;
+		}
 
 		/*
-		** Dont include memory segment in the coredump if
-		** MAP_NOCORE is set in mmap(2) or MADV_NOCORE in
-		** madvise(2).
+		 * Dont include memory segment in the coredump if
+		 * MAP_NOCORE is set in mmap(2) or MADV_NOCORE in
+		 * madvise(2). Do not dump submaps (i.e. parts of the
+		 * kernel map).
 		 */
-		if (entry->eflags & MAP_ENTRY_NOCOREDUMP)
+		if (entry->eflags & (MAP_ENTRY_NOCOREDUMP|MAP_ENTRY_IS_SUB_MAP))
 			continue;
 
 		if ((obj = entry->object.vm_object) == NULL)
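The rewritten loop body amounts to the predicate below. This is an
illustrative standalone restatement, not kernel code (the MAP_ENTRY_*
values are stand-ins): legacy mode dumps only read-write entries, the
new mode skips only entries with no access at all, and NOCORE entries
and submaps are never dumped in either mode.

#include <stdbool.h>

#define VM_PROT_READ		0x01
#define VM_PROT_WRITE		0x02
#define VM_PROT_EXECUTE		0x04
#define VM_PROT_ALL		(VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE)
#define VM_PROT_RW		(VM_PROT_READ|VM_PROT_WRITE)
#define MAP_ENTRY_IS_SUB_MAP	0x01	/* stand-in value */
#define MAP_ENTRY_NOCOREDUMP	0x02	/* stand-in value */

static bool
should_dump(int eflags, int protection, bool legacy)
{
	if (legacy) {
		/* Old rule: only read-write segments are dumped. */
		if ((protection & VM_PROT_RW) != VM_PROT_RW)
			return false;
	} else {
		/* New rule: skip only fully inaccessible segments. */
		if ((protection & VM_PROT_ALL) == 0)
			return false;
	}
	/* NOCORE mappings and submaps are never dumped. */
	if (eflags & (MAP_ENTRY_NOCOREDUMP | MAP_ENTRY_IS_SUB_MAP))
		return false;
	return true;
}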
--- sys/vm/vm.h
+++ sys/vm/vm.h
@@ -81,6 +81,7 @@ typedef u_char vm_prot_t;	/* protection codes */
 #define VM_PROT_OVERRIDE_WRITE	((vm_prot_t) 0x08)	/* copy-on-write */
 
 #define VM_PROT_ALL	(VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE)
+#define VM_PROT_RW	(VM_PROT_READ|VM_PROT_WRITE)
 #define VM_PROT_DEFAULT	VM_PROT_ALL
 
 union vm_map_object;