freebsd-dev/sys/security/audit/audit_bsm_klib.c

549 lines
12 KiB
C
Raw Normal View History

/*
* Copyright (c) 1999-2005 Apple Inc.
* Copyright (c) 2005 Robert N. M. Watson
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of Apple Inc. ("Apple") nor the names of
* its contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/fcntl.h>
#include <sys/filedesc.h>
#include <sys/libkern.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/sem.h>
#include <sys/syscall.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/vnode.h>
#include <bsm/audit.h>
#include <bsm/audit_kevents.h>
#include <security/audit/audit.h>
#include <security/audit/audit_private.h>
/*
* Hash table functions for the audit event number to event class mask
* mapping.
*/
#define EVCLASSMAP_HASH_TABLE_SIZE 251
struct evclass_elem {
au_event_t event;
au_class_t class;
LIST_ENTRY(evclass_elem) entry;
};
struct evclass_list {
LIST_HEAD(, evclass_elem) head;
};
static MALLOC_DEFINE(M_AUDITEVCLASS, "audit_evclass", "Audit event class");
static struct mtx evclass_mtx;
static struct evclass_list evclass_hash[EVCLASSMAP_HASH_TABLE_SIZE];
/*
* Look up the class for an audit event in the class mapping table.
*/
au_class_t
au_event_class(au_event_t event)
{
struct evclass_list *evcl;
struct evclass_elem *evc;
au_class_t class;
mtx_lock(&evclass_mtx);
evcl = &evclass_hash[event % EVCLASSMAP_HASH_TABLE_SIZE];
class = 0;
LIST_FOREACH(evc, &evcl->head, entry) {
if (evc->event == event) {
class = evc->class;
goto out;
}
}
out:
mtx_unlock(&evclass_mtx);
return (class);
}
/*
* Insert a event to class mapping. If the event already exists in the
* mapping, then replace the mapping with the new one.
*
* XXX There is currently no constraints placed on the number of mappings.
* May want to either limit to a number, or in terms of memory usage.
*/
void
au_evclassmap_insert(au_event_t event, au_class_t class)
{
struct evclass_list *evcl;
struct evclass_elem *evc, *evc_new;
/*
* Pessimistically, always allocate storage before acquiring mutex.
* Free if there is already a mapping for this event.
*/
evc_new = malloc(sizeof(*evc), M_AUDITEVCLASS, M_WAITOK);
mtx_lock(&evclass_mtx);
evcl = &evclass_hash[event % EVCLASSMAP_HASH_TABLE_SIZE];
LIST_FOREACH(evc, &evcl->head, entry) {
if (evc->event == event) {
evc->class = class;
mtx_unlock(&evclass_mtx);
free(evc_new, M_AUDITEVCLASS);
return;
}
}
evc = evc_new;
evc->event = event;
evc->class = class;
LIST_INSERT_HEAD(&evcl->head, evc, entry);
mtx_unlock(&evclass_mtx);
}
void
au_evclassmap_init(void)
{
int i;
mtx_init(&evclass_mtx, "evclass_mtx", NULL, MTX_DEF);
for (i = 0; i < EVCLASSMAP_HASH_TABLE_SIZE; i++)
LIST_INIT(&evclass_hash[i].head);
/*
* Set up the initial event to class mapping for system calls.
*
* XXXRW: Really, this should walk all possible audit events, not all
* native ABI system calls, as there may be audit events reachable
* only through non-native system calls. It also seems a shame to
* frob the mutex this early.
*/
for (i = 0; i < SYS_MAXSYSCALL; i++) {
if (sysent[i].sy_auevent != AUE_NULL)
au_evclassmap_insert(sysent[i].sy_auevent, 0);
}
}
/*
* Check whether an event is aditable by comparing the mask of classes this
* event is part of against the given mask.
*/
int
Introduce support for per-audit pipe preselection independent from the global audit trail configuration. This allows applications consuming audit trails to specify parameters for which audit records are of interest, including selecting records not required by the global trail. Allowing application interest specification without changing the global configuration allows intrusion detection systems to run without interfering with global auditing or each other (if multiple are present). To implement this: - Kernel audit records now carry a flag to indicate whether they have been selected by the global trail or by the audit pipe subsystem, set during record commit, so that this information is available after BSM conversion when delivering the BSM to the trail and audit pipes in the audit worker thread asynchronously. Preselection by either record target will cause the record to be kept. - Similar changes to preselection when the audit record is created when the system call is entering: consult both the global trail and pipes. - au_preselect() now accepts the class in order to avoid repeatedly looking up the mask for each preselection test. - Define a series of ioctls that allow applications to specify whether they want to track the global trail, or program their own preselection parameters: they may specify their own flags and naflags masks, similar to the global masks of the same name, as well as a set of per-auid masks. They also set a per-pipe mode specifying whether they track the global trail, or user their own -- the door is left open for future additional modes. A new ioctl is defined to allow a user process to flush the current audit pipe queue, which can be used after reprogramming pre-selection to make sure that only records of interest are received in future reads. - Audit pipe data structures are extended to hold the additional fields necessary to support preselection. By default, audit pipes track the global trail, so "praudit /dev/auditpipe" will track the global audit trail even though praudit doesn't program the audit pipe selection model. - Comment about the complexities of potentially adding partial read support to audit pipes. By using a set of ioctls, applications can select which records are of interest, and toggle the preselection mode. Obtained from: TrustedBSD Project
2006-06-05 14:48:17 +00:00
au_preselect(au_event_t event, au_class_t class, au_mask_t *mask_p, int sorf)
{
au_class_t effmask = 0;
if (mask_p == NULL)
return (-1);
/*
* Perform the actual check of the masks against the event.
*/
if (sorf & AU_PRS_SUCCESS)
Introduce support for per-audit pipe preselection independent from the global audit trail configuration. This allows applications consuming audit trails to specify parameters for which audit records are of interest, including selecting records not required by the global trail. Allowing application interest specification without changing the global configuration allows intrusion detection systems to run without interfering with global auditing or each other (if multiple are present). To implement this: - Kernel audit records now carry a flag to indicate whether they have been selected by the global trail or by the audit pipe subsystem, set during record commit, so that this information is available after BSM conversion when delivering the BSM to the trail and audit pipes in the audit worker thread asynchronously. Preselection by either record target will cause the record to be kept. - Similar changes to preselection when the audit record is created when the system call is entering: consult both the global trail and pipes. - au_preselect() now accepts the class in order to avoid repeatedly looking up the mask for each preselection test. - Define a series of ioctls that allow applications to specify whether they want to track the global trail, or program their own preselection parameters: they may specify their own flags and naflags masks, similar to the global masks of the same name, as well as a set of per-auid masks. They also set a per-pipe mode specifying whether they track the global trail, or user their own -- the door is left open for future additional modes. A new ioctl is defined to allow a user process to flush the current audit pipe queue, which can be used after reprogramming pre-selection to make sure that only records of interest are received in future reads. - Audit pipe data structures are extended to hold the additional fields necessary to support preselection. By default, audit pipes track the global trail, so "praudit /dev/auditpipe" will track the global audit trail even though praudit doesn't program the audit pipe selection model. - Comment about the complexities of potentially adding partial read support to audit pipes. By using a set of ioctls, applications can select which records are of interest, and toggle the preselection mode. Obtained from: TrustedBSD Project
2006-06-05 14:48:17 +00:00
effmask |= (mask_p->am_success & class);
if (sorf & AU_PRS_FAILURE)
Introduce support for per-audit pipe preselection independent from the global audit trail configuration. This allows applications consuming audit trails to specify parameters for which audit records are of interest, including selecting records not required by the global trail. Allowing application interest specification without changing the global configuration allows intrusion detection systems to run without interfering with global auditing or each other (if multiple are present). To implement this: - Kernel audit records now carry a flag to indicate whether they have been selected by the global trail or by the audit pipe subsystem, set during record commit, so that this information is available after BSM conversion when delivering the BSM to the trail and audit pipes in the audit worker thread asynchronously. Preselection by either record target will cause the record to be kept. - Similar changes to preselection when the audit record is created when the system call is entering: consult both the global trail and pipes. - au_preselect() now accepts the class in order to avoid repeatedly looking up the mask for each preselection test. - Define a series of ioctls that allow applications to specify whether they want to track the global trail, or program their own preselection parameters: they may specify their own flags and naflags masks, similar to the global masks of the same name, as well as a set of per-auid masks. They also set a per-pipe mode specifying whether they track the global trail, or user their own -- the door is left open for future additional modes. A new ioctl is defined to allow a user process to flush the current audit pipe queue, which can be used after reprogramming pre-selection to make sure that only records of interest are received in future reads. - Audit pipe data structures are extended to hold the additional fields necessary to support preselection. By default, audit pipes track the global trail, so "praudit /dev/auditpipe" will track the global audit trail even though praudit doesn't program the audit pipe selection model. - Comment about the complexities of potentially adding partial read support to audit pipes. By using a set of ioctls, applications can select which records are of interest, and toggle the preselection mode. Obtained from: TrustedBSD Project
2006-06-05 14:48:17 +00:00
effmask |= (mask_p->am_failure & class);
if (effmask)
return (1);
else
return (0);
}
/*
* Convert sysctl names and present arguments to events.
*/
au_event_t
audit_ctlname_to_sysctlevent(int name[], uint64_t valid_arg)
{
/* can't parse it - so return the worst case */
if ((valid_arg & (ARG_CTLNAME | ARG_LEN)) != (ARG_CTLNAME | ARG_LEN))
return (AUE_SYSCTL);
switch (name[0]) {
/* non-admin "lookups" treat them special */
case KERN_OSTYPE:
case KERN_OSRELEASE:
case KERN_OSREV:
case KERN_VERSION:
case KERN_ARGMAX:
case KERN_CLOCKRATE:
case KERN_BOOTTIME:
case KERN_POSIX1:
case KERN_NGROUPS:
case KERN_JOB_CONTROL:
case KERN_SAVED_IDS:
case KERN_OSRELDATE:
case KERN_DUMMY:
return (AUE_SYSCTL_NONADMIN);
/* only treat the changeable controls as admin */
case KERN_MAXVNODES:
case KERN_MAXPROC:
case KERN_MAXFILES:
case KERN_MAXPROCPERUID:
case KERN_MAXFILESPERPROC:
case KERN_HOSTID:
case KERN_SECURELVL:
case KERN_HOSTNAME:
case KERN_VNODE:
case KERN_PROC:
case KERN_FILE:
case KERN_PROF:
case KERN_NISDOMAINNAME:
case KERN_UPDATEINTERVAL:
case KERN_NTP_PLL:
case KERN_BOOTFILE:
case KERN_DUMPDEV:
case KERN_IPC:
case KERN_PS_STRINGS:
case KERN_USRSTACK:
case KERN_LOGSIGEXIT:
case KERN_IOV_MAX:
case KERN_MAXID:
return ((valid_arg & ARG_VALUE) ?
AUE_SYSCTL : AUE_SYSCTL_NONADMIN);
default:
return (AUE_SYSCTL);
}
/* NOTREACHED */
}
/*
* Convert an open flags specifier into a specific type of open event for
* auditing purposes.
*/
au_event_t
audit_flags_and_error_to_openevent(int oflags, int error)
{
au_event_t aevent;
/*
* Need to check only those flags we care about.
*/
oflags = oflags & (O_RDONLY | O_CREAT | O_TRUNC | O_RDWR | O_WRONLY);
/*
* These checks determine what flags are on with the condition that
* ONLY that combination is on, and no other flags are on.
*/
switch (oflags) {
case O_RDONLY:
aevent = AUE_OPEN_R;
break;
case (O_RDONLY | O_CREAT):
aevent = AUE_OPEN_RC;
break;
case (O_RDONLY | O_CREAT | O_TRUNC):
aevent = AUE_OPEN_RTC;
break;
case (O_RDONLY | O_TRUNC):
aevent = AUE_OPEN_RT;
break;
case O_RDWR:
aevent = AUE_OPEN_RW;
break;
case (O_RDWR | O_CREAT):
aevent = AUE_OPEN_RWC;
break;
case (O_RDWR | O_CREAT | O_TRUNC):
aevent = AUE_OPEN_RWTC;
break;
case (O_RDWR | O_TRUNC):
aevent = AUE_OPEN_RWT;
break;
case O_WRONLY:
aevent = AUE_OPEN_W;
break;
case (O_WRONLY | O_CREAT):
aevent = AUE_OPEN_WC;
break;
case (O_WRONLY | O_CREAT | O_TRUNC):
aevent = AUE_OPEN_WTC;
break;
case (O_WRONLY | O_TRUNC):
aevent = AUE_OPEN_WT;
break;
default:
aevent = AUE_OPEN;
break;
}
#if 0
/*
* Convert chatty errors to better matching events. Failures to
* find a file are really just attribute events -- so recast them as
* such.
*
* XXXAUDIT: Solaris defines that AUE_OPEN will never be returned, it
* is just a placeholder. However, in Darwin we return that in
* preference to other events. For now, comment this out as we don't
* have a BSM conversion routine for AUE_OPEN.
*/
switch (aevent) {
case AUE_OPEN_R:
case AUE_OPEN_RT:
case AUE_OPEN_RW:
case AUE_OPEN_RWT:
case AUE_OPEN_W:
case AUE_OPEN_WT:
if (error == ENOENT)
aevent = AUE_OPEN;
}
#endif
return (aevent);
}
/*
* Convert a MSGCTL command to a specific event.
*/
int
audit_msgctl_to_event(int cmd)
{
switch (cmd) {
case IPC_RMID:
return (AUE_MSGCTL_RMID);
case IPC_SET:
return (AUE_MSGCTL_SET);
case IPC_STAT:
return (AUE_MSGCTL_STAT);
default:
/* We will audit a bad command. */
return (AUE_MSGCTL);
}
}
/*
* Convert a SEMCTL command to a specific event.
*/
int
audit_semctl_to_event(int cmd)
{
switch (cmd) {
case GETALL:
return (AUE_SEMCTL_GETALL);
case GETNCNT:
return (AUE_SEMCTL_GETNCNT);
case GETPID:
return (AUE_SEMCTL_GETPID);
case GETVAL:
return (AUE_SEMCTL_GETVAL);
case GETZCNT:
return (AUE_SEMCTL_GETZCNT);
case IPC_RMID:
return (AUE_SEMCTL_RMID);
case IPC_SET:
return (AUE_SEMCTL_SET);
case SETALL:
return (AUE_SEMCTL_SETALL);
case SETVAL:
return (AUE_SEMCTL_SETVAL);
case IPC_STAT:
return (AUE_SEMCTL_STAT);
default:
/* We will audit a bad command */
return (AUE_SEMCTL);
}
}
/*
* Convert a command for the auditon() system call to a audit event.
*/
int
auditon_command_event(int cmd)
{
switch(cmd) {
case A_GETPOLICY:
return (AUE_AUDITON_GPOLICY);
case A_SETPOLICY:
return (AUE_AUDITON_SPOLICY);
case A_GETKMASK:
return (AUE_AUDITON_GETKMASK);
case A_SETKMASK:
return (AUE_AUDITON_SETKMASK);
case A_GETQCTRL:
return (AUE_AUDITON_GQCTRL);
case A_SETQCTRL:
return (AUE_AUDITON_SQCTRL);
case A_GETCWD:
return (AUE_AUDITON_GETCWD);
case A_GETCAR:
return (AUE_AUDITON_GETCAR);
case A_GETSTAT:
return (AUE_AUDITON_GETSTAT);
case A_SETSTAT:
return (AUE_AUDITON_SETSTAT);
case A_SETUMASK:
return (AUE_AUDITON_SETUMASK);
case A_SETSMASK:
return (AUE_AUDITON_SETSMASK);
case A_GETCOND:
return (AUE_AUDITON_GETCOND);
case A_SETCOND:
return (AUE_AUDITON_SETCOND);
case A_GETCLASS:
return (AUE_AUDITON_GETCLASS);
case A_SETCLASS:
return (AUE_AUDITON_SETCLASS);
case A_GETPINFO:
case A_SETPMASK:
case A_SETFSIZE:
case A_GETFSIZE:
case A_GETPINFO_ADDR:
case A_GETKAUDIT:
case A_SETKAUDIT:
default:
return (AUE_AUDITON); /* No special record */
}
}
/*
* Create a canonical path from given path by prefixing either the root
* directory, or the current working directory. If the process working
* directory is NULL, we could use 'rootvnode' to obtain the root directory,
* but this results in a volfs name written to the audit log. So we will
* leave the filename starting with '/' in the audit log in this case.
*
* XXXRW: Since we combine two paths here, ideally a buffer of size
* MAXPATHLEN * 2 would be passed in.
*/
void
audit_canon_path(struct thread *td, char *path, char *cpath)
{
char *bufp;
char *retbuf, *freebuf;
struct vnode *vnp;
struct filedesc *fdp;
int cisr, error, vfslocked;
WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
"audit_canon_path() at %s:%d", __FILE__, __LINE__);
fdp = td->td_proc->p_fd;
bufp = path;
cisr = 0;
Replace custom file descriptor array sleep lock constructed using a mutex and flags with an sxlock. This leads to a significant and measurable performance improvement as a result of access to shared locking for frequent lookup operations, reduced general overhead, and reduced overhead in the event of contention. All of these are imported for threaded applications where simultaneous access to a shared file descriptor array occurs frequently. Kris has reported 2x-4x transaction rate improvements on 8-core MySQL benchmarks; smaller improvements can be expected for many workloads as a result of reduced overhead. - Generally eliminate the distinction between "fast" and regular acquisisition of the filedesc lock; the plan is that they will now all be fast. Change all locking instances to either shared or exclusive locks. - Correct a bug (pointed out by kib) in fdfree() where previously msleep() was called without the mutex held; sx_sleep() is now always called with the sxlock held exclusively. - Universally hold the struct file lock over changes to struct file, rather than the filedesc lock or no lock. Always update the f_ops field last. A further memory barrier is required here in the future (discussed with jhb). - Improve locking and reference management in linux_at(), which fails to properly acquire vnode references before using vnode pointers. Annotate improper use of vn_fullpath(), which will be replaced at a future date. In fcntl(), we conservatively acquire an exclusive lock, even though in some cases a shared lock may be sufficient, which should be revisited. The dropping of the filedesc lock in fdgrowtable() is no longer required as the sxlock can be held over the sleep operation; we should consider removing that (pointed out by attilio). Tested by: kris Discussed with: jhb, kris, attilio, jeff
2007-04-04 09:11:34 +00:00
FILEDESC_SLOCK(fdp);
if (*(path) == '/') {
while (*(bufp) == '/')
bufp++; /* Skip leading '/'s. */
/*
* If no process root, or it is the same as the system root,
* audit the path as passed in with a single '/'.
*/
if ((fdp->fd_rdir == NULL) ||
(fdp->fd_rdir == rootvnode)) {
vnp = NULL;
bufp--; /* Restore one '/'. */
} else {
vnp = fdp->fd_rdir; /* Use process root. */
vref(vnp);
}
} else {
vnp = fdp->fd_cdir; /* Prepend the current dir. */
cisr = (fdp->fd_rdir == fdp->fd_cdir);
vref(vnp);
bufp = path;
}
Replace custom file descriptor array sleep lock constructed using a mutex and flags with an sxlock. This leads to a significant and measurable performance improvement as a result of access to shared locking for frequent lookup operations, reduced general overhead, and reduced overhead in the event of contention. All of these are imported for threaded applications where simultaneous access to a shared file descriptor array occurs frequently. Kris has reported 2x-4x transaction rate improvements on 8-core MySQL benchmarks; smaller improvements can be expected for many workloads as a result of reduced overhead. - Generally eliminate the distinction between "fast" and regular acquisisition of the filedesc lock; the plan is that they will now all be fast. Change all locking instances to either shared or exclusive locks. - Correct a bug (pointed out by kib) in fdfree() where previously msleep() was called without the mutex held; sx_sleep() is now always called with the sxlock held exclusively. - Universally hold the struct file lock over changes to struct file, rather than the filedesc lock or no lock. Always update the f_ops field last. A further memory barrier is required here in the future (discussed with jhb). - Improve locking and reference management in linux_at(), which fails to properly acquire vnode references before using vnode pointers. Annotate improper use of vn_fullpath(), which will be replaced at a future date. In fcntl(), we conservatively acquire an exclusive lock, even though in some cases a shared lock may be sufficient, which should be revisited. The dropping of the filedesc lock in fdgrowtable() is no longer required as the sxlock can be held over the sleep operation; we should consider removing that (pointed out by attilio). Tested by: kris Discussed with: jhb, kris, attilio, jeff
2007-04-04 09:11:34 +00:00
FILEDESC_SUNLOCK(fdp);
if (vnp != NULL) {
/*
* XXX: vn_fullpath() on FreeBSD is "less reliable" than
* vn_getpath() on Darwin, so this will need more attention
* in the future. Also, the question and string bounding
* here seems a bit questionable and will also require
* attention.
*/
vfslocked = VFS_LOCK_GIANT(vnp->v_mount);
vn_lock(vnp, LK_EXCLUSIVE | LK_RETRY);
error = vn_fullpath(td, vnp, &retbuf, &freebuf);
if (error == 0) {
/* Copy and free buffer allocated by vn_fullpath().
* If the current working directory was the same as
* the root directory, and the path was a relative
* pathname, do not separate the two components with
* the '/' character.
*/
snprintf(cpath, MAXPATHLEN, "%s%s%s", retbuf,
cisr ? "" : "/", bufp);
free(freebuf, M_TEMP);
} else
cpath[0] = '\0';
vput(vnp);
VFS_UNLOCK_GIANT(vfslocked);
} else
strlcpy(cpath, bufp, MAXPATHLEN);
}