tests/sys/audit: Avoid race caused by starting auditd(8) for testing
In the CheriBSD CI we reproducibly see the first test in sys/audit (administrative:acct_failure) fail due to a missing startup message. It appears this is caused by a race condition when starting auditd: `service auditd onestart` returns as soon as the initial auditd() parent exits (after the daemon(3) call). We can avoid this problem by setting up the auditd infrastructure in-process: libauditd contains audit_quick_{start,stop}() functions that look like they are ideally suited to this task. This patch also avoids forking lots of shell processes for each of the 418 tests by using `auditon(A_SENDTRIGGER, &trigger, sizeof(trigger))` to check for a running auditd(8) instead of using `service auditd onestatus`. With these two changes (and D28388 to fix the XFAIL'd test) I can now boot and run `cd /usr/tests/sys/audit && kyua test` without any failures in a single-core QEMU instance. Before there would always be at least one failed test. Besides making the tests more reliable in CI, a nice side-effect of this change is that it also significantly speeds up running them by avoiding lots of fork()/execve() caused by shell scripts: Running kyua test on an AArch64 QEMU took 315s before and now takes 68s, so it's roughly 3.5 times faster. This effect is even larger when running on a CHERI-RISC-V QEMU since emulating CHERI instructions on an x86 host is noticeably slower than emulating AArch64. Test Plan: aarch64+amd64 QEMU no longer fail. Reviewed By: asomers Differential Revision: https://reviews.freebsd.org/D28451
This commit is contained in:
parent
cbcfe28f9d
commit
df093aa946
@ -48,10 +48,19 @@ SRCS.miscellaneous+= utils.c
|
||||
|
||||
TEST_METADATA+= timeout="30"
|
||||
TEST_METADATA+= required_user="root"
|
||||
# Only one process can be auditing, if we attempt to run these tests in parallel
|
||||
# some of them will fail to start auditing.
|
||||
# TODO: it would be nice to be able to run them in parallel with other non-audit
|
||||
# tests using some internal form of synchronization.
|
||||
# TODO: In addititon to test failures, running in parallel can trigger a kernel
|
||||
# panic: https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=253616
|
||||
TEST_METADATA+= is_exclusive="true"
|
||||
TEST_METADATA+= required_files="/etc/rc.d/auditd /dev/auditpipe"
|
||||
|
||||
LDFLAGS+= -lbsm -lutil
|
||||
OPENBSMDIR=${SRCTOP}/contrib/openbsm
|
||||
CFLAGS+= -I${OPENBSMDIR}
|
||||
LDADD+= ${LIBAUDITD}
|
||||
|
||||
CFLAGS.process-control.c+= -I${SRCTOP}/tests
|
||||
|
||||
|
@ -341,10 +341,16 @@ ATF_TC_CLEANUP(auditctl_success, tc)
|
||||
* at the configured path. To reset this, we need to stop and start the
|
||||
* auditd(8) again. Here, we check if auditd(8) was running already
|
||||
* before the test started. If so, we stop and start it again.
|
||||
*
|
||||
* TODO: should we skip this test if auditd(8) is already running to
|
||||
* avoid restarting it?
|
||||
*/
|
||||
system("service auditd onestop > /dev/null 2>&1");
|
||||
if (!atf_utils_file_exists("started_auditd"))
|
||||
if (!atf_utils_file_exists("started_fake_auditd")) {
|
||||
system("service auditd onestop > /dev/null 2>&1");
|
||||
system("service auditd onestart > /dev/null 2>&1");
|
||||
} else {
|
||||
cleanup();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -30,6 +30,7 @@
|
||||
#include <sys/ioctl.h>
|
||||
|
||||
#include <bsm/libbsm.h>
|
||||
#include <bsm/auditd_lib.h>
|
||||
#include <security/audit/audit_ioctl.h>
|
||||
|
||||
#include <atf-c.h>
|
||||
@ -222,13 +223,44 @@ skip_if_extattr_not_supported(const char *path)
|
||||
}
|
||||
}
|
||||
|
||||
FILE
|
||||
*setup(struct pollfd fd[], const char *name)
|
||||
static bool
|
||||
is_auditd_running(void)
|
||||
{
|
||||
int trigger;
|
||||
int err;
|
||||
|
||||
/*
|
||||
* AUDIT_TRIGGER_INITIALIZE is a no-op message on FreeBSD and can
|
||||
* therefore be used to check whether auditd has already been started.
|
||||
* This is significantly cheaper than running `service auditd onestatus`
|
||||
* for each test case. It is also slightly less racy since it will only
|
||||
* return true once auditd() has opened the trigger file rather than
|
||||
* just when the pidfile has been created.
|
||||
*/
|
||||
trigger = AUDIT_TRIGGER_INITIALIZE;
|
||||
err = auditon(A_SENDTRIGGER, &trigger, sizeof(trigger));
|
||||
if (err == 0) {
|
||||
fprintf(stderr, "auditd(8) is running.\n");
|
||||
return (true);
|
||||
} else {
|
||||
/*
|
||||
* A_SENDTRIGGER returns ENODEV if auditd isn't listening,
|
||||
* all other error codes indicate a fatal error.
|
||||
*/
|
||||
ATF_REQUIRE_MSG(errno == ENODEV,
|
||||
"Unexpected error from auditon(2): %s", strerror(errno));
|
||||
return (false);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
FILE *
|
||||
setup(struct pollfd fd[], const char *name)
|
||||
{
|
||||
au_mask_t fmask, nomask;
|
||||
FILE *pipestream;
|
||||
fmask = get_audit_mask(name);
|
||||
nomask = get_audit_mask("no");
|
||||
FILE *pipestream;
|
||||
|
||||
ATF_REQUIRE((fd[0].fd = open("/dev/auditpipe", O_RDONLY)) != -1);
|
||||
ATF_REQUIRE((pipestream = fdopen(fd[0].fd, "r")) != NULL);
|
||||
@ -244,12 +276,39 @@ FILE
|
||||
|
||||
/* Set local preselection audit_class as "no" for audit startup */
|
||||
set_preselect_mode(fd[0].fd, &nomask);
|
||||
ATF_REQUIRE_EQ(0, system("service auditd onestatus || \
|
||||
{ service auditd onestart && touch started_auditd ; }"));
|
||||
|
||||
/* If 'started_auditd' exists, that means we started auditd(8) */
|
||||
if (atf_utils_file_exists("started_auditd"))
|
||||
if (!is_auditd_running()) {
|
||||
fprintf(stderr, "Running audit_quick_start() for testing... ");
|
||||
/*
|
||||
* Previously, this test started auditd using
|
||||
* `service auditd onestart`. However, there is a race condition
|
||||
* there since service can return before auditd(8) has
|
||||
* fully started (once the daemon parent process has forked)
|
||||
* and this can cause check_audit_startup() to fail sometimes.
|
||||
*
|
||||
* In the CheriBSD CI this caused the first test executed by
|
||||
* kyua (administrative:acct_failure) to fail every time, but
|
||||
* subsequent ones would almost always succeed.
|
||||
*
|
||||
* To avoid this problem (and as a nice side-effect this speeds
|
||||
* up the test quite a bit), we register this process as a
|
||||
* "fake" auditd(8) using the audit_quick_start() function from
|
||||
* libauditd.
|
||||
*/
|
||||
atf_utils_create_file("started_fake_auditd", "yes\n");
|
||||
ATF_REQUIRE(atf_utils_file_exists("started_fake_auditd"));
|
||||
ATF_REQUIRE_EQ_MSG(0, audit_quick_start(),
|
||||
"Failed to start fake auditd: %m");
|
||||
fprintf(stderr, "done.\n");
|
||||
/* audit_quick_start() should log an audit start event. */
|
||||
check_audit_startup(fd, "audit startup", pipestream);
|
||||
/*
|
||||
* If we exit cleanly shutdown audit_quick_start(), if not
|
||||
* cleanup() will take care of it.
|
||||
* This is not required, but makes it easier to run individual
|
||||
* tests outside of kyua.
|
||||
*/
|
||||
atexit(cleanup);
|
||||
}
|
||||
|
||||
/* Set local preselection parameters specific to "name" audit_class */
|
||||
set_preselect_mode(fd[0].fd, &fmask);
|
||||
@ -259,6 +318,13 @@ FILE
|
||||
void
|
||||
cleanup(void)
|
||||
{
|
||||
if (atf_utils_file_exists("started_auditd"))
|
||||
system("service auditd onestop > /dev/null 2>&1");
|
||||
if (atf_utils_file_exists("started_fake_auditd")) {
|
||||
fprintf(stderr, "Running audit_quick_stop()... ");
|
||||
if (audit_quick_stop() != 0) {
|
||||
fprintf(stderr, "Failed to stop fake auditd: %m\n");
|
||||
abort();
|
||||
}
|
||||
fprintf(stderr, "done.\n");
|
||||
unlink("started_fake_auditd");
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user