Add Linux kmemleak support to ZTS

- Kmemleak `clear` is invoked right before every test case run. - Kmemleak `scan` is requested right after each test case is finished. - Kmemleak instrumentation is not used for setup/cleanup/pretest/posttest/failsafe stages to shorten the test case execution time. - Kmemleak periodic scan is disabled (`scan=0`) before the test suite run to avoid interfering with the on-demand scan results. - There are unavoidable potential false positives coming from kernel areas other than OpenZFS module. - The ZTS with kmemleak enabled duration is increased by ~50%. Example run ``` Running Time: 07:12:13 Percent passed: 98.3% unreferenced object 0xffff9da82aea5410 (size 80): comm "kworker/u32:10", pid 942206, jiffies 4296749716 (age 2615.516s) hex dump (first 32 bytes): 00 30 30 00 00 00 00 00 ff 8f 30 00 00 00 00 00 .00.......0..... 51 e6 77 05 a8 9d ff ff 00 00 00 00 00 00 00 00 Q.w............. backtrace: [<000000005cf1fea2>] alloc_extent_state+0x1d/0xb0 [btrfs] [<0000000083f78ae5>] set_extent_bit+0x2ff/0x670 [btrfs] [<00000000de29249e>] lock_extent_bits+0x6b/0xa0 [btrfs] [<00000000b241f424>] lock_and_cleanup_extent_if_need+0xaf/0x1c0 [btrfs] [<0000000093ca72b5>] btrfs_buffered_write+0x297/0x7d0 [btrfs] [<000000002c2938c8>] btrfs_file_write_iter+0x127/0x390 [btrfs] [<00000000b888f720>] do_iter_readv_writev+0x152/0x1b0 [<00000000320f0bcc>] do_iter_write+0x7c/0x1c0 [<000000000b5a8fe0>] lo_write_bvec+0x62/0x150 [loop] [<000000009aa03c73>] loop_process_work+0x250/0xbd0 [loop] [<00000000c7487d8a>] process_one_work+0x1f1/0x390 [<000000000b236831>] worker_thread+0x53/0x3e0 [<0000000023cb3e57>] kthread+0x127/0x150 [<000000002d48676a>] ret_from_fork+0x22/0x30 ``` Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Ryan Moeller <ryan@iXsystems.com> Signed-off-by: szubersk <szuberskidamian@gmail.com> Closes #13084
2022-02-24 19:21:13 +01:00 · 2022-02-24 19:21:13 +01:00 · 78fad47cb3
commit 78fad47cb3
parent 4d14a285b3
3 changed files with 62 additions and 13 deletions
--- a/scripts/zfs-tests.sh
+++ b/scripts/zfs-tests.sh
@ -54,6 +54,7 @@ ZFS_DBGMSG="$STF_SUITE/callbacks/zfs_dbgmsg.ksh"
 ZFS_DMESG="$STF_SUITE/callbacks/zfs_dmesg.ksh"
 UNAME=$(uname -s)
 RERUN=""
+KMEMLEAK=""

 # Override some defaults if on FreeBSD
 if [ "$UNAME" = "FreeBSD" ] ; then
@ -329,6 +330,7 @@ OPTIONS:
 	-S          Enable stack tracer (negative performance impact)
 	-c          Only create and populate constrained path
 	-R          Automatically rerun failing tests
+	-m          Enable kmemleak reporting (Linux only)
 	-n NFSFILE  Use the nfsfile to determine the NFS configuration
 	-I NUM      Number of iterations
 	-d DIR      Use DIR for files and loopback devices
@ -355,7 +357,7 @@ $0 -x
 EOF
 }

-while getopts 'hvqxkfScRn:d:s:r:?t:T:u:I:' OPTION; do
+while getopts 'hvqxkfScRmn:d:s:r:?t:T:u:I:' OPTION; do
 	case $OPTION in
 	h)
 		usage
@ -386,6 +388,9 @@ while getopts 'hvqxkfScRn:d:s:r:?t:T:u:I:' OPTION; do
 	R)
 		RERUN="yes"
 		;;
+	m)
+		KMEMLEAK="yes"
+		;;
 	n)
 		nfsfile=$OPTARG
 		[ -f "$nfsfile" ] || fail "Cannot read file: $nfsfile"
@ -695,12 +700,16 @@ fi
 #
 # Run all the tests as specified.
 #
-msg "${TEST_RUNNER} ${QUIET:+-q}" \
+msg "${TEST_RUNNER}" \
+    "${QUIET:+-q}" \
+    "${KMEMLEAK:+-m}" \
    "-c \"${RUNFILES}\"" \
    "-T \"${TAGS}\"" \
    "-i \"${STF_SUITE}\"" \
    "-I \"${ITERATIONS}\""
-{ ${TEST_RUNNER} ${QUIET:+-q} \
+{ ${TEST_RUNNER} \
+    ${QUIET:+-q} \
+    ${KMEMLEAK:+-m} \
    -c "${RUNFILES}" \
    -T "${TAGS}" \
    -i "${STF_SUITE}" \
@ -722,7 +731,9 @@ if [ "$RESULT" -eq "2" ] && [ -n "$RERUN" ]; then
 	for test_name in $MAYBES; do
 		grep "$test_name " "$TEMP_RESULTS_FILE" >>"$TEST_LIST"
 	done
-	{ ${TEST_RUNNER} ${QUIET:+-q} \
+	{ ${TEST_RUNNER} \
+            ${QUIET:+-q} \
+            ${KMEMLEAK:+-m} \
 	    -c "${RUNFILES}" \
 	    -T "${TAGS}" \
 	    -i "${STF_SUITE}" \
--- a/tests/test-runner/bin/test-runner.py.in
+++ b/tests/test-runner/bin/test-runner.py.in
@ -31,11 +31,13 @@ from pwd import getpwuid
 from select import select
 from subprocess import PIPE
 from subprocess import Popen
+from subprocess import check_output
 from threading import Timer
 from time import time, CLOCK_MONOTONIC

 BASEDIR = '/var/tmp/test_results'
 TESTDIR = '/usr/share/zfs/'
+KMEMLEAK_FILE = '/sys/kernel/debug/kmemleak'
 KILL = 'kill'
 TRUE = 'true'
 SUDO = 'sudo'
@ -75,6 +77,7 @@ class Result(object):
        self.runtime = ''
        self.stdout = []
        self.stderr = []
+        self.kmemleak = ''
        self.result = ''

    def done(self, proc, killed, reran):
@ -90,6 +93,9 @@ class Result(object):
        if killed:
            self.result = 'KILLED'
            Result.runresults['KILLED'] += 1
+        elif len(self.kmemleak) > 0:
+            self.result = 'FAIL'
+            Result.runresults['FAIL'] += 1
        elif self.returncode == 0:
            self.result = 'PASS'
            Result.runresults['PASS'] += 1
@ -250,7 +256,7 @@ User: %s

        return out.lines, err.lines

-    def run(self, dryrun):
+    def run(self, dryrun, kmemleak):
        """
        This is the main function that runs each individual test.
        Determine whether or not the command requires sudo, and modify it
@ -270,6 +276,11 @@ User: %s
            fail('%s' % e)

        self.result.starttime = monotonic_time()
+
+        if kmemleak:
+            cmd = f'echo clear | {SUDO} tee {KMEMLEAK_FILE}'
+            check_output(cmd, shell=True)
+
        proc = Popen(privcmd, stdout=PIPE, stderr=PIPE)
        # Allow a special timeout value of 0 to mean infinity
        if int(self.timeout) == 0:
@ -279,6 +290,12 @@ User: %s
        try:
            t.start()
            self.result.stdout, self.result.stderr = self.collect_output(proc)
+
+            if kmemleak:
+                cmd = f'echo scan | {SUDO} tee {KMEMLEAK_FILE}'
+                check_output(cmd, shell=True)
+                cmd = f'{SUDO} cat {KMEMLEAK_FILE}'
+                self.result.kmemleak = check_output(cmd, shell=True)
        except KeyboardInterrupt:
            self.kill_cmd(proc, True)
            fail('\nRun terminated at user request.')
@ -355,6 +372,9 @@ User: %s
            with open(os.path.join(self.outputdir, 'merged'), 'wb') as merged:
                for _, line in lines:
                    os.write(merged.fileno(), b'%s\n' % line)
+        if len(self.result.kmemleak):
+            with open(os.path.join(self.outputdir, 'kmemleak'), 'wb') as kmem:
+                kmem.write(self.result.kmemleak)


 class Test(Cmd):
@ -439,14 +459,14 @@ Tags: %s

        cont = True
        if len(pretest.pathname):
-            pretest.run(options.dryrun)
+            pretest.run(options.dryrun, False)
            cont = pretest.result.result == 'PASS'
            pretest.log(options)

        if cont:
-            test.run(options.dryrun)
+            test.run(options.dryrun, options.kmemleak)
            if test.result.result == 'KILLED' and len(failsafe.pathname):
-                failsafe.run(options.dryrun)
+                failsafe.run(options.dryrun, False)
                failsafe.log(options, suppress_console=True)
        else:
            test.skip()
@ -454,7 +474,7 @@ Tags: %s
        test.log(options)

        if len(posttest.pathname):
-            posttest.run(options.dryrun)
+            posttest.run(options.dryrun, False)
            posttest.log(options)


@ -557,7 +577,7 @@ Tags: %s

        cont = True
        if len(pretest.pathname):
-            pretest.run(options.dryrun)
+            pretest.run(options.dryrun, False)
            cont = pretest.result.result == 'PASS'
            pretest.log(options)

@ -570,9 +590,9 @@ Tags: %s
            failsafe = Cmd(self.failsafe, outputdir=odir, timeout=self.timeout,
                           user=self.failsafe_user, identifier=self.identifier)
            if cont:
-                test.run(options.dryrun)
+                test.run(options.dryrun, options.kmemleak)
                if test.result.result == 'KILLED' and len(failsafe.pathname):
-                    failsafe.run(options.dryrun)
+                    failsafe.run(options.dryrun, False)
                    failsafe.log(options, suppress_console=True)
            else:
                test.skip()
@ -580,7 +600,7 @@ Tags: %s
            test.log(options)

        if len(posttest.pathname):
-            posttest.run(options.dryrun)
+            posttest.run(options.dryrun, False)
            posttest.log(options)


@ -845,6 +865,11 @@ class TestRun(object):
        else:
            write_log('Could not make a symlink to directory %s\n' %
                      self.outputdir, LOG_ERR)
+
+        if options.kmemleak:
+            cmd = f'echo scan=0 | {SUDO} tee {KMEMLEAK_FILE}'
+            check_output(cmd, shell=True)
+
        iteration = 0
        while iteration < options.iterations:
            for test in sorted(self.tests.keys()):
@ -990,6 +1015,14 @@ def fail(retstr, ret=1):
    exit(ret)


+def kmemleak_cb(option, opt_str, value, parser):
+    if not os.path.exists(KMEMLEAK_FILE):
+        fail(f"File '{KMEMLEAK_FILE}' doesn't exist. " +
+             "Enable CONFIG_DEBUG_KMEMLEAK in kernel configuration.")
+
+    setattr(parser.values, option.dest, True)
+
+
 def options_cb(option, opt_str, value, parser):
    path_options = ['outputdir', 'template', 'testdir', 'logfile']

@ -1027,6 +1060,9 @@ def parse_args():
    parser.add_option('-i', action='callback', callback=options_cb,
                      default=TESTDIR, dest='testdir', type='string',
                      metavar='testdir', help='Specify a test directory.')
+    parser.add_option('-m', action='callback', callback=kmemleak_cb,
+                      default=False, dest='kmemleak',
+                      help='Enable kmemleak reporting (Linux only)')
    parser.add_option('-p', action='callback', callback=options_cb,
                      default='', dest='pre', metavar='script',
                      type='string', help='Specify a pre script.')
--- a/tests/test-runner/man/test-runner.1
+++ b/tests/test-runner/man/test-runner.1
@ -210,6 +210,8 @@ to be consumed by the run command.
 .It Fl d
 Dry run mode.
 Execute no tests, but print a description of each test that would have been run.
+.It Fl m
+Enable kmemleak reporting (Linux only)
 .It Fl g
 Create test groups from any directories found while searching for tests.
 .It Fl o Ar outputdir