freebsd-dev/contrib/cortex-strings/scripts/bench.py
Andrew Turner 09a53ad8f1 Import the Linaro Cortex Strings library into contrib.
Sponsored by:	The FreeBSD Foundation
2016-09-19 13:12:09 +00:00

176 lines
5.5 KiB
Python

#!/usr/bin/env python
"""Simple harness that benchmarks different variants of the routines,
caches the results, and emits all of the records at the end.
Results are generated for different values of:
* Source
* Routine
* Length
* Alignment
"""
import argparse
import subprocess
import math
import sys
# Prefix to the executables; the variant name is appended to form the
# program that gets run.
build = '../build/try-'

# The full routine set, shared by the variants that provide all of them.
ALL = 'memchr memcmp memcpy memset strchr strcmp strcpy strlen'

# Space separated routine names available in each library variant.
HAS = {
    'this': 'bounce memchr memcpy memset strchr strcmp strcpy strlen',
    'bionic-a9': 'memcmp memcpy memset strcmp strcpy strlen',
    'bionic-a15': 'memcmp memcpy memset strcmp strcpy strlen',
    'bionic-c': ALL,
    'csl': 'memcpy memset',
    'glibc': 'memcpy memset strchr strlen',
    'glibc-c': ALL,
    'newlib': 'memcpy strcmp strcpy strlen',
    'newlib-c': ALL,
    'newlib-xscale': 'memchr memcpy memset strchr strcmp strcpy strlen',
    'plain': 'memset memcpy strcmp strcpy',
}

# Alignment arguments to try, grouped by the kind of routine.
BOUNCE_ALIGNMENTS = ['1']
SINGLE_BUFFER_ALIGNMENTS = ['1', '2', '4', '8', '16', '32']
DUAL_BUFFER_ALIGNMENTS = ['1:32', '2:32', '4:32', '8:32', '16:32', '32:32']

# Routine name -> list of alignment arguments.  Every routine in a group
# shares the same list object.
ALIGNMENTS = {'bounce': BOUNCE_ALIGNMENTS}
ALIGNMENTS.update(dict.fromkeys(
    ('memchr', 'memset', 'strchr', 'strlen'), SINGLE_BUFFER_ALIGNMENTS))
ALIGNMENTS.update(dict.fromkeys(
    ('memcmp', 'memcpy', 'strcmp', 'strcpy'), DUAL_BUFFER_ALIGNMENTS))

# Sorted names, used as the command line defaults and choices.
VARIANTS = sorted(HAS)
FUNCTIONS = sorted(ALIGNMENTS)

# Number of run ids (0 .. NUM_RUNS-1) measured per configuration.
NUM_RUNS = 5
def run(cache, variant, function, bytes, loops, alignment, run_id, quiet=False):
    """Perform a single run, exercising the cache as appropriate.

    The result line is looked up in `cache` under the colon-joined
    (variant, function, bytes, loops, alignment, run_id) key.  On a miss
    the executable named by the `build` prefix plus `variant` is run with
    -t/-c/-l/-a/-r arguments and its stripped output becomes the result
    line.  Either way the line is stored in `cache` under the key and,
    unless `quiet` is set, echoed to stdout.

    Returns the elapsed time, parsed as a float from the colon-separated
    field at index 7 of the result line.

    Raises AssertionError if the executable cannot be started.  A
    non-zero exit status surfaces as subprocess.CalledProcessError.
    """
    key = ':'.join('%s' % x for x in
                   (variant, function, bytes, loops, alignment, run_id))

    if key in cache:
        got = cache[key]
    else:
        # Build argv directly instead of formatting a string and splitting
        # it, so a build prefix containing whitespace stays in one piece.
        cmd = ['%s%s' % (build, variant),
               '-t', function,
               '-c', bytes,
               '-l', loops,
               '-a', alignment,
               '-r', run_id]
        cmd = ['%s' % arg for arg in cmd]

        try:
            # Text mode so the output is a str on Python 3 as well.
            got = subprocess.check_output(cmd, universal_newlines=True).strip()
        except OSError as ex:
            # Raised explicitly (not via an assert statement) so the error
            # is still reported when running under -O.
            raise AssertionError(
                'Error %s while running %s' % (ex, ' '.join(cmd)))

    parts = got.split(':')
    took = float(parts[7])

    cache[key] = got

    if not quiet:
        print(got)
        sys.stdout.flush()

    return took
def run_many(cache, variants, bytes, all_functions):
    """Benchmark each function/alignment/variant combination over all sizes.

    cache         -- result cache handed through to run()
    variants      -- variant names (keys of HAS) to measure
    bytes         -- buffer sizes to measure; sorted before use
    all_functions -- routine names to measure; if empty, the routines of
                     'this' are used first, followed by any others found
                     in HAS

    For every combination a quiet tracer run at a representative size is
    used to pick the loop count, then each size is run NUM_RUNS times.
    Returns None.  Does nothing when `bytes` is empty.
    """
    # We want the data to come out in a useful order.  So fix an
    # alignment and function, and do all sizes for a variant first
    sizes = sorted(bytes)

    if not sizes:
        # Nothing to measure, and there is no tracer size to pick.
        return

    mid = sizes[int(len(sizes)/1.5)]

    if not all_functions:
        # Use the ordering in 'this' as the default
        all_functions = HAS['this'].split()

        # Find all other functions
        for functions in HAS.values():
            for function in functions.split():
                if function not in all_functions:
                    all_functions.append(function)

    for function in all_functions:
        for alignment in ALIGNMENTS[function]:
            for variant in variants:
                if function not in HAS[variant].split():
                    continue

                # Run a tracer through and see how long it takes and
                # adjust the number of loops based on that.  Not great
                # for memchr() and similar which are O(n), but it will
                # do
                f = 50000000
                want = 5.0

                loops = int(f / math.sqrt(max(1, mid)))
                took = run(cache, variant, function, mid, loops, alignment, 0,
                           quiet=True)

                # Keep it reasonable for silly routines like bounce
                if took == 0:
                    # Too fast to time: scale up by the maximum factor
                    # instead of dividing by zero.
                    factor = 20
                else:
                    factor = min(20, max(0.05, want/took))
                f = f * factor

                # Round f to a few significant figures
                scale = 10**int(math.log10(f) - 1)
                f = scale*int(f/scale)

                for b in sizes:
                    # Figure out the number of loops to give a roughly
                    # consistent run
                    loops = int(f / math.sqrt(max(1, b)))

                    for run_id in range(NUM_RUNS):
                        run(cache, variant, function, b, loops, alignment,
                            run_id)
def run_top(cache):
    """Parse the command line and run the selected benchmarks.

    Options: -v/--variants and -f/--functions restrict what is run (the
    defaults are VARIANTS and FUNCTIONS), and -l/--limit sets the upper
    size in bytes (default 512 KiB).  The sizes handed to run_many() are
    the integer parts of the powers of 2 and of 1.4 up to roughly the
    limit.

    cache -- result cache passed through to run_many()

    Exits with a usage error if the limit is smaller than one byte.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-v", "--variants", nargs="+",
                        help="library variant to run (run all if not specified)",
                        default=VARIANTS, choices=VARIANTS)
    parser.add_argument("-f", "--functions", nargs="+",
                        help="function to run (run all if not specified)",
                        default=FUNCTIONS, choices=FUNCTIONS)
    parser.add_argument("-l", "--limit", type=int,
                        help="upper limit to test to (in bytes)",
                        default=512*1024)
    args = parser.parse_args()

    if args.limit < 1:
        # math.log() below is undefined for zero and negative limits.
        parser.error("--limit must be at least 1 byte")

    # Test all powers of 2, plus intermediate powers of 1.4
    bases = (2.0, 1.4)

    sizes = []

    for base in bases:
        # Figure out how many steps get us up to the top
        steps = int(round(math.log(args.limit) / math.log(base)))
        # The two series overlap (both start at 1), so the combined list
        # holds duplicates; it is passed on as-is.
        sizes.extend(int(base**x) for x in range(steps + 1))

    run_many(cache, args.variants, sizes, args.functions)
def main():
    """Load the result cache, run the benchmarks, then save the cache.

    The cache lives in 'cache.txt' in the current directory, one result
    line per entry, keyed by the first seven colon-separated fields of
    the line.  The file is rewritten, sorted, even when the run is
    interrupted or fails part way through.
    """
    cachename = 'cache.txt'

    cache = {}

    try:
        with open(cachename) as f:
            for line in f:
                line = line.strip()

                if not line:
                    # Blank lines carry no result; don't cache them.
                    continue

                parts = line.split(':')
                cache[':'.join(parts[:7])] = line
    except (IOError, OSError, ValueError):
        # Best effort: a missing, unreadable or undecodable cache file
        # just means continuing with whatever was loaded so far.
        pass

    try:
        run_top(cache)
    finally:
        # Always save, so results gathered before a failure are kept.
        with open(cachename, 'w') as f:
            for line in sorted(cache.values()):
                f.write(line + '\n')


if __name__ == '__main__':
    main()