Begin porting string performance tests to benchtests

This is the initial support for string function performance tests,
along with copying tests for memcpy and memcpy-ifunc as proof of
concept.  The string function benchmarks perform operations at
different alignments and for different sizes, and compare performance
between plain operations and the optimized string operations.  Because
of this, their output is incompatible with that of the function
benchmarks, where we're interested in fastest time, throughput, etc.

In the future, the correctness checks in the benchmark tests can be
removed.  The same goes for the performance measurements in
string/test-*.
This commit is contained in:
Siddhesh Poyarekar 2013-04-16 17:37:24 +05:30
parent 50fd745b4d
commit c1f75dc386
6 changed files with 447 additions and 3 deletions

View file

@ -1,3 +1,16 @@
2013-06-11 Siddhesh Poyarekar <siddhesh@redhat.com>
* benchtests/Makefile: Disable parallel execution of targets.
(string-bench): Add memcpy.
(benchset): New variable to store a list of benchmark sets.
(bench-func): Renamed from bench.
(bench-set): New target.
(bench): Depend on bench-func and bench-set.
* benchtests/README: Add section on benchmark sets.
* benchtests/bench-memcpy-ifunc.c: New file.
* benchtests/bench-memcpy.c: New file.
* benchtests/bench-string.h: New file.
2013-06-11 Andreas Schwab <schwab@suse.de>
[BZ #15577]

View file

@ -23,6 +23,13 @@ subdir := benchtests
bench := acos acosh asin asinh atan atanh cos cosh exp log modf pow rint sin \
sinh tan tanh
# String function benchmarks.  Every name in string-bench also gets an
# -ifunc variant; the combined list is the benchmark set, whose entries are
# prefixed with bench- to form the binaries that bench-set builds and runs.
string-bench := memcpy
string-bench-ifunc := $(addsuffix -ifunc, $(string-bench))
string-bench-all := $(string-bench) $(string-bench-ifunc)
benchset := $(string-bench-all)
acos-ARGLIST = double
acos-RET = double
LDFLAGS-bench-acos = -lm
@ -92,10 +99,15 @@ LDFLAGS-bench-tanh = -lm
# Rules to build and execute the benchmarks. Do not put any benchmark
# parameters beyond this point.
# We don't want the benchmark programs to run in parallel since that could
# affect their performance.
.NOTPARALLEL:
include ../Makeconfig
include ../Rules
binaries-bench := $(addprefix $(objpfx)bench-,$(bench))
binaries-benchset := $(addprefix $(objpfx)bench-,$(benchset))
# The default duration: 10 seconds.
ifndef BENCH_DURATION
@ -112,7 +124,7 @@ endif
# This makes sure CPPFLAGS-nonlib and CFLAGS-nonlib are passed
# for all these modules.
cpp-srcs-left := $(binaries-bench:=.c)
cpp-srcs-left := $(binaries-benchset:=.c) $(binaries-bench:=.c)
lib := nonlib
include $(patsubst %,$(..)cppflags-iterator.mk,$(cpp-srcs-left))
@ -124,8 +136,17 @@ run-bench = $(test-wrapper-env) \
bench-clean:
rm -f $(binaries-bench) $(addsuffix .o,$(binaries-bench))
rm -f $(binaries-benchset) $(addsuffix .o,$(binaries-benchset))
bench: $(binaries-bench)
bench: bench-set bench-func
# Run the benchmark-set binaries one after another, redirecting the output
# of each run into a per-binary <binary>.out file.
bench-set: $(binaries-benchset)
for run in $^; do \
echo "Running $${run}"; \
$(run-bench) > $${run}.out; \
done
bench-func: $(binaries-bench)
{ for run in $^; do \
echo "Running $${run}" >&2; \
$(run-bench); \
@ -135,7 +156,7 @@ bench: $(binaries-bench)
fi; \
mv -f $(objpfx)bench.out-tmp $(objpfx)bench.out
$(binaries-bench): %: %.o \
$(binaries-bench) $(binaries-benchset): %: %.o \
$(sort $(filter $(common-objpfx)lib%,$(link-libc))) \
$(addprefix $(csu-objpfx),start.o) $(+preinit) $(+postinit)
$(+link)

View file

@ -72,3 +72,18 @@ the same file by using the `name' directive that looks something like this:
See the pow-inputs file for an example of what such a partitioned input file
would look like.
Benchmark Sets:
==============
In addition to standard benchmarking of functions, one may also generate
custom outputs for a set of functions. This is currently used by string
function benchmarks where the aim is to compare performance between
implementations at various alignments and for various sizes.
To add a benchset for `foo':
- Add `foo' to the benchset variable in the Makefile.
- Write a bench-foo.c that prints its measurements to stdout.
- On execution, bench-foo.out is created in $(objpfx) with the contents of
stdout.

View file

@ -0,0 +1,20 @@
/* Measure IFUNC implementations of memcpy function.
Copyright (C) 2013 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
/* With TEST_IFUNC defined, bench-string.h additionally enumerates the
   implementations reported by __libc_ifunc_impl_list for TEST_NAME.  The
   benchmark code itself is shared with bench-memcpy.c.  */
#define TEST_IFUNC 1
#include "bench-memcpy.c"

163
benchtests/bench-memcpy.c Normal file
View file

@ -0,0 +1,163 @@
/* Measure memcpy functions.
Copyright (C) 2013 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
#ifndef MEMCPY_RESULT
/* Default configuration, used unless the including file has already
   defined MEMCPY_RESULT.  */

/* Pointer an implementation is expected to return after copying LEN bytes
   to DST.  */
# define MEMCPY_RESULT(dst, len) dst
/* Lower bound that test_init applies to page_size.  */
# define MIN_PAGE_SIZE 131072
/* Makes bench-string.h emit its includes, globals and test_init.  */
# define TEST_MAIN
/* Name passed to __libc_ifunc_impl_list when TEST_IFUNC is defined; the
   statically registered implementation with this name is then skipped by
   FOR_EACH_IMPL.  */
# define TEST_NAME "memcpy"
# include "bench-string.h"
/* Reference implementations measured alongside memcpy.  */
char *simple_memcpy (char *, const char *, size_t);
char *builtin_memcpy (char *, const char *, size_t);
/* Register the implementations.  The second argument is stored in the
   `test' field, which FOR_EACH_IMPL consults only when its NOTALL
   argument is nonzero.  */
IMPL (simple_memcpy, 0)
IMPL (builtin_memcpy, 0)
IMPL (memcpy, 1)
/* Byte-at-a-time reference copy: store the first N bytes of SRC into DST
   and return DST.  */
char *
simple_memcpy (char *dst, const char *src, size_t n)
{
  for (size_t pos = 0; pos < n; ++pos)
    dst[pos] = src[pos];

  return dst;
}
/* Wrapper that exposes the compiler's __builtin_memcpy through the same
   prototype as the other measured implementations.  */
char *
builtin_memcpy (char *dst, const char *src, size_t n)
{
  char *copied = __builtin_memcpy (dst, src, n);

  return copied;
}
#endif
/* Prototype through which every measured implementation is called; CALL
   casts the generic function pointer stored in impl_t to this type.  */
typedef char *(*proto_t) (char *, const char *, size_t);
/* Check and time one implementation.

   IMPL is called to copy LEN bytes from SRC to DST.  If the pointer it
   returns differs from MEMCPY_RESULT (dst, len), or the first LEN bytes
   of DST do not compare equal to SRC afterwards, an error is reported,
   the global `ret' is set to 1 and no timing takes place.  Otherwise,
   when HP_TIMING_AVAIL is nonzero, the copy is repeated 32 times and the
   best time is printed to stdout, preceded by a tab.  */
static void
do_one_test (impl_t *impl, char *dst, const char *src,
             size_t len)
{
  /* Keep the returned pointer so that the diagnostic reports the value
     that actually failed the check instead of calling IMPL again.  */
  char *res = CALL (impl, dst, src, len);

  if (res != MEMCPY_RESULT (dst, len))
    {
      error (0, 0, "Wrong result in function %s %p %p", impl->name,
             res, MEMCPY_RESULT (dst, len));
      ret = 1;
      return;
    }

  if (memcmp (dst, src, len) != 0)
    {
      /* The buffers hold LEN bytes of pattern data and are not
         NUL-terminated at LEN, so bound the output with a precision
         rather than letting %s run past the compared region.  */
      error (0, 0, "Wrong result in function %s dst \"%.*s\" src \"%.*s\"",
             impl->name, (int) len, dst, (int) len, src);
      ret = 1;
      return;
    }

  if (HP_TIMING_AVAIL)
    {
      hp_timing_t start __attribute ((unused));
      hp_timing_t stop __attribute ((unused));
      hp_timing_t best_time = ~ (hp_timing_t) 0;
      size_t i;

      for (i = 0; i < 32; ++i)
        {
          HP_TIMING_NOW (start);
          CALL (impl, dst, src, len);
          HP_TIMING_NOW (stop);
          HP_TIMING_BEST (best_time, start, stop);
        }

      /* The value is cast to size_t, so use the unsigned conversion.  */
      printf ("\t%zu", (size_t) best_time);
    }
}
/* Run every implementation for one alignment/length combination.

   ALIGN1 and ALIGN2 are reduced modulo 64 and used as the offsets of the
   source within buf1 and of the destination within buf2.  The case is
   silently skipped when an offset plus LEN reaches page_size.  The source
   is filled with a byte pattern before the implementations are run.  When
   HP_TIMING_AVAIL is nonzero, a row label is printed before and a newline
   after the per-implementation results.  */
static void
do_test (size_t align1, size_t align2, size_t len)
{
  size_t i, j;
  char *s1, *s2;

  align1 &= 63;
  if (align1 + len >= page_size)
    return;

  align2 &= 63;
  if (align2 + len >= page_size)
    return;

  s1 = (char *) (buf1 + align1);
  s2 = (char *) (buf2 + align2);

  for (i = 0, j = 1; i < len; i++, j += 23)
    s1[i] = j;

  /* All three values are size_t, so use the unsigned %zu conversion.  */
  if (HP_TIMING_AVAIL)
    printf ("Length %4zu, alignment %2zu/%2zu:", len, align1, align2);

  FOR_EACH_IMPL (impl, 0)
    do_one_test (impl, s2, s1, len);

  if (HP_TIMING_AVAIL)
    putchar ('\n');
}
/* Run do_test for LEN with ALIGN applied to neither buffer, to the source
   only, to the destination only, and to both, in that order.  */
static void
do_test_alignments (size_t align, size_t len)
{
  do_test (0, 0, len);
  do_test (align, 0, len);
  do_test (0, align, len);
  do_test (align, align, len);
}

/* Benchmark driver: initialize the buffers, print a header row naming the
   implementations, then run the alignment/length matrix.  Returns the
   global `ret'.  */
int
test_main (void)
{
  size_t n;

  test_init ();

  /* Header row: an empty 23-column field, then one name per column.  */
  printf ("%23s", "");
  FOR_EACH_IMPL (impl, 0)
    printf ("\t%s", impl->name);
  putchar ('\n');

  /* Power-of-two lengths 1 << 0 through 1 << 17.  */
  for (n = 0; n < 18; ++n)
    do_test_alignments (n, 1 << n);

  /* Every length from 0 through 31.  */
  for (n = 0; n < 32; ++n)
    do_test_alignments (n, n);

  /* Multiples of 16 whose multiplier is not a power of two.  */
  for (n = 3; n < 32; ++n)
    if ((n & (n - 1)) != 0)
      do_test_alignments (n, 16 * n);

  do_test (0, 0, getpagesize ());

  return ret;
}
#include "../test-skeleton.c"

212
benchtests/bench-string.h Normal file
View file

@ -0,0 +1,212 @@
/* Measure string and memory functions.
Copyright (C) 2013 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
#include <sys/cdefs.h>
/* Descriptor of one implementation under test.  Instances are created by
   IMPL in the `impls' section and are walked as an array of impl_t from
   __start_impls to __stop_impls.  The field order matters: IMPL
   initializes the structure positionally.  */
typedef struct
{
  /* Implementation name; FOR_EACH_IMPL compares it against TEST_NAME in
     the IFUNC configuration.  */
  const char *name;
  /* Entry point stored as a generic function pointer; CALL casts it to
     the benchmark's proto_t before calling.  */
  void (*fn) (void);
  /* Flag examined by FOR_EACH_IMPL when its NOTALL argument is nonzero.  */
  long test;
} impl_t;
/* Bounds of the `impls' section.  NOTE(review): these are presumably the
   __start_SECNAME / __stop_SECNAME symbols that the linker synthesizes
   for a section whose name is a valid C identifier -- confirm against
   the linker documentation.  */
extern impl_t __start_impls[], __stop_impls[];
/* Define an impl_t named tst_NAME in the `impls' section, aligned to the
   size of a pointer, holding NAME as a string, NAME cast to a generic
   function pointer, and TEST.  The expansion already ends in a semicolon,
   so uses are written without one.  */
#define IMPL(name, test) \
impl_t tst_ ## name \
__attribute__ ((section ("impls"), aligned (sizeof (void *)))) \
= { __STRING (name), (void (*) (void))name, test };
#ifdef TEST_MAIN
# ifndef _GNU_SOURCE
# define _GNU_SOURCE
# endif
# undef __USE_STRING_INLINES
# include <stdio.h>
# include <stdlib.h>
# include <string.h>
# include <sys/mman.h>
# include <sys/param.h>
# include <unistd.h>
# include <fcntl.h>
# include <error.h>
# include <errno.h>
# include <time.h>
# include <ifunc-impl-list.h>
# define GL(x) _##x
# define GLRO(x) _##x
# include <hp-timing.h>
/* Entry-point expression.  NOTE(review): presumably consumed by the test
   skeleton that the benchmark includes last -- confirm.  */
# define TEST_FUNCTION test_main ()
/* NOTE(review): presumably a run-time limit in seconds enforced by the
   test skeleton -- confirm.  */
# define TIMEOUT (4 * 60)
/* Option codes used by CMDLINE_OPTIONS and CMDLINE_PROCESS.  */
# define OPT_ITERATIONS 10000
# define OPT_RANDOM 10001
# define OPT_SEED 10002
/* Work buffers mapped by test_init; each is followed by a region made
   inaccessible with mprotect.  */
unsigned char *buf1, *buf2;
/* ret: benchmark status, available for the benchmark to set and return.
   do_srandom: nonzero when test_init should call srandom with `seed'.  */
int ret, do_srandom;
/* Seed set by the "random" and "seed" command-line options.  */
unsigned int seed;
/* Set by test_init to twice the system page size, raised to
   MIN_PAGE_SIZE when that macro is defined and larger.  */
size_t page_size;
/* Added to the start timestamp by HP_TIMING_BEST before the difference
   is taken.  */
hp_timing_t _dl_hp_timing_overhead;
/* Iteration count.  Unless the including file predefines ITERATIONS, it
   is the variable `iterations' (initially 100000), which can be changed
   through a long option named "iterations": ITERATIONS_OPTIONS supplies
   the option-table entry and ITERATIONS_PROCESS the switch case that
   parses the argument with strtoul.  When ITERATIONS is predefined, both
   helper macros expand to nothing.  */
# ifndef ITERATIONS
size_t iterations = 100000;
# define ITERATIONS_OPTIONS \
{ "iterations", required_argument, NULL, OPT_ITERATIONS },
# define ITERATIONS_PROCESS \
case OPT_ITERATIONS: \
iterations = strtoul (optarg, NULL, 0); \
break;
# define ITERATIONS iterations
# else
# define ITERATIONS_OPTIONS
# define ITERATIONS_PROCESS
# endif
/* Option-table entries: whatever ITERATIONS_OPTIONS contributes, plus
   "random" (no argument) and "seed" (argument required).  */
# define CMDLINE_OPTIONS ITERATIONS_OPTIONS \
{ "random", no_argument, NULL, OPT_RANDOM }, \
{ "seed", required_argument, NULL, OPT_SEED },
/* Switch cases handling those options.  OPT_RANDOM fills `seed' from
   /dev/urandom, falling back to time (NULL) when the device cannot be
   opened or does not deliver sizeof (seed) bytes.  OPT_SEED parses the
   seed from the option argument with strtoul in base 0.  Both set
   do_srandom so that test_init seeds the generator.  */
# define CMDLINE_PROCESS ITERATIONS_PROCESS \
case OPT_RANDOM: \
{ \
int fdr = open ("/dev/urandom", O_RDONLY); \
\
if (fdr < 0 || read (fdr, &seed, sizeof(seed)) != sizeof (seed)) \
seed = time (NULL); \
if (fdr >= 0) \
close (fdr); \
do_srandom = 1; \
break; \
} \
\
case OPT_SEED: \
seed = strtoul (optarg, NULL, 0); \
do_srandom = 1; \
break;
/* Call IMPL's function with the given arguments after casting the stored
   generic pointer to proto_t, a type the including benchmark must
   define.  */
# define CALL(impl, ...) \
(* (proto_t) (impl)->fn) (__VA_ARGS__)
# if defined TEST_IFUNC && defined TEST_NAME
/* Increase size of FUNC_LIST if assert is triggered at run-time.  */
static struct libc_ifunc_impl func_list[32];
/* Value returned by __libc_ifunc_impl_list in test_init.  */
static int func_count;
/* Number of entries in impl_array; -1 until FOR_EACH_IMPL first runs.  */
static int impl_count = -1;
/* Implementations to iterate over, set up on first use of
   FOR_EACH_IMPL.  */
static impl_t *impl_array;

/* Loop header iterating IMPL over the implementations to run; the
   statement that follows the macro is the loop body.  An entry is
   visited when NOTALL is zero or its `test' field is nonzero.

   On first use the list is built.  If func_count is nonzero, it consists
   of every statically registered impl_t except the one whose name equals
   TEST_NAME, followed by each usable entry of func_list with `test' set
   to 1.  Otherwise the `impls' section is used directly.

   The macro declares IMPL and a variable named `count' in the enclosing
   scope, so it can be used at most once per scope.  */
# define FOR_EACH_IMPL(impl, notall) \
  impl_t *impl;								\
  int count;								\
  if (impl_count == -1)							\
    {									\
      impl_count = 0;							\
      if (func_count != 0)						\
	{								\
	  int f;							\
	  impl_t *skip = NULL, *a;					\
	  for (impl = __start_impls; impl < __stop_impls; ++impl)	\
	    if (strcmp (impl->name, TEST_NAME) == 0)			\
	      skip = impl;						\
	    else							\
	      impl_count++;						\
	  a = impl_array = malloc ((impl_count + func_count) *		\
				   sizeof (impl_t));			\
	  /* func_count is nonzero here, so the requested size is	\
	     nonzero and a null result means the allocation failed.  */	\
	  if (impl_array == NULL)					\
	    error (EXIT_FAILURE, errno, "malloc failed");		\
	  for (impl = __start_impls; impl < __stop_impls; ++impl)	\
	    if (impl != skip)						\
	      *a++ = *impl;						\
	  for (f = 0; f < func_count; f++)				\
	    if (func_list[f].usable)					\
	      {								\
		a->name = func_list[f].name;				\
		a->fn = func_list[f].fn;				\
		a->test = 1;						\
		a++;							\
	      }								\
	  impl_count = a - impl_array;					\
	}								\
      else								\
	{								\
	  impl_count = __stop_impls - __start_impls;			\
	  impl_array = __start_impls;					\
	}								\
    }									\
  impl = impl_array;							\
  for (count = 0; count < impl_count; ++count, ++impl)			\
    if (!notall || impl->test)
# else /* ! (defined TEST_IFUNC && defined TEST_NAME) */
/* Loop header iterating IMPL over every impl_t in the `impls' section;
   an entry is visited when NOTALL is zero or its `test' field is
   nonzero.  */
# define FOR_EACH_IMPL(impl, notall) \
  for (impl_t *impl = __start_impls; impl < __stop_impls; ++impl)	\
    if (!notall || impl->test)
# endif /* ! (defined TEST_IFUNC && defined TEST_NAME) */
/* Use HP_TIMING_DIFF to compute the difference between END and START
   plus _dl_hp_timing_overhead, and store the result in BEST_TIME when it
   is smaller than the value already held there.  */
# define HP_TIMING_BEST(best_time, start, end) \
do \
{ \
hp_timing_t tmptime; \
HP_TIMING_DIFF (tmptime, start + _dl_hp_timing_overhead, end); \
if (best_time > tmptime) \
best_time = tmptime; \
} \
while (0)
/* Number of page_size-sized units of buf1 that test_init leaves
   accessible.  It defaults to 1 unless defined before this point.  */
# ifndef BUF1PAGES
# define BUF1PAGES 1
# endif
/* Map USABLE + GUARD bytes of private anonymous read/write memory and
   make the trailing GUARD bytes inaccessible.  Terminates the program
   with a diagnostic if either step fails.  */
static unsigned char *
map_guarded (size_t usable, size_t guard)
{
  unsigned char *area = mmap (0, usable + guard, PROT_READ | PROT_WRITE,
			      MAP_PRIVATE | MAP_ANON, -1, 0);

  if (area == MAP_FAILED)
    error (EXIT_FAILURE, errno, "mmap failed");
  if (mprotect (area + usable, guard, PROT_NONE))
    error (EXIT_FAILURE, errno, "mprotect failed");

  return area;
}

/* Set up the benchmark environment: query the IFUNC implementation list
   (in the IFUNC configuration), choose page_size, map buf1 and buf2 with
   an inaccessible region after each, initialize the timing overhead,
   seed the random generator if requested, and fill the accessible parts
   of buf1 with 0xa5 and of buf2 with 0x5a.  */
static void
test_init (void)
{
# if defined TEST_IFUNC && defined TEST_NAME
  func_count = __libc_ifunc_impl_list (TEST_NAME, func_list,
				       (sizeof func_list
					/ sizeof func_list[0]));
# endif

  page_size = 2 * getpagesize ();
# ifdef MIN_PAGE_SIZE
  if (page_size < MIN_PAGE_SIZE)
    page_size = MIN_PAGE_SIZE;
# endif

  /* buf1 has BUF1PAGES usable units, buf2 a single one; each is followed
     by one guard unit of page_size bytes.  */
  buf1 = map_guarded (BUF1PAGES * page_size, page_size);
  buf2 = map_guarded (page_size, page_size);

  HP_TIMING_DIFF_INIT ();

  if (do_srandom)
    {
      printf ("Setting seed to 0x%x\n", seed);
      srandom (seed);
    }

  memset (buf1, 0xa5, BUF1PAGES * page_size);
  memset (buf2, 0x5a, page_size);
}
#endif /* TEST_MAIN */