Add generic hp-timing support
Add missing generic hp_timing support. It uses clock_gettime (CLOCK_MONOTONIC), which has an unspecified starting time and nanosecond accuracy, and should be faster on architectures that implement the symbol as a vDSO. Checked on aarch64-linux-gnu, x86_64-linux-gnu, and i686-linux-gnu. I also checked the builds for all affected ABIs. * benchtests/Makefile (USE_CLOCK_GETTIME): Remove. * benchtests/README: Update description. * benchtests/bench-timing.h: Default to hp-timing. * sysdeps/generic/hp-timing.h (HP_TIMING_DIFF, HP_TIMING_ACCUM_NT, HP_TIMING_PRINT): Remove. (HP_TIMING_NOW): Add generic implementation. (hp_timing_t): Change to uint64_t.
This commit is contained in:
parent
1e372ded4f
commit
7621e38bf3
|
@ -1,5 +1,13 @@
|
|||
2019-03-22 Adhemerval Zanella <adhemerval.zanella@linaro.org>
|
||||
|
||||
* benchtests/Makefile (USE_CLOCK_GETTIME): Remove.
|
||||
* benchtests/README: Update description.
|
||||
* benchtests/bench-timing.h: Default to hp-timing.
|
||||
* sysdeps/generic/hp-timing.h (HP_TIMING_DIFF, HP_TIMING_ACCUM_NT,
|
||||
HP_TIMING_PRINT): Remove.
|
||||
(HP_TIMING_NOW): Add generic implementation.
|
||||
(hp_timing_t): Change to uint64_t.
|
||||
|
||||
* benchtests/bench-timing.h: Replace HP_TIMING_AVAIL with
|
||||
HP_TIMING_INLINE.
|
||||
* nptl/descr.h: Likewise.
|
||||
|
|
|
@ -127,17 +127,11 @@ endif
|
|||
|
||||
CPPFLAGS-nonlib += -DDURATION=$(BENCH_DURATION) -D_ISOMAC
|
||||
|
||||
# Use clock_gettime to measure performance of functions. The default is to use
|
||||
# HP_TIMING if it is available.
|
||||
ifdef USE_CLOCK_GETTIME
|
||||
CPPFLAGS-nonlib += -DUSE_CLOCK_GETTIME
|
||||
else
|
||||
# On x86 processors, use RDTSCP, instead of RDTSC, to measure performance
|
||||
# of functions. All x86 processors since 2010 support RDTSCP instruction.
|
||||
ifdef USE_RDTSCP
|
||||
CPPFLAGS-nonlib += -DUSE_RDTSCP
|
||||
endif
|
||||
endif
|
||||
|
||||
DETAILED_OPT :=
|
||||
|
||||
|
|
|
@ -27,12 +27,7 @@ BENCH_DURATION.
|
|||
|
||||
The benchmark suite does function call measurements using architecture-specific
|
||||
high precision timing instructions whenever available. When such support is
|
||||
not available, it uses clock_gettime (CLOCK_PROCESS_CPUTIME_ID). One can force
|
||||
the benchmark to use clock_gettime by invoking make as follows:
|
||||
|
||||
$ make USE_CLOCK_GETTIME=1 bench
|
||||
|
||||
Again, one must run `make bench-clean' before changing the measurement method.
|
||||
not available, it uses clock_gettime (CLOCK_MONOTONIC).
|
||||
|
||||
On x86 processors, RDTSCP instruction provides more precise timing data
|
||||
than RDTSC instruction. All x86 processors since 2010 support RDTSCP
|
||||
|
|
|
@ -18,49 +18,21 @@
|
|||
|
||||
#undef attribute_hidden
|
||||
#define attribute_hidden
|
||||
#define __clock_gettime clock_gettime
|
||||
#include <hp-timing.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#if HP_TIMING_INLINE && !defined USE_CLOCK_GETTIME
|
||||
# define GL(x) _##x
|
||||
# define GLRO(x) _##x
|
||||
#define GL(x) _##x
|
||||
#define GLRO(x) _##x
|
||||
typedef hp_timing_t timing_t;
|
||||
|
||||
# define TIMING_TYPE "hp_timing"
|
||||
#define TIMING_TYPE "hp_timing"
|
||||
|
||||
# define TIMING_INIT(res) ({ (res) = 1; })
|
||||
#define TIMING_INIT(res) ({ (res) = 1; })
|
||||
|
||||
# define TIMING_NOW(var) HP_TIMING_NOW (var)
|
||||
# define TIMING_DIFF(diff, start, end) HP_TIMING_DIFF ((diff), (start), (end))
|
||||
# define TIMING_ACCUM(sum, diff) HP_TIMING_ACCUM_NT ((sum), (diff))
|
||||
|
||||
#else
|
||||
|
||||
#include <time.h>
|
||||
typedef uint64_t timing_t;
|
||||
|
||||
# define TIMING_TYPE "clock_gettime"
|
||||
|
||||
/* Measure the resolution of the clock so we can scale the number of
|
||||
benchmark iterations by this value. */
|
||||
# define TIMING_INIT(res) \
|
||||
({ \
|
||||
struct timespec start; \
|
||||
clock_getres (CLOCK_PROCESS_CPUTIME_ID, &start); \
|
||||
(res) = start.tv_nsec; \
|
||||
})
|
||||
|
||||
# define TIMING_NOW(var) \
|
||||
({ \
|
||||
struct timespec tv; \
|
||||
clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &tv); \
|
||||
(var) = (uint64_t) (tv.tv_nsec + (uint64_t) 1000000000 * tv.tv_sec); \
|
||||
})
|
||||
|
||||
# define TIMING_DIFF(diff, start, end) (diff) = (end) - (start)
|
||||
# define TIMING_ACCUM(sum, diff) (sum) += (diff)
|
||||
|
||||
#endif
|
||||
#define TIMING_NOW(var) HP_TIMING_NOW (var)
|
||||
#define TIMING_DIFF(diff, start, end) HP_TIMING_DIFF ((diff), (start), (end))
|
||||
#define TIMING_ACCUM(sum, diff) HP_TIMING_ACCUM_NT ((sum), (diff))
|
||||
|
||||
#define TIMING_PRINT_MEAN(d_total_s, d_iters) \
|
||||
printf ("\t%g", (d_total_s) / (d_iters))
|
||||
|
|
|
@ -20,16 +20,23 @@
|
|||
#ifndef _HP_TIMING_H
|
||||
#define _HP_TIMING_H 1
|
||||
|
||||
/* There are no generic definitions for the times. We could write something
|
||||
using the `gettimeofday' system call where available but the overhead of
|
||||
the system call might be too high. */
|
||||
#include <time.h>
|
||||
#include <stdint.h>
|
||||
#include <hp-timing-common.h>
|
||||
|
||||
/* Provide dummy definitions. */
|
||||
/* It should not be used for ld.so. */
|
||||
#define HP_TIMING_INLINE (0)
|
||||
typedef int hp_timing_t;
|
||||
#define HP_TIMING_NOW(var)
|
||||
#define HP_TIMING_DIFF(Diff, Start, End)
|
||||
#define HP_TIMING_ACCUM_NT(Sum, Diff)
|
||||
#define HP_TIMING_PRINT(Buf, Len, Val)
|
||||
|
||||
typedef uint64_t hp_timing_t;
|
||||
|
||||
/* The clock_gettime (CLOCK_MONOTONIC) has unspecified starting time,
|
||||
nano-second accuracy, and for some architectures is implemented as
|
||||
vDSO symbol. */
|
||||
#define HP_TIMING_NOW(var) \
|
||||
({ \
|
||||
struct timespec tv; \
|
||||
__clock_gettime (CLOCK_MONOTONIC, &tv); \
|
||||
(var) = (tv.tv_nsec + UINT64_C(1000000000) * tv.tv_sec); \
|
||||
})
|
||||
|
||||
#endif /* hp-timing.h */
|
||||
|
|
Loading…
Reference in a new issue