glibc/sysdeps/aarch64/Makefile
Adhemerval Zanella Netto 4c128c7823 aarch64: Add optimized chacha20
It adds a vectorized ChaCha20 implementation based on libgcrypt
cipher/chacha20-aarch64.S.  It is used by default, and only
little-endian is supported (BE uses the generic code).

As with the generic implementation, the last step that XORs with the
input is omitted.  The final state register clearing is also
omitted.

On a virtualized Linux on Apple M1 it shows the following
improvements (using formatted bench-arc4random data):

GENERIC                                    MB/s
-----------------------------------------------
arc4random [single-thread]               380.89
arc4random_buf(16) [single-thread]       500.73
arc4random_buf(32) [single-thread]       552.61
arc4random_buf(48) [single-thread]       566.82
arc4random_buf(64) [single-thread]       574.01
arc4random_buf(80) [single-thread]       581.02
arc4random_buf(96) [single-thread]       591.19
arc4random_buf(112) [single-thread]      592.29
arc4random_buf(128) [single-thread]      596.43
-----------------------------------------------

OPTIMIZED                                  MB/s
-----------------------------------------------
arc4random [single-thread]               569.60
arc4random_buf(16) [single-thread]       825.78
arc4random_buf(32) [single-thread]       987.03
arc4random_buf(48) [single-thread]      1042.39
arc4random_buf(64) [single-thread]      1075.50
arc4random_buf(80) [single-thread]      1094.68
arc4random_buf(96) [single-thread]      1130.16
arc4random_buf(112) [single-thread]     1129.58
arc4random_buf(128) [single-thread]     1137.91
-----------------------------------------------

Checked on aarch64-linux-gnu.
2022-07-22 11:58:27 -03:00

75 lines
1.7 KiB
Makefile

# Set long-double-fcts to yes for this sysdeps directory.
# NOTE(review): presumably read by the math build to enable the long double
# function variants -- confirm against the consumer of this variable.
long-double-fcts = yes
# Extra link flags that apply only when aarch64-bti is set to yes.
ifeq ($(aarch64-bti),yes)
# Outside the test system, linker warnings are promoted to errors.
LDFLAGS-lib.so += -Wl,--fatal-warnings
# Ask the linker to mark its output as BTI compatible; it warns about any
# input that is not.
sysdep-LDFLAGS += -Wl,-z,force-bti
# rtld gets both the forced BTI marking and fatal warnings.
LDFLAGS-rtld += -Wl,-z,force-bti,--fatal-warnings
endif
# elf subdir: the dl-bti routine plus the tst-audit26 / tst-audit27 tests.
ifeq ($(subdir),elf)
sysdep-dl-routines += dl-bti

# tst-audit26: depends on its two modules, is linked with -z lazy, and its
# -ENV variable sets LD_AUDIT to tst-auditmod26.so.
tests += tst-audit26
modules-names += tst-audit26mod tst-auditmod26
$(objpfx)tst-audit26: $(objpfx)tst-audit26mod.so $(objpfx)tst-auditmod26.so
LDFLAGS-tst-audit26 += -Wl,-z,lazy
tst-audit26-ENV = LD_AUDIT=$(objpfx)tst-auditmod26.so

# tst-audit27: same arrangement as tst-audit26; in addition
# tst-audit27mod.so depends on $(libsupport).
tests += tst-audit27
modules-names += tst-audit27mod tst-auditmod27
$(objpfx)tst-audit27: $(objpfx)tst-audit27mod.so $(objpfx)tst-auditmod27.so
$(objpfx)tst-audit27mod.so: $(libsupport)
LDFLAGS-tst-audit27 += -Wl,-z,lazy
tst-audit27-ENV = LD_AUDIT=$(objpfx)tst-auditmod27.so
endif
# elf subdir: rtld start-up and TLS descriptor routines, the dl-link.sym
# constants header, the ifunc-argument tests, and (optionally) the variant
# PCS test.
ifeq ($(subdir),elf)
gen-as-const-headers += dl-link.sym
sysdep-rtld-routines += dl-start
sysdep-dl-routines += \
  tlsdesc \
  dl-tlsdesc
tests-internal += \
  tst-ifunc-arg-1 \
  tst-ifunc-arg-2

# tst-vpcs is only added when aarch64-variant-pcs is yes.  Its module is
# linked with -z lazy.
ifeq ($(aarch64-variant-pcs),yes)
modules-names += tst-vpcs-mod
tests += tst-vpcs
$(objpfx)tst-vpcs: $(objpfx)tst-vpcs-mod.so
LDFLAGS-tst-vpcs-mod.so = -Wl,-z,lazy
endif
endif
# csu subdir: add tlsdesc.sym to the list of gen-as-const headers.
ifeq ($(subdir),csu)
gen-as-const-headers += tlsdesc.sym
endif
# stdlib subdir: add the AArch64 ChaCha20 routine (chacha20-aarch64) to the
# sysdep routines.
ifeq ($(subdir),stdlib)
sysdep_routines += chacha20-aarch64
endif
# gmon subdir: compile mcount.c with -mgeneral-regs-only.
# NOTE(review): presumably so that mcount does not touch FP/SIMD registers
# -- confirm the rationale before changing this flag.
ifeq ($(subdir),gmon)
CFLAGS-mcount.c += -mgeneral-regs-only
endif
# math subdir: add ../soft-fp to the preprocessor include path.
ifeq ($(subdir),math)
CPPFLAGS += -I../soft-fp
endif
# misc subdir: add the two __mtag_tag_* routines and the sys/ifunc.h header.
ifeq ($(subdir),misc)
sysdep_routines += __mtag_tag_zero_region __mtag_tag_region
sysdep_headers += sys/ifunc.h
endif
# malloc subdir: the malloc debug routines are set to the two __mtag_tag_*
# routines.  This is a plain assignment, not an append.
ifeq ($(subdir),malloc)
sysdep_malloc_debug_routines = \
  __mtag_tag_zero_region \
  __mtag_tag_region
endif