ARM: Use movw/movt more when available

This commit is contained in:
Roland McGrath 2014-10-22 14:20:35 -07:00
parent b5af9297d5
commit 8c2b1ed8bb
9 changed files with 209 additions and 69 deletions

View File

@ -1,3 +1,30 @@
2014-10-22 Roland McGrath <roland@hack.frob.com>
* sysdeps/arm/__longjmp.S [NEED_HWCAP] [IS_IN_rtld]: Use LDST_PCREL
macro to get at the _rtld_local_ro field.
[NEED_HWCAP] [!IS_IN_rtld]: Use LDR_GLOBAL to get at _rtld_global_ro
([PIC] case) or _dl_hwcap ([!PIC] case).
* sysdeps/arm/setjmp.S: Likewise.
* config.h.in (ARM_PCREL_MOVW_OK): New macro.
* sysdeps/arm/configure.ac: New check to define it.
* sysdeps/arm/configure: Regenerated.
* sysdeps/arm/sysdep.h [__ASSEMBLER__]: Include <arm-features.h>.
(LDST_INDEXED_NOINDEX, LDST_INDEXED_INDEX): New macros.
(LDST_INDEXED, LDST_PC_INDEXED): New macros, differing definitions
depending on [ARM_NO_INDEX_REGISTER] and [__thumb2__].
(LDST_PCREL) [!__thumb2__ && ARCH_HAS_T2 && ARM_PCREL_MOVW_OK]:
Use movw/movt pair instead of a load.
(LDST_GLOBAL): Macro removed.
(LDR_GLOBAL): New macro replaces it.
(LDR_HIDDEN): New macro.
(PTR_MANGLE_LOAD): Use LDR_GLOBAL rather than LDST_GLOBAL.
Use LDR_HIDDEN instead for __pointer_chk_guard_local.
* setjmp/tst-setjmp-static.c: New file.
* setjmp/Makefile (tests): Add it.
(tests-static): New variable.
2014-10-22 Maciej W. Rozycki <macro@codesourcery.com>
[BZ #17485]

View File

@ -243,6 +243,9 @@
/* The ARM hard-float ABI is being used. */
#undef HAVE_ARM_PCS_VFP
/* The ARM movw/movt instructions using PC-relative relocs work right. */
#define ARM_PCREL_MOVW_OK 0
/* The pt_chown binary is being built and used by grantpt. */
#define HAVE_PT_CHOWN 0

View File

@ -28,7 +28,8 @@ routines := setjmp sigjmp bsd-setjmp bsd-_setjmp \
longjmp __longjmp jmp-unwind
tests := tst-setjmp jmpbug bug269-setjmp tst-setjmp-fp \
tst-sigsetjmp
tst-sigsetjmp tst-setjmp-static
tests-static := tst-setjmp-static
include ../Rules

View File

@ -0,0 +1 @@
#include "tst-setjmp.c"

View File

@ -77,21 +77,15 @@ ENTRY (__longjmp)
#ifdef NEED_HWCAP
# ifdef IS_IN_rtld
ldr a4, 1f
ldr a3, .Lrtld_local_ro
0: add a4, pc, a4
add a4, a4, a3
ldr a4, [a4, #RTLD_GLOBAL_RO_DL_HWCAP_OFFSET]
LDST_PCREL (ldr, a4, a3, \
C_SYMBOL_NAME(_rtld_local_ro) \
+ RTLD_GLOBAL_RO_DL_HWCAP_OFFSET)
# else
# ifdef PIC
ldr a4, 1f
ldr a3, .Lrtld_global_ro
0: add a4, pc, a4
ldr a4, [a4, a3]
ldr a4, [a4, #RTLD_GLOBAL_RO_DL_HWCAP_OFFSET]
LDR_GLOBAL (a4, a3, C_SYMBOL_NAME(_rtld_global_ro), \
RTLD_GLOBAL_RO_DL_HWCAP_OFFSET)
# else
ldr a4, .Lhwcap
ldr a4, [a4, #0]
LDR_GLOBAL (a4, a3, C_SYMBOL_NAME(_dl_hwcap), 0)
# endif
# endif
#endif
@ -138,21 +132,4 @@ ENTRY (__longjmp)
DO_RET(lr)
#ifdef NEED_HWCAP
# ifdef IS_IN_rtld
1: .long _GLOBAL_OFFSET_TABLE_ - 0b - PC_OFS
.Lrtld_local_ro:
.long C_SYMBOL_NAME(_rtld_local_ro)(GOTOFF)
# else
# ifdef PIC
1: .long _GLOBAL_OFFSET_TABLE_ - 0b - PC_OFS
.Lrtld_global_ro:
.long C_SYMBOL_NAME(_rtld_global_ro)(GOT)
# else
.Lhwcap:
.long C_SYMBOL_NAME(_dl_hwcap)
# endif
# endif
#endif
END (__longjmp)

52
sysdeps/arm/configure vendored
View File

@ -150,8 +150,8 @@ else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
#ifdef __ARM_PCS_VFP
yes
#endif
yes
#endif
_ACEOF
if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
@ -211,6 +211,54 @@ else
have-arm-tls-desc = no"
fi
# Probe for usable PC-relative movw/movt expressions.  A small assembly file
# is linked with -shared and its dynamic section and relocations are listed
# with readelf -dr.  The grep pattern holds two fixed strings, one per line;
# if either TEXTREL or R_ARM_NONE appears in the listing the answer stays
# "no", otherwise it becomes "yes".  grep -F is used rather than the
# obsolescent fgrep spelling.
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether PC-relative relocs in movw/movt work properly" >&5
$as_echo_n "checking whether PC-relative relocs in movw/movt work properly... " >&6; }
if ${libc_cv_arm_pcrel_movw+:} false; then :
$as_echo_n "(cached) " >&6
else
cat > conftest.s <<\EOF
.syntax unified
.arm
.arch armv7-a
.text
.globl foo
.type foo,%function
foo: movw r0, #:lower16:symbol - 1f - 8
movt r0, #:upper16:symbol - 1f - 8
1: add r0, pc
@ And now a case with a local symbol.
movw r0, #:lower16:3f - 2f - 8
movt r0, #:upper16:3f - 2f - 8
2: add r0, pc
bx lr
.data
.globl symbol
.hidden symbol
symbol: .long 23
3: .long 17
EOF
libc_cv_arm_pcrel_movw=no
${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS \
-nostartfiles -nostdlib -shared \
-o conftest.so conftest.s 1>&5 2>&5 &&
LC_ALL=C $READELF -dr conftest.so > conftest.dr 2>&5 &&
{
cat conftest.dr 1>&5
grep -F 'TEXTREL
R_ARM_NONE' conftest.dr > /dev/null || libc_cv_arm_pcrel_movw=yes
}
rm -f conftest*
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_arm_pcrel_movw" >&5
$as_echo "$libc_cv_arm_pcrel_movw" >&6; }
if test $libc_cv_arm_pcrel_movw = yes; then
$as_echo "#define ARM_PCREL_MOVW_OK 1" >>confdefs.h
fi
libc_cv_gcc_unwind_find_fde=no
# Remove -fno-unwind-tables that was added in sysdeps/arm/preconfigure.ac.

View File

@ -17,8 +17,8 @@ dnl it. Until we do, don't define it.
AC_CACHE_CHECK([whether the compiler is using the ARM hard-float ABI],
[libc_cv_arm_pcs_vfp],
[AC_EGREP_CPP(yes,[#ifdef __ARM_PCS_VFP
yes
#endif
yes
#endif
], libc_cv_arm_pcs_vfp=yes, libc_cv_arm_pcs_vfp=no)])
if test $libc_cv_arm_pcs_vfp = yes; then
AC_DEFINE(HAVE_ARM_PCS_VFP)
@ -40,6 +40,46 @@ else
LIBC_CONFIG_VAR([have-arm-tls-desc], [no])
fi
dnl Probe for usable PC-relative movw/movt expressions.  A small assembly
dnl file is linked with -shared and its dynamic section and relocations are
dnl listed with readelf -dr.  The grep pattern holds two fixed strings, one
dnl per line; if either TEXTREL or R_ARM_NONE appears in the listing the
dnl answer stays "no", otherwise it becomes "yes" and ARM_PCREL_MOVW_OK is
dnl defined.  grep -F is used rather than the obsolescent fgrep spelling.
AC_CACHE_CHECK([whether PC-relative relocs in movw/movt work properly],
libc_cv_arm_pcrel_movw, [
cat > conftest.s <<\EOF
.syntax unified
.arm
.arch armv7-a
.text
.globl foo
.type foo,%function
foo: movw r0, #:lower16:symbol - 1f - 8
movt r0, #:upper16:symbol - 1f - 8
1: add r0, pc
@ And now a case with a local symbol.
movw r0, #:lower16:3f - 2f - 8
movt r0, #:upper16:3f - 2f - 8
2: add r0, pc
bx lr
.data
.globl symbol
.hidden symbol
symbol: .long 23
3: .long 17
EOF
libc_cv_arm_pcrel_movw=no
${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS \
-nostartfiles -nostdlib -shared \
-o conftest.so conftest.s 1>&AS_MESSAGE_LOG_FD 2>&AS_MESSAGE_LOG_FD &&
LC_ALL=C $READELF -dr conftest.so > conftest.dr 2>&AS_MESSAGE_LOG_FD &&
{
cat conftest.dr 1>&AS_MESSAGE_LOG_FD
grep -F 'TEXTREL
R_ARM_NONE' conftest.dr > /dev/null || libc_cv_arm_pcrel_movw=yes
}
rm -f conftest*])
if test $libc_cv_arm_pcrel_movw = yes; then
AC_DEFINE([ARM_PCREL_MOVW_OK])
fi
libc_cv_gcc_unwind_find_fde=no
# Remove -fno-unwind-tables that was added in sysdeps/arm/preconfigure.ac.

View File

@ -58,21 +58,15 @@ ENTRY (__sigsetjmp)
#ifdef NEED_HWCAP
/* Check if we have a VFP unit. */
# ifdef IS_IN_rtld
ldr a3, 1f
ldr a4, .Lrtld_local_ro
0: add a3, pc, a3
add a3, a3, a4
ldr a3, [a3, #RTLD_GLOBAL_RO_DL_HWCAP_OFFSET]
LDST_PCREL (ldr, a3, a4, \
C_SYMBOL_NAME(_rtld_local_ro) \
+ RTLD_GLOBAL_RO_DL_HWCAP_OFFSET)
# else
# ifdef PIC
ldr a3, 1f
ldr a4, .Lrtld_global_ro
0: add a3, pc, a3
ldr a3, [a3, a4]
ldr a3, [a3, #RTLD_GLOBAL_RO_DL_HWCAP_OFFSET]
LDR_GLOBAL (a3, a4, C_SYMBOL_NAME(_rtld_global_ro), \
RTLD_GLOBAL_RO_DL_HWCAP_OFFSET)
# else
ldr a3, .Lhwcap
ldr a3, [a3, #0]
LDR_GLOBAL (a3, a4, C_SYMBOL_NAME(_dl_hwcap), 0)
# endif
# endif
#endif
@ -114,23 +108,6 @@ ENTRY (__sigsetjmp)
/* Make a tail call to __sigjmp_save; it takes the same args. */
B PLTJMP(C_SYMBOL_NAME(__sigjmp_save))
#ifdef NEED_HWCAP
# ifdef IS_IN_rtld
1: .long _GLOBAL_OFFSET_TABLE_ - 0b - PC_OFS
.Lrtld_local_ro:
.long C_SYMBOL_NAME(_rtld_local_ro)(GOTOFF)
# else
# ifdef PIC
1: .long _GLOBAL_OFFSET_TABLE_ - 0b - PC_OFS
.Lrtld_global_ro:
.long C_SYMBOL_NAME(_rtld_global_ro)(GOT)
# else
.Lhwcap:
.long C_SYMBOL_NAME(_dl_hwcap)
# endif
# endif
#endif
END (__sigsetjmp)
hidden_def (__sigsetjmp)

View File

@ -21,6 +21,8 @@
#ifndef __ASSEMBLER__
# include <stdint.h>
#else
# include <arm-features.h>
#endif
/* The __ARM_ARCH define is provided by gcc 4.8. Construct it otherwise. */
@ -157,6 +159,32 @@
.arm
# endif
/* Load or store to/from address X + Y into/from R, (maybe) using T.
X or Y can use T freely; T can be R if OP is a load. The first
version eschews the two-register addressing mode, while the
second version uses it.

OP is the load/store mnemonic, R the data register, T a scratch
register, and X and Y the two address components.

LDST_INDEXED_NOINDEX first computes T = X + Y with an add and then
performs OP through [T]; the access is wrapped in sfi_breg, which is
defined elsewhere (presumably a no-op outside sandboxed builds --
confirm against its definition). LDST_INDEXED_INDEX performs OP
directly through [X, Y] and takes no scratch-register argument. */
# define LDST_INDEXED_NOINDEX(OP, R, T, X, Y) \
add T, X, Y; \
sfi_breg T, \
OP R, [T]
# define LDST_INDEXED_INDEX(OP, R, X, Y) \
OP R, [X, Y]
/* LDST_INDEXED and LDST_PC_INDEXED are the names callers use. Each one
selects one of the two implementations above; LDST_PC_INDEXED is the
variant whose base register is fixed to pc, with X as the other
component. Both accept T even when the chosen implementation
ignores it, so call sites are the same in every configuration. */
# ifdef ARM_NO_INDEX_REGISTER
/* We're never using the two-register addressing mode, so this
always uses an intermediate add. */
# define LDST_INDEXED(OP, R, T, X, Y) LDST_INDEXED_NOINDEX (OP, R, T, X, Y)
# define LDST_PC_INDEXED(OP, R, T, X) LDST_INDEXED_NOINDEX (OP, R, T, pc, X)
# else
/* The two-register addressing mode is OK, except on Thumb with pc. */
# define LDST_INDEXED(OP, R, T, X, Y) LDST_INDEXED_INDEX (OP, R, X, Y)
# ifdef __thumb2__
/* Thumb-2: go through the intermediate add when the base is pc. */
# define LDST_PC_INDEXED(OP, R, T, X) LDST_INDEXED_NOINDEX (OP, R, T, pc, X)
# else
/* ARM mode: pc can be used directly as the base of [pc, X]. */
# define LDST_PC_INDEXED(OP, R, T, X) LDST_INDEXED_INDEX (OP, R, pc, X)
# endif
# endif
/* Load or store to/from a pc-relative EXPR into/from R, using T. */
# ifdef __thumb2__
# define LDST_PCREL(OP, R, T, EXPR) \
@ -166,6 +194,11 @@
.previous; \
99: add T, T, pc; \
OP R, [T]
# elif defined (ARCH_HAS_T2) && ARM_PCREL_MOVW_OK
# define LDST_PCREL(OP, R, T, EXPR) \
movw T, #:lower16:EXPR - 99f - PC_OFS; \
movt T, #:upper16:EXPR - 99f - PC_OFS; \
99: LDST_PC_INDEXED (OP, R, T, T)
# else
# define LDST_PCREL(OP, R, T, EXPR) \
ldr T, 98f; \
@ -175,17 +208,50 @@
99: OP R, [pc, T]
# endif
/* Load or store to/from a global EXPR into/from R, using T. */
# define LDST_GLOBAL(OP, R, T, EXPR) \
/* Load from a global SYMBOL + CONSTANT into R, using T. */
# if defined (ARCH_HAS_T2) && !defined (PIC)
# define LDR_GLOBAL(R, T, SYMBOL, CONSTANT) \
movw T, #:lower16:SYMBOL; \
movt T, #:upper16:SYMBOL; \
ldr R, [T, $CONSTANT]
# elif defined (ARCH_HAS_T2) && defined (PIC) && ARM_PCREL_MOVW_OK
# define LDR_GLOBAL(R, T, SYMBOL, CONSTANT) \
movw R, #:lower16:_GLOBAL_OFFSET_TABLE_ - 97f - PC_OFS; \
movw T, #:lower16:99f - 98f - PC_OFS; \
movt R, #:upper16:_GLOBAL_OFFSET_TABLE_ - 97f - PC_OFS; \
movt T, #:upper16:99f - 98f - PC_OFS; \
.pushsection .rodata.cst4, "aM", %progbits, 4; \
.balign 4; \
99: .word SYMBOL##(GOT); \
.popsection; \
97: add R, R, pc; \
98: LDST_PC_INDEXED (ldr, T, T, T); \
LDST_INDEXED (ldr, R, T, R, T); \
ldr R, [R, $CONSTANT]
# else
# define LDR_GLOBAL(R, T, SYMBOL, CONSTANT) \
ldr T, 99f; \
ldr R, 100f; \
98: add T, T, pc; \
ldr T, [T, R]; \
.subsection 2; \
99: .word _GLOBAL_OFFSET_TABLE_ - 98b - PC_OFS; \
100: .word EXPR##(GOT); \
100: .word SYMBOL##(GOT); \
.previous; \
OP R, [T]
ldr R, [T, $CONSTANT]
# endif
/* This is the same as LDR_GLOBAL, but for a SYMBOL that is known to
be in the same linked object (as for one with hidden visibility).
We can avoid the GOT indirection in the PIC case. For the pure
static case, LDR_GLOBAL is already optimal.

R receives the loaded word, T is a scratch register, and the address
read is SYMBOL + CONSTANT. Under PIC this expands to a pc-relative
load via LDST_PCREL with the constant folded into the expression;
otherwise it forwards all four arguments unchanged to LDR_GLOBAL. */
# ifdef PIC
# define LDR_HIDDEN(R, T, SYMBOL, CONSTANT) \
LDST_PCREL (ldr, R, T, SYMBOL + CONSTANT)
# else
# define LDR_HIDDEN(R, T, SYMBOL, CONSTANT) \
LDR_GLOBAL (R, T, SYMBOL, CONSTANT)
# endif
/* Cope with negative memory offsets, which thumb can't encode.
Use NEGOFF_ADJ_BASE to (conditionally) alter the base register,
@ -296,7 +362,7 @@
(!defined SHARED && (!defined NOT_IN_libc || defined IS_IN_libpthread)))
# ifdef __ASSEMBLER__
# define PTR_MANGLE_LOAD(guard, tmp) \
LDST_PCREL(ldr, guard, tmp, C_SYMBOL_NAME(__pointer_chk_guard_local));
LDR_HIDDEN (guard, tmp, C_SYMBOL_NAME(__pointer_chk_guard_local), 0)
# define PTR_MANGLE(dst, src, guard, tmp) \
PTR_MANGLE_LOAD(guard, tmp); \
PTR_MANGLE2(dst, src, guard)
@ -316,7 +382,7 @@ extern uintptr_t __pointer_chk_guard_local attribute_relro attribute_hidden;
#else
# ifdef __ASSEMBLER__
# define PTR_MANGLE_LOAD(guard, tmp) \
LDST_GLOBAL(ldr, guard, tmp, C_SYMBOL_NAME(__pointer_chk_guard));
LDR_GLOBAL (guard, tmp, C_SYMBOL_NAME(__pointer_chk_guard), 0);
# define PTR_MANGLE(dst, src, guard, tmp) \
PTR_MANGLE_LOAD(guard, tmp); \
PTR_MANGLE2(dst, src, guard)