Merge branch 'fix-aarch64-page-size'

Change the dispatch stubs on aarch64 to be aligned to a 64K boundary, since
some aarch64 systems use a 64K page size.
Kyle Brenneman, 2017-08-18 12:58:33 -06:00
commit 8d4d03f77d
8 changed files with 102 additions and 96 deletions
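
For context on why the alignment matters: libGLdispatch patches these entry points at runtime, and making them writable goes through mprotect(2), which operates on whole pages. A rough sketch of that constraint in C (the rounding helper below is illustrative, not code from this commit):

    #include <stddef.h>
    #include <stdint.h>
    #include <sys/mman.h>
    #include <unistd.h>

    /* Symbols defined by the assembly in this commit. */
    extern char public_entry_start[];
    extern char public_entry_end[];

    /* mprotect() works on whole pages, so the stub range must be rounded
     * out to page boundaries. Aligning the section to GLDISPATCH_PAGE_SIZE
     * (64K on aarch64) guarantees the rounded range never overlaps
     * unrelated code, whether the kernel uses 4K or 64K pages. */
    static int make_stubs_writable(void)
    {
        size_t page = (size_t) sysconf(_SC_PAGESIZE);
        uintptr_t start = (uintptr_t) public_entry_start & ~(uintptr_t) (page - 1);
        uintptr_t end = ((uintptr_t) public_entry_end + page - 1) & ~(uintptr_t) (page - 1);
        return mprotect((void *) start, end - start,
                        PROT_READ | PROT_WRITE | PROT_EXEC);
    }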

src/GLdispatch/vnd-glapi/entry_aarch64_tsd.c

@@ -48,10 +48,15 @@
 /*
  * u_execmem_alloc() allocates 128 bytes per stub.
  */
-#define AARCH64_ENTRY_SIZE 128
+#define ENTRY_STUB_ALIGN 128
+
+#if !defined(GLDISPATCH_PAGE_SIZE)
+// Note that on aarch64, the page size could be 4K or 64K. Pick 64K, since that
+// will work in either case.
+#define GLDISPATCH_PAGE_SIZE 65536
+#endif

 #define STUB_ASM_ENTRY(func) \
-    ".balign " U_STRINGIFY(AARCH64_ENTRY_SIZE) "\n\t" \
+    ".balign " U_STRINGIFY(ENTRY_STUB_ALIGN) "\n\t" \
     ".global " func "\n\t" \
     ".type " func ", %function\n\t" \
     func ":\n\t"
@@ -97,7 +102,7 @@
 /*
  * Bytecode for STUB_ASM_CODE()
  */
-static const uint32_t BYTECODE_TEMPLATE[] =
+static const uint32_t ENTRY_TEMPLATE[] =
 {
     0xa9bf03e1, // <ENTRY>: stp x1, x0, [sp,#-16]!
     0x58000240, // <ENTRY+4>: ldr x0, <ENTRY+76>
@@ -125,10 +130,8 @@ static const uint32_t BYTECODE_TEMPLATE[] =
     0x00000000, 0x00000000, // <ENTRY+92>: slot * sizeof(void*)
 };

-#define AARCH64_BYTECODE_SIZE sizeof(BYTECODE_TEMPLATE)
-
 __asm__(".section wtext,\"ax\"\n"
-        ".balign 4096\n"
+        ".balign " U_STRINGIFY(GLDISPATCH_PAGE_SIZE) "\n"
         ".globl public_entry_start\n"
         ".hidden public_entry_start\n"
         "public_entry_start:\n");
@@ -136,19 +139,19 @@ __asm__(".section wtext,\"ax\"\n"
 #define MAPI_TMP_STUB_ASM_GCC
 #include "mapi_tmp.h"

-__asm__(".balign 4096\n"
+__asm__(".balign " U_STRINGIFY(GLDISPATCH_PAGE_SIZE) "\n"
         ".globl public_entry_end\n"
         ".hidden public_entry_end\n"
         "public_entry_end:\n"
         ".text\n\t");

 const int entry_type = __GLDISPATCH_STUB_AARCH64;
-const int entry_stub_size = AARCH64_ENTRY_SIZE;
+const int entry_stub_size = ENTRY_STUB_ALIGN;

-// The offsets in BYTECODE_TEMPLATE that need to be patched.
-static const int TEMPLATE_OFFSET_CURRENT_TABLE = AARCH64_BYTECODE_SIZE - 3*8;
-static const int TEMPLATE_OFFSET_CURRENT_TABLE_GET = AARCH64_BYTECODE_SIZE - 2*8;
-static const int TEMPLATE_OFFSET_SLOT = AARCH64_BYTECODE_SIZE - 8;
+// The offsets in ENTRY_TEMPLATE that need to be patched.
+static const int TEMPLATE_OFFSET_CURRENT_TABLE = sizeof(ENTRY_TEMPLATE) - 3*8;
+static const int TEMPLATE_OFFSET_CURRENT_TABLE_GET = sizeof(ENTRY_TEMPLATE) - 2*8;
+static const int TEMPLATE_OFFSET_SLOT = sizeof(ENTRY_TEMPLATE) - 8;

 void entry_generate_default_code(char *entry, int slot)
 {
@@ -157,7 +160,7 @@ void entry_generate_default_code(char *entry, int slot)
     // Get the pointer to the writable mapping.
     writeEntry = (char *) u_execmem_get_writable(entry);

-    memcpy(writeEntry, BYTECODE_TEMPLATE, AARCH64_BYTECODE_SIZE);
+    memcpy(writeEntry, ENTRY_TEMPLATE, sizeof(ENTRY_TEMPLATE));

     // Patch the slot number and whatever addresses need to be patched.
     *((uint64_t *)(writeEntry + TEMPLATE_OFFSET_SLOT)) = (uint64_t)(slot * sizeof(mapi_func));
@@ -167,5 +170,5 @@ void entry_generate_default_code(char *entry, int slot)
             (uint64_t)_glapi_get_current;

     // See http://community.arm.com/groups/processors/blog/2010/02/17/caches-and-self-modifying-code
-    __builtin___clear_cache(writeEntry, writeEntry + AARCH64_BYTECODE_SIZE);
+    __builtin___clear_cache(writeEntry, writeEntry + sizeof(ENTRY_TEMPLATE));
 }
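
A side note on the `.balign " U_STRINGIFY(GLDISPATCH_PAGE_SIZE) "` pattern used throughout these files: it depends on two-level macro stringification, so that GLDISPATCH_PAGE_SIZE is expanded to its value before being turned into a string. A sketch of the usual idiom (libglvnd ships its own U_STRINGIFY; the real definition may differ in detail):

    #define U_STRINGIFY_HELPER(x) #x
    #define U_STRINGIFY(x) U_STRINGIFY_HELPER(x)

    /* U_STRINGIFY(GLDISPATCH_PAGE_SIZE) -> "65536", so the concatenated
     * string literal becomes ".balign 65536\n". A single-level #x would
     * instead produce the unusable ".balign GLDISPATCH_PAGE_SIZE\n". */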

src/GLdispatch/vnd-glapi/entry_armv7_tsd.c

@@ -49,7 +49,10 @@ __asm__(".syntax unified\n\t");
 /*
  * u_execmem_alloc() allocates 64 bytes per stub.
  */
-#define ARMV7_ENTRY_SIZE 128
+#define ENTRY_STUB_ALIGN 128
+
+#if !defined(GLDISPATCH_PAGE_SIZE)
+#define GLDISPATCH_PAGE_SIZE 4096
+#endif

 /*
  * This runs in Thumb mode.
@@ -60,7 +63,7 @@ __asm__(".syntax unified\n\t");
  * See: https://gcc.gnu.org/onlinedocs/gcc/ARM-Options.html
  */
 #define STUB_ASM_ENTRY(func) \
-    ".balign " U_STRINGIFY(ARMV7_ENTRY_SIZE) "\n\t" \
+    ".balign " U_STRINGIFY(ENTRY_STUB_ALIGN) "\n\t" \
     ".thumb_func\n\t" \
     ".global " func "\n\t" \
     ".type " func ", %function\n\t" \
@@ -125,7 +128,7 @@ __asm__(".syntax unified\n\t");
  * _glapi_Current and _glapi_get_current. In the generated stubs, we can just
  * plug the addresses in directly.
  */
-static const uint16_t BYTECODE_TEMPLATE[] =
+static const uint16_t ENTRY_TEMPLATE[] =
 {
     0xb40f, // push {r0-r3}
     0xf8df, 0x0028, // ldr r0, 1f
@@ -151,10 +154,8 @@ static const uint16_t BYTECODE_TEMPLATE[] =
     0x0000, 0x0000, // 3: .word " slot "
 };

-#define ARMV7_BYTECODE_SIZE sizeof(BYTECODE_TEMPLATE)
-
 __asm__(".section wtext,\"ax\"\n"
-        ".balign 4096\n"
+        ".balign " U_STRINGIFY(GLDISPATCH_PAGE_SIZE) "\n"
         ".syntax unified\n"
         ".globl public_entry_start\n"
         ".hidden public_entry_start\n"
@@ -163,7 +164,7 @@ __asm__(".section wtext,\"ax\"\n"
 #define MAPI_TMP_STUB_ASM_GCC
 #include "mapi_tmp.h"

-__asm__(".balign 4096\n"
+__asm__(".balign " U_STRINGIFY(GLDISPATCH_PAGE_SIZE) "\n"
         ".globl public_entry_end\n"
         ".hidden public_entry_end\n"
         "public_entry_end:\n"
@@ -177,16 +178,16 @@ __asm__(".arm\n\t");
 #endif

 const int entry_type = __GLDISPATCH_STUB_ARMV7_THUMB;
-const int entry_stub_size = ARMV7_ENTRY_SIZE;
+const int entry_stub_size = ENTRY_STUB_ALIGN;

-static const int TEMPLATE_OFFSET_CURRENT_TABLE = ARMV7_BYTECODE_SIZE - 3*4;
-static const int TEMPLATE_OFFSET_CURRENT_TABLE_GET = ARMV7_BYTECODE_SIZE - 2*4;
-static const int TEMPLATE_OFFSET_SLOT = ARMV7_BYTECODE_SIZE - 4;
+static const int TEMPLATE_OFFSET_CURRENT_TABLE = sizeof(ENTRY_TEMPLATE) - 3*4;
+static const int TEMPLATE_OFFSET_CURRENT_TABLE_GET = sizeof(ENTRY_TEMPLATE) - 2*4;
+static const int TEMPLATE_OFFSET_SLOT = sizeof(ENTRY_TEMPLATE) - 4;

 void
 entry_init_public(void)
 {
-    STATIC_ASSERT(ARMV7_BYTECODE_SIZE <= ARMV7_ENTRY_SIZE);
+    STATIC_ASSERT(sizeof(ENTRY_TEMPLATE) <= ENTRY_STUB_ALIGN);
 }

 void entry_generate_default_code(char *entry, int slot)
@@ -199,7 +200,7 @@ void entry_generate_default_code(char *entry, int slot)
     // Get the pointer to the writable mapping.
     writeEntry = (char *) u_execmem_get_writable(entry - 1);

-    memcpy(writeEntry, BYTECODE_TEMPLATE, ARMV7_BYTECODE_SIZE);
+    memcpy(writeEntry, ENTRY_TEMPLATE, sizeof(ENTRY_TEMPLATE));

     *((uint32_t *)(writeEntry + TEMPLATE_OFFSET_SLOT)) = slot;
     *((uint32_t *)(writeEntry + TEMPLATE_OFFSET_CURRENT_TABLE)) =
@@ -208,7 +209,7 @@ void entry_generate_default_code(char *entry, int slot)
             (uint32_t)_glapi_get_current;

     // See http://community.arm.com/groups/processors/blog/2010/02/17/caches-and-self-modifying-code
-    __builtin___clear_cache(writeEntry, writeEntry + ARMV7_BYTECODE_SIZE);
+    __builtin___clear_cache(writeEntry, writeEntry + sizeof(ENTRY_TEMPLATE));
 }

 // Note: The rest of these functions could also be used for ARMv7 TLS stubs,
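
Both ARM files end the stub generator with __builtin___clear_cache(). That call is what makes the patched template safe to execute: the bytes are written through a separate writable mapping, and ARM cores will not reliably see them through the instruction cache until it is invalidated. The overall pattern, sketched (u_execmem_get_writable() is the codebase's dual-mapping helper; its signature here is assumed):

    #include <stddef.h>
    #include <string.h>

    extern void *u_execmem_get_writable(void *execAddr); /* assumed signature */

    static void generate_stub(char *execEntry, const void *tmpl, size_t len)
    {
        /* Write the template through the writable alias... */
        char *writeEntry = u_execmem_get_writable(execEntry);
        memcpy(writeEntry, tmpl, len);
        /* ...then flush the data cache and invalidate the instruction
         * cache over the written range. Required on ARM; effectively a
         * no-op on x86, whose icache stays coherent in hardware. */
        __builtin___clear_cache(writeEntry, writeEntry + len);
    }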

src/GLdispatch/vnd-glapi/entry_ppc64le_tls.c

@@ -37,17 +37,13 @@
 // NOTE: These must be powers of two:
-#define PPC64LE_ENTRY_SIZE 64
-#define PPC64LE_PAGE_ALIGN 65536
-#if ((PPC64LE_ENTRY_SIZE & (PPC64LE_ENTRY_SIZE - 1)) != 0)
-#error PPC64LE_ENTRY_SIZE must be a power of two!
-#endif
-#if ((PPC64LE_PAGE_ALIGN & (PPC64LE_PAGE_ALIGN - 1)) != 0)
-#error PPC64LE_PAGE_ALIGN must be a power of two!
+#define ENTRY_STUB_ALIGN 64
+#if !defined(GLDISPATCH_PAGE_SIZE)
+#define GLDISPATCH_PAGE_SIZE 65536
 #endif

 __asm__(".section wtext,\"ax\",@progbits\n");
-__asm__(".balign " U_STRINGIFY(PPC64LE_PAGE_ALIGN) "\n"
+__asm__(".balign " U_STRINGIFY(GLDISPATCH_PAGE_SIZE) "\n"
         ".globl public_entry_start\n"
         ".hidden public_entry_start\n"
         "public_entry_start:");
@@ -55,7 +51,7 @@ __asm__(".balign " U_STRINGIFY(PPC64LE_PAGE_ALIGN) "\n"
 #define STUB_ASM_ENTRY(func) \
     ".globl " func "\n" \
     ".type " func ", @function\n" \
-    ".balign " U_STRINGIFY(PPC64LE_ENTRY_SIZE) "\n" \
+    ".balign " U_STRINGIFY(ENTRY_STUB_ALIGN) "\n" \
     func ":\n\t" \
     " addis 2, 12, .TOC.-" func "@ha\n\t" \
     " addi 2, 2, .TOC.-" func "@l\n\t" \
@@ -82,7 +78,7 @@ __asm__(".balign " U_STRINGIFY(PPC64LE_PAGE_ALIGN) "\n"
 #include "mapi_tmp.h"

-__asm__(".balign " U_STRINGIFY(PPC64LE_PAGE_ALIGN) "\n"
+__asm__(".balign " U_STRINGIFY(GLDISPATCH_PAGE_SIZE) "\n"
         ".globl public_entry_end\n"
         ".hidden public_entry_end\n"
         "public_entry_end:");
@@ -98,7 +94,7 @@ __asm__("ppc64le_current_tls:\n\t"
 extern uint64_t ppc64le_current_tls();

 const int entry_type = __GLDISPATCH_STUB_PPC64LE;
-const int entry_stub_size = PPC64LE_ENTRY_SIZE;
+const int entry_stub_size = ENTRY_STUB_ALIGN;

 static const uint32_t ENTRY_TEMPLATE[] =
 {
@@ -144,7 +140,7 @@ void entry_generate_default_code(char *entry, int slot)
 {
     char *writeEntry = u_execmem_get_writable(entry);

-    STATIC_ASSERT(PPC64LE_ENTRY_SIZE >= sizeof(ENTRY_TEMPLATE));
+    STATIC_ASSERT(ENTRY_STUB_ALIGN >= sizeof(ENTRY_TEMPLATE));

     assert(slot >= 0);
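
With the power-of-two #error checks gone, the remaining compile-time guard is the STATIC_ASSERT that the patched template still fits within one stub slot. STATIC_ASSERT is the codebase's own macro, defined elsewhere in the tree; a common pre-C11 equivalent looks like this (illustrative sketch, not libglvnd's actual definition):

    /* Negative-array-size trick: compiles only if cond is nonzero. */
    #define STATIC_ASSERT(cond) \
        do { (void) sizeof(char [1 - 2 * !(cond)]); } while (0)

    /* e.g. STATIC_ASSERT(ENTRY_STUB_ALIGN >= sizeof(ENTRY_TEMPLATE));
     * fails to compile if a stub slot cannot hold the template. */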

src/GLdispatch/vnd-glapi/entry_ppc64le_tsd.c

@@ -36,17 +36,13 @@
 // NOTE: These must be powers of two:
-#define PPC64LE_ENTRY_SIZE 256
-#define PPC64LE_PAGE_ALIGN 65536
-#if ((PPC64LE_ENTRY_SIZE & (PPC64LE_ENTRY_SIZE - 1)) != 0)
-#error PPC64LE_ENTRY_SIZE must be a power of two!
-#endif
-#if ((PPC64LE_PAGE_ALIGN & (PPC64LE_PAGE_ALIGN - 1)) != 0)
-#error PPC64LE_PAGE_ALIGN must be a power of two!
+#define ENTRY_STUB_ALIGN 256
+#if !defined(GLDISPATCH_PAGE_SIZE)
+#define GLDISPATCH_PAGE_SIZE 65536
 #endif

 __asm__(".section wtext,\"ax\",@progbits\n");
-__asm__(".balign " U_STRINGIFY(PPC64LE_PAGE_ALIGN) "\n"
+__asm__(".balign " U_STRINGIFY(GLDISPATCH_PAGE_SIZE) "\n"
         ".globl public_entry_start\n"
         ".hidden public_entry_start\n"
         "public_entry_start:");
@@ -54,7 +50,7 @@ __asm__(".balign " U_STRINGIFY(PPC64LE_PAGE_ALIGN) "\n"
 #define STUB_ASM_ENTRY(func) \
     ".globl " func "\n" \
     ".type " func ", @function\n" \
-    ".balign " U_STRINGIFY(PPC64LE_ENTRY_SIZE) "\n" \
+    ".balign " U_STRINGIFY(ENTRY_STUB_ALIGN) "\n" \
     func ":\n\t" \
     " addis 2, 12, .TOC.-" func "@ha\n\t" \
     " addi 2, 2, .TOC.-" func "@l\n\t" \
@@ -113,14 +109,14 @@ __asm__(".balign " U_STRINGIFY(PPC64LE_PAGE_ALIGN) "\n"
 #include "mapi_tmp.h"

-__asm__(".balign " U_STRINGIFY(PPC64LE_PAGE_ALIGN) "\n"
+__asm__(".balign " U_STRINGIFY(GLDISPATCH_PAGE_SIZE) "\n"
         ".globl public_entry_end\n"
         ".hidden public_entry_end\n"
         "public_entry_end:");

 __asm__(".text\n");

 const int entry_type = __GLDISPATCH_STUB_PPC64LE;
-const int entry_stub_size = PPC64LE_ENTRY_SIZE;
+const int entry_stub_size = ENTRY_STUB_ALIGN;

 static const uint32_t ENTRY_TEMPLATE[] =
 {

src/GLdispatch/vnd-glapi/entry_x86_64_tls.c

@@ -38,41 +38,44 @@
 #include "glapi.h"
 #include "glvnd/GLdispatchABI.h"

-#define ENTRY_STUB_SIZE 32
+#define ENTRY_STUB_ALIGN 32
+
+#if !defined(GLDISPATCH_PAGE_SIZE)
+#define GLDISPATCH_PAGE_SIZE 4096
+#endif

 __asm__(".section wtext,\"ax\",@progbits\n");

-__asm__(".balign 4096\n"
+__asm__(".balign " U_STRINGIFY(GLDISPATCH_PAGE_SIZE) "\n"
         ".globl public_entry_start\n"
         ".hidden public_entry_start\n"
         "public_entry_start:");

 #define STUB_ASM_ENTRY(func) \
-   ".globl " func "\n" \
-   ".type " func ", @function\n" \
-   ".balign " U_STRINGIFY(ENTRY_STUB_SIZE) "\n" \
-   func ":"
+    ".globl " func "\n" \
+    ".type " func ", @function\n" \
+    ".balign " U_STRINGIFY(ENTRY_STUB_ALIGN) "\n" \
+    func ":"

 #ifdef __ILP32__

 #define STUB_ASM_CODE(slot) \
-   "movq _glapi_tls_Current@GOTTPOFF(%rip), %rax\n\t" \
-   "movl %fs:(%rax), %r11d\n\t" \
-   "movl 4*" slot "(%r11d), %r11d\n\t" \
-   "jmp *%r11"
+    "movq _glapi_tls_Current@GOTTPOFF(%rip), %rax\n\t" \
+    "movl %fs:(%rax), %r11d\n\t" \
+    "movl 4*" slot "(%r11d), %r11d\n\t" \
+    "jmp *%r11"

 #else // __ILP32__

 #define STUB_ASM_CODE(slot) \
-   "movq _glapi_tls_Current@GOTTPOFF(%rip), %rax\n\t" \
-   "movq %fs:(%rax), %r11\n\t" \
-   "jmp *(8 * " slot ")(%r11)"
+    "movq _glapi_tls_Current@GOTTPOFF(%rip), %rax\n\t" \
+    "movq %fs:(%rax), %r11\n\t" \
+    "jmp *(8 * " slot ")(%r11)"

 #endif // __ILP32__

 #define MAPI_TMP_STUB_ASM_GCC
 #include "mapi_tmp.h"

-__asm__(".balign 4096\n"
+__asm__(".balign " U_STRINGIFY(GLDISPATCH_PAGE_SIZE) "\n"
         ".globl public_entry_end\n"
         ".hidden public_entry_end\n"
         "public_entry_end:");
@@ -86,7 +89,7 @@ __asm__("x86_64_current_tls:\n\t"
 extern uint64_t
 x86_64_current_tls();

-const int entry_stub_size = ENTRY_STUB_SIZE;
+const int entry_stub_size = ENTRY_STUB_ALIGN;

 #ifdef __ILP32__
@@ -118,7 +121,7 @@ void entry_generate_default_code(char *entry, int slot)
     char *writeEntry = u_execmem_get_writable(entry);
     uint64_t tls_addr;

-    STATIC_ASSERT(ENTRY_STUB_SIZE >= sizeof(ENTRY_TEMPLATE));
+    STATIC_ASSERT(ENTRY_STUB_ALIGN >= sizeof(ENTRY_TEMPLATE));

     assert(slot >= 0);
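
What the 64-bit stub above does, restated in C: each entry point loads the current thread's dispatch table out of TLS and tail-jumps through its own slot. A conceptual rendering (the real declaration of _glapi_tls_Current lives in glapi.h and differs in detail):

    typedef void (*mapi_func)(void);

    /* Conceptual: a thread-local pointer to the current dispatch table. */
    extern __thread mapi_func *_glapi_tls_Current;

    /* Equivalent of:
     *   movq _glapi_tls_Current@GOTTPOFF(%rip), %rax   ; TLS slot offset
     *   movq %fs:(%rax), %r11                          ; current table
     *   jmp *(8 * slot)(%r11)                          ; tail-call entry
     */
    static void dispatch(int slot)
    {
        _glapi_tls_Current[slot]();  /* 8-byte table entries on LP64 */
    }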

src/GLdispatch/vnd-glapi/entry_x86_64_tsd.c

@@ -40,19 +40,22 @@
 #include "glapi.h"
 #include "glvnd/GLdispatchABI.h"

-#define X86_64_ENTRY_SIZE 64
+#define ENTRY_STUB_ALIGN 64
+
+#if !defined(GLDISPATCH_PAGE_SIZE)
+#define GLDISPATCH_PAGE_SIZE 4096
+#endif

 __asm__(".section wtext,\"ax\",@progbits\n");

-__asm__(".balign 4096\n"
+__asm__(".balign " U_STRINGIFY(GLDISPATCH_PAGE_SIZE) "\n"
         ".globl public_entry_start\n"
         ".hidden public_entry_start\n"
         "public_entry_start:");

 #define STUB_ASM_ENTRY(func) \
-   ".globl " func "\n" \
-   ".type " func ", @function\n" \
-   ".balign " U_STRINGIFY(X86_64_ENTRY_SIZE) "\n" \
-   func ":"
+    ".globl " func "\n" \
+    ".type " func ", @function\n" \
+    ".balign " U_STRINGIFY(ENTRY_STUB_ALIGN) "\n" \
+    func ":"

 /*
  * Note that this stub does not exactly match the machine code in
@@ -87,14 +90,14 @@ __asm__(".balign 4096\n"
 #include "mapi_tmp.h"

-__asm__(".balign 4096\n"
+__asm__(".balign " U_STRINGIFY(GLDISPATCH_PAGE_SIZE) "\n"
         ".globl public_entry_end\n"
         ".hidden public_entry_end\n"
         "public_entry_end:");

 __asm__(".text\n");

 const int entry_type = __GLDISPATCH_STUB_X86_64;
-const int entry_stub_size = X86_64_ENTRY_SIZE;
+const int entry_stub_size = ENTRY_STUB_ALIGN;

 static const unsigned char ENTRY_TEMPLATE[] =
 {

src/GLdispatch/vnd-glapi/entry_x86_tls.c

@@ -39,30 +39,31 @@
 #include "glvnd/GLdispatchABI.h"

 #define ENTRY_STUB_ALIGN 16
-#define ENTRY_STUB_SIZE ENTRY_STUB_ALIGN
-#define ENTRY_STUB_ALIGN_DIRECTIVE ".balign " U_STRINGIFY(ENTRY_STUB_ALIGN) "\n"
+
+#if !defined(GLDISPATCH_PAGE_SIZE)
+#define GLDISPATCH_PAGE_SIZE 4096
+#endif

 __asm__(".section wtext,\"ax\",@progbits\n");

-__asm__(".balign 4096\n"
+__asm__(".balign " U_STRINGIFY(GLDISPATCH_PAGE_SIZE) "\n"
         ".globl public_entry_start\n"
         ".hidden public_entry_start\n"
         "public_entry_start:");

 #define STUB_ASM_ENTRY(func) \
-   ".globl " func "\n" \
-   ".type " func ", @function\n" \
-   ENTRY_STUB_ALIGN_DIRECTIVE \
-   func ":"
+    ".globl " func "\n" \
+    ".type " func ", @function\n" \
+    ".balign " U_STRINGIFY(ENTRY_STUB_ALIGN) "\n" \
+    func ":\n"

 #define STUB_ASM_CODE(slot) \
-   "call x86_current_tls\n\t" \
-   "movl %gs:(%eax), %eax\n\t" \
-   "jmp *(4 * " slot ")(%eax)"
+    "call x86_current_tls\n\t" \
+    "movl %gs:(%eax), %eax\n\t" \
+    "jmp *(4 * " slot ")(%eax)"

 #define MAPI_TMP_STUB_ASM_GCC
 #include "mapi_tmp.h"

-__asm__(".balign 4096\n"
+__asm__(".balign " U_STRINGIFY(GLDISPATCH_PAGE_SIZE) "\n"
         ".globl public_entry_end\n"
         ".hidden public_entry_end\n"
         "public_entry_end:");
@@ -70,7 +71,7 @@ __asm__(".balign 4096\n"
 __asm__(".text\n");

 __asm__("x86_current_tls:\n\t"
-        ENTRY_STUB_ALIGN_DIRECTIVE
+        ".balign " U_STRINGIFY(ENTRY_STUB_ALIGN) "\n"
         "call 1f\n"
         "1:\n\t"
         "popl %eax\n\t"
@@ -82,7 +83,7 @@ extern uint32_t
 x86_current_tls();

 const int entry_type = __GLDISPATCH_STUB_X86;
-const int entry_stub_size = ENTRY_STUB_SIZE;
+const int entry_stub_size = ENTRY_STUB_ALIGN;

 static const unsigned char ENTRY_TEMPLATE[] =
 {
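
The call 1f / popl %eax pair in x86_current_tls is the classic 32-bit PC-materialization thunk: IA-32 has no PC-relative addressing, so position-independent code fetches its own address from the return-address slot that call pushes. A standalone sketch of the same idiom (32-bit only; illustrative, not code from this commit):

    #include <stdint.h>

    /* Returns the address of label 1 inside the function itself. The
     * "call 1f" pushes that address as a return address, and "popl %eax"
     * immediately pops it, leaving the caller's own return address back
     * on top of the stack for the final ret. */
    __asm__(".text\n"
            "my_get_pc:\n\t"
            "call 1f\n"
            "1:\n\t"
            "popl %eax\n\t"
            "ret\n");
    extern uint32_t my_get_pc(void);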

src/GLdispatch/vnd-glapi/entry_x86_tsd.c

@@ -38,19 +38,22 @@
 #include "glapi.h"
 #include "glvnd/GLdispatchABI.h"

-#define X86_ENTRY_SIZE 64
+#define ENTRY_STUB_ALIGN 64
+
+#if !defined(GLDISPATCH_PAGE_SIZE)
+#define GLDISPATCH_PAGE_SIZE 4096
+#endif

 __asm__(".section wtext,\"ax\",@progbits\n");

-__asm__(".balign 4096\n"
+__asm__(".balign " U_STRINGIFY(GLDISPATCH_PAGE_SIZE) "\n"
         ".globl public_entry_start\n"
         ".hidden public_entry_start\n"
         "public_entry_start:");

 #define STUB_ASM_ENTRY(func) \
-   ".globl " func "\n" \
-   ".type " func ", @function\n" \
-   ".balign " U_STRINGIFY(X86_ENTRY_SIZE) "\n" \
-   func ":"
+    ".globl " func "\n" \
+    ".type " func ", @function\n" \
+    ".balign " U_STRINGIFY(ENTRY_STUB_ALIGN) "\n" \
+    func ":\n"

 #define STUB_ASM_CODE(slot) \
     "push %ebx\n" \
@@ -71,14 +74,14 @@ __asm__(".balign 4096\n"
 #include "mapi_tmp.h"

-__asm__(".balign 4096\n"
+__asm__(".balign " U_STRINGIFY(GLDISPATCH_PAGE_SIZE) "\n"
         ".globl public_entry_end\n"
         ".hidden public_entry_end\n"
         "public_entry_end:");

 __asm__(".text\n");

 const int entry_type = __GLDISPATCH_STUB_X86;
-const int entry_stub_size = X86_ENTRY_SIZE;
+const int entry_stub_size = ENTRY_STUB_ALIGN;

 // Note that the generated stubs are simpler than the assembly stubs above.
 // For the generated stubs, we can patch in the addresses of _glapi_Current and