Small optimizations to cancellable x86-64 code.

This commit is contained in:
Ulrich Drepper 2009-08-07 20:36:53 -07:00
parent 57b378ac89
commit 9083bcc5dc
5 changed files with 49 additions and 61 deletions

View file

@ -1,3 +1,12 @@
2009-08-07 Ulrich Drepper <drepper@redhat.com>
* sysdeps/unix/sysv/linux/x86_64/sem_wait.S: Minor optimizations
enabled by the special *_asynccancel functions.
* sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S: Likewise.
* sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S: Likewise.
* sysdeps/unix/sysv/linux/x86_64/cancellation.S: Include lowlevellock.h.
2009-08-04 Ulrich Drepper <drepper@redhat.com>
* sysdeps/unix/sysv/linux/x86_64/cancellation.S: New file.

View file

@ -20,6 +20,7 @@
#include <sysdep.h>
#include <tcb-offsets.h>
#include <kernel-features.h>
#include "lowlevellock.h"
#ifdef IS_IN_libpthread
# ifdef SHARED

View file

@ -157,7 +157,6 @@ __pthread_cond_timedwait:
.LcleanupSTART1:
34: callq __pthread_enable_asynccancel
movl %eax, (%rsp)
movq 8(%rsp), %rdi
movq %r13, %r10
movl $FUTEX_WAIT_BITSET, %esi
@ -511,7 +510,6 @@ __pthread_cond_timedwait:
.LcleanupSTART2:
4: callq __pthread_enable_asynccancel
movl %eax, (%rsp)
movq 8(%rsp), %rdi
leaq 32(%rsp), %r10
cmpq $-1, dep_mutex(%rdi)

View file

@ -45,14 +45,11 @@ __pthread_cond_wait:
cfi_lsda(DW_EH_PE_udata4, .LexceptSTART)
#endif
pushq %r12
cfi_adjust_cfa_offset(8)
cfi_rel_offset(%r12, 0)
pushq %r13
cfi_adjust_cfa_offset(8)
cfi_rel_offset(%r13, 0)
#define FRAME_SIZE 32
subq $FRAME_SIZE, %rsp
leaq -FRAME_SIZE(%rsp), %rsp
cfi_adjust_cfa_offset(FRAME_SIZE)
/* Stack frame:
@ -112,7 +109,7 @@ __pthread_cond_wait:
movl %edx, 4(%rsp)
/* Unlock. */
8: movl cond_futex(%rdi), %r12d
8: movl cond_futex(%rdi), %edx
LOCK
#if cond_lock == 0
decl (%rdi)
@ -125,9 +122,7 @@ __pthread_cond_wait:
4: callq __pthread_enable_asynccancel
movl %eax, (%rsp)
movq 8(%rsp), %rdi
xorq %r10, %r10
movq %r12, %rdx
cmpq $-1, dep_mutex(%rdi)
leaq cond_futex(%rdi), %rdi
movl $FUTEX_WAIT, %esi
@ -243,21 +238,14 @@ __pthread_cond_wait:
callq __pthread_mutex_cond_lock
14: addq $FRAME_SIZE, %rsp
cfi_adjust_cfa_offset(-FRAME_SIZE)
popq %r13
cfi_adjust_cfa_offset(-8)
cfi_restore(%r13)
popq %r12
cfi_adjust_cfa_offset(-8)
cfi_restore(%r12)
14: movq FRAME_SIZE(%rsp), %r13
leaq FRAME_SIZE+8(%rsp), %rsp
cfi_adjust_cfa_offset(-(FRAME_SIZE + 8))
/* We return the result of the mutex_lock operation. */
retq
cfi_adjust_cfa_offset(16 + FRAME_SIZE)
cfi_rel_offset(%r12, FRAME_SIZE + 8)
cfi_adjust_cfa_offset(8 + FRAME_SIZE)
cfi_rel_offset(%r13, FRAME_SIZE)
18: callq __pthread_mutex_cond_lock_adjust
@ -285,7 +273,11 @@ __pthread_cond_wait:
movl $LLL_PRIVATE, %eax
movl $LLL_SHARED, %esi
cmovne %eax, %esi
/* The call preserves %rdx. */
callq __lll_unlock_wake
#if cond_lock != 0
subq $cond_lock, %rdi
#endif
jmp 4b
/* Locking in loop failed. */
@ -349,9 +341,7 @@ versioned_symbol (libpthread, __pthread_cond_wait, pthread_cond_wait,
__condvar_cleanup1:
/* Stack frame:
rsp + 48
+--------------------------+
rsp + 40 | %r12 |
rsp + 40
+--------------------------+
rsp + 32 | %r13 |
+--------------------------+
@ -410,7 +400,7 @@ __condvar_cleanup1:
3: subl $(1 << nwaiters_shift), cond_nwaiters(%rdi)
/* Wake up a thread which wants to destroy the condvar object. */
xorq %r12, %r12
xorl %ecx, %ecx
cmpq $0xffffffffffffffff, total_seq(%rdi)
jne 4f
movl cond_nwaiters(%rdi), %eax
@ -433,7 +423,7 @@ __condvar_cleanup1:
movl $SYS_futex, %eax
syscall
subq $cond_nwaiters, %rdi
movl $1, %r12d
movl $1, %ecx
4: LOCK
#if cond_lock == 0
@ -449,10 +439,11 @@ __condvar_cleanup1:
movl $LLL_PRIVATE, %eax
movl $LLL_SHARED, %esi
cmovne %eax, %esi
/* The call preserves %rcx. */
callq __lll_unlock_wake
/* Wake up all waiters to make sure no signal gets lost. */
2: testq %r12, %r12
2: testl %ecx, %ecx
jnz 5f
addq $cond_futex, %rdi
cmpq $-1, dep_mutex-cond_futex(%rdi)
@ -474,7 +465,6 @@ __condvar_cleanup1:
callq __pthread_mutex_cond_lock
movq 24(%rsp), %rdi
movq 40(%rsp), %r12
movq 32(%rsp), %r13
.LcallUR:
call _Unwind_Resume@PLT

View file

@ -61,16 +61,13 @@ sem_wait:
xorl %eax, %eax
retq
1: pushq %r12
/* This push is only needed to store the sem_t pointer for the
exception handler. */
1: pushq %rdi
cfi_adjust_cfa_offset(8)
cfi_rel_offset(%r12, 0)
pushq %r13
cfi_adjust_cfa_offset(8)
cfi_rel_offset(%r13, 0)
movq %rdi, %r13
LOCK
addq $1, NWAITERS(%r13)
addq $1, NWAITERS(%rdi)
.LcleanupSTART:
6: call __pthread_enable_asynccancel
@ -78,7 +75,6 @@ sem_wait:
xorq %r10, %r10
movl $SYS_futex, %eax
movq %r13, %rdi
#if FUTEX_WAIT == 0
movl PRIVATE(%rdi), %esi
#else
@ -87,22 +83,23 @@ sem_wait:
#endif
xorl %edx, %edx
syscall
movq %rax, %r12
movq %rax, %rcx
movl %r8d, %edi
xchgq %r8, %rdi
call __pthread_disable_asynccancel
.LcleanupEND:
movq %r8, %rdi
testq %r12, %r12
testq %rcx, %rcx
je 3f
cmpq $-EWOULDBLOCK, %r12
cmpq $-EWOULDBLOCK, %rcx
jne 4f
3:
#if VALUE == 0
movl (%r13), %eax
movl (%rdi), %eax
#else
movl VALUE(%r13), %eax
movl VALUE(%rdi), %eax
#endif
5: testl %eax, %eax
je 6b
@ -110,50 +107,43 @@ sem_wait:
leal -1(%rax), %edx
LOCK
#if VALUE == 0
cmpxchgl %edx, (%r13)
cmpxchgl %edx, (%rdi)
#else
cmpxchgl %edx, VALUE(%r13)
cmpxchgl %edx, VALUE(%rdi)
#endif
jne 5b
LOCK
subq $1, NWAITERS(%r13)
xorl %eax, %eax
9: popq %r13
9: LOCK
subq $1, NWAITERS(%rdi)
leaq 8(%rsp), %rsp
cfi_adjust_cfa_offset(-8)
cfi_restore(%r13)
popq %r12
cfi_adjust_cfa_offset(-8)
cfi_restore(%r12)
retq
cfi_adjust_cfa_offset(2 * 8)
cfi_rel_offset(%r12, 8)
cfi_rel_offset(%r13, 0)
4: negq %r12
cfi_adjust_cfa_offset(8)
4: negq %rcx
#if USE___THREAD
movq errno@gottpoff(%rip), %rdx
movl %r12d, %fs:(%rdx)
movl %ecx, %fs:(%rdx)
#else
# error "not supported. %rcx and %rdi must be preserved"
callq __errno_location@plt
movl %r12d, (%rax)
movl %ecx, (%rax)
#endif
orl $-1, %eax
LOCK
subq $1, NWAITERS(%r13)
jmp 9b
.size sem_wait,.-sem_wait
.type sem_wait_cleanup,@function
sem_wait_cleanup:
movq (%rsp), %rdi
LOCK
subq $1, NWAITERS(%r13)
subq $1, NWAITERS(%rdi)
movq %rax, %rdi
.LcallUR:
call _Unwind_Resume@PLT