glibc/sysdeps/alpha/alphaev67/strrchr.S
Richard Henderson 68b7efaadb Relocate alpha from ports to libc
Also fixed the following whitespace nits to satisfy the push:

sysdeps/alpha/alphaev6/memset.S:142: space before tab in indent.
sysdeps/alpha/configure:1: new blank line at EOF.
sysdeps/alpha/fpu/e_sqrt.c:126: space before tab in indent.
sysdeps/alpha/preconfigure:1: new blank line at EOF.
sysdeps/unix/sysv/linux/alpha/syscalls.list:1: new blank line at EOF.
2014-02-12 07:00:06 -08:00

117 lines
3.6 KiB
ArmAsm

/* Copyright (C) 2000-2014 Free Software Foundation, Inc.
EV67 optimized by Rick Gorton <rick.gorton@alpha-processor.com>.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library. If not, see
<http://www.gnu.org/licenses/>. */
/* Return the address of the last occurrence of a given character
within a null-terminated string, or null if it is not found. */
#include <sysdep.h>
.arch ev6
.set noreorder
.set noat
ENTRY(strrchr)
#ifdef PROF
ldgp gp, 0(pv)
lda AT, _mcount
jsr AT, (AT), _mcount
.prologue 1
#else
.prologue 0
#endif
and a1, 0xff, t2 # E : 00000000000000ch
insbl a1, 1, t4 # U : 000000000000ch00
insbl a1, 2, t5 # U : 0000000000ch0000
ldq_u t0, 0(a0) # L : load first quadword Latency=3
mov zero, t6 # E : t6 is last match aligned addr
or t2, t4, a1 # E : 000000000000chch
sll t5, 8, t3 # U : 00000000ch000000
mov zero, t8 # E : t8 is last match byte compare mask
andnot a0, 7, v0 # E : align source addr
or t5, t3, t3 # E : 00000000chch0000
sll a1, 32, t2 # U : 0000chch00000000
sll a1, 48, t4 # U : chch000000000000
or t4, a1, a1 # E : chch00000000chch
or t2, t3, t2 # E : 0000chchchch0000
or a1, t2, a1 # E : chchchchchchchch
lda t5, -1 # E : build garbage mask
cmpbge zero, t0, t1 # E : bits set iff byte == zero
mskqh t5, a0, t4 # E : Complete garbage mask
xor t0, a1, t2 # E : make bytes == c zero
cmpbge zero, t4, t4 # E : bits set iff byte is garbage
cmpbge zero, t2, t3 # E : bits set iff byte == c
andnot t1, t4, t1 # E : clear garbage from null test
andnot t3, t4, t3 # E : clear garbage from char test
bne t1, $eos # U : did we already hit the terminator?
/* Character search main loop */
$loop:
ldq t0, 8(v0) # L : load next quadword
cmovne t3, v0, t6 # E : save previous comparisons match
nop # : Latency=2, extra map slot (keep nop with cmov)
nop
cmovne t3, t3, t8 # E : Latency=2, extra map slot
nop # : keep with cmovne
addq v0, 8, v0 # E :
xor t0, a1, t2 # E :
cmpbge zero, t0, t1 # E : bits set iff byte == zero
cmpbge zero, t2, t3 # E : bits set iff byte == c
beq t1, $loop # U : if we havnt seen a null, loop
nop
/* Mask out character matches after terminator */
$eos:
negq t1, t4 # E : isolate first null byte match
and t1, t4, t4 # E :
subq t4, 1, t5 # E : build a mask of the bytes upto...
or t4, t5, t4 # E : ... and including the null
and t3, t4, t3 # E : mask out char matches after null
cmovne t3, t3, t8 # E : save it, if match found Latency=2, extra map slot
nop # : Keep with cmovne
nop
cmovne t3, v0, t6 # E :
nop # : Keep with cmovne
/* Locate the address of the last matched character */
ctlz t8, t2 # U0 : Latency=3 (0x40 for t8=0)
nop
cmoveq t8, 0x3f, t2 # E : Compensate for case when no match is seen
nop # E : hide the cmov latency (2) behind ctlz latency
lda t5, 0x3f($31) # E :
subq t5, t2, t5 # E : Normalize leading zero count
addq t6, t5, v0 # E : and add to quadword address
ret # L0 : Latency=3
nop
nop
END(strrchr)
weak_alias (strrchr, rindex)
libc_hidden_builtin_def (strrchr)