glibc/sysdeps/powerpc/powerpc32/strlen.S
Ulrich Drepper 92b27c7470 Update.
2002-07-29  Steven Munroe  <sjmunroe@us.ibm.com>

	* sysdeps/powerpc/__longjmp.S: Moved to...
	* sysdeps/powerpc/powerpc32/__longjmp.S: ...here.
	* sysdeps/powerpc/add_n.S: Moved to...
	* sysdeps/powerpc/powerpc32/add_n.S: ...here.
	* sysdeps/powerpc/addmul_1.S: Moved to...
	* sysdeps/powerpc/powerpc32/addmul_1.S: ...here.
	* sysdeps/powerpc/atomicity.h: Moved to...
	* sysdeps/powerpc/powerpc32/atomicity.h: ...here.
	* sysdeps/powerpc/backtrace.c: Moved to...
	* sysdeps/powerpc/powerpc32/backtrace.c: ...here.
	* sysdeps/powerpc/bp-asm.h: Moved to...
	* sysdeps/powerpc/powerpc32/bp-asm.h: ...here.
	* sysdeps/powerpc/bsd-_setjmp.S: Moved to...
	* sysdeps/powerpc/powerpc32/bsd-_setjmp.S: ...here.
	* sysdeps/powerpc/bsd-setjmp.S: Moved to...
	* sysdeps/powerpc/powerpc32/bsd-setjmp.S: ...here.
	* sysdeps/powerpc/dl-machine.c: Moved to...
	* sysdeps/powerpc/powerpc32/dl-machine.c: ...here.
	* sysdeps/powerpc/dl-machine.h: Moved to...
	* sysdeps/powerpc/powerpc32/dl-machine.h: ...here.
	* sysdeps/powerpc/dl-start.S: Moved to...
	* sysdeps/powerpc/powerpc32/dl-start.S: ...here.
	* sysdeps/powerpc/gprrest0.S: Moved to...
	* sysdeps/powerpc/powerpc32/gprrest0.S: ...here.
	* sysdeps/powerpc/gprrest1.S: Moved to...
	* sysdeps/powerpc/powerpc32/gprrest1.S: ...here.
	* sysdeps/powerpc/gprsave0.S: Moved to...
	* sysdeps/powerpc/powerpc32/gprsave0.S: ...here.
	* sysdeps/powerpc/gprsave1.S: Moved to...
	* sysdeps/powerpc/powerpc32/gprsave1.S: ...here.
	* sysdeps/powerpc/lshift.S: Moved to...
	* sysdeps/powerpc/powerpc32/lshift.S: ...here.
	* sysdeps/powerpc/memset.S: Moved to...
	* sysdeps/powerpc/powerpc32/memset.S: ...here.
	* sysdeps/powerpc/mul_1.S: Moved to...
	* sysdeps/powerpc/powerpc32/mul_1.S: ...here.
	* sysdeps/powerpc/ppc-mcount.S: Moved to...
	* sysdeps/powerpc/powerpc32/ppc-mcount.S: ...here.
	* sysdeps/powerpc/register-dump.h: Moved to...
	* sysdeps/powerpc/powerpc32/register-dump.h: ...here.
	* sysdeps/powerpc/rshift.S: Moved to...
	* sysdeps/powerpc/powerpc32/rshift.S: ...here.
	* sysdeps/powerpc/setjmp.S: Moved to...
	* sysdeps/powerpc/powerpc32/setjmp.S: ...here.
	* sysdeps/powerpc/stpcpy.S: Moved to...
	* sysdeps/powerpc/powerpc32/stpcpy.S: ...here.
	* sysdeps/powerpc/strchr.S: Moved to...
	* sysdeps/powerpc/powerpc32/strchr.S: ...here.
	* sysdeps/powerpc/strcmp.S: Moved to...
	* sysdeps/powerpc/powerpc32/strcmp.S: ...here.
	* sysdeps/powerpc/strcpy.S: Moved to...
	* sysdeps/powerpc/powerpc32/strcpy.S: ...here.
	* sysdeps/powerpc/strlen.S: Moved to...
	* sysdeps/powerpc/powerpc32/strlen.S: ...here.
	* sysdeps/powerpc/sub_n.S: Moved to...
	* sysdeps/powerpc/powerpc32/sub_n.S: ...here.
	* sysdeps/powerpc/submul_1.S: Moved to...
	* sysdeps/powerpc/powerpc32/submul_1.S: ...here.
	* sysdeps/powerpc/elf/bzero.S: Moved to...
	* sysdeps/powerpc/powerpc32/elf/bzero.S: ...here.
	* sysdeps/powerpc/elf/start.S: Moved to...
	* sysdeps/powerpc/powerpc32/elf/start.S: ...here.
	* sysdeps/powerpc/fpu/__longjmp.S: Moved to...
	* sysdeps/powerpc/powerpc32/fpu/__longjmp.S: ...here.
	* sysdeps/powerpc/fpu/fprrest.S: Moved to...
	* sysdeps/powerpc/powerpc32/fpu/fprrest.S: ...here.
	* sysdeps/powerpc/fpu/fprsave.S: Moved to...
	* sysdeps/powerpc/powerpc32/fpu/fprsave.S: ...here.
	* sysdeps/powerpc/fpu/setjmp.S: Moved to...
	* sysdeps/powerpc/powerpc32/fpu/setjmp.S: ...here.
	* sysdeps/powerpc/fpu/s_copysign.S: Moved to...
	* sysdeps/powerpc/powerpc32/fpu/s_copysign.S: ...here.
	* sysdeps/powerpc/fpu/s_copysignf.S: Moved to...
	* sysdeps/powerpc/powerpc32/fpu/s_copysignf.S: ...here.
	* sysdeps/unix/sysv/linux/powerpc/brk.S: Moved to...
	* sysdeps/unix/sysv/linux/powerpc/powerpc32/brk.S: ...here.
	* sysdeps/unix/sysv/linux/powerpc/clone.S: Moved to...
	* sysdeps/unix/sysv/linux/powerpc/powerpc32/clone.S: ...here.
	* sysdeps/unix/sysv/linux/powerpc/glob64.c: Moved to...
	* sysdeps/unix/sysv/linux/powerpc/powerpc32/glob64.c: ...here.
	* sysdeps/unix/sysv/linux/powerpc/kernel_stat.h: Moved to...
	* sysdeps/unix/sysv/linux/powerpc/powerpc32/kernel_stat.h: ...here.
	* sysdeps/unix/sysv/linux/powerpc/socket.S: Moved to...
	* sysdeps/unix/sysv/linux/powerpc/powerpc32/socket.S: ...here.
	* sysdeps/unix/sysv/linux/powerpc/sysdep.h: Moved to...
	* sysdeps/unix/sysv/linux/powerpc/powerpc32/sysdep.h: ...here.
	* sysdeps/unix/sysv/linux/powerpc/syscalls.list: Moved to...
	* sysdeps/unix/sysv/linux/powerpc/powerpc32/syscalls.list: ...here.
	Support PowerPC64.  Separate powerpc into powerpc/powerpc32 and
	powerpc/powerpc64.

2002-07-29  Steven Munroe  <sjmunroe@us.ibm.com>

	* FAQ.in: Add powerpc64 to supported targets list. Also state the
	minimum gcc version is 3.2
	* README: Add powerpc64 to supported targets list.
	* configure.in: Change machine=powerpc to machine=powerpc/powerpc32.
	Add powerpc64 and machine=powerpc/powerpc64.
	(HAVE_ASM_GLOBAL_DOT_NAME): Define if linux*powerpc/powerpc64*.
	* shlib-versions: Set DEFAULT version to 2.2.5 for powerpc64.
	* sysdeps/powerpc/Dist: Remove dl-machine.c, dl-start.S, ppc-mcount.S,
	gprsave1.S, gprsave0.S, gprrest1.S, and gprrest0.S.
	* sysdeps/powerpc/powerpc32/Dist: New file.
	* sysdeps/powerpc/Implies: Remove wordsize-32 and powerpc/soft-fp.
	* sysdeps/powerpc/powerpc32/Implies: New file.
	* sysdeps/powerpc/Makefile(cflags): Remove powerpc32 specific cflags.
	($(with-fp) = no): Move test to powerpc32/Makefile.
	($(subdir) = misc): Move to powerpc32/Makefile.
	($(build-shared) = yes): Move to powerpc32/Makefile.
	($(subdir) = csu): Move to powerpc32/Makefile.
	(sysdep-rtld-routines): Remove dl-start.  Moved these bits to ...
	* sysdeps/powerpc/powerpc32/Makefile: New file.
	* sysdeps/powerpc/Versions: Remove libgcc functions.
	* sysdeps/powerpc/powerpc32/Versions: New file.
	* sysdeps/powerpc/fpu/Makefile: Remove fprsave and fprrest.
	* sysdeps/powerpc/powerpc32/fpu/Makefile: New file.
	* sysdeps/unix/sysv/linux/configure.in (powerpc*):
	Set arch_minimum_kernel=2.4.19 for powerpc/powerpc64. Also set
	libc_cv_gcc_unwind_find_fde=yes only if !powerpc/powerpc64.
	($machine): Add powerpc/powerpc64 to if ... | for
	libc_cv_slibdir=/lib64.
	(powerpc*): Set ldd_rewrite_script.
	* sysdeps/unix/sysv/linux/powerpc/ldd-rewrite.sed: New file.
	* sysdeps/unix/sysv/linux/powerpc/Dist: Remove clone.S.
	* sysdeps/unix/sysv/linux/powerpc/powerpc32/Dist: New file.
	Add clone.S.
	* sysdeps/unix/sysv/linux/powerpc/Makefile: Remove oldgetrlimit64.
	* sysdeps/unix/sysv/linux/powerpc/Versions: Remove GLIBC_2.0
	functions.  Remove GLIBC_2.2 functions except getrlimit and
	setrlimit.  Moved them to ...
	* sysdeps/unix/sysv/linux/powerpc/powerpc32/Versions: New file.

2002-09-04  Ulrich Drepper  <drepper@redhat.com>

	* libio/tst-atime.c: Include <errno.h>.
	(do_test): Only perform fstatvfs check if ST_NOATIME is defined.

2002-09-03  Isamu Hasegawa  <isamu@yamato.ibm.com>

	* posix/regcomp.c (regcomp): Append "__restrict" modifier to avoid
	warnings of some compilers.
	(build_collating_symbol): Change the type of characters from
	"unsigned char"	to "char", and append a cast to "char*" pointer in
	array subscript.
	(build_collating_symbol): Likewise.
	(build_equiv_class): Likewise.
	(build_charclass): Likewise.
	(re_compile_pattern): Remove incorrect cast.
	(re_compile_fastmap_iter): Change the type of characters from
	"unsigned char"	to "char", and append a cast to "char*" pointer
	in array subscript.
	(parse_bracket_exp): Likewise.
	* posix/regex_internal.c (re_string_construct_common): Likewise.
	(re_string_allocate): Likewise.
	(re_string_construct): Likewise.
	(re_string_realloc_buffers): Likewise.
	(build_wcs_buffer): Likewise.
	(re_string_reconstruct): Likewise.
	* posix/regex_internal.h: Change the type of characters in
	re_string_t and bracket_elem_t from "unsigned char" to "char".
	* posix/regexec.c (regexec): Append "__restrict" modifier to avoid
	warnings of some compilers.
	(transit_state_bkref_loop): Change the type of characters from
	"unsigned char"	to "char", and append a cast to "char*" pointer in
	array subscript.
	(check_node_accept_bytes): Likewise.
	(find_collation_sequence_value): Likewise.
2002-09-05 10:28:51 +00:00

160 lines
5.9 KiB
ArmAsm

/* Optimized strlen implementation for PowerPC.
Copyright (C) 1997, 1999, 2000 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
#include <sysdep.h>
#include <bp-sym.h>
#include <bp-asm.h>
/* The algorithm here uses the following techniques:
1) Given a word 'x', we can test to see if it contains any 0 bytes
by subtracting 0x01010101, and seeing if any of the high bits of each
byte changed from 0 to 1. This works because the least significant
0 byte must have had no incoming carry (otherwise it's not the least
significant), so it is 0x00 - 0x01 == 0xff. For all other
byte values, either they have the high bit set initially, or when
1 is subtracted you get a value in the range 0x00-0x7f, none of which
have their high bit set. The expression here is
(x + 0xfefefeff) & ~(x | 0x7f7f7f7f), which gives 0x00000000 when
there were no 0x00 bytes in the word.
2) Given a word 'x', we can test to see _which_ byte was zero by
calculating ~(((x & 0x7f7f7f7f) + 0x7f7f7f7f) | x | 0x7f7f7f7f).
This produces 0x80 in each byte that was zero, and 0x00 in all
the other bytes. The '| 0x7f7f7f7f' clears the low 7 bits in each
byte, and the '| x' part ensures that bytes with the high bit set
produce 0x00. The addition will carry into the high bit of each byte
iff that byte had one of its low 7 bits set. We can then just see
which was the most significant bit set and divide by 8 to find how
many to add to the index.
This is from the book 'The PowerPC Compiler Writer's Guide',
by Steve Hoxey, Faraydon Karim, Bill Hay and Hank Warren.
We deal with strings not aligned to a word boundary by taking the
first word and ensuring that bytes not part of the string
are treated as nonzero. To allow for memory latency, we unroll the
loop a few times, being careful to ensure that we do not read ahead
across cache line boundaries.
Questions to answer:
1) How long are strings passed to strlen? If they're often really long,
we should probably use cache management instructions and/or unroll the
loop more. If they're often quite short, it might be better to use
fact (2) in the inner loop than have to recalculate it.
2) How popular are bytes with the high bit set? If they are very rare,
on some processors it might be useful to use the simpler expression
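(x - 0x01010101) & 0x80808080, which is nonzero when some byte is zero
(that is, on processors with only one ALU), but this fails when any
character has its high bit set, because such a byte can still have its
high bit set after the subtraction and is then mistaken for a zero byte. */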
/* Some notes on register usage: Under the SVR4 ABI, we can use registers
0 and 3 through 12 (so long as we don't call any procedures) without
saving them. We can also use registers 14 through 31 if we save them.
We can't use r1 (it's the stack pointer), r2 nor r13 because the user
program may expect them to hold their usual value if we get sent
a signal. Integer parameters are passed in r3 through r10.
We can use condition registers cr0, cr1, cr5, cr6, and cr7 without saving
them, the others we must save. */
/* size_t [r3] strlen (const char *s [r3]) */
/* strlen: count the bytes that precede the first zero byte of a string.
   In:   r3 = pointer to the string (any alignment).
   Out:  r3 = length in bytes.
   Uses: r0, r4-r12, cr0 and cr7 as scratch; no stack frame is created and
         no branch-and-link appears in the visible code.
   ENTRY, END, BP_SYM, CHECK_BOUNDS_LOW and L() are macros supplied by the
   included headers and are not visible here; what they expand to must be
   confirmed against those headers.
   The string is read one aligned 32-bit word at a time.  The byte at the
   lowest address is treated as the most significant byte of a loaded word
   (the final cntlzw relies on this).  */
ENTRY (BP_SYM (strlen))
/* rTMP1 is r0.  It is only ever used as an operand of register-register
   instructions below, never as the base register of addi or a load.  */
#define rTMP1 r0
#define rRTN r3 /* incoming STR arg, outgoing result */
#define rSTR r4 /* current string position */
#define rPADN r5 /* number of padding bits we prepend to the
string to make it start at a word boundary */
#define rFEFE r6 /* constant 0xfefefeff (-0x01010101) */
#define r7F7F r7 /* constant 0x7f7f7f7f */
#define rWORD1 r8 /* current string word */
/* rWORD2 and rMASK both name r9.  rMASK is last read by the "and." on the
   first word, before rWORD2 is first written in the loop, so the two uses
   do not overlap.  */
#define rWORD2 r9 /* next string word */
#define rMASK r9 /* mask for first string word */
#define rTMP2 r10
#define rTMP3 r11
#define rTMP4 r12
/* NOTE(review): presumably a bounded-pointer low-bound check on the
   incoming pointer using rTMP1/rTMP2 as scratch; the macro body is not
   shown here, confirm in bp-asm.h.  */
CHECK_BOUNDS_LOW (rRTN, rTMP1, rTMP2)
/* rSTR = rRTN with its two low bits cleared, i.e. the address of the
   aligned word that contains the first string byte.  */
clrrwi rSTR, rRTN, 2
/* Upper half of 0x7f7f7f7f; the lower half is added a few lines down.  */
lis r7F7F, 0x7f7f
/* rPADN = (rRTN & 3) * 8: rotate left by 3 and keep bits 27-28.  This is the
   number of bits in the first word that lie before the string start.  */
rlwinm rPADN, rRTN, 3, 27, 28
lwz rWORD1, 0(rSTR)
li rMASK, -1
addi r7F7F, r7F7F, 0x7f7f
/* That's the setup done, now do the first pair of words.
We make an exception and use method (2) on the first two words, to reduce
overhead. */
/* rMASK = 0xffffffff >> rPADN: zero in the leading bytes that precede the
   string, all ones in the bytes that belong to it.  */
srw rMASK, rMASK, rPADN
/* rTMP1 = ~(((x & 0x7f7f7f7f) + 0x7f7f7f7f) | x | 0x7f7f7f7f) for x = rWORD1:
   0x80 in every byte of x that is zero, 0x00 elsewhere.  */
and rTMP1, r7F7F, rWORD1
or rTMP2, r7F7F, rWORD1
add rTMP1, rTMP1, r7F7F
nor rTMP1, rTMP2, rTMP1
/* Discard zero-byte markers that fall in the padding bytes; cr0 records
   whether any marker remains.  */
and. rWORD1, rTMP1, rMASK
/* Copy the low four bits of the original pointer into CR field 7 (CR bits
   28-31).  Only that field is written, so cr0 still holds the result of the
   "and." above for the following bne, and CR bit 29 now mirrors address
   bit 0x4 for the "bt 29" below.  */
mtcrf 0x01, rRTN
bne L(done0)
/* Build 0xfefefeff: lis gives 0xfeff0000 and adding the sign-extended
   -0x101 yields 0xfefefeff.  */
lis rFEFE, -0x101
addi rFEFE, rFEFE, -0x101
/* Are we now aligned to a doubleword boundary? */
/* CR bit 29 set means the original address had bit 0x4 set, so the word just
   examined was the second word of an 8-byte pair and the next word starts a
   new pair: enter the loop directly.  */
bt 29, L(loop)
/* Handle second word of pair. */
/* lwzu also advances rSTR by 4, so rSTR keeps pointing at the word held in
   rWORD1.  No mask is needed: every byte of this word is part of the
   string.  */
lwzu rWORD1, 4(rSTR)
and rTMP1, r7F7F, rWORD1
or rTMP2, r7F7F, rWORD1
add rTMP1, rTMP1, r7F7F
nor. rWORD1, rTMP2, rTMP1
bne L(done0)
/* The loop. */
/* Each iteration examines the two words at rSTR+4 and rSTR+8 using test (1),
   (x + 0xfefefeff) & ~(x | 0x7f7f7f7f), which is nonzero when x contains a
   zero byte.  The add/nor for the second word are issued before the branch
   on the first word's result.  */
L(loop):
lwz rWORD1, 4(rSTR)
/* The update form leaves rSTR pointing at rWORD2 (old rSTR + 8).  */
lwzu rWORD2, 8(rSTR)
add rTMP1, rFEFE, rWORD1
nor rTMP2, r7F7F, rWORD1
and. rTMP1, rTMP1, rTMP2
add rTMP3, rFEFE, rWORD2
nor rTMP4, r7F7F, rWORD2
bne L(done1)
and. rTMP1, rTMP3, rTMP4
beq L(loop)
/* Zero byte is in rWORD2, which rSTR already points at.  Recompute the
   per-byte marker of method (2), reusing rTMP4 = ~(x | 0x7f7f7f7f):
   rWORD1 = rTMP4 & ~((x & 0x7f7f7f7f) + 0x7f7f7f7f).  */
and rTMP1, r7F7F, rWORD2
add rTMP1, rTMP1, r7F7F
andc rWORD1, rTMP4, rTMP1
b L(done0)
/* Zero byte is in rWORD1, the first word of the pair.  rSTR points at the
   second word, so step it back by 4, and build the per-byte marker from
   rTMP2 = ~(x | 0x7f7f7f7f) in the same way as above.  */
L(done1):
and rTMP1, r7F7F, rWORD1
subi rSTR, rSTR, 4
add rTMP1, rTMP1, r7F7F
andc rWORD1, rTMP2, rTMP1
/* When we get to here, rSTR points to the first word in the string that
contains a zero byte, and the most significant set bit in rWORD1 is in that
byte. */
L(done0):
/* Leading-zero count / 8 = index of the zero byte within the word.
   rTMP1 = rSTR - rRTN is the byte offset of that word from the start of the
   string (it is minus the misalignment when the zero byte is in the first
   word), so the sum is the string length.  */
cntlzw rTMP3, rWORD1
subf rTMP1, rRTN, rSTR
srwi rTMP3, rTMP3, 3
add rRTN, rTMP1, rTMP3
/* GKM FIXME: check high bound. */
blr
END (BP_SYM (strlen))