Fix wrong copying processing for last bytes in x86-32 wcscpy

Wrong copy algorithm for last bytes, not thread safety.
In some particular cases it uses the destination
memory beyond the string end for
16-byte load, puts changes into that part that is relevant
to destination string and writes whole 16-byte chunk into memory.
I have a test case where the memory beyond the string end contains
malloc/free data, that appear corrupted in case free() updates
it in between the 16-byte read and 16-byte write.
This commit is contained in:
Liubov Dmitrieva 2011-12-23 08:50:39 -05:00 committed by Ulrich Drepper
parent d455f537be
commit c044cf14b0
2 changed files with 59 additions and 74 deletions

View file

@ -1,3 +1,8 @@
2011-12-23 Liubov Dmitrieva <liubov.dmitrieva@gmail.com>
* sysdeps/i386/i686/multiarch/wcscpy-ssse3.S: Fix wrong copying
processing for last bytes.
2011-08-06 Bruno Haible <bruno@clisp.org>
[BZ #13061]

View file

@ -54,7 +54,6 @@ ENTRY (__wcscpy_ssse3)
PUSH (%edi)
mov %edx, %edi
PUSH (%esi)
lea 16(%ecx), %esi
@ -220,7 +219,19 @@ L(Shl4Start):
jnz L(Shl4LoopExit)
palignr $4, %xmm1, %xmm2
movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 28(%ecx), %xmm2
pcmpeqd %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm1
test %eax, %eax
jnz L(Shl4LoopExit)
palignr $4, %xmm3, %xmm2
movaps %xmm2, (%edx)
movaps 28(%ecx), %xmm2
@ -236,33 +247,16 @@ L(Shl4Start):
palignr $4, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 28(%ecx), %xmm2
movaps %xmm3, %xmm1
pcmpeqd %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
test %eax, %eax
jnz L(Shl4LoopExit)
palignr $4, %xmm1, %xmm2
movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 28(%ecx), %xmm2
pcmpeqd %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
test %eax, %eax
jnz L(Shl4LoopExit)
palignr $4, %xmm1, %xmm2
movaps %xmm3, %xmm1
palignr $4, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 28(%ecx), %ecx
lea 16(%edx), %edx
@ -305,14 +299,13 @@ L(Shl4LoopStart):
jmp L(Shl4LoopStart)
L(Shl4LoopExit):
movaps (%edx), %xmm6
psrldq $12, %xmm6
palignr $4, %xmm1, %xmm6
movaps %xmm6, (%edx)
movlpd (%ecx), %xmm0
movl 8(%ecx), %esi
movlpd %xmm0, (%edx)
movl %esi, 8(%edx)
POP (%esi)
add $12, %edx
add $12, %ecx
POP (%esi)
test %al, %al
jz L(ExitHigh)
test $0x01, %al
@ -337,7 +330,19 @@ L(Shl8Start):
jnz L(Shl8LoopExit)
palignr $8, %xmm1, %xmm2
movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 24(%ecx), %xmm2
pcmpeqd %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm1
test %eax, %eax
jnz L(Shl8LoopExit)
palignr $8, %xmm3, %xmm2
movaps %xmm2, (%edx)
movaps 24(%ecx), %xmm2
@ -353,33 +358,16 @@ L(Shl8Start):
palignr $8, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 24(%ecx), %xmm2
movaps %xmm3, %xmm1
pcmpeqd %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
test %eax, %eax
jnz L(Shl8LoopExit)
palignr $8, %xmm1, %xmm2
movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 24(%ecx), %xmm2
pcmpeqd %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
test %eax, %eax
jnz L(Shl8LoopExit)
palignr $8, %xmm1, %xmm2
movaps %xmm3, %xmm1
palignr $8, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 24(%ecx), %ecx
lea 16(%edx), %edx
@ -422,14 +410,11 @@ L(Shl8LoopStart):
jmp L(Shl8LoopStart)
L(Shl8LoopExit):
movaps (%edx), %xmm6
psrldq $8, %xmm6
palignr $8, %xmm1, %xmm6
movaps %xmm6, (%edx)
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
POP (%esi)
add $8, %edx
add $8, %ecx
POP (%esi)
test %al, %al
jz L(ExitHigh)
test $0x01, %al
@ -454,7 +439,19 @@ L(Shl12Start):
jnz L(Shl12LoopExit)
palignr $12, %xmm1, %xmm2
movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 20(%ecx), %xmm2
pcmpeqd %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm1
test %eax, %eax
jnz L(Shl12LoopExit)
palignr $12, %xmm3, %xmm2
movaps %xmm2, (%edx)
movaps 20(%ecx), %xmm2
@ -470,33 +467,16 @@ L(Shl12Start):
palignr $12, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 20(%ecx), %xmm2
movaps %xmm3, %xmm1
pcmpeqd %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
test %eax, %eax
jnz L(Shl12LoopExit)
palignr $12, %xmm1, %xmm2
movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 20(%ecx), %xmm2
pcmpeqd %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
test %eax, %eax
jnz L(Shl12LoopExit)
palignr $12, %xmm1, %xmm2
movaps %xmm3, %xmm1
palignr $12, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 20(%ecx), %ecx
lea 16(%edx), %edx
@ -539,11 +519,9 @@ L(Shl12LoopStart):
jmp L(Shl12LoopStart)
L(Shl12LoopExit):
movaps (%edx), %xmm6
psrldq $4, %xmm6
movl (%ecx), %esi
movl %esi, (%edx)
mov $4, %esi
palignr $12, %xmm1, %xmm6
movaps %xmm6, (%edx)
.p2align 4
L(CopyFrom1To16Bytes):
@ -555,6 +533,7 @@ L(CopyFrom1To16Bytes):
jz L(ExitHigh)
test $0x01, %al
jnz L(Exit4)
L(Exit8):
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movl %edi, %eax
@ -564,6 +543,7 @@ L(CopyFrom1To16Bytes):
L(ExitHigh):
test $0x01, %ah
jnz L(Exit12)
L(Exit16):
movdqu (%ecx), %xmm0
movdqu %xmm0, (%edx)
movl %edi, %eax