Add optimized nearbyint{,f} for x86-64
This commit is contained in:
parent
d38f1dba00
commit
581d30e386
|
@ -1,5 +1,14 @@
|
||||||
2011-10-18 Ulrich Drepper <drepper@gmail.com>
|
2011-10-18 Ulrich Drepper <drepper@gmail.com>
|
||||||
|
|
||||||
|
* sysdeps/x86_64/fpu/multiarch/Makefile [math] (libm-sysdep_routines):
|
||||||
|
Add s_nearbyint-c and s_nearbyintf-c.
|
||||||
|
* sysdeps/x86_64/fpu/bits/mathinline.h: Define nearbyint and
|
||||||
|
nearbyintf inlines.
|
||||||
|
* sysdeps/x86_64/fpu/multiarch/s_nearbyint-c.c: New file.
|
||||||
|
* sysdeps/x86_64/fpu/multiarch/s_nearbyint.S: New file.
|
||||||
|
* sysdeps/x86_64/fpu/multiarch/s_nearbyintf-c.c: New file.
|
||||||
|
* sysdeps/x86_64/fpu/multiarch/s_nearbyintf.S: New file.
|
||||||
|
|
||||||
* math/math_private.h: Define defaults for libc_fegetround,
|
* math/math_private.h: Define defaults for libc_fegetround,
|
||||||
libc_fegetroundf, libc_fegetroundl, libc_fesetround, libc_fesetroundf,
|
libc_fegetroundf, libc_fegetroundl, libc_fesetround, libc_fesetroundf,
|
||||||
libc_fesetroundl, libc_feholdexcept, libc_feholdexceptf,
|
libc_fesetroundl, libc_feholdexcept, libc_feholdexceptf,
|
||||||
|
|
|
@ -167,6 +167,24 @@ __NTH (rintf (float __x))
|
||||||
return __res;
|
return __res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef __USE_ISOC99
|
||||||
|
/* Round to nearest integer without raising inexact exception. */
|
||||||
|
__MATH_INLINE double
|
||||||
|
__NTH (nearbyint (double __x))
|
||||||
|
{
|
||||||
|
double __res;
|
||||||
|
__asm ("roundsd $0xc, %1, %0" : "=x" (__res) : "xm" (__x));
|
||||||
|
return __res;
|
||||||
|
}
|
||||||
|
__MATH_INLINE float
|
||||||
|
__NTH (nearbyintf (float __x))
|
||||||
|
{
|
||||||
|
float __res;
|
||||||
|
__asm ("roundss $0xc, %1, %0" : "=x" (__res) : "xm" (__x));
|
||||||
|
return __res;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
__END_NAMESPACE_C99
|
__END_NAMESPACE_C99
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
ifeq ($(subdir),math)
|
ifeq ($(subdir),math)
|
||||||
libm-sysdep_routines += s_floor-c s_ceil-c s_floorf-c s_ceilf-c \
|
libm-sysdep_routines += s_floor-c s_ceil-c s_floorf-c s_ceilf-c \
|
||||||
s_rint-c s_rintf-c
|
s_rint-c s_rintf-c s_nearbyint-c s_nearbyintf-c
|
||||||
endif
|
endif
|
||||||
|
|
3
sysdeps/x86_64/fpu/multiarch/s_nearbyint-c.c
Normal file
3
sysdeps/x86_64/fpu/multiarch/s_nearbyint-c.c
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
/* Compile the included C source with every use of the name __nearbyint
   replaced by __nearbyint_c.  The selector in s_nearbyint.S returns
   __nearbyint_c when the SSE4.1 feature bit is not set.  */
#undef __nearbyint
|
||||||
|
#define __nearbyint __nearbyint_c
|
||||||
|
#include <sysdeps/ieee754/dbl-64/wordsize-64/s_nearbyint.c>
|
40
sysdeps/x86_64/fpu/multiarch/s_nearbyint.S
Normal file
40
sysdeps/x86_64/fpu/multiarch/s_nearbyint.S
Normal file
|
@ -0,0 +1,40 @@
|
||||||
|
/* Copyright (C) 2011 Free Software Foundation, Inc.
|
||||||
|
This file is part of the GNU C Library.
|
||||||
|
Contributed by Ulrich Drepper <drepper@gmail.com>, 2011.
|
||||||
|
|
||||||
|
The GNU C Library is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU Lesser General Public
|
||||||
|
License as published by the Free Software Foundation; either
|
||||||
|
version 2.1 of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
The GNU C Library is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
Lesser General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Lesser General Public
|
||||||
|
License along with the GNU C Library; if not, write to the Free
|
||||||
|
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||||
|
02111-1307 USA. */
|
||||||
|
|
||||||
|
#include <machine/asm.h>
|
||||||
|
#include <init-arch.h>
|
||||||
|
|
||||||
|
|
||||||
|
/* Indirect-function selector for nearbyint.
   Returns in %rax the address of the implementation to use:
   __nearbyint_sse41 when the SSE4.1 feature bit is set in the data
   returned by __get_cpu_features, otherwise __nearbyint_c.
   NOTE(review): %rsp is not adjusted before the call below; confirm the
   callee does not rely on the usual stack alignment at entry.  */
ENTRY(__nearbyint)
	.type	__nearbyint, @gnu_indirect_function
	call	__get_cpu_features@plt
	movq	%rax, %rdx		/* %rdx = CPU feature data  */
	/* Assume the generic C version, then upgrade if SSE4.1 is present.
	   leaq does not modify flags, so the order relative to testl is free.  */
	leaq	__nearbyint_c(%rip), %rax
	testl	$bit_SSE4_1, CPUID_OFFSET+index_SSE4_1(%rdx)
	jz	1f
	leaq	__nearbyint_sse41(%rip), %rax
1:	ret
END(__nearbyint)
weak_alias (__nearbyint, nearbyint)
|
||||||
|
|
||||||
|
|
||||||
|
/* SSE4.1 implementation returned by the __nearbyint selector.
   Reads the double in %xmm0 and writes the rounded result back to %xmm0.  */
ENTRY(__nearbyint_sse41)
|
||||||
|
/* Immediate 0xc: bit 2 = use the MXCSR rounding mode, bit 3 = suppress
   the precision (inexact) exception.  */
roundsd $0xc, %xmm0, %xmm0
|
||||||
|
ret
|
||||||
|
END(__nearbyint_sse41)
|
3
sysdeps/x86_64/fpu/multiarch/s_nearbyintf-c.c
Normal file
3
sysdeps/x86_64/fpu/multiarch/s_nearbyintf-c.c
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
/* Compile the included C source with every use of the name __nearbyintf
   replaced by __nearbyintf_c.  The selector in s_nearbyintf.S returns
   __nearbyintf_c when the SSE4.1 feature bit is not set.  */
#undef __nearbyintf
|
||||||
|
#define __nearbyintf __nearbyintf_c
|
||||||
|
#include <sysdeps/ieee754/flt-32/s_nearbyintf.c>
|
40
sysdeps/x86_64/fpu/multiarch/s_nearbyintf.S
Normal file
40
sysdeps/x86_64/fpu/multiarch/s_nearbyintf.S
Normal file
|
@ -0,0 +1,40 @@
|
||||||
|
/* Copyright (C) 2011 Free Software Foundation, Inc.
|
||||||
|
This file is part of the GNU C Library.
|
||||||
|
Contributed by Ulrich Drepper <drepper@gmail.com>, 2011.
|
||||||
|
|
||||||
|
The GNU C Library is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU Lesser General Public
|
||||||
|
License as published by the Free Software Foundation; either
|
||||||
|
version 2.1 of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
The GNU C Library is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
Lesser General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Lesser General Public
|
||||||
|
License along with the GNU C Library; if not, write to the Free
|
||||||
|
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||||
|
02111-1307 USA. */
|
||||||
|
|
||||||
|
#include <machine/asm.h>
|
||||||
|
#include <init-arch.h>
|
||||||
|
|
||||||
|
|
||||||
|
/* Indirect-function selector for nearbyintf.
   Returns in %rax the address of the implementation to use:
   __nearbyintf_sse41 when the SSE4.1 feature bit is set in the data
   returned by __get_cpu_features, otherwise __nearbyintf_c.
   NOTE(review): %rsp is not adjusted before the call below; confirm the
   callee does not rely on the usual stack alignment at entry.  */
ENTRY(__nearbyintf)
	.type	__nearbyintf, @gnu_indirect_function
	call	__get_cpu_features@plt
	movq	%rax, %rdx		/* %rdx = CPU feature data  */
	/* Assume the generic C version, then upgrade if SSE4.1 is present.
	   leaq does not modify flags, so the order relative to testl is free.  */
	leaq	__nearbyintf_c(%rip), %rax
	testl	$bit_SSE4_1, CPUID_OFFSET+index_SSE4_1(%rdx)
	jz	1f
	leaq	__nearbyintf_sse41(%rip), %rax
1:	ret
END(__nearbyintf)
weak_alias (__nearbyintf, nearbyintf)
|
||||||
|
|
||||||
|
|
||||||
|
/* SSE4.1 implementation returned by the __nearbyintf selector.
   Reads the float in %xmm0 and writes the rounded result back to %xmm0.  */
ENTRY(__nearbyintf_sse41)
|
||||||
|
/* Immediate 0xc: bit 2 = use the MXCSR rounding mode, bit 3 = suppress
   the precision (inexact) exception.  */
roundss $0xc, %xmm0, %xmm0
|
||||||
|
ret
|
||||||
|
END(__nearbyintf_sse41)
|
Loading…
Reference in a new issue