Fix f64xdivf128, f64xmulf128 spurious underflows (bug 28358)

As described in bug 28358, the round-to-odd computations used in the libm functions that round their results to a narrower format can yield spurious underflow exceptions in the following circumstances: the narrowing only narrows the precision of the type and not the exponent range (i.e., it's narrowing _Float128 to _Float64x on x86_64, x86 or ia64), the architecture does after-rounding tininess detection (which applies to all those architectures), the result is inexact, tiny before rounding but not tiny after rounding (with the chosen rounding mode) for _Float64x (which is possible for narrowing mul, div and fma, not for narrowing add, sub or sqrt), so the underflow exception resulting from the toward-zero computation in _Float128 is spurious for _Float64x. Fixed by making ROUND_TO_ODD call feclearexcept (FE_UNDERFLOW) in the problem cases (as indicated by an extra argument to the macro); there is never any need to preserve underflow exceptions from this part of the computation, because the conversion of the round-to-odd value to the narrower type will underflow in exactly the cases in which the function should raise that exception, but it may be more efficient to avoid the extra manipulation of the floating-point environment when not needed. Tested for x86_64 and x86, and with build-many-glibcs.py.
2021-09-21 21:54:37 +00:00 · 2021-09-21 21:54:37 +00:00 · 1356f38df5
parent 0b5ca7c3e5
commit 1356f38df5
18 changed files with 9604 additions and 31 deletions
--- a/math/auto-libm-test-in
+++ b/math/auto-libm-test-in
@ -4870,6 +4870,21 @@ div 0x1.0000000000008001000000000001p0 0x1.0000000000008p0
 # Similar, for double rounding to 64-bit of a division of 53-bit values.
 div 0x1ffe1p0 0xfffp0

+# Cases where there is underflow before rounding (for some format) but
+# might not be after rounding, depending on the rounding mode.
+div 0x1p-126 0x1.0000001p0
+div 0x1p-126 -0x1.0000001p0
+div -0x1p-126 0x1.0000001p0
+div -0x1p-126 -0x1.0000001p0
+div 0x1p-1022 0x1.00000000000001p0
+div 0x1p-1022 -0x1.00000000000001p0
+div -0x1p-1022 0x1.00000000000001p0
+div -0x1p-1022 -0x1.00000000000001p0
+div 0x1p-16382 0x1.00000000000000001p0
+div 0x1p-16382 -0x1.00000000000000001p0
+div -0x1p-16382 0x1.00000000000000001p0
+div -0x1p-16382 -0x1.00000000000000001p0
+
 erf 0
 erf -0
 erf 0.125
@ -6645,6 +6660,21 @@ mul 0x50000000000000005p-64 0xcccccccccccccccccccccccccccdp-114
 # This product equals 2^64 + 2^11 + 1.
 mul 97689974585 188829449

+# Cases where there is underflow before rounding (for some format) but
+# might not be after rounding, depending on the rounding mode.
+mul 0x0.ffffff8p-126 0x1.0000001p0
+mul 0x0.ffffff8p-126 -0x1.0000001p0
+mul -0x0.ffffff8p-126 0x1.0000001p0
+mul -0x0.ffffff8p-126 -0x1.0000001p0
+mul 0x0.fffffffffffffcp-1022 0x1.00000000000001p0
+mul 0x0.fffffffffffffcp-1022 -0x1.00000000000001p0
+mul -0x0.fffffffffffffcp-1022 0x1.00000000000001p0
+mul -0x0.fffffffffffffcp-1022 -0x1.00000000000001p0
+mul 0x0.ffffffffffffffff8p-16382 0x1.00000000000000001p0
+mul 0x0.ffffffffffffffff8p-16382 -0x1.00000000000000001p0
+mul -0x0.ffffffffffffffff8p-16382 0x1.00000000000000001p0
+mul -0x0.ffffffffffffffff8p-16382 -0x1.00000000000000001p0
+
 pow 0 0
 pow 0 -0
 pow -0 0
--- a/math/auto-libm-test-out-narrow-div
+++ b/math/auto-libm-test-out-narrow-div
--- a/math/auto-libm-test-out-narrow-mul
+++ b/math/auto-libm-test-out-narrow-mul
--- a/math/math-narrow.h
+++ b/math/math-narrow.h
@ -28,6 +28,7 @@
 #include <math_private.h>
 #include <fenv_private.h>
 #include <math-narrow-alias.h>
+#include <stdbool.h>

 /* Carry out a computation using round-to-odd.  The computation is
   EXPR; the union type in which to store the result is UNION and the
@ -37,11 +38,15 @@
   function rather than a C operator is used when argument and result
   types are the same) and the libc_fe* macros to ensure that the
   correct rounding mode is used, for platforms with multiple rounding
-   modes where those macros set only the relevant mode.  This macro
-   does not work correctly if the sign of an exact zero result depends
-   on the rounding mode, so that case must be checked for
-   separately.  */
-#define ROUND_TO_ODD(EXPR, UNION, SUFFIX, MANTISSA)			\
+   modes where those macros set only the relevant mode.
+   CLEAR_UNDERFLOW indicates whether underflow exceptions must be
+   cleared (in the case where a round-toward-zero underflow might not
+   indicate an underflow after narrowing, when that narrowing only
+   reduces precision not exponent range and the architecture uses
+   before-rounding tininess detection).  This macro does not work
+   correctly if the sign of an exact zero result depends on the
+   rounding mode, so that case must be checked for separately.  */
+#define ROUND_TO_ODD(EXPR, UNION, SUFFIX, MANTISSA, CLEAR_UNDERFLOW)	\
  ({									\
    fenv_t env;								\
    UNION u;								\
@ -49,6 +54,8 @@
    libc_feholdexcept_setround ## SUFFIX (&env, FE_TOWARDZERO);		\
    u.d = (EXPR);							\
    math_force_eval (u.d);						\
+    if (CLEAR_UNDERFLOW)						\
+      feclearexcept (FE_UNDERFLOW);					\
    u.ieee.MANTISSA							\
      |= libc_feupdateenv_test ## SUFFIX (&env, FE_INEXACT) != 0;	\
 									\
@ -91,7 +98,7 @@
 	ret = (TYPE) ((X) + (Y));					\
      else								\
 	ret = (TYPE) ROUND_TO_ODD (math_opt_barrier (X) + (Y),		\
-				   UNION, SUFFIX, MANTISSA);		\
+				   UNION, SUFFIX, MANTISSA, false);	\
 									\
      CHECK_NARROW_ADD (ret, (X), (Y));					\
      return ret;							\
@ -149,7 +156,7 @@
 	ret = (TYPE) ((X) - (Y));					\
      else								\
 	ret = (TYPE) ROUND_TO_ODD (math_opt_barrier (X) - (Y),		\
-				   UNION, SUFFIX, MANTISSA);		\
+				   UNION, SUFFIX, MANTISSA, false);	\
 									\
      CHECK_NARROW_SUB (ret, (X), (Y));					\
      return ret;							\
@ -194,15 +201,17 @@
  while (0)

 /* Implement narrowing multiply using round-to-odd.  The arguments are
-   X and Y, the return type is TYPE and UNION, MANTISSA and SUFFIX are
-   as for ROUND_TO_ODD.  */
-#define NARROW_MUL_ROUND_TO_ODD(X, Y, TYPE, UNION, SUFFIX, MANTISSA)	\
+   X and Y, the return type is TYPE and UNION, MANTISSA, SUFFIX and
+   CLEAR_UNDERFLOW are as for ROUND_TO_ODD.  */
+#define NARROW_MUL_ROUND_TO_ODD(X, Y, TYPE, UNION, SUFFIX, MANTISSA,	\
+				CLEAR_UNDERFLOW)			\
  do									\
    {									\
      TYPE ret;								\
 									\
      ret = (TYPE) ROUND_TO_ODD (math_opt_barrier (X) * (Y),		\
-				 UNION, SUFFIX, MANTISSA);		\
+				 UNION, SUFFIX, MANTISSA,		\
+				 CLEAR_UNDERFLOW);			\
 									\
      CHECK_NARROW_MUL (ret, (X), (Y));					\
      return ret;							\
@ -246,16 +255,18 @@
    }							\
  while (0)

-/* Implement narrowing divide using round-to-odd.  The arguments are
-   X and Y, the return type is TYPE and UNION, MANTISSA and SUFFIX are
-   as for ROUND_TO_ODD.  */
-#define NARROW_DIV_ROUND_TO_ODD(X, Y, TYPE, UNION, SUFFIX, MANTISSA)	\
+/* Implement narrowing divide using round-to-odd.  The arguments are X
+   and Y, the return type is TYPE and UNION, MANTISSA, SUFFIX and
+   CLEAR_UNDERFLOW are as for ROUND_TO_ODD.  */
+#define NARROW_DIV_ROUND_TO_ODD(X, Y, TYPE, UNION, SUFFIX, MANTISSA,	\
+				CLEAR_UNDERFLOW)			\
  do									\
    {									\
      TYPE ret;								\
 									\
      ret = (TYPE) ROUND_TO_ODD (math_opt_barrier (X) / (Y),		\
-				 UNION, SUFFIX, MANTISSA);		\
+				 UNION, SUFFIX, MANTISSA,		\
+				 CLEAR_UNDERFLOW);			\
 									\
      CHECK_NARROW_DIV (ret, (X), (Y));					\
      return ret;							\
@ -308,7 +319,7 @@
      TYPE ret;								\
 									\
      ret = (TYPE) ROUND_TO_ODD (sqrt ## SUFFIX (math_opt_barrier (X)),	\
-				 UNION, SUFFIX, MANTISSA);		\
+				 UNION, SUFFIX, MANTISSA, false);	\
 									\
      CHECK_NARROW_SQRT (ret, (X));					\
      return ret;							\
--- a/sysdeps/i386/fpu/s_f32xdivf64.c
+++ b/sysdeps/i386/fpu/s_f32xdivf64.c
@ -24,6 +24,6 @@ __f32xdivf64 (_Float64 x, _Float64 y)
 {
  /* To avoid double rounding, use round-to-odd on long double.  */
  NARROW_DIV_ROUND_TO_ODD ((long double) x, (long double) y, double,
-			   union ieee854_long_double, l, mantissa1);
+			   union ieee854_long_double, l, mantissa1, false);
 }
 libm_alias_float32x_float64 (div)
--- a/sysdeps/i386/fpu/s_f32xmulf64.c
+++ b/sysdeps/i386/fpu/s_f32xmulf64.c
@ -24,6 +24,6 @@ __f32xmulf64 (_Float64 x, _Float64 y)
 {
  /* To avoid double rounding, use round-to-odd on long double.  */
  NARROW_MUL_ROUND_TO_ODD ((long double) x, (long double) y, double,
-			   union ieee854_long_double, l, mantissa1);
+			   union ieee854_long_double, l, mantissa1, false);
 }
 libm_alias_float32x_float64 (mul)
--- a/sysdeps/ieee754/dbl-64/s_fdiv.c
+++ b/sysdeps/ieee754/dbl-64/s_fdiv.c
@ -29,6 +29,7 @@
 float
 __fdiv (double x, double y)
 {
-  NARROW_DIV_ROUND_TO_ODD (x, y, float, union ieee754_double, , mantissa1);
+  NARROW_DIV_ROUND_TO_ODD (x, y, float, union ieee754_double, , mantissa1,
+			   false);
 }
 libm_alias_float_double (div)
--- a/sysdeps/ieee754/dbl-64/s_fmul.c
+++ b/sysdeps/ieee754/dbl-64/s_fmul.c
@ -29,6 +29,7 @@
 float
 __fmul (double x, double y)
 {
-  NARROW_MUL_ROUND_TO_ODD (x, y, float, union ieee754_double, , mantissa1);
+  NARROW_MUL_ROUND_TO_ODD (x, y, float, union ieee754_double, , mantissa1,
+			   false);
 }
 libm_alias_float_double (mul)
--- a/sysdeps/ieee754/ldbl-128/s_ddivl.c
+++ b/sysdeps/ieee754/ldbl-128/s_ddivl.c
@ -32,6 +32,6 @@ double
 __ddivl (_Float128 x, _Float128 y)
 {
  NARROW_DIV_ROUND_TO_ODD (x, y, double, union ieee854_long_double, l,
-			   mantissa3);
+			   mantissa3, false);
 }
 libm_alias_double_ldouble (div)
--- a/sysdeps/ieee754/ldbl-128/s_dmull.c
+++ b/sysdeps/ieee754/ldbl-128/s_dmull.c
@ -32,6 +32,6 @@ double
 __dmull (_Float128 x, _Float128 y)
 {
  NARROW_MUL_ROUND_TO_ODD (x, y, double, union ieee854_long_double, l,
-			   mantissa3);
+			   mantissa3, false);
 }
 libm_alias_double_ldouble (mul)
--- a/sysdeps/ieee754/ldbl-128/s_f64xdivf128.c
+++ b/sysdeps/ieee754/ldbl-128/s_f64xdivf128.c
@ -18,6 +18,7 @@

 #include <math.h>
 #include <math-narrow.h>
+#include <tininess.h>

 /* math_ldbl.h defines _Float128 to long double for this directory,
   but when they are different, this function must be defined with
@ -30,7 +31,7 @@ __f64xdivf128 (_Float128 x, _Float128 y)
 {
 #if __HAVE_FLOAT64X_LONG_DOUBLE && __HAVE_DISTINCT_FLOAT128
  NARROW_DIV_ROUND_TO_ODD (x, y, _Float64x, union ieee854_long_double, l,
-			   mantissa3);
+			   mantissa3, TININESS_AFTER_ROUNDING);
 #else
  NARROW_DIV_TRIVIAL (x, y, _Float64x);
 #endif
--- a/sysdeps/ieee754/ldbl-128/s_f64xmulf128.c
+++ b/sysdeps/ieee754/ldbl-128/s_f64xmulf128.c
@ -18,6 +18,7 @@

 #include <math.h>
 #include <math-narrow.h>
+#include <tininess.h>

 /* math_ldbl.h defines _Float128 to long double for this directory,
   but when they are different, this function must be defined with
@ -30,7 +31,7 @@ __f64xmulf128 (_Float128 x, _Float128 y)
 {
 #if __HAVE_FLOAT64X_LONG_DOUBLE && __HAVE_DISTINCT_FLOAT128
  NARROW_MUL_ROUND_TO_ODD (x, y, _Float64x, union ieee854_long_double, l,
-			   mantissa3);
+			   mantissa3, TININESS_AFTER_ROUNDING);
 #else
  NARROW_MUL_TRIVIAL (x, y, _Float64x);
 #endif
--- a/sysdeps/ieee754/ldbl-128/s_fdivl.c
+++ b/sysdeps/ieee754/ldbl-128/s_fdivl.c
@ -28,6 +28,6 @@ float
 __fdivl (_Float128 x, _Float128 y)
 {
  NARROW_DIV_ROUND_TO_ODD (x, y, float, union ieee854_long_double, l,
-			   mantissa3);
+			   mantissa3, false);
 }
 libm_alias_float_ldouble (div)
--- a/sysdeps/ieee754/ldbl-128/s_fmull.c
+++ b/sysdeps/ieee754/ldbl-128/s_fmull.c
@ -28,6 +28,6 @@ float
 __fmull (_Float128 x, _Float128 y)
 {
  NARROW_MUL_ROUND_TO_ODD (x, y, float, union ieee854_long_double, l,
-			   mantissa3);
+			   mantissa3, false);
 }
 libm_alias_float_ldouble (mul)
--- a/sysdeps/ieee754/ldbl-96/s_ddivl.c
+++ b/sysdeps/ieee754/ldbl-96/s_ddivl.c
@ -28,6 +28,6 @@ double
 __ddivl (long double x, long double y)
 {
  NARROW_DIV_ROUND_TO_ODD (x, y, double, union ieee854_long_double, l,
-			   mantissa1);
+			   mantissa1, false);
 }
 libm_alias_double_ldouble (div)
--- a/sysdeps/ieee754/ldbl-96/s_dmull.c
+++ b/sysdeps/ieee754/ldbl-96/s_dmull.c
@ -28,6 +28,6 @@ double
 __dmull (long double x, long double y)
 {
  NARROW_MUL_ROUND_TO_ODD (x, y, double, union ieee854_long_double, l,
-			   mantissa1);
+			   mantissa1, false);
 }
 libm_alias_double_ldouble (mul)
--- a/sysdeps/ieee754/ldbl-96/s_fdivl.c
+++ b/sysdeps/ieee754/ldbl-96/s_fdivl.c
@ -26,6 +26,6 @@ float
 __fdivl (long double x, long double y)
 {
  NARROW_DIV_ROUND_TO_ODD (x, y, float, union ieee854_long_double, l,
-			   mantissa1);
+			   mantissa1, false);
 }
 libm_alias_float_ldouble (div)
--- a/sysdeps/ieee754/ldbl-96/s_fmull.c
+++ b/sysdeps/ieee754/ldbl-96/s_fmull.c
@ -26,6 +26,6 @@ float
 __fmull (long double x, long double y)
 {
  NARROW_MUL_ROUND_TO_ODD (x, y, float, union ieee854_long_double, l,
-			   mantissa1);
+			   mantissa1, false);
 }
 libm_alias_float_ldouble (mul)