[BZ #3268] Add fma float/double to soft-fp

classic Classic list List threaded Threaded
1 message Options
Reply | Threaded
Open this post in threaded view
|

[BZ #3268] Add fma float/double to soft-fp

Steven Munroe
Previous soft-fp work exposed a general lack of correct Fused Multiply Add (fma) implementations for platforms that don't implement fma in hardware (soft-fp or hard-fp that doesn't implement fma).

This patch provides a basic soft-fp implementation for float and double fma.  This updated version provides macros to directly convert between RAW, SEMIRAW, and CANONICAL internal forms without requiring floating types (like TF). This allows platforms that do not implement 128-bit long double to still use the quad.h soft-fp macros in the implementation of the double fma.

This patch has been updated and verified on powerpc32 with today's CVS.

I assume that __fmadf4/__fmasf4 are acceptable names for these functions. These functions need to be in libc.so to access the soft-float exception and rounding modes.

A separate patch can then override s_fma.c/s_fmaf.c (in libm.so) as needed to call __fmadf4/__fmasf4 directly, for example from ./ports.



2007-04-03  Steven Munroe  <[hidden email]>

        [BZ #3268]
        * soft-fp/Makefile (gcc-single-routines): Add fmasf4.
        (gcc-double-routines): Add fmadf4.
        * soft-fp/double.h: Define FP_COPY_RAW_D, FP_COPY_RAW_TO_CANONICAL_D,
        FP_COPY_RAW_TO_SEMIRAW_D, FP_COPY_SEMIRAW_D, and
        FP_COPY_CANONICAL_TO_SEMIRAW_D macros.
        * soft-fp/quad.h: Define FP_COPY_RAW_Q, FP_COPY_RAW_TO_CANONICAL_Q,
        FP_COPY_RAW_TO_SEMIRAW_Q, FP_COPY_SEMIRAW_Q, and
        FP_COPY_CANONICAL_TO_SEMIRAW_Q macros.
        * soft-fp/fmadf4.c: New file.
        * soft-fp/fmasf4.c: New file.

diff -urN libc25-cvstip-20070320/soft-fp/Makefile libc25/soft-fp/Makefile
--- libc25-cvstip-20070320/soft-fp/Makefile 2006-01-06 04:47:45.000000000 -0600
+++ libc25/soft-fp/Makefile 2007-03-30 15:34:09.000000000 -0500
@@ -24,12 +24,13 @@
 
 gcc-single-routines := negsf2 addsf3 subsf3 mulsf3 divsf3 eqsf2 \
  lesf2 gesf2 unordsf2 fixsfsi fixunssfsi floatsisf fixsfdi        \
- fixunssfdi floatdisf sqrtsf2 floatunsisf floatundisf
+ fixunssfdi floatdisf sqrtsf2 floatunsisf floatundisf \
+ fmasf4
 
 gcc-double-routines := negdf2 adddf3 subdf3 muldf3 divdf3 eqdf2 \
  ledf2 gedf2 unorddf2 fixdfsi fixunsdfsi floatsidf fixdfdi        \
  fixunsdfdi floatdidf extendsfdf2 truncdfsf2 sqrtdf2 floatunsidf \
- floatundidf
+ floatundidf fmadf4
 
 gcc-quad-routines := negtf2 addtf3 subtf3 multf3 divtf3 eqtf2 \
  letf2 getf2 unordtf2 fixtfsi fixunstfsi floatsitf fixtfdi      \
diff -urN libc25-cvstip-20070320/soft-fp/double.h libc25/soft-fp/double.h
--- libc25-cvstip-20070320/soft-fp/double.h 2007-03-20 07:10:36.000000000 -0500
+++ libc25/soft-fp/double.h 2007-03-30 15:34:14.000000000 -0500
@@ -142,6 +142,53 @@
       _FP_PACK_RAW_2_P(D,val,X); \
   } while (0)
 
+/* Copy the internal layout between RAW, SEMIRAW, and CANONICAL forms
+   (fraction held in two words, _f0 and _f1).  Used by the fma code.  */
+
+/* Raw copy of X into Y.  _f1 is masked to the fraction bits above _f0
+   (less the implicit bit, if any) and _e to _FP_EXPBITS_D bits.  */
+#define FP_COPY_RAW_D(Y, X) \
+  do { \
+    Y##_s  = X##_s; \
+    Y##_e  = X##_e & ((1 << _FP_EXPBITS_D) - 1); \
+    Y##_f1 = X##_f1 & ((1 << (_FP_FRACBITS_D \
+      - (_FP_IMPLBIT_D != 0) - _FP_W_TYPE_SIZE)) - 1); \
+    Y##_f0 = X##_f0; \
+  } while (0)
+
+/* Raw copy of X into Y, then unpack Y in place to canonical form.  */
+#define FP_COPY_RAW_TO_CANONICAL_D(Y,X) \
+  do { \
+    FP_COPY_RAW_D(Y,X); \
+    _FP_UNPACK_CANONICAL(D,2,Y); \
+  } while (0)
+
+/* Raw copy of X into Y, then unpack Y in place to semi-raw form.  */
+#define FP_COPY_RAW_TO_SEMIRAW_D(Y,X) \
+  do { \
+    FP_COPY_RAW_D(Y,X); \
+    _FP_UNPACK_SEMIRAW(D,2,Y); \
+  } while (0)
+
+/* Semi-raw copy of X into Y: like FP_COPY_RAW_D, but the _f1 mask is
+   three bits wider and _e is copied unmasked.  */
+#define FP_COPY_SEMIRAW_D(Y, X) \
+  do { \
+    Y##_s  = X##_s; \
+    Y##_e  = X##_e; \
+    Y##_f1 = X##_f1 & ((1 << (_FP_FRACBITS_D \
+      - (_FP_IMPLBIT_D != 0) - _FP_W_TYPE_SIZE + 3)) - 1); \
+    Y##_f0 = X##_f0; \
+  } while (0)
+
+/* Pack canonical X in place, raw-copy it to Y, unpack Y as semi-raw.  */
+#define FP_COPY_CANONICAL_TO_SEMIRAW_D(Y,X) \
+  do { \
+    _FP_PACK_CANONICAL(D,2,X); \
+    FP_COPY_RAW_D(Y,X); \
+    _FP_UNPACK_SEMIRAW(D,2,Y); \
+  } while (0)
+
 #define FP_ISSIGNAN_D(X) _FP_ISSIGNAN(D,2,X)
 #define FP_NEG_D(R,X) _FP_NEG(D,2,R,X)
 #define FP_ADD_D(R,X,Y) _FP_ADD(D,2,R,X,Y)
@@ -239,6 +286,50 @@
       _FP_PACK_RAW_1_P(D,val,X); \
   } while (0)
 
+/* Copy the internal layout between RAW, SEMIRAW, and CANONICAL forms
+   (fraction held in a single word, _f).  Used by the fma code.  */
+
+/* Raw copy of X into Y with _f and _e masked.  The _f mask is built in
+   _FP_W_TYPE because its shift count can exceed the width of int.  */
+#define FP_COPY_RAW_D(Y, X) \
+  do { \
+    Y##_s = X##_s; \
+    Y##_e = X##_e & ((1 << _FP_EXPBITS_D) - 1); \
+    Y##_f = X##_f & (((_FP_W_TYPE) 1 << (_FP_FRACBITS_D \
+      - (_FP_IMPLBIT_D != 0))) - 1); \
+  } while (0)
+
+/* Raw copy of X into Y, then unpack Y in place to canonical form.  */
+#define FP_COPY_RAW_TO_CANONICAL_D(Y,X) \
+  do { \
+    FP_COPY_RAW_D(Y,X); \
+    _FP_UNPACK_CANONICAL(D,1,Y); \
+  } while (0)
+
+/* Raw copy of X into Y, then unpack Y in place to semi-raw form.  */
+#define FP_COPY_RAW_TO_SEMIRAW_D(Y,X) \
+  do { \
+    FP_COPY_RAW_D(Y,X); \
+    _FP_UNPACK_SEMIRAW(D,1,Y); \
+  } while (0)
+
+/* Semi-raw copy: the _f mask is three bits wider; _e is not masked.  */
+#define FP_COPY_SEMIRAW_D(Y, X) \
+  do { \
+    Y##_s = X##_s; \
+    Y##_e = X##_e; \
+    Y##_f = X##_f & (((_FP_W_TYPE) 1 << (_FP_FRACBITS_D \
+      - (_FP_IMPLBIT_D != 0) + 3)) - 1); \
+  } while (0)
+
+/* Pack canonical X in place, raw-copy it to Y, unpack Y as semi-raw.  */
+#define FP_COPY_CANONICAL_TO_SEMIRAW_D(Y,X) \
+  do { \
+    _FP_PACK_CANONICAL(D,1,X); \
+    FP_COPY_RAW_D(Y,X); \
+    _FP_UNPACK_SEMIRAW(D,1,Y); \
+  } while (0)
+
 #define FP_ISSIGNAN_D(X) _FP_ISSIGNAN(D,1,X)
 #define FP_NEG_D(R,X) _FP_NEG(D,1,R,X)
 #define FP_ADD_D(R,X,Y) _FP_ADD(D,1,R,X,Y)
diff -urN libc25-cvstip-20070320/soft-fp/fmadf4.c libc25/soft-fp/fmadf4.c
--- libc25-cvstip-20070320/soft-fp/fmadf4.c Wed Dec 31 18:00:00 1969
+++ libc25/soft-fp/fmadf4.c Fri Mar 30 15:34:14 2007
@@ -0,0 +1,91 @@
+/* soft-fp x * y + z as ternary operation.
+   Copyright (C) 2007 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Steven Munroe <[hidden email]>, 2006.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   In addition to the permissions in the GNU Lesser General Public
+   License, the Free Software Foundation gives you unlimited
+   permission to link the compiled version of this file into
+   combinations with other programs, and to distribute those
+   combinations without any restriction coming from the use of this
+   file.  (The Lesser General Public License restrictions do apply in
+   other respects; for example, they cover modification of the file,
+   and distribution when not linked into a combine executable.)
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include "soft-fp.h"
+#include "double.h"
+#include "quad.h"
+
+/* Compute a * b + c as one ternary operation on doubles.  The operands
+   are extended to quad, multiplied and added in that format, and the
+   sum is truncated back to double.  A quad fraction takes four words
+   when 2 * _FP_W_TYPE_SIZE < _FP_FRACBITS_Q and two words otherwise;
+   the FP_EXTEND and FP_TRUNC word counts follow that choice.  */
+DFtype
+__fmadf4 (DFtype a, DFtype b, DFtype c)
+{
+  FP_DECL_EX;
+  FP_DECL_D (A); FP_DECL_D (B); FP_DECL_D (C);
+  FP_DECL_Q (X); FP_DECL_Q (Y); FP_DECL_Q (Z);
+  FP_DECL_Q (U);
+  FP_DECL_Q (V);
+  FP_DECL_D (R);
+  double r;
+
+  FP_INIT_ROUNDMODE;
+  FP_UNPACK_RAW_D (A, a);
+  FP_UNPACK_RAW_D (B, b);
+  FP_UNPACK_RAW_D (C, c);
+
+  /* Widen the three raw operands from double to quad.  */
+#if (2 * _FP_W_TYPE_SIZE) < _FP_FRACBITS_Q
+  FP_EXTEND (Q, D, 4, 2, X, A);
+  FP_EXTEND (Q, D, 4, 2, Y, B);
+  FP_EXTEND (Q, D, 4, 2, Z, C);
+#else
+  FP_EXTEND (Q, D, 2, 1, X, A);
+  FP_EXTEND (Q, D, 2, 1, Y, B);
+  FP_EXTEND (Q, D, 2, 1, Z, C);
+#endif
+  FP_HANDLE_EXCEPTIONS;
+
+  /* U = X * Y.  Rounding is not an issue here as the full 106 bit
+     product is kept.  */
+  FP_COPY_RAW_TO_CANONICAL_Q (X, X);
+  FP_COPY_RAW_TO_CANONICAL_Q (Y, Y);
+  FP_MUL_Q (U, X, Y);
+  FP_HANDLE_EXCEPTIONS;
+
+  /* V = U + Z, added in semi-raw form without rounding.  */
+  FP_COPY_CANONICAL_TO_SEMIRAW_Q (U, U);
+  FP_COPY_RAW_TO_SEMIRAW_Q (Z, Z);
+  FP_ADD_Q (V, U, Z);
+
+  /* Truncate the quad sum to double and round.  */
+  FP_COPY_SEMIRAW_Q (V, V);
+#if (2 * _FP_W_TYPE_SIZE) < _FP_FRACBITS_Q
+  FP_TRUNC (D, Q, 2, 4, R, V);
+#else
+  FP_TRUNC (D, Q, 1, 2, R, V);
+#endif
+  FP_PACK_SEMIRAW_D (r, R);
+  FP_HANDLE_EXCEPTIONS;
+
+  return r;
+}
+
diff -urN libc25-cvstip-20070320/soft-fp/fmasf4.c libc25/soft-fp/fmasf4.c
--- libc25-cvstip-20070320/soft-fp/fmasf4.c Wed Dec 31 18:00:00 1969
+++ libc25/soft-fp/fmasf4.c Fri Mar 30 15:34:14 2007
@@ -0,0 +1,90 @@
+/* soft-fp x * y + z as ternary operation.
+   Copyright (C) 2007 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Steven Munroe <[hidden email]>, 2006.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   In addition to the permissions in the GNU Lesser General Public
+   License, the Free Software Foundation gives you unlimited
+   permission to link the compiled version of this file into
+   combinations with other programs, and to distribute those
+   combinations without any restriction coming from the use of this
+   file.  (The Lesser General Public License restrictions do apply in
+   other respects; for example, they cover modification of the file,
+   and distribution when not linked into a combine executable.)
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include "soft-fp.h"
+#include "single.h"
+#include "double.h"
+
+/* Compute a * b + c as one ternary operation on floats.  The operands
+   are extended to double, multiplied and added in that format, and the
+   sum is truncated back to float.  A double fraction takes two words
+   when _FP_W_TYPE_SIZE < _FP_FRACBITS_D and one word otherwise; the
+   FP_EXTEND and FP_TRUNC word counts must both follow that test.  */
+SFtype
+__fmasf4 (SFtype a, SFtype b, SFtype c)
+{
+  FP_DECL_EX;
+  FP_DECL_S (A); FP_DECL_S (B); FP_DECL_S (C);
+  FP_DECL_D (X); FP_DECL_D (Y); FP_DECL_D (Z);
+  FP_DECL_D (U);
+  FP_DECL_D (V);
+  FP_DECL_S (R);
+  float r;
+
+  FP_INIT_ROUNDMODE;
+  FP_UNPACK_RAW_S (A, a);
+  FP_UNPACK_RAW_S (B, b);
+  FP_UNPACK_RAW_S (C, c);
+
+  /* Widen the three raw operands from single to double.  */
+#if _FP_W_TYPE_SIZE < _FP_FRACBITS_D
+  FP_EXTEND (D, S, 2, 1, X, A);
+  FP_EXTEND (D, S, 2, 1, Y, B);
+  FP_EXTEND (D, S, 2, 1, Z, C);
+#else
+  FP_EXTEND (D, S, 1, 1, X, A);
+  FP_EXTEND (D, S, 1, 1, Y, B);
+  FP_EXTEND (D, S, 1, 1, Z, C);
+#endif
+  FP_HANDLE_EXCEPTIONS;
+
+  /* U = X * Y.  Rounding is not an issue here as the full 48 bit
+     product is kept.  */
+  FP_COPY_RAW_TO_CANONICAL_D (X, X);
+  FP_COPY_RAW_TO_CANONICAL_D (Y, Y);
+  FP_MUL_D (U, X, Y);
+  FP_HANDLE_EXCEPTIONS;
+
+  /* V = U + Z, added in semi-raw form without rounding.  */
+  FP_COPY_CANONICAL_TO_SEMIRAW_D (U, U);
+  FP_COPY_RAW_TO_SEMIRAW_D (Z, Z);
+  FP_ADD_D (V, U, Z);
+
+  /* Truncate the double sum to single and round.  */
+  FP_COPY_SEMIRAW_D (V, V);
+#if _FP_W_TYPE_SIZE < _FP_FRACBITS_D
+  FP_TRUNC (S, D, 1, 2, R, V);
+#else
+  FP_TRUNC (S, D, 1, 1, R, V);
+#endif
+  FP_PACK_SEMIRAW_S (r, R);
+  FP_HANDLE_EXCEPTIONS;
+
+  return r;
+}
diff -urN libc25-cvstip-20070320/soft-fp/quad.h libc25/soft-fp/quad.h
--- libc25-cvstip-20070320/soft-fp/quad.h 2007-03-20 07:10:36.000000000 -0500
+++ libc25/soft-fp/quad.h 2007-03-30 15:34:17.000000000 -0500
@@ -148,6 +148,58 @@
       _FP_PACK_RAW_4_P(Q,val,X); \
   } while (0)
 
+/* Copy the internal layout between RAW, SEMIRAW, and CANONICAL forms
+   (fraction held in the four-word array _f[]).  These macros are used
+   in the fma implementations.  */
+
+/* Raw copy of X into Y.  _f[3] is masked to the fraction bits above
+   _f[0..2] (less the implicit bit, if any), _e to _FP_EXPBITS_Q bits.  */
+#define FP_COPY_RAW_Q(Y, X) \
+  do { \
+    Y##_s    = X##_s; \
+    Y##_e    = X##_e & ((1 << _FP_EXPBITS_Q) - 1); \
+    Y##_f[3] = X##_f[3] & ((1 << (_FP_FRACBITS_Q \
+      - (_FP_IMPLBIT_Q != 0) - (_FP_W_TYPE_SIZE * 3))) - 1); \
+    Y##_f[2] = X##_f[2]; \
+    Y##_f[1] = X##_f[1]; \
+    Y##_f[0] = X##_f[0]; \
+  } while (0)
+
+/* Raw copy of X into Y, then unpack Y in place to canonical form.  */
+#define FP_COPY_RAW_TO_CANONICAL_Q(Y,X) \
+  do { \
+    FP_COPY_RAW_Q(Y,X); \
+    _FP_UNPACK_CANONICAL(Q,4,Y); \
+  } while (0)
+
+/* Raw copy of X into Y, then unpack Y in place to semi-raw form.  */
+#define FP_COPY_RAW_TO_SEMIRAW_Q(Y,X) \
+  do { \
+    FP_COPY_RAW_Q(Y,X); \
+    _FP_UNPACK_SEMIRAW(Q,4,Y); \
+  } while (0)
+
+/* Semi-raw copy of X into Y: like FP_COPY_RAW_Q, but the _f[3] mask is
+   three bits wider and _e is copied unmasked.  */
+#define FP_COPY_SEMIRAW_Q(Y, X) \
+  do { \
+    Y##_s    = X##_s; \
+    Y##_e    = X##_e; \
+    Y##_f[3] = X##_f[3] & ((1 << (_FP_FRACBITS_Q \
+      - (_FP_IMPLBIT_Q != 0) - (_FP_W_TYPE_SIZE * 3) + 3)) - 1); \
+    Y##_f[2] = X##_f[2]; \
+    Y##_f[1] = X##_f[1]; \
+    Y##_f[0] = X##_f[0]; \
+  } while (0)
+
+/* Pack canonical X in place, raw-copy it to Y, unpack Y as semi-raw.  */
+#define FP_COPY_CANONICAL_TO_SEMIRAW_Q(Y,X) \
+  do { \
+    _FP_PACK_CANONICAL(Q,4,X); \
+    FP_COPY_RAW_Q(Y,X); \
+    _FP_UNPACK_SEMIRAW(Q,4,Y); \
+  } while (0)
+
 #define FP_ISSIGNAN_Q(X) _FP_ISSIGNAN(Q,4,X)
 #define FP_NEG_Q(R,X) _FP_NEG(Q,4,R,X)
 #define FP_ADD_Q(R,X,Y) _FP_ADD(Q,4,R,X,Y)
@@ -249,6 +301,53 @@
       _FP_PACK_RAW_2_P(Q,val,X); \
   } while (0)
 
+/* Copy the internal layout between RAW, SEMIRAW, and CANONICAL forms
+   (fraction held in two words, _f0 and _f1).  Used by the fma code.  */
+
+/* Raw copy of X into Y with _f1 and _e masked.  The _f1 mask is built
+   in _FP_W_TYPE because its shift count can exceed the width of int.  */
+#define FP_COPY_RAW_Q(Y, X) \
+  do { \
+    Y##_s  = X##_s; \
+    Y##_e  = X##_e & ((1 << _FP_EXPBITS_Q) - 1); \
+    Y##_f1 = X##_f1 & (((_FP_W_TYPE) 1 << (_FP_FRACBITS_Q \
+      - (_FP_IMPLBIT_Q != 0) - _FP_W_TYPE_SIZE)) - 1); \
+    Y##_f0 = X##_f0; \
+  } while (0)
+
+/* Raw copy of X into Y, then unpack Y in place to canonical form.  */
+#define FP_COPY_RAW_TO_CANONICAL_Q(Y,X) \
+  do { \
+    FP_COPY_RAW_Q(Y,X); \
+    _FP_UNPACK_CANONICAL(Q,2,Y); \
+  } while (0)
+
+/* Raw copy of X into Y (as a quad), then unpack Y to semi-raw form.  */
+#define FP_COPY_RAW_TO_SEMIRAW_Q(Y,X) \
+  do { \
+    FP_COPY_RAW_Q(Y,X); \
+    _FP_UNPACK_SEMIRAW(Q,2,Y); \
+  } while (0)
+
+/* Semi-raw copy of X into Y: like FP_COPY_RAW_Q, but the _f1 mask is
+   three bits wider and _e is copied unmasked.  */
+#define FP_COPY_SEMIRAW_Q(Y, X) \
+  do { \
+    Y##_s  = X##_s; \
+    Y##_e  = X##_e; \
+    Y##_f1 = X##_f1 & (((_FP_W_TYPE) 1 << (_FP_FRACBITS_Q \
+      - (_FP_IMPLBIT_Q != 0) - _FP_W_TYPE_SIZE + 3)) - 1); \
+    Y##_f0 = X##_f0; \
+  } while (0)
+
+/* Pack canonical X in place, raw-copy it to Y, unpack Y as semi-raw.  */
+#define FP_COPY_CANONICAL_TO_SEMIRAW_Q(Y,X) \
+  do { \
+    _FP_PACK_CANONICAL(Q,2,X); \
+    FP_COPY_RAW_Q(Y,X); \
+    _FP_UNPACK_SEMIRAW(Q,2,Y); \
+  } while (0)
+
 #define FP_ISSIGNAN_Q(X) _FP_ISSIGNAN(Q,2,X)
 #define FP_NEG_Q(R,X) _FP_NEG(Q,2,R,X)
 #define FP_ADD_Q(R,X,Y) _FP_ADD(Q,2,R,X,Y)