[PATCH] PPC clean from powerpc-cpu merge

classic Classic list List threaded Threaded
1 message Options
Reply | Threaded
Open this post in threaded view
|

[PATCH] PPC clean from powerpc-cpu merge

Steven Munroe
I realized that gcc defaults to -mcpu=common/powerpc64, which assumes
ISA V1.0 and the old-style branch hints.  ISA V2.0+ introduced the
new-style branch hints.
(http://www.power.org/resources/downloads/PowerISA_Public.pdf page 28)

This can be a problem, as the --with-cpu= configure option only passes -mcpu=
to gcc for "*.c" files (not "*.S" assembler files). The encoding of the ISA
V1.0 branch hints when used on ISA V2.0+ hardware results in "No branch
hint given" (effectively ignored). To make the branch hints effective we
need to either pass -mcpu=power4 to as or insert the appropriate
".machine power#" statement in the source. The attached patch takes the
.machine approach.

Also found that ./sysdeps/powerpc/powerpc32/power6/fpu/s_llrint.S was
missing an include for math_ldbl_opt.h, which hosed the version symbol
generation.



2007-07-07  Steven Munroe  <[hidden email]>

        * sysdeps/powerpc/powerpc32/power4/memcmp.S: Specify .machine power4
        to get ISA-V2.0 branch hints.
        * sysdeps/powerpc/powerpc32/power4/memcpy.S: Specify .machine power4
        to get ISA-V2.0 branch hints.
        * sysdeps/powerpc/powerpc32/power4/memset.S: Specify .machine power4
        to get ISA-V2.0 branch hints.
        * sysdeps/powerpc/powerpc32/power6/memcpy.S: Specify .machine power6
        to get ISA-V2.0 branch hints.
        * sysdeps/powerpc/powerpc64/power4/memcmp.S: Specify .machine power4
        to get ISA-V2.0 branch hints.
        * sysdeps/powerpc/powerpc64/power4/memcpy.S: Specify .machine power4
        to get ISA-V2.0 branch hints.
        * sysdeps/powerpc/powerpc64/power4/memset.S: Specify .machine power4
        to get ISA-V2.0 branch hints.  Remove toc ref to __cache_line_size.

        * sysdeps/powerpc/powerpc32/power6/fpu/s_llrint.S:
        Include math_ldbl_opt.h

diff -urN libc25-cvstip-20070606/sysdeps/powerpc/powerpc32/power4/memcmp.S libc25/sysdeps/powerpc/powerpc32/power4/memcmp.S
--- libc25-cvstip-20070606/sysdeps/powerpc/powerpc32/power4/memcmp.S 2007-06-03 15:49:26.000000000 -0500
+++ libc25/sysdeps/powerpc/powerpc32/power4/memcmp.S 2007-07-10 14:42:31.508087560 -0500
@@ -23,6 +23,7 @@
 
 /* int [r3] memcmp (const char *s1 [r3], const char *s2 [r4], size_t size [r5])  */
 
+ .machine power4
 EALIGN (BP_SYM(memcmp), 4, 0)
  CALL_MCOUNT
 
diff -urN libc25-cvstip-20070606/sysdeps/powerpc/powerpc32/power4/memcpy.S libc25/sysdeps/powerpc/powerpc32/power4/memcpy.S
--- libc25-cvstip-20070606/sysdeps/powerpc/powerpc32/power4/memcpy.S 2007-06-03 15:49:42.000000000 -0500
+++ libc25/sysdeps/powerpc/powerpc32/power4/memcpy.S 2007-07-10 14:40:48.066969976 -0500
@@ -34,6 +34,7 @@
    possible when both source and destination are word aligned.
    Each case has an optimized unrolled loop.   */
 
+ .machine power4
 EALIGN (BP_SYM (memcpy), 5, 0)
  CALL_MCOUNT
 
diff -urN libc25-cvstip-20070606/sysdeps/powerpc/powerpc32/power4/memset.S libc25/sysdeps/powerpc/powerpc32/power4/memset.S
--- libc25-cvstip-20070606/sysdeps/powerpc/powerpc32/power4/memset.S 2007-06-03 15:49:48.000000000 -0500
+++ libc25/sysdeps/powerpc/powerpc32/power4/memset.S 2007-06-18 14:49:20.000000000 -0500
@@ -28,6 +28,7 @@
    cache line (1024 bits). There is a special case for setting cache lines
    to 0, to take advantage of the dcbz instruction.  */
 
+ .machine power4
 EALIGN (BP_SYM (memset), 5, 0)
  CALL_MCOUNT
 
diff -urN libc25-cvstip-20070606/sysdeps/powerpc/powerpc32/power6/fpu/s_llrint.S libc25/sysdeps/powerpc/powerpc32/power6/fpu/s_llrint.S
--- libc25-cvstip-20070606/sysdeps/powerpc/powerpc32/power6/fpu/s_llrint.S 2007-06-03 15:54:19.000000000 -0500
+++ libc25/sysdeps/powerpc/powerpc32/power6/fpu/s_llrint.S 2007-06-21 16:33:55.000000000 -0500
@@ -18,6 +18,7 @@
    02110-1301 USA.  */
 
 #include <sysdep.h>
+#include <math_ldbl_opt.h>
 
 /* long long int[r3, r4] __llrint (double x[fp1])  */
 ENTRY (__llrint)
diff -urN libc25-cvstip-20070606/sysdeps/powerpc/powerpc32/power6/memcpy.S libc25/sysdeps/powerpc/powerpc32/power6/memcpy.S
--- libc25-cvstip-20070606/sysdeps/powerpc/powerpc32/power6/memcpy.S 2007-06-03 15:53:47.000000000 -0500
+++ libc25/sysdeps/powerpc/powerpc32/power6/memcpy.S 2007-07-10 14:40:14.676989000 -0500
@@ -34,6 +34,7 @@
    possible when both source and destination are word aligned.
    Each case has an optimized unrolled loop.   */
 
+ .machine power6
 EALIGN (BP_SYM (memcpy), 5, 0)
  CALL_MCOUNT
 
diff -urN libc25-cvstip-20070606/sysdeps/powerpc/powerpc64/power4/memcmp.S libc25/sysdeps/powerpc/powerpc64/power4/memcmp.S
--- libc25-cvstip-20070606/sysdeps/powerpc/powerpc64/power4/memcmp.S 2007-06-03 16:14:29.000000000 -0500
+++ libc25/sysdeps/powerpc/powerpc64/power4/memcmp.S 2007-07-10 14:42:47.355074944 -0500
@@ -23,6 +23,7 @@
 
 /* int [r3] memcmp (const char *s1 [r3], const char *s2 [r4], size_t size [r5])  */
 
+ .machine power4
 EALIGN (BP_SYM(memcmp), 4, 0)
  CALL_MCOUNT 3
 
diff -urN libc25-cvstip-20070606/sysdeps/powerpc/powerpc64/power4/memcpy.S libc25/sysdeps/powerpc/powerpc64/power4/memcpy.S
--- libc25-cvstip-20070606/sysdeps/powerpc/powerpc64/power4/memcpy.S 2007-06-03 16:14:43.000000000 -0500
+++ libc25/sysdeps/powerpc/powerpc64/power4/memcpy.S 2007-07-10 14:41:21.642057832 -0500
@@ -36,6 +36,7 @@
    posible when both source and destination are doubleword aligned.
    Each case has a optimized unrolled loop.   */
 
+ .machine power4
 EALIGN (BP_SYM (memcpy), 5, 0)
  CALL_MCOUNT 3
 
diff -urN libc25-cvstip-20070606/sysdeps/powerpc/powerpc64/power4/memset.S libc25/sysdeps/powerpc/powerpc64/power4/memset.S
--- libc25-cvstip-20070606/sysdeps/powerpc/powerpc64/power4/memset.S 2007-06-03 16:14:50.000000000 -0500
+++ libc25/sysdeps/powerpc/powerpc64/power4/memset.S 2007-07-09 11:17:33.449056568 -0500
@@ -22,12 +22,6 @@
 #include <bp-sym.h>
 #include <bp-asm.h>
 
- .section ".toc","aw"
-.LC0:
- .tc __cache_line_size[TC],__cache_line_size
- .section ".text"
- .align 2
-
 /* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5]));
    Returns 's'.
 
@@ -35,6 +29,7 @@
    cache line (256 bits). There is a special case for setting cache lines
    to 0, to take advantage of the dcbz instruction.  */
 
+ .machine power4
 EALIGN (BP_SYM (memset), 5, 0)
  CALL_MCOUNT 3