[PATCH 1/5] S390: Add new hwcap values for new cpu architecture arch13.

classic Classic list List threaded Threaded
6 messages Options
Reply | Threaded
Open this post in threaded view
|

[PATCH 1/5] S390: Add new hwcap values for new cpu architecture arch13.

Stefan Liebler-2
The new hwcap values indicate support for:
-"Vector-Enhancements Facility 2" (tag "vxe2", hwcap 2^15)
-"Vector-Packed-Decimal-Enhancement Facility" (tag "vxp", hwcap 2^16)
-"Enhanced-Sort Facility" (tag "sort", hwcap 2^17)
-"Deflate-Conversion Facility" (tag "dflt", hwcap 2^18)

ChangeLog:

        * sysdeps/s390/dl-procinfo.c (_dl_s390_cap_flags):
        Add vxe2, vxp, dflt, sort flags.
        * sysdeps/s390/dl-procinfo.h: Add HWCAP_S390_VXRS_EXT2,
        HWCAP_S390_VXRS_PDE, HWCAP_S390_SORT, HWCAP_S390_DFLT
        capabilities.
        * sysdeps/unix/sysv/linux/s390/bits/hwcap.h
        (HWCAP_S390_VXRS_EXT2, HWCAP_S390_VXRS_PDE, HWCAP_S390_SORT,
        HWCAP_S390_DFLT): Define.
---
 sysdeps/s390/dl-procinfo.c                | 4 ++--
 sysdeps/s390/dl-procinfo.h                | 6 +++++-
 sysdeps/unix/sysv/linux/s390/bits/hwcap.h | 4 ++++
 3 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/sysdeps/s390/dl-procinfo.c b/sysdeps/s390/dl-procinfo.c
index 8f581c1982..44c9901800 100644
--- a/sysdeps/s390/dl-procinfo.c
+++ b/sysdeps/s390/dl-procinfo.c
@@ -46,12 +46,12 @@
 #if !defined PROCINFO_DECL && defined SHARED
   ._dl_s390_cap_flags
 #else
-PROCINFO_CLASS const char _dl_s390_cap_flags[15][9]
+PROCINFO_CLASS const char _dl_s390_cap_flags[19][9]
 #endif
 #ifndef PROCINFO_DECL
 = {
      "esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp", "edat", "etf3eh",
-     "highgprs", "te", "vx", "vxd", "vxe", "gs"
+     "highgprs", "te", "vx", "vxd", "vxe", "gs", "vxe2", "vxp", "sort", "dflt"
   }
 #endif
 #if !defined SHARED || defined PROCINFO_DECL
diff --git a/sysdeps/s390/dl-procinfo.h b/sysdeps/s390/dl-procinfo.h
index b4b81fc70a..6e0d83da1e 100644
--- a/sysdeps/s390/dl-procinfo.h
+++ b/sysdeps/s390/dl-procinfo.h
@@ -21,7 +21,7 @@
 #define _DL_PROCINFO_H 1
 #include <ldsodefs.h>
 
-#define _DL_HWCAP_COUNT 15
+#define _DL_HWCAP_COUNT 19
 
 #define _DL_PLATFORMS_COUNT 9
 
@@ -54,6 +54,10 @@ enum
   HWCAP_S390_VXD = 1 << 12,
   HWCAP_S390_VXE = 1 << 13,
   HWCAP_S390_GS = 1 << 14,
+  HWCAP_S390_VXRS_EXT2 = 1 << 15,
+  HWCAP_S390_VXRS_PDE = 1 << 16,
+  HWCAP_S390_SORT = 1 << 17,
+  HWCAP_S390_DFLT = 1 << 18,
 };
 
 #define HWCAP_IMPORTANT (HWCAP_S390_ZARCH | HWCAP_S390_LDISP \
diff --git a/sysdeps/unix/sysv/linux/s390/bits/hwcap.h b/sysdeps/unix/sysv/linux/s390/bits/hwcap.h
index 60c38074c4..3608bea25c 100644
--- a/sysdeps/unix/sysv/linux/s390/bits/hwcap.h
+++ b/sysdeps/unix/sysv/linux/s390/bits/hwcap.h
@@ -39,3 +39,7 @@
 #define HWCAP_S390_VXD          4096
 #define HWCAP_S390_VXE          8192
 #define HWCAP_S390_GS           16384
+#define HWCAP_S390_VXRS_EXT2    32768
+#define HWCAP_S390_VXRS_PDE     65536
+#define HWCAP_S390_SORT         131072
+#define HWCAP_S390_DFLT         262144
--
2.17.0

Reply | Threaded
Open this post in threaded view
|

[PATCH 2/5] S390: Add configure check to detect support for arch13.

Stefan Liebler-2
Add two configure checks which detect if arch13 is supported
by the assembler at all - by explicitly setting the machine -
and if it is supported with default settings.

ChangeLog:

        * config.h.in (HAVE_S390_MIN_ARCH13_ZARCH_ASM_SUPPORT,
        HAVE_S390_ARCH13_ASM_SUPPORT): New undefine.
        * sysdeps/s390/configure.ac: Add checks for arch13 support.
        * sysdeps/s390/configure: Regenerated.
---
 config.h.in               |  6 ++++
 sysdeps/s390/configure    | 71 +++++++++++++++++++++++++++++++++++++++
 sysdeps/s390/configure.ac | 49 +++++++++++++++++++++++++++
 3 files changed, 126 insertions(+)

diff --git a/config.h.in b/config.h.in
index f059ec0435..824dfe8d8c 100644
--- a/config.h.in
+++ b/config.h.in
@@ -71,6 +71,9 @@
 /* Define if assembler supports z13 zarch instructions as default on S390.  */
 #undef  HAVE_S390_MIN_Z13_ZARCH_ASM_SUPPORT
 
+/* Define if assembler supports arch13 zarch instruction as default on S390.  */
+#undef  HAVE_S390_MIN_ARCH13_ZARCH_ASM_SUPPORT
+
 /* Define if assembler supports vector instructions on S390.  */
 #undef  HAVE_S390_VX_ASM_SUPPORT
 
@@ -78,6 +81,9 @@
    on S390.  */
 #undef  HAVE_S390_VX_GCC_SUPPORT
 
+/* Define if assembler supports arch13 instructions on S390.  */
+#undef  HAVE_S390_ARCH13_ASM_SUPPORT
+
 /* Define if assembler supports Intel MPX.  */
 #undef  HAVE_MPX_SUPPORT
 
diff --git a/sysdeps/s390/configure b/sysdeps/s390/configure
index 4a44775e30..fa46e9e351 100644
--- a/sysdeps/s390/configure
+++ b/sysdeps/s390/configure
@@ -112,6 +112,43 @@ then
 
 fi
 
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for S390 arch13 zarch instruction support" >&5
+$as_echo_n "checking for S390 arch13 zarch instruction support... " >&6; }
+if ${libc_cv_asm_s390_arch13+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat > conftest.c <<\EOF
+void testinsn (char *buf)
+{
+    __asm__ (".machine \"arch13\" \n\t"
+     ".machinemode \"zarch_nohighgprs\" \n\t"
+     "lghi %%r0,16 \n\t"
+     "mvcrl 0(%0),32(%0)" : : "a" (buf) : "memory", "r0");
+}
+EOF
+if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS --shared conftest.c
+ -o conftest.o &> /dev/null'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; } ;
+then
+  libc_cv_asm_s390_arch13=yes
+else
+  libc_cv_asm_s390_arch13=no
+fi
+rm -f conftest*
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_asm_s390_arch13" >&5
+$as_echo "$libc_cv_asm_s390_arch13" >&6; }
+if test "$libc_cv_asm_s390_arch13" = yes ;
+then
+  $as_echo "#define HAVE_S390_ARCH13_ASM_SUPPORT 1" >>confdefs.h
+
+fi
+
+
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for S390 z10 zarch instruction support as default" >&5
 $as_echo_n "checking for S390 z10 zarch instruction support as default... " >&6; }
 if ${libc_cv_asm_s390_min_z10_zarch+:} false; then :
@@ -225,5 +262,39 @@ then
 
 fi
 
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for S390 arch13 zarch instruction support as default" >&5
+$as_echo_n "checking for S390 arch13 zarch instruction support as default... " >&6; }
+if ${libc_cv_asm_s390_min_arch13_zarch+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat > conftest.c <<\EOF
+void testinsn (char *buf)
+{
+    __asm__ ("lghi %%r0,16 \n\t"
+     "mvcrl 0(%0),32(%0)" : : "a" (buf) : "memory", "r0");
+}
+EOF
+if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS --shared conftest.c
+ -o conftest.o &> /dev/null'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; } ;
+then
+  libc_cv_asm_s390_min_arch13_zarch=yes
+else
+  libc_cv_asm_s390_min_arch13_zarch=no
+fi
+rm -f conftest*
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_asm_s390_min_arch13_zarch" >&5
+$as_echo "$libc_cv_asm_s390_min_arch13_zarch" >&6; }
+if test "$libc_cv_asm_s390_min_arch13_zarch" = yes ;
+then
+  $as_echo "#define HAVE_S390_MIN_ARCH13_ZARCH_ASM_SUPPORT 1" >>confdefs.h
+
+fi
+
 test -n "$critic_missing" && as_fn_error $? "
 *** $critic_missing" "$LINENO" 5
diff --git a/sysdeps/s390/configure.ac b/sysdeps/s390/configure.ac
index 4dfb5574b4..3ed5a8ef87 100644
--- a/sysdeps/s390/configure.ac
+++ b/sysdeps/s390/configure.ac
@@ -80,6 +80,32 @@ then
   AC_DEFINE(HAVE_S390_VX_GCC_SUPPORT)
 fi
 
+AC_CACHE_CHECK(for S390 arch13 zarch instruction support,
+       libc_cv_asm_s390_arch13, [dnl
+cat > conftest.c <<\EOF
+void testinsn (char *buf)
+{
+    __asm__ (".machine \"arch13\" \n\t"
+     ".machinemode \"zarch_nohighgprs\" \n\t"
+     "lghi %%r0,16 \n\t"
+     "mvcrl 0(%0),32(%0)" : : "a" (buf) : "memory", "r0");
+}
+EOF
+dnl test, if assembler supports S390 arch13 instructions
+if AC_TRY_COMMAND([${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS --shared conftest.c
+ -o conftest.o &> /dev/null]) ;
+then
+  libc_cv_asm_s390_arch13=yes
+else
+  libc_cv_asm_s390_arch13=no
+fi
+rm -f conftest* ])
+if test "$libc_cv_asm_s390_arch13" = yes ;
+then
+  AC_DEFINE(HAVE_S390_ARCH13_ASM_SUPPORT)
+fi
+
+
 AC_CACHE_CHECK(for S390 z10 zarch instruction support as default,
        libc_cv_asm_s390_min_z10_zarch, [dnl
 cat > conftest.c <<\EOF
@@ -163,5 +189,28 @@ then
   AC_DEFINE(HAVE_S390_MIN_Z13_ZARCH_ASM_SUPPORT)
 fi
 
+AC_CACHE_CHECK(for S390 arch13 zarch instruction support as default,
+       libc_cv_asm_s390_min_arch13_zarch, [dnl
+cat > conftest.c <<\EOF
+void testinsn (char *buf)
+{
+    __asm__ ("lghi %%r0,16 \n\t"
+     "mvcrl 0(%0),32(%0)" : : "a" (buf) : "memory", "r0");
+}
+EOF
+dnl test, if assembler supports S390 arch13 zarch instructions as default
+if AC_TRY_COMMAND([${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS --shared conftest.c
+ -o conftest.o &> /dev/null]) ;
+then
+  libc_cv_asm_s390_min_arch13_zarch=yes
+else
+  libc_cv_asm_s390_min_arch13_zarch=no
+fi
+rm -f conftest* ])
+if test "$libc_cv_asm_s390_min_arch13_zarch" = yes ;
+then
+  AC_DEFINE(HAVE_S390_MIN_ARCH13_ZARCH_ASM_SUPPORT)
+fi
+
 test -n "$critic_missing" && AC_MSG_ERROR([
 *** $critic_missing])
--
2.17.0

Reply | Threaded
Open this post in threaded view
|

[PATCH 3/5] S390: Add arch13 memmove ifunc variant.

Stefan Liebler-2
In reply to this post by Stefan Liebler-2
This patch introduces the new arch13 ifunc variant for memmove.
For the forward or non-overlapping case it is just using memcpy.
For the backward case it relies on the new instruction mvcrl.
The instruction copies up to 256 bytes at once.
In case of an overlap, it copies the bytes like copying them
one by one starting from right to left.

ChangeLog:

        * sysdeps/s390/ifunc-memcpy.h (HAVE_MEMMOVE_ARCH13, MEMMOVE_ARCH13,
        HAVE_MEMMOVE_IFUNC_AND_ARCH13_SUPPORT): New defines.
        * sysdeps/s390/memcpy-z900.S: Add arch13 memmove implementation.
        * sysdeps/s390/memmove.c (memmove): Add arch13 variant in
        ifunc selector.
        * sysdeps/s390/multiarch/ifunc-impl-list.c
        (__libc_ifunc_impl_list): Add ifunc variant for arch13 memmove.
        * sysdeps/s390/multiarch/ifunc-resolve.h (S390_STFLE_BITS_ARCH13_MIE3,
        S390_IS_ARCH13_MIE3): New defines.
---
 sysdeps/s390/ifunc-memcpy.h              | 23 +++++++++-
 sysdeps/s390/memcpy-z900.S               | 55 ++++++++++++++++++++++++
 sysdeps/s390/memmove.c                   | 16 +++++--
 sysdeps/s390/multiarch/ifunc-impl-list.c |  5 +++
 sysdeps/s390/multiarch/ifunc-resolve.h   |  5 +++
 5 files changed, 99 insertions(+), 5 deletions(-)

diff --git a/sysdeps/s390/ifunc-memcpy.h b/sysdeps/s390/ifunc-memcpy.h
index b83ae73508..1badb30ed8 100644
--- a/sysdeps/s390/ifunc-memcpy.h
+++ b/sysdeps/s390/ifunc-memcpy.h
@@ -44,7 +44,7 @@
 #endif
 
 #if defined SHARED && defined USE_MULTIARCH && IS_IN (libc) \
-  && ! defined HAVE_S390_MIN_Z13_ZARCH_ASM_SUPPORT
+  && ! defined HAVE_S390_MIN_ARCH13_ZARCH_ASM_SUPPORT
 # define HAVE_MEMMOVE_IFUNC 1
 #else
 # define HAVE_MEMMOVE_IFUNC 0
@@ -56,14 +56,27 @@
 # define HAVE_MEMMOVE_IFUNC_AND_VX_SUPPORT 0
 #endif
 
-#if defined HAVE_S390_MIN_Z13_ZARCH_ASM_SUPPORT
+#ifdef HAVE_S390_ARCH13_ASM_SUPPORT
+# define HAVE_MEMMOVE_IFUNC_AND_ARCH13_SUPPORT HAVE_MEMMOVE_IFUNC
+#else
+# define HAVE_MEMMOVE_IFUNC_AND_ARCH13_SUPPORT 0
+#endif
+
+#if defined HAVE_S390_MIN_ARCH13_ZARCH_ASM_SUPPORT
+# define MEMMOVE_DEFAULT MEMMOVE_ARCH13
+# define HAVE_MEMMOVE_C 0
+# define HAVE_MEMMOVE_Z13 0
+# define HAVE_MEMMOVE_ARCH13 1
+#elif defined HAVE_S390_MIN_Z13_ZARCH_ASM_SUPPORT
 # define MEMMOVE_DEFAULT MEMMOVE_Z13
 # define HAVE_MEMMOVE_C 0
 # define HAVE_MEMMOVE_Z13 1
+# define HAVE_MEMMOVE_ARCH13 HAVE_MEMMOVE_IFUNC_AND_ARCH13_SUPPORT
 #else
 # define MEMMOVE_DEFAULT MEMMOVE_C
 # define HAVE_MEMMOVE_C 1
 # define HAVE_MEMMOVE_Z13 HAVE_MEMMOVE_IFUNC_AND_VX_SUPPORT
+# define HAVE_MEMMOVE_ARCH13 HAVE_MEMMOVE_IFUNC_AND_ARCH13_SUPPORT
 #endif
 
 #if HAVE_MEMCPY_Z900_G5
@@ -101,3 +114,9 @@
 #else
 # define MEMMOVE_Z13 NULL
 #endif
+
+#if HAVE_MEMMOVE_ARCH13
+# define MEMMOVE_ARCH13 __memmove_arch13
+#else
+# define MEMMOVE_ARCH13 NULL
+#endif
diff --git a/sysdeps/s390/memcpy-z900.S b/sysdeps/s390/memcpy-z900.S
index 90d5f7becc..307332fcf9 100644
--- a/sysdeps/s390/memcpy-z900.S
+++ b/sysdeps/s390/memcpy-z900.S
@@ -277,6 +277,61 @@ ENTRY(MEMMOVE_Z13)
 END(MEMMOVE_Z13)
 #endif /* HAVE_MEMMOVE_Z13  */
 
+#if HAVE_MEMMOVE_ARCH13
+ENTRY(MEMMOVE_ARCH13)
+ .machine "arch13"
+ .machinemode "zarch_nohighgprs"
+# if ! defined __s390x__
+ /* Note: The 31bit dst and src pointers are prefixed with zeroes.  */
+ llgfr %r4,%r4
+ llgfr %r3,%r3
+ llgfr %r2,%r2
+# endif /* ! defined __s390x__ */
+ sgrk %r5,%r2,%r3
+ aghik %r0,%r4,-1 /* Both vstl and mvcrl needs highest index.  */
+ clgijh %r4,16,.L_MEMMOVE_ARCH13_LARGE
+.L_MEMMOVE_ARCH13_SMALL:
+ jl .L_MEMMOVE_ARCH13_END /* Return if len was zero (cc of aghik).  */
+ /* Store up to 16 bytes with vll/vstl (needs highest index).  */
+ vll %v16,%r0,0(%r3)
+ vstl %v16,%r0,0(%r2)
+.L_MEMMOVE_ARCH13_END:
+ br      %r14
+.L_MEMMOVE_ARCH13_LARGE:
+ lgr     %r1,%r2 /* For memcpy: r1: Use as dest ; r2: Return dest  */
+ /* The unsigned comparison (dst - src >= len) determines if we can
+   execute the forward case with memcpy.  */
+#if ! HAVE_MEMCPY_Z196
+# error The arch13 variant of memmove needs the z196 variant of memcpy!
+#endif
+ /* Backward case.  */
+ clgrjhe %r5,%r4,.L_Z196_start2
+ clgijh %r0,255,.L_MEMMOVE_ARCH13_LARGER_256B
+ /* Move up to 256bytes with mvcrl (move right to left).  */
+ mvcrl 0(%r1),0(%r3) /* Move (r0 + 1) bytes from r3 to r1.  */
+ br      %r14
+.L_MEMMOVE_ARCH13_LARGER_256B:
+ /* First move the "remaining" block of up to 256 bytes at the end of
+   src/dst buffers.  Then move blocks of 256bytes in a loop starting
+   with the block at the end.
+   (If src/dst pointers are aligned e.g. to 256 bytes, then the pointers
+   passed to mvcrl instructions are aligned, too)  */
+ risbgn %r5,%r0,8,128+63,56 /* r5 = r0 / 256  */
+ risbgn %r0,%r0,56,128+63,0 /* r0 = r0 & 0xFF  */
+ slgr %r4,%r0
+ lay %r1,-1(%r4,%r1)
+ lay %r3,-1(%r4,%r3)
+ mvcrl 0(%r1),0(%r3) /* Move (r0 + 1) bytes from r3 to r1.  */
+ lghi %r0,255 /* Always copy 256 bytes in the loop below!  */
+.L_MEMMOVE_ARCH13_LARGE_256B_LOOP:
+ aghi %r1,-256
+ aghi %r3,-256
+ mvcrl 0(%r1),0(%r3) /* Move (r0 + 1) bytes from r3 to r1.  */
+ brctg %r5,.L_MEMMOVE_ARCH13_LARGE_256B_LOOP
+ br      %r14
+END(MEMMOVE_ARCH13)
+#endif /* HAVE_MEMMOVE_ARCH13  */
+
 #if ! HAVE_MEMCPY_IFUNC
 /* If we don't use ifunc, define an alias for mem[p]cpy here.
    Otherwise see sysdeps/s390/mem[p]cpy.c.  */
diff --git a/sysdeps/s390/memmove.c b/sysdeps/s390/memmove.c
index fd4da377a3..fb6b69ae2f 100644
--- a/sysdeps/s390/memmove.c
+++ b/sysdeps/s390/memmove.c
@@ -36,9 +36,19 @@ extern __typeof (__redirect_memmove) MEMMOVE_C attribute_hidden;
 extern __typeof (__redirect_memmove) MEMMOVE_Z13 attribute_hidden;
 # endif
 
+# if HAVE_MEMMOVE_ARCH13
+extern __typeof (__redirect_memmove) MEMMOVE_ARCH13 attribute_hidden;
+# endif
+
 s390_libc_ifunc_expr (__redirect_memmove, memmove,
-      (HAVE_MEMMOVE_Z13 && (hwcap & HWCAP_S390_VX))
-      ? MEMMOVE_Z13
-      : MEMMOVE_DEFAULT
+      ({
+ s390_libc_ifunc_expr_stfle_init ();
+ (HAVE_MEMMOVE_ARCH13
+ && S390_IS_ARCH13_MIE3 (stfle_bits))
+  ? MEMMOVE_ARCH13
+  : (HAVE_MEMMOVE_Z13 && (hwcap & HWCAP_S390_VX))
+  ? MEMMOVE_Z13
+  : MEMMOVE_DEFAULT;
+      })
       )
 #endif
diff --git a/sysdeps/s390/multiarch/ifunc-impl-list.c b/sysdeps/s390/multiarch/ifunc-impl-list.c
index b54c52af36..d742d66a6a 100644
--- a/sysdeps/s390/multiarch/ifunc-impl-list.c
+++ b/sysdeps/s390/multiarch/ifunc-impl-list.c
@@ -169,6 +169,11 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 
 #if HAVE_MEMMOVE_IFUNC
     IFUNC_IMPL (i, name, memmove,
+# if HAVE_MEMMOVE_ARCH13
+ IFUNC_IMPL_ADD (array, i, memmove,
+ S390_IS_ARCH13_MIE3 (stfle_bits),
+ MEMMOVE_ARCH13)
+# endif
 # if HAVE_MEMMOVE_Z13
  IFUNC_IMPL_ADD (array, i, memmove,
  dl_hwcap & HWCAP_S390_VX, MEMMOVE_Z13)
diff --git a/sysdeps/s390/multiarch/ifunc-resolve.h b/sysdeps/s390/multiarch/ifunc-resolve.h
index b833dfef28..743de9e591 100644
--- a/sysdeps/s390/multiarch/ifunc-resolve.h
+++ b/sysdeps/s390/multiarch/ifunc-resolve.h
@@ -22,6 +22,11 @@
 
 #define S390_STFLE_BITS_Z10  34 /* General instructions extension */
 #define S390_STFLE_BITS_Z196 45 /* Distinct operands, pop ... */
+#define S390_STFLE_BITS_ARCH13_MIE3 61 /* Miscellaneous-Instruction-Extensions
+  Facility 3, e.g. mvcrl.  */
+
+#define S390_IS_ARCH13_MIE3(STFLE_BITS) \
+  ((STFLE_BITS & (1ULL << (63 - S390_STFLE_BITS_ARCH13_MIE3))) != 0)
 
 #define S390_IS_Z196(STFLE_BITS) \
   ((STFLE_BITS & (1ULL << (63 - S390_STFLE_BITS_Z196))) != 0)
--
2.17.0

Reply | Threaded
Open this post in threaded view
|

[PATCH 4/5] S390: Add arch13 strstr ifunc variant.

Stefan Liebler-2
In reply to this post by Stefan Liebler-2
This patch introduces the new arch13 ifunc variant for strstr.
For needles longer than 9 characters it is relying on the common-code
implementation.  For shorter needles it is using the new vstrs instruction
which is able to search a substring within a vector register.

ChangeLog:

        * sysdeps/s390/Makefile (sysdep_routines): Add strstr-arch13.
        * sysdeps/s390/ifunc-strstr.h (HAVE_STRSTR_ARCH13, STRSTR_ARCH13,
        STRSTR_Z13_ONLY_USED_AS_FALLBACK, HAVE_STRSTR_IFUNC_AND_ARCH13_SUPPORT):
        New defines.
        * sysdeps/s390/multiarch/ifunc-impl-list.c
        (__libc_ifunc_impl_list): Add ifunc variant for arch13 strstr.
        * sysdeps/s390/strstr-arch13.S: New file.
        * sysdeps/s390/strstr-vx.c: Omit GI symbol for z13 strstr ifunc variant
        if it is only used as fallback.
        * sysdeps/s390/strstr.c (strstr): Add arch13 variant in ifunc selector.
---
 sysdeps/s390/Makefile                    |   2 +-
 sysdeps/s390/ifunc-strstr.h              |  28 +++-
 sysdeps/s390/multiarch/ifunc-impl-list.c |   4 +
 sysdeps/s390/strstr-arch13.S             | 179 +++++++++++++++++++++++
 sysdeps/s390/strstr-vx.c                 |   4 +-
 sysdeps/s390/strstr.c                    |   8 +-
 6 files changed, 219 insertions(+), 6 deletions(-)
 create mode 100644 sysdeps/s390/strstr-arch13.S

diff --git a/sysdeps/s390/Makefile b/sysdeps/s390/Makefile
index 35fcf7e21d..af14344d31 100644
--- a/sysdeps/s390/Makefile
+++ b/sysdeps/s390/Makefile
@@ -58,7 +58,7 @@ sysdep_routines += bzero memset memset-z900 \
    memcmp memcmp-z900 \
    mempcpy memcpy memcpy-z900 \
    memmove memmove-c \
-   strstr strstr-vx strstr-c \
+   strstr strstr-arch13 strstr-vx strstr-c \
    memmem memmem-vx memmem-c \
    strlen strlen-vx strlen-c \
    strnlen strnlen-vx strnlen-c \
diff --git a/sysdeps/s390/ifunc-strstr.h b/sysdeps/s390/ifunc-strstr.h
index 2f67606213..2d50d07077 100644
--- a/sysdeps/s390/ifunc-strstr.h
+++ b/sysdeps/s390/ifunc-strstr.h
@@ -17,7 +17,7 @@
    <http://www.gnu.org/licenses/>.  */
 
 #if defined USE_MULTIARCH && IS_IN (libc) \
-  && ! defined HAVE_S390_MIN_Z13_ZARCH_ASM_SUPPORT
+  && ! defined HAVE_S390_MIN_ARCH13_ZARCH_ASM_SUPPORT
 # define HAVE_STRSTR_IFUNC 1
 #else
 # define HAVE_STRSTR_IFUNC 0
@@ -29,14 +29,32 @@
 # define HAVE_STRSTR_IFUNC_AND_VX_SUPPORT 0
 #endif
 
-#if defined HAVE_S390_MIN_Z13_ZARCH_ASM_SUPPORT
+#ifdef HAVE_S390_ARCH13_ASM_SUPPORT
+# define HAVE_STRSTR_IFUNC_AND_ARCH13_SUPPORT HAVE_STRSTR_IFUNC
+#else
+# define HAVE_STRSTR_IFUNC_AND_ARCH13_SUPPORT 0
+#endif
+
+#if defined HAVE_S390_MIN_ARCH13_ZARCH_ASM_SUPPORT
+# define STRSTR_DEFAULT STRSTR_ARCH13
+# define HAVE_STRSTR_C 0
+# define HAVE_STRSTR_Z13 1
+# define STRSTR_Z13_ONLY_USED_AS_FALLBACK 1
+# define HAVE_STRSTR_ARCH13 1
+#elif defined HAVE_S390_MIN_Z13_ZARCH_ASM_SUPPORT
 # define STRSTR_DEFAULT STRSTR_Z13
 # define HAVE_STRSTR_C 0
 # define HAVE_STRSTR_Z13 1
+# define HAVE_STRSTR_ARCH13 HAVE_STRSTR_IFUNC_AND_ARCH13_SUPPORT
 #else
 # define STRSTR_DEFAULT STRSTR_C
 # define HAVE_STRSTR_C 1
 # define HAVE_STRSTR_Z13 HAVE_STRSTR_IFUNC_AND_VX_SUPPORT
+# define HAVE_STRSTR_ARCH13 HAVE_STRSTR_IFUNC_AND_ARCH13_SUPPORT
+#endif
+
+#ifndef STRSTR_Z13_ONLY_USED_AS_FALLBACK
+# define STRSTR_Z13_ONLY_USED_AS_FALLBACK 0
 #endif
 
 #if HAVE_STRSTR_C
@@ -50,3 +68,9 @@
 #else
 # define STRSTR_Z13 NULL
 #endif
+
+#if HAVE_STRSTR_ARCH13
+# define STRSTR_ARCH13 __strstr_arch13
+#else
+# define STRSTR_ARCH13 NULL
+#endif
diff --git a/sysdeps/s390/multiarch/ifunc-impl-list.c b/sysdeps/s390/multiarch/ifunc-impl-list.c
index d742d66a6a..75289b582b 100644
--- a/sysdeps/s390/multiarch/ifunc-impl-list.c
+++ b/sysdeps/s390/multiarch/ifunc-impl-list.c
@@ -186,6 +186,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 
 #if HAVE_STRSTR_IFUNC
     IFUNC_IMPL (i, name, strstr,
+# if HAVE_STRSTR_ARCH13
+ IFUNC_IMPL_ADD (array, i, strstr,
+ dl_hwcap & HWCAP_S390_VXRS_EXT2, STRSTR_ARCH13)
+# endif
 # if HAVE_STRSTR_Z13
  IFUNC_IMPL_ADD (array, i, strstr,
  dl_hwcap & HWCAP_S390_VX, STRSTR_Z13)
diff --git a/sysdeps/s390/strstr-arch13.S b/sysdeps/s390/strstr-arch13.S
new file mode 100644
index 0000000000..929b026adf
--- /dev/null
+++ b/sysdeps/s390/strstr-arch13.S
@@ -0,0 +1,179 @@
+/* Vector optimized 32/64 bit S/390 version of strstr.
+   Copyright (C) 2019 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <ifunc-strstr.h>
+#if HAVE_STRSTR_ARCH13
+# include "sysdep.h"
+# include "asm-syntax.h"
+ .text
+
+/* char *strstr (const char *haystack=r2, const char *needle=r3)
+   Locate a substring.
+
+   Register usage as established by the code below:
+   r1:  result of lcbb (byte count up to the block boundary).
+   r4:  needle length (index of the terminating zero, 16 if none found).
+   r5:  min-skip-partial-match-index (17 - needle length).
+   v16: current part of haystack.
+   v18: needle.
+   v19: needle length in byte element 7 (as used by vstrs).
+   v20: vstrs result (match index in byte element 7).  */
+ENTRY(STRSTR_ARCH13)
+ .machine "arch13"
+ .machinemode "zarch_nohighgprs"
+ lcbb %r1,0(%r3),6
+ jo .Lneedle_on_bb /* Needle on block-boundary?  */
+ /* A full 16-byte load stays within the block.  Plain vl without an
+   alignment hint, like every other vl in this file (the 6 above is the
+   block-boundary code of lcbb and is not an alignment hint).  */
+ vl %v18,0(%r3) /* Load needle.  */
+ vfenezb %v19,%v18,%v18 /* v19[7] contains the length of needle.  */
+.Lneedle_loaded:
+ vlgvb %r4,%v19,7 /* Get index of zero or 16 if not found.  */
+ lghi %r5,17 /* See below: min-skip-partial-match-index.  */
+ cgibe %r4,0,0(%r14) /* Test if needle is zero and return.  */
+
+ /* The vstrs instruction is able to handle needles up to a length of 16,
+   but then we may have to load the next part of haystack with a
+   small offset.  This will be slow - see examples:
+   haystack =mmmmmmmmmmmmmmmm mmmmmmmmmmmmmmmmmm...mmmmmmmmmmmmmmmmmmma
+   needle   =  mmmmmmmmmmmmmma0
+   => needle_len=15; vstrs reports a partial match; haystack+=2
+   haystack =mmmmmmmmmmmmmmmm mmmmmmmmmmmmmmmmmm...mmmmmmmmmmmmmmmmmmma
+   needle   =        mmmmmmmma0000000
+   => needle_len=9; vstrs reports a partial match; haystack+=8  */
+# if ! HAVE_STRSTR_Z13
+#  error The arch13 variant of strstr needs the z13 variant of strstr!
+# endif
+ clgfi %r4,9
+ jh STRSTR_Z13
+
+ /* In case of a partial match, the vstrs instruction returns the index
+   of the partial match in a vector-register.  Then we have to
+   reload the string at the "current-position plus this index" and run
+   vstrs again in order to determine if it was a full match or no match.
+   Transferring this index from vr to gr, compute the haystack-address
+   and loading with vl is quite slow as all instructions have data
+   dependencies.  Thus we assume, that a partial match is always at the
+   first possible index and just load the next part of haystack from
+   there instead of waiting until the correct index is computed:
+   min-skip-partial-match-index = (16 - n_len) + 1  */
+ sgr %r5,%r4
+
+.Lloop:
+ lcbb %r1,0(%r2),6
+ jo .Lloop_haystack_on_bb /* Haystack on block-boundary?  */
+ vl %v16,0(%r2) /* Load next part of haystack.  */
+.Lloop_haystack_loaded:
+ /* Vector string search with zero search (cc=0 => no match).  */
+ vstrs %v20,%v16,%v18,%v19,0,2
+ jne .Lloop_vstrs_nonzero_cc
+ lcbb %r1,16(%r2),6 /* Next part of haystack.  */
+ jo .Lloop_haystack_on_bb16
+ vl %v16,16(%r2)
+ vstrs %v20,%v16,%v18,%v19,0,2
+ jne .Lloop_vstrs_nonzero_cc16
+ lcbb %r1,32(%r2),6 /* Next part of haystack.  */
+ jo .Lloop_haystack_on_bb32
+ vl %v16,32(%r2)
+ vstrs %v20,%v16,%v18,%v19,0,2
+ jne .Lloop_vstrs_nonzero_cc32
+ lcbb %r1,48(%r2),6 /* Next part of haystack.  */
+ jo .Lloop_haystack_on_bb48
+ vl %v16,48(%r2)
+ vstrs %v20,%v16,%v18,%v19,0,2
+ jne .Lloop_vstrs_nonzero_cc48
+ la %r2,64(%r2)
+ j .Lloop
+
+.Lloop_vstrs_nonzero_cc48:
+ la %r2,16(%r2)
+.Lloop_vstrs_nonzero_cc32:
+ la %r2,16(%r2)
+.Lloop_vstrs_nonzero_cc16:
+ la %r2,16(%r2)
+.Lloop_vstrs_nonzero_cc:
+ jh .Lend_match_found /* cc == 2 (full match)  */
+ jl .Lend_no_match /* cc == 1 (no match, end of string)  */
+ /* cc == 3 (partial match) See above: min-skip-partial-match-index!  */
+ lcbb %r1,0(%r5,%r2),6
+ la %r2,0(%r5,%r2)
+ jo .Lloop_haystack_on_bb
+ vl %v16,0(%r2)
+ vstrs %v20,%v16,%v18,%v19,0,2
+.Lloop_vstrs_nonzero_cc_loop:
+ jh .Lend_match_found
+ jl .Lend_no_match
+ la %r2,0(%r5,%r2)
+ je .Lloop
+ lcbb %r1,0(%r2),6 /* Next part of haystack.  */
+ jo .Lloop_haystack_on_bb
+ vl %v16,0(%r2)
+ vstrs %v20,%v16,%v18,%v19,0,2
+ jh .Lend_match_found
+ jl .Lend_no_match
+ la %r2,0(%r5,%r2)
+ je .Lloop
+ lcbb %r1,0(%r2),6 /* Next part of haystack.  */
+ jo .Lloop_haystack_on_bb
+ vl %v16,0(%r2)
+ vstrs %v20,%v16,%v18,%v19,0,2
+ jh .Lend_match_found
+ jl .Lend_no_match
+ la %r2,0(%r5,%r2)
+ je .Lloop
+ lcbb %r1,0(%r2),6 /* Next part of haystack.  */
+ jo .Lloop_haystack_on_bb
+ vl %v16,0(%r2)
+ vstrs %v20,%v16,%v18,%v19,0,2
+ j .Lloop_vstrs_nonzero_cc_loop
+
+.Lend_no_match:
+ lghi %r2,0
+ br %r14
+.Lend_match_found:
+ vlgvb %r4,%v20,7
+ la %r2,0(%r4,%r2)
+ br %r14
+
+.Lloop_haystack_on_bb48:
+ la %r2,16(%r2)
+.Lloop_haystack_on_bb32:
+ la %r2,16(%r2)
+.Lloop_haystack_on_bb16:
+ la %r2,16(%r2)
+.Lloop_haystack_on_bb:
+ /* Haystack located on page-boundary.  */
+ ahi %r1,-1 /* vll needs highest index instead of count.  */
+ vll %v16,%r1,0(%r2)
+ vlvgb %v21,%r1,7
+ vfenezb %v17,%v16,%v16 /* Search zero in loaded haystack bytes.  */
+ veclb %v17,%v21 /* Zero index <= loaded byte index?  */
+ jle .Lloop_haystack_loaded /* -> v16 contains full haystack.  */
+ vl %v16,0(%r2) /* Load haystack beyond page boundary.  */
+ j .Lloop_haystack_loaded
+
+.Lneedle_on_bb:
+ /* Needle located on page-boundary.
+   First load only the bytes up to the boundary (r1 = count from lcbb).
+   If the terminating zero is among them, the needle is complete.
+   Otherwise the needle continues beyond the boundary, thus those bytes
+   are accessible and the full 16 bytes are reloaded.  */
+ ahi %r1,-1 /* vll needs highest index instead of count.  */
+ vll %v18,%r1,0(%r3)
+ vlvgb %v21,%r1,7
+ vfenezb %v19,%v18,%v18 /* Search zero in loaded needle bytes.  */
+ veclb %v19,%v21 /* Zero index <= max loaded byte index?  */
+ jle .Lneedle_loaded /* -> v18 contains full needle.  */
+ /* The reload has to target v18 (the needle register used by vfenezb
+   below and by all vstrs instructions).  Loading into v16 would leave
+   the zero-padded partial needle in v18 and truncate the needle at the
+   page boundary.  */
+ vl %v18,0(%r3) /* Load needle beyond page boundary.  */
+ vfenezb %v19,%v18,%v18 /* Recompute needle length on the full load.  */
+ j .Lneedle_loaded
+END(STRSTR_ARCH13)
+
+# if ! HAVE_STRSTR_IFUNC
+strong_alias (STRSTR_ARCH13, strstr)
+# endif
+
+# if STRSTR_Z13_ONLY_USED_AS_FALLBACK && defined SHARED && IS_IN (libc)
+strong_alias (STRSTR_ARCH13, __GI_strstr)
+# endif
+#endif
diff --git a/sysdeps/s390/strstr-vx.c b/sysdeps/s390/strstr-vx.c
index 275263952e..6adffdaaff 100644
--- a/sysdeps/s390/strstr-vx.c
+++ b/sysdeps/s390/strstr-vx.c
@@ -19,11 +19,11 @@
 #include <ifunc-strstr.h>
 
 #if HAVE_STRSTR_Z13
-# if HAVE_STRSTR_IFUNC
+# if HAVE_STRSTR_IFUNC || STRSTR_Z13_ONLY_USED_AS_FALLBACK
 #  define STRSTR STRSTR_Z13
 #  if defined SHARED && IS_IN (libc)
 #   undef libc_hidden_builtin_def
-#   if HAVE_STRSTR_C
+#   if HAVE_STRSTR_C || STRSTR_Z13_ONLY_USED_AS_FALLBACK
 #    define libc_hidden_builtin_def(name)
 #   else
 #    define libc_hidden_builtin_def(name) \
diff --git a/sysdeps/s390/strstr.c b/sysdeps/s390/strstr.c
index 045c380cd1..599d20d0d0 100644
--- a/sysdeps/s390/strstr.c
+++ b/sysdeps/s390/strstr.c
@@ -32,8 +32,14 @@ extern __typeof (__redirect_strstr) STRSTR_C attribute_hidden;
 extern __typeof (__redirect_strstr) STRSTR_Z13 attribute_hidden;
 # endif
 
+# if HAVE_STRSTR_ARCH13
+extern __typeof (__redirect_strstr) STRSTR_ARCH13 attribute_hidden;
+# endif
+
 s390_libc_ifunc_expr (__redirect_strstr, strstr,
-      (HAVE_STRSTR_Z13 && (hwcap & HWCAP_S390_VX))
+      (HAVE_STRSTR_ARCH13 && (hwcap & HWCAP_S390_VXRS_EXT2))
+      ? STRSTR_ARCH13
+      : (HAVE_STRSTR_Z13 && (hwcap & HWCAP_S390_VX))
       ? STRSTR_Z13
       : STRSTR_DEFAULT
       )
--
2.17.0

Reply | Threaded
Open this post in threaded view
|

[PATCH 5/5] S390: Add arch13 memmem ifunc variant.

Stefan Liebler-2
In reply to this post by Stefan Liebler-2
This patch introduces the new arch13 ifunc variant for memmem.
For needles longer than 9 bytes it is relying on the common-code
implementation.  For shorter needles it is using the new vstrs instruction
which is able to search a substring within a vector register.

ChangeLog:

        * sysdeps/s390/Makefile (sysdep_routines): Add memmem-arch13.
        * sysdeps/s390/ifunc-memmem.h (HAVE_MEMMEM_ARCH13, MEMMEM_ARCH13,
        MEMMEM_Z13_ONLY_USED_AS_FALLBACK, HAVE_MEMMEM_IFUNC_AND_ARCH13_SUPPORT):
        New defines.
        * sysdeps/s390/memmem-arch13.S: New file.
        * sysdeps/s390/memmem-vx.c: Omit GI symbol for z13 memmem ifunc variant
        if it is only used as fallback.
        * sysdeps/s390/memmem.c (memmem): Add arch13 variant in ifunc selector.
        * sysdeps/s390/multiarch/ifunc-impl-list.c
        (__libc_ifunc_impl_list): Add ifunc variant for arch13 memmem.
---
 sysdeps/s390/Makefile                    |   2 +-
 sysdeps/s390/ifunc-memmem.h              |  28 +++-
 sysdeps/s390/memmem-arch13.S             | 161 +++++++++++++++++++++++
 sysdeps/s390/memmem-vx.c                 |   4 +-
 sysdeps/s390/memmem.c                    |   8 +-
 sysdeps/s390/multiarch/ifunc-impl-list.c |   4 +
 6 files changed, 201 insertions(+), 6 deletions(-)
 create mode 100644 sysdeps/s390/memmem-arch13.S

diff --git a/sysdeps/s390/Makefile b/sysdeps/s390/Makefile
index af14344d31..a8c49c928f 100644
--- a/sysdeps/s390/Makefile
+++ b/sysdeps/s390/Makefile
@@ -59,7 +59,7 @@ sysdep_routines += bzero memset memset-z900 \
    mempcpy memcpy memcpy-z900 \
    memmove memmove-c \
    strstr strstr-arch13 strstr-vx strstr-c \
-   memmem memmem-vx memmem-c \
+   memmem memmem-arch13 memmem-vx memmem-c \
    strlen strlen-vx strlen-c \
    strnlen strnlen-vx strnlen-c \
    strcpy strcpy-vx strcpy-z900 \
diff --git a/sysdeps/s390/ifunc-memmem.h b/sysdeps/s390/ifunc-memmem.h
index 5494c19e4c..6f2fb2b0ab 100644
--- a/sysdeps/s390/ifunc-memmem.h
+++ b/sysdeps/s390/ifunc-memmem.h
@@ -17,7 +17,7 @@
    <http://www.gnu.org/licenses/>.  */
 
 #if defined USE_MULTIARCH && IS_IN (libc) \
-  && ! defined HAVE_S390_MIN_Z13_ZARCH_ASM_SUPPORT
+  && ! defined HAVE_S390_MIN_ARCH13_ZARCH_ASM_SUPPORT
 # define HAVE_MEMMEM_IFUNC 1
 #else
 # define HAVE_MEMMEM_IFUNC 0
@@ -29,14 +29,32 @@
 # define HAVE_MEMMEM_IFUNC_AND_VX_SUPPORT 0
 #endif
 
-#if defined HAVE_S390_MIN_Z13_ZARCH_ASM_SUPPORT
+#ifdef HAVE_S390_ARCH13_ASM_SUPPORT
+# define HAVE_MEMMEM_IFUNC_AND_ARCH13_SUPPORT HAVE_MEMMEM_IFUNC
+#else
+# define HAVE_MEMMEM_IFUNC_AND_ARCH13_SUPPORT 0
+#endif
+
+#if defined HAVE_S390_MIN_ARCH13_ZARCH_ASM_SUPPORT
+# define MEMMEM_DEFAULT MEMMEM_ARCH13
+# define HAVE_MEMMEM_C 0
+# define HAVE_MEMMEM_Z13 1
+# define MEMMEM_Z13_ONLY_USED_AS_FALLBACK 1
+# define HAVE_MEMMEM_ARCH13 1
+#elif defined HAVE_S390_MIN_Z13_ZARCH_ASM_SUPPORT
 # define MEMMEM_DEFAULT MEMMEM_Z13
 # define HAVE_MEMMEM_C 0
 # define HAVE_MEMMEM_Z13 1
+# define HAVE_MEMMEM_ARCH13 HAVE_MEMMEM_IFUNC_AND_ARCH13_SUPPORT
 #else
 # define MEMMEM_DEFAULT MEMMEM_C
 # define HAVE_MEMMEM_C 1
 # define HAVE_MEMMEM_Z13 HAVE_MEMMEM_IFUNC_AND_VX_SUPPORT
+# define HAVE_MEMMEM_ARCH13 HAVE_MEMMEM_IFUNC_AND_ARCH13_SUPPORT
+#endif
+
+#ifndef MEMMEM_Z13_ONLY_USED_AS_FALLBACK
+# define MEMMEM_Z13_ONLY_USED_AS_FALLBACK 0
 #endif
 
 #if HAVE_MEMMEM_C
@@ -50,3 +68,9 @@
 #else
 # define MEMMEM_Z13 NULL
 #endif
+
+#if HAVE_MEMMEM_ARCH13
+# define MEMMEM_ARCH13 __memmem_arch13
+#else
+# define MEMMEM_ARCH13 NULL
+#endif
diff --git a/sysdeps/s390/memmem-arch13.S b/sysdeps/s390/memmem-arch13.S
new file mode 100644
index 0000000000..b59d60acf0
--- /dev/null
+++ b/sysdeps/s390/memmem-arch13.S
@@ -0,0 +1,161 @@
+/* Vector optimized 32/64 bit S/390 version of memmem.
+   Copyright (C) 2019 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <ifunc-memmem.h>
+#if HAVE_MEMMEM_ARCH13
+# include "sysdep.h"
+# include "asm-syntax.h"
+ .text
+
+/* void *memmem(const void *haystack=r2, size_t haystacklen=r3,
+ const void *needle=r4, size_t needlelen=r5);
+   Locate a substring.  */
+ENTRY(MEMMEM_ARCH13)
+ .machine "arch13"
+ .machinemode "zarch_nohighgprs"
+# if ! defined __s390x__
+ llgfr %r3,%r3
+ llgfr %r5,%r5
+ llgfr %r4,%r4
+ llgfr %r2,%r2
+# endif /* ! defined __s390x__ */
+ clgrjl %r3,%r5,.Lend_no_match /* Haystack < needle?  */
+
+ /* Jump to fallback if needle > 9.  See also strstr-arch13.S.  */
+# if ! HAVE_MEMMEM_Z13
+#  error The arch13 variant of memmem needs the z13 variant of memmem!
+# endif
+ clgfi %r5,9
+ jh MEMMEM_Z13
+
+ aghik %r0,%r5,-1 /* vll needs highest index.  */
+ bc 4,0(%r14) /* cc==1: return if needle-len == 0.  */
+ vll %v18,%r0,0(%r4) /* Load needle.  */
+ vlvgb %v19,%r5,7 /* v19[7] contains length of needle.  */
+
+ clgijh %r3,16,.Lhaystack_larger_16
+.Lhaystack_smaller_16_on_bb:
+ aghik %r0,%r3,-1 /* vll needs highest index.  */
+ vll %v16,%r0,0(%r2) /* Load haystack.  */
+.Lhaystack_smaller_16:
+ sgr %r3,%r5 /* r3 = largest valid match-index.  */
+ jl .Lend_no_match /* Haystack-len < needle-len?  */
+ vstrs %v20,%v16,%v18,%v19,0,0
+ /* Vector string search without zero search where v20 will contain
+   the index of a partial/full match or 16 (index is named k).
+   cc=0 (no match; k=16): .Lend_no_match
+   cc=1 (only available with zero-search): Ignore
+   cc=2 (full match; k<16): Needle found, but could be beyond haystack!
+   cc=3 (partial match; k<16): Always at end of v16 and thus beyond!  */
+ brc 9,.Lend_no_match /* Jump away if cc == 0 || cc == 3.  */
+ vlgvb %r1,%v20,7
+ /* Verify that the full-match (cc=2) is valid!  */
+ clgrjh %r1,%r3,.Lend_no_match /* Jump away if match is beyond.  */
+ la %r2,0(%r1,%r2)
+ br %r14
+.Lend_no_match:
+ lghi %r2,0
+ br %r14
+
+.Lhaystack_larger_16:
+ vl %v16,0(%r2)
+ lghi %r1,17
+ lay %r4,-16(%r3,%r2) /* Boundary for loading with vl.  */
+ lay %r0,-64(%r3,%r2) /* Boundary for loading with 4xvl.  */
+ /* See also strstr-arch13.S:
+   min-skip-partial-match-index = (16 - n_len) + 1  */
+ sgr %r1,%r5
+ clgfi %r3,64 /* Set Boundary to zero ...  */
+ la %r3,0(%r3,%r2)
+ locghil %r0,0 /* ... if haystack < 64bytes.  */
+ jh .Lloop64
+.Lloop:
+ la %r2,16(%r2)
+ /* Vector string search without zero search.  cc=0 => no match.  */
+ vstrs %v20,%v16,%v18,%v19,0,0
+ jne .Lloop_vstrs_nonzero_cc
+ clgrjh %r2,%r4,.Lhaystack_too_small
+.Lloop16:
+ vl %v16,0(%r2)
+ la %r2,16(%r2)
+ vstrs %v20,%v16,%v18,%v19,0,0
+ jne .Lloop_vstrs_nonzero_cc
+ clgrjle %r2,%r4,.Lloop16
+.Lhaystack_too_small:
+ sgr %r3,%r2 /* r3 = (haystack + len) - curr_pos  */
+ je .Lend_no_match /* Remaining haystack is empty.  */
+ lcbb %r0,0(%r2),6
+ jo .Lhaystack_smaller_16_on_bb
+ vl %v16,0(%r2) /* Load haystack.  */
+ j .Lhaystack_smaller_16
+
+.Lend_match_found:
+ vlgvb %r4,%v20,7
+ sgr %r2,%r1
+ la %r2,0(%r4,%r2)
+ br %r14
+
+.Lloop_vstrs_nonzero_cc32:
+ la %r2,16(%r2)
+.Lloop_vstrs_nonzero_cc16:
+ la %r2,16(%r2)
+.Lloop_vstrs_nonzero_cc0:
+ la %r2,16(%r2)
+.Lloop_vstrs_nonzero_cc:
+ lay %r2,-16(%r1,%r2) /* Compute next load address.  */
+ jh .Lend_match_found /* cc == 2 (full match)  */
+ clgrjh %r2,%r4,.Lhaystack_too_small
+ vl %v16,0(%r2)
+.Lloop_vstrs_nonzero_cc_loop:
+ la %r2,0(%r1,%r2)
+ vstrs %v20,%v16,%v18,%v19,0,0
+ jh .Lend_match_found
+ clgrjh %r2,%r4,.Lhaystack_too_small
+ vl %v16,0(%r2) /* Next part of haystack.  */
+ jo .Lloop_vstrs_nonzero_cc_loop
+ /* Case: no-match.  */
+ clgrjh %r2,%r0,.Lloop /* Jump away if haystack has less than 64 bytes.  */
+.Lloop64:
+ vstrs %v20,%v16,%v18,%v19,0,0
+ jne .Lloop_vstrs_nonzero_cc0
+ vl %v16,16(%r2) /* Next part of haystack.  */
+ vstrs %v20,%v16,%v18,%v19,0,0
+ jne .Lloop_vstrs_nonzero_cc16
+ vl %v16,32(%r2) /* Next part of haystack.  */
+ vstrs %v20,%v16,%v18,%v19,0,0
+ jne .Lloop_vstrs_nonzero_cc32
+ vl %v16,48(%r2) /* Next part of haystack.  */
+ la %r2,64(%r2)
+ vstrs %v20,%v16,%v18,%v19,0,0
+ jne .Lloop_vstrs_nonzero_cc
+ clgrjh %r2,%r4,.Lhaystack_too_small
+ vl %v16,0(%r2) /* Next part of haystack.  */
+ clgrjle %r2,%r0,.Lloop64
+ j .Lloop
+END(MEMMEM_ARCH13)
+
+# if ! HAVE_MEMMEM_IFUNC
+strong_alias (MEMMEM_ARCH13, __memmem)
+weak_alias (__memmem, memmem)
+# endif
+
+# if MEMMEM_Z13_ONLY_USED_AS_FALLBACK && defined SHARED && IS_IN (libc)
+weak_alias (MEMMEM_ARCH13, __GI_memmem)
+strong_alias (MEMMEM_ARCH13, __GI___memmem)
+# endif
+#endif
diff --git a/sysdeps/s390/memmem-vx.c b/sysdeps/s390/memmem-vx.c
index 31f617346c..6b0407d0e0 100644
--- a/sysdeps/s390/memmem-vx.c
+++ b/sysdeps/s390/memmem-vx.c
@@ -20,7 +20,7 @@
 
 #if HAVE_MEMMEM_Z13
 # include <string.h>
-# if HAVE_MEMMEM_IFUNC
+# if HAVE_MEMMEM_IFUNC || MEMMEM_Z13_ONLY_USED_AS_FALLBACK
 
 #  ifndef _LIBC
 #   define memmem MEMMEM_Z13
@@ -32,7 +32,7 @@
 #   undef libc_hidden_def
 #   undef libc_hidden_weak
 
-#   if HAVE_MEMMEM_C
+#   if HAVE_MEMMEM_C || MEMMEM_Z13_ONLY_USED_AS_FALLBACK
 #    define libc_hidden_def(name)
 #    define libc_hidden_weak(name)
 #   else
diff --git a/sysdeps/s390/memmem.c b/sysdeps/s390/memmem.c
index fe9598e908..cf029a71e5 100644
--- a/sysdeps/s390/memmem.c
+++ b/sysdeps/s390/memmem.c
@@ -34,8 +34,14 @@ extern __typeof (__redirect_memmem) MEMMEM_C attribute_hidden;
 extern __typeof (__redirect_memmem) MEMMEM_Z13 attribute_hidden;
 # endif
 
+# if HAVE_MEMMEM_ARCH13
+extern __typeof (__redirect_memmem) MEMMEM_ARCH13 attribute_hidden;
+# endif
+
 s390_libc_ifunc_expr (__redirect_memmem, __memmem,
-      (HAVE_MEMMEM_Z13 && (hwcap & HWCAP_S390_VX))
+      (HAVE_MEMMEM_ARCH13 && (hwcap & HWCAP_S390_VXRS_EXT2))
+      ? MEMMEM_ARCH13
+      : (HAVE_MEMMEM_Z13 && (hwcap & HWCAP_S390_VX))
       ? MEMMEM_Z13
       : MEMMEM_DEFAULT
       )
diff --git a/sysdeps/s390/multiarch/ifunc-impl-list.c b/sysdeps/s390/multiarch/ifunc-impl-list.c
index 75289b582b..1948436417 100644
--- a/sysdeps/s390/multiarch/ifunc-impl-list.c
+++ b/sysdeps/s390/multiarch/ifunc-impl-list.c
@@ -202,6 +202,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 
 #if HAVE_MEMMEM_IFUNC
     IFUNC_IMPL (i, name, memmem,
+# if HAVE_MEMMEM_ARCH13
+      IFUNC_IMPL_ADD (array, i, memmem,
+      dl_hwcap & HWCAP_S390_VXRS_EXT2, MEMMEM_ARCH13)
+# endif
 # if HAVE_MEMMEM_Z13
  IFUNC_IMPL_ADD (array, i, memmem,
  dl_hwcap & HWCAP_S390_VX, MEMMEM_Z13)
--
2.17.0

Reply | Threaded
Open this post in threaded view
|

Re: [PATCH 1/5] S390: Add new hwcap values for new cpu architecture arch13.

Stefan Liebler-2
In reply to this post by Stefan Liebler-2
On 3/18/19 4:08 PM, Stefan Liebler wrote:

> The new hwcap values indicate support for:
> -"Vector-Enhancements Facility 2" (tag "vxe2", hwcap 2^15)
> -"Vector-Packed-Decimal-Enhancement Facility" (tag "vxp", hwcap 2^16)
> -"Enhanced-Sort Facility" (tag "sort", hwcap 2^17)
> -"Deflate-Conversion Facility" (tag "dflt", hwcap 2^18)
>
> ChangeLog:
>
> * sysdeps/s390/dl-procinfo.c (_dl_s390_cap_flags):
> Add vxe2, vxp, dflt, sort flags.
> * sysdeps/s390/dl-procinfo.h: Add HWCAP_S390_VXRS_EXT2,
> HWCAP_S390_VXRS_PDE, HWCAP_S390_SORT, HWCAP_S390_DFLT
> capabilities.
> * sysdeps/unix/sysv/linux/s390/bits/hwcap.h
> (HWCAP_S390_VXRS_EXT2, HWCAP_S390_VXRS_PDE, HWCAP_S390_SORT,
> HWCAP_S390_DFLT): Define.
> ---
>   sysdeps/s390/dl-procinfo.c                | 4 ++--
>   sysdeps/s390/dl-procinfo.h                | 6 +++++-
>   sysdeps/unix/sysv/linux/s390/bits/hwcap.h | 4 ++++
>   3 files changed, 11 insertions(+), 3 deletions(-)
>

I've just committed the patch series with one change to the first (hwcap)
patch:
The vxe2 hwcap is now also marked as an important hwcap.

Bye,
Stefan