[PATCH-V2: 00/24] CET: Prepare for CET enabling

classic Classic list List threaded Threaded
54 messages Options
123
Reply | Threaded
Open this post in threaded view
|

[PATCH-V2: 00/24] CET: Prepare for CET enabling

H.J. Lu-30
This is the first set of patches to enable CET, excluding changes which
use the new CET system calls.  The complete set of patches is available
on hjl/cet/master branch at:

https://github.com/hjl-tools/glibc/tree/hjl/cet/master

I will submit the second set of patches with the new CET system calls,
which are on Linux kernel CET branch:

https://github.com/yyu168/linux_cet

later.


H.J. Lu (24):
  x86: Rename __glibc_reserved1 to feature_1 in tcbhead_t [BZ #22563]
  x86: Support shadow stack pointer in setjmp/longjmp
  x86: Support IBT and SHSTK in Intel CET [BZ #21598]
  x86: Add _CET_ENDBR to functions in crti.S
  x86: Add _CET_ENDBR to functions in dl-tlsdesc.S
  x86-64: Add _CET_ENDBR to STRCMP_SSE42
  i386: Add _CET_ENDBR to indirect jump targets in add_n.S/sub_n.S
  x86: Update vfork to pop shadow stack
  x86_64: Use _CET_NOTRACK in strcmp.S
  x86-64: Use _CET_NOTRACK in strcpy-sse2-unaligned.S
  x86-64: Use _CET_NOTRACK in strcmp-sse42.S
  x86-64: Use _CET_NOTRACK in memcpy-ssse3-back.S
  x86-64: Use _CET_NOTRACK in memcmp-sse4.S
  x86-64: Use _CET_NOTRACK in memcpy-ssse3.S
  i386: Use _CET_NOTRACK in i686/memcmp.S
  i386: Use _CET_NOTRACK in memset-sse2.S
  i386: Use _CET_NOTRACK in memcmp-sse4.S
  i386: Use _CET_NOTRACK in memcpy-ssse3-rep.S
  i386: Use _CET_NOTRACK in memcpy-ssse3.S
  i386: Use _CET_NOTRACK in strcpy-sse2.S
  i386: Use _CET_NOTRACK in strcat-sse2.S
  i386: Use _CET_NOTRACK in memset-sse2-rep.S
  x86-64: Add endbr64 to tst-quadmod[12].S
  Intel CET: Document --enable-cet

 INSTALL                                       |  11 ++
 NEWS                                          |  10 +
 configure                                     |  11 ++
 configure.ac                                  |   6 +
 elf/dl-load.c                                 |  61 +++---
 elf/dl-open.c                                 |   4 +
 elf/rtld.c                                    |  12 ++
 manual/install.texi                           |  10 +
 nptl/pthread_create.c                         |   5 +
 sysdeps/i386/__longjmp.S                      |  78 ++++++++
 sysdeps/i386/add_n.S                          |  27 ++-
 sysdeps/i386/bsd-_setjmp.S                    |  21 ++
 sysdeps/i386/bsd-setjmp.S                     |  21 ++
 sysdeps/i386/crti.S                           |   2 +
 sysdeps/i386/dl-tlsdesc.S                     |   7 +
 sysdeps/i386/dl-trampoline.S                  |  72 +++++++
 sysdeps/i386/i686/add_n.S                     |  27 ++-
 sysdeps/i386/i686/memcmp.S                    |   4 +-
 sysdeps/i386/i686/multiarch/memcmp-sse4.S     |   4 +-
 .../i386/i686/multiarch/memcpy-ssse3-rep.S    |   8 +-
 sysdeps/i386/i686/multiarch/memcpy-ssse3.S    |   4 +-
 sysdeps/i386/i686/multiarch/memset-sse2-rep.S |   4 +-
 sysdeps/i386/i686/multiarch/memset-sse2.S     |   4 +-
 sysdeps/i386/i686/multiarch/strcat-sse2.S     |   4 +-
 sysdeps/i386/i686/multiarch/strcpy-sse2.S     |   4 +-
 sysdeps/i386/nptl/tcb-offsets.sym             |   1 +
 sysdeps/i386/nptl/tls.h                       |   5 +-
 sysdeps/i386/setjmp.S                         |  21 ++
 sysdeps/i386/sub_n.S                          |  26 ++-
 .../unix/sysv/linux/i386/____longjmp_chk.S    |  40 ++++
 sysdeps/unix/sysv/linux/i386/dl-cet.c         |  67 +++++++
 sysdeps/unix/sysv/linux/i386/dl-machine.h     |  23 +++
 sysdeps/unix/sysv/linux/i386/vfork.S          |  54 +++++
 sysdeps/unix/sysv/linux/x86/Makefile          |  43 +++-
 sysdeps/unix/sysv/linux/x86/check-cet.awk     |  53 +++++
 sysdeps/unix/sysv/linux/x86/configure         |  69 +++++++
 sysdeps/unix/sysv/linux/x86/configure.ac      |  46 +++++
 sysdeps/unix/sysv/linux/x86/dl-cet.c          | 186 ++++++++++++++++++
 sysdeps/unix/sysv/linux/x86/dl-cet.h          | 138 +++++++++++++
 sysdeps/unix/sysv/linux/x86/dl-procruntime.c  |  57 ++++++
 sysdeps/unix/sysv/linux/x86/ldsodefs.h        |  29 +++
 sysdeps/unix/sysv/linux/x86/link_map.h        |  26 +++
 sysdeps/unix/sysv/linux/x86/pthreaddef.h      |  24 +++
 .../unix/sysv/linux/x86_64/____longjmp_chk.S  |  41 ++++
 sysdeps/unix/sysv/linux/x86_64/dl-machine.h   |  27 +++
 sysdeps/unix/sysv/linux/x86_64/vfork.S        |  35 ++++
 sysdeps/x86/Makefile                          |   1 +
 sysdeps/x86/cpu-features.h                    |   5 +
 sysdeps/x86/jmp_buf-ssp.sym                   |   1 +
 sysdeps/x86/sysdep.h                          |   8 +
 sysdeps/x86_64/__longjmp.S                    |  45 +++++
 sysdeps/x86_64/crti.S                         |   2 +
 sysdeps/x86_64/dl-tlsdesc.S                   |   5 +
 sysdeps/x86_64/dl-trampoline.h                |   2 +
 sysdeps/x86_64/multiarch/memcmp-sse4.S        |   2 +-
 sysdeps/x86_64/multiarch/memcpy-ssse3-back.S  |   6 +-
 sysdeps/x86_64/multiarch/memcpy-ssse3.S       | 124 ++++++------
 sysdeps/x86_64/multiarch/strcmp-sse42.S       |   3 +-
 .../x86_64/multiarch/strcpy-sse2-unaligned.S  |   2 +-
 sysdeps/x86_64/nptl/tcb-offsets.sym           |   1 +
 sysdeps/x86_64/nptl/tls.h                     |   5 +-
 sysdeps/x86_64/setjmp.S                       |  21 ++
 sysdeps/x86_64/strcmp.S                       |   2 +-
 sysdeps/x86_64/tst-quadmod1.S                 |   6 +
 sysdeps/x86_64/tst-quadmod2.S                 |   6 +
 65 files changed, 1561 insertions(+), 118 deletions(-)
 create mode 100644 sysdeps/unix/sysv/linux/i386/dl-cet.c
 create mode 100644 sysdeps/unix/sysv/linux/i386/dl-machine.h
 create mode 100644 sysdeps/unix/sysv/linux/x86/check-cet.awk
 create mode 100644 sysdeps/unix/sysv/linux/x86/configure
 create mode 100644 sysdeps/unix/sysv/linux/x86/configure.ac
 create mode 100644 sysdeps/unix/sysv/linux/x86/dl-cet.c
 create mode 100644 sysdeps/unix/sysv/linux/x86/dl-cet.h
 create mode 100644 sysdeps/unix/sysv/linux/x86/dl-procruntime.c
 create mode 100644 sysdeps/unix/sysv/linux/x86/ldsodefs.h
 create mode 100644 sysdeps/unix/sysv/linux/x86/link_map.h
 create mode 100644 sysdeps/unix/sysv/linux/x86/pthreaddef.h
 create mode 100644 sysdeps/unix/sysv/linux/x86_64/dl-machine.h
 create mode 100644 sysdeps/x86/jmp_buf-ssp.sym

--
2.17.1

Reply | Threaded
Open this post in threaded view
|

[PATCH 01/24] x86: Rename __glibc_reserved1 to feature_1 in tcbhead_t [BZ #22563]

H.J. Lu-30
This will be used by CET run-time control.

        [BZ #22563]
        * nptl/pthread_create.c (__pthread_create_2_1): Use
        THREAD_COPY_ADDITONAL_INFO to copy additional info if defined.
        * sysdeps/i386/nptl/tcb-offsets.sym (FEATURE_1_OFFSET): New.
        * sysdeps/x86_64/nptl/tcb-offsets.sym (FEATURE_1_OFFSET):
        Likewise.
        * sysdeps/i386/nptl/tls.h (tcbhead_t): Rename __glibc_reserved1
        to feature_1.
        * sysdeps/x86_64/nptl/tls.h (tcbhead_t): Likewise.
        * sysdeps/unix/sysv/linux/x86/pthreaddef.h: New file.
---
 nptl/pthread_create.c                    |  5 +++++
 sysdeps/i386/nptl/tcb-offsets.sym        |  1 +
 sysdeps/i386/nptl/tls.h                  |  5 ++++-
 sysdeps/unix/sysv/linux/x86/pthreaddef.h | 24 ++++++++++++++++++++++++
 sysdeps/x86_64/nptl/tcb-offsets.sym      |  1 +
 sysdeps/x86_64/nptl/tls.h                |  5 ++++-
 6 files changed, 39 insertions(+), 2 deletions(-)
 create mode 100644 sysdeps/unix/sysv/linux/x86/pthreaddef.h

diff --git a/nptl/pthread_create.c b/nptl/pthread_create.c
index 92c945b12b..16998f4bbd 100644
--- a/nptl/pthread_create.c
+++ b/nptl/pthread_create.c
@@ -712,6 +712,11 @@ __pthread_create_2_1 (pthread_t *newthread, const pthread_attr_t *attr,
   THREAD_COPY_POINTER_GUARD (pd);
 #endif
 
+  /* Copy additional info.  */
+#ifdef THREAD_COPY_ADDITONAL_INFO
+  THREAD_COPY_ADDITONAL_INFO (pd);
+#endif
+
   /* Verify the sysinfo bits were copied in allocate_stack if needed.  */
 #ifdef NEED_DL_SYSINFO
   CHECK_THREAD_SYSINFO (pd);
diff --git a/sysdeps/i386/nptl/tcb-offsets.sym b/sysdeps/i386/nptl/tcb-offsets.sym
index 7d7fe5e71c..fbac241c45 100644
--- a/sysdeps/i386/nptl/tcb-offsets.sym
+++ b/sysdeps/i386/nptl/tcb-offsets.sym
@@ -12,3 +12,4 @@ CLEANUP offsetof (struct pthread, cleanup)
 CLEANUP_PREV offsetof (struct _pthread_cleanup_buffer, __prev)
 MUTEX_FUTEX offsetof (pthread_mutex_t, __data.__lock)
 POINTER_GUARD offsetof (tcbhead_t, pointer_guard)
+FEATURE_1_OFFSET offsetof (tcbhead_t, feature_1)
diff --git a/sysdeps/i386/nptl/tls.h b/sysdeps/i386/nptl/tls.h
index afb71ce431..9535c01905 100644
--- a/sysdeps/i386/nptl/tls.h
+++ b/sysdeps/i386/nptl/tls.h
@@ -41,7 +41,10 @@ typedef struct
   uintptr_t stack_guard;
   uintptr_t pointer_guard;
   int gscope_flag;
-  int __glibc_reserved1;
+  /* Bit 0: IBT.
+     Bit 1: SHSTK.
+   */
+  unsigned int feature_1;
   /* Reservation of some values for the TM ABI.  */
   void *__private_tm[3];
   /* GCC split stack support.  */
diff --git a/sysdeps/unix/sysv/linux/x86/pthreaddef.h b/sysdeps/unix/sysv/linux/x86/pthreaddef.h
new file mode 100644
index 0000000000..539f6540d0
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/x86/pthreaddef.h
@@ -0,0 +1,24 @@
+/* Pthread macros.  Linux/x86 version.
+   Copyright (C) 2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include_next <pthreaddef.h>
+
+/* We need to copy feature_1 in pthread_create.  */
+#define THREAD_COPY_ADDITONAL_INFO(descr) \
+  ((descr)->header.feature_1 \
+   = THREAD_GETMEM (THREAD_SELF, header.feature_1))
diff --git a/sysdeps/x86_64/nptl/tcb-offsets.sym b/sysdeps/x86_64/nptl/tcb-offsets.sym
index be63404a16..387621e88c 100644
--- a/sysdeps/x86_64/nptl/tcb-offsets.sym
+++ b/sysdeps/x86_64/nptl/tcb-offsets.sym
@@ -12,6 +12,7 @@ MUTEX_FUTEX offsetof (pthread_mutex_t, __data.__lock)
 MULTIPLE_THREADS_OFFSET offsetof (tcbhead_t, multiple_threads)
 POINTER_GUARD offsetof (tcbhead_t, pointer_guard)
 VGETCPU_CACHE_OFFSET offsetof (tcbhead_t, vgetcpu_cache)
+FEATURE_1_OFFSET offsetof (tcbhead_t, feature_1)
 
 -- Not strictly offsets, but these values are also used in the TCB.
 TCB_CANCELSTATE_BITMASK CANCELSTATE_BITMASK
diff --git a/sysdeps/x86_64/nptl/tls.h b/sysdeps/x86_64/nptl/tls.h
index 65c0051dcf..6557c80ffd 100644
--- a/sysdeps/x86_64/nptl/tls.h
+++ b/sysdeps/x86_64/nptl/tls.h
@@ -51,7 +51,10 @@ typedef struct
   uintptr_t stack_guard;
   uintptr_t pointer_guard;
   unsigned long int vgetcpu_cache[2];
-  int __glibc_reserved1;
+  /* Bit 0: IBT.
+     Bit 1: SHSTK.
+   */
+  unsigned int feature_1;
   int __glibc_unused1;
   /* Reservation of some values for the TM ABI.  */
   void *__private_tm[4];
--
2.17.1

Reply | Threaded
Open this post in threaded view
|

[PATCH 02/24] x86: Support shadow stack pointer in setjmp/longjmp

H.J. Lu-30
In reply to this post by H.J. Lu-30
Save and restore shadow stack pointer in setjmp and longjmp to support
shadow stack in Intel CET.  Use feature_1 in tcbhead_t to check if
shadow stack is enabled before saving and restoring shadow stack
pointer so that it works with the old smaller cancel_jmp_buf which
doesn't have space for shadow stack pointer.

2017-12-07  Igor Tsimbalist  <[hidden email]>
            H.J. Lu  <[hidden email]>

        * sysdeps/i386/__longjmp.S: Include <jmp_buf-ssp.h>.
        (__longjmp): Restore shadow stack pointer if shadow stack is
        enabled, SHADOW_STACK_POINTER_OFFSET is defined and __longjmp
        isn't defined for __longjmp_cancel.
        * sysdeps/i386/bsd-_setjmp.S: Include <jmp_buf-ssp.h>.
        (_setjmp): Save shadow stack pointer if shadow stack is enabled
        and SHADOW_STACK_POINTER_OFFSET is defined.
        * sysdeps/i386/bsd-setjmp.S: Include <jmp_buf-ssp.h>.
        (setjmp): Save shadow stack pointer if shadow stack is enabled
        and SHADOW_STACK_POINTER_OFFSET is defined.
        * sysdeps/i386/setjmp.S: Include <jmp_buf-ssp.h>.
        (__sigsetjmp): Save shadow stack pointer if shadow stack is
        enabled and SHADOW_STACK_POINTER_OFFSET is defined.
        * sysdeps/unix/sysv/linux/i386/____longjmp_chk.S: Include
        <jmp_buf-ssp.h>.
        (____longjmp_chk): Restore shadow stack pointer if shadow stack
        is enabled and SHADOW_STACK_POINTER_OFFSET is defined.
        * sysdeps/unix/sysv/linux/x86/Makefile (gen-as-const-headers):
        Remove jmp_buf-ssp.sym.
        * sysdeps/unix/sysv/linux/x86_64/____longjmp_chk.S: Include
        <jmp_buf-ssp.h>.
        (____longjmp_chk): Restore shadow stack pointer if shadow stack
        is enabled and SHADOW_STACK_POINTER_OFFSET is defined.
        * sysdeps/x86/Makefile (gen-as-const-headers): Add
        jmp_buf-ssp.sym.
        * sysdeps/x86/jmp_buf-ssp.sym: New dummy file.
        * sysdeps/x86_64/__longjmp.S: Include <jmp_buf-ssp.h>.
        (__longjmp): Restore shadow stack pointer if shadow stack is
        enabled, SHADOW_STACK_POINTER_OFFSET is defined and __longjmp
        isn't defined for __longjmp_cancel.
        * sysdeps/x86_64/setjmp.S: Include <jmp_buf-ssp.h>.
        (__sigsetjmp): Save shadow stack pointer if shadow stack is
        enabled and SHADOW_STACK_POINTER_OFFSET is defined.
---
 sysdeps/i386/__longjmp.S                      | 78 +++++++++++++++++++
 sysdeps/i386/bsd-_setjmp.S                    | 21 +++++
 sysdeps/i386/bsd-setjmp.S                     | 21 +++++
 sysdeps/i386/setjmp.S                         | 21 +++++
 .../unix/sysv/linux/i386/____longjmp_chk.S    | 40 ++++++++++
 sysdeps/unix/sysv/linux/x86/Makefile          |  1 -
 .../unix/sysv/linux/x86_64/____longjmp_chk.S  | 41 ++++++++++
 sysdeps/x86/Makefile                          |  1 +
 sysdeps/x86/jmp_buf-ssp.sym                   |  1 +
 sysdeps/x86_64/__longjmp.S                    | 45 +++++++++++
 sysdeps/x86_64/setjmp.S                       | 21 +++++
 11 files changed, 290 insertions(+), 1 deletion(-)
 create mode 100644 sysdeps/x86/jmp_buf-ssp.sym

diff --git a/sysdeps/i386/__longjmp.S b/sysdeps/i386/__longjmp.S
index b38333bead..8b5d7f3d44 100644
--- a/sysdeps/i386/__longjmp.S
+++ b/sysdeps/i386/__longjmp.S
@@ -18,14 +18,57 @@
 
 #include <sysdep.h>
 #include <jmpbuf-offsets.h>
+#include <jmp_buf-ssp.h>
 #include <asm-syntax.h>
 #include <stap-probe.h>
 
+/* Don't restore shadow stack register if
+   1. Shadow stack isn't enabled.  Or
+   2. __longjmp is defined for __longjmp_cancel.
+ */
+#if !defined __CET__ || (__CET__ & 2) == 0 || defined __longjmp
+# undef SHADOW_STACK_POINTER_OFFSET
+#endif
+
  .text
 ENTRY (__longjmp)
 #ifdef PTR_DEMANGLE
  movl 4(%esp), %eax /* User's jmp_buf in %eax.  */
 
+# ifdef SHADOW_STACK_POINTER_OFFSET
+#  if IS_IN (libc) && defined SHARED && defined FEATURE_1_OFFSET
+ /* Check if Shadow Stack is enabled.  */
+ testl $(1 << 1), %gs:FEATURE_1_OFFSET
+ jz .Lnoadj
+#  else
+ xorl %edx, %edx
+#  endif
+ /* Check and adjust the Shadow-Stack-Pointer.  */
+ rdsspd %edx
+ /* And compare it with the saved ssp value.  */
+ subl SHADOW_STACK_POINTER_OFFSET(%eax), %edx
+ je .Lnoadj
+ /* Count the number of frames to adjust and adjust it
+   with incssp instruction.  The instruction can adjust
+   the ssp by [0..255] value only thus use a loop if
+   the number of frames is bigger than 255.  */
+ negl %edx
+ shrl $2, %edx
+ /* NB: We saved Shadow-Stack-Pointer of setjmp.  Since we are
+       restoring Shadow-Stack-Pointer of setjmp's caller, we
+       need to unwind shadow stack by one more frame.  */
+ addl $1, %edx
+ cmpl $255, %edx
+ jbe .Lonetime
+.Loopadj:
+ incsspd %edx
+ subl $255, %edx
+ cmpl $255, %edx
+ ja .Loopadj
+.Lonetime:
+ incsspd %edx
+.Lnoadj:
+# endif
  /* Save the return address now.  */
  movl (JB_PC*4)(%eax), %edx
  /* Get the stack pointer.  */
@@ -56,6 +99,41 @@ ENTRY (__longjmp)
 #else
  movl 4(%esp), %ecx /* User's jmp_buf in %ecx.  */
  movl 8(%esp), %eax /* Second argument is return value.  */
+# ifdef SHADOW_STACK_POINTER_OFFSET
+#  if IS_IN (libc) && defined SHARED
+ /* Check if Shadow Stack is enabled.  */
+ testl $(1 << 1), %gs:FEATURE_1_OFFSET
+ jz .Lnoadj
+#  endif
+ /* Check and adjust the Shadow-Stack-Pointer.  */
+ xorl %edx, %edx
+ /* Get the current ssp.  */
+ rdsspd %edx
+ /* And compare it with the saved ssp value.  */
+ subl SHADOW_STACK_POINTER_OFFSET(%ecx), %edx
+ je .Lnoadj
+ /* Count the number of frames to adjust and adjust it
+   with incssp instruction.  The instruction can adjust
+   the ssp by [0..255] value only thus use a loop if
+   the number of frames is bigger than 255.  */
+ negl %edx
+ shrl $2, %edx
+ /* NB: We saved Shadow-Stack-Pointer of setjmp.  Since we are
+       restoring Shadow-Stack-Pointer of setjmp's caller, we
+       need to unwind shadow stack by one more frame.  */
+ addl $1, %edx
+ cmpl $255, %edx
+ jbe .Lonetime
+ movl $255, %ebx
+.Loopadj:
+ incsspd %ebx
+ subl $255, %edx
+ cmpl $255, %edx
+ ja .Loopadj
+.Lonetime:
+ incsspd %edx
+.Lnoadj:
+# endif
  /* Save the return address now.  */
  movl (JB_PC*4)(%ecx), %edx
  LIBC_PROBE (longjmp, 3, 4@%ecx, -4@%eax, 4@%edx)
diff --git a/sysdeps/i386/bsd-_setjmp.S b/sysdeps/i386/bsd-_setjmp.S
index a626cc6d22..5b09e5dbf8 100644
--- a/sysdeps/i386/bsd-_setjmp.S
+++ b/sysdeps/i386/bsd-_setjmp.S
@@ -22,12 +22,18 @@
 
 #include <sysdep.h>
 #include <jmpbuf-offsets.h>
+#include <jmp_buf-ssp.h>
 #include <stap-probe.h>
 
 #define PARMS 4 /* no space for saved regs */
 #define JMPBUF PARMS
 #define SIGMSK JMPBUF+4
 
+/* Don't save shadow stack register if shadow stack isn't enabled.  */
+#if !defined __CET__ || (__CET__ & 2) == 0
+# undef SHADOW_STACK_POINTER_OFFSET
+#endif
+
 ENTRY (_setjmp)
 
  xorl %eax, %eax
@@ -51,6 +57,21 @@ ENTRY (_setjmp)
  movl %ebp, (JB_BP*4)(%edx) /* Save caller's frame pointer.  */
 
  movl %eax, JB_SIZE(%edx) /* No signal mask set.  */
+#ifdef SHADOW_STACK_POINTER_OFFSET
+# if IS_IN (libc) && defined SHARED && defined FEATURE_1_OFFSET
+ /* Check if Shadow Stack is enabled.  */
+ testl $(1 << 1), %gs:FEATURE_1_OFFSET
+ jz .Lskip_ssp
+# else
+ xorl %ecx, %ecx
+# endif
+ /* Get the current Shadow-Stack-Pointer and save it.  */
+ rdsspd %ecx
+ movl %ecx, SHADOW_STACK_POINTER_OFFSET(%edx)
+# if IS_IN (libc) && defined SHARED && defined FEATURE_1_OFFSET
+.Lskip_ssp:
+# endif
+#endif
  ret
 END (_setjmp)
 libc_hidden_def (_setjmp)
diff --git a/sysdeps/i386/bsd-setjmp.S b/sysdeps/i386/bsd-setjmp.S
index 2da8b73c49..5f5db092e5 100644
--- a/sysdeps/i386/bsd-setjmp.S
+++ b/sysdeps/i386/bsd-setjmp.S
@@ -22,12 +22,18 @@
 
 #include <sysdep.h>
 #include <jmpbuf-offsets.h>
+#include <jmp_buf-ssp.h>
 #include <stap-probe.h>
 
 #define PARMS  4 /* no space for saved regs */
 #define JMPBUF PARMS
 #define SIGMSK JMPBUF+4
 
+/* Don't save shadow stack register if shadow stack isn't enabled.  */
+#if !defined __CET__ || (__CET__ & 2) == 0
+# undef SHADOW_STACK_POINTER_OFFSET
+#endif
+
 ENTRY (setjmp)
  /* Note that we have to use a non-exported symbol in the next
    jump since otherwise gas will emit it as a jump through the
@@ -51,6 +57,21 @@ ENTRY (setjmp)
 #endif
  movl %ecx, (JB_PC*4)(%eax)
  movl %ebp, (JB_BP*4)(%eax) /* Save caller's frame pointer.  */
+#ifdef SHADOW_STACK_POINTER_OFFSET
+# if IS_IN (libc) && defined SHARED && defined FEATURE_1_OFFSET
+ /* Check if Shadow Stack is enabled.  */
+ testl $(1 << 1), %gs:FEATURE_1_OFFSET
+ jz .Lskip_ssp
+# else
+ xorl %ecx, %ecx
+# endif
+ /* Get the current Shadow-Stack-Pointer and save it.  */
+ rdsspd %ecx
+ movl %ecx, SHADOW_STACK_POINTER_OFFSET(%eax)
+# if IS_IN (libc) && defined SHARED && defined FEATURE_1_OFFSET
+.Lskip_ssp:
+# endif
+#endif
 
  /* Call __sigjmp_save.  */
  pushl $1
diff --git a/sysdeps/i386/setjmp.S b/sysdeps/i386/setjmp.S
index 6a08701717..31e26fd6d4 100644
--- a/sysdeps/i386/setjmp.S
+++ b/sysdeps/i386/setjmp.S
@@ -18,6 +18,7 @@
 
 #include <sysdep.h>
 #include <jmpbuf-offsets.h>
+#include <jmp_buf-ssp.h>
 #include <asm-syntax.h>
 #include <stap-probe.h>
 
@@ -25,6 +26,11 @@
 #define JMPBUF PARMS
 #define SIGMSK JMPBUF+4
 
+/* Don't save shadow stack register if shadow stack isn't enabled.  */
+#if !defined __CET__ || (__CET__ & 2) == 0
+# undef SHADOW_STACK_POINTER_OFFSET
+#endif
+
 ENTRY (__sigsetjmp)
 
  movl JMPBUF(%esp), %eax
@@ -46,6 +52,21 @@ ENTRY (__sigsetjmp)
  movl %ecx, (JB_PC*4)(%eax)
  movl %ebp, (JB_BP*4)(%eax) /* Save caller's frame pointer.  */
 
+#ifdef SHADOW_STACK_POINTER_OFFSET
+# if IS_IN (libc) && defined SHARED && defined FEATURE_1_OFFSET
+ /* Check if Shadow Stack is enabled.  */
+ testl $(1 << 1), %gs:FEATURE_1_OFFSET
+ jz .Lskip_ssp
+# else
+ xorl %ecx, %ecx
+# endif
+ /* Get the current Shadow-Stack-Pointer and save it.  */
+ rdsspd %ecx
+ movl %ecx, SHADOW_STACK_POINTER_OFFSET(%eax)
+# if IS_IN (libc) && defined SHARED && defined FEATURE_1_OFFSET
+.Lskip_ssp:
+# endif
+#endif
 #if IS_IN (rtld)
  /* In ld.so we never save the signal mask.  */
  xorl %eax, %eax
diff --git a/sysdeps/unix/sysv/linux/i386/____longjmp_chk.S b/sysdeps/unix/sysv/linux/i386/____longjmp_chk.S
index 3452433112..7b4f4caa35 100644
--- a/sysdeps/unix/sysv/linux/i386/____longjmp_chk.S
+++ b/sysdeps/unix/sysv/linux/i386/____longjmp_chk.S
@@ -17,9 +17,14 @@
 
 #include <sysdep.h>
 #include <jmpbuf-offsets.h>
+#include <jmp_buf-ssp.h>
 #include <asm-syntax.h>
 #include <stap-probe.h>
 
+/* Don't restore shadow stack register if shadow stack isn't enabled.  */
+#if !defined __CET__ || (__CET__ & 2) == 0
+# undef SHADOW_STACK_POINTER_OFFSET
+#endif
 
  .section .rodata.str1.1,"aMS",@progbits,1
  .type longjmp_msg,@object
@@ -46,6 +51,41 @@ longjmp_msg:
 ENTRY (____longjmp_chk)
  movl 4(%esp), %ecx /* User's jmp_buf in %ecx.  */
 
+#ifdef SHADOW_STACK_POINTER_OFFSET
+# if IS_IN (libc) && defined SHARED && defined FEATURE_1_OFFSET
+ /* Check if Shadow Stack is enabled.  */
+ testl   $(1 << 1), %gs:FEATURE_1_OFFSET
+ jz      .Lnoadj
+# else
+ xorl %edx, %edx
+# endif
+ /* Check and adjust the Shadow-Stack-Pointer.  */
+ rdsspd %edx
+ /* And compare it with the saved ssp value.  */
+ subl SHADOW_STACK_POINTER_OFFSET(%ecx), %edx
+ je .Lnoadj
+ /* Count the number of frames to adjust and adjust it
+   with incssp instruction.  The instruction can adjust
+   the ssp by [0..255] value only thus use a loop if
+   the number of frames is bigger than 255.  */
+ negl %edx
+ shrl $2, %edx
+ /* NB: We saved Shadow-Stack-Pointer of setjmp.  Since we are
+       restoring Shadow-Stack-Pointer of setjmp's caller, we
+       need to unwind shadow stack by one more frame.  */
+ addl $1, %edx
+ cmpl $255, %edx
+ jbe .Lonetime
+ movl $255, %ebx
+.Loopadj:
+ incsspd %ebx
+ subl $255, %edx
+ cmpl $255, %edx
+ ja .Loopadj
+.Lonetime:
+ incsspd %edx
+.Lnoadj:
+#endif
  /* Save the return address now.  */
  movl (JB_PC*4)(%ecx), %edx
  /* Get the stack pointer.  */
diff --git a/sysdeps/unix/sysv/linux/x86/Makefile b/sysdeps/unix/sysv/linux/x86/Makefile
index c55a43e58d..111ff9ff58 100644
--- a/sysdeps/unix/sysv/linux/x86/Makefile
+++ b/sysdeps/unix/sysv/linux/x86/Makefile
@@ -21,6 +21,5 @@ sysdep_routines += dl-vdso
 endif
 
 ifeq ($(subdir),setjmp)
-gen-as-const-headers += jmp_buf-ssp.sym
 tests += tst-saved_mask-1
 endif
diff --git a/sysdeps/unix/sysv/linux/x86_64/____longjmp_chk.S b/sysdeps/unix/sysv/linux/x86_64/____longjmp_chk.S
index 8a9f2e1a3c..d42289221d 100644
--- a/sysdeps/unix/sysv/linux/x86_64/____longjmp_chk.S
+++ b/sysdeps/unix/sysv/linux/x86_64/____longjmp_chk.S
@@ -20,7 +20,13 @@
 #include <asm-syntax.h>
 #include <stap-probe.h>
 
+/* Don't restore shadow stack register if shadow stack isn't enabled.  */
+#if !defined __CET__ || (__CET__ & 2) == 0
+# undef SHADOW_STACK_POINTER_OFFSET
+#endif
+
 #include <sigaltstack-offsets.h>
+#include <jmp_buf-ssp.h>
 
  .section .rodata.str1.1,"aMS",@progbits,1
  .type longjmp_msg,@object
@@ -105,6 +111,41 @@ ENTRY(____longjmp_chk)
  cfi_restore (%rsi)
 
 .Lok:
+#ifdef SHADOW_STACK_POINTER_OFFSET
+# if IS_IN (libc) && defined SHARED && defined FEATURE_1_OFFSET
+ /* Check if Shadow Stack is enabled.  */
+ testl $(1 << 1), %fs:FEATURE_1_OFFSET
+ jz .Lnoadj
+# else
+ xorl %eax, %eax
+# endif
+ /* Check and adjust the Shadow-Stack-Pointer.  */
+ rdsspq %rax
+ /* And compare it with the saved ssp value.  */
+ subq SHADOW_STACK_POINTER_OFFSET(%rdi), %rax
+ je .Lnoadj
+ /* Count the number of frames to adjust and adjust it
+   with incssp instruction.  The instruction can adjust
+   the ssp by [0..255] value only thus use a loop if
+   the number of frames is bigger than 255.  */
+ negq %rax
+ shrq $3, %rax
+ /* NB: We saved Shadow-Stack-Pointer of setjmp.  Since we are
+       restoring Shadow-Stack-Pointer of setjmp's caller, we
+       need to unwind shadow stack by one more frame.  */
+ addq $1, %rax
+ cmpq $255, %rax
+ jbe .Lonetime
+ movl $255, %ebx
+.Loopadj:
+ incsspq %rbx
+ subq $255, %rax
+ cmpq $255, %rax
+ ja .Loopadj
+.Lonetime:
+ incsspq %rax
+.Lnoadj:
+#endif
  LIBC_PROBE (longjmp, 3, LP_SIZE@%RDI_LP, -4@%esi, LP_SIZE@%RDX_LP)
  /* We add unwind information for the target here.  */
  cfi_def_cfa(%rdi, 0)
diff --git a/sysdeps/x86/Makefile b/sysdeps/x86/Makefile
index d25d6f0ae4..65292f4032 100644
--- a/sysdeps/x86/Makefile
+++ b/sysdeps/x86/Makefile
@@ -10,5 +10,6 @@ tests-static += tst-get-cpu-features-static
 endif
 
 ifeq ($(subdir),setjmp)
+gen-as-const-headers += jmp_buf-ssp.sym
 sysdep_routines += __longjmp_cancel
 endif
diff --git a/sysdeps/x86/jmp_buf-ssp.sym b/sysdeps/x86/jmp_buf-ssp.sym
new file mode 100644
index 0000000000..1aaaedc9ec
--- /dev/null
+++ b/sysdeps/x86/jmp_buf-ssp.sym
@@ -0,0 +1 @@
+-- FIXME: Define SHADOW_STACK_POINTER_OFFSET to support shadow stack.
diff --git a/sysdeps/x86_64/__longjmp.S b/sysdeps/x86_64/__longjmp.S
index a487e0efd0..a9ebe3226e 100644
--- a/sysdeps/x86_64/__longjmp.S
+++ b/sysdeps/x86_64/__longjmp.S
@@ -17,9 +17,18 @@
 
 #include <sysdep.h>
 #include <jmpbuf-offsets.h>
+#include <jmp_buf-ssp.h>
 #include <asm-syntax.h>
 #include <stap-probe.h>
 
+/* Don't restore shadow stack register if
+   1. Shadow stack isn't enabled.  Or
+   2. __longjmp is defined for __longjmp_cancel.
+ */
+#if !defined __CET__ || (__CET__ & 2) == 0 || defined __longjmp
+# undef SHADOW_STACK_POINTER_OFFSET
+#endif
+
 /* Jump to the position specified by ENV, causing the
    setjmp call there to return VAL, or 1 if VAL is 0.
    void __longjmp (__jmp_buf env, int val).  */
@@ -41,6 +50,42 @@ ENTRY(__longjmp)
  shlq $32, %rax
  orq %rax, %r9
 # endif
+#endif
+#ifdef SHADOW_STACK_POINTER_OFFSET
+# if IS_IN (libc) && defined SHARED && defined FEATURE_1_OFFSET
+ /* Check if Shadow Stack is enabled.  */
+ testl $(1 << 1), %fs:FEATURE_1_OFFSET
+ jz .Lnoadj
+# else
+ xorl %eax, %eax
+# endif
+ /* Check and adjust the Shadow-Stack-Pointer.  */
+ /* Get the current ssp.  */
+ rdsspq %rax
+ /* And compare it with the saved ssp value.  */
+ subq SHADOW_STACK_POINTER_OFFSET(%rdi), %rax
+ je .Lnoadj
+ /* Count the number of frames to adjust and adjust it
+   with incssp instruction.  The instruction can adjust
+   the ssp by [0..255] value only thus use a loop if
+   the number of frames is bigger than 255.  */
+ negq %rax
+ shrq $3, %rax
+ /* NB: We saved Shadow-Stack-Pointer of setjmp.  Since we are
+       restoring Shadow-Stack-Pointer of setjmp's caller, we
+       need to unwind shadow stack by one more frame.  */
+ addq $1, %rax
+ cmpq $255, %rax
+ jbe .Lonetime
+ movl $255, %ebx
+.Loopadj:
+ incsspq %rbx
+ subq $255, %rax
+ cmpq $255, %rax
+ ja .Loopadj
+.Lonetime:
+ incsspq %rax
+.Lnoadj:
 #endif
  LIBC_PROBE (longjmp, 3, LP_SIZE@%RDI_LP, -4@%esi, LP_SIZE@%RDX_LP)
  /* We add unwind information for the target here.  */
diff --git a/sysdeps/x86_64/setjmp.S b/sysdeps/x86_64/setjmp.S
index e0a648e3e4..bd9bb0ee6b 100644
--- a/sysdeps/x86_64/setjmp.S
+++ b/sysdeps/x86_64/setjmp.S
@@ -18,9 +18,15 @@
 
 #include <sysdep.h>
 #include <jmpbuf-offsets.h>
+#include <jmp_buf-ssp.h>
 #include <asm-syntax.h>
 #include <stap-probe.h>
 
+/* Don't save shadow stack register if shadow stack isn't enabled.  */
+#if !defined __CET__ || (__CET__ & 2) == 0
+# undef SHADOW_STACK_POINTER_OFFSET
+#endif
+
 ENTRY (__sigsetjmp)
  /* Save registers.  */
  movq %rbx, (JB_RBX*8)(%rdi)
@@ -54,6 +60,21 @@ ENTRY (__sigsetjmp)
 #endif
  movq %rax, (JB_PC*8)(%rdi)
 
+#ifdef SHADOW_STACK_POINTER_OFFSET
+# if IS_IN (libc) && defined SHARED && defined FEATURE_1_OFFSET
+ /* Check if Shadow Stack is enabled.  */
+ testl $(1 << 1), %fs:FEATURE_1_OFFSET
+ jz .Lskip_ssp
+# else
+ xorl %eax, %eax
+# endif
+ /* Get the current Shadow-Stack-Pointer and save it.  */
+ rdsspq %rax
+ movq %rax, SHADOW_STACK_POINTER_OFFSET(%rdi)
+# if IS_IN (libc) && defined SHARED && defined FEATURE_1_OFFSET
+.Lskip_ssp:
+# endif
+#endif
 #if IS_IN (rtld)
  /* In ld.so we never save the signal mask.  */
  xorl %eax, %eax
--
2.17.1

Reply | Threaded
Open this post in threaded view
|

[PATCH 03/24] x86: Support IBT and SHSTK in Intel CET [BZ #21598]

H.J. Lu-30
In reply to this post by H.J. Lu-30
Intel Control-flow Enforcement Technology (CET) instructions:

https://software.intel.com/sites/default/files/managed/4d/2a/control-flow-enforcement-technology-preview.pdf

includes Indirect Branch Tracking (IBT) and Shadow Stack (SHSTK).

GNU_PROPERTY_X86_FEATURE_1_IBT is added to GNU program property to
indicate that all executable sections are compatible with IBT when
ENDBR instruction starts each valid target where an indirect branch
instruction can land.  Linker sets GNU_PROPERTY_X86_FEATURE_1_IBT on
output only if it is set on all relocatable inputs.

On an IBT capable processor, the following steps should be taken:

1. When loading an executable without an interpreter, enable IBT and
lock IBT if GNU_PROPERTY_X86_FEATURE_1_IBT is set on the executable.
2. When loading an executable with an interpreter, enable IBT if
GNU_PROPERTY_X86_FEATURE_1_IBT is set on the interpreter.
  a. If GNU_PROPERTY_X86_FEATURE_1_IBT isn't set on the executable,
     disable IBT.
  b. Lock IBT.
3. If IBT is enabled, when loading a shared object without
GNU_PROPERTY_X86_FEATURE_1_IBT:
  a. If legacy interwork is allowed, then mark all pages in executable
     PT_LOAD segments in legacy code page bitmap.  Failure of legacy code
     page bitmap allocation causes an error.
  b. If legacy interwork isn't allowed, it causes an error.

GNU_PROPERTY_X86_FEATURE_1_SHSTK is added to GNU program property to
indicate that all executable sections are compatible with SHSTK where
return address popped from shadow stack always matches return address
popped from normal stack.  Linker sets GNU_PROPERTY_X86_FEATURE_1_SHSTK
on output only if it is set on all relocatable inputs.

On a SHSTK capable processor, the following steps should be taken:

1. When loading an executable without an interpreter, enable SHSTK if
GNU_PROPERTY_X86_FEATURE_1_SHSTK is set on the executable.
2. When loading an executable with an interpreter, enable SHSTK if
GNU_PROPERTY_X86_FEATURE_1_SHSTK is set on the interpreter.
  a. If GNU_PROPERTY_X86_FEATURE_1_SHSTK isn't set on the executable
     or any shared objects loaded via the DT_NEEDED tag, disable SHSTK.
  b. Otherwise lock SHSTK.
3. After SHSTK is enabled, it is an error to load a shared object
without GNU_PROPERTY_X86_FEATURE_1_SHSTK.

To enable CET support in glibc, --enable-cet is required to configure
glibc.  When CET is enabled, both compiler and assembler must support
CET.  Otherwise, it is a configure-time error.

To support CET run-time control,

1. _dl_x86_feature_1 is added to the writable ld.so namespace to indicate
if IBT or SHSTK are enabled at run-time.  It should be initialized by
init_cpu_features.
2. For dynamic executables:
   a. An l_cet field is added to struct link_map to indicate if IBT or
      SHSTK is enabled in an ELF module.  The optional DL_PROCESS_PT_NOTE
      is called to process PT_NOTE segment for GNU program property and
      set l_cet.
   b. _dl_start_user calls _dl_cet_init, instead of _dl_init, which sets
      up IBT and SHSTK.
   c. DL_OPEN_CHECK is added to check IBT and SHSTK compatibility when
      dlopening a shared object.
3. Replace i386 _dl_runtime_resolve and _dl_runtime_profile with
_dl_runtime_resolve_shstk and _dl_runtime_profile_shstk, respectively if
SHSTK is enabled.

<cet.h> from CET-enabled GCC is automatically included by assembly codes
to add GNU_PROPERTY_X86_FEATURE_1_IBT and GNU_PROPERTY_X86_FEATURE_1_SHSTK
to GNU program property.  _CET_ENDBR is added at the entrance of all
assembly functions whose address may be taken.  _CET_NOTRACK is used to
insert NOTRACK prefix with indirect jump table to support IBT.  It is
defined as notrack when _CET_NOTRACK is defined in <cet.h>.

         [BZ #21598]
        * configure.ac: Add --enable-cet.
        * configure: Regenerated.
        * elf/dl-load.c (filebuf): Moved before "dynamic-link.h".
        (_dl_map_object_from_fd): Call DL_PROCESS_PT_NOTE on PT_NOTE
        segment if DL_PROCESS_PT_NOTE is defined.
        * elf/rtld.c (dl_main): Call DL_PROCESS_PT_NOTE on PT_NOTE
        segment if DL_PROCESS_PT_NOTE is defined.  Call DL_MAIN_CHECK
        if DL_MAIN_CHECK is defined.
        * elf/dl-open.c (dl_open_worker): Call DL_OPEN_CHECK if it is
        defined.
        * sysdeps/unix/sysv/linux/i386/dl-cet.c: New file.
        * sysdeps/unix/sysv/linux/i386/dl-machine.h: Likewise.
        * sysdeps/unix/sysv/linux/x86/check-cet.awk: Likewise.
        * sysdeps/unix/sysv/linux/x86/configure: Likewise.
        * sysdeps/unix/sysv/linux/x86/configure.ac: Likewise.
        * sysdeps/unix/sysv/linux/x86/dl-cet.c: Likewise.
        * sysdeps/unix/sysv/linux/x86/dl-cet.h: Likewise.
        * sysdeps/unix/sysv/linux/x86/dl-procruntime.c: Likewise.
        * sysdeps/unix/sysv/linux/x86/ldsodefs.h: Likewise.
        * sysdeps/unix/sysv/linux/x86/link_map.h: Likewise.
        * sysdeps/unix/sysv/linux/x86_64/dl-machine.h: Likewise.
        * sysdeps/i386/dl-trampoline.S (_dl_runtime_resolve): Add
        _CET_ENDBR.
        (_dl_runtime_profile): Likewise.
        (_dl_runtime_resolve_shstk): New.
        (_dl_runtime_profile_shstk): Likewise.
        * sysdeps/unix/sysv/linux/x86/Makefile (sysdep-dl-routines): Add
        dl-cet if CET is enabled.
        (CFLAGS-.o): Add -fcf-protection if CET is enabled.
        (CFLAGS-.os): Likewise.
        (CFLAGS-.op): Likewise.
        (CFLAGS-.oS): Likewise.
        (asm-CPPFLAGS): Add -fcf-protection -include cet.h if CET
        is enabled.
        (tests-special): Add $(objpfx)check-cet.out.
        (cet-built-dso): New.
        (+$(cet-built-dso:=.note)): Likewise.
        (common-generated): Add $(cet-built-dso:$(common-objpfx)%=%.note).
        ($(objpfx)check-cet.out): New.
        (generated): Add check-cet.out.
        * sysdeps/x86/sysdep.h (_CET_NOTRACK): New.
        (_CET_ENDBR): Define if not defined.
        (ENTRY): Add _CET_ENDBR.
        * sysdeps/x86_64/dl-trampoline.h (_dl_runtime_resolve): Add
        _CET_ENDBR.
        (_dl_runtime_profile): Likewise.
---
 configure                                    |  11 ++
 configure.ac                                 |   6 +
 elf/dl-load.c                                |  61 +++---
 elf/dl-open.c                                |   4 +
 elf/rtld.c                                   |  12 ++
 sysdeps/i386/dl-trampoline.S                 |  72 +++++++
 sysdeps/unix/sysv/linux/i386/dl-cet.c        |  67 +++++++
 sysdeps/unix/sysv/linux/i386/dl-machine.h    |  23 +++
 sysdeps/unix/sysv/linux/x86/Makefile         |  42 +++++
 sysdeps/unix/sysv/linux/x86/check-cet.awk    |  53 ++++++
 sysdeps/unix/sysv/linux/x86/configure        |  69 +++++++
 sysdeps/unix/sysv/linux/x86/configure.ac     |  46 +++++
 sysdeps/unix/sysv/linux/x86/dl-cet.c         | 186 +++++++++++++++++++
 sysdeps/unix/sysv/linux/x86/dl-cet.h         | 138 ++++++++++++++
 sysdeps/unix/sysv/linux/x86/dl-procruntime.c |  57 ++++++
 sysdeps/unix/sysv/linux/x86/ldsodefs.h       |  29 +++
 sysdeps/unix/sysv/linux/x86/link_map.h       |  26 +++
 sysdeps/unix/sysv/linux/x86_64/dl-machine.h  |  27 +++
 sysdeps/x86/cpu-features.h                   |   5 +
 sysdeps/x86/sysdep.h                         |   8 +
 sysdeps/x86_64/dl-trampoline.h               |   2 +
 21 files changed, 919 insertions(+), 25 deletions(-)
 create mode 100644 sysdeps/unix/sysv/linux/i386/dl-cet.c
 create mode 100644 sysdeps/unix/sysv/linux/i386/dl-machine.h
 create mode 100644 sysdeps/unix/sysv/linux/x86/check-cet.awk
 create mode 100644 sysdeps/unix/sysv/linux/x86/configure
 create mode 100644 sysdeps/unix/sysv/linux/x86/configure.ac
 create mode 100644 sysdeps/unix/sysv/linux/x86/dl-cet.c
 create mode 100644 sysdeps/unix/sysv/linux/x86/dl-cet.h
 create mode 100644 sysdeps/unix/sysv/linux/x86/dl-procruntime.c
 create mode 100644 sysdeps/unix/sysv/linux/x86/ldsodefs.h
 create mode 100644 sysdeps/unix/sysv/linux/x86/link_map.h
 create mode 100644 sysdeps/unix/sysv/linux/x86_64/dl-machine.h

diff --git a/configure b/configure
index 7a8bd3f817..f512c401dd 100755
--- a/configure
+++ b/configure
@@ -788,6 +788,7 @@ enable_nscd
 enable_pt_chown
 enable_tunables
 enable_mathvec
+enable_cet
 with_cpu
 '
       ac_precious_vars='build_alias
@@ -1461,6 +1462,8 @@ Optional Features:
                           'no' and 'valstring'
   --enable-mathvec        Enable building and installing mathvec [default
                           depends on architecture]
+  --enable-cet            enable Intel Control-flow Enforcement Technology
+                          (CET), x86 only
 
 Optional Packages:
   --with-PACKAGE[=ARG]    use PACKAGE [ARG=yes]
@@ -3741,6 +3744,14 @@ else
 fi
 
 
+# Check whether --enable-cet was given.
+if test "${enable_cet+set}" = set; then :
+  enableval=$enable_cet; enable_cet=$enableval
+else
+  enable_cet=no
+fi
+
+
 # We keep the original values in `$config_*' and never modify them, so we
 # can write them unchanged into config.make.  Everything else uses
 # $machine, $vendor, and $os, and changes them whenever convenient.
diff --git a/configure.ac b/configure.ac
index ca1282a6b3..d342227f7a 100644
--- a/configure.ac
+++ b/configure.ac
@@ -453,6 +453,12 @@ AC_ARG_ENABLE([mathvec],
       [build_mathvec=$enableval],
       [build_mathvec=notset])
 
+AC_ARG_ENABLE([cet],
+      AC_HELP_STRING([--enable-cet],
+     [enable Intel Control-flow Enforcement Technology (CET), x86 only]),
+      [enable_cet=$enableval],
+      [enable_cet=no])
+
 # We keep the original values in `$config_*' and never modify them, so we
 # can write them unchanged into config.make.  Everything else uses
 # $machine, $vendor, and $os, and changes them whenever convenient.
diff --git a/elf/dl-load.c b/elf/dl-load.c
index e81601f36d..6197124d02 100644
--- a/elf/dl-load.c
+++ b/elf/dl-load.c
@@ -30,6 +30,32 @@
 #include <sys/param.h>
 #include <sys/stat.h>
 #include <sys/types.h>
+
+/* Type for the buffer we put the ELF header and hopefully the program
+   header.  This buffer does not really have to be too large.  In most
+   cases the program header follows the ELF header directly.  If this
+   is not the case all bets are off and we can make the header
+   arbitrarily large and still won't get it read.  This means the only
+   question is how large are the ELF and program header combined.  The
+   ELF header 32-bit files is 52 bytes long and in 64-bit files is 64
+   bytes long.  Each program header entry is again 32 and 56 bytes
+   long respectively.  I.e., even with a file which has 10 program
+   header entries we only have to read 372B/624B respectively.  Add to
+   this a bit of margin for program notes and reading 512B and 832B
+   for 32-bit and 64-bit files respecitvely is enough.  If this
+   heuristic should really fail for some file the code in
+   `_dl_map_object_from_fd' knows how to recover.  */
+struct filebuf
+{
+  ssize_t len;
+#if __WORDSIZE == 32
+# define FILEBUF_SIZE 512
+#else
+# define FILEBUF_SIZE 832
+#endif
+  char buf[FILEBUF_SIZE] __attribute__ ((aligned (__alignof (ElfW(Ehdr)))));
+};
+
 #include "dynamic-link.h"
 #include <abi-tag.h>
 #include <stackinfo.h>
@@ -69,31 +95,6 @@ int __stack_prot attribute_hidden attribute_relro
 #endif
 
 
-/* Type for the buffer we put the ELF header and hopefully the program
-   header.  This buffer does not really have to be too large.  In most
-   cases the program header follows the ELF header directly.  If this
-   is not the case all bets are off and we can make the header
-   arbitrarily large and still won't get it read.  This means the only
-   question is how large are the ELF and program header combined.  The
-   ELF header 32-bit files is 52 bytes long and in 64-bit files is 64
-   bytes long.  Each program header entry is again 32 and 56 bytes
-   long respectively.  I.e., even with a file which has 10 program
-   header entries we only have to read 372B/624B respectively.  Add to
-   this a bit of margin for program notes and reading 512B and 832B
-   for 32-bit and 64-bit files respecitvely is enough.  If this
-   heuristic should really fail for some file the code in
-   `_dl_map_object_from_fd' knows how to recover.  */
-struct filebuf
-{
-  ssize_t len;
-#if __WORDSIZE == 32
-# define FILEBUF_SIZE 512
-#else
-# define FILEBUF_SIZE 832
-#endif
-  char buf[FILEBUF_SIZE] __attribute__ ((aligned (__alignof (ElfW(Ehdr)))));
-};
-
 /* This is the decomposed LD_LIBRARY_PATH search path.  */
 static struct r_search_path_struct env_path_list attribute_relro;
 
@@ -1150,6 +1151,16 @@ _dl_map_object_from_fd (const char *name, const char *origname, int fd,
   l->l_relro_addr = ph->p_vaddr;
   l->l_relro_size = ph->p_memsz;
   break;
+
+#ifdef DL_PROCESS_PT_NOTE
+ case PT_NOTE:
+  if (DL_PROCESS_PT_NOTE (l, ph, fd, fbp))
+    {
+      errstring = N_("cannot process note segment");
+      goto call_lose;
+    }
+  break;
+#endif
  }
 
     if (__glibc_unlikely (nloadcmds == 0))
diff --git a/elf/dl-open.c b/elf/dl-open.c
index 9dde4acfbc..f3bc41cb47 100644
--- a/elf/dl-open.c
+++ b/elf/dl-open.c
@@ -291,6 +291,10 @@ dl_open_worker (void *a)
   _dl_debug_state ();
   LIBC_PROBE (map_complete, 3, args->nsid, r, new);
 
+#ifdef DL_OPEN_CHECK
+  DL_OPEN_CHECK (new);
+#endif
+
   /* Print scope information.  */
   if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_SCOPES))
     _dl_show_scope (new, 0);
diff --git a/elf/rtld.c b/elf/rtld.c
index 8c732adb68..04b692908d 100644
--- a/elf/rtld.c
+++ b/elf/rtld.c
@@ -1241,6 +1241,14 @@ of this helper program; chances are you did not intend to run this program.\n\
  main_map->l_relro_addr = ph->p_vaddr;
  main_map->l_relro_size = ph->p_memsz;
  break;
+
+#ifdef DL_PROCESS_PT_NOTE
+      case PT_NOTE:
+ if (DL_PROCESS_PT_NOTE (main_map, ph))
+  _dl_error_printf ("\
+ERROR: '%s': cannot process note segment.\n", _dl_argv[0]);
+ break;
+#endif
       }
 
   /* Adjust the address of the TLS initialization image in case
@@ -2110,6 +2118,10 @@ ERROR: ld.so: object '%s' cannot be loaded as audit interface: %s; ignored.\n",
  _dl_show_scope (l, 0);
     }
 
+#ifdef DL_MAIN_CHECK
+  DL_MAIN_CHECK (main_map, _dl_argv[0]);
+#endif
+
   if (prelinked)
     {
       if (main_map->l_info [ADDRIDX (DT_GNU_CONFLICT)] != NULL)
diff --git a/sysdeps/i386/dl-trampoline.S b/sysdeps/i386/dl-trampoline.S
index 8bf86f8fd9..6dc0319216 100644
--- a/sysdeps/i386/dl-trampoline.S
+++ b/sysdeps/i386/dl-trampoline.S
@@ -32,6 +32,7 @@
  .align 16
 _dl_runtime_resolve:
  cfi_adjust_cfa_offset (8)
+ _CET_ENDBR
  pushl %eax # Preserve registers otherwise clobbered.
  cfi_adjust_cfa_offset (4)
  pushl %ecx
@@ -50,14 +51,85 @@ _dl_runtime_resolve:
  cfi_endproc
  .size _dl_runtime_resolve, .-_dl_runtime_resolve
 
+# The SHSTK compatible version.
+ .text
+ .globl _dl_runtime_resolve_shstk
+ .type _dl_runtime_resolve_shstk, @function
+ cfi_startproc
+ .align 16
+_dl_runtime_resolve_shstk:
+ cfi_adjust_cfa_offset (8)
+ _CET_ENDBR
+ pushl %eax # Preserve registers otherwise clobbered.
+ cfi_adjust_cfa_offset (4)
+ pushl %edx
+ cfi_adjust_cfa_offset (4)
+ movl 12(%esp), %edx # Copy args pushed by PLT in register.  Note
+ movl 8(%esp), %eax # that `fixup' takes its parameters in regs.
+ call _dl_fixup # Call resolver.
+ movl (%esp), %edx # Get register content back.
+ movl %eax, %ecx # Store the function address.
+ movl 4(%esp), %eax # Get register content back.
+ addl $16, %esp # Adjust stack: PLT1 + PLT2 + %eax + %edx
+ cfi_adjust_cfa_offset (-16)
+ jmp *%ecx # Jump to function address.
+ cfi_endproc
+ .size _dl_runtime_resolve_shstk, .-_dl_runtime_resolve_shstk
 
 #ifndef PROF
+# The SHSTK compatible version.
+ .globl _dl_runtime_profile_shstk
+ .type _dl_runtime_profile_shstk, @function
+ cfi_startproc
+ .align 16
+_dl_runtime_profile_shstk:
+ cfi_adjust_cfa_offset (8)
+ _CET_ENDBR
+ pushl %esp
+ cfi_adjust_cfa_offset (4)
+ addl $8, (%esp) # Account for the pushed PLT data
+ pushl %ebp
+ cfi_adjust_cfa_offset (4)
+ pushl %eax # Preserve registers otherwise clobbered.
+ cfi_adjust_cfa_offset (4)
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+ pushl %edx
+ cfi_adjust_cfa_offset (4)
+ movl %esp, %ecx
+ subl $8, %esp
+ cfi_adjust_cfa_offset (8)
+ movl $-1, 4(%esp)
+ leal 4(%esp), %edx
+ movl %edx, (%esp)
+ pushl %ecx # Address of the register structure
+ cfi_adjust_cfa_offset (4)
+ movl 40(%esp), %ecx # Load return address
+ movl 36(%esp), %edx # Copy args pushed by PLT in register.  Note
+ movl 32(%esp), %eax # that `fixup' takes its parameters in regs.
+ call _dl_profile_fixup # Call resolver.
+ cfi_adjust_cfa_offset (-8)
+ movl (%esp), %edx
+ testl %edx, %edx
+ jns 1f
+ movl 4(%esp), %edx # Get register content back.
+ movl %eax, %ecx # Store the function address.
+ movl 12(%esp), %eax # Get register content back.
+ # Adjust stack: PLT1 + PLT2 + %esp + %ebp + %eax + %ecx + %edx
+ # + free.
+ addl $32, %esp
+ cfi_adjust_cfa_offset (-32)
+ jmp *%ecx # Jump to function address.
+ cfi_endproc
+ .size _dl_runtime_profile_shstk, .-_dl_runtime_profile_shstk
+
  .globl _dl_runtime_profile
  .type _dl_runtime_profile, @function
  cfi_startproc
  .align 16
 _dl_runtime_profile:
  cfi_adjust_cfa_offset (8)
+ _CET_ENDBR
  pushl %esp
  cfi_adjust_cfa_offset (4)
  addl $8, (%esp) # Account for the pushed PLT data
diff --git a/sysdeps/unix/sysv/linux/i386/dl-cet.c b/sysdeps/unix/sysv/linux/i386/dl-cet.c
new file mode 100644
index 0000000000..d9d80e151c
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/i386/dl-cet.c
@@ -0,0 +1,67 @@
+/* Linux/i386 CET initializers function.
+   Copyright (C) 2018 Free Software Foundation, Inc.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+
+#define LINKAGE static inline
+#define _dl_cet_check cet_check
+#include <sysdeps/unix/sysv/linux/x86/dl-cet.c>
+#undef _dl_cet_check
+
+#ifdef SHARED
+void
+_dl_cet_check (struct link_map *main_map, const char *program)
+{
+  cet_check (main_map, program);
+
+  if ((GL(dl_x86_feature_1)[0] & GNU_PROPERTY_X86_FEATURE_1_SHSTK))
+    {
+      /* Replace _dl_runtime_resolve and _dl_runtime_profile with
+         _dl_runtime_resolve_shstk and _dl_runtime_profile_shstk,
+ respectively if SHSTK is enabled.  */
+      extern void _dl_runtime_resolve (Elf32_Word) attribute_hidden;
+      extern void _dl_runtime_resolve_shstk (Elf32_Word) attribute_hidden;
+      extern void _dl_runtime_profile (Elf32_Word) attribute_hidden;
+      extern void _dl_runtime_profile_shstk (Elf32_Word) attribute_hidden;
+      unsigned int i;
+      struct link_map *l;
+      Elf32_Addr *got;
+
+      if (main_map->l_info[DT_JMPREL])
+ {
+  got = (Elf32_Addr *) D_PTR (main_map, l_info[DT_PLTGOT]);
+  if (got[2] == (Elf32_Addr) &_dl_runtime_resolve)
+    got[2] = (Elf32_Addr) &_dl_runtime_resolve_shstk;
+  else if (got[2] == (Elf32_Addr) &_dl_runtime_profile)
+    got[2] = (Elf32_Addr) &_dl_runtime_profile_shstk;
+ }
+
+      i = main_map->l_searchlist.r_nlist;
+      while (i-- > 0)
+ {
+  l = main_map->l_initfini[i];
+  if (l->l_info[DT_JMPREL])
+    {
+      got = (Elf32_Addr *) D_PTR (l, l_info[DT_PLTGOT]);
+      if (got[2] == (Elf32_Addr) &_dl_runtime_resolve)
+ got[2] = (Elf32_Addr) &_dl_runtime_resolve_shstk;
+      else if (got[2] == (Elf32_Addr) &_dl_runtime_profile)
+ got[2] = (Elf32_Addr) &_dl_runtime_profile_shstk;
+    }
+ }
+    }
+}
+#endif
diff --git a/sysdeps/unix/sysv/linux/i386/dl-machine.h b/sysdeps/unix/sysv/linux/i386/dl-machine.h
new file mode 100644
index 0000000000..3c2894edf2
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/i386/dl-machine.h
@@ -0,0 +1,23 @@
+/* Machine-dependent ELF dynamic relocation inline functions.
+   Linux/i386 version.
+   Copyright (C) 2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifdef __CET__
+# include <sysdeps/unix/sysv/linux/x86/dl-cet.h>
+#endif
+#include <sysdeps/i386/dl-machine.h>
diff --git a/sysdeps/unix/sysv/linux/x86/Makefile b/sysdeps/unix/sysv/linux/x86/Makefile
index 111ff9ff58..00be6cc472 100644
--- a/sysdeps/unix/sysv/linux/x86/Makefile
+++ b/sysdeps/unix/sysv/linux/x86/Makefile
@@ -23,3 +23,45 @@ endif
 ifeq ($(subdir),setjmp)
 tests += tst-saved_mask-1
 endif
+
+ifeq ($(enable-cet),yes)
+ifeq ($(subdir),elf)
+sysdep-dl-routines += dl-cet
+endif
+
+# Add -fcf-protection to CFLAGS when CET is enabled.
+CFLAGS-.o += -fcf-protection
+CFLAGS-.os += -fcf-protection
+CFLAGS-.op += -fcf-protection
+CFLAGS-.oS += -fcf-protection
+
+# Compile assembly codes with <cet.h> when CET is enabled.
+asm-CPPFLAGS += -fcf-protection -include cet.h
+
+ifeq ($(subdir),elf)
+ifeq (yes,$(build-shared))
+tests-special += $(objpfx)check-cet.out
+endif
+
+# FIXME: Can't use all-built-dso in elf/Makefile since this file is
+# processed before elf/Makefile.  Duplicate it here.
+cet-built-dso := $(common-objpfx)elf/ld.so $(common-objpfx)libc.so \
+ $(filter-out $(common-objpfx)linkobj/libc.so, \
+      $(sort $(wildcard $(addprefix $(common-objpfx), \
+    */lib*.so \
+    iconvdata/*.so))))
+
+$(cet-built-dso:=.note): %.note: %
+ @rm -f $@T
+ LC_ALL=C $(READELF) -n $< > $@T
+ test -s $@T
+ mv -f $@T $@
+common-generated += $(cet-built-dso:$(common-objpfx)%=%.note)
+
+$(objpfx)check-cet.out: $(..)sysdeps/unix/sysv/linux/x86/check-cet.awk \
+ $(cet-built-dso:=.note)
+ LC_ALL=C $(AWK) -f $^ > $@; \
+ $(evaluate-test)
+generated += check-cet.out
+endif
+endif
diff --git a/sysdeps/unix/sysv/linux/x86/check-cet.awk b/sysdeps/unix/sysv/linux/x86/check-cet.awk
new file mode 100644
index 0000000000..380d998caf
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/x86/check-cet.awk
@@ -0,0 +1,53 @@
+# Verify that all shared objects contain the CET property.
+# Copyright (C) 2018 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+#
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, see
+# <http://www.gnu.org/licenses/>.
+
+# This awk script expects to get command-line files that are each
+# the output of 'readelf -n' on a single shared object.
+# It exits successfully (0) if all of them contained the CET property.
+# It fails (1) if any didn't contain the CET property
+# It fails (2) if the input did not take the expected form.
+
+BEGIN { result = cet = sanity = 0 }
+
+function check_one(name) {
+  if (!sanity) {
+    print name ": *** input did not look like readelf -n output";
+    result = 2;
+  } else if (cet) {
+    print name ": OK";
+  } else {
+    print name ": *** no CET property found";
+    result = result ? result : 1;
+  }
+
+  cet = sanity = 0;
+}
+
+FILENAME != lastfile {
+  if (lastfile)
+    check_one(lastfile);
+  lastfile = FILENAME;
+}
+
+index ($0, "Displaying notes") != 0 { sanity = 1 }
+index ($0, "IBT") != 0 && index ($0, "SHSTK") != 0 { cet = 1 }
+
+END {
+  check_one(lastfile);
+  exit(result);
+}
diff --git a/sysdeps/unix/sysv/linux/x86/configure b/sysdeps/unix/sysv/linux/x86/configure
new file mode 100644
index 0000000000..358319d93d
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/x86/configure
@@ -0,0 +1,69 @@
+# This file is generated from configure.ac by Autoconf.  DO NOT EDIT!
+ # Local configure fragment for sysdeps/unix/sysv/linux/x86.
+
+if test x"$enable_cet" = xyes; then
+  # Check if CET can be enabled.
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether CET can be enabled" >&5
+$as_echo_n "checking whether CET can be enabled... " >&6; }
+if ${libc_cv_x86_cet_available+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat > conftest.c <<EOF
+#if !defined __CET__ || __CET__ != 3
+# error CET isn't available.
+#endif
+EOF
+ if { ac_try='${CC-cc} -c $CFLAGS -fcf-protection -include cet.h conftest.c 1>&5'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }; then
+   libc_cv_x86_cet_available=yes
+ else
+   libc_cv_x86_cet_available=no
+ fi
+ rm -rf conftest*
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_x86_cet_available" >&5
+$as_echo "$libc_cv_x86_cet_available" >&6; }
+  if test $libc_cv_x86_cet_available = yes; then
+    enable_cet=yes
+  else
+    if test x"$enable_cet" = xdefault; then
+      enable_cet=no
+    else
+      as_fn_error $? "$CC doesn't support CET" "$LINENO" 5
+    fi
+  fi
+fi
+if test $enable_cet = yes; then
+  # Check if assembler supports CET.
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $AS supports CET" >&5
+$as_echo_n "checking whether $AS supports CET... " >&6; }
+if ${libc_cv_x86_cet_as+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat > conftest.s <<EOF
+ incsspd %ecx
+EOF
+ if { ac_try='${CC-cc} -c $CFLAGS conftest.s -o conftest.o 1>&5'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }; then
+   libc_cv_x86_cet_as=yes
+ else
+   libc_cv_x86_cet_as=no
+ fi
+ rm -rf conftest*
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_x86_cet_as" >&5
+$as_echo "$libc_cv_x86_cet_as" >&6; }
+  if test $libc_cv_x86_cet_as = no; then
+    as_fn_error $? "$AS doesn't support CET" "$LINENO" 5
+  fi
+fi
+config_vars="$config_vars
+enable-cet = $enable_cet"
diff --git a/sysdeps/unix/sysv/linux/x86/configure.ac b/sysdeps/unix/sysv/linux/x86/configure.ac
new file mode 100644
index 0000000000..baccd38633
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/x86/configure.ac
@@ -0,0 +1,46 @@
+GLIBC_PROVIDES dnl See aclocal.m4 in the top level source directory.
+# Local configure fragment for sysdeps/unix/sysv/linux/x86.
+
+if test x"$enable_cet" = xyes; then
+  # Check if CET can be enabled.
+  AC_CACHE_CHECK(whether CET can be enabled,
+ libc_cv_x86_cet_available, [dnl
+cat > conftest.c <<EOF
+#if !defined __CET__ || __CET__ != 3
+# error CET isn't available.
+#endif
+EOF
+ if AC_TRY_COMMAND(${CC-cc} -c $CFLAGS -fcf-protection -include cet.h conftest.c 1>&AS_MESSAGE_LOG_FD); then
+   libc_cv_x86_cet_available=yes
+ else
+   libc_cv_x86_cet_available=no
+ fi
+ rm -rf conftest*])
+  if test $libc_cv_x86_cet_available = yes; then
+    enable_cet=yes
+  else
+    if test x"$enable_cet" = xdefault; then
+      enable_cet=no
+    else
+      AC_MSG_ERROR([$CC doesn't support CET])
+    fi
+  fi
+fi
+if test $enable_cet = yes; then
+  # Check if assembler supports CET.
+  AC_CACHE_CHECK(whether $AS supports CET,
+ libc_cv_x86_cet_as, [dnl
+cat > conftest.s <<EOF
+ incsspd %ecx
+EOF
+ if AC_TRY_COMMAND(${CC-cc} -c $CFLAGS conftest.s -o conftest.o 1>&AS_MESSAGE_LOG_FD); then
+   libc_cv_x86_cet_as=yes
+ else
+   libc_cv_x86_cet_as=no
+ fi
+ rm -rf conftest*])
+  if test $libc_cv_x86_cet_as = no; then
+    AC_MSG_ERROR([$AS doesn't support CET])
+  fi
+fi
+LIBC_CONFIG_VAR([enable-cet], [$enable_cet])
diff --git a/sysdeps/unix/sysv/linux/x86/dl-cet.c b/sysdeps/unix/sysv/linux/x86/dl-cet.c
new file mode 100644
index 0000000000..5acf19dae4
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/x86/dl-cet.c
@@ -0,0 +1,186 @@
+/* Linux/x86 CET initializers function.
+   Copyright (C) 2018 Free Software Foundation, Inc.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <unistd.h>
+#include <errno.h>
+#include <libintl.h>
+#include <ldsodefs.h>
+
+static int
+dl_cet_mark_legacy_region (struct link_map *l)
+{
+  /* FIXME: Mark legacy region.  For now L is ignored and 0 is returned.  */
+  return 0;
+}
+
+/* Check if object M and the objects in its l_initfini list are
+   compatible with CET.  PROGRAM is NULL when checking for dlopen.  */
+static void
+dl_cet_check (struct link_map *m, const char *program)
+{
+  /* Check if IBT is enabled by kernel.  */
+  bool ibt_enabled
+    = (GL(dl_x86_feature_1)[0] & GNU_PROPERTY_X86_FEATURE_1_IBT) != 0;
+  /* Check if SHSTK is enabled by kernel.  */
+  bool shstk_enabled
+    = (GL(dl_x86_feature_1)[0] & GNU_PROPERTY_X86_FEATURE_1_SHSTK) != 0;
+
+  if (ibt_enabled || shstk_enabled)
+    {
+      struct link_map *l = NULL;
+
+      /* Check if IBT and SHSTK are enabled in object.  */
+      bool enable_ibt = ibt_enabled;
+      bool enable_shstk = shstk_enabled;
+      if (program)
+ {
+  /* Enable IBT and SHSTK only if they are enabled in executable.
+     NB: IBT and SHSTK may be disabled by environment variable:
+
+     GLIBC_TUNABLES=glibc.tune.hwcaps=-IBT,-SHSTK
+   */
+  enable_ibt &= (HAS_CPU_FEATURE (IBT)
+ && (m->l_cet & lc_ibt) != 0);
+  enable_shstk &= (HAS_CPU_FEATURE (SHSTK)
+   && (m->l_cet & lc_shstk) != 0);
+ }
+
+      /* ld.so is CET-enabled by kernel.  But shared objects may not
+ support IBT nor SHSTK.  */
+      if (enable_ibt || enable_shstk)
+ {
+  int res;
+  unsigned int i;
+  unsigned int first_legacy, last_legacy;
+  bool need_legacy_bitmap = false;
+
+  i = m->l_searchlist.r_nlist;
+  while (i-- > 0)
+    {
+      /* Check each shared object to see if IBT and SHSTK are
+ enabled.  */
+      l = m->l_initfini[i];
+
+      if (l->l_init_called)
+ continue;
+
+#ifdef SHARED
+      /* Skip CET check for ld.so since ld.so is CET-enabled.
+ CET will be disabled later if CET isn't enabled in
+ executable.  */
+      if (l == &GL(dl_rtld_map)
+  ||  l->l_real == &GL(dl_rtld_map)
+  || (program && l == m))
+ continue;
+#endif
+
+      if (enable_ibt && !(l->l_cet & lc_ibt))
+ {
+  /* Remember the lowest (first) and highest (last) legacy indexes.  */
+  if (!need_legacy_bitmap)
+    last_legacy = i;
+  first_legacy = i;
+  need_legacy_bitmap = true;
+ }
+
+      /* SHSTK is enabled only if it is enabled in executable as
+ well as all shared objects.  */
+      enable_shstk &= (l->l_cet & lc_shstk) != 0;
+    }
+
+  if (need_legacy_bitmap)
+    {
+      /* Put legacy shared objects in legacy bitmap.  */
+      for (i = first_legacy; i <= last_legacy; i++)
+ {
+  l = m->l_initfini[i];
+
+  if (l->l_init_called || (l->l_cet & lc_ibt))
+    continue;
+
+#ifdef SHARED
+  if (l == &GL(dl_rtld_map)
+      ||  l->l_real == &GL(dl_rtld_map)
+      || (program && l == m))
+    continue;
+#endif
+
+  /* If IBT is enabled in executable and IBT isn't enabled
+     in this shared object, mark PT_LOAD segments with PF_X
+     in legacy code page bitmap.  */
+  res = dl_cet_mark_legacy_region (l);
+  if (res != 0)
+    {
+      if (program)
+ _dl_fatal_printf ("%s: fail to mark legacy code region\n",
+  l->l_name);
+      else
+ _dl_signal_error (-res, l->l_name, "dlopen",
+  N_("fail to mark legacy code region"));
+    }
+ }
+    }
+ }
+
+      if (enable_ibt != ibt_enabled || enable_shstk != shstk_enabled)
+ {
+  if (!program)
+    {
+      /* With SHSTK on we can't dlopen an object without SHSTK.  NOTE(review):
+ L is the object the loops above visited last, maybe not the offender.  */
+      if (enable_shstk != shstk_enabled)
+ _dl_signal_error (EINVAL, l->l_name, "dlopen",
+  N_("shadow stack isn't enabled"));
+      return;
+    }
+
+#ifdef SHARED
+  /* FIXME: Disable IBT and/or SHSTK if they are enabled in
+     ld.so, but disabled in executable or shared objects.  */
+  ;
+#endif
+ }
+
+#ifdef SHARED
+      if (program && (ibt_enabled || shstk_enabled))
+ {
+  /* FIXME: Lock CET if IBT or SHSTK is enabled in executable.  */
+  ;
+ }
+#endif
+    }
+}
+
+void
+_dl_cet_open_check (struct link_map *l)
+{
+  dl_cet_check (l, NULL); /* NULL program: take the dlopen paths.  */
+}
+
+#ifdef SHARED
+
+# ifndef LINKAGE
+#  define LINKAGE
+# endif
+
+LINKAGE
+void
+_dl_cet_check (struct link_map *main_map, const char *program)
+{
+  dl_cet_check (main_map, program); /* Forwards both arguments as is.  */
+}
+#endif /* SHARED */
diff --git a/sysdeps/unix/sysv/linux/x86/dl-cet.h b/sysdeps/unix/sysv/linux/x86/dl-cet.h
new file mode 100644
index 0000000000..97bf3918eb
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/x86/dl-cet.h
@@ -0,0 +1,138 @@
+/* Linux/x86 CET inline functions.
+   Copyright (C) 2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef _dl_cet_h
+#define _dl_cet_h
+
+extern void _dl_cet_check (struct link_map *, const char *)
+    attribute_hidden;
+
+#define DL_MAIN_CHECK(l, n) _dl_cet_check ((l), (n))
+
+#ifdef ElfW
+/* Scan the SIZE bytes of notes at NOTE, from a PT_NOTE segment aligned
+   to ALIGN, and record in L->l_cet the IBT and SHSTK bits found in a
+   GNU_PROPERTY_X86_FEATURE_1_AND property.  */
+static inline void __attribute__ ((unused))
+dl_process_cet_property_note (struct link_map *l, const ElfW(Nhdr) *note,
+                              const ElfW(Addr) size, const ElfW(Addr) align)
+{
+  /* The NT_GNU_PROPERTY_TYPE_0 note must be aligned to 4 bytes in 32-bit
+     objects and to 8 bytes in 64-bit objects.  Skip misaligned notes.  */
+  if (align != (__ELF_NATIVE_CLASS / 8))
+    return;
+
+  const ElfW(Addr) start = (ElfW(Addr)) note;
+
+  while ((ElfW(Addr)) (note + 1) - start < size)
+    {
+      /* Find the NT_GNU_PROPERTY_TYPE_0 note.  */
+      if (note->n_namesz == 4 && note->n_type == NT_GNU_PROPERTY_TYPE_0
+          && memcmp (note + 1, "GNU", 4) == 0)
+        {
+          /* Check for invalid property.  */
+          if (note->n_descsz < 8
+              || (note->n_descsz % sizeof (ElfW(Addr))) != 0)
+            break;
+
+          /* Start and end of property array.  */
+          unsigned char *ptr = (unsigned char *) (note + 1) + 4;
+          unsigned char *ptr_end = ptr + note->n_descsz;
+
+          do
+            {
+              unsigned int type = *(unsigned int *) ptr;
+              unsigned int datasz = *(unsigned int *) (ptr + 4);
+
+              ptr += 8;
+              if (datasz > (size_t) (ptr_end - ptr))
+                break;
+              if (type == GNU_PROPERTY_X86_FEATURE_1_AND && datasz == 4)
+                {
+                  unsigned int feature_1 = *(unsigned int *) ptr;
+                  if ((feature_1 & GNU_PROPERTY_X86_FEATURE_1_IBT))
+                    l->l_cet |= lc_ibt;
+                  if ((feature_1 & GNU_PROPERTY_X86_FEATURE_1_SHSTK))
+                    l->l_cet |= lc_shstk;
+                  break;
+                }
+              /* Another property: skip its word-padded data.  */
+              ptr += ((datasz + sizeof (ElfW(Addr)) - 1)
+                      & ~(sizeof (ElfW(Addr)) - 1));
+            }
+          while (ptr_end - ptr >= 8);
+        }
+
+      /* NB: Note sections like .note.ABI-tag and .note.gnu.build-id are
+         aligned to 4 bytes in 64-bit ELF objects.  */
+      note = ((const void *) note
+              + ELF_NOTE_NEXT_OFFSET (note->n_namesz, note->n_descsz, align));
+    }
+}
+
+# ifdef FILEBUF_SIZE
+#  define DL_PROCESS_PT_NOTE(l, ph, fd, fbp) \
+  dl_process_pt_note ((l), (ph), (fd), (fbp))
+
+/* Process the PT_NOTE segment PH of L, taken from FBP if buffered there
+   and otherwise read from FD.  Return 0 on success, -1 on failure.  */
+static inline int __attribute__ ((unused))
+dl_process_pt_note (struct link_map *l, const ElfW(Phdr) *ph,
+                    int fd, struct filebuf *fbp)
+{
+  const ElfW(Nhdr) *note;
+  ElfW(Nhdr) *note_malloced = NULL;
+  ElfW(Addr) size = ph->p_filesz;
+
+  if (ph->p_offset + size <= (size_t) fbp->len)
+    note = (const void *) (fbp->buf + ph->p_offset);
+  else
+    {
+      if (size < __MAX_ALLOCA_CUTOFF)
+        note = alloca (size);
+      else
+        note = note_malloced = malloc (size);
+      /* Fail on allocation, seek or short-read errors alike.  */
+      if (note == NULL || __lseek (fd, ph->p_offset, SEEK_SET) == -1
+          || __libc_read (fd, (void *) note, size) != size)
+        {
+          if (note_malloced)
+            free (note_malloced);
+          return -1;
+        }
+    }
+
+  dl_process_cet_property_note (l, note, size, ph->p_align);
+  if (note_malloced)
+    free (note_malloced);
+  return 0;
+}
+# else
+#  define DL_PROCESS_PT_NOTE(l, ph) dl_process_pt_note ((l), (ph))
+/* Variant for when FILEBUF_SIZE is not defined: use p_vaddr + l_addr.  */
+static inline int __attribute__ ((unused))
+dl_process_pt_note (struct link_map *l, const ElfW(Phdr) *ph)
+{
+  const ElfW(Nhdr) *note = (const void *) (ph->p_vaddr + l->l_addr);
+  dl_process_cet_property_note (l, note, ph->p_memsz, ph->p_align);
+  return 0;
+}
+# endif
+#endif
+
+#endif /* _dl_cet_h */
diff --git a/sysdeps/unix/sysv/linux/x86/dl-procruntime.c b/sysdeps/unix/sysv/linux/x86/dl-procruntime.c
new file mode 100644
index 0000000000..6b46815f46
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/x86/dl-procruntime.c
@@ -0,0 +1,57 @@
+/* Data for processor runtime information.  Linux/x86 version.
+   Copyright (C) 2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* This information must be kept in sync with the _DL_HWCAP_COUNT,
+   HWCAP_PLATFORMS_START and HWCAP_PLATFORMS_COUNT definitions in
+   dl-hwcap.h.
+
+   If anything should be added here check whether the size of each string
+   is still ok with the given array size.
+
+   All the #ifdefs in the definitions are quite irritating but
+   necessary if we want to avoid duplicating the information.  There
+   are three different modes:
+
+   - PROCINFO_DECL is defined.  This means we are only interested in
+     declarations.
+
+   - PROCINFO_DECL is not defined:
+
+     + if SHARED is defined the file is included in an array
+       initializer.  The .element = { ... } syntax is needed.
+
+     + if SHARED is not defined a normal array initialization is
+       needed.
+  */
+
+#ifndef PROCINFO_CLASS
+# define PROCINFO_CLASS
+#endif
+
+#if !IS_IN (ldconfig)
+# if !defined PROCINFO_DECL && defined SHARED
+  ._dl_x86_feature_1
+# else
+PROCINFO_CLASS unsigned int _dl_x86_feature_1[2]
+# endif
+# if !defined SHARED || defined PROCINFO_DECL
+;
+# else
+,
+# endif
+#endif
diff --git a/sysdeps/unix/sysv/linux/x86/ldsodefs.h b/sysdeps/unix/sysv/linux/x86/ldsodefs.h
new file mode 100644
index 0000000000..bf8954b045
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/x86/ldsodefs.h
@@ -0,0 +1,29 @@
+/* Run-time dynamic linker data structures for x86 loaded ELF shared objects.
+   Copyright (C) 2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef _LDSODEFS_H
+
+/* Get the real definitions.  */
+#include_next <ldsodefs.h>
+
+#ifdef __CET__
+extern void _dl_cet_open_check (struct link_map *);
+# define DL_OPEN_CHECK(l) _dl_cet_open_check ((l))
+#endif
+
+#endif /* ldsodefs.h */
diff --git a/sysdeps/unix/sysv/linux/x86/link_map.h b/sysdeps/unix/sysv/linux/x86/link_map.h
new file mode 100644
index 0000000000..ef1206a9d2
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/x86/link_map.h
@@ -0,0 +1,26 @@
+/* Additional fields in struct link_map.  Linux/x86 version.
+   Copyright (C) 2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Which CET features this object is enabled with (2-bit mask).  */
+enum
+  {
+    lc_none = 0, /* Not enabled with CET.  */
+    lc_ibt = 1 << 0, /* Enabled with IBT.  */
+    lc_shstk = 1 << 1, /* Enabled with SHSTK.  */
+    lc_ibt_and_shstk = lc_ibt | lc_shstk /* Enabled with both.  */
+  } l_cet:2;
diff --git a/sysdeps/unix/sysv/linux/x86_64/dl-machine.h b/sysdeps/unix/sysv/linux/x86_64/dl-machine.h
new file mode 100644
index 0000000000..97cb9f187a
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/x86_64/dl-machine.h
@@ -0,0 +1,27 @@
+/* Machine-dependent ELF dynamic relocation inline functions.
+   Linux/x86-64 version.
+   Copyright (C) 2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifdef __CET__
+# include <sysdeps/unix/sysv/linux/x86/dl-cet.h>
+#endif
+#ifdef __ILP32__
+# include <sysdeps/x86_64/x32/dl-machine.h>
+#else
+# include <sysdeps/x86_64/dl-machine.h>
+#endif
diff --git a/sysdeps/x86/cpu-features.h b/sysdeps/x86/cpu-features.h
index 624e681e96..4e15c15e92 100644
--- a/sysdeps/x86/cpu-features.h
+++ b/sysdeps/x86/cpu-features.h
@@ -160,6 +160,11 @@ struct cpu_features
 extern const struct cpu_features *__get_cpu_features (void)
      __attribute__ ((const));
 
+# ifdef ElfW
+extern void _dl_setup_cet (const ElfW(Phdr) *, size_t, const ElfW(Addr))
+    attribute_hidden;
+# endif
+
 # if defined (_LIBC) && !IS_IN (nonlib)
 /* Unused for x86.  */
 #  define INIT_ARCH()
diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h
index afcb7cfd76..9843d9dff7 100644
--- a/sysdeps/x86/sysdep.h
+++ b/sysdeps/x86/sysdep.h
@@ -25,6 +25,13 @@
 
 /* Syntactic details of assembler.  */
 
+#ifdef _CET_ENDBR
+# define _CET_NOTRACK notrack
+#else
+# define _CET_ENDBR
+# define _CET_NOTRACK
+#endif
+
 /* ELF uses byte-counts for .align, most others use log2 of count of bytes.  */
 #define ALIGNARG(log2) 1<<log2
 #define ASM_SIZE_DIRECTIVE(name) .size name,.-name;
@@ -36,6 +43,7 @@
   .align ALIGNARG(4);      \
   C_LABEL(name)      \
   cfi_startproc;      \
+  _CET_ENDBR;      \
   CALL_MCOUNT
 
 #undef END
diff --git a/sysdeps/x86_64/dl-trampoline.h b/sysdeps/x86_64/dl-trampoline.h
index 298cfb3d99..a28b1e73a4 100644
--- a/sysdeps/x86_64/dl-trampoline.h
+++ b/sysdeps/x86_64/dl-trampoline.h
@@ -64,6 +64,7 @@
  cfi_startproc
 _dl_runtime_resolve:
  cfi_adjust_cfa_offset(16) # Incorporate PLT
+ _CET_ENDBR
 # if DL_RUNTIME_RESOLVE_REALIGN_STACK
 #  if LOCAL_STORAGE_AREA != 8
 #   error LOCAL_STORAGE_AREA must be 8
@@ -168,6 +169,7 @@ _dl_runtime_resolve:
 _dl_runtime_profile:
  cfi_startproc
  cfi_adjust_cfa_offset(16) # Incorporate PLT
+ _CET_ENDBR
  /* The La_x86_64_regs data structure pointed to by the
    fourth paramater must be VEC_SIZE-byte aligned.  This must
    be explicitly enforced.  We have the set up a dynamically
--
2.17.1

Reply | Threaded
Open this post in threaded view
|

[PATCH 04/24] x86: Add _CET_ENDBR to functions in crti.S

H.J. Lu-30
In reply to this post by H.J. Lu-30
Add _CET_ENDBR to functions in crti.S, which are called indirectly, to
support IBT.

Tested on i686 and x86-64.

        * sysdeps/i386/crti.S (_init): Add _CET_ENDBR.
        (_fini): Likewise.
        * sysdeps/x86_64/crti.S (_init): Likewise.
        (_fini): Likewise.
---
 sysdeps/i386/crti.S   | 2 ++
 sysdeps/x86_64/crti.S | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/sysdeps/i386/crti.S b/sysdeps/i386/crti.S
index ffbc92c22f..65ddc1c934 100644
--- a/sysdeps/i386/crti.S
+++ b/sysdeps/i386/crti.S
@@ -61,6 +61,7 @@
  .hidden _init
  .type _init, @function
 _init:
+ _CET_ENDBR
  pushl %ebx
  /* Maintain 16-byte stack alignment for called functions.  */
  subl $8, %esp
@@ -81,6 +82,7 @@ _init:
  .hidden _fini
  .type _fini, @function
 _fini:
+ _CET_ENDBR
  pushl %ebx
  subl $8, %esp
  LOAD_PIC_REG (bx)
diff --git a/sysdeps/x86_64/crti.S b/sysdeps/x86_64/crti.S
index f26915e956..067ac14884 100644
--- a/sysdeps/x86_64/crti.S
+++ b/sysdeps/x86_64/crti.S
@@ -61,6 +61,7 @@
  .hidden _init
  .type _init, @function
 _init:
+ _CET_ENDBR
  /* Maintain 16-byte stack alignment for called functions.  */
  subq $8, %rsp
 #if PREINIT_FUNCTION_WEAK
@@ -79,4 +80,5 @@ _init:
  .hidden _fini
  .type _fini, @function
 _fini:
+ _CET_ENDBR
  subq $8, %rsp
--
2.17.1

Reply | Threaded
Open this post in threaded view
|

[PATCH 05/24] x86: Add _CET_ENDBR to functions in dl-tlsdesc.S

H.J. Lu-30
In reply to this post by H.J. Lu-30
Add _CET_ENDBR to functions in dl-tlsdesc.S, which are called indirectly,
to support IBT.

Tested on i686 and x86-64.

        * sysdeps/i386/dl-tlsdesc.S (_dl_tlsdesc_return): Add
        _CET_ENDBR.
        (_dl_tlsdesc_undefweak): Likewise.
        (_dl_tlsdesc_dynamic): Likewise.
        (_dl_tlsdesc_resolve_abs_plus_addend): Likewise.
        (_dl_tlsdesc_resolve_rel): Likewise.
        (_dl_tlsdesc_resolve_rela): Likewise.
        (_dl_tlsdesc_resolve_hold): Likewise.
        * sysdeps/x86_64/dl-tlsdesc.S (_dl_tlsdesc_return): Likewise.
        (_dl_tlsdesc_undefweak): Likewise.
        (_dl_tlsdesc_dynamic): Likewise.
        (_dl_tlsdesc_resolve_rela): Likewise.
        (_dl_tlsdesc_resolve_hold): Likewise.
---
 sysdeps/i386/dl-tlsdesc.S   | 7 +++++++
 sysdeps/x86_64/dl-tlsdesc.S | 5 +++++
 2 files changed, 12 insertions(+)

diff --git a/sysdeps/i386/dl-tlsdesc.S b/sysdeps/i386/dl-tlsdesc.S
index 2c4764d1aa..128f0af318 100644
--- a/sysdeps/i386/dl-tlsdesc.S
+++ b/sysdeps/i386/dl-tlsdesc.S
@@ -37,6 +37,7 @@
  cfi_startproc
  .align 16
 _dl_tlsdesc_return:
+ _CET_ENDBR
  movl 4(%eax), %eax
  ret
  cfi_endproc
@@ -58,6 +59,7 @@ _dl_tlsdesc_return:
  cfi_startproc
  .align 16
 _dl_tlsdesc_undefweak:
+ _CET_ENDBR
  movl 4(%eax), %eax
  subl %gs:0, %eax
  ret
@@ -99,6 +101,7 @@ _dl_tlsdesc_dynamic (struct tlsdesc *tdp)
  cfi_startproc
  .align 16
 _dl_tlsdesc_dynamic:
+ _CET_ENDBR
  /* Like all TLS resolvers, preserve call-clobbered registers.
    We need two scratch regs anyway.  */
  subl $28, %esp
@@ -154,6 +157,7 @@ _dl_tlsdesc_dynamic:
  .align 16
 _dl_tlsdesc_resolve_abs_plus_addend:
 0:
+ _CET_ENDBR
  pushl %eax
  cfi_adjust_cfa_offset (4)
  pushl %ecx
@@ -192,6 +196,7 @@ _dl_tlsdesc_resolve_abs_plus_addend:
  .align 16
 _dl_tlsdesc_resolve_rel:
 0:
+ _CET_ENDBR
  pushl %eax
  cfi_adjust_cfa_offset (4)
  pushl %ecx
@@ -230,6 +235,7 @@ _dl_tlsdesc_resolve_rel:
  .align 16
 _dl_tlsdesc_resolve_rela:
 0:
+ _CET_ENDBR
  pushl %eax
  cfi_adjust_cfa_offset (4)
  pushl %ecx
@@ -268,6 +274,7 @@ _dl_tlsdesc_resolve_rela:
  .align 16
 _dl_tlsdesc_resolve_hold:
 0:
+ _CET_ENDBR
  pushl %eax
  cfi_adjust_cfa_offset (4)
  pushl %ecx
diff --git a/sysdeps/x86_64/dl-tlsdesc.S b/sysdeps/x86_64/dl-tlsdesc.S
index 437bd8cde7..80d771cd88 100644
--- a/sysdeps/x86_64/dl-tlsdesc.S
+++ b/sysdeps/x86_64/dl-tlsdesc.S
@@ -37,6 +37,7 @@
  cfi_startproc
  .align 16
 _dl_tlsdesc_return:
+ _CET_ENDBR
  movq 8(%rax), %rax
  ret
  cfi_endproc
@@ -58,6 +59,7 @@ _dl_tlsdesc_return:
  cfi_startproc
  .align 16
 _dl_tlsdesc_undefweak:
+ _CET_ENDBR
  movq 8(%rax), %rax
  subq %fs:0, %rax
  ret
@@ -96,6 +98,7 @@ _dl_tlsdesc_dynamic (register struct tlsdesc *tdp asm ("%rax"))
  cfi_startproc
  .align 16
 _dl_tlsdesc_dynamic:
+ _CET_ENDBR
  /* Preserve call-clobbered registers that we modify.
    We need two scratch regs anyway.  */
  movq %rsi, -16(%rsp)
@@ -166,6 +169,7 @@ _dl_tlsdesc_dynamic:
  .align 16
  /* The PLT entry will have pushed the link_map pointer.  */
 _dl_tlsdesc_resolve_rela:
+ _CET_ENDBR
  cfi_adjust_cfa_offset (8)
  /* Save all call-clobbered registers.  Add 8 bytes for push in
    the PLT entry to align the stack.  */
@@ -216,6 +220,7 @@ _dl_tlsdesc_resolve_rela:
  .align 16
 _dl_tlsdesc_resolve_hold:
 0:
+ _CET_ENDBR
  /* Save all call-clobbered registers.  */
  subq $72, %rsp
  cfi_adjust_cfa_offset (72)
--
2.17.1

Reply | Threaded
Open this post in threaded view
|

[PATCH 06/24] x86-64: Add _CET_ENDBR to STRCMP_SSE42

H.J. Lu-30
In reply to this post by H.J. Lu-30
Add _CET_ENDBR to STRCMP_SSE42, which is called indirectly, to support
IBT.

        * sysdeps/x86_64/multiarch/strcmp-sse42.S (STRCMP_SSE42): Add
        _CET_ENDBR.
---
 sysdeps/x86_64/multiarch/strcmp-sse42.S | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sysdeps/x86_64/multiarch/strcmp-sse42.S b/sysdeps/x86_64/multiarch/strcmp-sse42.S
index 6fa0c2c7d2..5a0c6668a7 100644
--- a/sysdeps/x86_64/multiarch/strcmp-sse42.S
+++ b/sysdeps/x86_64/multiarch/strcmp-sse42.S
@@ -126,6 +126,7 @@ END (GLABEL(__strncasecmp))
 
 STRCMP_SSE42:
  cfi_startproc
+ _CET_ENDBR
  CALL_MCOUNT
 
 /*
--
2.17.1

Reply | Threaded
Open this post in threaded view
|

[PATCH 07/24] i386: Add _CET_ENDBR to indirect jump targets in add_n.S/sub_n.S

H.J. Lu-30
In reply to this post by H.J. Lu-30
i386 add_n.S and sub_n.S use a trick to implement jump tables with LEA.
We can't use conditional branches or normal jump tables since jump
table entries use EFLAGS set by the jump table index.  This patch adds
_CET_ENDBR to indirect jump targets and adjusts the jump destination
for _CET_ENDBR.

        * sysdeps/i386/add_n.S: Include <sysdep.h>, instead of
        "sysdep.h".
        (__mpn_add_n): Save and restore %ebx if IBT is enabled.  Add
        _CET_ENDBR to indirect jump targets and adjust jump destination
        for _CET_ENDBR.
        * sysdeps/i386/i686/add_n.S: Include <sysdep.h>, instead of
        "sysdep.h".
        (__mpn_add_n): Save and restore %ebx if IBT is enabled.  Add
        _CET_ENDBR to indirect jump targets and adjust jump destination
        for _CET_ENDBR.
        * sysdeps/i386/sub_n.S: Include <sysdep.h>, instead of
        "sysdep.h".
        (__mpn_sub_n): Save and restore %ebx if IBT is enabled.  Add
        _CET_ENDBR to indirect jump targets and adjust jump destination
        for _CET_ENDBR.
---
 sysdeps/i386/add_n.S      | 27 ++++++++++++++++++++++++++-
 sysdeps/i386/i686/add_n.S | 27 ++++++++++++++++++++++++++-
 sysdeps/i386/sub_n.S      | 26 +++++++++++++++++++++++++-
 3 files changed, 77 insertions(+), 3 deletions(-)

diff --git a/sysdeps/i386/add_n.S b/sysdeps/i386/add_n.S
index d6a35237b0..d3805ba213 100644
--- a/sysdeps/i386/add_n.S
+++ b/sysdeps/i386/add_n.S
@@ -17,7 +17,7 @@
    along with the GNU MP Library; see the file COPYING.LIB.  If not,
    see <http://www.gnu.org/licenses/>.  */
 
-#include "sysdep.h"
+#include <sysdep.h>
 #include "asm-syntax.h"
 
 #define PARMS 4+8 /* space for 2 saved regs */
@@ -40,6 +40,13 @@ ENTRY (__mpn_add_n)
  cfi_rel_offset (esi, 0)
  movl S2(%esp),%edx
  movl SIZE(%esp),%ecx
+
+#if defined __CET__ && (__CET__ & 1) != 0
+ pushl %ebx
+ cfi_adjust_cfa_offset (4)
+ cfi_rel_offset (ebx, 0)
+#endif
+
  movl %ecx,%eax
  shrl $3,%ecx /* compute count for unrolled loop */
  negl %eax
@@ -51,6 +58,9 @@ ENTRY (__mpn_add_n)
  subl %eax,%esi /* ... by a constant when we ... */
  subl %eax,%edx /* ... enter the loop */
  shrl $2,%eax /* restore previous value */
+#if defined __CET__ && (__CET__ & 1) != 0
+ leal -4(,%eax,4),%ebx /* Count for 4-byte endbr32 */
+#endif
 #ifdef PIC
 /* Calculate start address in loop for PIC.  Due to limitations in some
    assemblers, Loop-L0-3 cannot be put into the leal */
@@ -64,30 +74,40 @@ L(0): leal (%eax,%eax,8),%eax
 #else
 /* Calculate start address in loop for non-PIC.  */
  leal (L(oop) - 3)(%eax,%eax,8),%eax
+#endif
+#if defined __CET__ && (__CET__ & 1) != 0
+ addl %ebx,%eax /* Adjust for endbr32 */
 #endif
  jmp *%eax /* jump into loop */
  ALIGN (3)
 L(oop): movl (%esi),%eax
  adcl (%edx),%eax
  movl %eax,(%edi)
+ _CET_ENDBR
  movl 4(%esi),%eax
  adcl 4(%edx),%eax
  movl %eax,4(%edi)
+ _CET_ENDBR
  movl 8(%esi),%eax
  adcl 8(%edx),%eax
  movl %eax,8(%edi)
+ _CET_ENDBR
  movl 12(%esi),%eax
  adcl 12(%edx),%eax
  movl %eax,12(%edi)
+ _CET_ENDBR
  movl 16(%esi),%eax
  adcl 16(%edx),%eax
  movl %eax,16(%edi)
+ _CET_ENDBR
  movl 20(%esi),%eax
  adcl 20(%edx),%eax
  movl %eax,20(%edi)
+ _CET_ENDBR
  movl 24(%esi),%eax
  adcl 24(%edx),%eax
  movl %eax,24(%edi)
+ _CET_ENDBR
  movl 28(%esi),%eax
  adcl 28(%edx),%eax
  movl %eax,28(%edi)
@@ -100,6 +120,11 @@ L(oop): movl (%esi),%eax
  sbbl %eax,%eax
  negl %eax
 
+#if defined __CET__ && (__CET__ & 1) != 0
+ popl %ebx
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (ebx)
+#endif
  popl %esi
  cfi_adjust_cfa_offset (-4)
  cfi_restore (esi)
diff --git a/sysdeps/i386/i686/add_n.S b/sysdeps/i386/i686/add_n.S
index 5c6c1448d6..d2d532cd30 100644
--- a/sysdeps/i386/i686/add_n.S
+++ b/sysdeps/i386/i686/add_n.S
@@ -17,7 +17,7 @@
    along with the GNU MP Library; see the file COPYING.LIB.  If not,
    see <http://www.gnu.org/licenses/>.  */
 
-#include "sysdep.h"
+#include <sysdep.h>
 #include "asm-syntax.h"
 
 #define PARMS 4+8 /* space for 2 saved regs */
@@ -44,6 +44,13 @@ ENTRY (__mpn_add_n)
  cfi_rel_offset (esi, 0)
  movl S2(%esp),%edx
  movl SIZE(%esp),%ecx
+
+#if defined __CET__ && (__CET__ & 1) != 0
+ pushl %ebx
+ cfi_adjust_cfa_offset (4)
+ cfi_rel_offset (ebx, 0)
+#endif
+
  movl %ecx,%eax
  shrl $3,%ecx /* compute count for unrolled loop */
  negl %eax
@@ -55,6 +62,9 @@ ENTRY (__mpn_add_n)
  subl %eax,%esi /* ... by a constant when we ... */
  subl %eax,%edx /* ... enter the loop */
  shrl $2,%eax /* restore previous value */
+#if defined __CET__ && (__CET__ & 1) != 0
+ leal -4(,%eax,4),%ebx /* Count for 4-byte endbr32 */
+#endif
 #ifdef PIC
 /* Calculate start address in loop for PIC.  */
  leal (L(oop)-L(0)-3)(%eax,%eax,8),%eax
@@ -63,30 +73,40 @@ L(0):
 #else
 /* Calculate start address in loop for non-PIC.  */
  leal (L(oop) - 3)(%eax,%eax,8),%eax
+#endif
+#if defined __CET__ && (__CET__ & 1) != 0
+ addl %ebx,%eax /* Adjust for endbr32 */
 #endif
  jmp *%eax /* jump into loop */
  ALIGN (3)
 L(oop): movl (%esi),%eax
  adcl (%edx),%eax
  movl %eax,(%edi)
+ _CET_ENDBR
  movl 4(%esi),%eax
  adcl 4(%edx),%eax
  movl %eax,4(%edi)
+ _CET_ENDBR
  movl 8(%esi),%eax
  adcl 8(%edx),%eax
  movl %eax,8(%edi)
+ _CET_ENDBR
  movl 12(%esi),%eax
  adcl 12(%edx),%eax
  movl %eax,12(%edi)
+ _CET_ENDBR
  movl 16(%esi),%eax
  adcl 16(%edx),%eax
  movl %eax,16(%edi)
+ _CET_ENDBR
  movl 20(%esi),%eax
  adcl 20(%edx),%eax
  movl %eax,20(%edi)
+ _CET_ENDBR
  movl 24(%esi),%eax
  adcl 24(%edx),%eax
  movl %eax,24(%edi)
+ _CET_ENDBR
  movl 28(%esi),%eax
  adcl 28(%edx),%eax
  movl %eax,28(%edi)
@@ -99,6 +119,11 @@ L(oop): movl (%esi),%eax
  sbbl %eax,%eax
  negl %eax
 
+#if defined __CET__ && (__CET__ & 1) != 0
+ popl %ebx
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (ebx)
+#endif
  popl %esi
  cfi_adjust_cfa_offset (-4)
  cfi_restore (esi)
diff --git a/sysdeps/i386/sub_n.S b/sysdeps/i386/sub_n.S
index 7c6f48c32e..8f321b8c6c 100644
--- a/sysdeps/i386/sub_n.S
+++ b/sysdeps/i386/sub_n.S
@@ -17,7 +17,7 @@
    along with the GNU MP Library; see the file COPYING.LIB.  If not,
    see <http://www.gnu.org/licenses/>.  */
 
-#include "sysdep.h"
+#include <sysdep.h>
 #include "asm-syntax.h"
 
 #define PARMS 4+8 /* space for 2 saved regs */
@@ -40,6 +40,13 @@ ENTRY (__mpn_sub_n)
  cfi_rel_offset (esi, 0)
  movl S2(%esp),%edx
  movl SIZE(%esp),%ecx
+
+#if defined __CET__ && (__CET__ & 1) != 0
+ pushl %ebx
+ cfi_adjust_cfa_offset (4)
+ cfi_rel_offset (ebx, 0)
+#endif
+
  movl %ecx,%eax
  shrl $3,%ecx /* compute count for unrolled loop */
  negl %eax
@@ -51,6 +58,9 @@ ENTRY (__mpn_sub_n)
  subl %eax,%esi /* ... by a constant when we ... */
  subl %eax,%edx /* ... enter the loop */
  shrl $2,%eax /* restore previous value */
+#if defined __CET__ && (__CET__ & 1) != 0
+ leal -4(,%eax,4),%ebx /* Count for 4-byte endbr32 */
+#endif
 #ifdef PIC
 /* Calculate start address in loop for PIC.  Due to limitations in some
    assemblers, Loop-L0-3 cannot be put into the leal */
@@ -64,30 +74,40 @@ L(0): leal (%eax,%eax,8),%eax
 #else
 /* Calculate start address in loop for non-PIC.  */
  leal (L(oop) - 3)(%eax,%eax,8),%eax
+#endif
+#if defined __CET__ && (__CET__ & 1) != 0
+ addl %ebx,%eax /* Adjust for endbr32 */
 #endif
  jmp *%eax /* jump into loop */
  ALIGN (3)
 L(oop): movl (%esi),%eax
  sbbl (%edx),%eax
  movl %eax,(%edi)
+ _CET_ENDBR
  movl 4(%esi),%eax
  sbbl 4(%edx),%eax
  movl %eax,4(%edi)
+ _CET_ENDBR
  movl 8(%esi),%eax
  sbbl 8(%edx),%eax
  movl %eax,8(%edi)
+ _CET_ENDBR
  movl 12(%esi),%eax
  sbbl 12(%edx),%eax
  movl %eax,12(%edi)
+ _CET_ENDBR
  movl 16(%esi),%eax
  sbbl 16(%edx),%eax
  movl %eax,16(%edi)
+ _CET_ENDBR
  movl 20(%esi),%eax
  sbbl 20(%edx),%eax
  movl %eax,20(%edi)
+ _CET_ENDBR
  movl 24(%esi),%eax
  sbbl 24(%edx),%eax
  movl %eax,24(%edi)
+ _CET_ENDBR
  movl 28(%esi),%eax
  sbbl 28(%edx),%eax
  movl %eax,28(%edi)
@@ -100,6 +120,11 @@ L(oop): movl (%esi),%eax
  sbbl %eax,%eax
  negl %eax
 
+#if defined __CET__ && (__CET__ & 1) != 0
+ popl %ebx
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (ebx)
+#endif
  popl %esi
  cfi_adjust_cfa_offset (-4)
  cfi_restore (esi)
--
2.17.1

Reply | Threaded
Open this post in threaded view
|

[PATCH 08/24] x86: Update vfork to pop shadow stack

H.J. Lu-30
In reply to this post by H.J. Lu-30
Since we can't change return address on shadow stack, if shadow stack
is in use, we need to pop shadow stack and jump back to caller directly.

        * sysdeps/unix/sysv/linux/i386/vfork.S (SYSCALL_ERROR_HANDLER):
        Redefine if shadow stack is enabled.
        (SYSCALL_ERROR_LABEL): Likewise.
        (__vfork): Pop shadow stack and jump back to caller directly
        when shadow stack is in use.
        * sysdeps/unix/sysv/linux/x86_64/vfork.S (SYSCALL_ERROR_HANDLER):
        Redefine if shadow stack is enabled.
        (SYSCALL_ERROR_LABEL): Likewise.
        (__vfork): Pop shadow stack and jump back to caller directly
        when shadow stack is in use.
---
 sysdeps/unix/sysv/linux/i386/vfork.S   | 54 ++++++++++++++++++++++++++
 sysdeps/unix/sysv/linux/x86_64/vfork.S | 35 +++++++++++++++++
 2 files changed, 89 insertions(+)

diff --git a/sysdeps/unix/sysv/linux/i386/vfork.S b/sysdeps/unix/sysv/linux/i386/vfork.S
index 8f40d02d09..06d834d632 100644
--- a/sysdeps/unix/sysv/linux/i386/vfork.S
+++ b/sysdeps/unix/sysv/linux/i386/vfork.S
@@ -21,6 +21,35 @@
 #include <bits/errno.h>
 #include <tcb-offsets.h>
 
+#if defined __CET__ && (__CET__ & 2) != 0
+/* When shadow stack is in use, we need to pop shadow stack and jump
+   back to caller directly.   */
+# undef SYSCALL_ERROR_HANDLER
+# ifdef PIC
+#  define SYSCALL_ERROR_HANDLER \
+0: \
+  calll .L1; \
+.L1: \
+  popl %edx; \
+.L2: \
+  addl $_GLOBAL_OFFSET_TABLE_ + (.L2 - .L1), %edx; \
+  movl __libc_errno@gotntpoff(%edx), %edx; \
+  negl %eax; \
+  movl %eax, %gs:(%edx); \
+  orl $-1, %eax; \
+  jmp 1b;
+# else
+#  define SYSCALL_ERROR_HANDLER \
+0: \
+  movl __libc_errno@indntpoff, %edx; \
+  negl %eax; \
+  movl %eax, %gs:(%edx); \
+  orl $-1, %eax; \
+  jmp 1b;
+# endif
+# undef SYSCALL_ERROR_LABEL
+# define SYSCALL_ERROR_LABEL 0f
+#endif
 
 /* Clone the calling process, but without copying the whole address space.
    The calling process is suspended until the new process exits or is
@@ -38,16 +67,41 @@ ENTRY (__vfork)
  movl $SYS_ify (vfork), %eax
  int $0x80
 
+#if !defined __CET__ || (__CET__ & 2) == 0
  /* Jump to the return PC.  Don't jump directly since this
    disturbs the branch target cache.  Instead push the return
    address back on the stack.  */
  pushl %ecx
  cfi_adjust_cfa_offset (4)
+#endif
 
  cmpl $-4095, %eax
  /* Branch forward if it failed.  */
  jae SYSCALL_ERROR_LABEL
 
+#if defined __CET__ && (__CET__ & 2) != 0
+1:
+ /* Check if shadow stack is in use.  */
+ xorl %edx, %edx
+ rdsspd %edx
+ testl %edx, %edx
+ /* Normal return if shadow stack isn't in use.  */
+ je L(no_shstk)
+
+ /* Pop return address from shadow stack and jump back to caller
+   directly.  */
+ movl $1, %edx
+ incsspd %edx
+ jmp *%ecx
+
+L(no_shstk):
+ /* Jump to the return PC.  Don't jump directly since this
+   disturbs the branch target cache.  Instead push the return
+   address back on the stack.  */
+ pushl %ecx
+ cfi_adjust_cfa_offset (4)
+#endif
+
  ret
 
 PSEUDO_END (__vfork)
diff --git a/sysdeps/unix/sysv/linux/x86_64/vfork.S b/sysdeps/unix/sysv/linux/x86_64/vfork.S
index e4c8269e3d..cab3ad34a0 100644
--- a/sysdeps/unix/sysv/linux/x86_64/vfork.S
+++ b/sysdeps/unix/sysv/linux/x86_64/vfork.S
@@ -20,6 +20,18 @@
 #include <bits/errno.h>
 #include <tcb-offsets.h>
 
+#if defined __CET__ && (__CET__ & 2) != 0
+/* When shadow stack is in use, we need to pop shadow stack and jump
+   back to caller directly.   */
+# undef SYSCALL_ERROR_HANDLER
+# define SYSCALL_ERROR_HANDLER \
+0: \
+  SYSCALL_SET_ERRNO; \
+  or $-1, %RAX_LP; \
+  jmp 1b;
+# undef SYSCALL_ERROR_LABEL
+# define SYSCALL_ERROR_LABEL 0f
+#endif
 
 /* Clone the calling process, but without copying the whole address space.
    The calling process is suspended until the new process exits or is
@@ -38,13 +50,36 @@ ENTRY (__vfork)
  movl $SYS_ify (vfork), %eax
  syscall
 
+#if !defined __CET__ || (__CET__ & 2) == 0
  /* Push back the return PC.  */
  pushq %rdi
  cfi_adjust_cfa_offset(8)
+#endif
 
  cmpl $-4095, %eax
  jae SYSCALL_ERROR_LABEL /* Branch forward if it failed.  */
 
+#if defined __CET__ && (__CET__ & 2) != 0
+1:
+ /* Check if shadow stack is in use.  */
+ xorl %esi, %esi
+ rdsspq %rsi
+ testq %rsi, %rsi
+ /* Normal return if shadow stack isn't in use.  */
+ je L(no_shstk)
+
+ /* Pop return address from shadow stack and jump back to caller
+   directly.  */
+ movl $1, %esi
+ incsspq %rsi
+ jmp *%rdi
+
+L(no_shstk):
+ /* Push back the return PC.  */
+ pushq %rdi
+ cfi_adjust_cfa_offset(8)
+#endif
+
  /* Normal return.  */
  ret
 
--
2.17.1

Reply | Threaded
Open this post in threaded view
|

[PATCH 09/24] x86_64: Use _CET_NOTRACK in strcmp.S

H.J. Lu-30
In reply to this post by H.J. Lu-30
        * sysdeps/x86_64/strcmp.S (STRCMP): Add _CET_NOTRACK before
        indirect jump to jump table.
---
 sysdeps/x86_64/strcmp.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sysdeps/x86_64/strcmp.S b/sysdeps/x86_64/strcmp.S
index de54fce647..e16945b961 100644
--- a/sysdeps/x86_64/strcmp.S
+++ b/sysdeps/x86_64/strcmp.S
@@ -233,7 +233,7 @@ LABEL(bigger):
  lea LABEL(unaligned_table)(%rip), %r10
  movslq (%r10, %r9,4), %r9
  lea (%r10, %r9), %r10
- jmp *%r10 /* jump to corresponding case */
+ _CET_NOTRACK jmp *%r10 /* jump to corresponding case */
 
 /*
  * The following cases will be handled by ashr_0
--
2.17.1

Reply | Threaded
Open this post in threaded view
|

[PATCH 10/24] x86-64: Use _CET_NOTRACK in strcpy-sse2-unaligned.S

H.J. Lu-30
In reply to this post by H.J. Lu-30
        * sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S
        (BRANCH_TO_JMPTBL_ENTRY): Add _CET_NOTRACK before indirect jump
        to jump table.
---
 sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S b/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S
index 56b748eb2c..72bf7e8586 100644
--- a/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S
+++ b/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S
@@ -33,7 +33,7 @@
  lea TABLE(%rip), %r11;                              \
  movslq (%r11, INDEX, SCALE), %rcx;                     \
  lea (%r11, %rcx), %rcx;                             \
- jmp *%rcx
+ _CET_NOTRACK jmp *%rcx
 
 # ifndef USE_AS_STRCAT
 
--
2.17.1

Reply | Threaded
Open this post in threaded view
|

[PATCH 11/24] x86-64: Use _CET_NOTRACK in strcmp-sse42.S

H.J. Lu-30
In reply to this post by H.J. Lu-30
        * sysdeps/x86_64/multiarch/strcmp-sse42.S (STRCMP_SSE42): Add
        _CET_NOTRACK before indirect jump to jump table.
---
 sysdeps/x86_64/multiarch/strcmp-sse42.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sysdeps/x86_64/multiarch/strcmp-sse42.S b/sysdeps/x86_64/multiarch/strcmp-sse42.S
index 5a0c6668a7..d3c07bd292 100644
--- a/sysdeps/x86_64/multiarch/strcmp-sse42.S
+++ b/sysdeps/x86_64/multiarch/strcmp-sse42.S
@@ -275,7 +275,7 @@ LABEL(bigger):
  movslq (%r10, %r9,4), %r9
  pcmpeqb %xmm1, D(%xmm0) /* Any null chars? */
  lea (%r10, %r9), %r10
- jmp *%r10 /* jump to corresponding case */
+ _CET_NOTRACK jmp *%r10 /* jump to corresponding case */
 
 /*
  * The following cases will be handled by ashr_0
--
2.17.1

Reply | Threaded
Open this post in threaded view
|

[PATCH 12/24] x86-64: Use _CET_NOTRACK in memcpy-ssse3-back.S

H.J. Lu-30
In reply to this post by H.J. Lu-30
        * sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
        (BRANCH_TO_JMPTBL_ENTRY): Add _CET_NOTRACK before indirect jump
        to jump table.
        (MEMCPY): Likewise.
---
 sysdeps/x86_64/multiarch/memcpy-ssse3-back.S | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S b/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
index 7e37035487..3cd1123326 100644
--- a/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
+++ b/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
@@ -39,7 +39,7 @@
   lea TABLE(%rip), %r11; \
   movslq (%r11, INDEX, SCALE), INDEX; \
   lea (%r11, INDEX), INDEX; \
-  jmp *INDEX; \
+  _CET_NOTRACK jmp *INDEX; \
   ud2
 
  .section .text.ssse3,"ax",@progbits
@@ -125,7 +125,7 @@ L(144bytesormore):
  sub $0x80, %rdx
  movslq (%r11, %r9, 4), %r9
  add %r11, %r9
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
  ud2
 
  .p2align 4
@@ -155,7 +155,7 @@ L(copy_backward):
  sub $0x80, %rdx
  movslq (%r11, %r9, 4), %r9
  add %r11, %r9
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
  ud2
 
  .p2align 4
--
2.17.1

Reply | Threaded
Open this post in threaded view
|

[PATCH 13/24] x86-64: Use _CET_NOTRACK in memcmp-sse4.S

H.J. Lu-30
In reply to this post by H.J. Lu-30
        * sysdeps/x86_64/multiarch/memcmp-sse4.S (BRANCH_TO_JMPTBL_ENTRY):
        Add _CET_NOTRACK before indirect jump to jump table.
---
 sysdeps/x86_64/multiarch/memcmp-sse4.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sysdeps/x86_64/multiarch/memcmp-sse4.S b/sysdeps/x86_64/multiarch/memcmp-sse4.S
index 0d96d6b775..8e164f2cb6 100644
--- a/sysdeps/x86_64/multiarch/memcmp-sse4.S
+++ b/sysdeps/x86_64/multiarch/memcmp-sse4.S
@@ -31,7 +31,7 @@
   lea TABLE(%rip), %r11; \
   movslq (%r11, INDEX, SCALE), %rcx; \
   add %r11, %rcx; \
-  jmp *%rcx; \
+  _CET_NOTRACK jmp *%rcx; \
   ud2
 
 /* Warning!
--
2.17.1

Reply | Threaded
Open this post in threaded view
|

[PATCH 14/24] x86-64: Use _CET_NOTRACK in memcpy-ssse3.S

H.J. Lu-30
In reply to this post by H.J. Lu-30
        * sysdeps/x86_64/multiarch/memcpy-ssse3.S
        (BRANCH_TO_JMPTBL_ENTRY): Add _CET_NOTRACK before indirect jump
        to jump table.
        (MEMCPY): Likewise.
---
 sysdeps/x86_64/multiarch/memcpy-ssse3.S | 124 ++++++++++++------------
 1 file changed, 62 insertions(+), 62 deletions(-)

diff --git a/sysdeps/x86_64/multiarch/memcpy-ssse3.S b/sysdeps/x86_64/multiarch/memcpy-ssse3.S
index 5dd209034b..0240bfa309 100644
--- a/sysdeps/x86_64/multiarch/memcpy-ssse3.S
+++ b/sysdeps/x86_64/multiarch/memcpy-ssse3.S
@@ -39,7 +39,7 @@
   lea TABLE(%rip), %r11; \
   movslq (%r11, INDEX, SCALE), INDEX; \
   lea (%r11, INDEX), INDEX; \
-  jmp *INDEX; \
+  _CET_NOTRACK jmp *INDEX; \
   ud2
 
  .section .text.ssse3,"ax",@progbits
@@ -86,7 +86,7 @@ L(start):
  add %rdx, %rsi
  add %rdx, %rdi
  add %r11, %r9
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
  ud2
 
  .p2align 4
@@ -441,7 +441,7 @@ L(shl_1):
  lea (L(shl_1_loop_L2)-L(shl_1_loop_L1))(%r9), %r9
 L(L1_fwd):
  lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
  ud2
 L(shl_1_loop_L2):
  prefetchnta 0x1c0(%rsi)
@@ -464,7 +464,7 @@ L(shl_1_loop_L1):
  jb L(shl_1_end)
  movaps %xmm4, -0x20(%rdi)
  movaps %xmm5, -0x10(%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
  ud2
 L(shl_1_end):
  movaps %xmm4, -0x20(%rdi)
@@ -484,7 +484,7 @@ L(shl_1_bwd):
  lea (L(shl_1_bwd_loop_L2)-L(shl_1_bwd_loop_L1))(%r9), %r9
 L(L1_bwd):
  lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
  ud2
 L(shl_1_bwd_loop_L2):
  prefetchnta -0x1c0(%rsi)
@@ -509,7 +509,7 @@ L(shl_1_bwd_loop_L1):
  movaps %xmm3, 0x10(%rdi)
  jb L(shl_1_bwd_end)
  movaps %xmm4, (%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
  ud2
 L(shl_1_bwd_end):
  movaps %xmm4, (%rdi)
@@ -526,7 +526,7 @@ L(shl_2):
  lea (L(shl_2_loop_L2)-L(shl_2_loop_L1))(%r9), %r9
 L(L2_fwd):
  lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
  ud2
 L(shl_2_loop_L2):
  prefetchnta 0x1c0(%rsi)
@@ -549,7 +549,7 @@ L(shl_2_loop_L1):
  jb L(shl_2_end)
  movaps %xmm4, -0x20(%rdi)
  movaps %xmm5, -0x10(%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
  ud2
 L(shl_2_end):
  movaps %xmm4, -0x20(%rdi)
@@ -569,7 +569,7 @@ L(shl_2_bwd):
  lea (L(shl_2_bwd_loop_L2)-L(shl_2_bwd_loop_L1))(%r9), %r9
 L(L2_bwd):
  lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
  ud2
 L(shl_2_bwd_loop_L2):
  prefetchnta -0x1c0(%rsi)
@@ -594,7 +594,7 @@ L(shl_2_bwd_loop_L1):
  movaps %xmm3, 0x10(%rdi)
  jb L(shl_2_bwd_end)
  movaps %xmm4, (%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
  ud2
 L(shl_2_bwd_end):
  movaps %xmm4, (%rdi)
@@ -611,7 +611,7 @@ L(shl_3):
  lea (L(shl_3_loop_L2)-L(shl_3_loop_L1))(%r9), %r9
 L(L3_fwd):
  lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
  ud2
 L(shl_3_loop_L2):
  prefetchnta 0x1c0(%rsi)
@@ -634,7 +634,7 @@ L(shl_3_loop_L1):
  jb L(shl_3_end)
  movaps %xmm4, -0x20(%rdi)
  movaps %xmm5, -0x10(%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
  ud2
 L(shl_3_end):
  movaps %xmm4, -0x20(%rdi)
@@ -654,7 +654,7 @@ L(shl_3_bwd):
  lea (L(shl_3_bwd_loop_L2)-L(shl_3_bwd_loop_L1))(%r9), %r9
 L(L3_bwd):
  lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
  ud2
 L(shl_3_bwd_loop_L2):
  prefetchnta -0x1c0(%rsi)
@@ -679,7 +679,7 @@ L(shl_3_bwd_loop_L1):
  movaps %xmm3, 0x10(%rdi)
  jb L(shl_3_bwd_end)
  movaps %xmm4, (%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
  ud2
 L(shl_3_bwd_end):
  movaps %xmm4, (%rdi)
@@ -696,7 +696,7 @@ L(shl_4):
  lea (L(shl_4_loop_L2)-L(shl_4_loop_L1))(%r9), %r9
 L(L4_fwd):
  lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
  ud2
 L(shl_4_loop_L2):
  prefetchnta 0x1c0(%rsi)
@@ -719,7 +719,7 @@ L(shl_4_loop_L1):
  jb L(shl_4_end)
  movaps %xmm4, -0x20(%rdi)
  movaps %xmm5, -0x10(%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
  ud2
 L(shl_4_end):
  movaps %xmm4, -0x20(%rdi)
@@ -739,7 +739,7 @@ L(shl_4_bwd):
  lea (L(shl_4_bwd_loop_L2)-L(shl_4_bwd_loop_L1))(%r9), %r9
 L(L4_bwd):
  lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
  ud2
 L(shl_4_bwd_loop_L2):
  prefetchnta -0x1c0(%rsi)
@@ -764,7 +764,7 @@ L(shl_4_bwd_loop_L1):
  movaps %xmm3, 0x10(%rdi)
  jb L(shl_4_bwd_end)
  movaps %xmm4, (%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
  ud2
 L(shl_4_bwd_end):
  movaps %xmm4, (%rdi)
@@ -781,7 +781,7 @@ L(shl_5):
  lea (L(shl_5_loop_L2)-L(shl_5_loop_L1))(%r9), %r9
 L(L5_fwd):
  lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
  ud2
 L(shl_5_loop_L2):
  prefetchnta 0x1c0(%rsi)
@@ -804,7 +804,7 @@ L(shl_5_loop_L1):
  jb L(shl_5_end)
  movaps %xmm4, -0x20(%rdi)
  movaps %xmm5, -0x10(%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
  ud2
 L(shl_5_end):
  movaps %xmm4, -0x20(%rdi)
@@ -824,7 +824,7 @@ L(shl_5_bwd):
  lea (L(shl_5_bwd_loop_L2)-L(shl_5_bwd_loop_L1))(%r9), %r9
 L(L5_bwd):
  lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
  ud2
 L(shl_5_bwd_loop_L2):
  prefetchnta -0x1c0(%rsi)
@@ -849,7 +849,7 @@ L(shl_5_bwd_loop_L1):
  movaps %xmm3, 0x10(%rdi)
  jb L(shl_5_bwd_end)
  movaps %xmm4, (%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
  ud2
 L(shl_5_bwd_end):
  movaps %xmm4, (%rdi)
@@ -866,7 +866,7 @@ L(shl_6):
  lea (L(shl_6_loop_L2)-L(shl_6_loop_L1))(%r9), %r9
 L(L6_fwd):
  lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
  ud2
 L(shl_6_loop_L2):
  prefetchnta 0x1c0(%rsi)
@@ -889,7 +889,7 @@ L(shl_6_loop_L1):
  jb L(shl_6_end)
  movaps %xmm4, -0x20(%rdi)
  movaps %xmm5, -0x10(%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
  ud2
 L(shl_6_end):
  movaps %xmm4, -0x20(%rdi)
@@ -909,7 +909,7 @@ L(shl_6_bwd):
  lea (L(shl_6_bwd_loop_L2)-L(shl_6_bwd_loop_L1))(%r9), %r9
 L(L6_bwd):
  lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
  ud2
 L(shl_6_bwd_loop_L2):
  prefetchnta -0x1c0(%rsi)
@@ -934,7 +934,7 @@ L(shl_6_bwd_loop_L1):
  movaps %xmm3, 0x10(%rdi)
  jb L(shl_6_bwd_end)
  movaps %xmm4, (%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
  ud2
 L(shl_6_bwd_end):
  movaps %xmm4, (%rdi)
@@ -951,7 +951,7 @@ L(shl_7):
  lea (L(shl_7_loop_L2)-L(shl_7_loop_L1))(%r9), %r9
 L(L7_fwd):
  lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
  ud2
 L(shl_7_loop_L2):
  prefetchnta 0x1c0(%rsi)
@@ -974,7 +974,7 @@ L(shl_7_loop_L1):
  jb L(shl_7_end)
  movaps %xmm4, -0x20(%rdi)
  movaps %xmm5, -0x10(%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
  ud2
 L(shl_7_end):
  movaps %xmm4, -0x20(%rdi)
@@ -994,7 +994,7 @@ L(shl_7_bwd):
  lea (L(shl_7_bwd_loop_L2)-L(shl_7_bwd_loop_L1))(%r9), %r9
 L(L7_bwd):
  lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
  ud2
 L(shl_7_bwd_loop_L2):
  prefetchnta -0x1c0(%rsi)
@@ -1019,7 +1019,7 @@ L(shl_7_bwd_loop_L1):
  movaps %xmm3, 0x10(%rdi)
  jb L(shl_7_bwd_end)
  movaps %xmm4, (%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
  ud2
 L(shl_7_bwd_end):
  movaps %xmm4, (%rdi)
@@ -1036,7 +1036,7 @@ L(shl_8):
  lea (L(shl_8_loop_L2)-L(shl_8_loop_L1))(%r9), %r9
 L(L8_fwd):
  lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
 L(shl_8_loop_L2):
  prefetchnta 0x1c0(%rsi)
 L(shl_8_loop_L1):
@@ -1058,7 +1058,7 @@ L(shl_8_loop_L1):
  jb L(shl_8_end)
  movaps %xmm4, -0x20(%rdi)
  movaps %xmm5, -0x10(%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
  ud2
  .p2align 4
 L(shl_8_end):
@@ -1079,7 +1079,7 @@ L(shl_8_bwd):
  lea (L(shl_8_bwd_loop_L2)-L(shl_8_bwd_loop_L1))(%r9), %r9
 L(L8_bwd):
  lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
  ud2
 L(shl_8_bwd_loop_L2):
  prefetchnta -0x1c0(%rsi)
@@ -1104,7 +1104,7 @@ L(shl_8_bwd_loop_L1):
  movaps %xmm3, 0x10(%rdi)
  jb L(shl_8_bwd_end)
  movaps %xmm4, (%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
  ud2
 L(shl_8_bwd_end):
  movaps %xmm4, (%rdi)
@@ -1121,7 +1121,7 @@ L(shl_9):
  lea (L(shl_9_loop_L2)-L(shl_9_loop_L1))(%r9), %r9
 L(L9_fwd):
  lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
  ud2
 L(shl_9_loop_L2):
  prefetchnta 0x1c0(%rsi)
@@ -1144,7 +1144,7 @@ L(shl_9_loop_L1):
  jb L(shl_9_end)
  movaps %xmm4, -0x20(%rdi)
  movaps %xmm5, -0x10(%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
  ud2
 L(shl_9_end):
  movaps %xmm4, -0x20(%rdi)
@@ -1164,7 +1164,7 @@ L(shl_9_bwd):
  lea (L(shl_9_bwd_loop_L2)-L(shl_9_bwd_loop_L1))(%r9), %r9
 L(L9_bwd):
  lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
  ud2
 L(shl_9_bwd_loop_L2):
  prefetchnta -0x1c0(%rsi)
@@ -1189,7 +1189,7 @@ L(shl_9_bwd_loop_L1):
  movaps %xmm3, 0x10(%rdi)
  jb L(shl_9_bwd_end)
  movaps %xmm4, (%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
  ud2
 L(shl_9_bwd_end):
  movaps %xmm4, (%rdi)
@@ -1206,7 +1206,7 @@ L(shl_10):
  lea (L(shl_10_loop_L2)-L(shl_10_loop_L1))(%r9), %r9
 L(L10_fwd):
  lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
  ud2
 L(shl_10_loop_L2):
  prefetchnta 0x1c0(%rsi)
@@ -1229,7 +1229,7 @@ L(shl_10_loop_L1):
  jb L(shl_10_end)
  movaps %xmm4, -0x20(%rdi)
  movaps %xmm5, -0x10(%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
  ud2
 L(shl_10_end):
  movaps %xmm4, -0x20(%rdi)
@@ -1249,7 +1249,7 @@ L(shl_10_bwd):
  lea (L(shl_10_bwd_loop_L2)-L(shl_10_bwd_loop_L1))(%r9), %r9
 L(L10_bwd):
  lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
  ud2
 L(shl_10_bwd_loop_L2):
  prefetchnta -0x1c0(%rsi)
@@ -1274,7 +1274,7 @@ L(shl_10_bwd_loop_L1):
  movaps %xmm3, 0x10(%rdi)
  jb L(shl_10_bwd_end)
  movaps %xmm4, (%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
  ud2
 L(shl_10_bwd_end):
  movaps %xmm4, (%rdi)
@@ -1291,7 +1291,7 @@ L(shl_11):
  lea (L(shl_11_loop_L2)-L(shl_11_loop_L1))(%r9), %r9
 L(L11_fwd):
  lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
  ud2
 L(shl_11_loop_L2):
  prefetchnta 0x1c0(%rsi)
@@ -1314,7 +1314,7 @@ L(shl_11_loop_L1):
  jb L(shl_11_end)
  movaps %xmm4, -0x20(%rdi)
  movaps %xmm5, -0x10(%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
  ud2
 L(shl_11_end):
  movaps %xmm4, -0x20(%rdi)
@@ -1334,7 +1334,7 @@ L(shl_11_bwd):
  lea (L(shl_11_bwd_loop_L2)-L(shl_11_bwd_loop_L1))(%r9), %r9
 L(L11_bwd):
  lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
  ud2
 L(shl_11_bwd_loop_L2):
  prefetchnta -0x1c0(%rsi)
@@ -1359,7 +1359,7 @@ L(shl_11_bwd_loop_L1):
  movaps %xmm3, 0x10(%rdi)
  jb L(shl_11_bwd_end)
  movaps %xmm4, (%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
  ud2
 L(shl_11_bwd_end):
  movaps %xmm4, (%rdi)
@@ -1376,7 +1376,7 @@ L(shl_12):
  lea (L(shl_12_loop_L2)-L(shl_12_loop_L1))(%r9), %r9
 L(L12_fwd):
  lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
  ud2
 L(shl_12_loop_L2):
  prefetchnta 0x1c0(%rsi)
@@ -1399,7 +1399,7 @@ L(shl_12_loop_L1):
  jb L(shl_12_end)
  movaps %xmm4, -0x20(%rdi)
  movaps %xmm5, -0x10(%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
  ud2
 L(shl_12_end):
  movaps %xmm4, -0x20(%rdi)
@@ -1419,7 +1419,7 @@ L(shl_12_bwd):
  lea (L(shl_12_bwd_loop_L2)-L(shl_12_bwd_loop_L1))(%r9), %r9
 L(L12_bwd):
  lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
  ud2
 L(shl_12_bwd_loop_L2):
  prefetchnta -0x1c0(%rsi)
@@ -1444,7 +1444,7 @@ L(shl_12_bwd_loop_L1):
  movaps %xmm3, 0x10(%rdi)
  jb L(shl_12_bwd_end)
  movaps %xmm4, (%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
  ud2
 L(shl_12_bwd_end):
  movaps %xmm4, (%rdi)
@@ -1461,7 +1461,7 @@ L(shl_13):
  lea (L(shl_13_loop_L2)-L(shl_13_loop_L1))(%r9), %r9
 L(L13_fwd):
  lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
  ud2
 L(shl_13_loop_L2):
  prefetchnta 0x1c0(%rsi)
@@ -1484,7 +1484,7 @@ L(shl_13_loop_L1):
  jb L(shl_13_end)
  movaps %xmm4, -0x20(%rdi)
  movaps %xmm5, -0x10(%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
  ud2
 L(shl_13_end):
  movaps %xmm4, -0x20(%rdi)
@@ -1504,7 +1504,7 @@ L(shl_13_bwd):
  lea (L(shl_13_bwd_loop_L2)-L(shl_13_bwd_loop_L1))(%r9), %r9
 L(L13_bwd):
  lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
  ud2
 L(shl_13_bwd_loop_L2):
  prefetchnta -0x1c0(%rsi)
@@ -1529,7 +1529,7 @@ L(shl_13_bwd_loop_L1):
  movaps %xmm3, 0x10(%rdi)
  jb L(shl_13_bwd_end)
  movaps %xmm4, (%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
  ud2
 L(shl_13_bwd_end):
  movaps %xmm4, (%rdi)
@@ -1546,7 +1546,7 @@ L(shl_14):
  lea (L(shl_14_loop_L2)-L(shl_14_loop_L1))(%r9), %r9
 L(L14_fwd):
  lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
  ud2
 L(shl_14_loop_L2):
  prefetchnta 0x1c0(%rsi)
@@ -1569,7 +1569,7 @@ L(shl_14_loop_L1):
  jb L(shl_14_end)
  movaps %xmm4, -0x20(%rdi)
  movaps %xmm5, -0x10(%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
  ud2
 L(shl_14_end):
  movaps %xmm4, -0x20(%rdi)
@@ -1589,7 +1589,7 @@ L(shl_14_bwd):
  lea (L(shl_14_bwd_loop_L2)-L(shl_14_bwd_loop_L1))(%r9), %r9
 L(L14_bwd):
  lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
  ud2
 L(shl_14_bwd_loop_L2):
  prefetchnta -0x1c0(%rsi)
@@ -1614,7 +1614,7 @@ L(shl_14_bwd_loop_L1):
  movaps %xmm3, 0x10(%rdi)
  jb L(shl_14_bwd_end)
  movaps %xmm4, (%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
  ud2
 L(shl_14_bwd_end):
  movaps %xmm4, (%rdi)
@@ -1631,7 +1631,7 @@ L(shl_15):
  lea (L(shl_15_loop_L2)-L(shl_15_loop_L1))(%r9), %r9
 L(L15_fwd):
  lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
  ud2
 L(shl_15_loop_L2):
  prefetchnta 0x1c0(%rsi)
@@ -1654,7 +1654,7 @@ L(shl_15_loop_L1):
  jb L(shl_15_end)
  movaps %xmm4, -0x20(%rdi)
  movaps %xmm5, -0x10(%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
  ud2
 L(shl_15_end):
  movaps %xmm4, -0x20(%rdi)
@@ -1674,7 +1674,7 @@ L(shl_15_bwd):
  lea (L(shl_15_bwd_loop_L2)-L(shl_15_bwd_loop_L1))(%r9), %r9
 L(L15_bwd):
  lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
  ud2
 L(shl_15_bwd_loop_L2):
  prefetchnta -0x1c0(%rsi)
@@ -1699,7 +1699,7 @@ L(shl_15_bwd_loop_L1):
  movaps %xmm3, 0x10(%rdi)
  jb L(shl_15_bwd_end)
  movaps %xmm4, (%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
  ud2
 L(shl_15_bwd_end):
  movaps %xmm4, (%rdi)
--
2.17.1

Reply | Threaded
Open this post in threaded view
|

[PATCH 15/24] i386: Use _CET_NOTRACK in i686/memcmp.S

H.J. Lu-30
In reply to this post by H.J. Lu-30
        * sysdeps/i386/i686/memcmp.S (memcmp): Add _CET_NOTRACK before
        indirect jump to jump table.
---
 sysdeps/i386/i686/memcmp.S | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sysdeps/i386/i686/memcmp.S b/sysdeps/i386/i686/memcmp.S
index a0bc9ec97c..3259076cd7 100644
--- a/sysdeps/i386/i686/memcmp.S
+++ b/sysdeps/i386/i686/memcmp.S
@@ -80,7 +80,7 @@ L(not_1):
  LOAD_JUMP_TABLE_ENTRY (L(table_32bytes), %ecx)
  addl %ecx, %edx
  addl %ecx, %esi
- jmp *%ebx
+ _CET_NOTRACK jmp *%ebx
 
  ALIGN (4)
 L(28bytes):
@@ -326,7 +326,7 @@ L(32bytesormore):
  LOAD_JUMP_TABLE_ENTRY (L(table_32bytes), %ecx)
  addl %ecx, %edx
  addl %ecx, %esi
- jmp *%ebx
+ _CET_NOTRACK jmp *%ebx
 
 L(load_ecx_28):
  addl $0x4, %edx
--
2.17.1

Reply | Threaded
Open this post in threaded view
|

[PATCH 16/24] i386: Use _CET_NOTRACK in memset-sse2.S

H.J. Lu-30
In reply to this post by H.J. Lu-30
        * sysdeps/i386/i686/multiarch/memset-sse2.S
        (BRANCH_TO_JMPTBL_ENTRY): Add _CET_NOTRACK before indirect jump
        to jump table.
---
 sysdeps/i386/i686/multiarch/memset-sse2.S | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sysdeps/i386/i686/multiarch/memset-sse2.S b/sysdeps/i386/i686/multiarch/memset-sse2.S
index 8869c80f25..5c9eba1e65 100644
--- a/sysdeps/i386/i686/multiarch/memset-sse2.S
+++ b/sysdeps/i386/i686/multiarch/memset-sse2.S
@@ -63,7 +63,7 @@
     add (%ebx,%ecx,4), %ebx; \
     add %ecx, %edx; \
     /* We loaded the jump table and adjusted EDX. Go.  */ \
-    jmp *%ebx
+    _CET_NOTRACK jmp *%ebx
 #else
 # define ENTRANCE
 # define RETURN_END ret
@@ -75,7 +75,7 @@
    absolute offsets.  */
 # define BRANCH_TO_JMPTBL_ENTRY(TABLE) \
     add %ecx, %edx; \
-    jmp *TABLE(,%ecx,4)
+    _CET_NOTRACK jmp *TABLE(,%ecx,4)
 #endif
 
  .section .text.sse2,"ax",@progbits
--
2.17.1

Reply | Threaded
Open this post in threaded view
|

[PATCH 17/24] i386: Use _CET_NOTRACK in memcmp-sse4.S

H.J. Lu-30
In reply to this post by H.J. Lu-30
        * sysdeps/i386/i686/multiarch/memcmp-sse4.S
        (BRANCH_TO_JMPTBL_ENTRY): Add _CET_NOTRACK before indirect jump
        to jump table.
---
 sysdeps/i386/i686/multiarch/memcmp-sse4.S | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sysdeps/i386/i686/multiarch/memcmp-sse4.S b/sysdeps/i386/i686/multiarch/memcmp-sse4.S
index be37108987..bc2cb2729d 100644
--- a/sysdeps/i386/i686/multiarch/memcmp-sse4.S
+++ b/sysdeps/i386/i686/multiarch/memcmp-sse4.S
@@ -59,7 +59,7 @@
  absolute address.  */ \
  addl (%ebx,INDEX,SCALE), %ebx; \
 /* We loaded the jump table and adjusted EDX/ESI. Go.  */ \
- jmp *%ebx
+ _CET_NOTRACK jmp *%ebx
 # else
 #  define JMPTBL(I, B) I
 
@@ -67,7 +67,7 @@
  jump table with relative offsets.  INDEX is a register contains the
  index into the jump table.   SCALE is the scale of INDEX. */
 #  define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
- jmp *TABLE(,INDEX,SCALE)
+ _CET_NOTRACK jmp *TABLE(,INDEX,SCALE)
 # endif
 
 
--
2.17.1

Reply | Threaded
Open this post in threaded view
|

[PATCH 18/24] i386: Use _CET_NOTRACK in memcpy-ssse3-rep.S

H.J. Lu-30
In reply to this post by H.J. Lu-30
        * sysdeps/i386/i686/multiarch/memcpy-ssse3-rep.S
        (BRANCH_TO_JMPTBL_ENTRY): Add _CET_NOTRACK before indirect jump
        to jump table.
        (BRANCH_TO_JMPTBL_ENTRY_TAIL): Likewise.
---
 sysdeps/i386/i686/multiarch/memcpy-ssse3-rep.S | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/sysdeps/i386/i686/multiarch/memcpy-ssse3-rep.S b/sysdeps/i386/i686/multiarch/memcpy-ssse3-rep.S
index 287f1df8ca..129310bd8a 100644
--- a/sysdeps/i386/i686/multiarch/memcpy-ssse3-rep.S
+++ b/sysdeps/i386/i686/multiarch/memcpy-ssse3-rep.S
@@ -71,7 +71,7 @@
        absolute address.  */ \
     addl (%ebx,INDEX,SCALE), %ebx; \
     /* We loaded the jump table.  Go.  */ \
-    jmp *%ebx
+    _CET_NOTRACK jmp *%ebx
 
 # define BRANCH_TO_JMPTBL_ENTRY_VALUE(TABLE) \
     addl $(TABLE - .), %ebx
@@ -79,7 +79,7 @@
 # define BRANCH_TO_JMPTBL_ENTRY_TAIL(TABLE, INDEX, SCALE) \
     addl (%ebx,INDEX,SCALE), %ebx; \
     /* We loaded the jump table.  Go.  */ \
-    jmp *%ebx
+    _CET_NOTRACK jmp *%ebx
 #else
 # define PARMS 4
 # define ENTRANCE
@@ -91,12 +91,12 @@
    absolute offsets.  INDEX is a register contains the index into the
    jump table.  SCALE is the scale of INDEX. */
 # define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
-    jmp *TABLE(,INDEX,SCALE)
+    _CET_NOTRACK jmp *TABLE(,INDEX,SCALE)
 
 # define BRANCH_TO_JMPTBL_ENTRY_VALUE(TABLE)
 
 # define BRANCH_TO_JMPTBL_ENTRY_TAIL(TABLE, INDEX, SCALE) \
-    jmp *TABLE(,INDEX,SCALE)
+    _CET_NOTRACK jmp *TABLE(,INDEX,SCALE)
 #endif
 
  .section .text.ssse3,"ax",@progbits
--
2.17.1

Reply | Threaded
Open this post in threaded view
|

[PATCH 19/24] i386: Use _CET_NOTRACK in memcpy-ssse3.S

H.J. Lu-30
In reply to this post by H.J. Lu-30
        * sysdeps/i386/i686/multiarch/memcpy-ssse3.S
        (BRANCH_TO_JMPTBL_ENTRY): Add _CET_NOTRACK before indirect jump
        to jump table.
---
 sysdeps/i386/i686/multiarch/memcpy-ssse3.S | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sysdeps/i386/i686/multiarch/memcpy-ssse3.S b/sysdeps/i386/i686/multiarch/memcpy-ssse3.S
index bb35c4c2b7..5ead569a5e 100644
--- a/sysdeps/i386/i686/multiarch/memcpy-ssse3.S
+++ b/sysdeps/i386/i686/multiarch/memcpy-ssse3.S
@@ -71,7 +71,7 @@
  absolute address.  */ \
  addl (%ebx, INDEX, SCALE), %ebx; \
     /* We loaded the jump table.  Go.  */ \
- jmp *%ebx
+ _CET_NOTRACK jmp *%ebx
 # else
 
 #  define PARMS 4
@@ -85,7 +85,7 @@
  jump table.  SCALE is the scale of INDEX. */
 
 #  define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
- jmp *TABLE(, INDEX, SCALE)
+ _CET_NOTRACK jmp *TABLE(, INDEX, SCALE)
 # endif
 
  .section .text.ssse3,"ax",@progbits
--
2.17.1

123