[PATCH 0/4] i386: Finish CET support

classic Classic list List threaded Threaded
10 messages Options
Reply | Threaded
Open this post in threaded view
|

[PATCH 0/4] i386: Finish CET support

H.J. Lu-30
This patch set finishes CET support on i386:

1. getcontext, setcontext and swapcontext are updated not to preserve
EAX, ECX and EDX.  Since they are caller-saved, caller will reload them
after getcontext, setcontext and swapcontext calls if needed.  The extra
scratch registers are used to enable CET.
2. Add missing _CET_ENDBR to i386 assembly files.
3. Enable CET support in i386 ucontext functions.

Tested on i386 CET/non-CET machines.


H.J. Lu (4):
  i386: Don't unnecessarily save and restore EAX, ECX and EDX [BZ#
    25262]
  i386/sub_n.S: Add a missing _CET_ENDBR to indirect jump target
  i386: Add _CET_ENDBR to assembly files without ENTRY
  i386: Enable CET support in ucontext functions

 sysdeps/i386/i386-mcount.S                  |   2 +
 sysdeps/i386/nptl/pthread_spin_lock.S       |   2 +
 sysdeps/i386/nptl/pthread_spin_unlock.S     |   3 +
 sysdeps/i386/pthread_spin_trylock.S         |   2 +
 sysdeps/i386/sub_n.S                        |   1 +
 sysdeps/unix/sysv/linux/i386/_exit.S        |   1 +
 sysdeps/unix/sysv/linux/i386/getcontext.S   |  64 +++++++-
 sysdeps/unix/sysv/linux/i386/makecontext.S  | 123 +++++++++++++++
 sysdeps/unix/sysv/linux/i386/setcontext.S   | 112 ++++++++++++--
 sysdeps/unix/sysv/linux/i386/swapcontext.S  | 156 ++++++++++++++++++--
 sysdeps/unix/sysv/linux/i386/sysdep.h       |   5 +
 sysdeps/unix/sysv/linux/i386/ucontext_i.sym |   1 +
 12 files changed, 443 insertions(+), 29 deletions(-)

--
2.21.0

Reply | Threaded
Open this post in threaded view
|

[PATCH 1/4] i386: Don't unnecessarily save and restore EAX, ECX and EDX [BZ# 25262]

H.J. Lu-30
On i386, since EAX, ECX and EDX are caller-saved, there is no need
to save and restore EAX, ECX and EDX in getcontext, setcontext and
swapcontext.  They just need to clear EAX on success.  The extra
scratch registers are needed to enable CET.

Tested on i386.
---
 sysdeps/unix/sysv/linux/i386/getcontext.S  |  8 +-------
 sysdeps/unix/sysv/linux/i386/setcontext.S  | 11 ++++-------
 sysdeps/unix/sysv/linux/i386/swapcontext.S | 17 +++++------------
 3 files changed, 10 insertions(+), 26 deletions(-)

diff --git a/sysdeps/unix/sysv/linux/i386/getcontext.S b/sysdeps/unix/sysv/linux/i386/getcontext.S
index 1568f9604a..85da8195f9 100644
--- a/sysdeps/unix/sysv/linux/i386/getcontext.S
+++ b/sysdeps/unix/sysv/linux/i386/getcontext.S
@@ -26,13 +26,7 @@ ENTRY(__getcontext)
  /* Load address of the context data structure.  */
  movl 4(%esp), %eax
 
- /* Return value of getcontext.  EAX is the only register whose
-   value is not preserved.  */
- movl $0, oEAX(%eax)
-
- /* Save the 32-bit register values and the return address.  */
- movl %ecx, oECX(%eax)
- movl %edx, oEDX(%eax)
+ /* Save the preserved register values and the return address.  */
  movl %edi, oEDI(%eax)
  movl %esi, oESI(%eax)
  movl %ebp, oEBP(%eax)
diff --git a/sysdeps/unix/sysv/linux/i386/setcontext.S b/sysdeps/unix/sysv/linux/i386/setcontext.S
index dffe1d18e7..9095584b44 100644
--- a/sysdeps/unix/sysv/linux/i386/setcontext.S
+++ b/sysdeps/unix/sysv/linux/i386/setcontext.S
@@ -65,22 +65,19 @@ ENTRY(__setcontext)
  cfi_offset (esi, oESI)
  cfi_offset (ebp, oEBP)
  cfi_offset (ebx, oEBX)
- cfi_offset (edx, oEDX)
- cfi_offset (ecx, oECX)
  movl oESP(%eax), %esp
 
  /* Push the return address on the new stack so we can return there.  */
  pushl %ecx
 
- /* Load the values of all the 32-bit registers (except ESP).
-   Since we are loading from EAX, it must be last.  */
+ /* Load the values of all the preserved registers (except ESP).  */
  movl oEDI(%eax), %edi
  movl oESI(%eax), %esi
  movl oEBP(%eax), %ebp
  movl oEBX(%eax), %ebx
- movl oEDX(%eax), %edx
- movl oECX(%eax), %ecx
- movl oEAX(%eax), %eax
+
+ /* All done, return 0 for success.  */
+ xorl %eax, %eax
 
  /* End FDE here, we fall into another context.  */
  cfi_endproc
diff --git a/sysdeps/unix/sysv/linux/i386/swapcontext.S b/sysdeps/unix/sysv/linux/i386/swapcontext.S
index 6fdc176535..83e5f0b2ff 100644
--- a/sysdeps/unix/sysv/linux/i386/swapcontext.S
+++ b/sysdeps/unix/sysv/linux/i386/swapcontext.S
@@ -26,13 +26,7 @@ ENTRY(__swapcontext)
  /* Load address of the context data structure we save in.  */
  movl 4(%esp), %eax
 
- /* Return value of swapcontext.  EAX is the only register whose
-   value is not preserved.  */
- movl $0, oEAX(%eax)
-
- /* Save the 32-bit register values and the return address.  */
- movl %ecx, oECX(%eax)
- movl %edx, oEDX(%eax)
+ /* Save the preserved register values and the return address.  */
  movl %edi, oEDI(%eax)
  movl %esi, oESI(%eax)
  movl %ebp, oEBP(%eax)
@@ -91,15 +85,14 @@ ENTRY(__swapcontext)
  /* Push the return address on the new stack so we can return there.  */
  pushl %ecx
 
- /* Load the values of all the 32-bit registers (except ESP).
-   Since we are loading from EAX, it must be last.  */
+ /* Load the values of all the preserved registers (except ESP).  */
  movl oEDI(%eax), %edi
  movl oESI(%eax), %esi
  movl oEBP(%eax), %ebp
  movl oEBX(%eax), %ebx
- movl oEDX(%eax), %edx
- movl oECX(%eax), %ecx
- movl oEAX(%eax), %eax
+
+ /* All done, return 0 for success.  */
+ xorl %eax, %eax
 
  /* The following 'ret' will pop the address of the code and jump
    to it.  */
--
2.21.0

Reply | Threaded
Open this post in threaded view
|

[PATCH 2/4] i386/sub_n.S: Add a missing _CET_ENDBR to indirect jump target

H.J. Lu-30
In reply to this post by H.J. Lu-30
Add a missing _CET_ENDBR to indirect jump target in sysdeps/i386/sub_n.S.
---
 sysdeps/i386/sub_n.S | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sysdeps/i386/sub_n.S b/sysdeps/i386/sub_n.S
index 1562cc00fd..1760beb38f 100644
--- a/sysdeps/i386/sub_n.S
+++ b/sysdeps/i386/sub_n.S
@@ -91,6 +91,7 @@ L(oop): movl (%esi),%eax
  movl 8(%esi),%eax
  sbbl 8(%edx),%eax
  movl %eax,8(%edi)
+ _CET_ENDBR
  movl 12(%esi),%eax
  sbbl 12(%edx),%eax
  movl %eax,12(%edi)
--
2.21.0

Reply | Threaded
Open this post in threaded view
|

[PATCH 3/4] i386: Add _CET_ENDBR to assembly files without ENTRY

H.J. Lu-30
In reply to this post by H.J. Lu-30
Add _CET_ENDBR to i386 assembly files which don't use ENTRY to add
ENDBR32 at function entries when CET is enabled.
---
 sysdeps/i386/i386-mcount.S              | 2 ++
 sysdeps/i386/nptl/pthread_spin_lock.S   | 2 ++
 sysdeps/i386/nptl/pthread_spin_unlock.S | 3 +++
 sysdeps/i386/pthread_spin_trylock.S     | 2 ++
 sysdeps/unix/sysv/linux/i386/_exit.S    | 1 +
 5 files changed, 10 insertions(+)

diff --git a/sysdeps/i386/i386-mcount.S b/sysdeps/i386/i386-mcount.S
index 32b2a5f0bf..6575b84a1b 100644
--- a/sysdeps/i386/i386-mcount.S
+++ b/sysdeps/i386/i386-mcount.S
@@ -30,6 +30,7 @@
  .type C_SYMBOL_NAME(_mcount), @function
  .align ALIGNARG(4)
 C_LABEL(_mcount)
+ _CET_ENDBR
  /* Save the caller-clobbered registers.  */
  pushl %eax
  pushl %ecx
@@ -58,6 +59,7 @@ weak_alias (_mcount, mcount)
  .type C_SYMBOL_NAME(__fentry__), @function
  .align ALIGNARG(4)
 C_LABEL(__fentry__)
+ _CET_ENDBR
  /* Save the caller-clobbered registers.  */
  pushl %eax
  pushl %ecx
diff --git a/sysdeps/i386/nptl/pthread_spin_lock.S b/sysdeps/i386/nptl/pthread_spin_lock.S
index 62c72b9063..2876a8b1cf 100644
--- a/sysdeps/i386/nptl/pthread_spin_lock.S
+++ b/sysdeps/i386/nptl/pthread_spin_lock.S
@@ -15,12 +15,14 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
+#include <sysdep.h>
 #include <lowlevellock.h>
 
  .globl pthread_spin_lock
  .type pthread_spin_lock,@function
  .align 16
 pthread_spin_lock:
+ _CET_ENDBR
  mov 4(%esp), %eax
 1: LOCK
  decl 0(%eax)
diff --git a/sysdeps/i386/nptl/pthread_spin_unlock.S b/sysdeps/i386/nptl/pthread_spin_unlock.S
index d190ca562d..dc412190ae 100644
--- a/sysdeps/i386/nptl/pthread_spin_unlock.S
+++ b/sysdeps/i386/nptl/pthread_spin_unlock.S
@@ -16,10 +16,13 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
+#include <sysdep.h>
+
  .globl pthread_spin_unlock
  .type pthread_spin_unlock,@function
  .align 16
 pthread_spin_unlock:
+ _CET_ENDBR
  movl 4(%esp), %eax
  movl $1, (%eax)
  xorl %eax, %eax
diff --git a/sysdeps/i386/pthread_spin_trylock.S b/sysdeps/i386/pthread_spin_trylock.S
index 0ec09d8b64..c7f94375a7 100644
--- a/sysdeps/i386/pthread_spin_trylock.S
+++ b/sysdeps/i386/pthread_spin_trylock.S
@@ -16,6 +16,7 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
+#include <sysdep.h>
 #include <pthread-errnos.h>
 
 
@@ -29,6 +30,7 @@
  .type pthread_spin_trylock,@function
  .align 16
 pthread_spin_trylock:
+ _CET_ENDBR
  movl 4(%esp), %edx
  movl $1, %eax
  xorl %ecx, %ecx
diff --git a/sysdeps/unix/sysv/linux/i386/_exit.S b/sysdeps/unix/sysv/linux/i386/_exit.S
index d59c127501..a21d5f69b3 100644
--- a/sysdeps/unix/sysv/linux/i386/_exit.S
+++ b/sysdeps/unix/sysv/linux/i386/_exit.S
@@ -21,6 +21,7 @@
  .type _exit,@function
  .global _exit
 _exit:
+ _CET_ENDBR
  movl 4(%esp), %ebx
 
  /* Try the new syscall first.  */
--
2.21.0

Reply | Threaded
Open this post in threaded view
|

[PATCH 4/4] i386: Enable CET support in ucontext functions

H.J. Lu-30
In reply to this post by H.J. Lu-30
1. getcontext and swapcontext are updated to save the caller's shadow
stack pointer and return address.
2. setcontext and swapcontext are updated to restore shadow stack and
jump to new context directly.
3. makecontext is updated to allocate a new shadow stack and set the
caller's return address to the helper code, L(exitcode).

Since makecontext allocates a new shadow stack when making a new
context and kernel allocates a new shadow stack for clone/fork/vfork
syscalls, we track the current shadow stack base.  In setcontext and
swapcontext, if the target shadow stack base is the same as the current
shadow stack base, we unwind the shadow stack.  Otherwise it is a stack
switch and we look for a restore token.

We enable shadow stack at run-time only if program and all used shared
objects, including dlopened ones, are shadow stack enabled, which means
that they must be compiled with GCC 8 or above and glibc 2.28 or above.
We need to save and restore shadow stack only if shadow stack is enabled.
When caller of getcontext, setcontext, swapcontext and makecontext is
compiled with smaller ucontext_t, shadow stack won't be enabled at
run-time.  We check if shadow stack is enabled before accessing the
extended field in ucontext_t.

Tested on i386 CET/non-CET machines.
---
 sysdeps/unix/sysv/linux/i386/getcontext.S   |  56 ++++++++
 sysdeps/unix/sysv/linux/i386/makecontext.S  | 123 +++++++++++++++++
 sysdeps/unix/sysv/linux/i386/setcontext.S   | 101 +++++++++++++-
 sysdeps/unix/sysv/linux/i386/swapcontext.S  | 139 ++++++++++++++++++++
 sysdeps/unix/sysv/linux/i386/sysdep.h       |   5 +
 sysdeps/unix/sysv/linux/i386/ucontext_i.sym |   1 +
 6 files changed, 422 insertions(+), 3 deletions(-)

diff --git a/sysdeps/unix/sysv/linux/i386/getcontext.S b/sysdeps/unix/sysv/linux/i386/getcontext.S
index 85da8195f9..0ab2808815 100644
--- a/sysdeps/unix/sysv/linux/i386/getcontext.S
+++ b/sysdeps/unix/sysv/linux/i386/getcontext.S
@@ -18,6 +18,7 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
+#include <asm/prctl.h>
 
 #include "ucontext_i.h"
 
@@ -42,6 +43,61 @@ ENTRY(__getcontext)
  movw %fs, %dx
  movl %edx, oFS(%eax)
 
+#if SHSTK_ENABLED
+ /* Check if shadow stack is enabled.  */
+ testl $X86_FEATURE_1_SHSTK, %gs:FEATURE_1_OFFSET
+ jz L(no_shstk)
+
+ /* Save EAX in EDX.  */
+ movl %eax, %edx
+
+ xorl %eax, %eax
+ cmpl %gs:SSP_BASE_OFFSET, %eax
+ jnz L(shadow_stack_bound_recorded)
+
+ /* Save EBX in the unused EAX slot.  */
+ movl %ebx, oEAX(%edx)
+
+ /* Get the base address and size of the default shadow stack
+   which must be the current shadow stack since nothing has
+   been recorded yet.  */
+ sub $24, %esp
+ mov %esp, %ecx
+ movl $ARCH_CET_STATUS, %ebx
+ movl $__NR_arch_prctl, %eax
+ ENTER_KERNEL
+ testl %eax, %eax
+ jz L(continue_no_err)
+
+ /* This should never happen.  */
+ hlt
+
+L(continue_no_err):
+ /* Restore EBX from the EAX slot.  */
+ movl oEAX(%edx), %ebx
+
+ /* Record the base of the current shadow stack.  */
+ movl 8(%esp), %eax
+ movl %eax, %gs:SSP_BASE_OFFSET
+ add $24, %esp
+
+L(shadow_stack_bound_recorded):
+ /* Load address of the context data structure.  */
+ movl 4(%esp), %eax
+
+ /* Get the current shadow stack pointer.  */
+ rdsspd %edx
+ /* NB: Save the caller's shadow stack so that we can jump back
+   to the caller directly.  */
+ addl $4, %edx
+ movl %edx, oSSP(%eax)
+
+ /* Save the current shadow stack base in ucontext.  */
+ movl %gs:SSP_BASE_OFFSET, %edx
+ movl %edx, (oSSP + 4)(%eax)
+
+L(no_shstk):
+#endif
  /* We have separate floating-point register content memory on the
    stack.  We use the __fpregs_mem block in the context.  Set the
    links up correctly.  */
diff --git a/sysdeps/unix/sysv/linux/i386/makecontext.S b/sysdeps/unix/sysv/linux/i386/makecontext.S
index e2f8afc895..2d227abeef 100644
--- a/sysdeps/unix/sysv/linux/i386/makecontext.S
+++ b/sysdeps/unix/sysv/linux/i386/makecontext.S
@@ -18,6 +18,7 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
+#include <asm/prctl.h>
 
 #include "ucontext_i.h"
 
@@ -68,6 +69,127 @@ ENTRY(__makecontext)
  jnz 1b
 2:
 
+#if SHSTK_ENABLED
+ /* Check if Shadow Stack is enabled.  */
+ testl $X86_FEATURE_1_SHSTK, %gs:FEATURE_1_OFFSET
+ jz L(skip_ssp)
+
+ /* Reload the pointer to ucontext.  */
+ movl 4(%esp), %eax
+
+ /* Shadow stack is enabled.  We need to allocate a new shadow
+   stack.  */
+ subl oSS_SP(%eax), %edx
+ shrl $STACK_SIZE_TO_SHADOW_STACK_SIZE_SHIFT, %edx
+
+ /* Align shadow stack size to 8 bytes.  */
+ addl $7, %edx
+ andl $-8, %edx
+
+ /* Store shadow stack size in __ssp[2].  */
+ movl %edx, (oSSP + 8)(%eax)
+
+ /* Save ESI in the unused ECX slot.  */
+ movl %esi, oECX(%eax)
+ /* Save EDI in the unused EDX slot.  */
+ movl %edi, oEDX(%eax)
+
+ /* Save the pointer to ucontext.  */
+ movl %eax, %edi
+
+ /* Get the original shadow stack pointer.  */
+ rdsspd %esi
+
+ /* Align the saved original shadow stack pointer to the next
+   8 byte aligned boundary.  */
+ andl $-8, %esi
+
+ /* Load the top of the new stack into EDX.  */
+ movl oESP(%eax), %edx
+
+ /* We need to terminate the FDE here because the unwinder looks
+   at ra-1 for unwind information.  */
+ cfi_endproc
+
+ /* Swap the original stack pointer with the top of the new
+   stack.  */
+ xchgl %esp, %edx
+
+ /* Add 4 bytes since CALL will push the 4-byte return address
+   onto stack.  */
+ addl $4, %esp
+
+ /* Allocate the new shadow stack.  Save EBX in the unused EAX
+   slot.  */
+ movl %ebx, oEAX(%eax)
+
+ /* CET syscall takes 64-bit sizes.  */
+ subl $16, %esp
+ movl (oSSP + 8)(%eax), %ecx
+ movl %ecx, (%esp)
+ movl $0, 4(%esp)
+ movl %ecx, 8(%esp)
+ movl $0, 12(%esp)
+ movl %esp, %ecx
+
+ movl $ARCH_CET_ALLOC_SHSTK, %ebx
+ movl $__NR_arch_prctl, %eax
+ ENTER_KERNEL
+ testl %eax, %eax
+ jne L(hlt) /* This should never happen.  */
+
+ /* Copy the base address of the new shadow stack to __ssp[1].  */
+ movl (%esp), %eax
+ movl %eax, (oSSP + 4)(%edi)
+
+ addl $16, %esp
+
+ /* Restore EBX from the EAX slot.  */
+ movl oEAX(%edi), %ebx
+
+ /* Get the size of the new shadow stack.  */
+ movl (oSSP + 8)(%edi), %ecx
+
+ /* Use the restore stoken to restore the new shadow stack.  */
+ rstorssp -8(%eax, %ecx)
+
+ /* Save the restore token at the next 8 byte aligned boundary
+   on the original shadow stack.  */
+ saveprevssp
+
+ /* Push the address of "jmp exitcode" onto the new stack as
+   well as the new shadow stack.  */
+ call 1f
+ jmp L(exitcode)
+1:
+
+ /* Get the new shadow stack pointer.  */
+ rdsspd %eax
+
+ /* Use the restore stoken to restore the original shadow stack.  */
+ rstorssp -8(%esi)
+
+ /* Save the restore token on the new shadow stack.  */
+ saveprevssp
+
+ /* Store the new shadow stack pointer in __ssp[0].  */
+ movl %eax, oSSP(%edi)
+
+ /* Restore the original stack.  */
+ mov %edx, %esp
+
+ cfi_startproc
+
+ /* Restore ESI from the ECX slot.  */
+ movl oECX(%edi), %esi
+ /* Restore EDI from the EDX slot.  */
+ movl oEDX(%edi), %edi
+
+ ret
+
+L(skip_ssp):
+#endif
+
  /* If the function we call returns we must continue with the
    context which is given in the uc_link element.  To do this
    set the return address for the function the user provides
@@ -123,6 +245,7 @@ L(call_exit):
  call HIDDEN_JUMPTARGET(exit)
  /* The 'exit' call should never return.  In case it does cause
    the process to terminate.  */
+L(hlt):
  hlt
  cfi_startproc
 END(__makecontext)
diff --git a/sysdeps/unix/sysv/linux/i386/setcontext.S b/sysdeps/unix/sysv/linux/i386/setcontext.S
index 9095584b44..3e55452d32 100644
--- a/sysdeps/unix/sysv/linux/i386/setcontext.S
+++ b/sysdeps/unix/sysv/linux/i386/setcontext.S
@@ -18,6 +18,7 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
+#include <asm/prctl.h>
 
 #include "ucontext_i.h"
 
@@ -56,9 +57,6 @@ ENTRY(__setcontext)
  movl oFS(%eax), %ecx
  movw %cx, %fs
 
- /* Fetch the address to return to.  */
- movl oEIP(%eax), %ecx
-
  /* Load the new stack pointer.  */
  cfi_def_cfa (eax, 0)
  cfi_offset (edi, oEDI)
@@ -67,6 +65,103 @@ ENTRY(__setcontext)
  cfi_offset (ebx, oEBX)
  movl oESP(%eax), %esp
 
+#if SHSTK_ENABLED
+ /* Check if Shadow Stack is enabled.  */
+ testl $X86_FEATURE_1_SHSTK, %gs:FEATURE_1_OFFSET
+ jz L(no_shstk)
+
+ /* If the base of the target shadow stack is the same as the
+   base of the current shadow stack, we unwind the shadow
+   stack.  Otherwise it is a stack switch and we look for a
+   restore token.  */
+ movl oSSP(%eax), %esi
+ movl %esi, %edi
+
+ /* Get the base of the target shadow stack.  */
+ movl (oSSP + 4)(%eax), %ecx
+ cmpl %gs:SSP_BASE_OFFSET, %ecx
+ je L(unwind_shadow_stack)
+
+L(find_restore_token_loop):
+ /* Align the saved original shadow stack pointer to the next
+   8 byte aligned boundary.  */
+ andl $-8, %esi
+
+ /* Look for a restore token.  */
+ movl -8(%esi), %ebx
+ andl $-8, %ebx
+ cmpl %esi, %ebx
+ je L(restore_shadow_stack)
+
+ /* Try the next slot.  */
+ subl $8, %esi
+ jmp L(find_restore_token_loop)
+
+L(restore_shadow_stack):
+ /* Pop return address from the shadow stack since setcontext
+   will not return.  */
+ movl $1, %ebx
+ incsspd %ebx
+
+ /* Use the restore stoken to restore the target shadow stack.  */
+ rstorssp -8(%esi)
+
+ /* Save the restore token on the old shadow stack.  NB: This
+   restore token may be checked by setcontext or swapcontext
+   later.  */
+ saveprevssp
+
+ /* Record the new shadow stack base that was switched to.  */
+ movl (oSSP + 4)(%eax), %ebx
+ movl %ebx, %gs:SSP_BASE_OFFSET
+
+L(unwind_shadow_stack):
+ rdsspd %ebx
+ subl %edi, %ebx
+ je L(skip_unwind_shadow_stack)
+ negl %ebx
+ shrl $2, %ebx
+ movl $255, %esi
+L(loop):
+ cmpl %esi, %ebx
+ cmovb %ebx, %esi
+ incsspd %esi
+ subl %esi, %ebx
+ ja L(loop)
+
+L(skip_unwind_shadow_stack):
+
+ /* Load the values of all the preserved registers (except ESP).  */
+ movl oEDI(%eax), %edi
+ movl oESI(%eax), %esi
+ movl oEBP(%eax), %ebp
+ movl oEBX(%eax), %ebx
+
+ /* Get the return address set with getcontext.  */
+ movl oEIP(%eax), %ecx
+
+ /* Check if return address is valid for the case when setcontext
+   is invoked from L(exitcode) with linked context.  */
+ rdsspd %eax
+ cmpl (%eax), %ecx
+ /* Clear EAX to indicate success.  NB: Don't use xorl to keep
+   EFLAGS for jne.  */
+ movl $0, %eax
+ jne L(jmp)
+ /* Return to the new context if return address valid.  */
+ pushl %ecx
+ ret
+
+L(jmp):
+ /* Jump to the new context directly.  */
+ jmp *%ecx
+
+L(no_shstk):
+#endif
+
+ /* Fetch the address to return to.  */
+ movl oEIP(%eax), %ecx
+
  /* Push the return address on the new stack so we can return there.  */
  pushl %ecx
 
diff --git a/sysdeps/unix/sysv/linux/i386/swapcontext.S b/sysdeps/unix/sysv/linux/i386/swapcontext.S
index 83e5f0b2ff..977f291294 100644
--- a/sysdeps/unix/sysv/linux/i386/swapcontext.S
+++ b/sysdeps/unix/sysv/linux/i386/swapcontext.S
@@ -18,6 +18,7 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
+#include <asm/prctl.h>
 
 #include "ucontext_i.h"
 
@@ -76,6 +77,144 @@ ENTRY(__swapcontext)
  movl oFS(%eax), %edx
  movw %dx, %fs
 
+#if SHSTK_ENABLED
+ /* Check if Shadow Stack is enabled.  */
+ testl $X86_FEATURE_1_SHSTK, %gs:FEATURE_1_OFFSET
+ jz L(no_shstk)
+
+ xorl %eax, %eax
+ cmpl %gs:SSP_BASE_OFFSET, %eax
+ jnz L(shadow_stack_bound_recorded)
+
+ /* Get the base address and size of the default shadow stack
+   which must be the current shadow stack since nothing has
+   been recorded yet.  */
+ sub $24, %esp
+ mov %esp, %ecx
+ movl $ARCH_CET_STATUS, %ebx
+ movl $__NR_arch_prctl, %eax
+ ENTER_KERNEL
+ testl %eax, %eax
+ jz L(continue_no_err)
+
+ /* This should never happen.  */
+ hlt
+
+L(continue_no_err):
+ /* Record the base of the current shadow stack.  */
+ movl 8(%esp), %eax
+ movl %eax, %gs:SSP_BASE_OFFSET
+ add $24, %esp
+
+L(shadow_stack_bound_recorded):
+ /* Load address of the context data structure we save in.  */
+ movl 4(%esp), %eax
+
+ /* Load address of the context data structure we swap in  */
+ movl 8(%esp), %edx
+
+       /* If we unwind the stack, we can't undo stack unwinding.  Just
+   save the target shadow stack pointer as the current shadow
+   stack pointer.   */
+ movl oSSP(%edx), %ecx
+ movl %ecx, oSSP(%eax)
+
+ /* Save the current shadow stack base in ucontext.  */
+ movl %gs:SSP_BASE_OFFSET, %ecx
+ movl %ecx, (oSSP + 4)(%eax)
+
+ /* If the base of the target shadow stack is the same as the
+   base of the current shadow stack, we unwind the shadow
+   stack.  Otherwise it is a stack switch and we look for a
+   restore token.  */
+ movl oSSP(%edx), %esi
+ movl %esi, %edi
+
+ /* Get the base of the target shadow stack.  */
+ movl (oSSP + 4)(%edx), %ecx
+ cmpl %gs:SSP_BASE_OFFSET, %ecx
+ je L(unwind_shadow_stack)
+
+L(find_restore_token_loop):
+ /* Align the saved original shadow stack pointer to the next
+   8 byte aligned boundary.  */
+ andl $-8, %esi
+
+ /* Look for a restore token.  */
+ movl -8(%esi), %ebx
+ andl $-8, %ebx
+ cmpl %esi, %ebx
+ je L(restore_shadow_stack)
+
+ /* Try the next slot.  */
+ subl $8, %esi
+ jmp L(find_restore_token_loop)
+
+L(restore_shadow_stack):
+ /* The target shadow stack will be restored.  Save the current
+   shadow stack pointer.  */
+ rdsspd %ecx
+ movl %ecx, oSSP(%eax)
+
+ /* Use the restore stoken to restore the target shadow stack.  */
+ rstorssp -8(%esi)
+
+ /* Save the restore token on the old shadow stack.  NB: This
+   restore token may be checked by setcontext or swapcontext
+   later.  */
+ saveprevssp
+
+ /* Record the new shadow stack base that was switched to.  */
+ movl (oSSP + 4)(%edx), %ebx
+ movl %ebx, %gs:SSP_BASE_OFFSET
+
+L(unwind_shadow_stack):
+ rdsspd %ebx
+ subl %edi, %ebx
+ je L(skip_unwind_shadow_stack)
+ negl %ebx
+ shrl $2, %ebx
+ movl $255, %esi
+L(loop):
+ cmpl %esi, %ebx
+ cmovb %ebx, %esi
+ incsspd %esi
+ subl %esi, %ebx
+ ja L(loop)
+
+L(skip_unwind_shadow_stack):
+
+ /* Load the new stack pointer.  */
+ movl oESP(%edx), %esp
+
+ /* Load the values of all the preserved registers (except ESP).  */
+ movl oEDI(%edx), %edi
+ movl oESI(%edx), %esi
+ movl oEBP(%edx), %ebp
+ movl oEBX(%edx), %ebx
+
+ /* Get the return address set with getcontext.  */
+ movl oEIP(%edx), %ecx
+
+ /* Check if return address is valid for the case when setcontext
+   is invoked from L(exitcode) with linked context.  */
+ rdsspd %eax
+ cmpl (%eax), %ecx
+ /* Clear EAX to indicate success.  NB: Don't use xorl to keep
+   EFLAGS for jne.  */
+ movl $0, %eax
+ jne L(jmp)
+ /* Return to the new context if return address valid.  */
+ pushl %ecx
+ ret
+
+L(jmp):
+ /* Jump to the new context directly.  */
+ jmp *%ecx
+
+L(no_shstk):
+#endif
+
  /* Fetch the address to return to.  */
  movl oEIP(%eax), %ecx
 
diff --git a/sysdeps/unix/sysv/linux/i386/sysdep.h b/sysdeps/unix/sysv/linux/i386/sysdep.h
index 7066ddc214..7bb83d5617 100644
--- a/sysdeps/unix/sysv/linux/i386/sysdep.h
+++ b/sysdeps/unix/sysv/linux/i386/sysdep.h
@@ -659,4 +659,9 @@ struct libc_do_syscall_args
 # endif
 #endif
 
+/* Each shadow stack slot takes 4 bytes.  Assuming that each stack
+   frame takes 128 bytes, this is used to compute shadow stack size
+   from stack size.  */
+#define STACK_SIZE_TO_SHADOW_STACK_SIZE_SHIFT 5
+
 #endif /* linux/i386/sysdep.h */
diff --git a/sysdeps/unix/sysv/linux/i386/ucontext_i.sym b/sysdeps/unix/sysv/linux/i386/ucontext_i.sym
index b11a5509cd..933c1924eb 100644
--- a/sysdeps/unix/sysv/linux/i386/ucontext_i.sym
+++ b/sysdeps/unix/sysv/linux/i386/ucontext_i.sym
@@ -28,3 +28,4 @@ oEIP mreg (EIP)
 oFPREGS mcontext (fpregs)
 oSIGMASK ucontext (uc_sigmask)
 oFPREGSMEM ucontext (__fpregs_mem)
+oSSP ucontext (__ssp)
--
2.21.0

Reply | Threaded
Open this post in threaded view
|

Re: [PATCH 0/4] i386: Finish CET support

Florian Weimer-5
In reply to this post by H.J. Lu-30
* H. J. Lu:

> This patch set finishes CET support on i386:
>
> 1. getcontext, setcontext and swapcontext are updated not to preserve
> EAX, ECX and EDX.  Since they are caller-saved, caller will reload them
> after getcontext, setcontext and swapcontext calls if needed.  The extra
> scratch registers are used to enable CET.
> 2. Add missing _CET_ENDBR to i386 assembly files.
> 3. Enable CET support in i386 ucontext functions.
>
> Tested on i386 CET/non-CET machines.

Has the kernel ABI been finalized?

I wonder if we should add IFUNC resolvers which set a flag, and check
that flag at the start of (some of) these functions, so that they cannot
be used as ROP gadgets in programs that do not reference them.

Thanks,
Florian

Reply | Threaded
Open this post in threaded view
|

Re: [PATCH 0/4] i386: Finish CET support

H.J. Lu-30
On Thu, Dec 12, 2019 at 4:46 AM Florian Weimer <[hidden email]> wrote:

>
> * H. J. Lu:
>
> > This patch set finishes CET support on i386:
> >
> > 1. getcontext, setcontext and swapcontext are updated not to preserve
> > EAX, ECX and EDX.  Since they are caller-saved, caller will reload them
> > after getcontext, setcontext and swapcontext calls if needed.  The extra
> > scratch registers are used to enable CET.
> > 2. Add missing _CET_ENDBR to i386 assembly files.
> > 3. Enable CET support in i386 ucontext functions.
> >
> > Tested on i386 CET/non-CET machines.
>
> Has the kernel ABI been finalized?

These parts of kernel ABI haven't been changed.

> I wonder if we should add IFUNC resolvers which set a flag, and check
> that flag at the start of (some of) these functions, so that they cannot
> be used as ROP gadgets in programs that do not reference them.
>

What exactly do you have in mind?

--
H.J.
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH 0/4] i386: Finish CET support

H.J. Lu-30
In reply to this post by H.J. Lu-30
On Tue, Dec 10, 2019 at 12:47 PM H.J. Lu <[hidden email]> wrote:

>
> This patch set finishes CET support on i386:
>
> 1. getcontext, setcontext and swapcontext are updated not to preserve
> EAX, ECX and EDX.  Since they are caller-saved, caller will reload them
> after getcontext, setcontext and swapcontext calls if needed.  The extra
> scratch registers are used to enable CET.
> 2. Add missing _CET_ENDBR to i386 assembly files.
> 3. Enable CET support in i386 ucontext functions.
>
> Tested on i386 CET/non-CET machines.
>
>
> H.J. Lu (4):
>   i386: Don't unnecessarily save and restore EAX, ECX and EDX [BZ#
>     25262]
>   i386/sub_n.S: Add a missing _CET_ENDBR to indirect jump target
>   i386: Add _CET_ENDBR to assembly files without ENTRY
>   i386: Enable CET support in ucontext functions
>
>  sysdeps/i386/i386-mcount.S                  |   2 +
>  sysdeps/i386/nptl/pthread_spin_lock.S       |   2 +
>  sysdeps/i386/nptl/pthread_spin_unlock.S     |   3 +
>  sysdeps/i386/pthread_spin_trylock.S         |   2 +
>  sysdeps/i386/sub_n.S                        |   1 +
>  sysdeps/unix/sysv/linux/i386/_exit.S        |   1 +
>  sysdeps/unix/sysv/linux/i386/getcontext.S   |  64 +++++++-
>  sysdeps/unix/sysv/linux/i386/makecontext.S  | 123 +++++++++++++++
>  sysdeps/unix/sysv/linux/i386/setcontext.S   | 112 ++++++++++++--
>  sysdeps/unix/sysv/linux/i386/swapcontext.S  | 156 ++++++++++++++++++--
>  sysdeps/unix/sysv/linux/i386/sysdep.h       |   5 +
>  sysdeps/unix/sysv/linux/i386/ucontext_i.sym |   1 +
>  12 files changed, 443 insertions(+), 29 deletions(-)
>

I'd like to get them into glibc 2.31.  Is it too late?

Thanks.

--
H.J.
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH 3/4] i386: Add _CET_ENDBR to assembly files without ENTRY

Adhemerval Zanella-2
In reply to this post by H.J. Lu-30


On 10/12/2019 17:47, H.J. Lu wrote:

> diff --git a/sysdeps/unix/sysv/linux/i386/_exit.S b/sysdeps/unix/sysv/linux/i386/_exit.S
> index d59c127501..a21d5f69b3 100644
> --- a/sysdeps/unix/sysv/linux/i386/_exit.S
> +++ b/sysdeps/unix/sysv/linux/i386/_exit.S
> @@ -21,6 +21,7 @@
>   .type _exit,@function
>   .global _exit
>  _exit:
> + _CET_ENDBR
>   movl 4(%esp), %ebx
>  
>   /* Try the new syscall first.  */
>

Do we still need this for i386? I think the generic implementation
should suffice since __NR_exit_group is always supported and
i386 does define ABORT_INSTRUCTION.
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH 3/4] i386: Add _CET_ENDBR to assembly files without ENTRY

H.J. Lu-30
On Tue, Jan 7, 2020 at 2:05 PM Adhemerval Zanella
<[hidden email]> wrote:

>
>
>
> On 10/12/2019 17:47, H.J. Lu wrote:
> > diff --git a/sysdeps/unix/sysv/linux/i386/_exit.S b/sysdeps/unix/sysv/linux/i386/_exit.S
> > index d59c127501..a21d5f69b3 100644
> > --- a/sysdeps/unix/sysv/linux/i386/_exit.S
> > +++ b/sysdeps/unix/sysv/linux/i386/_exit.S
> > @@ -21,6 +21,7 @@
> >       .type   _exit,@function
> >       .global _exit
> >  _exit:
> > +     _CET_ENDBR
> >       movl    4(%esp), %ebx
> >
> >       /* Try the new syscall first.  */
> >
>
> Do we still need this for i386? I think the generic implementation
> should suffice since __NR_exit_group is always supported and
> i386 does define ABORT_INSTRUCTION.

_exit.S can be removed.

--
H.J.