Inform Intel SGX SDK about Occlum-defined stacks

There are two types of stacks: the kernel ones and the user ones. The kernel
stacks are used by Occlum and managed by Intel SGX SDK itself, while the user
stacks are used by the threads created and managed by Occlum. These user stacks
are transparent to Intel SGX SDK so far.

The problem is that Intel SGX SDK needs to be aware of the user stacks.
SGX exception handlers will check whether the rsp value---when the exception
happened---is within the stack of the current SGX thread. If the check fails,
the registered exception handler will not be triggered. But when exceptions are
triggered by the threads running upon Occlum, the rsp value points to the user
stacks, which Intel SGX SDK are completely unware of. So the check always
fails.

Therefore, we extend Intel SGX SDK with two new APIs:

  int sgx_enable_user_stack(size_t stack_base, size_t stack_limit);
  void sgx_disable_user_stack(void);

And this commit uses the two APIs to inform Intel SGX SDK about the
Occlum-managed stacks. And the rsp checks in SGX exception handlers will
check whether rsp is within the user stacks.
This commit is contained in:
Tate, Hongliang Tian 2019-07-18 07:16:17 +00:00
parent 2553298b1d
commit c096e7d0b9
15 changed files with 147 additions and 74 deletions

@ -13,10 +13,12 @@ extern "C" {
// See Struct Task in process.rs // See Struct Task in process.rs
struct Task { struct Task {
uint64_t kernel_stack_addr; uint64_t kernel_rsp;
uint64_t kernel_fsbase_addr; uint64_t kernel_fs;
uint64_t user_stack_addr; uint64_t user_rsp;
uint64_t user_fsbase_addr; uint64_t user_stack_base;
uint64_t user_stack_limit;
uint64_t user_fs;
uint64_t user_entry_addr; uint64_t user_entry_addr;
jmp_buf* saved_state; jmp_buf* saved_state;
}; };
@ -38,11 +40,11 @@ void do_exit_task(void);
/* Override the field for stack guard */ /* Override the field for stack guard */
#define TD_TASK_OFFSET TD_STACKGUARD_OFFSET #define TD_TASK_OFFSET TD_STACKGUARD_OFFSET
#define TASK_KERNEL_STACK_ADDR (8 * 0) #define TASK_KERNEL_RSP (8 * 0)
#define TASK_KERNEL_FSBASE_ADDR (8 * 1) #define TASK_KERNEL_FS (8 * 1)
#define TASK_USER_STACK_ADDR (8 * 2) #define TASK_USER_RSP (8 * 2)
#define TASK_USER_FSBASE_ADDR (8 * 3) #define TASK_USER_FS (8 * 5)
#define TASK_USER_ENTRY_ADDR (8 * 4) #define TASK_USER_ENTRY_ADDR (8 * 6)
#endif /* __ASSEMBLY__ */ #endif /* __ASSEMBLY__ */

@ -31,14 +31,14 @@ pub fn do_arch_prctl(code: ArchPrctlCode, addr: *mut usize) -> Result<(), Error>
let current_ref = get_current(); let current_ref = get_current();
let mut current = current_ref.lock().unwrap(); let mut current = current_ref.lock().unwrap();
let task = &mut current.task; let task = &mut current.task;
task.user_fsbase_addr = addr as usize; task.set_user_fs(addr as usize);
} }
ArchPrctlCode::ARCH_GET_FS => { ArchPrctlCode::ARCH_GET_FS => {
let current_ref = get_current(); let current_ref = get_current();
let current = current_ref.lock().unwrap(); let current = current_ref.lock().unwrap();
let task = &current.task; let task = &current.task;
unsafe { unsafe {
*addr = task.user_fsbase_addr; *addr = task.get_user_fs();
} }
} }
ArchPrctlCode::ARCH_SET_GS | ArchPrctlCode::ARCH_GET_GS => { ArchPrctlCode::ARCH_SET_GS | ArchPrctlCode::ARCH_GET_GS => {

@ -71,8 +71,13 @@ pub fn do_spawn<P: AsRef<Path>>(
}; };
let auxtbl = init_auxtbl(base_addr, program_entry, &elf_file)?; let auxtbl = init_auxtbl(base_addr, program_entry, &elf_file)?;
let task = { let task = {
let stack_top = vm.get_stack_top(); let user_stack_base = vm.get_stack_base();
init_task(program_entry, stack_top, argv, envp, &auxtbl)? let user_stack_limit = vm.get_stack_limit();
let user_rsp = init_stack::do_init(user_stack_base, 4096, argv, envp, &auxtbl)?;
unsafe {
Task::new(program_entry, user_rsp,
user_stack_base, user_stack_limit, None)?
}
}; };
let vm_ref = Arc::new(SgxMutex::new(vm)); let vm_ref = Arc::new(SgxMutex::new(vm));
let files_ref = { let files_ref = {
@ -141,21 +146,6 @@ fn init_files(parent_ref: &ProcessRef, file_actions: &[FileAction]) -> Result<Fi
Ok(file_table) Ok(file_table)
} }
fn init_task(
user_entry: usize,
stack_top: usize,
argv: &[CString],
envp: &[CString],
auxtbl: &AuxTable,
) -> Result<Task, Error> {
let user_stack = init_stack::do_init(stack_top, 4096, argv, envp, auxtbl)?;
Ok(Task {
user_stack_addr: user_stack,
user_entry_addr: user_entry,
..Default::default()
})
}
fn init_auxtbl( fn init_auxtbl(
base_addr: usize, base_addr: usize,
program_entry: usize, program_entry: usize,

@ -5,6 +5,10 @@ extern void __run_task(struct Task* task);
extern uint64_t __get_stack_guard(void); extern uint64_t __get_stack_guard(void);
extern void __set_stack_guard(uint64_t new_val); extern void __set_stack_guard(uint64_t new_val);
// From SGX SDK
int sgx_enable_user_stack(size_t stack_base, size_t stack_limit);
void sgx_disable_user_stack(void);
static uint64_t get_syscall_stack(struct Task* this_task) { static uint64_t get_syscall_stack(struct Task* this_task) {
#define LARGE_ENOUGH_GAP (8192) #define LARGE_ENOUGH_GAP (8192)
char libos_stack_var = 0; char libos_stack_var = 0;
@ -23,8 +27,11 @@ static uint64_t get_syscall_stack(struct Task* this_task) {
int do_run_task(struct Task* task) { int do_run_task(struct Task* task) {
jmp_buf libos_state = {0}; jmp_buf libos_state = {0};
task->saved_state = &libos_state; task->saved_state = &libos_state;
task->kernel_stack_addr = get_syscall_stack(task); task->kernel_rsp = get_syscall_stack(task);
if (sgx_enable_user_stack(task->user_stack_base, task->user_stack_limit)) {
return -1;
}
SET_CURRENT_TASK(task); SET_CURRENT_TASK(task);
int second = setjmp(libos_state); int second = setjmp(libos_state);
@ -32,8 +39,9 @@ int do_run_task(struct Task* task) {
__run_task(task); __run_task(task);
} }
// From occlum_exit // Jump from do_exit_task
RESET_CURRENT_TASK(); RESET_CURRENT_TASK();
sgx_disable_user_stack();
return 0; return 0;
} }

@ -6,14 +6,53 @@ use super::*;
#[derive(Clone, Debug, Default)] #[derive(Clone, Debug, Default)]
#[repr(C)] #[repr(C)]
pub struct Task { pub struct Task {
pub kernel_stack_addr: usize, kernel_rsp: usize,
pub kernel_fsbase_addr: usize, kernel_fs: usize,
pub user_stack_addr: usize, user_rsp: usize,
pub user_fsbase_addr: usize, user_stack_base: usize,
pub user_entry_addr: usize, user_stack_limit: usize,
pub saved_state: usize, // struct jmpbuf* user_fs: usize,
user_entry_addr: usize,
saved_state: usize, // struct jmpbuf*
} }
impl Task {
pub unsafe fn new(
user_entry_addr: usize,
user_rsp: usize,
user_stack_base: usize,
user_stack_limit: usize,
user_fs: Option<usize>,
) -> Result<Task, Error> {
if !(user_stack_base >= user_rsp && user_rsp > user_stack_limit) {
return errno!(EINVAL, "Invalid user stack");
}
// Set the default user fsbase to an address on user stack, which is
// a relatively safe address in case the user program uses %fs before
// initializing fs base address.
let user_fs = user_fs.unwrap_or(user_stack_limit);
Ok(Task {
user_entry_addr,
user_rsp,
user_stack_base,
user_stack_limit,
user_fs,
..Default::default()
})
}
pub fn set_user_fs(&mut self, user_fs: usize) {
self.user_fs = user_fs;
}
pub fn get_user_fs(&self) -> usize {
self.user_fs
}
}
lazy_static! { lazy_static! {
static ref NEW_PROCESS_QUEUE: SgxMutex<VecDeque<ProcessRef>> = static ref NEW_PROCESS_QUEUE: SgxMutex<VecDeque<ProcessRef>> =
{ SgxMutex::new(VecDeque::new()) }; { SgxMutex::new(VecDeque::new()) };

@ -32,12 +32,12 @@ __set_stack_guard:
__run_task: __run_task:
// Save kernel fsbase // Save kernel fsbase
rdfsbase %r10 rdfsbase %r10
movq %r10, TASK_KERNEL_FSBASE_ADDR(%rdi) movq %r10, TASK_KERNEL_FS(%rdi)
// Use user fsbase // Use user fsbase
movq TASK_USER_FSBASE_ADDR(%rdi), %r10 movq TASK_USER_FS(%rdi), %r10
wrfsbase %r10 wrfsbase %r10
// Use user stack // Use user stack
movq TASK_USER_STACK_ADDR(%rdi), %rsp movq TASK_USER_RSP(%rdi), %rsp
// Run user code // Run user code
movq TASK_USER_ENTRY_ADDR(%rdi), %r11 movq TASK_USER_ENTRY_ADDR(%rdi), %r11
jmp *%r11 jmp *%r11

@ -1,4 +1,5 @@
use super::*; use super::*;
use super::vm::{VMRange};
pub struct ThreadGroup { pub struct ThreadGroup {
threads: Vec<ProcessRef>, threads: Vec<ProcessRef>,
@ -34,23 +35,40 @@ bitflags! {
pub fn do_clone( pub fn do_clone(
flags: CloneFlags, flags: CloneFlags,
stack_addr: usize, user_rsp: usize,
ptid: Option<*mut pid_t>, ptid: Option<*mut pid_t>,
ctid: Option<*mut pid_t>, ctid: Option<*mut pid_t>,
new_tls: Option<usize>, new_tls: Option<usize>,
) -> Result<pid_t, Error> { ) -> Result<pid_t, Error> {
info!( info!(
"clone: flags: {:?}, stack_addr: {:?}, ptid: {:?}, ctid: {:?}, new_tls: {:?}", "clone: flags: {:?}, stack_addr: {:?}, ptid: {:?}, ctid: {:?}, new_tls: {:?}",
flags, stack_addr, ptid, ctid, new_tls flags, user_rsp, ptid, ctid, new_tls
); );
// TODO: return error for unsupported flags // TODO: return error for unsupported flags
let current_ref = get_current(); let current_ref = get_current();
let current = current_ref.lock().unwrap(); let current = current_ref.lock().unwrap();
// The calling convention of Occlum clone syscall requires the user to
// store the entry point of the new thread at the top of the user stack.
let thread_entry = unsafe {
*(user_rsp as *mut usize)
// TODO: check user_entry is a cfi_label
};
let (new_thread_pid, new_thread_ref) = { let (new_thread_pid, new_thread_ref) = {
let task = new_thread_task(stack_addr, new_tls)?;
let vm_ref = current.get_vm().clone(); let vm_ref = current.get_vm().clone();
let task = {
let vm = vm_ref.lock().unwrap();
let user_stack_range = guess_user_stack_bound(&vm, user_rsp)?;
let user_stack_base = user_stack_range.end();
let user_stack_limit = user_stack_range.start();
unsafe {
Task::new(thread_entry, user_rsp,
user_stack_base, user_stack_limit,
new_tls)?
}
};
let files_ref = current.get_files().clone(); let files_ref = current.get_files().clone();
let rlimits_ref = current.get_rlimits().clone(); let rlimits_ref = current.get_rlimits().clone();
let cwd = &current.cwd; let cwd = &current.cwd;
@ -85,22 +103,6 @@ pub fn do_clone(
Ok(new_thread_pid) Ok(new_thread_pid)
} }
fn new_thread_task(user_stack: usize, new_tls: Option<usize>) -> Result<Task, Error> {
// The calling convention of Occlum clone syscall requires the user to
// restore the entry point of the new thread at the top of the user stack.
let user_entry = unsafe {
*(user_stack as *mut usize)
// TODO: check user_entry is a cfi_label
};
Ok(Task {
user_stack_addr: user_stack,
user_entry_addr: user_entry,
// TODO: use 0 as the default value is not safe
user_fsbase_addr: new_tls.unwrap_or(0),
..Default::default()
})
}
pub fn do_set_tid_address(tidptr: *mut pid_t) -> Result<pid_t, Error> { pub fn do_set_tid_address(tidptr: *mut pid_t) -> Result<pid_t, Error> {
info!("set_tid_address: tidptr: {:#x}", tidptr as usize); info!("set_tid_address: tidptr: {:#x}", tidptr as usize);
let current_ref = get_current(); let current_ref = get_current();
@ -108,3 +110,24 @@ pub fn do_set_tid_address(tidptr: *mut pid_t) -> Result<pid_t, Error> {
current.clear_child_tid = Some(tidptr); current.clear_child_tid = Some(tidptr);
Ok(current.get_tid()) Ok(current.get_tid())
} }
fn guess_user_stack_bound(vm: &ProcessVM, user_rsp: usize) -> Result<&VMRange, Error> {
// The first case is most likely
if let Ok(stack_range) = vm.find_mmap_region(user_rsp) {
Ok(stack_range)
}
// The next three cases are very unlikely, but valid
else if vm.get_stack_range().contains(user_rsp) {
Ok(vm.get_stack_range())
}
else if vm.get_heap_range().contains(user_rsp) {
Ok(vm.get_heap_range())
}
else if vm.get_data_range().contains(user_rsp) {
Ok(vm.get_data_range())
}
// Invalid
else {
errno!(ESRCH, "invalid rsp")
}
}

@ -28,9 +28,9 @@ __occlum_syscall:
// Get current task // Get current task
movq %gs:(TD_TASK_OFFSET), %r12 movq %gs:(TD_TASK_OFFSET), %r12
// Switch to the kernel stack // Switch to the kernel stack
movq TASK_KERNEL_STACK_ADDR(%r12), %rsp movq TASK_KERNEL_RSP(%r12), %rsp
// Use kernel fsbase // Use kernel fsbase
movq TASK_KERNEL_FSBASE_ADDR(%r12), %r11 movq TASK_KERNEL_FS(%r12), %r11
wrfsbase %r11 wrfsbase %r11
// Make %rsp 16-byte aligned before call // Make %rsp 16-byte aligned before call
@ -41,7 +41,7 @@ __occlum_syscall:
call dispatch_syscall call dispatch_syscall
// Use user fsbase // Use user fsbase
movq TASK_USER_FSBASE_ADDR(%r12), %r11 movq TASK_USER_FS(%r12), %r11
wrfsbase %r11 wrfsbase %r11
// Switch to the user stack // Switch to the user stack

@ -46,9 +46,6 @@ typedef struct {
* components are to restored by this instruction. * components are to restored by this instruction.
*/ */
static void xrstor(xsave_area_t* xsave_area, uint64_t rfbm) { static void xrstor(xsave_area_t* xsave_area, uint64_t rfbm) {
uint32_t rfbm_l= rfbm;
uint32_t rfbm_h = rfbm >> 32;
#define REX_PREFIX "0x48, " #define REX_PREFIX "0x48, "
#define XRSTOR64 REX_PREFIX "0x0f,0xae,0x2f " #define XRSTOR64 REX_PREFIX "0x0f,0xae,0x2f "

@ -8,6 +8,7 @@ mod user_space_vm;
mod process_vm; mod process_vm;
pub use self::process_vm::{ProcessVM, MMapFlags, VMPerms}; pub use self::process_vm::{ProcessVM, MMapFlags, VMPerms};
pub use self::vm_manager::{VMRange};
pub fn do_mmap( pub fn do_mmap(
addr: usize, addr: usize,

@ -103,10 +103,14 @@ impl ProcessVM {
self.get_process_range().start() self.get_process_range().start()
} }
pub fn get_stack_top(&self) -> usize { pub fn get_stack_base(&self) -> usize {
self.get_stack_range().end() self.get_stack_range().end()
} }
pub fn get_stack_limit(&self) -> usize {
self.get_stack_range().start()
}
pub fn get_brk(&self) -> usize { pub fn get_brk(&self) -> usize {
self.brk self.brk
} }
@ -176,6 +180,10 @@ impl ProcessVM {
pub fn munmap(&mut self, addr: usize, size: usize) -> Result<(), Error> { pub fn munmap(&mut self, addr: usize, size: usize) -> Result<(), Error> {
self.mmap_manager.munmap(addr, size) self.mmap_manager.munmap(addr, size)
} }
pub fn find_mmap_region(&self, addr: usize) -> Result<&VMRange, Error> {
self.mmap_manager.find_mmap_region(addr)
}
} }

@ -218,6 +218,13 @@ impl VMManager {
Ok(()) Ok(())
} }
pub fn find_mmap_region(&self, addr: usize) -> Result<&VMRange, Error> {
self.sub_ranges.iter()
.find(|subrange| subrange.contains(addr))
.ok_or(Error::new(Errno::ESRCH,
"no mmap regions that contains the address"))
}
// Find the free subrange that satisfies the constraints of size and address // Find the free subrange that satisfies the constraints of size and address
fn find_free_subrange( fn find_free_subrange(
&mut self, &mut self,

@ -218,8 +218,6 @@ int SGX_CDECL main(int argc, const char *argv[])
gettimeofday(&startup, NULL); gettimeofday(&startup, NULL);
sgx_status_t sgx_ret = SGX_SUCCESS; sgx_status_t sgx_ret = SGX_SUCCESS;
int status = 0; int status = 0;
uint32_t sealed_log_size = 1024;
uint8_t sealed_log[1024] = {0};
if (argc < 2) { if (argc < 2) {
printf("ERROR: at least one argument must be provided\n\n"); printf("ERROR: at least one argument must be provided\n\n");

@ -13,13 +13,15 @@ else ifeq ($(findstring -m32, $(CXXFLAGS)), -m32)
SGX_ARCH := x86 SGX_ARCH := x86
endif endif
SGX_COMMON_CFLAGS := -Wall
ifeq ($(SGX_ARCH), x86) ifeq ($(SGX_ARCH), x86)
SGX_COMMON_CFLAGS := -m32 SGX_COMMON_CFLAGS += -m32
SGX_LIBRARY_PATH := $(SGX_SDK)/lib SGX_LIBRARY_PATH := $(SGX_SDK)/lib
SGX_ENCLAVE_SIGNER := $(SGX_SDK)/bin/x86/sgx_sign SGX_ENCLAVE_SIGNER := $(SGX_SDK)/bin/x86/sgx_sign
SGX_EDGER8R := $(SGX_SDK)/bin/x86/sgx_edger8r SGX_EDGER8R := $(SGX_SDK)/bin/x86/sgx_edger8r
else else
SGX_COMMON_CFLAGS := -m64 SGX_COMMON_CFLAGS += -m64
SGX_LIBRARY_PATH := $(SGX_SDK)/lib64 SGX_LIBRARY_PATH := $(SGX_SDK)/lib64
SGX_ENCLAVE_SIGNER := $(SGX_SDK)/bin/x64/sgx_sign SGX_ENCLAVE_SIGNER := $(SGX_SDK)/bin/x64/sgx_sign
SGX_EDGER8R := $(SGX_SDK)/bin/x64/sgx_edger8r SGX_EDGER8R := $(SGX_SDK)/bin/x64/sgx_edger8r

@ -1,5 +1,6 @@
#include <stdio.h> #include <stdio.h>
#include <stdint.h> #include <stdint.h>
#include <string.h>
typedef struct t_cpuid { typedef struct t_cpuid {
unsigned int eax; unsigned int eax;
@ -10,6 +11,7 @@ typedef struct t_cpuid {
static inline void native_cpuid(int leaf, int subleaf, t_cpuid_t *p) static inline void native_cpuid(int leaf, int subleaf, t_cpuid_t *p)
{ {
memset(p, 0, sizeof(*p));
/* ecx is often an input as well as an output. */ /* ecx is often an input as well as an output. */
asm volatile("cpuid" asm volatile("cpuid"
: "=a" (p->eax), : "=a" (p->eax),
@ -22,7 +24,7 @@ static inline void native_cpuid(int leaf, int subleaf, t_cpuid_t *p)
int main(int argc, char **argv) int main(int argc, char **argv)
{ {
/* Gets CPUID information and tests the SGX support of the CPU */ /* Gets CPUID information and tests the SGX support of the CPU */
t_cpuid_t cpu = {0}; t_cpuid_t cpu;
int leaf = 1; int leaf = 1;
int subleaf = 0; int subleaf = 0;
@ -42,7 +44,6 @@ int main(int argc, char **argv)
printf("\nExtended feature bits (EAX=07H, ECX=0H)\n"); printf("\nExtended feature bits (EAX=07H, ECX=0H)\n");
leaf = 7; leaf = 7;
subleaf = 0; subleaf = 0;
cpu = {0};
native_cpuid(leaf, subleaf, &cpu); native_cpuid(leaf, subleaf, &cpu);
printf("eax: %x ebx: %x ecx: %x edx: %x\n", cpu.eax, cpu.ebx, cpu.ecx, cpu.edx); printf("eax: %x ebx: %x ecx: %x edx: %x\n", cpu.eax, cpu.ebx, cpu.ecx, cpu.edx);
@ -61,7 +62,6 @@ int main(int argc, char **argv)
printf("\nCPUID Leaf 12H, Sub-Leaf 0 of Intel SGX Capabilities (EAX=12H,ECX=0)\n"); printf("\nCPUID Leaf 12H, Sub-Leaf 0 of Intel SGX Capabilities (EAX=12H,ECX=0)\n");
leaf = 0x12; leaf = 0x12;
subleaf = 0; subleaf = 0;
cpu = {0};
native_cpuid(leaf, subleaf, &cpu); native_cpuid(leaf, subleaf, &cpu);
printf("eax: %x ebx: %x ecx: %x edx: %x\n", cpu.eax, cpu.ebx, cpu.ecx, cpu.edx); printf("eax: %x ebx: %x ecx: %x edx: %x\n", cpu.eax, cpu.ebx, cpu.ecx, cpu.edx);
@ -74,7 +74,6 @@ int main(int argc, char **argv)
printf("\nCPUID Leaf 12H, Sub-Leaf 1 of Intel SGX Capabilities (EAX=12H,ECX=1)\n"); printf("\nCPUID Leaf 12H, Sub-Leaf 1 of Intel SGX Capabilities (EAX=12H,ECX=1)\n");
leaf = 0x12; leaf = 0x12;
subleaf = 1; subleaf = 1;
cpu = {0};
native_cpuid(leaf, subleaf, &cpu); native_cpuid(leaf, subleaf, &cpu);
printf("eax: %x ebx: %x ecx: %x edx: %x\n", cpu.eax, cpu.ebx, cpu.ecx, cpu.edx); printf("eax: %x ebx: %x ecx: %x edx: %x\n", cpu.eax, cpu.ebx, cpu.ecx, cpu.edx);
@ -84,7 +83,6 @@ int main(int argc, char **argv)
printf("\nCPUID Leaf 12H, Sub-Leaf %d of Intel SGX Capabilities (EAX=12H,ECX=%d)\n", i, i); printf("\nCPUID Leaf 12H, Sub-Leaf %d of Intel SGX Capabilities (EAX=12H,ECX=%d)\n", i, i);
leaf = 0x12; leaf = 0x12;
subleaf = i; subleaf = i;
cpu = {0};
native_cpuid(leaf, subleaf, &cpu); native_cpuid(leaf, subleaf, &cpu);
printf("eax: %x ebx: %x ecx: %x edx: %x\n", cpu.eax, cpu.ebx, cpu.ecx, cpu.edx); printf("eax: %x ebx: %x ecx: %x edx: %x\n", cpu.eax, cpu.ebx, cpu.ecx, cpu.edx);
} }