From c096e7d0b9d72d42d2b8c377f6d7030c839a4d38 Mon Sep 17 00:00:00 2001 From: "Tate, Hongliang Tian" Date: Thu, 18 Jul 2019 07:16:17 +0000 Subject: [PATCH] Inform Intel SGX SDK about Occlum-defined stacks There are two types of stacks: the kernel ones and the user ones. The kernel stacks are used by Occlum and managed by Intel SGX SDK itself, while the user stacks are used by the threads created and managed by Occlum. These user stacks are transparent to Intel SGX SDK so far. The problem is that Intel SGX SDK needs to be aware of the user stacks. SGX exception handlers will check whether the rsp value---when the exception happened---is within the stack of the current SGX thread. If the check fails, the registered exception handler will not be triggered. But when exceptions are triggered by the threads running upon Occlum, the rsp value points to the user stacks, which Intel SGX SDK are completely unware of. So the check always fails. Therefore, we extend Intel SGX SDK with two new APIs: int sgx_enable_user_stack(size_t stack_base, size_t stack_limit); void sgx_disable_user_stack(void); And this commit uses the two APIs to inform Intel SGX SDK about the Occlum-managed stacks. And the rsp checks in SGX exception handlers will check whether rsp is within the user stacks. --- src/libos/include/task.h | 20 ++++--- src/libos/src/process/arch_prctl.rs | 4 +- src/libos/src/process/spawn/mod.rs | 24 +++----- src/libos/src/process/task.c | 12 +++- src/libos/src/process/task.rs | 51 ++++++++++++++-- src/libos/src/process/task_x86-64.S | 6 +- src/libos/src/process/thread.rs | 61 ++++++++++++++------ src/libos/src/syscall/syscall_entry_x86-64.S | 6 +- src/libos/src/util/mpx_util.c | 3 - src/libos/src/vm/mod.rs | 1 + src/libos/src/vm/process_vm.rs | 10 +++- src/libos/src/vm/vm_manager.rs | 7 +++ src/pal/pal.c | 2 - src/sgxenv.mk | 6 +- test/cpuid/main.c | 8 +-- 15 files changed, 147 insertions(+), 74 deletions(-) diff --git a/src/libos/include/task.h b/src/libos/include/task.h index 182dcf60..58d47ee6 100644 --- a/src/libos/include/task.h +++ b/src/libos/include/task.h @@ -13,10 +13,12 @@ extern "C" { // See Struct Task in process.rs struct Task { - uint64_t kernel_stack_addr; - uint64_t kernel_fsbase_addr; - uint64_t user_stack_addr; - uint64_t user_fsbase_addr; + uint64_t kernel_rsp; + uint64_t kernel_fs; + uint64_t user_rsp; + uint64_t user_stack_base; + uint64_t user_stack_limit; + uint64_t user_fs; uint64_t user_entry_addr; jmp_buf* saved_state; }; @@ -38,11 +40,11 @@ void do_exit_task(void); /* Override the field for stack guard */ #define TD_TASK_OFFSET TD_STACKGUARD_OFFSET -#define TASK_KERNEL_STACK_ADDR (8 * 0) -#define TASK_KERNEL_FSBASE_ADDR (8 * 1) -#define TASK_USER_STACK_ADDR (8 * 2) -#define TASK_USER_FSBASE_ADDR (8 * 3) -#define TASK_USER_ENTRY_ADDR (8 * 4) +#define TASK_KERNEL_RSP (8 * 0) +#define TASK_KERNEL_FS (8 * 1) +#define TASK_USER_RSP (8 * 2) +#define TASK_USER_FS (8 * 5) +#define TASK_USER_ENTRY_ADDR (8 * 6) #endif /* __ASSEMBLY__ */ diff --git a/src/libos/src/process/arch_prctl.rs b/src/libos/src/process/arch_prctl.rs index 54d5648b..8d0f36d9 100644 --- a/src/libos/src/process/arch_prctl.rs +++ b/src/libos/src/process/arch_prctl.rs @@ -31,14 +31,14 @@ pub fn do_arch_prctl(code: ArchPrctlCode, addr: *mut usize) -> Result<(), Error> let current_ref = get_current(); let mut current = current_ref.lock().unwrap(); let task = &mut current.task; - task.user_fsbase_addr = addr as usize; + task.set_user_fs(addr as usize); } ArchPrctlCode::ARCH_GET_FS => { let current_ref = get_current(); let current = current_ref.lock().unwrap(); let task = ¤t.task; unsafe { - *addr = task.user_fsbase_addr; + *addr = task.get_user_fs(); } } ArchPrctlCode::ARCH_SET_GS | ArchPrctlCode::ARCH_GET_GS => { diff --git a/src/libos/src/process/spawn/mod.rs b/src/libos/src/process/spawn/mod.rs index 795eb5d8..dca781b8 100644 --- a/src/libos/src/process/spawn/mod.rs +++ b/src/libos/src/process/spawn/mod.rs @@ -71,8 +71,13 @@ pub fn do_spawn>( }; let auxtbl = init_auxtbl(base_addr, program_entry, &elf_file)?; let task = { - let stack_top = vm.get_stack_top(); - init_task(program_entry, stack_top, argv, envp, &auxtbl)? + let user_stack_base = vm.get_stack_base(); + let user_stack_limit = vm.get_stack_limit(); + let user_rsp = init_stack::do_init(user_stack_base, 4096, argv, envp, &auxtbl)?; + unsafe { + Task::new(program_entry, user_rsp, + user_stack_base, user_stack_limit, None)? + } }; let vm_ref = Arc::new(SgxMutex::new(vm)); let files_ref = { @@ -141,21 +146,6 @@ fn init_files(parent_ref: &ProcessRef, file_actions: &[FileAction]) -> Result Result { - let user_stack = init_stack::do_init(stack_top, 4096, argv, envp, auxtbl)?; - Ok(Task { - user_stack_addr: user_stack, - user_entry_addr: user_entry, - ..Default::default() - }) -} - fn init_auxtbl( base_addr: usize, program_entry: usize, diff --git a/src/libos/src/process/task.c b/src/libos/src/process/task.c index b390e9ae..8a397d49 100644 --- a/src/libos/src/process/task.c +++ b/src/libos/src/process/task.c @@ -5,6 +5,10 @@ extern void __run_task(struct Task* task); extern uint64_t __get_stack_guard(void); extern void __set_stack_guard(uint64_t new_val); +// From SGX SDK +int sgx_enable_user_stack(size_t stack_base, size_t stack_limit); +void sgx_disable_user_stack(void); + static uint64_t get_syscall_stack(struct Task* this_task) { #define LARGE_ENOUGH_GAP (8192) char libos_stack_var = 0; @@ -23,8 +27,11 @@ static uint64_t get_syscall_stack(struct Task* this_task) { int do_run_task(struct Task* task) { jmp_buf libos_state = {0}; task->saved_state = &libos_state; - task->kernel_stack_addr = get_syscall_stack(task); + task->kernel_rsp = get_syscall_stack(task); + if (sgx_enable_user_stack(task->user_stack_base, task->user_stack_limit)) { + return -1; + } SET_CURRENT_TASK(task); int second = setjmp(libos_state); @@ -32,8 +39,9 @@ int do_run_task(struct Task* task) { __run_task(task); } - // From occlum_exit + // Jump from do_exit_task RESET_CURRENT_TASK(); + sgx_disable_user_stack(); return 0; } diff --git a/src/libos/src/process/task.rs b/src/libos/src/process/task.rs index 89fcde6a..aa79a1e9 100644 --- a/src/libos/src/process/task.rs +++ b/src/libos/src/process/task.rs @@ -6,14 +6,53 @@ use super::*; #[derive(Clone, Debug, Default)] #[repr(C)] pub struct Task { - pub kernel_stack_addr: usize, - pub kernel_fsbase_addr: usize, - pub user_stack_addr: usize, - pub user_fsbase_addr: usize, - pub user_entry_addr: usize, - pub saved_state: usize, // struct jmpbuf* + kernel_rsp: usize, + kernel_fs: usize, + user_rsp: usize, + user_stack_base: usize, + user_stack_limit: usize, + user_fs: usize, + user_entry_addr: usize, + saved_state: usize, // struct jmpbuf* } +impl Task { + pub unsafe fn new( + user_entry_addr: usize, + user_rsp: usize, + user_stack_base: usize, + user_stack_limit: usize, + user_fs: Option, + ) -> Result { + if !(user_stack_base >= user_rsp && user_rsp > user_stack_limit) { + return errno!(EINVAL, "Invalid user stack"); + } + + // Set the default user fsbase to an address on user stack, which is + // a relatively safe address in case the user program uses %fs before + // initializing fs base address. + let user_fs = user_fs.unwrap_or(user_stack_limit); + + Ok(Task { + user_entry_addr, + user_rsp, + user_stack_base, + user_stack_limit, + user_fs, + ..Default::default() + }) + } + + pub fn set_user_fs(&mut self, user_fs: usize) { + self.user_fs = user_fs; + } + + pub fn get_user_fs(&self) -> usize { + self.user_fs + } +} + + lazy_static! { static ref NEW_PROCESS_QUEUE: SgxMutex> = { SgxMutex::new(VecDeque::new()) }; diff --git a/src/libos/src/process/task_x86-64.S b/src/libos/src/process/task_x86-64.S index 00d8fb58..100a6137 100644 --- a/src/libos/src/process/task_x86-64.S +++ b/src/libos/src/process/task_x86-64.S @@ -32,12 +32,12 @@ __set_stack_guard: __run_task: // Save kernel fsbase rdfsbase %r10 - movq %r10, TASK_KERNEL_FSBASE_ADDR(%rdi) + movq %r10, TASK_KERNEL_FS(%rdi) // Use user fsbase - movq TASK_USER_FSBASE_ADDR(%rdi), %r10 + movq TASK_USER_FS(%rdi), %r10 wrfsbase %r10 // Use user stack - movq TASK_USER_STACK_ADDR(%rdi), %rsp + movq TASK_USER_RSP(%rdi), %rsp // Run user code movq TASK_USER_ENTRY_ADDR(%rdi), %r11 jmp *%r11 diff --git a/src/libos/src/process/thread.rs b/src/libos/src/process/thread.rs index d34fd6c5..1aa6a9ce 100644 --- a/src/libos/src/process/thread.rs +++ b/src/libos/src/process/thread.rs @@ -1,4 +1,5 @@ use super::*; +use super::vm::{VMRange}; pub struct ThreadGroup { threads: Vec, @@ -34,23 +35,40 @@ bitflags! { pub fn do_clone( flags: CloneFlags, - stack_addr: usize, + user_rsp: usize, ptid: Option<*mut pid_t>, ctid: Option<*mut pid_t>, new_tls: Option, ) -> Result { info!( "clone: flags: {:?}, stack_addr: {:?}, ptid: {:?}, ctid: {:?}, new_tls: {:?}", - flags, stack_addr, ptid, ctid, new_tls + flags, user_rsp, ptid, ctid, new_tls ); // TODO: return error for unsupported flags let current_ref = get_current(); let current = current_ref.lock().unwrap(); + // The calling convention of Occlum clone syscall requires the user to + // store the entry point of the new thread at the top of the user stack. + let thread_entry = unsafe { + *(user_rsp as *mut usize) + // TODO: check user_entry is a cfi_label + }; + let (new_thread_pid, new_thread_ref) = { - let task = new_thread_task(stack_addr, new_tls)?; let vm_ref = current.get_vm().clone(); + let task = { + let vm = vm_ref.lock().unwrap(); + let user_stack_range = guess_user_stack_bound(&vm, user_rsp)?; + let user_stack_base = user_stack_range.end(); + let user_stack_limit = user_stack_range.start(); + unsafe { + Task::new(thread_entry, user_rsp, + user_stack_base, user_stack_limit, + new_tls)? + } + }; let files_ref = current.get_files().clone(); let rlimits_ref = current.get_rlimits().clone(); let cwd = ¤t.cwd; @@ -85,22 +103,6 @@ pub fn do_clone( Ok(new_thread_pid) } -fn new_thread_task(user_stack: usize, new_tls: Option) -> Result { - // The calling convention of Occlum clone syscall requires the user to - // restore the entry point of the new thread at the top of the user stack. - let user_entry = unsafe { - *(user_stack as *mut usize) - // TODO: check user_entry is a cfi_label - }; - Ok(Task { - user_stack_addr: user_stack, - user_entry_addr: user_entry, - // TODO: use 0 as the default value is not safe - user_fsbase_addr: new_tls.unwrap_or(0), - ..Default::default() - }) -} - pub fn do_set_tid_address(tidptr: *mut pid_t) -> Result { info!("set_tid_address: tidptr: {:#x}", tidptr as usize); let current_ref = get_current(); @@ -108,3 +110,24 @@ pub fn do_set_tid_address(tidptr: *mut pid_t) -> Result { current.clear_child_tid = Some(tidptr); Ok(current.get_tid()) } + +fn guess_user_stack_bound(vm: &ProcessVM, user_rsp: usize) -> Result<&VMRange, Error> { + // The first case is most likely + if let Ok(stack_range) = vm.find_mmap_region(user_rsp) { + Ok(stack_range) + } + // The next three cases are very unlikely, but valid + else if vm.get_stack_range().contains(user_rsp) { + Ok(vm.get_stack_range()) + } + else if vm.get_heap_range().contains(user_rsp) { + Ok(vm.get_heap_range()) + } + else if vm.get_data_range().contains(user_rsp) { + Ok(vm.get_data_range()) + } + // Invalid + else { + errno!(ESRCH, "invalid rsp") + } +} diff --git a/src/libos/src/syscall/syscall_entry_x86-64.S b/src/libos/src/syscall/syscall_entry_x86-64.S index f471c0e0..8d905f84 100644 --- a/src/libos/src/syscall/syscall_entry_x86-64.S +++ b/src/libos/src/syscall/syscall_entry_x86-64.S @@ -28,9 +28,9 @@ __occlum_syscall: // Get current task movq %gs:(TD_TASK_OFFSET), %r12 // Switch to the kernel stack - movq TASK_KERNEL_STACK_ADDR(%r12), %rsp + movq TASK_KERNEL_RSP(%r12), %rsp // Use kernel fsbase - movq TASK_KERNEL_FSBASE_ADDR(%r12), %r11 + movq TASK_KERNEL_FS(%r12), %r11 wrfsbase %r11 // Make %rsp 16-byte aligned before call @@ -41,7 +41,7 @@ __occlum_syscall: call dispatch_syscall // Use user fsbase - movq TASK_USER_FSBASE_ADDR(%r12), %r11 + movq TASK_USER_FS(%r12), %r11 wrfsbase %r11 // Switch to the user stack diff --git a/src/libos/src/util/mpx_util.c b/src/libos/src/util/mpx_util.c index d31882d4..3e8ff745 100644 --- a/src/libos/src/util/mpx_util.c +++ b/src/libos/src/util/mpx_util.c @@ -46,9 +46,6 @@ typedef struct { * components are to restored by this instruction. */ static void xrstor(xsave_area_t* xsave_area, uint64_t rfbm) { - uint32_t rfbm_l= rfbm; - uint32_t rfbm_h = rfbm >> 32; - #define REX_PREFIX "0x48, " #define XRSTOR64 REX_PREFIX "0x0f,0xae,0x2f " diff --git a/src/libos/src/vm/mod.rs b/src/libos/src/vm/mod.rs index 04995f5a..630953f9 100644 --- a/src/libos/src/vm/mod.rs +++ b/src/libos/src/vm/mod.rs @@ -8,6 +8,7 @@ mod user_space_vm; mod process_vm; pub use self::process_vm::{ProcessVM, MMapFlags, VMPerms}; +pub use self::vm_manager::{VMRange}; pub fn do_mmap( addr: usize, diff --git a/src/libos/src/vm/process_vm.rs b/src/libos/src/vm/process_vm.rs index 667f759f..96093d82 100644 --- a/src/libos/src/vm/process_vm.rs +++ b/src/libos/src/vm/process_vm.rs @@ -103,10 +103,14 @@ impl ProcessVM { self.get_process_range().start() } - pub fn get_stack_top(&self) -> usize { + pub fn get_stack_base(&self) -> usize { self.get_stack_range().end() } + pub fn get_stack_limit(&self) -> usize { + self.get_stack_range().start() + } + pub fn get_brk(&self) -> usize { self.brk } @@ -176,6 +180,10 @@ impl ProcessVM { pub fn munmap(&mut self, addr: usize, size: usize) -> Result<(), Error> { self.mmap_manager.munmap(addr, size) } + + pub fn find_mmap_region(&self, addr: usize) -> Result<&VMRange, Error> { + self.mmap_manager.find_mmap_region(addr) + } } diff --git a/src/libos/src/vm/vm_manager.rs b/src/libos/src/vm/vm_manager.rs index c5feb15f..7d7c7fe4 100644 --- a/src/libos/src/vm/vm_manager.rs +++ b/src/libos/src/vm/vm_manager.rs @@ -218,6 +218,13 @@ impl VMManager { Ok(()) } + pub fn find_mmap_region(&self, addr: usize) -> Result<&VMRange, Error> { + self.sub_ranges.iter() + .find(|subrange| subrange.contains(addr)) + .ok_or(Error::new(Errno::ESRCH, + "no mmap regions that contains the address")) + } + // Find the free subrange that satisfies the constraints of size and address fn find_free_subrange( &mut self, diff --git a/src/pal/pal.c b/src/pal/pal.c index 87514a21..ad28c299 100644 --- a/src/pal/pal.c +++ b/src/pal/pal.c @@ -218,8 +218,6 @@ int SGX_CDECL main(int argc, const char *argv[]) gettimeofday(&startup, NULL); sgx_status_t sgx_ret = SGX_SUCCESS; int status = 0; - uint32_t sealed_log_size = 1024; - uint8_t sealed_log[1024] = {0}; if (argc < 2) { printf("ERROR: at least one argument must be provided\n\n"); diff --git a/src/sgxenv.mk b/src/sgxenv.mk index 4c06ec85..9338dd77 100644 --- a/src/sgxenv.mk +++ b/src/sgxenv.mk @@ -13,13 +13,15 @@ else ifeq ($(findstring -m32, $(CXXFLAGS)), -m32) SGX_ARCH := x86 endif +SGX_COMMON_CFLAGS := -Wall + ifeq ($(SGX_ARCH), x86) - SGX_COMMON_CFLAGS := -m32 + SGX_COMMON_CFLAGS += -m32 SGX_LIBRARY_PATH := $(SGX_SDK)/lib SGX_ENCLAVE_SIGNER := $(SGX_SDK)/bin/x86/sgx_sign SGX_EDGER8R := $(SGX_SDK)/bin/x86/sgx_edger8r else - SGX_COMMON_CFLAGS := -m64 + SGX_COMMON_CFLAGS += -m64 SGX_LIBRARY_PATH := $(SGX_SDK)/lib64 SGX_ENCLAVE_SIGNER := $(SGX_SDK)/bin/x64/sgx_sign SGX_EDGER8R := $(SGX_SDK)/bin/x64/sgx_edger8r diff --git a/test/cpuid/main.c b/test/cpuid/main.c index 83d51439..c773167d 100644 --- a/test/cpuid/main.c +++ b/test/cpuid/main.c @@ -1,5 +1,6 @@ #include #include +#include typedef struct t_cpuid { unsigned int eax; @@ -10,6 +11,7 @@ typedef struct t_cpuid { static inline void native_cpuid(int leaf, int subleaf, t_cpuid_t *p) { + memset(p, 0, sizeof(*p)); /* ecx is often an input as well as an output. */ asm volatile("cpuid" : "=a" (p->eax), @@ -22,7 +24,7 @@ static inline void native_cpuid(int leaf, int subleaf, t_cpuid_t *p) int main(int argc, char **argv) { /* Gets CPUID information and tests the SGX support of the CPU */ - t_cpuid_t cpu = {0}; + t_cpuid_t cpu; int leaf = 1; int subleaf = 0; @@ -42,7 +44,6 @@ int main(int argc, char **argv) printf("\nExtended feature bits (EAX=07H, ECX=0H)\n"); leaf = 7; subleaf = 0; - cpu = {0}; native_cpuid(leaf, subleaf, &cpu); printf("eax: %x ebx: %x ecx: %x edx: %x\n", cpu.eax, cpu.ebx, cpu.ecx, cpu.edx); @@ -61,7 +62,6 @@ int main(int argc, char **argv) printf("\nCPUID Leaf 12H, Sub-Leaf 0 of Intel SGX Capabilities (EAX=12H,ECX=0)\n"); leaf = 0x12; subleaf = 0; - cpu = {0}; native_cpuid(leaf, subleaf, &cpu); printf("eax: %x ebx: %x ecx: %x edx: %x\n", cpu.eax, cpu.ebx, cpu.ecx, cpu.edx); @@ -74,7 +74,6 @@ int main(int argc, char **argv) printf("\nCPUID Leaf 12H, Sub-Leaf 1 of Intel SGX Capabilities (EAX=12H,ECX=1)\n"); leaf = 0x12; subleaf = 1; - cpu = {0}; native_cpuid(leaf, subleaf, &cpu); printf("eax: %x ebx: %x ecx: %x edx: %x\n", cpu.eax, cpu.ebx, cpu.ecx, cpu.edx); @@ -84,7 +83,6 @@ int main(int argc, char **argv) printf("\nCPUID Leaf 12H, Sub-Leaf %d of Intel SGX Capabilities (EAX=12H,ECX=%d)\n", i, i); leaf = 0x12; subleaf = i; - cpu = {0}; native_cpuid(leaf, subleaf, &cpu); printf("eax: %x ebx: %x ecx: %x edx: %x\n", cpu.eax, cpu.ebx, cpu.ecx, cpu.edx); }