From 03bb09abdfd11e747fbbf0e4d9755e41cd4ebc0f Mon Sep 17 00:00:00 2001 From: LI Qing Date: Tue, 25 Feb 2020 08:52:45 +0000 Subject: [PATCH] Add the emulation of SYSCALL instruction 1. Refactor the exception handling process 2. Support emulating the SYSCALL instruction in the exception handler --- src/libos/src/exception/cpuid.rs | 17 ++---- src/libos/src/exception/mod.rs | 43 +++++++++++++-- src/libos/src/exception/rdtsc.rs | 29 ++-------- src/libos/src/exception/syscall.rs | 30 ++++++++++ src/libos/src/syscall/mod.rs | 15 +---- src/libos/src/time/mod.rs | 4 +- test/Makefile | 2 +- test/emulate_syscall/Makefile | 5 ++ test/emulate_syscall/main.c | 89 ++++++++++++++++++++++++++++++ 9 files changed, 178 insertions(+), 56 deletions(-) create mode 100644 src/libos/src/exception/syscall.rs create mode 100644 test/emulate_syscall/Makefile create mode 100644 test/emulate_syscall/main.c diff --git a/src/libos/src/exception/cpuid.rs b/src/libos/src/exception/cpuid.rs index 8479295b..597ae4d8 100644 --- a/src/libos/src/exception/cpuid.rs +++ b/src/libos/src/exception/cpuid.rs @@ -3,7 +3,7 @@ use sgx_types::*; use std::collections::HashMap; use std::rsgx_cpuidex; -const CPUID_OPCODE: u16 = 0xA20F; +pub const CPUID_OPCODE: u16 = 0xA20F; const CPUID_MIN_BASIC_LEAF: u32 = 0; const CPUID_MAX_BASIC_LEAF: u32 = 0x1F; const CPUID_MIN_EXTEND_LEAF: u32 = 0x8000_0000; @@ -18,7 +18,7 @@ struct CpuIdInput { } #[repr(C)] -#[derive(Eq, PartialEq, Hash, Clone, Copy)] +#[derive(Eq, PartialEq, Hash, Clone, Copy, Debug)] struct CpuIdResult { eax: u32, ebx: u32, @@ -261,19 +261,12 @@ pub fn setup_cpuid_info() { let max_basic_leaf = CPUID.get_max_basic_leaf(); } -#[no_mangle] -pub extern "C" fn handle_cpuid_exception(info: *mut sgx_exception_info_t) -> u32 { - let info = unsafe { &mut *info }; - let ip_opcode = unsafe { *(info.cpu_context.rip as *const u16) }; - if info.exception_vector != sgx_exception_vector_t::SGX_EXCEPTION_VECTOR_UD - || info.exception_type != 
sgx_exception_type_t::SGX_EXCEPTION_HARDWARE - || ip_opcode != CPUID_OPCODE - { - return EXCEPTION_CONTINUE_SEARCH; - } +pub fn handle_cpuid_exception(info: &mut sgx_exception_info_t) -> u32 { + debug!("handle CPUID exception"); let leaf = info.cpu_context.rax as u32; let subleaf = info.cpu_context.rcx as u32; let cpuid_result = CPUID.get_cpuid_info(leaf, subleaf); + trace!("cpuid result: {:?}", cpuid_result); info.cpu_context.rax = cpuid_result.eax as u64; info.cpu_context.rbx = cpuid_result.ebx as u64; info.cpu_context.rcx = cpuid_result.ecx as u64; diff --git a/src/libos/src/exception/mod.rs b/src/libos/src/exception/mod.rs index f7664792..182b15b8 100644 --- a/src/libos/src/exception/mod.rs +++ b/src/libos/src/exception/mod.rs @@ -1,15 +1,50 @@ -use self::cpuid::*; -use self::rdtsc::*; +use self::cpuid::{handle_cpuid_exception, setup_cpuid_info, CPUID_OPCODE}; +use self::rdtsc::{handle_rdtsc_exception, RDTSC_OPCODE}; +use self::syscall::{handle_syscall_exception, SYSCALL_OPCODE}; use super::*; +use crate::syscall::SyscallNum; use sgx_types::*; pub fn register_exception_handlers() { setup_cpuid_info(); unsafe { - sgx_register_exception_handler(1, handle_cpuid_exception); - sgx_register_exception_handler(1, handle_rdtsc_exception); + sgx_register_exception_handler(1, handle_exception); } } +#[no_mangle] +extern "C" fn handle_exception(info: *mut sgx_exception_info_t) -> u32 { + let ret = unsafe { __occlum_syscall(SyscallNum::Exception as u32, info) }; + assert!(ret == EXCEPTION_CONTINUE_EXECUTION); + ret +} + +pub fn do_handle_exception(info: *mut sgx_exception_info_t) -> Result<isize> { + let mut info = unsafe { &mut *info }; + // Assume the length of opcode is 2 bytes + let ip_opcode = unsafe { *(info.cpu_context.rip as *const u16) }; + if info.exception_vector != sgx_exception_vector_t::SGX_EXCEPTION_VECTOR_UD + || info.exception_type != sgx_exception_type_t::SGX_EXCEPTION_HARDWARE + { + panic!( + "unable to process the exception, vector:{} type:{}", 
info.exception_vector as u32, info.exception_type as u32 + ); + } + let ret = match ip_opcode { + #![deny(unreachable_patterns)] + CPUID_OPCODE => handle_cpuid_exception(&mut info), + RDTSC_OPCODE => handle_rdtsc_exception(&mut info), + SYSCALL_OPCODE => handle_syscall_exception(&mut info), + _ => panic!("unable to process the exception, opcode: {:#x}", ip_opcode), + }; + Ok(ret as isize) +} + +extern "C" { + fn __occlum_syscall(num: u32, info: *mut sgx_exception_info_t) -> u32; +} + mod cpuid; mod rdtsc; +mod syscall; diff --git a/src/libos/src/exception/rdtsc.rs b/src/libos/src/exception/rdtsc.rs index 9ffaec60..2134075e 100644 --- a/src/libos/src/exception/rdtsc.rs +++ b/src/libos/src/exception/rdtsc.rs @@ -1,34 +1,15 @@ use super::*; -use crate::syscall::SyscallNum; use sgx_types::*; -const RDTSC_OPCODE: u16 = 0x310F; +pub const RDTSC_OPCODE: u16 = 0x310F; -#[no_mangle] -pub extern "C" fn handle_rdtsc_exception(info: *mut sgx_exception_info_t) -> u32 { - let info = unsafe { &mut *info }; - let ip_opcode = unsafe { *(info.cpu_context.rip as *const u16) }; - if info.exception_vector != sgx_exception_vector_t::SGX_EXCEPTION_VECTOR_UD - || info.exception_type != sgx_exception_type_t::SGX_EXCEPTION_HARDWARE - || ip_opcode != RDTSC_OPCODE - { - return EXCEPTION_CONTINUE_SEARCH; - } - - let (low, high) = { - let mut low = 0; - let mut high = 0; - let ret = unsafe { __occlum_syscall(SyscallNum::Rdtsc as u32, &mut low, &mut high) }; - assert!(ret == 0); - (low, high) - }; +pub fn handle_rdtsc_exception(info: &mut sgx_exception_info_t) -> u32 { + debug!("handle RDTSC exception"); + let (low, high) = time::do_rdtsc(); + trace!("do_rdtsc result {{ low: {:#x} high: {:#x}}}", low, high); info.cpu_context.rax = low as u64; info.cpu_context.rdx = high as u64; info.cpu_context.rip += 2; EXCEPTION_CONTINUE_EXECUTION } - -extern "C" { - fn __occlum_syscall(num: u32, arg0: *mut u32, arg1: *mut u32) -> i64; -} diff --git a/src/libos/src/exception/syscall.rs 
b/src/libos/src/exception/syscall.rs new file mode 100644 index 00000000..4116ec28 --- /dev/null +++ b/src/libos/src/exception/syscall.rs @@ -0,0 +1,30 @@ +use super::*; +use crate::syscall::{occlum_syscall, SyscallNum}; +use sgx_types::*; + +pub const SYSCALL_OPCODE: u16 = 0x050F; + +pub fn handle_syscall_exception(info: &mut sgx_exception_info_t) -> u32 { + debug!("handle SYSCALL exception"); + // SYSCALL, save RIP into RCX and RFLAGS into R11 + info.cpu_context.rcx = info.cpu_context.rip + 2; + info.cpu_context.r11 = info.cpu_context.rflags; + let num = info.cpu_context.rax as u32; + let arg0 = info.cpu_context.rdi as isize; + let arg1 = info.cpu_context.rsi as isize; + let arg2 = info.cpu_context.rdx as isize; + let arg3 = info.cpu_context.r10 as isize; + let arg4 = info.cpu_context.r8 as isize; + let arg5 = info.cpu_context.r9 as isize; + // syscall should not be an exception in Occlum + assert!(num != SyscallNum::Exception as u32); + let ret = occlum_syscall(num, arg0, arg1, arg2, arg3, arg4, arg5); + info.cpu_context.rax = ret as u64; + + // SYSRET, load RIP from RCX and load RFLAGS from R11 + info.cpu_context.rip = info.cpu_context.rcx; + // Clear RF, VM, reserved bits; set bit 1 + info.cpu_context.rflags = (info.cpu_context.r11 & 0x3C7FD7) | 2; + + EXCEPTION_CONTINUE_EXECUTION +} diff --git a/src/libos/src/syscall/mod.rs b/src/libos/src/syscall/mod.rs index 36325aa3..a1d9e7d7 100644 --- a/src/libos/src/syscall/mod.rs +++ b/src/libos/src/syscall/mod.rs @@ -16,6 +16,7 @@ use time::{clockid_t, timespec_t, timeval_t, GLOBAL_PROFILER}; use util::log::{self, LevelFilter}; use util::mem_util::from_user::*; +use crate::exception::do_handle_exception; use crate::fs::{ do_access, do_chdir, do_chmod, do_chown, do_close, do_dup, do_dup2, do_dup3, do_eventfd, do_eventfd2, do_faccessat, do_fchmod, do_fchown, do_fcntl, do_fdatasync, do_fstat, do_fstatat, @@ -400,7 +401,7 @@ macro_rules! 
process_syscall_table_with_callback { // Occlum-specific system calls (Spawn = 360) => do_spawn(child_pid_ptr: *mut u32, path: *const i8, argv: *const *const i8, envp: *const *const i8, fdop_list: *const FdOp), // Exception handling - (Rdtsc = 361) => do_rdtsc(low_ptr: *mut u32, high_ptr: *mut u32), + (Exception = 361) => do_handle_exception(info: *mut sgx_exception_info_t), } }; } @@ -714,18 +715,6 @@ fn do_clock_gettime(clockid: clockid_t, ts_u: *mut timespec_t) -> Result Ok(0) } -fn do_rdtsc(low_ptr: *mut u32, high_ptr: *mut u32) -> Result { - check_mut_ptr(low_ptr)?; - check_mut_ptr(high_ptr)?; - let (low, high) = time::do_rdtsc()?; - debug!("do_rdtsc result {{ low: {:#x} high: {:#x}}}", low, high); - unsafe { - *low_ptr = low; - *high_ptr = high; - } - Ok(0) -} - // TODO: handle remainder fn do_nanosleep(req_u: *const timespec_t, rem_u: *mut timespec_t) -> Result { check_ptr(req_u)?; diff --git a/src/libos/src/time/mod.rs b/src/libos/src/time/mod.rs index e7026405..d5f6c761 100644 --- a/src/libos/src/time/mod.rs +++ b/src/libos/src/time/mod.rs @@ -160,7 +160,7 @@ pub fn do_thread_getcpuclock() -> Result { Ok(tv) } -pub fn do_rdtsc() -> Result<(u32, u32)> { +pub fn do_rdtsc() -> (u32, u32) { extern "C" { fn occlum_ocall_rdtsc(low: *mut u32, high: *mut u32) -> sgx_status_t; } @@ -168,7 +168,7 @@ pub fn do_rdtsc() -> Result<(u32, u32)> { let mut high = 0; let sgx_status = unsafe { occlum_ocall_rdtsc(&mut low, &mut high) }; assert!(sgx_status == sgx_status_t::SGX_SUCCESS); - Ok((low, high)) + (low, high) } // For SEFS diff --git a/test/Makefile b/test/Makefile index 9021f92e..ba4f6e37 100644 --- a/test/Makefile +++ b/test/Makefile @@ -14,7 +14,7 @@ TEST_DEPS := client data_sink TESTS ?= empty env hello_world malloc mmap file fs_perms getpid spawn sched pipe time \ truncate readdir mkdir open stat link symlink chmod chown tls pthread uname rlimit \ server server_epoll unix_socket cout hostfs cpuid rdtsc device sleep exit_group \ - ioctl fcntl eventfd + ioctl fcntl 
eventfd emulate_syscall # Benchmarks: need to be compiled and run by bench-% target BENCHES := spawn_and_exit_latency pipe_throughput unix_socket_throughput diff --git a/test/emulate_syscall/Makefile b/test/emulate_syscall/Makefile new file mode 100644 index 00000000..0401cacb --- /dev/null +++ b/test/emulate_syscall/Makefile @@ -0,0 +1,5 @@ +include ../test_common.mk + +EXTRA_C_FLAGS := -Wno-int-to-pointer-cast +EXTRA_LINK_FLAGS := +BIN_ARGS := diff --git a/test/emulate_syscall/main.c b/test/emulate_syscall/main.c new file mode 100644 index 00000000..41e3b45a --- /dev/null +++ b/test/emulate_syscall/main.c @@ -0,0 +1,89 @@ +#include +#include +#include +#include +#include "test.h" + +// ============================================================================ +// Helper structs & functions +// ============================================================================ + +typedef struct syscall_args { + int num; + unsigned long arg0; + unsigned long arg1; + unsigned long arg2; + unsigned long arg3; + unsigned long arg4; + unsigned long arg5; +} syscall_args_t; + +static inline uint64_t native_syscall(syscall_args_t *p) { + uint64_t ret; + register int num asm ("rax") = p->num; + register unsigned long arg0 asm ("rdi") = p->arg0; + register unsigned long arg1 asm ("rsi") = p->arg1; + register unsigned long arg2 asm ("rdx") = p->arg2; + register unsigned long arg3 asm ("r10") = p->arg3; + register unsigned long arg4 asm ("r8") = p->arg4; + register unsigned long arg5 asm ("r9") = p->arg5; + + asm volatile("syscall" + : "=a" (ret) + : "r" (num), "r" (arg0), "r" (arg1), "r" (arg2), "r" (arg3), "r" (arg4), "r" (arg5)); + return ret; +} + +// ============================================================================ +// Test cases for syscall emulation +// ============================================================================ + +#define KB (1024UL) +#define PAGE_SIZE (4 * KB) + +/* + * We use mmap() to test because it employs all arguments. 
+ */ +int test_mmap_and_munmap_via_syscall_instruction() { + int len = PAGE_SIZE; + syscall_args_t mmap_arg = { + .num= __NR_mmap, + .arg0 = (unsigned long) NULL, + .arg1 = len, + .arg2 = PROT_READ | PROT_WRITE, + .arg3 = MAP_PRIVATE | MAP_ANONYMOUS, + .arg4 = -1, + .arg5 = 0, + }; + char *buf = (char *) native_syscall(&mmap_arg); + if (buf == MAP_FAILED) { + THROW_ERROR("syscall mmap failed"); + } + for (size_t bi = 0; bi < len; bi++) { + if (buf[bi] != '\0') { + THROW_ERROR("invalid buffer contents"); + } + } + + syscall_args_t munmap_arg = { + .num= __NR_munmap, + .arg0 = (unsigned long) buf, + .arg1 = len, + }; + int ret = native_syscall(&munmap_arg); + if (ret < 0) { + THROW_ERROR("syscall munmap failed"); + } + return 0; +} + +// ============================================================================ +// Test suite main +// ============================================================================ +static test_case_t test_cases[] = { + TEST_CASE(test_mmap_and_munmap_via_syscall_instruction), +}; + +int main(int argc, const char* argv[]) { + return test_suite_run(test_cases, ARRAY_SIZE(test_cases)); +}