Add emulation of the SYSCALL instruction

1. Refactor the exception handling process
2. Support emulating the SYSCALL instruction in the exception handler
This commit is contained in:
LI Qing 2020-02-25 08:52:45 +00:00 committed by Tate, Hongliang Tian
parent 79fd6570f5
commit 03bb09abdf
9 changed files with 178 additions and 56 deletions

@ -3,7 +3,7 @@ use sgx_types::*;
use std::collections::HashMap; use std::collections::HashMap;
use std::rsgx_cpuidex; use std::rsgx_cpuidex;
const CPUID_OPCODE: u16 = 0xA20F; pub const CPUID_OPCODE: u16 = 0xA20F;
const CPUID_MIN_BASIC_LEAF: u32 = 0; const CPUID_MIN_BASIC_LEAF: u32 = 0;
const CPUID_MAX_BASIC_LEAF: u32 = 0x1F; const CPUID_MAX_BASIC_LEAF: u32 = 0x1F;
const CPUID_MIN_EXTEND_LEAF: u32 = 0x8000_0000; const CPUID_MIN_EXTEND_LEAF: u32 = 0x8000_0000;
@ -18,7 +18,7 @@ struct CpuIdInput {
} }
#[repr(C)] #[repr(C)]
#[derive(Eq, PartialEq, Hash, Clone, Copy)] #[derive(Eq, PartialEq, Hash, Clone, Copy, Debug)]
struct CpuIdResult { struct CpuIdResult {
eax: u32, eax: u32,
ebx: u32, ebx: u32,
@ -261,19 +261,12 @@ pub fn setup_cpuid_info() {
let max_basic_leaf = CPUID.get_max_basic_leaf(); let max_basic_leaf = CPUID.get_max_basic_leaf();
} }
#[no_mangle] pub fn handle_cpuid_exception(info: &mut sgx_exception_info_t) -> u32 {
pub extern "C" fn handle_cpuid_exception(info: *mut sgx_exception_info_t) -> u32 { debug!("handle CPUID exception");
let info = unsafe { &mut *info };
let ip_opcode = unsafe { *(info.cpu_context.rip as *const u16) };
if info.exception_vector != sgx_exception_vector_t::SGX_EXCEPTION_VECTOR_UD
|| info.exception_type != sgx_exception_type_t::SGX_EXCEPTION_HARDWARE
|| ip_opcode != CPUID_OPCODE
{
return EXCEPTION_CONTINUE_SEARCH;
}
let leaf = info.cpu_context.rax as u32; let leaf = info.cpu_context.rax as u32;
let subleaf = info.cpu_context.rcx as u32; let subleaf = info.cpu_context.rcx as u32;
let cpuid_result = CPUID.get_cpuid_info(leaf, subleaf); let cpuid_result = CPUID.get_cpuid_info(leaf, subleaf);
trace!("cpuid result: {:?}", cpuid_result);
info.cpu_context.rax = cpuid_result.eax as u64; info.cpu_context.rax = cpuid_result.eax as u64;
info.cpu_context.rbx = cpuid_result.ebx as u64; info.cpu_context.rbx = cpuid_result.ebx as u64;
info.cpu_context.rcx = cpuid_result.ecx as u64; info.cpu_context.rcx = cpuid_result.ecx as u64;

@ -1,15 +1,50 @@
use self::cpuid::*; use self::cpuid::{handle_cpuid_exception, setup_cpuid_info, CPUID_OPCODE};
use self::rdtsc::*; use self::rdtsc::{handle_rdtsc_exception, RDTSC_OPCODE};
use self::syscall::{handle_syscall_exception, SYSCALL_OPCODE};
use super::*; use super::*;
use crate::syscall::SyscallNum;
use sgx_types::*; use sgx_types::*;
pub fn register_exception_handlers() { pub fn register_exception_handlers() {
setup_cpuid_info(); setup_cpuid_info();
unsafe { unsafe {
sgx_register_exception_handler(1, handle_cpuid_exception); sgx_register_exception_handler(1, handle_exception);
sgx_register_exception_handler(1, handle_rdtsc_exception);
} }
} }
/// Exception handler registered with `sgx_register_exception_handler`.
///
/// Forwards the raw exception info to `__occlum_syscall` using the
/// `SyscallNum::Exception` number and returns the status it produces.
/// NOTE(review): presumably `__occlum_syscall` dispatches to
/// `do_handle_exception` through the syscall table — confirm.
///
/// # Panics
///
/// Panics if the returned status is not `EXCEPTION_CONTINUE_EXECUTION`.
#[no_mangle]
extern "C" fn handle_exception(info: *mut sgx_exception_info_t) -> u32 {
    // `info` is passed through untouched; it is only dereferenced on the
    // other side of the syscall boundary.
    let ret = unsafe { __occlum_syscall(SyscallNum::Exception as u32, info) };
    // Every exception that reaches this handler is expected to be emulated.
    assert!(ret == EXCEPTION_CONTINUE_EXECUTION);
    ret
}
/// Emulates the instruction that raised a hardware #UD exception.
///
/// The two bytes at the faulting RIP are read as the opcode and the exception
/// is dispatched to the matching emulation routine (CPUID, RDTSC or SYSCALL).
/// The status returned by that routine is widened to `isize` and returned as
/// `Ok`.
///
/// `info` must point to a valid, exclusively accessible
/// `sgx_exception_info_t`.
/// NOTE(review): presumably guaranteed by the SGX exception dispatcher that
/// calls `handle_exception` — confirm.
///
/// # Panics
///
/// Panics if the exception is not a hardware #UD exception, or if the opcode
/// at RIP is not one of the emulated instructions.
pub fn do_handle_exception(info: *mut sgx_exception_info_t) -> Result<isize> {
    // SAFETY: relies on the caller passing a valid pointer (see doc comment).
    let info = unsafe { &mut *info };

    // Validate the exception kind before touching the memory RIP points to.
    if info.exception_vector != sgx_exception_vector_t::SGX_EXCEPTION_VECTOR_UD
        || info.exception_type != sgx_exception_type_t::SGX_EXCEPTION_HARDWARE
    {
        panic!(
            "unable to process the exception, vector:{} type:{}",
            info.exception_vector as u32, info.exception_type as u32
        );
    }

    // Assume the length of opcode is 2 bytes.
    // SAFETY: RIP addresses the faulting instruction, so the bytes are
    // readable. Instruction addresses carry no alignment guarantee, hence the
    // unaligned read instead of a plain `*const u16` dereference.
    let ip_opcode = unsafe { (info.cpu_context.rip as *const u16).read_unaligned() };

    let ret = match ip_opcode {
        #![deny(unreachable_patterns)]
        CPUID_OPCODE => handle_cpuid_exception(info),
        RDTSC_OPCODE => handle_rdtsc_exception(info),
        SYSCALL_OPCODE => handle_syscall_exception(info),
        _ => panic!("unable to process the exception, opcode: {:#x}", ip_opcode),
    };
    Ok(ret as isize)
}
extern "C" {
    // Externally defined syscall entry point, declared here with the
    // two-argument shape used by `handle_exception`: a syscall number plus a
    // pointer to the exception info.
    // NOTE(review): the symbol is defined outside this file; confirm that
    // this prototype is compatible with its real calling convention.
    fn __occlum_syscall(num: u32, info: *mut sgx_exception_info_t) -> u32;
}
mod cpuid; mod cpuid;
mod rdtsc; mod rdtsc;
mod syscall;

@ -1,34 +1,15 @@
use super::*; use super::*;
use crate::syscall::SyscallNum;
use sgx_types::*; use sgx_types::*;
const RDTSC_OPCODE: u16 = 0x310F; pub const RDTSC_OPCODE: u16 = 0x310F;
#[no_mangle] pub fn handle_rdtsc_exception(info: &mut sgx_exception_info_t) -> u32 {
pub extern "C" fn handle_rdtsc_exception(info: *mut sgx_exception_info_t) -> u32 { debug!("handle RDTSC exception");
let info = unsafe { &mut *info }; let (low, high) = time::do_rdtsc();
let ip_opcode = unsafe { *(info.cpu_context.rip as *const u16) }; trace!("do_rdtsc result {{ low: {:#x} high: {:#x}}}", low, high);
if info.exception_vector != sgx_exception_vector_t::SGX_EXCEPTION_VECTOR_UD
|| info.exception_type != sgx_exception_type_t::SGX_EXCEPTION_HARDWARE
|| ip_opcode != RDTSC_OPCODE
{
return EXCEPTION_CONTINUE_SEARCH;
}
let (low, high) = {
let mut low = 0;
let mut high = 0;
let ret = unsafe { __occlum_syscall(SyscallNum::Rdtsc as u32, &mut low, &mut high) };
assert!(ret == 0);
(low, high)
};
info.cpu_context.rax = low as u64; info.cpu_context.rax = low as u64;
info.cpu_context.rdx = high as u64; info.cpu_context.rdx = high as u64;
info.cpu_context.rip += 2; info.cpu_context.rip += 2;
EXCEPTION_CONTINUE_EXECUTION EXCEPTION_CONTINUE_EXECUTION
} }
extern "C" {
fn __occlum_syscall(num: u32, arg0: *mut u32, arg1: *mut u32) -> i64;
}

@ -0,0 +1,30 @@
use super::*;
use crate::syscall::{occlum_syscall, SyscallNum};
use sgx_types::*;
pub const SYSCALL_OPCODE: u16 = 0x050F;
/// Emulates a SYSCALL instruction that trapped as an exception.
///
/// The saved CPU context is updated as if SYSCALL followed by SYSRET had
/// executed: RCX and R11 receive the return RIP and the saved RFLAGS, the
/// request is served through `occlum_syscall`, RAX receives the result, and
/// RIP/RFLAGS are reloaded from RCX/R11.
///
/// Always returns `EXCEPTION_CONTINUE_EXECUTION`.
///
/// # Panics
///
/// Panics if the requested syscall number is `SyscallNum::Exception`.
pub fn handle_syscall_exception(info: &mut sgx_exception_info_t) -> u32 {
    debug!("handle SYSCALL exception");

    let ctx = &mut info.cpu_context;

    // SYSCALL side: RCX <- address of the next instruction (the opcode is
    // two bytes long), R11 <- RFLAGS.
    ctx.rcx = ctx.rip + 2;
    ctx.r11 = ctx.rflags;

    // The syscall number is taken from RAX.
    let num = ctx.rax as u32;
    // The exception-handling syscall must never be requested via SYSCALL.
    assert!(num != SyscallNum::Exception as u32);

    // Arguments are taken from RDI, RSI, RDX, R10, R8 and R9, in that order.
    let ret = occlum_syscall(
        num,
        ctx.rdi as isize,
        ctx.rsi as isize,
        ctx.rdx as isize,
        ctx.r10 as isize,
        ctx.r8 as isize,
        ctx.r9 as isize,
    );
    ctx.rax = ret as u64;

    // SYSRET side: RIP <- RCX; RFLAGS <- R11 with RF, VM and the reserved
    // bits cleared and bit 1 set.
    ctx.rip = ctx.rcx;
    ctx.rflags = (ctx.r11 & 0x3C7FD7) | 2;

    EXCEPTION_CONTINUE_EXECUTION
}

@ -16,6 +16,7 @@ use time::{clockid_t, timespec_t, timeval_t, GLOBAL_PROFILER};
use util::log::{self, LevelFilter}; use util::log::{self, LevelFilter};
use util::mem_util::from_user::*; use util::mem_util::from_user::*;
use crate::exception::do_handle_exception;
use crate::fs::{ use crate::fs::{
do_access, do_chdir, do_chmod, do_chown, do_close, do_dup, do_dup2, do_dup3, do_eventfd, do_access, do_chdir, do_chmod, do_chown, do_close, do_dup, do_dup2, do_dup3, do_eventfd,
do_eventfd2, do_faccessat, do_fchmod, do_fchown, do_fcntl, do_fdatasync, do_fstat, do_fstatat, do_eventfd2, do_faccessat, do_fchmod, do_fchown, do_fcntl, do_fdatasync, do_fstat, do_fstatat,
@ -400,7 +401,7 @@ macro_rules! process_syscall_table_with_callback {
// Occlum-specific system calls // Occlum-specific system calls
(Spawn = 360) => do_spawn(child_pid_ptr: *mut u32, path: *const i8, argv: *const *const i8, envp: *const *const i8, fdop_list: *const FdOp), (Spawn = 360) => do_spawn(child_pid_ptr: *mut u32, path: *const i8, argv: *const *const i8, envp: *const *const i8, fdop_list: *const FdOp),
// Exception handling // Exception handling
(Rdtsc = 361) => do_rdtsc(low_ptr: *mut u32, high_ptr: *mut u32), (Exception = 361) => do_handle_exception(info: *mut sgx_exception_info_t),
} }
}; };
} }
@ -714,18 +715,6 @@ fn do_clock_gettime(clockid: clockid_t, ts_u: *mut timespec_t) -> Result<isize>
Ok(0) Ok(0)
} }
fn do_rdtsc(low_ptr: *mut u32, high_ptr: *mut u32) -> Result<isize> {
check_mut_ptr(low_ptr)?;
check_mut_ptr(high_ptr)?;
let (low, high) = time::do_rdtsc()?;
debug!("do_rdtsc result {{ low: {:#x} high: {:#x}}}", low, high);
unsafe {
*low_ptr = low;
*high_ptr = high;
}
Ok(0)
}
// TODO: handle remainder // TODO: handle remainder
fn do_nanosleep(req_u: *const timespec_t, rem_u: *mut timespec_t) -> Result<isize> { fn do_nanosleep(req_u: *const timespec_t, rem_u: *mut timespec_t) -> Result<isize> {
check_ptr(req_u)?; check_ptr(req_u)?;

@ -160,7 +160,7 @@ pub fn do_thread_getcpuclock() -> Result<timespec_t> {
Ok(tv) Ok(tv)
} }
pub fn do_rdtsc() -> Result<(u32, u32)> { pub fn do_rdtsc() -> (u32, u32) {
extern "C" { extern "C" {
fn occlum_ocall_rdtsc(low: *mut u32, high: *mut u32) -> sgx_status_t; fn occlum_ocall_rdtsc(low: *mut u32, high: *mut u32) -> sgx_status_t;
} }
@ -168,7 +168,7 @@ pub fn do_rdtsc() -> Result<(u32, u32)> {
let mut high = 0; let mut high = 0;
let sgx_status = unsafe { occlum_ocall_rdtsc(&mut low, &mut high) }; let sgx_status = unsafe { occlum_ocall_rdtsc(&mut low, &mut high) };
assert!(sgx_status == sgx_status_t::SGX_SUCCESS); assert!(sgx_status == sgx_status_t::SGX_SUCCESS);
Ok((low, high)) (low, high)
} }
// For SEFS // For SEFS

@ -14,7 +14,7 @@ TEST_DEPS := client data_sink
TESTS ?= empty env hello_world malloc mmap file fs_perms getpid spawn sched pipe time \ TESTS ?= empty env hello_world malloc mmap file fs_perms getpid spawn sched pipe time \
truncate readdir mkdir open stat link symlink chmod chown tls pthread uname rlimit \ truncate readdir mkdir open stat link symlink chmod chown tls pthread uname rlimit \
server server_epoll unix_socket cout hostfs cpuid rdtsc device sleep exit_group \ server server_epoll unix_socket cout hostfs cpuid rdtsc device sleep exit_group \
ioctl fcntl eventfd ioctl fcntl eventfd emulate_syscall
# Benchmarks: need to be compiled and run by bench-% target # Benchmarks: need to be compiled and run by bench-% target
BENCHES := spawn_and_exit_latency pipe_throughput unix_socket_throughput BENCHES := spawn_and_exit_latency pipe_throughput unix_socket_throughput

@ -0,0 +1,5 @@
include ../test_common.mk
EXTRA_C_FLAGS := -Wno-int-to-pointer-cast
EXTRA_LINK_FLAGS :=
BIN_ARGS :=

@ -0,0 +1,89 @@
#include <sys/mman.h>
#include <sys/syscall.h>
#include <stdio.h>
#include <stdint.h>
#include "test.h"
// ============================================================================
// Helper structs & functions
// ============================================================================
// Bundle of a syscall number and its six arguments, consumed by
// native_syscall().
typedef struct syscall_args {
    int num;            // syscall number (loaded into rax)
    unsigned long arg0; // loaded into rdi
    unsigned long arg1; // loaded into rsi
    unsigned long arg2; // loaded into rdx
    unsigned long arg3; // loaded into r10
    unsigned long arg4; // loaded into r8
    unsigned long arg5; // loaded into r9
} syscall_args_t;
/*
 * Execute a raw SYSCALL instruction with the number and arguments in *p.
 *
 * The number goes in rax and the arguments in rdi, rsi, rdx, r10, r8 and r9.
 * Returns whatever the instruction leaves in rax, without any errno
 * translation.
 */
static inline uint64_t native_syscall(syscall_args_t *p) {
    uint64_t ret;
    register int num asm ("rax") = p->num;
    register unsigned long arg0 asm ("rdi") = p->arg0;
    register unsigned long arg1 asm ("rsi") = p->arg1;
    register unsigned long arg2 asm ("rdx") = p->arg2;
    register unsigned long arg3 asm ("r10") = p->arg3;
    register unsigned long arg4 asm ("r8") = p->arg4;
    register unsigned long arg5 asm ("r9") = p->arg5;

    // SYSCALL overwrites rcx (return RIP) and r11 (saved RFLAGS), and the
    // callee may read or write memory. All three must be declared as
    // clobbers, otherwise the compiler may keep live values in rcx/r11 or
    // cache memory across the instruction.
    asm volatile("syscall"
                 : "=a" (ret)
                 : "r" (num), "r" (arg0), "r" (arg1), "r" (arg2), "r" (arg3), "r" (arg4), "r" (arg5)
                 : "rcx", "r11", "memory");
    return ret;
}
// ============================================================================
// Test cases for syscall emulation
// ============================================================================
#define KB (1024UL)
#define PAGE_SIZE (4 * KB)
/*
* We use mmap() to test because it employs all arguments.
*/
/*
 * Map one anonymous page and unmap it again, issuing both requests through
 * the raw SYSCALL instruction, and check that the mapped page is zero-filled.
 *
 * Returns 0 on success; failures are reported through THROW_ERROR.
 */
int test_mmap_and_munmap_via_syscall_instruction() {
    size_t len = PAGE_SIZE;

    syscall_args_t mmap_arg = {
        .num = __NR_mmap,
        .arg0 = (unsigned long) NULL,
        .arg1 = len,
        .arg2 = PROT_READ | PROT_WRITE,
        .arg3 = MAP_PRIVATE | MAP_ANONYMOUS,
        .arg4 = (unsigned long) -1,
        .arg5 = 0,
    };
    long mmap_ret = (long) native_syscall(&mmap_arg);
    // A raw syscall reports failure as -errno, i.e. a value in [-4095, -1].
    // Comparing against MAP_FAILED would only catch -1 (-EPERM).
    if (mmap_ret < 0 && mmap_ret >= -4095) {
        THROW_ERROR("syscall mmap failed");
    }
    char *buf = (char *) mmap_ret;

    // The mapping is anonymous, so every byte is expected to be zero.
    for (size_t bi = 0; bi < len; bi++) {
        if (buf[bi] != '\0') {
            THROW_ERROR("invalid buffer contents");
        }
    }

    syscall_args_t munmap_arg = {
        .num = __NR_munmap,
        .arg0 = (unsigned long) buf,
        .arg1 = len,
    };
    // Keep the full 64-bit result instead of truncating it to int.
    long ret = (long) native_syscall(&munmap_arg);
    if (ret < 0) {
        THROW_ERROR("syscall munmap failed");
    }
    return 0;
}
// ============================================================================
// Test suite main
// ============================================================================
// Table of the test cases that main() hands to test_suite_run().
static test_case_t test_cases[] = {
    TEST_CASE(test_mmap_and_munmap_via_syscall_instruction),
};
// Entry point: passes every entry of test_cases to test_suite_run() and
// returns its result as the exit status. argc and argv are unused.
int main(int argc, const char* argv[]) {
    return test_suite_run(test_cases, ARRAY_SIZE(test_cases));
}