From cff0de1c395a87036e70f6f0f7aa3e9020ed4deb Mon Sep 17 00:00:00 2001 From: LI Qing Date: Wed, 24 Jul 2019 09:13:32 +0000 Subject: [PATCH] Add sched_xetaffinity() system calls in Occlum 1. LibOS supports the sched_getaffinity() and sched_setaffinity() system calls 2. Add sched_cpu_affinity test cases in occlum/test/sched --- src/Enclave.edl | 4 +- src/libos/src/entry.rs | 9 +- src/libos/src/process/mod.rs | 3 + src/libos/src/process/process.rs | 8 ++ src/libos/src/process/sched.rs | 112 ++++++++++++++++++++ src/libos/src/process/task.rs | 3 +- src/libos/src/syscall/mod.rs | 43 +++++++- src/pal/pal.c | 18 ++++ src/pal/task.c | 7 +- test/Makefile | 2 +- test/sched/Makefile | 5 + test/sched/main.c | 175 +++++++++++++++++++++++++++++++ 12 files changed, 380 insertions(+), 9 deletions(-) create mode 100644 src/libos/src/process/sched.rs create mode 100644 test/sched/Makefile create mode 100644 test/sched/main.c diff --git a/src/Enclave.edl b/src/Enclave.edl index b9878859..0667ab20 100644 --- a/src/Enclave.edl +++ b/src/Enclave.edl @@ -9,7 +9,7 @@ enclave { trusted { /* define ECALLs here.
*/ public int libos_boot([in, string] const char* executable_path, [user_check] const char** argv); - public int libos_run(void); + public int libos_run(int host_tid); /* This is only for debug usage */ public int dummy_ecall(void); }; @@ -20,5 +20,7 @@ enclave { void ocall_gettimeofday([out] long* sec, [out] long* us); void ocall_clock_gettime(int clockid, [out] long* sec, [out] long* ns); void ocall_sync(void); + int ocall_sched_getaffinity([out] int *error, int pid, size_t cpusize, [out, size=cpusize] unsigned char* buf); + int ocall_sched_setaffinity([out] int *error, int pid, size_t cpusize, [in, size=cpusize] const unsigned char* buf); }; }; diff --git a/src/libos/src/entry.rs b/src/libos/src/entry.rs index 0cf5cd0d..23601aef 100644 --- a/src/libos/src/entry.rs +++ b/src/libos/src/entry.rs @@ -1,4 +1,5 @@ use super::*; +use process::pid_t; use std::ffi::{CStr, CString, OsString}; use std::path::Path; use util::mem_util::from_untrusted::*; @@ -32,10 +33,10 @@ pub extern "C" fn libos_boot(path_buf: *const c_char, argv: *const *const c_char } #[no_mangle] -pub extern "C" fn libos_run() -> i32 { +pub extern "C" fn libos_run(host_tid: i32) -> i32 { let _ = backtrace::enable_backtrace("libocclum.signed.so", PrintFormat::Short); panic::catch_unwind(|| { - backtrace::__rust_begin_short_backtrace(|| match do_run() { + backtrace::__rust_begin_short_backtrace(|| match do_run(host_tid as pid_t) { Ok(exit_status) => exit_status, Err(err) => EXIT_STATUS_INTERNAL_ERROR, }) @@ -91,8 +92,8 @@ fn do_boot(path_str: &str, argv: &Vec) -> Result<(), Error> { } // TODO: make sure do_run() cannot be called after do_boot() -fn do_run() -> Result { - let exit_status = process::run_task()?; +fn do_run(host_tid: pid_t) -> Result { + let exit_status = process::run_task(host_tid)?; // sync file system // TODO: only sync when all processes exit diff --git a/src/libos/src/process/mod.rs b/src/libos/src/process/mod.rs index 4a27cd6e..5de400bb 100644 --- a/src/libos/src/process/mod.rs +++ 
b/src/libos/src/process/mod.rs
@@ -7,6 +7,7 @@ pub use self::spawn::{do_spawn, FileAction};
 pub use self::task::{current_pid, get_current, run_task};
 pub use self::thread::{do_clone, do_set_tid_address, CloneFlags, ThreadGroup};
 pub use self::wait::{WaitQueue, Waiter};
+pub use self::sched::{CpuSet, do_sched_getaffinity, do_sched_setaffinity};
 
 #[allow(non_camel_case_types)]
 pub type pid_t = u32;
@@ -18,6 +19,7 @@ pub struct Process {
     pid: pid_t,
     pgid: pid_t,
     tgid: pid_t,
+    host_tid: pid_t, // host thread running this process; 0 until run_task() records it
     exit_status: i32,
     // TODO: move cwd, root_inode into a FileSystem structure
     // TODO: should cwd be a String or INode?
@@ -73,6 +75,7 @@ mod spawn;
 mod task;
 mod thread;
 mod wait;
+mod sched;
 
 use self::task::Task;
 use super::*;
diff --git a/src/libos/src/process/process.rs b/src/libos/src/process/process.rs
index 3545c969..f1cdc238 100644
--- a/src/libos/src/process/process.rs
+++ b/src/libos/src/process/process.rs
@@ -12,6 +12,7 @@ lazy_static! {
             pid: 0,
             pgid: 1,
             tgid: 0,
+            host_tid: 0,
             exit_status: 0,
             cwd: "/".to_owned(),
             clear_child_tid: None,
@@ -40,6 +41,7 @@ impl Process {
             pid: new_pid,
             pgid: 1, // TODO: implement pgid
             tgid: new_pid,
+            host_tid: 0,
             cwd: cwd.to_owned(),
             clear_child_tid: None,
             exit_status: 0,
@@ -70,6 +72,12 @@ impl Process {
     pub fn get_pgid(&self) -> pid_t {
         self.pgid
     }
+    pub fn get_host_tid(&self) -> pid_t {
+        self.host_tid
+    }
+    pub fn set_host_tid(&mut self, host_tid: pid_t) {
+        self.host_tid = host_tid;
+    }
     pub fn get_status(&self) -> Status {
         self.status
     }
diff --git a/src/libos/src/process/sched.rs b/src/libos/src/process/sched.rs
new file mode 100644
index 00000000..ecfb1d76
--- /dev/null
+++ b/src/libos/src/process/sched.rs
@@ -0,0 +1,157 @@
+//! CPU affinity of LibOS processes.
+//!
+//! `sched_getaffinity()` and `sched_setaffinity()` are forwarded to the host
+//! through OCALLs, addressed to the host thread id recorded in each `Process`.
+
+use super::*;
+
+extern "C" {
+    fn ocall_sched_getaffinity(
+        ret: *mut i32,
+        errno: *mut i32,
+        pid: i32,
+        cpusetsize: size_t,
+        mask: *mut c_uchar,
+    ) -> sgx_status_t;
+    fn ocall_sched_setaffinity(
+        ret: *mut i32,
+        errno: *mut i32,
+        pid: i32,
+        cpusetsize: size_t,
+        mask: *const c_uchar,
+    ) -> sgx_status_t;
+}
+
+/// An owned copy of a CPU affinity mask, stored as raw bytes.
+pub struct CpuSet {
+    vec: Vec<u8>,
+}
+
+impl CpuSet {
+    /// Creates a mask of `len` zeroed bytes.
+    pub fn new(len: usize) -> CpuSet {
+        CpuSet { vec: vec![0; len] }
+    }
+
+    /// Copies `cpusize` bytes starting at `ptr` into a new mask.
+    ///
+    /// `ptr` is dereferenced without any check: the caller must pass a non-null
+    /// pointer that is readable for `cpusize` bytes.
+    pub fn from_raw_buf(ptr: *const u8, cpusize: usize) -> CpuSet {
+        // SAFETY: upheld by the caller, see the contract above.
+        let buf_slice = unsafe { std::slice::from_raw_parts(ptr, cpusize) };
+        CpuSet { vec: buf_slice.to_vec() }
+    }
+
+    pub fn as_mut_ptr(&mut self) -> *mut u8 {
+        self.vec.as_mut_ptr()
+    }
+
+    pub fn as_ptr(&self) -> *const u8 {
+        self.vec.as_ptr()
+    }
+
+    pub fn as_mut_slice(&mut self) -> &mut [u8] {
+        self.vec.as_mut_slice()
+    }
+
+    pub fn as_slice(&self) -> &[u8] {
+        self.vec.as_slice()
+    }
+
+    /// Size of the mask in bytes.
+    pub fn len(&self) -> usize {
+        self.vec.len()
+    }
+}
+
+/// Formats the mask as two lowercase hex digits per byte, in buffer order.
+impl std::fmt::LowerHex for CpuSet {
+    fn fmt(&self, fmtr: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
+        for byte in &self.vec {
+            write!(fmtr, "{:02x}", byte)?;
+        }
+        Ok(())
+    }
+}
+
+/// Formats the mask as two uppercase hex digits per byte, in buffer order.
+impl std::fmt::UpperHex for CpuSet {
+    fn fmt(&self, fmtr: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
+        for byte in &self.vec {
+            write!(fmtr, "{:02X}", byte)?;
+        }
+        Ok(())
+    }
+}
+
+/// Returns the host thread id recorded for LibOS process `pid`; `0` selects the
+/// current process.
+fn find_host_tid(pid: pid_t) -> Result<pid_t, Error> {
+    let process_ref = if pid == 0 { get_current() } else { get(pid)? };
+    let process = process_ref.lock().unwrap();
+    // NOTE(review): `host_tid` starts out as 0 and is only filled in by `run_task()`,
+    // while tid 0 means "the calling thread" to the host. Confirm that a process
+    // which has not started running yet cannot be looked up here.
+    Ok(process.get_host_tid())
+}
+
+/// Reads the CPU affinity mask of process `pid` (`0` = the caller) into `cpu_set`.
+///
+/// On success returns the non-negative value reported by the host system call.
+/// Fails if `pid` cannot be resolved, if the OCALL cannot be issued, or with the
+/// host's errno if the host system call fails.
+pub fn do_sched_getaffinity(pid: pid_t, cpu_set: &mut CpuSet) -> Result<i32, Error> {
+    let host_tid = match pid {
+        0 => 0,
+        _ => find_host_tid(pid)?,
+    };
+    let buf = cpu_set.as_mut_ptr();
+    let cpusize = cpu_set.len();
+    let mut ret = 0;
+    let mut error = 0;
+    // SAFETY: `buf` points at the `cpusize` bytes owned by `cpu_set`, which stays
+    // mutably borrowed for the whole call.
+    let sgx_status =
+        unsafe { ocall_sched_getaffinity(&mut ret, &mut error, host_tid as i32, cpusize, buf) };
+    // A failed OCALL may leave `ret` at 0, which must not be mistaken for success.
+    if sgx_status != sgx_status_t::SGX_SUCCESS {
+        let errno = Errno::EIO;
+        return errno!(errno, "ocall_sched_getaffinity could not be issued");
+    }
+    if ret < 0 {
+        let errno = Errno::from_errno(error);
+        return errno!(errno, "ocall_sched_getaffinity failed");
+    }
+    Ok(ret)
+}
+
+/// Sets the CPU affinity mask of process `pid` (`0` = the caller) to `cpu_set`.
+///
+/// On success returns the non-negative value reported by the host system call.
+/// Fails if `pid` cannot be resolved, if the OCALL cannot be issued, or with the
+/// host's errno if the host system call fails.
+pub fn do_sched_setaffinity(pid: pid_t, cpu_set: &CpuSet) -> Result<i32, Error> {
+    let host_tid = match pid {
+        0 => 0,
+        _ => find_host_tid(pid)?,
+    };
+    let buf = cpu_set.as_ptr();
+    let cpusize = cpu_set.len();
+    let mut ret = 0;
+    let mut error = 0;
+    // SAFETY: `buf` points at the `cpusize` bytes owned by `cpu_set`, which stays
+    // borrowed for the whole call.
+    let sgx_status =
+        unsafe { ocall_sched_setaffinity(&mut ret, &mut error, host_tid as i32, cpusize, buf) };
+    // A failed OCALL may leave `ret` at 0, which must not be mistaken for success.
+    if sgx_status != sgx_status_t::SGX_SUCCESS {
+        let errno = Errno::EIO;
+        return errno!(errno, "ocall_sched_setaffinity could not be issued");
+    }
+    if ret < 0 {
+        let errno = Errno::from_errno(error);
+        return errno!(errno, "ocall_sched_setaffinity failed");
+    }
+    Ok(ret)
+}
diff --git a/src/libos/src/process/task.rs b/src/libos/src/process/task.rs
index aa79a1e9..942515cc 100644
--- a/src/libos/src/process/task.rs
+++ b/src/libos/src/process/task.rs
@@ -72,13 +72,14 @@ fn dequeue_task() -> Option<ProcessRef> {
     NEW_PROCESS_QUEUE.lock().unwrap().pop_front()
 }
 
-pub fn run_task() -> Result<i32, Error> {
+pub fn run_task(host_tid: pid_t) -> Result<i32, Error> {
     let new_process: ProcessRef =
         dequeue_task().ok_or_else(|| (Errno::EAGAIN, "No new processes to run"))?;
     set_current(&new_process);
 
     let (pid, task) = {
         let mut process = new_process.lock().unwrap();
+        process.set_host_tid(host_tid);
         let pid = process.get_pid();
         let task = process.get_task_mut() as *mut Task;
         (pid, task)
diff --git a/src/libos/src/syscall/mod.rs b/src/libos/src/syscall/mod.rs
index 1ab3783d..44efd518 100644
--- a/src/libos/src/syscall/mod.rs
+++ b/src/libos/src/syscall/mod.rs
@@ -10,7 +10,7 @@
 use fs::*;
 use misc::{resource_t, rlimit_t, utsname_t};
 use
prelude::*;
-use process::{pid_t, ChildProcessFilter, CloneFlags, FileAction, FutexFlags, FutexOp};
+use process::{pid_t, ChildProcessFilter, CloneFlags, FileAction, FutexFlags, FutexOp, CpuSet};
 use std::ffi::{CStr, CString};
 use std::ptr;
 use time::{timeval_t, clockid_t, timespec_t};
@@ -175,6 +175,8 @@ pub extern "C" fn dispatch_syscall(
         ),
         SYS_ARCH_PRCTL => do_arch_prctl(arg0 as u32, arg1 as *mut usize),
         SYS_SET_TID_ADDRESS => do_set_tid_address(arg0 as *mut pid_t),
+        SYS_SCHED_GETAFFINITY => do_sched_getaffinity(arg0 as pid_t, arg1 as size_t, arg2 as *mut c_uchar),
+        SYS_SCHED_SETAFFINITY => do_sched_setaffinity(arg0 as pid_t, arg1 as size_t, arg2 as *const c_uchar),
 
         // memory
         SYS_MMAP => do_mmap(
@@ -960,6 +962,47 @@ fn do_set_tid_address(tidptr: *mut pid_t) -> Result<isize, Error> {
     process::do_set_tid_address(tidptr).map(|tid| tid as isize)
 }
 
+/// sched_getaffinity entry point: validates the raw arguments, then copies the
+/// affinity mask of `pid` into the `cpusize`-byte user buffer at `buf`.
+fn do_sched_getaffinity(pid: pid_t, cpusize: size_t, buf: *mut c_uchar) -> Result<isize, Error> {
+    // Validate the raw arguments before anything is done with the buffer
+    if cpusize == 0 {
+        return errno!(EINVAL, "cpuset size must be greater than zero");
+    }
+    if buf.is_null() {
+        return errno!(EFAULT, "cpuset mask must NOT be null");
+    }
+    check_mut_array(buf, cpusize)?;
+    // Call the memory-safe do_sched_getaffinity
+    let mut cpuset = CpuSet::new(cpusize);
+    let ret = process::do_sched_getaffinity(pid, &mut cpuset)?;
+    debug!("sched_getaffinity cpuset: {:#x}", cpuset);
+    // Copy from Rust types to C types
+    // SAFETY: `buf` is non-null and passed `check_mut_array` for `cpusize` bytes.
+    let buf_slice = unsafe { std::slice::from_raw_parts_mut(buf, cpusize) };
+    buf_slice.copy_from_slice(cpuset.as_slice());
+    Ok(ret as isize)
+}
+
+/// sched_setaffinity entry point: validates the raw arguments, then applies the
+/// `cpusize`-byte mask read from the user buffer at `buf` to `pid`.
+fn do_sched_setaffinity(pid: pid_t, cpusize: size_t, buf: *const c_uchar) -> Result<isize, Error> {
+    // Validate the raw arguments before anything is done with the buffer
+    if cpusize == 0 {
+        return errno!(EINVAL, "cpuset size must be greater than zero");
+    }
+    if buf.is_null() {
+        return errno!(EFAULT, "cpuset mask must NOT be null");
+    }
+    check_array(buf, cpusize)?;
+    // Convert unsafe C types into safe Rust types
+    let cpuset = CpuSet::from_raw_buf(buf, cpusize);
+    debug!("sched_setaffinity cpuset: {:#x}", cpuset);
+    // Call the memory-safe do_sched_setaffinity
+    let ret = process::do_sched_setaffinity(pid, &cpuset)?;
+    Ok(ret as isize)
+}
+
 fn do_socket(domain: c_int, socket_type: c_int, protocol: c_int) -> Result<isize, Error> {
     info!(
         "socket: domain: {}, socket_type: {}, protocol: {}",
diff --git a/src/pal/pal.c b/src/pal/pal.c
index 3c438d80..09ff646e 100644
--- a/src/pal/pal.c
+++ b/src/pal/pal.c
@@ -6,6 +6,8 @@
 #include
 #include
 #include
+#include
+#include
 
 #define MAX_PATH FILENAME_MAX
 
@@ -213,6 +215,24 @@ void ocall_clock_gettime(int clockid, time_t* sec, long* ns) {
     *ns = ts.tv_nsec;
 }
 
+// Runs the raw sched_getaffinity syscall on the host; errno is reported through *error.
+int ocall_sched_getaffinity(int* error, int pid, size_t cpusize, unsigned char* buf) {
+    int ret = syscall(__NR_sched_getaffinity, pid, cpusize, buf);
+    if (error) {
+        *error = (ret == -1) ? errno : 0;
+    }
+    return ret;
+}
+
+// Runs the raw sched_setaffinity syscall on the host; errno is reported through *error.
+int ocall_sched_setaffinity(int* error, int pid, size_t cpusize, const unsigned char* buf) {
+    int ret = syscall(__NR_sched_setaffinity, pid, cpusize, buf);
+    if (error) {
+        *error = (ret == -1) ? errno : 0;
+    }
+    return ret;
+}
+
 void ocall_sync(void) {
     sync();
 }
diff --git a/src/pal/task.c b/src/pal/task.c
index 6c7db174..0b46d4d6 100644
--- a/src/pal/task.c
+++ b/src/pal/task.c
@@ -1,11 +1,16 @@
 #include
 #include
 #include
+#include
+#include
 #include "atomic.h"
 #include "futex.h"
 #include "sgx_urts.h"
 #include "Enclave_u.h"
 
+int syscall();
+#define gettid() syscall(__NR_gettid)
+
 static volatile int num_tasks = 0;
 static volatile int main_task_status = 0;
 static volatile int any_fatal_error = 0;
@@ -29,7 +34,7 @@ static void* __run_task_thread(void* _data) {
     int status = 0;
     struct task_thread_data* data = _data;
 
-    sgx_status_t sgx_ret = libos_run(data->eid, &status);
+    sgx_status_t sgx_ret = libos_run(data->eid, &status, gettid());
     if(sgx_ret != SGX_SUCCESS) {
         // TODO: deal with ECALL error
         printf("ERROR: ECall libos_run failed\n");
diff --git a/test/Makefile b/test/Makefile
index 40bcc753..16400740 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -4,7 +4,7 @@ PROJECT_DIR := $(realpath $(CUR_DIR)/../)
 # Dependencies: need to be compiled but not to run by any Makefile target
 TEST_DEPS := dev_null
 # Tests: need to be compiled and run by test-% target
-TESTS := empty env hello_world malloc mmap file getpid spawn pipe time \
+TESTS := empty env hello_world malloc mmap file getpid spawn sched pipe time \
 	truncate readdir mkdir link tls pthread uname rlimit client server \
 	server_epoll unix_socket cout hostfs cpuid rdtsc device
 # Benchmarks: need to be compiled and run by bench-% target
diff --git a/test/sched/Makefile b/test/sched/Makefile
new file mode 100644
index 00000000..9e1b6dec
--- /dev/null
+++ b/test/sched/Makefile
@@ -0,0 +1,5 @@
+include ../test_common.mk
+
+EXTRA_C_FLAGS :=
+EXTRA_LINK_FLAGS :=
+BIN_ARGS :=
diff --git a/test/sched/main.c b/test/sched/main.c
new file mode 100644
index 00000000..b1f9101e
--- /dev/null
+++ b/test/sched/main.c
@@ -0,0 +1,175 @@
+#define _GNU_SOURCE
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "test.h"
+
+// ============================================================================
+// Test cases for sched_cpu_affinity
+// ============================================================================
+
+static int test_sched_getaffinity_with_self_pid() { // mask of pid 0 must hold every online CPU
+    cpu_set_t mask;
+    if (sched_getaffinity(0, sizeof(cpu_set_t), &mask) < 0) {
+        throw_error("failed to call sched_getaffinity");
+    }
+    if (CPU_COUNT(&mask) <= 0) {
+        throw_error("failed to get cpuset mask");
+    }
+    if (sysconf(_SC_NPROCESSORS_ONLN) != CPU_COUNT(&mask)) {
+        throw_error("cpuset num wrong");
+    }
+    return 0;
+}
+
+static int test_sched_setaffinity_with_self_pid() { // pin to CPU 0, read back, then restore
+    cpu_set_t mask_old;
+    CPU_ZERO(&mask_old); // CPU_SET() only adds CPUs, so start from an empty set
+    for (int i = 0, nproc = sysconf(_SC_NPROCESSORS_ONLN); i < nproc; ++i) {
+        CPU_SET(i, &mask_old);
+    }
+    cpu_set_t mask;
+    CPU_ZERO(&mask);
+    CPU_SET(0, &mask);
+    if (sched_setaffinity(0, sizeof(cpu_set_t), &mask) < 0) {
+        throw_error("failed to call sched_setaffinity \n");
+    }
+    cpu_set_t mask2;
+    if (sched_getaffinity(0, sizeof(cpu_set_t), &mask2) < 0) {
+        throw_error("failed to call sched_getaffinity");
+    }
+    if (!CPU_EQUAL(&mask, &mask2)) {
+        throw_error("cpuset is wrong after get");
+    }
+    if (sched_setaffinity(0, sizeof(cpu_set_t), &mask_old) < 0) {
+        throw_error("recover cpuset error");
+    }
+    return 0;
+}
+
+static int test_sched_xetaffinity_with_child_pid() { // set, then get, the mask of a spawned child
+    int status, child_pid;
+    int num = sysconf(_SC_NPROCESSORS_ONLN); // online count, as in the other cases
+    if (num <= 0) {
+        throw_error("failed to get cpu number");
+    }
+    cpu_set_t mask;
+    CPU_ZERO(&mask);
+    CPU_SET(num - 1, &mask);
+    int ret = posix_spawn(&child_pid, "getpid", NULL, NULL, NULL, NULL);
+    if (ret != 0) { // posix_spawn() reports failure as a non-zero error number
+        throw_error("spawn process error");
+    }
+    printf("Spawn a child process with pid=%d\n", child_pid);
+    if (sched_setaffinity(child_pid, sizeof(cpu_set_t), &mask) < 0) {
+        throw_error("failed to set child affinity");
+    }
+    cpu_set_t mask2;
+    if (sched_getaffinity(child_pid, sizeof(cpu_set_t), &mask2) < 0) {
+        throw_error("failed to get child affinity");
+    }
+    if (!CPU_EQUAL(&mask, &mask2)) {
+        throw_error("cpuset is wrong in child");
+    }
+    ret = wait4(-1, &status, 0, NULL);
+    if (ret < 0) {
+        throw_error("failed to wait4 the child process");
+    }
+    return 0;
+}
+
+#define CPU_SET_SIZE_LIMIT (1024)
+
+static int test_sched_getaffinity_via_explicit_syscall() { // raw syscall must return > 0
+    unsigned char buf[CPU_SET_SIZE_LIMIT] = { 0 };
+    int ret = syscall(__NR_sched_getaffinity, 0, CPU_SET_SIZE_LIMIT, buf);
+    if (ret <= 0) {
+        throw_error("failed to call __NR_sched_getaffinity");
+    }
+    return 0;
+}
+
+static int test_sched_setaffinity_via_explicit_syscall() { // same round trip through raw syscalls
+    cpu_set_t mask_old;
+    CPU_ZERO(&mask_old); // CPU_SET() only adds CPUs, so start from an empty set
+    for (int i = 0, nproc = sysconf(_SC_NPROCESSORS_ONLN); i < nproc; ++i) {
+        CPU_SET(i, &mask_old);
+    }
+    cpu_set_t mask;
+    CPU_ZERO(&mask);
+    CPU_SET(0, &mask);
+    if (syscall(__NR_sched_setaffinity, 0, sizeof(cpu_set_t), &mask) < 0) {
+        throw_error("failed to call __NR_sched_setaffinity");
+    }
+    cpu_set_t mask2;
+    CPU_ZERO(&mask2); // the raw syscall may fill in fewer than sizeof(cpu_set_t) bytes
+    if (syscall(__NR_sched_getaffinity, 0, sizeof(cpu_set_t), &mask2) <= 0) {
+        throw_error("failed to call __NR_sched_getaffinity");
+    }
+    if (!CPU_EQUAL(&mask, &mask2)) {
+        throw_error("explicit syscall cpuset is wrong");
+    }
+    if (syscall(__NR_sched_setaffinity, 0, sizeof(cpu_set_t), &mask_old) < 0) {
+        throw_error("recover cpuset error");
+    }
+    return 0;
+}
+
+static int test_sched_getaffinity_with_zero_cpusetsize() { // cpusetsize 0 must yield -1
+    cpu_set_t mask;
+    if (sched_getaffinity(0, 0, &mask) != -1) {
+        throw_error("check invalid cpusetsize(0) fail");
+    }
+    return 0;
+}
+
+static int test_sched_setaffinity_with_zero_cpusetsize() { // cpusetsize 0 must yield -1
+    cpu_set_t mask;
+    if (sched_setaffinity(0, 0, &mask) != -1) {
+        throw_error("check invalid cpusetsize(0) fail");
+    }
+    return 0;
+}
+
+static int test_sched_getaffinity_with_null_buffer() { // a NULL mask must yield -1
+    unsigned char *buf = NULL;
+    if (sched_getaffinity(0, sizeof(cpu_set_t), (cpu_set_t*)buf) != -1) {
+        throw_error("check invalid buffer pointer(NULL) fail");
+    }
+    return 0;
+}
+
+static int test_sched_setaffinity_with_null_buffer() { // a NULL mask must yield -1
+    unsigned char *buf = NULL;
+    if (sched_setaffinity(0, sizeof(cpu_set_t), (cpu_set_t*)buf) != -1) {
+        throw_error("check invalid buffer pointer(NULL) fail");
+    }
+    return 0;
+}
+
+// ============================================================================
+// Test suite main
+// ============================================================================
+
+static test_case_t test_cases[] = { // the cases handed to test_suite_run() by main()
+    TEST_CASE(test_sched_xetaffinity_with_child_pid),
+    TEST_CASE(test_sched_getaffinity_with_self_pid),
+    TEST_CASE(test_sched_setaffinity_with_self_pid),
+    TEST_CASE(test_sched_getaffinity_via_explicit_syscall),
+    TEST_CASE(test_sched_setaffinity_via_explicit_syscall),
+    TEST_CASE(test_sched_getaffinity_with_zero_cpusetsize),
+    TEST_CASE(test_sched_setaffinity_with_zero_cpusetsize),
+    TEST_CASE(test_sched_getaffinity_with_null_buffer),
+    TEST_CASE(test_sched_setaffinity_with_null_buffer),
+};
+
+int main() { // the process exit status is the value returned by test_suite_run()
+    return test_suite_run(test_cases, ARRAY_SIZE(test_cases));
+}