diff --git a/src/Enclave.edl b/src/Enclave.edl
index eed15d97..48a620a0 100644
--- a/src/Enclave.edl
+++ b/src/Enclave.edl
@@ -131,6 +131,10 @@ enclave {
 
         int occlum_ocall_mprotect([user_check] void* addr, size_t len, int prot);
 
+        int occlum_ocall_get_numa_topology(
+            [out, count=ncpus] uint32_t *numa_buf,
+            size_t ncpus
+        ) propagate_errno;
         void occlum_ocall_sched_yield(void);
         int occlum_ocall_sched_setaffinity(
             int host_tid,
diff --git a/src/libos/src/sched/cpu_set.rs b/src/libos/src/sched/cpu_set.rs
index 6314928a..bdedf415 100644
--- a/src/libos/src/sched/cpu_set.rs
+++ b/src/libos/src/sched/cpu_set.rs
@@ -57,6 +57,11 @@ impl CpuSet {
         self.bits.count_ones()
     }
 
+    /// Returns the index of the first CPU in the set, or `None` if no CPU is set.
+    pub fn first_cpu_idx(&self) -> Option<usize> {
+        self.iter().position(|&b| b)
+    }
+
     // Returns if the CpuSet is a subset of available cpu set
     pub fn is_subset_of(&self, other: &CpuSet) -> bool {
         (self.bits.clone() & other.bits.clone()) == self.bits
diff --git a/src/libos/src/sched/do_getcpu.rs b/src/libos/src/sched/do_getcpu.rs
new file mode 100644
index 00000000..0ff597fb
--- /dev/null
+++ b/src/libos/src/sched/do_getcpu.rs
@@ -0,0 +1,50 @@
+use super::cpu_set::{CpuSet, NCORES};
+use crate::prelude::*;
+use crate::process::ThreadRef;
+
+/// Returns the `(cpu, node)` pair reported to the current thread by `getcpu`.
+pub fn do_getcpu() -> Result<(u32, u32)> {
+    let cpu = pick_cpu_within_affinity_mask();
+    let node = NUMA_TOPOLOGY[cpu as usize];
+    debug!("do_getcpu cpu = {}, node = {}", cpu, node);
+    Ok((cpu, node))
+}
+
+fn pick_cpu_within_affinity_mask() -> u32 {
+    // Always return the idx of the first bit in the affinity mask for now.
+    // TODO: randomly choose a bit in the affinity mask.
+    let thread = current!();
+    let sched = thread.sched().lock().unwrap();
+    let idx = sched.affinity().first_cpu_idx().unwrap();
+    idx as u32
+}
+
+/// Checks that every NUMA node id is smaller than the number of entries.
+fn validate_numa_topology(numa_topology: &[u32]) -> Result<()> {
+    let ncores = numa_topology.len() as u32;
+    if numa_topology.iter().any(|&node_id| node_id >= ncores) {
+        return_errno!(EINVAL, "NUMA node id exceeds the core numbers");
+    }
+    Ok(())
+}
+
+lazy_static! {
+    /// The information of Non-Uniform Memory Access(NUMA) topology
+    pub static ref NUMA_TOPOLOGY: Vec<u32> = {
+        extern "C" {
+            fn occlum_ocall_get_numa_topology(ret: *mut i32, numa_buf: *mut u32, ncpus: usize) -> sgx_status_t;
+        }
+        let mut numa_topology = vec![0; *NCORES];
+        let mut retval: i32 = 0;
+        let status = unsafe { occlum_ocall_get_numa_topology(&mut retval, numa_topology.as_mut_ptr(), numa_topology.len()) };
+        assert!(status == sgx_status_t::SGX_SUCCESS);
+        if retval != 0 {
+            // The host could not read the topology and may have filled the buffer only
+            // partially; fall back to reporting every core on node 0.
+            warn!("failed to get NUMA topology from host, assuming a single node");
+            numa_topology.iter_mut().for_each(|node_id| *node_id = 0);
+        }
+        validate_numa_topology(&numa_topology).expect("ocall returned invalid NUMA topology");
+        numa_topology
+    };
+}
diff --git a/src/libos/src/sched/mod.rs b/src/libos/src/sched/mod.rs
index 466a7cec..6e6fb49c 100644
--- a/src/libos/src/sched/mod.rs
+++ b/src/libos/src/sched/mod.rs
@@ -1,5 +1,6 @@
 /// CPU scheduling for threads.
 mod cpu_set;
+mod do_getcpu;
 mod do_sched_affinity;
 mod do_sched_yield;
 mod sched_agent;
diff --git a/src/libos/src/sched/syscalls.rs b/src/libos/src/sched/syscalls.rs
index bb96340e..83f584e1 100644
--- a/src/libos/src/sched/syscalls.rs
+++ b/src/libos/src/sched/syscalls.rs
@@ -56,3 +56,32 @@ pub fn do_sched_setaffinity(pid: pid_t, buf_size: size_t, buf_ptr: *const u8) ->
     super::do_sched_affinity::do_sched_setaffinity(pid, affinity)?;
     Ok(0)
 }
+
+/// Syscall entry for `getcpu`: writes the current CPU index and NUMA node id.
+///
+/// Either output pointer may be null, in which case that value is not written.
+pub fn do_getcpu(cpu_ptr: *mut u32, node_ptr: *mut u32) -> Result<isize> {
+    // Check every non-null user pointer before doing any work.
+    if !cpu_ptr.is_null() {
+        check_mut_ptr(cpu_ptr)?;
+    }
+    if !node_ptr.is_null() {
+        check_mut_ptr(node_ptr)?;
+    }
+    // Nothing was requested, so there is nothing to look up.
+    if cpu_ptr.is_null() && node_ptr.is_null() {
+        return Ok(0);
+    }
+    // Call the memory-safe do_getcpu
+    let (cpu, node) = super::do_getcpu::do_getcpu()?;
+    // Copy to user
+    if !cpu_ptr.is_null() {
+        // SAFETY: cpu_ptr is non-null and was accepted by check_mut_ptr above.
+        unsafe { cpu_ptr.write(cpu) };
+    }
+    if !node_ptr.is_null() {
+        // SAFETY: node_ptr is non-null and was accepted by check_mut_ptr above.
+        unsafe { node_ptr.write(node) };
+    }
+    Ok(0)
+}
diff --git a/src/libos/src/syscall/mod.rs b/src/libos/src/syscall/mod.rs
index 77b4a9d5..3a384265 100644
--- a/src/libos/src/syscall/mod.rs
+++ b/src/libos/src/syscall/mod.rs
@@ -40,7 +40,7 @@ use crate::process::{
     do_getpgid, do_getpid, do_getppid, do_gettid, do_getuid, do_prctl, do_set_tid_address,
     do_spawn, do_wait4, pid_t, FdOp, ThreadStatus,
 };
-use crate::sched::{do_sched_getaffinity, do_sched_setaffinity, do_sched_yield};
+use crate::sched::{do_getcpu, do_sched_getaffinity, do_sched_setaffinity, do_sched_yield};
 use crate::signal::{
     do_kill, do_rt_sigaction, do_rt_sigpending, do_rt_sigprocmask, do_rt_sigreturn,
     do_sigaltstack, do_tgkill, do_tkill, sigaction_t, sigset_t, stack_t,
@@ -388,7 +388,7 @@ macro_rules!
process_syscall_table_with_callback {
             (Syncfs = 306) => handle_unsupported(),
             (Sendmmsg = 307) => handle_unsupported(),
             (Setns = 308) => handle_unsupported(),
-            (Getcpu = 309) => handle_unsupported(),
+            (Getcpu = 309) => do_getcpu(cpu_ptr: *mut u32, node_ptr: *mut u32),
             (ProcessVmReadv = 310) => handle_unsupported(),
             (ProcessVmWritev = 311) => handle_unsupported(),
             (Kcmp = 312) => handle_unsupported(),
diff --git a/src/pal/src/ocalls/sched.c b/src/pal/src/ocalls/sched.c
index c7213835..a9a0c4b5 100644
--- a/src/pal/src/ocalls/sched.c
+++ b/src/pal/src/ocalls/sched.c
@@ -1,5 +1,6 @@
 #define _GNU_SOURCE
 #include <sched.h>
+#include <dirent.h>
 #include <unistd.h>
 #include "ocalls.h"
 
@@ -26,3 +27,62 @@ void occlum_ocall_sched_yield(void) {
 int occlum_ocall_ncores(void) {
     return sysconf(_SC_NPROCESSORS_CONF);
 }
+
+static int is_number(const char *str) {
+    size_t len = strlen(str);
+    for (size_t i = 0; i < len; i++) {
+        if (str[i] >= '0' && str[i] <= '9') {
+            continue;
+        }
+        return 0;
+    }
+    return len > 0;
+}
+
+static int is_node_entry(struct dirent *d) {
+    return
+        d &&
+        strncmp(d->d_name, "node", 4) == 0 &&
+        is_number(d->d_name + 4);
+}
+
+// The information about NUMA topology is stored in sysfs.
+// By reading the node entries (nodeN) in /sys/devices/system/cpu/cpuN,
+// we can find which cpu core locates at which NUMA node.
+// Returns 0 on success and -1 on failure. A core whose directory has no
+// node entry is reported on node 0.
+int occlum_ocall_get_numa_topology(uint32_t *numa_buf, size_t ncpus) {
+    for (size_t i = 0; i < ncpus; i++) {
+        struct dirent *d;
+        char cpu_dir_path[128] = { 0 };
+        // %zu is the conversion specifier that matches size_t
+        int ret = snprintf(cpu_dir_path, sizeof(cpu_dir_path),
+                           "/sys/devices/system/cpu/cpu%zu", i);
+        if (ret < 0 || (size_t)ret >= sizeof(cpu_dir_path)) {
+            return -1;
+        }
+        DIR *dir = opendir(cpu_dir_path);
+        if (dir == NULL) {
+            return -1;
+        }
+        // Default used when the directory contains no node entry
+        numa_buf[i] = 0;
+        while ((d = readdir(dir))) {
+            if (is_node_entry(d)) {
+                errno = 0;
+                unsigned long node_id = strtoul((d->d_name) + 4, (char **)NULL, 10);
+                if (errno == 0 && node_id > UINT32_MAX) {
+                    errno = ERANGE;
+                }
+                if (errno) {
+                    closedir(dir);
+                    return -1;
+                }
+                numa_buf[i] = (uint32_t)node_id;
+                break;
+            }
+        }
+        closedir(dir);
+    }
+    return 0;
+}
diff --git a/test/sched/main.c b/test/sched/main.c
index 960597a1..6b7f1df4 100644
--- a/test/sched/main.c
+++ b/test/sched/main.c
@@ -252,6 +252,57 @@ static int test_sched_yield() {
     return 0;
 }
 
+// ============================================================================
+// Test cases for getcpu
+// ============================================================================
+
+static int test_getcpu() {
+    int cpu, node;
+    if (syscall(__NR_getcpu, &cpu, &node, NULL) < 0) {
+        THROW_ERROR("getcpu with cpu&node fail");
+    }
+    if (syscall(__NR_getcpu, &cpu, NULL, NULL) < 0) {
+        THROW_ERROR("getcpu with cpu fail");
+    }
+    if (syscall(__NR_getcpu, NULL, &node, NULL) < 0) {
+        THROW_ERROR("getcpu with node fail");
+    }
+    if (syscall(__NR_getcpu, NULL, NULL, NULL) < 0) {
+        THROW_ERROR("getcpu with null fail");
+    }
+    return 0;
+}
+
+static int test_getcpu_after_setaffinity() {
+    int nproc = sysconf(_SC_NPROCESSORS_ONLN);
+    cpu_set_t mask_old;
+    CPU_ZERO(&mask_old);
+    for (int i = 0; i < nproc; ++i) {
+        CPU_SET(g_online_cpu_idxs[i], &mask_old);
+    }
+
+    cpu_set_t mask;
+    CPU_ZERO(&mask);
+    CPU_SET(g_online_cpu_idxs[0], &mask);
+    if (sched_setaffinity(0, sizeof(cpu_set_t), &mask) < 0) {
+        THROW_ERROR("failed to call sched_setaffinity \n");
+    }
+
+    int cpu;
+    int ret = syscall(__NR_getcpu, &cpu, NULL, NULL);
+    if (ret < 0) {
+        THROW_ERROR("getcpu fail");
+    }
+    if (cpu != g_online_cpu_idxs[0]) {
+        THROW_ERROR("check processor id fail");
+    }
+
+    if (sched_setaffinity(0, sizeof(cpu_set_t), &mask_old) < 0) {
+        THROW_ERROR("recover cpuset error");
+    }
+    return 0;
+}
+
 // ============================================================================
 // Test suite main
 // ============================================================================
@@ -268,6 +319,8 @@ static test_case_t test_cases[] = {
     TEST_CASE(test_sched_setaffinity_with_null_buffer),
     TEST_CASE(test_sched_yield),
     TEST_CASE(test_sched_xetaffinity_children_inheritance),
+    TEST_CASE(test_getcpu),
+    TEST_CASE(test_getcpu_after_setaffinity),
 };
 
 int main() {