diff --git a/src/Enclave.edl b/src/Enclave.edl index 3bf97fc4..b268b29d 100644 --- a/src/Enclave.edl +++ b/src/Enclave.edl @@ -67,16 +67,12 @@ enclave { void occlum_ocall_free([user_check] void* ptr); void occlum_ocall_sched_yield(void); - int occlum_ocall_sched_getaffinity( - int host_tid, - size_t cpusize, - [out, size=cpusize] unsigned char* buf - ) propagate_errno; int occlum_ocall_sched_setaffinity( int host_tid, size_t cpusize, [in, size=cpusize] const unsigned char* buf ) propagate_errno; + int occlum_ocall_ncores(void); sgx_status_t occlum_ocall_sgx_init_quote( [out] sgx_target_info_t* target_info, diff --git a/src/libos/Cargo.lock b/src/libos/Cargo.lock index 65e297c8..2d190f18 100644 --- a/src/libos/Cargo.lock +++ b/src/libos/Cargo.lock @@ -5,6 +5,7 @@ name = "Occlum" version = "0.10.0" dependencies = [ "bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)", + "bitvec 0.17.4 (registry+https://github.com/rust-lang/crates.io-index)", "derive_builder 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)", "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", diff --git a/src/libos/Cargo.toml b/src/libos/Cargo.toml index 2518cb8e..9f5d1b67 100644 --- a/src/libos/Cargo.toml +++ b/src/libos/Cargo.toml @@ -9,6 +9,7 @@ crate-type = ["staticlib"] [dependencies] bitflags = "1.0" +bitvec = { version = "0.17", default-features = false, features = ["alloc"] } log = "0.4" lazy_static = { version = "1.1.0", features = ["spin_no_std"] } # Implies nightly derive_builder = "0.7.2" diff --git a/src/libos/src/lib.rs b/src/libos/src/lib.rs index fe135779..069a450f 100644 --- a/src/libos/src/lib.rs +++ b/src/libos/src/lib.rs @@ -16,6 +16,8 @@ extern crate alloc; #[macro_use] extern crate bitflags; +#[macro_use] +extern crate bitvec; extern crate sgx_types; #[cfg(not(target_env = "sgx"))] #[macro_use] @@ -58,6 +60,7 @@ mod fs; mod misc; mod net; mod process; 
+mod sched; mod syscall; mod time; mod untrusted; diff --git a/src/libos/src/process/do_sched.rs b/src/libos/src/process/do_sched.rs deleted file mode 100644 index 778b22f7..00000000 --- a/src/libos/src/process/do_sched.rs +++ /dev/null @@ -1,130 +0,0 @@ -use super::table; -/// Process scheduling. -use crate::prelude::*; - -pub fn do_sched_getaffinity(tid: pid_t, cpu_set: &mut CpuSet) -> Result { - let host_tid = match tid { - 0 => 0, - _ => find_host_tid(tid)?, - }; - let buf = cpu_set.as_mut_ptr(); - let cpusize = cpu_set.len(); - let retval = try_libc!({ - let mut retval = 0; - let sgx_status = occlum_ocall_sched_getaffinity(&mut retval, host_tid as i32, cpusize, buf); - assert!(sgx_status == sgx_status_t::SGX_SUCCESS); - retval - }) as usize; - // Note: the first retval bytes in CpuSet are valid - Ok(retval) -} - -pub fn do_sched_setaffinity(tid: pid_t, cpu_set: &CpuSet) -> Result<()> { - let host_tid = match tid { - 0 => 0, - _ => find_host_tid(tid)?, - }; - let buf = cpu_set.as_ptr(); - let cpusize = cpu_set.len(); - try_libc!({ - let mut retval = 0; - let sgx_status = occlum_ocall_sched_setaffinity(&mut retval, host_tid as i32, cpusize, buf); - assert!(sgx_status == sgx_status_t::SGX_SUCCESS); - retval - }); - Ok(()) -} - -pub fn do_sched_yield() { - unsafe { - let status = occlum_ocall_sched_yield(); - assert!(status == sgx_status_t::SGX_SUCCESS); - } -} - -fn find_host_tid(tid: pid_t) -> Result { - let thread = table::get_thread(tid)?; - // TODO: fix the race condition of host_tid being available. 
- let host_tid = thread - .inner() - .host_tid() - .ok_or_else(|| errno!(ESRCH, "host_tid is not available"))?; - Ok(host_tid) -} - -pub struct CpuSet { - vec: Vec, -} - -impl CpuSet { - pub fn new(len: usize) -> CpuSet { - let mut cpuset = CpuSet { - vec: Vec::with_capacity(len), - }; - cpuset.vec.resize(len, 0); - cpuset - } - - pub fn from_raw_buf(ptr: *const u8, cpusize: usize) -> CpuSet { - let mut cpuset = CpuSet { - vec: Vec::with_capacity(cpusize), - }; - let buf_slice = unsafe { std::slice::from_raw_parts(ptr, cpusize) }; - cpuset.vec.extend_from_slice(buf_slice); - cpuset - } - - pub fn as_mut_ptr(&mut self) -> *mut u8 { - self.vec.as_mut_ptr() - } - - pub fn as_ptr(&self) -> *const u8 { - self.vec.as_ptr() - } - - pub fn as_mut_slice(&mut self) -> &mut [u8] { - self.vec.as_mut_slice() - } - - pub fn as_slice(&self) -> &[u8] { - self.vec.as_slice() - } - - pub fn len(&self) -> usize { - self.vec.len() - } -} - -impl std::fmt::LowerHex for CpuSet { - fn fmt(&self, fmtr: &mut std::fmt::Formatter) -> std::fmt::Result { - for byte in &(self.vec) { - fmtr.write_fmt(format_args!("{:02x}", byte))?; - } - Ok(()) - } -} - -impl std::fmt::UpperHex for CpuSet { - fn fmt(&self, fmtr: &mut std::fmt::Formatter) -> std::fmt::Result { - for byte in &(self.vec) { - fmtr.write_fmt(format_args!("{:02X}", byte))?; - } - Ok(()) - } -} - -extern "C" { - fn occlum_ocall_sched_getaffinity( - ret: *mut i32, - host_tid: i32, - cpusetsize: size_t, - mask: *mut c_uchar, - ) -> sgx_status_t; - fn occlum_ocall_sched_setaffinity( - ret: *mut i32, - host_tid: i32, - cpusetsize: size_t, - mask: *const c_uchar, - ) -> sgx_status_t; - fn occlum_ocall_sched_yield() -> sgx_status_t; -} diff --git a/src/libos/src/process/mod.rs b/src/libos/src/process/mod.rs index d058b3f2..2f96cb72 100644 --- a/src/libos/src/process/mod.rs +++ b/src/libos/src/process/mod.rs @@ -12,6 +12,7 @@ use crate::fs::{FileRef, FileTable, FsView}; use crate::misc::ResourceLimits; use crate::prelude::*; +use 
crate::sched::SchedAgent; use crate::vm::ProcessVM; use self::process::{ChildProcessFilter, ProcessBuilder, ProcessInner}; @@ -29,7 +30,6 @@ mod do_clone; mod do_exit; mod do_futex; mod do_getpid; -mod do_sched; mod do_set_tid_address; mod do_spawn; mod do_wait4; @@ -51,4 +51,5 @@ pub type ThreadRef = Arc; pub type FileTableRef = Arc>; pub type ProcessVMRef = Arc>; pub type FsViewRef = Arc>; +pub type SchedAgentRef = Arc>; pub type ResourceLimitsRef = Arc>; diff --git a/src/libos/src/process/syscalls.rs b/src/libos/src/process/syscalls.rs index 5d1e0441..33bdcc14 100644 --- a/src/libos/src/process/syscalls.rs +++ b/src/libos/src/process/syscalls.rs @@ -3,7 +3,6 @@ use std::ptr::NonNull; use super::do_arch_prctl::ArchPrctlCode; use super::do_clone::CloneFlags; use super::do_futex::{FutexFlags, FutexOp}; -use super::do_sched::CpuSet; use super::do_spawn::FileAction; use super::process::ChildProcessFilter; use crate::prelude::*; @@ -185,49 +184,6 @@ pub fn do_set_tid_address(tidptr: *mut pid_t) -> Result { super::do_set_tid_address::do_set_tid_address(tidptr).map(|tid| tid as isize) } -pub fn do_sched_yield() -> Result { - super::do_sched::do_sched_yield(); - Ok(0) -} - -pub fn do_sched_getaffinity(pid: pid_t, cpusize: size_t, buf: *mut c_uchar) -> Result { - // Construct safe Rust types - let mut buf_slice = { - check_mut_array(buf, cpusize)?; - if cpusize == 0 { - return_errno!(EINVAL, "cpuset size must be greater than zero"); - } - if buf as *const _ == std::ptr::null() { - return_errno!(EFAULT, "cpuset mask must NOT be null"); - } - unsafe { std::slice::from_raw_parts_mut(buf, cpusize) } - }; - // Call the memory-safe do_sched_getaffinity - let mut cpuset = CpuSet::new(cpusize); - let retval = super::do_sched::do_sched_getaffinity(pid, &mut cpuset)?; - // Copy from Rust types to C types - buf_slice.copy_from_slice(cpuset.as_slice()); - Ok(retval as isize) -} - -pub fn do_sched_setaffinity(pid: pid_t, cpusize: size_t, buf: *const c_uchar) -> Result { - // Convert 
unsafe C types into safe Rust types - let cpuset = { - check_array(buf, cpusize)?; - if cpusize == 0 { - return_errno!(EINVAL, "cpuset size must be greater than zero"); - } - if buf as *const _ == std::ptr::null() { - return_errno!(EFAULT, "cpuset mask must NOT be null"); - } - CpuSet::from_raw_buf(buf, cpusize) - }; - debug!("sched_setaffinity cpuset: {:#x}", cpuset); - // Call the memory-safe do_sched_setaffinity - super::do_sched::do_sched_setaffinity(pid, &cpuset)?; - Ok(0) -} - pub fn do_exit(status: i32) -> ! { debug!("exit: {}", status); super::do_exit::do_exit(status); diff --git a/src/libos/src/process/thread/builder.rs b/src/libos/src/process/thread/builder.rs index 0b1cfeba..353eeb57 100644 --- a/src/libos/src/process/thread/builder.rs +++ b/src/libos/src/process/thread/builder.rs @@ -1,8 +1,8 @@ use std::ptr::NonNull; use super::{ - FileTableRef, FsViewRef, ProcessRef, ProcessVM, ProcessVMRef, ResourceLimitsRef, Task, Thread, - ThreadId, ThreadInner, ThreadRef, + FileTableRef, FsViewRef, ProcessRef, ProcessVM, ProcessVMRef, ResourceLimitsRef, SchedAgentRef, + Task, Thread, ThreadId, ThreadInner, ThreadRef, }; use crate::prelude::*; @@ -16,6 +16,7 @@ pub struct ThreadBuilder { // Optional fields fs: Option, files: Option, + sched: Option, rlimits: Option, clear_ctid: Option>, } @@ -29,6 +30,7 @@ impl ThreadBuilder { vm: None, fs: None, files: None, + sched: None, rlimits: None, clear_ctid: None, } @@ -64,6 +66,11 @@ impl ThreadBuilder { self } + pub fn sched(mut self, sched: SchedAgentRef) -> Self { + self.sched = Some(sched); + self + } + pub fn rlimits(mut self, rlimits: ResourceLimitsRef) -> Self { self.rlimits = Some(rlimits); self @@ -87,6 +94,7 @@ impl ThreadBuilder { .ok_or_else(|| errno!(EINVAL, "memory is mandatory"))?; let fs = self.fs.unwrap_or_default(); let files = self.files.unwrap_or_default(); + let sched = self.sched.unwrap_or_default(); let rlimits = self.rlimits.unwrap_or_default(); let clear_ctid = SgxRwLock::new(self.clear_ctid); let 
inner = SgxMutex::new(ThreadInner::new()); @@ -100,6 +108,7 @@ impl ThreadBuilder { vm, fs, files, + sched, rlimits, }); diff --git a/src/libos/src/process/thread/mod.rs b/src/libos/src/process/thread/mod.rs index 3a698b49..4eed7af6 100644 --- a/src/libos/src/process/thread/mod.rs +++ b/src/libos/src/process/thread/mod.rs @@ -3,7 +3,8 @@ use std::ptr::NonNull; use super::task::Task; use super::{ - FileTableRef, FsViewRef, ProcessRef, ProcessVM, ProcessVMRef, ResourceLimitsRef, ThreadRef, + FileTableRef, FsViewRef, ProcessRef, ProcessVM, ProcessVMRef, ResourceLimitsRef, SchedAgentRef, + ThreadRef, }; use crate::prelude::*; @@ -27,6 +28,7 @@ pub struct Thread { vm: ProcessVMRef, fs: FsViewRef, files: FileTableRef, + sched: SchedAgentRef, rlimits: ResourceLimitsRef, } @@ -62,6 +64,10 @@ impl Thread { &self.files } + pub fn sched(&self) -> &SchedAgentRef { + &self.sched + } + /// Get a file from the file table. pub fn file(&self, fd: FileDesc) -> Result { self.files().lock().unwrap().get(fd) @@ -89,10 +95,13 @@ impl Thread { } pub(super) fn start(&self, host_tid: pid_t) { - self.inner().start(host_tid); + self.sched().lock().unwrap().attach(host_tid); + self.inner().start(); } pub(super) fn exit(&self, exit_status: i32) -> usize { + self.sched().lock().unwrap().detach(); + // Remove this thread from its owner process let mut process_inner = self.process.inner(); let threads = process_inner.threads_mut().unwrap(); @@ -143,7 +152,7 @@ unsafe impl Sync for Thread {} #[derive(Debug)] pub enum ThreadInner { Init, - Live { host_tid: pid_t }, + Running, Exited { exit_status: i32 }, } @@ -155,7 +164,7 @@ impl ThreadInner { pub fn status(&self) -> ThreadStatus { match self { Self::Init { .. } => ThreadStatus::Init, - Self::Live { .. } => ThreadStatus::Running, + Self::Running { .. } => ThreadStatus::Running, Self::Exited { .. 
} => ThreadStatus::Exited, } } @@ -167,16 +176,9 @@ impl ThreadInner { } } - pub fn host_tid(&self) -> Option { - match self { - Self::Live { host_tid } => Some(*host_tid), - _ => None, - } - } - - pub fn start(&mut self, host_tid: pid_t) { + pub fn start(&mut self) { debug_assert!(self.status() == ThreadStatus::Init); - *self = Self::Live { host_tid }; + *self = Self::Running; } pub fn exit(&mut self, exit_status: i32) { diff --git a/src/libos/src/sched/cpu_set.rs b/src/libos/src/sched/cpu_set.rs new file mode 100644 index 00000000..4b1a426f --- /dev/null +++ b/src/libos/src/sched/cpu_set.rs @@ -0,0 +1,123 @@ +//! A CpuSet is a bit mask used to represent a set of CPU cores. +//! +//! The number of bits contained in a CpuSet equals the number of CPU cores +//! on the current platform. The bits in a CpuSet can be accessed via indexes +//! or iterators. +//! +//! The meaning of the i-th bit in a CpuSet `cpu_set` is as follows: +//! * If `cpu_set[i] == true`, then the i-th CPU core belongs to the set; +//! * Otherwise, the i-th CPU core is not in the set. + +use bitvec::prelude::*; +use std::ops::Index; + +use crate::prelude::*; + +#[derive(Debug, Clone, PartialEq)] +pub struct CpuSet { + bits: BitBox, +} + +impl CpuSet { + /// Returns the length of a CPU set in bytes. + pub fn len() -> usize { + align_up(Self::ncores(), 8) / 8 + } + + /// Returns the number of CPU cores in a CPU set. + pub fn ncores() -> usize { + *NCORES + } + + /// Create a CpuSet that consists of all of the CPU cores. + pub fn new_full() -> Self { + let mut bits = bitbox![Local, u8; 1; Self::len() * 8]; + Self::clear_unused(&mut bits); + Self { bits } + } + + /// Create a CpuSet that consists of none of the CPU cores. + pub fn new_empty() -> Self { + let bits = bitbox![Local, u8; 0; Self::len() * 8]; + Self { bits } + } + + /// Returns if the CpuSet has all of the CPU cores. + pub fn full(&self) -> bool { + self.bits.count_ones() == Self::ncores() + } + + /// Returns if the CpuSet has no CPU cores. 
+ pub fn empty(&self) -> bool { + self.bits.count_ones() == 0 + } + + /// Create a CpuSet from bits given in a byte slice. + pub fn from_slice(slice: &[u8]) -> Result { + if slice.len() < Self::len() { + return_errno!(EINVAL, "slice is not long enough"); + } + let slice = &slice[..Self::len()]; + let mut bits = BitBox::from_slice(slice); + Self::clear_unused(&mut bits); + + Ok(Self { bits }) + } + + /// Returns the underlying byte slice. + /// + /// The last, unused bits in the byte slice are guaranteed to be zero. + pub fn as_slice(&self) -> &[u8] { + self.bits.as_slice() + } + + /// Returns an iterator that allows accessing the underlying bits. + pub fn iter(&self) -> Iter { + self.bits.iter() + } + + /// Returns an iterator that allows modifying the underlying bits. + pub fn iter_mut(&mut self) -> IterMut { + self.bits.iter_mut() + } + + fn clear_unused(bits: &mut BitSlice) { + let unused_bits = &mut bits[Self::ncores()..(Self::len() * 8)]; + for mut bit in unused_bits { + *bit = false; + } + } +} + +pub type Iter<'a> = bitvec::slice::Iter<'a, Local, u8>; +pub type IterMut<'a> = bitvec::slice::IterMut<'a, Local, u8>; + +impl Index for CpuSet { + type Output = bool; + + fn index(&self, index: usize) -> &bool { + assert!(index < Self::ncores()); + &self.bits[index] + } +} + +lazy_static! 
{ + /// The number of all CPU cores on the platform + static ref NCORES: usize = { + extern "C" { + fn occlum_ocall_ncores(ret: *mut i32) -> sgx_status_t; + } + unsafe { + let mut ncores = 0; + let status = occlum_ocall_ncores(&mut ncores); + assert!( + status == sgx_status_t::SGX_SUCCESS && + // Ncores == 0 is meaningless + 0 < ncores && + // A reasonable upper limit for the foreseeable future + ncores <= 1024 + ); + ncores as usize + } + }; +} diff --git a/src/libos/src/sched/do_sched_affinity.rs b/src/libos/src/sched/do_sched_affinity.rs new file mode 100644 index 00000000..ddaabc0a --- /dev/null +++ b/src/libos/src/sched/do_sched_affinity.rs @@ -0,0 +1,30 @@ +use super::cpu_set::CpuSet; +use crate::prelude::*; +use crate::process::ThreadRef; + +pub fn do_sched_getaffinity(tid: pid_t) -> Result { + debug!("do_sched_getaffinity tid: {}", tid); + let thread = get_thread_by_tid(tid)?; + let sched = thread.sched().lock().unwrap(); + let affinity = sched.affinity().clone(); + Ok(affinity) +} + +pub fn do_sched_setaffinity(tid: pid_t, new_affinity: CpuSet) -> Result<()> { + debug!( + "do_sched_setaffinity tid: {}, new_affinity = {:?}", + tid, &new_affinity + ); + let thread = get_thread_by_tid(tid)?; + let mut sched = thread.sched().lock().unwrap(); + sched.set_affinity(new_affinity)?; + Ok(()) +} + +fn get_thread_by_tid(tid: pid_t) -> Result { + if tid == 0 { + Ok(current!()) + } else { + crate::process::table::get_thread(tid) + } +} diff --git a/src/libos/src/sched/do_sched_yield.rs b/src/libos/src/sched/do_sched_yield.rs new file mode 100644 index 00000000..9060eb03 --- /dev/null +++ b/src/libos/src/sched/do_sched_yield.rs @@ -0,0 +1,11 @@ +use crate::prelude::*; + +pub fn do_sched_yield() { + extern "C" { + fn occlum_ocall_sched_yield() -> sgx_status_t; + } + unsafe { + let status = occlum_ocall_sched_yield(); + assert!(status == sgx_status_t::SGX_SUCCESS); + } +} diff --git a/src/libos/src/sched/mod.rs b/src/libos/src/sched/mod.rs new file mode 100644 index 
00000000..7abd2970 --- /dev/null +++ b/src/libos/src/sched/mod.rs @@ -0,0 +1,9 @@ +//! CPU scheduling for threads. +mod cpu_set; +mod do_sched_affinity; +mod do_sched_yield; +mod sched_agent; +mod syscalls; + +pub use sched_agent::SchedAgent; +pub use syscalls::*; diff --git a/src/libos/src/sched/sched_agent.rs b/src/libos/src/sched/sched_agent.rs new file mode 100644 index 00000000..ccc32e25 --- /dev/null +++ b/src/libos/src/sched/sched_agent.rs @@ -0,0 +1,148 @@ +//! SchedAgent manages the CPU scheduler settings for a thread. +//! +//! # Scheduler Settings +//! +//! Currently, the only scheduler setting that SchedAgent can access and update +//! is the CPU affinity of a thread. Other settings will be added in the future. +//! +//! # The Two Modes: Attached vs Detached +//! +//! SchedAgent works in one of the two modes: the attached mode and the detached +//! mode. +//! +//! When a SchedAgent is created, it is initially in the detached mode, +//! meaning that the SchedAgent is not attached to any host OS thread. Thus, +//! any call on SchedAgent to update scheduler settings does not actually affect any +//! host OS thread; SchedAgent just records the updates. +//! +//! After SchedAgent becomes attached to some host OS thread by invoking the `attach` +//! method, all previous updates recorded while in the detached mode will +//! be applied to the host OS thread. Afterwards, all setting updates will be applied +//! immediately to the host OS thread---until SchedAgent is detached from the +//! host OS thread. + +use super::cpu_set::CpuSet; +use crate::prelude::*; +use crate::util::dirty::Dirty; + +#[derive(Debug, Clone)] +pub struct SchedAgent { + // The use of Option does not mean inner is optional. In contrast, we maintain + // the invariant of `inner.is_some() == true`. We use Option so that we can + // move the Inner out of SchedAgent without upsetting Rust's borrow checker. 
+ inner: Option, +} + +#[derive(Debug, Clone)] +enum Inner { + Detached { affinity: Dirty }, + Attached { host_tid: pid_t, affinity: CpuSet }, +} + +impl SchedAgent { + pub fn new() -> Self { + let inner = Some({ + let affinity = Dirty::new(CpuSet::new_full()); + Inner::Detached { affinity } + }); + Self { inner } + } + + pub fn affinity(&self) -> &CpuSet { + match self.inner() { + Inner::Detached { affinity } => affinity.as_ref(), + Inner::Attached { affinity, .. } => affinity, + } + } + + pub fn set_affinity(&mut self, new_affinity: CpuSet) -> Result<()> { + if new_affinity.empty() { + return_errno!(EINVAL, "there must be at least one CPU core in the CpuSet"); + } + match self.inner_mut() { + Inner::Detached { affinity } => { + *affinity.as_mut() = new_affinity; + } + Inner::Attached { host_tid, affinity } => { + update_affinity(*host_tid, &new_affinity); + *affinity = new_affinity; + } + }; + Ok(()) + } + + pub fn attach(&mut self, host_tid: pid_t) { + self.update_inner(|inner| match inner { + Inner::Detached { affinity } => { + let affinity = { + if affinity.dirty() { + update_affinity(host_tid, affinity.as_ref()) + } + affinity.unwrap() + }; + Inner::Attached { host_tid, affinity } + } + Inner::Attached { .. } => panic!("cannot attach when the agent is already attached"), + }); + } + + pub fn detach(&mut self) { + self.update_inner(|inner| match inner { + Inner::Detached { .. } => panic!("cannot detach when the agent is already detached"), + Inner::Attached { affinity, .. } => { + let affinity = Dirty::new(affinity); + Inner::Detached { affinity } + } + }); + } + + pub fn is_attached(&self) -> bool { + match self.inner() { + Inner::Detached { .. } => false, + Inner::Attached { .. 
} => true, + } + } + + fn inner(&self) -> &Inner { + self.inner.as_ref().unwrap() + } + + fn inner_mut(&mut self) -> &mut Inner { + self.inner.as_mut().unwrap() + } + + fn update_inner(&mut self, f: F) + where + F: FnOnce(Inner) -> Inner, + { + let old_inner = self.inner.take().unwrap(); + let new_inner = f(old_inner); + self.inner = Some(new_inner); + } +} + +impl Default for SchedAgent { + fn default() -> Self { + Self::new() + } +} + +fn update_affinity(host_tid: pid_t, affinity: &CpuSet) { + let mask = affinity.as_slice(); + let mut retval = 0; + let sgx_status = unsafe { + occlum_ocall_sched_setaffinity(&mut retval, host_tid as i32, mask.len(), mask.as_ptr()) + }; + assert!(sgx_status == sgx_status_t::SGX_SUCCESS); + // sched_setaffinity should never fail + assert!(retval == 0); +} + +extern "C" { + fn occlum_ocall_sched_setaffinity( + ret: *mut i32, + host_tid: i32, + cpusetsize: size_t, + mask: *const c_uchar, + ) -> sgx_status_t; +} diff --git a/src/libos/src/sched/syscalls.rs b/src/libos/src/sched/syscalls.rs new file mode 100644 index 00000000..893c876f --- /dev/null +++ b/src/libos/src/sched/syscalls.rs @@ -0,0 +1,52 @@ +use super::cpu_set::CpuSet; +use crate::prelude::*; +use crate::util::mem_util::from_user::*; + +pub fn do_sched_yield() -> Result { + super::do_sched_yield::do_sched_yield(); + Ok(0) +} + +pub fn do_sched_getaffinity(pid: pid_t, buf_size: size_t, buf_ptr: *mut u8) -> Result { + // Construct safe Rust types + let buf_size = { + if buf_size < CpuSet::len() { + return_errno!(EINVAL, "buf size is not big enough"); + } + CpuSet::len() + }; + let mut buf_slice = { + check_mut_array(buf_ptr, buf_size)?; + if buf_ptr as *const _ == std::ptr::null() { + return_errno!(EFAULT, "buf ptr must NOT be null"); + } + unsafe { std::slice::from_raw_parts_mut(buf_ptr, buf_size) } + }; + // Call the memory-safe do_sched_getaffinity + let affinity = super::do_sched_affinity::do_sched_getaffinity(pid)?; + debug_assert!(affinity.as_slice().len() == 
CpuSet::len()); + // Copy from Rust types to C types + buf_slice.copy_from_slice(affinity.as_slice()); + Ok(CpuSet::len() as isize) +} + +pub fn do_sched_setaffinity(pid: pid_t, buf_size: size_t, buf_ptr: *const u8) -> Result { + // Convert unsafe C types into safe Rust types + let buf_size = { + if buf_size < CpuSet::len() { + return_errno!(EINVAL, "buf size is not big enough"); + } + CpuSet::len() + }; + let buf_slice = { + check_array(buf_ptr, buf_size)?; + if buf_ptr as *const _ == std::ptr::null() { + return_errno!(EFAULT, "buf ptr must NOT be null"); + } + unsafe { std::slice::from_raw_parts(buf_ptr, buf_size) } + }; + // Call the memory-safe do_sched_setaffinity + let affinity = CpuSet::from_slice(buf_slice).unwrap(); + super::do_sched_affinity::do_sched_setaffinity(pid, affinity)?; + Ok(0) +} diff --git a/src/libos/src/syscall/mod.rs b/src/libos/src/syscall/mod.rs index 1c92a030..36325aa3 100644 --- a/src/libos/src/syscall/mod.rs +++ b/src/libos/src/syscall/mod.rs @@ -32,9 +32,10 @@ use crate::net::{ }; use crate::process::{ do_arch_prctl, do_clone, do_exit, do_futex, do_getegid, do_geteuid, do_getgid, do_getpgid, - do_getpid, do_getppid, do_gettid, do_getuid, do_sched_getaffinity, do_sched_setaffinity, - do_sched_yield, do_set_tid_address, do_spawn, do_wait4, pid_t, FdOp, + do_getpid, do_getppid, do_gettid, do_getuid, do_set_tid_address, do_spawn, do_wait4, pid_t, + FdOp, }; +use crate::sched::{do_sched_getaffinity, do_sched_setaffinity, do_sched_yield}; use crate::vm::{MMapFlags, VMPerms}; use crate::{fs, process, std, vm}; diff --git a/src/libos/src/util/dirty.rs b/src/libos/src/util/dirty.rs new file mode 100644 index 00000000..91014c34 --- /dev/null +++ b/src/libos/src/util/dirty.rs @@ -0,0 +1,60 @@ +/// Dirty is a wrapper type that remembers whether the internal object has been +/// borrowed mutably. 
+use std::fmt; + +pub struct Dirty { + inner: T, + dirty: bool, +} + +impl Dirty { + pub fn new(inner: T) -> Self { + let dirty = false; + Self { inner, dirty } + } + + pub fn dirty(&self) -> bool { + self.dirty + } + + pub fn clear_dirty(&mut self) { + self.dirty = false; + } + + pub fn unwrap(self) -> T { + self.inner + } +} + +impl fmt::Debug for Dirty { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.debug_struct("Dirty") + .field("inner", &self.inner) + .field("dirty", &self.dirty) + .finish() + } +} + +impl AsRef for Dirty { + fn as_ref(&self) -> &T { + &self.inner + } +} + +impl AsMut for Dirty { + fn as_mut(&mut self) -> &mut T { + self.dirty = true; + &mut self.inner + } +} + +impl Copy for Dirty {} + +impl Clone for Dirty { + fn clone(&self) -> Self { + Self { + inner: self.inner.clone(), + dirty: self.dirty, + } + } +} diff --git a/src/libos/src/util/mod.rs b/src/libos/src/util/mod.rs index b7175958..dfbf6eab 100644 --- a/src/libos/src/util/mod.rs +++ b/src/libos/src/util/mod.rs @@ -1,5 +1,6 @@ use super::*; +pub mod dirty; pub mod log; pub mod mem_util; pub mod mpx_util; diff --git a/src/pal/src/ocalls/sched.c b/src/pal/src/ocalls/sched.c index bcb40bb1..e41aeb99 100644 --- a/src/pal/src/ocalls/sched.c +++ b/src/pal/src/ocalls/sched.c @@ -1,11 +1,8 @@ #define _GNU_SOURCE #include +#include #include "ocalls.h" -int occlum_ocall_sched_getaffinity(int host_tid, size_t cpusize, unsigned char* buf) { - return syscall(__NR_sched_getaffinity, host_tid, cpusize, buf); -} - int occlum_ocall_sched_setaffinity(int host_tid, size_t cpusize, const unsigned char* buf) { return syscall(__NR_sched_setaffinity, host_tid, cpusize, buf); } @@ -14,3 +11,7 @@ int occlum_ocall_sched_setaffinity(int host_tid, size_t cpusize, const unsigned void occlum_ocall_sched_yield(void) { sched_yield(); } + +int occlum_ocall_ncores(void) { + return sysconf(_SC_NPROCESSORS_CONF); +} diff --git a/test/sched/main.c b/test/sched/main.c index f2c0894a..8371b5e3 100644 --- 
a/test/sched/main.c +++ b/test/sched/main.c @@ -97,18 +97,15 @@ static int test_sched_getaffinity_via_explicit_syscall() { } static int test_sched_setaffinity_via_explicit_syscall() { - int nproc = sysconf(_SC_NPROCESSORS_ONLN); - cpu_set_t mask_old; - for (int i = 0; i < nproc; ++i) { - CPU_SET(i, &mask_old); - } cpu_set_t mask; CPU_ZERO(&mask); CPU_SET(0, &mask); if (syscall(__NR_sched_setaffinity, 0, sizeof(cpu_set_t), &mask) < 0) { THROW_ERROR("failed to call __NR_sched_setaffinity"); } + cpu_set_t mask2; + CPU_ZERO(&mask2); int ret_nproc = syscall(__NR_sched_getaffinity, 0, sizeof(cpu_set_t), &mask2); if (ret_nproc <= 0) { THROW_ERROR("failed to call __NR_sched_getaffinity"); @@ -116,6 +113,14 @@ static int test_sched_setaffinity_via_explicit_syscall() { if (!CPU_EQUAL(&mask, &mask2)) { THROW_ERROR("explicit syscall cpuset is wrong"); } + + // Recover the affinity mask + int nproc = sysconf(_SC_NPROCESSORS_ONLN); + cpu_set_t mask_old; + CPU_ZERO(&mask_old); // CPU_SET only sets individual bits, so start from an empty mask + for (int i = 0; i < nproc; ++i) { + CPU_SET(i, &mask_old); + } if (syscall(__NR_sched_setaffinity, 0, sizeof(cpu_set_t), &mask_old) < 0) { THROW_ERROR("recover cpuset error"); } @@ -171,7 +176,7 @@ static int test_sched_yield() { // ============================================================================ static test_case_t test_cases[] = { - //TEST_CASE(test_sched_xetaffinity_with_child_pid), + TEST_CASE(test_sched_xetaffinity_with_child_pid), TEST_CASE(test_sched_getaffinity_with_self_pid), TEST_CASE(test_sched_setaffinity_with_self_pid), TEST_CASE(test_sched_getaffinity_via_explicit_syscall),