Refactor the CPU scheduling subsystem
This commit improves both the readability and the correctness of the scheduling-related system calls. In terms of readability, it extracts all scheduling-related code out of the process/ directory and puts it in a sched/ directory. In terms of correctness, the new scheduling subsystem introduces the CpuSet and SchedAgent types to maintain and manipulate CPU scheduler settings in a secure and robust way.
This commit is contained in:
parent
2a1d3d98c5
commit
2b556f8de9
@ -67,16 +67,12 @@ enclave {
|
||||
void occlum_ocall_free([user_check] void* ptr);
|
||||
|
||||
void occlum_ocall_sched_yield(void);
|
||||
int occlum_ocall_sched_getaffinity(
|
||||
int host_tid,
|
||||
size_t cpusize,
|
||||
[out, size=cpusize] unsigned char* buf
|
||||
) propagate_errno;
|
||||
int occlum_ocall_sched_setaffinity(
|
||||
int host_tid,
|
||||
size_t cpusize,
|
||||
[in, size=cpusize] const unsigned char* buf
|
||||
) propagate_errno;
|
||||
int occlum_ocall_ncores(void);
|
||||
|
||||
sgx_status_t occlum_ocall_sgx_init_quote(
|
||||
[out] sgx_target_info_t* target_info,
|
||||
|
1
src/libos/Cargo.lock
generated
1
src/libos/Cargo.lock
generated
@ -5,6 +5,7 @@ name = "Occlum"
|
||||
version = "0.10.0"
|
||||
dependencies = [
|
||||
"bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"bitvec 0.17.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"derive_builder 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
|
@ -9,6 +9,7 @@ crate-type = ["staticlib"]
|
||||
|
||||
[dependencies]
|
||||
bitflags = "1.0"
|
||||
bitvec = { version = "0.17", default-features = false, features = ["alloc"] }
|
||||
log = "0.4"
|
||||
lazy_static = { version = "1.1.0", features = ["spin_no_std"] } # Implies nightly
|
||||
derive_builder = "0.7.2"
|
||||
|
@ -16,6 +16,8 @@
|
||||
extern crate alloc;
|
||||
#[macro_use]
|
||||
extern crate bitflags;
|
||||
#[macro_use]
|
||||
extern crate bitvec;
|
||||
extern crate sgx_types;
|
||||
#[cfg(not(target_env = "sgx"))]
|
||||
#[macro_use]
|
||||
@ -58,6 +60,7 @@ mod fs;
|
||||
mod misc;
|
||||
mod net;
|
||||
mod process;
|
||||
mod sched;
|
||||
mod syscall;
|
||||
mod time;
|
||||
mod untrusted;
|
||||
|
@ -1,130 +0,0 @@
|
||||
use super::table;
|
||||
/// Process scheduling.
|
||||
use crate::prelude::*;
|
||||
|
||||
pub fn do_sched_getaffinity(tid: pid_t, cpu_set: &mut CpuSet) -> Result<usize> {
|
||||
let host_tid = match tid {
|
||||
0 => 0,
|
||||
_ => find_host_tid(tid)?,
|
||||
};
|
||||
let buf = cpu_set.as_mut_ptr();
|
||||
let cpusize = cpu_set.len();
|
||||
let retval = try_libc!({
|
||||
let mut retval = 0;
|
||||
let sgx_status = occlum_ocall_sched_getaffinity(&mut retval, host_tid as i32, cpusize, buf);
|
||||
assert!(sgx_status == sgx_status_t::SGX_SUCCESS);
|
||||
retval
|
||||
}) as usize;
|
||||
// Note: the first retval bytes in CpuSet are valid
|
||||
Ok(retval)
|
||||
}
|
||||
|
||||
pub fn do_sched_setaffinity(tid: pid_t, cpu_set: &CpuSet) -> Result<()> {
|
||||
let host_tid = match tid {
|
||||
0 => 0,
|
||||
_ => find_host_tid(tid)?,
|
||||
};
|
||||
let buf = cpu_set.as_ptr();
|
||||
let cpusize = cpu_set.len();
|
||||
try_libc!({
|
||||
let mut retval = 0;
|
||||
let sgx_status = occlum_ocall_sched_setaffinity(&mut retval, host_tid as i32, cpusize, buf);
|
||||
assert!(sgx_status == sgx_status_t::SGX_SUCCESS);
|
||||
retval
|
||||
});
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn do_sched_yield() {
|
||||
unsafe {
|
||||
let status = occlum_ocall_sched_yield();
|
||||
assert!(status == sgx_status_t::SGX_SUCCESS);
|
||||
}
|
||||
}
|
||||
|
||||
fn find_host_tid(tid: pid_t) -> Result<pid_t> {
|
||||
let thread = table::get_thread(tid)?;
|
||||
// TODO: fix the race condition of host_tid being available.
|
||||
let host_tid = thread
|
||||
.inner()
|
||||
.host_tid()
|
||||
.ok_or_else(|| errno!(ESRCH, "host_tid is not available"))?;
|
||||
Ok(host_tid)
|
||||
}
|
||||
|
||||
/// A CPU affinity mask stored as a plain, heap-allocated byte buffer.
pub struct CpuSet {
    vec: Vec<u8>,
}

impl CpuSet {
    /// Creates a mask of `len` bytes, all of them zero.
    pub fn new(len: usize) -> CpuSet {
        CpuSet { vec: vec![0; len] }
    }

    /// Creates a mask by copying `cpusize` bytes starting at `ptr`.
    ///
    /// NOTE(review): the pointer is dereferenced here, so the caller has to
    /// guarantee that `ptr` is valid for reads of `cpusize` bytes.
    pub fn from_raw_buf(ptr: *const u8, cpusize: usize) -> CpuSet {
        let bytes = unsafe { std::slice::from_raw_parts(ptr, cpusize) };
        CpuSet {
            vec: bytes.to_vec(),
        }
    }

    /// Returns a mutable raw pointer to the first byte of the mask.
    pub fn as_mut_ptr(&mut self) -> *mut u8 {
        self.as_mut_slice().as_mut_ptr()
    }

    /// Returns a raw pointer to the first byte of the mask.
    pub fn as_ptr(&self) -> *const u8 {
        self.as_slice().as_ptr()
    }

    /// Returns the mask as a mutable byte slice.
    pub fn as_mut_slice(&mut self) -> &mut [u8] {
        &mut self.vec[..]
    }

    /// Returns the mask as a byte slice.
    pub fn as_slice(&self) -> &[u8] {
        &self.vec[..]
    }

    /// Returns the length of the mask in bytes.
    pub fn len(&self) -> usize {
        self.as_slice().len()
    }
}

/// Formats the mask as two lowercase hex digits per byte, in buffer order.
impl std::fmt::LowerHex for CpuSet {
    fn fmt(&self, fmtr: &mut std::fmt::Formatter) -> std::fmt::Result {
        self.vec
            .iter()
            .try_for_each(|byte| write!(fmtr, "{:02x}", byte))
    }
}

/// Formats the mask as two uppercase hex digits per byte, in buffer order.
impl std::fmt::UpperHex for CpuSet {
    fn fmt(&self, fmtr: &mut std::fmt::Formatter) -> std::fmt::Result {
        self.vec
            .iter()
            .try_for_each(|byte| write!(fmtr, "{:02X}", byte))
    }
}
|
||||
|
||||
extern "C" {
|
||||
fn occlum_ocall_sched_getaffinity(
|
||||
ret: *mut i32,
|
||||
host_tid: i32,
|
||||
cpusetsize: size_t,
|
||||
mask: *mut c_uchar,
|
||||
) -> sgx_status_t;
|
||||
fn occlum_ocall_sched_setaffinity(
|
||||
ret: *mut i32,
|
||||
host_tid: i32,
|
||||
cpusetsize: size_t,
|
||||
mask: *const c_uchar,
|
||||
) -> sgx_status_t;
|
||||
fn occlum_ocall_sched_yield() -> sgx_status_t;
|
||||
}
|
@ -12,6 +12,7 @@
|
||||
use crate::fs::{FileRef, FileTable, FsView};
|
||||
use crate::misc::ResourceLimits;
|
||||
use crate::prelude::*;
|
||||
use crate::sched::SchedAgent;
|
||||
use crate::vm::ProcessVM;
|
||||
|
||||
use self::process::{ChildProcessFilter, ProcessBuilder, ProcessInner};
|
||||
@ -29,7 +30,6 @@ mod do_clone;
|
||||
mod do_exit;
|
||||
mod do_futex;
|
||||
mod do_getpid;
|
||||
mod do_sched;
|
||||
mod do_set_tid_address;
|
||||
mod do_spawn;
|
||||
mod do_wait4;
|
||||
@ -51,4 +51,5 @@ pub type ThreadRef = Arc<Thread>;
|
||||
pub type FileTableRef = Arc<SgxMutex<FileTable>>;
|
||||
pub type ProcessVMRef = Arc<SgxMutex<ProcessVM>>;
|
||||
pub type FsViewRef = Arc<SgxMutex<FsView>>;
|
||||
pub type SchedAgentRef = Arc<SgxMutex<SchedAgent>>;
|
||||
pub type ResourceLimitsRef = Arc<SgxMutex<ResourceLimits>>;
|
||||
|
@ -3,7 +3,6 @@ use std::ptr::NonNull;
|
||||
use super::do_arch_prctl::ArchPrctlCode;
|
||||
use super::do_clone::CloneFlags;
|
||||
use super::do_futex::{FutexFlags, FutexOp};
|
||||
use super::do_sched::CpuSet;
|
||||
use super::do_spawn::FileAction;
|
||||
use super::process::ChildProcessFilter;
|
||||
use crate::prelude::*;
|
||||
@ -185,49 +184,6 @@ pub fn do_set_tid_address(tidptr: *mut pid_t) -> Result<isize> {
|
||||
super::do_set_tid_address::do_set_tid_address(tidptr).map(|tid| tid as isize)
|
||||
}
|
||||
|
||||
pub fn do_sched_yield() -> Result<isize> {
|
||||
super::do_sched::do_sched_yield();
|
||||
Ok(0)
|
||||
}
|
||||
|
||||
pub fn do_sched_getaffinity(pid: pid_t, cpusize: size_t, buf: *mut c_uchar) -> Result<isize> {
|
||||
// Construct safe Rust types
|
||||
let mut buf_slice = {
|
||||
check_mut_array(buf, cpusize)?;
|
||||
if cpusize == 0 {
|
||||
return_errno!(EINVAL, "cpuset size must be greater than zero");
|
||||
}
|
||||
if buf as *const _ == std::ptr::null() {
|
||||
return_errno!(EFAULT, "cpuset mask must NOT be null");
|
||||
}
|
||||
unsafe { std::slice::from_raw_parts_mut(buf, cpusize) }
|
||||
};
|
||||
// Call the memory-safe do_sched_getaffinity
|
||||
let mut cpuset = CpuSet::new(cpusize);
|
||||
let retval = super::do_sched::do_sched_getaffinity(pid, &mut cpuset)?;
|
||||
// Copy from Rust types to C types
|
||||
buf_slice.copy_from_slice(cpuset.as_slice());
|
||||
Ok(retval as isize)
|
||||
}
|
||||
|
||||
pub fn do_sched_setaffinity(pid: pid_t, cpusize: size_t, buf: *const c_uchar) -> Result<isize> {
|
||||
// Convert unsafe C types into safe Rust types
|
||||
let cpuset = {
|
||||
check_array(buf, cpusize)?;
|
||||
if cpusize == 0 {
|
||||
return_errno!(EINVAL, "cpuset size must be greater than zero");
|
||||
}
|
||||
if buf as *const _ == std::ptr::null() {
|
||||
return_errno!(EFAULT, "cpuset mask must NOT be null");
|
||||
}
|
||||
CpuSet::from_raw_buf(buf, cpusize)
|
||||
};
|
||||
debug!("sched_setaffinity cpuset: {:#x}", cpuset);
|
||||
// Call the memory-safe do_sched_setaffinity
|
||||
super::do_sched::do_sched_setaffinity(pid, &cpuset)?;
|
||||
Ok(0)
|
||||
}
|
||||
|
||||
pub fn do_exit(status: i32) -> ! {
|
||||
debug!("exit: {}", status);
|
||||
super::do_exit::do_exit(status);
|
||||
|
@ -1,8 +1,8 @@
|
||||
use std::ptr::NonNull;
|
||||
|
||||
use super::{
|
||||
FileTableRef, FsViewRef, ProcessRef, ProcessVM, ProcessVMRef, ResourceLimitsRef, Task, Thread,
|
||||
ThreadId, ThreadInner, ThreadRef,
|
||||
FileTableRef, FsViewRef, ProcessRef, ProcessVM, ProcessVMRef, ResourceLimitsRef, SchedAgentRef,
|
||||
Task, Thread, ThreadId, ThreadInner, ThreadRef,
|
||||
};
|
||||
use crate::prelude::*;
|
||||
|
||||
@ -16,6 +16,7 @@ pub struct ThreadBuilder {
|
||||
// Optional fields
|
||||
fs: Option<FsViewRef>,
|
||||
files: Option<FileTableRef>,
|
||||
sched: Option<SchedAgentRef>,
|
||||
rlimits: Option<ResourceLimitsRef>,
|
||||
clear_ctid: Option<NonNull<pid_t>>,
|
||||
}
|
||||
@ -29,6 +30,7 @@ impl ThreadBuilder {
|
||||
vm: None,
|
||||
fs: None,
|
||||
files: None,
|
||||
sched: None,
|
||||
rlimits: None,
|
||||
clear_ctid: None,
|
||||
}
|
||||
@ -64,6 +66,11 @@ impl ThreadBuilder {
|
||||
self
|
||||
}
|
||||
|
||||
pub fn sched(mut self, sched: SchedAgentRef) -> Self {
|
||||
self.sched = Some(sched);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn rlimits(mut self, rlimits: ResourceLimitsRef) -> Self {
|
||||
self.rlimits = Some(rlimits);
|
||||
self
|
||||
@ -87,6 +94,7 @@ impl ThreadBuilder {
|
||||
.ok_or_else(|| errno!(EINVAL, "memory is mandatory"))?;
|
||||
let fs = self.fs.unwrap_or_default();
|
||||
let files = self.files.unwrap_or_default();
|
||||
let sched = self.sched.unwrap_or_default();
|
||||
let rlimits = self.rlimits.unwrap_or_default();
|
||||
let clear_ctid = SgxRwLock::new(self.clear_ctid);
|
||||
let inner = SgxMutex::new(ThreadInner::new());
|
||||
@ -100,6 +108,7 @@ impl ThreadBuilder {
|
||||
vm,
|
||||
fs,
|
||||
files,
|
||||
sched,
|
||||
rlimits,
|
||||
});
|
||||
|
||||
|
@ -3,7 +3,8 @@ use std::ptr::NonNull;
|
||||
|
||||
use super::task::Task;
|
||||
use super::{
|
||||
FileTableRef, FsViewRef, ProcessRef, ProcessVM, ProcessVMRef, ResourceLimitsRef, ThreadRef,
|
||||
FileTableRef, FsViewRef, ProcessRef, ProcessVM, ProcessVMRef, ResourceLimitsRef, SchedAgentRef,
|
||||
ThreadRef,
|
||||
};
|
||||
use crate::prelude::*;
|
||||
|
||||
@ -27,6 +28,7 @@ pub struct Thread {
|
||||
vm: ProcessVMRef,
|
||||
fs: FsViewRef,
|
||||
files: FileTableRef,
|
||||
sched: SchedAgentRef,
|
||||
rlimits: ResourceLimitsRef,
|
||||
}
|
||||
|
||||
@ -62,6 +64,10 @@ impl Thread {
|
||||
&self.files
|
||||
}
|
||||
|
||||
pub fn sched(&self) -> &SchedAgentRef {
|
||||
&self.sched
|
||||
}
|
||||
|
||||
/// Get a file from the file table.
|
||||
pub fn file(&self, fd: FileDesc) -> Result<FileRef> {
|
||||
self.files().lock().unwrap().get(fd)
|
||||
@ -89,10 +95,13 @@ impl Thread {
|
||||
}
|
||||
|
||||
pub(super) fn start(&self, host_tid: pid_t) {
|
||||
self.inner().start(host_tid);
|
||||
self.sched().lock().unwrap().attach(host_tid);
|
||||
self.inner().start();
|
||||
}
|
||||
|
||||
pub(super) fn exit(&self, exit_status: i32) -> usize {
|
||||
self.sched().lock().unwrap().detach();
|
||||
|
||||
// Remove this thread from its owner process
|
||||
let mut process_inner = self.process.inner();
|
||||
let threads = process_inner.threads_mut().unwrap();
|
||||
@ -143,7 +152,7 @@ unsafe impl Sync for Thread {}
|
||||
#[derive(Debug)]
|
||||
pub enum ThreadInner {
|
||||
Init,
|
||||
Live { host_tid: pid_t },
|
||||
Running,
|
||||
Exited { exit_status: i32 },
|
||||
}
|
||||
|
||||
@ -155,7 +164,7 @@ impl ThreadInner {
|
||||
pub fn status(&self) -> ThreadStatus {
|
||||
match self {
|
||||
Self::Init { .. } => ThreadStatus::Init,
|
||||
Self::Live { .. } => ThreadStatus::Running,
|
||||
Self::Running { .. } => ThreadStatus::Running,
|
||||
Self::Exited { .. } => ThreadStatus::Exited,
|
||||
}
|
||||
}
|
||||
@ -167,16 +176,9 @@ impl ThreadInner {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn host_tid(&self) -> Option<pid_t> {
|
||||
match self {
|
||||
Self::Live { host_tid } => Some(*host_tid),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn start(&mut self, host_tid: pid_t) {
|
||||
pub fn start(&mut self) {
|
||||
debug_assert!(self.status() == ThreadStatus::Init);
|
||||
*self = Self::Live { host_tid };
|
||||
*self = Self::Running;
|
||||
}
|
||||
|
||||
pub fn exit(&mut self, exit_status: i32) {
|
||||
|
123
src/libos/src/sched/cpu_set.rs
Normal file
123
src/libos/src/sched/cpu_set.rs
Normal file
@ -0,0 +1,123 @@
|
||||
//! A CpuSet is a bit mask used to represent a set of CPU cores.
|
||||
//!
|
||||
//! The number of bits contained in a CpuSet equals the number of CPU cores
|
||||
//! on the current platform. The bits in a CpuSet are accessible via indexes
|
||||
//! or iterators.
|
||||
//!
|
||||
//! The meaning of the i-th bit in a CpuSet `cpu_set` is as follows:
|
||||
//! * If `cpu_set[i] == true`, then the i-th CPU core belongs to the set;
|
||||
//! * Otherwise, the i-th CPU core is not in the set.
|
||||
|
||||
use bitvec::prelude::*;
|
||||
use std::ops::Index;
|
||||
|
||||
use crate::prelude::*;
|
||||
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct CpuSet {
|
||||
bits: BitBox<Local, u8>,
|
||||
}
|
||||
|
||||
impl CpuSet {
|
||||
/// Returns the length of a CPU set in bytes.
|
||||
pub fn len() -> usize {
|
||||
align_up(Self::ncores(), 8) / 8
|
||||
}
|
||||
|
||||
/// Returns the number CPU of cores in a CPU set.
|
||||
pub fn ncores() -> usize {
|
||||
*NCORES
|
||||
}
|
||||
|
||||
/// Create a CpuSet that consists of all of the CPU cores.
|
||||
pub fn new_full() -> Self {
|
||||
let mut bits = bitbox![Local, u8; 1; Self::len() * 8];
|
||||
Self::clear_unused(&mut bits);
|
||||
Self { bits }
|
||||
}
|
||||
|
||||
/// Create a CpuSet that consists of none of the CPU cores.
|
||||
pub fn new_empty() -> Self {
|
||||
let bits = bitbox![Local, u8; 0; Self::len() * 8];
|
||||
Self { bits }
|
||||
}
|
||||
|
||||
/// Returns if the CpuSet has no CPU cores.
|
||||
pub fn full(&self) -> bool {
|
||||
self.bits.count_ones() == Self::ncores()
|
||||
}
|
||||
|
||||
/// Returns if the CpuSet has no CPU cores.
|
||||
pub fn empty(&self) -> bool {
|
||||
self.bits.count_ones() == 0
|
||||
}
|
||||
|
||||
/// Create a CpuSet from bits given in a byte slice.
|
||||
pub fn from_slice(slice: &[u8]) -> Result<Self> {
|
||||
if slice.len() < Self::len() {
|
||||
return_errno!(EINVAL, "slice is not long enough");
|
||||
}
|
||||
let slice = &slice[..Self::len()];
|
||||
let mut bits = BitBox::from_slice(slice);
|
||||
Self::clear_unused(&mut bits);
|
||||
|
||||
Ok(Self { bits })
|
||||
}
|
||||
|
||||
/// Returns the underlying byte slice.
|
||||
///
|
||||
/// The last, unused bits in the byte slice are guaranteed to be zero.
|
||||
pub fn as_slice(&self) -> &[u8] {
|
||||
self.bits.as_slice()
|
||||
}
|
||||
|
||||
/// Returns an iterator that allows accessing the underlying bits.
|
||||
pub fn iter(&self) -> Iter {
|
||||
self.bits.iter()
|
||||
}
|
||||
|
||||
/// Returns an iterator that allows modifying the underlying bits.
|
||||
pub fn iter_mut(&mut self) -> IterMut {
|
||||
self.bits.iter_mut()
|
||||
}
|
||||
|
||||
fn clear_unused(bits: &mut BitSlice<Local, u8>) {
|
||||
let unused_bits = &mut bits[Self::ncores()..(Self::len() * 8)];
|
||||
for mut bit in unused_bits {
|
||||
*bit = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub type Iter<'a> = bitvec::slice::Iter<'a, Local, u8>;
|
||||
pub type IterMut<'a> = bitvec::slice::IterMut<'a, Local, u8>;
|
||||
|
||||
impl Index<usize> for CpuSet {
|
||||
type Output = bool;
|
||||
|
||||
fn index(&self, index: usize) -> &bool {
|
||||
assert!(index < Self::ncores());
|
||||
&self.bits[index]
|
||||
}
|
||||
}
|
||||
|
||||
lazy_static! {
    /// The number of all CPU cores on the platform.
    ///
    /// The value is obtained from the host through the `occlum_ocall_ncores`
    /// OCall the first time it is needed.
    ///
    /// Initialization panics if the OCall fails or if the host reports a
    /// number of cores outside of `1..=MAX_NCORES`.
    static ref NCORES: usize = {
        extern "C" {
            fn occlum_ocall_ncores(ret: *mut i32) -> sgx_status_t;
        }
        // A reasonable upper limit for the foreseeable future
        const MAX_NCORES: i32 = 1024;

        let mut ncores: i32 = 0;
        // SAFETY: `ret` points to a live, properly aligned local i32.
        // NOTE(review): assumes the OCall only writes its return value
        // through `ret` -- confirm against the generated bridge code.
        let status = unsafe { occlum_ocall_ncores(&mut ncores) };
        assert!(
            status == sgx_status_t::SGX_SUCCESS,
            "failed to do the OCall that queries the number of CPU cores"
        );
        // The value comes from the untrusted host, so it must be validated.
        // Ncores == 0 is meaningless.
        assert!(
            0 < ncores && ncores <= MAX_NCORES,
            "the host reported an invalid number of CPU cores: {}",
            ncores
        );
        ncores as usize
    };
}
|
30
src/libos/src/sched/do_sched_affinity.rs
Normal file
30
src/libos/src/sched/do_sched_affinity.rs
Normal file
@ -0,0 +1,30 @@
|
||||
use super::cpu_set::CpuSet;
|
||||
use crate::prelude::*;
|
||||
use crate::process::ThreadRef;
|
||||
|
||||
/// Returns a copy of the CPU affinity recorded by the scheduling agent of
/// thread `tid` (or of the current thread if `tid` is zero).
pub fn do_sched_getaffinity(tid: pid_t) -> Result<CpuSet> {
    debug!("do_sched_getaffinity tid: {}", tid);
    let target = get_thread_by_tid(tid)?;
    let agent = target.sched().lock().unwrap();
    Ok(agent.affinity().clone())
}
|
||||
|
||||
/// Replaces the CPU affinity of thread `tid` (or of the current thread if
/// `tid` is zero) with `new_affinity` through the thread's scheduling agent.
///
/// Any error from looking up the thread or from `SchedAgent::set_affinity` is
/// passed on to the caller.
pub fn do_sched_setaffinity(tid: pid_t, new_affinity: CpuSet) -> Result<()> {
    debug!(
        "do_sched_setaffinity tid: {}, new_affinity = {:?}",
        tid, &new_affinity
    );
    let target = get_thread_by_tid(tid)?;
    let mut agent = target.sched().lock().unwrap();
    agent.set_affinity(new_affinity)
}
|
||||
|
||||
fn get_thread_by_tid(tid: pid_t) -> Result<ThreadRef> {
|
||||
if tid == 0 {
|
||||
Ok(current!())
|
||||
} else {
|
||||
crate::process::table::get_thread(tid)
|
||||
}
|
||||
}
|
11
src/libos/src/sched/do_sched_yield.rs
Normal file
11
src/libos/src/sched/do_sched_yield.rs
Normal file
@ -0,0 +1,11 @@
|
||||
use crate::prelude::*;
|
||||
|
||||
pub fn do_sched_yield() {
|
||||
extern "C" {
|
||||
fn occlum_ocall_sched_yield() -> sgx_status_t;
|
||||
}
|
||||
unsafe {
|
||||
let status = occlum_ocall_sched_yield();
|
||||
assert!(status == sgx_status_t::SGX_SUCCESS);
|
||||
}
|
||||
}
|
9
src/libos/src/sched/mod.rs
Normal file
9
src/libos/src/sched/mod.rs
Normal file
@ -0,0 +1,9 @@
|
||||
//! CPU scheduling for threads.
|
||||
mod cpu_set;
|
||||
mod do_sched_affinity;
|
||||
mod do_sched_yield;
|
||||
mod sched_agent;
|
||||
mod syscalls;
|
||||
|
||||
pub use sched_agent::SchedAgent;
|
||||
pub use syscalls::*;
|
148
src/libos/src/sched/sched_agent.rs
Normal file
148
src/libos/src/sched/sched_agent.rs
Normal file
@ -0,0 +1,148 @@
|
||||
//! SchedAgent manages the CPU scheduler settings for a thread.
|
||||
//!
|
||||
//! # Scheduler Settings
|
||||
//!
|
||||
//! Currently, the only scheduler setting that SchedAgent can access and update
|
||||
//! is the CPU affinity of a thread. Other settings will be added in the future.
|
||||
//!
|
||||
//! # The Two Modes: Attached vs Detached
|
||||
//!
|
||||
//! SchedAgent works in one of the two modes: the attached mode and the detached
|
||||
//! mode.
|
||||
//!
|
||||
//! When a SchedAgent is created, it is initially in the detached mode,
|
||||
//! meaning that the SchedAgent is not attached to any host OS thread. Thus,
|
||||
//! any call on SchedAgent to update scheduler settings does not actually affect any
|
||||
//! host OS thread; SchedAgent just records the updates.
|
||||
//!
|
||||
//! After SchedAgent becomes attached to some host OS thread by invoking the `attach`
|
||||
//! method, all previous updates recorded while in the detached mode will
|
||||
//! be applied to the host OS thread. Afterwards, all setting updates will be applied
|
||||
//! immediately to the host OS thread---until SchedAgent is detached from the
|
||||
//! host OS thread.
|
||||
|
||||
use super::cpu_set::CpuSet;
|
||||
use crate::prelude::*;
|
||||
use crate::util::dirty::Dirty;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct SchedAgent {
|
||||
// The use of Option does not mean inner is optional. In contrast, we maintain
|
||||
// the invariant of `inner.is_some() == true`. We use Option so that we can
|
||||
// move the Inner out of SchedAgent without upsetting Rust's borrow checker.
|
||||
inner: Option<Inner>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
enum Inner {
|
||||
Detached { affinity: Dirty<CpuSet> },
|
||||
Attached { host_tid: pid_t, affinity: CpuSet },
|
||||
}
|
||||
|
||||
impl SchedAgent {
|
||||
pub fn new() -> Self {
|
||||
let inner = Some({
|
||||
let affinity = Dirty::new(CpuSet::new_full());
|
||||
Inner::Detached { affinity }
|
||||
});
|
||||
Self { inner }
|
||||
}
|
||||
|
||||
pub fn affinity(&self) -> &CpuSet {
|
||||
match self.inner() {
|
||||
Inner::Detached { affinity } => affinity.as_ref(),
|
||||
Inner::Attached { affinity, .. } => affinity,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn set_affinity(&mut self, new_affinity: CpuSet) -> Result<()> {
|
||||
if new_affinity.empty() {
|
||||
return_errno!(EINVAL, "there must be at least one CPU core in the CpuSet");
|
||||
}
|
||||
match self.inner_mut() {
|
||||
Inner::Detached { affinity } => {
|
||||
*affinity.as_mut() = new_affinity;
|
||||
}
|
||||
Inner::Attached { host_tid, affinity } => {
|
||||
update_affinity(*host_tid, &new_affinity);
|
||||
*affinity = new_affinity;
|
||||
}
|
||||
};
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn attach(&mut self, host_tid: pid_t) {
|
||||
self.update_inner(|inner| match inner {
|
||||
Inner::Detached { affinity } => {
|
||||
let affinity = {
|
||||
if affinity.dirty() {
|
||||
update_affinity(host_tid, affinity.as_ref())
|
||||
}
|
||||
affinity.unwrap()
|
||||
};
|
||||
Inner::Attached { host_tid, affinity }
|
||||
}
|
||||
Inner::Attached { .. } => panic!("cannot attach when the agent is already attached"),
|
||||
});
|
||||
}
|
||||
|
||||
pub fn detach(&mut self) {
|
||||
self.update_inner(|inner| match inner {
|
||||
Inner::Detached { .. } => panic!("cannot detach when the agent is already detached"),
|
||||
Inner::Attached { affinity, .. } => {
|
||||
let affinity = Dirty::new(affinity);
|
||||
Inner::Detached { affinity }
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
pub fn is_attached(&self) -> bool {
|
||||
match self.inner() {
|
||||
Inner::Detached { .. } => false,
|
||||
Inner::Attached { .. } => true,
|
||||
}
|
||||
}
|
||||
|
||||
fn inner(&self) -> &Inner {
|
||||
self.inner.as_ref().unwrap()
|
||||
}
|
||||
|
||||
fn inner_mut(&mut self) -> &mut Inner {
|
||||
self.inner.as_mut().unwrap()
|
||||
}
|
||||
|
||||
fn update_inner<F>(&mut self, f: F)
|
||||
where
|
||||
F: FnOnce(Inner) -> Inner,
|
||||
{
|
||||
let old_inner = self.inner.take().unwrap();
|
||||
let new_inner = f(old_inner);
|
||||
self.inner = Some(new_inner);
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for SchedAgent {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
fn update_affinity(host_tid: pid_t, affinity: &CpuSet) {
|
||||
let mask = affinity.as_slice();
|
||||
let mut retval = 0;
|
||||
let sgx_status = unsafe {
|
||||
occlum_ocall_sched_setaffinity(&mut retval, host_tid as i32, mask.len(), mask.as_ptr())
|
||||
};
|
||||
assert!(sgx_status == sgx_status_t::SGX_SUCCESS);
|
||||
// sched_setaffinity should never fail
|
||||
assert!(retval == 0);
|
||||
}
|
||||
|
||||
extern "C" {
|
||||
fn occlum_ocall_sched_setaffinity(
|
||||
ret: *mut i32,
|
||||
host_tid: i32,
|
||||
cpusetsize: size_t,
|
||||
mask: *const c_uchar,
|
||||
) -> sgx_status_t;
|
||||
}
|
52
src/libos/src/sched/syscalls.rs
Normal file
52
src/libos/src/sched/syscalls.rs
Normal file
@ -0,0 +1,52 @@
|
||||
use super::cpu_set::CpuSet;
|
||||
use crate::prelude::*;
|
||||
use crate::util::mem_util::from_user::*;
|
||||
|
||||
pub fn do_sched_yield() -> Result<isize> {
|
||||
super::do_sched_yield::do_sched_yield();
|
||||
Ok(0)
|
||||
}
|
||||
|
||||
pub fn do_sched_getaffinity(pid: pid_t, buf_size: size_t, buf_ptr: *mut u8) -> Result<isize> {
|
||||
// Construct safe Rust types
|
||||
let buf_size = {
|
||||
if buf_size < CpuSet::len() {
|
||||
return_errno!(EINVAL, "buf size is not big enough");
|
||||
}
|
||||
CpuSet::len()
|
||||
};
|
||||
let mut buf_slice = {
|
||||
check_mut_array(buf_ptr, buf_size)?;
|
||||
if buf_ptr as *const _ == std::ptr::null() {
|
||||
return_errno!(EFAULT, "buf ptr must NOT be null");
|
||||
}
|
||||
unsafe { std::slice::from_raw_parts_mut(buf_ptr, buf_size) }
|
||||
};
|
||||
// Call the memory-safe do_sched_getaffinity
|
||||
let affinity = super::do_sched_affinity::do_sched_getaffinity(pid)?;
|
||||
debug_assert!(affinity.as_slice().len() == CpuSet::len());
|
||||
// Copy from Rust types to C types
|
||||
buf_slice.copy_from_slice(affinity.as_slice());
|
||||
Ok(CpuSet::len() as isize)
|
||||
}
|
||||
|
||||
pub fn do_sched_setaffinity(pid: pid_t, buf_size: size_t, buf_ptr: *const u8) -> Result<isize> {
|
||||
// Convert unsafe C types into safe Rust types
|
||||
let buf_size = {
|
||||
if buf_size < CpuSet::len() {
|
||||
return_errno!(EINVAL, "buf size is not big enough");
|
||||
}
|
||||
CpuSet::len()
|
||||
};
|
||||
let buf_slice = {
|
||||
check_array(buf_ptr, buf_size)?;
|
||||
if buf_ptr as *const _ == std::ptr::null() {
|
||||
return_errno!(EFAULT, "buf ptr must NOT be null");
|
||||
}
|
||||
unsafe { std::slice::from_raw_parts(buf_ptr, buf_size) }
|
||||
};
|
||||
// Call the memory-safe do_sched_setaffinity
|
||||
let affinity = CpuSet::from_slice(buf_slice).unwrap();
|
||||
super::do_sched_affinity::do_sched_setaffinity(pid, affinity)?;
|
||||
Ok(0)
|
||||
}
|
@ -32,9 +32,10 @@ use crate::net::{
|
||||
};
|
||||
use crate::process::{
|
||||
do_arch_prctl, do_clone, do_exit, do_futex, do_getegid, do_geteuid, do_getgid, do_getpgid,
|
||||
do_getpid, do_getppid, do_gettid, do_getuid, do_sched_getaffinity, do_sched_setaffinity,
|
||||
do_sched_yield, do_set_tid_address, do_spawn, do_wait4, pid_t, FdOp,
|
||||
do_getpid, do_getppid, do_gettid, do_getuid, do_set_tid_address, do_spawn, do_wait4, pid_t,
|
||||
FdOp,
|
||||
};
|
||||
use crate::sched::{do_sched_getaffinity, do_sched_setaffinity, do_sched_yield};
|
||||
use crate::vm::{MMapFlags, VMPerms};
|
||||
use crate::{fs, process, std, vm};
|
||||
|
||||
|
60
src/libos/src/util/dirty.rs
Normal file
60
src/libos/src/util/dirty.rs
Normal file
@ -0,0 +1,60 @@
|
||||
//! Dirty is a wrapper type that remembers whether the internal object has been
//! borrowed mutably.
|
||||
use std::fmt;
|
||||
|
||||
/// A wrapper that remembers whether the wrapped value has been borrowed
/// mutably.
///
/// A freshly created wrapper is clean. Calling `as_mut` marks it dirty, even
/// if the caller never actually writes through the returned reference. The
/// flag is reset by `clear_dirty` and is carried over by `clone`.
#[derive(Clone, Copy)]
pub struct Dirty<T> {
    inner: T,
    dirty: bool,
}

impl<T> Dirty<T> {
    /// Wraps `inner` in a clean (not dirty) wrapper.
    pub fn new(inner: T) -> Self {
        Self {
            inner,
            dirty: false,
        }
    }

    /// Returns whether the value has been borrowed mutably since creation or
    /// since the last `clear_dirty`.
    pub fn dirty(&self) -> bool {
        self.dirty
    }

    /// Resets the dirty flag.
    pub fn clear_dirty(&mut self) {
        self.dirty = false;
    }

    /// Consumes the wrapper and returns the wrapped value.
    pub fn unwrap(self) -> T {
        self.inner
    }
}

impl<T: fmt::Debug> fmt::Debug for Dirty<T> {
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        fmt.debug_struct("Dirty")
            .field("inner", &self.inner)
            .field("dirty", &self.dirty)
            .finish()
    }
}

impl<T> AsRef<T> for Dirty<T> {
    /// Borrows the wrapped value without touching the dirty flag.
    fn as_ref(&self) -> &T {
        &self.inner
    }
}

impl<T> AsMut<T> for Dirty<T> {
    /// Borrows the wrapped value mutably and marks the wrapper dirty.
    fn as_mut(&mut self) -> &mut T {
        self.dirty = true;
        &mut self.inner
    }
}
|
@ -1,5 +1,6 @@
|
||||
use super::*;
|
||||
|
||||
pub mod dirty;
|
||||
pub mod log;
|
||||
pub mod mem_util;
|
||||
pub mod mpx_util;
|
||||
|
@ -1,11 +1,8 @@
|
||||
#define _GNU_SOURCE
|
||||
#include <sched.h>
|
||||
#include <unistd.h>
|
||||
#include "ocalls.h"
|
||||
|
||||
int occlum_ocall_sched_getaffinity(int host_tid, size_t cpusize, unsigned char* buf) {
|
||||
return syscall(__NR_sched_getaffinity, host_tid, cpusize, buf);
|
||||
}
|
||||
|
||||
int occlum_ocall_sched_setaffinity(int host_tid, size_t cpusize, const unsigned char* buf) {
|
||||
return syscall(__NR_sched_setaffinity, host_tid, cpusize, buf);
|
||||
}
|
||||
@ -14,3 +11,7 @@ int occlum_ocall_sched_setaffinity(int host_tid, size_t cpusize, const unsigned
|
||||
void occlum_ocall_sched_yield(void) {
|
||||
sched_yield();
|
||||
}
|
||||
|
||||
int occlum_ocall_ncores(void) {
|
||||
return sysconf(_SC_NPROCESSORS_CONF);
|
||||
}
|
||||
|
@ -97,18 +97,15 @@ static int test_sched_getaffinity_via_explicit_syscall() {
|
||||
}
|
||||
|
||||
static int test_sched_setaffinity_via_explicit_syscall() {
|
||||
int nproc = sysconf(_SC_NPROCESSORS_ONLN);
|
||||
cpu_set_t mask_old;
|
||||
for (int i = 0; i < nproc; ++i) {
|
||||
CPU_SET(i, &mask_old);
|
||||
}
|
||||
cpu_set_t mask;
|
||||
CPU_ZERO(&mask);
|
||||
CPU_SET(0, &mask);
|
||||
if (syscall(__NR_sched_setaffinity, 0, sizeof(cpu_set_t), &mask) < 0) {
|
||||
THROW_ERROR("failed to call __NR_sched_setaffinity");
|
||||
}
|
||||
|
||||
cpu_set_t mask2;
|
||||
CPU_ZERO(&mask2);
|
||||
int ret_nproc = syscall(__NR_sched_getaffinity, 0, sizeof(cpu_set_t), &mask2);
|
||||
if (ret_nproc <= 0) {
|
||||
THROW_ERROR("failed to call __NR_sched_getaffinity");
|
||||
@ -116,6 +113,13 @@ static int test_sched_setaffinity_via_explicit_syscall() {
|
||||
if (!CPU_EQUAL(&mask, &mask2)) {
|
||||
THROW_ERROR("explicit syscall cpuset is wrong");
|
||||
}
|
||||
|
||||
// Recover the affinity mask
|
||||
int nproc = sysconf(_SC_NPROCESSORS_ONLN);
|
||||
cpu_set_t mask_old;
|
||||
for (int i = 0; i < nproc; ++i) {
|
||||
CPU_SET(i, &mask_old);
|
||||
}
|
||||
if (syscall(__NR_sched_setaffinity, 0, sizeof(cpu_set_t), &mask_old) < 0) {
|
||||
THROW_ERROR("recover cpuset error");
|
||||
}
|
||||
@ -171,7 +175,7 @@ static int test_sched_yield() {
|
||||
// ============================================================================
|
||||
|
||||
static test_case_t test_cases[] = {
|
||||
//TEST_CASE(test_sched_xetaffinity_with_child_pid),
|
||||
TEST_CASE(test_sched_xetaffinity_with_child_pid),
|
||||
TEST_CASE(test_sched_getaffinity_with_self_pid),
|
||||
TEST_CASE(test_sched_setaffinity_with_self_pid),
|
||||
TEST_CASE(test_sched_getaffinity_via_explicit_syscall),
|
||||
|
Loading…
Reference in New Issue
Block a user