Fix sched set/get affinity wrong behaviour

1. Fix child process not inherit affinity from parent process
2. Add warning for buffer length gap with kernel when getaffinity
This commit is contained in:
Hui, Chunyang 2020-05-19 09:33:13 +00:00 committed by tate.thl
parent e59ba66ccf
commit ed664d1143
6 changed files with 98 additions and 2 deletions

@ -162,12 +162,14 @@ fn new_process(
Arc::new(SgxMutex::new(files)) Arc::new(SgxMutex::new(files))
}; };
let fs_ref = Arc::new(SgxMutex::new(current_ref.fs().lock().unwrap().clone())); let fs_ref = Arc::new(SgxMutex::new(current_ref.fs().lock().unwrap().clone()));
let sched_ref = Arc::new(SgxMutex::new(current_ref.sched().lock().unwrap().clone()));
ProcessBuilder::new() ProcessBuilder::new()
.vm(vm_ref) .vm(vm_ref)
.exec_path(elf_path) .exec_path(elf_path)
.parent(process_ref) .parent(process_ref)
.task(task) .task(task)
.sched(sched_ref)
.fs(fs_ref) .fs(fs_ref)
.files(files_ref) .files(files_ref)
.build()? .build()?

@ -1,6 +1,8 @@
use super::super::task::Task; use super::super::task::Task;
use super::super::thread::{ThreadBuilder, ThreadId}; use super::super::thread::{ThreadBuilder, ThreadId};
use super::super::{FileTableRef, FsViewRef, ProcessRef, ProcessVMRef, ResourceLimitsRef}; use super::super::{
FileTableRef, FsViewRef, ProcessRef, ProcessVMRef, ResourceLimitsRef, SchedAgentRef,
};
use super::{Process, ProcessInner}; use super::{Process, ProcessInner};
use crate::prelude::*; use crate::prelude::*;
use crate::signal::{SigDispositions, SigQueues}; use crate::signal::{SigDispositions, SigQueues};
@ -54,6 +56,10 @@ impl ProcessBuilder {
self.thread_builder(|tb| tb.task(task)) self.thread_builder(|tb| tb.task(task))
} }
pub fn sched(mut self, sched: SchedAgentRef) -> Self {
self.thread_builder(|tb| tb.sched(sched))
}
pub fn vm(mut self, vm: ProcessVMRef) -> Self { pub fn vm(mut self, vm: ProcessVMRef) -> Self {
self.thread_builder(|tb| tb.vm(vm)) self.thread_builder(|tb| tb.vm(vm))
} }

@ -25,7 +25,7 @@ use super::cpu_set::{CpuSet, AVAIL_CPUSET};
use crate::prelude::*; use crate::prelude::*;
use crate::util::dirty::Dirty; use crate::util::dirty::Dirty;
#[derive(Debug, Clone)] #[derive(Debug)]
pub struct SchedAgent { pub struct SchedAgent {
// The use of Option does not mean inner is optional. In contrast, we maintain // The use of Option does not mean inner is optional. In contrast, we maintain
// the invariant of `inner.is_some() == true`. We use Option so that we can // the invariant of `inner.is_some() == true`. We use Option so that we can
@ -33,6 +33,34 @@ pub struct SchedAgent {
inner: Option<Inner>, inner: Option<Inner>,
} }
impl Clone for SchedAgent {
/// Clone a SchedAgent in a way that works well with clone and spawn syscall.
///
/// We cannot use the auto-derived implementation of clone for SchedAgent, which
/// would copy the fields of SchedAgent bit-by-bit. The reason is two-fold.
///
/// First, a SchedAgent, if in the attached mode, actually holds a host OS
/// resource (`host_tid`). Copying a SchedAgent bit-by-bit would result in
/// multiple instances of SchedAgent refer to the same host thread.
///
/// Second, we need to ensure that the scheduler settings in a cloned SchedAgent
/// instance will take effect when the SchedAgent is attached to a host thread.
///
/// This implementation carefully handles the two points above.
fn clone(&self) -> Self {
let mut affinity = Dirty::new(match self.inner() {
Inner::Detached { affinity } => affinity.as_ref().clone(),
Inner::Attached { affinity, .. } => affinity.clone(),
});
if affinity.as_ref().as_slice() != AVAIL_CPUSET.as_slice() {
affinity.set_dirty();
}
Self {
inner: Some(Inner::Detached { affinity }),
}
}
}
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
enum Inner { enum Inner {
Detached { affinity: Dirty<CpuSet> }, Detached { affinity: Dirty<CpuSet> },

@ -13,6 +13,12 @@ pub fn do_sched_getaffinity(pid: pid_t, buf_size: size_t, buf_ptr: *mut u8) -> R
if buf_size < CpuSet::len() { if buf_size < CpuSet::len() {
return_errno!(EINVAL, "buf size is not big enough"); return_errno!(EINVAL, "buf size is not big enough");
} }
// Linux stores the cpumask in an array of "unsigned long" so the buffer needs to be
// multiple of unsigned long. However, Occlum doesn't have this restriction.
if (buf_size & (std::mem::size_of::<u64>() - 1) != 0) {
warn!("cpuset buf size is not a multiple of unsigned long");
}
CpuSet::len() CpuSet::len()
}; };
let mut buf_slice = { let mut buf_slice = {

@ -21,6 +21,10 @@ impl<T> Dirty<T> {
self.dirty = false; self.dirty = false;
} }
pub fn set_dirty(&mut self) {
self.dirty = true;
}
pub fn unwrap(self) -> T { pub fn unwrap(self) -> T {
self.inner self.inner
} }

@ -117,6 +117,55 @@ static int test_sched_xetaffinity_with_child_pid() {
return 0; return 0;
} }
static int test_sched_xetaffinity_children_inheritance() {
int status, child_pid;
int num_core = sysconf(_SC_NPROCESSORS_ONLN);
if (num_core <= 0) {
THROW_ERROR("failed to get cpu number");
}
cpu_set_t mask;
CPU_ZERO(&mask);
CPU_SET(g_online_cpu_idxs[num_core - 1], &mask);
if (sched_setaffinity(0, sizeof(cpu_set_t), &mask) < 0) {
THROW_ERROR("failed to set parent affinity");
}
int ret = posix_spawn(&child_pid, "/bin/getpid", NULL, NULL, NULL, NULL);
if (ret < 0 ) {
THROW_ERROR("spawn process error");
}
printf("Spawn a child process with pid=%d\n", child_pid);
cpu_set_t mask2;
if (sched_getaffinity(child_pid, sizeof(cpu_set_t), &mask2) < 0) {
THROW_ERROR("failed to get child affinity");
}
if (!CPU_EQUAL(&mask, &mask2)) {
THROW_ERROR("affinity inherited from parent is wrong in child");
}
// Set affinity to child should not affect parent process
CPU_SET(g_online_cpu_idxs[0], &mask2);
if (sched_setaffinity(child_pid, sizeof(cpu_set_t), &mask2) < 0) {
THROW_ERROR("failed to set child affinity");
}
CPU_ZERO(&mask2);
if (sched_getaffinity(0, sizeof(cpu_set_t), &mask2) < 0) {
THROW_ERROR("failed to get parent process affinity");
}
if (!CPU_EQUAL(&mask, &mask2)) {
THROW_ERROR("cpuset is wrong in parent process");
}
ret = wait4(-1, &status, 0, NULL);
if (ret < 0) {
THROW_ERROR("failed to wait4 the child procces");
}
return 0;
}
#define CPU_SET_SIZE_LIMIT (1024) #define CPU_SET_SIZE_LIMIT (1024)
static int test_sched_getaffinity_via_explicit_syscall() { static int test_sched_getaffinity_via_explicit_syscall() {
@ -218,6 +267,7 @@ static test_case_t test_cases[] = {
TEST_CASE(test_sched_getaffinity_with_null_buffer), TEST_CASE(test_sched_getaffinity_with_null_buffer),
TEST_CASE(test_sched_setaffinity_with_null_buffer), TEST_CASE(test_sched_setaffinity_with_null_buffer),
TEST_CASE(test_sched_yield), TEST_CASE(test_sched_yield),
TEST_CASE(test_sched_xetaffinity_children_inheritance),
}; };
int main() { int main() {