From ed664d1143a93cf622b1c56790df4b5832b20c72 Mon Sep 17 00:00:00 2001 From: "Hui, Chunyang" Date: Tue, 19 May 2020 09:33:13 +0000 Subject: [PATCH] Fix sched set/get affinity wrong behaviour 1. Fix child process not inherit affinity from parent process 2. Add warning for buffer length gap with kernel when getaffinity --- src/libos/src/process/do_spawn/mod.rs | 2 + src/libos/src/process/process/builder.rs | 8 +++- src/libos/src/sched/sched_agent.rs | 30 +++++++++++++- src/libos/src/sched/syscalls.rs | 6 +++ src/libos/src/util/dirty.rs | 4 ++ test/sched/main.c | 50 ++++++++++++++++++++++++ 6 files changed, 98 insertions(+), 2 deletions(-) diff --git a/src/libos/src/process/do_spawn/mod.rs b/src/libos/src/process/do_spawn/mod.rs index 9d29e9c0..0b8095dc 100644 --- a/src/libos/src/process/do_spawn/mod.rs +++ b/src/libos/src/process/do_spawn/mod.rs @@ -162,12 +162,14 @@ fn new_process( Arc::new(SgxMutex::new(files)) }; let fs_ref = Arc::new(SgxMutex::new(current_ref.fs().lock().unwrap().clone())); + let sched_ref = Arc::new(SgxMutex::new(current_ref.sched().lock().unwrap().clone())); ProcessBuilder::new() .vm(vm_ref) .exec_path(elf_path) .parent(process_ref) .task(task) + .sched(sched_ref) .fs(fs_ref) .files(files_ref) .build()? diff --git a/src/libos/src/process/process/builder.rs b/src/libos/src/process/process/builder.rs index c0d85d60..882d47fa 100644 --- a/src/libos/src/process/process/builder.rs +++ b/src/libos/src/process/process/builder.rs @@ -1,6 +1,8 @@ use super::super::task::Task; use super::super::thread::{ThreadBuilder, ThreadId}; -use super::super::{FileTableRef, FsViewRef, ProcessRef, ProcessVMRef, ResourceLimitsRef}; +use super::super::{ + FileTableRef, FsViewRef, ProcessRef, ProcessVMRef, ResourceLimitsRef, SchedAgentRef, +}; use super::{Process, ProcessInner}; use crate::prelude::*; use crate::signal::{SigDispositions, SigQueues}; @@ -54,6 +56,10 @@ impl ProcessBuilder { self.thread_builder(|tb| tb.task(task)) } + pub fn sched(mut self, sched: SchedAgentRef) -> Self { + self.thread_builder(|tb| tb.sched(sched)) + } + pub fn vm(mut self, vm: ProcessVMRef) -> Self { self.thread_builder(|tb| tb.vm(vm)) } diff --git a/src/libos/src/sched/sched_agent.rs b/src/libos/src/sched/sched_agent.rs index bb10bf06..6d0d006c 100644 --- a/src/libos/src/sched/sched_agent.rs +++ b/src/libos/src/sched/sched_agent.rs @@ -25,7 +25,7 @@ use super::cpu_set::{CpuSet, AVAIL_CPUSET}; use crate::prelude::*; use crate::util::dirty::Dirty; -#[derive(Debug, Clone)] +#[derive(Debug)] pub struct SchedAgent { // The use of Option does not mean inner is optional. In contrast, we maintain // the invariant of `inner.is_some() == true`. We use Option so that we can @@ -33,6 +33,34 @@ pub struct SchedAgent { inner: Option, } +impl Clone for SchedAgent { + /// Clone a SchedAgent in a way that works well with clone and spawn syscall. + /// + /// We cannot use the auto-derived implementation of clone for SchedAgent, which + /// would copy the fields of SchedAgent bit-by-bit. The reason is two-fold. + /// + /// First, a SchedAgent, if in the attached mode, actually holds a host OS + /// resource (`host_tid`). Copying a SchedAgent bit-by-bit would result in + /// multiple instances of SchedAgent refer to the same host thread. + /// + /// Second, we need to ensure that the scheduler settings in a cloned SchedAgent + /// instance will take effect when the SchedAgent is attached to a host thread. + /// + /// This implementation carefully handles the two points above. + fn clone(&self) -> Self { + let mut affinity = Dirty::new(match self.inner() { + Inner::Detached { affinity } => affinity.as_ref().clone(), + Inner::Attached { affinity, .. } => affinity.clone(), + }); + if affinity.as_ref().as_slice() != AVAIL_CPUSET.as_slice() { + affinity.set_dirty(); + } + Self { + inner: Some(Inner::Detached { affinity }), + } + } +} + #[derive(Debug, Clone)] enum Inner { Detached { affinity: Dirty }, diff --git a/src/libos/src/sched/syscalls.rs b/src/libos/src/sched/syscalls.rs index 893c876f..3d5778e6 100644 --- a/src/libos/src/sched/syscalls.rs +++ b/src/libos/src/sched/syscalls.rs @@ -13,6 +13,12 @@ pub fn do_sched_getaffinity(pid: pid_t, buf_size: size_t, buf_ptr: *mut u8) -> R if buf_size < CpuSet::len() { return_errno!(EINVAL, "buf size is not big enough"); } + + // Linux stores the cpumask in an array of "unsigned long" so the buffer needs to be + // multiple of unsigned long. However, Occlum doesn't have this restriction. + if (buf_size & (std::mem::size_of::() - 1) != 0) { + warn!("cpuset buf size is not a multiple of unsigned long"); + } CpuSet::len() }; let mut buf_slice = { diff --git a/src/libos/src/util/dirty.rs b/src/libos/src/util/dirty.rs index 91014c34..e1aabf59 100644 --- a/src/libos/src/util/dirty.rs +++ b/src/libos/src/util/dirty.rs @@ -21,6 +21,10 @@ impl Dirty { self.dirty = false; } + pub fn set_dirty(&mut self) { + self.dirty = true; + } + pub fn unwrap(self) -> T { self.inner } diff --git a/test/sched/main.c b/test/sched/main.c index 66fbb97e..960597a1 100644 --- a/test/sched/main.c +++ b/test/sched/main.c @@ -117,6 +117,55 @@ static int test_sched_xetaffinity_with_child_pid() { return 0; } +static int test_sched_xetaffinity_children_inheritance() { + int status, child_pid; + int num_core = sysconf(_SC_NPROCESSORS_ONLN); + if (num_core <= 0) { + THROW_ERROR("failed to get cpu number"); + } + cpu_set_t mask; + CPU_ZERO(&mask); + CPU_SET(g_online_cpu_idxs[num_core - 1], &mask); + if (sched_setaffinity(0, sizeof(cpu_set_t), &mask) < 0) { + THROW_ERROR("failed to set parent affinity"); + } + + int ret = posix_spawn(&child_pid, "/bin/getpid", NULL, NULL, NULL, NULL); + if (ret < 0 ) { + THROW_ERROR("spawn process error"); + } + printf("Spawn a child process with pid=%d\n", child_pid); + + cpu_set_t mask2; + if (sched_getaffinity(child_pid, sizeof(cpu_set_t), &mask2) < 0) { + THROW_ERROR("failed to get child affinity"); + } + if (!CPU_EQUAL(&mask, &mask2)) { + THROW_ERROR("affinity inherited from parent is wrong in child"); + } + + // Set affinity to child should not affect parent process + CPU_SET(g_online_cpu_idxs[0], &mask2); + if (sched_setaffinity(child_pid, sizeof(cpu_set_t), &mask2) < 0) { + THROW_ERROR("failed to set child affinity"); + } + + CPU_ZERO(&mask2); + if (sched_getaffinity(0, sizeof(cpu_set_t), &mask2) < 0) { + THROW_ERROR("failed to get parent process affinity"); + } + + if (!CPU_EQUAL(&mask, &mask2)) { + THROW_ERROR("cpuset is wrong in parent process"); + } + + ret = wait4(-1, &status, 0, NULL); + if (ret < 0) { + THROW_ERROR("failed to wait4 the child procces"); + } + return 0; +} + #define CPU_SET_SIZE_LIMIT (1024) static int test_sched_getaffinity_via_explicit_syscall() { @@ -218,6 +267,7 @@ static test_case_t test_cases[] = { TEST_CASE(test_sched_getaffinity_with_null_buffer), TEST_CASE(test_sched_setaffinity_with_null_buffer), TEST_CASE(test_sched_yield), + TEST_CASE(test_sched_xetaffinity_children_inheritance), }; int main() {