Fix sched agent failure for offline core

This commit is contained in:
Hui, Chunyang 2020-04-30 09:32:47 +00:00 committed by Tate, Hongliang Tian
parent 7a87d77509
commit 25350b0e85
5 changed files with 100 additions and 9 deletions

@ -72,6 +72,10 @@ enclave {
size_t cpusize,
[in, size=cpusize] const unsigned char* buf
) propagate_errno;
int occlum_ocall_sched_getaffinity(
size_t cpusize,
[out, size=cpusize] unsigned char* buf
) propagate_errno;
int occlum_ocall_ncores(void);
sgx_status_t occlum_ocall_sgx_init_quote(

@ -52,6 +52,11 @@ impl CpuSet {
self.bits.count_ones() == 0
}
/// Returns whether every core contained in this set is also contained in `other`.
///
/// The check intersects the two bit sets and compares the intersection with
/// this set's own bits.
pub fn is_subset_of(&self, other: &CpuSet) -> bool {
    let common = self.bits.clone() & other.bits.clone();
    common == self.bits
}
/// Create a CpuSet from bits given in a byte slice.
pub fn from_slice(slice: &[u8]) -> Result<Self> {
if slice.len() < Self::len() {
@ -71,6 +76,10 @@ impl CpuSet {
self.bits.as_slice()
}
/// Returns the underlying bits as a mutable byte slice, so that callers can
/// fill the set in place.
pub fn as_mut_slice(&mut self) -> &mut [u8] {
    let bits = &mut self.bits;
    bits.as_mut_slice()
}
/// Returns an iterator that allows accessing the underlying bits.
pub fn iter(&self) -> Iter {
self.bits.iter()
@ -120,4 +129,29 @@ lazy_static! {
ncores as usize
}
};
/// The set of all available CPU cores.
///
/// While `AVAIL_CPUSET` is likely to be equal to `CpuSet::new_full()`, this is not always the
/// case. For example, when the enclave is running in a container or a virtual machine on a public
/// cloud platform, the container or VM is usually given access to only a subset of the CPU cores
/// on the host machine.
///
/// Property: `AVAIL_CPUSET.empty() == false`.
///
/// # Panics
///
/// Initialization panics if the OCall does not complete with `SGX_SUCCESS`, if the host reports
/// a failure through the OCall's return value, or if the resulting set is empty.
pub static ref AVAIL_CPUSET: CpuSet = {
    extern "C" {
        fn occlum_ocall_sched_getaffinity(
            ret: *mut i32,
            cpusetsize: size_t,
            mask: *mut c_uchar,
        ) -> sgx_status_t;
    }
    let mut cpuset = CpuSet::new_empty();
    let mut retval: i32 = 0;
    // SAFETY: `retval` and the byte buffer of `cpuset` are live and writable for the whole
    // call. Assumes the buffer behind `as_mut_slice()` holds at least `CpuSet::len()` bytes
    // -- TODO confirm against `CpuSet::new_empty()`.
    let sgx_status = unsafe {
        occlum_ocall_sched_getaffinity(
            &mut retval,
            CpuSet::len(),
            cpuset.as_mut_slice().as_mut_ptr(),
        )
    };
    assert!(sgx_status == sgx_status_t::SGX_SUCCESS);
    // The host side forwards the raw syscall result; a negative value means the affinity
    // mask could not be queried, so the buffer contents must not be trusted.
    assert!(
        retval >= 0,
        "occlum_ocall_sched_getaffinity failed on the host (ret = {})",
        retval
    );
    // Drop any bits that do not correspond to a core tracked by `CpuSet`.
    CpuSet::clear_unused(&mut cpuset.bits);
    assert!(!cpuset.empty());
    cpuset
};
}

@ -21,7 +21,7 @@
//! immediately to the host OS thread---until SchedAgent is detached from the
//! host OS thread.
use super::cpu_set::CpuSet;
use super::cpu_set::{CpuSet, AVAIL_CPUSET};
use crate::prelude::*;
use crate::util::dirty::Dirty;
@ -42,7 +42,7 @@ enum Inner {
impl SchedAgent {
pub fn new() -> Self {
let inner = Some({
let affinity = Dirty::new(CpuSet::new_full());
let affinity = Dirty::new(AVAIL_CPUSET.clone());
Inner::Detached { affinity }
});
Self { inner }
@ -59,6 +59,12 @@ impl SchedAgent {
if new_affinity.empty() {
return_errno!(EINVAL, "there must be at least one CPU core in the CpuSet");
}
if !new_affinity.is_subset_of(&AVAIL_CPUSET) {
return_errno!(
EINVAL,
"one or some of the CPU cores are not available to set"
);
}
match self.inner_mut() {
Inner::Detached { affinity } => {
*affinity.as_mut() = new_affinity;

@ -3,6 +3,16 @@
#include <unistd.h>
#include "ocalls.h"
/*
 * Queries the CPU affinity mask of the calling host thread and copies it into
 * `buf`, which is `cpusize` bytes long.
 *
 * The mask is first read into a local `cpu_set_t` and then copied, so the
 * caller's buffer may be smaller than `cpu_set_t`. If `cpusize` is larger than
 * `cpu_set_t`, the extra bytes of `buf` are zero-filled.
 *
 * Returns the raw result of the `sched_getaffinity` system call. When the call
 * fails, `buf` is filled with zeros.
 */
int occlum_ocall_sched_getaffinity(size_t cpusize, unsigned char* buf) {
    int ret;
    cpu_set_t mask;
    /* Never read past the end of `mask`, whatever size the caller asked for. */
    size_t copy_len = cpusize < sizeof(mask) ? cpusize : sizeof(mask);

    CPU_ZERO(&mask);
    ret = syscall(__NR_sched_getaffinity, gettid(), sizeof(cpu_set_t), &mask);
    memset(buf, 0, cpusize);
    memcpy(buf, &mask, copy_len);
    return ret;
}
/*
 * Applies the CPU affinity mask stored in `buf` (`cpusize` bytes) to the host
 * thread identified by `host_tid`.
 *
 * Returns the raw result of the `sched_setaffinity` system call.
 */
int occlum_ocall_sched_setaffinity(int host_tid, size_t cpusize, const unsigned char* buf) {
    long result = syscall(__NR_sched_setaffinity, host_tid, cpusize, buf);
    return (int)result;
}

@ -11,6 +11,37 @@
#include <sys/wait.h>
#include "test.h"
// ============================================================================
// Helper function
// ============================================================================
#define MAX_CPU_NUM 1024
static int* g_online_cpu_idxs;
/*
 * Fills `g_online_cpu_idxs` with the indexes of the CPU cores found in the
 * calling thread's affinity mask, and prints them.
 *
 * At most `sysconf(_SC_NPROCESSORS_ONLN)` indexes are recorded, and only core
 * indexes below MAX_CPU_NUM are examined. Entries that are not filled stay
 * zero because the array is allocated with calloc().
 *
 * Returns 0 on success. On failure the function leaves through THROW_ERROR
 * (declared in test.h). The caller owns `g_online_cpu_idxs` and must free it.
 */
int get_online_cpu() {
    int online_num = sysconf(_SC_NPROCESSORS_ONLN);
    cpu_set_t mask;
    int index = 0;

    /* A non-positive count must not reach calloc() as an element count. */
    if (online_num <= 0) {
        THROW_ERROR("failed to get the number of online cpus");
    }

    g_online_cpu_idxs = (int *)calloc(online_num, sizeof(int));
    if (g_online_cpu_idxs == NULL) {
        THROW_ERROR("failed to allocate memory for online cpu indexes");
    }

    CPU_ZERO(&mask);
    if (sched_getaffinity(0, sizeof(cpu_set_t), &mask) < 0) {
        THROW_ERROR("failed to call sched_getaffinity");
    }

    printf("Online Core No: ");
    for (int i = 0; index < online_num && i < MAX_CPU_NUM; i++) {
        if (CPU_ISSET(i, &mask)) {
            g_online_cpu_idxs[index] = i;
            index++;
            printf("%d ", i);
        }
    }
    printf("\n");
    return 0;
}
// ============================================================================
// Test cases for sched_cpu_affinity
// ============================================================================
@ -32,12 +63,13 @@ static int test_sched_getaffinity_with_self_pid() {
static int test_sched_setaffinity_with_self_pid() {
int nproc = sysconf(_SC_NPROCESSORS_ONLN);
cpu_set_t mask_old;
CPU_ZERO(&mask_old);
for (int i = 0; i < nproc; ++i) {
CPU_SET(i, &mask_old);
CPU_SET(g_online_cpu_idxs[i], &mask_old);
}
cpu_set_t mask;
CPU_ZERO(&mask);
CPU_SET(0, &mask);
CPU_SET(g_online_cpu_idxs[0], &mask);
if (sched_setaffinity(0, sizeof(cpu_set_t), &mask) < 0) {
THROW_ERROR("failed to call sched_setaffinity \n");
}
@ -56,13 +88,13 @@ static int test_sched_setaffinity_with_self_pid() {
static int test_sched_xetaffinity_with_child_pid() {
int status, child_pid;
int num = sysconf(_SC_NPROCESSORS_CONF);
int num = sysconf(_SC_NPROCESSORS_ONLN);
if (num <= 0) {
THROW_ERROR("failed to get cpu number");
}
cpu_set_t mask;
CPU_ZERO(&mask);
CPU_SET(num - 1 , &mask);
CPU_SET(g_online_cpu_idxs[num - 1] , &mask);
int ret = posix_spawn(&child_pid, "/bin/getpid", NULL, NULL, NULL, NULL);
if (ret < 0 ) {
THROW_ERROR("spawn process error");
@ -99,7 +131,7 @@ static int test_sched_getaffinity_via_explicit_syscall() {
static int test_sched_setaffinity_via_explicit_syscall() {
cpu_set_t mask;
CPU_ZERO(&mask);
CPU_SET(0, &mask);
CPU_SET(g_online_cpu_idxs[0], &mask);
if (syscall(__NR_sched_setaffinity, 0, sizeof(cpu_set_t), &mask) < 0) {
THROW_ERROR("failed to call __NR_sched_setaffinity");
}
@ -117,8 +149,9 @@ static int test_sched_setaffinity_via_explicit_syscall() {
// Recover the affinity mask
int nproc = sysconf(_SC_NPROCESSORS_ONLN);
cpu_set_t mask_old;
CPU_ZERO(&mask_old);
for (int i = 0; i < nproc; ++i) {
CPU_SET(i, &mask_old);
CPU_SET(g_online_cpu_idxs[i], &mask_old);
}
if (syscall(__NR_sched_setaffinity, 0, sizeof(cpu_set_t), &mask_old) < 0) {
THROW_ERROR("recover cpuset error");
@ -188,5 +221,9 @@ static test_case_t test_cases[] = {
};
/*
 * Test entry point: collects the usable CPU indexes, runs every test case and
 * releases the index table before exiting.
 *
 * Returns the result of test_suite_run(), or -1 when the CPU indexes cannot be
 * collected (the test cases index into `g_online_cpu_idxs`, so running them
 * without it would be meaningless).
 */
int main() {
    int ret;

    if (get_online_cpu() < 0) {
        /* free(NULL) is a no-op, so this is safe even if nothing was allocated. */
        free(g_online_cpu_idxs);
        return -1;
    }
    ret = test_suite_run(test_cases, ARRAY_SIZE(test_cases));
    free(g_online_cpu_idxs);
    return ret;
}