Work around the exit_group syscall

BACKGROUND

The exit_group syscall, which is implicitly called by libc after the main function
returns, kills all threads in a thread group, no matter whether those threads are
running, sleeping, or waiting on a futex.
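
For illustration, a program can also invoke the syscall directly; the following
is a minimal, hypothetical C sketch (not part of this commit):

    #define _GNU_SOURCE
    #include <sys/syscall.h>
    #include <unistd.h>

    int main(void) {
        // Terminates every thread in the calling thread group with status 0,
        // no matter what those threads are doing at the moment.
        syscall(SYS_exit_group, 0);
        return 1; // never reached
    }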

PROBLEM

In normal use cases, exit_group has little to do since a well-written program
terminates all of its threads before the main function returns. But when this is
not the case, exit_group cleans up the mess.

Currently, Occlum does not implement exit_group, and the Occlum PAL process
waits for all tasks (i.e., SGX threads) to finish before exiting. So without
exit_group implemented, some tasks may still be running after the main task
exits, and this causes the Occlum PAL process to wait forever.

WORKAROUND

To implement a real exit_group, we need signals to kill threads. But we do not
have signals yet. So we come up with a workaround: instead of waiting for all
tasks to finish in the PAL, we wait only for the main task. As soon as the main
task exits, the PAL process terminates, killing the remaining tasks.
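
Condensed to PAL pseudocode, the change below amounts to roughly the following
(a sketch of the diff, not a verbatim excerpt):

    // Before: block until every SGX task (thread) has finished.
    //status = wait_all_tasks();

    // After: block only until the main task returns; once the PAL process
    // exits, the remaining enclave threads are reclaimed with it.
    status = wait_main_task();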
Tate, Hongliang Tian 2019-09-03 11:33:49 +00:00
parent 30c99add6f
commit 663f548f94
11 changed files with 166 additions and 16 deletions

@@ -55,9 +55,11 @@ pub extern "C" fn libos_run(host_tid: i32) -> i32 {
pub extern "C" fn dummy_ecall() -> i32 {
0
}
// Use 127 as a special value to indicate internal error from libos, not from
// user programs, although it is completely ok for a user program to return 127.
const EXIT_STATUS_INTERNAL_ERROR: i32 = 127;
// Use -128 as a special value to indicate internal error from libos, not from
// user programs. The LibOS ensures that a user program can only return a
// value between 0 and 255 (inclusive).
const EXIT_STATUS_INTERNAL_ERROR: i32 = -128;
fn parse_arguments(
path_ptr: *const c_char,
@@ -110,6 +112,16 @@ fn do_run(host_tid: pid_t) -> Result<i32> {
use rcore_fs::vfs::FileSystem;
crate::fs::ROOT_INODE.fs().sync()?;
// Only return the least significant 8 bits of the exit status
//
// From The Open Group Base Specifications Issue 7, 2018 edition:
// > The shell shall recognize the entire status value retrieved for the
// > command by the equivalent of the wait() function WEXITSTATUS macro...
//
// From the man page of wait() syscall:
// > WEXITSTATUS macro returns the exit status of the child. This consists of the least
// > significant 8 bits of the status
let exit_status = exit_status & 0x0000_00FF_i32;
Ok(exit_status)
}
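
As a quick illustration of the masking above (hypothetical values, C syntax,
not part of the diff):

    // Only the least significant 8 bits of the raw status survive, so a user
    // program's exit status always falls in the range 0..255.
    int raw = 0x1234;            // hypothetical raw exit status
    int reported = raw & 0xFF;   // 0x34 == 52
    // EXIT_STATUS_INTERNAL_ERROR is -128, which lies outside 0..255, so the
    // caller can always tell a LibOS-internal error from a user exit status.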

@@ -28,6 +28,7 @@ pub struct Process {
parent: Option<ProcessRef>,
children: Vec<ProcessWeakRef>,
waiting_children: Option<WaitQueue<ChildProcessFilter, pid_t>>,
//thread_group: ThreadGroupRef,
vm: ProcessVMRef,
file_table: FileTableRef,
rlimits: ResourceLimitsRef,
@@ -37,6 +38,7 @@ pub type ProcessRef = Arc<SgxMutex<Process>>;
pub type ProcessWeakRef = std::sync::Weak<SgxMutex<Process>>;
pub type FileTableRef = Arc<SgxMutex<FileTable>>;
pub type ProcessVMRef = Arc<SgxMutex<ProcessVM>>;
pub type ThreadGroupRef = Arc<SgxMutex<ThreadGroup>>;
pub fn do_getpid() -> pid_t {
let current_ref = get_current();

@@ -5,6 +5,8 @@ pub struct ThreadGroup {
threads: Vec<ProcessRef>,
}
impl ThreadGroup {}
bitflags! {
pub struct CloneFlags : u32 {
const CLONE_VM = 0x00000100;

@@ -5,6 +5,10 @@ static inline int a_load(volatile int* n) {
return *(volatile int*)n;
}
static inline void a_store(volatile int* n, int x) {
*n = x;
}
static inline int a_fetch_and_add(volatile int* n, int a) {
return __sync_fetch_and_add(n, a);
}

@@ -329,7 +329,11 @@ int SGX_CDECL main(int argc, const char *argv[])
return status;
}
status = wait_all_tasks();
// TODO: exit all tasks gracefully, instead of killing all remaining
// tasks automatically after the main task exits and the process
// terminates.
//status = wait_all_tasks();
status = wait_main_task();
gettimeofday(&appdie, NULL);
@@ -339,8 +343,12 @@ int SGX_CDECL main(int argc, const char *argv[])
printf("LibOS startup time: %lu microseconds\n", libos_startup_time);
printf("Apps running time: %lu microseconds\n", app_runtime);
/* Destroy the enclave */
sgx_destroy_enclave(global_eid);
// TODO: destroy the enclave gracefully
// We cannot destroy the enclave gracefully since we may still have
// running threads that are using the enclave at this point, which blocks
// the sgx_destroy_enclave call. This issue is related to "TODO: exit all tasks
// gracefully" above.
//sgx_destroy_enclave(global_eid);
return status;
}

@@ -1,3 +1,5 @@
#include <limits.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
@@ -12,9 +14,13 @@ int syscall();
#define gettid() syscall(__NR_gettid)
static volatile int num_tasks = 0;
static volatile int main_task_status = 0;
static volatile int any_fatal_error = 0;
// The LibOS never returns INT_MIN. As long as main_task_status == INT_MIN,
// the main task has not returned yet.
#define MAIN_TASK_NOT_RETURNED INT_MIN
static volatile int main_task_status = MAIN_TASK_NOT_RETURNED;
static int BEGIN_TASK(void) {
return a_fetch_and_add(&num_tasks, 1) == 0;
}
@@ -41,7 +47,10 @@ static void* __run_task_thread(void* _data) {
any_fatal_error = 1;
}
if (data->is_main_task) main_task_status = status;
if (data->is_main_task) {
a_store(&main_task_status, status);
futex_wakeup(&main_task_status);
}
free(data);
END_TASK();
@@ -66,6 +75,13 @@ int run_new_task(sgx_enclave_id_t eid) {
return 0;
}
int wait_main_task(void) {
while ((a_load(&main_task_status)) == MAIN_TASK_NOT_RETURNED) {
futex_wait(&main_task_status, MAIN_TASK_NOT_RETURNED);
}
return main_task_status;
}
int wait_all_tasks(void) {
int cur_num_tasks;
while ((cur_num_tasks = a_load(&num_tasks)) != 0) {

@@ -3,5 +3,6 @@
int run_new_task(sgx_enclave_id_t eid);
int wait_all_tasks(void);
int wait_main_task(void);
#endif /* __TASK_H_ */

@@ -7,7 +7,7 @@ TEST_DEPS := dev_null client
# Tests: need to be compiled and run by test-% target
TESTS := empty env hello_world malloc mmap file fs_perms getpid spawn sched pipe time \
truncate readdir mkdir link tls pthread uname rlimit server \
server_epoll unix_socket cout hostfs cpuid rdtsc device sleep
server_epoll unix_socket cout hostfs cpuid rdtsc device sleep exit_group
# Benchmarks: need to be compiled and run by bench-% target
BENCHES := spawn_and_exit_latency pipe_throughput unix_socket_throughput

test/exit_group/Makefile (new file, 5 lines)

@@ -0,0 +1,5 @@
include ../test_common.mk
EXTRA_C_FLAGS :=
EXTRA_LINK_FLAGS :=
BIN_ARGS :=

test/exit_group/main.c (new file, 83 lines)

@@ -0,0 +1,83 @@
#define _GNU_SOURCE
#include <sys/types.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <pthread.h>
#include <stdio.h>
#include <linux/futex.h>
#include "test.h"
// ============================================================================
// Test case
// ============================================================================
//
// Three types of threads that will not exit voluntarily
//
// Type 1: a busy loop thread
static void* busyloop_thread_func(void* _) {
while (1) {
// By calling getpid, we give the LibOS a chance to force the thread
// to terminate if exit_group is called by any thread in a thread group
getpid();
}
return NULL;
}
// Type 2: a sleeping thread
static void* sleeping_thread_func(void* _) {
unsigned int a_year_in_sec = 365 * 24 * 60 * 60;
sleep(a_year_in_sec);
return NULL;
}
// Type 3: a thread that keeps waiting on a futex
static void* futex_wait_thread_func(void* _) {
// Wait on a futex forever
int my_private_futex = 0;
syscall(SYS_futex, &my_private_futex, FUTEX_WAIT, my_private_futex, NULL);
return NULL;
}
// exit_group syscall should terminate all threads in a thread group.
int test_exit_group_to_force_threads_terminate(void) {
// Create three types of threads that will not exit voluntarily
pthread_t busyloop_thread;
if (pthread_create(&busyloop_thread, NULL, busyloop_thread_func, NULL) < 0) {
printf("ERROR: pthread_create failed\n");
return -1;
}
pthread_t sleeping_thread;
if (pthread_create(&sleeping_thread, NULL, sleeping_thread_func, NULL) < 0) {
printf("ERROR: pthread_create failed\n");
return -1;
}
pthread_t futex_wait_thread;
if (pthread_create(&futex_wait_thread, NULL, futex_wait_thread_func, NULL) < 0) {
printf("ERROR: pthread_create failed\n");
return -1;
}
// Sleep for a while to make sure all three threads are running
useconds_t _200ms = 200 * 1000;
usleep(_200ms);
// exit_group syscall will be called eventually by libc's exit, after the
// main function returns. If Occlum terminates normally, it means the
// exit_group syscall is taking effect.
return 0;
}
// ============================================================================
// Test suite
// ============================================================================
static test_case_t test_cases[] = {
TEST_CASE(test_exit_group_to_force_threads_terminate)
};
int main() {
return test_suite_run(test_cases, ARRAY_SIZE(test_cases));
}

@@ -1,21 +1,26 @@
#include <sys/types.h>
#include <pthread.h>
#include <stdio.h>
#include "test.h"
/*
* Child threads
*/
// ============================================================================
// Helper macros
// ============================================================================
#define NTHREADS (4)
#define STACK_SIZE (8 * 1024)
#define LOCAL_COUNT (100000L)
// ============================================================================
// The test case of concurrent counter
// ============================================================================
#define LOCAL_COUNT (100000UL)
#define EXPECTED_GLOBAL_COUNT (LOCAL_COUNT * NTHREADS)
struct thread_arg {
int ti;
long local_count;
volatile long* global_count;
volatile unsigned long* global_count;
pthread_mutex_t* mutex;
};
@@ -31,11 +36,11 @@ static void* thread_func(void* _arg) {
return NULL;
}
int main(int argc, const char* argv[]) {
static int test_mutex_with_concurrent_counter(void) {
/*
* Multiple threads are to increase a global counter concurrently
*/
volatile long global_count = 0;
volatile unsigned long global_count = 0;
pthread_t threads[NTHREADS];
struct thread_arg thread_args[NTHREADS];
/*
@@ -79,3 +84,15 @@ int main(int argc, const char* argv[]) {
pthread_mutex_destroy(&mutex);
return 0;
}
// ============================================================================
// Test suite main
// ============================================================================
static test_case_t test_cases[] = {
TEST_CASE(test_mutex_with_concurrent_counter)
};
int main() {
return test_suite_run(test_cases, ARRAY_SIZE(test_cases));
}