occlum/src/pal/task.c
Tate, Hongliang Tian 663f548f94 Workaround exit_group syscall
BACKGROUND

The exit_group syscall, which is implicitly called by libc after the main function
returns, kills all threads in a thread group, even if these threads are
running, sleeping, or waiting on a futex.

PROBLEM

In normal use cases, exit_group does nothing since a well-written program
should terminate all threads before the main function returns. But when this is
not the case, exit_group can clean up the mess.

Currently, Occlum does not implement exit_group. And the Occlum PAL process
waits for all tasks (i.e., SGX threads) to finish before exiting. So without
exit_group implemented, some tasks may still be running after the main task
exits. And this causes the Occlum PAL process to wait---forever.

WORKAROUND

To implement a real exit_group, we need signals to kill threads. But we do not
have signals, yet. So we come up with a workaround: instead of waiting for all
tasks to finish in PAL, we just wait for the main task. As soon as the main
task exits, the PAL process terminates, killing the remaining tasks.
2019-11-07 13:34:53 +00:00

92 lines
2.1 KiB
C

#include <limits.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/syscall.h>
#include "atomic.h"
#include "futex.h"
#include "sgx_urts.h"
#include "Enclave_u.h"
// Non-prototype declaration of syscall(), so the gettid() macro below can call
// it. NOTE(review): no header declaring syscall() (e.g. <unistd.h>) is
// included in the visible part of this file -- confirm this is intentional.
int syscall();
// Invokes the gettid system call directly (by its __NR_gettid number) to get
// the calling thread's ID.
#define gettid() syscall(__NR_gettid)
// Number of tasks currently accounted for: incremented by BEGIN_TASK() and
// decremented by END_TASK(). Also serves as the futex word that
// wait_all_tasks() waits on.
static volatile int num_tasks = 0;
// Set to 1 when the libos_run ECall of any task fails; wait_all_tasks()
// returns -1 when this flag is set.
static volatile int any_fatal_error = 0;
// The LibOS never returns INT_MIN. As long as main_task_status == INT_MIN,
// the main task must not have returned.
#define MAIN_TASK_NOT_RETURNED INT_MIN
// Status reported by the main task's libos_run call, published by
// __run_task_thread(). Also serves as the futex word that wait_main_task()
// waits on.
static volatile int main_task_status = MAIN_TASK_NOT_RETURNED;
static int BEGIN_TASK(void) {
return a_fetch_and_add(&num_tasks, 1) == 0;
}
static void END_TASK(void) {
if (a_fetch_and_add(&num_tasks, -1) == 1) {
futex_wakeup(&num_tasks);
}
}
// Arguments passed from run_new_task() to __run_task_thread(). The instance is
// allocated with malloc() by run_new_task() and freed by the thread function
// (or by run_new_task() itself if the thread could not be created).
struct task_thread_data {
// Non-zero if this task is the main task, as decided by BEGIN_TASK().
int is_main_task;
// ID of the enclave passed to libos_run.
sgx_enclave_id_t eid;
};
// Thread entry point: runs one task inside the enclave via the libos_run ECall.
//
// `_data` points to a heap-allocated struct task_thread_data, which this
// function frees before returning. If the ECall fails, an error message is
// printed and any_fatal_error is set. If the task is the main task, its status
// is stored into main_task_status and a futex wakeup is issued on it. Finally,
// the task is un-counted via END_TASK(). Always returns NULL.
static void* __run_task_thread(void* _data) {
    struct task_thread_data* task = _data;
    int exit_status = 0;

    sgx_status_t ecall_ret = libos_run(task->eid, &exit_status, gettid());
    if (ecall_ret != SGX_SUCCESS) {
        // TODO: deal with ECALL error
        printf("ERROR: ECall libos_run failed\n");
        any_fatal_error = 1;
    }

    // Publish the main task's status before releasing the task's resources.
    if (task->is_main_task) {
        a_store(&main_task_status, exit_status);
        futex_wakeup(&main_task_status);
    }

    free(task);
    END_TASK();
    return NULL;
}
// Start a new task in the enclave `eid` on a freshly created, detached thread.
//
// The task is counted via BEGIN_TASK(); it is flagged as the main task when no
// other task was counted at that moment. The heap-allocated argument block is
// handed over to the new thread, which frees it; if the thread cannot be
// created, it is freed here and the task is un-counted again.
//
// Returns 0 on success and a negative value on failure: -1 if the argument
// block could not be allocated, or the negated error number reported by
// pthread_create().
int run_new_task(sgx_enclave_id_t eid) {
    struct task_thread_data* data = malloc(sizeof(*data));
    if (data == NULL) {
        // Fail before touching the task counter so there is nothing to undo.
        return -1;
    }
    data->is_main_task = BEGIN_TASK();
    data->eid = eid;

    pthread_t thread;
    // pthread_create() signals failure with a positive error number (it never
    // returns a negative value), so any non-zero result must be treated as an
    // error.
    int ret = pthread_create(&thread, NULL, __run_task_thread, data);
    if (ret != 0) {
        free(data);
        END_TASK();
        return -ret;
    }

    pthread_detach(thread);
    return 0;
}
// Block until the main task has published its status.
//
// Repeatedly futex-waits on main_task_status for as long as it still holds the
// MAIN_TASK_NOT_RETURNED sentinel, then returns the published status.
int wait_main_task(void) {
    for (;;) {
        if (a_load(&main_task_status) != MAIN_TASK_NOT_RETURNED) {
            break;
        }
        futex_wait(&main_task_status, MAIN_TASK_NOT_RETURNED);
    }
    return main_task_status;
}
int wait_all_tasks(void) {
int cur_num_tasks;
while ((cur_num_tasks = a_load(&num_tasks)) != 0) {
futex_wait(&num_tasks, cur_num_tasks);
}
return any_fatal_error ? -1 : main_task_status;
}