diff --git a/src/libos/src/interrupt/mod.rs b/src/libos/src/interrupt/mod.rs index c3b14bdf..936a743a 100644 --- a/src/libos/src/interrupt/mod.rs +++ b/src/libos/src/interrupt/mod.rs @@ -46,6 +46,7 @@ pub fn broadcast_interrupts() -> Result { let should_interrupt_thread = |thread: &&ThreadRef| -> bool { // TODO: check Thread::sig_mask to reduce false positives thread.process().is_forced_to_exit() + || thread.is_forced_to_stop() || !thread.sig_queues().read().unwrap().empty() || !thread.process().sig_queues().read().unwrap().empty() }; diff --git a/src/libos/src/process/do_vfork.rs b/src/libos/src/process/do_vfork.rs index d0af9d48..b369c97e 100644 --- a/src/libos/src/process/do_vfork.rs +++ b/src/libos/src/process/do_vfork.rs @@ -1,5 +1,7 @@ +use super::untrusted_event::{set_event, wait_event}; use super::{ProcessRef, ThreadId, ThreadRef}; use crate::fs::FileTable; +use crate::interrupt::broadcast_interrupts; use crate::prelude::*; use crate::syscall::CpuContext; use std::collections::HashMap; @@ -12,10 +14,15 @@ use std::mem; // Thus in this implementation, the main idea is to let child use parent's task until exit or execve. // // Limitation: -// The child process will not have a complete process structure before execve. Thus during the time from vfork +// 1. The child process will not have a complete process structure before execve. Thus during the time from vfork // to new child process execve or exit, the child process just reuse the parent process's everything, including // task, pid and etc. And also the log of child process will not start from the point that vfork returns but the // point that execve returns. +// 2. When vfork is called and the current process has other running child threads, for Linux, the other threads remain +// running. For Occlum, this behavior is different. All the other threads will be frozen until the vfork returns or +// execve is called in the child process. The reason is that since Occlum doesn't support fork, many applications will +// use vfork to replace fork. For multi-threaded applications, if vfork doesn't stop other child threads, the application +// will be more likely to fail because the child process directly uses the VM and the file table of the parent process. lazy_static! { // Store all the parents's file tables who call vfork. It will be recovered when the child exits or has its own task. @@ -38,6 +45,16 @@ pub fn do_vfork(mut context: *mut CpuContext) -> Result { new_tid.as_u32() as pid_t }; + // stop all other child threads + let child_threads = current.process().threads(); + child_threads.iter().for_each(|thread| { + if thread.tid() != current.tid() { + thread.force_stop(); + } + }); + // Don't hesitate. Interrupt all threads right now to stop child threads. + broadcast_interrupts(); + // Save parent's context in TLS VFORK_CONTEXT.with(|cell| { let mut ctx = cell.borrow_mut(); @@ -78,7 +95,21 @@ pub fn vfork_return_to_parent( mut context: *mut CpuContext, current_ref: &ThreadRef, ) -> Result { - return restore_parent_process(context, current_ref); + let child_pid = restore_parent_process(context, current_ref)?; + + // Wake parent's child thread which are all sleeping + let current = current!(); + let child_threads = current.process().threads(); + child_threads.iter().for_each(|thread| { + thread.resume(); + let thread_ptr = thread.raw_ptr(); + if current.raw_ptr() != thread_ptr { + set_event(thread_ptr as *const c_void); + info!("Thread 0x{:x} is waken", thread_ptr); + } + }); + + Ok(child_pid) } fn restore_parent_process(mut context: *mut CpuContext, current_ref: &ThreadRef) -> Result { @@ -156,3 +187,17 @@ fn close_files_opened_by_child(current: &ThreadRef, parent_file_table: &FileTabl .for_each(|&fd| current.close_file(fd).expect("close child file error")); Ok(()) } + +pub fn handle_force_stop() { + let current = current!(); + if current.is_forced_to_stop() { + let current_thread_ptr = current.raw_ptr(); + info!( + "Thread 0x{:x} is forced to stop ...", + current_thread_ptr as usize + ); + while current.is_forced_to_stop() { + wait_event(current_thread_ptr as *const c_void); + } + } +} diff --git a/src/libos/src/process/mod.rs b/src/libos/src/process/mod.rs index be4dbc8d..921002bf 100644 --- a/src/libos/src/process/mod.rs +++ b/src/libos/src/process/mod.rs @@ -25,7 +25,7 @@ pub use self::do_exit::handle_force_exit; pub use self::do_futex::{futex_wait, futex_wake}; pub use self::do_robust_list::RobustListHead; pub use self::do_spawn::do_spawn_without_exec; -pub use self::do_vfork::do_vfork; +pub use self::do_vfork::{do_vfork, handle_force_stop}; pub use self::do_wait4::idle_reap_zombie_children; pub use self::process::{Process, ProcessFilter, ProcessStatus, IDLE}; pub use self::spawn_attribute::posix_spawnattr_t; @@ -53,6 +53,7 @@ mod spawn_attribute; mod syscalls; mod term_status; mod thread; +mod untrusted_event; mod wait; pub mod current; diff --git a/src/libos/src/process/thread/builder.rs b/src/libos/src/process/thread/builder.rs index aff98400..d532144c 100644 --- a/src/libos/src/process/thread/builder.rs +++ b/src/libos/src/process/thread/builder.rs @@ -134,6 +134,7 @@ impl ThreadBuilder { SgxMutex::new(None) }; let host_eventfd = Arc::new(HostEventFd::new()?); + let raw_ptr = RwLock::new(0); let new_thread = Arc::new(Thread { task, @@ -154,6 +155,7 @@ impl ThreadBuilder { sig_stack, profiler, host_eventfd, + raw_ptr, }); let mut inner = new_thread.process().inner(); diff --git a/src/libos/src/process/thread/mod.rs b/src/libos/src/process/thread/mod.rs index 468a6816..3af9bd41 100644 --- a/src/libos/src/process/thread/mod.rs +++ b/src/libos/src/process/thread/mod.rs @@ -48,6 +48,7 @@ pub struct Thread { profiler: SgxMutex>, // Misc host_eventfd: Arc, + raw_ptr: RwLock, } #[derive(Debug, PartialEq, Clone, Copy)] @@ -55,6 +56,7 @@ pub enum ThreadStatus { Init, Running, Exited, + Stopped, } impl Thread { @@ -114,6 +116,11 @@ impl Thread { &self.profiler } + /// Get the host thread's raw pointer of this libos thread + pub fn raw_ptr(&self) -> usize { + self.raw_ptr.read().unwrap().clone() + } + /// Get a file from the file table. pub fn file(&self, fd: FileDesc) -> Result { self.files().lock().unwrap().get(fd) @@ -206,7 +213,15 @@ impl Thread { pub(super) fn start(&self, host_tid: pid_t) { self.sched().lock().unwrap().attach(host_tid); - self.inner().start(); + let mut raw_ptr = self.raw_ptr.write().unwrap(); + *raw_ptr = (unsafe { sgx_thread_get_self() } as usize); + + // Before the thread starts, this thread could be stopped by other threads + if self.is_forced_to_stop() { + info!("thread is forced to stopped before this thread starts"); + } else { + self.inner().start(); + } let eventfd = EventFile::new( 0, @@ -266,6 +281,20 @@ impl Thread { pub(super) fn inner(&self) -> SgxMutexGuard { self.inner.lock().unwrap() } + + pub fn force_stop(&self) { + let mut inner = self.inner(); + inner.stop(); + } + + pub fn is_forced_to_stop(&self) -> bool { + self.inner().status() == ThreadStatus::Stopped + } + + pub fn resume(&self) { + let mut inner = self.inner(); + inner.resume(); + } } impl PartialEq for Thread { @@ -302,6 +331,7 @@ pub enum ThreadInner { Init, Running, Exited { term_status: TermStatus }, + Stopped, } impl ThreadInner { @@ -314,6 +344,7 @@ impl ThreadInner { Self::Init { .. } => ThreadStatus::Init, Self::Running { .. } => ThreadStatus::Running, Self::Exited { .. } => ThreadStatus::Exited, + Self::Stopped { .. } => ThreadStatus::Stopped, } } @@ -325,7 +356,14 @@ impl ThreadInner { } pub fn start(&mut self) { - debug_assert!(self.status() == ThreadStatus::Init); + *self = Self::Running; + } + + pub fn stop(&mut self) { + *self = Self::Stopped; + } + + pub fn resume(&mut self) { *self = Self::Running; } @@ -334,3 +372,7 @@ impl ThreadInner { *self = Self::Exited { term_status }; } } + +extern "C" { + pub(crate) fn sgx_thread_get_self() -> *const c_void; +} diff --git a/src/libos/src/process/untrusted_event.rs b/src/libos/src/process/untrusted_event.rs new file mode 100644 index 00000000..1a8b884a --- /dev/null +++ b/src/libos/src/process/untrusted_event.rs @@ -0,0 +1,51 @@ +use crate::prelude::*; + +pub(crate) fn wait_event(thread: *const c_void) { + let mut ret: c_int = 0; + let mut sgx_ret: c_int = 0; + unsafe { + sgx_ret = sgx_thread_wait_untrusted_event_ocall(&mut ret as *mut c_int, thread); + } + if ret != 0 || sgx_ret != 0 { + panic!("ERROR: OCall failed!"); + } +} + +pub(crate) fn set_event(thread: *const c_void) { + let mut ret: c_int = 0; + let mut sgx_ret: c_int = 0; + unsafe { + sgx_ret = sgx_thread_set_untrusted_event_ocall(&mut ret as *mut c_int, thread); + } + if ret != 0 || sgx_ret != 0 { + panic!("ERROR: OCall failed!"); + } +} + +extern "C" { + /* Go outside and wait on my untrusted event */ + pub(crate) fn sgx_thread_wait_untrusted_event_ocall( + ret: *mut c_int, + self_thread: *const c_void, + ) -> c_int; + + /* Wake a thread waiting on its untrusted event */ + pub(crate) fn sgx_thread_set_untrusted_event_ocall( + ret: *mut c_int, + waiter_thread: *const c_void, + ) -> c_int; + + /* Wake a thread waiting on its untrusted event, and wait on my untrusted event */ + pub(crate) fn sgx_thread_setwait_untrusted_events_ocall( + ret: *mut c_int, + waiter_thread: *const c_void, + self_thread: *const c_void, + ) -> c_int; + + /* Wake multiple threads waiting on their untrusted events */ + pub(crate) fn sgx_thread_set_multiple_untrusted_events_ocall( + ret: *mut c_int, + waiter_threads: *const *const c_void, + total: size_t, + ) -> c_int; +} diff --git a/src/libos/src/process/wait.rs b/src/libos/src/process/wait.rs index f83fd06f..b6d42543 100644 --- a/src/libos/src/process/wait.rs +++ b/src/libos/src/process/wait.rs @@ -1,3 +1,5 @@ +use super::thread::sgx_thread_get_self; +use super::untrusted_event::{set_event, wait_event}; /// A wait/wakeup mechanism that connects wait4 and exit system calls. use crate::prelude::*; @@ -113,50 +115,3 @@ where 1 } } - -fn wait_event(thread: *const c_void) { - let mut ret: c_int = 0; - let mut sgx_ret: c_int = 0; - unsafe { - sgx_ret = sgx_thread_wait_untrusted_event_ocall(&mut ret as *mut c_int, thread); - } - if ret != 0 || sgx_ret != 0 { - panic!("ERROR: OCall failed!"); - } -} - -fn set_event(thread: *const c_void) { - let mut ret: c_int = 0; - let mut sgx_ret: c_int = 0; - unsafe { - sgx_ret = sgx_thread_set_untrusted_event_ocall(&mut ret as *mut c_int, thread); - } - if ret != 0 || sgx_ret != 0 { - panic!("ERROR: OCall failed!"); - } -} - -extern "C" { - fn sgx_thread_get_self() -> *const c_void; - - /* Go outside and wait on my untrusted event */ - fn sgx_thread_wait_untrusted_event_ocall(ret: *mut c_int, self_thread: *const c_void) -> c_int; - - /* Wake a thread waiting on its untrusted event */ - fn sgx_thread_set_untrusted_event_ocall(ret: *mut c_int, waiter_thread: *const c_void) - -> c_int; - - /* Wake a thread waiting on its untrusted event, and wait on my untrusted event */ - fn sgx_thread_setwait_untrusted_events_ocall( - ret: *mut c_int, - waiter_thread: *const c_void, - self_thread: *const c_void, - ) -> c_int; - - /* Wake multiple threads waiting on their untrusted events */ - fn sgx_thread_set_multiple_untrusted_events_ocall( - ret: *mut c_int, - waiter_threads: *const *const c_void, - total: size_t, - ) -> c_int; -} diff --git a/src/libos/src/signal/do_sigreturn.rs b/src/libos/src/signal/do_sigreturn.rs index dbf65e30..823ce7c9 100644 --- a/src/libos/src/signal/do_sigreturn.rs +++ b/src/libos/src/signal/do_sigreturn.rs @@ -67,7 +67,7 @@ pub fn deliver_signal(cpu_context: &mut CpuContext) { let thread = current!(); let process = thread.process(); - if !process.is_forced_to_exit() { + if !process.is_forced_to_exit() && !thread.is_forced_to_stop() { do_deliver_signal(&thread, &process, cpu_context); } diff --git a/src/libos/src/syscall/mod.rs b/src/libos/src/syscall/mod.rs index 83000e8b..8f674aa6 100644 --- a/src/libos/src/syscall/mod.rs +++ b/src/libos/src/syscall/mod.rs @@ -730,6 +730,8 @@ fn do_syscall(user_context: &mut CpuContext) { crate::signal::deliver_signal(user_context); + crate::process::handle_force_stop(); + crate::process::handle_force_exit(); } diff --git a/test/vfork/Makefile b/test/vfork/Makefile index 02032dff..57e392ae 100644 --- a/test/vfork/Makefile +++ b/test/vfork/Makefile @@ -1,5 +1,5 @@ include ../test_common.mk EXTRA_C_FLAGS := -g -EXTRA_LINK_FLAGS := +EXTRA_LINK_FLAGS := -lpthread BIN_ARGS := diff --git a/test/vfork/main.c b/test/vfork/main.c index ad645e67..9943daa2 100644 --- a/test/vfork/main.c +++ b/test/vfork/main.c @@ -3,6 +3,7 @@ #include #include #include +#include #include "test.h" // Note: This test intends to test the case that child process directly calls _exit() @@ -89,10 +90,87 @@ parent_exit: exit(1); } +volatile static int test_stop_child_flag = 0; + +static void *child_thread_routine(void *_arg) { + printf("Child thread starts\n"); + test_stop_child_flag = 1; + + struct timespec t1, t2; + if (clock_gettime(CLOCK_REALTIME, &t1)) { + return (void *) -1; + } + + sleep(1); + + if (clock_gettime(CLOCK_REALTIME, &t2)) { + return (void *) -1; + } + + // Parent thread vfork and will stop this thread for several seconds + if (t2.tv_sec - t1.tv_sec <= 1) { + printf("the thread is not stopped"); + exit(-1); + } + + printf("child thread exits\n"); + return NULL; +} + +// Test the behavior that when vfork is called, the parent process' other child threads are forced to stopped. +// +// This test case has different behaviors for Linux and Occlum +// This limitation is recorded in src/libos/src/process/do_vfork.rs +int test_vfork_stop_child_thread() { + pthread_t child_thread; + pid_t child_pid; + struct timespec ts; + ts.tv_sec = 3; + ts.tv_nsec = 0; + if (pthread_create(&child_thread, NULL, child_thread_routine, NULL) < 0) { + THROW_ERROR("pthread_create failed\n"); + } + + // Wait for child thread to start + while (test_stop_child_flag == 0); + + child_pid = vfork(); + if (child_pid == 0) { + printf("child process created\n"); + char **child_argv = calloc(1, sizeof(char *) * 2); + child_argv[0] = "getpid"; + + // Wait for a few seconds + while (1) { + int ret = nanosleep(&ts, &ts); + if (ret == 0) { + break; + } + if (ret < 0 && errno != EINTR) { + THROW_ERROR("nanosleep failed"); + } + } + + printf("child process exec\n"); + int ret = execve("/bin/getpid", child_argv, NULL); + if (ret != 0) { + printf("child process execve error\n"); + } + _exit(1); + } else { + printf("return to parent\n"); + + pthread_join(child_thread, NULL); + } + + return 0; +} + static test_case_t test_cases[] = { TEST_CASE(test_vfork_exit), TEST_CASE(test_multiple_vfork_execve), TEST_CASE(test_vfork_isolate_file_table), + TEST_CASE(test_vfork_stop_child_thread), }; int main() {