From 4c3ca791343a83b8dd2c555d4bc77f8e4e8f22a9 Mon Sep 17 00:00:00 2001 From: "Hui, Chunyang" Date: Tue, 27 Sep 2022 11:33:19 +0000 Subject: [PATCH] Make vfork stop parent child threads When vfork is called and the current process has other running child threads, for Linux, the other threads remain running. For Occlum, this behavior is different. All the other threads will be frozen until the vfork returns or execve is called in the child process. The reason is that since Occlum doesn't support fork, many applications will use vfork to replace fork. For multi-threaded applications, if vfork doesn't stop other child threads, the application will be more likely to fail because the child process directly uses the VM and the file table of the parent process. --- src/libos/src/interrupt/mod.rs | 1 + src/libos/src/process/do_vfork.rs | 49 ++++++++++++++- src/libos/src/process/mod.rs | 3 +- src/libos/src/process/thread/builder.rs | 2 + src/libos/src/process/thread/mod.rs | 46 +++++++++++++- src/libos/src/process/untrusted_event.rs | 51 ++++++++++++++++ src/libos/src/process/wait.rs | 49 +-------------- src/libos/src/signal/do_sigreturn.rs | 2 +- src/libos/src/syscall/mod.rs | 2 + test/vfork/Makefile | 2 +- test/vfork/main.c | 78 ++++++++++++++++++++++++ 11 files changed, 231 insertions(+), 54 deletions(-) create mode 100644 src/libos/src/process/untrusted_event.rs diff --git a/src/libos/src/interrupt/mod.rs b/src/libos/src/interrupt/mod.rs index c3b14bdf..936a743a 100644 --- a/src/libos/src/interrupt/mod.rs +++ b/src/libos/src/interrupt/mod.rs @@ -46,6 +46,7 @@ pub fn broadcast_interrupts() -> Result { let should_interrupt_thread = |thread: &&ThreadRef| -> bool { // TODO: check Thread::sig_mask to reduce false positives thread.process().is_forced_to_exit() + || thread.is_forced_to_stop() || !thread.sig_queues().read().unwrap().empty() || !thread.process().sig_queues().read().unwrap().empty() }; diff --git a/src/libos/src/process/do_vfork.rs b/src/libos/src/process/do_vfork.rs index d0af9d48..b369c97e 100644 --- a/src/libos/src/process/do_vfork.rs +++ b/src/libos/src/process/do_vfork.rs @@ -1,5 +1,7 @@ +use super::untrusted_event::{set_event, wait_event}; use super::{ProcessRef, ThreadId, ThreadRef}; use crate::fs::FileTable; +use crate::interrupt::broadcast_interrupts; use crate::prelude::*; use crate::syscall::CpuContext; use std::collections::HashMap; @@ -12,10 +14,15 @@ use std::mem; // Thus in this implementation, the main idea is to let child use parent's task until exit or execve. // // Limitation: -// The child process will not have a complete process structure before execve. Thus during the time from vfork +// 1. The child process will not have a complete process structure before execve. Thus during the time from vfork // to new child process execve or exit, the child process just reuse the parent process's everything, including // task, pid and etc. And also the log of child process will not start from the point that vfork returns but the // point that execve returns. +// 2. When vfork is called and the current process has other running child threads, for Linux, the other threads remain +// running. For Occlum, this behavior is different. All the other threads will be frozen until the vfork returns or +// execve is called in the child process. The reason is that since Occlum doesn't support fork, many applications will +// use vfork to replace fork. For multi-threaded applications, if vfork doesn't stop other child threads, the application +// will be more likely to fail because the child process directly uses the VM and the file table of the parent process. lazy_static! { // Store all the parents's file tables who call vfork. It will be recovered when the child exits or has its own task. @@ -38,6 +45,16 @@ pub fn do_vfork(mut context: *mut CpuContext) -> Result { new_tid.as_u32() as pid_t }; + // stop all other child threads + let child_threads = current.process().threads(); + child_threads.iter().for_each(|thread| { + if thread.tid() != current.tid() { + thread.force_stop(); + } + }); + // Don't hesitate. Interrupt all threads right now to stop child threads. + broadcast_interrupts(); + // Save parent's context in TLS VFORK_CONTEXT.with(|cell| { let mut ctx = cell.borrow_mut(); @@ -78,7 +95,21 @@ pub fn vfork_return_to_parent( mut context: *mut CpuContext, current_ref: &ThreadRef, ) -> Result { - return restore_parent_process(context, current_ref); + let child_pid = restore_parent_process(context, current_ref)?; + + // Wake parent's child thread which are all sleeping + let current = current!(); + let child_threads = current.process().threads(); + child_threads.iter().for_each(|thread| { + thread.resume(); + let thread_ptr = thread.raw_ptr(); + if current.raw_ptr() != thread_ptr { + set_event(thread_ptr as *const c_void); + info!("Thread 0x{:x} is waken", thread_ptr); + } + }); + + Ok(child_pid) } fn restore_parent_process(mut context: *mut CpuContext, current_ref: &ThreadRef) -> Result { @@ -156,3 +187,17 @@ fn close_files_opened_by_child(current: &ThreadRef, parent_file_table: &FileTabl .for_each(|&fd| current.close_file(fd).expect("close child file error")); Ok(()) } + +pub fn handle_force_stop() { + let current = current!(); + if current.is_forced_to_stop() { + let current_thread_ptr = current.raw_ptr(); + info!( + "Thread 0x{:x} is forced to stop ...", + current_thread_ptr as usize + ); + while current.is_forced_to_stop() { + wait_event(current_thread_ptr as *const c_void); + } + } +} diff --git a/src/libos/src/process/mod.rs b/src/libos/src/process/mod.rs index be4dbc8d..921002bf 100644 --- a/src/libos/src/process/mod.rs +++ b/src/libos/src/process/mod.rs @@ -25,7 +25,7 @@ pub use self::do_exit::handle_force_exit; pub use self::do_futex::{futex_wait, futex_wake}; pub use self::do_robust_list::RobustListHead; pub use self::do_spawn::do_spawn_without_exec; -pub use self::do_vfork::do_vfork; +pub use self::do_vfork::{do_vfork, handle_force_stop}; pub use self::do_wait4::idle_reap_zombie_children; pub use self::process::{Process, ProcessFilter, ProcessStatus, IDLE}; pub use self::spawn_attribute::posix_spawnattr_t; @@ -53,6 +53,7 @@ mod spawn_attribute; mod syscalls; mod term_status; mod thread; +mod untrusted_event; mod wait; pub mod current; diff --git a/src/libos/src/process/thread/builder.rs b/src/libos/src/process/thread/builder.rs index aff98400..d532144c 100644 --- a/src/libos/src/process/thread/builder.rs +++ b/src/libos/src/process/thread/builder.rs @@ -134,6 +134,7 @@ impl ThreadBuilder { SgxMutex::new(None) }; let host_eventfd = Arc::new(HostEventFd::new()?); + let raw_ptr = RwLock::new(0); let new_thread = Arc::new(Thread { task, @@ -154,6 +155,7 @@ impl ThreadBuilder { sig_stack, profiler, host_eventfd, + raw_ptr, }); let mut inner = new_thread.process().inner(); diff --git a/src/libos/src/process/thread/mod.rs b/src/libos/src/process/thread/mod.rs index 468a6816..3af9bd41 100644 --- a/src/libos/src/process/thread/mod.rs +++ b/src/libos/src/process/thread/mod.rs @@ -48,6 +48,7 @@ pub struct Thread { profiler: SgxMutex>, // Misc host_eventfd: Arc, + raw_ptr: RwLock, } #[derive(Debug, PartialEq, Clone, Copy)] @@ -55,6 +56,7 @@ pub enum ThreadStatus { Init, Running, Exited, + Stopped, } impl Thread { @@ -114,6 +116,11 @@ impl Thread { &self.profiler } + /// Get the host thread's raw pointer of this libos thread + pub fn raw_ptr(&self) -> usize { + self.raw_ptr.read().unwrap().clone() + } + /// Get a file from the file table. pub fn file(&self, fd: FileDesc) -> Result { self.files().lock().unwrap().get(fd) @@ -206,7 +213,15 @@ impl Thread { pub(super) fn start(&self, host_tid: pid_t) { self.sched().lock().unwrap().attach(host_tid); - self.inner().start(); + let mut raw_ptr = self.raw_ptr.write().unwrap(); + *raw_ptr = (unsafe { sgx_thread_get_self() } as usize); + + // Before the thread starts, this thread could be stopped by other threads + if self.is_forced_to_stop() { + info!("thread is forced to stopped before this thread starts"); + } else { + self.inner().start(); + } let eventfd = EventFile::new( 0, @@ -266,6 +281,20 @@ impl Thread { pub(super) fn inner(&self) -> SgxMutexGuard { self.inner.lock().unwrap() } + + pub fn force_stop(&self) { + let mut inner = self.inner(); + inner.stop(); + } + + pub fn is_forced_to_stop(&self) -> bool { + self.inner().status() == ThreadStatus::Stopped + } + + pub fn resume(&self) { + let mut inner = self.inner(); + inner.resume(); + } } impl PartialEq for Thread { @@ -302,6 +331,7 @@ pub enum ThreadInner { Init, Running, Exited { term_status: TermStatus }, + Stopped, } impl ThreadInner { @@ -314,6 +344,7 @@ impl ThreadInner { Self::Init { .. } => ThreadStatus::Init, Self::Running { .. } => ThreadStatus::Running, Self::Exited { .. } => ThreadStatus::Exited, + Self::Stopped { .. } => ThreadStatus::Stopped, } } @@ -325,7 +356,14 @@ impl ThreadInner { } pub fn start(&mut self) { - debug_assert!(self.status() == ThreadStatus::Init); + *self = Self::Running; + } + + pub fn stop(&mut self) { + *self = Self::Stopped; + } + + pub fn resume(&mut self) { *self = Self::Running; } @@ -334,3 +372,7 @@ impl ThreadInner { *self = Self::Exited { term_status }; } } + +extern "C" { + pub(crate) fn sgx_thread_get_self() -> *const c_void; +} diff --git a/src/libos/src/process/untrusted_event.rs b/src/libos/src/process/untrusted_event.rs new file mode 100644 index 00000000..1a8b884a --- /dev/null +++ b/src/libos/src/process/untrusted_event.rs @@ -0,0 +1,51 @@ +use crate::prelude::*; + +pub(crate) fn wait_event(thread: *const c_void) { + let mut ret: c_int = 0; + let mut sgx_ret: c_int = 0; + unsafe { + sgx_ret = sgx_thread_wait_untrusted_event_ocall(&mut ret as *mut c_int, thread); + } + if ret != 0 || sgx_ret != 0 { + panic!("ERROR: OCall failed!"); + } +} + +pub(crate) fn set_event(thread: *const c_void) { + let mut ret: c_int = 0; + let mut sgx_ret: c_int = 0; + unsafe { + sgx_ret = sgx_thread_set_untrusted_event_ocall(&mut ret as *mut c_int, thread); + } + if ret != 0 || sgx_ret != 0 { + panic!("ERROR: OCall failed!"); + } +} + +extern "C" { + /* Go outside and wait on my untrusted event */ + pub(crate) fn sgx_thread_wait_untrusted_event_ocall( + ret: *mut c_int, + self_thread: *const c_void, + ) -> c_int; + + /* Wake a thread waiting on its untrusted event */ + pub(crate) fn sgx_thread_set_untrusted_event_ocall( + ret: *mut c_int, + waiter_thread: *const c_void, + ) -> c_int; + + /* Wake a thread waiting on its untrusted event, and wait on my untrusted event */ + pub(crate) fn sgx_thread_setwait_untrusted_events_ocall( + ret: *mut c_int, + waiter_thread: *const c_void, + self_thread: *const c_void, + ) -> c_int; + + /* Wake multiple threads waiting on their untrusted events */ + pub(crate) fn sgx_thread_set_multiple_untrusted_events_ocall( + ret: *mut c_int, + waiter_threads: *const *const c_void, + total: size_t, + ) -> c_int; +} diff --git a/src/libos/src/process/wait.rs b/src/libos/src/process/wait.rs index f83fd06f..b6d42543 100644 --- a/src/libos/src/process/wait.rs +++ b/src/libos/src/process/wait.rs @@ -1,3 +1,5 @@ +use super::thread::sgx_thread_get_self; +use super::untrusted_event::{set_event, wait_event}; /// A wait/wakeup mechanism that connects wait4 and exit system calls. use crate::prelude::*; @@ -113,50 +115,3 @@ where 1 } } - -fn wait_event(thread: *const c_void) { - let mut ret: c_int = 0; - let mut sgx_ret: c_int = 0; - unsafe { - sgx_ret = sgx_thread_wait_untrusted_event_ocall(&mut ret as *mut c_int, thread); - } - if ret != 0 || sgx_ret != 0 { - panic!("ERROR: OCall failed!"); - } -} - -fn set_event(thread: *const c_void) { - let mut ret: c_int = 0; - let mut sgx_ret: c_int = 0; - unsafe { - sgx_ret = sgx_thread_set_untrusted_event_ocall(&mut ret as *mut c_int, thread); - } - if ret != 0 || sgx_ret != 0 { - panic!("ERROR: OCall failed!"); - } -} - -extern "C" { - fn sgx_thread_get_self() -> *const c_void; - - /* Go outside and wait on my untrusted event */ - fn sgx_thread_wait_untrusted_event_ocall(ret: *mut c_int, self_thread: *const c_void) -> c_int; - - /* Wake a thread waiting on its untrusted event */ - fn sgx_thread_set_untrusted_event_ocall(ret: *mut c_int, waiter_thread: *const c_void) - -> c_int; - - /* Wake a thread waiting on its untrusted event, and wait on my untrusted event */ - fn sgx_thread_setwait_untrusted_events_ocall( - ret: *mut c_int, - waiter_thread: *const c_void, - self_thread: *const c_void, - ) -> c_int; - - /* Wake multiple threads waiting on their untrusted events */ - fn sgx_thread_set_multiple_untrusted_events_ocall( - ret: *mut c_int, - waiter_threads: *const *const c_void, - total: size_t, - ) -> c_int; -} diff --git a/src/libos/src/signal/do_sigreturn.rs b/src/libos/src/signal/do_sigreturn.rs index dbf65e30..823ce7c9 100644 --- a/src/libos/src/signal/do_sigreturn.rs +++ b/src/libos/src/signal/do_sigreturn.rs @@ -67,7 +67,7 @@ pub fn deliver_signal(cpu_context: &mut CpuContext) { let thread = current!(); let process = thread.process(); - if !process.is_forced_to_exit() { + if !process.is_forced_to_exit() && !thread.is_forced_to_stop() { do_deliver_signal(&thread, &process, cpu_context); } diff --git a/src/libos/src/syscall/mod.rs b/src/libos/src/syscall/mod.rs index 83000e8b..8f674aa6 100644 --- a/src/libos/src/syscall/mod.rs +++ b/src/libos/src/syscall/mod.rs @@ -730,6 +730,8 @@ fn do_syscall(user_context: &mut CpuContext) { crate::signal::deliver_signal(user_context); + crate::process::handle_force_stop(); + crate::process::handle_force_exit(); } diff --git a/test/vfork/Makefile b/test/vfork/Makefile index 02032dff..57e392ae 100644 --- a/test/vfork/Makefile +++ b/test/vfork/Makefile @@ -1,5 +1,5 @@ include ../test_common.mk EXTRA_C_FLAGS := -g -EXTRA_LINK_FLAGS := +EXTRA_LINK_FLAGS := -lpthread BIN_ARGS := diff --git a/test/vfork/main.c b/test/vfork/main.c index ad645e67..9943daa2 100644 --- a/test/vfork/main.c +++ b/test/vfork/main.c @@ -3,6 +3,7 @@ #include #include #include +#include #include "test.h" // Note: This test intends to test the case that child process directly calls _exit() @@ -89,10 +90,87 @@ parent_exit: exit(1); } +volatile static int test_stop_child_flag = 0; + +static void *child_thread_routine(void *_arg) { + printf("Child thread starts\n"); + test_stop_child_flag = 1; + + struct timespec t1, t2; + if (clock_gettime(CLOCK_REALTIME, &t1)) { + return (void *) -1; + } + + sleep(1); + + if (clock_gettime(CLOCK_REALTIME, &t2)) { + return (void *) -1; + } + + // Parent thread vfork and will stop this thread for several seconds + if (t2.tv_sec - t1.tv_sec <= 1) { + printf("the thread is not stopped"); + exit(-1); + } + + printf("child thread exits\n"); + return NULL; +} + +// Test the behavior that when vfork is called, the parent process' other child threads are forced to stopped. +// +// This test case has different behaviors for Linux and Occlum +// This limitation is recorded in src/libos/src/process/do_vfork.rs +int test_vfork_stop_child_thread() { + pthread_t child_thread; + pid_t child_pid; + struct timespec ts; + ts.tv_sec = 3; + ts.tv_nsec = 0; + if (pthread_create(&child_thread, NULL, child_thread_routine, NULL) < 0) { + THROW_ERROR("pthread_create failed\n"); + } + + // Wait for child thread to start + while (test_stop_child_flag == 0); + + child_pid = vfork(); + if (child_pid == 0) { + printf("child process created\n"); + char **child_argv = calloc(1, sizeof(char *) * 2); + child_argv[0] = "getpid"; + + // Wait for a few seconds + while (1) { + int ret = nanosleep(&ts, &ts); + if (ret == 0) { + break; + } + if (ret < 0 && errno != EINTR) { + THROW_ERROR("nanosleep failed"); + } + } + + printf("child process exec\n"); + int ret = execve("/bin/getpid", child_argv, NULL); + if (ret != 0) { + printf("child process execve error\n"); + } + _exit(1); + } else { + printf("return to parent\n"); + + pthread_join(child_thread, NULL); + } + + return 0; +} + static test_case_t test_cases[] = { TEST_CASE(test_vfork_exit), TEST_CASE(test_multiple_vfork_execve), TEST_CASE(test_vfork_isolate_file_table), + TEST_CASE(test_vfork_stop_child_thread), }; int main() {