diff --git a/src/libos/src/entry.rs b/src/libos/src/entry.rs index 1f24d362..7546ab1d 100644 --- a/src/libos/src/entry.rs +++ b/src/libos/src/entry.rs @@ -213,24 +213,16 @@ fn do_new_process( } fn do_exec_thread(libos_tid: pid_t, host_tid: pid_t) -> Result { - let exit_status = process::task::exec(libos_tid, host_tid)?; + let status = process::task::exec(libos_tid, host_tid)?; // sync file system // TODO: only sync when all processes exit use rcore_fs::vfs::FileSystem; crate::fs::ROOT_INODE.fs().sync()?; - // Only return the least significant 8 bits of the exit status - // - // From The Open Group Base Specifications Issue 7, 2018 edition: - // > The shell shall recognize the entire status value retrieved for the - // > command by the equivalent of the wait() function WEXITSTATUS macro... - // - // From the man page of wait() syscall: - // > WEXITSTATUS macro returns the exit status of the child. This consists of the least - // > significant 8 bits of the status - let exit_status = exit_status & 0x0000_00FF_i32; - Ok(exit_status) + // Not to be confused with the return value of a main function. + // The exact meaning of status is described in wait(2) man page. 
+ Ok(status) } fn validate_program_path(target_path: &PathBuf) -> Result<()> { diff --git a/src/libos/src/exception/cpuid.rs b/src/libos/src/exception/cpuid.rs index 597ae4d8..84f18e1f 100644 --- a/src/libos/src/exception/cpuid.rs +++ b/src/libos/src/exception/cpuid.rs @@ -1,4 +1,5 @@ use super::*; +use crate::syscall::CpuContext; use sgx_types::*; use std::collections::HashMap; use std::rsgx_cpuidex; @@ -261,17 +262,17 @@ pub fn setup_cpuid_info() { let max_basic_leaf = CPUID.get_max_basic_leaf(); } -pub fn handle_cpuid_exception(info: &mut sgx_exception_info_t) -> u32 { +pub fn handle_cpuid_exception(user_context: &mut CpuContext) -> Result { debug!("handle CPUID exception"); - let leaf = info.cpu_context.rax as u32; - let subleaf = info.cpu_context.rcx as u32; + let leaf = user_context.rax as u32; + let subleaf = user_context.rcx as u32; let cpuid_result = CPUID.get_cpuid_info(leaf, subleaf); trace!("cpuid result: {:?}", cpuid_result); - info.cpu_context.rax = cpuid_result.eax as u64; - info.cpu_context.rbx = cpuid_result.ebx as u64; - info.cpu_context.rcx = cpuid_result.ecx as u64; - info.cpu_context.rdx = cpuid_result.edx as u64; - info.cpu_context.rip += 2; + user_context.rax = cpuid_result.eax as u64; + user_context.rbx = cpuid_result.ebx as u64; + user_context.rcx = cpuid_result.ecx as u64; + user_context.rdx = cpuid_result.edx as u64; + user_context.rip += 2; - EXCEPTION_CONTINUE_EXECUTION + Ok(0) } diff --git a/src/libos/src/exception/mod.rs b/src/libos/src/exception/mod.rs index 182b15b8..b7b240a6 100644 --- a/src/libos/src/exception/mod.rs +++ b/src/libos/src/exception/mod.rs @@ -1,50 +1,74 @@ +//! Exception handling subsystem. 
+ use self::cpuid::{handle_cpuid_exception, setup_cpuid_info, CPUID_OPCODE}; use self::rdtsc::{handle_rdtsc_exception, RDTSC_OPCODE}; use self::syscall::{handle_syscall_exception, SYSCALL_OPCODE}; use super::*; -use crate::syscall::SyscallNum; +use crate::signal::{FaultSignal, SigSet}; +use crate::syscall::{CpuContext, SyscallNum}; use sgx_types::*; +// Modules for instruction simulation +mod cpuid; +mod rdtsc; +mod syscall; + pub fn register_exception_handlers() { setup_cpuid_info(); + // Register handlers whose priorities go from low to high unsafe { - sgx_register_exception_handler(1, handle_exception); + let is_first = 1; + sgx_register_exception_handler(is_first, handle_exception); } } #[no_mangle] extern "C" fn handle_exception(info: *mut sgx_exception_info_t) -> u32 { - let ret = unsafe { __occlum_syscall(SyscallNum::Exception as u32, info) }; - assert!(ret == EXCEPTION_CONTINUE_EXECUTION); - ret -} - -pub fn do_handle_exception(info: *mut sgx_exception_info_t) -> Result { - let mut info = unsafe { &mut *info }; - // Assume the length of opcode is 2 bytes - let ip_opcode = unsafe { *(info.cpu_context.rip as *const u16) }; - if info.exception_vector != sgx_exception_vector_t::SGX_EXCEPTION_VECTOR_UD - || info.exception_type != sgx_exception_type_t::SGX_EXCEPTION_HARDWARE - { - panic!( - "unable to process the exception, vector:{} type:{}", - info.exception_vector as u32, info.exception_type as u32 - ); + extern "C" { + fn __occlum_syscall_c_abi(num: u32, info: *mut sgx_exception_info_t) -> u32; } - let ret = match ip_opcode { - #![deny(unreachable_patterns)] - CPUID_OPCODE => handle_cpuid_exception(&mut info), - RDTSC_OPCODE => handle_rdtsc_exception(&mut info), - SYSCALL_OPCODE => handle_syscall_exception(&mut info), - _ => panic!("unable to process the exception, opcode: {:#x}", ip_opcode), - }; - Ok(ret as isize) + unsafe { __occlum_syscall_c_abi(SyscallNum::HandleException as u32, info) }; + unreachable!(); } -extern "C" { - fn __occlum_syscall(num: u32, 
info: *mut sgx_exception_info_t) -> u32; -} +/// Exceptions are handled as a special kind of system calls. +pub fn do_handle_exception( + info: *mut sgx_exception_info_t, + user_context: *mut CpuContext, +) -> Result { + let info = unsafe { &mut *info }; + if info.exception_type != sgx_exception_type_t::SGX_EXCEPTION_HARDWARE { + return_errno!(EINVAL, "Can only handle hardware exceptions"); + } -mod cpuid; -mod rdtsc; -mod syscall; + let user_context = unsafe { &mut *user_context }; + *user_context = CpuContext::from_sgx(&info.cpu_context); + + // Try to do instruction emulation first + if info.exception_vector == sgx_exception_vector_t::SGX_EXCEPTION_VECTOR_UD { + // Assume the length of opcode is 2 bytes + let ip_opcode = unsafe { *(user_context.rip as *const u16) }; + if ip_opcode == RDTSC_OPCODE { + return handle_rdtsc_exception(user_context); + } else if ip_opcode == SYSCALL_OPCODE { + return handle_syscall_exception(user_context); + } else if ip_opcode == CPUID_OPCODE { + return handle_cpuid_exception(user_context); + } + } + + // Then, it must be a "real" exception. Convert it to signal and force delivering it. + // The generated signal is SIGBUS, SIGFPE, SIGILL, or SIGSEGV. + // + // So what happens if the signal is masked? The man page of sigprocmask(2) states: + // + // > If SIGBUS, SIGFPE, SIGILL, or SIGSEGV are generated while they are blocked, the result is + // undefined, unless the signal was generated by kill(2), sigqueue(3), or raise(3). + // + // As the thread cannot proceed without handling the exception, we choose to force + // delivering the signal regardless of the current signal mask. 
+ let signal = Box::new(FaultSignal::new(info)); + crate::signal::force_signal(signal, user_context); + + Ok(0) +} diff --git a/src/libos/src/exception/rdtsc.rs b/src/libos/src/exception/rdtsc.rs index 2134075e..26f6eaee 100644 --- a/src/libos/src/exception/rdtsc.rs +++ b/src/libos/src/exception/rdtsc.rs @@ -1,15 +1,15 @@ -use super::*; -use sgx_types::*; +use crate::prelude::*; +use crate::syscall::CpuContext; pub const RDTSC_OPCODE: u16 = 0x310F; -pub fn handle_rdtsc_exception(info: &mut sgx_exception_info_t) -> u32 { +pub fn handle_rdtsc_exception(user_context: &mut CpuContext) -> Result { debug!("handle RDTSC exception"); - let (low, high) = time::do_rdtsc(); + let (low, high) = crate::time::do_rdtsc(); trace!("do_rdtsc result {{ low: {:#x} high: {:#x}}}", low, high); - info.cpu_context.rax = low as u64; - info.cpu_context.rdx = high as u64; - info.cpu_context.rip += 2; + user_context.rax = low as u64; + user_context.rdx = high as u64; + user_context.rip += 2; - EXCEPTION_CONTINUE_EXECUTION + Ok(0) } diff --git a/src/libos/src/exception/syscall.rs b/src/libos/src/exception/syscall.rs index 4116ec28..09750f04 100644 --- a/src/libos/src/exception/syscall.rs +++ b/src/libos/src/exception/syscall.rs @@ -1,30 +1,25 @@ use super::*; -use crate::syscall::{occlum_syscall, SyscallNum}; +use crate::syscall::{occlum_syscall, CpuContext, SyscallNum}; use sgx_types::*; pub const SYSCALL_OPCODE: u16 = 0x050F; -pub fn handle_syscall_exception(info: &mut sgx_exception_info_t) -> u32 { +pub fn handle_syscall_exception(user_context: &mut CpuContext) -> ! 
{ debug!("handle SYSCALL exception"); - // SYSCALL, save RIP into RCX and RFLAGS into R11 - info.cpu_context.rcx = info.cpu_context.rip + 2; - info.cpu_context.r11 = info.cpu_context.rflags; - let num = info.cpu_context.rax as u32; - let arg0 = info.cpu_context.rdi as isize; - let arg1 = info.cpu_context.rsi as isize; - let arg2 = info.cpu_context.rdx as isize; - let arg3 = info.cpu_context.r10 as isize; - let arg4 = info.cpu_context.r8 as isize; - let arg5 = info.cpu_context.r9 as isize; - // syscall should not be an exception in Occlum - assert!(num != SyscallNum::Exception as u32); - let ret = occlum_syscall(num, arg0, arg1, arg2, arg3, arg4, arg5); - info.cpu_context.rax = ret as u64; - // SYSRET, load RIP from RCX and loading RFLAGS from R11 - info.cpu_context.rip = info.cpu_context.rcx; - // Clear RF, VM, reserved bits; set bit 1 - info.cpu_context.rflags = (info.cpu_context.r11 & 0x3C7FD7) | 2; + // SYSCALL instruction saves RIP into RCX and RFLAGS into R11. This is to + // comply with hardware's behavior. Not useful for us. 
+ user_context.rcx = user_context.rip; + user_context.r11 = user_context.rflags; - EXCEPTION_CONTINUE_EXECUTION + // The target RIP should be the next instruction + user_context.rip += 2; + // Set target RFLAGS: clear RF, VM, reserved bits; set bit 1 + user_context.rflags = (user_context.rflags & 0x3C7FD7) | 2; + + let num = user_context.rax as u32; + assert!(num != SyscallNum::HandleException as u32); + + // FIXME: occlum syscall must use Linux ABI + occlum_syscall(user_context); } diff --git a/src/libos/src/lib.rs b/src/libos/src/lib.rs index 069a450f..61daa25b 100644 --- a/src/libos/src/lib.rs +++ b/src/libos/src/lib.rs @@ -61,6 +61,7 @@ mod misc; mod net; mod process; mod sched; +mod signal; mod syscall; mod time; mod untrusted; diff --git a/src/libos/src/prelude.rs b/src/libos/src/prelude.rs index f7ae83e7..69b5b139 100644 --- a/src/libos/src/prelude.rs +++ b/src/libos/src/prelude.rs @@ -16,7 +16,7 @@ pub use std::sync::{ pub use crate::error::Result; pub use crate::error::*; pub use crate::fs::{File, FileDesc, FileRef}; -pub use crate::process::pid_t; +pub use crate::process::{pid_t, uid_t}; macro_rules! 
debug_trace { () => { diff --git a/src/libos/src/process/do_exit.rs b/src/libos/src/process/do_exit.rs index 798cb9b2..0385330c 100644 --- a/src/libos/src/process/do_exit.rs +++ b/src/libos/src/process/do_exit.rs @@ -1,14 +1,39 @@ use std::intrinsics::atomic_store; use super::do_futex::futex_wake; -use super::process::ChildProcessFilter; -use super::{table, ThreadRef}; +use super::process::ProcessFilter; +use super::{table, TermStatus, ThreadRef, ThreadStatus}; use crate::prelude::*; +use crate::signal::SigNum; -pub fn do_exit(exit_status: i32) { +pub fn do_exit_group(status: i32) { + let term_status = TermStatus::Exited(status as u8); + current!().process().force_exit(term_status); + exit_thread(term_status); +} + +pub fn do_exit(status: i32) { + let term_status = TermStatus::Exited(status as u8); + exit_thread(term_status); +} + +/// Exit this thread if it has been forced to exit. +/// +/// A thread may be forced to exit for two reasons: 1) a fatal signal; 2) +/// exit_group syscall. +pub fn handle_force_exit() { + if let Some(term_status) = current!().process().is_forced_exit() { + exit_thread(term_status); + } +} + +fn exit_thread(term_status: TermStatus) { let thread = current!(); + if thread.status() == ThreadStatus::Exited { + return; + } - let num_remaining_threads = thread.exit(exit_status); + let num_remaining_threads = thread.exit(term_status); // Notify a thread, if any, that waits on ctid. See set_tid_address(2) for more info. if let Some(ctid_ptr) = thread.clear_ctid() { @@ -28,11 +53,11 @@ pub fn do_exit(exit_status: i32) { // If this thread is the last thread, then exit the process if num_remaining_threads == 0 { - do_exit_process(&thread, exit_status); + exit_process(&thread, term_status); } } -fn do_exit_process(thread: &ThreadRef, exit_status: i32) { +fn exit_process(thread: &ThreadRef, term_status: TermStatus) { let process = thread.process(); // If the parent process is the idle process, we can release the process directly. 
@@ -44,7 +69,7 @@ fn do_exit_process(thread: &ThreadRef, exit_status: i32) { table::del_thread(thread.tid()).expect("tid must be in the table"); table::del_process(process.pid()).expect("pid must be in the table"); - process_inner.exit(exit_status); + process_inner.exit(term_status); parent_inner.remove_zombie_child(process.pid()); return; } @@ -55,19 +80,19 @@ fn do_exit_process(thread: &ThreadRef, exit_status: i32) { // Deadlock note: Always lock parent then child. let parent = process.parent(); let mut parent_inner = parent.inner(); - process.inner().exit(exit_status); + process.inner().exit(term_status); // Wake up the parent if it is waiting on this child let waiting_children = parent_inner.waiting_children_mut().unwrap(); waiting_children.del_and_wake_one_waiter(|waiter_data| -> Option { match waiter_data { - ChildProcessFilter::WithAnyPid => {} - ChildProcessFilter::WithPid(required_pid) => { + ProcessFilter::WithAnyPid => {} + ProcessFilter::WithPid(required_pid) => { if process.pid() != *required_pid { return None; } } - ChildProcessFilter::WithPgid(required_pgid) => { + ProcessFilter::WithPgid(required_pgid) => { if process.pgid() != *required_pgid { return None; } diff --git a/src/libos/src/process/do_spawn/mod.rs b/src/libos/src/process/do_spawn/mod.rs index 7e9817b7..9d29e9c0 100644 --- a/src/libos/src/process/do_spawn/mod.rs +++ b/src/libos/src/process/do_spawn/mod.rs @@ -311,8 +311,8 @@ fn init_auxvec(process_vm: &ProcessVM, exec_elf_file: &ElfFile) -> Result Result i64; + fn __occlum_syscall_linux_abi() -> i64; fn occlum_gdb_hook_load_elf(elf_base: u64, elf_path: *const u8, elf_path_len: u64); } diff --git a/src/libos/src/process/do_wait4.rs b/src/libos/src/process/do_wait4.rs index fbb83e72..c33f1736 100644 --- a/src/libos/src/process/do_wait4.rs +++ b/src/libos/src/process/do_wait4.rs @@ -1,9 +1,9 @@ -use super::process::{ChildProcessFilter, ProcessInner}; +use super::process::{ProcessFilter, ProcessInner}; use super::wait::Waiter; use 
super::{table, ProcessRef, ProcessStatus}; use crate::prelude::*; -pub fn do_wait4(child_filter: &ChildProcessFilter) -> Result<(pid_t, i32)> { +pub fn do_wait4(child_filter: &ProcessFilter) -> Result<(pid_t, i32)> { // Lock the process early to ensure that we do not miss any changes in // children processes let thread = current!(); @@ -16,9 +16,9 @@ pub fn do_wait4(child_filter: &ChildProcessFilter) -> Result<(pid_t, i32)> { .unwrap() .iter() .filter(|child| match child_filter { - ChildProcessFilter::WithAnyPid => true, - ChildProcessFilter::WithPid(required_pid) => child.pid() == *required_pid, - ChildProcessFilter::WithPgid(required_pgid) => child.pgid() == *required_pgid, + ProcessFilter::WithAnyPid => true, + ProcessFilter::WithPid(required_pid) => child.pid() == *required_pid, + ProcessFilter::WithPgid(required_pgid) => child.pgid() == *required_pgid, }) .collect::>(); @@ -60,8 +60,6 @@ fn free_zombie_child(mut parent_inner: SgxMutexGuard, zombie_pid: let zombie = parent_inner.remove_zombie_child(zombie_pid); debug_assert!(zombie.status() == ProcessStatus::Zombie); - // Remove zombie from its parent - let zombie_inner = zombie.inner(); - zombie_inner.exit_status().unwrap() + zombie_inner.term_status().unwrap().as_u32() as i32 } diff --git a/src/libos/src/process/mod.rs b/src/libos/src/process/mod.rs index 2f96cb72..a8511492 100644 --- a/src/libos/src/process/mod.rs +++ b/src/libos/src/process/mod.rs @@ -13,16 +13,19 @@ use crate::fs::{FileRef, FileTable, FsView}; use crate::misc::ResourceLimits; use crate::prelude::*; use crate::sched::SchedAgent; +use crate::signal::{SigDispositions, SigQueues}; use crate::vm::ProcessVM; -use self::process::{ChildProcessFilter, ProcessBuilder, ProcessInner}; +use self::process::{ProcessBuilder, ProcessInner}; use self::thread::{ThreadBuilder, ThreadId, ThreadInner}; use self::wait::{WaitQueue, Waiter}; +pub use self::do_exit::handle_force_exit; pub use self::do_spawn::do_spawn_without_exec; -pub use self::process::{Process, 
ProcessStatus, IDLE}; +pub use self::process::{Process, ProcessFilter, ProcessStatus, IDLE}; pub use self::syscalls::*; pub use self::task::Task; +pub use self::term_status::TermStatus; pub use self::thread::{Thread, ThreadStatus}; mod do_arch_prctl; @@ -35,6 +38,7 @@ mod do_spawn; mod do_wait4; mod process; mod syscalls; +mod term_status; mod thread; mod wait; @@ -43,8 +47,14 @@ pub mod elf_file; pub mod table; pub mod task; +// TODO: need to separate C's version pid_t with Rust version Pid. +// pid_t must be signed as negative values may have special meaning +// (check wait4 and kill for examples), while Pid should be a +// non-negative value. #[allow(non_camel_case_types)] pub type pid_t = u32; +#[allow(non_camel_case_types)] +pub type uid_t = u32; pub type ProcessRef = Arc; pub type ThreadRef = Arc; diff --git a/src/libos/src/process/process/builder.rs b/src/libos/src/process/process/builder.rs index 9ecc5b40..c0d85d60 100644 --- a/src/libos/src/process/process/builder.rs +++ b/src/libos/src/process/process/builder.rs @@ -3,6 +3,7 @@ use super::super::thread::{ThreadBuilder, ThreadId}; use super::super::{FileTableRef, FsViewRef, ProcessRef, ProcessVMRef, ResourceLimitsRef}; use super::{Process, ProcessInner}; use crate::prelude::*; +use crate::signal::{SigDispositions, SigQueues}; #[derive(Debug)] pub struct ProcessBuilder { @@ -87,11 +88,17 @@ impl ProcessBuilder { let exec_path = self.exec_path.take().unwrap_or_default(); let parent = self.parent.take().map(|parent| SgxRwLock::new(parent)); let inner = SgxMutex::new(ProcessInner::new()); + let sig_dispositions = SgxRwLock::new(SigDispositions::new()); + let sig_queues = SgxMutex::new(SigQueues::new()); + let forced_exit = SgxRwLock::new(None); Arc::new(Process { pid, exec_path, parent, inner, + sig_dispositions, + sig_queues, + forced_exit, }) }; diff --git a/src/libos/src/process/process/mod.rs b/src/libos/src/process/process/mod.rs index d92b7828..a8c8ee39 100644 --- a/src/libos/src/process/process/mod.rs 
+++ b/src/libos/src/process/process/mod.rs @@ -1,8 +1,9 @@ use std::fmt; use super::wait::WaitQueue; -use super::{ProcessRef, ThreadRef}; +use super::{ProcessRef, TermStatus, ThreadRef}; use crate::prelude::*; +use crate::signal::{SigDispositions, SigNum, SigQueues}; pub use self::builder::ProcessBuilder; pub use self::idle::IDLE; @@ -17,6 +18,10 @@ pub struct Process { // Mutable info parent: Option>, inner: SgxMutex, + // Signal + sig_dispositions: SgxRwLock, + sig_queues: SgxMutex, + forced_exit: SgxRwLock>, } #[derive(Debug, PartialEq, Clone, Copy)] @@ -35,7 +40,7 @@ impl Process { /// Get process group ID // TODO: implement process group pub fn pgid(&self) -> pid_t { - 0 + self.pid } /// Get the parent process. @@ -76,6 +81,14 @@ impl Process { self.inner().leader_thread() } + /// Get threads. + pub fn threads(&self) -> Vec { + self.inner() + .threads() + .map(|vec_ref| vec_ref.clone()) + .unwrap_or_else(|| Vec::new()) + } + /// Get status. pub fn status(&self) -> ProcessStatus { self.inner().status() @@ -86,6 +99,33 @@ impl Process { &self.exec_path } + /// Get the signal queues for process-directed signals. + pub fn sig_queues(&self) -> &SgxMutex { + &self.sig_queues + } + + /// Get the process-wide signal dispositions. + pub fn sig_dispositions(&self) -> &SgxRwLock { + &self.sig_dispositions + } + + /// Check whether the process has been forced to exit. + pub fn is_forced_exit(&self) -> Option { + *self.forced_exit.read().unwrap() + } + + /// Force a process to exit. + /// + /// There are two reasons to force a process to exit: + /// 1. Receiving a fatal signal; + /// 2. Performing exit_group syscall. + /// + /// A process may be forced to exit many times, but only the first time counts. + pub fn force_exit(&self, term_status: TermStatus) { + let mut forced_exit = self.forced_exit.write().unwrap(); + forced_exit.get_or_insert(term_status); + } + /// Get the internal representation of the process. 
/// /// For the purpose of encapsulation, this method is invisible to other subsystems. @@ -98,11 +138,11 @@ pub enum ProcessInner { Live { status: LiveStatus, children: Vec, - waiting_children: WaitQueue, + waiting_children: WaitQueue, threads: Vec, }, Zombie { - exit_status: i32, + term_status: TermStatus, }, } @@ -172,7 +212,7 @@ impl ProcessInner { } } - pub fn waiting_children_mut(&mut self) -> Option<&mut WaitQueue> { + pub fn waiting_children_mut(&mut self) -> Option<&mut WaitQueue> { match self { Self::Live { waiting_children, .. @@ -190,7 +230,7 @@ impl ProcessInner { children.swap_remove(zombie_i) } - pub fn exit(&mut self, exit_status: i32) { + pub fn exit(&mut self, term_status: TermStatus) { // Check preconditions debug_assert!(self.status() == ProcessStatus::Running); debug_assert!(self.num_threads() == 0); @@ -201,15 +241,15 @@ impl ProcessInner { *parent = IDLE.process().clone(); } - *self = Self::Zombie { exit_status }; + *self = Self::Zombie { term_status }; } - pub fn exit_status(&self) -> Option { + pub fn term_status(&self) -> Option { // Check preconditions debug_assert!(self.status() == ProcessStatus::Zombie); match self { - Self::Zombie { exit_status } => Some(*exit_status), + Self::Zombie { term_status } => Some(*term_status), _ => None, } } @@ -270,9 +310,9 @@ impl fmt::Debug for ProcessInner { .collect::>(), ) .finish(), - ProcessInner::Zombie { exit_status, .. } => f + ProcessInner::Zombie { term_status, .. } => f .debug_struct("ProcessInner::Zombie") - .field("exit_status", exit_status) + .field("term_status", term_status) .finish(), } } @@ -294,11 +334,11 @@ impl Into for LiveStatus { } #[derive(Clone, Copy, Debug)] -pub enum ChildProcessFilter { +pub enum ProcessFilter { WithAnyPid, WithPid(pid_t), WithPgid(pid_t), } // TODO: is this necessary? 
-unsafe impl Send for ChildProcessFilter {} +unsafe impl Send for ProcessFilter {} diff --git a/src/libos/src/process/syscalls.rs b/src/libos/src/process/syscalls.rs index 33bdcc14..ea20cd17 100644 --- a/src/libos/src/process/syscalls.rs +++ b/src/libos/src/process/syscalls.rs @@ -4,7 +4,7 @@ use super::do_arch_prctl::ArchPrctlCode; use super::do_clone::CloneFlags; use super::do_futex::{FutexFlags, FutexOp}; use super::do_spawn::FileAction; -use super::process::ChildProcessFilter; +use super::process::ProcessFilter; use crate::prelude::*; use crate::time::timespec_t; use crate::util::mem_util::from_user::*; @@ -184,16 +184,16 @@ pub fn do_set_tid_address(tidptr: *mut pid_t) -> Result { super::do_set_tid_address::do_set_tid_address(tidptr).map(|tid| tid as isize) } -pub fn do_exit(status: i32) -> ! { +pub fn do_exit(status: i32) -> Result { debug!("exit: {}", status); super::do_exit::do_exit(status); + Ok(0) +} - extern "C" { - fn do_exit_task() -> !; - } - unsafe { - do_exit_task(); - } +pub fn do_exit_group(status: i32) -> Result { + debug!("exit_group: {}", status); + super::do_exit::do_exit_group(status); + Ok(0) } pub fn do_wait4(pid: i32, exit_status_ptr: *mut i32) -> Result { @@ -202,16 +202,14 @@ pub fn do_wait4(pid: i32, exit_status_ptr: *mut i32) -> Result { } let child_process_filter = match pid { - pid if pid < -1 => ChildProcessFilter::WithPgid((-pid) as pid_t), - -1 => ChildProcessFilter::WithAnyPid, + pid if pid < -1 => ProcessFilter::WithPgid((-pid) as pid_t), + -1 => ProcessFilter::WithAnyPid, 0 => { let pgid = current!().process().pgid(); - ChildProcessFilter::WithPgid(pgid) - } - pid if pid > 0 => ChildProcessFilter::WithPid(pid as pid_t), - _ => { - panic!("THIS SHOULD NEVER HAPPEN!"); + ProcessFilter::WithPgid(pgid) } + pid if pid > 0 => ProcessFilter::WithPid(pid as pid_t), + _ => unreachable!(), }; let mut exit_status = 0; match super::do_wait4::do_wait4(&child_process_filter) { diff --git a/src/libos/src/process/table.rs 
b/src/libos/src/process/table.rs index 5b0a52df..21a3ebe3 100644 --- a/src/libos/src/process/table.rs +++ b/src/libos/src/process/table.rs @@ -5,6 +5,15 @@ pub fn get_process(pid: pid_t) -> Result { PROCESS_TABLE.lock().unwrap().get(pid) } +pub fn get_all_processes() -> Vec { + PROCESS_TABLE + .lock() + .unwrap() + .iter() + .map(|(_, proc_ref)| proc_ref.clone()) + .collect() +} + pub(super) fn add_process(process: ProcessRef) -> Result<()> { PROCESS_TABLE.lock().unwrap().add(process.pid(), process) } @@ -50,6 +59,10 @@ impl Table { } } + pub fn iter(&self) -> std::collections::hash_map::Iter<'_, pid_t, I> { + self.map.iter() + } + pub fn get(&self, id: pid_t) -> Result { self.map .get(&id) diff --git a/src/libos/src/process/task/exec.rs b/src/libos/src/process/task/exec.rs index bdce066e..ec2bfd89 100644 --- a/src/libos/src/process/task/exec.rs +++ b/src/libos/src/process/task/exec.rs @@ -1,4 +1,4 @@ -use super::super::{current, ThreadRef}; +use super::super::{current, TermStatus, ThreadRef}; use super::Task; use crate::prelude::*; @@ -33,11 +33,11 @@ fn dequeue(libos_tid: pid_t) -> Result { /// Execute the specified LibOS thread in the current host thread. 
pub fn exec(libos_tid: pid_t, host_tid: pid_t) -> Result { - let new_thread: ThreadRef = dequeue(libos_tid)?; - new_thread.start(host_tid); + let this_thread: ThreadRef = dequeue(libos_tid)?; + this_thread.start(host_tid); // Enable current::get() from now on - current::set(new_thread.clone()); + current::set(this_thread.clone()); #[cfg(feature = "syscall_timing")] GLOBAL_PROFILER @@ -48,7 +48,7 @@ pub fn exec(libos_tid: pid_t, host_tid: pid_t) -> Result { unsafe { // task may only be modified by this function; so no lock is needed - do_exec_task(new_thread.task() as *const Task as *mut Task); + do_exec_task(this_thread.task() as *const Task as *mut Task); } #[cfg(feature = "syscall_timing")] @@ -58,16 +58,20 @@ pub fn exec(libos_tid: pid_t, host_tid: pid_t) -> Result { .thread_exit() .expect("unexpected error from profiler to exit thread"); - let exit_status = new_thread.inner().exit_status().unwrap(); - info!( - "Thread exited: tid = {}, exit_status = {}", - libos_tid, exit_status - ); + let term_status = this_thread.inner().term_status().unwrap(); + match term_status { + TermStatus::Exited(status) => { + info!("Thread exited: tid = {}, status = {}", libos_tid, status); + } + TermStatus::Killed(signum) => { + info!("Thread killed: tid = {}, signum = {:?}", libos_tid, signum); + } + } // Disable current::get() current::reset(); - Ok(exit_status) + Ok(term_status.as_u32() as i32) } lazy_static! { diff --git a/src/libos/src/process/term_status.rs b/src/libos/src/process/term_status.rs new file mode 100644 index 00000000..9d40a4ee --- /dev/null +++ b/src/libos/src/process/term_status.rs @@ -0,0 +1,22 @@ +//! The termination status of a process or thread. + +use crate::signal::SigNum; + +// TODO: support core dump +#[derive(Debug, Copy, Clone, PartialEq)] +pub enum TermStatus { + Exited(u8), + Killed(SigNum), + //Dumped(SigNum), +} + +impl TermStatus { + /// Return as a 32-bit integer encoded as specified in wait(2) man page. 
+ pub fn as_u32(&self) -> u32 { + match *self { + TermStatus::Exited(status) => (status as u32) << 8, + TermStatus::Killed(signum) => (signum.as_u8() as u32), + //TermStatus::Dumped(signum) => (signum.as_u8() as u32) | 0x80, + } + } +} diff --git a/src/libos/src/process/thread/builder.rs b/src/libos/src/process/thread/builder.rs index 353eeb57..92622dc6 100644 --- a/src/libos/src/process/thread/builder.rs +++ b/src/libos/src/process/thread/builder.rs @@ -2,7 +2,7 @@ use std::ptr::NonNull; use super::{ FileTableRef, FsViewRef, ProcessRef, ProcessVM, ProcessVMRef, ResourceLimitsRef, SchedAgentRef, - Task, Thread, ThreadId, ThreadInner, ThreadRef, + SigQueues, SigSet, Task, Thread, ThreadId, ThreadInner, ThreadRef, }; use crate::prelude::*; @@ -82,10 +82,12 @@ impl ThreadBuilder { } pub fn build(self) -> Result { - let tid = self.tid.unwrap_or_else(|| ThreadId::new()); let task = self .task .ok_or_else(|| errno!(EINVAL, "task is mandatory"))?; + let tid = self.tid.unwrap_or_else(|| ThreadId::new()); + let clear_ctid = SgxRwLock::new(self.clear_ctid); + let inner = SgxMutex::new(ThreadInner::new()); let process = self .process .ok_or_else(|| errno!(EINVAL, "process is mandatory"))?; @@ -96,8 +98,9 @@ impl ThreadBuilder { let files = self.files.unwrap_or_default(); let sched = self.sched.unwrap_or_default(); let rlimits = self.rlimits.unwrap_or_default(); - let clear_ctid = SgxRwLock::new(self.clear_ctid); - let inner = SgxMutex::new(ThreadInner::new()); + let sig_queues = SgxMutex::new(SigQueues::new()); + let sig_mask = SgxRwLock::new(SigSet::new_empty()); + let sig_tmp_mask = SgxRwLock::new(SigSet::new_empty()); let new_thread = Arc::new(Thread { task, @@ -110,6 +113,9 @@ impl ThreadBuilder { files, sched, rlimits, + sig_queues, + sig_mask, + sig_tmp_mask, }); let mut inner = new_thread.process().inner(); diff --git a/src/libos/src/process/thread/mod.rs b/src/libos/src/process/thread/mod.rs index 4eed7af6..4daf1272 100644 --- a/src/libos/src/process/thread/mod.rs +++ 
b/src/libos/src/process/thread/mod.rs @@ -4,9 +4,10 @@ use std::ptr::NonNull; use super::task::Task; use super::{ FileTableRef, FsViewRef, ProcessRef, ProcessVM, ProcessVMRef, ResourceLimitsRef, SchedAgentRef, - ThreadRef, + TermStatus, ThreadRef, }; use crate::prelude::*; +use crate::signal::{SigQueues, SigSet}; pub use self::builder::ThreadBuilder; pub use self::id::ThreadId; @@ -30,6 +31,10 @@ pub struct Thread { files: FileTableRef, sched: SchedAgentRef, rlimits: ResourceLimitsRef, + // Signal + sig_queues: SgxMutex, + sig_mask: SgxRwLock, + sig_tmp_mask: SgxRwLock, } #[derive(Debug, PartialEq, Clone, Copy)] @@ -68,6 +73,24 @@ impl Thread { &self.sched } + /// Get the signal queues for thread-directed signals. + pub fn sig_queues(&self) -> &SgxMutex { + &self.sig_queues + } + + /// Get the per-thread signal mask. + pub fn sig_mask(&self) -> &SgxRwLock { + &self.sig_mask + } + + /// Get the per-thread, temporary signal mask. + /// + /// The tmp mask is always cleared at the end of the execution + /// of a syscall. + pub fn sig_tmp_mask(&self) -> &SgxRwLock { + &self.sig_tmp_mask + } + /// Get a file from the file table. 
pub fn file(&self, fd: FileDesc) -> Result { self.files().lock().unwrap().get(fd) @@ -99,7 +122,7 @@ impl Thread { self.inner().start(); } - pub(super) fn exit(&self, exit_status: i32) -> usize { + pub(super) fn exit(&self, term_status: TermStatus) -> usize { self.sched().lock().unwrap().detach(); // Remove this thread from its owner process @@ -111,7 +134,7 @@ impl Thread { .expect("the thread must belong to the process"); threads.swap_remove(thread_i); - self.inner().exit(exit_status); + self.inner().exit(term_status); threads.len() } @@ -153,7 +176,7 @@ unsafe impl Sync for Thread {} pub enum ThreadInner { Init, Running, - Exited { exit_status: i32 }, + Exited { term_status: TermStatus }, } impl ThreadInner { @@ -169,9 +192,9 @@ impl ThreadInner { } } - pub fn exit_status(&self) -> Option { + pub fn term_status(&self) -> Option { match self { - Self::Exited { exit_status } => Some(*exit_status), + Self::Exited { term_status } => Some(*term_status), _ => None, } } @@ -181,8 +204,8 @@ impl ThreadInner { *self = Self::Running; } - pub fn exit(&mut self, exit_status: i32) { + pub fn exit(&mut self, term_status: TermStatus) { debug_assert!(self.status() == ThreadStatus::Running); - *self = Self::Exited { exit_status }; + *self = Self::Exited { term_status }; } } diff --git a/src/libos/src/signal/c_types.rs b/src/libos/src/signal/c_types.rs new file mode 100644 index 00000000..bcbae08e --- /dev/null +++ b/src/libos/src/signal/c_types.rs @@ -0,0 +1,283 @@ +#![allow(non_camel_case_types)] + +use std::fmt; + +use super::SigNum; +use crate::prelude::*; +use crate::syscall::CpuContext; +use crate::time::clock_t; + +#[derive(Clone, Copy, Debug)] +#[repr(C)] +pub struct sigaction_t { + pub handler: *const c_void, + pub flags: u32, + pub restorer: *const c_void, + pub mask: sigset_t, +} + +pub type sigset_t = u64; + +#[derive(Clone, Copy)] +#[repr(C)] +pub union sigval_t { + sigval_int: i32, + sigval_ptr: *mut c_void, +} + +impl fmt::Debug for sigval_t { + fn fmt(&self, f: 
&mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "sigval_t = {{ {:?} or {:?} }}", + unsafe { self.sigval_int }, + unsafe { self.sigval_ptr } + ) + } +} + +impl From for sigval_t { + fn from(val: i32) -> sigval_t { + sigval_t { sigval_int: val } + } +} + +impl From<*mut T> for sigval_t { + fn from(ptr: *mut T) -> sigval_t { + sigval_t { + sigval_ptr: ptr as *mut c_void, + } + } +} + +impl From<*const T> for sigval_t { + fn from(ptr: *const T) -> sigval_t { + sigval_t { + sigval_ptr: ptr as *const c_void as *mut c_void, + } + } +} + +#[derive(Clone, Copy)] +#[repr(C)] +pub struct siginfo_t { + pub si_signo: i32, + pub si_errno: i32, + pub si_code: i32, + _padding: i32, + fields: siginfo_fields_t, +} + +#[derive(Clone, Copy)] +#[repr(C)] +union siginfo_fields_t { + bytes: [u8; 128 - std::mem::size_of::() * 4], + common: siginfo_common_t, + sigfault: siginfo_sigfault_t, + //sigpoll: siginfo_poll_t, + //sigsys: siginfo_sys_t, +} + +#[derive(Clone, Copy)] +#[repr(C)] +union siginfo_common_t { + first: siginfo_common_first_t, + second: siginfo_common_second_t, +} + +#[derive(Clone, Copy)] +#[repr(C)] +union siginfo_common_first_t { + piduid: siginfo_piduid_t, + timer: siginfo_timer_t, +} + +#[derive(Clone, Copy)] +#[repr(C)] +struct siginfo_piduid_t { + pid: pid_t, + uid: uid_t, +} + +#[derive(Clone, Copy)] +#[repr(C)] +struct siginfo_timer_t { + timerid: i32, + overrun: i32, +} + +#[derive(Clone, Copy)] +#[repr(C)] +union siginfo_common_second_t { + value: sigval_t, + sigchild: siginfo_sigchild_t, +} + +#[derive(Clone, Copy)] +#[repr(C)] +union siginfo_sigchild_t { + status: i32, + utime: clock_t, + stime: clock_t, +} + +#[derive(Clone, Copy)] +#[repr(C)] +struct siginfo_sigfault_t { + addr: *const c_void, + addr_lsb: i16, + first: siginfo_sigfault_first_t, +} + +#[derive(Clone, Copy)] +#[repr(C)] +union siginfo_sigfault_first_t { + addr_bnd: siginfo_addr_bnd_t, + pkey: u32, +} + +#[derive(Clone, Copy)] +#[repr(C)] +union siginfo_addr_bnd_t { + lower: *const 
c_void, + upper: *const c_void, +} + +impl siginfo_t { + pub fn new(num: SigNum, code: i32) -> Self { + let zero_fields = siginfo_fields_t { + bytes: [0_u8; std::mem::size_of::()], + }; + Self { + si_signo: num.as_u8() as i32, + si_code: code, + si_errno: 0, + _padding: 0, + fields: zero_fields, + } + } +} + +// Use macros to implement the getter and setter functions of siginfo_t. These getters +// and setters help the user to access the values embedded inside the many unions of +// siginfo_t. +macro_rules! impl_siginfo_getters_setters { + ( $( $getter:ident, $setter:ident : $val_type:ty => $( $path:ident ).* ),+, ) => { + $( + pub fn $getter(&self) -> $val_type { + unsafe { + self.$($path).* + } + } + + pub fn $setter(&mut self, new_val: $val_type) { + unsafe { + self.$($path).* = new_val; + } + } + )* + } +} + +impl siginfo_t { + impl_siginfo_getters_setters! { + // Format: + // getter_name, setter_name : field_type => path_to_field + si_pid, set_si_pid : pid_t => fields.common.first.piduid.pid, + si_uid, set_si_uid : uid_t => fields.common.first.piduid.uid, + si_status, set_si_satus : i32 => fields.common.second.sigchild.status, + si_utime, set_si_utime : clock_t => fields.common.second.sigchild.utime, + si_stime, set_si_stime : clock_t => fields.common.second.sigchild.stime, + si_value, set_si_value : sigval_t => fields.common.second.value, + si_addr, set_si_addr : *const c_void => fields.sigfault.addr, + si_addr_lsb, set_si_addr_lsb : i16 => fields.sigfault.addr_lsb, + si_lower, set_si_lower : *const c_void => fields.sigfault.first.addr_bnd.lower, + si_upper, set_si_upper : *const c_void => fields.sigfault.first.addr_bnd.upper, + si_pkey, set_si_pkey : u32 => fields.sigfault.first.pkey, + si_timerid, set_si_timerid : i32 => fields.common.first.timer.timerid, + si_overrune, set_si_overrune : i32 => fields.common.first.timer.overrun, + } +} + +#[derive(Clone, Copy)] +#[repr(C)] +pub struct ucontext_t { + pub uc_flags: u64, + pub uc_link: *mut ucontext_t, + pub 
uc_stack: stack_t, + pub uc_mcontext: mcontext_t, + pub uc_sigmask: sigset_t, + __fpregs_mem: [u64; 64], +} + +#[derive(Debug, Clone, Copy)] +#[repr(C)] +pub struct sigaltstack_t { + pub ss_sp: *mut c_void, + pub ss_flags: i32, + pub ss_size: usize, +} + +pub type stack_t = sigaltstack_t; + +#[derive(Debug, Clone, Copy, Default)] +#[repr(C)] +pub struct mcontext_t { + pub inner: CpuContext, + // TODO: the fields should be csgsfs, err, trapno, oldmask, and cr2 + _unused0: [u64; 5], + // TODO: this field should be `fpregs: fpregset_t,` + _unused1: usize, + _reserved: [u64; 8], +} + +/// Special values for the user-given signal handlers +pub const SIG_ERR: *const c_void = -1_i64 as *const c_void; +pub const SIG_DFL: *const c_void = 0_i64 as *const c_void; +pub const SIG_IGN: *const c_void = 1_i64 as *const c_void; + +pub const SI_ASYNCNL: i32 = -60; +pub const SI_TKILL: i32 = -6; +pub const SI_SIGIO: i32 = -5; +pub const SI_ASYNCIO: i32 = -4; +pub const SI_MESGQ: i32 = -3; +pub const SI_TIMER: i32 = -2; +pub const SI_QUEUE: i32 = -1; +pub const SI_USER: i32 = 0; +pub const SI_KERNEL: i32 = 128; + +pub const FPE_INTDIV: i32 = 1; +pub const FPE_INTOVF: i32 = 2; +pub const FPE_FLTDIV: i32 = 3; +pub const FPE_FLTOVF: i32 = 4; +pub const FPE_FLTUND: i32 = 5; +pub const FPE_FLTRES: i32 = 6; +pub const FPE_FLTINV: i32 = 7; +pub const FPE_FLTSUB: i32 = 8; + +pub const ILL_ILLOPC: i32 = 1; +pub const ILL_ILLOPN: i32 = 2; +pub const ILL_ILLADR: i32 = 3; +pub const ILL_ILLTRP: i32 = 4; +pub const ILL_PRVOPC: i32 = 5; +pub const ILL_PRVREG: i32 = 6; +pub const ILL_COPROC: i32 = 7; +pub const ILL_BADSTK: i32 = 8; + +pub const SEGV_MAPERR: i32 = 1; +pub const SEGV_ACCERR: i32 = 2; +pub const SEGV_BNDERR: i32 = 3; +pub const SEGV_PKUERR: i32 = 4; + +pub const BUS_ADRALN: i32 = 1; +pub const BUS_ADRERR: i32 = 2; +pub const BUS_OBJERR: i32 = 3; +pub const BUS_MCEERR_AR: i32 = 4; +pub const BUS_MCEERR_AO: i32 = 5; + +pub const CLD_EXITED: i32 = 1; +pub const CLD_KILLED: i32 = 2; +pub 
const CLD_DUMPED: i32 = 3; +pub const CLD_TRAPPED: i32 = 4; +pub const CLD_STOPPED: i32 = 5; +pub const CLD_CONTINUED: i32 = 6; diff --git a/src/libos/src/signal/constants.rs b/src/libos/src/signal/constants.rs new file mode 100644 index 00000000..f8b6dfd0 --- /dev/null +++ b/src/libos/src/signal/constants.rs @@ -0,0 +1,58 @@ +use super::SigNum; +use crate::prelude::*; + +/// Standard signals +pub(super) const MIN_STD_SIG_NUM: u8 = 1; +pub(super) const MAX_STD_SIG_NUM: u8 = 31; // inclusive +/// Real-time signals +pub(super) const MIN_RT_SIG_NUM: u8 = 32; +pub(super) const MAX_RT_SIG_NUM: u8 = 64; // inclusive +/// Count the number of signals +pub(super) const COUNT_STD_SIGS: usize = 31; +pub(super) const COUNT_RT_SIGS: usize = 33; +pub(super) const COUNT_ALL_SIGS: usize = 64; + +macro_rules! define_std_signums { + ( $( $name: ident = $num: expr ),+, ) => { + $( + pub const $name : SigNum = unsafe { + SigNum::from_u8_unchecked($num) + }; + )* + } +} + +// Define the standard signal numbers as SigNum +define_std_signums! 
{ + SIGHUP = 1, // Hangup detected on controlling terminal or death of controlling process + SIGINT = 2, // Interrupt from keyboard + SIGQUIT = 3, // Quit from keyboard + SIGILL = 4, // Illegal Instruction + SIGTRAP = 5, // Trace/breakpoint trap + SIGABRT = 6, // Abort signal from abort(3) + SIGBUS = 7, // Bus error (bad memory access) + SIGFPE = 8, // Floating-point exception + SIGKILL = 9, // Kill signal + SIGUSR1 = 10, // User-defined signal 1 + SIGSEGV = 11, // Invalid memory reference + SIGUSR2 = 12, // User-defined signal 2 + SIGPIPE = 13, // Broken pipe: write to pipe with no readers; see pipe(7) + SIGALRM = 14, // Timer signal from alarm(2) + SIGTERM = 15, // Termination signal + SIGSTKFLT = 16, // Stack fault on coprocessor (unused) + SIGCHLD = 17, // Child stopped or terminated + SIGCONT = 18, // Continue if stopped + SIGSTOP = 19, // Stop process + SIGTSTP = 20, // Stop typed at terminal + SIGTTIN = 21, // Terminal input for background process + SIGTTOU = 22, // Terminal output for background process + SIGURG = 23, // Urgent condition on socket (4.2BSD) + SIGXCPU = 24, // CPU time limit exceeded (4.2BSD); see setrlimit(2) + SIGXFSZ = 25, // File size limit exceeded (4.2BSD); see setrlimit(2) + SIGVTALRM = 26, // Virtual alarm clock (4.2BSD) + SIGPROF = 27, // Profiling timer expired + SIGWINCH = 28, // Window resize signal (4.3BSD, Sun) + SIGIO = 29, // I/O now possible (4.2BSD) + SIGPWR = 30, // Power failure (System V) + SIGSYS = 31, // Bad system call (SVr4); see also seccomp(2) +} diff --git a/src/libos/src/signal/do_kill.rs b/src/libos/src/signal/do_kill.rs new file mode 100644 index 00000000..8cfe8d35 --- /dev/null +++ b/src/libos/src/signal/do_kill.rs @@ -0,0 +1,76 @@ +use super::signals::{UserSignal, UserSignalKind}; +use super::{SigNum, Signal}; +use crate::prelude::*; +use crate::process::{table, ProcessFilter, ProcessRef, ProcessStatus, ThreadRef, ThreadStatus}; + +pub fn do_kill(filter: ProcessFilter, signum: SigNum) -> Result<()> { + 
debug!("do_kill: filter: {:?}, signum: {:?}", &filter, &signum); + + let pid = current!().process().pid(); + let uid = 0; + let processes = get_processes(&filter)?; + for process in processes { + if process.status() == ProcessStatus::Zombie { + continue; + } + + let signal = Box::new(UserSignal::new(signum, UserSignalKind::Kill, pid, uid)); + let mut sig_queues = process.sig_queues().lock().unwrap(); + sig_queues.enqueue(signal); + } + Ok(()) +} + +fn get_processes(filter: &ProcessFilter) -> Result> { + let processes = match filter { + ProcessFilter::WithAnyPid => table::get_all_processes(), + ProcessFilter::WithPid(pid) => { + let process = table::get_process(*pid)?; + vec![process] + } + ProcessFilter::WithPgid(pgid) => { + // TODO: implement O(1) lookup for a process group + let processes: Vec = table::get_all_processes() + .into_iter() + .filter(|proc_ref| proc_ref.pgid() == *pgid) + .collect(); + if processes.len() == 0 { + return_errno!(EINVAL, "invalid pgid"); + } + processes + } + }; + Ok(processes) +} + +pub fn do_tgkill(pid: Option, tid: pid_t, signum: SigNum) -> Result<()> { + debug!( + "do_tgkill: pid: {:?}, tid: {:?}, signum: {:?}", + &pid, &tid, &signum + ); + + let thread = table::get_thread(tid)?; + if let Some(pid) = pid { + if pid != thread.process().pid() { + return_errno!(EINVAL, "the combination of pid and tid is not valid"); + } + } + + if thread.status() == ThreadStatus::Exited { + return Ok(()); + } + + let signal = { + let src_pid = current!().process().pid(); + let src_uid = 0; + Box::new(UserSignal::new( + signum, + UserSignalKind::Tkill, + src_pid, + src_uid, + )) + }; + let mut sig_queues = thread.sig_queues().lock().unwrap(); + sig_queues.enqueue(signal); + Ok(()) +} diff --git a/src/libos/src/signal/do_sigaction.rs b/src/libos/src/signal/do_sigaction.rs new file mode 100644 index 00000000..71dc2b49 --- /dev/null +++ b/src/libos/src/signal/do_sigaction.rs @@ -0,0 +1,26 @@ +use super::constants::*; +use super::{SigAction, SigNum}; +use 
crate::prelude::*; + +pub fn do_rt_sigaction(signum: SigNum, new_sa: Option) -> Result { + debug!( + "do_rt_sigaction: signum: {:?}, new_sa: {:?}", + &signum, &new_sa + ); + + if signum == SIGKILL || signum == SIGSTOP { + return_errno!( + EINVAL, + "The actions for SIGKILL or SIGSTOP cannot be changed" + ); + } + + let thread = current!(); + let process = thread.process(); + let mut sig_dispositions = process.sig_dispositions().write().unwrap(); + let old_sa = sig_dispositions.get(signum); + if let Some(new_sa) = new_sa { + sig_dispositions.set(signum, new_sa); + } + Ok(old_sa) +} diff --git a/src/libos/src/signal/do_sigpending.rs b/src/libos/src/signal/do_sigpending.rs new file mode 100644 index 00000000..e82a1d4a --- /dev/null +++ b/src/libos/src/signal/do_sigpending.rs @@ -0,0 +1,13 @@ +use super::SigSet; +use crate::prelude::*; + +pub fn do_sigpending() -> Result { + debug!("do_sigpending"); + + let thread = current!(); + let process = thread.process(); + let pending = (thread.sig_queues().lock().unwrap().pending() + | process.sig_queues().lock().unwrap().pending()) + & *thread.sig_mask().read().unwrap(); + Ok(pending) +} diff --git a/src/libos/src/signal/do_sigprocmask.rs b/src/libos/src/signal/do_sigprocmask.rs new file mode 100644 index 00000000..de50e7c0 --- /dev/null +++ b/src/libos/src/signal/do_sigprocmask.rs @@ -0,0 +1,62 @@ +use super::constants::*; +use super::{sigset_t, SigSet}; +use crate::prelude::*; + +pub fn do_rt_sigprocmask( + op_and_set: Option<(MaskOp, &sigset_t)>, + oldset: Option<&mut sigset_t>, +) -> Result<()> { + debug!( + "do_rt_sigprocmask: op_and_set: {:?}, oldset: {:?}", + op_and_set.map(|(op, set)| (op, SigSet::from_c(*set))), + oldset + ); + + let thread = current!(); + let mut sig_mask = thread.sig_mask().write().unwrap(); + if let Some(oldset) = oldset { + *oldset = sig_mask.to_c(); + } + if let Some((op, &set)) = op_and_set { + let set = { + let mut set = SigSet::from_c(set); + // According to man pages, "it is not possible to 
block SIGKILL or SIGSTOP. + // Attempts to do so are silently ignored." + set -= SIGKILL; + set -= SIGSTOP; + set + }; + match op { + MaskOp::Block => { + *sig_mask |= set; + } + MaskOp::Unblock => { + *sig_mask &= !set; + } + MaskOp::SetMask => { + *sig_mask = set; + } + }; + } + Ok(()) +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +#[repr(u32)] +pub enum MaskOp { + Block = 0, + Unblock = 1, + SetMask = 2, +} + +impl MaskOp { + pub fn from_u32(raw: u32) -> Result { + let op = match raw { + 0 => MaskOp::Block, + 1 => MaskOp::Unblock, + 2 => MaskOp::SetMask, + _ => return_errno!(EINVAL, "invalid mask op"), + }; + Ok(op) + } +} diff --git a/src/libos/src/signal/do_sigreturn.rs b/src/libos/src/signal/do_sigreturn.rs new file mode 100644 index 00000000..f9bd9853 --- /dev/null +++ b/src/libos/src/signal/do_sigreturn.rs @@ -0,0 +1,346 @@ +use super::c_types::{mcontext_t, siginfo_t, ucontext_t}; +use super::constants::SIGKILL; +use super::{SigAction, SigActionFlags, SigDefaultAction, SigSet, Signal}; +use crate::prelude::*; +use crate::process::{ProcessRef, TermStatus, ThreadRef}; +use crate::syscall::CpuContext; + +pub fn do_rt_sigreturn(curr_user_ctxt: &mut CpuContext) -> Result<()> { + debug!("do_rt_sigreturn"); + let last_user_ctxt = { + let last_user_ctxt = PRE_USER_CONTEXTS.with(|ref_cell| { + let mut stack = ref_cell.borrow_mut(); + stack.pop() + }); + if last_user_ctxt.is_none() { + let term_status = TermStatus::Killed(SIGKILL); + current!().process().force_exit(term_status); + return_errno!( + EINVAL, + "sigreturn should not have been called; kill this process" + ); + } + unsafe { &*last_user_ctxt.unwrap() } + }; + *curr_user_ctxt = *last_user_ctxt; + Ok(()) +} + +/// Deliver a queued signal for the current thread, respecting the thread's +/// signal mask. +/// +/// The delivery of a signal means two things: 1) dequeuing the signal from +/// the per-thread or per-process signal queue, and 2) handling the signal +/// according to the signal disposition. 
+/// +/// When handling a signal, one of the three actions below will be done: +/// +/// 1. Ignore the signal. This is the easy part. +/// +/// 2. Terminate the process if the signal is fatal. This is called "force exit". +/// +/// 3. Call a user-registered signal handler. In this case, the current CPU context +/// will be modified so that the user-registered signal handler will be called +/// upon returning to the user space when the current syscall is finished. +/// +/// **Requirement.** This must be called only once during the execution of a +/// syscall and at a very late stage. +/// +/// **Post-condition.** The temporary signal mask of the current thread is cleared. +pub fn deliver_signal(cpu_context: &mut CpuContext) { + let thread = current!(); + let process = thread.process(); + + if process.is_forced_exit().is_none() { + do_deliver_signal(&thread, &process, cpu_context); + } + + // Ensure the tmp signal mask is cleared before sysret + let mut tmp_sig_mask = thread.sig_tmp_mask().write().unwrap(); + *tmp_sig_mask = SigSet::new_empty(); +} + +fn do_deliver_signal(thread: &ThreadRef, process: &ProcessRef, cpu_context: &mut CpuContext) { + loop { + // Dequeue a signal, respecting the signal mask and tmp mask + let sig_mask = *thread.sig_mask().read().unwrap() | *thread.sig_tmp_mask().read().unwrap(); + let signal = { + #[rustfmt::skip] + let signal_opt = process.sig_queues().lock().unwrap().dequeue(&sig_mask) + .or_else(|| thread.sig_queues().lock().unwrap().dequeue(&sig_mask)); + if signal_opt.is_none() { + return; + } + signal_opt.unwrap() + }; + + let continue_handling = handle_signal(signal, thread, process, cpu_context); + if !continue_handling { + break; + } + } +} + +/// Force delivering the given signal to the current thread, without checking the thread's +/// signal mask. +/// +/// **Post-condition.** The tmp signal mask of the current thread is all set. This avoids +/// delivering two signals during one execution of a syscall. 
+/// +/// **Requirement.** This function can only be called at most once during the execution of +/// a syscall. +pub fn force_signal(signal: Box, cpu_context: &mut CpuContext) { + let thread = current!(); + let process = thread.process(); + + handle_signal(signal, &thread, &process, cpu_context); + + // Temporarily block all signals from being delivered until this syscall is + // over. This ensures that the updated curr_cpu_ctxt will not be overridden + // to deliver any other signal. + let mut tmp_sig_mask = thread.sig_tmp_mask().write().unwrap(); + *tmp_sig_mask = SigSet::new_full(); +} + +fn handle_signal( + signal: Box, + thread: &ThreadRef, + process: &ProcessRef, + cpu_context: &mut CpuContext, +) -> bool { + let is_sig_stack_full = PRE_USER_CONTEXTS.with(|ref_cell| { + let stack = ref_cell.borrow(); + stack.full() + }); + if is_sig_stack_full { + panic!("the nested signal is too deep to handle"); + } + + let action = process.sig_dispositions().read().unwrap().get(signal.num()); + debug!( + "Handle signal: signal: {:?}, action: {:?}", + &signal, &action + ); + + let continue_handling = match action { + SigAction::Ign => true, + SigAction::Dfl => { + let default_action = SigDefaultAction::from_signum(signal.num()); + match default_action { + SigDefaultAction::Ign => true, + SigDefaultAction::Term | SigDefaultAction::Core => { + let term_status = TermStatus::Killed(signal.num()); + process.force_exit(term_status); + false + } + SigDefaultAction::Stop => { + warn!("SIGSTOP is unsupported"); + true + } + SigDefaultAction::Cont => { + warn!("SIGCONT is unsupported"); + true + } + } + } + SigAction::User { + handler_addr, + flags, + restorer_addr, + mask, + } => { + let ret = handle_signals_by_user( + signal, + handler_addr, + flags, + restorer_addr, + mask, + cpu_context, + ); + if let Err(_) = ret { + todo!("kill the process if any error"); + } + false + } + }; + continue_handling +} + +fn handle_signals_by_user( + signal: Box, + handler_addr: usize, + flags:
SigActionFlags, + restorer_addr: usize, + mask: SigSet, + curr_user_ctxt: &mut CpuContext, +) -> Result<()> { + // Represent the user stack in a memory safe way + let mut user_stack = { + const BIG_ENOUGH_GAP: u64 = 1024; + const BIG_ENOUGH_SIZE: u64 = 4096; + let stack_top = (curr_user_ctxt.rsp - BIG_ENOUGH_GAP) as usize; + let stack_size = BIG_ENOUGH_SIZE as usize; + // TODO: validate the memory range of the stack + unsafe { Stack::new(stack_top, stack_size)? } + }; + + // Prepare the user stack in four steps. + // + // 1. Allocate and init siginfo_t on the user stack. + let info = { + let info = user_stack.alloc::()?; + *info = signal.to_info(); + info as *mut siginfo_t + }; + // 2. Allocate and init ucontext_t on the user stack. + let ucontext = { + // The x86 calling convention requires rsp to be 16-byte aligned. + // The following allocation on stack is right before we "call" the + // signal handler. So we need to make sure the allocation is at least + // 16-byte aligned. + let ucontext = user_stack.alloc_aligned::(16)?; + // TODO: set all fields in ucontext + *ucontext = unsafe { std::mem::zeroed() }; + ucontext as *mut ucontext_t + }; + // 3. Save the current user CPU context on the stack of the signal handler + // so that we can restore the CPU context upon `sigreturn` syscall. + let saved_user_ctxt = { + let saved_user_ctxt = unsafe { &mut (*ucontext).uc_mcontext.inner }; + *saved_user_ctxt = *curr_user_ctxt; + saved_user_ctxt as *mut CpuContext + }; + // 4. Set up the call return address on the stack before we "call" the signal handler + let handler_stack_top = { + let handler_stack_top = user_stack.alloc::()?; + *handler_stack_top = restorer_addr; + handler_stack_top as *mut usize + }; + // TODO: mask signals while the signal handler is executing + + // Modify the current user CPU context so that the signal handler will + // be "called" upon returning back to the user space and when the signal + // handler finishes, the CPU will jump to the restorer. 
+ curr_user_ctxt.rsp = handler_stack_top as u64; + curr_user_ctxt.rip = handler_addr as u64; + // Prepare the three arguments for the signal handler + curr_user_ctxt.rdi = signal.num().as_u8() as u64; + curr_user_ctxt.rsi = info as u64; + curr_user_ctxt.rdx = ucontext as u64; + + PRE_USER_CONTEXTS.with(|ref_cell| { + let mut stack = ref_cell.borrow_mut(); + stack.push(saved_user_ctxt).unwrap(); + }); + Ok(()) +} + +/// Represent and manipulate a stack in a memory-safe way +struct Stack { + pointer: usize, + bottom: usize, +} + +impl Stack { + /// Create a new region of memory to use as stack + pub unsafe fn new(stack_top: usize, stack_size: usize) -> Result { + if stack_top <= stack_size { + return_errno!(EINVAL, "stack address may underflow"); + } + let pointer = stack_top; + let bottom = stack_top - stack_size; + Ok(Stack { pointer, bottom }) + } + + /// Get the size of the free space in the stack + pub fn size(&self) -> usize { + self.pointer - self.bottom + } + + /// Allocate a mutable object on the stack. + /// + /// The alignment of the object will be `std::mem::align_of::<T>()`. + pub fn alloc(&mut self) -> Result<&mut T> { + self.do_alloc_aligned::(1) + } + + /// Allocate a mutable object on the stack. + /// + /// The alignment of the object will be `max(align, std::mem::align_of::<T>())`. + pub fn alloc_aligned(&mut self, align: usize) -> Result<&mut T> { + if !align.is_power_of_two() { + return_errno!(EINVAL, "align must be a power of two"); + } + self.do_alloc_aligned::(align) + } + + /// Allocate a mutable object on the stack. + /// + /// The alignment of the object will be `max(align, std::mem::align_of::<T>())`.
+ fn do_alloc_aligned(&mut self, align: usize) -> Result<&mut T> { + // Check precondition + debug_assert!(align.is_power_of_two()); + + // Calculate the pointer of the object + let new_pointer = { + let size = std::mem::size_of::(); + let align = std::mem::align_of::().max(align); + + let mut pointer = self.pointer; + if pointer < size { + return_errno!(ENOMEM, "not enough memory"); + } + pointer -= size; + pointer = align_down(pointer, align); + if pointer < self.bottom { + return_errno!(ENOMEM, "not enough memory"); + } + pointer + }; + self.pointer = new_pointer; + + let obj_ref = unsafe { &mut *(new_pointer as *mut T) }; + Ok(obj_ref) + } +} + +thread_local! { + static PRE_USER_CONTEXTS: RefCell = Default::default(); +} + +#[derive(Debug, Default)] +struct CpuContextStack { + stack: [Option<*mut CpuContext>; 32], + count: usize, +} + +impl CpuContextStack { + pub fn new() -> Self { + Default::default() + } + + pub fn full(&self) -> bool { + self.count == self.stack.len() + } + + pub fn empty(&self) -> bool { + self.count == 0 + } + + pub fn push(&mut self, cpu_context: *mut CpuContext) -> Result<()> { + if self.full() { + return_errno!(ENOMEM, "cpu context stack is full"); + } + self.stack[self.count] = Some(cpu_context); + self.count += 1; + Ok(()) + } + + pub fn pop(&mut self) -> Option<*mut CpuContext> { + if self.empty() { + return None; + } + self.count -= 1; + self.stack[self.count].take() + } +} diff --git a/src/libos/src/signal/mod.rs b/src/libos/src/signal/mod.rs new file mode 100644 index 00000000..2dcd29f9 --- /dev/null +++ b/src/libos/src/signal/mod.rs @@ -0,0 +1,31 @@ +//! The signal subsystem. 
+ +use crate::prelude::*; + +use sig_action::{SigAction, SigActionFlags, SigDefaultAction}; + +pub use self::c_types::{sigaction_t, sigset_t}; +pub use self::constants::*; +pub use self::do_sigreturn::{deliver_signal, force_signal}; +pub use self::sig_dispositions::SigDispositions; +pub use self::sig_num::SigNum; +pub use self::sig_queues::SigQueues; +pub use self::sig_set::SigSet; +pub use self::signals::{FaultSignal, KernelSignal, Signal, UserSignal, UserSignalKind}; +pub use self::syscalls::*; + +mod c_types; +mod do_kill; +mod do_sigaction; +mod do_sigpending; +mod do_sigprocmask; +mod do_sigreturn; +mod sig_action; +mod sig_dispositions; +mod sig_num; +mod sig_queues; +mod sig_set; +mod signals; +mod syscalls; + +pub mod constants; diff --git a/src/libos/src/signal/sig_action.rs b/src/libos/src/signal/sig_action.rs new file mode 100644 index 00000000..556d2e4c --- /dev/null +++ b/src/libos/src/signal/sig_action.rs @@ -0,0 +1,132 @@ +use super::c_types::{sigaction_t, SIG_DFL, SIG_IGN}; +use super::constants::*; +use super::{SigNum, SigSet}; +use crate::prelude::*; + +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum SigAction { + Dfl, // Default action + Ign, // Ignore this signal + User { + // User-given handler + handler_addr: usize, + flags: SigActionFlags, + restorer_addr: usize, + mask: SigSet, + }, +} + +impl Default for SigAction { + fn default() -> Self { + SigAction::Dfl + } +} + +impl SigAction { + pub fn from_c(sa_c: &sigaction_t) -> Result { + let sa = match sa_c.handler { + SIG_DFL => SigAction::Dfl, + SIG_IGN => SigAction::Ign, + _ => SigAction::User { + handler_addr: sa_c.handler as usize, + flags: SigActionFlags::from_u32(sa_c.flags)?, + restorer_addr: sa_c.restorer as usize, + mask: SigSet::from_c(sa_c.mask), + }, + }; + Ok(sa) + } + + pub fn to_c(&self) -> sigaction_t { + match self { + SigAction::Dfl => sigaction_t { + handler: SIG_DFL, + flags: 0, + restorer: std::ptr::null(), + mask: 0, + }, + SigAction::Ign => sigaction_t { + handler: 
SIG_IGN, + flags: 0, + restorer: std::ptr::null(), + mask: 0, + }, + SigAction::User { + handler_addr, + flags, + restorer_addr, + mask, + } => sigaction_t { + handler: *handler_addr as *const c_void, + flags: flags.to_u32(), + restorer: *restorer_addr as *mut c_void, + mask: mask.to_c(), + }, + } + } +} + +bitflags! { + pub struct SigActionFlags: u32 { + const SA_NOCLDSTOP = 1; + const SA_NOCLDWAIT = 2; + const SA_SIGINFO = 4; + const SA_ONSTACK = 0x08000000; + const SA_RESTART = 0x10000000; + const SA_NODEFER = 0x40000000; + const SA_RESETHAND = 0x80000000; + const SA_RESTORER = 0x04000000; + } +} + +impl SigActionFlags { + pub fn from_u32(bits: u32) -> Result { + let flags = + Self::from_bits(bits).ok_or_else(|| errno!(EINVAL, "invalid sigaction flags"))?; + Ok(flags) + } + + pub fn to_u32(&self) -> u32 { + self.bits() + } +} + +#[derive(Debug, Copy, Clone)] +pub enum SigDefaultAction { + Term, // Default action is to terminate the process. + Ign, // Default action is to ignore the signal. + Core, // Default action is to terminate the process and dump core (see core(5)). + Stop, // Default action is to stop the process. + Cont, // Default action is to continue the process if it is currently stopped. 
+} + +impl SigDefaultAction { + pub fn from_signum(num: SigNum) -> SigDefaultAction { + match num { + SIGABRT | // = SIGIOT + SIGBUS | + SIGFPE | + SIGILL | + SIGQUIT | + SIGSEGV | + SIGSYS | // = SIGUNUSED + SIGTRAP | + SIGXCPU | + SIGXFSZ + => SigDefaultAction::Core, + SIGCHLD | + SIGURG | + SIGWINCH + => SigDefaultAction::Ign, + SIGCONT + => SigDefaultAction::Cont, + SIGSTOP | + SIGTSTP | + SIGTTIN | + SIGTTOU + => SigDefaultAction::Stop, + _ + => SigDefaultAction::Term, + } + } +} diff --git a/src/libos/src/signal/sig_dispositions.rs b/src/libos/src/signal/sig_dispositions.rs new file mode 100644 index 00000000..736ad77b --- /dev/null +++ b/src/libos/src/signal/sig_dispositions.rs @@ -0,0 +1,88 @@ +use std::fmt; + +use super::constants::*; +use super::{SigAction, SigNum}; +use crate::prelude::*; + +#[derive(Copy, Clone)] +pub struct SigDispositions { + // SigNum -> SigAction + map: [SigAction; COUNT_ALL_SIGS], +} + +impl SigDispositions { + pub fn new() -> Self { + Self { + map: [Default::default(); COUNT_ALL_SIGS], + } + } + + pub fn get(&self, num: SigNum) -> SigAction { + let idx = Self::num_to_idx(num); + self.map[idx] + } + + pub fn set(&mut self, num: SigNum, sa: SigAction) { + let idx = Self::num_to_idx(num); + self.map[idx] = sa; + } + + pub fn iter<'a>(&'a self) -> SigDispositionsIter<'a> { + SigDispositionsIter::new(self) + } + + fn num_to_idx(num: SigNum) -> usize { + (num.as_u8() - MIN_STD_SIG_NUM) as usize + } + + fn idx_to_num(idx: usize) -> SigNum { + unsafe { SigNum::from_u8_unchecked(idx as u8 + MIN_STD_SIG_NUM) } + } +} + +pub struct SigDispositionsIter<'a> { + next_idx: usize, + dispos: &'a SigDispositions, +} + +impl<'a> SigDispositionsIter<'a> { + pub fn new(dispos: &'a SigDispositions) -> Self { + SigDispositionsIter { + next_idx: 0, + dispos: dispos, + } + } +} + +impl<'a> std::iter::Iterator for SigDispositionsIter<'a> { + type Item = (SigNum, &'a SigAction); + + fn next(&mut self) -> Option { + let map = &self.dispos.map; + if 
self.next_idx >= map.len() { + return None; + } + + let item = { + let signum = SigDispositions::idx_to_num(self.next_idx); + let action = &map[self.next_idx]; + Some((signum, action)) + }; + self.next_idx += 1; + item + } +} + +impl Default for SigDispositions { + fn default() -> Self { + Self::new() + } +} + +impl fmt::Debug for SigDispositions { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "SigDispositions "); + let non_default_dispositions = self.iter().filter(|(_, action)| **action != SigAction::Dfl); + f.debug_map().entries(non_default_dispositions).finish() + } +} diff --git a/src/libos/src/signal/sig_num.rs b/src/libos/src/signal/sig_num.rs new file mode 100644 index 00000000..6f551ead --- /dev/null +++ b/src/libos/src/signal/sig_num.rs @@ -0,0 +1,90 @@ +use std::fmt; + +use super::constants::*; +use crate::prelude::*; + +#[repr(C)] +#[derive(Copy, Clone, PartialEq, Eq)] +pub struct SigNum { + num: u8, +} + +impl SigNum { + pub fn from_u8(num: u8) -> Result { + if num < MIN_STD_SIG_NUM || num > MAX_RT_SIG_NUM { + return_errno!(EINVAL, "not an invalid number for signal"); + } + Ok(unsafe { Self::from_u8_unchecked(num) }) + } + + pub const unsafe fn from_u8_unchecked(num: u8) -> SigNum { + SigNum { num } + } + + pub fn as_u8(&self) -> u8 { + self.num + } + + pub fn is_std(&self) -> bool { + self.num <= MAX_STD_SIG_NUM + } + + pub fn is_real_time(&self) -> bool { + self.num >= MIN_RT_SIG_NUM + } +} + +macro_rules! 
std_signum_to_name { + ( $std_signum: expr, { $( $sig_name: ident = $sig_num_u8: expr ),+, } ) => { + match $std_signum { + $( + $sig_name => stringify!($sig_name), + )* + _ => unreachable!(), + } + } +} + +impl fmt::Debug for SigNum { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + #![deny(unreachable_patterns)] + if self.is_std() { + let name = std_signum_to_name!(*self, { + SIGHUP = 1, // Hangup detected on controlling terminal or death of controlling process + SIGINT = 2, // Interrupt from keyboard + SIGQUIT = 3, // Quit from keyboard + SIGILL = 4, // Illegal Instruction + SIGTRAP = 5, // Trace/breakpoint trap + SIGABRT = 6, // Abort signal from abort(3) + SIGBUS = 7, // Bus error (bad memory access) + SIGFPE = 8, // Floating-point exception + SIGKILL = 9, // Kill signal + SIGUSR1 = 10, // User-defined signal 1 + SIGSEGV = 11, // Invalid memory reference + SIGUSR2 = 12, // User-defined signal 2 + SIGPIPE = 13, // Broken pipe: write to pipe with no readers; see pipe(7) + SIGALRM = 14, // Timer signal from alarm(2) + SIGTERM = 15, // Termination signal + SIGSTKFLT = 16, // Stack fault on coprocessor (unused) + SIGCHLD = 17, // Child stopped or terminated + SIGCONT = 18, // Continue if stopped + SIGSTOP = 19, // Stop process + SIGTSTP = 20, // Stop typed at terminal + SIGTTIN = 21, // Terminal input for background process + SIGTTOU = 22, // Terminal output for background process + SIGURG = 23, // Urgent condition on socket (4.2BSD) + SIGXCPU = 24, // CPU time limit exceeded (4.2BSD); see setrlimit(2) + SIGXFSZ = 25, // File size limit exceeded (4.2BSD); see setrlimit(2) + SIGVTALRM = 26, // Virtual alarm clock (4.2BSD) + SIGPROF = 27, // Profiling timer expired + SIGWINCH = 28, // Window resize signal (4.3BSD, Sun) + SIGIO = 29, // I/O now possible (4.2BSD) + SIGPWR = 30, // Power failure (System V) + SIGSYS = 31, // Bad system call (SVr4); see also seccomp(2) + }); + write!(f, "SigNum (#{} = {})", self.num, name) + } else { + write!(f, "SigNum 
(#{}, real-time)", self.num) + } + } +} diff --git a/src/libos/src/signal/sig_queues.rs b/src/libos/src/signal/sig_queues.rs new file mode 100644 index 00000000..5422b42f --- /dev/null +++ b/src/libos/src/signal/sig_queues.rs @@ -0,0 +1,188 @@ +use std::collections::VecDeque; +use std::fmt; + +use super::constants::*; +use super::{SigNum, SigSet, Signal}; +use crate::prelude::*; + +pub struct SigQueues { + count: usize, + has_kill: bool, + std_queues: Vec>>, + rt_queues: Vec>>, +} + +impl SigQueues { + pub fn new() -> Self { + let count = 0; + let has_kill = false; + let std_queues = (0..COUNT_STD_SIGS).map(|_| None).collect(); + let rt_queues = (0..COUNT_RT_SIGS).map(|_| Default::default()).collect(); + SigQueues { + count, + has_kill, + std_queues, + rt_queues, + } + } + + pub fn empty(&self) -> bool { + self.count == 0 + } + + pub fn enqueue(&mut self, signal: Box) { + let signum = signal.num(); + if signum.is_std() { + // Standard signals + // + // From signal(7): + // + // Standard signals do not queue. If multiple instances of a standard + // signal are generated while that signal is blocked, then only one + // instance of the signal is marked as pending (and the signal will be + // delivered just once when it is unblocked). In the case where a + // standard signal is already pending, the siginfo_t structure (see + // sigaction(2)) associated with that signal is not overwritten on + // arrival of subsequent instances of the same signal. Thus, the + // process will receive the information associated with the first + // instance of the signal. 
+ let queue = self.get_std_queue_mut(signum); + if queue.is_some() { + // If there is already a signal pending, just ignore all subsequent signals + return; + } + *queue = Some(signal); + self.count += 1; + } else { + // Real-time signals + let queue = self.get_rt_queue_mut(signum); + queue.push_back(signal); + self.count += 1; + } + } + + pub fn dequeue(&mut self, blocked: &SigSet) -> Option> { + // Fast path for the common case of no pending signals + if self.empty() { + return None; + } + + // Deliver standard signals. + // + // According to signal(7): + // If both standard and real-time signals are pending for a process, + // POSIX leaves it unspecified which is delivered first. Linux, like + // many other implementations, gives priority to standard signals in + // this case. + + // POSIX leaves unspecified which to deliver first if there are multiple + // pending standard signals. So we are free to define our own. The + // principle is to give more urgent signals higher priority (like SIGKILL). + const ORDERED_STD_SIGS: [SigNum; COUNT_STD_SIGS] = [ + SIGKILL, SIGTERM, SIGSTOP, SIGCONT, SIGSEGV, SIGILL, SIGHUP, SIGINT, SIGQUIT, SIGTRAP, + SIGABRT, SIGBUS, SIGFPE, SIGUSR1, SIGUSR2, SIGPIPE, SIGALRM, SIGSTKFLT, SIGCHLD, + SIGTSTP, SIGTTIN, SIGTTOU, SIGURG, SIGXCPU, SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH, + SIGIO, SIGPWR, SIGSYS, + ]; + for &signum in &ORDERED_STD_SIGS { + if blocked.contains(signum) { + continue; + } + + let queue = self.get_std_queue_mut(signum); + let signal = queue.take(); + if signal.is_some() { + self.count -= 1; + return signal; + } + } + + // If no standard signals, then deliver real-time signals. + // + // According to signal (7): + // Real-time signals are delivered in a guaranteed order. Multiple + // real-time signals of the same type are delivered in the order + // they were sent. If different real-time signals are sent to a + // process, they are delivered starting with the lowest-numbered + // signal. 
(I.e., low-numbered signals have highest priority.) + for signum in MIN_RT_SIG_NUM..=MAX_RT_SIG_NUM { + let signum = unsafe { SigNum::from_u8_unchecked(signum) }; + if blocked.contains(signum) { + continue; + } + + let queue = self.get_rt_queue_mut(signum); + let signal = queue.pop_front(); + if signal.is_some() { + self.count -= 1; + return signal; + } + } + + // There must be pending but blocked signals + None + } + + pub fn pending(&self) -> SigSet { + let mut pending_sigs = SigSet::new_empty(); + for signum in MIN_STD_SIG_NUM..=MAX_STD_SIG_NUM { + let signum = unsafe { SigNum::from_u8_unchecked(signum) }; + let queue = self.get_std_queue(signum); + if queue.is_some() { + pending_sigs += signum; + } + } + for signum in MIN_RT_SIG_NUM..=MAX_RT_SIG_NUM { + let signum = unsafe { SigNum::from_u8_unchecked(signum) }; + let queue = self.get_rt_queue(signum); + if !queue.is_empty() { + pending_sigs += signum; + } + } + pending_sigs + } + + fn get_std_queue(&self, signum: SigNum) -> &Option<Box<dyn Signal>> { + debug_assert!(signum.is_std()); + let idx = (signum.as_u8() - MIN_STD_SIG_NUM) as usize; + &self.std_queues[idx] + } + + fn get_rt_queue(&self, signum: SigNum) -> &VecDeque<Box<dyn Signal>> { + debug_assert!(signum.is_real_time()); + let idx = (signum.as_u8() - MIN_RT_SIG_NUM) as usize; + &self.rt_queues[idx] + } + + fn get_std_queue_mut(&mut self, signum: SigNum) -> &mut Option<Box<dyn Signal>> { + debug_assert!(signum.is_std()); + let idx = (signum.as_u8() - MIN_STD_SIG_NUM) as usize; + &mut self.std_queues[idx] + } + + fn get_rt_queue_mut(&mut self, signum: SigNum) -> &mut VecDeque<Box<dyn Signal>> { + debug_assert!(signum.is_real_time()); + let idx = (signum.as_u8() - MIN_RT_SIG_NUM) as usize; + &mut self.rt_queues[idx] + } +} + +impl Default for SigQueues { + fn default() -> Self { + Self::new() + } +} + +impl fmt::Debug for SigQueues { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let signals = self + .std_queues + .iter() + .flatten() + .chain(self.rt_queues.iter().flatten()); + write!(f, "SigQueues {{ 
")?; + write!(f, "queue = ")?; + f.debug_list().entries(signals).finish()?; + write!(f, " }}") + } +} diff --git a/src/libos/src/signal/sig_set.rs b/src/libos/src/signal/sig_set.rs new file mode 100644 index 00000000..31658d6f --- /dev/null +++ b/src/libos/src/signal/sig_set.rs @@ -0,0 +1,193 @@ +use std::fmt; +use std::iter; +use std::ops::{Add, AddAssign, BitAnd, BitAndAssign, BitOr, BitOrAssign, Not, Sub, SubAssign}; + +use super::constants::MIN_STD_SIG_NUM; +use super::{sigset_t, SigNum}; +use crate::prelude::*; + +#[derive(Copy, Clone, Default, PartialEq, Eq)] +pub struct SigSet { + bits: u64, +} + +impl SigSet { + pub const fn new_empty() -> Self { + Self::from_c(0 as sigset_t) + } + + pub const fn new_full() -> Self { + Self::from_c(!0 as sigset_t) + } + + pub const fn from_c(bits: sigset_t) -> Self { + let bits = bits as u64; + SigSet { bits } + } + + pub fn to_c(&self) -> sigset_t { + self.bits as sigset_t + } + + pub fn as_u64(&self) -> u64 { + self.bits + } + + /// Returns true if the set contains no signal at all. + pub fn empty(&self) -> bool { + self.bits == 0 + } + + /// Returns true if every bit of the set is on. + pub fn full(&self) -> bool { + self.bits == !0 + } + + pub fn count(&self) -> usize { + self.bits.count_ones() as usize + } + + pub fn contains(&self, signum: SigNum) -> bool { + let idx = Self::num_to_idx(signum); + (self.bits & (1_u64 << idx)) != 0 + } + + pub fn iter(&self) -> SigSetIter { + SigSetIter::new(self) + } + + fn num_to_idx(num: SigNum) -> usize { + (num.as_u8() - MIN_STD_SIG_NUM) as usize + } + + // Exact inverse of num_to_idx: both use MIN_STD_SIG_NUM as the offset. + fn idx_to_num(idx: usize) -> SigNum { + debug_assert!(idx < 64); + unsafe { SigNum::from_u8_unchecked(idx as u8 + MIN_STD_SIG_NUM) } + } +} + +pub struct SigSetIter<'a> { + sigset: &'a SigSet, + next_idx: usize, +} + +impl<'a> SigSetIter<'a> { + pub fn new(sigset: &'a SigSet) -> Self { + let next_idx = 0; + Self { sigset, next_idx } + } +} + +impl<'a> iter::Iterator for SigSetIter<'a> { + type Item = SigNum; + + fn next(&mut self) -> Option<Self::Item> { + let bits = &self.sigset.bits; + while self.next_idx < 64 && (*bits & (1 << self.next_idx)) == 0 { + 
self.next_idx += 1; + } + if self.next_idx == 64 { + return None; + } + let item = SigSet::idx_to_num(self.next_idx); + self.next_idx += 1; + Some(item) + } +} + +impl From<SigNum> for SigSet { + fn from(signum: SigNum) -> SigSet { + let mut sigset = SigSet::new_empty(); + sigset += signum; + sigset + } +} + +impl Not for SigSet { + type Output = Self; + + fn not(self) -> Self::Output { + Self::from_c(!self.bits) + } +} + +impl BitOr for SigSet { + type Output = Self; + + fn bitor(mut self, rhs: Self) -> Self { + self |= rhs; + self + } +} + +impl BitOrAssign for SigSet { + fn bitor_assign(&mut self, rhs: Self) { + self.bits |= rhs.bits; + } +} + +impl BitAnd for SigSet { + type Output = Self; + + fn bitand(mut self, rhs: Self) -> Self { + self &= rhs; + self + } +} + +impl BitAndAssign for SigSet { + fn bitand_assign(&mut self, rhs: Self) { + self.bits &= rhs.bits; + } +} + +impl Add<SigNum> for SigSet { + type Output = Self; + + fn add(mut self, rhs: SigNum) -> Self { + self += rhs; + self + } +} + +impl AddAssign<SigNum> for SigSet { + fn add_assign(&mut self, rhs: SigNum) { + let idx = Self::num_to_idx(rhs); + self.bits |= 1_u64 << idx; + } +} + +impl Sub<SigNum> for SigSet { + type Output = Self; + + fn sub(mut self, rhs: SigNum) -> Self { + self -= rhs; + self + } +} + +impl SubAssign<SigNum> for SigSet { + fn sub_assign(&mut self, rhs: SigNum) { + let idx = Self::num_to_idx(rhs); + self.bits &= !(1_u64 << idx); + } +} + +impl fmt::Debug for SigSet { + /// Prints the members when at most half of the bits are on, the missing + /// signals ("All except ...") when more than half are on, and "All" when + /// every bit is on. + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "SigSet {{ ")?; + match self.count() { + 0..=32 => { + f.debug_list().entries(self.iter()).finish()?; + } + 33..=63 => { + write!(f, "All except ")?; + let except_sigset = !*self; + f.debug_list().entries(except_sigset.iter()).finish()?; + } + 64 => { + // Every bit is on, so there is no exception to list. + write!(f, "All")?; + } + _ => unreachable!(), + } + write!(f, " }}") + } +} diff --git a/src/libos/src/signal/signals/fault.rs b/src/libos/src/signal/signals/fault.rs new file mode 100644 index 00000000..25d945dc --- /dev/null +++ 
b/src/libos/src/signal/signals/fault.rs @@ -0,0 +1,50 @@ +use super::super::c_types::*; +use super::super::constants::*; +use super::super::{SigNum, Signal}; +use crate::prelude::*; + +#[derive(Debug, Copy, Clone, PartialEq)] +pub struct FaultSignal { + num: SigNum, + code: i32, +} + +impl FaultSignal { + pub fn new(info: &sgx_exception_info_t) -> Self { + // FIXME: the following mapping from exception to signal is not accurate. + use sgx_exception_vector_t::*; + let (num, code) = match info.exception_vector { + // Divider exception + SGX_EXCEPTION_VECTOR_DE => (SIGFPE, FPE_INTDIV), + // Floating-point exception + SGX_EXCEPTION_VECTOR_MF | + // SIMD floating-point exception + SGX_EXCEPTION_VECTOR_XM => (SIGFPE, FPE_FLTDIV), + // Invalid opcode exception + SGX_EXCEPTION_VECTOR_UD | + // Debug exception: should not occur in enclave; treat is as #UD + SGX_EXCEPTION_VECTOR_DB | + // Break point exception: should not occur in enclave; treat is as #UD + SGX_EXCEPTION_VECTOR_BP => (SIGILL, ILL_ILLOPC), + // Bound range exception + SGX_EXCEPTION_VECTOR_BR => (SIGSEGV, SEGV_BNDERR), + // Alignment check exception + SGX_EXCEPTION_VECTOR_AC => (SIGBUS, BUS_ADRALN), + // TODO: handle page fault and general protection exceptions + _ => panic!("illegal exception: cannot be converted to signal"), + }; + Self { num, code } + } +} + +impl Signal for FaultSignal { + fn num(&self) -> SigNum { + self.num + } + + fn to_info(&self) -> siginfo_t { + let info = siginfo_t::new(self.num, self.code); + // TODO: set info.si_addr + info + } +} diff --git a/src/libos/src/signal/signals/kernel.rs b/src/libos/src/signal/signals/kernel.rs new file mode 100644 index 00000000..3e610701 --- /dev/null +++ b/src/libos/src/signal/signals/kernel.rs @@ -0,0 +1,26 @@ +use super::super::c_types::*; +use super::super::constants::*; +use super::super::{SigNum, Signal}; +use crate::prelude::*; + +#[derive(Debug, Copy, Clone, PartialEq)] +pub struct KernelSignal { + num: SigNum, +} + +impl KernelSignal { + pub 
fn new(num: SigNum) -> Self { + Self { num } + } +} + +impl Signal for KernelSignal { + fn num(&self) -> SigNum { + self.num + } + + fn to_info(&self) -> siginfo_t { + let info = siginfo_t::new(self.num, SI_KERNEL); + info + } +} diff --git a/src/libos/src/signal/signals/mod.rs b/src/libos/src/signal/signals/mod.rs new file mode 100644 index 00000000..13c0ae46 --- /dev/null +++ b/src/libos/src/signal/signals/mod.rs @@ -0,0 +1,20 @@ +/// Implementation of signals generated from various sources. +mod fault; +mod kernel; +mod user; + +pub use self::fault::FaultSignal; +pub use self::kernel::KernelSignal; +pub use self::user::{UserSignal, UserSignalKind}; + +use super::c_types::siginfo_t; +use super::SigNum; +use crate::prelude::*; + +pub trait Signal: Send + Sync + Debug { + /// Returns the number of the signal. + fn num(&self) -> SigNum; + + /// Returns the siginfo_t that gives more details about a signal. + fn to_info(&self) -> siginfo_t; +} diff --git a/src/libos/src/signal/signals/user.rs b/src/libos/src/signal/signals/user.rs new file mode 100644 index 00000000..ed3b25bb --- /dev/null +++ b/src/libos/src/signal/signals/user.rs @@ -0,0 +1,68 @@ +use super::super::c_types::*; +use super::super::constants::*; +use super::super::{SigNum, Signal}; +use crate::prelude::*; + +#[derive(Debug, Copy, Clone)] +pub struct UserSignal { + num: SigNum, + pid: pid_t, // sender's pid + uid: uid_t, // sender's uid + kind: UserSignalKind, +} + +#[derive(Debug, Copy, Clone)] +pub enum UserSignalKind { + Kill, + Tkill, + Sigqueue(sigval_t), +} + +unsafe impl Sync for UserSignalKind {} +unsafe impl Send for UserSignalKind {} + +impl UserSignal { + pub fn new(num: SigNum, kind: UserSignalKind, pid: pid_t, uid: uid_t) -> Self { + Self { + num, + kind, + pid, + uid, + } + } + + pub fn pid(&self) -> pid_t { + self.pid + } + + pub fn uid(&self) -> uid_t { + self.uid + } + + pub fn kind(&self) -> UserSignalKind { + self.kind + } +} + +impl Signal for UserSignal { + fn num(&self) -> SigNum { 
self.num + } + + fn to_info(&self) -> siginfo_t { + let code = match self.kind { + UserSignalKind::Kill => SI_USER, + UserSignalKind::Tkill => SI_TKILL, + UserSignalKind::Sigqueue(_) => SI_QUEUE, + }; + + let mut info = siginfo_t::new(self.num, code); + info.set_si_pid(self.pid); + info.set_si_uid(self.uid); + if let UserSignalKind::Sigqueue(val) = self.kind { + info.set_si_value(val); + } + + info + } +} diff --git a/src/libos/src/signal/syscalls.rs b/src/libos/src/signal/syscalls.rs new file mode 100644 index 00000000..4ce7503b --- /dev/null +++ b/src/libos/src/signal/syscalls.rs @@ -0,0 +1,121 @@ +use super::constants::*; +use super::do_sigprocmask::MaskOp; +use super::signals::FaultSignal; +use super::{sigaction_t, sigset_t, SigAction, SigNum, SigSet}; +use crate::prelude::*; +use crate::process::ProcessFilter; +use crate::syscall::CpuContext; + +pub fn do_rt_sigaction( + signum_c: c_int, + new_sa_c: *const sigaction_t, + old_sa_c: *mut sigaction_t, +) -> Result<isize> { + // C types -> Rust types + // + // Reject numbers that do not fit in a u8 before casting; otherwise the + // cast would silently truncate them into a different, valid signal. + if signum_c < 0 || signum_c > std::u8::MAX as c_int { + return_errno!(EINVAL, "signal number is out of range"); + } + let signum = SigNum::from_u8(signum_c as u8)?; + let new_sa = { + if !new_sa_c.is_null() { + let new_sa_c = unsafe { &*new_sa_c }; + let new_sa = SigAction::from_c(new_sa_c)?; + Some(new_sa) + } else { + None + } + }; + let mut old_sa_c = { + if !old_sa_c.is_null() { + let old_sa_c = unsafe { &mut *old_sa_c }; + Some(old_sa_c) + } else { + None + } + }; + + // Do sigaction + let old_sa = super::do_sigaction::do_rt_sigaction(signum, new_sa)?; + + // Retrieve old sigaction_t, if needed + if let Some(old_sa_c) = old_sa_c { + *old_sa_c = old_sa.to_c(); + } + Ok(0) +} + +pub fn do_rt_sigreturn(user_context: *mut CpuContext) -> Result<isize> { + let user_context = unsafe { &mut *user_context }; + super::do_sigreturn::do_rt_sigreturn(user_context)?; + Ok(0) +} + +pub fn do_kill(pid: i32, sig: c_int) -> Result<isize> { + let process_filter = match pid { + // wrapping_neg avoids an arithmetic-overflow panic when pid == i32::MIN + pid if pid < -1 => ProcessFilter::WithPgid(pid.wrapping_neg() as pid_t), + -1 => ProcessFilter::WithAnyPid, + 0 => { + let pgid = 
current!().process().pgid(); + ProcessFilter::WithPgid(pgid) + } + pid if pid > 0 => ProcessFilter::WithPid(pid as pid_t), + _ => unreachable!(), + }; + // Reject numbers that do not fit in a u8 before casting; otherwise the + // cast would silently truncate them into a different, valid signal. + if sig < 0 || sig > std::u8::MAX as c_int { + return_errno!(EINVAL, "signal number is out of range"); + } + let signum = SigNum::from_u8(sig as u8)?; + super::do_kill::do_kill(process_filter, signum)?; + Ok(0) +} + +pub fn do_tkill(tid: pid_t, sig: c_int) -> Result<isize> { + // Same range check as in do_kill: never truncate the signal number. + if sig < 0 || sig > std::u8::MAX as c_int { + return_errno!(EINVAL, "signal number is out of range"); + } + let signum = SigNum::from_u8(sig as u8)?; + super::do_kill::do_tgkill(None, tid, signum)?; + Ok(0) +} + +pub fn do_tgkill(pid: i32, tid: pid_t, sig: c_int) -> Result<isize> { + let pid = if pid >= 0 { Some(pid as pid_t) } else { None }; + // Same range check as in do_kill: never truncate the signal number. + if sig < 0 || sig > std::u8::MAX as c_int { + return_errno!(EINVAL, "signal number is out of range"); + } + let signum = SigNum::from_u8(sig as u8)?; + super::do_kill::do_tgkill(pid, tid, signum)?; + Ok(0) +} + +pub fn do_rt_sigprocmask( + how: c_int, + set_ptr: *const sigset_t, + oldset_ptr: *mut sigset_t, + sigset_size: usize, +) -> Result<isize> { + if sigset_size != std::mem::size_of::<sigset_t>() { + return_errno!(EINVAL, "unexpected sigset size"); + } + let op_and_set = { + if !set_ptr.is_null() { + let op = MaskOp::from_u32(how as u32)?; + let set = unsafe { &*set_ptr }; + Some((op, set)) + } else { + None + } + }; + let old_set = { + if !oldset_ptr.is_null() { + Some(unsafe { &mut *oldset_ptr }) + } else { + None + } + }; + super::do_sigprocmask::do_rt_sigprocmask(op_and_set, old_set)?; + Ok(0) +} + +pub fn do_rt_sigpending(buf_ptr: *mut sigset_t, buf_size: usize) -> Result<isize> { + let buf: &mut sigset_t = { + if buf_size < std::mem::size_of::<sigset_t>() { + return_errno!(EINVAL, "buf is not big enough"); + } + if buf_ptr.is_null() { + return_errno!(EINVAL, "ptr must not be null"); + } + unsafe { &mut *buf_ptr } + }; + let pending = super::do_sigpending::do_sigpending()?; + *buf = pending.to_c(); + Ok(0) +} diff --git a/src/libos/src/syscall/mod.rs b/src/libos/src/syscall/mod.rs index ade0347a..60cbe862 100644 --- a/src/libos/src/syscall/mod.rs +++ b/src/libos/src/syscall/mod.rs @@ -32,11 +32,15 @@ use crate::net::{ SocketFile, UnixSocketFile, }; use crate::process::{ - do_arch_prctl, do_clone, do_exit, do_futex, do_getegid, do_geteuid, do_getgid, 
do_getpgid, - do_getpid, do_getppid, do_gettid, do_getuid, do_set_tid_address, do_spawn, do_wait4, pid_t, - FdOp, + do_arch_prctl, do_clone, do_exit, do_exit_group, do_futex, do_getegid, do_geteuid, do_getgid, + do_getpgid, do_getpid, do_getppid, do_gettid, do_getuid, do_set_tid_address, do_spawn, + do_wait4, pid_t, FdOp, ThreadStatus, }; use crate::sched::{do_sched_getaffinity, do_sched_setaffinity, do_sched_yield}; +use crate::signal::{ + do_kill, do_rt_sigaction, do_rt_sigpending, do_rt_sigprocmask, do_rt_sigreturn, do_tgkill, + do_tkill, sigaction_t, sigset_t, +}; use crate::vm::{MMapFlags, VMPerms}; use crate::{fs, process, std, vm}; @@ -84,9 +88,9 @@ macro_rules! process_syscall_table_with_callback { (Mprotect = 10) => do_mprotect(addr: usize, len: usize, prot: u32), (Munmap = 11) => do_munmap(addr: usize, size: usize), (Brk = 12) => do_brk(new_brk_addr: usize), - (RtSigaction = 13) => do_rt_sigaction(), - (RtSigprocmask = 14) => do_rt_sigprocmask(), - (RtSigreturn = 15) => handle_unsupported(), + (RtSigaction = 13) => do_rt_sigaction(signum_c: c_int, new_sa_c: *const sigaction_t, old_sa_c: *mut sigaction_t), + (RtSigprocmask = 14) => do_rt_sigprocmask(how: c_int, set: *const sigset_t, oldset: *mut sigset_t, sigset_size: size_t), + (RtSigreturn = 15) => do_rt_sigreturn(context: *mut CpuContext), (Ioctl = 16) => do_ioctl(fd: FileDesc, cmd: u32, argp: *mut u8), (Pread64 = 17) => do_pread(fd: FileDesc, buf: *mut u8, size: usize, offset: off_t), (Pwrite64 = 18) => do_pwrite(fd: FileDesc, buf: *const u8, size: usize, offset: off_t), @@ -133,7 +137,7 @@ macro_rules! 
process_syscall_table_with_callback { (Execve = 59) => handle_unsupported(), (Exit = 60) => do_exit(exit_status: i32), (Wait4 = 61) => do_wait4(pid: i32, _exit_status: *mut i32), - (Kill = 62) => handle_unsupported(), + (Kill = 62) => do_kill(pid: i32, sig: c_int), (Uname = 63) => do_uname(name: *mut utsname_t), (Semget = 64) => handle_unsupported(), (Semop = 65) => handle_unsupported(), @@ -198,7 +202,7 @@ macro_rules! process_syscall_table_with_callback { (Getsid = 124) => handle_unsupported(), (Capget = 125) => handle_unsupported(), (Capset = 126) => handle_unsupported(), - (RtSigpending = 127) => handle_unsupported(), + (RtSigpending = 127) => do_rt_sigpending(buf_ptr: *mut sigset_t, buf_size: usize), (RtSigtimedwait = 128) => handle_unsupported(), (RtSigqueueinfo = 129) => handle_unsupported(), (RtSigsuspend = 130) => handle_unsupported(), @@ -271,7 +275,7 @@ macro_rules! process_syscall_table_with_callback { (Removexattr = 197) => handle_unsupported(), (Lremovexattr = 198) => handle_unsupported(), (Fremovexattr = 199) => handle_unsupported(), - (Tkill = 200) => handle_unsupported(), + (Tkill = 200) => do_tkill(tid: pid_t, sig: c_int), (Time = 201) => handle_unsupported(), (Futex = 202) => do_futex(futex_addr: *const i32, futex_op: u32, futex_val: i32, timeout: u64, futex_new_addr: *const i32), (SchedSetaffinity = 203) => do_sched_setaffinity(pid: pid_t, cpusize: size_t, buf: *const c_uchar), @@ -302,10 +306,10 @@ macro_rules! 
process_syscall_table_with_callback { (ClockGettime = 228) => do_clock_gettime(clockid: clockid_t, ts_u: *mut timespec_t), (ClockGetres = 229) => handle_unsupported(), (ClockNanosleep = 230) => handle_unsupported(), - (ExitGroup = 231) => handle_unsupported(), + (ExitGroup = 231) => do_exit_group(exit_status: i32), (EpollWait = 232) => do_epoll_wait(epfd: c_int, events: *mut libc::epoll_event, maxevents: c_int, timeout: c_int), (EpollCtl = 233) => do_epoll_ctl(epfd: c_int, op: c_int, fd: c_int, event: *const libc::epoll_event), - (Tgkill = 234) => handle_unsupported(), + (Tgkill = 234) => do_tgkill(pid: i32, tid: pid_t, sig: c_int), (Utimes = 235) => handle_unsupported(), (Vserver = 236) => handle_unsupported(), (Mbind = 237) => handle_unsupported(), @@ -401,7 +405,7 @@ macro_rules! process_syscall_table_with_callback { // Occlum-specific system calls (Spawn = 360) => do_spawn(child_pid_ptr: *mut u32, path: *const i8, argv: *const *const i8, envp: *const *const i8, fdop_list: *const FdOp), // Exception handling - (Exception = 361) => do_handle_exception(info: *mut sgx_exception_info_t), + (HandleException = 361) => do_handle_exception(info: *mut sgx_exception_info_t, context: *mut CpuContext), } }; } @@ -562,7 +566,7 @@ macro_rules! impl_dispatch_syscall { // let fd = self.args[0] as FileDesc; // let buf = self.args[1] as *mut u8; // let size = self.args[2] as usize; - // do_read(fd, buuf, size) + // do_read(fd, buf, size) // } SyscallNum::$name => { impl_dispatch_syscall!(@do_syscall $fn, syscall, 0, ($($args)*,) -> ()) @@ -574,24 +578,36 @@ macro_rules! impl_dispatch_syscall { } process_syscall_table_with_callback!(impl_dispatch_syscall); -/// The system call entry point in Rust. -/// -/// This function is called by __occlum_syscall. #[no_mangle] -pub extern "C" fn occlum_syscall( - num: u32, - arg0: isize, - arg1: isize, - arg2: isize, - arg3: isize, - arg4: isize, - arg5: isize, -) -> isize { +pub extern "C" fn occlum_syscall(user_context: *mut CpuContext) -> ! 
{ // Start a new round of log messages for this system call. But we do not // set the description of this round, yet. We will do so after checking the // given system call number is a valid. log::next_round(None); + let user_context = unsafe { + // TODO: validate pointer + &mut *user_context + }; + + // Do system call + do_syscall(user_context); + + // Back to the user space + do_sysret(user_context) +} + +fn do_syscall(user_context: &mut CpuContext) { + // Extract arguments from the CPU context. The arguments follows Linux's syscall ABI. + let num = user_context.rax as u32; + let arg0 = user_context.rdi as isize; + let arg1 = user_context.rsi as isize; + let arg2 = user_context.rdx as isize; + let arg3 = user_context.r10 as isize; + let arg4 = user_context.r8 as isize; + let arg5 = user_context.r9 as isize; + + // TODO: the profiler will trigger panic for syscall simulation #[cfg(feature = "syscall_timing")] GLOBAL_PROFILER .lock() @@ -599,10 +615,19 @@ pub extern "C" fn occlum_syscall( .syscall_enter(syscall_num) .expect("unexpected error from profiler to enter syscall"); - let ret = Syscall::new(num, arg0, arg1, arg2, arg3, arg4, arg5).and_then(|syscall| { + let ret = Syscall::new(num, arg0, arg1, arg2, arg3, arg4, arg5).and_then(|mut syscall| { log::set_round_desc(Some(syscall.num.as_str())); trace!("{:?}", &syscall); + // Pass user_context as an extra argument to two special syscalls that + // need to modify it + if syscall.num == SyscallNum::RtSigreturn { + syscall.args[0] = user_context as *mut _ as isize; + } else if syscall.num == SyscallNum::HandleException { + // syscall.args[0] == info + syscall.args[1] = user_context as *mut _ as isize; + } + dispatch_syscall(syscall) }); @@ -644,7 +669,35 @@ pub extern "C" fn occlum_syscall( } }; trace!("Retval = {:?}", retval); - retval + + // Put the return value into user_context.rax, except for syscalls that may + // modify user_context directly. 
Currently, there are two such syscalls: + // SigReturn and HandleException. + // + // Sigreturn restores `user_context` to the state when the last signal + // handler is executed. So in the case of sigreturn, `user_context` should + // be kept intact. + if num != SyscallNum::RtSigreturn as u32 && num != SyscallNum::HandleException as u32 { + user_context.rax = retval as u64; + } + + crate::signal::deliver_signal(user_context); + + crate::process::handle_force_exit(); +} + +/// Return to the user space according to the given CPU context +fn do_sysret(user_context: &mut CpuContext) -> ! { + extern "C" { + fn __occlum_sysret(user_context: *mut CpuContext) -> !; + fn do_exit_task() -> !; + } + if current!().status() != ThreadStatus::Exited { + unsafe { __occlum_sysret(user_context) } // jump to user space + } else { + unsafe { do_exit_task() } // exit enclave + } + unreachable!("__occlum_sysret never returns!"); } /* @@ -1100,16 +1153,58 @@ fn do_prlimit( misc::do_prlimit(pid, resource, new_limit, old_limit).map(|_| 0) } -// TODO: implement signals - -fn do_rt_sigaction() -> Result { - Ok(0) -} - -fn do_rt_sigprocmask() -> Result { - Ok(0) -} - fn handle_unsupported() -> Result { return_errno!(ENOSYS, "Unimplemented or unknown syscall") } + +/// Cpu context. +/// +/// Note. The definition of this struct must be kept in sync with the assembly +/// code in `syscall_entry_x86-64.S`. 
+#[derive(Clone, Copy, Debug, Default)] +#[repr(C)] +pub struct CpuContext { + pub r8: u64, + pub r9: u64, + pub r10: u64, + pub r11: u64, + pub r12: u64, + pub r13: u64, + pub r14: u64, + pub r15: u64, + pub rdi: u64, + pub rsi: u64, + pub rbp: u64, + pub rbx: u64, + pub rdx: u64, + pub rax: u64, + pub rcx: u64, + pub rsp: u64, + pub rip: u64, + pub rflags: u64, +} + +impl CpuContext { + pub fn from_sgx(src: &sgx_cpu_context_t) -> CpuContext { + Self { + r8: src.r8, + r9: src.r9, + r10: src.r10, + r11: src.r11, + r12: src.r12, + r13: src.r13, + r14: src.r14, + r15: src.r15, + rdi: src.rdi, + rsi: src.rsi, + rbp: src.rbp, + rbx: src.rbx, + rdx: src.rdx, + rax: src.rax, + rcx: src.rcx, + rsp: src.rsp, + rip: src.rip, + rflags: src.rflags, + } + } +} diff --git a/src/libos/src/syscall/syscall_entry_native_x86-64.S b/src/libos/src/syscall/syscall_entry_native_x86-64.S deleted file mode 100644 index 449d50aa..00000000 --- a/src/libos/src/syscall/syscall_entry_native_x86-64.S +++ /dev/null @@ -1,46 +0,0 @@ -#define __ASSEMBLY__ -#include "task.h" - - .file "syscall_entry_native_x86-64.S" - .global __occlum_syscall_native - .type __occlum_syscall_native, @function -__occlum_syscall_native: - push %rbp - movq %rsp,%rbp - - // Save registers - pushq %rdi - pushq %rsi - pushq %rdx - pushq %r10 - pushq %r8 - // arg5 - pushq %r9 - // arg4--arg0 - movq %r8, %r9 - movq %r10, %r8 - movq %rdx, %rcx - movq %rsi, %rdx - movq %rdi, %rsi - // num - movq %rax, %rdi - - // num - %rdi - // arg0 - %rsi - // arg1 - %rdx - // arg2 - %rcx - // arg3 - %r8 - // arg4 - %r9 - // arg5 - *0x8(%rsp) - call __occlum_syscall - - // Restore registers - popq %r9 - popq %r8 - popq %r10 - popq %rdx - popq %rsi - popq %rdi - - popq %rbp - ret diff --git a/src/libos/src/syscall/syscall_entry_x86-64.S b/src/libos/src/syscall/syscall_entry_x86-64.S index e32a98c1..110e8670 100644 --- a/src/libos/src/syscall/syscall_entry_x86-64.S +++ b/src/libos/src/syscall/syscall_entry_x86-64.S @@ -1,36 +1,57 @@ #define 
__ASSEMBLY__ #include "task.h" + .file "syscall_entry_x86-64.S" - .global __occlum_syscall - .type __occlum_syscall, @function -__occlum_syscall: - // num - %rdi - // arg0 - %rsi - // arg1 - %rdx - // arg2 - %rcx - // arg3 - %r8 - // arg4 - %r9 - // arg5 - *0x8(%rsp) + .global __occlum_syscall_linux_abi + .type __occlum_syscall_linux_abi, @function +__occlum_syscall_linux_abi: + // num - %rax + // arg0 - %rdi + // arg1 - %rsi + // arg2 - %rdx + // arg3 - %r10 + // arg4 - %r8 + // arg5 - *r9 + // return address - *(%rsp) - // Given by the user, the user-space stack pointer %rsp cannot be trusted. - // So we need to check whether %rsp is within the read-write region of the - // current data domain - bndcl %rsp, %bnd0 - bndcu %rsp, %bnd0 - - // Save the callee-saved registers - pushq %rbp - pushq %r12 - // Save the user stack + push %rbp movq %rsp, %rbp + // The return address is now in 8(%rbp). + // The original %rbp is now in (%rbp). + // The original %rsp is now in %rbp + 8. + + // Save the target CPU state when `call __occlum_syscall` is returned in + // a CpuContext struct. The registers are saved in the reverse order of + // the fields in CpuContext. + pushfq + push 8(%rbp) // save %rip + push %rbp // save %rsp, but not the final value, to be adjusted later + push %rcx + push %rax + push %rdx + push %rbx + push (%rbp) // save %rbp + push %rsi + push %rdi + push %r15 + push %r14 + push %r13 + push %r12 + push %r11 + push %r10 + push %r9 + push %r8 + // Make %rdi points to CpuContext. + mov %rsp, %rdi + // The target %rsp is actuall the saved one plus 16 + addq $16, (15*8)(%rdi) // Get current task movq %gs:(TD_TASK_OFFSET), %r12 // Switch to the kernel stack movq TASK_KERNEL_RSP(%r12), %rsp - - // Use kernel fsbase. Different implementation for HW and SIM. + // Switch to the kernel TLS by setting fsbase. Different implementation for HW and SIM modes. 
#if SGX_MODE_SIM pushq %rdi pushq %rsi @@ -44,56 +65,99 @@ __occlum_syscall: #else // SGX_MODE_HW movq TASK_KERNEL_FS(%r12), %r11 wrfsbase %r11 -#endif - - // Use kernel stack base and limit +#endif + // Switch to kernel stack base and limit movq TASK_KERNEL_STACK_BASE(%r12), %r11 movq %r11, %gs:TD_STACK_BASE - movq TASK_KERNEL_STACK_LIMIT(%r12), %r11 movq %r11, %gs:TD_STACK_LIMIT - // Make %rsp 16-byte aligned before call - sub $0x8, %rsp - // Pass arg5 - pushq 0x18(%rbp) - call occlum_syscall - // Use user fsbase. Different implementation for HW and SIM. + // This should never happen! + ud2 + + + .global __occlum_sysret + .type __occlum_sysret, @function +__occlum_sysret: + // Arguments: + // %rdi - user_context: &mut CpuContext + + // Jumping back to the user space itself is easy, but not so easy when + // we need to set all other registers to some specified values. To overcome + // this difficulty, the most obvious choice is using a ret instruction, which + // can set %rip and %rsp at the same time. So we must set -8(%rsp) to the + // value of the target %rip before ret, where %rsp has the value of target + // %rsp. + // + // But there is a catch: it is dangerous to modify the value at -8(%rsp), + // which may still be used by the user space (remember red zone and + // signal handler?). So we need to use a stack location outside the + // 128-byte red zone. So in this function, we store the target %rip value + // in $-136(%rsp) and do `ret 128` at the end of this function. + subq $136, (15*8)(%rdi) + movq (15*8)(%rdi), %r11 + movq (16*8)(%rdi), %r12 + movq %r12, (%r11) + + // Get current task + movq %gs:(TD_TASK_OFFSET), %r12 + // Switch to the user TLS. Different implementation for HW and SIM modes. #if SGX_MODE_SIM pushq %rdi - pushq %rsi - pushq %rax // RAX must be saved here otherwise the progrom may crash. 
movq $ARCH_SET_FS, %rdi movq TASK_USER_FS(%r12), %rsi call __arch_prctl - popq %rax - popq %rsi popq %rdi #else // SGX_MODE_HW movq TASK_USER_FS(%r12), %r11 wrfsbase %r11 #endif - - // Use user stack base and limit + // Switch to user stack base and limit movq TASK_USER_STACK_BASE(%r12), %r11 movq %r11, %gs:TD_STACK_BASE - movq TASK_USER_STACK_LIMIT(%r12), %r11 movq %r11, %gs:TD_STACK_LIMIT - // Switch to the user stack - movq %rbp, %rsp - // Restore callee-saved registers - popq %r12 - popq %rbp + // Restore flags first + leaq (17*8)(%rdi), %rsp + popfq - // Check return target is a valid instruction (i.e., a cfi_label) - popq %r10 - movq (%r10), %r11 - bndcl %r11, %bnd2 - bndcu %r11, %bnd2 - jmpq *%r10 + // Make %rsp point to the CPU context + mov %rdi, %rsp + // Restore the CPU context of the user space + pop %r8 + pop %r9 + pop %r10 + pop %r11 + pop %r12 + pop %r13 + pop %r14 + pop %r15 + pop %rdi + pop %rsi + pop %rbp + pop %rbx + pop %rdx + pop %rax + pop %rcx + pop %rsp + // Continue executing the user code + ret $128 + + + .global __occlum_syscall_c_abi + .type __occlum_syscall_c_abi, @function +__occlum_syscall_c_abi: + movq %rdi,%rax + movq %rsi,%rdi + movq %rdx,%rsi + movq %rcx,%rdx + movq %r8,%r10 + movq %r9,%r8 + movq 8(%rsp),%r9 + call __occlum_syscall_linux_abi + ret diff --git a/src/libos/src/time/mod.rs b/src/libos/src/time/mod.rs index d5f6c761..213b50fe 100644 --- a/src/libos/src/time/mod.rs +++ b/src/libos/src/time/mod.rs @@ -17,6 +17,9 @@ pub type time_t = i64; #[allow(non_camel_case_types)] pub type suseconds_t = i64; +#[allow(non_camel_case_types)] +pub type clock_t = i64; + #[repr(C)] #[derive(Debug, Default, Copy, Clone)] #[allow(non_camel_case_types)] diff --git a/src/pal/include/occlum_pal_api.h b/src/pal/include/occlum_pal_api.h index 06fb6a22..394e34f9 100644 --- a/src/pal/include/occlum_pal_api.h +++ b/src/pal/include/occlum_pal_api.h @@ -60,9 +60,11 @@ int occlum_pal_init(const struct occlum_pal_attr* attr); * @param io_fds The file
descriptors of the redirected standard I/O * (i.e., stdin, stdout, stderr), If set to NULL, will * use the original standard I/O file descriptors. - * @param exit_status Output. The exit status of the command. Note that the - * exit status is returned if and only if the function - * succeeds. + * @param exit_status Output. The exit status of the command. The semantics of + * this value follow those described in the wait(2) man + * page. For example, if the program terminated normally, + * then WEXITSTATUS(exit_status) gives the value returned + * from a main function. * * @retval If 0, then success; otherwise, check errno for the exact error type. */ diff --git a/src/run/main.c b/src/run/main.c index 95f3e165..5f149ed4 100644 --- a/src/run/main.c +++ b/src/run/main.c @@ -2,6 +2,8 @@ #include #include #include +#include +#include #include static const char* get_instance_dir(void) { @@ -40,7 +42,15 @@ int main(int argc, char* argv[]) { }; int exit_status = 0; if (occlum_pal_exec(cmd_path, cmd_args, &io_fds, &exit_status) < 0) { - return EXIT_FAILURE; + // Command not found or other internal errors + return 127; + } + + // Convert the exit status to a value in a shell-like encoding + if (WIFEXITED(exit_status)) { // terminated normally + exit_status = WEXITSTATUS(exit_status) & 0x7F; // [0, 127] + } else { // killed by signal + exit_status = 128 + WTERMSIG(exit_status); // [128 + 1, 128 + 64] } // Destroy Occlum PAL diff --git a/test/Makefile b/test/Makefile index 9a1f10f1..d4c3f4fd 100644 --- a/test/Makefile +++ b/test/Makefile @@ -14,7 +14,7 @@ TEST_DEPS := client data_sink TESTS ?= empty env hello_world malloc mmap file fs_perms getpid spawn sched pipe time \ truncate readdir mkdir open stat link symlink chmod chown tls pthread uname rlimit \ server server_epoll unix_socket cout hostfs cpuid rdtsc device sleep exit_group \ - ioctl fcntl eventfd emulate_syscall access + ioctl fcntl eventfd emulate_syscall access signal # Benchmarks: need to be compiled and run by
bench-% target BENCHES := spawn_and_exit_latency pipe_throughput unix_socket_throughput diff --git a/test/exit_group/main.c b/test/exit_group/main.c index ca43b8e4..a7a8d551 100644 --- a/test/exit_group/main.c +++ b/test/exit_group/main.c @@ -15,17 +15,15 @@ // Three types of threads that will not exit voluntarily // -// FIXME: Disable this test for NOW because exit_group does not have a real implementation yet -// and SGX simlulation mode will fail this test. // Type 1: a busy loop thread -// static void* busyloop_thread_func(void* _) { -// while (1) { -// // By calling getpid, we give the LibOS a chance to force the thread -// // to terminate if exit_group is called by any thread in a thread group -// getpid(); -// } -// return NULL; -// } +static void* busyloop_thread_func(void* _) { + while (1) { + // By calling getpid, we give the LibOS a chance to force the thread + // to terminate if exit_group is called by any thread in a thread group + getpid(); + } + return NULL; +} // Type 2: a sleeping thread static void* sleeping_thread_func(void* _) { @@ -46,11 +44,11 @@ static void* futex_wait_thread_func(void* _) { // exit_group syscall should terminate all threads in a thread group. 
int test_exit_group_to_force_threads_terminate(void) { // Create three types of threads that will not exit voluntarily - // pthread_t busyloop_thread; - // if (pthread_create(&busyloop_thread, NULL, busyloop_thread_func, NULL) < 0) { - // printf("ERROR: pthread_create failed\n"); - // return -1; - // } + pthread_t busyloop_thread; + if (pthread_create(&busyloop_thread, NULL, busyloop_thread_func, NULL) < 0) { + printf("ERROR: pthread_create failed\n"); + return -1; + } pthread_t sleeping_thread; if (pthread_create(&sleeping_thread, NULL, sleeping_thread_func, NULL) < 0) { printf("ERROR: pthread_create failed\n"); diff --git a/test/server/main.c b/test/server/main.c index 8f5908ee..891a8f98 100644 --- a/test/server/main.c +++ b/test/server/main.c @@ -207,6 +207,7 @@ int test_read_write() { THROW_ERROR("failed to wait4 the child process"); } + printf("test_read_write finished!\n"); return ret; } @@ -314,10 +315,12 @@ int test_poll_sockets() { static test_case_t test_cases[] = { TEST_CASE(test_read_write), TEST_CASE(test_send_recv), +/* TEST_CASE(test_sendmsg_recvmsg), TEST_CASE(test_sendmsg_recvmsg_connectionless), TEST_CASE(test_fcntl_setfl_and_getfl), TEST_CASE(test_poll_sockets), + */ }; int main(int argc, const char* argv[]) { diff --git a/test/signal/Makefile b/test/signal/Makefile new file mode 100644 index 00000000..f5d7fb16 --- /dev/null +++ b/test/signal/Makefile @@ -0,0 +1,5 @@ +include ../test_common.mk + +EXTRA_C_FLAGS := -Wno-return-stack-address -Wno-unused-but-set-variable +EXTRA_LINK_FLAGS := +BIN_ARGS := diff --git a/test/signal/main.c b/test/signal/main.c new file mode 100644 index 00000000..c14fcb94 --- /dev/null +++ b/test/signal/main.c @@ -0,0 +1,292 @@ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "test.h" + +// ============================================================================ +// Helper macros +// 
============================================================================ + + +// ============================================================================ +// Helper functions +// ============================================================================ + + +// ============================================================================ +// Test sigprocmask +// ============================================================================ + +// Add a new macro to compare two sigset. Returns 0 iff the two sigset are equal. +// Musl libc defines sigset_t to 16 bytes, but on x86 only the first 8 bytes are +// meaningful. So this comparison only takes the first 8 bytes into account. +#define sigcmpset(a, b) memcmp((a), (b), 8) + +int test_sigprocmask() { + int ret; + sigset_t new, old; + sigset_t expected_old; + + // Check sigmask == [] + if ((ret = sigprocmask(0, NULL, &old)) < 0) { + THROW_ERROR("sigprocmask failed unexpectedly"); + } + sigemptyset(&expected_old); + if (sigcmpset(&old, &expected_old) != 0) { + THROW_ERROR("unexpected old sigset"); + } + + // SIG_BLOCK: [] --> [SIGSEGV] + sigemptyset(&new); + sigaddset(&new, SIGSEGV); + if ((ret = sigprocmask(SIG_BLOCK, &new, &old)) < 0) { + THROW_ERROR("sigprocmask failed unexpectedly"); + } + sigemptyset(&expected_old); + if (sigcmpset(&old, &expected_old) != 0) { + THROW_ERROR("unexpected old sigset"); + } + + // SIG_SETMASK: [SIGSEGV] --> [SIGIO] + sigemptyset(&new); + sigaddset(&new, SIGIO); + if ((ret = sigprocmask(SIG_SETMASK, &new, &old)) < 0) { + THROW_ERROR("sigprocmask failed unexpectedly"); + } + sigemptyset(&expected_old); + sigaddset(&expected_old, SIGSEGV); + if (sigcmpset(&old, &expected_old) != 0) { + THROW_ERROR("unexpected old sigset"); + } + + // SIG_UNBLOCK: [SIGIO] -> [] + if ((ret = sigprocmask(SIG_UNBLOCK, &new, &old)) < 0) { + THROW_ERROR("sigprocmask failed unexpectedly"); + } + sigemptyset(&expected_old); + sigaddset(&expected_old, SIGIO); + if (sigcmpset(&old, &expected_old) != 0) { 
+ THROW_ERROR("unexpected old sigset"); + } + + // Check sigmask == [] + if ((ret = sigprocmask(0, NULL, &old)) < 0) { + THROW_ERROR("sigprocmask failed unexpectedly"); + } + sigemptyset(&expected_old); + if (sigcmpset(&old, &expected_old) != 0) { + THROW_ERROR("unexpected old sigset"); + } + + return 0; +} + +// ============================================================================ +// Test raise syscall and user-registered signal handlers +// ============================================================================ + +#define MAX_RECURSION_LEVEL 3 + +static void handle_sigio(int num, siginfo_t* info, void* context) { + static volatile int recursion_level = 0; + printf("Hello from SIGIO signal handler (recursion_level = %d)!\n", recursion_level); + + recursion_level++; + if (recursion_level <= MAX_RECURSION_LEVEL) + raise(SIGIO); + recursion_level--; +} + +int test_raise() { + struct sigaction new_action, old_action; + new_action.sa_sigaction = handle_sigio; + new_action.sa_flags = SA_SIGINFO | SA_NODEFER; + if (sigaction(SIGIO, &new_action, &old_action) < 0) { + THROW_ERROR("registering new signal handler failed"); + } + if (old_action.sa_handler != SIG_DFL) { + THROW_ERROR("unexpected old sig handler"); + } + + raise(SIGIO); + + if (sigaction(SIGIO, &old_action, NULL) < 0) { + THROW_ERROR("restoring old signal handler failed"); + } + return 0; +} + +// ============================================================================ +// Test abort, which uses SIGABRT behind the scene +// ============================================================================ + +int test_abort() { + pid_t child_pid; + char* child_argv[] = {"signal", "aborted_child", NULL}; + int ret; + int status; + + // Repeat multiple times to check that the resources of the killed child + // processes are indeed freed by the LibOS + for (int i = 0; i < 3; i++) { + ret = posix_spawn(&child_pid, "/bin/signal", NULL, NULL, child_argv, NULL); + if (ret < 0) { + THROW_ERROR("failed to 
spawn a child process\n"); + } + + ret = wait4(-1, &status, 0, NULL); + if (ret < 0) { + THROW_ERROR("failed to wait4 the child process\n"); + } + if (!WIFSIGNALED(status) || WTERMSIG(status) != SIGABRT) { + THROW_ERROR("child process is expected to be killed by SIGILL\n"); + } + } + return 0; +} + +static int aborted_child() { + while (1) { + abort(); + } + return 0; +} + +// ============================================================================ +// Test kill by sending SIGKILL to another process +// ============================================================================ + +int test_kill() { + pid_t child_pid; + char* child_argv[] = {"signal", "killed_child", NULL}; + int ret; + int status; + + // Repeat multiple times to check that the resources of the killed child + // processes are indeed freed by the LibOS + for (int i = 0; i < 3; i++) { + ret = posix_spawn(&child_pid, "/bin/signal", NULL, NULL, child_argv, NULL); + if (ret < 0) { + THROW_ERROR("failed to spawn a child process\n"); + } + + kill(child_pid, SIGKILL); + + ret = wait4(-1, &status, 0, NULL); + if (ret < 0) { + THROW_ERROR("failed to wait4 the child process\n"); + } + if (!WIFSIGNALED(status) || WTERMSIG(status) != SIGKILL) { + THROW_ERROR("child process is expected to be killed by SIGILL\n"); + } + } + return 0; +} + +// TODO: remove the use of getpid when we can deliver signals through interrupt +static int killed_child() { + while (1) { + getpid(); + } + return 0; +} + +// ============================================================================ +// Test catching and handling hardware exception +// ============================================================================ + +static void handle_sigfpe(int num, siginfo_t* info, void* _context) { + printf("SIGFPE Caught\n"); + assert(num == SIGFPE); + assert(info->si_signo == SIGFPE); + + ucontext_t* ucontext = _context; + mcontext_t* mcontext = &ucontext->uc_mcontext; + // The faulty instruction should be `idiv %esi` (f7 fe) + 
mcontext->gregs[REG_RIP] += 2; + + return; +} + +// Note: this function is fragile in the sense that compiler may not always +// emit the instruction pattern that triggers divide-by-zero as we expect. +// TODO: rewrite this in assembly +int div_maybe_zero(int x, int y) { + return x / y; +} + +int test_catch_fault() { +#ifdef SGX_MODE_SIM + printf("WARNING: Skip this test case as we do not support " + "capturing hardware exception in SGX simulation mode\n"); + return 0; +#else + // Set up a signal handler that handles divide-by-zero exception + struct sigaction new_action, old_action; + new_action.sa_sigaction = handle_sigfpe; + new_action.sa_flags = SA_SIGINFO; + if (sigaction(SIGFPE, &new_action, &old_action) < 0) { + THROW_ERROR("registering new signal handler failed"); + } + if (old_action.sa_handler != SIG_DFL) { + THROW_ERROR("unexpected old sig handler"); + } + + // Trigger divide-by-zero exception + int a = 1; + int b = 0; + // Use volatile to prevent compiler optimization + volatile int c; + c = div_maybe_zero(a, b); + + printf("Signal handler successfully jumped over the divide-by-zero instruction\n"); + + if (sigaction(SIGFPE, &old_action, NULL) < 0) { + THROW_ERROR("restoring old signal handler failed"); + } + return 0; +#endif /* SGX_MODE_SIM */ +} + + +// ============================================================================ +// Test suite main +// ============================================================================ + +static test_case_t test_cases[] = { + TEST_CASE(test_sigprocmask), + TEST_CASE(test_raise), + TEST_CASE(test_abort), + TEST_CASE(test_kill), + TEST_CASE(test_catch_fault), +}; + +int main(int argc, const char* argv[]) { + if (argc > 1) { + const char* cmd = argv[1]; + if (strcmp(cmd, "aborted_child") == 0) { + return aborted_child(); + } + else if (strcmp(cmd, "killed_child") == 0) { + return killed_child(); + } + else { + fprintf(stderr, "ERROR: unknown command: %s\n", cmd); + return EXIT_FAILURE; + } + } + + return 
test_suite_run(test_cases, ARRAY_SIZE(test_cases)); +} diff --git a/test/test_common.mk b/test/test_common.mk index e9f67431..9b122da6 100644 --- a/test/test_common.mk +++ b/test/test_common.mk @@ -25,6 +25,13 @@ CC := occlum-gcc CXX := occlum-g++ C_FLAGS = -Wall -Wno-return-local-addr -I../include -O2 -fPIC $(EXTRA_C_FLAGS) +ifeq ($(SGX_MODE), SIM) + C_FLAGS += -D SGX_MODE_SIM +else ifeq ($(SGX_MODE), SW) + C_FLAGS += -D SGX_MODE_SIM +else + C_FLAGS += -D SGX_MODE_HW +endif LINK_FLAGS = $(C_FLAGS) -pie $(EXTRA_LINK_FLAGS) .PHONY: all test test-native clean