From 4cf8777592fae11fd9d5e8faee62bebdf0412364 Mon Sep 17 00:00:00 2001 From: "Tate, Hongliang Tian" Date: Mon, 1 Apr 2019 22:16:59 +0800 Subject: [PATCH] Add clone system call --- src/libos/src/fs/mod.rs | 51 ++++++++++-------- src/libos/src/process/mod.rs | 10 +++- src/libos/src/process/process.rs | 18 +++---- src/libos/src/process/spawn/mod.rs | 12 +++-- src/libos/src/process/thread.rs | 84 ++++++++++++++++++++++++++++++ src/libos/src/syscall/mod.rs | 43 ++++++++++++++- src/libos/src/vm/mod.rs | 20 ++++--- test/Makefile | 2 +- test/clone/Makefile | 5 ++ test/clone/main.c | 54 +++++++++++++++++++ test/test_common.mk | 2 +- 11 files changed, 249 insertions(+), 52 deletions(-) create mode 100644 src/libos/src/process/thread.rs create mode 100644 test/clone/Makefile create mode 100644 test/clone/main.c diff --git a/src/libos/src/fs/mod.rs b/src/libos/src/fs/mod.rs index b4c5eecb..162f7062 100644 --- a/src/libos/src/fs/mod.rs +++ b/src/libos/src/fs/mod.rs @@ -56,7 +56,7 @@ pub fn do_open(path: &str, flags: u32, mode: u32) -> Result { let fd = { let close_on_spawn = flags.contains(OpenFlags::CLOEXEC); - proc.get_files_mut().put(file_ref, close_on_spawn) + proc.get_files().lock().unwrap().put(file_ref, close_on_spawn) }; Ok(fd) } @@ -65,7 +65,7 @@ pub fn do_write(fd: FileDesc, buf: &[u8]) -> Result { info!("write: fd: {}", fd); let current_ref = process::get_current(); let current_process = current_ref.lock().unwrap(); - let file_ref = current_process.get_files().get(fd)?; + let file_ref = current_process.get_files().lock().unwrap().get(fd)?; file_ref.write(buf) } @@ -73,7 +73,7 @@ pub fn do_read(fd: FileDesc, buf: &mut [u8]) -> Result { info!("read: fd: {}", fd); let current_ref = process::get_current(); let current_process = current_ref.lock().unwrap(); - let file_ref = current_process.get_files().get(fd)?; + let file_ref = current_process.get_files().lock().unwrap().get(fd)?; file_ref.read(buf) } @@ -81,7 +81,7 @@ pub fn do_writev(fd: FileDesc, bufs: &[&[u8]]) -> 
Result { info!("writev: fd: {}", fd); let current_ref = process::get_current(); let current_process = current_ref.lock().unwrap(); - let file_ref = current_process.get_files().get(fd)?; + let file_ref = current_process.get_files().lock().unwrap().get(fd)?; file_ref.writev(bufs) } @@ -89,7 +89,7 @@ pub fn do_readv(fd: FileDesc, bufs: &mut [&mut [u8]]) -> Result { info!("readv: fd: {}", fd); let current_ref = process::get_current(); let current_process = current_ref.lock().unwrap(); - let file_ref = current_process.get_files().get(fd)?; + let file_ref = current_process.get_files().lock().unwrap().get(fd)?; file_ref.readv(bufs) } @@ -97,7 +97,7 @@ pub fn do_pwrite(fd: FileDesc, buf: &[u8], offset: usize) -> Result Result Result { info!("fstat: fd: {}", fd); let current_ref = process::get_current(); let current_process = current_ref.lock().unwrap(); - let file_ref = current_process.get_files().get(fd)?; + let file_ref = current_process.get_files().lock().unwrap().get(fd)?; let stat = Stat::from(file_ref.metadata()?); // TODO: handle symlink Ok(stat) @@ -137,7 +137,7 @@ pub fn do_lstat(path: &str) -> Result { pub fn do_lseek(fd: FileDesc, offset: SeekFrom) -> Result { let current_ref = process::get_current(); let current_process = current_ref.lock().unwrap(); - let file_ref = current_process.get_files().get(fd)?; + let file_ref = current_process.get_files().lock().unwrap().get(fd)?; file_ref.seek(offset) } @@ -145,7 +145,7 @@ pub fn do_fsync(fd: FileDesc) -> Result<(), Error> { info!("fsync: fd: {}", fd); let current_ref = process::get_current(); let current_process = current_ref.lock().unwrap(); - let file_ref = current_process.get_files().get(fd)?; + let file_ref = current_process.get_files().lock().unwrap().get(fd)?; file_ref.sync_all()?; Ok(()) } @@ -154,7 +154,7 @@ pub fn do_fdatasync(fd: FileDesc) -> Result<(), Error> { info!("fdatasync: fd: {}", fd); let current_ref = process::get_current(); let current_process = current_ref.lock().unwrap(); - let file_ref = 
current_process.get_files().get(fd)?; + let file_ref = current_process.get_files().lock().unwrap().get(fd)?; file_ref.sync_data()?; Ok(()) } @@ -171,7 +171,7 @@ pub fn do_ftruncate(fd: FileDesc, len: usize) -> Result<(), Error> { info!("ftruncate: fd: {}, len: {}", fd, len); let current_ref = process::get_current(); let current_process = current_ref.lock().unwrap(); - let file_ref = current_process.get_files().get(fd)?; + let file_ref = current_process.get_files().lock().unwrap().get(fd)?; file_ref.set_len(len as u64)?; Ok(()) } @@ -185,7 +185,7 @@ pub fn do_getdents64(fd: FileDesc, buf: &mut [u8]) -> Result { ); let current_ref = process::get_current(); let current_process = current_ref.lock().unwrap(); - let file_ref = current_process.get_files().get(fd)?; + let file_ref = current_process.get_files().lock().unwrap().get(fd)?; let info = file_ref.metadata()?; if info.type_ != FileType::Dir { return Err(Error::new(ENOTDIR, "")); @@ -207,8 +207,9 @@ pub fn do_getdents64(fd: FileDesc, buf: &mut [u8]) -> Result { pub fn do_close(fd: FileDesc) -> Result<(), Error> { let current_ref = process::get_current(); - let mut current_process = current_ref.lock().unwrap(); - let file_table = current_process.get_files_mut(); + let current_process = current_ref.lock().unwrap(); + let file_table_ref = current_process.get_files(); + let mut file_table = file_table_ref.lock().unwrap(); file_table.del(fd)?; Ok(()) } @@ -216,10 +217,11 @@ pub fn do_close(fd: FileDesc) -> Result<(), Error> { pub fn do_pipe2(flags: u32) -> Result<[FileDesc; 2], Error> { let flags = OpenFlags::from_bits_truncate(flags); let current_ref = process::get_current(); - let mut current = current_ref.lock().unwrap(); + let current = current_ref.lock().unwrap(); let pipe = Pipe::new()?; - let mut file_table = current.get_files_mut(); + let file_table_ref = current.get_files(); + let mut file_table = file_table_ref.lock().unwrap(); let close_on_spawn = flags.contains(OpenFlags::CLOEXEC); let reader_fd = 
file_table.put(Arc::new(Box::new(pipe.reader)), close_on_spawn); let writer_fd = file_table.put(Arc::new(Box::new(pipe.writer)), close_on_spawn); @@ -228,8 +230,9 @@ pub fn do_pipe2(flags: u32) -> Result<[FileDesc; 2], Error> { pub fn do_dup(old_fd: FileDesc) -> Result { let current_ref = process::get_current(); - let mut current = current_ref.lock().unwrap(); - let file_table = current.get_files_mut(); + let current = current_ref.lock().unwrap(); + let file_table_ref = current.get_files(); + let mut file_table = file_table_ref.lock().unwrap(); let file = file_table.get(old_fd)?; let new_fd = file_table.put(file, false); Ok(new_fd) @@ -237,8 +240,9 @@ pub fn do_dup(old_fd: FileDesc) -> Result { pub fn do_dup2(old_fd: FileDesc, new_fd: FileDesc) -> Result { let current_ref = process::get_current(); - let mut current = current_ref.lock().unwrap(); - let file_table = current.get_files_mut(); + let current = current_ref.lock().unwrap(); + let file_table_ref = current.get_files(); + let mut file_table = file_table_ref.lock().unwrap(); let file = file_table.get(old_fd)?; if old_fd != new_fd { file_table.put_at(new_fd, file, false); @@ -249,8 +253,9 @@ pub fn do_dup2(old_fd: FileDesc, new_fd: FileDesc) -> Result { pub fn do_dup3(old_fd: FileDesc, new_fd: FileDesc, flags: u32) -> Result { let flags = OpenFlags::from_bits_truncate(flags); let current_ref = process::get_current(); - let mut current = current_ref.lock().unwrap(); - let file_table = current.get_files_mut(); + let current = current_ref.lock().unwrap(); + let file_table_ref = current.get_files(); + let mut file_table = file_table_ref.lock().unwrap(); let file = file_table.get(old_fd)?; if old_fd == new_fd { return errno!(EINVAL, "old_fd must not be equal to new_fd"); diff --git a/src/libos/src/process/mod.rs b/src/libos/src/process/mod.rs index 4d5315aa..099ffc27 100644 --- a/src/libos/src/process/mod.rs +++ b/src/libos/src/process/mod.rs @@ -6,6 +6,7 @@ pub mod table { pub use self::exit::{do_exit, do_wait4, 
ChildProcessFilter}; pub use self::spawn::{do_spawn, FileAction}; pub use self::wait::{WaitQueue, Waiter}; +pub use self::thread::{do_clone, CloneFlags, ThreadGroup}; #[allow(non_camel_case_types)] pub type pid_t = u32; @@ -18,16 +19,20 @@ pub struct Process { pgid: pid_t, tgid: pid_t, exit_status: i32, + // TODO: move cwd, root_inode into a FileSystem structure + // TODO: should cwd be a String or INode? cwd: String, parent: Option, children: Vec, waiting_children: Option>, - vm: ProcessVM, - file_table: FileTable, + vm: ProcessVMRef, + file_table: FileTableRef, } pub type ProcessRef = Arc>; pub type ProcessWeakRef = std::sync::Weak>; +pub type FileTableRef = Arc>; +pub type ProcessVMRef = Arc>; pub fn do_getpid() -> pid_t { let current_ref = get_current(); @@ -57,6 +62,7 @@ mod process_table; mod spawn; mod task; mod wait; +mod thread; use self::task::Task; use super::*; diff --git a/src/libos/src/process/process.rs b/src/libos/src/process/process.rs index 74afa537..2a203d0f 100644 --- a/src/libos/src/process/process.rs +++ b/src/libos/src/process/process.rs @@ -27,8 +27,8 @@ impl Process { pub fn new( cwd: &str, task: Task, - vm: ProcessVM, - file_table: FileTable, + vm_ref: ProcessVMRef, + file_table_ref: FileTableRef, ) -> Result<(pid_t, ProcessRef), Error> { let new_pid = process_table::alloc_pid(); let new_process_ref = Arc::new(SgxMutex::new(Process { @@ -42,8 +42,8 @@ impl Process { parent: None, children: Vec::new(), waiting_children: None, - vm: vm, - file_table: file_table, + vm: vm_ref, + file_table: file_table_ref, })); Ok((new_pid, new_process_ref)) } @@ -72,18 +72,12 @@ impl Process { pub fn get_cwd(&self) -> &str { &self.cwd } - pub fn get_vm(&self) -> &ProcessVM { + pub fn get_vm(&self) -> &ProcessVMRef { &self.vm } - pub fn get_vm_mut(&mut self) -> &mut ProcessVM { - &mut self.vm - } - pub fn get_files(&self) -> &FileTable { + pub fn get_files(&self) -> &FileTableRef { &self.file_table } - pub fn get_files_mut(&mut self) -> &mut FileTable { - 
&mut self.file_table - } pub fn get_parent(&self) -> &ProcessRef { self.parent.as_ref().unwrap() } diff --git a/src/libos/src/process/spawn/mod.rs b/src/libos/src/process/spawn/mod.rs index b015d74b..b4c85ba0 100644 --- a/src/libos/src/process/spawn/mod.rs +++ b/src/libos/src/process/spawn/mod.rs @@ -52,6 +52,7 @@ pub fn do_spawn>( }; let (new_pid, new_process_ref) = { + let cwd = elf_path.as_ref().parent().unwrap().to_str().unwrap(); let vm = init_vm::do_init(&elf_file, &elf_buf[..])?; let task = { let program_entry = { @@ -64,9 +65,12 @@ pub fn do_spawn>( let stack_top = vm.get_stack_top(); init_task(program_entry, stack_top, argv, envp)? }; - let files = init_files(parent_ref, file_actions)?; - let cwd = elf_path.as_ref().parent().unwrap().to_str().unwrap(); - Process::new(cwd, task, vm, files)? + let vm_ref = Arc::new(SgxMutex::new(vm)); + let files_ref = { + let files = init_files(parent_ref, file_actions)?; + Arc::new(SgxMutex::new(files)) + }; + Process::new(cwd, task, vm_ref, files_ref)? }; parent_adopts_new_child(&parent_ref, &new_process_ref); process_table::put(new_pid, new_process_ref.clone()); @@ -79,7 +83,7 @@ fn init_files(parent_ref: &ProcessRef, file_actions: &[FileAction]) -> Result 0; if should_inherit_file_table { - let mut cloned_file_table = parent.get_files().clone(); + let mut cloned_file_table = parent.get_files().lock().unwrap().clone(); // Perform file actions to modify the cloned file table for file_action in file_actions { match file_action { diff --git a/src/libos/src/process/thread.rs b/src/libos/src/process/thread.rs new file mode 100644 index 00000000..b1efeda1 --- /dev/null +++ b/src/libos/src/process/thread.rs @@ -0,0 +1,84 @@ +use super::*; + +pub struct ThreadGroup { + threads: Vec, +} + + +bitflags! 
{ + pub struct CloneFlags : u32 { + const CLONE_VM = 0x00000100; + const CLONE_FS = 0x00000200; + const CLONE_FILES = 0x00000400; + const CLONE_SIGHAND = 0x00000800; + const CLONE_PTRACE = 0x00002000; + const CLONE_VFORK = 0x00004000; + const CLONE_PARENT = 0x00008000; + const CLONE_THREAD = 0x00010000; + const CLONE_NEWNS = 0x00020000; + const CLONE_SYSVSEM = 0x00040000; + const CLONE_SETTLS = 0x00080000; + const CLONE_PARENT_SETTID = 0x00100000; + const CLONE_CHILD_CLEARTID = 0x00200000; + const CLONE_DETACHED = 0x00400000; + const CLONE_UNTRACED = 0x00800000; + const CLONE_CHILD_SETTID = 0x01000000; + const CLONE_NEWCGROUP = 0x02000000; + const CLONE_NEWUTS = 0x04000000; + const CLONE_NEWIPC = 0x08000000; + const CLONE_NEWUSER = 0x10000000; + const CLONE_NEWPID = 0x20000000; + const CLONE_NEWNET = 0x40000000; + const CLONE_IO = 0x80000000; + } +} + +pub fn do_clone( + flags: CloneFlags, + stack_addr: usize, + ptid: Option<*mut i32>, + ctid: Option<*mut i32>, + new_tls: usize +) -> Result { + info!("clone: flags: {:?}, stack_addr: {:?}, ptid: {:?}, ctid: {:?}, new_tls: {:?}", + flags, stack_addr, ptid, ctid, new_tls); + + let current_ref = get_current(); + let current = current_ref.lock().unwrap(); + + let (new_thread_pid, new_thread_ref) = { + let task = new_thread_task(stack_addr, new_tls)?; + let vm_ref = current.get_vm().clone(); + let files_ref = current.get_files().clone(); + let cwd = &current.cwd; + Process::new(cwd, task, vm_ref, files_ref)? 
+ }; + + // TODO: always get parent lock first to avoid deadlock + { + let parent_ref = current.parent.as_ref().unwrap(); + let mut parent = parent_ref.lock().unwrap(); + let mut new_thread = new_thread_ref.lock().unwrap(); + parent.children.push(Arc::downgrade(&new_thread_ref)); + new_thread.parent = Some(parent_ref.clone()); + } + + process_table::put(new_thread_pid, new_thread_ref.clone()); + task::enqueue_task(new_thread_ref); + Ok(new_thread_pid) +} + +fn new_thread_task(user_stack: usize, new_tls: usize) -> Result { + // The calling convention of Occlum clone syscall requires the user to + // restore the entry point of the new thread at the top of the user stack. + let user_entry = unsafe { + *(user_stack as *mut usize) + // TODO: check user_entry is a cfi_label + }; + Ok(Task { + user_stack_addr: user_stack, + user_entry_addr: user_entry, + user_fsbase_addr: new_tls, + ..Default::default() + }) +} diff --git a/src/libos/src/syscall/mod.rs b/src/libos/src/syscall/mod.rs index 78ff7c10..78615466 100644 --- a/src/libos/src/syscall/mod.rs +++ b/src/libos/src/syscall/mod.rs @@ -2,7 +2,7 @@ use {fs, process, std, vm}; use fs::{FileDesc, off_t}; use fs::File; use prelude::*; -use process::{ChildProcessFilter, FileAction, pid_t}; +use process::{ChildProcessFilter, FileAction, pid_t, CloneFlags}; use std::ffi::{CStr, CString}; use std::ptr; use time::timeval_t; @@ -79,6 +79,13 @@ pub extern "C" fn dispatch_syscall( arg3 as *const *const i8, arg4 as *const FdOp, ), + SYS_CLONE => do_clone( + arg0 as u32, + arg1 as usize, + arg2 as *mut i32, + arg3 as *mut i32, + arg4 as usize, + ), SYS_WAIT4 => do_wait4(arg0 as i32, arg1 as *mut i32), SYS_GETPID => do_getpid(), SYS_GETPPID => do_getppid(), @@ -197,6 +204,40 @@ fn do_spawn( Ok(0) } +pub fn do_clone( + flags: u32, + stack_addr: usize, + ptid: *mut i32, + ctid: *mut i32, + new_tls: usize, +) -> Result { + let flags = CloneFlags::from_bits_truncate(flags); + check_mut_ptr(stack_addr as *mut u64)?; + let ptid = { + if ptid 
!= ptr::null_mut() { + check_mut_ptr(ptid)?; + Some(ptid) + } + else { + None + } + }; + let ctid = { + if ctid != ptr::null_mut() { + check_mut_ptr(ctid)?; + Some(ctid) + } + else { + None + } + }; + check_mut_ptr(new_tls as *mut u64)?; + + let child_pid = process::do_clone(flags, stack_addr, ptid, ctid, new_tls)?; + + Ok(child_pid as isize) +} + fn do_open(path: *const i8, flags: u32, mode: u32) -> Result { let path = clone_cstring_safely(path)?.to_string_lossy().into_owned(); let fd = fs::do_open(&path, flags, mode)?; diff --git a/src/libos/src/vm/mod.rs b/src/libos/src/vm/mod.rs index 42112c2f..db1500ab 100644 --- a/src/libos/src/vm/mod.rs +++ b/src/libos/src/vm/mod.rs @@ -19,15 +19,17 @@ pub use self::vm_range::{VMRange, VMRangeTrait}; // TODO: accept fd and offset pub fn do_mmap(addr: usize, size: usize, flags: VMAreaFlags) -> Result { let current_ref = get_current(); - let mut current_process = current_ref.lock().unwrap(); - let current_vm = current_process.get_vm_mut(); + let current_process = current_ref.lock().unwrap(); + let current_vm_ref = current_process.get_vm(); + let mut current_vm = current_vm_ref.lock().unwrap(); current_vm.mmap(addr, size, flags) } pub fn do_munmap(addr: usize, size: usize) -> Result<(), Error> { let current_ref = get_current(); - let mut current_process = current_ref.lock().unwrap(); - let current_vm = current_process.get_vm_mut(); + let current_process = current_ref.lock().unwrap(); + let current_vm_ref = current_process.get_vm(); + let mut current_vm = current_vm_ref.lock().unwrap(); current_vm.munmap(addr, size) } @@ -38,15 +40,17 @@ pub fn do_mremap( options: &VMResizeOptions, ) -> Result { let current_ref = get_current(); - let mut current_process = current_ref.lock().unwrap(); - let current_vm = current_process.get_vm_mut(); + let current_process = current_ref.lock().unwrap(); + let current_vm_ref = current_process.get_vm(); + let mut current_vm = current_vm_ref.lock().unwrap(); current_vm.mremap(old_addr, old_size, 
options) } pub fn do_brk(addr: usize) -> Result { let current_ref = get_current(); - let mut current_process = current_ref.lock().unwrap(); - let current_vm = current_process.get_vm_mut(); + let current_process = current_ref.lock().unwrap(); + let current_vm_ref = current_process.get_vm(); + let mut current_vm = current_vm_ref.lock().unwrap(); current_vm.brk(addr) } diff --git a/test/Makefile b/test/Makefile index 37a90fae..07afb9f8 100644 --- a/test/Makefile +++ b/test/Makefile @@ -4,7 +4,7 @@ PROJECT_DIR := $(realpath $(CUR_DIR)/../) # Dependencies: need to be compiled but not to run by any Makefile target TEST_DEPS := dev_null # Tests: need to be compiled and run by test-% target -TESTS := empty argv hello_world malloc file getpid spawn pipe time truncate readdir mkdir link +TESTS := empty argv hello_world malloc file getpid spawn pipe time truncate readdir mkdir link clone # Benchmarks: need to be compiled and run by bench-% target BENCHES := spawn_and_exit_latency pipe_throughput diff --git a/test/clone/Makefile b/test/clone/Makefile new file mode 100644 index 00000000..9e1b6dec --- /dev/null +++ b/test/clone/Makefile @@ -0,0 +1,5 @@ +include ../test_common.mk + +EXTRA_C_FLAGS := +EXTRA_LINK_FLAGS := +BIN_ARGS := diff --git a/test/clone/main.c b/test/clone/main.c new file mode 100644 index 00000000..908241af --- /dev/null +++ b/test/clone/main.c @@ -0,0 +1,54 @@ +#include +#include +#define _GNU_SOURCE +#include + +#define NTHREADS 4 +#define STACK_SIZE (8 * 1024) + +// From file arch/x86_64/atomic_arch.h in musl libc. MIT License. 
+static inline void a_inc(volatile int *p) +{ + __asm__ __volatile__( + "lock ; incl %0" + : "=m"(*p) : "m"(*p) : "memory" ); +} + +volatile int num_exit_threads = 0; + +int thread_func(void* arg) { + int* tid = arg; + //printf("tid = %d\n", *tid); + a_inc(&num_exit_threads); + return 0; +} + +int main(int argc, const char* argv[]) { + unsigned int clone_flags = CLONE_VM | CLONE_FS | CLONE_FILES | + CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM | CLONE_DETACHED; + + printf("Creating %d threads...", NTHREADS); + int thread_ids[NTHREADS]; + for (int tid = 0; tid < NTHREADS; tid++) { + void* thread_stack = malloc(STACK_SIZE); + if (thread_stack == NULL) { + printf("ERROR: malloc failed for thread %d\n", tid); + return -1; + } + + thread_ids[tid] = tid; + void* thread_arg = &thread_ids[tid]; + if (clone(thread_func, thread_stack, clone_flags, thread_arg) < 0) { + printf("ERROR: clone failed for thread %d\n", tid); + return -1; + } + } + printf("done.\n"); + + printf("Waiting for %d threads to exit...", NTHREADS); + // Wait for all threads to exit + while (num_exit_threads != NTHREADS); + printf("done.\n"); + + return 0; +} diff --git a/test/test_common.mk b/test/test_common.mk index 6e969fdc..c0909b9e 100644 --- a/test/test_common.mk +++ b/test/test_common.mk @@ -9,7 +9,7 @@ S_FILES := $(C_SRCS:%.c=%.S) C_OBJS := $(C_SRCS:%.c=%.o) FS_PATH := ../fs BIN_NAME := $(shell basename $(CUR_DIR)) -BIN_FS_PATH := /$(BIN_NAME) +BIN_FS_PATH := $(BIN_NAME) BIN_PATH := $(FS_PATH)/$(BIN_FS_PATH) OBJDUMP_FILE := bin.objdump READELF_FILE := bin.readelf