Add clone system call

This commit is contained in:
Tate, Hongliang Tian 2019-04-01 22:16:59 +08:00 committed by Tate Tian
parent 684f0df286
commit 4cf8777592
11 changed files with 249 additions and 52 deletions

@ -56,7 +56,7 @@ pub fn do_open(path: &str, flags: u32, mode: u32) -> Result<FileDesc, Error> {
let fd = {
let close_on_spawn = flags.contains(OpenFlags::CLOEXEC);
proc.get_files_mut().put(file_ref, close_on_spawn)
proc.get_files().lock().unwrap().put(file_ref, close_on_spawn)
};
Ok(fd)
}
@ -65,7 +65,7 @@ pub fn do_write(fd: FileDesc, buf: &[u8]) -> Result<usize, Error> {
info!("write: fd: {}", fd);
let current_ref = process::get_current();
let current_process = current_ref.lock().unwrap();
let file_ref = current_process.get_files().get(fd)?;
let file_ref = current_process.get_files().lock().unwrap().get(fd)?;
file_ref.write(buf)
}
@ -73,7 +73,7 @@ pub fn do_read(fd: FileDesc, buf: &mut [u8]) -> Result<usize, Error> {
info!("read: fd: {}", fd);
let current_ref = process::get_current();
let current_process = current_ref.lock().unwrap();
let file_ref = current_process.get_files().get(fd)?;
let file_ref = current_process.get_files().lock().unwrap().get(fd)?;
file_ref.read(buf)
}
@ -81,7 +81,7 @@ pub fn do_writev(fd: FileDesc, bufs: &[&[u8]]) -> Result<usize, Error> {
info!("writev: fd: {}", fd);
let current_ref = process::get_current();
let current_process = current_ref.lock().unwrap();
let file_ref = current_process.get_files().get(fd)?;
let file_ref = current_process.get_files().lock().unwrap().get(fd)?;
file_ref.writev(bufs)
}
@ -89,7 +89,7 @@ pub fn do_readv(fd: FileDesc, bufs: &mut [&mut [u8]]) -> Result<usize, Error> {
info!("readv: fd: {}", fd);
let current_ref = process::get_current();
let current_process = current_ref.lock().unwrap();
let file_ref = current_process.get_files().get(fd)?;
let file_ref = current_process.get_files().lock().unwrap().get(fd)?;
file_ref.readv(bufs)
}
@ -97,7 +97,7 @@ pub fn do_pwrite(fd: FileDesc, buf: &[u8], offset: usize) -> Result<usize, Error
info!("pwrite: fd: {}, offset: {}", fd, offset);
let current_ref = process::get_current();
let current_process = current_ref.lock().unwrap();
let file_ref = current_process.get_files().get(fd)?;
let file_ref = current_process.get_files().lock().unwrap().get(fd)?;
file_ref.write_at(offset, buf)
}
@ -105,7 +105,7 @@ pub fn do_pread(fd: FileDesc, buf: &mut [u8], offset: usize) -> Result<usize, Er
info!("pread: fd: {}, offset: {}", fd, offset);
let current_ref = process::get_current();
let current_process = current_ref.lock().unwrap();
let file_ref = current_process.get_files().get(fd)?;
let file_ref = current_process.get_files().lock().unwrap().get(fd)?;
file_ref.read_at(offset, buf)
}
@ -118,7 +118,7 @@ pub fn do_fstat(fd: u32) -> Result<Stat, Error> {
info!("fstat: fd: {}", fd);
let current_ref = process::get_current();
let current_process = current_ref.lock().unwrap();
let file_ref = current_process.get_files().get(fd)?;
let file_ref = current_process.get_files().lock().unwrap().get(fd)?;
let stat = Stat::from(file_ref.metadata()?);
// TODO: handle symlink
Ok(stat)
@ -137,7 +137,7 @@ pub fn do_lstat(path: &str) -> Result<Stat, Error> {
pub fn do_lseek(fd: FileDesc, offset: SeekFrom) -> Result<off_t, Error> {
let current_ref = process::get_current();
let current_process = current_ref.lock().unwrap();
let file_ref = current_process.get_files().get(fd)?;
let file_ref = current_process.get_files().lock().unwrap().get(fd)?;
file_ref.seek(offset)
}
@ -145,7 +145,7 @@ pub fn do_fsync(fd: FileDesc) -> Result<(), Error> {
info!("fsync: fd: {}", fd);
let current_ref = process::get_current();
let current_process = current_ref.lock().unwrap();
let file_ref = current_process.get_files().get(fd)?;
let file_ref = current_process.get_files().lock().unwrap().get(fd)?;
file_ref.sync_all()?;
Ok(())
}
@ -154,7 +154,7 @@ pub fn do_fdatasync(fd: FileDesc) -> Result<(), Error> {
info!("fdatasync: fd: {}", fd);
let current_ref = process::get_current();
let current_process = current_ref.lock().unwrap();
let file_ref = current_process.get_files().get(fd)?;
let file_ref = current_process.get_files().lock().unwrap().get(fd)?;
file_ref.sync_data()?;
Ok(())
}
@ -171,7 +171,7 @@ pub fn do_ftruncate(fd: FileDesc, len: usize) -> Result<(), Error> {
info!("ftruncate: fd: {}, len: {}", fd, len);
let current_ref = process::get_current();
let current_process = current_ref.lock().unwrap();
let file_ref = current_process.get_files().get(fd)?;
let file_ref = current_process.get_files().lock().unwrap().get(fd)?;
file_ref.set_len(len as u64)?;
Ok(())
}
@ -185,7 +185,7 @@ pub fn do_getdents64(fd: FileDesc, buf: &mut [u8]) -> Result<usize, Error> {
);
let current_ref = process::get_current();
let current_process = current_ref.lock().unwrap();
let file_ref = current_process.get_files().get(fd)?;
let file_ref = current_process.get_files().lock().unwrap().get(fd)?;
let info = file_ref.metadata()?;
if info.type_ != FileType::Dir {
return Err(Error::new(ENOTDIR, ""));
@ -207,8 +207,9 @@ pub fn do_getdents64(fd: FileDesc, buf: &mut [u8]) -> Result<usize, Error> {
pub fn do_close(fd: FileDesc) -> Result<(), Error> {
let current_ref = process::get_current();
let mut current_process = current_ref.lock().unwrap();
let file_table = current_process.get_files_mut();
let current_process = current_ref.lock().unwrap();
let file_table_ref = current_process.get_files();
let mut file_table = file_table_ref.lock().unwrap();
file_table.del(fd)?;
Ok(())
}
@ -216,10 +217,11 @@ pub fn do_close(fd: FileDesc) -> Result<(), Error> {
pub fn do_pipe2(flags: u32) -> Result<[FileDesc; 2], Error> {
let flags = OpenFlags::from_bits_truncate(flags);
let current_ref = process::get_current();
let mut current = current_ref.lock().unwrap();
let current = current_ref.lock().unwrap();
let pipe = Pipe::new()?;
let mut file_table = current.get_files_mut();
let file_table_ref = current.get_files();
let mut file_table = file_table_ref.lock().unwrap();
let close_on_spawn = flags.contains(OpenFlags::CLOEXEC);
let reader_fd = file_table.put(Arc::new(Box::new(pipe.reader)), close_on_spawn);
let writer_fd = file_table.put(Arc::new(Box::new(pipe.writer)), close_on_spawn);
@ -228,8 +230,9 @@ pub fn do_pipe2(flags: u32) -> Result<[FileDesc; 2], Error> {
pub fn do_dup(old_fd: FileDesc) -> Result<FileDesc, Error> {
let current_ref = process::get_current();
let mut current = current_ref.lock().unwrap();
let file_table = current.get_files_mut();
let current = current_ref.lock().unwrap();
let file_table_ref = current.get_files();
let mut file_table = file_table_ref.lock().unwrap();
let file = file_table.get(old_fd)?;
let new_fd = file_table.put(file, false);
Ok(new_fd)
@ -237,8 +240,9 @@ pub fn do_dup(old_fd: FileDesc) -> Result<FileDesc, Error> {
pub fn do_dup2(old_fd: FileDesc, new_fd: FileDesc) -> Result<FileDesc, Error> {
let current_ref = process::get_current();
let mut current = current_ref.lock().unwrap();
let file_table = current.get_files_mut();
let current = current_ref.lock().unwrap();
let file_table_ref = current.get_files();
let mut file_table = file_table_ref.lock().unwrap();
let file = file_table.get(old_fd)?;
if old_fd != new_fd {
file_table.put_at(new_fd, file, false);
@ -249,8 +253,9 @@ pub fn do_dup2(old_fd: FileDesc, new_fd: FileDesc) -> Result<FileDesc, Error> {
pub fn do_dup3(old_fd: FileDesc, new_fd: FileDesc, flags: u32) -> Result<FileDesc, Error> {
let flags = OpenFlags::from_bits_truncate(flags);
let current_ref = process::get_current();
let mut current = current_ref.lock().unwrap();
let file_table = current.get_files_mut();
let current = current_ref.lock().unwrap();
let file_table_ref = current.get_files();
let mut file_table = file_table_ref.lock().unwrap();
let file = file_table.get(old_fd)?;
if old_fd == new_fd {
return errno!(EINVAL, "old_fd must not be equal to new_fd");

@ -6,6 +6,7 @@ pub mod table {
pub use self::exit::{do_exit, do_wait4, ChildProcessFilter};
pub use self::spawn::{do_spawn, FileAction};
pub use self::wait::{WaitQueue, Waiter};
pub use self::thread::{do_clone, CloneFlags, ThreadGroup};
#[allow(non_camel_case_types)]
pub type pid_t = u32;
@ -18,16 +19,20 @@ pub struct Process {
pgid: pid_t,
tgid: pid_t,
exit_status: i32,
// TODO: move cwd, root_inode into a FileSystem structure
// TODO: should cwd be a String or INode?
cwd: String,
parent: Option<ProcessRef>,
children: Vec<ProcessWeakRef>,
waiting_children: Option<WaitQueue<ChildProcessFilter, pid_t>>,
vm: ProcessVM,
file_table: FileTable,
vm: ProcessVMRef,
file_table: FileTableRef,
}
/// Shared, mutex-protected handle to a `Process`.
pub type ProcessRef = Arc<SgxMutex<Process>>;
/// Weak (non-owning) counterpart of `ProcessRef`; this is the type stored in
/// `Process::children`.
pub type ProcessWeakRef = std::sync::Weak<SgxMutex<Process>>;
/// Shared, mutex-protected handle to a `FileTable`. Because it is an `Arc`,
/// more than one `Process` entry can hold the same file table.
pub type FileTableRef = Arc<SgxMutex<FileTable>>;
/// Shared, mutex-protected handle to a `ProcessVM`. Because it is an `Arc`,
/// more than one `Process` entry can hold the same address space.
pub type ProcessVMRef = Arc<SgxMutex<ProcessVM>>;
pub fn do_getpid() -> pid_t {
let current_ref = get_current();
@ -57,6 +62,7 @@ mod process_table;
mod spawn;
mod task;
mod wait;
mod thread;
use self::task::Task;
use super::*;

@ -27,8 +27,8 @@ impl Process {
pub fn new(
cwd: &str,
task: Task,
vm: ProcessVM,
file_table: FileTable,
vm_ref: ProcessVMRef,
file_table_ref: FileTableRef,
) -> Result<(pid_t, ProcessRef), Error> {
let new_pid = process_table::alloc_pid();
let new_process_ref = Arc::new(SgxMutex::new(Process {
@ -42,8 +42,8 @@ impl Process {
parent: None,
children: Vec::new(),
waiting_children: None,
vm: vm,
file_table: file_table,
vm: vm_ref,
file_table: file_table_ref,
}));
Ok((new_pid, new_process_ref))
}
@ -72,18 +72,12 @@ impl Process {
pub fn get_cwd(&self) -> &str {
&self.cwd
}
pub fn get_vm(&self) -> &ProcessVM {
pub fn get_vm(&self) -> &ProcessVMRef {
&self.vm
}
pub fn get_vm_mut(&mut self) -> &mut ProcessVM {
&mut self.vm
}
pub fn get_files(&self) -> &FileTable {
pub fn get_files(&self) -> &FileTableRef {
&self.file_table
}
pub fn get_files_mut(&mut self) -> &mut FileTable {
&mut self.file_table
}
pub fn get_parent(&self) -> &ProcessRef {
self.parent.as_ref().unwrap()
}

@ -52,6 +52,7 @@ pub fn do_spawn<P: AsRef<Path>>(
};
let (new_pid, new_process_ref) = {
let cwd = elf_path.as_ref().parent().unwrap().to_str().unwrap();
let vm = init_vm::do_init(&elf_file, &elf_buf[..])?;
let task = {
let program_entry = {
@ -64,9 +65,12 @@ pub fn do_spawn<P: AsRef<Path>>(
let stack_top = vm.get_stack_top();
init_task(program_entry, stack_top, argv, envp)?
};
let files = init_files(parent_ref, file_actions)?;
let cwd = elf_path.as_ref().parent().unwrap().to_str().unwrap();
Process::new(cwd, task, vm, files)?
let vm_ref = Arc::new(SgxMutex::new(vm));
let files_ref = {
let files = init_files(parent_ref, file_actions)?;
Arc::new(SgxMutex::new(files))
};
Process::new(cwd, task, vm_ref, files_ref)?
};
parent_adopts_new_child(&parent_ref, &new_process_ref);
process_table::put(new_pid, new_process_ref.clone());
@ -79,7 +83,7 @@ fn init_files(parent_ref: &ProcessRef, file_actions: &[FileAction]) -> Result<Fi
let parent = parent_ref.lock().unwrap();
let should_inherit_file_table = parent.get_pid() > 0;
if should_inherit_file_table {
let mut cloned_file_table = parent.get_files().clone();
let mut cloned_file_table = parent.get_files().lock().unwrap().clone();
// Perform file actions to modify the cloned file table
for file_action in file_actions {
match file_action {

@ -0,0 +1,84 @@
use super::*;
/// A collection of thread handles.
///
/// NOTE(review): nothing visible in this file constructs or reads a
/// `ThreadGroup` yet; presumably it is a placeholder for tracking the threads
/// created through `do_clone` — confirm before relying on it.
pub struct ThreadGroup {
// Handles (`ProcessRef`) of the threads in this group.
threads: Vec<ProcessRef>,
}
// Bit flags accepted by the clone system call. The names and values follow
// the Linux `CLONE_*` constants (see clone(2)).
//
// NOTE(review): `do_clone` in this file currently only logs the flags; none of
// them changes its behavior yet.
bitflags! {
pub struct CloneFlags : u32 {
// Sharing of resources between the caller and the new task.
const CLONE_VM = 0x00000100;
const CLONE_FS = 0x00000200;
const CLONE_FILES = 0x00000400;
const CLONE_SIGHAND = 0x00000800;
const CLONE_PTRACE = 0x00002000;
const CLONE_VFORK = 0x00004000;
const CLONE_PARENT = 0x00008000;
const CLONE_THREAD = 0x00010000;
const CLONE_NEWNS = 0x00020000;
const CLONE_SYSVSEM = 0x00040000;
// TLS and thread-id bookkeeping requests.
const CLONE_SETTLS = 0x00080000;
const CLONE_PARENT_SETTID = 0x00100000;
const CLONE_CHILD_CLEARTID = 0x00200000;
const CLONE_DETACHED = 0x00400000;
const CLONE_UNTRACED = 0x00800000;
const CLONE_CHILD_SETTID = 0x01000000;
// Namespace-related flags.
const CLONE_NEWCGROUP = 0x02000000;
const CLONE_NEWUTS = 0x04000000;
const CLONE_NEWIPC = 0x08000000;
const CLONE_NEWUSER = 0x10000000;
const CLONE_NEWPID = 0x20000000;
const CLONE_NEWNET = 0x40000000;
const CLONE_IO = 0x80000000;
}
}
/// Creates a new thread for the calling process and hands it to the scheduler.
///
/// The thread is registered as its own `Process` entry. It receives clones of
/// the caller's VM and file-table handles (the `Arc`s are cloned, so the
/// underlying objects are shared) and the caller's current working directory.
/// The new entry is attached as a child of the *caller's parent*, not of the
/// caller itself.
///
/// # Arguments
/// * `flags` - clone flags; currently only logged.
/// * `stack_addr` - address of the new thread's user stack; the thread's entry
///   point is read from this address (see `new_thread_task`).
/// * `ptid`, `ctid` - optional thread-id pointers; currently only logged.
/// * `new_tls` - value used as the new thread's FS base.
///
/// # Returns
/// The pid allocated for the new thread, or the error produced while building
/// the task or the process entry.
///
/// # Panics
/// Panics if locking the caller, its parent or the new thread fails
/// (`lock().unwrap()`), or if the caller has no parent.
pub fn do_clone(
    flags: CloneFlags,
    stack_addr: usize,
    ptid: Option<*mut i32>,
    ctid: Option<*mut i32>,
    new_tls: usize
) -> Result<pid_t, Error> {
    info!("clone: flags: {:?}, stack_addr: {:?}, ptid: {:?}, ctid: {:?}, new_tls: {:?}",
        flags, stack_addr, ptid, ctid, new_tls);

    // The caller stays locked until this function returns.
    let caller_ref = get_current();
    let caller = caller_ref.lock().unwrap();

    // Build the new thread: fresh task, shared VM and file table, same cwd.
    let thread_task = new_thread_task(stack_addr, new_tls)?;
    let shared_vm = Arc::clone(caller.get_vm());
    let shared_files = Arc::clone(caller.get_files());
    let (thread_pid, thread_ref) =
        Process::new(caller.get_cwd(), thread_task, shared_vm, shared_files)?;

    // TODO: always get parent lock first to avoid deadlock
    {
        // Link the new thread under the caller's parent; both locks are
        // released at the end of this scope.
        let parent_ref = caller.get_parent();
        let mut parent = parent_ref.lock().unwrap();
        let mut thread = thread_ref.lock().unwrap();
        parent.children.push(Arc::downgrade(&thread_ref));
        thread.parent = Some(Arc::clone(parent_ref));
    }

    // Publish the thread in the process table, then make it runnable.
    process_table::put(thread_pid, Arc::clone(&thread_ref));
    task::enqueue_task(thread_ref);
    Ok(thread_pid)
}
fn new_thread_task(user_stack: usize, new_tls: usize) -> Result<Task, Error> {
// The calling convention of Occlum clone syscall requires the user to
// restore the entry point of the new thread at the top of the user stack.
let user_entry = unsafe {
*(user_stack as *mut usize)
// TODO: check user_entry is a cfi_label
};
Ok(Task {
user_stack_addr: user_stack,
user_entry_addr: user_entry,
user_fsbase_addr: new_tls,
..Default::default()
})
}

@ -2,7 +2,7 @@ use {fs, process, std, vm};
use fs::{FileDesc, off_t};
use fs::File;
use prelude::*;
use process::{ChildProcessFilter, FileAction, pid_t};
use process::{ChildProcessFilter, FileAction, pid_t, CloneFlags};
use std::ffi::{CStr, CString};
use std::ptr;
use time::timeval_t;
@ -79,6 +79,13 @@ pub extern "C" fn dispatch_syscall(
arg3 as *const *const i8,
arg4 as *const FdOp,
),
SYS_CLONE => do_clone(
arg0 as u32,
arg1 as usize,
arg2 as *mut i32,
arg3 as *mut i32,
arg4 as usize,
),
SYS_WAIT4 => do_wait4(arg0 as i32, arg1 as *mut i32),
SYS_GETPID => do_getpid(),
SYS_GETPPID => do_getppid(),
@ -197,6 +204,40 @@ fn do_spawn(
Ok(0)
}
pub fn do_clone(
flags: u32,
stack_addr: usize,
ptid: *mut i32,
ctid: *mut i32,
new_tls: usize,
) -> Result<isize, Error> {
let flags = CloneFlags::from_bits_truncate(flags);
check_mut_ptr(stack_addr as *mut u64)?;
let ptid = {
if ptid != ptr::null_mut() {
check_mut_ptr(ptid)?;
Some(ptid)
}
else {
None
}
};
let ctid = {
if ctid != ptr::null_mut() {
check_mut_ptr(ctid)?;
Some(ctid)
}
else {
None
}
};
check_mut_ptr(new_tls as *mut u64)?;
let child_pid = process::do_clone(flags, stack_addr, ptid, ctid, new_tls)?;
Ok(child_pid as isize)
}
fn do_open(path: *const i8, flags: u32, mode: u32) -> Result<isize, Error> {
let path = clone_cstring_safely(path)?.to_string_lossy().into_owned();
let fd = fs::do_open(&path, flags, mode)?;

@ -19,15 +19,17 @@ pub use self::vm_range::{VMRange, VMRangeTrait};
// TODO: accept fd and offset
pub fn do_mmap(addr: usize, size: usize, flags: VMAreaFlags) -> Result<usize, Error> {
let current_ref = get_current();
let mut current_process = current_ref.lock().unwrap();
let current_vm = current_process.get_vm_mut();
let current_process = current_ref.lock().unwrap();
let current_vm_ref = current_process.get_vm();
let mut current_vm = current_vm_ref.lock().unwrap();
current_vm.mmap(addr, size, flags)
}
pub fn do_munmap(addr: usize, size: usize) -> Result<(), Error> {
let current_ref = get_current();
let mut current_process = current_ref.lock().unwrap();
let current_vm = current_process.get_vm_mut();
let current_process = current_ref.lock().unwrap();
let current_vm_ref = current_process.get_vm();
let mut current_vm = current_vm_ref.lock().unwrap();
current_vm.munmap(addr, size)
}
@ -38,15 +40,17 @@ pub fn do_mremap(
options: &VMResizeOptions,
) -> Result<usize, Error> {
let current_ref = get_current();
let mut current_process = current_ref.lock().unwrap();
let current_vm = current_process.get_vm_mut();
let current_process = current_ref.lock().unwrap();
let current_vm_ref = current_process.get_vm();
let mut current_vm = current_vm_ref.lock().unwrap();
current_vm.mremap(old_addr, old_size, options)
}
pub fn do_brk(addr: usize) -> Result<usize, Error> {
let current_ref = get_current();
let mut current_process = current_ref.lock().unwrap();
let current_vm = current_process.get_vm_mut();
let current_process = current_ref.lock().unwrap();
let current_vm_ref = current_process.get_vm();
let mut current_vm = current_vm_ref.lock().unwrap();
current_vm.brk(addr)
}

@ -4,7 +4,7 @@ PROJECT_DIR := $(realpath $(CUR_DIR)/../)
# Dependencies: need to be compiled but not to run by any Makefile target
TEST_DEPS := dev_null
# Tests: need to be compiled and run by test-% target
TESTS := empty argv hello_world malloc file getpid spawn pipe time truncate readdir mkdir link
TESTS := empty argv hello_world malloc file getpid spawn pipe time truncate readdir mkdir link clone
# Benchmarks: need to be compiled and run by bench-% target
BENCHES := spawn_and_exit_latency pipe_throughput

5
test/clone/Makefile Normal file

@ -0,0 +1,5 @@
# Pull in ../test_common.mk (presumably the build/run rules shared by all
# tests — verify against that file).
include ../test_common.mk
# This test sets no extra compiler flags, linker flags or program arguments.
EXTRA_C_FLAGS :=
EXTRA_LINK_FLAGS :=
BIN_ARGS :=

54
test/clone/main.c Normal file

@ -0,0 +1,54 @@
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#define NTHREADS 4
#define STACK_SIZE (8 * 1024)
// From file arch/x86_64/atomic_arch.h in musl libc. MIT License.
//
// Atomically increments the int that p points to. The x86 `lock` prefix makes
// the `incl` read-modify-write atomic, and the "memory" clobber keeps the
// compiler from reordering memory accesses across the statement.
static inline void a_inc(volatile int *p)
{
__asm__ __volatile__(
"lock ; incl %0"
: "=m"(*p) : "m"(*p) : "memory" );
}
volatile int num_exit_threads = 0;
// Entry point of every cloned thread.
//
// arg points to this thread's id (an int owned by main). It is only needed by
// the debug print below, so it is otherwise unused.
//
// Atomically bumps num_exit_threads so that main can tell when all threads
// have run, then returns 0.
int thread_func(void* arg) {
    (void)arg; // silence the unused-parameter warning while the print is disabled
    //printf("tid = %d\n", *(int*)arg);
    a_inc(&num_exit_threads);
    return 0;
}
// Spawns NTHREADS threads with clone() and waits until every one of them has
// incremented num_exit_threads.
//
// Returns 0 on success, or -1 if a stack allocation or a clone() call fails.
int main(int argc, const char* argv[]) {
    unsigned int clone_flags = CLONE_VM | CLONE_FS | CLONE_FILES |
        CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM | CLONE_DETACHED;

    printf("Creating %d threads...", NTHREADS);
    int thread_ids[NTHREADS];
    for (int tid = 0; tid < NTHREADS; tid++) {
        // The stack is not freed here because the thread may still be
        // running on it when main returns.
        void* thread_stack = malloc(STACK_SIZE);
        if (thread_stack == NULL) {
            printf("ERROR: malloc failed for thread %d\n", tid);
            return -1;
        }
        // Stacks grow downward, so clone() must be given the highest address
        // of the allocation. Passing the base would make the child (and the
        // libc clone wrapper) write below the malloc'ed buffer.
        void* thread_stack_top = (char*)thread_stack + STACK_SIZE;

        thread_ids[tid] = tid;
        void* thread_arg = &thread_ids[tid];
        if (clone(thread_func, thread_stack_top, clone_flags, thread_arg) < 0) {
            printf("ERROR: clone failed for thread %d\n", tid);
            return -1;
        }
    }
    printf("done.\n");

    printf("Waiting for %d threads to exit...", NTHREADS);
    // Wait for all threads to exit (busy-wait on the volatile counter)
    while (num_exit_threads != NTHREADS);
    printf("done.\n");

    return 0;
}

@ -9,7 +9,7 @@ S_FILES := $(C_SRCS:%.c=%.S)
C_OBJS := $(C_SRCS:%.c=%.o)
FS_PATH := ../fs
BIN_NAME := $(shell basename $(CUR_DIR))
BIN_FS_PATH := /$(BIN_NAME)
BIN_FS_PATH := $(BIN_NAME)
BIN_PATH := $(FS_PATH)/$(BIN_FS_PATH)
OBJDUMP_FILE := bin.objdump
READELF_FILE := bin.readelf