Refactor the process/thread subsystem

As a major rewrite of the process/thread subsystem, this commit: 1. Implements threads as a first-class object, which represents a group of OS resources and a thread of execution; 2. Implements processes as a first-class object that manages threads and maintains the parent-child relationship between processes; 3. Refactors the code in the process subsystem to follow the improved coding style and conventions that emerged in recent commits; 4. Refactors the code in other subsystems to use the new process/thread subsystem.
2020-04-07 10:08:02 +00:00 · 2020-04-07 10:08:02 +00:00 · 2a1d3d98c5
commit 2a1d3d98c5
parent f9df83f559
85 changed files with 2909 additions and 1978 deletions
--- a/.gitignore
+++ b/.gitignore
@ -3,3 +3,4 @@
 *.so
 build/
 build_sim/
+.DS_Store
--- a/src/libos/Makefile
+++ b/src/libos/Makefile
@ -67,7 +67,7 @@ C_SRCS := $(filter-out $(BUILTIN_C_SRCS),$(sort $(wildcard src/*.c src/*/*.c src
 C_OBJS := $(addprefix $(BUILD_DIR)/src/libos/,$(C_SRCS:.c=.o))
 CXX_SRCS := $(sort $(wildcard src/*.cpp src/*/*.cpp))
 CXX_OBJS := $(addprefix $(BUILD_DIR)/src/libos/,$(CXX_SRCS:.cpp=.o))
-S_SRCS := $(sort $(wildcard src/*.S src/*/*.S))
+S_SRCS := $(sort $(wildcard src/*.S src/*/*.S src/*/*/*.S))
 S_OBJS := $(addprefix $(BUILD_DIR)/src/libos/,$(S_SRCS:.S=.o))

 ALL_BUILD_SUBDIRS := $(sort $(patsubst %/,%,$(dir $(LIBOS_SO) $(EDL_C_OBJS) $(BUILTIN_C_OBJS) $(C_OBJS) $(CXX_OBJS) $(S_OBJS)) $(RUST_TARGET_DIR) $(RUST_OUT_DIR)))
--- a/src/libos/src/entry.rs
+++ b/src/libos/src/entry.rs
@ -175,7 +175,7 @@ fn do_new_process(

    let envp = &config::LIBOS_CONFIG.env;
    let file_actions = Vec::new();
-    let parent = &process::IDLE_PROCESS;
+    let current = &process::IDLE;
    let program_path_str = program_path.to_str().unwrap();
    let new_tid = process::do_spawn_without_exec(
        &program_path_str,
@ -183,13 +183,13 @@ fn do_new_process(
        envp,
        &file_actions,
        host_stdio_fds,
-        parent,
+        current,
    )?;
    Ok(new_tid)
 }

 fn do_exec_thread(libos_tid: pid_t, host_tid: pid_t) -> Result<i32> {
-    let exit_status = process::run_task(libos_tid, host_tid)?;
+    let exit_status = process::task::exec(libos_tid, host_tid)?;

    // sync file system
    // TODO: only sync when all processes exit
--- a/src/libos/src/fs/file_ops/access.rs
+++ b/src/libos/src/fs/file_ops/access.rs
@ -47,9 +47,9 @@ pub fn do_faccessat(
 pub fn do_access(path: &str, mode: AccessibilityCheckMode) -> Result<()> {
    debug!("access: path: {:?}, mode: {:?}", path, mode);
    let inode = {
-        let current_ref = process::get_current();
-        let mut current = current_ref.lock().unwrap();
-        current.lookup_inode(path)?
+        let current = current!();
+        let fs = current.fs().lock().unwrap();
+        fs.lookup_inode(path)?
    };
    //let metadata = inode.get_metadata();
    // TODO: check metadata.mode with mode
--- a/src/libos/src/fs/file_ops/chdir.rs
+++ b/src/libos/src/fs/file_ops/chdir.rs
@ -1,17 +0,0 @@
-use super::*;
-
-pub fn do_chdir(path: &str) -> Result<()> {
-    debug!("chdir: path: {:?}", path);
-
-    let current_ref = process::get_current();
-    let mut current_process = current_ref.lock().unwrap();
-
-    let inode = current_process.lookup_inode(path)?;
-    let info = inode.metadata()?;
-    if info.type_ != FileType::Dir {
-        return_errno!(ENOTDIR, "");
-    }
-
-    current_process.change_cwd(path);
-    Ok(())
-}
--- a/src/libos/src/fs/file_ops/chmod.rs
+++ b/src/libos/src/fs/file_ops/chmod.rs
@ -58,9 +58,9 @@ impl FileMode {
 pub fn do_chmod(path: &str, mode: FileMode) -> Result<()> {
    debug!("chmod: path: {:?}, mode: {:?}", path, mode);
    let inode = {
-        let current_ref = process::get_current();
-        let mut current = current_ref.lock().unwrap();
-        current.lookup_inode(path)?
+        let current = current!();
+        let fs = current.fs().lock().unwrap();
+        fs.lookup_inode(path)?
    };
    let mut info = inode.metadata()?;
    info.mode = mode.bits();
@ -70,7 +70,7 @@ pub fn do_chmod(path: &str, mode: FileMode) -> Result<()> {

 pub fn do_fchmod(fd: FileDesc, mode: FileMode) -> Result<()> {
    debug!("fchmod: fd: {}, mode: {:?}", fd, mode);
-    let file_ref = process::get_file(fd)?;
+    let file_ref = current!().file(fd)?;
    let mut info = file_ref.metadata()?;
    info.mode = mode.bits();
    file_ref.set_metadata(&info)?;
--- a/src/libos/src/fs/file_ops/chown.rs
+++ b/src/libos/src/fs/file_ops/chown.rs
@ -7,7 +7,7 @@ pub fn do_chown(path: &str, uid: u32, gid: u32) -> Result<()> {

 pub fn do_fchown(fd: FileDesc, uid: u32, gid: u32) -> Result<()> {
    debug!("fchown: fd: {}, uid: {}, gid: {}", fd, uid, gid);
-    let file_ref = process::get_file(fd)?;
+    let file_ref = current!().file(fd)?;
    let mut info = file_ref.metadata()?;
    info.uid = uid as usize;
    info.gid = gid as usize;
@ -18,9 +18,9 @@ pub fn do_fchown(fd: FileDesc, uid: u32, gid: u32) -> Result<()> {
 pub fn do_lchown(path: &str, uid: u32, gid: u32) -> Result<()> {
    debug!("lchown: path: {:?}, uid: {}, gid: {}", path, uid, gid);
    let inode = {
-        let current_ref = process::get_current();
-        let mut current = current_ref.lock().unwrap();
-        current.lookup_inode(path)?
+        let current = current!();
+        let fs = current.fs().lock().unwrap();
+        fs.lookup_inode(path)?
    };
    let mut info = inode.metadata()?;
    info.uid = uid as usize;
--- a/src/libos/src/fs/file_ops/close.rs
+++ b/src/libos/src/fs/file_ops/close.rs
@ -2,10 +2,8 @@ use super::*;

 pub fn do_close(fd: FileDesc) -> Result<()> {
    debug!("close: fd: {}", fd);
-    let current_ref = process::get_current();
-    let current_process = current_ref.lock().unwrap();
-    let file_table_ref = current_process.get_files();
-    let mut file_table = file_table_ref.lock().unwrap();
-    file_table.del(fd)?;
+    let current = current!();
+    let mut files = current.files().lock().unwrap();
+    files.del(fd)?;
    Ok(())
 }
--- a/src/libos/src/fs/file_ops/dirent.rs
+++ b/src/libos/src/fs/file_ops/dirent.rs
@ -67,7 +67,7 @@ pub fn do_getdents64(fd: FileDesc, buf: &mut [u8]) -> Result<usize> {
        buf.as_ptr(),
        buf.len()
    );
-    let file_ref = process::get_file(fd)?;
+    let file_ref = current!().file(fd)?;
    let info = file_ref.metadata()?;
    if info.type_ != FileType::Dir {
        return_errno!(ENOTDIR, "");
--- a/src/libos/src/fs/file_ops/dirfd.rs
+++ b/src/libos/src/fs/file_ops/dirfd.rs
@ -24,7 +24,7 @@ impl DirFd {
 // Get the absolute path of directory
 pub fn get_dir_path(dirfd: FileDesc) -> Result<String> {
    let dir_path = {
-        let file_ref = process::get_file(dirfd)?;
+        let file_ref = current!().file(dirfd)?;
        if let Ok(inode_file) = file_ref.as_inode_file() {
            if inode_file.metadata()?.type_ != FileType::Dir {
                return_errno!(ENOTDIR, "not a directory");
--- a/src/libos/src/fs/file_ops/dup.rs
+++ b/src/libos/src/fs/file_ops/dup.rs
@ -1,37 +1,30 @@
 use super::*;

 pub fn do_dup(old_fd: FileDesc) -> Result<FileDesc> {
-    let current_ref = process::get_current();
-    let current = current_ref.lock().unwrap();
-    let file_table_ref = current.get_files();
-    let mut file_table = file_table_ref.lock().unwrap();
-    let file = file_table.get(old_fd)?;
-    let new_fd = file_table.put(file, false);
+    let current = current!();
+    let file = current.file(old_fd)?;
+    let new_fd = current.add_file(file, false);
    Ok(new_fd)
 }

 pub fn do_dup2(old_fd: FileDesc, new_fd: FileDesc) -> Result<FileDesc> {
-    let current_ref = process::get_current();
-    let current = current_ref.lock().unwrap();
-    let file_table_ref = current.get_files();
-    let mut file_table = file_table_ref.lock().unwrap();
-    let file = file_table.get(old_fd)?;
+    let current = current!();
+    let mut files = current.files().lock().unwrap();
+    let file = files.get(old_fd)?;
    if old_fd != new_fd {
-        file_table.put_at(new_fd, file, false);
+        files.put_at(new_fd, file, false);
    }
    Ok(new_fd)
 }

 pub fn do_dup3(old_fd: FileDesc, new_fd: FileDesc, flags: u32) -> Result<FileDesc> {
    let creation_flags = CreationFlags::from_bits_truncate(flags);
-    let current_ref = process::get_current();
-    let current = current_ref.lock().unwrap();
-    let file_table_ref = current.get_files();
-    let mut file_table = file_table_ref.lock().unwrap();
-    let file = file_table.get(old_fd)?;
+    let current = current!();
+    let mut files = current.files().lock().unwrap();
+    let file = files.get(old_fd)?;
    if old_fd == new_fd {
        return_errno!(EINVAL, "old_fd must not be equal to new_fd");
    }
-    file_table.put_at(new_fd, file, creation_flags.must_close_on_spawn());
+    files.put_at(new_fd, file, creation_flags.must_close_on_spawn());
    Ok(new_fd)
 }
--- a/src/libos/src/fs/file_ops/fcntl.rs
+++ b/src/libos/src/fs/file_ops/fcntl.rs
@ -53,10 +53,10 @@ impl<'a> FcntlCmd<'a> {

 pub fn do_fcntl(fd: FileDesc, cmd: &mut FcntlCmd) -> Result<isize> {
    debug!("fcntl: fd: {:?}, cmd: {:?}", &fd, cmd);
-    let current_ref = process::get_current();
-    let mut current = current_ref.lock().unwrap();
-    let file_table_ref = current.get_files();
-    let mut file_table = file_table_ref.lock().unwrap();
+
+    let current = current!();
+    let mut file_table = current.files().lock().unwrap();
+
    let ret = match cmd {
        FcntlCmd::DupFd(min_fd) => {
            let dup_fd = file_table.dup(fd, *min_fd, false)?;
--- a/src/libos/src/fs/file_ops/fsync.rs
+++ b/src/libos/src/fs/file_ops/fsync.rs
@ -2,14 +2,14 @@ use super::*;

 pub fn do_fsync(fd: FileDesc) -> Result<()> {
    debug!("fsync: fd: {}", fd);
-    let file_ref = process::get_file(fd)?;
+    let file_ref = current!().file(fd)?;
    file_ref.sync_all()?;
    Ok(())
 }

 pub fn do_fdatasync(fd: FileDesc) -> Result<()> {
    debug!("fdatasync: fd: {}", fd);
-    let file_ref = process::get_file(fd)?;
+    let file_ref = current!().file(fd)?;
    file_ref.sync_data()?;
    Ok(())
 }
--- a/src/libos/src/fs/file_ops/ioctl/mod.rs
+++ b/src/libos/src/fs/file_ops/ioctl/mod.rs
@ -66,6 +66,6 @@ impl<'a> IoctlCmd<'a> {

 pub fn do_ioctl(fd: FileDesc, cmd: &mut IoctlCmd) -> Result<()> {
    debug!("ioctl: fd: {}, cmd: {:?}", fd, cmd);
-    let file_ref = process::get_file(fd)?;
+    let file_ref = current!().file(fd)?;
    file_ref.ioctl(cmd)
 }
--- a/src/libos/src/fs/file_ops/link.rs
+++ b/src/libos/src/fs/file_ops/link.rs
@ -5,10 +5,10 @@ pub fn do_link(oldpath: &str, newpath: &str) -> Result<()> {

    let (new_dir_path, new_file_name) = split_path(&newpath);
    let (inode, new_dir_inode) = {
-        let current_ref = process::get_current();
-        let current_process = current_ref.lock().unwrap();
-        let inode = current_process.lookup_inode(&oldpath)?;
-        let new_dir_inode = current_process.lookup_inode(new_dir_path)?;
+        let current = current!();
+        let fs = current.fs().lock().unwrap();
+        let inode = fs.lookup_inode(&oldpath)?;
+        let new_dir_inode = fs.lookup_inode(new_dir_path)?;
        (inode, new_dir_inode)
    };
    new_dir_inode.link(new_file_name, &inode)?;
--- a/src/libos/src/fs/file_ops/lseek.rs
+++ b/src/libos/src/fs/file_ops/lseek.rs
@ -1,6 +1,6 @@
 use super::*;

 pub fn do_lseek(fd: FileDesc, offset: SeekFrom) -> Result<off_t> {
-    let file_ref = process::get_file(fd)?;
+    let file_ref = current!().file(fd)?;
    file_ref.seek(offset)
 }
--- a/src/libos/src/fs/file_ops/mkdir.rs
+++ b/src/libos/src/fs/file_ops/mkdir.rs
@ -6,9 +6,9 @@ pub fn do_mkdir(path: &str, mode: usize) -> Result<()> {

    let (dir_path, file_name) = split_path(&path);
    let inode = {
-        let current_ref = process::get_current();
-        let current_process = current_ref.lock().unwrap();
-        current_process.lookup_inode(dir_path)?
+        let current = current!();
+        let fs = current.fs().lock().unwrap();
+        fs.lookup_inode(dir_path)?
    };
    if inode.find(file_name).is_ok() {
        return_errno!(EEXIST, "");
--- a/src/libos/src/fs/file_ops/mod.rs
+++ b/src/libos/src/fs/file_ops/mod.rs
@ -3,7 +3,6 @@ use super::*;
 use process::Process;

 pub use self::access::{do_access, do_faccessat, AccessibilityCheckFlags, AccessibilityCheckMode};
-pub use self::chdir::do_chdir;
 pub use self::chmod::{do_chmod, do_fchmod, FileMode};
 pub use self::chown::{do_chown, do_fchown, do_lchown};
 pub use self::close::do_close;
@ -30,7 +29,6 @@ pub use self::unlink::do_unlink;
 pub use self::write::{do_pwrite, do_write, do_writev};

 mod access;
-mod chdir;
 mod chmod;
 mod chown;
 mod close;
@ -56,85 +54,6 @@ mod truncate;
 mod unlink;
 mod write;

-impl Process {
-    /// Open a file on the process. But DO NOT add it to file table.
-    pub fn open_file(&self, path: &str, flags: u32, mode: u32) -> Result<Box<dyn File>> {
-        if path == "/dev/null" {
-            return Ok(Box::new(DevNull));
-        }
-        if path == "/dev/zero" {
-            return Ok(Box::new(DevZero));
-        }
-        if path == "/dev/random" || path == "/dev/urandom" || path == "/dev/arandom" {
-            return Ok(Box::new(DevRandom));
-        }
-        if path == "/dev/sgx" {
-            return Ok(Box::new(DevSgx));
-        }
-        let creation_flags = CreationFlags::from_bits_truncate(flags);
-        let inode = if creation_flags.can_create() {
-            let (dir_path, file_name) = split_path(&path);
-            let dir_inode = self.lookup_inode(dir_path)?;
-            match dir_inode.find(file_name) {
-                Ok(file_inode) => {
-                    if creation_flags.is_exclusive() {
-                        return_errno!(EEXIST, "file exists");
-                    }
-                    file_inode
-                }
-                Err(FsError::EntryNotFound) => {
-                    if !dir_inode.allow_write()? {
-                        return_errno!(EPERM, "file cannot be created");
-                    }
-                    dir_inode.create(file_name, FileType::File, mode)?
-                }
-                Err(e) => return Err(Error::from(e)),
-            }
-        } else {
-            self.lookup_inode(&path)?
-        };
-        let abs_path = self.convert_to_abs_path(&path);
-        Ok(Box::new(INodeFile::open(inode, &abs_path, flags)?))
-    }
-
-    /// Lookup INode from the cwd of the process
-    pub fn lookup_inode(&self, path: &str) -> Result<Arc<dyn INode>> {
-        debug!("lookup_inode: cwd: {:?}, path: {:?}", self.get_cwd(), path);
-        if path.len() > 0 && path.as_bytes()[0] == b'/' {
-            // absolute path
-            let abs_path = path.trim_start_matches('/');
-            let inode = ROOT_INODE.lookup(abs_path)?;
-            Ok(inode)
-        } else {
-            // relative path
-            let cwd = self.get_cwd().trim_start_matches('/');
-            let inode = ROOT_INODE.lookup(cwd)?.lookup(path)?;
-            Ok(inode)
-        }
-    }
-
-    /// Convert the path to be absolute
-    pub fn convert_to_abs_path(&self, path: &str) -> String {
-        debug!(
-            "convert_to_abs_path: cwd: {:?}, path: {:?}",
-            self.get_cwd(),
-            path
-        );
-        if path.len() > 0 && path.as_bytes()[0] == b'/' {
-            // path is absolute path already
-            return path.to_owned();
-        }
-        let cwd = {
-            if !self.get_cwd().ends_with("/") {
-                self.get_cwd().to_owned() + "/"
-            } else {
-                self.get_cwd().to_owned()
-            }
-        };
-        cwd + path
-    }
-}
-
 /// Split a `path` str to `(base_path, file_name)`
 pub fn split_path(path: &str) -> (&str, &str) {
    let mut split = path.trim_end_matches('/').rsplitn(2, '/');
--- a/src/libos/src/fs/file_ops/open.rs
+++ b/src/libos/src/fs/file_ops/open.rs
@ -1,18 +1,15 @@
 use super::*;

 fn do_open(path: &str, flags: u32, mode: u32) -> Result<FileDesc> {
-    let current_ref = process::get_current();
-    let mut proc = current_ref.lock().unwrap();
+    let current = current!();
+    let fs = current.fs().lock().unwrap();

-    let file = proc.open_file(path, flags, mode)?;
+    let file = fs.open_file(path, flags, mode)?;
    let file_ref: Arc<Box<dyn File>> = Arc::new(file);

    let fd = {
        let creation_flags = CreationFlags::from_bits_truncate(flags);
-        proc.get_files()
-            .lock()
-            .unwrap()
-            .put(file_ref, creation_flags.must_close_on_spawn())
+        current.add_file(file_ref, creation_flags.must_close_on_spawn())
    };
    Ok(fd)
 }
--- a/src/libos/src/fs/file_ops/read.rs
+++ b/src/libos/src/fs/file_ops/read.rs
@ -2,18 +2,18 @@ use super::*;

 pub fn do_read(fd: FileDesc, buf: &mut [u8]) -> Result<usize> {
    debug!("read: fd: {}", fd);
-    let file_ref = process::get_file(fd)?;
+    let file_ref = current!().file(fd)?;
    file_ref.read(buf)
 }

 pub fn do_readv(fd: FileDesc, bufs: &mut [&mut [u8]]) -> Result<usize> {
    debug!("readv: fd: {}", fd);
-    let file_ref = process::get_file(fd)?;
+    let file_ref = current!().file(fd)?;
    file_ref.readv(bufs)
 }

 pub fn do_pread(fd: FileDesc, buf: &mut [u8], offset: usize) -> Result<usize> {
    debug!("pread: fd: {}, offset: {}", fd, offset);
-    let file_ref = process::get_file(fd)?;
+    let file_ref = current!().file(fd)?;
    file_ref.read_at(offset, buf)
 }
--- a/src/libos/src/fs/file_ops/rename.rs
+++ b/src/libos/src/fs/file_ops/rename.rs
@ -1,14 +1,15 @@
 use super::*;

 pub fn do_rename(oldpath: &str, newpath: &str) -> Result<()> {
-    let current_ref = process::get_current();
-    let current_process = current_ref.lock().unwrap();
    debug!("rename: oldpath: {:?}, newpath: {:?}", oldpath, newpath);

+    let current = current!();
+    let fs = current.fs().lock().unwrap();
+
    let (old_dir_path, old_file_name) = split_path(&oldpath);
    let (new_dir_path, new_file_name) = split_path(&newpath);
-    let old_dir_inode = current_process.lookup_inode(old_dir_path)?;
-    let new_dir_inode = current_process.lookup_inode(new_dir_path)?;
+    let old_dir_inode = fs.lookup_inode(old_dir_path)?;
+    let new_dir_inode = fs.lookup_inode(new_dir_path)?;
    let old_file_mode = {
        let old_file_inode = old_dir_inode.find(old_file_name)?;
        let metadata = old_file_inode.metadata()?;
--- a/src/libos/src/fs/file_ops/rmdir.rs
+++ b/src/libos/src/fs/file_ops/rmdir.rs
@ -5,9 +5,9 @@ pub fn do_rmdir(path: &str) -> Result<()> {

    let (dir_path, file_name) = split_path(&path);
    let dir_inode = {
-        let current_ref = process::get_current();
-        let current_process = current_ref.lock().unwrap();
-        current_process.lookup_inode(dir_path)?
+        let current = current!();
+        let fs = current.fs().lock().unwrap();
+        fs.lookup_inode(dir_path)?
    };
    let file_inode = dir_inode.find(file_name)?;
    if file_inode.metadata()?.type_ != FileType::Dir {
--- a/src/libos/src/fs/file_ops/sendfile.rs
+++ b/src/libos/src/fs/file_ops/sendfile.rs
@ -11,13 +11,10 @@ pub fn do_sendfile(
        "sendfile: out: {}, in: {}, offset: {:?}, count: {}",
        out_fd, in_fd, offset, count
    );
-    let current_ref = process::get_current();
-    let current_process = current_ref.lock().unwrap();
-    let file_table_ref = current_process.get_files();
-    let mut file_table = file_table_ref.lock().unwrap();

-    let in_file = file_table.get(in_fd)?;
-    let out_file = file_table.get(out_fd)?;
+    let current = current!();
+    let in_file = current.file(in_fd)?;
+    let out_file = current.file(out_fd)?;
    let mut buffer: [u8; 1024 * 11] = unsafe { MaybeUninit::uninit().assume_init() };

    let mut read_offset = match offset {
--- a/src/libos/src/fs/file_ops/stat.rs
+++ b/src/libos/src/fs/file_ops/stat.rs
@ -141,7 +141,7 @@ fn do_stat(path: &str) -> Result<Stat> {

 pub fn do_fstat(fd: u32) -> Result<Stat> {
    debug!("fstat: fd: {}", fd);
-    let file_ref = process::get_file(fd as FileDesc)?;
+    let file_ref = current!().file(fd as FileDesc)?;
    let stat = Stat::from(file_ref.metadata()?);
    // TODO: handle symlink
    Ok(stat)
@ -150,9 +150,9 @@ pub fn do_fstat(fd: u32) -> Result<Stat> {
 pub fn do_lstat(path: &str) -> Result<Stat> {
    debug!("lstat: path: {}", path);
    let inode = {
-        let current_ref = process::get_current();
-        let current_process = current_ref.lock().unwrap();
-        current_process.lookup_inode(&path)?
+        let current = current!();
+        let fs = current.fs().lock().unwrap();
+        fs.lookup_inode(&path)?
    };
    let stat = Stat::from(inode.metadata()?);
    Ok(stat)
--- a/src/libos/src/fs/file_ops/symlink.rs
+++ b/src/libos/src/fs/file_ops/symlink.rs
@ -4,15 +4,13 @@ pub fn do_readlink(path: &str, buf: &mut [u8]) -> Result<usize> {
    debug!("readlink: path: {:?}", path);
    let file_path = {
        if path == "/proc/self/exe" {
-            let current_ref = process::get_current();
-            let current = current_ref.lock().unwrap();
-            current.get_elf_path().to_owned()
+            current!().process().exec_path().to_owned()
        } else if path.starts_with("/proc/self/fd") {
            let fd = path
                .trim_start_matches("/proc/self/fd/")
                .parse::<FileDesc>()
                .map_err(|e| errno!(EBADF, "Invalid file descriptor"))?;
-            let file_ref = process::get_file(fd)?;
+            let file_ref = current!().file(fd)?;
            if let Ok(inode_file) = file_ref.as_inode_file() {
                inode_file.get_abs_path().to_owned()
            } else {
--- a/src/libos/src/fs/file_ops/truncate.rs
+++ b/src/libos/src/fs/file_ops/truncate.rs
@ -3,9 +3,9 @@ use super::*;
 pub fn do_truncate(path: &str, len: usize) -> Result<()> {
    debug!("truncate: path: {:?}, len: {}", path, len);
    let inode = {
-        let current_ref = process::get_current();
-        let current_process = current_ref.lock().unwrap();
-        current_process.lookup_inode(&path)?
+        let current = current!();
+        let fs = current.fs().lock().unwrap();
+        fs.lookup_inode(&path)?
    };
    inode.resize(len)?;
    Ok(())
@ -13,7 +13,7 @@ pub fn do_truncate(path: &str, len: usize) -> Result<()> {

 pub fn do_ftruncate(fd: FileDesc, len: usize) -> Result<()> {
    debug!("ftruncate: fd: {}, len: {}", fd, len);
-    let file_ref = process::get_file(fd)?;
+    let file_ref = current!().file(fd)?;
    file_ref.set_len(len as u64)?;
    Ok(())
 }
--- a/src/libos/src/fs/file_ops/unlink.rs
+++ b/src/libos/src/fs/file_ops/unlink.rs
@ -5,9 +5,9 @@ pub fn do_unlink(path: &str) -> Result<()> {

    let (dir_path, file_name) = split_path(&path);
    let dir_inode = {
-        let current_ref = process::get_current();
-        let current_process = current_ref.lock().unwrap();
-        current_process.lookup_inode(dir_path)?
+        let current = current!();
+        let fs = current.fs().lock().unwrap();
+        fs.lookup_inode(dir_path)?
    };
    let file_inode = dir_inode.find(file_name)?;
    let metadata = file_inode.metadata()?;
--- a/src/libos/src/fs/file_ops/write.rs
+++ b/src/libos/src/fs/file_ops/write.rs
@ -2,18 +2,18 @@ use super::*;

 pub fn do_write(fd: FileDesc, buf: &[u8]) -> Result<usize> {
    debug!("write: fd: {}", fd);
-    let file_ref = process::get_file(fd)?;
+    let file_ref = current!().file(fd)?;
    file_ref.write(buf)
 }

 pub fn do_writev(fd: FileDesc, bufs: &[&[u8]]) -> Result<usize> {
    debug!("writev: fd: {}", fd);
-    let file_ref = process::get_file(fd)?;
+    let file_ref = current!().file(fd)?;
    file_ref.writev(bufs)
 }

 pub fn do_pwrite(fd: FileDesc, buf: &[u8], offset: usize) -> Result<usize> {
    debug!("pwrite: fd: {}, offset: {}", fd, offset);
-    let file_ref = process::get_file(fd)?;
+    let file_ref = current!().file(fd)?;
    file_ref.write_at(offset, buf)
 }
--- a/src/libos/src/fs/fs_ops/chdir.rs
+++ b/src/libos/src/fs/fs_ops/chdir.rs
@ -0,0 +1,17 @@
+use super::*;
+
+pub fn do_chdir(path: &str) -> Result<()> {
+    debug!("chdir: path: {:?}", path);
+
+    let current = current!();
+    let mut fs = current.fs().lock().unwrap();
+
+    let inode = fs.lookup_inode(path)?;
+    let info = inode.metadata()?;
+    if info.type_ != FileType::Dir {
+        return_errno!(ENOTDIR, "cwd must be directory");
+    }
+
+    fs.set_cwd(path)?;
+    Ok(())
+}
--- a/src/libos/src/fs/fs_ops/getcwd.rs
+++ b/src/libos/src/fs/fs_ops/getcwd.rs
@ -0,0 +1,9 @@
+use super::*;
+
+pub fn do_getcwd() -> Result<String> {
+    debug!("getcwd");
+    let thread = current!();
+    let fs = thread.fs().lock().unwrap();
+    let cwd = fs.cwd().to_owned();
+    Ok(cwd)
+}
--- a/src/libos/src/fs/fs_ops/mod.rs
+++ b/src/libos/src/fs/fs_ops/mod.rs
@ -1,5 +1,9 @@
 use super::*;

+pub use self::chdir::do_chdir;
+pub use self::getcwd::do_getcwd;
 pub use self::sync::do_sync;

+mod chdir;
+mod getcwd;
 mod sync;
--- a/src/libos/src/fs/fs_view.rs
+++ b/src/libos/src/fs/fs_view.rs
@ -0,0 +1,125 @@
+use super::dev_fs::{DevNull, DevRandom, DevSgx, DevZero};
+/// Present a per-process view of FS.
+use super::*;
+
+#[derive(Debug, Clone)]
+pub struct FsView {
+    cwd: String,
+}
+
+impl FsView {
+    pub fn new() -> FsView {
+        Self {
+            cwd: "/".to_owned(),
+        }
+    }
+
+    /// Get the current working directory.
+    pub fn cwd(&self) -> &str {
+        &self.cwd
+    }
+
+    /// Set the current working directory.
+    pub fn set_cwd(&mut self, path: &str) -> Result<()> {
+        if path.len() == 0 {
+            return_errno!(EINVAL, "empty path");
+        }
+
+        if path.as_bytes()[0] == b'/' {
+            // absolute
+            self.cwd = path.to_owned();
+        } else {
+            // relative
+            if !self.cwd.ends_with("/") {
+                self.cwd += "/";
+            }
+            self.cwd += path;
+        }
+        Ok(())
+    }
+
+    /// Open a file on the process. But DO NOT add it to file table.
+    pub fn open_file(&self, path: &str, flags: u32, mode: u32) -> Result<Box<dyn File>> {
+        if path == "/dev/null" {
+            return Ok(Box::new(DevNull));
+        }
+        if path == "/dev/zero" {
+            return Ok(Box::new(DevZero));
+        }
+        if path == "/dev/random" || path == "/dev/urandom" || path == "/dev/arandom" {
+            return Ok(Box::new(DevRandom));
+        }
+        if path == "/dev/sgx" {
+            return Ok(Box::new(DevSgx));
+        }
+        let creation_flags = CreationFlags::from_bits_truncate(flags);
+        let inode = if creation_flags.can_create() {
+            let (dir_path, file_name) = split_path(&path);
+            let dir_inode = self.lookup_inode(dir_path)?;
+            match dir_inode.find(file_name) {
+                Ok(file_inode) => {
+                    if creation_flags.is_exclusive() {
+                        return_errno!(EEXIST, "file exists");
+                    }
+                    file_inode
+                }
+                Err(FsError::EntryNotFound) => {
+                    if !dir_inode.allow_write()? {
+                        return_errno!(EPERM, "file cannot be created");
+                    }
+                    dir_inode.create(file_name, FileType::File, mode)?
+                }
+                Err(e) => return Err(Error::from(e)),
+            }
+        } else {
+            self.lookup_inode(&path)?
+        };
+        let abs_path = self.convert_to_abs_path(&path);
+        Ok(Box::new(INodeFile::open(inode, &abs_path, flags)?))
+    }
+
+    /// Lookup INode from the cwd of the process
+    pub fn lookup_inode(&self, path: &str) -> Result<Arc<dyn INode>> {
+        debug!("lookup_inode: cwd: {:?}, path: {:?}", self.cwd(), path);
+        if path.len() > 0 && path.as_bytes()[0] == b'/' {
+            // absolute path
+            let abs_path = path.trim_start_matches('/');
+            let inode = ROOT_INODE.lookup(abs_path)?;
+            Ok(inode)
+        } else {
+            // relative path
+            let cwd = self.cwd().trim_start_matches('/');
+            let inode = ROOT_INODE.lookup(cwd)?.lookup(path)?;
+            Ok(inode)
+        }
+    }
+
+    /// Convert the path to be absolute
+    pub fn convert_to_abs_path(&self, path: &str) -> String {
+        debug!(
+            "convert_to_abs_path: cwd: {:?}, path: {:?}",
+            self.cwd(),
+            path
+        );
+        if path.len() > 0 && path.as_bytes()[0] == b'/' {
+            // path is absolute path already
+            return path.to_owned();
+        }
+        let cwd = {
+            if !self.cwd().ends_with("/") {
+                self.cwd().to_owned() + "/"
+            } else {
+                self.cwd().to_owned()
+            }
+        };
+        cwd + path
+    }
+}
+
+impl Default for FsView {
+    fn default() -> Self {
+        Self {
+            cwd: "/".to_owned(),
+        }
+    }
+}
--- a/src/libos/src/fs/mod.rs
+++ b/src/libos/src/fs/mod.rs
@ -16,6 +16,7 @@ pub use self::file_ops::{AccessMode, CreationFlags, FileMode, Stat, StatusFlags}
 pub use self::file_ops::{Flock, FlockType};
 pub use self::file_ops::{IoctlCmd, StructuredIoctlArgType, StructuredIoctlNum};
 pub use self::file_table::{FileDesc, FileTable};
+pub use self::fs_view::FsView;
 pub use self::inode_file::{AsINodeFile, INodeExt, INodeFile};
 pub use self::pipe::Pipe;
 pub use self::rootfs::ROOT_INODE;
@ -28,6 +29,7 @@ mod file;
 mod file_ops;
 mod file_table;
 mod fs_ops;
+mod fs_view;
 mod hostfs;
 mod inode_file;
 mod pipe;
@ -35,3 +37,14 @@ mod rootfs;
 mod sefs;
 mod stdio;
 mod syscalls;
+
+/// Split a `path` str to `(base_path, file_name)`
+fn split_path(path: &str) -> (&str, &str) {
+    let mut split = path.trim_end_matches('/').rsplitn(2, '/');
+    let file_name = split.next().unwrap();
+    let mut dir_path = split.next().unwrap_or(".");
+    if dir_path == "" {
+        dir_path = "/";
+    }
+    (dir_path, file_name)
+}
--- a/src/libos/src/fs/pipe.rs
+++ b/src/libos/src/fs/pipe.rs
@ -160,15 +160,11 @@ pub fn do_pipe2(flags: u32) -> Result<[FileDesc; 2]> {
    let status_flags = StatusFlags::from_bits_truncate(flags);
    debug!("pipe2: flags: {:?} {:?}", creation_flags, status_flags);

-    let current_ref = process::get_current();
-    let current = current_ref.lock().unwrap();
+    let current = current!();
    let pipe = Pipe::new(status_flags)?;
-
-    let file_table_ref = current.get_files();
-    let mut file_table = file_table_ref.lock().unwrap();
    let close_on_spawn = creation_flags.must_close_on_spawn();
-    let reader_fd = file_table.put(Arc::new(Box::new(pipe.reader)), close_on_spawn);
-    let writer_fd = file_table.put(Arc::new(Box::new(pipe.writer)), close_on_spawn);
+    let reader_fd = current.add_file(Arc::new(Box::new(pipe.reader)), close_on_spawn);
+    let writer_fd = current.add_file(Arc::new(Box::new(pipe.writer)), close_on_spawn);
    trace!("pipe2: reader_fd: {}, writer_fd: {}", reader_fd, writer_fd);
    Ok([reader_fd, writer_fd])
 }
--- a/src/libos/src/fs/syscalls.rs
+++ b/src/libos/src/fs/syscalls.rs
@ -27,10 +27,10 @@ pub fn do_eventfd2(init_val: u32, flags: i32) -> Result<isize> {
        Arc::new(Box::new(event))
    };

-    let fd = process::put_file(
+    let fd = current!().add_file(
        file_ref,
        inner_flags.contains(EventCreationFlags::EFD_CLOEXEC),
-    )?;
+    );
    Ok(fd as isize)
 }

@ -307,10 +307,28 @@ pub fn do_chdir(path: *const i8) -> Result<isize> {
    let path = from_user::clone_cstring_safely(path)?
        .to_string_lossy()
        .into_owned();
-    file_ops::do_chdir(&path)?;
+    fs_ops::do_chdir(&path)?;
    Ok(0)
 }

+/// Copy the current working directory into the user buffer as a NUL-terminated string.
+///
+/// Fails with `ERANGE` when `size` cannot hold the path plus its terminator.
+/// On success the address of the user buffer is returned.
+pub fn do_getcwd(buf_ptr: *mut u8, size: usize) -> Result<isize> {
+    from_user::check_mut_array(buf_ptr, size)?;
+    let user_buf = unsafe { std::slice::from_raw_parts_mut(buf_ptr, size) };
+
+    let cwd = fs_ops::do_getcwd()?;
+    let path_len = cwd.len();
+
+    // One extra byte is needed for the terminating NUL
+    if user_buf.len() < path_len + 1 {
+        return_errno!(ERANGE, "buf is not long enough");
+    }
+    let (path_dst, tail) = user_buf.split_at_mut(path_len);
+    path_dst.copy_from_slice(cwd.as_bytes());
+    tail[0] = 0;
+
+    // getcwd requires returning buf_ptr if success
+    Ok(buf_ptr as isize)
+}
+
 pub fn do_rename(oldpath: *const i8, newpath: *const i8) -> Result<isize> {
    let oldpath = from_user::clone_cstring_safely(oldpath)?
        .to_string_lossy()
--- a/src/libos/src/lib.rs
+++ b/src/libos/src/lib.rs
@ -43,11 +43,8 @@ use std::backtrace::{self, PrintFormat};
 use std::ffi::CStr; // a borrowed C string
 use std::panic;

-use error::*;
-use prelude::*;
-
-// Override prelude::Result with error::Result
-use error::Result;
+use crate::prelude::*;
+use crate::process::pid_t;

 #[macro_use]
 mod prelude;
--- a/src/libos/src/misc/mod.rs
+++ b/src/libos/src/misc/mod.rs
@ -3,5 +3,5 @@ use super::*;
 mod rlimit;
 mod uname;

-pub use self::rlimit::{do_prlimit, resource_t, rlimit_t, ResourceLimits, ResourceLimitsRef};
+pub use self::rlimit::{do_prlimit, resource_t, rlimit_t, ResourceLimits};
 pub use self::uname::{do_uname, utsname_t};
--- a/src/libos/src/misc/rlimit.rs
+++ b/src/libos/src/misc/rlimit.rs
@ -5,7 +5,6 @@ use process::pid_t;
 pub struct ResourceLimits {
    rlimits: [rlimit_t; RLIMIT_COUNT],
 }
-pub type ResourceLimitsRef = Arc<SgxMutex<ResourceLimits>>;

 impl ResourceLimits {
    pub fn get(&self, resource: resource_t) -> &rlimit_t {
@ -87,20 +86,25 @@ impl resource_t {
    }
 }

+/// Get or set resource limits.
+///
+/// The man page suggests that this system call works on a per-process basis
+/// and the input argument pid can only be process ID, not thread ID. This
+/// (unnecessary) restriction is lifted by our implementation. Nevertheless,
+/// since the rlimits object is shared between threads in a process, the
+/// semantic of limiting resource usage on a per-process basis is preserved.
 pub fn do_prlimit(
    pid: pid_t,
    resource: resource_t,
    new_limit: Option<&rlimit_t>,
    old_limit: Option<&mut rlimit_t>,
 ) -> Result<()> {
-    let process_ref = if pid == 0 {
-        process::get_current()
+    let process = if pid == 0 {
+        current!()
    } else {
-        process::get(pid).cause_err(|_| errno!(ESRCH, "invalid pid"))?
+        process::table::get_thread(pid).cause_err(|_| errno!(ESRCH, "invalid pid"))?
    };
-    let mut process = process_ref.lock().unwrap();
-    let rlimits_ref = process.get_rlimits();
-    let mut rlimits = rlimits_ref.lock().unwrap();
+    let mut rlimits = process.rlimits().lock().unwrap();
    if let Some(old_limit) = old_limit {
        *old_limit = *rlimits.get(resource)
    }
--- a/src/libos/src/net/io_multiplexing/epoll.rs
+++ b/src/libos/src/net/io_multiplexing/epoll.rs
@ -92,7 +92,7 @@ impl EpollFile {

    pub fn control(&self, op: EpollCtlCmd, fd: FileDesc, event: Option<&EpollEvent>) -> Result<()> {
        let host_fd = {
-            let fd_ref = process::get_file(fd)?;
+            let fd_ref = current!().file(fd)?;
            if let Ok(socket) = fd_ref.as_socket() {
                socket.fd()
            } else if let Ok(eventfd) = fd_ref.as_event() {
--- a/src/libos/src/net/io_multiplexing/poll.rs
+++ b/src/libos/src/net/io_multiplexing/poll.rs
@ -10,8 +10,7 @@ pub fn do_poll(pollfds: &mut [libc::pollfd], timeout: c_int) -> Result<usize> {
    // Untrusted pollfd's that will be modified by OCall
    let mut u_pollfds: Vec<libc::pollfd> = pollfds.to_vec();

-    let current_ref = process::get_current();
-    let mut proc = current_ref.lock().unwrap();
+    let current = current!();
    for (i, pollfd) in pollfds.iter_mut().enumerate() {
        // Poll should just ignore negative fds
        if pollfd.fd < 0 {
@ -20,11 +19,7 @@ pub fn do_poll(pollfds: &mut [libc::pollfd], timeout: c_int) -> Result<usize> {
            continue;
        }

-        let file_ref = proc
-            .get_files()
-            .lock()
-            .unwrap()
-            .get(pollfd.fd as FileDesc)?;
+        let file_ref = current.file(pollfd.fd as FileDesc)?;
        if let Ok(socket) = file_ref.as_socket() {
            // convert libos fd to host fd in the copy to keep pollfds unchanged
            u_pollfds[i].fd = socket.fd();
@ -58,9 +53,6 @@ pub fn do_poll(pollfds: &mut [libc::pollfd], timeout: c_int) -> Result<usize> {
        }
    }

-    // Unlock the current process as early as possible
-    drop(proc);
-
    let num_events = try_libc!(libc::ocall::poll(
        u_pollfds.as_mut_ptr(),
        u_pollfds.len() as u64,
--- a/src/libos/src/net/io_multiplexing/select.rs
+++ b/src/libos/src/net/io_multiplexing/select.rs
@ -14,9 +14,8 @@ pub fn do_select(
    let mut host_to_libos_fd = [0; libc::FD_SETSIZE];
    let mut polls = Vec::<libc::pollfd>::new();

-    let current_ref = process::get_current();
-    let mut proc = current_ref.lock().unwrap();
-    let file_table = proc.get_files().lock().unwrap();
+    let current = current!();
+    let file_table = current.files().lock().unwrap();

    for fd in 0..nfds {
        let fd_ref = file_table.get(fd as FileDesc)?;
@ -78,9 +77,8 @@ pub fn do_select(
        });
    }

-    // Unlock the current process and its file table as early as possible
+    // Unlock the file table as early as possible
    drop(file_table);
-    drop(proc);

    let timeout = match timeout {
        None => -1,
--- a/src/libos/src/net/syscalls.rs
+++ b/src/libos/src/net/syscalls.rs
@ -12,7 +12,7 @@ pub fn do_sendmsg(fd: c_int, msg_ptr: *const msghdr, flags_c: c_int) -> Result<i
        fd, msg_ptr, flags_c
    );

-    let file_ref = process::get_file(fd as FileDesc)?;
+    let file_ref = current!().file(fd as FileDesc)?;
    if let Ok(socket) = file_ref.as_socket() {
        let msg_c = {
            from_user::check_ptr(msg_ptr)?;
@ -40,7 +40,7 @@ pub fn do_recvmsg(fd: c_int, msg_mut_ptr: *mut msghdr_mut, flags_c: c_int) -> Re
        fd, msg_mut_ptr, flags_c
    );

-    let file_ref = process::get_file(fd as FileDesc)?;
+    let file_ref = current!().file(fd as FileDesc)?;
    if let Ok(socket) = file_ref.as_socket() {
        let msg_mut_c = {
            from_user::check_mut_ptr(msg_mut_ptr)?;
@ -192,7 +192,7 @@ pub fn do_epoll_create1(raw_flags: c_int) -> Result<isize> {
    let epoll_file = io_multiplexing::EpollFile::new(flags)?;
    let file_ref: Arc<Box<dyn File>> = Arc::new(Box::new(epoll_file));
    let close_on_spawn = flags.contains(CreationFlags::O_CLOEXEC);
-    let fd = process::put_file(file_ref, close_on_spawn)?;
+    let fd = current!().add_file(file_ref, close_on_spawn);

    Ok(fd as isize)
 }
@ -211,7 +211,7 @@ pub fn do_epoll_ctl(
        None
    };

-    let epfile_ref = process::get_file(epfd as FileDesc)?;
+    let epfile_ref = current!().file(epfd as FileDesc)?;
    let epoll_file = epfile_ref.as_epfile()?;

    epoll_file.control(
@ -250,7 +250,7 @@ pub fn do_epoll_wait(
        timeout
    );

-    let epfile_ref = process::get_file(epfd as FileDesc)?;
+    let epfile_ref = current!().file(epfd as FileDesc)?;
    let epoll_file = epfile_ref.as_epfile()?;

    let count = epoll_file.wait(&mut inner_events, timeout)?;
--- a/src/libos/src/prelude.rs
+++ b/src/libos/src/prelude.rs
@ -12,12 +12,24 @@ pub use std::sync::{
    Arc, SgxMutex, SgxMutexGuard, SgxRwLock, SgxRwLockReadGuard, SgxRwLockWriteGuard,
 };

+// Override prelude::Result with error::Result
+pub use crate::error::Result;
+pub use crate::error::*;
+pub use crate::fs::{File, FileDesc, FileRef};
+pub use crate::process::pid_t;
+
 macro_rules! debug_trace {
    () => {
        debug!("> Line = {}, File = {}", line!(), file!())
    };
 }

+/// Shorthand for obtaining the current thread.
+///
+/// Expands to a call to `crate::process::current::get()`.
+macro_rules! current {
+    () => {
+        crate::process::current::get()
+    };
+}
+
 pub fn align_up(addr: usize, align: usize) -> usize {
    debug_assert!(align != 0 && align.is_power_of_two());
    align_down(addr + (align - 1), align)
--- a/src/libos/src/process/current.rs
+++ b/src/libos/src/process/current.rs
@ -0,0 +1,36 @@
+use super::process::IDLE;
+use super::{Thread, ThreadRef};
+/// Get and set the current thread/process.
+use crate::prelude::*;
+
+/// Return a new strong reference to the thread recorded as current.
+pub fn get() -> ThreadRef {
+    let raw = CURRENT_THREAD_PTR.with(|cell| cell.get());
+    // The pointer stored in the cell keeps owning its reference: rebuild the Arc
+    // only to clone it, and never run its destructor.
+    let borrowed = std::mem::ManuallyDrop::new(unsafe { Arc::from_raw(raw) });
+    Arc::clone(&borrowed)
+}
+
+/// Make `thread_ref` the current thread; its tid must be positive.
+pub(super) fn set(thread_ref: ThreadRef) {
+    assert!(thread_ref.tid() > 0);
+    let previous = replace(thread_ref);
+    drop(previous);
+}
+
+/// Make the idle thread current again and hand back the thread that was current.
+pub(super) fn reset() -> ThreadRef {
+    let idle = IDLE.clone();
+    replace(idle)
+}
+
+/// Swap the current-thread pointer for `thread_ref`, returning the former current thread.
+fn replace(thread_ref: ThreadRef) -> ThreadRef {
+    let incoming = Arc::into_raw(thread_ref);
+    let outgoing = CURRENT_THREAD_PTR.with(|cell| cell.replace(incoming));
+    // The outgoing pointer was produced by `Arc::into_raw`, so turning it back
+    // into an Arc returns the reference the cell was holding.
+    unsafe { Arc::from_raw(outgoing) }
+}
+
+thread_local! {
+    // By default, the current thread is the idle (tid = 0).
+    //
+    // The cell stores a raw pointer obtained from `Arc::into_raw`; `replace`
+    // swaps the pointer and converts the old one back with `Arc::from_raw`.
+    //
+    // TODO: figure out why RefCell<ThreadRef> is not working as expected
+    static CURRENT_THREAD_PTR: Cell<*const Thread> = {
+        Cell::new(Arc::into_raw(IDLE.clone()))
+    };
+}
--- a/src/libos/src/process/do_arch_prctl.rs
+++ b/src/libos/src/process/do_arch_prctl.rs
@ -1,4 +1,20 @@
-use super::*;
+use crate::prelude::*;
+
+/// Handle an `arch_prctl` request on behalf of the current thread.
+///
+/// `ARCH_SET_FS` stores `addr` as the user FS value of the task and `ARCH_GET_FS`
+/// writes that value to `*addr`. Any GS request is rejected with `EINVAL`.
+pub fn do_arch_prctl(code: ArchPrctlCode, addr: *mut usize) -> Result<()> {
+    debug!("do_arch_prctl: code: {:?}, addr: {:?}", code, addr);
+    match code {
+        ArchPrctlCode::ARCH_SET_GS | ArchPrctlCode::ARCH_GET_GS => {
+            return_errno!(EINVAL, "GS cannot be accessed from the user space");
+        }
+        ArchPrctlCode::ARCH_SET_FS => {
+            let thread = current!();
+            thread.task().set_user_fs(addr as usize);
+        }
+        ArchPrctlCode::ARCH_GET_FS => {
+            let thread = current!();
+            unsafe {
+                *addr = thread.task().user_fs();
+            }
+        }
+    }
+    Ok(())
+}

 #[allow(non_camel_case_types)]
 #[derive(Debug)]
@ -20,30 +36,3 @@ impl ArchPrctlCode {
        }
    }
 }
-
-pub fn do_arch_prctl(code: ArchPrctlCode, addr: *mut usize) -> Result<()> {
-    debug!(
-        "do_arch_prctl: code: {:?}, addr: {:#o}",
-        code, addr as usize
-    );
-    match code {
-        ArchPrctlCode::ARCH_SET_FS => {
-            let current_ref = get_current();
-            let mut current = current_ref.lock().unwrap();
-            let task = &mut current.task;
-            task.set_user_fs(addr as usize);
-        }
-        ArchPrctlCode::ARCH_GET_FS => {
-            let current_ref = get_current();
-            let current = current_ref.lock().unwrap();
-            let task = &current.task;
-            unsafe {
-                *addr = task.get_user_fs();
-            }
-        }
-        ArchPrctlCode::ARCH_SET_GS | ArchPrctlCode::ARCH_GET_GS => {
-            return_errno!(EINVAL, "GS cannot be accessed from the user space");
-        }
-    }
-    Ok(())
-}
--- a/src/libos/src/process/do_clone.rs
+++ b/src/libos/src/process/do_clone.rs
@ -0,0 +1,248 @@
+use std::ptr::NonNull;
+
+use super::table::{self};
+use super::task::{self, Task};
+use super::thread::{Thread, ThreadBuilder};
+use crate::prelude::*;
+use crate::vm::{ProcessVM, VMRange};
+
+/// Create and execute a new thread.
+///
+/// The new thread is built from handles cloned from the current thread (process,
+/// VM, fs, file table and rlimits), is registered in the thread table and is then
+/// enqueued for execution. Returns the tid of the new thread.
+///
+/// `ptid` / `ctid` receive the new tid when `CLONE_PARENT_SETTID` /
+/// `CLONE_CHILD_SETTID` are given; whenever `ctid` is present it is also passed to
+/// the thread builder as the clear-ctid address.
+///
+/// # Errors
+///
+/// Fails if the arguments are rejected by `check_clone_args`, if no stack range
+/// can be found for `user_rsp`, or if creating the task or the thread fails.
+pub fn do_clone(
+    flags: CloneFlags,
+    user_rsp: usize,
+    ptid: Option<NonNull<pid_t>>,
+    ctid: Option<NonNull<pid_t>>,
+    new_tls: Option<usize>,
+) -> Result<pid_t> {
+    debug!(
+        "clone: flags: {:?}, stack_addr: {:?}, ptid: {:?}, ctid: {:?}, new_tls: {:?}",
+        flags, user_rsp, ptid, ctid, new_tls
+    );
+
+    check_clone_args(flags, user_rsp, ptid, ctid, new_tls)?;
+
+    // Get thread entry, an implicit argument passed on the stack.
+    //
+    // The calling convention of Occlum clone syscall requires the user to
+    // store the entry point of the new thread at the top of the user stack.
+    //
+    // FIXME: this is workaround to passing more than 6 arguments in syscall.
+    // TODO: add pointer checking
+    let thread_entry = unsafe { *(user_rsp as *mut usize) };
+
+    let new_thread_ref = {
+        let current = current!();
+        let vm = current.vm().clone();
+        let task = {
+            // The VM lock is only held while the stack bounds are looked up and
+            // the task is created.
+            let vm = vm.lock().unwrap();
+            let user_stack_range = guess_user_stack_bound(&vm, user_rsp)?;
+            // The base is the end of the range and the limit is its start
+            let user_stack_base = user_stack_range.end();
+            let user_stack_limit = user_stack_range.start();
+            unsafe {
+                Task::new(
+                    thread_entry,
+                    user_rsp,
+                    user_stack_base,
+                    user_stack_limit,
+                    new_tls,
+                )?
+            }
+        };
+        let files = current.files().clone();
+        let rlimits = current.rlimits().clone();
+        let fs = current.fs().clone();
+
+        let mut builder = ThreadBuilder::new()
+            .process(current.process().clone())
+            .vm(vm)
+            .task(task)
+            .fs(fs)
+            .files(files)
+            .rlimits(rlimits);
+        if let Some(ctid) = ctid {
+            builder = builder.clear_ctid(ctid);
+        }
+        builder.build()?
+    };
+    let new_tid = new_thread_ref.tid();
+    // Register the thread before it gets a chance to run
+    table::add_thread(new_thread_ref.clone());
+    info!("Thread created: tid = {}", new_tid);
+
+    // check_clone_args has verified that the pointers match the flags
+    if flags.contains(CloneFlags::CLONE_PARENT_SETTID) {
+        debug_assert!(ptid.is_some());
+        unsafe {
+            *ptid.unwrap().as_ptr() = new_tid;
+        }
+    }
+    if flags.contains(CloneFlags::CLONE_CHILD_SETTID) {
+        debug_assert!(ctid.is_some());
+        unsafe {
+            *ctid.unwrap().as_ptr() = new_tid;
+        }
+    }
+
+    task::enqueue_and_exec(new_thread_ref.clone());
+    Ok(new_tid)
+}
+
+bitflags! {
+    /// Clone flags, i.e., the `flags` argument of `do_clone`.
+    ///
+    /// See `check_clone_flags` for which combinations are accepted.
+    pub struct CloneFlags : u32 {
+        const CLONE_VM              = 0x00000100;
+        const CLONE_FS              = 0x00000200;
+        const CLONE_FILES           = 0x00000400;
+        const CLONE_SIGHAND         = 0x00000800;
+        const CLONE_PIDFD           = 0x00001000;
+        const CLONE_PTRACE          = 0x00002000;
+        const CLONE_VFORK           = 0x00004000;
+        const CLONE_PARENT          = 0x00008000;
+        const CLONE_THREAD          = 0x00010000;
+        const CLONE_NEWNS           = 0x00020000;
+        const CLONE_SYSVSEM         = 0x00040000;
+        const CLONE_SETTLS          = 0x00080000;
+        const CLONE_PARENT_SETTID   = 0x00100000;
+        const CLONE_CHILD_CLEARTID  = 0x00200000;
+        const CLONE_DETACHED        = 0x00400000;
+        const CLONE_UNTRACED        = 0x00800000;
+        const CLONE_CHILD_SETTID    = 0x01000000;
+        const CLONE_NEWCGROUP       = 0x02000000;
+        const CLONE_NEWUTS          = 0x04000000;
+        const CLONE_NEWIPC          = 0x08000000;
+        const CLONE_NEWUSER         = 0x10000000;
+        const CLONE_NEWPID          = 0x20000000;
+        const CLONE_NEWNET          = 0x40000000;
+        const CLONE_IO              = 0x80000000;
+    }
+}
+
+/// Validate the clone flags and check that `ptid` / `ctid` are given exactly when
+/// the flags call for them. Any violation is reported as `EINVAL`.
+fn check_clone_args(
+    flags: CloneFlags,
+    user_rsp: usize,
+    ptid: Option<NonNull<pid_t>>,
+    ctid: Option<NonNull<pid_t>>,
+    new_tls: Option<usize>,
+) -> Result<()> {
+    check_clone_flags(flags)?;
+
+    let wants_ptid = flags.contains(CloneFlags::CLONE_PARENT_SETTID);
+    if ptid.is_some() != wants_ptid {
+        return_errno!(EINVAL, "ptid is not consistent with flags");
+    }
+
+    // Either of the two child-tid flags requires the ctid pointer
+    let wants_ctid =
+        flags.intersects(CloneFlags::CLONE_CHILD_SETTID | CloneFlags::CLONE_CHILD_CLEARTID);
+    if ctid.is_some() != wants_ctid {
+        return_errno!(EINVAL, "ctid is not consistent with flags");
+    }
+
+    Ok(())
+}
+
+/// Check whether clone flags are valid.
+///
+/// The current implementation of clone, which is much less general than the one in Linux,
+/// essentially supports creating threads only. So the valid combinations of clone flags
+/// are quite limited.
+///
+/// # Mandatory flags
+///
+/// The following flags must be given. If any of them is missing, `EINVAL` is returned:
+/// ```text
+/// CLONE_VM
+/// CLONE_THREAD
+/// CLONE_SIGHAND
+/// CLONE_FILES
+/// CLONE_FS
+/// CLONE_SETTLS
+/// CLONE_SYSVSEM
+/// CLONE_PARENT_SETTID
+/// ```
+///
+/// # Optional flags
+///
+/// The following flags can be given and are supported:
+/// ```text
+/// CLONE_CHILD_CLEARTID
+/// CLONE_CHILD_SETTID
+/// ```
+///
+/// # Ignored flags
+///
+/// The following flags are ignored silently:
+/// ```text
+/// CLONE_DETACHED
+/// CLONE_IO
+/// CLONE_PARENT
+/// ```
+///
+/// # Unsupported flags
+///
+/// The following flags are unsupported; giving any one of them returns `EINVAL`:
+/// ```text
+/// CLONE_VFORK
+/// CLONE_NEWCGROUP
+/// CLONE_NEWIPC
+/// CLONE_NEWNET
+/// CLONE_NEWNS
+/// CLONE_NEWPID
+/// CLONE_NEWUSER
+/// CLONE_NEWUTS
+/// CLONE_PIDFD
+/// CLONE_PTRACE
+/// CLONE_UNTRACED
+/// ```
+fn check_clone_flags(flags: CloneFlags) -> Result<()> {
+    let mandatory_flags = CloneFlags::CLONE_VM
+        | CloneFlags::CLONE_THREAD
+        | CloneFlags::CLONE_SIGHAND
+        | CloneFlags::CLONE_FILES
+        | CloneFlags::CLONE_FS
+        | CloneFlags::CLONE_SETTLS
+        | CloneFlags::CLONE_SYSVSEM
+        | CloneFlags::CLONE_PARENT_SETTID;
+    let unsupported_flags = CloneFlags::CLONE_VFORK
+        | CloneFlags::CLONE_NEWCGROUP
+        | CloneFlags::CLONE_NEWIPC
+        | CloneFlags::CLONE_NEWNET
+        | CloneFlags::CLONE_NEWNS
+        | CloneFlags::CLONE_NEWPID
+        | CloneFlags::CLONE_NEWUSER
+        | CloneFlags::CLONE_NEWUTS
+        | CloneFlags::CLONE_PIDFD
+        | CloneFlags::CLONE_PTRACE
+        | CloneFlags::CLONE_UNTRACED;
+
+    // All mandatory flags must be present, hence `contains`
+    if !flags.contains(mandatory_flags) {
+        return_errno!(EINVAL, "missing mandatory flags");
+    }
+    // A single unsupported flag is enough to reject the request, hence `intersects`;
+    // `contains` would only match when every unsupported flag is set at once.
+    if flags.intersects(unsupported_flags) {
+        return_errno!(EINVAL, "found unsupported flags");
+    }
+
+    Ok(())
+}
+
+/// Find the VM range that contains `user_rsp`, to be used as the bounds of the new
+/// thread's user stack. Returns `ESRCH` if no candidate range contains the address.
+fn guess_user_stack_bound(vm: &ProcessVM, user_rsp: usize) -> Result<&VMRange> {
+    // The first case is most likely
+    if let Ok(mmap_range) = vm.find_mmap_region(user_rsp) {
+        return Ok(mmap_range);
+    }
+
+    // The next cases are very unlikely, but valid
+    let stack_range = vm.get_stack_range();
+    if stack_range.contains(user_rsp) {
+        return Ok(stack_range);
+    }
+    let heap_range = vm.get_heap_range();
+    if heap_range.contains(user_rsp) {
+        return Ok(heap_range);
+    }
+
+    // Invalid
+    return_errno!(ESRCH, "invalid rsp")
+}
--- a/src/libos/src/process/do_exit.rs
+++ b/src/libos/src/process/do_exit.rs
@ -0,0 +1,78 @@
+use std::intrinsics::atomic_store;
+
+use super::do_futex::futex_wake;
+use super::process::ChildProcessFilter;
+use super::{table, ThreadRef};
+use crate::prelude::*;
+
+/// Terminate the current thread with `exit_status`.
+///
+/// Marks the thread as exited, wakes up one waiter on its clear-ctid address (if
+/// one is set), removes the thread from the thread table unless it is the main
+/// thread, and exits the whole process when no thread remains.
+pub fn do_exit(exit_status: i32) {
+    let thread = current!();
+
+    let num_remaining_threads = thread.exit(exit_status);
+
+    // Notify a thread, if any, that waits on ctid. See set_tid_address(2) for more info.
+    if let Some(ctid_ptr) = thread.clear_ctid() {
+        // Zero the tid word first, then wake one waiter on it
+        unsafe {
+            atomic_store(ctid_ptr.as_ptr(), 0);
+        }
+        futex_wake(ctid_ptr.as_ptr() as *const i32, 1);
+    }
+
+    // Keep the main thread's tid available as long as the process is not destroyed.
+    // This is important as the user space may still attempt to access the main
+    // thread's ThreadRef through the process's pid after the process has become
+    // a zombie.
+    if thread.tid() != thread.process().pid() {
+        table::del_thread(thread.tid()).expect("tid must be in the table");
+    }
+
+    // If this thread is the last thread, then exit the process
+    if num_remaining_threads == 0 {
+        do_exit_process(&thread, exit_status);
+    }
+}
+
+/// Exit the process that `thread` belongs to, once its last thread has exited.
+///
+/// If the parent is the idle process (pid 0), the process is released right away:
+/// its main thread and the process itself are removed from the tables. Otherwise
+/// the process is marked as exited and one parent waiter whose filter matches this
+/// process, if any, is woken up.
+fn do_exit_process(thread: &ThreadRef, exit_status: i32) {
+    let process = thread.process();
+
+    // If the parent process is the idle process, we can release the process directly.
+    if process.parent().pid() == 0 {
+        // Deadlock note: Always lock parent then child.
+        let mut parent_inner = super::IDLE.process().inner();
+        let mut process_inner = process.inner();
+
+        // The table entry still to be removed is the main thread's, whose tid equals
+        // the pid. The last thread to exit is not necessarily the main thread, and
+        // the entry of any other thread has already been removed in do_exit.
+        table::del_thread(process.pid()).expect("tid must be in the table");
+        table::del_process(process.pid()).expect("pid must be in the table");
+
+        process_inner.exit(exit_status);
+        parent_inner.remove_zombie_child(process.pid());
+        return;
+    }
+    // Otherwise, we need to notify the parent process
+
+    // Lock the parent process to ensure that parent's wait4 cannot miss the current
+    // process's exit.
+    // Deadlock note: Always lock parent then child.
+    let parent = process.parent();
+    let mut parent_inner = parent.inner();
+    process.inner().exit(exit_status);
+
+    // Wake up the parent if it is waiting on this child
+    let waiting_children = parent_inner.waiting_children_mut().unwrap();
+    waiting_children.del_and_wake_one_waiter(|waiter_data| -> Option<pid_t> {
+        // Returning None skips a waiter whose filter does not match this process
+        match waiter_data {
+            ChildProcessFilter::WithAnyPid => {}
+            ChildProcessFilter::WithPid(required_pid) => {
+                if process.pid() != *required_pid {
+                    return None;
+                }
+            }
+            ChildProcessFilter::WithPgid(required_pgid) => {
+                if process.pgid() != *required_pgid {
+                    return None;
+                }
+            }
+        }
+        Some(process.pid())
+    });
+}
--- a/src/libos/src/process/do_futex.rs
+++ b/src/libos/src/process/do_futex.rs
@ -1,9 +1,10 @@
-use super::*;
 use std::collections::hash_map::DefaultHasher;
 use std::hash::{Hash, Hasher};
 use std::intrinsics::atomic_load;
 use std::sync::atomic::{AtomicBool, Ordering};
-use time::timespec_t;
+
+use crate::prelude::*;
+use crate::time::timespec_t;

 /// `FutexOp`, `FutexFlags`, and `futex_op_and_flags_from_u32` are helper types and
 /// functions for handling the versatile commands and arguments of futex system
--- a/src/libos/src/process/do_getpid.rs
+++ b/src/libos/src/process/do_getpid.rs
@ -0,0 +1,18 @@
+use crate::prelude::*;
+
+/// Return the pid of the process that the current thread belongs to.
+pub fn do_getpid() -> pid_t {
+    let thread = current!();
+    thread.process().pid()
+}
+
+/// Return the tid of the current thread.
+pub fn do_gettid() -> pid_t {
+    let thread = current!();
+    thread.tid()
+}
+
+/// Return the process group ID.
+///
+/// Placeholder: always returns 1 until process groups are implemented.
+pub fn do_getpgid() -> pid_t {
+    // TODO: implement process groups
+    1
+}
+
+/// Return the pid of the parent of the current thread's process.
+pub fn do_getppid() -> pid_t {
+    let thread = current!();
+    thread.process().parent().pid()
+}
--- a/src/libos/src/process/do_sched.rs
+++ b/src/libos/src/process/do_sched.rs
@ -1,4 +1,56 @@
-use super::*;
+use super::table;
+/// Process scheduling.
+use crate::prelude::*;
+
+/// Query the CPU affinity of thread `tid` (0 is passed to the host as-is) through an
+/// OCall and store it in `cpu_set`. Returns the value reported by the OCall.
+pub fn do_sched_getaffinity(tid: pid_t, cpu_set: &mut CpuSet) -> Result<usize> {
+    let host_tid = if tid == 0 { 0 } else { find_host_tid(tid)? };
+    let cpusize = cpu_set.len();
+    let buf = cpu_set.as_mut_ptr();
+    let num_valid_bytes = try_libc!({
+        let mut ocall_ret = 0;
+        let sgx_status =
+            occlum_ocall_sched_getaffinity(&mut ocall_ret, host_tid as i32, cpusize, buf);
+        assert!(sgx_status == sgx_status_t::SGX_SUCCESS);
+        ocall_ret
+    }) as usize;
+    // Note: only the first num_valid_bytes bytes in CpuSet are valid
+    Ok(num_valid_bytes)
+}
+
+/// Set the CPU affinity of thread `tid` (0 is passed to the host as-is) to `cpu_set`
+/// through an OCall.
+pub fn do_sched_setaffinity(tid: pid_t, cpu_set: &CpuSet) -> Result<()> {
+    let host_tid = if tid == 0 { 0 } else { find_host_tid(tid)? };
+    let cpusize = cpu_set.len();
+    let buf = cpu_set.as_ptr();
+    try_libc!({
+        let mut ocall_ret = 0;
+        let sgx_status =
+            occlum_ocall_sched_setaffinity(&mut ocall_ret, host_tid as i32, cpusize, buf);
+        assert!(sgx_status == sgx_status_t::SGX_SUCCESS);
+        ocall_ret
+    });
+    Ok(())
+}
+
+/// Yield the CPU by issuing the `occlum_ocall_sched_yield` OCall.
+///
+/// Panics if the OCall does not return `SGX_SUCCESS`.
+pub fn do_sched_yield() {
+    unsafe {
+        let status = occlum_ocall_sched_yield();
+        assert!(status == sgx_status_t::SGX_SUCCESS);
+    }
+}
+
+/// Look up the host tid of the thread identified by `tid`.
+///
+/// Fails if the thread is not in the table, or with `ESRCH` if its host tid is not
+/// available.
+fn find_host_tid(tid: pid_t) -> Result<pid_t> {
+    let thread = table::get_thread(tid)?;
+    // TODO: fix the race condition of host_tid being available.
+    let maybe_host_tid = thread.inner().host_tid();
+    maybe_host_tid.ok_or_else(|| errno!(ESRCH, "host_tid is not available"))
+}

 pub struct CpuSet {
    vec: Vec<u8>,
@ -61,53 +113,6 @@ impl std::fmt::UpperHex for CpuSet {
    }
 }

-fn find_host_tid(pid: pid_t) -> Result<pid_t> {
-    let process_ref = if pid == 0 { get_current() } else { get(pid)? };
-    let mut process = process_ref.lock().unwrap();
-    let host_tid = process.get_host_tid();
-    Ok(host_tid)
-}
-
-pub fn do_sched_getaffinity(pid: pid_t, cpu_set: &mut CpuSet) -> Result<usize> {
-    let host_tid = match pid {
-        0 => 0,
-        _ => find_host_tid(pid)?,
-    };
-    let buf = cpu_set.as_mut_ptr();
-    let cpusize = cpu_set.len();
-    let retval = try_libc!({
-        let mut retval = 0;
-        let sgx_status = occlum_ocall_sched_getaffinity(&mut retval, host_tid as i32, cpusize, buf);
-        assert!(sgx_status == sgx_status_t::SGX_SUCCESS);
-        retval
-    }) as usize;
-    // Note: the first retval bytes in CpuSet are valid
-    Ok(retval)
-}
-
-pub fn do_sched_setaffinity(pid: pid_t, cpu_set: &CpuSet) -> Result<()> {
-    let host_tid = match pid {
-        0 => 0,
-        _ => find_host_tid(pid)?,
-    };
-    let buf = cpu_set.as_ptr();
-    let cpusize = cpu_set.len();
-    try_libc!({
-        let mut retval = 0;
-        let sgx_status = occlum_ocall_sched_setaffinity(&mut retval, host_tid as i32, cpusize, buf);
-        assert!(sgx_status == sgx_status_t::SGX_SUCCESS);
-        retval
-    });
-    Ok(())
-}
-
-pub fn do_sched_yield() {
-    unsafe {
-        let status = occlum_ocall_sched_yield();
-        assert!(status == sgx_status_t::SGX_SUCCESS);
-    }
-}
-
 extern "C" {
    fn occlum_ocall_sched_getaffinity(
        ret: *mut i32,
--- a/src/libos/src/process/do_set_tid_address.rs
+++ b/src/libos/src/process/do_set_tid_address.rs
@ -0,0 +1,11 @@
+use std::ptr::NonNull;
+
+use crate::prelude::*;
+
+/// Record `tidptr` as the clear-ctid address of the current thread (a null pointer
+/// clears it) and return the current thread's tid.
+pub fn do_set_tid_address(tidptr: *mut pid_t) -> Result<pid_t> {
+    debug!("set_tid_address: tidptr: {:?}", tidptr);
+    let thread = current!();
+    thread.set_clear_ctid(NonNull::new(tidptr));
+    Ok(thread.tid())
+}
--- a/src/libos/src/process/do_spawn/aux_vec.rs
+++ b/src/libos/src/process/do_spawn/aux_vec.rs
@ -0,0 +1,86 @@
+/// Auxiliary Vector.
+///
+/// # What is Auxiliary Vector?
+///
+/// Here is a concise description of Auxiliary Vector from GNU's manual:
+///
+/// > When a program is executed, it receives information from the operating system
+/// about the environment in which it is operating. The form of this information
+/// is a table of key-value pairs, where the keys are from the set of ‘AT_’
+/// values in elf.h.
+use crate::prelude::*;
+
+/// Keys of the entries in an auxiliary vector (`AuxVec`).
+#[allow(non_camel_case_types)]
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
+pub enum AuxKey {
+    AT_NULL = 0,      /* end of vector */
+    AT_IGNORE = 1,    /* entry should be ignored */
+    AT_EXECFD = 2,    /* file descriptor of program */
+    AT_PHDR = 3,      /* program headers for program */
+    AT_PHENT = 4,     /* size of program header entry */
+    AT_PHNUM = 5,     /* number of program headers */
+    AT_PAGESZ = 6,    /* system page size */
+    AT_BASE = 7,      /* base address of interpreter */
+    AT_FLAGS = 8,     /* flags */
+    AT_ENTRY = 9,     /* entry point of program */
+    AT_NOTELF = 10,   /* program is not ELF */
+    AT_UID = 11,      /* real uid */
+    AT_EUID = 12,     /* effective uid */
+    AT_GID = 13,      /* real gid */
+    AT_EGID = 14,     /* effective gid */
+    AT_PLATFORM = 15, /* string identifying CPU for optimizations */
+    AT_HWCAP = 16,    /* arch dependent hints at CPU capabilities */
+    AT_CLKTCK = 17,   /* frequency at which times() increments */
+
+    /* 18...22 not used */
+    AT_SECURE = 23, /* secure mode boolean */
+    AT_BASE_PLATFORM = 24, /* string identifying real platform, may
+                     * differ from AT_PLATFORM. */
+    AT_RANDOM = 25, /* address of 16 random bytes */
+    AT_HWCAP2 = 26, /* extension of AT_HWCAP */
+
+    /* 27...30 not used */
+    AT_EXECFN = 31, /* filename of program */
+    AT_SYSINFO = 32,
+
+    /* Occlum-specific entries */
+    AT_OCCLUM_ENTRY = 48, /* the entry point of Occlum, i.e., syscall */
+}
+
+/// A table of auxiliary-vector entries, mapping each `AuxKey` to a `u64` value.
+#[derive(Clone, Default, Debug)]
+pub struct AuxVec {
+    table: HashMap<AuxKey, u64>,
+}
+
+impl AuxVec {
+    /// Create an empty auxiliary vector.
+    pub fn new() -> AuxVec {
+        Self::default()
+    }
+
+    /// Insert or overwrite the entry for `key`.
+    ///
+    /// `AT_NULL` and `AT_IGNORE` cannot be set and are rejected with `EINVAL`.
+    pub fn set(&mut self, key: AuxKey, val: u64) -> Result<()> {
+        match key {
+            AuxKey::AT_NULL | AuxKey::AT_IGNORE => {
+                return_errno!(EINVAL, "Illegal key");
+            }
+            _ => {}
+        }
+        self.table.insert(key, val);
+        Ok(())
+    }
+
+    /// Look up the value stored for `key`, if any.
+    pub fn get(&self, key: AuxKey) -> Option<u64> {
+        self.table.get(&key).copied()
+    }
+
+    /// Remove the entry for `key`, returning its value if it was present.
+    pub fn del(&mut self, key: AuxKey) -> Option<u64> {
+        self.table.remove(&key)
+    }
+
+    /// Borrow the underlying key-value table.
+    pub fn table(&self) -> &HashMap<AuxKey, u64> {
+        &self.table
+    }
+}
--- a/src/libos/src/process/do_spawn/gdb_hook_load_elf.c
+++ b/src/libos/src/process/do_spawn/gdb_hook_load_elf.c
--- a/src/libos/src/process/do_spawn/init_stack.rs
+++ b/src/libos/src/process/do_spawn/init_stack.rs
@ -1,8 +1,9 @@
-use super::*;
-
 use std::ffi::{CStr, CString};
 use std::os::raw::c_char;
-use {std, std::mem, std::ptr};
+use std::{mem, ptr};
+
+use super::aux_vec::{AuxKey, AuxVec};
+use crate::prelude::*;

 /*
 * The initial stack of a process looks like below:
@ -52,7 +53,7 @@ pub fn do_init(
    init_area_size: usize,
    argv: &[CString],
    envp: &[CString],
-    auxtbl: &AuxTable,
+    auxtbl: &AuxVec,
 ) -> Result<usize> {
    let stack_buf = unsafe { StackBuf::new(stack_top, init_area_size)? };
    let envp_cloned = clone_cstrings_on_stack(&stack_buf, envp)?;
@ -158,7 +159,7 @@ fn clone_cstrings_on_stack<'a, 'b>(
    Ok(cstrs_cloned)
 }

-fn dump_auxtbl_on_stack<'a, 'b>(stack: &'a StackBuf, auxtbl: &'b AuxTable) -> Result<()> {
+fn dump_auxtbl_on_stack<'a, 'b>(stack: &'a StackBuf, auxtbl: &'b AuxVec) -> Result<()> {
    // For every key-value pair, dump the value first, then the key
    stack.put(0 as u64);
    stack.put(AuxKey::AT_NULL as u64);
@ -176,80 +177,3 @@ fn dump_cstrptrs_on_stack<'a, 'b>(stack: &'a StackBuf, strptrs: &'b [&'a CStr])
    }
    Ok(())
 }
-
-/* Symbolic values for the entries in the auxiliary table
-put on the initial stack */
-#[allow(non_camel_case_types)]
-#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
-pub enum AuxKey {
-    AT_NULL = 0,      /* end of vector */
-    AT_IGNORE = 1,    /* entry should be ignored */
-    AT_EXECFD = 2,    /* file descriptor of program */
-    AT_PHDR = 3,      /* program headers for program */
-    AT_PHENT = 4,     /* size of program header entry */
-    AT_PHNUM = 5,     /* number of program headers */
-    AT_PAGESZ = 6,    /* system page size */
-    AT_BASE = 7,      /* base address of interpreter */
-    AT_FLAGS = 8,     /* flags */
-    AT_ENTRY = 9,     /* entry point of program */
-    AT_NOTELF = 10,   /* program is not ELF */
-    AT_UID = 11,      /* real uid */
-    AT_EUID = 12,     /* effective uid */
-    AT_GID = 13,      /* real gid */
-    AT_EGID = 14,     /* effective gid */
-    AT_PLATFORM = 15, /* string identifying CPU for optimizations */
-    AT_HWCAP = 16,    /* arch dependent hints at CPU capabilities */
-    AT_CLKTCK = 17,   /* frequency at which times() increments */
-
-    /* 18...22 not used */
-    AT_SECURE = 23, /* secure mode boolean */
-    AT_BASE_PLATFORM = 24, /* string identifying real platform, may
-                     * differ from AT_PLATFORM. */
-    AT_RANDOM = 25, /* address of 16 random bytes */
-    AT_HWCAP2 = 26, /* extension of AT_HWCAP */
-
-    /* 28...30 not used */
-    AT_EXECFN = 31, /* filename of program */
-    AT_SYSINFO = 32,
-
-    /* Occlum-specific entries */
-    AT_OCCLUM_ENTRY = 48, /* the entry point of Occlum, i.e., syscall */
-}
-
-#[derive(Clone, Default, Debug)]
-pub struct AuxTable {
-    table: HashMap<AuxKey, u64>,
-}
-
-impl AuxTable {
-    pub fn new() -> AuxTable {
-        AuxTable {
-            table: HashMap::new(),
-        }
-    }
-}
-
-impl AuxTable {
-    pub fn set(&mut self, key: AuxKey, val: u64) -> Result<()> {
-        if key == AuxKey::AT_NULL || key == AuxKey::AT_IGNORE {
-            return_errno!(EINVAL, "Illegal key");
-        }
-        self.table
-            .entry(key)
-            .and_modify(|val_mut| *val_mut = val)
-            .or_insert(val);
-        Ok(())
-    }
-
-    pub fn get(&self, key: AuxKey) -> Option<u64> {
-        self.table.get(&key).map(|val_ref| *val_ref)
-    }
-
-    pub fn del(&mut self, key: AuxKey) -> Option<u64> {
-        self.table.remove(&key)
-    }
-
-    pub fn table(&self) -> &HashMap<AuxKey, u64> {
-        &self.table
-    }
-}
--- a/src/libos/src/process/do_spawn/init_vm.rs
+++ b/src/libos/src/process/do_spawn/init_vm.rs
@ -1,6 +1,9 @@
-use super::*;
 use std::ptr;

+use super::super::elf_file::ElfFile;
+use crate::prelude::*;
+use crate::vm::{ProcessVM, ProcessVMBuilder};
+
 pub fn do_init<'a, 'b>(
    elf_file: &'b ElfFile<'a>,
    ldso_elf_file: &'b ElfFile<'a>,
--- a/src/libos/src/process/do_spawn/mod.rs
+++ b/src/libos/src/process/do_spawn/mod.rs
@ -1,68 +1,107 @@
-use super::*;
-
 use std::ffi::{CStr, CString};
 use std::path::Path;
-use std::sgxfs::SgxFile;

-use super::fs::{
-    CreationFlags, File, FileDesc, FileMode, FileTable, HostStdioFds, INodeExt, StdinFile,
+use self::aux_vec::{AuxKey, AuxVec};
+use super::elf_file::{ElfFile, ElfHeader, ProgramHeader, ProgramHeaderExt};
+use super::process::ProcessBuilder;
+use super::task::Task;
+use super::{table, task, ProcessRef, ThreadRef};
+use crate::fs::{
+    CreationFlags, File, FileDesc, FileMode, FileTable, FsView, HostStdioFds, INodeExt, StdinFile,
    StdoutFile, ROOT_INODE,
 };
-use super::misc::ResourceLimitsRef;
-use super::vm::{ProcessVM, ProcessVMBuilder};
+use crate::prelude::*;
+use crate::vm::ProcessVM;

-pub use self::elf_file::{ElfFile, ProgramHeaderExt};
-use self::init_stack::{AuxKey, AuxTable};
-
-mod elf_file;
+mod aux_vec;
 mod init_stack;
 mod init_vm;

+/// Spawn a new process and execute it in a new host thread.
+///
+/// Delegates to `do_spawn_common` with no host stdio fds (`None`) and with
+/// `exec_now` set to `true`. Returns the pid of the new process.
 pub fn do_spawn(
    elf_path: &str,
    argv: &[CString],
    envp: &[CString],
    file_actions: &[FileAction],
-    parent_ref: &ProcessRef,
+    current_ref: &ThreadRef,
 ) -> Result<pid_t> {
-    let (new_tid, new_process_ref) =
-        new_process(elf_path, argv, envp, file_actions, None, parent_ref)?;
-    task::enqueue_and_exec_task(new_tid, new_process_ref);
-    Ok(new_tid)
+    let exec_now = true;
+    do_spawn_common(
+        elf_path,
+        argv,
+        envp,
+        file_actions,
+        None,
+        current_ref,
+        exec_now,
+    )
 }

+/// Spawn a new process but execute it later.
+///
+/// Delegates to `do_spawn_common` with the given `host_stdio_fds` and with
+/// `exec_now` set to `false`, so the main thread of the new process is only
+/// enqueued. Returns the pid of the new process.
 pub fn do_spawn_without_exec(
    elf_path: &str,
    argv: &[CString],
    envp: &[CString],
    file_actions: &[FileAction],
    host_stdio_fds: &HostStdioFds,
-    parent_ref: &ProcessRef,
+    current_ref: &ThreadRef,
 ) -> Result<pid_t> {
-    let (new_tid, new_process_ref) = new_process(
+    let exec_now = false;
+    do_spawn_common(
        elf_path,
        argv,
        envp,
        file_actions,
        Some(host_stdio_fds),
-        parent_ref,
-    )?;
-    task::enqueue_task(new_tid, new_process_ref);
-    Ok(new_tid)
+        current_ref,
+        exec_now,
+    )
 }

+/// Shared implementation of `do_spawn` and `do_spawn_without_exec`.
+///
+/// Creates the new process via `new_process`, then hands its main thread to
+/// the task module: `task::enqueue_and_exec` when `exec_now` is `true`,
+/// `task::enqueue` otherwise. Returns the pid of the new process.
+///
+/// # Panics
+///
+/// Panics if the freshly created process reports no main thread.
+fn do_spawn_common(
+    elf_path: &str,
+    argv: &[CString],
+    envp: &[CString],
+    file_actions: &[FileAction],
+    host_stdio_fds: Option<&HostStdioFds>,
+    current_ref: &ThreadRef,
+    exec_now: bool,
+) -> Result<pid_t> {
+    let child_ref = new_process(
+        elf_path,
+        argv,
+        envp,
+        file_actions,
+        host_stdio_fds,
+        current_ref,
+    )?;
+    let main_thread = child_ref
+        .main_thread()
+        .expect("the main thread is just created; it must exist");
+
+    // Either start running the main thread right away or merely queue it
+    if exec_now {
+        task::enqueue_and_exec(main_thread);
+    } else {
+        task::enqueue(main_thread);
+    }
+
+    Ok(child_ref.pid())
+}
+
+/// Create a new process and its main thread.
 fn new_process(
    elf_path: &str,
    argv: &[CString],
    envp: &[CString],
    file_actions: &[FileAction],
    host_stdio_fds: Option<&HostStdioFds>,
-    parent_ref: &ProcessRef,
-) -> Result<(pid_t, ProcessRef)> {
-    let elf_buf = load_elf_to_vec(elf_path, parent_ref)
+    current_ref: &ThreadRef,
+) -> Result<ProcessRef> {
+    let elf_buf = load_elf_to_vec(elf_path, current_ref)
        .cause_err(|e| errno!(e.errno(), "cannot load the executable"))?;
    let ldso_path = "/lib/ld-musl-x86_64.so.1";
-    let ldso_elf_buf = load_elf_to_vec(ldso_path, parent_ref)
+    let ldso_elf_buf = load_elf_to_vec(ldso_path, current_ref)
        .cause_err(|e| errno!(e.errno(), "cannot load ld.so"))?;

    let exec_elf_file =
@ -70,10 +109,11 @@ fn new_process(
    let ldso_elf_file =
        ElfFile::new(&ldso_elf_buf).cause_err(|e| errno!(e.errno(), "invalid ld.so"))?;

-    let (new_pid, new_process_ref) = {
-        let cwd = parent_ref.lock().unwrap().get_cwd().to_owned();
+    let new_process_ref = {
+        let process_ref = current_ref.process().clone();
+
        let vm = init_vm::do_init(&exec_elf_file, &ldso_elf_file)?;
-        let auxtbl = init_auxtbl(&vm, &exec_elf_file)?;
+        let auxvec = init_auxvec(&vm, &exec_elf_file)?;

        // Notify debugger to load the symbols from elf file
        let ldso_elf_base = vm.get_elf_ranges()[1].start() as u64;
@ -105,7 +145,7 @@ fn new_process(
            };
            let user_stack_base = vm.get_stack_base();
            let user_stack_limit = vm.get_stack_limit();
-            let user_rsp = init_stack::do_init(user_stack_base, 4096, argv, envp, &auxtbl)?;
+            let user_rsp = init_stack::do_init(user_stack_base, 4096, argv, envp, &auxvec)?;
            unsafe {
                Task::new(
                    ldso_entry,
@ -118,17 +158,31 @@ fn new_process(
        };
        let vm_ref = Arc::new(SgxMutex::new(vm));
        let files_ref = {
-            let files = init_files(parent_ref, file_actions, host_stdio_fds)?;
+            let files = init_files(current_ref, file_actions, host_stdio_fds)?;
            Arc::new(SgxMutex::new(files))
        };
-        let rlimits_ref = Default::default();
-        Process::new(&cwd, elf_path, task, vm_ref, files_ref, rlimits_ref, false)?
+        let fs_ref = Arc::new(SgxMutex::new(current_ref.fs().lock().unwrap().clone()));
+
+        ProcessBuilder::new()
+            .vm(vm_ref)
+            .exec_path(elf_path)
+            .parent(process_ref)
+            .task(task)
+            .fs(fs_ref)
+            .files(files_ref)
+            .build()?
    };
-    parent_adopts_new_child(&parent_ref, &new_process_ref);
-    process_table::put(new_pid, new_process_ref.clone());
-    let new_tid = new_pid;
-    info!("Process created: elf = {}, tid = {}", elf_path, new_tid);
-    Ok((new_tid, new_process_ref))
+
+    table::add_process(new_process_ref.clone());
+    table::add_thread(new_process_ref.main_thread().unwrap());
+
+    info!(
+        "Process created: elf = {}, pid = {}",
+        elf_path,
+        new_process_ref.pid()
+    );
+
+    Ok(new_process_ref)
 }

 #[derive(Debug)]
@ -145,8 +199,9 @@ pub enum FileAction {
    Close(FileDesc),
 }

-fn load_elf_to_vec(elf_path: &str, parent_ref: &ProcessRef) -> Result<Vec<u8>> {
-    let inode = parent_ref
+fn load_elf_to_vec(elf_path: &str, current_ref: &ThreadRef) -> Result<Vec<u8>> {
+    let inode = current_ref
+        .fs()
        .lock()
        .unwrap()
        .lookup_inode(elf_path)
@ -170,16 +225,15 @@ fn load_elf_to_vec(elf_path: &str, parent_ref: &ProcessRef) -> Result<Vec<u8>> {
 }

 fn init_files(
-    parent_ref: &ProcessRef,
+    current_ref: &ThreadRef,
    file_actions: &[FileAction],
    host_stdio_fds: Option<&HostStdioFds>,
 ) -> Result<FileTable> {
-    // Usually, we just inherit the file table from the parent
-    let parent = parent_ref.lock().unwrap();
-    let should_inherit_file_table = parent.get_pid() > 0;
+    // Usually, we just inherit the file table from the current process
+    let should_inherit_file_table = current_ref.process().pid() > 0;
    if should_inherit_file_table {
        // Fork: clone file table
-        let mut cloned_file_table = parent.get_files().lock().unwrap().clone();
+        let mut cloned_file_table = current_ref.files().lock().unwrap().clone();
        // Perform file actions to modify the cloned file table
        for file_action in file_actions {
            match file_action {
@ -189,7 +243,12 @@ fn init_files(
                    oflag,
                    fd,
                } => {
-                    let file = parent.open_file(path.as_str(), oflag, mode)?;
+                    let file =
+                        current_ref
+                            .fs()
+                            .lock()
+                            .unwrap()
+                            .open_file(path.as_str(), oflag, mode)?;
                    let file_ref: Arc<Box<dyn File>> = Arc::new(file);
                    let creation_flags = CreationFlags::from_bits_truncate(oflag);
                    cloned_file_table.put_at(fd, file_ref, creation_flags.must_close_on_spawn());
@ -210,7 +269,6 @@ fn init_files(
        cloned_file_table.close_on_spawn();
        return Ok(cloned_file_table);
    }
-    drop(parent);

    // But, for init process, we initialize file table for it
    let mut file_table = FileTable::new();
@ -230,42 +288,35 @@ fn init_files(
    Ok(file_table)
 }

-fn init_auxtbl(process_vm: &ProcessVM, exec_elf_file: &ElfFile) -> Result<AuxTable> {
-    let mut auxtbl = AuxTable::new();
-    auxtbl.set(AuxKey::AT_PAGESZ, 4096)?;
-    auxtbl.set(AuxKey::AT_UID, 0)?;
-    auxtbl.set(AuxKey::AT_GID, 0)?;
-    auxtbl.set(AuxKey::AT_EUID, 0)?;
-    auxtbl.set(AuxKey::AT_EGID, 0)?;
-    auxtbl.set(AuxKey::AT_SECURE, 0)?;
-    auxtbl.set(AuxKey::AT_SYSINFO, 0)?;
+/// Build the auxiliary vector that is later dumped on the initial stack.
+///
+/// Fixed entries: page size 4096; uid, gid, euid, egid, `AT_SECURE` and
+/// `AT_SYSINFO` all 0. Program-header entries and `AT_ENTRY` are derived from
+/// the ELF header of `exec_elf_file` and the start of the first ELF range of
+/// `process_vm`; `AT_BASE` is the start of the second ELF range (taken to be
+/// ld.so, going by the variable name). `AT_OCCLUM_ENTRY` holds the address of
+/// `__occlum_syscall`.
+fn init_auxvec(process_vm: &ProcessVM, exec_elf_file: &ElfFile) -> Result<AuxVec> {
+    let mut auxvec = AuxVec::new();
+    auxvec.set(AuxKey::AT_PAGESZ, 4096)?;
+    auxvec.set(AuxKey::AT_UID, 0)?;
+    auxvec.set(AuxKey::AT_GID, 0)?;
+    auxvec.set(AuxKey::AT_EUID, 0)?;
+    auxvec.set(AuxKey::AT_EGID, 0)?;
+    auxvec.set(AuxKey::AT_SECURE, 0)?;
+    auxvec.set(AuxKey::AT_SYSINFO, 0)?;

    let exec_elf_base = process_vm.get_elf_ranges()[0].start() as u64;
    let exec_elf_header = exec_elf_file.elf_header();
-    auxtbl.set(AuxKey::AT_PHENT, exec_elf_header.ph_entry_size() as u64)?;
-    auxtbl.set(AuxKey::AT_PHNUM, exec_elf_header.ph_count() as u64)?;
-    auxtbl.set(AuxKey::AT_PHDR, exec_elf_base + exec_elf_header.ph_offset())?;
-    auxtbl.set(
+    auxvec.set(AuxKey::AT_PHENT, exec_elf_header.ph_entry_size() as u64)?;
+    auxvec.set(AuxKey::AT_PHNUM, exec_elf_header.ph_count() as u64)?;
+    auxvec.set(AuxKey::AT_PHDR, exec_elf_base + exec_elf_header.ph_offset())?;
+    auxvec.set(
        AuxKey::AT_ENTRY,
        exec_elf_base + exec_elf_header.entry_point(),
    )?;

    let ldso_elf_base = process_vm.get_elf_ranges()[1].start() as u64;
-    auxtbl.set(AuxKey::AT_BASE, ldso_elf_base)?;
+    auxvec.set(AuxKey::AT_BASE, ldso_elf_base)?;

    let syscall_addr = __occlum_syscall as *const () as u64;
-    auxtbl.set(AuxKey::AT_OCCLUM_ENTRY, syscall_addr)?;
+    auxvec.set(AuxKey::AT_OCCLUM_ENTRY, syscall_addr)?;
    // TODO: init AT_EXECFN
-    // auxtbl.set_val(AuxKey::AT_EXECFN, "program_name")?;
+    // auxvec.set_val(AuxKey::AT_EXECFN, "program_name")?;

-    Ok(auxtbl)
-}
-
-fn parent_adopts_new_child(parent_ref: &ProcessRef, child_ref: &ProcessRef) {
-    let mut parent = parent_ref.lock().unwrap();
-    let mut child = child_ref.lock().unwrap();
-    parent.children.push(Arc::downgrade(child_ref));
-    child.parent = Some(parent_ref.clone());
+    Ok(auxvec)
 }

 extern "C" {
--- a/src/libos/src/process/do_wait4.rs
+++ b/src/libos/src/process/do_wait4.rs
@ -0,0 +1,67 @@
+use super::process::{ChildProcessFilter, ProcessInner};
+use super::wait::Waiter;
+use super::{table, ProcessRef, ProcessStatus};
+use crate::prelude::*;
+
+/// Wait for a child of the current process, selected by `child_filter`, to exit.
+///
+/// Returns the pid and the exit status of the reaped child. If a matching
+/// child is already a zombie it is reaped immediately; otherwise the caller
+/// sleeps until it is woken with the pid of an exited child.
+///
+/// # Errors
+///
+/// Returns `ECHILD` if no child of the current process matches the filter.
+///
+/// # Panics
+///
+/// Panics if `children()` or `waiting_children_mut()` yields `None`; both
+/// results are unwrapped.
+pub fn do_wait4(child_filter: &ChildProcessFilter) -> Result<(pid_t, i32)> {
+    // Lock the process early to ensure that we do not miss any changes in
+    // children processes
+    let thread = current!();
+    let process = thread.process();
+    // Lock order: always lock parent then child to avoid deadlock
+    let mut process_inner = process.inner();
+
+    let unwaited_children = process_inner
+        .children()
+        .unwrap()
+        .iter()
+        .filter(|child| match child_filter {
+            ChildProcessFilter::WithAnyPid => true,
+            ChildProcessFilter::WithPid(required_pid) => child.pid() == *required_pid,
+            ChildProcessFilter::WithPgid(required_pgid) => child.pgid() == *required_pgid,
+        })
+        .collect::<Vec<&ProcessRef>>();
+
+    if unwaited_children.is_empty() {
+        return_errno!(ECHILD, "Cannot find any unwaited children");
+    }
+
+    // Return immediately if a child that we wait for has already exited
+    let zombie_child = unwaited_children
+        .iter()
+        .find(|child| child.status() == ProcessStatus::Zombie);
+    if let Some(zombie_child) = zombie_child {
+        let zombie_pid = zombie_child.pid();
+        let exit_status = free_zombie_child(process_inner, zombie_pid);
+        return Ok((zombie_pid, exit_status));
+    }
+
+    let mut waiter = Waiter::new(child_filter);
+    process_inner
+        .waiting_children_mut()
+        .unwrap()
+        .add_waiter(&waiter);
+    // After adding the waiter, we can safely release the lock on the process inner
+    // without risking missing events from the process's children.
+    drop(process_inner);
+    // Wait until a child has interesting events
+    let zombie_pid = waiter.sleep_until_woken_with_result();
+
+    // Re-acquire the lock; the guard is only handed over, so it need not be `mut`
+    let process_inner = process.inner();
+    let exit_status = free_zombie_child(process_inner, zombie_pid);
+    Ok((zombie_pid, exit_status))
+}
+
+/// Reap the zombie child `zombie_pid` of the process guarded by `parent_inner`.
+///
+/// `zombie_pid` is removed from both the thread table and the process table,
+/// the child is removed from `parent_inner`, and the child's exit status is
+/// returned. `parent_inner` is taken by value, so the guard is dropped when
+/// this function returns.
+///
+/// # Panics
+///
+/// Panics if `zombie_pid` is missing from either table, or if the child has
+/// no exit status (the results are `expect`ed / `unwrap`ped).
+fn free_zombie_child(mut parent_inner: SgxMutexGuard<ProcessInner>, zombie_pid: pid_t) -> i32 {
+    // Remove zombie from the process and thread table
+    table::del_thread(zombie_pid).expect("tid must be in the table");
+    table::del_process(zombie_pid).expect("pid must be in the table");
+
+    // Remove zombie from its parent
+    let zombie = parent_inner.remove_zombie_child(zombie_pid);
+    debug_assert!(zombie.status() == ProcessStatus::Zombie);
+
+    let zombie_inner = zombie.inner();
+    zombie_inner.exit_status().unwrap()
+}
--- a/src/libos/src/process/spawn/elf_file.rs
+++ b/src/libos/src/process/spawn/elf_file.rs
@ -1,8 +1,8 @@
-use super::*;
-
 use xmas_elf::symbol_table::Entry;
 use xmas_elf::{header, program, sections};

+use crate::prelude::*;
+
 pub use xmas_elf::header::HeaderPt2 as ElfHeader;
 pub use xmas_elf::program::{ProgramHeader, ProgramIter};

--- a/src/libos/src/process/exit.rs
+++ b/src/libos/src/process/exit.rs
@ -1,157 +0,0 @@
-use super::*;
-use std::intrinsics::atomic_store;
-
-// TODO: make sure Processes are released eventually
-
-#[derive(Clone, Copy, Debug)]
-pub enum ChildProcessFilter {
-    WithAnyPID,
-    WithPID(pid_t),
-    WithPGID(pid_t),
-}
-
-unsafe impl Send for ChildProcessFilter {}
-
-pub fn do_exit(exit_status: i32) {
-    let current_ref = get_current();
-    let mut current = current_ref.lock().unwrap();
-    let parent_ref = current.get_parent().clone();
-    // Update current
-    current.exit_status = exit_status;
-    current.status = Status::ZOMBIE;
-
-    // Update children
-    for child_ref in current.get_children_iter() {
-        let mut child = child_ref.lock().unwrap();
-        child.parent = Some(IDLE_PROCESS.clone());
-    }
-    current.children.clear();
-
-    // Notify another process, if any, that waits on ctid (see set_tid_address)
-    if let Some(ctid) = current.clear_child_tid {
-        unsafe {
-            atomic_store(ctid, 0);
-        }
-        futex_wake(ctid as *const i32, 1);
-    }
-
-    // If the process is detached, no need to notify the parent
-    if current.is_detached {
-        let current_tid = current.get_tid();
-        drop(current);
-        remove_zombie_child(&parent_ref, current_tid);
-        return;
-    }
-
-    // Notify the parent process if necessary
-    let (mut parent, current) = {
-        // Always lock parent before its child
-        drop(current);
-        lock_two_in_order(&parent_ref, &current_ref)
-    };
-    // Wake up the parent if it is waiting on this child
-    if parent.waiting_children.is_none() {
-        return;
-    }
-    let mut wait_queue = parent.waiting_children.as_mut().unwrap();
-    wait_queue.del_and_wake_one_waiter(|waiter_data| -> Option<pid_t> {
-        match waiter_data {
-            ChildProcessFilter::WithAnyPID => {}
-            ChildProcessFilter::WithPID(required_pid) => {
-                if current.get_pid() != *required_pid {
-                    return None;
-                }
-            }
-            ChildProcessFilter::WithPGID(required_pgid) => {
-                if current.get_pgid() != *required_pgid {
-                    return None;
-                }
-            }
-        }
-        Some(current.get_pid())
-    });
-}
-
-pub fn do_wait4(child_filter: &ChildProcessFilter, exit_status: &mut i32) -> Result<pid_t> {
-    let current_ref = get_current();
-    let waiter = {
-        let mut current = current_ref.lock().unwrap();
-
-        let mut any_child_to_wait_for = false;
-        for child_ref in current.get_children_iter() {
-            let child = child_ref.lock().unwrap();
-
-            let may_wait_for = match child_filter {
-                ChildProcessFilter::WithAnyPID => true,
-                ChildProcessFilter::WithPID(required_pid) => child.get_pid() == *required_pid,
-                ChildProcessFilter::WithPGID(required_pgid) => child.get_pgid() == *required_pgid,
-            };
-            if !may_wait_for {
-                continue;
-            }
-
-            // Return immediately as a child that we wait for has already exited
-            if child.status == Status::ZOMBIE {
-                process_table::remove(child.pid);
-                return Ok(child.pid);
-            }
-
-            any_child_to_wait_for = true;
-        }
-        if !any_child_to_wait_for {
-            return_errno!(ECHILD, "No such child");
-        }
-
-        let waiter = Waiter::new(child_filter);
-        let mut wait_queue = WaitQueue::new();
-        wait_queue.add_waiter(&waiter);
-
-        current.waiting_children = Some(wait_queue);
-
-        waiter
-    };
-
-    // Wait until a child has interesting events
-    let child_pid = waiter.sleep_until_woken_with_result();
-
-    // Remove the child from the parent
-    *exit_status = remove_zombie_child(&current_ref, child_pid);
-
-    let mut current = current_ref.lock().unwrap();
-    current.waiting_children = None;
-
-    Ok(child_pid)
-}
-
-fn remove_zombie_child(parent_ref: &ProcessRef, child_tid: pid_t) -> i32 {
-    // Find the zombie child process
-    let mut parent = parent_ref.lock().unwrap();
-    let (child_i, child_ref) = parent
-        .get_children_iter()
-        .enumerate()
-        .find(|(child_i, child_ref)| {
-            let child = child_ref.lock().unwrap();
-            if child.get_tid() != child_tid {
-                return false;
-            }
-            assert!(child.get_status() == Status::ZOMBIE);
-            true
-        })
-        .expect("cannot find the zombie child");
-
-    // Remove the zombie child from parent
-    parent.children.swap_remove(child_i);
-    // Remove the zombie child from process table
-    process_table::remove(child_tid);
-
-    // Return the exit status
-    let child = child_ref.lock().unwrap();
-    child.get_exit_status()
-}
-
-fn lock_two_in_order<'a>(
-    first_ref: &'a ProcessRef,
-    second_ref: &'a ProcessRef,
-) -> (SgxMutexGuard<'a, Process>, SgxMutexGuard<'a, Process>) {
-    (first_ref.lock().unwrap(), second_ref.lock().unwrap())
-}
--- a/src/libos/src/process/mod.rs
+++ b/src/libos/src/process/mod.rs
@ -1,110 +1,54 @@
-pub use self::arch_prctl::{do_arch_prctl, ArchPrctlCode};
-pub use self::exit::{do_exit, do_wait4, ChildProcessFilter};
-pub use self::futex::{
-    futex_op_and_flags_from_u32, futex_requeue, futex_wait, futex_wake, FutexFlags, FutexOp,
-};
-pub use self::process::{Status, IDLE_PROCESS};
-pub use self::process_table::get;
-pub use self::sched::{do_sched_getaffinity, do_sched_setaffinity, do_sched_yield, CpuSet};
-pub use self::spawn::{do_spawn, do_spawn_without_exec, ElfFile, FileAction, ProgramHeaderExt};
-pub use self::task::{get_current, get_current_tid, run_task, Task};
-pub use self::thread::{do_clone, do_set_tid_address, CloneFlags, ThreadGroup};
-pub use self::wait::{WaitQueue, Waiter};
+//! Process/thread subsystem.
+//!
+//! The subsystem implements process/thread-related system calls, which are
+//! mainly based on the three concepts below:
+//!
+//! * [`Process`]. A process has a parent and may have multiple child processes and
+//!   can own multiple threads.
+//! * [`Thread`]. A thread belongs to one and only one process and owns a set
+//!   of OS resources, e.g., virtual memory, file tables, etc.
+//! * [`Task`]. A task belongs to one and only one thread, for which it deals with
+//!   the low-level details about thread execution.
+use crate::fs::{FileRef, FileTable, FsView};
+use crate::misc::ResourceLimits;
+use crate::prelude::*;
+use crate::vm::ProcessVM;
+
+use self::process::{ChildProcessFilter, ProcessBuilder, ProcessInner};
+use self::thread::{ThreadBuilder, ThreadId, ThreadInner};
+use self::wait::{WaitQueue, Waiter};
+
+pub use self::do_spawn::do_spawn_without_exec;
+pub use self::process::{Process, ProcessStatus, IDLE};
+pub use self::syscalls::*;
+pub use self::task::Task;
+pub use self::thread::{Thread, ThreadStatus};
+
+mod do_arch_prctl;
+mod do_clone;
+mod do_exit;
+mod do_futex;
+mod do_getpid;
+mod do_sched;
+mod do_set_tid_address;
+mod do_spawn;
+mod do_wait4;
+mod process;
+mod syscalls;
+mod thread;
+mod wait;
+
+pub mod current;
+pub mod elf_file;
+pub mod table;
+pub mod task;

 #[allow(non_camel_case_types)]
 pub type pid_t = u32;

-#[derive(Debug)]
-pub struct Process {
-    task: Task,
-    status: Status,
-    pid: pid_t,
-    pgid: pid_t,
-    tgid: pid_t,
-    host_tid: pid_t,
-    exit_status: i32,
-    is_detached: bool,
-    // TODO: move cwd, root_inode into a FileSystem structure
-    // TODO: should cwd be a String or INode?
-    cwd: String,
-    elf_path: String,
-    clear_child_tid: Option<*mut pid_t>,
-    parent: Option<ProcessRef>,
-    children: Vec<ProcessWeakRef>,
-    waiting_children: Option<WaitQueue<ChildProcessFilter, pid_t>>,
-    //thread_group: ThreadGroupRef,
-    vm: ProcessVMRef,
-    file_table: FileTableRef,
-    rlimits: ResourceLimitsRef,
-}
-
-pub type ProcessRef = Arc<SgxMutex<Process>>;
-pub type ProcessWeakRef = std::sync::Weak<SgxMutex<Process>>;
+pub type ProcessRef = Arc<Process>;
+pub type ThreadRef = Arc<Thread>;
 pub type FileTableRef = Arc<SgxMutex<FileTable>>;
 pub type ProcessVMRef = Arc<SgxMutex<ProcessVM>>;
-pub type ThreadGroupRef = Arc<SgxMutex<ThreadGroup>>;
-
-pub fn do_getpid() -> pid_t {
-    let current_ref = get_current();
-    let current = current_ref.lock().unwrap();
-    current.get_pid()
-}
-
-pub fn do_gettid() -> pid_t {
-    let current_ref = get_current();
-    let current = current_ref.lock().unwrap();
-    current.get_tid()
-}
-
-pub fn do_getpgid() -> pid_t {
-    let current_ref = get_current();
-    let current = current_ref.lock().unwrap();
-    current.get_pgid()
-}
-
-pub fn do_getppid() -> pid_t {
-    let parent_ref = {
-        let current_ref = get_current();
-        let current = current_ref.lock().unwrap();
-        current.get_parent().clone()
-    };
-    let parent = parent_ref.lock().unwrap();
-    parent.get_pid()
-}
-
-mod arch_prctl;
-mod exit;
-mod futex;
-mod process;
-mod process_table;
-mod sched;
-mod spawn;
-mod task;
-mod thread;
-mod wait;
-
-/// Get a file from the file table of the current process
-pub fn get_file(fd: FileDesc) -> Result<FileRef> {
-    let current_ref = get_current();
-    let current = current_ref.lock().unwrap();
-    let file_ref = current.get_files().lock().unwrap().get(fd as FileDesc)?;
-    Ok(file_ref)
-}
-
-/// Put a file into the file table of the current process
-pub fn put_file(new_file: FileRef, close_on_spawn: bool) -> Result<FileDesc> {
-    let current_ref = get_current();
-    let current = current_ref.lock().unwrap();
-    let new_fd = current
-        .get_files()
-        .lock()
-        .unwrap()
-        .put(new_file, close_on_spawn);
-    Ok(new_fd)
-}
-
-use super::*;
-use fs::{File, FileDesc, FileRef, FileTable};
-use misc::ResourceLimitsRef;
-use time::GLOBAL_PROFILER;
-use vm::ProcessVM;
+pub type FsViewRef = Arc<SgxMutex<FsView>>;
+pub type ResourceLimitsRef = Arc<SgxMutex<ResourceLimits>>;
--- a/src/libos/src/process/process.rs
+++ b/src/libos/src/process/process.rs
@ -1,152 +0,0 @@
-use super::task::Task;
-use super::*;
-use fs::{File, FileRef, FileTable};
-use vm::ProcessVM;
-
-lazy_static! {
-    // Dummy object to make all processes having a parent
-    pub static ref IDLE_PROCESS: ProcessRef = {
-        Arc::new(SgxMutex::new(Process {
-            task: Default::default(),
-            status: Default::default(),
-            pid: 0,
-            pgid: 1,
-            tgid: 0,
-            host_tid: 0,
-            exit_status: 0,
-            is_detached: false,
-            cwd: "/".to_owned(),
-            elf_path: "/".to_owned(),
-            clear_child_tid: None,
-            parent: None,
-            children: Vec::new(),
-            waiting_children: Default::default(),
-            vm: Default::default(),
-            file_table: Default::default(),
-            rlimits: Default::default(),
-        }))
-    };
-}
-
-impl Process {
-    // TODO: this constructor has become complicated enough to justify using builders
-    pub fn new(
-        cwd: &str,
-        elf_path: &str,
-        task: Task,
-        vm_ref: ProcessVMRef,
-        file_table_ref: FileTableRef,
-        rlimits_ref: ResourceLimitsRef,
-        is_detached: bool,
-    ) -> Result<(pid_t, ProcessRef)> {
-        let new_pid = process_table::alloc_pid();
-        let new_process_ref = Arc::new(SgxMutex::new(Process {
-            task: task,
-            status: Default::default(),
-            pid: new_pid,
-            pgid: 1, // TODO: implement pgid
-            tgid: new_pid,
-            host_tid: 0,
-            cwd: cwd.to_owned(),
-            elf_path: elf_path.to_owned(),
-            clear_child_tid: None,
-            exit_status: 0,
-            is_detached: is_detached,
-            parent: None,
-            children: Vec::new(),
-            waiting_children: None,
-            vm: vm_ref,
-            file_table: file_table_ref,
-            rlimits: rlimits_ref,
-        }));
-        Ok((new_pid, new_process_ref))
-    }
-
-    pub fn get_task(&self) -> &Task {
-        &self.task
-    }
-    pub fn get_task_mut(&mut self) -> &mut Task {
-        &mut self.task
-    }
-    /// pid as seen by the user is actually the thread group ID
-    pub fn get_pid(&self) -> pid_t {
-        self.tgid
-    }
-    /// tid as seen by the user is actually the process ID
-    pub fn get_tid(&self) -> pid_t {
-        self.pid
-    }
-    pub fn get_pgid(&self) -> pid_t {
-        self.pgid
-    }
-    pub fn get_host_tid(&self) -> pid_t {
-        self.host_tid
-    }
-    pub fn set_host_tid(&mut self, host_tid: pid_t) {
-        self.host_tid = host_tid;
-    }
-    pub fn get_status(&self) -> Status {
-        self.status
-    }
-    pub fn get_exit_status(&self) -> i32 {
-        self.exit_status
-    }
-    pub fn get_cwd(&self) -> &str {
-        &self.cwd
-    }
-    pub fn get_elf_path(&self) -> &str {
-        &self.elf_path
-    }
-    pub fn get_vm(&self) -> &ProcessVMRef {
-        &self.vm
-    }
-    pub fn get_files(&self) -> &FileTableRef {
-        &self.file_table
-    }
-    pub fn get_parent(&self) -> &ProcessRef {
-        self.parent.as_ref().unwrap()
-    }
-    pub fn get_children_iter(&self) -> impl Iterator<Item = ProcessRef> + '_ {
-        self.children
-            .iter()
-            .filter_map(|child_weak| child_weak.upgrade())
-    }
-    pub fn change_cwd(&mut self, path: &str) {
-        if path.len() > 0 && path.as_bytes()[0] == b'/' {
-            // absolute
-            self.cwd = path.to_owned();
-        } else {
-            // relative
-            if !self.cwd.ends_with("/") {
-                self.cwd += "/";
-            }
-            self.cwd += path;
-        }
-    }
-    pub fn get_rlimits(&self) -> &ResourceLimitsRef {
-        &self.rlimits
-    }
-}
-
-impl Drop for Process {
-    fn drop(&mut self) {
-        process_table::free_pid(self.pid);
-    }
-}
-
-unsafe impl Send for Process {}
-unsafe impl Sync for Process {}
-
-#[derive(Clone, Copy, Debug, PartialEq)]
-pub enum Status {
-    RUNNING,
-    INTERRUPTIBLE,
-    ZOMBIE,
-    STOPPED,
-}
-
-impl Default for Status {
-    fn default() -> Status {
-        Status::RUNNING
-    }
-}
--- a/src/libos/src/process/process/builder.rs
+++ b/src/libos/src/process/process/builder.rs
@ -0,0 +1,123 @@
+use super::super::task::Task;
+use super::super::thread::{ThreadBuilder, ThreadId};
+use super::super::{FileTableRef, FsViewRef, ProcessRef, ProcessVMRef, ResourceLimitsRef};
+use super::{Process, ProcessInner};
+use crate::prelude::*;
+
+/// A builder that assembles a new `Process` together with its main thread.
+///
+/// Thread-level settings (task, vm, fs, files, rlimits) are forwarded to the
+/// embedded `ThreadBuilder`; process-level settings are stored here.
+#[derive(Debug)]
+pub struct ProcessBuilder {
+    // The tid of the main thread, which also becomes the pid of the process
+    tid: Option<ThreadId>,
+    // The builder of the main thread; it is taken out and put back by each forwarding setter
+    thread_builder: Option<ThreadBuilder>,
+    // Mandatory fields
+    // NOTE(review): no setter in this file writes `vm` (the `vm()` setter hands the value
+    // to the thread builder instead) -- confirm whether this field is still needed
+    vm: Option<ProcessVMRef>,
+    // Optional fields, which have reasonable default values
+    exec_path: Option<String>,
+    parent: Option<ProcessRef>,
+    no_parent: bool,
+}
+
+impl ProcessBuilder {
+    /// Create a builder with a fresh thread builder and every other setting unset.
+    pub fn new() -> Self {
+        let thread_builder = ThreadBuilder::new();
+        Self {
+            tid: None,
+            thread_builder: Some(thread_builder),
+            vm: None,
+            exec_path: None,
+            parent: None,
+            no_parent: false,
+        }
+    }
+
+    /// Set the tid of the main thread, which is also used as the pid of the process.
+    pub fn tid(mut self, tid: ThreadId) -> Self {
+        self.tid = Some(tid);
+        self
+    }
+
+    /// Set the path of the executable (defaults to an empty string).
+    pub fn exec_path(mut self, exec_path: &str) -> Self {
+        self.exec_path = Some(exec_path.to_string());
+        self
+    }
+
+    /// Set the parent process. Must not be combined with `no_parent(true)`.
+    pub fn parent(mut self, parent: ProcessRef) -> Self {
+        self.parent = Some(parent);
+        self
+    }
+
+    /// Declare explicitly that the new process has no parent.
+    pub fn no_parent(mut self, no_parent: bool) -> Self {
+        self.no_parent = no_parent;
+        self
+    }
+
+    /// Set the task of the main thread.
+    pub fn task(self, task: Task) -> Self {
+        self.thread_builder(|tb| tb.task(task))
+    }
+
+    /// Set the VM of the main thread.
+    pub fn vm(self, vm: ProcessVMRef) -> Self {
+        self.thread_builder(|tb| tb.vm(vm))
+    }
+
+    /// Set the file system view of the main thread.
+    pub fn fs(self, fs: FsViewRef) -> Self {
+        self.thread_builder(|tb| tb.fs(fs))
+    }
+
+    /// Set the file table of the main thread.
+    pub fn files(self, files: FileTableRef) -> Self {
+        self.thread_builder(|tb| tb.files(files))
+    }
+
+    /// Set the resource limits of the main thread.
+    pub fn rlimits(self, rlimits: ResourceLimitsRef) -> Self {
+        self.thread_builder(|tb| tb.rlimits(rlimits))
+    }
+
+    /// Build the process and its main thread.
+    ///
+    /// Returns `EINVAL` if the `parent` and `no_parent` settings contradict each
+    /// other, and propagates any error from building the main thread.
+    ///
+    /// Panics if the parent has no children list (i.e., it is a zombie).
+    pub fn build(mut self) -> Result<ProcessRef> {
+        // Check whether parent is given as expected. This is done first so that
+        // a rejected configuration never reaches `ThreadId::new()`.
+        if self.no_parent != self.parent.is_none() {
+            return_errno!(
+                EINVAL,
+                "parent and no_parent config contradicts with one another"
+            );
+        }
+
+        // Process's pid == Main thread's tid
+        let tid = self.tid.take().unwrap_or_else(|| ThreadId::new());
+        let pid = tid.as_u32() as pid_t;
+
+        // Build a new process
+        let new_process = {
+            let exec_path = self.exec_path.take().unwrap_or_default();
+            let parent = self.parent.take().map(|parent| SgxRwLock::new(parent));
+            let inner = SgxMutex::new(ProcessInner::new());
+            Arc::new(Process {
+                pid,
+                exec_path,
+                parent,
+                inner,
+            })
+        };
+
+        // Build the main thread of the new process. The returned reference is
+        // not used here; it is only kept alive until the end of this function.
+        let mut self_ = self.thread_builder(|tb| tb.tid(tid).process(new_process.clone()));
+        let _main_thread = self_.thread_builder.take().unwrap().build()?;
+
+        // Associate the new process with its parent
+        if !self_.no_parent {
+            new_process
+                .parent()
+                .inner()
+                .children_mut()
+                // `children_mut()` is `None` only for a zombie parent
+                .unwrap()
+                .push(new_process.clone());
+        }
+
+        Ok(new_process)
+    }
+
+    /// Apply `f` to the embedded thread builder and store the result back.
+    fn thread_builder<F>(mut self, f: F) -> Self
+    where
+        F: FnOnce(ThreadBuilder) -> ThreadBuilder,
+    {
+        let thread_builder = self.thread_builder.take().unwrap();
+        self.thread_builder = Some(f(thread_builder));
+        self
+    }
+}
--- a/src/libos/src/process/process/idle.rs
+++ b/src/libos/src/process/process/idle.rs
@ -0,0 +1,38 @@
+use super::super::task::Task;
+use super::super::thread::ThreadId;
+use super::{ProcessBuilder, ThreadRef};
+/// Process 0, a.k.a. the idle process.
+///
+/// The idle process has no practical use except giving process 1 (a.k.a. the init process)
+/// a parent.
+use crate::prelude::*;
+use crate::vm::ProcessVM;
+
+lazy_static! {
+    /// The main thread of the idle process, created on first access.
+    pub static ref IDLE: ThreadRef =
+        create_idle_thread().expect("creating the idle process should never fail");
+}
+
+/// Build the idle process (pid 0) and return its main thread (tid 0).
+fn create_idle_thread() -> Result<ThreadRef> {
+    // Assemble the idle process; the mandatory fields get dummy values
+    let idle_process = ProcessBuilder::new()
+        .tid(ThreadId::zero())
+        .vm(Arc::new(SgxMutex::new(ProcessVM::default())))
+        .task(Task::default())
+        .no_parent(true)
+        .build()?;
+    debug_assert!(idle_process.pid() == 0);
+
+    // The idle process/thread is deliberately NOT added to the process/thread
+    // table, so that it is not accessible from the user space.
+    let idle_thread = idle_process.main_thread().unwrap();
+    debug_assert!(idle_thread.tid() == 0);
+    Ok(idle_thread)
+}
--- a/src/libos/src/process/process/mod.rs
+++ b/src/libos/src/process/process/mod.rs
@ -0,0 +1,304 @@
+use std::fmt;
+
+use super::wait::WaitQueue;
+use super::{ProcessRef, ThreadRef};
+use crate::prelude::*;
+
+pub use self::builder::ProcessBuilder;
+pub use self::idle::IDLE;
+
+mod builder;
+mod idle;
+
+/// A process: an immutable identity (pid and executable path) plus
+/// lock-protected mutable state.
+pub struct Process {
+    // Immutable info
+    pid: pid_t,
+    exec_path: String,
+    // Mutable info
+    // `None` if the process was built without a parent; otherwise the parent
+    // can be replaced through the lock (see `ProcessInner::exit`)
+    parent: Option<SgxRwLock<ProcessRef>>,
+    // Status, children and threads; reached through `Process::inner()`
+    inner: SgxMutex<ProcessInner>,
+}
+
+/// The externally visible status of a process, as returned by `Process::status()`.
+#[derive(Debug, PartialEq, Clone, Copy)]
+pub enum ProcessStatus {
+    Running,
+    Stopped,
+    // Reported once the process's inner state has become `ProcessInner::Zombie`
+    Zombie,
+}
+
+impl Process {
+    /// Return the ID of this process.
+    pub fn pid(&self) -> pid_t {
+        self.pid
+    }
+
+    /// Return the ID of the process group; always 0 for now.
+    // TODO: implement process group
+    pub fn pgid(&self) -> pid_t {
+        0
+    }
+
+    /// Return the parent of this process.
+    ///
+    /// Precondition: this must not be the idle process, which has no parent.
+    pub fn parent(&self) -> ProcessRef {
+        debug_assert!(self.pid() != 0);
+        // All non-idle process has a parent
+        let parent_lock = self.parent.as_ref().unwrap();
+        let parent = parent_lock.read().unwrap();
+        parent.clone()
+    }
+
+    /// Return the main thread, if it still exists.
+    ///
+    /// The main thread is the thread whose tid equals the pid of the process.
+    /// It is only reported while it is also the leader thread.
+    pub fn main_thread(&self) -> Option<ThreadRef> {
+        self.leader_thread()
+            .filter(|leader| leader.tid() == self.pid())
+    }
+
+    /// Return the leader thread, i.e., the first thread in the thread list.
+    ///
+    /// There is a leader as long as the process has at least one thread.
+    /// The leader is usually the main thread, but not always.
+    pub fn leader_thread(&self) -> Option<ThreadRef> {
+        let inner = self.inner();
+        inner.leader_thread()
+    }
+
+    /// Return the current status of the process.
+    pub fn status(&self) -> ProcessStatus {
+        let inner = self.inner();
+        inner.status()
+    }
+
+    /// Return the path of the executable.
+    pub fn exec_path(&self) -> &str {
+        self.exec_path.as_str()
+    }
+
+    /// Lock and return the internal state of the process.
+    ///
+    /// To preserve encapsulation, this method is not visible to other subsystems.
+    pub(super) fn inner(&self) -> SgxMutexGuard<ProcessInner> {
+        let guard = self.inner.lock();
+        guard.unwrap()
+    }
+}
+
+/// The mutable state of a process.
+///
+/// A process starts as `Live` and is turned into `Zombie` by `ProcessInner::exit`.
+pub enum ProcessInner {
+    Live {
+        // Either running or stopped
+        status: LiveStatus,
+        children: Vec<ProcessRef>,
+        waiting_children: WaitQueue<ChildProcessFilter, pid_t>,
+        // The first entry, if any, is reported as the leader thread
+        threads: Vec<ThreadRef>,
+    },
+    Zombie {
+        exit_status: i32,
+    },
+}
+
+impl ProcessInner {
+    /// Create the state of a running process without children or threads.
+    pub fn new() -> Self {
+        Self::Live {
+            status: LiveStatus::Running,
+            children: Vec::new(),
+            waiting_children: WaitQueue::new(),
+            threads: Vec::new(),
+        }
+    }
+
+    /// Return the status; a `Zombie` state always maps to `ProcessStatus::Zombie`.
+    pub fn status(&self) -> ProcessStatus {
+        match self {
+            Self::Live { status, .. } => (*status).into(),
+            Self::Zombie { .. } => ProcessStatus::Zombie,
+        }
+    }
+
+    /// Return the children, or `None` for a zombie.
+    pub fn children(&self) -> Option<&Vec<ProcessRef>> {
+        match self {
+            Self::Live { children, .. } => Some(children),
+            Self::Zombie { .. } => None,
+        }
+    }
+
+    /// Return the children mutably, or `None` for a zombie.
+    pub fn children_mut(&mut self) -> Option<&mut Vec<ProcessRef>> {
+        match self {
+            Self::Live { children, .. } => Some(children),
+            Self::Zombie { .. } => None,
+        }
+    }
+
+    /// Return the number of children; 0 for a zombie.
+    ///
+    /// Takes `&self`: counting does not need exclusive access.
+    pub fn num_children(&self) -> usize {
+        self.children().map_or(0, |children| children.len())
+    }
+
+    /// Return the threads, or `None` for a zombie.
+    pub fn threads(&self) -> Option<&Vec<ThreadRef>> {
+        match self {
+            Self::Live { threads, .. } => Some(threads),
+            Self::Zombie { .. } => None,
+        }
+    }
+
+    /// Return the threads mutably, or `None` for a zombie.
+    pub fn threads_mut(&mut self) -> Option<&mut Vec<ThreadRef>> {
+        match self {
+            Self::Live { threads, .. } => Some(threads),
+            Self::Zombie { .. } => None,
+        }
+    }
+
+    /// Return the number of threads; 0 for a zombie.
+    ///
+    /// Takes `&self`: counting does not need exclusive access.
+    pub fn num_threads(&self) -> usize {
+        self.threads().map_or(0, |threads| threads.len())
+    }
+
+    /// Return the first thread in the thread list, if there is one.
+    pub fn leader_thread(&self) -> Option<ThreadRef> {
+        self.threads().and_then(|threads| threads.first().cloned())
+    }
+
+    /// Return the queue of waiters for child processes, or `None` for a zombie.
+    pub fn waiting_children_mut(&mut self) -> Option<&mut WaitQueue<ChildProcessFilter, pid_t>> {
+        match self {
+            Self::Live {
+                waiting_children, ..
+            } => Some(waiting_children),
+            _ => None,
+        }
+    }
+
+    /// Remove the child with the given pid from the children list and return it.
+    ///
+    /// Panics if this process is a zombie or has no child with that pid.
+    /// The order of the remaining children is not preserved (`swap_remove`).
+    pub fn remove_zombie_child(&mut self, zombie_pid: pid_t) -> ProcessRef {
+        let children = self.children_mut().unwrap();
+        let zombie_i = children
+            .iter()
+            .position(|child| child.pid() == zombie_pid)
+            .unwrap();
+        children.swap_remove(zombie_i)
+    }
+
+    /// Turn this process into a zombie that carries `exit_status`.
+    ///
+    /// Preconditions (checked in debug builds only): the process is running
+    /// and has no threads left.
+    pub fn exit(&mut self, exit_status: i32) {
+        // Check preconditions
+        debug_assert!(self.status() == ProcessStatus::Running);
+        debug_assert!(self.num_threads() == 0);
+
+        // When this process exits, the parent of each of its children is
+        // replaced with the process that owns the IDLE thread.
+        // NOTE(review): the comment here used to say "adopted by the init
+        // process", and the children are not pushed onto the new parent's
+        // children list in this function -- confirm both are intended.
+        for child in self.children().unwrap() {
+            let mut parent = child.parent.as_ref().unwrap().write().unwrap();
+            *parent = IDLE.process().clone();
+        }
+
+        *self = Self::Zombie { exit_status };
+    }
+
+    /// Return the exit status, or `None` if the process is not a zombie.
+    ///
+    /// Calling this on a live process is a bug and trips a debug assertion.
+    pub fn exit_status(&self) -> Option<i32> {
+        // Check preconditions
+        debug_assert!(self.status() == ProcessStatus::Zombie);
+
+        match self {
+            Self::Zombie { exit_status } => Some(*exit_status),
+            _ => None,
+        }
+    }
+}
+
+/// Two processes compare equal when their pids are equal.
+impl PartialEq for Process {
+    fn eq(&self, rhs: &Self) -> bool {
+        let (lhs_pid, rhs_pid) = (self.pid(), rhs.pid());
+        lhs_pid == rhs_pid
+    }
+}
+
+// Why a manual implementation of the Debug trait?
+//
+// An explicit implementation of the Debug trait is required since the Process and
+// Thread structs refer to each other. Thus, the automatically-derived implementations
+// of the Debug trait for the two structs may lead to an infinite loop.
+
+impl fmt::Debug for Process {
+    /// Print the pid, executable path, parent pid and inner state.
+    ///
+    /// The inner state is locked while it is printed.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        // The process with pid 0 is never asked for its parent
+        let ppid = match self.pid() {
+            pid if pid > 0 => Some(self.parent().pid()),
+            _ => None,
+        };
+
+        let mut output = f.debug_struct("Process");
+        output.field("pid", &self.pid());
+        output.field("exec_path", &self.exec_path());
+        output.field("ppid", &ppid);
+        output.field("inner", &self.inner());
+        output.finish()
+    }
+}
+
+impl fmt::Debug for ProcessInner {
+    /// Print children and threads by ID only, rather than recursing into them.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            ProcessInner::Live {
+                status,
+                children,
+                threads,
+                ..
+            } => {
+                let child_pids: Vec<pid_t> = children.iter().map(|child| child.pid()).collect();
+                let thread_tids: Vec<pid_t> =
+                    threads.iter().map(|thread| thread.tid()).collect();
+                f.debug_struct("ProcessInner::Live")
+                    .field("status", &status)
+                    .field("child_pids", &child_pids)
+                    .field("thread_tids", &thread_tids)
+                    .finish()
+            }
+            ProcessInner::Zombie { exit_status, .. } => {
+                let mut output = f.debug_struct("ProcessInner::Zombie");
+                output.field("exit_status", exit_status);
+                output.finish()
+            }
+        }
+    }
+}
+
+/// The status of a process that is still alive (`ProcessInner::Live`).
+///
+/// It converts into the corresponding `ProcessStatus` variant.
+#[derive(Debug, PartialEq, Clone, Copy)]
+pub enum LiveStatus {
+    Running,
+    Stopped,
+}
+
+/// Every live status has a direct counterpart in `ProcessStatus`.
+///
+/// `From` is implemented rather than `Into`: the standard library's blanket
+/// impl then provides `LiveStatus::into()` for existing callers.
+impl From<LiveStatus> for ProcessStatus {
+    fn from(status: LiveStatus) -> Self {
+        match status {
+            LiveStatus::Running => Self::Running,
+            LiveStatus::Stopped => Self::Stopped,
+        }
+    }
+}
+
+/// Selects which child processes a waiter is interested in.
+#[derive(Clone, Copy, Debug)]
+pub enum ChildProcessFilter {
+    // Any child
+    WithAnyPid,
+    // The child with the given pid
+    WithPid(pid_t),
+    // Children with the given process group ID
+    WithPgid(pid_t),
+}
+
+// TODO: is this necessary?
+// NOTE(review): every variant holds at most a `pid_t`; if `pid_t` is a plain
+// integer type, the compiler already derives `Send` for this enum and the
+// explicit impl is redundant -- confirm and remove.
+unsafe impl Send for ChildProcessFilter {}
--- a/src/libos/src/process/process_table.rs
+++ b/src/libos/src/process/process_table.rs
@ -1,38 +0,0 @@
-use super::*;
-use std::sync::atomic::{AtomicU32, Ordering};
-
-lazy_static! {
-    static ref PROCESS_TABLE: SgxMutex<HashMap<pid_t, ProcessRef>> =
-        { SgxMutex::new(HashMap::new()) };
-}
-
-pub fn put(pid: pid_t, process: ProcessRef) {
-    PROCESS_TABLE.lock().unwrap().insert(pid, process);
-}
-
-pub fn remove(pid: pid_t) {
-    PROCESS_TABLE.lock().unwrap().remove(&pid);
-}
-
-pub fn get(pid: pid_t) -> Result<ProcessRef> {
-    PROCESS_TABLE
-        .lock()
-        .unwrap()
-        .get(&pid)
-        .map(|pr| pr.clone())
-        .ok_or_else(|| errno!(ENOENT, "process not found"))
-}
-
-static NEXT_PID: AtomicU32 = AtomicU32::new(1);
-
-pub fn alloc_pid() -> u32 {
-    NEXT_PID.fetch_add(1, Ordering::SeqCst)
-}
-
-pub fn free_pid(pid: u32) {
-    // PID 0 is reserved for idle thread, thus no need to free
-    if pid == 0 {
-        return;
-    }
-    // TODO:
-}
--- a/src/libos/src/process/syscalls.rs
+++ b/src/libos/src/process/syscalls.rs
@ -0,0 +1,310 @@
+use std::ptr::NonNull;
+
+use super::do_arch_prctl::ArchPrctlCode;
+use super::do_clone::CloneFlags;
+use super::do_futex::{FutexFlags, FutexOp};
+use super::do_sched::CpuSet;
+use super::do_spawn::FileAction;
+use super::process::ChildProcessFilter;
+use crate::prelude::*;
+use crate::time::timespec_t;
+use crate::util::mem_util::from_user::*;
+
+/// The `spawn` system call: create a child process from the given executable.
+///
+/// On success, the pid of the child is written to `child_pid_ptr` and 0 is returned.
+pub fn do_spawn(
+    child_pid_ptr: *mut u32,
+    path: *const i8,
+    argv: *const *const i8,
+    envp: *const *const i8,
+    fdop_list: *const FdOp,
+) -> Result<isize> {
+    // Check the output pointer and clone the user-given arguments
+    check_mut_ptr(child_pid_ptr)?;
+    let exec_path = clone_cstring_safely(path)?
+        .to_string_lossy()
+        .into_owned();
+    let args = clone_cstrings_safely(argv)?;
+    let envs = clone_cstrings_safely(envp)?;
+    let file_actions = clone_file_actions_safely(fdop_list)?;
+    let current = current!();
+    debug!(
+        "spawn: path: {:?}, argv: {:?}, envp: {:?}, fdop: {:?}",
+        exec_path, args, envs, file_actions
+    );
+
+    let child_pid =
+        super::do_spawn::do_spawn(&exec_path, &args, &envs, &file_actions, &current)?;
+
+    // `child_pid_ptr` has passed `check_mut_ptr` above
+    unsafe {
+        *child_pid_ptr = child_pid;
+    }
+    Ok(0)
+}
+
+/// A node of the user-provided linked list of file actions for `spawn`.
+///
+/// The list is walked through `next` and converted into `FileAction`s by
+/// `clone_file_actions_safely`.
+#[repr(C)]
+#[derive(Debug)]
+pub struct FdOp {
+    // We actually switch the prev and next fields in the libc definition.
+    prev: *const FdOp,
+    next: *const FdOp,
+    // One of the FDOP_* constants
+    cmd: u32,
+    fd: u32,
+    // Only read for FDOP_DUP2
+    srcfd: u32,
+    // `oflag`, `mode` and `path` are only read for FDOP_OPEN
+    oflag: u32,
+    mode: u32,
+    path: *const i8,
+}
+
+// This Rust version of fdop corresponds to the C version in Occlum.
+// See <path_to_musl_libc>/src/process/fdop.h.
+const FDOP_CLOSE: u32 = 1;
+const FDOP_DUP2: u32 = 2;
+const FDOP_OPEN: u32 = 3;
+
+/// Walk the user-provided list of `FdOp` nodes and convert it into `FileAction`s.
+///
+/// Every node pointer is checked with `check_ptr` before it is dereferenced.
+/// Returns `EINVAL` for an unknown command.
+fn clone_file_actions_safely(fdop_ptr: *const FdOp) -> Result<Vec<FileAction>> {
+    let mut actions = Vec::new();
+
+    let mut cursor = fdop_ptr;
+    while !cursor.is_null() {
+        check_ptr(cursor)?;
+        let fdop = unsafe { &*cursor };
+
+        #[deny(unreachable_patterns)]
+        let action = match fdop.cmd {
+            FDOP_CLOSE => FileAction::Close(fdop.fd),
+            FDOP_DUP2 => FileAction::Dup2(fdop.srcfd, fdop.fd),
+            FDOP_OPEN => {
+                let path = clone_cstring_safely(fdop.path)?
+                    .to_string_lossy()
+                    .into_owned();
+                FileAction::Open {
+                    path,
+                    mode: fdop.mode,
+                    oflag: fdop.oflag,
+                    fd: fdop.fd,
+                }
+            }
+            _ => {
+                return_errno!(EINVAL, "Unknown file action command");
+            }
+        };
+        actions.push(action);
+
+        // Move on to the next node; a null pointer ends the list
+        cursor = fdop.next;
+    }
+
+    Ok(actions)
+}
+
+/// The `clone` system call: check the raw arguments and create a new thread.
+///
+/// `ptid`, `ctid` and `new_tls` are only checked and passed on when the
+/// corresponding clone flag is set. Returns the ID of the new thread.
+pub fn do_clone(
+    flags: u32,
+    stack_addr: usize,
+    ptid: *mut pid_t,
+    ctid: *mut pid_t,
+    new_tls: usize,
+) -> Result<isize> {
+    // Unknown flag bits are silently dropped
+    let flags = CloneFlags::from_bits_truncate(flags);
+    check_mut_ptr(stack_addr as *mut u64)?;
+
+    let ptid = if flags.contains(CloneFlags::CLONE_PARENT_SETTID) {
+        check_mut_ptr(ptid)?;
+        NonNull::new(ptid)
+    } else {
+        None
+    };
+
+    let ctid = if flags.contains(CloneFlags::CLONE_CHILD_CLEARTID) {
+        check_mut_ptr(ctid)?;
+        NonNull::new(ctid)
+    } else {
+        None
+    };
+
+    let new_tls = if flags.contains(CloneFlags::CLONE_SETTLS) {
+        check_mut_ptr(new_tls as *mut usize)?;
+        Some(new_tls)
+    } else {
+        None
+    };
+
+    let child_pid = super::do_clone::do_clone(flags, stack_addr, ptid, ctid, new_tls)?;
+    Ok(child_pid as isize)
+}
+
+/// The `futex` system call; supports FUTEX_WAIT, FUTEX_WAKE and FUTEX_REQUEUE.
+///
+/// The meaning of `futex_val` and `timeout` depends on the operation. Any
+/// other operation is rejected with `ENOSYS`.
+pub fn do_futex(
+    futex_addr: *const i32,
+    futex_op: u32,
+    futex_val: i32,
+    timeout: u64,
+    futex_new_addr: *const i32,
+) -> Result<isize> {
+    // Convert a count argument, which must not be negative
+    fn get_futex_val(val: i32) -> Result<usize> {
+        if val < 0 {
+            return_errno!(EINVAL, "the futex val must not be negative");
+        }
+        Ok(val as usize)
+    }
+
+    check_ptr(futex_addr)?;
+    let (futex_op, futex_flags) = super::do_futex::futex_op_and_flags_from_u32(futex_op)?;
+
+    match futex_op {
+        FutexOp::FUTEX_WAIT => {
+            // For this operation, `timeout` is a pointer to a timespec (may be null)
+            let timeout_ptr = timeout as *const timespec_t;
+            let timeout = if timeout_ptr.is_null() {
+                None
+            } else {
+                let ts = timespec_t::from_raw_ptr(timeout_ptr)?;
+                ts.validate()?;
+                if futex_flags.contains(FutexFlags::FUTEX_CLOCK_REALTIME) {
+                    warn!("CLOCK_REALTIME is not supported yet, use monotonic clock");
+                }
+                Some(ts)
+            };
+            super::do_futex::futex_wait(futex_addr, futex_val, &timeout).map(|_| 0)
+        }
+        FutexOp::FUTEX_WAKE => {
+            let max_count = get_futex_val(futex_val)?;
+            let count = super::do_futex::futex_wake(futex_addr, max_count)?;
+            Ok(count as isize)
+        }
+        FutexOp::FUTEX_REQUEUE => {
+            check_ptr(futex_new_addr)?;
+            let max_nwakes = get_futex_val(futex_val)?;
+            // For this operation, `timeout` carries the maximum number of requeues
+            let max_nrequeues = get_futex_val(timeout as i32)?;
+            let nwakes = super::do_futex::futex_requeue(
+                futex_addr,
+                max_nwakes,
+                max_nrequeues,
+                futex_new_addr,
+            )?;
+            Ok(nwakes as isize)
+        }
+        _ => return_errno!(ENOSYS, "the futex operation is not supported"),
+    }
+}
+
+/// The `arch_prctl` system call: decode `code`, check `addr` and delegate.
+pub fn do_arch_prctl(code: u32, addr: *mut usize) -> Result<isize> {
+    let prctl_code = ArchPrctlCode::from_u32(code)?;
+    check_mut_ptr(addr)?;
+    super::do_arch_prctl::do_arch_prctl(prctl_code, addr)?;
+    Ok(0)
+}
+
+/// The `set_tid_address` system call: check `tidptr`, delegate and return the tid.
+pub fn do_set_tid_address(tidptr: *mut pid_t) -> Result<isize> {
+    check_mut_ptr(tidptr)?;
+    let tid = super::do_set_tid_address::do_set_tid_address(tidptr)?;
+    Ok(tid as isize)
+}
+
+/// The `sched_yield` system call; always returns 0.
+pub fn do_sched_yield() -> Result<isize> {
+    let _ = super::do_sched::do_sched_yield();
+    Ok(0)
+}
+
+/// The `sched_getaffinity` system call: copy the CPU set of `pid` into `buf`.
+///
+/// Returns `EINVAL` if `cpusize` is zero and `EFAULT` if `buf` is null.
+pub fn do_sched_getaffinity(pid: pid_t, cpusize: size_t, buf: *mut c_uchar) -> Result<isize> {
+    // Construct safe Rust types
+    check_mut_array(buf, cpusize)?;
+    if cpusize == 0 {
+        return_errno!(EINVAL, "cpuset size must be greater than zero");
+    }
+    if buf.is_null() {
+        return_errno!(EFAULT, "cpuset mask must NOT be null");
+    }
+    // The buffer has passed `check_mut_array` and is non-null
+    let buf_slice = unsafe { std::slice::from_raw_parts_mut(buf, cpusize) };
+
+    // Call the memory-safe do_sched_getaffinity
+    let mut cpuset = CpuSet::new(cpusize);
+    let retval = super::do_sched::do_sched_getaffinity(pid, &mut cpuset)?;
+
+    // Copy from Rust types to C types
+    buf_slice.copy_from_slice(cpuset.as_slice());
+    Ok(retval as isize)
+}
+
+/// The `sched_setaffinity` system call: set the CPU set of `pid` from `buf`.
+///
+/// Returns `EINVAL` if `cpusize` is zero and `EFAULT` if `buf` is null.
+pub fn do_sched_setaffinity(pid: pid_t, cpusize: size_t, buf: *const c_uchar) -> Result<isize> {
+    // Convert unsafe C types into safe Rust types
+    check_array(buf, cpusize)?;
+    if cpusize == 0 {
+        return_errno!(EINVAL, "cpuset size must be greater than zero");
+    }
+    if buf.is_null() {
+        return_errno!(EFAULT, "cpuset mask must NOT be null");
+    }
+    let cpuset = CpuSet::from_raw_buf(buf, cpusize);
+    debug!("sched_setaffinity cpuset: {:#x}", cpuset);
+
+    // Call the memory-safe do_sched_setaffinity
+    super::do_sched::do_sched_setaffinity(pid, &cpuset)?;
+    Ok(0)
+}
+
+/// The `exit` system call: record the exit status, then leave the current task.
+///
+/// This function never returns.
+pub fn do_exit(status: i32) -> ! {
+    extern "C" {
+        fn do_exit_task() -> !;
+    }
+
+    debug!("exit: {}", status);
+    super::do_exit::do_exit(status);
+
+    unsafe { do_exit_task() }
+}
+
+/// The `wait4` system call: wait for a child process selected by `pid`.
+///
+/// The `pid` argument is interpreted as follows:
+/// * `pid > 0`: the child whose pid is `pid`;
+/// * `pid == 0`: any child in the process group of the caller;
+/// * `pid == -1`: any child;
+/// * `pid < -1`: any child in the process group `-pid`.
+///
+/// On success, returns the pid of the child. If `exit_status_ptr` is not
+/// null, the exit status of the child is written to it.
+///
+/// Returns `ESRCH` if `pid` is `i32::MIN`, whose negation does not fit in `i32`.
+pub fn do_wait4(pid: i32, exit_status_ptr: *mut i32) -> Result<isize> {
+    if !exit_status_ptr.is_null() {
+        check_mut_ptr(exit_status_ptr)?;
+    }
+
+    let child_process_filter = match pid {
+        -1 => ChildProcessFilter::WithAnyPid,
+        0 => {
+            let pgid = current!().process().pgid();
+            ChildProcessFilter::WithPgid(pgid)
+        }
+        pid if pid > 0 => ChildProcessFilter::WithPid(pid as pid_t),
+        // The only remaining case is pid < -1. The value comes from the user,
+        // so the negation must be checked for overflow.
+        pid => match pid.checked_neg() {
+            Some(pgid) => ChildProcessFilter::WithPgid(pgid as pid_t),
+            None => return_errno!(ESRCH, "no process group can have the given ID"),
+        },
+    };
+
+    let (child_pid, exit_status) = super::do_wait4::do_wait4(&child_process_filter)?;
+    if !exit_status_ptr.is_null() {
+        // The pointer has passed `check_mut_ptr` above
+        unsafe {
+            *exit_status_ptr = exit_status;
+        }
+    }
+    Ok(child_pid as isize)
+}
+
+/// The `getpid` system call.
+pub fn do_getpid() -> Result<isize> {
+    Ok(super::do_getpid::do_getpid() as isize)
+}
+
+/// The `gettid` system call.
+pub fn do_gettid() -> Result<isize> {
+    Ok(super::do_getpid::do_gettid() as isize)
+}
+
+/// The `getppid` system call.
+pub fn do_getppid() -> Result<isize> {
+    Ok(super::do_getpid::do_getppid() as isize)
+}
+
+/// The `getpgid` system call.
+pub fn do_getpgid() -> Result<isize> {
+    Ok(super::do_getpid::do_getpgid() as isize)
+}
+
+// TODO: implement uid, gid, euid, egid
+
+/// The `getuid` system call. Not implemented yet: always reports 0.
+pub fn do_getuid() -> Result<isize> {
+    Ok(0)
+}
+
+/// The `getgid` system call. Not implemented yet: always reports 0.
+pub fn do_getgid() -> Result<isize> {
+    Ok(0)
+}
+
+/// The `geteuid` system call. Not implemented yet: always reports 0.
+pub fn do_geteuid() -> Result<isize> {
+    Ok(0)
+}
+
+/// The `getegid` system call. Not implemented yet: always reports 0.
+pub fn do_getegid() -> Result<isize> {
+    Ok(0)
+}
--- a/src/libos/src/process/table.rs
+++ b/src/libos/src/process/table.rs
@ -0,0 +1,74 @@
+use super::{ProcessRef, ThreadRef};
+use crate::prelude::*;
+
+/// Look up a process by pid in the global process table.
+pub fn get_process(pid: pid_t) -> Result<ProcessRef> {
+    let process_table = PROCESS_TABLE.lock().unwrap();
+    process_table.get(pid)
+}
+
+/// Register a process in the global process table under its pid.
+pub(super) fn add_process(process: ProcessRef) -> Result<()> {
+    let mut process_table = PROCESS_TABLE.lock().unwrap();
+    let pid = process.pid();
+    process_table.add(pid, process)
+}
+
+/// Remove a process from the global process table and return it.
+pub(super) fn del_process(pid: pid_t) -> Result<ProcessRef> {
+    let mut process_table = PROCESS_TABLE.lock().unwrap();
+    process_table.del(pid)
+}
+
+/// Look up a thread by tid in the global thread table.
+pub fn get_thread(tid: pid_t) -> Result<ThreadRef> {
+    let thread_table = THREAD_TABLE.lock().unwrap();
+    thread_table.get(tid)
+}
+
+/// Register a thread in the global thread table under its tid.
+pub(super) fn add_thread(thread: ThreadRef) -> Result<()> {
+    let mut thread_table = THREAD_TABLE.lock().unwrap();
+    let tid = thread.tid();
+    thread_table.add(tid, thread)
+}
+
+/// Remove a thread from the global thread table and return it.
+pub(super) fn del_thread(tid: pid_t) -> Result<ThreadRef> {
+    let mut thread_table = THREAD_TABLE.lock().unwrap();
+    thread_table.del(tid)
+}
+
+/// Print the contents of both tables to the standard output.
+pub fn debug() {
+    // Each table is locked only while it is being printed
+    {
+        let process_table = PROCESS_TABLE.lock().unwrap();
+        println!("process table = {:#?}", process_table);
+    }
+    {
+        let thread_table = THREAD_TABLE.lock().unwrap();
+        println!("thread table = {:#?}", thread_table);
+    }
+    //println!("idle = {:#?}", *super::IDLE);
+}
+
+lazy_static! {
+    // The global table of processes, keyed by pid
+    static ref PROCESS_TABLE: SgxMutex<Table<ProcessRef>> =
+        SgxMutex::new(Table::with_capacity(8));
+    // The global table of threads, keyed by tid
+    static ref THREAD_TABLE: SgxMutex<Table<ThreadRef>> =
+        SgxMutex::new(Table::with_capacity(8));
+}
+
+/// A map from IDs to items; `get` returns a clone of the stored item.
+#[derive(Debug, Clone)]
+struct Table<I: Debug + Clone + Send + Sync> {
+    // Keyed by pid for the process table and by tid for the thread table
+    map: HashMap<pid_t, I>,
+}
+
+impl<I: Debug + Clone + Send + Sync> Table<I> {
+    /// Create an empty table with room for at least `capacity` items.
+    pub fn with_capacity(capacity: usize) -> Self {
+        Self {
+            map: HashMap::with_capacity(capacity),
+        }
+    }
+
+    /// Return a clone of the item with the given ID, or `ESRCH` if there is none.
+    pub fn get(&self, id: pid_t) -> Result<I> {
+        self.map
+            .get(&id)
+            .cloned()
+            .ok_or_else(|| errno!(ESRCH, "id does not exist"))
+    }
+
+    /// Add an item under the given ID, or return `EEXIST` if the ID is taken.
+    ///
+    /// An existing item is never overwritten.
+    pub fn add(&mut self, id: pid_t, item: I) -> Result<()> {
+        if self.map.contains_key(&id) {
+            return_errno!(EEXIST, "id is already added");
+        }
+        self.map.insert(id, item);
+        Ok(())
+    }
+
+    /// Remove and return the item with the given ID, or `ENOENT` if there is none.
+    pub fn del(&mut self, id: pid_t) -> Result<I> {
+        // A single `remove` both checks for the ID and takes the item out
+        self.map
+            .remove(&id)
+            .ok_or_else(|| errno!(ENOENT, "id does not exist"))
+    }
+}
--- a/src/libos/src/process/task.rs
+++ b/src/libos/src/process/task.rs
@ -1,184 +0,0 @@
-use std::mem;
-
-use super::*;
-
-/// Note: this definition must be in sync with task.h
-#[derive(Clone, Debug, Default)]
-#[repr(C)]
-pub struct Task {
-    kernel_rsp: usize,
-    kernel_stack_base: usize,
-    kernel_stack_limit: usize,
-    kernel_fs: usize,
-    user_rsp: usize,
-    user_stack_base: usize,
-    user_stack_limit: usize,
-    user_fs: usize,
-    user_entry_addr: usize,
-    saved_state: usize, // struct jmpbuf*
-}
-
-impl Task {
-    pub unsafe fn new(
-        user_entry_addr: usize,
-        user_rsp: usize,
-        user_stack_base: usize,
-        user_stack_limit: usize,
-        user_fs: Option<usize>,
-    ) -> Result<Task> {
-        if !(user_stack_base >= user_rsp && user_rsp > user_stack_limit) {
-            return_errno!(EINVAL, "Invalid user stack");
-        }
-
-        // Set the default user fsbase to an address on user stack, which is
-        // a relatively safe address in case the user program uses %fs before
-        // initializing fs base address.
-        let user_fs = user_fs.unwrap_or(user_stack_limit);
-
-        Ok(Task {
-            user_entry_addr,
-            user_rsp,
-            user_stack_base,
-            user_stack_limit,
-            user_fs,
-            ..Default::default()
-        })
-    }
-
-    pub fn set_user_fs(&mut self, user_fs: usize) {
-        self.user_fs = user_fs;
-    }
-
-    pub fn get_user_fs(&self) -> usize {
-        self.user_fs
-    }
-}
-
-lazy_static! {
-    static ref NEW_PROCESS_TABLE: SgxMutex<HashMap<pid_t, ProcessRef>> =
-        { SgxMutex::new(HashMap::new()) };
-}
-
-pub fn enqueue_task(new_tid: pid_t, new_process: ProcessRef) {
-    let existing_task = NEW_PROCESS_TABLE
-        .lock()
-        .unwrap()
-        .insert(new_tid, new_process);
-    // There should NOT have any pending process with the same ID
-    assert!(existing_task.is_none());
-}
-
-pub fn enqueue_and_exec_task(new_tid: pid_t, new_process: ProcessRef) {
-    enqueue_task(new_tid, new_process);
-
-    let mut ret = 0;
-    let ocall_status = unsafe { occlum_ocall_exec_thread_async(&mut ret, new_tid) };
-    if ocall_status != sgx_status_t::SGX_SUCCESS || ret != 0 {
-        panic!("Failed to start the process");
-    }
-}
-
-fn dequeue_task(libos_tid: pid_t) -> Result<ProcessRef> {
-    NEW_PROCESS_TABLE
-        .lock()
-        .unwrap()
-        .remove(&libos_tid)
-        .ok_or_else(|| errno!(EAGAIN, "the given TID does not match any pending process"))
-}
-
-pub fn run_task(libos_tid: pid_t, host_tid: pid_t) -> Result<i32> {
-    let new_process: ProcessRef = dequeue_task(libos_tid)?;
-    set_current(&new_process);
-
-    let (pid, task) = {
-        let mut process = new_process.lock().unwrap();
-        process.set_host_tid(host_tid);
-        let pid = process.get_pid();
-        let task = process.get_task_mut() as *mut Task;
-        (pid, task)
-    };
-
-    #[cfg(feature = "syscall_timing")]
-    GLOBAL_PROFILER
-        .lock()
-        .unwrap()
-        .thread_enter()
-        .expect("unexpected error from profiler to enter thread");
-
-    unsafe {
-        // task may only be modified by this function; so no lock is needed
-        do_run_task(task);
-    }
-
-    #[cfg(feature = "syscall_timing")]
-    GLOBAL_PROFILER
-        .lock()
-        .unwrap()
-        .thread_exit()
-        .expect("unexpected error from profiler to exit thread");
-
-    let (exit_status, parent_pid) = {
-        let mut process = new_process.lock().unwrap();
-        let parent = process.get_parent().lock().unwrap();
-        (process.get_exit_status(), parent.get_tid())
-    };
-
-    info!("Thread exited: tid = {}", libos_tid);
-
-    // If process's parent is the IDLE_PROCESS (pid = 0), so it has to release itself
-    if parent_pid == 0 {
-        process_table::remove(pid);
-    }
-
-    reset_current();
-    Ok(exit_status)
-}
-
-thread_local! {
-    static _CURRENT_PROCESS_PTR: Cell<*const SgxMutex<Process>> = {
-        Cell::new(0 as *const SgxMutex<Process>)
-    };
-    // for log getting pid without locking process
-    static _TID: Cell<pid_t> = Cell::new(0);
-}
-
-pub fn get_current_tid() -> pid_t {
-    _TID.with(|tid_cell| tid_cell.get())
-}
-
-pub fn get_current() -> ProcessRef {
-    let current_ptr = _CURRENT_PROCESS_PTR.with(|cell| cell.get());
-
-    let current_ref = unsafe { Arc::from_raw(current_ptr) };
-    let current_ref_clone = current_ref.clone();
-    Arc::into_raw(current_ref);
-
-    current_ref_clone
-}
-
-fn set_current(process: &ProcessRef) {
-    let tid = process.lock().unwrap().get_tid();
-    _TID.with(|tid_cell| tid_cell.set(tid));
-
-    let process_ref_clone = process.clone();
-    let process_ptr = Arc::into_raw(process_ref_clone);
-
-    _CURRENT_PROCESS_PTR.with(|cp| {
-        cp.set(process_ptr);
-    });
-}
-
-fn reset_current() {
-    _TID.with(|tid_cell| tid_cell.set(0));
-    let mut process_ptr = _CURRENT_PROCESS_PTR.with(|cp| cp.replace(0 as *const SgxMutex<Process>));
-
-    // Prevent memory leakage
-    unsafe {
-        drop(Arc::from_raw(process_ptr));
-    }
-}
-
-extern "C" {
-    fn occlum_ocall_exec_thread_async(ret: *mut i32, libos_tid: pid_t) -> sgx_status_t;
-    fn do_run_task(task: *mut Task) -> i32;
-}
--- a/src/libos/src/process/task/exec.rs
+++ b/src/libos/src/process/task/exec.rs
@ -0,0 +1,81 @@
+use super::super::{current, ThreadRef};
+use super::Task;
+use crate::prelude::*;
+
+/// Register a new thread in the pending-thread table so that it can be
+/// picked up and executed later (see `exec`).
+///
+/// # Panics
+///
+/// Panics if a thread with the same TID is already pending.
+pub fn enqueue(new_thread: ThreadRef) {
+    let new_tid = new_thread.tid();
+    // The table lock is only held inside this block; it is released before
+    // the sanity check below.
+    let existing_thread = {
+        let mut pending_threads = NEW_THREAD_TABLE.lock().unwrap();
+        pending_threads.insert(new_tid, new_thread)
+    };
+    // No two pending threads may share the same TID
+    assert!(existing_thread.is_none());
+}
+
+/// Enqueue a new thread, then ask the host (through an OCall) to execute it
+/// in a separate host thread.
+///
+/// # Panics
+///
+/// Panics if the OCall fails or reports a non-zero return value.
+pub fn enqueue_and_exec(new_thread: ThreadRef) {
+    // Remember the TID up front, since `enqueue` takes ownership of the thread
+    let tid = new_thread.tid();
+    enqueue(new_thread);
+
+    // TODO: check whether there is any free TCS before doing the OCall
+    let mut ret: i32 = 0;
+    let ocall_status = unsafe { occlum_ocall_exec_thread_async(&mut ret, tid) };
+    assert!(ocall_status == sgx_status_t::SGX_SUCCESS && ret == 0);
+}
+
+/// Take the pending thread with the given TID out of the pending-thread table.
+///
+/// Returns `EAGAIN` if no pending thread has that TID.
+fn dequeue(libos_tid: pid_t) -> Result<ThreadRef> {
+    let mut pending_threads = NEW_THREAD_TABLE.lock().unwrap();
+    match pending_threads.remove(&libos_tid) {
+        Some(thread) => Ok(thread),
+        None => Err(errno!(
+            EAGAIN,
+            "the given TID does not match any pending thread"
+        )),
+    }
+}
+
+/// Execute the specified LibOS thread in the current host thread.
+///
+/// The thread must have been registered beforehand with `enqueue` (or
+/// `enqueue_and_exec`). On success, the exit status of the thread is returned.
+///
+/// # Errors
+///
+/// Returns `EAGAIN` if no pending thread has the TID `libos_tid`.
+///
+/// # Panics
+///
+/// Panics if the thread has no exit status once `do_exec_task` returns.
+pub fn exec(libos_tid: pid_t, host_tid: pid_t) -> Result<i32> {
+    // Take ownership of the pending thread and mark it as started
+    let new_thread: ThreadRef = dequeue(libos_tid)?;
+    new_thread.start(host_tid);
+
+    // Enable current::get() from now on
+    current::set(new_thread.clone());
+
+    #[cfg(feature = "syscall_timing")]
+    GLOBAL_PROFILER
+        .lock()
+        .unwrap()
+        .thread_enter()
+        .expect("unexpected error from profiler to enter thread");
+
+    unsafe {
+        // The task may only be modified by this function, so no lock is needed.
+        // The shared reference is cast to `*mut Task` because the C side
+        // (`do_exec_task` in task.c) writes into the task.
+        do_exec_task(new_thread.task() as *const Task as *mut Task);
+    }
+
+    #[cfg(feature = "syscall_timing")]
+    GLOBAL_PROFILER
+        .lock()
+        .unwrap()
+        .thread_exit()
+        .expect("unexpected error from profiler to exit thread");
+
+    // The exit status is only available in the `Exited` state; `unwrap`
+    // panics if the thread has not reached that state by now.
+    let exit_status = new_thread.inner().exit_status().unwrap();
+    info!(
+        "Thread exited: tid = {}, exit_status = {}",
+        libos_tid, exit_status
+    );
+
+    // Disable current::get()
+    current::reset();
+
+    Ok(exit_status)
+}
+
+lazy_static! {
+    // The table of pending threads, keyed by TID: threads are inserted by
+    // `enqueue` and taken out again by `dequeue` (called from `exec`).
+    static ref NEW_THREAD_TABLE: SgxMutex<HashMap<pid_t, ThreadRef>> =
+        { SgxMutex::new(HashMap::new()) };
+}
+
+extern "C" {
+    // OCall used by `enqueue_and_exec` to have the host execute the LibOS
+    // thread `libos_tid` in a separate host thread. The OCall's own return
+    // value is written to `ret`.
+    fn occlum_ocall_exec_thread_async(ret: *mut i32, libos_tid: pid_t) -> sgx_status_t;
+    // Implemented in task.c; runs the given task on the current host thread.
+    fn do_exec_task(task: *mut Task) -> i32;
+}
--- a/src/libos/src/process/task/mod.rs
+++ b/src/libos/src/process/task/mod.rs
@ -0,0 +1,60 @@
+//! Task is the low-level representation for the execution of a thread.
+use std::sync::atomic::{AtomicUsize, Ordering};
+
+use crate::prelude::*;
+
+pub use self::exec::{enqueue, enqueue_and_exec, exec};
+
+mod exec;
+
+/// Note: this definition must be in sync with task.h
+///
+/// `#[repr(C)]` keeps the field order and layout fixed, since the same memory
+/// is also accessed from C code (see `do_exec_task` in task.c).
+#[derive(Debug, Default)]
+#[repr(C)]
+pub struct Task {
+    // Kernel-side fields: zero-initialized by `Task::new` via `Default`.
+    // NOTE(review): presumably filled in by the C/assembly side -- confirm.
+    kernel_rsp: usize,
+    kernel_stack_base: usize,
+    kernel_stack_limit: usize,
+    kernel_fs: usize,
+    // User-side fields: provided by the caller of `Task::new`, which checks
+    // that `user_stack_base >= user_rsp > user_stack_limit`.
+    user_rsp: usize,
+    user_stack_base: usize,
+    user_stack_limit: usize,
+    // Atomic so that it can be updated through a shared reference
+    // (see `set_user_fs`).
+    user_fs: AtomicUsize,
+    user_entry_addr: usize,
+    // Set by `do_exec_task` in task.c to point at its local `jmp_buf`.
+    saved_state: usize, // struct jmpbuf*
+}
+
+impl Task {
+    /// Create a task that starts at `user_entry_addr` with the given user
+    /// stack and, optionally, an initial user FS base.
+    ///
+    /// All kernel-side fields are left zero-initialized.
+    ///
+    /// # Errors
+    ///
+    /// Returns `EINVAL` unless
+    /// `user_stack_base >= user_rsp > user_stack_limit`.
+    ///
+    /// # Safety
+    ///
+    /// NOTE(review): the exact contract is not stated in this file;
+    /// presumably the caller must guarantee that the given addresses refer to
+    /// valid user memory -- confirm.
+    pub unsafe fn new(
+        user_entry_addr: usize,
+        user_rsp: usize,
+        user_stack_base: usize,
+        user_stack_limit: usize,
+        user_fs: Option<usize>,
+    ) -> Result<Task> {
+        // The stack pointer must lie within (user_stack_limit, user_stack_base]
+        if user_rsp > user_stack_base || user_rsp <= user_stack_limit {
+            return_errno!(EINVAL, "Invalid user stack");
+        }
+
+        // When no FS base is given, fall back to an address on the user
+        // stack. This is a relatively safe address in case the user program
+        // touches %fs before initializing the FS base itself.
+        let initial_user_fs = match user_fs {
+            Some(fs_base) => fs_base,
+            None => user_stack_limit,
+        };
+
+        Ok(Task {
+            user_entry_addr,
+            user_rsp,
+            user_stack_base,
+            user_stack_limit,
+            user_fs: AtomicUsize::new(initial_user_fs),
+            ..Default::default()
+        })
+    }
+
+    /// Update the user FS base of this task.
+    pub(super) fn set_user_fs(&self, user_fs: usize) {
+        self.user_fs.store(user_fs, Ordering::SeqCst);
+    }
+
+    /// Return the current user FS base of this task.
+    pub fn user_fs(&self) -> usize {
+        self.user_fs.load(Ordering::SeqCst)
+    }
+}
--- a/src/libos/src/process/task/task.c
+++ b/src/libos/src/process/task/task.c
@ -13,7 +13,7 @@ typedef struct _thread_data_t
 extern thread_data_t *get_thread_data(void);


-extern void __run_task(struct Task* task);
+extern void __exec_task(struct Task* task);

 extern uint64_t __get_stack_guard(void);
 extern void __set_stack_guard(uint64_t new_val);
@ -59,7 +59,7 @@ void switch_td_to_user(const struct Task* task) {
    td->stack_commit_addr = task->user_stack_limit;
 }

-int do_run_task(struct Task* task) {
+int do_exec_task(struct Task* task) {
    jmp_buf libos_state = {0};
    thread_data_t* td = get_thread_data();
    task->saved_state = &libos_state;
@ -73,7 +73,7 @@ int do_run_task(struct Task* task) {

    int second = setjmp(libos_state);
    if (!second) {
-        __run_task(task);
+        __exec_task(task);
    }

    // Jump from do_exit_task
--- a/src/libos/src/process/task/task_x86-64.S
+++ b/src/libos/src/process/task/task_x86-64.S
@ -27,9 +27,9 @@ __set_stack_guard:
    mov %rdi, %gs:(TD_TASK_OFFSET)
    ret

-    .global __run_task
-    .type __run_task, @function
-__run_task:
+    .global __exec_task
+    .type __exec_task, @function
+__exec_task:
    // Save kernel fsbase and use user fsbase
    //
    // SGX HW Mode and SIM Mode require different implementations. In SGX hardware
--- a/src/libos/src/process/thread.rs
+++ b/src/libos/src/process/thread.rs
@ -1,137 +0,0 @@
-use super::vm::VMRange;
-use super::*;
-
-pub struct ThreadGroup {
-    threads: Vec<ProcessRef>,
-}
-
-impl ThreadGroup {}
-
-bitflags! {
-    pub struct CloneFlags : u32 {
-        const CLONE_VM              = 0x00000100;
-        const CLONE_FS              = 0x00000200;
-        const CLONE_FILES           = 0x00000400;
-        const CLONE_SIGHAND         = 0x00000800;
-        const CLONE_PTRACE          = 0x00002000;
-        const CLONE_VFORK           = 0x00004000;
-        const CLONE_PARENT          = 0x00008000;
-        const CLONE_THREAD          = 0x00010000;
-        const CLONE_NEWNS           = 0x00020000;
-        const CLONE_SYSVSEM         = 0x00040000;
-        const CLONE_SETTLS          = 0x00080000;
-        const CLONE_PARENT_SETTID   = 0x00100000;
-        const CLONE_CHILD_CLEARTID  = 0x00200000;
-        const CLONE_DETACHED        = 0x00400000;
-        const CLONE_UNTRACED        = 0x00800000;
-        const CLONE_CHILD_SETTID    = 0x01000000;
-        const CLONE_NEWCGROUP       = 0x02000000;
-        const CLONE_NEWUTS          = 0x04000000;
-        const CLONE_NEWIPC          = 0x08000000;
-        const CLONE_NEWUSER         = 0x10000000;
-        const CLONE_NEWPID          = 0x20000000;
-        const CLONE_NEWNET          = 0x40000000;
-        const CLONE_IO              = 0x80000000;
-    }
-}
-
-pub fn do_clone(
-    flags: CloneFlags,
-    user_rsp: usize,
-    ptid: Option<*mut pid_t>,
-    ctid: Option<*mut pid_t>,
-    new_tls: Option<usize>,
-) -> Result<pid_t> {
-    debug!(
-        "clone: flags: {:?}, stack_addr: {:?}, ptid: {:?}, ctid: {:?}, new_tls: {:?}",
-        flags, user_rsp, ptid, ctid, new_tls
-    );
-    // TODO: return error for unsupported flags
-
-    let current_ref = get_current();
-    let current = current_ref.lock().unwrap();
-
-    // The calling convention of Occlum clone syscall requires the user to
-    // store the entry point of the new thread at the top of the user stack.
-    let thread_entry = unsafe {
-        *(user_rsp as *mut usize)
-        // TODO: check user_entry is a cfi_label
-    };
-
-    let (new_thread_pid, new_thread_ref) = {
-        let vm_ref = current.get_vm().clone();
-        let task = {
-            let vm = vm_ref.lock().unwrap();
-            let user_stack_range = guess_user_stack_bound(&vm, user_rsp)?;
-            let user_stack_base = user_stack_range.end();
-            let user_stack_limit = user_stack_range.start();
-            unsafe {
-                Task::new(
-                    thread_entry,
-                    user_rsp,
-                    user_stack_base,
-                    user_stack_limit,
-                    new_tls,
-                )?
-            }
-        };
-        let files_ref = current.get_files().clone();
-        let rlimits_ref = current.get_rlimits().clone();
-        let elf_path = &current.elf_path;
-        let cwd = &current.cwd;
-        Process::new(cwd, elf_path, task, vm_ref, files_ref, rlimits_ref, true)?
-    };
-
-    if let Some(ctid) = ctid {
-        let mut new_thread = new_thread_ref.lock().unwrap();
-        new_thread.clear_child_tid = Some(ctid);
-    }
-
-    // TODO: always get parent lock first to avoid deadlock
-    {
-        let parent_ref = current.parent.as_ref().unwrap();
-        let mut parent = parent_ref.lock().unwrap();
-        let mut new_thread = new_thread_ref.lock().unwrap();
-        parent.children.push(Arc::downgrade(&new_thread_ref));
-        new_thread.parent = Some(parent_ref.clone());
-
-        new_thread.tgid = current.tgid;
-    }
-
-    process_table::put(new_thread_pid, new_thread_ref.clone());
-    info!("Thread created: tid = {}", new_thread_pid);
-
-    if let Some(ptid) = ptid {
-        unsafe {
-            *ptid = new_thread_pid;
-        }
-    }
-
-    task::enqueue_and_exec_task(new_thread_pid, new_thread_ref);
-    Ok(new_thread_pid)
-}
-
-pub fn do_set_tid_address(tidptr: *mut pid_t) -> Result<pid_t> {
-    debug!("set_tid_address: tidptr: {:#x}", tidptr as usize);
-    let current_ref = get_current();
-    let mut current = current_ref.lock().unwrap();
-    current.clear_child_tid = Some(tidptr);
-    Ok(current.get_tid())
-}
-
-fn guess_user_stack_bound(vm: &ProcessVM, user_rsp: usize) -> Result<&VMRange> {
-    // The first case is most likely
-    if let Ok(stack_range) = vm.find_mmap_region(user_rsp) {
-        Ok(stack_range)
-    }
-    // The next three cases are very unlikely, but valid
-    else if vm.get_stack_range().contains(user_rsp) {
-        Ok(vm.get_stack_range())
-    } else if vm.get_heap_range().contains(user_rsp) {
-        Ok(vm.get_heap_range())
-    }
-    // Invalid
-    else {
-        return_errno!(ESRCH, "invalid rsp")
-    }
-}
--- a/src/libos/src/process/thread/builder.rs
+++ b/src/libos/src/process/thread/builder.rs
@ -0,0 +1,112 @@
+use std::ptr::NonNull;
+
+use super::{
+    FileTableRef, FsViewRef, ProcessRef, ProcessVM, ProcessVMRef, ResourceLimitsRef, Task, Thread,
+    ThreadId, ThreadInner, ThreadRef,
+};
+use crate::prelude::*;
+
+/// A builder for `Thread`.
+///
+/// Set the desired fields with the setter methods, then call `build` to
+/// create the thread.
+#[derive(Debug)]
+pub struct ThreadBuilder {
+    // Mandatory fields: `build` fails with EINVAL if `task`, `process` or
+    // `vm` is missing. Note that `tid` is the exception here: if unset,
+    // `build` allocates a fresh thread ID.
+    tid: Option<ThreadId>,
+    task: Option<Task>,
+    process: Option<ProcessRef>,
+    vm: Option<ProcessVMRef>,
+    // Optional fields: `build` falls back to default values if unset
+    fs: Option<FsViewRef>,
+    files: Option<FileTableRef>,
+    rlimits: Option<ResourceLimitsRef>,
+    clear_ctid: Option<NonNull<pid_t>>,
+}
+
+impl ThreadBuilder {
+    /// Create a builder with every field unset.
+    pub fn new() -> Self {
+        Self {
+            tid: None,
+            task: None,
+            process: None,
+            vm: None,
+            fs: None,
+            files: None,
+            rlimits: None,
+            clear_ctid: None,
+        }
+    }
+
+    /// Use the given thread ID instead of allocating a new one.
+    pub fn tid(self, tid: ThreadId) -> Self {
+        Self {
+            tid: Some(tid),
+            ..self
+        }
+    }
+
+    /// Set the low-level task of the thread (mandatory).
+    pub fn task(self, task: Task) -> Self {
+        Self {
+            task: Some(task),
+            ..self
+        }
+    }
+
+    /// Set the process that owns the thread (mandatory).
+    pub fn process(self, process: ProcessRef) -> Self {
+        Self {
+            process: Some(process),
+            ..self
+        }
+    }
+
+    /// Set the virtual memory of the thread (mandatory).
+    pub fn vm(self, vm: ProcessVMRef) -> Self {
+        Self {
+            vm: Some(vm),
+            ..self
+        }
+    }
+
+    /// Set the file system view of the thread.
+    pub fn fs(self, fs: FsViewRef) -> Self {
+        Self {
+            fs: Some(fs),
+            ..self
+        }
+    }
+
+    /// Set the file table of the thread.
+    pub fn files(self, files: FileTableRef) -> Self {
+        Self {
+            files: Some(files),
+            ..self
+        }
+    }
+
+    /// Set the resource limits of the thread.
+    pub fn rlimits(self, rlimits: ResourceLimitsRef) -> Self {
+        Self {
+            rlimits: Some(rlimits),
+            ..self
+        }
+    }
+
+    /// Set the initial `clear_ctid` address of the thread.
+    pub fn clear_ctid(self, clear_tid_addr: NonNull<pid_t>) -> Self {
+        Self {
+            clear_ctid: Some(clear_tid_addr),
+            ..self
+        }
+    }
+
+    /// Create the thread and add it to the thread list of its process.
+    ///
+    /// # Errors
+    ///
+    /// Returns `EINVAL` if the task, the process or the memory is not set.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the thread list of the process is unavailable, i.e.,
+    /// `threads_mut()` returns `None`.
+    pub fn build(self) -> Result<ThreadRef> {
+        let Self {
+            tid,
+            task,
+            process,
+            vm,
+            fs,
+            files,
+            rlimits,
+            clear_ctid,
+        } = self;
+
+        // A fresh thread ID is allocated unless one is given explicitly
+        let tid = tid.unwrap_or_else(ThreadId::new);
+        let task = task.ok_or_else(|| errno!(EINVAL, "task is mandatory"))?;
+        let process = process.ok_or_else(|| errno!(EINVAL, "process is mandatory"))?;
+        let vm = vm.ok_or_else(|| errno!(EINVAL, "memory is mandatory"))?;
+
+        let new_thread = Arc::new(Thread {
+            task,
+            tid,
+            clear_ctid: SgxRwLock::new(clear_ctid),
+            inner: SgxMutex::new(ThreadInner::new()),
+            process,
+            vm,
+            fs: fs.unwrap_or_default(),
+            files: files.unwrap_or_default(),
+            rlimits: rlimits.unwrap_or_default(),
+        });
+
+        // Make the new thread visible to its owner process. The lock on the
+        // process is released at the end of this block.
+        {
+            let mut process_inner = new_thread.process().inner();
+            process_inner
+                .threads_mut()
+                .unwrap()
+                .push(new_thread.clone());
+        }
+
+        Ok(new_thread)
+    }
+}
--- a/src/libos/src/process/thread/id.rs
+++ b/src/libos/src/process/thread/id.rs
@ -0,0 +1,100 @@
+use std::collections::HashSet;
+
+use crate::prelude::*;
+
+/// ThreadId implements self-managed thread IDs.
+///
+/// Each instance of ThreadId obtained from `ThreadId::new` is guaranteed to
+/// have a unique ID. And when such an instance is dropped, its ID is
+/// automatically freed too. The "zero" ID (see `ThreadId::zero`) is not
+/// tracked by the allocator.
+#[derive(Debug, PartialEq)]
+pub struct ThreadId {
+    tid: u32,
+}
+
+impl ThreadId {
+    /// Allocate a new thread ID from the global ID allocator.
+    ///
+    /// The returned thread ID always has a value greater than zero.
+    pub fn new() -> ThreadId {
+        let tid = THREAD_ID_ALLOC.lock().unwrap().alloc();
+        ThreadId { tid }
+    }
+
+    /// Create the "zero" thread ID.
+    ///
+    /// The "zero" thread ID is reserved for the idle process. It does not
+    /// come from the ID allocator.
+    pub fn zero() -> ThreadId {
+        ThreadId { tid: 0 }
+    }
+
+    /// Return the numeric value of the thread ID.
+    pub fn as_u32(&self) -> u32 {
+        self.tid
+    }
+}
+
+impl Drop for ThreadId {
+    /// Give the ID back to the global ID allocator.
+    ///
+    /// The "zero" thread ID never came from the allocator, so it is skipped.
+    fn drop(&mut self) {
+        if self.tid != 0 {
+            THREAD_ID_ALLOC
+                .lock()
+                .unwrap()
+                .free(self.tid)
+                .expect("tid must has been allocated");
+        }
+    }
+}
+
+lazy_static! {
+    // The global ID allocator behind `ThreadId::new` (alloc) and the `Drop`
+    // implementation of `ThreadId` (free).
+    static ref THREAD_ID_ALLOC: SgxMutex<IdAlloc> = SgxMutex::new(IdAlloc::new());
+}
+
+/// PID/TID allocator.
+///
+/// The allocation strategy is to start from the minimal value (here, 1) and
+/// to increment the ID on each allocation, until the maximum value (2^32-1)
+/// is reached. After that, the allocator wraps around to the minimal value
+/// and skips every ID that is still in use.
+///
+/// The allocation strategy above follows the *nix tradition.
+///
+/// Note that PID/TID 0 is reserved for the idle process. So the ID allocator
+/// never returns 0.
+#[derive(Debug, Clone)]
+struct IdAlloc {
+    /// The most recently visited ID; the search for a free ID resumes right
+    /// after it.
+    next_id: u32,
+    /// The IDs that are currently allocated.
+    used_ids: HashSet<u32>,
+}
+
+impl IdAlloc {
+    /// Create an allocator with no ID in use.
+    pub fn new() -> Self {
+        IdAlloc {
+            next_id: 0,
+            used_ids: HashSet::new(),
+        }
+    }
+
+    /// Allocate the next free ID, which is always greater than zero.
+    pub fn alloc(&mut self) -> u32 {
+        loop {
+            // Advance to the next candidate; skip 0 when wrapping around
+            self.next_id = match self.next_id.wrapping_add(1) {
+                0 => 1,
+                candidate => candidate,
+            };
+
+            // `insert` returns true only if the ID was not in use before
+            if self.used_ids.insert(self.next_id) {
+                return self.next_id;
+            }
+        }
+    }
+
+    /// Free a previously allocated ID.
+    ///
+    /// Returns the ID on success, or `None` if the ID was not in use.
+    pub fn free(&mut self, id: u32) -> Option<u32> {
+        debug_assert!(self.used_ids.contains(&id));
+        match self.used_ids.remove(&id) {
+            true => Some(id),
+            false => None,
+        }
+    }
+}
--- a/src/libos/src/process/thread/mod.rs
+++ b/src/libos/src/process/thread/mod.rs
@ -0,0 +1,186 @@
+use std::fmt;
+use std::ptr::NonNull;
+
+use super::task::Task;
+use super::{
+    FileTableRef, FsViewRef, ProcessRef, ProcessVM, ProcessVMRef, ResourceLimitsRef, ThreadRef,
+};
+use crate::prelude::*;
+
+pub use self::builder::ThreadBuilder;
+pub use self::id::ThreadId;
+
+mod builder;
+mod id;
+
+/// A thread, which bundles a low-level task with the process it belongs to
+/// and the resources (memory, file system view, file table and resource
+/// limits) that it uses.
+pub struct Thread {
+    // Low-level info
+    task: Task,
+    // Immutable info
+    tid: ThreadId,
+    // Mutable info
+    // NOTE(review): `clear_ctid` is presumably the address registered via
+    // CLONE_CHILD_CLEARTID or set_tid_address -- confirm against the callers.
+    clear_ctid: SgxRwLock<Option<NonNull<pid_t>>>,
+    // The lifecycle state of the thread (Init, Live or Exited)
+    inner: SgxMutex<ThreadInner>,
+    // Process
+    process: ProcessRef,
+    // Resources
+    vm: ProcessVMRef,
+    fs: FsViewRef,
+    files: FileTableRef,
+    rlimits: ResourceLimitsRef,
+}
+
+/// The status of a thread, as reported by `ThreadInner::status`.
+#[derive(Debug, PartialEq, Clone, Copy)]
+pub enum ThreadStatus {
+    /// The thread has been created, but not started yet (`ThreadInner::Init`).
+    Init,
+    /// The thread has been started (`ThreadInner::Live`).
+    Running,
+    /// The thread has exited (`ThreadInner::Exited`).
+    Exited,
+}
+
+impl Thread {
+    /// The process that owns this thread.
+    pub fn process(&self) -> &ProcessRef {
+        &self.process
+    }
+
+    /// The low-level task of this thread.
+    pub fn task(&self) -> &Task {
+        &self.task
+    }
+
+    /// The ID of this thread.
+    pub fn tid(&self) -> pid_t {
+        self.tid.as_u32()
+    }
+
+    /// The current status of this thread.
+    pub fn status(&self) -> ThreadStatus {
+        let inner = self.inner();
+        inner.status()
+    }
+
+    /// The virtual memory used by this thread.
+    pub fn vm(&self) -> &ProcessVMRef {
+        &self.vm
+    }
+
+    /// The file table used by this thread.
+    pub fn files(&self) -> &FileTableRef {
+        &self.files
+    }
+
+    /// Look up a file in the file table by its file descriptor.
+    pub fn file(&self, fd: FileDesc) -> Result<FileRef> {
+        self.files.lock().unwrap().get(fd)
+    }
+
+    /// Put a file into the file table and return its file descriptor.
+    pub fn add_file(&self, new_file: FileRef, close_on_spawn: bool) -> FileDesc {
+        self.files.lock().unwrap().put(new_file, close_on_spawn)
+    }
+
+    /// The file system view used by this thread.
+    pub fn fs(&self) -> &FsViewRef {
+        &self.fs
+    }
+
+    /// The resource limits that apply to this thread.
+    pub fn rlimits(&self) -> &ResourceLimitsRef {
+        &self.rlimits
+    }
+
+    /// Return the current `clear_ctid` address, if any.
+    pub fn clear_ctid(&self) -> Option<NonNull<pid_t>> {
+        let clear_ctid = self.clear_ctid.read().unwrap();
+        *clear_ctid
+    }
+
+    /// Replace the `clear_ctid` address.
+    pub fn set_clear_ctid(&self, new_clear_ctid: Option<NonNull<pid_t>>) {
+        let mut clear_ctid = self.clear_ctid.write().unwrap();
+        *clear_ctid = new_clear_ctid;
+    }
+
+    /// Mark the thread as started on the host thread `host_tid`.
+    pub(super) fn start(&self, host_tid: pid_t) {
+        let mut inner = self.inner();
+        inner.start(host_tid);
+    }
+
+    /// Mark the thread as exited and remove it from its owner process.
+    ///
+    /// Returns the number of threads that remain in the process.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the thread is not in the thread list of its process.
+    pub(super) fn exit(&self, exit_status: i32) -> usize {
+        let my_tid = self.tid();
+
+        // The process stays locked until the end of this function
+        let mut process_inner = self.process.inner();
+        let siblings = process_inner.threads_mut().unwrap();
+
+        // Remove this thread from its owner process. The order of the
+        // remaining threads is not preserved.
+        let my_index = siblings
+            .iter()
+            .position(|sibling| sibling.tid() == my_tid)
+            .expect("the thread must belong to the process");
+        siblings.swap_remove(my_index);
+
+        self.inner().exit(exit_status);
+
+        siblings.len()
+    }
+
+    /// Lock and return the lifecycle state of this thread.
+    pub(super) fn inner(&self) -> SgxMutexGuard<ThreadInner> {
+        self.inner.lock().unwrap()
+    }
+}
+
+/// Two threads are considered equal if they have the same thread ID.
+impl PartialEq for Thread {
+    fn eq(&self, other: &Self) -> bool {
+        self.tid == other.tid
+    }
+}
+
+// Why manual implementation of Debug trait?
+//
+// An explicit implementation of the Debug trait is required since the Process
+// and Thread structs refer to each other. Thus, the automatically-derived
+// implementations of the Debug trait for the two structs may lead to an
+// infinite loop.
+
+impl fmt::Debug for Thread {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("Thread")
+            .field("tid", &self.tid())
+            // Print only the PID of the owner process, not the process itself,
+            // so that the output does not recurse back into this thread
+            .field("pid", &self.process().pid())
+            .field("inner", &self.inner())
+            .field("vm", self.vm())
+            .field("fs", self.fs())
+            .field("files", self.files())
+            .finish()
+    }
+}
+
+// Thread contains a `NonNull<pid_t>` (in `clear_ctid`), which is neither Send
+// nor Sync, so the two traits must be implemented manually.
+// NOTE(review): soundness relies on the pointer only being dereferenced in a
+// thread-safe manner by its users -- confirm.
+unsafe impl Send for Thread {}
+unsafe impl Sync for Thread {}
+
+/// The lifecycle state of a thread, together with the data that is only
+/// available in that state.
+#[derive(Debug)]
+pub enum ThreadInner {
+    /// Created, but not started yet.
+    Init,
+    /// Started; `host_tid` is the value given to `start`.
+    Live { host_tid: pid_t },
+    /// Exited with the given exit status.
+    Exited { exit_status: i32 },
+}
+
+impl ThreadInner {
+    /// Create the state of a thread that has not been started yet.
+    pub fn new() -> Self {
+        ThreadInner::Init
+    }
+
+    /// Return the status that corresponds to the current state.
+    pub fn status(&self) -> ThreadStatus {
+        match *self {
+            ThreadInner::Init => ThreadStatus::Init,
+            ThreadInner::Live { .. } => ThreadStatus::Running,
+            ThreadInner::Exited { .. } => ThreadStatus::Exited,
+        }
+    }
+
+    /// Return the exit status, which is only available after the exit.
+    pub fn exit_status(&self) -> Option<i32> {
+        if let ThreadInner::Exited { exit_status } = *self {
+            Some(exit_status)
+        } else {
+            None
+        }
+    }
+
+    /// Return the host thread ID, which is only available while live.
+    pub fn host_tid(&self) -> Option<pid_t> {
+        if let ThreadInner::Live { host_tid } = *self {
+            Some(host_tid)
+        } else {
+            None
+        }
+    }
+
+    /// Switch to the live state. The thread is expected to be in the initial
+    /// state, which is only checked in debug builds.
+    pub fn start(&mut self, host_tid: pid_t) {
+        debug_assert!(self.status() == ThreadStatus::Init);
+        *self = ThreadInner::Live { host_tid };
+    }
+
+    /// Switch to the exited state. The thread is expected to be running,
+    /// which is only checked in debug builds.
+    pub fn exit(&mut self, exit_status: i32) {
+        debug_assert!(self.status() == ThreadStatus::Running);
+        *self = ThreadInner::Exited { exit_status };
+    }
+}
--- a/src/libos/src/process/wait.rs
+++ b/src/libos/src/process/wait.rs
@ -1,4 +1,5 @@
-use super::*;
+//! A wait/wakeup mechanism that connects wait4 and exit system calls.
+use crate::prelude::*;

 #[derive(Debug)]
 pub struct Waiter<D, R>
--- a/src/libos/src/syscall/arch_prctl_x86-64.S
+++ b/src/libos/src/syscall/arch_prctl_x86-64.S
@ -1,3 +1,5 @@
+# In SGX simulation mode, we don't use wrfsbase directly. Instead, we use arch_prctl syscall.
+
 #if SGX_MODE_SIM

 #define __ASSEMBLY__
--- a/src/libos/src/syscall/mod.rs
+++ b/src/libos/src/syscall/mod.rs
@ -7,21 +7,6 @@
 //! 3. Preprocess the system call and then call `dispatch_syscall` (in this file)
 //! 4. Call `do_*` to process the system call (in other modules)

-use fs::{
-    do_access, do_chdir, do_chmod, do_chown, do_close, do_dup, do_dup2, do_dup3, do_eventfd,
-    do_eventfd2, do_faccessat, do_fchmod, do_fchown, do_fcntl, do_fdatasync, do_fstat, do_fstatat,
-    do_fsync, do_ftruncate, do_getdents64, do_ioctl, do_lchown, do_link, do_lseek, do_lstat,
-    do_mkdir, do_open, do_openat, do_pipe, do_pipe2, do_pread, do_pwrite, do_read, do_readlink,
-    do_readv, do_rename, do_rmdir, do_sendfile, do_stat, do_sync, do_truncate, do_unlink, do_write,
-    do_writev, iovec_t, File, FileDesc, FileRef, HostStdioFds, Stat,
-};
-use misc::{resource_t, rlimit_t, utsname_t};
-use net::{
-    do_epoll_create, do_epoll_create1, do_epoll_ctl, do_epoll_pwait, do_epoll_wait, do_poll,
-    do_recvmsg, do_select, do_sendmsg, msghdr, msghdr_mut, AsSocket, AsUnixSocket, EpollEvent,
-    SocketFile, UnixSocketFile,
-};
-use process::{pid_t, ChildProcessFilter, CloneFlags, CpuSet, FileAction, FutexFlags, FutexOp};
 use std::any::Any;
 use std::convert::TryFrom;
 use std::ffi::{CStr, CString};
@ -30,8 +15,28 @@ use std::ptr;
 use time::{clockid_t, timespec_t, timeval_t, GLOBAL_PROFILER};
 use util::log::{self, LevelFilter};
 use util::mem_util::from_user::*;
-use vm::{MMapFlags, VMPerms};
-use {fs, process, std, vm};
+
+use crate::fs::{
+    do_access, do_chdir, do_chmod, do_chown, do_close, do_dup, do_dup2, do_dup3, do_eventfd,
+    do_eventfd2, do_faccessat, do_fchmod, do_fchown, do_fcntl, do_fdatasync, do_fstat, do_fstatat,
+    do_fsync, do_ftruncate, do_getcwd, do_getdents64, do_ioctl, do_lchown, do_link, do_lseek,
+    do_lstat, do_mkdir, do_open, do_openat, do_pipe, do_pipe2, do_pread, do_pwrite, do_read,
+    do_readlink, do_readv, do_rename, do_rmdir, do_sendfile, do_stat, do_sync, do_truncate,
+    do_unlink, do_write, do_writev, iovec_t, File, FileDesc, FileRef, HostStdioFds, Stat,
+};
+use crate::misc::{resource_t, rlimit_t, utsname_t};
+use crate::net::{
+    do_epoll_create, do_epoll_create1, do_epoll_ctl, do_epoll_pwait, do_epoll_wait, do_poll,
+    do_recvmsg, do_select, do_sendmsg, msghdr, msghdr_mut, AsSocket, AsUnixSocket, EpollEvent,
+    SocketFile, UnixSocketFile,
+};
+use crate::process::{
+    do_arch_prctl, do_clone, do_exit, do_futex, do_getegid, do_geteuid, do_getgid, do_getpgid,
+    do_getpid, do_getppid, do_gettid, do_getuid, do_sched_getaffinity, do_sched_setaffinity,
+    do_sched_yield, do_set_tid_address, do_spawn, do_wait4, pid_t, FdOp,
+};
+use crate::vm::{MMapFlags, VMPerms};
+use crate::{fs, process, std, vm};

 use super::*;

@ -391,7 +396,7 @@ macro_rules! process_syscall_table_with_callback {
            (Membarrier = 324) => handle_unsupported(),
            (Mlock2 = 325) => handle_unsupported(),

-                // Occlum-specific sytem calls
+            // Occlum-specific system calls
            (Spawn = 360) => do_spawn(child_pid_ptr: *mut u32, path: *const i8, argv: *const *const i8, envp: *const *const i8, fdop_list: *const FdOp),
            // Exception handling
            (Rdtsc = 361) => do_rdtsc(low_ptr: *mut u32, high_ptr: *mut u32),
@ -648,163 +653,6 @@ const FDOP_CLOSE: u32 = 1;
 const FDOP_DUP2: u32 = 2;
 const FDOP_OPEN: u32 = 3;

-#[repr(C)]
-#[derive(Debug)]
-pub struct FdOp {
-    // We actually switch the prev and next fields in the libc definition.
-    prev: *const FdOp,
-    next: *const FdOp,
-    cmd: u32,
-    fd: u32,
-    srcfd: u32,
-    oflag: u32,
-    mode: u32,
-    path: *const i8,
-}
-
-fn clone_file_actions_safely(fdop_ptr: *const FdOp) -> Result<Vec<FileAction>> {
-    let mut file_actions = Vec::new();
-
-    let mut fdop_ptr = fdop_ptr;
-    while fdop_ptr != ptr::null() {
-        check_ptr(fdop_ptr)?;
-        let fdop = unsafe { &*fdop_ptr };
-
-        let file_action = match fdop.cmd {
-            FDOP_CLOSE => FileAction::Close(fdop.fd),
-            FDOP_DUP2 => FileAction::Dup2(fdop.srcfd, fdop.fd),
-            FDOP_OPEN => FileAction::Open {
-                path: clone_cstring_safely(fdop.path)?
-                    .to_string_lossy()
-                    .into_owned(),
-                mode: fdop.mode,
-                oflag: fdop.oflag,
-                fd: fdop.fd,
-            },
-            _ => {
-                return_errno!(EINVAL, "Unknown file action command");
-            }
-        };
-        file_actions.push(file_action);
-
-        fdop_ptr = fdop.next;
-    }
-
-    Ok(file_actions)
-}
-
-fn do_spawn(
-    child_pid_ptr: *mut u32,
-    path: *const i8,
-    argv: *const *const i8,
-    envp: *const *const i8,
-    fdop_list: *const FdOp,
-) -> Result<isize> {
-    check_mut_ptr(child_pid_ptr)?;
-    let path = clone_cstring_safely(path)?.to_string_lossy().into_owned();
-    let argv = clone_cstrings_safely(argv)?;
-    let envp = clone_cstrings_safely(envp)?;
-    let file_actions = clone_file_actions_safely(fdop_list)?;
-    let parent = process::get_current();
-    debug!(
-        "spawn: path: {:?}, argv: {:?}, envp: {:?}, fdop: {:?}",
-        path, argv, envp, file_actions
-    );
-
-    let child_pid = process::do_spawn(&path, &argv, &envp, &file_actions, &parent)?;
-
-    unsafe { *child_pid_ptr = child_pid };
-    Ok(0)
-}
-
-pub fn do_clone(
-    flags: u32,
-    stack_addr: usize,
-    ptid: *mut pid_t,
-    ctid: *mut pid_t,
-    new_tls: usize,
-) -> Result<isize> {
-    let flags = CloneFlags::from_bits_truncate(flags);
-    check_mut_ptr(stack_addr as *mut u64)?;
-    let ptid = {
-        if flags.contains(CloneFlags::CLONE_PARENT_SETTID) {
-            check_mut_ptr(ptid)?;
-            Some(ptid)
-        } else {
-            None
-        }
-    };
-    let ctid = {
-        if flags.contains(CloneFlags::CLONE_CHILD_CLEARTID) {
-            check_mut_ptr(ctid)?;
-            Some(ctid)
-        } else {
-            None
-        }
-    };
-    let new_tls = {
-        if flags.contains(CloneFlags::CLONE_SETTLS) {
-            check_mut_ptr(new_tls as *mut usize)?;
-            Some(new_tls)
-        } else {
-            None
-        }
-    };
-
-    let child_pid = process::do_clone(flags, stack_addr, ptid, ctid, new_tls)?;
-
-    Ok(child_pid as isize)
-}
-
-pub fn do_futex(
-    futex_addr: *const i32,
-    futex_op: u32,
-    futex_val: i32,
-    timeout: u64,
-    futex_new_addr: *const i32,
-) -> Result<isize> {
-    check_ptr(futex_addr)?;
-    let (futex_op, futex_flags) = process::futex_op_and_flags_from_u32(futex_op)?;
-
-    let get_futex_val = |val| -> Result<usize> {
-        if val < 0 {
-            return_errno!(EINVAL, "the futex val must not be negative");
-        }
-        Ok(val as usize)
-    };
-
-    match futex_op {
-        FutexOp::FUTEX_WAIT => {
-            let timeout = {
-                let timeout = timeout as *const timespec_t;
-                if timeout.is_null() {
-                    None
-                } else {
-                    let ts = timespec_t::from_raw_ptr(timeout)?;
-                    ts.validate()?;
-                    if futex_flags.contains(FutexFlags::FUTEX_CLOCK_REALTIME) {
-                        warn!("CLOCK_REALTIME is not supported yet, use monotonic clock");
-                    }
-                    Some(ts)
-                }
-            };
-            process::futex_wait(futex_addr, futex_val, &timeout).map(|_| 0)
-        }
-        FutexOp::FUTEX_WAKE => {
-            let max_count = get_futex_val(futex_val)?;
-            process::futex_wake(futex_addr, max_count).map(|count| count as isize)
-        }
-        FutexOp::FUTEX_REQUEUE => {
-            check_ptr(futex_new_addr)?;
-            let max_nwakes = get_futex_val(futex_val)?;
-            let max_nrequeues = get_futex_val(timeout as i32)?;
-            process::futex_requeue(futex_addr, max_nwakes, max_nrequeues, futex_new_addr)
-                .map(|nwakes| nwakes as isize)
-        }
-        _ => return_errno!(ENOSYS, "the futex operation is not supported"),
-    }
-}
-
 fn do_mmap(
    addr: usize,
    size: usize,
@ -845,75 +693,6 @@ fn do_brk(new_brk_addr: usize) -> Result<isize> {
    Ok(ret_brk_addr as isize)
 }

-fn do_wait4(pid: i32, _exit_status: *mut i32) -> Result<isize> {
-    if !_exit_status.is_null() {
-        check_mut_ptr(_exit_status)?;
-    }
-
-    let child_process_filter = match pid {
-        pid if pid < -1 => process::ChildProcessFilter::WithPGID((-pid) as pid_t),
-        -1 => process::ChildProcessFilter::WithAnyPID,
-        0 => {
-            let pgid = process::do_getpgid();
-            process::ChildProcessFilter::WithPGID(pgid)
-        }
-        pid if pid > 0 => process::ChildProcessFilter::WithPID(pid as pid_t),
-        _ => {
-            panic!("THIS SHOULD NEVER HAPPEN!");
-        }
-    };
-    let mut exit_status = 0;
-    match process::do_wait4(&child_process_filter, &mut exit_status) {
-        Ok(pid) => {
-            if !_exit_status.is_null() {
-                unsafe {
-                    *_exit_status = exit_status;
-                }
-            }
-            Ok(pid as isize)
-        }
-        Err(e) => Err(e),
-    }
-}
-
-fn do_getpid() -> Result<isize> {
-    let pid = process::do_getpid();
-    Ok(pid as isize)
-}
-
-fn do_gettid() -> Result<isize> {
-    let tid = process::do_gettid();
-    Ok(tid as isize)
-}
-
-fn do_getppid() -> Result<isize> {
-    let ppid = process::do_getppid();
-    Ok(ppid as isize)
-}
-
-fn do_getpgid() -> Result<isize> {
-    let pgid = process::do_getpgid();
-    Ok(pgid as isize)
-}
-
-// TODO: implement uid, gid, euid, egid
-
-fn do_getuid() -> Result<isize> {
-    Ok(0)
-}
-
-fn do_getgid() -> Result<isize> {
-    Ok(0)
-}
-
-fn do_geteuid() -> Result<isize> {
-    Ok(0)
-}
-
-fn do_getegid() -> Result<isize> {
-    Ok(0)
-}
-
 // TODO: handle tz: timezone_t
 fn do_gettimeofday(tv_u: *mut timeval_t) -> Result<isize> {
    check_mut_ptr(tv_u)?;
@ -958,90 +737,6 @@ fn do_nanosleep(req_u: *const timespec_t, rem_u: *mut timespec_t) -> Result<isiz
    Ok(0)
 }

-// FIXME: use this
-const MAP_FAILED: *const c_void = ((-1) as i64) as *const c_void;
-
-fn do_exit(status: i32) -> ! {
-    debug!("exit: {}", status);
-    extern "C" {
-        fn do_exit_task() -> !;
-    }
-    process::do_exit(status);
-    unsafe {
-        do_exit_task();
-    }
-}
-
-fn do_getcwd(buf: *mut u8, size: usize) -> Result<isize> {
-    let safe_buf = {
-        check_mut_array(buf, size)?;
-        unsafe { std::slice::from_raw_parts_mut(buf, size) }
-    };
-    let proc_ref = process::get_current();
-    let mut proc = proc_ref.lock().unwrap();
-    let cwd = proc.get_cwd();
-    if cwd.len() + 1 > safe_buf.len() {
-        return_errno!(ERANGE, "buf is not long enough");
-    }
-    safe_buf[..cwd.len()].copy_from_slice(cwd.as_bytes());
-    safe_buf[cwd.len()] = 0;
-    Ok(buf as isize)
-}
-
-fn do_arch_prctl(code: u32, addr: *mut usize) -> Result<isize> {
-    let code = process::ArchPrctlCode::from_u32(code)?;
-    check_mut_ptr(addr)?;
-    process::do_arch_prctl(code, addr).map(|_| 0)
-}
-
-fn do_set_tid_address(tidptr: *mut pid_t) -> Result<isize> {
-    check_mut_ptr(tidptr)?;
-    process::do_set_tid_address(tidptr).map(|tid| tid as isize)
-}
-
-fn do_sched_yield() -> Result<isize> {
-    process::do_sched_yield();
-    Ok(0)
-}
-
-fn do_sched_getaffinity(pid: pid_t, cpusize: size_t, buf: *mut c_uchar) -> Result<isize> {
-    // Construct safe Rust types
-    let mut buf_slice = {
-        check_mut_array(buf, cpusize)?;
-        if cpusize == 0 {
-            return_errno!(EINVAL, "cpuset size must be greater than zero");
-        }
-        if buf as *const _ == std::ptr::null() {
-            return_errno!(EFAULT, "cpuset mask must NOT be null");
-        }
-        unsafe { std::slice::from_raw_parts_mut(buf, cpusize) }
-    };
-    // Call the memory-safe do_sched_getaffinity
-    let mut cpuset = CpuSet::new(cpusize);
-    let retval = process::do_sched_getaffinity(pid, &mut cpuset)?;
-    // Copy from Rust types to C types
-    buf_slice.copy_from_slice(cpuset.as_slice());
-    Ok(retval as isize)
-}
-
-fn do_sched_setaffinity(pid: pid_t, cpusize: size_t, buf: *const c_uchar) -> Result<isize> {
-    // Convert unsafe C types into safe Rust types
-    let cpuset = {
-        check_array(buf, cpusize)?;
-        if cpusize == 0 {
-            return_errno!(EINVAL, "cpuset size must be greater than zero");
-        }
-        if buf as *const _ == std::ptr::null() {
-            return_errno!(EFAULT, "cpuset mask must NOT be null");
-        }
-        CpuSet::from_raw_buf(buf, cpusize)
-    };
-    debug!("sched_setaffinity cpuset: {:#x}", cpuset);
-    // Call the memory-safe do_sched_setaffinity
-    process::do_sched_setaffinity(pid, &cpuset)?;
-    Ok(0)
-}
-
 fn do_socket(domain: c_int, socket_type: c_int, protocol: c_int) -> Result<isize> {
    debug!(
        "socket: domain: {}, socket_type: 0x{:x}, protocol: {}",
@ -1059,7 +754,7 @@ fn do_socket(domain: c_int, socket_type: c_int, protocol: c_int) -> Result<isize
        }
    };

-    let fd = process::put_file(file_ref, false)?;
+    let fd = current!().add_file(file_ref, false);
    Ok(fd as isize)
 }

@ -1068,7 +763,7 @@ fn do_connect(fd: c_int, addr: *const libc::sockaddr, addr_len: libc::socklen_t)
        "connect: fd: {}, addr: {:?}, addr_len: {}",
        fd, addr, addr_len
    );
-    let file_ref = process::get_file(fd as FileDesc)?;
+    let file_ref = current!().file(fd as FileDesc)?;
    if let Ok(socket) = file_ref.as_socket() {
        let ret = try_libc!(libc::ocall::connect(socket.fd(), addr, addr_len));
        Ok(ret as isize)
@ -1103,13 +798,13 @@ fn do_accept4(
        "accept4: fd: {}, addr: {:?}, addr_len: {:?}, flags: {:#x}",
        fd, addr, addr_len, flags
    );
-    let file_ref = process::get_file(fd as FileDesc)?;
+    let file_ref = current!().file(fd as FileDesc)?;
    if let Ok(socket) = file_ref.as_socket() {
        let socket = file_ref.as_socket()?;

        let new_socket = socket.accept(addr, addr_len, flags)?;
        let new_file_ref: Arc<Box<dyn File>> = Arc::new(Box::new(new_socket));
-        let new_fd = process::put_file(new_file_ref, false)?;
+        let new_fd = current!().add_file(new_file_ref, false);

        Ok(new_fd as isize)
    } else if let Ok(unix_socket) = file_ref.as_unix_socket() {
@ -1118,7 +813,7 @@ fn do_accept4(

        let new_socket = unix_socket.accept()?;
        let new_file_ref: Arc<Box<dyn File>> = Arc::new(Box::new(new_socket));
-        let new_fd = process::put_file(new_file_ref, false)?;
+        let new_fd = current!().add_file(new_file_ref, false);

        Ok(new_fd as isize)
    } else {
@ -1128,7 +823,7 @@ fn do_accept4(

 fn do_shutdown(fd: c_int, how: c_int) -> Result<isize> {
    debug!("shutdown: fd: {}, how: {}", fd, how);
-    let file_ref = process::get_file(fd as FileDesc)?;
+    let file_ref = current!().file(fd as FileDesc)?;
    if let Ok(socket) = file_ref.as_socket() {
        let ret = try_libc!(libc::ocall::shutdown(socket.fd(), how));
        Ok(ret as isize)
@ -1139,7 +834,7 @@ fn do_shutdown(fd: c_int, how: c_int) -> Result<isize> {

 fn do_bind(fd: c_int, addr: *const libc::sockaddr, addr_len: libc::socklen_t) -> Result<isize> {
    debug!("bind: fd: {}, addr: {:?}, addr_len: {}", fd, addr, addr_len);
-    let file_ref = process::get_file(fd as FileDesc)?;
+    let file_ref = current!().file(fd as FileDesc)?;
    if let Ok(socket) = file_ref.as_socket() {
        check_ptr(addr)?; // TODO: check addr_len
        let ret = try_libc!(libc::ocall::bind(socket.fd(), addr, addr_len));
@ -1159,7 +854,7 @@ fn do_bind(fd: c_int, addr: *const libc::sockaddr, addr_len: libc::socklen_t) ->

 fn do_listen(fd: c_int, backlog: c_int) -> Result<isize> {
    debug!("listen: fd: {}, backlog: {}", fd, backlog);
-    let file_ref = process::get_file(fd as FileDesc)?;
+    let file_ref = current!().file(fd as FileDesc)?;
    if let Ok(socket) = file_ref.as_socket() {
        let ret = try_libc!(libc::ocall::listen(socket.fd(), backlog));
        Ok(ret as isize)
@ -1182,7 +877,7 @@ fn do_setsockopt(
        "setsockopt: fd: {}, level: {}, optname: {}, optval: {:?}, optlen: {:?}",
        fd, level, optname, optval, optlen
    );
-    let file_ref = process::get_file(fd as FileDesc)?;
+    let file_ref = current!().file(fd as FileDesc)?;
    if let Ok(socket) = file_ref.as_socket() {
        let ret = try_libc!(libc::ocall::setsockopt(
            socket.fd(),
@ -1211,7 +906,7 @@ fn do_getsockopt(
        "getsockopt: fd: {}, level: {}, optname: {}, optval: {:?}, optlen: {:?}",
        fd, level, optname, optval, optlen
    );
-    let file_ref = process::get_file(fd as FileDesc)?;
+    let file_ref = current!().file(fd as FileDesc)?;
    let socket = file_ref.as_socket()?;

    let ret = try_libc!(libc::ocall::getsockopt(
@ -1233,7 +928,7 @@ fn do_getpeername(
        "getpeername: fd: {}, addr: {:?}, addr_len: {:?}",
        fd, addr, addr_len
    );
-    let file_ref = process::get_file(fd as FileDesc)?;
+    let file_ref = current!().file(fd as FileDesc)?;
    if let Ok(socket) = file_ref.as_socket() {
        let ret = try_libc!(libc::ocall::getpeername(socket.fd(), addr, addr_len));
        Ok(ret as isize)
@ -1257,7 +952,7 @@ fn do_getsockname(
        "getsockname: fd: {}, addr: {:?}, addr_len: {:?}",
        fd, addr, addr_len
    );
-    let file_ref = process::get_file(fd as FileDesc)?;
+    let file_ref = current!().file(fd as FileDesc)?;
    if let Ok(socket) = file_ref.as_socket() {
        let ret = try_libc!(libc::ocall::getsockname(socket.fd(), addr, addr_len));
        Ok(ret as isize)
@ -1281,7 +976,7 @@ fn do_sendto(
        "sendto: fd: {}, base: {:?}, len: {}, addr: {:?}, addr_len: {}",
        fd, base, len, addr, addr_len
    );
-    let file_ref = process::get_file(fd as FileDesc)?;
+    let file_ref = current!().file(fd as FileDesc)?;
    let socket = file_ref.as_socket()?;

    let ret = try_libc!(libc::ocall::sendto(
@ -1307,7 +1002,7 @@ fn do_recvfrom(
        "recvfrom: fd: {}, base: {:?}, len: {}, flags: {}, addr: {:?}, addr_len: {:?}",
        fd, base, len, flags, addr, addr_len
    );
-    let file_ref = process::get_file(fd as FileDesc)?;
+    let file_ref = current!().file(fd as FileDesc)?;
    let socket = file_ref.as_socket()?;

    let ret = try_libc!(libc::ocall::recvfrom(
@ -1339,18 +1034,10 @@ fn do_socketpair(
    if (domain == libc::AF_UNIX) {
        let (client_socket, server_socket) =
            UnixSocketFile::socketpair(socket_type as i32, protocol as i32)?;
-        let current_ref = process::get_current();
-        let mut proc = current_ref.lock().unwrap();
-        sock_pair[0] = proc
-            .get_files()
-            .lock()
-            .unwrap()
-            .put(Arc::new(Box::new(client_socket)), false);
-        sock_pair[1] = proc
-            .get_files()
-            .lock()
-            .unwrap()
-            .put(Arc::new(Box::new(server_socket)), false);
+        let current = current!();
+        let mut files = current.files().lock().unwrap();
+        sock_pair[0] = files.put(Arc::new(Box::new(client_socket)), false);
+        sock_pair[1] = files.put(Arc::new(Box::new(server_socket)), false);

        debug!("socketpair: ({}, {})", sock_pair[0], sock_pair[1]);
        Ok(0)
--- a/src/libos/src/time/profiler.rs
+++ b/src/libos/src/time/profiler.rs
@ -18,7 +18,7 @@ impl GlobalProfiler {
    }

    pub fn thread_enter(&mut self) -> Result<()> {
-        let tid = process::do_gettid();
+        let tid = current!().tid();
        if self.inner.insert(tid, ThreadProfiler::new()).is_some() {
            return_errno!(
                EINVAL,
@ -33,7 +33,7 @@ impl GlobalProfiler {
        // will never return
        self.syscall_exit(SyscallNum::Exit, false);

-        let tid = process::do_gettid();
+        let tid = current!().tid();

        let mut exiting_profiler = self.inner.remove(&tid).ok_or_else(|| {
            errno!(
@ -47,13 +47,13 @@ impl GlobalProfiler {
    }

    pub fn syscall_enter(&mut self, syscall_num: SyscallNum) -> Result<()> {
-        let tid = process::do_gettid();
+        let tid = current!().tid();
        let mut prof = self.inner.get_mut(&tid).unwrap();
        prof.syscall_enter(syscall_num)
    }

    pub fn syscall_exit(&mut self, syscall_num: SyscallNum, is_err: bool) -> Result<()> {
-        let tid = process::do_gettid();
+        let tid = current!().tid();
        let mut prof = self.inner.get_mut(&tid).unwrap();
        prof.syscall_exit(syscall_num, is_err)
    }
--- a/src/libos/src/util/log.rs
+++ b/src/libos/src/util/log.rs
@ -94,7 +94,7 @@ impl Log for SimpleLogger {
        if self.enabled(record.metadata()) {
            // Parts of message
            let level = record.level();
-            let tid = process::get_current_tid();
+            let tid = current!().tid();
            let rounds = round_count();
            let desc = round_desc();
            // Message (null-terminated)
--- a/src/libos/src/vm/mod.rs
+++ b/src/libos/src/vm/mod.rs
@ -1,6 +1,6 @@
 use super::*;
 use fs::{File, FileDesc, FileRef};
-use process::{get_current, Process, ProcessRef};
+use process::{Process, ProcessRef};
 use std::fmt;

 mod process_vm;
@ -35,32 +35,22 @@ pub fn do_mmap(
        );
    }

-    let mut current_vm_ref = {
-        let current_ref = get_current();
-        let current_process = current_ref.lock().unwrap();
-        current_process.get_vm().clone()
-    };
-    let mut current_vm = current_vm_ref.lock().unwrap();
+    let current = current!();
+    let mut current_vm = current.vm().lock().unwrap();
    current_vm.mmap(addr, size, perms, flags, fd, offset)
 }

 pub fn do_munmap(addr: usize, size: usize) -> Result<()> {
    debug!("munmap: addr: {:#x}, size: {:#x}", addr, size);
-    let mut current_vm_ref = {
-        let current_ref = get_current();
-        let current_process = current_ref.lock().unwrap();
-        current_process.get_vm().clone()
-    };
-    let mut current_vm = current_vm_ref.lock().unwrap();
+    let current = current!();
+    let mut current_vm = current.vm().lock().unwrap();
    current_vm.munmap(addr, size)
 }

 pub fn do_brk(addr: usize) -> Result<usize> {
    debug!("brk: addr: {:#x}", addr);
-    let current_ref = get_current();
-    let current_process = current_ref.lock().unwrap();
-    let current_vm_ref = current_process.get_vm();
-    let mut current_vm = current_vm_ref.lock().unwrap();
+    let current = current!();
+    let mut current_vm = current.vm().lock().unwrap();
    current_vm.brk(addr)
 }

--- a/src/libos/src/vm/process_vm.rs
+++ b/src/libos/src/vm/process_vm.rs
@ -1,7 +1,7 @@
 use super::*;

 use super::config;
-use super::process::{ElfFile, ProgramHeaderExt};
+use super::process::elf_file::{ElfFile, ProgramHeaderExt};
 use super::user_space_vm::{UserSpaceVMManager, UserSpaceVMRange, USER_SPACE_VM_MANAGER};
 use super::vm_manager::{VMInitializer, VMManager, VMMapAddr, VMMapOptions, VMMapOptionsBuilder};

@ -301,7 +301,7 @@ impl ProcessVM {
            if flags.contains(MMapFlags::MAP_ANONYMOUS) {
                VMInitializer::FillZeros()
            } else {
-                let file_ref = process::get_file(fd)?;
+                let file_ref = current!().file(fd)?;
                VMInitializer::LoadFromFile {
                    file: file_ref,
                    offset: offset,
--- a/test/pthread/main.c
+++ b/test/pthread/main.c
@ -27,13 +27,11 @@ struct thread_arg {

 static void* thread_func(void* _arg) {
    struct thread_arg* arg = _arg;
-    printf("Thread #%d: started\n", arg->ti);
    for (long i = 0; i < arg->local_count; i++) {
        pthread_mutex_lock(arg->mutex);
        (*arg->global_count)++;
        pthread_mutex_unlock(arg->mutex);
    }
-    printf("Thread #%d: completed\n", arg->ti);
    return NULL;
 }

--- a/test/sched/main.c
+++ b/test/sched/main.c
@ -171,7 +171,7 @@ static int test_sched_yield() {
 // ============================================================================

 static test_case_t test_cases[] = {
-    TEST_CASE(test_sched_xetaffinity_with_child_pid),
+    //TEST_CASE(test_sched_xetaffinity_with_child_pid),
    TEST_CASE(test_sched_getaffinity_with_self_pid),
    TEST_CASE(test_sched_setaffinity_with_self_pid),
    TEST_CASE(test_sched_getaffinity_via_explicit_syscall),