Refactor the process/thread subsystem

As a major rewrite to the process/thread subsystem, this commit:
1. Implements threads as a first-class object, which represents a group of OS resources
and a thread of execution;
2. Implements processes as a first-class object that manages threads and maintains
the parent-child relationship between processes;
3. Refactors the code in the process subsystem to follow the improved coding style and
conventions that emerged in recent commits;
4. Refactors the code in other subsystems to use the new process/thread subsystem.
This commit is contained in:
Tate, Hongliang Tian 2020-04-07 10:08:02 +00:00
parent f9df83f559
commit 2a1d3d98c5
85 changed files with 2909 additions and 1978 deletions

1
.gitignore vendored

@ -3,3 +3,4 @@
*.so
build/
build_sim/
.DS_Store

@ -67,7 +67,7 @@ C_SRCS := $(filter-out $(BUILTIN_C_SRCS),$(sort $(wildcard src/*.c src/*/*.c src
C_OBJS := $(addprefix $(BUILD_DIR)/src/libos/,$(C_SRCS:.c=.o))
CXX_SRCS := $(sort $(wildcard src/*.cpp src/*/*.cpp))
CXX_OBJS := $(addprefix $(BUILD_DIR)/src/libos/,$(CXX_SRCS:.cpp=.o))
S_SRCS := $(sort $(wildcard src/*.S src/*/*.S))
S_SRCS := $(sort $(wildcard src/*.S src/*/*.S src/*/*/*.S))
S_OBJS := $(addprefix $(BUILD_DIR)/src/libos/,$(S_SRCS:.S=.o))
ALL_BUILD_SUBDIRS := $(sort $(patsubst %/,%,$(dir $(LIBOS_SO) $(EDL_C_OBJS) $(BUILTIN_C_OBJS) $(C_OBJS) $(CXX_OBJS) $(S_OBJS)) $(RUST_TARGET_DIR) $(RUST_OUT_DIR)))

@ -175,7 +175,7 @@ fn do_new_process(
let envp = &config::LIBOS_CONFIG.env;
let file_actions = Vec::new();
let parent = &process::IDLE_PROCESS;
let current = &process::IDLE;
let program_path_str = program_path.to_str().unwrap();
let new_tid = process::do_spawn_without_exec(
&program_path_str,
@ -183,13 +183,13 @@ fn do_new_process(
envp,
&file_actions,
host_stdio_fds,
parent,
current,
)?;
Ok(new_tid)
}
fn do_exec_thread(libos_tid: pid_t, host_tid: pid_t) -> Result<i32> {
let exit_status = process::run_task(libos_tid, host_tid)?;
let exit_status = process::task::exec(libos_tid, host_tid)?;
// sync file system
// TODO: only sync when all processes exit

@ -47,9 +47,9 @@ pub fn do_faccessat(
pub fn do_access(path: &str, mode: AccessibilityCheckMode) -> Result<()> {
debug!("access: path: {:?}, mode: {:?}", path, mode);
let inode = {
let current_ref = process::get_current();
let mut current = current_ref.lock().unwrap();
current.lookup_inode(path)?
let current = current!();
let fs = current.fs().lock().unwrap();
fs.lookup_inode(path)?
};
//let metadata = inode.get_metadata();
// TODO: check metadata.mode with mode

@ -1,17 +0,0 @@
use super::*;
pub fn do_chdir(path: &str) -> Result<()> {
debug!("chdir: path: {:?}", path);
let current_ref = process::get_current();
let mut current_process = current_ref.lock().unwrap();
let inode = current_process.lookup_inode(path)?;
let info = inode.metadata()?;
if info.type_ != FileType::Dir {
return_errno!(ENOTDIR, "");
}
current_process.change_cwd(path);
Ok(())
}

@ -58,9 +58,9 @@ impl FileMode {
pub fn do_chmod(path: &str, mode: FileMode) -> Result<()> {
debug!("chmod: path: {:?}, mode: {:?}", path, mode);
let inode = {
let current_ref = process::get_current();
let mut current = current_ref.lock().unwrap();
current.lookup_inode(path)?
let current = current!();
let fs = current.fs().lock().unwrap();
fs.lookup_inode(path)?
};
let mut info = inode.metadata()?;
info.mode = mode.bits();
@ -70,7 +70,7 @@ pub fn do_chmod(path: &str, mode: FileMode) -> Result<()> {
pub fn do_fchmod(fd: FileDesc, mode: FileMode) -> Result<()> {
debug!("fchmod: fd: {}, mode: {:?}", fd, mode);
let file_ref = process::get_file(fd)?;
let file_ref = current!().file(fd)?;
let mut info = file_ref.metadata()?;
info.mode = mode.bits();
file_ref.set_metadata(&info)?;

@ -7,7 +7,7 @@ pub fn do_chown(path: &str, uid: u32, gid: u32) -> Result<()> {
pub fn do_fchown(fd: FileDesc, uid: u32, gid: u32) -> Result<()> {
debug!("fchown: fd: {}, uid: {}, gid: {}", fd, uid, gid);
let file_ref = process::get_file(fd)?;
let file_ref = current!().file(fd)?;
let mut info = file_ref.metadata()?;
info.uid = uid as usize;
info.gid = gid as usize;
@ -18,9 +18,9 @@ pub fn do_fchown(fd: FileDesc, uid: u32, gid: u32) -> Result<()> {
pub fn do_lchown(path: &str, uid: u32, gid: u32) -> Result<()> {
debug!("lchown: path: {:?}, uid: {}, gid: {}", path, uid, gid);
let inode = {
let current_ref = process::get_current();
let mut current = current_ref.lock().unwrap();
current.lookup_inode(path)?
let current = current!();
let fs = current.fs().lock().unwrap();
fs.lookup_inode(path)?
};
let mut info = inode.metadata()?;
info.uid = uid as usize;

@ -2,10 +2,8 @@ use super::*;
pub fn do_close(fd: FileDesc) -> Result<()> {
debug!("close: fd: {}", fd);
let current_ref = process::get_current();
let current_process = current_ref.lock().unwrap();
let file_table_ref = current_process.get_files();
let mut file_table = file_table_ref.lock().unwrap();
file_table.del(fd)?;
let current = current!();
let mut files = current.files().lock().unwrap();
files.del(fd)?;
Ok(())
}

@ -67,7 +67,7 @@ pub fn do_getdents64(fd: FileDesc, buf: &mut [u8]) -> Result<usize> {
buf.as_ptr(),
buf.len()
);
let file_ref = process::get_file(fd)?;
let file_ref = current!().file(fd)?;
let info = file_ref.metadata()?;
if info.type_ != FileType::Dir {
return_errno!(ENOTDIR, "");

@ -24,7 +24,7 @@ impl DirFd {
// Get the absolute path of directory
pub fn get_dir_path(dirfd: FileDesc) -> Result<String> {
let dir_path = {
let file_ref = process::get_file(dirfd)?;
let file_ref = current!().file(dirfd)?;
if let Ok(inode_file) = file_ref.as_inode_file() {
if inode_file.metadata()?.type_ != FileType::Dir {
return_errno!(ENOTDIR, "not a directory");

@ -1,37 +1,30 @@
use super::*;
pub fn do_dup(old_fd: FileDesc) -> Result<FileDesc> {
let current_ref = process::get_current();
let current = current_ref.lock().unwrap();
let file_table_ref = current.get_files();
let mut file_table = file_table_ref.lock().unwrap();
let file = file_table.get(old_fd)?;
let new_fd = file_table.put(file, false);
let current = current!();
let file = current.file(old_fd)?;
let new_fd = current.add_file(file, false);
Ok(new_fd)
}
pub fn do_dup2(old_fd: FileDesc, new_fd: FileDesc) -> Result<FileDesc> {
let current_ref = process::get_current();
let current = current_ref.lock().unwrap();
let file_table_ref = current.get_files();
let mut file_table = file_table_ref.lock().unwrap();
let file = file_table.get(old_fd)?;
let current = current!();
let mut files = current.files().lock().unwrap();
let file = files.get(old_fd)?;
if old_fd != new_fd {
file_table.put_at(new_fd, file, false);
files.put_at(new_fd, file, false);
}
Ok(new_fd)
}
pub fn do_dup3(old_fd: FileDesc, new_fd: FileDesc, flags: u32) -> Result<FileDesc> {
let creation_flags = CreationFlags::from_bits_truncate(flags);
let current_ref = process::get_current();
let current = current_ref.lock().unwrap();
let file_table_ref = current.get_files();
let mut file_table = file_table_ref.lock().unwrap();
let file = file_table.get(old_fd)?;
let current = current!();
let mut files = current.files().lock().unwrap();
let file = files.get(old_fd)?;
if old_fd == new_fd {
return_errno!(EINVAL, "old_fd must not be equal to new_fd");
}
file_table.put_at(new_fd, file, creation_flags.must_close_on_spawn());
files.put_at(new_fd, file, creation_flags.must_close_on_spawn());
Ok(new_fd)
}

@ -53,10 +53,10 @@ impl<'a> FcntlCmd<'a> {
pub fn do_fcntl(fd: FileDesc, cmd: &mut FcntlCmd) -> Result<isize> {
debug!("fcntl: fd: {:?}, cmd: {:?}", &fd, cmd);
let current_ref = process::get_current();
let mut current = current_ref.lock().unwrap();
let file_table_ref = current.get_files();
let mut file_table = file_table_ref.lock().unwrap();
let current = current!();
let mut file_table = current.files().lock().unwrap();
let ret = match cmd {
FcntlCmd::DupFd(min_fd) => {
let dup_fd = file_table.dup(fd, *min_fd, false)?;

@ -2,14 +2,14 @@ use super::*;
pub fn do_fsync(fd: FileDesc) -> Result<()> {
debug!("fsync: fd: {}", fd);
let file_ref = process::get_file(fd)?;
let file_ref = current!().file(fd)?;
file_ref.sync_all()?;
Ok(())
}
pub fn do_fdatasync(fd: FileDesc) -> Result<()> {
debug!("fdatasync: fd: {}", fd);
let file_ref = process::get_file(fd)?;
let file_ref = current!().file(fd)?;
file_ref.sync_data()?;
Ok(())
}

@ -66,6 +66,6 @@ impl<'a> IoctlCmd<'a> {
pub fn do_ioctl(fd: FileDesc, cmd: &mut IoctlCmd) -> Result<()> {
debug!("ioctl: fd: {}, cmd: {:?}", fd, cmd);
let file_ref = process::get_file(fd)?;
let file_ref = current!().file(fd)?;
file_ref.ioctl(cmd)
}

@ -5,10 +5,10 @@ pub fn do_link(oldpath: &str, newpath: &str) -> Result<()> {
let (new_dir_path, new_file_name) = split_path(&newpath);
let (inode, new_dir_inode) = {
let current_ref = process::get_current();
let current_process = current_ref.lock().unwrap();
let inode = current_process.lookup_inode(&oldpath)?;
let new_dir_inode = current_process.lookup_inode(new_dir_path)?;
let current = current!();
let fs = current.fs().lock().unwrap();
let inode = fs.lookup_inode(&oldpath)?;
let new_dir_inode = fs.lookup_inode(new_dir_path)?;
(inode, new_dir_inode)
};
new_dir_inode.link(new_file_name, &inode)?;

@ -1,6 +1,6 @@
use super::*;
pub fn do_lseek(fd: FileDesc, offset: SeekFrom) -> Result<off_t> {
let file_ref = process::get_file(fd)?;
let file_ref = current!().file(fd)?;
file_ref.seek(offset)
}

@ -6,9 +6,9 @@ pub fn do_mkdir(path: &str, mode: usize) -> Result<()> {
let (dir_path, file_name) = split_path(&path);
let inode = {
let current_ref = process::get_current();
let current_process = current_ref.lock().unwrap();
current_process.lookup_inode(dir_path)?
let current = current!();
let fs = current.fs().lock().unwrap();
fs.lookup_inode(dir_path)?
};
if inode.find(file_name).is_ok() {
return_errno!(EEXIST, "");

@ -3,7 +3,6 @@ use super::*;
use process::Process;
pub use self::access::{do_access, do_faccessat, AccessibilityCheckFlags, AccessibilityCheckMode};
pub use self::chdir::do_chdir;
pub use self::chmod::{do_chmod, do_fchmod, FileMode};
pub use self::chown::{do_chown, do_fchown, do_lchown};
pub use self::close::do_close;
@ -30,7 +29,6 @@ pub use self::unlink::do_unlink;
pub use self::write::{do_pwrite, do_write, do_writev};
mod access;
mod chdir;
mod chmod;
mod chown;
mod close;
@ -56,85 +54,6 @@ mod truncate;
mod unlink;
mod write;
impl Process {
/// Open a file on the process. But DO NOT add it to file table.
pub fn open_file(&self, path: &str, flags: u32, mode: u32) -> Result<Box<dyn File>> {
if path == "/dev/null" {
return Ok(Box::new(DevNull));
}
if path == "/dev/zero" {
return Ok(Box::new(DevZero));
}
if path == "/dev/random" || path == "/dev/urandom" || path == "/dev/arandom" {
return Ok(Box::new(DevRandom));
}
if path == "/dev/sgx" {
return Ok(Box::new(DevSgx));
}
let creation_flags = CreationFlags::from_bits_truncate(flags);
let inode = if creation_flags.can_create() {
let (dir_path, file_name) = split_path(&path);
let dir_inode = self.lookup_inode(dir_path)?;
match dir_inode.find(file_name) {
Ok(file_inode) => {
if creation_flags.is_exclusive() {
return_errno!(EEXIST, "file exists");
}
file_inode
}
Err(FsError::EntryNotFound) => {
if !dir_inode.allow_write()? {
return_errno!(EPERM, "file cannot be created");
}
dir_inode.create(file_name, FileType::File, mode)?
}
Err(e) => return Err(Error::from(e)),
}
} else {
self.lookup_inode(&path)?
};
let abs_path = self.convert_to_abs_path(&path);
Ok(Box::new(INodeFile::open(inode, &abs_path, flags)?))
}
/// Lookup INode from the cwd of the process
pub fn lookup_inode(&self, path: &str) -> Result<Arc<dyn INode>> {
debug!("lookup_inode: cwd: {:?}, path: {:?}", self.get_cwd(), path);
if path.len() > 0 && path.as_bytes()[0] == b'/' {
// absolute path
let abs_path = path.trim_start_matches('/');
let inode = ROOT_INODE.lookup(abs_path)?;
Ok(inode)
} else {
// relative path
let cwd = self.get_cwd().trim_start_matches('/');
let inode = ROOT_INODE.lookup(cwd)?.lookup(path)?;
Ok(inode)
}
}
/// Convert the path to be absolute
pub fn convert_to_abs_path(&self, path: &str) -> String {
debug!(
"convert_to_abs_path: cwd: {:?}, path: {:?}",
self.get_cwd(),
path
);
if path.len() > 0 && path.as_bytes()[0] == b'/' {
// path is absolute path already
return path.to_owned();
}
let cwd = {
if !self.get_cwd().ends_with("/") {
self.get_cwd().to_owned() + "/"
} else {
self.get_cwd().to_owned()
}
};
cwd + path
}
}
/// Split a `path` str to `(base_path, file_name)`
pub fn split_path(path: &str) -> (&str, &str) {
let mut split = path.trim_end_matches('/').rsplitn(2, '/');

@ -1,18 +1,15 @@
use super::*;
fn do_open(path: &str, flags: u32, mode: u32) -> Result<FileDesc> {
let current_ref = process::get_current();
let mut proc = current_ref.lock().unwrap();
let current = current!();
let fs = current.fs().lock().unwrap();
let file = proc.open_file(path, flags, mode)?;
let file = fs.open_file(path, flags, mode)?;
let file_ref: Arc<Box<dyn File>> = Arc::new(file);
let fd = {
let creation_flags = CreationFlags::from_bits_truncate(flags);
proc.get_files()
.lock()
.unwrap()
.put(file_ref, creation_flags.must_close_on_spawn())
current.add_file(file_ref, creation_flags.must_close_on_spawn())
};
Ok(fd)
}

@ -2,18 +2,18 @@ use super::*;
pub fn do_read(fd: FileDesc, buf: &mut [u8]) -> Result<usize> {
debug!("read: fd: {}", fd);
let file_ref = process::get_file(fd)?;
let file_ref = current!().file(fd)?;
file_ref.read(buf)
}
pub fn do_readv(fd: FileDesc, bufs: &mut [&mut [u8]]) -> Result<usize> {
debug!("readv: fd: {}", fd);
let file_ref = process::get_file(fd)?;
let file_ref = current!().file(fd)?;
file_ref.readv(bufs)
}
pub fn do_pread(fd: FileDesc, buf: &mut [u8], offset: usize) -> Result<usize> {
debug!("pread: fd: {}, offset: {}", fd, offset);
let file_ref = process::get_file(fd)?;
let file_ref = current!().file(fd)?;
file_ref.read_at(offset, buf)
}

@ -1,14 +1,15 @@
use super::*;
pub fn do_rename(oldpath: &str, newpath: &str) -> Result<()> {
let current_ref = process::get_current();
let current_process = current_ref.lock().unwrap();
debug!("rename: oldpath: {:?}, newpath: {:?}", oldpath, newpath);
let current = current!();
let fs = current.fs().lock().unwrap();
let (old_dir_path, old_file_name) = split_path(&oldpath);
let (new_dir_path, new_file_name) = split_path(&newpath);
let old_dir_inode = current_process.lookup_inode(old_dir_path)?;
let new_dir_inode = current_process.lookup_inode(new_dir_path)?;
let old_dir_inode = fs.lookup_inode(old_dir_path)?;
let new_dir_inode = fs.lookup_inode(new_dir_path)?;
let old_file_mode = {
let old_file_inode = old_dir_inode.find(old_file_name)?;
let metadata = old_file_inode.metadata()?;

@ -5,9 +5,9 @@ pub fn do_rmdir(path: &str) -> Result<()> {
let (dir_path, file_name) = split_path(&path);
let dir_inode = {
let current_ref = process::get_current();
let current_process = current_ref.lock().unwrap();
current_process.lookup_inode(dir_path)?
let current = current!();
let fs = current.fs().lock().unwrap();
fs.lookup_inode(dir_path)?
};
let file_inode = dir_inode.find(file_name)?;
if file_inode.metadata()?.type_ != FileType::Dir {

@ -11,13 +11,10 @@ pub fn do_sendfile(
"sendfile: out: {}, in: {}, offset: {:?}, count: {}",
out_fd, in_fd, offset, count
);
let current_ref = process::get_current();
let current_process = current_ref.lock().unwrap();
let file_table_ref = current_process.get_files();
let mut file_table = file_table_ref.lock().unwrap();
let in_file = file_table.get(in_fd)?;
let out_file = file_table.get(out_fd)?;
let current = current!();
let in_file = current.file(in_fd)?;
let out_file = current.file(out_fd)?;
let mut buffer: [u8; 1024 * 11] = unsafe { MaybeUninit::uninit().assume_init() };
let mut read_offset = match offset {

@ -141,7 +141,7 @@ fn do_stat(path: &str) -> Result<Stat> {
pub fn do_fstat(fd: u32) -> Result<Stat> {
debug!("fstat: fd: {}", fd);
let file_ref = process::get_file(fd as FileDesc)?;
let file_ref = current!().file(fd as FileDesc)?;
let stat = Stat::from(file_ref.metadata()?);
// TODO: handle symlink
Ok(stat)
@ -150,9 +150,9 @@ pub fn do_fstat(fd: u32) -> Result<Stat> {
pub fn do_lstat(path: &str) -> Result<Stat> {
debug!("lstat: path: {}", path);
let inode = {
let current_ref = process::get_current();
let current_process = current_ref.lock().unwrap();
current_process.lookup_inode(&path)?
let current = current!();
let fs = current.fs().lock().unwrap();
fs.lookup_inode(&path)?
};
let stat = Stat::from(inode.metadata()?);
Ok(stat)

@ -4,15 +4,13 @@ pub fn do_readlink(path: &str, buf: &mut [u8]) -> Result<usize> {
debug!("readlink: path: {:?}", path);
let file_path = {
if path == "/proc/self/exe" {
let current_ref = process::get_current();
let current = current_ref.lock().unwrap();
current.get_elf_path().to_owned()
current!().process().exec_path().to_owned()
} else if path.starts_with("/proc/self/fd") {
let fd = path
.trim_start_matches("/proc/self/fd/")
.parse::<FileDesc>()
.map_err(|e| errno!(EBADF, "Invalid file descriptor"))?;
let file_ref = process::get_file(fd)?;
let file_ref = current!().file(fd)?;
if let Ok(inode_file) = file_ref.as_inode_file() {
inode_file.get_abs_path().to_owned()
} else {

@ -3,9 +3,9 @@ use super::*;
pub fn do_truncate(path: &str, len: usize) -> Result<()> {
debug!("truncate: path: {:?}, len: {}", path, len);
let inode = {
let current_ref = process::get_current();
let current_process = current_ref.lock().unwrap();
current_process.lookup_inode(&path)?
let current = current!();
let fs = current.fs().lock().unwrap();
fs.lookup_inode(&path)?
};
inode.resize(len)?;
Ok(())
@ -13,7 +13,7 @@ pub fn do_truncate(path: &str, len: usize) -> Result<()> {
pub fn do_ftruncate(fd: FileDesc, len: usize) -> Result<()> {
debug!("ftruncate: fd: {}, len: {}", fd, len);
let file_ref = process::get_file(fd)?;
let file_ref = current!().file(fd)?;
file_ref.set_len(len as u64)?;
Ok(())
}

@ -5,9 +5,9 @@ pub fn do_unlink(path: &str) -> Result<()> {
let (dir_path, file_name) = split_path(&path);
let dir_inode = {
let current_ref = process::get_current();
let current_process = current_ref.lock().unwrap();
current_process.lookup_inode(dir_path)?
let current = current!();
let fs = current.fs().lock().unwrap();
fs.lookup_inode(dir_path)?
};
let file_inode = dir_inode.find(file_name)?;
let metadata = file_inode.metadata()?;

@ -2,18 +2,18 @@ use super::*;
pub fn do_write(fd: FileDesc, buf: &[u8]) -> Result<usize> {
debug!("write: fd: {}", fd);
let file_ref = process::get_file(fd)?;
let file_ref = current!().file(fd)?;
file_ref.write(buf)
}
pub fn do_writev(fd: FileDesc, bufs: &[&[u8]]) -> Result<usize> {
debug!("writev: fd: {}", fd);
let file_ref = process::get_file(fd)?;
let file_ref = current!().file(fd)?;
file_ref.writev(bufs)
}
pub fn do_pwrite(fd: FileDesc, buf: &[u8], offset: usize) -> Result<usize> {
debug!("pwrite: fd: {}, offset: {}", fd, offset);
let file_ref = process::get_file(fd)?;
let file_ref = current!().file(fd)?;
file_ref.write_at(offset, buf)
}

@ -0,0 +1,17 @@
use super::*;
/// Change the current working directory of the current thread's FS view.
///
/// Fails with `ENOTDIR` when `path` resolves to an inode that is not a
/// directory; lookup and metadata errors are propagated unchanged.
pub fn do_chdir(path: &str) -> Result<()> {
    debug!("chdir: path: {:?}", path);

    let thread = current!();
    let mut fs_view = thread.fs().lock().unwrap();

    // Resolve the target first so that the cwd is only updated for directories.
    let target_is_dir = fs_view.lookup_inode(path)?.metadata()?.type_ == FileType::Dir;
    if !target_is_dir {
        return_errno!(ENOTDIR, "cwd must be directory");
    }

    fs_view.set_cwd(path)?;
    Ok(())
}

@ -0,0 +1,9 @@
use super::*;
/// Return an owned copy of the current working directory recorded in the
/// current thread's FS view.
pub fn do_getcwd() -> Result<String> {
    debug!("getcwd");
    let current = current!();
    // The lock guard is a temporary; it is released once the copy is made.
    let cwd = current.fs().lock().unwrap().cwd().to_owned();
    Ok(cwd)
}

@ -1,5 +1,9 @@
use super::*;
pub use self::chdir::do_chdir;
pub use self::getcwd::do_getcwd;
pub use self::sync::do_sync;
mod chdir;
mod getcwd;
mod sync;

125
src/libos/src/fs/fs_view.rs Normal file

@ -0,0 +1,125 @@
use super::dev_fs::{DevNull, DevRandom, DevSgx, DevZero};
/// Present a per-process view of FS.
use super::*;
/// A view of the file system as seen through a current working directory.
///
/// The only state kept here is the cwd string; path lookups in the
/// accompanying `impl` resolve relative paths against it.
#[derive(Debug, Clone)]
pub struct FsView {
// Current working directory; initialised to "/" by `new()` and `default()`.
cwd: String,
}
impl FsView {
pub fn new() -> FsView {
Self {
cwd: "/".to_owned(),
}
}
/// Get the current working directory.
pub fn cwd(&self) -> &str {
&self.cwd
}
/// Set the current working directory.
pub fn set_cwd(&mut self, path: &str) -> Result<()> {
if path.len() == 0 {
return_errno!(EINVAL, "empty path");
}
if path.as_bytes()[0] == b'/' {
// absolute
self.cwd = path.to_owned();
} else {
// relative
if !self.cwd.ends_with("/") {
self.cwd += "/";
}
self.cwd += path;
}
Ok(())
}
/// Open a file on the process. But DO NOT add it to file table.
pub fn open_file(&self, path: &str, flags: u32, mode: u32) -> Result<Box<dyn File>> {
if path == "/dev/null" {
return Ok(Box::new(DevNull));
}
if path == "/dev/zero" {
return Ok(Box::new(DevZero));
}
if path == "/dev/random" || path == "/dev/urandom" || path == "/dev/arandom" {
return Ok(Box::new(DevRandom));
}
if path == "/dev/sgx" {
return Ok(Box::new(DevSgx));
}
let creation_flags = CreationFlags::from_bits_truncate(flags);
let inode = if creation_flags.can_create() {
let (dir_path, file_name) = split_path(&path);
let dir_inode = self.lookup_inode(dir_path)?;
match dir_inode.find(file_name) {
Ok(file_inode) => {
if creation_flags.is_exclusive() {
return_errno!(EEXIST, "file exists");
}
file_inode
}
Err(FsError::EntryNotFound) => {
if !dir_inode.allow_write()? {
return_errno!(EPERM, "file cannot be created");
}
dir_inode.create(file_name, FileType::File, mode)?
}
Err(e) => return Err(Error::from(e)),
}
} else {
self.lookup_inode(&path)?
};
let abs_path = self.convert_to_abs_path(&path);
Ok(Box::new(INodeFile::open(inode, &abs_path, flags)?))
}
/// Lookup INode from the cwd of the process
pub fn lookup_inode(&self, path: &str) -> Result<Arc<dyn INode>> {
debug!("lookup_inode: cwd: {:?}, path: {:?}", self.cwd(), path);
if path.len() > 0 && path.as_bytes()[0] == b'/' {
// absolute path
let abs_path = path.trim_start_matches('/');
let inode = ROOT_INODE.lookup(abs_path)?;
Ok(inode)
} else {
// relative path
let cwd = self.cwd().trim_start_matches('/');
let inode = ROOT_INODE.lookup(cwd)?.lookup(path)?;
Ok(inode)
}
}
/// Convert the path to be absolute
pub fn convert_to_abs_path(&self, path: &str) -> String {
debug!(
"convert_to_abs_path: cwd: {:?}, path: {:?}",
self.cwd(),
path
);
if path.len() > 0 && path.as_bytes()[0] == b'/' {
// path is absolute path already
return path.to_owned();
}
let cwd = {
if !self.cwd().ends_with("/") {
self.cwd().to_owned() + "/"
} else {
self.cwd().to_owned()
}
};
cwd + path
}
}
impl Default for FsView {
fn default() -> Self {
Self {
cwd: "/".to_owned(),
}
}
}

@ -16,6 +16,7 @@ pub use self::file_ops::{AccessMode, CreationFlags, FileMode, Stat, StatusFlags}
pub use self::file_ops::{Flock, FlockType};
pub use self::file_ops::{IoctlCmd, StructuredIoctlArgType, StructuredIoctlNum};
pub use self::file_table::{FileDesc, FileTable};
pub use self::fs_view::FsView;
pub use self::inode_file::{AsINodeFile, INodeExt, INodeFile};
pub use self::pipe::Pipe;
pub use self::rootfs::ROOT_INODE;
@ -28,6 +29,7 @@ mod file;
mod file_ops;
mod file_table;
mod fs_ops;
mod fs_view;
mod hostfs;
mod inode_file;
mod pipe;
@ -35,3 +37,14 @@ mod rootfs;
mod sefs;
mod stdio;
mod syscalls;
/// Split a `path` str to `(base_path, file_name)`.
///
/// Trailing slashes are ignored. A path without any `/` gets `"."` as its
/// base path, and an entry directly under the root gets `"/"`.
fn split_path(path: &str) -> (&str, &str) {
    let trimmed = path.trim_end_matches('/');
    match trimmed.rfind('/') {
        // No separator at all: the entry is relative to the current directory.
        None => (".", trimmed),
        // The only separator is the leading one: the parent is the root.
        Some(0) => ("/", &trimmed[1..]),
        Some(last_slash) => (&trimmed[..last_slash], &trimmed[last_slash + 1..]),
    }
}

@ -160,15 +160,11 @@ pub fn do_pipe2(flags: u32) -> Result<[FileDesc; 2]> {
let status_flags = StatusFlags::from_bits_truncate(flags);
debug!("pipe2: flags: {:?} {:?}", creation_flags, status_flags);
let current_ref = process::get_current();
let current = current_ref.lock().unwrap();
let current = current!();
let pipe = Pipe::new(status_flags)?;
let file_table_ref = current.get_files();
let mut file_table = file_table_ref.lock().unwrap();
let close_on_spawn = creation_flags.must_close_on_spawn();
let reader_fd = file_table.put(Arc::new(Box::new(pipe.reader)), close_on_spawn);
let writer_fd = file_table.put(Arc::new(Box::new(pipe.writer)), close_on_spawn);
let reader_fd = current.add_file(Arc::new(Box::new(pipe.reader)), close_on_spawn);
let writer_fd = current.add_file(Arc::new(Box::new(pipe.writer)), close_on_spawn);
trace!("pipe2: reader_fd: {}, writer_fd: {}", reader_fd, writer_fd);
Ok([reader_fd, writer_fd])
}

@ -27,10 +27,10 @@ pub fn do_eventfd2(init_val: u32, flags: i32) -> Result<isize> {
Arc::new(Box::new(event))
};
let fd = process::put_file(
let fd = current!().add_file(
file_ref,
inner_flags.contains(EventCreationFlags::EFD_CLOEXEC),
)?;
);
Ok(fd as isize)
}
@ -307,10 +307,28 @@ pub fn do_chdir(path: *const i8) -> Result<isize> {
let path = from_user::clone_cstring_safely(path)?
.to_string_lossy()
.into_owned();
file_ops::do_chdir(&path)?;
fs_ops::do_chdir(&path)?;
Ok(0)
}
/// Copy the current working directory, as a NUL-terminated string, into the
/// user buffer `buf_ptr[..size]`.
///
/// Fails with `ERANGE` when the buffer cannot hold the path plus the
/// terminating NUL byte. On success the buffer pointer itself is returned.
pub fn do_getcwd(buf_ptr: *mut u8, size: usize) -> Result<isize> {
    // The buffer is validated before a slice is formed over it.
    from_user::check_mut_array(buf_ptr, size)?;
    let buf = unsafe { std::slice::from_raw_parts_mut(buf_ptr, size) };

    let cwd = fs_ops::do_getcwd()?;
    let cwd_len = cwd.len();
    if cwd_len + 1 > buf.len() {
        return_errno!(ERANGE, "buf is not long enough");
    }

    // Path bytes first, then the terminating NUL right after them.
    let (path_bytes, tail) = buf.split_at_mut(cwd_len);
    path_bytes.copy_from_slice(cwd.as_bytes());
    tail[0] = 0;

    // getcwd requires returning buf_ptr if success
    Ok(buf_ptr as isize)
}
pub fn do_rename(oldpath: *const i8, newpath: *const i8) -> Result<isize> {
let oldpath = from_user::clone_cstring_safely(oldpath)?
.to_string_lossy()

@ -43,11 +43,8 @@ use std::backtrace::{self, PrintFormat};
use std::ffi::CStr; // a borrowed C string
use std::panic;
use error::*;
use prelude::*;
// Override prelude::Result with error::Result
use error::Result;
use crate::prelude::*;
use crate::process::pid_t;
#[macro_use]
mod prelude;

@ -3,5 +3,5 @@ use super::*;
mod rlimit;
mod uname;
pub use self::rlimit::{do_prlimit, resource_t, rlimit_t, ResourceLimits, ResourceLimitsRef};
pub use self::rlimit::{do_prlimit, resource_t, rlimit_t, ResourceLimits};
pub use self::uname::{do_uname, utsname_t};

@ -5,7 +5,6 @@ use process::pid_t;
pub struct ResourceLimits {
rlimits: [rlimit_t; RLIMIT_COUNT],
}
pub type ResourceLimitsRef = Arc<SgxMutex<ResourceLimits>>;
impl ResourceLimits {
pub fn get(&self, resource: resource_t) -> &rlimit_t {
@ -87,20 +86,25 @@ impl resource_t {
}
}
/// Get or set resource limits.
///
/// The man page suggests that this system call works on a per-process basis
/// and the input argument pid can only be process ID, not thread ID. This
/// (unnecessary) restriction is lifted by our implementation. Nevertheless,
/// since the rlimits object is shared between threads in a process, the
/// semantics of limiting resource usage on a per-process basis are preserved.
pub fn do_prlimit(
pid: pid_t,
resource: resource_t,
new_limit: Option<&rlimit_t>,
old_limit: Option<&mut rlimit_t>,
) -> Result<()> {
let process_ref = if pid == 0 {
process::get_current()
let process = if pid == 0 {
current!()
} else {
process::get(pid).cause_err(|_| errno!(ESRCH, "invalid pid"))?
process::table::get_thread(pid).cause_err(|_| errno!(ESRCH, "invalid pid"))?
};
let mut process = process_ref.lock().unwrap();
let rlimits_ref = process.get_rlimits();
let mut rlimits = rlimits_ref.lock().unwrap();
let mut rlimits = process.rlimits().lock().unwrap();
if let Some(old_limit) = old_limit {
*old_limit = *rlimits.get(resource)
}

@ -92,7 +92,7 @@ impl EpollFile {
pub fn control(&self, op: EpollCtlCmd, fd: FileDesc, event: Option<&EpollEvent>) -> Result<()> {
let host_fd = {
let fd_ref = process::get_file(fd)?;
let fd_ref = current!().file(fd)?;
if let Ok(socket) = fd_ref.as_socket() {
socket.fd()
} else if let Ok(eventfd) = fd_ref.as_event() {

@ -10,8 +10,7 @@ pub fn do_poll(pollfds: &mut [libc::pollfd], timeout: c_int) -> Result<usize> {
// Untrusted pollfd's that will be modified by OCall
let mut u_pollfds: Vec<libc::pollfd> = pollfds.to_vec();
let current_ref = process::get_current();
let mut proc = current_ref.lock().unwrap();
let current = current!();
for (i, pollfd) in pollfds.iter_mut().enumerate() {
// Poll should just ignore negative fds
if pollfd.fd < 0 {
@ -20,11 +19,7 @@ pub fn do_poll(pollfds: &mut [libc::pollfd], timeout: c_int) -> Result<usize> {
continue;
}
let file_ref = proc
.get_files()
.lock()
.unwrap()
.get(pollfd.fd as FileDesc)?;
let file_ref = current.file(pollfd.fd as FileDesc)?;
if let Ok(socket) = file_ref.as_socket() {
// convert libos fd to host fd in the copy to keep pollfds unchanged
u_pollfds[i].fd = socket.fd();
@ -58,9 +53,6 @@ pub fn do_poll(pollfds: &mut [libc::pollfd], timeout: c_int) -> Result<usize> {
}
}
// Unlock the current process as early as possible
drop(proc);
let num_events = try_libc!(libc::ocall::poll(
u_pollfds.as_mut_ptr(),
u_pollfds.len() as u64,

@ -14,9 +14,8 @@ pub fn do_select(
let mut host_to_libos_fd = [0; libc::FD_SETSIZE];
let mut polls = Vec::<libc::pollfd>::new();
let current_ref = process::get_current();
let mut proc = current_ref.lock().unwrap();
let file_table = proc.get_files().lock().unwrap();
let current = current!();
let file_table = current.files().lock().unwrap();
for fd in 0..nfds {
let fd_ref = file_table.get(fd as FileDesc)?;
@ -78,9 +77,8 @@ pub fn do_select(
});
}
// Unlock the current process and its file table as early as possible
// Unlock the file table as early as possible
drop(file_table);
drop(proc);
let timeout = match timeout {
None => -1,

@ -12,7 +12,7 @@ pub fn do_sendmsg(fd: c_int, msg_ptr: *const msghdr, flags_c: c_int) -> Result<i
fd, msg_ptr, flags_c
);
let file_ref = process::get_file(fd as FileDesc)?;
let file_ref = current!().file(fd as FileDesc)?;
if let Ok(socket) = file_ref.as_socket() {
let msg_c = {
from_user::check_ptr(msg_ptr)?;
@ -40,7 +40,7 @@ pub fn do_recvmsg(fd: c_int, msg_mut_ptr: *mut msghdr_mut, flags_c: c_int) -> Re
fd, msg_mut_ptr, flags_c
);
let file_ref = process::get_file(fd as FileDesc)?;
let file_ref = current!().file(fd as FileDesc)?;
if let Ok(socket) = file_ref.as_socket() {
let msg_mut_c = {
from_user::check_mut_ptr(msg_mut_ptr)?;
@ -192,7 +192,7 @@ pub fn do_epoll_create1(raw_flags: c_int) -> Result<isize> {
let epoll_file = io_multiplexing::EpollFile::new(flags)?;
let file_ref: Arc<Box<dyn File>> = Arc::new(Box::new(epoll_file));
let close_on_spawn = flags.contains(CreationFlags::O_CLOEXEC);
let fd = process::put_file(file_ref, close_on_spawn)?;
let fd = current!().add_file(file_ref, close_on_spawn);
Ok(fd as isize)
}
@ -211,7 +211,7 @@ pub fn do_epoll_ctl(
None
};
let epfile_ref = process::get_file(epfd as FileDesc)?;
let epfile_ref = current!().file(epfd as FileDesc)?;
let epoll_file = epfile_ref.as_epfile()?;
epoll_file.control(
@ -250,7 +250,7 @@ pub fn do_epoll_wait(
timeout
);
let epfile_ref = process::get_file(epfd as FileDesc)?;
let epfile_ref = current!().file(epfd as FileDesc)?;
let epoll_file = epfile_ref.as_epfile()?;
let count = epoll_file.wait(&mut inner_events, timeout)?;

@ -12,12 +12,24 @@ pub use std::sync::{
Arc, SgxMutex, SgxMutexGuard, SgxRwLock, SgxRwLockReadGuard, SgxRwLockWriteGuard,
};
// Override prelude::Result with error::Result
pub use crate::error::Result;
pub use crate::error::*;
pub use crate::fs::{File, FileDesc, FileRef};
pub use crate::process::pid_t;
// Emit a debug-level log record carrying the line number and file name
// produced by `line!()` and `file!()`; handy as a quick "execution got here"
// marker.
macro_rules! debug_trace {
() => {
debug!("> Line = {}, File = {}", line!(), file!())
};
}
// Shorthand for obtaining the current thread: expands to a call to
// `crate::process::current::get()`.
macro_rules! current {
() => {
crate::process::current::get()
};
}
pub fn align_up(addr: usize, align: usize) -> usize {
debug_assert!(align != 0 && align.is_power_of_two());
align_down(addr + (align - 1), align)

@ -0,0 +1,36 @@
use super::process::IDLE;
use super::{Thread, ThreadRef};
/// Get and set the current thread/process.
use crate::prelude::*;
/// Return a new reference to the thread recorded in the thread-local slot.
///
/// The slot holds a raw pointer produced by `Arc::into_raw` (see `replace`
/// and the slot's initializer), i.e. it owns one strong reference. This
/// function clones that reference without taking it away from the slot.
pub fn get() -> ThreadRef {
let current_ptr = CURRENT_THREAD_PTR.with(|cell| cell.get());
// Temporarily rebuild the `Arc` owned by the slot so that it can be cloned.
let current_ref = unsafe { Arc::from_raw(current_ptr) };
let current_ref_clone = current_ref.clone();
// Turn the rebuilt `Arc` back into a raw pointer instead of dropping it, so
// the strong reference owned by the slot is not released.
Arc::into_raw(current_ref);
current_ref_clone
}
/// Make `thread_ref` the current thread.
///
/// Panics if the thread's tid is not positive. The reference to the
/// previously current thread is released.
pub(super) fn set(thread_ref: ThreadRef) {
    assert!(thread_ref.tid() > 0);
    drop(replace(thread_ref));
}
/// Make the idle thread current again and hand back the thread that was
/// current until now.
pub(super) fn reset() -> ThreadRef {
    let idle = IDLE.clone();
    replace(idle)
}
/// Store `thread_ref` in the thread-local slot and return the thread that
/// was stored there before.
///
/// Ownership of one strong reference moves into the slot (as a raw pointer),
/// and the strong reference previously owned by the slot moves to the caller.
fn replace(thread_ref: ThreadRef) -> ThreadRef {
    let new_thread_ptr = Arc::into_raw(thread_ref);
    let old_thread_ptr = CURRENT_THREAD_PTR.with(|cp| cp.replace(new_thread_ptr));
    // SAFETY: every pointer stored in `CURRENT_THREAD_PTR` comes from
    // `Arc::into_raw` (here or in the slot's initializer), and the slot owns
    // exactly one strong reference for it, which is reclaimed here.
    unsafe { Arc::from_raw(old_thread_ptr) }
}
thread_local! {
    // By default, the current thread is the idle (tid = 0).
    //
    // The cell stores a raw pointer obtained from `Arc::into_raw`, i.e. the slot
    // itself owns one strong reference to the thread it points to.
    //
    // TODO: figure out why RefCell<ThreadRef> is not working as expected
    static CURRENT_THREAD_PTR: Cell<*const Thread> = {
        Cell::new(Arc::into_raw(IDLE.clone()))
    };
}

@ -1,4 +1,20 @@
use super::*;
use crate::prelude::*;
/// Gets or sets the user-space FS base of the current thread's task.
///
/// * `ARCH_SET_FS`: `addr` itself is the new FS value.
/// * `ARCH_GET_FS`: the current FS value is written to `*addr`.
/// * `ARCH_SET_GS` / `ARCH_GET_GS`: rejected with `EINVAL`.
///
/// NOTE(review): `addr` is dereferenced without validation here; presumably the
/// caller has checked the pointer — confirm.
pub fn do_arch_prctl(code: ArchPrctlCode, addr: *mut usize) -> Result<()> {
    debug!("do_arch_prctl: code: {:?}, addr: {:?}", code, addr);
    match code {
        ArchPrctlCode::ARCH_SET_FS => {
            let new_fs = addr as usize;
            current!().task().set_user_fs(new_fs);
        }
        ArchPrctlCode::ARCH_GET_FS => {
            let fs = current!().task().user_fs();
            unsafe {
                *addr = fs;
            }
        }
        ArchPrctlCode::ARCH_SET_GS | ArchPrctlCode::ARCH_GET_GS => {
            return_errno!(EINVAL, "GS cannot be accessed from the user space");
        }
    }
    Ok(())
}
#[allow(non_camel_case_types)]
#[derive(Debug)]
@ -20,30 +36,3 @@ impl ArchPrctlCode {
}
}
}
/// Pre-refactor implementation of `arch_prctl`.
///
/// `ARCH_SET_FS` stores `addr` as the user FS value of the current process's task;
/// `ARCH_GET_FS` writes that value to `*addr`. Both GS codes fail with `EINVAL`.
pub fn do_arch_prctl(code: ArchPrctlCode, addr: *mut usize) -> Result<()> {
    // NOTE(review): `{:#o}` prints the address in octal.
    debug!(
        "do_arch_prctl: code: {:?}, addr: {:#o}",
        code, addr as usize
    );
    match code {
        ArchPrctlCode::ARCH_SET_FS => {
            // The process lock is held while the task is updated.
            let current_ref = get_current();
            let mut current = current_ref.lock().unwrap();
            let task = &mut current.task;
            task.set_user_fs(addr as usize);
        }
        ArchPrctlCode::ARCH_GET_FS => {
            let current_ref = get_current();
            let current = current_ref.lock().unwrap();
            let task = &current.task;
            unsafe {
                *addr = task.get_user_fs();
            }
        }
        ArchPrctlCode::ARCH_SET_GS | ArchPrctlCode::ARCH_GET_GS => {
            return_errno!(EINVAL, "GS cannot be accessed from the user space");
        }
    }
    Ok(())
}

@ -0,0 +1,248 @@
use std::ptr::NonNull;
use super::table::{self};
use super::task::{self, Task};
use super::thread::{Thread, ThreadBuilder};
use crate::prelude::*;
use crate::vm::{ProcessVM, VMRange};
/// Create and execute a new thread.
///
/// The new thread belongs to the process of the calling thread and shares the
/// caller's VM, file table, FS view and resource limits.
///
/// * `flags` - clone flags; validated by `check_clone_args`.
/// * `user_rsp` - the user stack pointer of the new thread. The entry point of the
///   new thread is read from this address.
/// * `ptid` - where to store the new tid if `CLONE_PARENT_SETTID` is given.
/// * `ctid` - where to store the new tid if `CLONE_CHILD_SETTID` is given, and the
///   address to clear on thread exit if `CLONE_CHILD_CLEARTID` is given.
/// * `new_tls` - the TLS value handed to the new task.
///
/// Returns the tid of the new thread.
///
/// # Errors
///
/// Fails if the arguments are inconsistent, if `user_rsp` lies in no known stack
/// region, or if the task or thread cannot be built.
pub fn do_clone(
    flags: CloneFlags,
    user_rsp: usize,
    ptid: Option<NonNull<pid_t>>,
    ctid: Option<NonNull<pid_t>>,
    new_tls: Option<usize>,
) -> Result<pid_t> {
    debug!(
        "clone: flags: {:?}, stack_addr: {:?}, ptid: {:?}, ctid: {:?}, new_tls: {:?}",
        flags, user_rsp, ptid, ctid, new_tls
    );

    check_clone_args(flags, user_rsp, ptid, ctid, new_tls)?;

    // Get thread entry, an implicit argument passed on the stack.
    //
    // The calling convention of Occlum clone syscall requires the user to
    // store the entry point of the new thread at the top of the user stack.
    //
    // FIXME: this is workaround to passing more than 6 arguments in syscall.
    // TODO: add pointer checking
    let thread_entry = unsafe { *(user_rsp as *mut usize) };

    let new_thread_ref = {
        let current = current!();
        let vm = current.vm().clone();
        let task = {
            // The VM lock is only needed to look up the stack region.
            let vm = vm.lock().unwrap();
            let user_stack_range = guess_user_stack_bound(&vm, user_rsp)?;
            let user_stack_base = user_stack_range.end();
            let user_stack_limit = user_stack_range.start();
            unsafe {
                Task::new(
                    thread_entry,
                    user_rsp,
                    user_stack_base,
                    user_stack_limit,
                    new_tls,
                )?
            }
        };
        let files = current.files().clone();
        let rlimits = current.rlimits().clone();
        let fs = current.fs().clone();

        let mut builder = ThreadBuilder::new()
            .process(current.process().clone())
            .vm(vm)
            .task(task)
            .fs(fs)
            .files(files)
            .rlimits(rlimits);
        // Only CLONE_CHILD_CLEARTID asks for ctid to be cleared when the thread
        // exits. CLONE_CHILD_SETTID alone must not register it.
        if flags.contains(CloneFlags::CLONE_CHILD_CLEARTID) {
            if let Some(ctid) = ctid {
                builder = builder.clear_ctid(ctid);
            }
        }
        builder.build()?
    };
    let new_tid = new_thread_ref.tid();
    table::add_thread(new_thread_ref.clone());
    info!("Thread created: tid = {}", new_tid);

    if flags.contains(CloneFlags::CLONE_PARENT_SETTID) {
        let ptid = ptid.expect("check_clone_args ensures ptid is given with CLONE_PARENT_SETTID");
        unsafe {
            *ptid.as_ptr() = new_tid;
        }
    }
    if flags.contains(CloneFlags::CLONE_CHILD_SETTID) {
        let ctid = ctid.expect("check_clone_args ensures ctid is given with CLONE_CHILD_SETTID");
        unsafe {
            *ctid.as_ptr() = new_tid;
        }
    }

    // The local reference is no longer needed; hand it over instead of cloning.
    task::enqueue_and_exec(new_thread_ref);
    Ok(new_tid)
}
/// Clone flags.
bitflags! {
pub struct CloneFlags : u32 {
const CLONE_VM = 0x00000100;
const CLONE_FS = 0x00000200;
const CLONE_FILES = 0x00000400;
const CLONE_SIGHAND = 0x00000800;
const CLONE_PIDFD = 0x00001000;
const CLONE_PTRACE = 0x00002000;
const CLONE_VFORK = 0x00004000;
const CLONE_PARENT = 0x00008000;
const CLONE_THREAD = 0x00010000;
const CLONE_NEWNS = 0x00020000;
const CLONE_SYSVSEM = 0x00040000;
const CLONE_SETTLS = 0x00080000;
const CLONE_PARENT_SETTID = 0x00100000;
const CLONE_CHILD_CLEARTID = 0x00200000;
const CLONE_DETACHED = 0x00400000;
const CLONE_UNTRACED = 0x00800000;
const CLONE_CHILD_SETTID = 0x01000000;
const CLONE_NEWCGROUP = 0x02000000;
const CLONE_NEWUTS = 0x04000000;
const CLONE_NEWIPC = 0x08000000;
const CLONE_NEWUSER = 0x10000000;
const CLONE_NEWPID = 0x20000000;
const CLONE_NEWNET = 0x40000000;
const CLONE_IO = 0x80000000;
}
}
/// Validates the arguments of `clone`.
///
/// Besides the flag combination itself, the presence of `ptid` and `ctid` must
/// match the flags that use them: `ptid` is given iff `CLONE_PARENT_SETTID` is set,
/// and `ctid` is given iff `CLONE_CHILD_SETTID` or `CLONE_CHILD_CLEARTID` is set.
/// `user_rsp` and `new_tls` are accepted but not inspected.
fn check_clone_args(
    flags: CloneFlags,
    user_rsp: usize,
    ptid: Option<NonNull<pid_t>>,
    ctid: Option<NonNull<pid_t>>,
    new_tls: Option<usize>,
) -> Result<()> {
    check_clone_flags(flags)?;

    let ptid_expected = flags.contains(CloneFlags::CLONE_PARENT_SETTID);
    if ptid.is_some() != ptid_expected {
        return_errno!(EINVAL, "ptid is not consistent with flags");
    }

    let ctid_expected =
        flags.intersects(CloneFlags::CLONE_CHILD_SETTID | CloneFlags::CLONE_CHILD_CLEARTID);
    if ctid.is_some() != ctid_expected {
        return_errno!(EINVAL, "ctid is not consistent with flags");
    }

    Ok(())
}
/// Check whether clone flags are valid.
///
/// The current implementation of clone, which is much less general than the one in Linux,
/// essentially supports creating threads only. So the valid combinations of clone flags
/// are quite limited.
///
/// # Mandatory flags
///
/// The following flags must be given. If not given, errors will be reported:
/// ```text
/// CLONE_VM
/// CLONE_THREAD
/// CLONE_SIGHAND
/// CLONE_FILES
/// CLONE_FS
/// CLONE_SETTLS
/// CLONE_SYSVSEM
/// CLONE_PARENT_SETTID
/// ```
///
/// # Optional flags
///
/// The following flags can be given and are supported:
/// ```text
/// CLONE_CHILD_CLEARTID
/// CLONE_CHILD_SETTID
/// ```
///
/// # Ignored flags
///
/// The following flags are ignored silently:
/// ```text
/// CLONE_DETACHED
/// CLONE_IO
/// CLONE_PARENT
/// ```
///
/// # Unsupported flags
///
/// The following flags are unsupported; giving any of these flags triggers errors.
/// ```text
/// CLONE_VFORK
/// CLONE_NEWCGROUP
/// CLONE_NEWIPC
/// CLONE_NEWNET
/// CLONE_NEWNS
/// CLONE_NEWPID
/// CLONE_NEWUSER
/// CLONE_NEWUTS
/// CLONE_PIDFD
/// CLONE_PTRACE
/// CLONE_UNTRACED
/// ```
///
/// # Errors
///
/// Returns `EINVAL` if a mandatory flag is missing or any unsupported flag is given.
fn check_clone_flags(flags: CloneFlags) -> Result<()> {
    lazy_static! {
        static ref MANDATORY_FLAGS: CloneFlags = {
            CloneFlags::CLONE_VM
                | CloneFlags::CLONE_THREAD
                | CloneFlags::CLONE_SIGHAND
                | CloneFlags::CLONE_FILES
                | CloneFlags::CLONE_FS
                | CloneFlags::CLONE_SETTLS
                | CloneFlags::CLONE_SYSVSEM
                | CloneFlags::CLONE_PARENT_SETTID
        };
        static ref UNSUPPORTED_FLAGS: CloneFlags = {
            CloneFlags::CLONE_VFORK
                | CloneFlags::CLONE_NEWCGROUP
                | CloneFlags::CLONE_NEWIPC
                | CloneFlags::CLONE_NEWNET
                | CloneFlags::CLONE_NEWNS
                | CloneFlags::CLONE_NEWPID
                | CloneFlags::CLONE_NEWUSER
                | CloneFlags::CLONE_NEWUTS
                | CloneFlags::CLONE_PIDFD
                | CloneFlags::CLONE_PTRACE
                | CloneFlags::CLONE_UNTRACED
        };
    }

    // All mandatory flags must be present, hence `contains`.
    if !flags.contains(*MANDATORY_FLAGS) {
        return_errno!(EINVAL, "missing mandatory flags");
    }
    // A single unsupported flag is enough to reject the request, hence
    // `intersects`; `contains` would only fire if every one of them were set.
    if flags.intersects(*UNSUPPORTED_FLAGS) {
        return_errno!(EINVAL, "found unsupported flags");
    }

    Ok(())
}
/// Finds the memory range that contains `user_rsp` and is therefore taken as the
/// bounds of the new thread's stack.
///
/// The candidates are tried in order: an mmap region, the process stack range and
/// the process heap range. Fails with `ESRCH` if none of them contains `user_rsp`.
fn guess_user_stack_bound(vm: &ProcessVM, user_rsp: usize) -> Result<&VMRange> {
    // The first case is most likely
    if let Ok(mmap_range) = vm.find_mmap_region(user_rsp) {
        return Ok(mmap_range);
    }

    // The next two cases are very unlikely, but valid
    let process_stack = vm.get_stack_range();
    if process_stack.contains(user_rsp) {
        return Ok(process_stack);
    }
    let process_heap = vm.get_heap_range();
    if process_heap.contains(user_rsp) {
        return Ok(process_heap);
    }

    // Invalid
    return_errno!(ESRCH, "invalid rsp")
}

@ -0,0 +1,78 @@
use std::intrinsics::atomic_store;
use super::do_futex::futex_wake;
use super::process::ChildProcessFilter;
use super::{table, ThreadRef};
use crate::prelude::*;
/// Terminates the current thread and, if it was the last thread of its process,
/// the process as well.
pub fn do_exit(exit_status: i32) {
    let current_thread = current!();
    let remaining_threads = current_thread.exit(exit_status);

    // Notify a thread, if any, that waits on ctid. See set_tid_address(2) for more info.
    if let Some(ctid_ptr) = current_thread.clear_ctid() {
        let raw_ctid = ctid_ptr.as_ptr();
        unsafe {
            atomic_store(raw_ctid, 0);
        }
        futex_wake(raw_ctid as *const i32, 1);
    }

    // Keep the main thread's tid available as long as the process is not destroyed.
    // This is important as the user space may still attempt to access the main
    // thread's ThreadRef through the process's pid after the process has become
    // a zombie.
    let is_main_thread = current_thread.tid() == current_thread.process().pid();
    if !is_main_thread {
        table::del_thread(current_thread.tid()).expect("tid must be in the table");
    }

    // If this thread is the last thread, then exit the process
    if remaining_threads == 0 {
        do_exit_process(&current_thread, exit_status);
    }
}
/// Terminates the process that `thread` belongs to.
///
/// `thread` is the last thread of the process to exit; it is not necessarily the
/// main thread. If the parent is the idle process (pid 0), nobody will wait for
/// this process, so it is released immediately. Otherwise the process is marked
/// as exited and a parent waiting in `wait4` for a matching child is woken up.
fn do_exit_process(thread: &ThreadRef, exit_status: i32) {
    let process = thread.process();
    let pid = process.pid();

    // If the parent process is the idle process, we can release the process directly.
    if process.parent().pid() == 0 {
        // Deadlock note: Always lock parent then child.
        let mut parent_inner = super::IDLE.process().inner();
        let mut process_inner = process.inner();

        // The only tid of this process still in the table is the main thread's,
        // which equals the pid. The tid of a non-main exiting thread has already
        // been removed by the caller, so deleting `thread.tid()` here would panic
        // and leave the main thread's entry behind.
        let main_tid = pid;
        table::del_thread(main_tid).expect("tid must be in the table");
        table::del_process(pid).expect("pid must be in the table");

        process_inner.exit(exit_status);
        parent_inner.remove_zombie_child(pid);
        return;
    }
    // Otherwise, we need to notify the parent process

    // Lock the parent process to ensure that parent's wait4 cannot miss the current
    // process's exit.
    // Deadlock note: Always lock parent then child.
    let parent = process.parent();
    let mut parent_inner = parent.inner();
    process.inner().exit(exit_status);

    // Wake up the parent if it is waiting on this child
    let waiting_children = parent_inner.waiting_children_mut().unwrap();
    waiting_children.del_and_wake_one_waiter(|waiter_data| -> Option<pid_t> {
        match waiter_data {
            ChildProcessFilter::WithAnyPid => {}
            ChildProcessFilter::WithPid(required_pid) => {
                if process.pid() != *required_pid {
                    return None;
                }
            }
            ChildProcessFilter::WithPgid(required_pgid) => {
                if process.pgid() != *required_pgid {
                    return None;
                }
            }
        }
        Some(process.pid())
    });
}

@ -1,9 +1,10 @@
use super::*;
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
use std::intrinsics::atomic_load;
use std::sync::atomic::{AtomicBool, Ordering};
use time::timespec_t;
use crate::prelude::*;
use crate::time::timespec_t;
/// `FutexOp`, `FutexFlags`, and `futex_op_and_flags_from_u32` are helper types and
/// functions for handling the versatile commands and arguments of futex system

@ -0,0 +1,18 @@
use crate::prelude::*;
/// Returns the pid of the process that the current thread belongs to.
pub fn do_getpid() -> pid_t {
    current!().process().pid()
}
/// Returns the tid of the current thread.
pub fn do_gettid() -> pid_t {
    current!().tid()
}
/// Returns the process group id of the current process.
///
/// Process groups are not implemented, so this is always the placeholder value 1.
pub fn do_getpgid() -> pid_t {
    // TODO: implement process groups
    1
}
/// Returns the pid of the parent of the current thread's process.
pub fn do_getppid() -> pid_t {
    current!().process().parent().pid()
}

@ -1,4 +1,56 @@
use super::*;
use super::table;
/// Process scheduling.
use crate::prelude::*;
/// Retrieves the CPU affinity mask of a thread into `cpu_set` through an OCall.
///
/// A `tid` of 0 is passed to the host as 0 without a table lookup (presumably
/// meaning "the calling thread", as with the Linux syscall — confirm); any other
/// `tid` is translated into the host tid of the corresponding LibOS thread.
///
/// Returns the value reported by the host, i.e. the number of leading bytes of
/// `cpu_set` that are valid.
pub fn do_sched_getaffinity(tid: pid_t, cpu_set: &mut CpuSet) -> Result<usize> {
    let host_tid = if tid == 0 { 0 } else { find_host_tid(tid)? };
    let cpusize = cpu_set.len();
    let buf = cpu_set.as_mut_ptr();
    let num_valid_bytes = try_libc!({
        let mut retval = 0;
        let sgx_status = occlum_ocall_sched_getaffinity(&mut retval, host_tid as i32, cpusize, buf);
        assert!(sgx_status == sgx_status_t::SGX_SUCCESS);
        retval
    }) as usize;
    // Note: the first retval bytes in CpuSet are valid
    Ok(num_valid_bytes)
}
/// Applies the CPU affinity mask in `cpu_set` to a thread through an OCall.
///
/// A `tid` of 0 is passed to the host as 0 without a table lookup; any other `tid`
/// is translated into the host tid of the corresponding LibOS thread.
pub fn do_sched_setaffinity(tid: pid_t, cpu_set: &CpuSet) -> Result<()> {
    let host_tid = if tid == 0 { 0 } else { find_host_tid(tid)? };
    let cpusize = cpu_set.len();
    let buf = cpu_set.as_ptr();
    try_libc!({
        let mut retval = 0;
        let sgx_status = occlum_ocall_sched_setaffinity(&mut retval, host_tid as i32, cpusize, buf);
        assert!(sgx_status == sgx_status_t::SGX_SUCCESS);
        retval
    });
    Ok(())
}
/// Yields the processor by issuing the `occlum_ocall_sched_yield` OCall.
///
/// # Panics
///
/// Panics if the OCall does not return `SGX_SUCCESS`.
pub fn do_sched_yield() {
    unsafe {
        let status = occlum_ocall_sched_yield();
        assert!(status == sgx_status_t::SGX_SUCCESS);
    }
}
/// Translates the tid of a LibOS thread into the tid of the host thread running it.
///
/// Fails if no thread with `tid` is in the thread table, or with `ESRCH` if the
/// thread has no host tid recorded.
fn find_host_tid(tid: pid_t) -> Result<pid_t> {
    let thread = table::get_thread(tid)?;
    // TODO: fix the race condition of host_tid being available.
    let maybe_host_tid = thread.inner().host_tid();
    let host_tid = maybe_host_tid.ok_or_else(|| errno!(ESRCH, "host_tid is not available"))?;
    Ok(host_tid)
}
pub struct CpuSet {
vec: Vec<u8>,
@ -61,53 +113,6 @@ impl std::fmt::UpperHex for CpuSet {
}
}
/// Pre-refactor lookup of the host tid of a process.
///
/// `pid == 0` selects the current process; any other value is looked up via `get`.
fn find_host_tid(pid: pid_t) -> Result<pid_t> {
    let process_ref = if pid == 0 { get_current() } else { get(pid)? };
    let mut process = process_ref.lock().unwrap();
    let host_tid = process.get_host_tid();
    Ok(host_tid)
}
/// Pre-refactor version of `do_sched_getaffinity`, keyed by `pid`.
///
/// Fills `cpu_set` through an OCall and returns the value reported by the host.
pub fn do_sched_getaffinity(pid: pid_t, cpu_set: &mut CpuSet) -> Result<usize> {
    // pid 0 is forwarded to the host unchanged.
    let host_tid = match pid {
        0 => 0,
        _ => find_host_tid(pid)?,
    };
    let buf = cpu_set.as_mut_ptr();
    let cpusize = cpu_set.len();
    let retval = try_libc!({
        let mut retval = 0;
        let sgx_status = occlum_ocall_sched_getaffinity(&mut retval, host_tid as i32, cpusize, buf);
        assert!(sgx_status == sgx_status_t::SGX_SUCCESS);
        retval
    }) as usize;
    // Note: the first retval bytes in CpuSet are valid
    Ok(retval)
}
/// Pre-refactor version of `do_sched_setaffinity`, keyed by `pid`.
///
/// Passes the mask in `cpu_set` to the host through an OCall.
pub fn do_sched_setaffinity(pid: pid_t, cpu_set: &CpuSet) -> Result<()> {
    // pid 0 is forwarded to the host unchanged.
    let host_tid = match pid {
        0 => 0,
        _ => find_host_tid(pid)?,
    };
    let buf = cpu_set.as_ptr();
    let cpusize = cpu_set.len();
    try_libc!({
        let mut retval = 0;
        let sgx_status = occlum_ocall_sched_setaffinity(&mut retval, host_tid as i32, cpusize, buf);
        assert!(sgx_status == sgx_status_t::SGX_SUCCESS);
        retval
    });
    Ok(())
}
/// Pre-refactor version of `do_sched_yield`: issues the `occlum_ocall_sched_yield`
/// OCall and panics if it does not return `SGX_SUCCESS`.
pub fn do_sched_yield() {
    unsafe {
        let status = occlum_ocall_sched_yield();
        assert!(status == sgx_status_t::SGX_SUCCESS);
    }
}
extern "C" {
fn occlum_ocall_sched_getaffinity(
ret: *mut i32,

@ -0,0 +1,11 @@
use std::ptr::NonNull;
use crate::prelude::*;
/// Records `tidptr` as the clear-child-tid address of the current thread and
/// returns the thread's tid.
///
/// A null `tidptr` resets the address to `None`.
pub fn do_set_tid_address(tidptr: *mut pid_t) -> Result<pid_t> {
    debug!("set_tid_address: tidptr: {:?}", tidptr);
    let thread = current!();
    thread.set_clear_ctid(NonNull::new(tidptr));
    Ok(thread.tid())
}

@ -0,0 +1,86 @@
//! Auxiliary Vector.
//!
//! # What is Auxiliary Vector?
//!
//! Here is a concise description of Auxiliary Vector from GNU's manual:
//!
//! > When a program is executed, it receives information from the operating system
//! > about the environment in which it is operating. The form of this information
//! > is a table of key-value pairs, where the keys are from the set of AT_
//! > values in elf.h.
use crate::prelude::*;
/// Keys of the entries of an auxiliary vector.
///
/// The discriminant of each variant is the numeric `AT_*` value stored next to the
/// entry's value.
#[allow(non_camel_case_types)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum AuxKey {
    AT_NULL = 0,      /* end of vector */
    AT_IGNORE = 1,    /* entry should be ignored */
    AT_EXECFD = 2,    /* file descriptor of program */
    AT_PHDR = 3,      /* program headers for program */
    AT_PHENT = 4,     /* size of program header entry */
    AT_PHNUM = 5,     /* number of program headers */
    AT_PAGESZ = 6,    /* system page size */
    AT_BASE = 7,      /* base address of interpreter */
    AT_FLAGS = 8,     /* flags */
    AT_ENTRY = 9,     /* entry point of program */
    AT_NOTELF = 10,   /* program is not ELF */
    AT_UID = 11,      /* real uid */
    AT_EUID = 12,     /* effective uid */
    AT_GID = 13,      /* real gid */
    AT_EGID = 14,     /* effective gid */
    AT_PLATFORM = 15, /* string identifying CPU for optimizations */
    AT_HWCAP = 16,    /* arch dependent hints at CPU capabilities */
    AT_CLKTCK = 17,   /* frequency at which times() increments */
    /* 18...22 not used */
    AT_SECURE = 23, /* secure mode boolean */
    AT_BASE_PLATFORM = 24, /* string identifying real platform, may
                     * differ from AT_PLATFORM. */
    AT_RANDOM = 25, /* address of 16 random bytes */
    AT_HWCAP2 = 26, /* extension of AT_HWCAP */
    /* 28...30 not used */
    AT_EXECFN = 31, /* filename of program */
    AT_SYSINFO = 32,
    /* Occlum-specific entries */
    AT_OCCLUM_ENTRY = 48, /* the entry point of Occlum, i.e., syscall */
}
/// An auxiliary vector: a map from `AuxKey`s to 64-bit values.
#[derive(Clone, Default, Debug)]
pub struct AuxVec {
    table: HashMap<AuxKey, u64>,
}

impl AuxVec {
    /// Creates an empty auxiliary vector.
    pub fn new() -> AuxVec {
        Self::default()
    }

    /// Sets the value of `key`, overwriting any previous value.
    ///
    /// `AT_NULL` and `AT_IGNORE` cannot be set; trying to do so fails with `EINVAL`.
    pub fn set(&mut self, key: AuxKey, val: u64) -> Result<()> {
        match key {
            AuxKey::AT_NULL | AuxKey::AT_IGNORE => {
                return_errno!(EINVAL, "Illegal key");
            }
            _ => {}
        }
        self.table.insert(key, val);
        Ok(())
    }

    /// Returns the value of `key`, if it has been set.
    pub fn get(&self, key: AuxKey) -> Option<u64> {
        self.table.get(&key).copied()
    }

    /// Removes `key` and returns the value it had, if any.
    pub fn del(&mut self, key: AuxKey) -> Option<u64> {
        self.table.remove(&key)
    }

    /// Gives read-only access to all entries.
    pub fn table(&self) -> &HashMap<AuxKey, u64> {
        &self.table
    }
}

@ -1,8 +1,9 @@
use super::*;
use std::ffi::{CStr, CString};
use std::os::raw::c_char;
use {std, std::mem, std::ptr};
use std::{mem, ptr};
use super::aux_vec::{AuxKey, AuxVec};
use crate::prelude::*;
/*
* The initial stack of a process looks like below:
@ -52,7 +53,7 @@ pub fn do_init(
init_area_size: usize,
argv: &[CString],
envp: &[CString],
auxtbl: &AuxTable,
auxtbl: &AuxVec,
) -> Result<usize> {
let stack_buf = unsafe { StackBuf::new(stack_top, init_area_size)? };
let envp_cloned = clone_cstrings_on_stack(&stack_buf, envp)?;
@ -158,7 +159,7 @@ fn clone_cstrings_on_stack<'a, 'b>(
Ok(cstrs_cloned)
}
fn dump_auxtbl_on_stack<'a, 'b>(stack: &'a StackBuf, auxtbl: &'b AuxTable) -> Result<()> {
fn dump_auxtbl_on_stack<'a, 'b>(stack: &'a StackBuf, auxtbl: &'b AuxVec) -> Result<()> {
// For every key-value pair, dump the value first, then the key
stack.put(0 as u64);
stack.put(AuxKey::AT_NULL as u64);
@ -176,80 +177,3 @@ fn dump_cstrptrs_on_stack<'a, 'b>(stack: &'a StackBuf, strptrs: &'b [&'a CStr])
}
Ok(())
}
/// Symbolic values for the entries in the auxiliary table put on the initial stack.
///
/// The discriminant of each variant is the numeric `AT_*` value.
#[allow(non_camel_case_types)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum AuxKey {
    AT_NULL = 0,      /* end of vector */
    AT_IGNORE = 1,    /* entry should be ignored */
    AT_EXECFD = 2,    /* file descriptor of program */
    AT_PHDR = 3,      /* program headers for program */
    AT_PHENT = 4,     /* size of program header entry */
    AT_PHNUM = 5,     /* number of program headers */
    AT_PAGESZ = 6,    /* system page size */
    AT_BASE = 7,      /* base address of interpreter */
    AT_FLAGS = 8,     /* flags */
    AT_ENTRY = 9,     /* entry point of program */
    AT_NOTELF = 10,   /* program is not ELF */
    AT_UID = 11,      /* real uid */
    AT_EUID = 12,     /* effective uid */
    AT_GID = 13,      /* real gid */
    AT_EGID = 14,     /* effective gid */
    AT_PLATFORM = 15, /* string identifying CPU for optimizations */
    AT_HWCAP = 16,    /* arch dependent hints at CPU capabilities */
    AT_CLKTCK = 17,   /* frequency at which times() increments */
    /* 18...22 not used */
    AT_SECURE = 23, /* secure mode boolean */
    AT_BASE_PLATFORM = 24, /* string identifying real platform, may
                     * differ from AT_PLATFORM. */
    AT_RANDOM = 25, /* address of 16 random bytes */
    AT_HWCAP2 = 26, /* extension of AT_HWCAP */
    /* 28...30 not used */
    AT_EXECFN = 31, /* filename of program */
    AT_SYSINFO = 32,
    /* Occlum-specific entries */
    AT_OCCLUM_ENTRY = 48, /* the entry point of Occlum, i.e., syscall */
}
/// Pre-refactor auxiliary table: a map from `AuxKey`s to 64-bit values.
#[derive(Clone, Default, Debug)]
pub struct AuxTable {
    table: HashMap<AuxKey, u64>,
}

impl AuxTable {
    /// Creates an empty table.
    pub fn new() -> AuxTable {
        AuxTable {
            table: HashMap::new(),
        }
    }
}

impl AuxTable {
    /// Sets the value of `key`, overwriting any previous value.
    /// Fails with `EINVAL` for `AT_NULL` and `AT_IGNORE`.
    pub fn set(&mut self, key: AuxKey, val: u64) -> Result<()> {
        if key == AuxKey::AT_NULL || key == AuxKey::AT_IGNORE {
            return_errno!(EINVAL, "Illegal key");
        }
        self.table
            .entry(key)
            .and_modify(|val_mut| *val_mut = val)
            .or_insert(val);
        Ok(())
    }

    /// Returns the value of `key`, if it has been set.
    pub fn get(&self, key: AuxKey) -> Option<u64> {
        self.table.get(&key).map(|val_ref| *val_ref)
    }

    /// Removes `key` and returns the value it had, if any.
    pub fn del(&mut self, key: AuxKey) -> Option<u64> {
        self.table.remove(&key)
    }

    /// Gives read-only access to all entries.
    pub fn table(&self) -> &HashMap<AuxKey, u64> {
        &self.table
    }
}

@ -1,6 +1,9 @@
use super::*;
use std::ptr;
use super::super::elf_file::ElfFile;
use crate::prelude::*;
use crate::vm::{ProcessVM, ProcessVMBuilder};
pub fn do_init<'a, 'b>(
elf_file: &'b ElfFile<'a>,
ldso_elf_file: &'b ElfFile<'a>,

@ -1,68 +1,107 @@
use super::*;
use std::ffi::{CStr, CString};
use std::path::Path;
use std::sgxfs::SgxFile;
use super::fs::{
CreationFlags, File, FileDesc, FileMode, FileTable, HostStdioFds, INodeExt, StdinFile,
use self::aux_vec::{AuxKey, AuxVec};
use super::elf_file::{ElfFile, ElfHeader, ProgramHeader, ProgramHeaderExt};
use super::process::ProcessBuilder;
use super::task::Task;
use super::{table, task, ProcessRef, ThreadRef};
use crate::fs::{
CreationFlags, File, FileDesc, FileMode, FileTable, FsView, HostStdioFds, INodeExt, StdinFile,
StdoutFile, ROOT_INODE,
};
use super::misc::ResourceLimitsRef;
use super::vm::{ProcessVM, ProcessVMBuilder};
use crate::prelude::*;
use crate::vm::ProcessVM;
pub use self::elf_file::{ElfFile, ProgramHeaderExt};
use self::init_stack::{AuxKey, AuxTable};
mod elf_file;
mod aux_vec;
mod init_stack;
mod init_vm;
/// Spawn a new process and execute it in a new host thread.
pub fn do_spawn(
elf_path: &str,
argv: &[CString],
envp: &[CString],
file_actions: &[FileAction],
parent_ref: &ProcessRef,
current_ref: &ThreadRef,
) -> Result<pid_t> {
let (new_tid, new_process_ref) =
new_process(elf_path, argv, envp, file_actions, None, parent_ref)?;
task::enqueue_and_exec_task(new_tid, new_process_ref);
Ok(new_tid)
let exec_now = true;
do_spawn_common(
elf_path,
argv,
envp,
file_actions,
None,
current_ref,
exec_now,
)
}
/// Spawn a new process but execute it later.
pub fn do_spawn_without_exec(
elf_path: &str,
argv: &[CString],
envp: &[CString],
file_actions: &[FileAction],
host_stdio_fds: &HostStdioFds,
parent_ref: &ProcessRef,
current_ref: &ThreadRef,
) -> Result<pid_t> {
let (new_tid, new_process_ref) = new_process(
let exec_now = false;
do_spawn_common(
elf_path,
argv,
envp,
file_actions,
Some(host_stdio_fds),
parent_ref,
)?;
task::enqueue_task(new_tid, new_process_ref);
Ok(new_tid)
current_ref,
exec_now,
)
}
/// Shared implementation of the spawn entry points.
///
/// Creates a new process from `elf_path` and enqueues its main thread. With
/// `exec_now` set, `task::enqueue_and_exec` is used; otherwise `task::enqueue`.
/// Returns the pid of the new process.
fn do_spawn_common(
    elf_path: &str,
    argv: &[CString],
    envp: &[CString],
    file_actions: &[FileAction],
    host_stdio_fds: Option<&HostStdioFds>,
    current_ref: &ThreadRef,
    exec_now: bool,
) -> Result<pid_t> {
    let process = new_process(
        elf_path,
        argv,
        envp,
        file_actions,
        host_stdio_fds,
        current_ref,
    )?;

    let main_thread = process
        .main_thread()
        .expect("the main thread is just created; it must exist");
    match exec_now {
        true => {
            task::enqueue_and_exec(main_thread);
        }
        false => {
            task::enqueue(main_thread);
        }
    }

    Ok(process.pid())
}
/// Create a new process and its main thread.
fn new_process(
elf_path: &str,
argv: &[CString],
envp: &[CString],
file_actions: &[FileAction],
host_stdio_fds: Option<&HostStdioFds>,
parent_ref: &ProcessRef,
) -> Result<(pid_t, ProcessRef)> {
let elf_buf = load_elf_to_vec(elf_path, parent_ref)
current_ref: &ThreadRef,
) -> Result<ProcessRef> {
let elf_buf = load_elf_to_vec(elf_path, current_ref)
.cause_err(|e| errno!(e.errno(), "cannot load the executable"))?;
let ldso_path = "/lib/ld-musl-x86_64.so.1";
let ldso_elf_buf = load_elf_to_vec(ldso_path, parent_ref)
let ldso_elf_buf = load_elf_to_vec(ldso_path, current_ref)
.cause_err(|e| errno!(e.errno(), "cannot load ld.so"))?;
let exec_elf_file =
@ -70,10 +109,11 @@ fn new_process(
let ldso_elf_file =
ElfFile::new(&ldso_elf_buf).cause_err(|e| errno!(e.errno(), "invalid ld.so"))?;
let (new_pid, new_process_ref) = {
let cwd = parent_ref.lock().unwrap().get_cwd().to_owned();
let new_process_ref = {
let process_ref = current_ref.process().clone();
let vm = init_vm::do_init(&exec_elf_file, &ldso_elf_file)?;
let auxtbl = init_auxtbl(&vm, &exec_elf_file)?;
let auxvec = init_auxvec(&vm, &exec_elf_file)?;
// Notify debugger to load the symbols from elf file
let ldso_elf_base = vm.get_elf_ranges()[1].start() as u64;
@ -105,7 +145,7 @@ fn new_process(
};
let user_stack_base = vm.get_stack_base();
let user_stack_limit = vm.get_stack_limit();
let user_rsp = init_stack::do_init(user_stack_base, 4096, argv, envp, &auxtbl)?;
let user_rsp = init_stack::do_init(user_stack_base, 4096, argv, envp, &auxvec)?;
unsafe {
Task::new(
ldso_entry,
@ -118,17 +158,31 @@ fn new_process(
};
let vm_ref = Arc::new(SgxMutex::new(vm));
let files_ref = {
let files = init_files(parent_ref, file_actions, host_stdio_fds)?;
let files = init_files(current_ref, file_actions, host_stdio_fds)?;
Arc::new(SgxMutex::new(files))
};
let rlimits_ref = Default::default();
Process::new(&cwd, elf_path, task, vm_ref, files_ref, rlimits_ref, false)?
let fs_ref = Arc::new(SgxMutex::new(current_ref.fs().lock().unwrap().clone()));
ProcessBuilder::new()
.vm(vm_ref)
.exec_path(elf_path)
.parent(process_ref)
.task(task)
.fs(fs_ref)
.files(files_ref)
.build()?
};
parent_adopts_new_child(&parent_ref, &new_process_ref);
process_table::put(new_pid, new_process_ref.clone());
let new_tid = new_pid;
info!("Process created: elf = {}, tid = {}", elf_path, new_tid);
Ok((new_tid, new_process_ref))
table::add_process(new_process_ref.clone());
table::add_thread(new_process_ref.main_thread().unwrap());
info!(
"Process created: elf = {}, pid = {}",
elf_path,
new_process_ref.pid()
);
Ok(new_process_ref)
}
#[derive(Debug)]
@ -145,8 +199,9 @@ pub enum FileAction {
Close(FileDesc),
}
fn load_elf_to_vec(elf_path: &str, parent_ref: &ProcessRef) -> Result<Vec<u8>> {
let inode = parent_ref
fn load_elf_to_vec(elf_path: &str, current_ref: &ThreadRef) -> Result<Vec<u8>> {
let inode = current_ref
.fs()
.lock()
.unwrap()
.lookup_inode(elf_path)
@ -170,16 +225,15 @@ fn load_elf_to_vec(elf_path: &str, parent_ref: &ProcessRef) -> Result<Vec<u8>> {
}
fn init_files(
parent_ref: &ProcessRef,
current_ref: &ThreadRef,
file_actions: &[FileAction],
host_stdio_fds: Option<&HostStdioFds>,
) -> Result<FileTable> {
// Usually, we just inherit the file table from the parent
let parent = parent_ref.lock().unwrap();
let should_inherit_file_table = parent.get_pid() > 0;
// Usually, we just inherit the file table from the current process
let should_inherit_file_table = current_ref.process().pid() > 0;
if should_inherit_file_table {
// Fork: clone file table
let mut cloned_file_table = parent.get_files().lock().unwrap().clone();
let mut cloned_file_table = current_ref.files().lock().unwrap().clone();
// Perform file actions to modify the cloned file table
for file_action in file_actions {
match file_action {
@ -189,7 +243,12 @@ fn init_files(
oflag,
fd,
} => {
let file = parent.open_file(path.as_str(), oflag, mode)?;
let file =
current_ref
.fs()
.lock()
.unwrap()
.open_file(path.as_str(), oflag, mode)?;
let file_ref: Arc<Box<dyn File>> = Arc::new(file);
let creation_flags = CreationFlags::from_bits_truncate(oflag);
cloned_file_table.put_at(fd, file_ref, creation_flags.must_close_on_spawn());
@ -210,7 +269,6 @@ fn init_files(
cloned_file_table.close_on_spawn();
return Ok(cloned_file_table);
}
drop(parent);
// But, for init process, we initialize file table for it
let mut file_table = FileTable::new();
@ -230,42 +288,35 @@ fn init_files(
Ok(file_table)
}
fn init_auxtbl(process_vm: &ProcessVM, exec_elf_file: &ElfFile) -> Result<AuxTable> {
let mut auxtbl = AuxTable::new();
auxtbl.set(AuxKey::AT_PAGESZ, 4096)?;
auxtbl.set(AuxKey::AT_UID, 0)?;
auxtbl.set(AuxKey::AT_GID, 0)?;
auxtbl.set(AuxKey::AT_EUID, 0)?;
auxtbl.set(AuxKey::AT_EGID, 0)?;
auxtbl.set(AuxKey::AT_SECURE, 0)?;
auxtbl.set(AuxKey::AT_SYSINFO, 0)?;
fn init_auxvec(process_vm: &ProcessVM, exec_elf_file: &ElfFile) -> Result<AuxVec> {
let mut auxvec = AuxVec::new();
auxvec.set(AuxKey::AT_PAGESZ, 4096)?;
auxvec.set(AuxKey::AT_UID, 0)?;
auxvec.set(AuxKey::AT_GID, 0)?;
auxvec.set(AuxKey::AT_EUID, 0)?;
auxvec.set(AuxKey::AT_EGID, 0)?;
auxvec.set(AuxKey::AT_SECURE, 0)?;
auxvec.set(AuxKey::AT_SYSINFO, 0)?;
let exec_elf_base = process_vm.get_elf_ranges()[0].start() as u64;
let exec_elf_header = exec_elf_file.elf_header();
auxtbl.set(AuxKey::AT_PHENT, exec_elf_header.ph_entry_size() as u64)?;
auxtbl.set(AuxKey::AT_PHNUM, exec_elf_header.ph_count() as u64)?;
auxtbl.set(AuxKey::AT_PHDR, exec_elf_base + exec_elf_header.ph_offset())?;
auxtbl.set(
auxvec.set(AuxKey::AT_PHENT, exec_elf_header.ph_entry_size() as u64)?;
auxvec.set(AuxKey::AT_PHNUM, exec_elf_header.ph_count() as u64)?;
auxvec.set(AuxKey::AT_PHDR, exec_elf_base + exec_elf_header.ph_offset())?;
auxvec.set(
AuxKey::AT_ENTRY,
exec_elf_base + exec_elf_header.entry_point(),
)?;
let ldso_elf_base = process_vm.get_elf_ranges()[1].start() as u64;
auxtbl.set(AuxKey::AT_BASE, ldso_elf_base)?;
auxvec.set(AuxKey::AT_BASE, ldso_elf_base)?;
let syscall_addr = __occlum_syscall as *const () as u64;
auxtbl.set(AuxKey::AT_OCCLUM_ENTRY, syscall_addr)?;
auxvec.set(AuxKey::AT_OCCLUM_ENTRY, syscall_addr)?;
// TODO: init AT_EXECFN
// auxtbl.set_val(AuxKey::AT_EXECFN, "program_name")?;
// auxvec.set_val(AuxKey::AT_EXECFN, "program_name")?;
Ok(auxtbl)
}
fn parent_adopts_new_child(parent_ref: &ProcessRef, child_ref: &ProcessRef) {
let mut parent = parent_ref.lock().unwrap();
let mut child = child_ref.lock().unwrap();
parent.children.push(Arc::downgrade(child_ref));
child.parent = Some(parent_ref.clone());
Ok(auxvec)
}
extern "C" {

@ -0,0 +1,67 @@
use super::process::{ChildProcessFilter, ProcessInner};
use super::wait::Waiter;
use super::{table, ProcessRef, ProcessStatus};
use crate::prelude::*;
/// Waits for a child process selected by `child_filter` to exit.
///
/// Returns the pid and the exit status of the child; the child is released as part
/// of the call. If a matching child is already a zombie, the call returns
/// immediately; otherwise the calling thread sleeps until such a child exits.
///
/// # Errors
///
/// Returns `ECHILD` if the current process has no child that matches `child_filter`.
pub fn do_wait4(child_filter: &ChildProcessFilter) -> Result<(pid_t, i32)> {
    // Lock the process early to ensure that we do not miss any changes in
    // children processes
    let thread = current!();
    let process = thread.process();

    // Lock order: always lock parent then child to avoid deadlock
    let mut process_inner = process.inner();

    let unwaited_children = process_inner
        .children()
        .unwrap()
        .iter()
        .filter(|child| match child_filter {
            ChildProcessFilter::WithAnyPid => true,
            ChildProcessFilter::WithPid(required_pid) => child.pid() == *required_pid,
            ChildProcessFilter::WithPgid(required_pgid) => child.pgid() == *required_pgid,
        })
        .collect::<Vec<&ProcessRef>>();

    if unwaited_children.is_empty() {
        return_errno!(ECHILD, "Cannot find any unwaited children");
    }

    // Return immediately if a child that we wait for has already exited
    let zombie_child = unwaited_children
        .iter()
        .find(|child| child.status() == ProcessStatus::Zombie);
    if let Some(zombie_child) = zombie_child {
        let zombie_pid = zombie_child.pid();
        let exit_status = free_zombie_child(process_inner, zombie_pid);
        return Ok((zombie_pid, exit_status));
    }

    let waiter = Waiter::new(child_filter);
    process_inner
        .waiting_children_mut()
        .unwrap()
        .add_waiter(&waiter);
    // After adding the waiter, we can safely release the lock on the process inner
    // without risking missing events from the process's children.
    drop(process_inner);

    // Wait until a child has interesting events
    let zombie_pid = waiter.sleep_until_woken_with_result();

    // Re-acquire the lock that was released before sleeping.
    let process_inner = process.inner();
    let exit_status = free_zombie_child(process_inner, zombie_pid);
    Ok((zombie_pid, exit_status))
}
/// Releases the zombie child `zombie_pid` of a process and returns its exit status.
///
/// `parent_inner` is the locked inner state of the parent; the guard is consumed,
/// so the parent is unlocked when this function returns.
///
/// # Panics
///
/// Panics if `zombie_pid` is missing from the thread or process table, or if the
/// child has no exit status.
fn free_zombie_child(mut parent_inner: SgxMutexGuard<ProcessInner>, zombie_pid: pid_t) -> i32 {
    // Remove zombie from the process and thread table. The tid removed is the one
    // equal to the pid.
    table::del_thread(zombie_pid).expect("tid must be in the table");
    table::del_process(zombie_pid).expect("pid must be in the table");

    // Remove zombie from its parent
    let zombie = parent_inner.remove_zombie_child(zombie_pid);
    debug_assert!(zombie.status() == ProcessStatus::Zombie);

    // Read the exit status out of the zombie's inner state
    let zombie_inner = zombie.inner();
    zombie_inner.exit_status().unwrap()
}

@ -1,8 +1,8 @@
use super::*;
use xmas_elf::symbol_table::Entry;
use xmas_elf::{header, program, sections};
use crate::prelude::*;
pub use xmas_elf::header::HeaderPt2 as ElfHeader;
pub use xmas_elf::program::{ProgramHeader, ProgramIter};

@ -1,157 +0,0 @@
use super::*;
use std::intrinsics::atomic_store;
// TODO: make sure Processes are released eventually
/// Selects which child processes a `wait4` call is interested in (pre-refactor
/// definition).
#[derive(Clone, Copy, Debug)]
pub enum ChildProcessFilter {
    /// Any child.
    WithAnyPID,
    /// The child with the given pid.
    WithPID(pid_t),
    /// Children whose process group id equals the given value.
    WithPGID(pid_t),
}

// NOTE(review): every variant visible here holds at most a `pid_t`, so this manual
// impl looks redundant — confirm.
unsafe impl Send for ChildProcessFilter {}
/// Pre-refactor implementation of process exit.
///
/// Marks the current process as a zombie with `exit_status`, re-parents its
/// children to the idle process, wakes a futex waiter on the clear-child-tid
/// address, and finally either releases the process (if detached) or wakes a parent
/// that waits for it.
pub fn do_exit(exit_status: i32) {
    let current_ref = get_current();
    let mut current = current_ref.lock().unwrap();
    let parent_ref = current.get_parent().clone();

    // Update current
    current.exit_status = exit_status;
    current.status = Status::ZOMBIE;

    // Update children
    for child_ref in current.get_children_iter() {
        let mut child = child_ref.lock().unwrap();
        child.parent = Some(IDLE_PROCESS.clone());
    }
    current.children.clear();

    // Notify another process, if any, that waits on ctid (see set_tid_address)
    if let Some(ctid) = current.clear_child_tid {
        unsafe {
            atomic_store(ctid, 0);
        }
        futex_wake(ctid as *const i32, 1);
    }

    // If the process is detached, no need to notify the parent
    if current.is_detached {
        let current_tid = current.get_tid();
        // Release the lock on the current process before the parent is touched.
        drop(current);
        remove_zombie_child(&parent_ref, current_tid);
        return;
    }

    // Notify the parent process if necessary
    let (mut parent, current) = {
        // Always lock parent before its child
        drop(current);
        lock_two_in_order(&parent_ref, &current_ref)
    };
    // Wake up the parent if it is waiting on this child
    if parent.waiting_children.is_none() {
        return;
    }
    let mut wait_queue = parent.waiting_children.as_mut().unwrap();
    // The closure yields the pid handed to a waiter whose filter matches this
    // process, and `None` for waiters that are not interested in it.
    wait_queue.del_and_wake_one_waiter(|waiter_data| -> Option<pid_t> {
        match waiter_data {
            ChildProcessFilter::WithAnyPID => {}
            ChildProcessFilter::WithPID(required_pid) => {
                if current.get_pid() != *required_pid {
                    return None;
                }
            }
            ChildProcessFilter::WithPGID(required_pgid) => {
                if current.get_pgid() != *required_pgid {
                    return None;
                }
            }
        }
        Some(current.get_pid())
    });
}
/// Waits for a child of the current process that matches `child_filter`.
///
/// Returns the pid of the child. Fails with `ECHILD` when no child matches the
/// filter. When the function has to sleep, the child's exit status is written
/// to `exit_status` after wake-up.
pub fn do_wait4(child_filter: &ChildProcessFilter, exit_status: &mut i32) -> Result<pid_t> {
let current_ref = get_current();
let waiter = {
let mut current = current_ref.lock().unwrap();
let mut any_child_to_wait_for = false;
for child_ref in current.get_children_iter() {
let child = child_ref.lock().unwrap();
let may_wait_for = match child_filter {
ChildProcessFilter::WithAnyPID => true,
ChildProcessFilter::WithPID(required_pid) => child.get_pid() == *required_pid,
ChildProcessFilter::WithPGID(required_pgid) => child.get_pgid() == *required_pgid,
};
if !may_wait_for {
continue;
}
// Return immediately as a child that we wait for has already exited
// NOTE(review): this early-return path neither writes `exit_status` nor
// removes the child from `current.children`, and it reports `child.pid`
// rather than `child.get_pid()` -- confirm this is intended
if child.status == Status::ZOMBIE {
process_table::remove(child.pid);
return Ok(child.pid);
}
any_child_to_wait_for = true;
}
if !any_child_to_wait_for {
return_errno!(ECHILD, "No such child");
}
// Register a waiter so that an exiting child (see do_exit) can wake us up
let waiter = Waiter::new(child_filter);
let mut wait_queue = WaitQueue::new();
wait_queue.add_waiter(&waiter);
current.waiting_children = Some(wait_queue);
waiter
};
// Wait until a child has interesting events
let child_pid = waiter.sleep_until_woken_with_result();
// Remove the child from the parent
*exit_status = remove_zombie_child(&current_ref, child_pid);
let mut current = current_ref.lock().unwrap();
current.waiting_children = None;
Ok(child_pid)
}
/// Removes the zombie child with thread id `child_tid` from `parent_ref` and
/// from the process table, returning the child's exit status.
///
/// Panics if no live child with that tid exists, or if the child is not a
/// zombie.
fn remove_zombie_child(parent_ref: &ProcessRef, child_tid: pid_t) -> i32 {
    let mut parent = parent_ref.lock().unwrap();

    // Find the zombie child process.
    //
    // The index must refer to the *unfiltered* `children` vector, since that is
    // what `swap_remove` operates on. Enumerating the filtered iterator (which
    // skips dead weak references) could yield an index pointing at the wrong
    // entry.
    let (child_i, child_ref) = parent
        .children
        .iter()
        .enumerate()
        .find_map(|(child_i, child_weak)| {
            let child_ref = child_weak.upgrade()?;
            let is_target = {
                let child = child_ref.lock().unwrap();
                let is_target = child.get_tid() == child_tid;
                if is_target {
                    assert!(child.get_status() == Status::ZOMBIE);
                }
                is_target
            };
            if is_target {
                Some((child_i, child_ref))
            } else {
                None
            }
        })
        .expect("cannot find the zombie child");

    // Remove the zombie child from parent
    parent.children.swap_remove(child_i);
    // Remove the zombie child from process table
    process_table::remove(child_tid);

    // Return the exit status
    let child = child_ref.lock().unwrap();
    child.get_exit_status()
}
/// Locks `first_ref` and then `second_ref`, returning both guards in that order.
fn lock_two_in_order<'a>(
    first_ref: &'a ProcessRef,
    second_ref: &'a ProcessRef,
) -> (SgxMutexGuard<'a, Process>, SgxMutexGuard<'a, Process>) {
    let first_guard = first_ref.lock().unwrap();
    let second_guard = second_ref.lock().unwrap();
    (first_guard, second_guard)
}

@ -1,110 +1,54 @@
pub use self::arch_prctl::{do_arch_prctl, ArchPrctlCode};
pub use self::exit::{do_exit, do_wait4, ChildProcessFilter};
pub use self::futex::{
futex_op_and_flags_from_u32, futex_requeue, futex_wait, futex_wake, FutexFlags, FutexOp,
};
pub use self::process::{Status, IDLE_PROCESS};
pub use self::process_table::get;
pub use self::sched::{do_sched_getaffinity, do_sched_setaffinity, do_sched_yield, CpuSet};
pub use self::spawn::{do_spawn, do_spawn_without_exec, ElfFile, FileAction, ProgramHeaderExt};
pub use self::task::{get_current, get_current_tid, run_task, Task};
pub use self::thread::{do_clone, do_set_tid_address, CloneFlags, ThreadGroup};
pub use self::wait::{WaitQueue, Waiter};
/// Process/thread subsystem.
///
/// The subsystem implements process/thread-related system calls, which are
/// mainly based on the three concepts below:
///
/// * [`Process`]. A process has a parent and may have multiple child processes and
/// can own multiple threads.
/// * [`Thread`]. A thread belongs to one and only one process and owns a set
/// of OS resources, e.g., virtual memory, file tables, etc.
/// * [`Task`]. A task belongs to one and only one thread, for which it deals with
/// the low-level details about thread execution.
use crate::fs::{FileRef, FileTable, FsView};
use crate::misc::ResourceLimits;
use crate::prelude::*;
use crate::vm::ProcessVM;
use self::process::{ChildProcessFilter, ProcessBuilder, ProcessInner};
use self::thread::{ThreadBuilder, ThreadId, ThreadInner};
use self::wait::{WaitQueue, Waiter};
pub use self::do_spawn::do_spawn_without_exec;
pub use self::process::{Process, ProcessStatus, IDLE};
pub use self::syscalls::*;
pub use self::task::Task;
pub use self::thread::{Thread, ThreadStatus};
mod do_arch_prctl;
mod do_clone;
mod do_exit;
mod do_futex;
mod do_getpid;
mod do_sched;
mod do_set_tid_address;
mod do_spawn;
mod do_wait4;
mod process;
mod syscalls;
mod thread;
mod wait;
pub mod current;
pub mod elf_file;
pub mod table;
pub mod task;
/// Integer type used for process and thread IDs; the lower-case name mirrors
/// the C type, hence the lint exemption.
#[allow(non_camel_case_types)]
pub type pid_t = u32;
/// Bookkeeping for one schedulable entity: identity, exit state, family links
/// and references to its (possibly shared) OS resources.
#[derive(Debug)]
pub struct Process {
task: Task,
status: Status,
pid: pid_t,
pgid: pid_t,
tgid: pid_t,
host_tid: pid_t,
exit_status: i32,
is_detached: bool,
// TODO: move cwd, root_inode into a FileSystem structure
// TODO: should cwd be a String or INode?
cwd: String,
elf_path: String,
// Address to zero and futex-wake on exit (see do_exit)
clear_child_tid: Option<*mut pid_t>,
parent: Option<ProcessRef>,
children: Vec<ProcessWeakRef>,
// Wait queue installed by do_wait4 while this process waits for a child
waiting_children: Option<WaitQueue<ChildProcessFilter, pid_t>>,
//thread_group: ThreadGroupRef,
vm: ProcessVMRef,
file_table: FileTableRef,
rlimits: ResourceLimitsRef,
}
// Shared-ownership aliases for processes, threads and the resources they hold.
pub type ProcessRef = Arc<SgxMutex<Process>>;
pub type ProcessWeakRef = std::sync::Weak<SgxMutex<Process>>;
pub type ProcessRef = Arc<Process>;
pub type ThreadRef = Arc<Thread>;
pub type FileTableRef = Arc<SgxMutex<FileTable>>;
pub type ProcessVMRef = Arc<SgxMutex<ProcessVM>>;
pub type ThreadGroupRef = Arc<SgxMutex<ThreadGroup>>;
/// Returns the pid of the current process.
pub fn do_getpid() -> pid_t {
    get_current().lock().unwrap().get_pid()
}
/// Returns the tid of the current process.
pub fn do_gettid() -> pid_t {
    get_current().lock().unwrap().get_tid()
}
/// Returns the process group id of the current process.
pub fn do_getpgid() -> pid_t {
    get_current().lock().unwrap().get_pgid()
}
/// Returns the pid of the parent of the current process.
pub fn do_getppid() -> pid_t {
    // The guard on the current process is a temporary of this statement, so it
    // is released before the parent gets locked below.
    let parent_ref = get_current().lock().unwrap().get_parent().clone();
    let ppid = parent_ref.lock().unwrap().get_pid();
    ppid
}
mod arch_prctl;
mod exit;
mod futex;
mod process;
mod process_table;
mod sched;
mod spawn;
mod task;
mod thread;
mod wait;
/// Look up `fd` in the file table of the current process.
///
/// Fails with whatever error the file table reports for an unknown descriptor.
pub fn get_file(fd: FileDesc) -> Result<FileRef> {
    let process_ref = get_current();
    let process = process_ref.lock().unwrap();
    let file_table = process.get_files().lock().unwrap();
    let file = file_table.get(fd as FileDesc)?;
    Ok(file)
}
/// Insert `new_file` into the file table of the current process and return the
/// descriptor assigned to it.
pub fn put_file(new_file: FileRef, close_on_spawn: bool) -> Result<FileDesc> {
    let process_ref = get_current();
    let process = process_ref.lock().unwrap();
    let mut file_table = process.get_files().lock().unwrap();
    let assigned_fd = file_table.put(new_file, close_on_spawn);
    Ok(assigned_fd)
}
use super::*;
use fs::{File, FileDesc, FileRef, FileTable};
use misc::ResourceLimitsRef;
use time::GLOBAL_PROFILER;
use vm::ProcessVM;
// Shared, mutex-protected handles to a file-system view and to resource limits.
pub type FsViewRef = Arc<SgxMutex<FsView>>;
pub type ResourceLimitsRef = Arc<SgxMutex<ResourceLimits>>;

@ -1,152 +0,0 @@
use super::task::Task;
use super::*;
use fs::{File, FileRef, FileTable};
use vm::ProcessVM;
// The idle process: pid 0, no parent, no children, and default-initialized
// task and resources. It is what orphaned children are re-parented to.
lazy_static! {
// Dummy object to make all processes having a parent
pub static ref IDLE_PROCESS: ProcessRef = {
Arc::new(SgxMutex::new(Process {
task: Default::default(),
status: Default::default(),
pid: 0,
pgid: 1,
tgid: 0,
host_tid: 0,
exit_status: 0,
is_detached: false,
cwd: "/".to_owned(),
elf_path: "/".to_owned(),
clear_child_tid: None,
parent: None,
children: Vec::new(),
waiting_children: Default::default(),
vm: Default::default(),
file_table: Default::default(),
rlimits: Default::default(),
}))
};
}
impl Process {
    /// Allocates a fresh pid and builds a new process around `task`.
    ///
    /// The process starts without a parent or children and with the default
    /// status. Returns the new pid together with the shared process reference.
    // TODO: this constructor has become complicated enough to justify using builders
    pub fn new(
        cwd: &str,
        elf_path: &str,
        task: Task,
        vm_ref: ProcessVMRef,
        file_table_ref: FileTableRef,
        rlimits_ref: ResourceLimitsRef,
        is_detached: bool,
    ) -> Result<(pid_t, ProcessRef)> {
        let new_pid = process_table::alloc_pid();
        let process = Process {
            task,
            status: Default::default(),
            pid: new_pid,
            pgid: 1, // TODO: implement pgid
            tgid: new_pid,
            host_tid: 0,
            cwd: cwd.to_owned(),
            elf_path: elf_path.to_owned(),
            clear_child_tid: None,
            exit_status: 0,
            is_detached,
            parent: None,
            children: Vec::new(),
            waiting_children: None,
            vm: vm_ref,
            file_table: file_table_ref,
            rlimits: rlimits_ref,
        };
        Ok((new_pid, Arc::new(SgxMutex::new(process))))
    }

    /// Borrows the low-level task.
    pub fn get_task(&self) -> &Task {
        &self.task
    }

    /// Mutably borrows the low-level task.
    pub fn get_task_mut(&mut self) -> &mut Task {
        &mut self.task
    }

    /// pid as seen by the user is actually the thread group ID
    pub fn get_pid(&self) -> pid_t {
        self.tgid
    }

    /// tid as seen by the user is actually the process ID
    pub fn get_tid(&self) -> pid_t {
        self.pid
    }

    /// Returns the process group id.
    pub fn get_pgid(&self) -> pid_t {
        self.pgid
    }

    /// Returns the recorded host thread id.
    pub fn get_host_tid(&self) -> pid_t {
        self.host_tid
    }

    /// Records the host thread id.
    pub fn set_host_tid(&mut self, host_tid: pid_t) {
        self.host_tid = host_tid;
    }

    /// Returns the current status.
    pub fn get_status(&self) -> Status {
        self.status
    }

    /// Returns the stored exit status.
    pub fn get_exit_status(&self) -> i32 {
        self.exit_status
    }

    /// Returns the current working directory.
    pub fn get_cwd(&self) -> &str {
        self.cwd.as_str()
    }

    /// Returns the path of the ELF file.
    pub fn get_elf_path(&self) -> &str {
        self.elf_path.as_str()
    }

    /// Returns the handle to the virtual memory.
    pub fn get_vm(&self) -> &ProcessVMRef {
        &self.vm
    }

    /// Returns the handle to the file table.
    pub fn get_files(&self) -> &FileTableRef {
        &self.file_table
    }

    /// Returns the parent; panics if no parent has been set.
    pub fn get_parent(&self) -> &ProcessRef {
        self.parent.as_ref().unwrap()
    }

    /// Iterates over the children that are still alive, i.e. whose weak
    /// reference can be upgraded.
    pub fn get_children_iter(&self) -> impl Iterator<Item = ProcessRef> + '_ {
        let weak_children = self.children.iter();
        weak_children.filter_map(|weak| weak.upgrade())
    }

    /// Changes the working directory: an absolute `path` replaces it, anything
    /// else is appended to it (with a `/` separator inserted when needed).
    pub fn change_cwd(&mut self, path: &str) {
        if path.starts_with('/') {
            // absolute
            self.cwd = path.to_owned();
            return;
        }
        // relative
        if !self.cwd.ends_with('/') {
            self.cwd.push('/');
        }
        self.cwd.push_str(path);
    }

    /// Returns the handle to the resource limits.
    pub fn get_rlimits(&self) -> &ResourceLimitsRef {
        &self.rlimits
    }
}
// Hand the pid back to the process table once the process object goes away.
impl Drop for Process {
fn drop(&mut self) {
process_table::free_pid(self.pid);
}
}
// NOTE(review): Process holds a raw pointer (`clear_child_tid`), so Send/Sync
// are asserted manually here -- confirm the pointer is only used safely.
unsafe impl Send for Process {}
unsafe impl Sync for Process {}
/// Execution status of a process.
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum Status {
    RUNNING,
    INTERRUPTIBLE,
    ZOMBIE,
    STOPPED,
}

/// A process starts out as `RUNNING`.
impl Default for Status {
    fn default() -> Self {
        Self::RUNNING
    }
}

@ -0,0 +1,123 @@
use super::super::task::Task;
use super::super::thread::{ThreadBuilder, ThreadId};
use super::super::{FileTableRef, FsViewRef, ProcessRef, ProcessVMRef, ResourceLimitsRef};
use super::{Process, ProcessInner};
use crate::prelude::*;
/// Builder that assembles a process together with its main thread.
#[derive(Debug)]
pub struct ProcessBuilder {
// Id for the main thread (and hence the pid); allocated at build time if absent
tid: Option<ThreadId>,
// Builder of the main thread; thread-level settings are forwarded to it
thread_builder: Option<ThreadBuilder>,
// Mandatory fields
// NOTE(review): no builder method in this file assigns `vm`; the `vm()`
// setter forwards to the thread builder instead
vm: Option<ProcessVMRef>,
// Optional fields, which have reasonable default values
exec_path: Option<String>,
parent: Option<ProcessRef>,
no_parent: bool,
}
impl ProcessBuilder {
    /// Creates a builder with no settings and a fresh builder for the main thread.
    pub fn new() -> Self {
        let thread_builder = ThreadBuilder::new();
        Self {
            tid: None,
            thread_builder: Some(thread_builder),
            vm: None,
            exec_path: None,
            parent: None,
            no_parent: false,
        }
    }

    /// Sets the id of the main thread, which is also the pid of the process.
    pub fn tid(mut self, tid: ThreadId) -> Self {
        self.tid = Some(tid);
        self
    }

    /// Sets the path of the executable.
    pub fn exec_path(mut self, exec_path: &str) -> Self {
        self.exec_path = Some(exec_path.to_string());
        self
    }

    /// Sets the parent process.
    pub fn parent(mut self, parent: ProcessRef) -> Self {
        self.parent = Some(parent);
        self
    }

    /// Declares whether the process is built without a parent.
    pub fn no_parent(mut self, no_parent: bool) -> Self {
        self.no_parent = no_parent;
        self
    }

    /// Sets the task of the main thread.
    pub fn task(self, task: Task) -> Self {
        self.thread_builder(|tb| tb.task(task))
    }

    /// Sets the virtual memory of the main thread.
    pub fn vm(self, vm: ProcessVMRef) -> Self {
        self.thread_builder(|tb| tb.vm(vm))
    }

    /// Sets the file-system view of the main thread.
    pub fn fs(self, fs: FsViewRef) -> Self {
        self.thread_builder(|tb| tb.fs(fs))
    }

    /// Sets the file table of the main thread.
    pub fn files(self, files: FileTableRef) -> Self {
        self.thread_builder(|tb| tb.files(files))
    }

    /// Sets the resource limits of the main thread.
    pub fn rlimits(self, rlimits: ResourceLimitsRef) -> Self {
        self.thread_builder(|tb| tb.rlimits(rlimits))
    }

    /// Builds the process and its main thread.
    ///
    /// Fails with `EINVAL` unless exactly one of "a parent was given" and
    /// "`no_parent` is set" holds, and propagates any error from building the
    /// main thread. On success the new process has been appended to its
    /// parent's children (if it has a parent).
    pub fn build(mut self) -> Result<ProcessRef> {
        // Process's pid == Main thread's tid
        let tid = self.tid.take().unwrap_or_else(ThreadId::new);
        let pid = tid.as_u32() as pid_t;

        // Check whether parent is given as expected
        if self.no_parent != self.parent.is_none() {
            return_errno!(
                EINVAL,
                "parent and no_parent config contradicts with one another"
            );
        }

        // Build a new process
        let new_process = {
            let exec_path = self.exec_path.take().unwrap_or_default();
            let parent = self.parent.take().map(SgxRwLock::new);
            let inner = SgxMutex::new(ProcessInner::new());
            Arc::new(Process {
                pid,
                exec_path,
                parent,
                inner,
            })
        };

        // Build the main thread of the new process. The returned reference is
        // not used here, but it is kept alive until the function returns.
        let mut self_ = self.thread_builder(|tb| tb.tid(tid).process(new_process.clone()));
        let _main_thread = self_.thread_builder.take().unwrap().build()?;

        // Associate the new process with its parent
        if !self_.no_parent {
            new_process
                .parent()
                .inner()
                .children_mut()
                .unwrap()
                .push(new_process.clone());
        }

        Ok(new_process)
    }

    /// Applies `f` to the thread builder and stores the result back.
    fn thread_builder<F>(mut self, f: F) -> Self
    where
        F: FnOnce(ThreadBuilder) -> ThreadBuilder,
    {
        let thread_builder = self.thread_builder.take().unwrap();
        self.thread_builder = Some(f(thread_builder));
        self
    }
}

@ -0,0 +1,38 @@
use super::super::task::Task;
use super::super::thread::ThreadId;
use super::{ProcessBuilder, ThreadRef};
/// Process 0, a.k.a. the idle process.
///
/// The idle process has no practical use except giving process 1 (a.k.a. the init process)
/// a parent.
use crate::prelude::*;
use crate::vm::ProcessVM;
// Lazily-created main thread of the idle process; initialization panics if
// the idle process cannot be built.
lazy_static! {
pub static ref IDLE: ThreadRef =
{ create_idle_thread().expect("creating the idle process should never fail") };
}
/// Builds the idle process (pid 0, no parent) from placeholder resources and
/// returns its main thread.
fn create_idle_thread() -> Result<ThreadRef> {
    // Placeholder values for the fields a process cannot be built without
    let zero_tid = ThreadId::zero();
    let placeholder_vm = Arc::new(SgxMutex::new(ProcessVM::default()));
    let placeholder_task = Task::default();

    // Put the idle process together
    let builder = ProcessBuilder::new()
        .tid(zero_tid)
        .vm(placeholder_vm)
        .task(placeholder_task)
        .no_parent(true);
    let idle_process = builder.build()?;
    debug_assert!(idle_process.pid() == 0);

    let idle_thread = idle_process.main_thread().unwrap();
    debug_assert!(idle_thread.tid() == 0);

    // We do not add the idle process/thread to the process/thread table.
    // This ensures that the idle process is not accessible from the user space.
    Ok(idle_thread)
}

@ -0,0 +1,304 @@
use std::fmt;
use super::wait::WaitQueue;
use super::{ProcessRef, ThreadRef};
use crate::prelude::*;
pub use self::builder::ProcessBuilder;
pub use self::idle::IDLE;
mod builder;
mod idle;
/// A process: an immutable identity plus lock-protected mutable state.
pub struct Process {
// Immutable info
pid: pid_t,
exec_path: String,
// Mutable info
// `None` only when the process was built without a parent; the lock allows
// re-parenting (see ProcessInner::exit)
parent: Option<SgxRwLock<ProcessRef>>,
inner: SgxMutex<ProcessInner>,
}
/// Externally visible status of a process.
#[derive(Debug, PartialEq, Clone, Copy)]
pub enum ProcessStatus {
Running,
Stopped,
// The process has exited and only its exit status remains
Zombie,
}
impl Process {
    /// Get process ID.
    pub fn pid(&self) -> pid_t {
        self.pid
    }

    /// Get process group ID
    // TODO: implement process group
    pub fn pgid(&self) -> pid_t {
        0
    }

    /// Get the parent process.
    ///
    /// Precondition. The process is not the idle process.
    pub fn parent(&self) -> ProcessRef {
        debug_assert!(self.pid() != 0);
        // All non-idle process has a parent
        let parent_lock = self.parent.as_ref().unwrap();
        let current_parent = parent_lock.read().unwrap();
        current_parent.clone()
    }

    /// Get the main thread.
    ///
    /// The main thread is a thread whose tid equals to the process's pid.
    /// Usually, the main thread is the last thread that exits in a process.
    pub fn main_thread(&self) -> Option<ThreadRef> {
        self.leader_thread()
            .filter(|leader| leader.tid() == self.pid())
    }

    /// Get the leader thread.
    ///
    /// As long as there are some threads in the process, there is a leader.
    /// The leader thread is usually the main thread, but not always.
    pub fn leader_thread(&self) -> Option<ThreadRef> {
        let inner = self.inner();
        inner.leader_thread()
    }

    /// Get status.
    pub fn status(&self) -> ProcessStatus {
        let inner = self.inner();
        inner.status()
    }

    /// Get the path of the executable
    pub fn exec_path(&self) -> &str {
        self.exec_path.as_str()
    }

    /// Get the internal representation of the process.
    ///
    /// For the purpose of encapsulation, this method is invisible to other subsystems.
    pub(super) fn inner(&self) -> SgxMutexGuard<ProcessInner> {
        let locked = self.inner.lock();
        locked.unwrap()
    }
}
/// Mutable state of a process; what is stored depends on whether the process
/// is still alive.
pub enum ProcessInner {
// The process has not exited yet
Live {
status: LiveStatus,
children: Vec<ProcessRef>,
// Waiters interested in children of this process
waiting_children: WaitQueue<ChildProcessFilter, pid_t>,
threads: Vec<ThreadRef>,
},
// The process has exited; only the exit status is kept
Zombie {
exit_status: i32,
},
}
impl ProcessInner {
    /// Creates the state of a new, running process without children or threads.
    pub fn new() -> Self {
        Self::Live {
            status: LiveStatus::Running,
            children: Vec::new(),
            waiting_children: WaitQueue::new(),
            threads: Vec::new(),
        }
    }

    /// Returns the status of the process.
    pub fn status(&self) -> ProcessStatus {
        match self {
            Self::Live { status, .. } => (*status).into(),
            Self::Zombie { .. } => ProcessStatus::Zombie,
        }
    }

    /// Returns the children, or `None` for a zombie.
    pub fn children(&self) -> Option<&Vec<ProcessRef>> {
        match self {
            Self::Live { children, .. } => Some(children),
            Self::Zombie { .. } => None,
        }
    }

    /// Returns the children mutably, or `None` for a zombie.
    pub fn children_mut(&mut self) -> Option<&mut Vec<ProcessRef>> {
        match self {
            Self::Live { children, .. } => Some(children),
            Self::Zombie { .. } => None,
        }
    }

    /// Returns the number of children (zero for a zombie).
    ///
    /// This is a read-only query, so a shared borrow suffices.
    pub fn num_children(&self) -> usize {
        self.children().map_or(0, |children| children.len())
    }

    /// Returns the threads, or `None` for a zombie.
    pub fn threads(&self) -> Option<&Vec<ThreadRef>> {
        match self {
            Self::Live { threads, .. } => Some(threads),
            Self::Zombie { .. } => None,
        }
    }

    /// Returns the threads mutably, or `None` for a zombie.
    pub fn threads_mut(&mut self) -> Option<&mut Vec<ThreadRef>> {
        match self {
            Self::Live { threads, .. } => Some(threads),
            Self::Zombie { .. } => None,
        }
    }

    /// Returns the number of threads (zero for a zombie).
    ///
    /// This is a read-only query, so a shared borrow suffices.
    pub fn num_threads(&self) -> usize {
        self.threads().map_or(0, |threads| threads.len())
    }

    /// Returns the first thread in the thread list, if there is one.
    pub fn leader_thread(&self) -> Option<ThreadRef> {
        self.threads().and_then(|threads| threads.first().cloned())
    }

    /// Returns the queue of waiters mutably, or `None` for a zombie.
    pub fn waiting_children_mut(&mut self) -> Option<&mut WaitQueue<ChildProcessFilter, pid_t>> {
        match self {
            Self::Live {
                waiting_children, ..
            } => Some(waiting_children),
            Self::Zombie { .. } => None,
        }
    }

    /// Removes the child with pid `zombie_pid` from the children and returns it.
    ///
    /// Panics if this process is a zombie itself or has no such child. The
    /// order of the remaining children is not preserved.
    pub fn remove_zombie_child(&mut self, zombie_pid: pid_t) -> ProcessRef {
        let children = self
            .children_mut()
            .expect("a zombie process has no children");
        let zombie_i = children
            .iter()
            .position(|child| child.pid() == zombie_pid)
            .expect("the zombie must be a child of this process");
        children.swap_remove(zombie_i)
    }

    /// Turns the process into a zombie that only remembers `exit_status`.
    ///
    /// Precondition. The process is running and has no threads left.
    pub fn exit(&mut self, exit_status: i32) {
        // Check preconditions
        debug_assert!(self.status() == ProcessStatus::Running);
        debug_assert!(self.num_threads() == 0);

        // When this process exits, its children are re-parented to the process
        // of the idle thread
        for child in self.children().unwrap() {
            let mut parent = child.parent.as_ref().unwrap().write().unwrap();
            *parent = IDLE.process().clone();
        }

        *self = Self::Zombie { exit_status };
    }

    /// Returns the exit status of a zombie, or `None` if the process is alive.
    pub fn exit_status(&self) -> Option<i32> {
        // Check preconditions
        debug_assert!(self.status() == ProcessStatus::Zombie);

        match self {
            Self::Zombie { exit_status } => Some(*exit_status),
            Self::Live { .. } => None,
        }
    }
}
/// Two processes are considered equal when they have the same pid.
impl PartialEq for Process {
    fn eq(&self, other: &Self) -> bool {
        let (lhs_pid, rhs_pid) = (self.pid(), other.pid());
        lhs_pid == rhs_pid
    }
}
// Why a manual implementation of the Debug trait?
//
// An explicit implementation of the Debug trait is required since the Process
// and Thread structs refer to each other. The automatically-derived
// implementations for the two structs could therefore recurse infinitely.
impl fmt::Debug for Process {
    /// Prints the pid, the executable path, the parent's pid (absent for
    /// pid 0) and the inner state.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Process 0 has no parent to ask for
        let ppid = match self.pid() {
            0 => None,
            _ => Some(self.parent().pid()),
        };
        let mut out = f.debug_struct("Process");
        out.field("pid", &self.pid());
        out.field("exec_path", &self.exec_path());
        out.field("ppid", &ppid);
        out.field("inner", &self.inner());
        out.finish()
    }
}
impl fmt::Debug for ProcessInner {
    /// Prints children and threads by id only, so that formatting never
    /// descends into other processes or threads.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            ProcessInner::Live {
                status,
                children,
                threads,
                ..
            } => {
                let child_pids: Vec<pid_t> = children.iter().map(|child| child.pid()).collect();
                let thread_tids: Vec<pid_t> =
                    threads.iter().map(|thread| thread.tid()).collect();
                f.debug_struct("ProcessInner::Live")
                    .field("status", &status)
                    .field("child_pids", &child_pids)
                    .field("thread_tids", &thread_tids)
                    .finish()
            }
            ProcessInner::Zombie { exit_status, .. } => {
                let mut out = f.debug_struct("ProcessInner::Zombie");
                out.field("exit_status", exit_status);
                out.finish()
            }
        }
    }
}
/// Status of a process that has not exited yet.
#[derive(Debug, PartialEq, Clone, Copy)]
pub enum LiveStatus {
    Running,
    Stopped,
}

/// Maps a live status to the corresponding externally visible status.
///
/// Implementing `From` (rather than `Into`) also provides
/// `Into<ProcessStatus> for LiveStatus` through the standard blanket impl, so
/// existing `.into()` call sites keep working.
impl From<LiveStatus> for ProcessStatus {
    fn from(status: LiveStatus) -> Self {
        match status {
            LiveStatus::Running => ProcessStatus::Running,
            LiveStatus::Stopped => ProcessStatus::Stopped,
        }
    }
}
/// Selects which child processes a waiter is interested in.
///
/// The enum only carries plain `pid_t` values, so it is `Send` (and `Sync`)
/// automatically; no `unsafe impl` is needed.
#[derive(Clone, Copy, Debug)]
pub enum ChildProcessFilter {
    /// Any child matches.
    WithAnyPid,
    /// Only the child with the given pid matches.
    WithPid(pid_t),
    /// Only children in the given process group match.
    WithPgid(pid_t),
}

@ -1,38 +0,0 @@
use super::*;
use std::sync::atomic::{AtomicU32, Ordering};
// Global map from pid to process, initially empty.
lazy_static! {
static ref PROCESS_TABLE: SgxMutex<HashMap<pid_t, ProcessRef>> =
{ SgxMutex::new(HashMap::new()) };
}
/// Registers `process` under `pid`, replacing any entry already stored there.
pub fn put(pid: pid_t, process: ProcessRef) {
    let mut table = PROCESS_TABLE.lock().unwrap();
    table.insert(pid, process);
}
/// Drops the entry for `pid`, if there is one.
pub fn remove(pid: pid_t) {
    let mut table = PROCESS_TABLE.lock().unwrap();
    table.remove(&pid);
}
/// Looks up the process registered under `pid`; fails with `ENOENT` if absent.
pub fn get(pid: pid_t) -> Result<ProcessRef> {
    let table = PROCESS_TABLE.lock().unwrap();
    match table.get(&pid) {
        Some(process_ref) => Ok(process_ref.clone()),
        None => Err(errno!(ENOENT, "process not found")),
    }
}
// Next pid to hand out; pids are allocated sequentially, starting from 1.
static NEXT_PID: AtomicU32 = AtomicU32::new(1);

/// Returns a new pid and advances the counter.
pub fn alloc_pid() -> u32 {
    let allocated = NEXT_PID.fetch_add(1, Ordering::SeqCst);
    allocated
}
/// Releases `pid`. Currently a no-op for every pid.
pub fn free_pid(pid: u32) {
    // PID 0 is reserved for idle thread, thus no need to free
    if pid != 0 {
        // TODO:
    }
}

@ -0,0 +1,310 @@
use std::ptr::NonNull;
use super::do_arch_prctl::ArchPrctlCode;
use super::do_clone::CloneFlags;
use super::do_futex::{FutexFlags, FutexOp};
use super::do_sched::CpuSet;
use super::do_spawn::FileAction;
use super::process::ChildProcessFilter;
use crate::prelude::*;
use crate::time::timespec_t;
use crate::util::mem_util::from_user::*;
/// Syscall entry for spawn: copies the arguments out of user memory, spawns
/// the child and stores its pid into `child_pid_ptr`.
///
/// Returns 0 on success; fails if a pointer check, a user-memory copy or the
/// spawn itself fails.
pub fn do_spawn(
    child_pid_ptr: *mut u32,
    path: *const i8,
    argv: *const *const i8,
    envp: *const *const i8,
    fdop_list: *const FdOp,
) -> Result<isize> {
    check_mut_ptr(child_pid_ptr)?;

    // Turn the user-provided C data into owned Rust values
    let path_cstring = clone_cstring_safely(path)?;
    let path = path_cstring.to_string_lossy().into_owned();
    let argv = clone_cstrings_safely(argv)?;
    let envp = clone_cstrings_safely(envp)?;
    let file_actions = clone_file_actions_safely(fdop_list)?;

    let current = current!();
    debug!(
        "spawn: path: {:?}, argv: {:?}, envp: {:?}, fdop: {:?}",
        path, argv, envp, file_actions
    );

    let child_pid = super::do_spawn::do_spawn(&path, &argv, &envp, &file_actions, &current)?;

    // The pointer has been validated by check_mut_ptr above
    unsafe {
        *child_pid_ptr = child_pid;
    }
    Ok(0)
}
/// One node of the C-layout linked list of file actions passed to spawn.
#[repr(C)]
#[derive(Debug)]
pub struct FdOp {
// We actually switch the prev and next fields in the libc definition.
prev: *const FdOp,
next: *const FdOp,
// One of the FDOP_* command codes
cmd: u32,
fd: u32,
srcfd: u32,
oflag: u32,
mode: u32,
path: *const i8,
}
// This Rust version of fdop corresponds to the C version in Occlum.
// See <path_to_musl_libc>/src/process/fdop.h.
const FDOP_CLOSE: u32 = 1;
const FDOP_DUP2: u32 = 2;
const FDOP_OPEN: u32 = 3;
/// Walks the linked list starting at `fdop_ptr` and converts each node into a
/// `FileAction`.
///
/// Every node pointer is checked before it is dereferenced. Fails with
/// `EINVAL` on an unknown command, or with the error of a failed pointer
/// check or string copy.
fn clone_file_actions_safely(fdop_ptr: *const FdOp) -> Result<Vec<FileAction>> {
    let mut actions = Vec::new();

    let mut cursor = fdop_ptr;
    while !cursor.is_null() {
        check_ptr(cursor)?;
        let node = unsafe { &*cursor };

        #[deny(unreachable_patterns)]
        let action = match node.cmd {
            FDOP_CLOSE => FileAction::Close(node.fd),
            FDOP_DUP2 => FileAction::Dup2(node.srcfd, node.fd),
            FDOP_OPEN => {
                let path = clone_cstring_safely(node.path)?
                    .to_string_lossy()
                    .into_owned();
                FileAction::Open {
                    path,
                    mode: node.mode,
                    oflag: node.oflag,
                    fd: node.fd,
                }
            }
            _ => {
                return_errno!(EINVAL, "Unknown file action command");
            }
        };
        actions.push(action);

        cursor = node.next;
    }

    Ok(actions)
}
/// Syscall entry for clone: validates the raw arguments and forwards them to
/// the actual implementation.
///
/// `ptid`, `ctid` and `new_tls` are only checked and passed on when the
/// corresponding clone flag is set. Returns the id of the new thread.
pub fn do_clone(
    flags: u32,
    stack_addr: usize,
    ptid: *mut pid_t,
    ctid: *mut pid_t,
    new_tls: usize,
) -> Result<isize> {
    let flags = CloneFlags::from_bits_truncate(flags);
    check_mut_ptr(stack_addr as *mut u64)?;

    let ptid = if flags.contains(CloneFlags::CLONE_PARENT_SETTID) {
        check_mut_ptr(ptid)?;
        NonNull::new(ptid)
    } else {
        None
    };

    let ctid = if flags.contains(CloneFlags::CLONE_CHILD_CLEARTID) {
        check_mut_ptr(ctid)?;
        NonNull::new(ctid)
    } else {
        None
    };

    let new_tls = if flags.contains(CloneFlags::CLONE_SETTLS) {
        check_mut_ptr(new_tls as *mut usize)?;
        Some(new_tls)
    } else {
        None
    };

    let new_tid = super::do_clone::do_clone(flags, stack_addr, ptid, ctid, new_tls)?;
    Ok(new_tid as isize)
}
/// Syscall entry for futex: decodes the operation and dispatches to wait,
/// wake or requeue.
///
/// For `FUTEX_REQUEUE` the `timeout` argument carries the maximum number of
/// waiters to requeue. Any other operation fails with `ENOSYS`.
pub fn do_futex(
    futex_addr: *const i32,
    futex_op: u32,
    futex_val: i32,
    timeout: u64,
    futex_new_addr: *const i32,
) -> Result<isize> {
    /// Converts a futex value that is used as a count; negative values are rejected.
    fn count_from_val(val: i32) -> Result<usize> {
        if val < 0 {
            return_errno!(EINVAL, "the futex val must not be negative");
        }
        Ok(val as usize)
    }

    check_ptr(futex_addr)?;
    let (futex_op, futex_flags) = super::do_futex::futex_op_and_flags_from_u32(futex_op)?;

    match futex_op {
        FutexOp::FUTEX_WAIT => {
            let timeout_ptr = timeout as *const timespec_t;
            let timeout = if timeout_ptr.is_null() {
                None
            } else {
                let ts = timespec_t::from_raw_ptr(timeout_ptr)?;
                ts.validate()?;
                if futex_flags.contains(FutexFlags::FUTEX_CLOCK_REALTIME) {
                    warn!("CLOCK_REALTIME is not supported yet, use monotonic clock");
                }
                Some(ts)
            };
            super::do_futex::futex_wait(futex_addr, futex_val, &timeout)?;
            Ok(0)
        }
        FutexOp::FUTEX_WAKE => {
            let max_count = count_from_val(futex_val)?;
            let nwoken = super::do_futex::futex_wake(futex_addr, max_count)?;
            Ok(nwoken as isize)
        }
        FutexOp::FUTEX_REQUEUE => {
            check_ptr(futex_new_addr)?;
            let max_nwakes = count_from_val(futex_val)?;
            let max_nrequeues = count_from_val(timeout as i32)?;
            let nwakes = super::do_futex::futex_requeue(
                futex_addr,
                max_nwakes,
                max_nrequeues,
                futex_new_addr,
            )?;
            Ok(nwakes as isize)
        }
        _ => return_errno!(ENOSYS, "the futex operation is not supported"),
    }
}
/// Syscall entry for arch_prctl: decodes `code`, checks `addr` and forwards
/// both. Returns 0 on success.
pub fn do_arch_prctl(code: u32, addr: *mut usize) -> Result<isize> {
    let code = ArchPrctlCode::from_u32(code)?;
    check_mut_ptr(addr)?;
    super::do_arch_prctl::do_arch_prctl(code, addr)?;
    Ok(0)
}
/// Syscall entry for set_tid_address: checks `tidptr` and returns the tid
/// reported by the implementation.
pub fn do_set_tid_address(tidptr: *mut pid_t) -> Result<isize> {
    check_mut_ptr(tidptr)?;
    let tid = super::do_set_tid_address::do_set_tid_address(tidptr)?;
    Ok(tid as isize)
}
/// Syscall entry for sched_yield; always returns 0.
pub fn do_sched_yield() -> Result<isize> {
super::do_sched::do_sched_yield();
Ok(0)
}
/// Syscall entry for sched_getaffinity: writes the CPU set of `pid` into the
/// user buffer `buf` of `cpusize` bytes.
///
/// Fails with `EINVAL` for an empty buffer and `EFAULT` for a null one.
pub fn do_sched_getaffinity(pid: pid_t, cpusize: size_t, buf: *mut c_uchar) -> Result<isize> {
    // Construct safe Rust types
    check_mut_array(buf, cpusize)?;
    if cpusize == 0 {
        return_errno!(EINVAL, "cpuset size must be greater than zero");
    }
    if buf.is_null() {
        return_errno!(EFAULT, "cpuset mask must NOT be null");
    }
    let user_mask = unsafe { std::slice::from_raw_parts_mut(buf, cpusize) };

    // Call the memory-safe do_sched_getaffinity
    let mut cpuset = CpuSet::new(cpusize);
    let retval = super::do_sched::do_sched_getaffinity(pid, &mut cpuset)?;

    // Copy from Rust types to C types
    user_mask.copy_from_slice(cpuset.as_slice());
    Ok(retval as isize)
}
/// Syscall entry for sched_setaffinity: reads a CPU set of `cpusize` bytes
/// from the user buffer `buf` and applies it to `pid`.
///
/// Fails with `EINVAL` for an empty buffer and `EFAULT` for a null one.
pub fn do_sched_setaffinity(pid: pid_t, cpusize: size_t, buf: *const c_uchar) -> Result<isize> {
    // Convert unsafe C types into safe Rust types
    check_array(buf, cpusize)?;
    if cpusize == 0 {
        return_errno!(EINVAL, "cpuset size must be greater than zero");
    }
    if buf.is_null() {
        return_errno!(EFAULT, "cpuset mask must NOT be null");
    }
    let cpuset = CpuSet::from_raw_buf(buf, cpusize);
    debug!("sched_setaffinity cpuset: {:#x}", cpuset);

    // Call the memory-safe do_sched_setaffinity
    super::do_sched::do_sched_setaffinity(pid, &cpuset)?;
    Ok(0)
}
/// Syscall entry for exit: records the exit `status` and then leaves the
/// task through the external `do_exit_task`. Never returns.
pub fn do_exit(status: i32) -> ! {
    extern "C" {
        fn do_exit_task() -> !;
    }

    debug!("exit: {}", status);
    super::do_exit::do_exit(status);

    unsafe { do_exit_task() }
}
/// Syscall entry for wait4: waits for a child selected by `pid` and, if
/// `exit_status_ptr` is non-null, stores the child's exit status there.
///
/// `pid` is interpreted as follows:
/// * `< -1`: any child whose process group id equals `|pid|`;
/// * `-1`: any child;
/// * `0`: any child in the process group of the caller;
/// * `> 0`: the child with exactly that pid.
///
/// Returns the pid of the child that was waited for.
pub fn do_wait4(pid: i32, exit_status_ptr: *mut i32) -> Result<isize> {
    if !exit_status_ptr.is_null() {
        check_mut_ptr(exit_status_ptr)?;
    }

    let child_process_filter = match pid {
        -1 => ChildProcessFilter::WithAnyPid,
        0 => {
            let pgid = current!().process().pgid();
            ChildProcessFilter::WithPgid(pgid)
        }
        pid if pid > 0 => ChildProcessFilter::WithPid(pid as pid_t),
        // The only remaining case is pid < -1. A plain `-pid` overflows for
        // i32::MIN; wrapping_neg followed by the unsigned cast yields the
        // correct magnitude for every negative value.
        pid => ChildProcessFilter::WithPgid(pid.wrapping_neg() as pid_t),
    };

    let (child_pid, exit_status) = super::do_wait4::do_wait4(&child_process_filter)?;
    if !exit_status_ptr.is_null() {
        // SAFETY: a non-null pointer has been validated by check_mut_ptr above.
        unsafe {
            *exit_status_ptr = exit_status;
        }
    }
    Ok(child_pid as isize)
}
/// Syscall entry for getpid.
pub fn do_getpid() -> Result<isize> {
    Ok(super::do_getpid::do_getpid() as isize)
}
/// Syscall entry for gettid.
pub fn do_gettid() -> Result<isize> {
    Ok(super::do_getpid::do_gettid() as isize)
}
/// Syscall entry for getppid.
pub fn do_getppid() -> Result<isize> {
    Ok(super::do_getpid::do_getppid() as isize)
}
/// Syscall entry for getpgid.
pub fn do_getpgid() -> Result<isize> {
    Ok(super::do_getpid::do_getpgid() as isize)
}
// TODO: implement uid, gid, euid, egid
// Until then, each of the four calls below reports id 0.
pub fn do_getuid() -> Result<isize> {
Ok(0)
}
pub fn do_getgid() -> Result<isize> {
Ok(0)
}
pub fn do_geteuid() -> Result<isize> {
Ok(0)
}
pub fn do_getegid() -> Result<isize> {
Ok(0)
}

@ -0,0 +1,74 @@
use super::{ProcessRef, ThreadRef};
use crate::prelude::*;
/// Looks up the process registered under `pid`.
pub fn get_process(pid: pid_t) -> Result<ProcessRef> {
    let processes = PROCESS_TABLE.lock().unwrap();
    processes.get(pid)
}
/// Registers `process` under its own pid; fails if the pid is already taken.
pub(super) fn add_process(process: ProcessRef) -> Result<()> {
    let mut processes = PROCESS_TABLE.lock().unwrap();
    let pid = process.pid();
    processes.add(pid, process)
}
/// Unregisters and returns the process stored under `pid`.
pub(super) fn del_process(pid: pid_t) -> Result<ProcessRef> {
    let mut processes = PROCESS_TABLE.lock().unwrap();
    processes.del(pid)
}
/// Looks up the thread registered under `tid`.
pub fn get_thread(tid: pid_t) -> Result<ThreadRef> {
    let threads = THREAD_TABLE.lock().unwrap();
    threads.get(tid)
}
/// Registers `thread` under its own tid; fails if the tid is already taken.
pub(super) fn add_thread(thread: ThreadRef) -> Result<()> {
    let mut threads = THREAD_TABLE.lock().unwrap();
    let tid = thread.tid();
    threads.add(tid, thread)
}
/// Unregisters and returns the thread stored under `tid`.
pub(super) fn del_thread(tid: pid_t) -> Result<ThreadRef> {
    let mut threads = THREAD_TABLE.lock().unwrap();
    threads.del(tid)
}
/// Dumps the process table and then the thread table to stdout.
pub fn debug() {
    {
        let processes = PROCESS_TABLE.lock().unwrap();
        println!("process table = {:#?}", processes);
    }
    {
        let threads = THREAD_TABLE.lock().unwrap();
        println!("thread table = {:#?}", threads);
    }
    //println!("idle = {:#?}", *super::IDLE);
}
// Global id-to-object tables for processes and threads, each created with an
// initial capacity of 8 entries.
lazy_static! {
static ref PROCESS_TABLE: SgxMutex<Table<ProcessRef>> =
{ SgxMutex::new(Table::<ProcessRef>::with_capacity(8)) };
static ref THREAD_TABLE: SgxMutex<Table<ThreadRef>> =
{ SgxMutex::new(Table::<ThreadRef>::with_capacity(8)) };
}
/// A map from ids to cloneable items; used for both the process and the
/// thread table.
#[derive(Debug, Clone)]
struct Table<I: Debug + Clone + Send + Sync> {
map: HashMap<pid_t, I>,
}
impl<I: Debug + Clone + Send + Sync> Table<I> {
    /// Creates an empty table with room for at least `capacity` items.
    pub fn with_capacity(capacity: usize) -> Self {
        Self {
            map: HashMap::with_capacity(capacity),
        }
    }

    /// Returns a clone of the item stored under `id`.
    ///
    /// Fails with `ESRCH` if there is no such item.
    pub fn get(&self, id: pid_t) -> Result<I> {
        self.map
            .get(&id)
            .cloned()
            .ok_or_else(|| errno!(ESRCH, "id does not exist"))
    }

    /// Stores `item` under `id`.
    ///
    /// Fails with `EEXIST` if the id is already in use; the existing item is
    /// left untouched in that case.
    pub fn add(&mut self, id: pid_t, item: I) -> Result<()> {
        if self.map.contains_key(&id) {
            return_errno!(EEXIST, "id is already added");
        }
        self.map.insert(id, item);
        Ok(())
    }

    /// Removes and returns the item stored under `id`.
    ///
    /// Fails with `ENOENT` if there is no such item.
    pub fn del(&mut self, id: pid_t) -> Result<I> {
        // A single lookup both finds and removes the entry, with no unwrap
        self.map
            .remove(&id)
            .ok_or_else(|| errno!(ENOENT, "id does not exist"))
    }
}

@ -1,184 +0,0 @@
use std::mem;
use super::*;
/// Low-level execution state of a task (stack and fs-base values for kernel
/// and user mode).
///
/// Note: this definition must be in sync with task.h
#[derive(Clone, Debug, Default)]
#[repr(C)]
pub struct Task {
kernel_rsp: usize,
kernel_stack_base: usize,
kernel_stack_limit: usize,
kernel_fs: usize,
// Task::new requires user_stack_base >= user_rsp > user_stack_limit
user_rsp: usize,
user_stack_base: usize,
user_stack_limit: usize,
user_fs: usize,
user_entry_addr: usize,
saved_state: usize, // struct jmpbuf*
}
impl Task {
pub unsafe fn new(
user_entry_addr: usize,
user_rsp: usize,
user_stack_base: usize,
user_stack_limit: usize,
user_fs: Option<usize>,
) -> Result<Task> {
if !(user_stack_base >= user_rsp && user_rsp > user_stack_limit) {
return_errno!(EINVAL, "Invalid user stack");
}
// Set the default user fsbase to an address on user stack, which is
// a relatively safe address in case the user program uses %fs before
// initializing fs base address.
let user_fs = user_fs.unwrap_or(user_stack_limit);
Ok(Task {
user_entry_addr,
user_rsp,
user_stack_base,
user_stack_limit,
user_fs,
..Default::default()
})
}
pub fn set_user_fs(&mut self, user_fs: usize) {
self.user_fs = user_fs;
}
pub fn get_user_fs(&self) -> usize {
self.user_fs
}
}
// Processes that have been enqueued but not yet picked up by run_task, keyed
// by their tid.
lazy_static! {
static ref NEW_PROCESS_TABLE: SgxMutex<HashMap<pid_t, ProcessRef>> =
{ SgxMutex::new(HashMap::new()) };
}
/// Queues `new_process` under `new_tid` until it is picked up for execution.
///
/// Panics if another process is already pending under the same id.
pub fn enqueue_task(new_tid: pid_t, new_process: ProcessRef) {
    let displaced = {
        let mut pending = NEW_PROCESS_TABLE.lock().unwrap();
        pending.insert(new_tid, new_process)
    };
    // There should NOT have any pending process with the same ID
    assert!(displaced.is_none());
}
/// Enqueues `new_process` under `new_tid`, then issues the
/// `occlum_ocall_exec_thread_async` OCall for that TID.
///
/// Panics if the OCall itself fails or reports a non-zero return value.
pub fn enqueue_and_exec_task(new_tid: pid_t, new_process: ProcessRef) {
    enqueue_task(new_tid, new_process);

    let mut ret = 0;
    let ocall_status = unsafe { occlum_ocall_exec_thread_async(&mut ret, new_tid) };
    let started = ocall_status == sgx_status_t::SGX_SUCCESS && ret == 0;
    if !started {
        panic!("Failed to start the process");
    }
}
/// Removes and returns the pending process registered under `libos_tid`.
///
/// Fails with `EAGAIN` if no pending process is registered under that TID.
fn dequeue_task(libos_tid: pid_t) -> Result<ProcessRef> {
    let pending = NEW_PROCESS_TABLE.lock().unwrap().remove(&libos_tid);
    match pending {
        Some(process) => Ok(process),
        None => Err(errno!(
            EAGAIN,
            "the given TID does not match any pending process"
        )),
    }
}
/// Runs the pending process registered under `libos_tid` on the calling thread
/// and returns its exit status.
///
/// Fails with the error from `dequeue_task` when no process is pending under
/// `libos_tid`. `host_tid` is recorded on the process before it starts.
pub fn run_task(libos_tid: pid_t, host_tid: pid_t) -> Result<i32> {
let new_process: ProcessRef = dequeue_task(libos_tid)?;
// Make the process reachable through get_current() for the rest of this call
set_current(&new_process);
let (pid, task) = {
let mut process = new_process.lock().unwrap();
process.set_host_tid(host_tid);
let pid = process.get_pid();
// A raw pointer is taken so the process lock is not held while the task runs
let task = process.get_task_mut() as *mut Task;
(pid, task)
};
#[cfg(feature = "syscall_timing")]
GLOBAL_PROFILER
.lock()
.unwrap()
.thread_enter()
.expect("unexpected error from profiler to enter thread");
unsafe {
// task may only be modified by this function; so no lock is needed
do_run_task(task);
}
#[cfg(feature = "syscall_timing")]
GLOBAL_PROFILER
.lock()
.unwrap()
.thread_exit()
.expect("unexpected error from profiler to exit thread");
// NOTE(review): `parent_pid` actually holds the parent's TID (get_tid) — confirm
// this is equivalent for the idle-process check below.
let (exit_status, parent_pid) = {
let mut process = new_process.lock().unwrap();
let parent = process.get_parent().lock().unwrap();
(process.get_exit_status(), parent.get_tid())
};
info!("Thread exited: tid = {}", libos_tid);
// If the process's parent is the IDLE_PROCESS (pid = 0), nobody else will
// reap it, so it has to remove itself from the process table
if parent_pid == 0 {
process_table::remove(pid);
}
reset_current();
Ok(exit_status)
}
// Per-thread record of the current process.
//
// `_CURRENT_PROCESS_PTR` starts out null; `set_current` stores a pointer
// obtained from `Arc::into_raw`, and `reset_current` swaps null back in.
thread_local! {
static _CURRENT_PROCESS_PTR: Cell<*const SgxMutex<Process>> = {
Cell::new(0 as *const SgxMutex<Process>)
};
// Cached TID so that logging can read it without locking the process
static _TID: Cell<pid_t> = Cell::new(0);
}
pub fn get_current_tid() -> pid_t {
_TID.with(|tid_cell| tid_cell.get())
}
/// Returns a new strong reference to the process installed as current on the
/// calling thread.
///
/// # Panics
///
/// Panics if no current process is installed (i.e. outside the window between
/// `set_current` and `reset_current`). Rebuilding an `Arc` from the null
/// placeholder would be undefined behaviour, so the condition is checked
/// explicitly.
pub fn get_current() -> ProcessRef {
    let current_ptr = _CURRENT_PROCESS_PTR.with(|cell| cell.get());
    assert!(
        !current_ptr.is_null(),
        "get_current() called while no current process is set"
    );
    // SAFETY: a non-null pointer in `_CURRENT_PROCESS_PTR` was produced by
    // `Arc::into_raw` in `set_current` and still owns one strong count.
    let current_ref = unsafe { Arc::from_raw(current_ptr) };
    let current_ref_clone = current_ref.clone();
    // Hand the borrowed strong count back to the thread-local slot instead of
    // dropping it.
    let _ = Arc::into_raw(current_ref);
    current_ref_clone
}
/// Installs `process` as the current process of the calling thread.
///
/// Caches the process's TID in `_TID` and stores a raw pointer that owns one
/// extra strong count of `process` in `_CURRENT_PROCESS_PTR`.
fn set_current(process: &ProcessRef) {
    let tid = process.lock().unwrap().get_tid();
    _TID.with(|slot| slot.set(tid));

    // The strong count taken here is given back by `reset_current`.
    let raw_process = Arc::into_raw(process.clone());
    _CURRENT_PROCESS_PTR.with(|slot| slot.set(raw_process));
}
/// Clears the current process of the calling thread.
///
/// Resets the cached TID to 0, swaps the null placeholder back into
/// `_CURRENT_PROCESS_PTR`, and releases the strong count that `set_current`
/// stored there. Calling it when nothing is installed is a no-op.
fn reset_current() {
    _TID.with(|tid_cell| tid_cell.set(0));
    let process_ptr = _CURRENT_PROCESS_PTR.with(|cp| cp.replace(0 as *const SgxMutex<Process>));
    // Nothing was installed: rebuilding an Arc from null would be undefined
    // behaviour, so there is nothing to release.
    if process_ptr.is_null() {
        return;
    }
    // Prevent memory leakage.
    // SAFETY: a non-null pointer in the slot came from `Arc::into_raw` in
    // `set_current` and has just been removed from the slot, so this is the
    // only release of that strong count.
    unsafe {
        drop(Arc::from_raw(process_ptr));
    }
}
// Foreign functions used by this module.
extern "C" {
// OCall used by `enqueue_and_exec_task` to start the task enqueued under `libos_tid`
fn occlum_ocall_exec_thread_async(ret: *mut i32, libos_tid: pid_t) -> sgx_status_t;
// Runs `task`; called from `run_task`
fn do_run_task(task: *mut Task) -> i32;
}

@ -0,0 +1,81 @@
use super::super::{current, ThreadRef};
use super::Task;
use crate::prelude::*;
/// Enqueue a new thread so that it can be executed later.
///
/// The thread is registered under its own TID. Panics if a pending thread
/// with the same TID already exists.
pub fn enqueue(new_thread: ThreadRef) {
    let tid = new_thread.tid();
    // Keep the table locked only for the insertion itself.
    let existing_thread = {
        let mut pending_threads = NEW_THREAD_TABLE.lock().unwrap();
        pending_threads.insert(tid, new_thread)
    };
    // There must NOT be any pending thread with the same ID
    assert!(existing_thread.is_none());
}
/// Enqueue a new thread and execute it in a separate host thread.
///
/// Panics if the `occlum_ocall_exec_thread_async` OCall fails or reports a
/// non-zero return value.
pub fn enqueue_and_exec(new_thread: ThreadRef) {
    // Read the TID first: `enqueue` takes ownership of the thread.
    let new_tid = new_thread.tid();
    enqueue(new_thread);

    let mut ret = 0;
    let ocall_status = unsafe { occlum_ocall_exec_thread_async(&mut ret, new_tid) };
    // TODO: check if there are any free TCS before doing the OCall
    assert!(ocall_status == sgx_status_t::SGX_SUCCESS && ret == 0);
}
/// Removes and returns the pending thread registered under `libos_tid`.
///
/// Fails with `EAGAIN` if no pending thread is registered under that TID.
fn dequeue(libos_tid: pid_t) -> Result<ThreadRef> {
    let pending = NEW_THREAD_TABLE.lock().unwrap().remove(&libos_tid);
    match pending {
        Some(thread) => Ok(thread),
        None => Err(errno!(
            EAGAIN,
            "the given TID does not match any pending thread"
        )),
    }
}
/// Execute the specified LibOS thread in the current host thread.
///
/// Dequeues the pending thread registered under `libos_tid`, marks it as
/// started on `host_tid`, runs its task, and returns the thread's exit status.
/// Fails with the error from `dequeue` when no such thread is pending.
pub fn exec(libos_tid: pid_t, host_tid: pid_t) -> Result<i32> {
let new_thread: ThreadRef = dequeue(libos_tid)?;
new_thread.start(host_tid);
// Enable current::get() from now on
current::set(new_thread.clone());
#[cfg(feature = "syscall_timing")]
GLOBAL_PROFILER
.lock()
.unwrap()
.thread_enter()
.expect("unexpected error from profiler to enter thread");
unsafe {
// task may only be modified by this function; so no lock is needed
// NOTE(review): the shared `&Task` is cast to `*mut Task` for the C side — confirm
// that nothing else reads the non-atomic fields while the task runs.
do_exec_task(new_thread.task() as *const Task as *mut Task);
}
#[cfg(feature = "syscall_timing")]
GLOBAL_PROFILER
.lock()
.unwrap()
.thread_exit()
.expect("unexpected error from profiler to exit thread");
// Panics unless the thread is in the exited state by the time the task returns
let exit_status = new_thread.inner().exit_status().unwrap();
info!(
"Thread exited: tid = {}, exit_status = {}",
libos_tid, exit_status
);
// Disable current::get()
current::reset();
Ok(exit_status)
}
// Threads that have been enqueued (see `enqueue`) but not yet picked up by
// `exec`, keyed by TID.
lazy_static! {
static ref NEW_THREAD_TABLE: SgxMutex<HashMap<pid_t, ThreadRef>> =
{ SgxMutex::new(HashMap::new()) };
}
// Foreign functions used by this module.
extern "C" {
// OCall used by `enqueue_and_exec` to start the thread enqueued under `libos_tid`
fn occlum_ocall_exec_thread_async(ret: *mut i32, libos_tid: pid_t) -> sgx_status_t;
// Runs `task`; called from `exec`
fn do_exec_task(task: *mut Task) -> i32;
}

@ -0,0 +1,60 @@
//! Task is the low-level representation for the execution of a thread.
use std::sync::atomic::{AtomicUsize, Ordering};
use crate::prelude::*;
pub use self::exec::{enqueue, enqueue_and_exec, exec};
mod exec;
/// Low-level, C-compatible description of a task: kernel-side and user-side
/// stack/fs registers plus the user entry address.
///
/// Note: this definition must be in sync with task.h
#[derive(Debug, Default)]
#[repr(C)]
pub struct Task {
    kernel_rsp: usize,
    kernel_stack_base: usize,
    kernel_stack_limit: usize,
    kernel_fs: usize,
    user_rsp: usize,
    user_stack_base: usize,
    user_stack_limit: usize,
    user_fs: AtomicUsize,
    user_entry_addr: usize,
    saved_state: usize, // struct jmpbuf*
}

impl Task {
    /// Builds a task that starts at `user_entry_addr` with the given user stack.
    ///
    /// Returns `EINVAL` unless `user_stack_limit < user_rsp <= user_stack_base`.
    /// All kernel-side fields and `saved_state` start out as zero.
    pub unsafe fn new(
        user_entry_addr: usize,
        user_rsp: usize,
        user_stack_base: usize,
        user_stack_limit: usize,
        user_fs: Option<usize>,
    ) -> Result<Task> {
        let rsp_within_stack = user_stack_limit < user_rsp && user_rsp <= user_stack_base;
        if !rsp_within_stack {
            return_errno!(EINVAL, "Invalid user stack");
        }

        // Without an explicit fs base, fall back to an address on the user
        // stack: a relatively safe choice should the user program touch %fs
        // before it sets up its own fs base.
        let initial_fs = match user_fs {
            Some(fs_base) => fs_base,
            None => user_stack_limit,
        };

        Ok(Task {
            user_entry_addr,
            user_rsp,
            user_stack_base,
            user_stack_limit,
            user_fs: AtomicUsize::new(initial_fs),
            ..Default::default()
        })
    }

    /// Atomically overwrites the user fs base (sequentially consistent store).
    pub(super) fn set_user_fs(&self, user_fs: usize) {
        self.user_fs.store(user_fs, Ordering::SeqCst);
    }

    /// Atomically reads the user fs base (sequentially consistent load).
    pub fn user_fs(&self) -> usize {
        self.user_fs.load(Ordering::SeqCst)
    }
}

@ -13,7 +13,7 @@ typedef struct _thread_data_t
extern thread_data_t *get_thread_data(void);
extern void __run_task(struct Task* task);
extern void __exec_task(struct Task* task);
extern uint64_t __get_stack_guard(void);
extern void __set_stack_guard(uint64_t new_val);
@ -59,7 +59,7 @@ void switch_td_to_user(const struct Task* task) {
td->stack_commit_addr = task->user_stack_limit;
}
int do_run_task(struct Task* task) {
int do_exec_task(struct Task* task) {
jmp_buf libos_state = {0};
thread_data_t* td = get_thread_data();
task->saved_state = &libos_state;
@ -73,7 +73,7 @@ int do_run_task(struct Task* task) {
int second = setjmp(libos_state);
if (!second) {
__run_task(task);
__exec_task(task);
}
// Jump from do_exit_task

@ -27,9 +27,9 @@ __set_stack_guard:
mov %rdi, %gs:(TD_TASK_OFFSET)
ret
.global __run_task
.type __run_task, @function
__run_task:
.global __exec_task
.type __exec_task, @function
__exec_task:
// Save kernel fsbase and use user fsbase
//
// SGX HW Mode and SIM Mode require different implementations. In SGX hardware

@ -1,137 +0,0 @@
use super::vm::VMRange;
use super::*;
/// A list of threads, each represented by a `ProcessRef`.
///
/// No behaviour is attached yet: the `impl` block below is empty.
pub struct ThreadGroup {
threads: Vec<ProcessRef>,
}
impl ThreadGroup {}
// Bit flags taken by `do_clone`, stored as a `u32` bit set.
// NOTE(review): the values appear to mirror the Linux `CLONE_*` constants — confirm against <linux/sched.h>.
bitflags! {
pub struct CloneFlags : u32 {
const CLONE_VM = 0x00000100;
const CLONE_FS = 0x00000200;
const CLONE_FILES = 0x00000400;
const CLONE_SIGHAND = 0x00000800;
const CLONE_PTRACE = 0x00002000;
const CLONE_VFORK = 0x00004000;
const CLONE_PARENT = 0x00008000;
const CLONE_THREAD = 0x00010000;
const CLONE_NEWNS = 0x00020000;
const CLONE_SYSVSEM = 0x00040000;
const CLONE_SETTLS = 0x00080000;
const CLONE_PARENT_SETTID = 0x00100000;
const CLONE_CHILD_CLEARTID = 0x00200000;
const CLONE_DETACHED = 0x00400000;
const CLONE_UNTRACED = 0x00800000;
const CLONE_CHILD_SETTID = 0x01000000;
const CLONE_NEWCGROUP = 0x02000000;
const CLONE_NEWUTS = 0x04000000;
const CLONE_NEWIPC = 0x08000000;
const CLONE_NEWUSER = 0x10000000;
const CLONE_NEWPID = 0x20000000;
const CLONE_NEWNET = 0x40000000;
const CLONE_IO = 0x80000000;
}
}
/// Creates a new thread that shares the caller's VM, file table and resource
/// limits, starts it, and returns its TID.
///
/// * `user_rsp` - top of the new thread's user stack; the thread's entry point
///   is read from this address.
/// * `ptid` - when `Some`, the new TID is written through this pointer.
/// * `ctid` - when `Some`, stored as the new thread's `clear_child_tid`.
/// * `new_tls` - optional user fs base for the new thread.
///
/// `flags` is currently only logged. Fails if `user_rsp` does not fall inside a
/// known memory range of the caller, or if task/process creation fails.
pub fn do_clone(
flags: CloneFlags,
user_rsp: usize,
ptid: Option<*mut pid_t>,
ctid: Option<*mut pid_t>,
new_tls: Option<usize>,
) -> Result<pid_t> {
debug!(
"clone: flags: {:?}, stack_addr: {:?}, ptid: {:?}, ctid: {:?}, new_tls: {:?}",
flags, user_rsp, ptid, ctid, new_tls
);
// TODO: return error for unsupported flags
// The current process stays locked until this function returns
let current_ref = get_current();
let current = current_ref.lock().unwrap();
// The calling convention of Occlum clone syscall requires the user to
// store the entry point of the new thread at the top of the user stack.
let thread_entry = unsafe {
*(user_rsp as *mut usize)
// TODO: check user_entry is a cfi_label
};
let (new_thread_pid, new_thread_ref) = {
let vm_ref = current.get_vm().clone();
let task = {
let vm = vm_ref.lock().unwrap();
// The stack bounds are not passed in; they are inferred from the range containing user_rsp
let user_stack_range = guess_user_stack_bound(&vm, user_rsp)?;
let user_stack_base = user_stack_range.end();
let user_stack_limit = user_stack_range.start();
unsafe {
Task::new(
thread_entry,
user_rsp,
user_stack_base,
user_stack_limit,
new_tls,
)?
}
};
let files_ref = current.get_files().clone();
let rlimits_ref = current.get_rlimits().clone();
let elf_path = &current.elf_path;
let cwd = &current.cwd;
Process::new(cwd, elf_path, task, vm_ref, files_ref, rlimits_ref, true)?
};
if let Some(ctid) = ctid {
let mut new_thread = new_thread_ref.lock().unwrap();
new_thread.clear_child_tid = Some(ctid);
}
// TODO: always get parent lock first to avoid deadlock
{
// The new thread becomes a child of the caller's parent and joins the caller's thread group
// Panics if the caller has no parent
let parent_ref = current.parent.as_ref().unwrap();
let mut parent = parent_ref.lock().unwrap();
let mut new_thread = new_thread_ref.lock().unwrap();
parent.children.push(Arc::downgrade(&new_thread_ref));
new_thread.parent = Some(parent_ref.clone());
new_thread.tgid = current.tgid;
}
process_table::put(new_thread_pid, new_thread_ref.clone());
info!("Thread created: tid = {}", new_thread_pid);
if let Some(ptid) = ptid {
unsafe {
*ptid = new_thread_pid;
}
}
task::enqueue_and_exec_task(new_thread_pid, new_thread_ref);
Ok(new_thread_pid)
}
/// Records `tidptr` as the current thread's `clear_child_tid` and returns the
/// current thread's TID.
pub fn do_set_tid_address(tidptr: *mut pid_t) -> Result<pid_t> {
    debug!("set_tid_address: tidptr: {:#x}", tidptr as usize);

    let current_ref = get_current();
    let mut current = current_ref.lock().unwrap();
    current.clear_child_tid = Some(tidptr);
    let tid = current.get_tid();
    Ok(tid)
}
/// Finds the memory range of `vm` that contains `user_rsp`, trying the mmap
/// regions first, then the stack range, then the heap range.
///
/// Fails with `ESRCH` if none of them contains `user_rsp`.
fn guess_user_stack_bound(vm: &ProcessVM, user_rsp: usize) -> Result<&VMRange> {
    // The first case is most likely
    if let Ok(mmap_range) = vm.find_mmap_region(user_rsp) {
        return Ok(mmap_range);
    }

    // The next cases are very unlikely, but valid
    if vm.get_stack_range().contains(user_rsp) {
        return Ok(vm.get_stack_range());
    }
    if vm.get_heap_range().contains(user_rsp) {
        return Ok(vm.get_heap_range());
    }

    // Invalid
    return_errno!(ESRCH, "invalid rsp")
}

@ -0,0 +1,112 @@
use std::ptr::NonNull;
use super::{
FileTableRef, FsViewRef, ProcessRef, ProcessVM, ProcessVMRef, ResourceLimitsRef, Task, Thread,
ThreadId, ThreadInner, ThreadRef,
};
use crate::prelude::*;
#[derive(Debug)]
pub struct ThreadBuilder {
// Mandatory field
tid: Option<ThreadId>,
task: Option<Task>,
process: Option<ProcessRef>,
vm: Option<ProcessVMRef>,
// Optional fields
fs: Option<FsViewRef>,
files: Option<FileTableRef>,
rlimits: Option<ResourceLimitsRef>,
clear_ctid: Option<NonNull<pid_t>>,
}
impl ThreadBuilder {
pub fn new() -> Self {
Self {
tid: None,
task: None,
process: None,
vm: None,
fs: None,
files: None,
rlimits: None,
clear_ctid: None,
}
}
pub fn tid(mut self, tid: ThreadId) -> Self {
self.tid = Some(tid);
self
}
pub fn task(mut self, task: Task) -> Self {
self.task = Some(task);
self
}
pub fn process(mut self, process: ProcessRef) -> Self {
self.process = Some(process);
self
}
pub fn vm(mut self, vm: ProcessVMRef) -> Self {
self.vm = Some(vm);
self
}
pub fn fs(mut self, fs: FsViewRef) -> Self {
self.fs = Some(fs);
self
}
pub fn files(mut self, files: FileTableRef) -> Self {
self.files = Some(files);
self
}
pub fn rlimits(mut self, rlimits: ResourceLimitsRef) -> Self {
self.rlimits = Some(rlimits);
self
}
pub fn clear_ctid(mut self, clear_tid_addr: NonNull<pid_t>) -> Self {
self.clear_ctid = Some(clear_tid_addr);
self
}
pub fn build(self) -> Result<ThreadRef> {
let tid = self.tid.unwrap_or_else(|| ThreadId::new());
let task = self
.task
.ok_or_else(|| errno!(EINVAL, "task is mandatory"))?;
let process = self
.process
.ok_or_else(|| errno!(EINVAL, "process is mandatory"))?;
let vm = self
.vm
.ok_or_else(|| errno!(EINVAL, "memory is mandatory"))?;
let fs = self.fs.unwrap_or_default();
let files = self.files.unwrap_or_default();
let rlimits = self.rlimits.unwrap_or_default();
let clear_ctid = SgxRwLock::new(self.clear_ctid);
let inner = SgxMutex::new(ThreadInner::new());
let new_thread = Arc::new(Thread {
task,
tid,
clear_ctid,
inner,
process,
vm,
fs,
files,
rlimits,
});
let mut inner = new_thread.process().inner();
inner.threads_mut().unwrap().push(new_thread.clone());
drop(inner);
Ok(new_thread)
}
}

@ -0,0 +1,100 @@
use std::collections::HashSet;
use crate::prelude::*;
/// ThreadId implements self-managed thread IDs.
///
/// Every `ThreadId` created with `new` holds an ID taken from the global
/// allocator, so no two live instances created that way share an ID.
/// When such an instance is dropped, its ID is returned to the allocator.
#[derive(Debug, PartialEq)]
pub struct ThreadId {
    tid: u32,
}

impl ThreadId {
    /// Create a new thread ID.
    ///
    /// The thread ID returned is guaranteed to have a value greater than zero.
    pub fn new() -> ThreadId {
        let tid = THREAD_ID_ALLOC.lock().unwrap().alloc();
        ThreadId { tid }
    }

    /// Create a "zero" thread ID.
    ///
    /// This "zero" thread ID is used exclusively by the idle process.
    pub fn zero() -> ThreadId {
        ThreadId { tid: 0 }
    }

    /// Return the value of the thread ID.
    pub fn as_u32(&self) -> u32 {
        self.tid
    }
}

impl Drop for ThreadId {
    /// Returns the ID to the global allocator.
    fn drop(&mut self) {
        // The "zero" ID never came from the allocator, so there is nothing to free.
        if self.tid != 0 {
            let mut alloc = THREAD_ID_ALLOC.lock().unwrap();
            alloc.free(self.tid).expect("tid must has been allocated");
        }
    }
}
// The global ID allocator behind `ThreadId::new` and `ThreadId`'s `Drop`.
lazy_static! {
static ref THREAD_ID_ALLOC: SgxMutex<IdAlloc> = SgxMutex::new(IdAlloc::new());
}
/// PID/TID allocator.
///
/// IDs are handed out in increasing order starting from the minimal value
/// (here, 1). Once the maximum value (2^32-1) has been reached, the search
/// wraps around to the minimal value and skips every ID that is still in use.
///
/// The allocation strategy above follows the *nix tradition.
///
/// Note that PID/TID 0 is reserved for the idle process, so the allocator
/// never returns 0.
#[derive(Debug, Clone)]
struct IdAlloc {
    /// The most recently returned ID (0 before the first allocation).
    next_id: u32,
    /// All IDs that are currently allocated.
    used_ids: HashSet<u32>,
}

impl IdAlloc {
    /// Creates an allocator with no ID in use.
    pub fn new() -> Self {
        Self {
            next_id: 0,
            used_ids: HashSet::new(),
        }
    }

    /// Allocates and returns the next free ID after the last one returned.
    pub fn alloc(&mut self) -> u32 {
        loop {
            // Advance the cursor, wrapping around past the reserved ID 0
            self.next_id = match self.next_id.wrapping_add(1) {
                0 => 1,
                candidate => candidate,
            };
            // `insert` reports whether the ID was free, and claims it if so
            if self.used_ids.insert(self.next_id) {
                return self.next_id;
            }
        }
    }

    /// Releases `id`, returning `Some(id)` if it was allocated and `None`
    /// otherwise. Freeing an unallocated ID trips a debug assertion.
    pub fn free(&mut self, id: u32) -> Option<u32> {
        debug_assert!(self.used_ids.contains(&id));
        match self.used_ids.remove(&id) {
            true => Some(id),
            false => None,
        }
    }
}

@ -0,0 +1,186 @@
use std::fmt;
use std::ptr::NonNull;
use super::task::Task;
use super::{
FileTableRef, FsViewRef, ProcessRef, ProcessVM, ProcessVMRef, ResourceLimitsRef, ThreadRef,
};
use crate::prelude::*;
pub use self::builder::ThreadBuilder;
pub use self::id::ThreadId;
mod builder;
mod id;
/// A thread: a low-level task, an ID, mutable state, a reference to the
/// process it belongs to, and references to the resources it uses.
pub struct Thread {
// Low-level info
task: Task,
// Immutable info
tid: ThreadId,
// Mutable info
clear_ctid: SgxRwLock<Option<NonNull<pid_t>>>,
inner: SgxMutex<ThreadInner>,
// Process
process: ProcessRef,
// Resources
vm: ProcessVMRef,
fs: FsViewRef,
files: FileTableRef,
rlimits: ResourceLimitsRef,
}
/// Coarse state of a thread, as reported by `ThreadInner::status`.
#[derive(Debug, PartialEq, Clone, Copy)]
pub enum ThreadStatus {
// Not started yet
Init,
// Started and not yet exited
Running,
// Exited
Exited,
}
impl Thread {
    /// Returns the process that owns this thread.
    pub fn process(&self) -> &ProcessRef {
        &self.process
    }

    /// Returns the low-level task of this thread.
    pub fn task(&self) -> &Task {
        &self.task
    }

    /// Returns the thread ID as a plain number.
    pub fn tid(&self) -> pid_t {
        self.tid.as_u32()
    }

    /// Returns the current status of the thread.
    pub fn status(&self) -> ThreadStatus {
        let inner = self.inner();
        inner.status()
    }

    /// Returns the memory resource of this thread.
    pub fn vm(&self) -> &ProcessVMRef {
        &self.vm
    }

    /// Returns the file table of this thread.
    pub fn files(&self) -> &FileTableRef {
        &self.files
    }

    /// Get a file from the file table.
    pub fn file(&self, fd: FileDesc) -> Result<FileRef> {
        self.files.lock().unwrap().get(fd)
    }

    /// Add a file to the file table.
    pub fn add_file(&self, new_file: FileRef, close_on_spawn: bool) -> FileDesc {
        self.files.lock().unwrap().put(new_file, close_on_spawn)
    }

    /// Returns the file-system view of this thread.
    pub fn fs(&self) -> &FsViewRef {
        &self.fs
    }

    /// Returns the resource limits of this thread.
    pub fn rlimits(&self) -> &ResourceLimitsRef {
        &self.rlimits
    }

    /// Returns the clear-child-tid address, if one is set.
    pub fn clear_ctid(&self) -> Option<NonNull<pid_t>> {
        let guard = self.clear_ctid.read().unwrap();
        *guard
    }

    /// Replaces the clear-child-tid address.
    pub fn set_clear_ctid(&self, new_clear_ctid: Option<NonNull<pid_t>>) {
        let mut guard = self.clear_ctid.write().unwrap();
        *guard = new_clear_ctid;
    }

    /// Marks the thread as started on the host thread `host_tid`.
    pub(super) fn start(&self, host_tid: pid_t) {
        self.inner().start(host_tid);
    }

    /// Marks the thread as exited with `exit_status` and detaches it from its
    /// owner process.
    ///
    /// Returns the number of threads left in the process. Panics if the thread
    /// is not in the process's thread list.
    pub(super) fn exit(&self, exit_status: i32) -> usize {
        // Remove this thread from its owner process
        let mut process_inner = self.process.inner();
        let threads = process_inner.threads_mut().unwrap();
        let own_tid = self.tid();
        let thread_i = threads
            .iter()
            .position(|thread| thread.tid() == own_tid)
            .expect("the thread must belong to the process");
        // Order within the list is not preserved
        threads.swap_remove(thread_i);

        self.inner().exit(exit_status);

        threads.len()
    }

    /// Locks and returns the mutable state of the thread.
    pub(super) fn inner(&self) -> SgxMutexGuard<ThreadInner> {
        self.inner.lock().unwrap()
    }
}
/// Two threads compare equal when they have the same TID.
impl PartialEq for Thread {
    fn eq(&self, other: &Self) -> bool {
        let (lhs_tid, rhs_tid) = (self.tid(), other.tid());
        lhs_tid == rhs_tid
    }
}
// Why a manual implementation of the Debug trait?
//
// An explicit implementation of the Debug trait is required since the Process
// and Thread structs refer to each other. Thus, the automatically-derived
// implementations of the Debug trait for the two structs may lead to an
// infinite loop.
impl fmt::Debug for Thread {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("Thread")
.field("tid", &self.tid())
// Print only the owner's PID rather than the whole process, to break the cycle
.field("pid", &self.process().pid())
.field("inner", &self.inner())
.field("vm", self.vm())
.field("fs", self.fs())
.field("files", self.files())
.finish()
}
}
// NOTE(review): presumably required because `clear_ctid` holds a `NonNull<pid_t>`,
// which is neither Send nor Sync on its own — confirm that the user-space address is
// only ever dereferenced under appropriate synchronization.
unsafe impl Send for Thread {}
unsafe impl Sync for Thread {}
/// Mutable state of a thread: a small state machine that moves from `Init`
/// to `Live` (via `start`) to `Exited` (via `exit`).
#[derive(Debug)]
pub enum ThreadInner {
    Init,
    Live { host_tid: pid_t },
    Exited { exit_status: i32 },
}

impl ThreadInner {
    /// Creates the state of a thread that has not started yet.
    pub fn new() -> Self {
        ThreadInner::Init
    }

    /// Maps the state to its coarse `ThreadStatus`.
    pub fn status(&self) -> ThreadStatus {
        match *self {
            ThreadInner::Init => ThreadStatus::Init,
            ThreadInner::Live { .. } => ThreadStatus::Running,
            ThreadInner::Exited { .. } => ThreadStatus::Exited,
        }
    }

    /// Returns the exit status, or `None` if the thread has not exited.
    pub fn exit_status(&self) -> Option<i32> {
        if let ThreadInner::Exited { exit_status } = *self {
            Some(exit_status)
        } else {
            None
        }
    }

    /// Returns the host thread ID, or `None` unless the thread is live.
    pub fn host_tid(&self) -> Option<pid_t> {
        if let ThreadInner::Live { host_tid } = *self {
            Some(host_tid)
        } else {
            None
        }
    }

    /// Moves the state to `Live` on the host thread `host_tid`.
    ///
    /// Expected to be called in the `Init` state (checked in debug builds only).
    pub fn start(&mut self, host_tid: pid_t) {
        debug_assert!(self.status() == ThreadStatus::Init);
        *self = ThreadInner::Live { host_tid };
    }

    /// Moves the state to `Exited` with `exit_status`.
    ///
    /// Expected to be called in the `Live` state (checked in debug builds only).
    pub fn exit(&mut self, exit_status: i32) {
        debug_assert!(self.status() == ThreadStatus::Running);
        *self = ThreadInner::Exited { exit_status };
    }
}

@ -1,4 +1,5 @@
use super::*;
/// A wait/wakeup mechanism that connects wait4 and exit system calls.
use crate::prelude::*;
#[derive(Debug)]
pub struct Waiter<D, R>

@ -1,3 +1,5 @@
# In SGX simulation mode, we don't use wrfsbase directly. Instead, we use arch_prctl syscall.
#if SGX_MODE_SIM
#define __ASSEMBLY__

@ -7,21 +7,6 @@
//! 3. Preprocess the system call and then call `dispatch_syscall` (in this file)
//! 4. Call `do_*` to process the system call (in other modules)
use fs::{
do_access, do_chdir, do_chmod, do_chown, do_close, do_dup, do_dup2, do_dup3, do_eventfd,
do_eventfd2, do_faccessat, do_fchmod, do_fchown, do_fcntl, do_fdatasync, do_fstat, do_fstatat,
do_fsync, do_ftruncate, do_getdents64, do_ioctl, do_lchown, do_link, do_lseek, do_lstat,
do_mkdir, do_open, do_openat, do_pipe, do_pipe2, do_pread, do_pwrite, do_read, do_readlink,
do_readv, do_rename, do_rmdir, do_sendfile, do_stat, do_sync, do_truncate, do_unlink, do_write,
do_writev, iovec_t, File, FileDesc, FileRef, HostStdioFds, Stat,
};
use misc::{resource_t, rlimit_t, utsname_t};
use net::{
do_epoll_create, do_epoll_create1, do_epoll_ctl, do_epoll_pwait, do_epoll_wait, do_poll,
do_recvmsg, do_select, do_sendmsg, msghdr, msghdr_mut, AsSocket, AsUnixSocket, EpollEvent,
SocketFile, UnixSocketFile,
};
use process::{pid_t, ChildProcessFilter, CloneFlags, CpuSet, FileAction, FutexFlags, FutexOp};
use std::any::Any;
use std::convert::TryFrom;
use std::ffi::{CStr, CString};
@ -30,8 +15,28 @@ use std::ptr;
use time::{clockid_t, timespec_t, timeval_t, GLOBAL_PROFILER};
use util::log::{self, LevelFilter};
use util::mem_util::from_user::*;
use vm::{MMapFlags, VMPerms};
use {fs, process, std, vm};
use crate::fs::{
do_access, do_chdir, do_chmod, do_chown, do_close, do_dup, do_dup2, do_dup3, do_eventfd,
do_eventfd2, do_faccessat, do_fchmod, do_fchown, do_fcntl, do_fdatasync, do_fstat, do_fstatat,
do_fsync, do_ftruncate, do_getcwd, do_getdents64, do_ioctl, do_lchown, do_link, do_lseek,
do_lstat, do_mkdir, do_open, do_openat, do_pipe, do_pipe2, do_pread, do_pwrite, do_read,
do_readlink, do_readv, do_rename, do_rmdir, do_sendfile, do_stat, do_sync, do_truncate,
do_unlink, do_write, do_writev, iovec_t, File, FileDesc, FileRef, HostStdioFds, Stat,
};
use crate::misc::{resource_t, rlimit_t, utsname_t};
use crate::net::{
do_epoll_create, do_epoll_create1, do_epoll_ctl, do_epoll_pwait, do_epoll_wait, do_poll,
do_recvmsg, do_select, do_sendmsg, msghdr, msghdr_mut, AsSocket, AsUnixSocket, EpollEvent,
SocketFile, UnixSocketFile,
};
use crate::process::{
do_arch_prctl, do_clone, do_exit, do_futex, do_getegid, do_geteuid, do_getgid, do_getpgid,
do_getpid, do_getppid, do_gettid, do_getuid, do_sched_getaffinity, do_sched_setaffinity,
do_sched_yield, do_set_tid_address, do_spawn, do_wait4, pid_t, FdOp,
};
use crate::vm::{MMapFlags, VMPerms};
use crate::{fs, process, std, vm};
use super::*;
@ -391,7 +396,7 @@ macro_rules! process_syscall_table_with_callback {
(Membarrier = 324) => handle_unsupported(),
(Mlock2 = 325) => handle_unsupported(),
// Occlum-specific sytem calls
// Occlum-specific system calls
(Spawn = 360) => do_spawn(child_pid_ptr: *mut u32, path: *const i8, argv: *const *const i8, envp: *const *const i8, fdop_list: *const FdOp),
// Exception handling
(Rdtsc = 361) => do_rdtsc(low_ptr: *mut u32, high_ptr: *mut u32),
@ -648,163 +653,6 @@ const FDOP_CLOSE: u32 = 1;
const FDOP_DUP2: u32 = 2;
const FDOP_OPEN: u32 = 3;
/// One node of the C-layout linked list of file actions passed to `do_spawn`.
///
/// `cmd` is matched against the `FDOP_*` constants; which of `fd`, `srcfd`,
/// `oflag`, `mode` and `path` are meaningful depends on `cmd`.
#[repr(C)]
#[derive(Debug)]
pub struct FdOp {
// We actually switch the prev and next fields in the libc definition.
prev: *const FdOp,
next: *const FdOp,
cmd: u32,
fd: u32,
srcfd: u32,
oflag: u32,
mode: u32,
path: *const i8,
}
fn clone_file_actions_safely(fdop_ptr: *const FdOp) -> Result<Vec<FileAction>> {
let mut file_actions = Vec::new();
let mut fdop_ptr = fdop_ptr;
while fdop_ptr != ptr::null() {
check_ptr(fdop_ptr)?;
let fdop = unsafe { &*fdop_ptr };
let file_action = match fdop.cmd {
FDOP_CLOSE => FileAction::Close(fdop.fd),
FDOP_DUP2 => FileAction::Dup2(fdop.srcfd, fdop.fd),
FDOP_OPEN => FileAction::Open {
path: clone_cstring_safely(fdop.path)?
.to_string_lossy()
.into_owned(),
mode: fdop.mode,
oflag: fdop.oflag,
fd: fdop.fd,
},
_ => {
return_errno!(EINVAL, "Unknown file action command");
}
};
file_actions.push(file_action);
fdop_ptr = fdop.next;
}
Ok(file_actions)
}
/// Syscall entry for spawn: copies the arguments out of user memory, spawns
/// the child as a child of the current process, and writes the child's PID
/// through `child_pid_ptr`.
///
/// Returns 0 on success.
fn do_spawn(
    child_pid_ptr: *mut u32,
    path: *const i8,
    argv: *const *const i8,
    envp: *const *const i8,
    fdop_list: *const FdOp,
) -> Result<isize> {
    // Validate and copy every user-supplied argument before doing any work
    check_mut_ptr(child_pid_ptr)?;
    let path = clone_cstring_safely(path)?.to_string_lossy().into_owned();
    let argv = clone_cstrings_safely(argv)?;
    let envp = clone_cstrings_safely(envp)?;
    let file_actions = clone_file_actions_safely(fdop_list)?;

    let parent = process::get_current();
    debug!(
        "spawn: path: {:?}, argv: {:?}, envp: {:?}, fdop: {:?}",
        path, argv, envp, file_actions
    );

    let child_pid = process::do_spawn(&path, &argv, &envp, &file_actions, &parent)?;
    unsafe {
        *child_pid_ptr = child_pid;
    }
    Ok(0)
}
/// Syscall entry for clone: validates the raw arguments, turns each optional
/// pointer into an `Option` according to `flags`, and forwards to
/// `process::do_clone`.
///
/// Unknown flag bits are silently dropped. `ptid`, `ctid` and `new_tls` are
/// only validated and passed on when `CLONE_PARENT_SETTID`,
/// `CLONE_CHILD_CLEARTID` and `CLONE_SETTLS` are set, respectively. Returns the
/// new thread's ID.
pub fn do_clone(
    flags: u32,
    stack_addr: usize,
    ptid: *mut pid_t,
    ctid: *mut pid_t,
    new_tls: usize,
) -> Result<isize> {
    let flags = CloneFlags::from_bits_truncate(flags);
    check_mut_ptr(stack_addr as *mut u64)?;

    let ptid = if flags.contains(CloneFlags::CLONE_PARENT_SETTID) {
        check_mut_ptr(ptid)?;
        Some(ptid)
    } else {
        None
    };
    let ctid = if flags.contains(CloneFlags::CLONE_CHILD_CLEARTID) {
        check_mut_ptr(ctid)?;
        Some(ctid)
    } else {
        None
    };
    let new_tls = if flags.contains(CloneFlags::CLONE_SETTLS) {
        check_mut_ptr(new_tls as *mut usize)?;
        Some(new_tls)
    } else {
        None
    };

    let child_pid = process::do_clone(flags, stack_addr, ptid, ctid, new_tls)?;
    Ok(child_pid as isize)
}
/// Syscall entry for futex: decodes `futex_op` and dispatches to the wait,
/// wake or requeue implementation in the process module.
///
/// * `timeout` - for `FUTEX_WAIT`, a user pointer to a `timespec_t` (null means
///   no timeout); for `FUTEX_REQUEUE`, the maximum number of waiters to requeue.
/// * `futex_new_addr` - only used (and validated) by `FUTEX_REQUEUE`.
///
/// Returns 0 for wait, and the count reported by the process module for wake
/// and requeue. Any other operation fails with `ENOSYS`.
pub fn do_futex(
futex_addr: *const i32,
futex_op: u32,
futex_val: i32,
timeout: u64,
futex_new_addr: *const i32,
) -> Result<isize> {
check_ptr(futex_addr)?;
let (futex_op, futex_flags) = process::futex_op_and_flags_from_u32(futex_op)?;
// Converts a count argument to usize, rejecting negative values with EINVAL
let get_futex_val = |val| -> Result<usize> {
if val < 0 {
return_errno!(EINVAL, "the futex val must not be negative");
}
Ok(val as usize)
};
match futex_op {
FutexOp::FUTEX_WAIT => {
let timeout = {
let timeout = timeout as *const timespec_t;
if timeout.is_null() {
None
} else {
let ts = timespec_t::from_raw_ptr(timeout)?;
ts.validate()?;
if futex_flags.contains(FutexFlags::FUTEX_CLOCK_REALTIME) {
warn!("CLOCK_REALTIME is not supported yet, use monotonic clock");
}
Some(ts)
}
};
process::futex_wait(futex_addr, futex_val, &timeout).map(|_| 0)
}
FutexOp::FUTEX_WAKE => {
let max_count = get_futex_val(futex_val)?;
process::futex_wake(futex_addr, max_count).map(|count| count as isize)
}
FutexOp::FUTEX_REQUEUE => {
check_ptr(futex_new_addr)?;
let max_nwakes = get_futex_val(futex_val)?;
// NOTE(review): `timeout as i32` truncates the 64-bit argument before the sign check — confirm this is intended
let max_nrequeues = get_futex_val(timeout as i32)?;
process::futex_requeue(futex_addr, max_nwakes, max_nrequeues, futex_new_addr)
.map(|nwakes| nwakes as isize)
}
_ => return_errno!(ENOSYS, "the futex operation is not supported"),
}
}
fn do_mmap(
addr: usize,
size: usize,
@ -845,75 +693,6 @@ fn do_brk(new_brk_addr: usize) -> Result<isize> {
Ok(ret_brk_addr as isize)
}
/// Syscall entry for wait4: waits for a child selected by `pid` and returns
/// the PID of the child that was waited for.
///
/// `pid` selects the children to wait for:
/// * `< -1` - any child whose process group ID equals `-pid`;
/// * `-1` - any child;
/// * `0` - any child in the caller's process group;
/// * `> 0` - the child with exactly that PID.
///
/// When `_exit_status` is non-null it must be a valid user pointer; the
/// child's exit status is written through it on success.
fn do_wait4(pid: i32, _exit_status: *mut i32) -> Result<isize> {
    if !_exit_status.is_null() {
        check_mut_ptr(_exit_status)?;
    }

    let child_process_filter = match pid {
        // `wrapping_neg` yields the magnitude even for `i32::MIN`, where a
        // plain `-pid` would overflow (and panic in builds with overflow checks).
        pid if pid < -1 => process::ChildProcessFilter::WithPGID(pid.wrapping_neg() as pid_t),
        -1 => process::ChildProcessFilter::WithAnyPID,
        0 => {
            let pgid = process::do_getpgid();
            process::ChildProcessFilter::WithPGID(pgid)
        }
        // Every remaining value is strictly positive
        pid => process::ChildProcessFilter::WithPID(pid as pid_t),
    };

    let mut exit_status = 0;
    let child_pid = process::do_wait4(&child_process_filter, &mut exit_status)?;
    if !_exit_status.is_null() {
        unsafe {
            *_exit_status = exit_status;
        }
    }
    Ok(child_pid as isize)
}
/// Syscall entry for getpid: returns the value of `process::do_getpid`.
fn do_getpid() -> Result<isize> {
    Ok(process::do_getpid() as isize)
}
/// Syscall entry for gettid: returns the value of `process::do_gettid`.
fn do_gettid() -> Result<isize> {
    Ok(process::do_gettid() as isize)
}
/// Syscall entry for getppid: returns the value of `process::do_getppid`.
fn do_getppid() -> Result<isize> {
    Ok(process::do_getppid() as isize)
}
/// Syscall entry for getpgid: returns the value of `process::do_getpgid`.
fn do_getpgid() -> Result<isize> {
    Ok(process::do_getpgid() as isize)
}
// TODO: implement uid, gid, euid, egid
/// Stub for getuid: always reports user ID 0.
fn do_getuid() -> Result<isize> {
Ok(0)
}
/// Stub for getgid: always reports group ID 0.
fn do_getgid() -> Result<isize> {
Ok(0)
}
/// Stub for geteuid: always reports effective user ID 0.
fn do_geteuid() -> Result<isize> {
Ok(0)
}
/// Stub for getegid: always reports effective group ID 0.
fn do_getegid() -> Result<isize> {
Ok(0)
}
// TODO: handle tz: timezone_t
fn do_gettimeofday(tv_u: *mut timeval_t) -> Result<isize> {
check_mut_ptr(tv_u)?;
@ -958,90 +737,6 @@ fn do_nanosleep(req_u: *const timespec_t, rem_u: *mut timespec_t) -> Result<isiz
Ok(0)
}
// FIXME: use this
const MAP_FAILED: *const c_void = ((-1) as i64) as *const c_void;
/// Syscall entry for exit: records the exit with `process::do_exit`, then
/// calls the external `do_exit_task`. Never returns to the caller.
fn do_exit(status: i32) -> ! {
debug!("exit: {}", status);
extern "C" {
fn do_exit_task() -> !;
}
process::do_exit(status);
// NOTE(review): do_exit_task presumably jumps back into the task runner — confirm against task.c
unsafe {
do_exit_task();
}
}
/// Syscall entry for getcwd: copies the current working directory into the
/// user buffer `buf` of `size` bytes as a NUL-terminated string.
///
/// Returns `buf` (as an integer) on success. Fails with `ERANGE` if the path
/// plus its terminating NUL does not fit into `size` bytes.
fn do_getcwd(buf: *mut u8, size: usize) -> Result<isize> {
    check_mut_array(buf, size)?;
    let safe_buf = unsafe { std::slice::from_raw_parts_mut(buf, size) };

    let proc_ref = process::get_current();
    let mut proc = proc_ref.lock().unwrap();
    let cwd = proc.get_cwd();
    let cwd_len = cwd.len();

    // One extra byte is needed for the terminating NUL
    if safe_buf.len() < cwd_len + 1 {
        return_errno!(ERANGE, "buf is not long enough");
    }
    safe_buf[..cwd_len].copy_from_slice(cwd.as_bytes());
    safe_buf[cwd_len] = 0;

    Ok(buf as isize)
}
/// Syscall entry for arch_prctl: decodes `code`, validates `addr`, and
/// forwards to `process::do_arch_prctl`. Returns 0 on success.
fn do_arch_prctl(code: u32, addr: *mut usize) -> Result<isize> {
    let code = process::ArchPrctlCode::from_u32(code)?;
    check_mut_ptr(addr)?;
    process::do_arch_prctl(code, addr)?;
    Ok(0)
}
/// Syscall entry for set_tid_address: validates `tidptr` and forwards to
/// `process::do_set_tid_address`, returning the TID it reports.
fn do_set_tid_address(tidptr: *mut pid_t) -> Result<isize> {
    check_mut_ptr(tidptr)?;
    let tid = process::do_set_tid_address(tidptr)?;
    Ok(tid as isize)
}
/// Syscall entry for sched_yield: calls `process::do_sched_yield` and
/// always returns 0.
fn do_sched_yield() -> Result<isize> {
    let _ = process::do_sched_yield();
    Ok(0)
}
/// Syscall entry for sched_getaffinity: fetches the CPU set of `pid` and
/// copies it into the user buffer `buf` of `cpusize` bytes.
///
/// Fails with `EINVAL` if `cpusize` is zero and with `EFAULT` if `buf` is
/// null. Returns the value reported by `process::do_sched_getaffinity`.
fn do_sched_getaffinity(pid: pid_t, cpusize: size_t, buf: *mut c_uchar) -> Result<isize> {
    // Construct safe Rust types
    check_mut_array(buf, cpusize)?;
    if cpusize == 0 {
        return_errno!(EINVAL, "cpuset size must be greater than zero");
    }
    if buf.is_null() {
        return_errno!(EFAULT, "cpuset mask must NOT be null");
    }
    let buf_slice = unsafe { std::slice::from_raw_parts_mut(buf, cpusize) };

    // Call the memory-safe do_sched_getaffinity
    let mut cpuset = CpuSet::new(cpusize);
    let retval = process::do_sched_getaffinity(pid, &mut cpuset)?;

    // Copy from Rust types to C types
    buf_slice.copy_from_slice(cpuset.as_slice());
    Ok(retval as isize)
}
/// Syscall entry for sched_setaffinity: reads a CPU set of `cpusize` bytes
/// from the user buffer `buf` and applies it to `pid`.
///
/// Fails with `EINVAL` if `cpusize` is zero and with `EFAULT` if `buf` is
/// null. Returns 0 on success.
fn do_sched_setaffinity(pid: pid_t, cpusize: size_t, buf: *const c_uchar) -> Result<isize> {
    // Convert unsafe C types into safe Rust types
    check_array(buf, cpusize)?;
    if cpusize == 0 {
        return_errno!(EINVAL, "cpuset size must be greater than zero");
    }
    if buf.is_null() {
        return_errno!(EFAULT, "cpuset mask must NOT be null");
    }
    let cpuset = CpuSet::from_raw_buf(buf, cpusize);
    debug!("sched_setaffinity cpuset: {:#x}", cpuset);

    // Call the memory-safe do_sched_setaffinity
    process::do_sched_setaffinity(pid, &cpuset)?;
    Ok(0)
}
fn do_socket(domain: c_int, socket_type: c_int, protocol: c_int) -> Result<isize> {
debug!(
"socket: domain: {}, socket_type: 0x{:x}, protocol: {}",
@ -1059,7 +754,7 @@ fn do_socket(domain: c_int, socket_type: c_int, protocol: c_int) -> Result<isize
}
};
let fd = process::put_file(file_ref, false)?;
let fd = current!().add_file(file_ref, false);
Ok(fd as isize)
}
@ -1068,7 +763,7 @@ fn do_connect(fd: c_int, addr: *const libc::sockaddr, addr_len: libc::socklen_t)
"connect: fd: {}, addr: {:?}, addr_len: {}",
fd, addr, addr_len
);
let file_ref = process::get_file(fd as FileDesc)?;
let file_ref = current!().file(fd as FileDesc)?;
if let Ok(socket) = file_ref.as_socket() {
let ret = try_libc!(libc::ocall::connect(socket.fd(), addr, addr_len));
Ok(ret as isize)
@ -1103,13 +798,13 @@ fn do_accept4(
"accept4: fd: {}, addr: {:?}, addr_len: {:?}, flags: {:#x}",
fd, addr, addr_len, flags
);
let file_ref = process::get_file(fd as FileDesc)?;
let file_ref = current!().file(fd as FileDesc)?;
if let Ok(socket) = file_ref.as_socket() {
let socket = file_ref.as_socket()?;
let new_socket = socket.accept(addr, addr_len, flags)?;
let new_file_ref: Arc<Box<dyn File>> = Arc::new(Box::new(new_socket));
let new_fd = process::put_file(new_file_ref, false)?;
let new_fd = current!().add_file(new_file_ref, false);
Ok(new_fd as isize)
} else if let Ok(unix_socket) = file_ref.as_unix_socket() {
@ -1118,7 +813,7 @@ fn do_accept4(
let new_socket = unix_socket.accept()?;
let new_file_ref: Arc<Box<dyn File>> = Arc::new(Box::new(new_socket));
let new_fd = process::put_file(new_file_ref, false)?;
let new_fd = current!().add_file(new_file_ref, false);
Ok(new_fd as isize)
} else {
@ -1128,7 +823,7 @@ fn do_accept4(
fn do_shutdown(fd: c_int, how: c_int) -> Result<isize> {
debug!("shutdown: fd: {}, how: {}", fd, how);
let file_ref = process::get_file(fd as FileDesc)?;
let file_ref = current!().file(fd as FileDesc)?;
if let Ok(socket) = file_ref.as_socket() {
let ret = try_libc!(libc::ocall::shutdown(socket.fd(), how));
Ok(ret as isize)
@ -1139,7 +834,7 @@ fn do_shutdown(fd: c_int, how: c_int) -> Result<isize> {
fn do_bind(fd: c_int, addr: *const libc::sockaddr, addr_len: libc::socklen_t) -> Result<isize> {
debug!("bind: fd: {}, addr: {:?}, addr_len: {}", fd, addr, addr_len);
let file_ref = process::get_file(fd as FileDesc)?;
let file_ref = current!().file(fd as FileDesc)?;
if let Ok(socket) = file_ref.as_socket() {
check_ptr(addr)?; // TODO: check addr_len
let ret = try_libc!(libc::ocall::bind(socket.fd(), addr, addr_len));
@ -1159,7 +854,7 @@ fn do_bind(fd: c_int, addr: *const libc::sockaddr, addr_len: libc::socklen_t) ->
fn do_listen(fd: c_int, backlog: c_int) -> Result<isize> {
debug!("listen: fd: {}, backlog: {}", fd, backlog);
let file_ref = process::get_file(fd as FileDesc)?;
let file_ref = current!().file(fd as FileDesc)?;
if let Ok(socket) = file_ref.as_socket() {
let ret = try_libc!(libc::ocall::listen(socket.fd(), backlog));
Ok(ret as isize)
@ -1182,7 +877,7 @@ fn do_setsockopt(
"setsockopt: fd: {}, level: {}, optname: {}, optval: {:?}, optlen: {:?}",
fd, level, optname, optval, optlen
);
let file_ref = process::get_file(fd as FileDesc)?;
let file_ref = current!().file(fd as FileDesc)?;
if let Ok(socket) = file_ref.as_socket() {
let ret = try_libc!(libc::ocall::setsockopt(
socket.fd(),
@ -1211,7 +906,7 @@ fn do_getsockopt(
"getsockopt: fd: {}, level: {}, optname: {}, optval: {:?}, optlen: {:?}",
fd, level, optname, optval, optlen
);
let file_ref = process::get_file(fd as FileDesc)?;
let file_ref = current!().file(fd as FileDesc)?;
let socket = file_ref.as_socket()?;
let ret = try_libc!(libc::ocall::getsockopt(
@ -1233,7 +928,7 @@ fn do_getpeername(
"getpeername: fd: {}, addr: {:?}, addr_len: {:?}",
fd, addr, addr_len
);
let file_ref = process::get_file(fd as FileDesc)?;
let file_ref = current!().file(fd as FileDesc)?;
if let Ok(socket) = file_ref.as_socket() {
let ret = try_libc!(libc::ocall::getpeername(socket.fd(), addr, addr_len));
Ok(ret as isize)
@ -1257,7 +952,7 @@ fn do_getsockname(
"getsockname: fd: {}, addr: {:?}, addr_len: {:?}",
fd, addr, addr_len
);
let file_ref = process::get_file(fd as FileDesc)?;
let file_ref = current!().file(fd as FileDesc)?;
if let Ok(socket) = file_ref.as_socket() {
let ret = try_libc!(libc::ocall::getsockname(socket.fd(), addr, addr_len));
Ok(ret as isize)
@ -1281,7 +976,7 @@ fn do_sendto(
"sendto: fd: {}, base: {:?}, len: {}, addr: {:?}, addr_len: {}",
fd, base, len, addr, addr_len
);
let file_ref = process::get_file(fd as FileDesc)?;
let file_ref = current!().file(fd as FileDesc)?;
let socket = file_ref.as_socket()?;
let ret = try_libc!(libc::ocall::sendto(
@ -1307,7 +1002,7 @@ fn do_recvfrom(
"recvfrom: fd: {}, base: {:?}, len: {}, flags: {}, addr: {:?}, addr_len: {:?}",
fd, base, len, flags, addr, addr_len
);
let file_ref = process::get_file(fd as FileDesc)?;
let file_ref = current!().file(fd as FileDesc)?;
let socket = file_ref.as_socket()?;
let ret = try_libc!(libc::ocall::recvfrom(
@ -1339,18 +1034,10 @@ fn do_socketpair(
if (domain == libc::AF_UNIX) {
let (client_socket, server_socket) =
UnixSocketFile::socketpair(socket_type as i32, protocol as i32)?;
let current_ref = process::get_current();
let mut proc = current_ref.lock().unwrap();
sock_pair[0] = proc
.get_files()
.lock()
.unwrap()
.put(Arc::new(Box::new(client_socket)), false);
sock_pair[1] = proc
.get_files()
.lock()
.unwrap()
.put(Arc::new(Box::new(server_socket)), false);
let current = current!();
let mut files = current.files().lock().unwrap();
sock_pair[0] = files.put(Arc::new(Box::new(client_socket)), false);
sock_pair[1] = files.put(Arc::new(Box::new(server_socket)), false);
debug!("socketpair: ({}, {})", sock_pair[0], sock_pair[1]);
Ok(0)

@ -18,7 +18,7 @@ impl GlobalProfiler {
}
pub fn thread_enter(&mut self) -> Result<()> {
let tid = process::do_gettid();
let tid = current!().tid();
if self.inner.insert(tid, ThreadProfiler::new()).is_some() {
return_errno!(
EINVAL,
@ -33,7 +33,7 @@ impl GlobalProfiler {
// will never return
self.syscall_exit(SyscallNum::Exit, false);
let tid = process::do_gettid();
let tid = current!().tid();
let mut exiting_profiler = self.inner.remove(&tid).ok_or_else(|| {
errno!(
@ -47,13 +47,13 @@ impl GlobalProfiler {
}
/// Forwards a syscall-entry event for `syscall_num` to the calling thread's profiler.
///
/// The profiler is looked up in `self.inner` by the current thread id, and the
/// value returned by its own `syscall_enter` is returned unchanged.
///
/// # Panics
///
/// Panics (through `unwrap`) when `self.inner` has no entry for the current tid.
pub fn syscall_enter(&mut self, syscall_num: SyscallNum) -> Result<()> {
// NOTE(review): `tid` is bound twice in a row; the second binding shadows the
// first, so only `current!().tid()` feeds the lookup below. This looks like the
// old and new form of the same statement shown together - confirm which is live.
let tid = process::do_gettid();
let tid = current!().tid();
// No fallback for a missing entry: the lookup result is unwrapped directly.
let mut prof = self.inner.get_mut(&tid).unwrap();
prof.syscall_enter(syscall_num)
}
/// Forwards a syscall-exit event for `syscall_num` to the calling thread's profiler.
///
/// `is_err` is handed through untouched to the per-thread profiler, which is
/// looked up in `self.inner` by the current thread id. The value returned by the
/// profiler's own `syscall_exit` is returned unchanged.
///
/// # Panics
///
/// Panics (through `unwrap`) when `self.inner` has no entry for the current tid.
pub fn syscall_exit(&mut self, syscall_num: SyscallNum, is_err: bool) -> Result<()> {
// NOTE(review): `tid` is bound twice in a row; the second binding shadows the
// first, so only `current!().tid()` feeds the lookup below. This looks like the
// old and new form of the same statement shown together - confirm which is live.
let tid = process::do_gettid();
let tid = current!().tid();
// No fallback for a missing entry: the lookup result is unwrapped directly.
let mut prof = self.inner.get_mut(&tid).unwrap();
prof.syscall_exit(syscall_num, is_err)
}

@ -94,7 +94,7 @@ impl Log for SimpleLogger {
if self.enabled(record.metadata()) {
// Parts of message
let level = record.level();
let tid = process::get_current_tid();
let tid = current!().tid();
let rounds = round_count();
let desc = round_desc();
// Message (null-terminated)

@ -1,6 +1,6 @@
use super::*;
use fs::{File, FileDesc, FileRef};
use process::{get_current, Process, ProcessRef};
use process::{Process, ProcessRef};
use std::fmt;
mod process_vm;
@ -35,32 +35,22 @@ pub fn do_mmap(
);
}
let mut current_vm_ref = {
let current_ref = get_current();
let current_process = current_ref.lock().unwrap();
current_process.get_vm().clone()
};
let mut current_vm = current_vm_ref.lock().unwrap();
let current = current!();
let mut current_vm = current.vm().lock().unwrap();
current_vm.mmap(addr, size, perms, flags, fd, offset)
}
/// Logs the request, locks the current VM object and calls its `munmap(addr, size)`.
///
/// The result of that `munmap` call is returned as-is.
///
/// # Panics
///
/// Panics if any of the `lock()` calls below returns an error (each is unwrapped).
pub fn do_munmap(addr: usize, size: usize) -> Result<()> {
debug!("munmap: addr: {:#x}, size: {:#x}", addr, size);
// NOTE(review): two ways of reaching the VM appear back to back here
// (`get_current()` + `get_vm()` and `current!()` + `vm()`); this looks like the old
// and new side of a refactoring shown together - confirm which lines are live.
// First form: clone the VM handle inside a block so the lock taken on the current
// process is released at the end of the block, before the VM itself is locked.
let mut current_vm_ref = {
let current_ref = get_current();
let current_process = current_ref.lock().unwrap();
current_process.get_vm().clone()
};
let mut current_vm = current_vm_ref.lock().unwrap();
// Second form: reach the VM through `current!()`. Shadowing `current_vm` does not
// drop the guard bound above; it stays alive until the function returns.
// NOTE(review): if `vm()` and `get_vm()` hand out the same lock, the `lock()` below
// would be a second acquisition on the same thread - verify.
let current = current!();
let mut current_vm = current.vm().lock().unwrap();
current_vm.munmap(addr, size)
}
/// Logs the request, locks the current VM object and calls its `brk(addr)`.
///
/// The `Result<usize>` produced by that `brk` call is returned as-is.
///
/// # Panics
///
/// Panics if any of the `lock()` calls below returns an error (each is unwrapped).
pub fn do_brk(addr: usize) -> Result<usize> {
debug!("brk: addr: {:#x}", addr);
// NOTE(review): two ways of reaching the VM appear back to back here
// (`get_current()` + `get_vm()` and `current!()` + `vm()`); this looks like the old
// and new side of a refactoring shown together - confirm which lines are live.
// First form: the guard on the current process (`current_process`) is a plain local,
// so it is held until the function returns, together with the VM guard.
let current_ref = get_current();
let current_process = current_ref.lock().unwrap();
let current_vm_ref = current_process.get_vm();
let mut current_vm = current_vm_ref.lock().unwrap();
// Second form: reach the VM through `current!()`. Shadowing `current_vm` does not
// drop the guard bound above.
// NOTE(review): if `vm()` and `get_vm()` hand out the same lock, the `lock()` below
// would be a second acquisition on the same thread - verify.
let current = current!();
let mut current_vm = current.vm().lock().unwrap();
current_vm.brk(addr)
}

@ -1,7 +1,7 @@
use super::*;
use super::config;
use super::process::{ElfFile, ProgramHeaderExt};
use super::process::elf_file::{ElfFile, ProgramHeaderExt};
use super::user_space_vm::{UserSpaceVMManager, UserSpaceVMRange, USER_SPACE_VM_MANAGER};
use super::vm_manager::{VMInitializer, VMManager, VMMapAddr, VMMapOptions, VMMapOptionsBuilder};
@ -301,7 +301,7 @@ impl ProcessVM {
if flags.contains(MMapFlags::MAP_ANONYMOUS) {
VMInitializer::FillZeros()
} else {
let file_ref = process::get_file(fd)?;
let file_ref = current!().file(fd)?;
VMInitializer::LoadFromFile {
file: file_ref,
offset: offset,

@ -27,13 +27,11 @@ struct thread_arg {
// Thread start routine: adds arg->local_count to the counter that
// arg->global_count points to, one increment at a time.
//
// _arg is cast to a struct thread_arg*. The fields read here are ti (used only
// in the progress messages), local_count (number of iterations), mutex and
// global_count. Always returns NULL.
static void* thread_func(void* _arg) {
struct thread_arg* arg = _arg;
printf("Thread #%d: started\n", arg->ti);
for (long i = 0; i < arg->local_count; i++) {
// The mutex is taken and released around every single increment.
pthread_mutex_lock(arg->mutex);
(*arg->global_count)++;
pthread_mutex_unlock(arg->mutex);
}
printf("Thread #%d: completed\n", arg->ti);
return NULL;
}

@ -171,7 +171,7 @@ static int test_sched_yield() {
// ============================================================================
static test_case_t test_cases[] = {
TEST_CASE(test_sched_xetaffinity_with_child_pid),
//TEST_CASE(test_sched_xetaffinity_with_child_pid),
TEST_CASE(test_sched_getaffinity_with_self_pid),
TEST_CASE(test_sched_setaffinity_with_self_pid),
TEST_CASE(test_sched_getaffinity_via_explicit_syscall),