From 65694815a463e9669e22ad8f30d3f732a8eee506 Mon Sep 17 00:00:00 2001 From: He Sun Date: Thu, 20 Feb 2020 08:26:02 +0000 Subject: [PATCH] Add eventfd file type and system call --- src/Enclave.edl | 5 + src/libos/src/fs/event_file.rs | 108 ++++++++++++ src/libos/src/fs/mod.rs | 2 + src/libos/src/fs/syscalls.rs | 21 +++ src/libos/src/net/io_multiplexing.rs | 31 +++- src/libos/src/net/syscalls.rs | 8 +- src/libos/src/syscall/mod.rs | 2 + src/pal/src/ocalls/fs.c | 5 + test/Makefile | 2 +- test/eventfd/Makefile | 5 + test/eventfd/main.c | 251 +++++++++++++++++++++++++++ test/include/test.h | 10 ++ 12 files changed, 438 insertions(+), 12 deletions(-) create mode 100644 src/libos/src/fs/event_file.rs create mode 100644 test/eventfd/Makefile create mode 100644 test/eventfd/main.c diff --git a/src/Enclave.edl b/src/Enclave.edl index a8ea3945..c82c96d9 100644 --- a/src/Enclave.edl +++ b/src/Enclave.edl @@ -112,5 +112,10 @@ enclave { [out] int* msg_flags_recv, int flags ) propagate_errno; + + int occlum_ocall_eventfd( + unsigned int initval, + int flags + ) propagate_errno; }; }; diff --git a/src/libos/src/fs/event_file.rs b/src/libos/src/fs/event_file.rs new file mode 100644 index 00000000..fdb0dd16 --- /dev/null +++ b/src/libos/src/fs/event_file.rs @@ -0,0 +1,108 @@ +use super::*; + +/// Native Linux eventfd +// TODO: move the implementaion of eventfd into libos to defend against Iago attacks from OCalls +#[derive(Debug)] +pub struct EventFile { + host_fd: c_int, +} + +impl EventFile { + pub fn new(init_val: u32, flags: EventCreationFlags) -> Result { + let host_fd = try_libc!({ + let mut ret: i32 = 0; + let status = occlum_ocall_eventfd(&mut ret, init_val, flags.bits()); + assert!(status == sgx_status_t::SGX_SUCCESS); + ret + }); + Ok(Self { host_fd }) + } + + pub fn get_host_fd(&self) -> c_int { + self.host_fd + } +} + +bitflags! { + pub struct EventCreationFlags: i32 { + /// Provides semaphore-like semantics for reads from the new file descriptor + const EFD_SEMAPHORE = 1 << 0; + /// Non-blocking + const EFD_NONBLOCK = 1 << 11; + /// Close on exec + const EFD_CLOEXEC = 1 << 19; + } +} + +extern "C" { + fn occlum_ocall_eventfd(ret: *mut i32, init_val: u32, flags: i32) -> sgx_status_t; +} + +impl Drop for EventFile { + fn drop(&mut self) { + let ret = unsafe { libc::ocall::close(self.host_fd) }; + assert!(ret == 0); + } +} + +impl File for EventFile { + fn read(&self, buf: &mut [u8]) -> Result { + let ret = try_libc!(libc::ocall::read( + self.host_fd, + buf.as_mut_ptr() as *mut c_void, + buf.len() + )) as usize; + assert!(ret <= buf.len()); + Ok(ret) + } + + fn write(&self, buf: &[u8]) -> Result { + let ret = try_libc!(libc::ocall::write( + self.host_fd, + buf.as_ptr() as *const c_void, + buf.len() + )) as usize; + assert!(ret <= buf.len()); + Ok(ret) + } + + fn get_access_mode(&self) -> Result { + Ok(AccessMode::O_RDWR) + } + + fn get_status_flags(&self) -> Result { + let ret = try_libc!(libc::ocall::fcntl_arg0(self.get_host_fd(), libc::F_GETFL)); + Ok(StatusFlags::from_bits_truncate(ret as u32)) + } + + fn set_status_flags(&self, new_status_flags: StatusFlags) -> Result<()> { + let valid_flags_mask = StatusFlags::O_APPEND + | StatusFlags::O_ASYNC + | StatusFlags::O_DIRECT + | StatusFlags::O_NOATIME + | StatusFlags::O_NONBLOCK; + let raw_status_flags = (new_status_flags & valid_flags_mask).bits(); + try_libc!(libc::ocall::fcntl_arg1( + self.get_host_fd(), + libc::F_SETFL, + raw_status_flags as c_int + )); + Ok(()) + } + + fn as_any(&self) -> &dyn Any { + self + } +} + +pub trait AsEvent { + fn as_event(&self) -> Result<&EventFile>; +} + +impl AsEvent for FileRef { + fn as_event(&self) -> Result<&EventFile> { + self.as_any() + .downcast_ref::() + .ok_or_else(|| errno!(EBADF, "not an event file")) + } +} diff --git a/src/libos/src/fs/mod.rs b/src/libos/src/fs/mod.rs index 7de987c7..7cdadd1e 100644 --- a/src/libos/src/fs/mod.rs +++ b/src/libos/src/fs/mod.rs @@ -10,6 +10,7 @@ use std::mem::MaybeUninit; use std::path::Path; pub use self::dev_fs::AsDevRandom; +pub use self::event_file::{AsEvent, EventFile}; pub use self::file::{File, FileRef}; pub use self::file_ops::{AccessMode, CreationFlags, Stat, StatusFlags}; pub use self::file_ops::{Flock, FlockType}; @@ -22,6 +23,7 @@ pub use self::stdio::{StdinFile, StdoutFile}; pub use self::syscalls::*; mod dev_fs; +mod event_file; mod file; mod file_ops; mod file_table; diff --git a/src/libos/src/fs/syscalls.rs b/src/libos/src/fs/syscalls.rs index 8ed5436c..86742104 100644 --- a/src/libos/src/fs/syscalls.rs +++ b/src/libos/src/fs/syscalls.rs @@ -1,3 +1,4 @@ +use super::event_file::EventCreationFlags; use super::file_ops; use super::file_ops::{ AccessibilityCheckFlags, AccessibilityCheckMode, DirFd, FcntlCmd, StatFlags, @@ -12,6 +13,26 @@ pub struct iovec_t { len: size_t, } +pub fn do_eventfd2(init_val: u32, flags: i32) -> Result { + info!("eventfd: initval {}, flags {} ", init_val, flags); + + let inner_flags = + EventCreationFlags::from_bits(flags).ok_or_else(|| errno!(EINVAL, "invalid flags"))?; + let file_ref: Arc> = { + let event = EventFile::new(init_val, inner_flags)?; + Arc::new(Box::new(event)) + }; + + let current_ref = process::get_current(); + let mut proc = current_ref.lock().unwrap(); + + let fd = proc.get_files().lock().unwrap().put( + file_ref, + inner_flags.contains(EventCreationFlags::EFD_CLOEXEC), + ); + Ok(fd as isize) +} + pub fn do_open(path: *const i8, flags: u32, mode: u32) -> Result { let path = from_user::clone_cstring_safely(path)? .to_string_lossy() diff --git a/src/libos/src/net/io_multiplexing.rs b/src/libos/src/net/io_multiplexing.rs index 48ed7fce..bda39f6d 100644 --- a/src/libos/src/net/io_multiplexing.rs +++ b/src/libos/src/net/io_multiplexing.rs @@ -1,5 +1,5 @@ use super::*; -use fs::{AsDevRandom, File, FileDesc, FileRef}; +use fs::{AsDevRandom, AsEvent, File, FileDesc, FileRef}; use std::any::Any; use std::collections::btree_map::BTreeMap; use std::fmt; @@ -25,6 +25,7 @@ pub fn do_select( let file_table_ref = proc.get_files().lock().unwrap(); for fd in 0..nfds { + let fd_ref = file_table_ref.get(fd as FileDesc)?; let (r, w, e) = ( readfds.is_set(fd), writefds.is_set(fd), @@ -33,7 +34,7 @@ pub fn do_select( if !(r || w || e) { continue; } - if let Ok(socket) = file_table_ref.get(fd as FileDesc)?.as_unix_socket() { + if let Ok(socket) = fd_ref.as_unix_socket() { warn!("select unix socket is unimplemented, spin for read"); readfds.clear(); writefds.clear(); @@ -56,7 +57,13 @@ pub fn do_select( } return Ok(1); } - let host_fd = file_table_ref.get(fd as FileDesc)?.as_socket()?.fd(); + let host_fd = if let Ok(socket) = fd_ref.as_socket() { + socket.fd() + } else if let Ok(eventfd) = fd_ref.as_event() { + eventfd.get_host_fd() + } else { + return_errno!(EBADF, "unsupported file type"); + }; host_to_libos_fd[host_fd as usize] = fd; let mut events = 0; @@ -132,6 +139,9 @@ pub fn do_poll(pollfds: &mut [libc::pollfd], timeout: c_int) -> Result { // convert libos fd to host fd in the copy to keep pollfds unchanged u_pollfds[i].fd = socket.fd(); u_pollfds[i].revents = 0; + } else if let Ok(eventfd) = file_ref.as_event() { + u_pollfds[i].fd = eventfd.get_host_fd(); + u_pollfds[i].revents = 0; } else if let Ok(socket) = file_ref.as_unix_socket() { // FIXME: spin poll until can read (hack for php) while (pollfd.events & libc::POLLIN) != 0 && socket.poll()?.0 == false { @@ -210,13 +220,16 @@ pub fn do_epoll_ctl( let mut epoll = file_ref.as_epoll()?.inner.lock().unwrap(); let fd_ref = file_table_ref.get(fd)?; - let sock_result = fd_ref.as_socket(); - if sock_result.is_err() { - //FIXME: workaround for grpc, other fd types including pipe should be supported - return Ok(()); - } - let host_fd = sock_result.unwrap().fd() as FileDesc; + let host_fd = if let Ok(socket) = fd_ref.as_socket() { + socket.fd() as FileDesc + } else if let Ok(eventfd) = fd_ref.as_event() { + eventfd.get_host_fd() as FileDesc + } else { + warn!("unsupported file type"); + return Ok(()); + }; + epoll.ctl(op, host_fd, event)?; Ok(()) diff --git a/src/libos/src/net/syscalls.rs b/src/libos/src/net/syscalls.rs index 39c5bed4..3d24f70c 100644 --- a/src/libos/src/net/syscalls.rs +++ b/src/libos/src/net/syscalls.rs @@ -232,7 +232,11 @@ pub fn do_epoll_pwait( timeout: c_int, sigmask: *const usize, //TODO:add sigset_t ) -> Result { - info!("epoll_pwait"); + if sigmask.is_null() { + info!("epoll_wait"); + } else { + info!("epoll_pwait") + } //TODO:add signal support - do_epoll_wait(epfd, events, maxevents, 0) + do_epoll_wait(epfd, events, maxevents, timeout) } diff --git a/src/libos/src/syscall/mod.rs b/src/libos/src/syscall/mod.rs index a76d7294..d7b7bd48 100644 --- a/src/libos/src/syscall/mod.rs +++ b/src/libos/src/syscall/mod.rs @@ -221,6 +221,8 @@ pub extern "C" fn dispatch_syscall( SysPipe => fs::do_pipe2(arg0 as *mut i32, 0), SysPipe2 => fs::do_pipe2(arg0 as *mut i32, arg1 as u32), + SysEventfd => fs::do_eventfd2(arg0 as u32, 0), + SysEventfd2 => fs::do_eventfd2(arg0 as u32, arg1 as i32), SysDup => fs::do_dup(arg0 as FileDesc), SysDup2 => fs::do_dup2(arg0 as FileDesc, arg1 as FileDesc), SysDup3 => fs::do_dup3(arg0 as FileDesc, arg1 as FileDesc, arg2 as u32), diff --git a/src/pal/src/ocalls/fs.c b/src/pal/src/ocalls/fs.c index 9172d307..56b6ed4a 100644 --- a/src/pal/src/ocalls/fs.c +++ b/src/pal/src/ocalls/fs.c @@ -1,6 +1,11 @@ #include #include "ocalls.h" +#include void occlum_ocall_sync(void) { sync(); } + +int occlum_ocall_eventfd(unsigned int initval, int flags) { + return eventfd(initval, flags); +} diff --git a/test/Makefile b/test/Makefile index dbd867a8..2c77ca25 100644 --- a/test/Makefile +++ b/test/Makefile @@ -14,7 +14,7 @@ TEST_DEPS := client data_sink TESTS ?= empty env hello_world malloc mmap file fs_perms getpid spawn sched pipe time \ truncate readdir mkdir open stat link symlink tls pthread uname rlimit server \ server_epoll unix_socket cout hostfs cpuid rdtsc device sleep exit_group \ - ioctl fcntl + ioctl fcntl eventfd # Benchmarks: need to be compiled and run by bench-% target BENCHES := spawn_and_exit_latency pipe_throughput unix_socket_throughput diff --git a/test/eventfd/Makefile b/test/eventfd/Makefile new file mode 100644 index 00000000..8c5a2fb4 --- /dev/null +++ b/test/eventfd/Makefile @@ -0,0 +1,5 @@ +include ../test_common.mk + +EXTRA_C_FLAGS := -Wno-incompatible-pointer-types-discards-qualifiers +EXTRA_LINK_FLAGS := +BIN_ARGS := diff --git a/test/eventfd/main.c b/test/eventfd/main.c new file mode 100644 index 00000000..1d04fcc0 --- /dev/null +++ b/test/eventfd/main.c @@ -0,0 +1,251 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "test.h" + +#define MAXEVENTS 1 + +// ============================================================================ +// Test cases +// ============================================================================ + +int test_fcntl_get_flags() { + int event_fd = eventfd(0, 0); + if (event_fd < 0) { + THROW_ERROR("failed to create an eventfd"); + } + + if ((fcntl(event_fd, F_GETFL, 0) != O_RDWR)) { + close(event_fd); + THROW_ERROR("fcntl get flags failed"); + } + + close(event_fd); + return 0; +} + +int test_fcntl_set_flags() { + int event_fd = eventfd(0, 0); + if (event_fd < 0) { + THROW_ERROR("failed to create an eventfd"); + } + + fcntl(event_fd, F_SETFL, O_NONBLOCK); + if ((fcntl(event_fd, F_GETFL, 0) != (O_NONBLOCK | O_RDWR))) { + close(event_fd); + THROW_ERROR("fcntl set flags failed"); + } + + close(event_fd); + return 0; +} + +int test_create_with_flags() { + int event_fd = eventfd(0, EFD_NONBLOCK); + if (event_fd < 0) { + THROW_ERROR("failed to create an eventfd"); + } + + if ((fcntl(event_fd, F_GETFL, 0) != (O_NONBLOCK | O_RDWR))) { + close(event_fd); + THROW_ERROR("create flags failed\n"); + } + + close(event_fd); + return 0; +} + +struct thread_arg { + pthread_t tid; + int fd; + uint64_t data; +}; + +#define TEST_DATA 678 +#define CHILD_NUM 16 + +static void *thread_child(void *arg) { + struct thread_arg *child_arg = arg; + write(child_arg->fd, &(child_arg->data), sizeof(child_arg->data)); + return NULL; +} + +int create_child(struct thread_arg *arg) { + pthread_attr_t attr; + if (pthread_attr_init(&attr) != 0) { + THROW_ERROR("failed to initialize attribute"); + } + + if (pthread_create(&(arg->tid), &attr, &thread_child, arg) != 0) { + if (pthread_attr_destroy(&attr) != 0) { + THROW_ERROR("failed to destroy attr"); + } + THROW_ERROR("failed to create the thread"); + } + + if (pthread_attr_destroy(&attr) != 0) { + THROW_ERROR("failed to destroy attr"); + } + + return 0; +} + +int test_read_write() { + int event_fd = eventfd(0, 0); + if (event_fd < 0) { + THROW_ERROR("failed to create an eventfd"); + } + + struct thread_arg child_arg[CHILD_NUM] = {0}; + + // Create child threads and send eventfd and data + for (int i = 0; i < CHILD_NUM; i++) { + child_arg[i].fd = event_fd; + child_arg[i].data = TEST_DATA; + if (create_child(&child_arg[i]) != 0) { + close(event_fd); + THROW_ERROR("failed to create children"); + } + } + + // Check the data sent from children + uint64_t data_recv = 0; + + do { + uint64_t cur_data = 0; + ssize_t len_recv = read(event_fd, &cur_data, sizeof(uint64_t)); + if (len_recv != sizeof(uint64_t)) { + close(event_fd); + THROW_ERROR("received length is not as expected"); + } + data_recv += cur_data; + } while (data_recv != TEST_DATA*CHILD_NUM); + + close(event_fd); + + for (int i = 0; i < CHILD_NUM; i++) { + if (pthread_join(child_arg[i].tid, NULL) != 0) { + THROW_ERROR("pthread_join"); + } + } + + return 0; +} + +int test_select_with_socket() { + fd_set wfds; + + struct timeval tv = { .tv_sec = 60, .tv_usec = 0 }; + + int sock = socket(AF_INET, SOCK_STREAM, 0); + int event_fd = eventfd(0, 0); + if (event_fd < 0 || sock < 0) { + THROW_ERROR("failed to create files"); + } + + FD_ZERO(&wfds); + FD_SET(sock, &wfds); + FD_SET(event_fd, &wfds); + + if (select(sock > event_fd? sock + 1: event_fd + 1, NULL, &wfds, NULL, &tv) <= 0) { + close_files(2, sock, event_fd); + THROW_ERROR("select failed"); + } + + close_files(2, sock, event_fd); + return 0; +} + +int test_poll_with_socket() { + int sock = socket(AF_INET, SOCK_STREAM, 0); + int event_fd = eventfd(0, 0); + if (event_fd < 0 || sock < 0) { + THROW_ERROR("failed to create files"); + } + + struct pollfd pollfds[] = { + { .fd = sock, .events = POLLIN, .revents = 0, }, + { .fd = event_fd, .events = POLLOUT, .revents = 0 }, + }; + + int ret = poll(pollfds, 2, -1); + if (ret <= 0) { + close_files(2, event_fd, sock); + THROW_ERROR("poll error"); + } + + close_files(2, event_fd, sock); + return 0; +} + +int test_epoll_with_socket() { + int event_fd = eventfd(0, EFD_NONBLOCK); + int sock = socket(AF_INET, SOCK_STREAM, 0); + int epfd = epoll_create1(0); + + if (event_fd < 0 || sock < 0 || epfd < 0) { + THROW_ERROR("failed to create files"); + } + + struct epoll_event ctl_events[2] = {0}; + // Add eventfd to the interest list + ctl_events[0].data.fd = event_fd; + ctl_events[0].events = EPOLLIN | EPOLLET; + // Add socket to the interest list + ctl_events[1].data.fd = sock; + ctl_events[1].events = EPOLLIN | EPOLLET; + if (epoll_ctl(epfd, EPOLL_CTL_ADD, event_fd, &ctl_events[0]) == -1 || + epoll_ctl(epfd, EPOLL_CTL_ADD, sock, &ctl_events[1]) == -1) { + close_files(3, event_fd, sock, epfd); + THROW_ERROR("epoll_ctl"); + } + + struct thread_arg child_arg = { .tid = 0, .fd = event_fd, .data = TEST_DATA }; + if (create_child(&child_arg) != 0) { + close_files(3, event_fd, sock, epfd); + THROW_ERROR("failed to create child"); + } + + struct epoll_event events[MAXEVENTS] = {0}; + if (epoll_pwait(epfd, events, MAXEVENTS, -1, NULL) <= 0){ + close_files(3, event_fd, sock, epfd); + THROW_ERROR("epoll failed"); + } + + close_files(3, event_fd, sock, epfd); + + if (pthread_join(child_arg.tid, NULL) != 0) { + THROW_ERROR("pthread_join"); + } + + return 0; +} +// ============================================================================ +// Test suite +// ============================================================================ + +static test_case_t test_cases[] = { + TEST_CASE(test_fcntl_get_flags), + TEST_CASE(test_fcntl_set_flags), + TEST_CASE(test_create_with_flags), + TEST_CASE(test_read_write), + TEST_CASE(test_epoll_with_socket), + TEST_CASE(test_poll_with_socket), + TEST_CASE(test_select_with_socket), +}; + +int main(int argc, const char* argv[]) { + return test_suite_run(test_cases, ARRAY_SIZE(test_cases)); +} diff --git a/test/include/test.h b/test/include/test.h index b188414d..9643945a 100644 --- a/test/include/test.h +++ b/test/include/test.h @@ -2,6 +2,7 @@ #define __TEST_H #include +#include #define _STR(x) #x #define STR(x) _STR(x) @@ -37,4 +38,13 @@ int test_suite_run(test_case_t* test_cases, int num_test_cases) { return 0; } +void close_files(int count, ...) { + va_list ap; + va_start(ap, count); + for (int i = 0; i < count; i++) { + close(va_arg(ap, int)); + } + va_end(ap); +} + #endif /* __TEST_H */