Add support for robust futex syscalls
This commit is contained in:
parent
9b425798d6
commit
215e8ffbdf
@ -20,6 +20,7 @@
|
||||
#![feature(get_mut_unchecked)]
|
||||
// for std::hint::black_box
|
||||
#![feature(test)]
|
||||
#![feature(atomic_from_mut)]
|
||||
|
||||
#[macro_use]
|
||||
extern crate alloc;
|
||||
|
@ -44,6 +44,9 @@ fn exit_thread(term_status: TermStatus) {
|
||||
futex_wake(ctid_ptr.as_ptr() as *const i32, 1);
|
||||
}
|
||||
|
||||
// Notify waiters that the owner of robust futex has died.
|
||||
thread.wake_robust_list();
|
||||
|
||||
// Keep the main thread's tid available as long as the process is not destroyed.
|
||||
// This is important as the user space may still attempt to access the main
|
||||
// thread's ThreadRef through the process's pid after the process has become
|
||||
|
195
src/libos/src/process/do_robust_list.rs
Normal file
195
src/libos/src/process/do_robust_list.rs
Normal file
@ -0,0 +1,195 @@
|
||||
/// Robust futexes provide a mechanism that is used in addition to normal futex,
|
||||
/// for kernel assist of cleanup of held locks on thread exit.
|
||||
///
|
||||
/// Actual locking and unlocking is handled entirely by user level code with the
|
||||
/// existing futex mechanism to wait or wakeup locks.
|
||||
/// The kernel's only essential involvement in robust futex is to remember where
|
||||
/// the list head is, and to walk the list on thread exit, handling locks still
|
||||
/// held by the departing thread.
|
||||
/// Ref: https://www.kernel.org/doc/html/latest/locking/robust-futex-ABI.html
|
||||
///
|
||||
use std::ptr::NonNull;
|
||||
use std::sync::atomic::{AtomicU32, Ordering};
|
||||
|
||||
use crate::prelude::*;
|
||||
use crate::util::mem_util::from_user::*;
|
||||
|
||||
pub fn do_set_robust_list(list_head_ptr: *mut RobustListHead, len: usize) -> Result<()> {
|
||||
debug!(
|
||||
"set_robust_list: list_head_ptr: {:?}, len: {}",
|
||||
list_head_ptr, len
|
||||
);
|
||||
if std::mem::size_of::<RobustListHead>() != len {
|
||||
return_errno!(EINVAL, "invalid size for RobustListHead");
|
||||
}
|
||||
// We do not check if the pointer is a valid user space pointer, deferring
|
||||
// it in waking the robust list. If the pointer is invalid, we just stop
|
||||
// waking the robust list.
|
||||
let robust_list = NonNull::new(list_head_ptr);
|
||||
let current = current!();
|
||||
current.set_robust_list(robust_list);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn do_get_robust_list(tid: pid_t) -> Result<*mut RobustListHead> {
|
||||
debug!("get_robust_list: tid: {}", tid);
|
||||
let thread = if tid == 0 {
|
||||
current!()
|
||||
} else {
|
||||
super::table::get_thread(tid)?
|
||||
};
|
||||
let robust_list_ptr = thread
|
||||
.robust_list()
|
||||
.map(|robust_list| robust_list.as_ptr())
|
||||
.unwrap_or(std::ptr::null_mut());
|
||||
Ok(robust_list_ptr)
|
||||
}
|
||||
|
||||
/// One node of the user-space robust list.
///
/// The layout is the same as Linux's `robust_list`.
#[repr(C)]
struct RobustList {
    /// Address of the next entry of the list.
    next: *const RobustList,
}
|
||||
|
||||
/// This struct is same as Linux's robust_list_head
|
||||
#[repr(C)]
|
||||
pub struct RobustListHead {
|
||||
/// Linked list of lock entries
|
||||
///
|
||||
/// If it points to the head of the list, then it is the end of the list.
|
||||
/// If it is an invalid user space pointer or a null pointer, stop iterating
|
||||
/// the list.
|
||||
list: RobustList,
|
||||
/// Specifies the offset from the address of the lock entry to the address
|
||||
/// of the futex.
|
||||
futex_offset: isize,
|
||||
/// Contains transient copy of the address of the lock entry, during list
|
||||
/// insertion and removal.
|
||||
list_op_pending: *const RobustList,
|
||||
}
|
||||
|
||||
impl RobustListHead {
|
||||
/// Return an iterator for all futexes in the robust list.
|
||||
///
|
||||
/// The futex refered to by `list_op_pending`, if any, will be returned as
|
||||
/// the last item.
|
||||
pub fn futexes<'a>(&'a self) -> FutexIter<'a> {
|
||||
FutexIter::new(self)
|
||||
}
|
||||
|
||||
/// Return the pending futex address if exist
|
||||
fn pending_futex_addr(&self) -> Option<*const i32> {
|
||||
if self.list_op_pending.is_null() {
|
||||
None
|
||||
} else {
|
||||
Some(unsafe { self.futex_addr(self.list_op_pending) })
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the futex address
|
||||
unsafe fn futex_addr(&self, entry_ptr: *const RobustList) -> *const i32 {
|
||||
(entry_ptr as *const u8).offset(self.futex_offset) as *const i32
|
||||
}
|
||||
}
|
||||
|
||||
const ROBUST_LIST_LIMIT: isize = 2048;
|
||||
|
||||
pub struct FutexIter<'a> {
|
||||
robust_list: &'a RobustListHead,
|
||||
entry_ptr: *const RobustList,
|
||||
count: isize,
|
||||
}
|
||||
|
||||
impl<'a> FutexIter<'a> {
|
||||
fn new(robust_list: &'a RobustListHead) -> Self {
|
||||
Self {
|
||||
robust_list,
|
||||
entry_ptr: robust_list.list.next,
|
||||
count: 0,
|
||||
}
|
||||
}
|
||||
|
||||
// The `self.count` is normally a positive value used to iterate the list
|
||||
// to avoid excessively long or circular list, we use a special value -1
|
||||
// to represent the end of the Iterator.
|
||||
fn set_end(&mut self) {
|
||||
self.count = -1;
|
||||
}
|
||||
|
||||
fn is_end(&self) -> bool {
|
||||
self.count < 0
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Iterator for FutexIter<'a> {
|
||||
type Item = *const i32;
|
||||
|
||||
/// Returns the futex address.
|
||||
fn next(&mut self) -> Option<*const i32> {
|
||||
if self.is_end() {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Iterate the linked list
|
||||
while self.entry_ptr != &self.robust_list.list {
|
||||
// Avoid excessively long or circular list
|
||||
if self.count == ROBUST_LIST_LIMIT {
|
||||
break;
|
||||
}
|
||||
// Invalid pointer, stop iterating the robust list
|
||||
if check_ptr(self.entry_ptr).is_err() {
|
||||
return None;
|
||||
}
|
||||
// A pending lock might already be on the list
|
||||
let futex_addr = if self.entry_ptr != self.robust_list.list_op_pending {
|
||||
Some(unsafe { self.robust_list.futex_addr(self.entry_ptr) })
|
||||
} else {
|
||||
None
|
||||
};
|
||||
self.entry_ptr = unsafe { (*self.entry_ptr).next };
|
||||
self.count += 1;
|
||||
if futex_addr.is_some() {
|
||||
return futex_addr;
|
||||
}
|
||||
}
|
||||
|
||||
// End of iterating the linked list
|
||||
// If the pending futex exists, return it as the last one
|
||||
self.set_end();
|
||||
self.robust_list.pending_futex_addr()
|
||||
}
|
||||
}
|
||||
|
||||
/// Bit of the futex word that is tested before waking a waiter.
const FUTEX_WAITERS: u32 = 1 << 31;
/// Bit written into the futex word when its owner thread has died.
const FUTEX_OWNER_DIED: u32 = 1 << 30;
/// Mask selecting the owner's thread id inside the futex word.
const FUTEX_TID_MASK: u32 = (1 << 30) - 1;
|
||||
|
||||
/// Wakeup one robust futex owned by the thread
|
||||
pub fn wake_robust_futex(futex_addr: *const i32, tid: pid_t) -> Result<()> {
|
||||
let futex_val = {
|
||||
check_ptr(futex_addr)?;
|
||||
unsafe { AtomicU32::from_mut(&mut *(futex_addr as *mut u32)) }
|
||||
};
|
||||
let mut old_val = futex_val.load(Ordering::SeqCst);
|
||||
loop {
|
||||
// This futex may held by another thread, do nothing
|
||||
if old_val & FUTEX_TID_MASK != tid {
|
||||
break;
|
||||
}
|
||||
let new_val = (old_val & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
|
||||
if let Err(cur_val) =
|
||||
futex_val.compare_exchange(old_val, new_val, Ordering::SeqCst, Ordering::SeqCst)
|
||||
{
|
||||
// The futex value has changed, let's retry with current value
|
||||
old_val = cur_val;
|
||||
continue;
|
||||
}
|
||||
// Wakeup one waiter
|
||||
if futex_val.load(Ordering::SeqCst) & FUTEX_WAITERS != 0 {
|
||||
debug!("wake robust futex addr: {:?}", futex_addr);
|
||||
super::do_futex::futex_wake(futex_addr, 1)?;
|
||||
}
|
||||
break;
|
||||
}
|
||||
Ok(())
|
||||
}
|
@ -22,6 +22,7 @@ use self::wait::{WaitQueue, Waiter};
|
||||
|
||||
pub use self::do_exit::handle_force_exit;
|
||||
pub use self::do_futex::{futex_wait, futex_wake};
|
||||
pub use self::do_robust_list::RobustListHead;
|
||||
pub use self::do_spawn::do_spawn_without_exec;
|
||||
pub use self::process::{Process, ProcessFilter, ProcessStatus, IDLE};
|
||||
pub use self::spawn_attribute::posix_spawnattr_t;
|
||||
@ -36,6 +37,7 @@ mod do_exec;
|
||||
mod do_exit;
|
||||
mod do_futex;
|
||||
mod do_getpid;
|
||||
mod do_robust_list;
|
||||
mod do_set_tid_address;
|
||||
mod do_spawn;
|
||||
mod do_wait4;
|
||||
|
@ -2,6 +2,7 @@ use super::do_arch_prctl::ArchPrctlCode;
|
||||
use super::do_clone::CloneFlags;
|
||||
use super::do_exec::do_exec;
|
||||
use super::do_futex::{FutexFlags, FutexOp, FutexTimeout};
|
||||
use super::do_robust_list::RobustListHead;
|
||||
use super::do_spawn::FileAction;
|
||||
use super::do_wait4::WaitOptions;
|
||||
use super::prctl::PrctlCmd;
|
||||
@ -446,3 +447,26 @@ pub fn do_execve(path: *const i8, argv: *const *const i8, envp: *const *const i8
|
||||
|
||||
do_exec(&path, &argv, &envp, ¤t)
|
||||
}
|
||||
|
||||
pub fn do_set_robust_list(list_head_ptr: *mut RobustListHead, len: usize) -> Result<isize> {
|
||||
if !list_head_ptr.is_null() {
|
||||
check_mut_ptr(list_head_ptr)?;
|
||||
}
|
||||
super::do_robust_list::do_set_robust_list(list_head_ptr, len)?;
|
||||
Ok(0)
|
||||
}
|
||||
|
||||
pub fn do_get_robust_list(
|
||||
tid: pid_t,
|
||||
list_head_ptr_ptr: *mut *mut RobustListHead,
|
||||
len_ptr: *mut usize,
|
||||
) -> Result<isize> {
|
||||
check_mut_ptr(list_head_ptr_ptr)?;
|
||||
check_mut_ptr(len_ptr)?;
|
||||
let list_head_ptr = super::do_robust_list::do_get_robust_list(tid)?;
|
||||
unsafe {
|
||||
list_head_ptr_ptr.write(list_head_ptr);
|
||||
len_ptr.write(std::mem::size_of::<RobustListHead>());
|
||||
}
|
||||
Ok(0)
|
||||
}
|
||||
|
@ -1,8 +1,9 @@
|
||||
use std::ptr::NonNull;
|
||||
|
||||
use super::{
|
||||
FileTableRef, FsViewRef, ProcessRef, ProcessVM, ProcessVMRef, ResourceLimitsRef, SchedAgentRef,
|
||||
SigQueues, SigSet, Task, Thread, ThreadId, ThreadInner, ThreadName, ThreadRef,
|
||||
FileTableRef, FsViewRef, ProcessRef, ProcessVM, ProcessVMRef, ResourceLimitsRef,
|
||||
RobustListHead, SchedAgentRef, SigQueues, SigSet, Task, Thread, ThreadId, ThreadInner,
|
||||
ThreadName, ThreadRef,
|
||||
};
|
||||
use crate::events::HostEventFd;
|
||||
use crate::prelude::*;
|
||||
@ -22,6 +23,7 @@ pub struct ThreadBuilder {
|
||||
rlimits: Option<ResourceLimitsRef>,
|
||||
sig_mask: Option<SigSet>,
|
||||
clear_ctid: Option<NonNull<pid_t>>,
|
||||
robust_list: Option<NonNull<RobustListHead>>,
|
||||
name: Option<ThreadName>,
|
||||
}
|
||||
|
||||
@ -38,6 +40,7 @@ impl ThreadBuilder {
|
||||
rlimits: None,
|
||||
sig_mask: None,
|
||||
clear_ctid: None,
|
||||
robust_list: None,
|
||||
name: None,
|
||||
}
|
||||
}
|
||||
@ -92,6 +95,11 @@ impl ThreadBuilder {
|
||||
self
|
||||
}
|
||||
|
||||
/// Sets the robust list head address for the thread being built and returns
/// the builder for further chaining.
pub fn robust_list(self, robust_list_addr: NonNull<RobustListHead>) -> Self {
    let mut builder = self;
    builder.robust_list = Some(robust_list_addr);
    builder
}
|
||||
|
||||
pub fn name(mut self, name: ThreadName) -> Self {
|
||||
self.name = Some(name);
|
||||
self
|
||||
@ -103,6 +111,7 @@ impl ThreadBuilder {
|
||||
.ok_or_else(|| errno!(EINVAL, "task is mandatory"))?;
|
||||
let tid = self.tid.unwrap_or_else(|| ThreadId::new());
|
||||
let clear_ctid = RwLock::new(self.clear_ctid);
|
||||
let robust_list = RwLock::new(self.robust_list);
|
||||
let inner = SgxMutex::new(ThreadInner::new());
|
||||
let process = self
|
||||
.process
|
||||
@ -130,6 +139,7 @@ impl ThreadBuilder {
|
||||
task,
|
||||
tid,
|
||||
clear_ctid,
|
||||
robust_list,
|
||||
inner,
|
||||
process,
|
||||
vm,
|
||||
|
@ -4,7 +4,7 @@ use std::ptr::NonNull;
|
||||
use super::task::Task;
|
||||
use super::{
|
||||
FileTableRef, ForcedExitStatus, FsViewRef, ProcessRef, ProcessVM, ProcessVMRef,
|
||||
ResourceLimitsRef, SchedAgentRef, TermStatus, ThreadRef,
|
||||
ResourceLimitsRef, RobustListHead, SchedAgentRef, TermStatus, ThreadRef,
|
||||
};
|
||||
use crate::events::HostEventFd;
|
||||
use crate::fs::{EventCreationFlags, EventFile};
|
||||
@ -28,6 +28,7 @@ pub struct Thread {
|
||||
tid: ThreadId,
|
||||
// Mutable info
|
||||
clear_ctid: RwLock<Option<NonNull<pid_t>>>,
|
||||
robust_list: RwLock<Option<NonNull<RobustListHead>>>,
|
||||
inner: SgxMutex<ThreadInner>,
|
||||
name: RwLock<ThreadName>,
|
||||
// Process
|
||||
@ -139,6 +140,37 @@ impl Thread {
|
||||
*self.clear_ctid.write().unwrap() = new_clear_ctid;
|
||||
}
|
||||
|
||||
/// Returns the robust list head currently registered for this thread, if any.
pub fn robust_list(&self) -> Option<NonNull<RobustListHead>> {
    let guard = self.robust_list.read().unwrap();
    *guard
}
|
||||
|
||||
/// Replaces the robust list head registered for this thread; `None` clears it.
pub fn set_robust_list(&self, new_robust_list: Option<NonNull<RobustListHead>>) {
    let mut guard = self.robust_list.write().unwrap();
    *guard = new_robust_list;
}
|
||||
|
||||
/// Walks the robust futex list, marking futex dead and wake waiters.
|
||||
/// It corresponds to Linux's exit_robust_list(), errors are silently ignored.
|
||||
pub fn wake_robust_list(&self) {
|
||||
let list_head_ptr = match self.robust_list() {
|
||||
None => {
|
||||
return;
|
||||
}
|
||||
Some(robust_list) => robust_list.as_ptr(),
|
||||
};
|
||||
debug!("wake the rubust_list: {:?}", list_head_ptr);
|
||||
let robust_list = {
|
||||
// Invalid pointer, stop scanning the list further
|
||||
if crate::util::mem_util::from_user::check_ptr(list_head_ptr).is_err() {
|
||||
return;
|
||||
}
|
||||
unsafe { &*list_head_ptr }
|
||||
};
|
||||
for futex_addr in robust_list.futexes() {
|
||||
super::do_robust_list::wake_robust_futex(futex_addr, self.tid());
|
||||
}
|
||||
self.set_robust_list(None);
|
||||
}
|
||||
|
||||
pub fn name(&self) -> ThreadName {
|
||||
self.name.read().unwrap().clone()
|
||||
}
|
||||
|
@ -41,10 +41,11 @@ use crate::net::{
|
||||
do_shutdown, do_socket, do_socketpair, mmsghdr, msghdr, msghdr_mut,
|
||||
};
|
||||
use crate::process::{
|
||||
do_arch_prctl, do_clone, do_execve, do_exit, do_exit_group, do_futex, do_getegid, do_geteuid,
|
||||
do_getgid, do_getgroups, do_getpgid, do_getpid, do_getppid, do_gettid, do_getuid, do_prctl,
|
||||
do_set_tid_address, do_spawn_for_glibc, do_spawn_for_musl, do_wait4, pid_t, posix_spawnattr_t,
|
||||
FdOp, SpawnFileActions, ThreadStatus,
|
||||
do_arch_prctl, do_clone, do_execve, do_exit, do_exit_group, do_futex, do_get_robust_list,
|
||||
do_getegid, do_geteuid, do_getgid, do_getgroups, do_getpgid, do_getpid, do_getppid, do_gettid,
|
||||
do_getuid, do_prctl, do_set_robust_list, do_set_tid_address, do_spawn_for_glibc,
|
||||
do_spawn_for_musl, do_wait4, pid_t, posix_spawnattr_t, FdOp, RobustListHead, SpawnFileActions,
|
||||
ThreadStatus,
|
||||
};
|
||||
use crate::sched::{do_getcpu, do_sched_getaffinity, do_sched_setaffinity, do_sched_yield};
|
||||
use crate::signal::{
|
||||
@ -359,8 +360,8 @@ macro_rules! process_syscall_table_with_callback {
|
||||
(Pselect6 = 270) => handle_unsupported(),
|
||||
(Ppoll = 271) => handle_unsupported(),
|
||||
(Unshare = 272) => handle_unsupported(),
|
||||
(SetRobustList = 273) => handle_unsupported(),
|
||||
(GetRobustList = 274) => handle_unsupported(),
|
||||
(SetRobustList = 273) => do_set_robust_list(list_head_ptr: *mut RobustListHead, len: usize),
|
||||
(GetRobustList = 274) => do_get_robust_list(tid: pid_t, list_head_ptr_ptr: *mut *mut RobustListHead, len_ptr: *mut usize),
|
||||
(Splice = 275) => handle_unsupported(),
|
||||
(Tee = 276) => handle_unsupported(),
|
||||
(SyncFileRange = 277) => handle_unsupported(),
|
||||
|
@ -84,6 +84,83 @@ static int test_mutex_with_concurrent_counter(void) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// The test case of robust mutex
|
||||
// ============================================================================
|
||||
|
||||
// Arguments handed to each worker thread of the robust mutex test.
struct thread_robust_arg {
    // Index of the thread
    int ti;
    // Counter shared by all the worker threads
    volatile int *global_count;
    // Robust mutex shared by all the worker threads
    pthread_mutex_t *mutex;
};

// A worker thread that fails returns the address of this value.
int ret_err = -1;
|
||||
|
||||
static void *thread_worker(void *_arg) {
|
||||
struct thread_robust_arg *arg = _arg;
|
||||
int err = pthread_mutex_lock(arg->mutex);
|
||||
if (err == EOWNERDEAD) {
|
||||
// The mutex is locked by the thread here, but the state is marked as
|
||||
// inconsistent, the thread should call 'pthread_mutex_consistent' to
|
||||
// make the mutex consistent again.
|
||||
if (pthread_mutex_consistent(arg->mutex) != 0) {
|
||||
printf("ERROR: failed to recover the mutex\n");
|
||||
return &ret_err;
|
||||
}
|
||||
} else if (err != 0) {
|
||||
printf("ERROR: failed to lock the mutex with error: %d\n", err);
|
||||
return &ret_err;
|
||||
}
|
||||
// Mutex is locked
|
||||
(*arg->global_count)++;
|
||||
// Wait for other threads to acquire the lock
|
||||
sleep(1);
|
||||
// Exit without unlocking the mutex, this will makes the mutex in an
|
||||
// inconsistent state.
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static int test_robust_mutex_with_concurrent_counter(void) {
|
||||
volatile int global_count = 0;
|
||||
pthread_t threads[NTHREADS];
|
||||
struct thread_robust_arg thread_args[NTHREADS];
|
||||
// Init robust mutex
|
||||
pthread_mutex_t mutex;
|
||||
pthread_mutexattr_t attr;
|
||||
pthread_mutexattr_init(&attr);
|
||||
pthread_mutexattr_setrobust(&attr, PTHREAD_MUTEX_ROBUST);
|
||||
pthread_mutex_init(&mutex, &attr);
|
||||
// Start the threads
|
||||
for (int ti = 0; ti < NTHREADS; ti++) {
|
||||
struct thread_robust_arg *thread_arg = &thread_args[ti];
|
||||
thread_arg->ti = ti;
|
||||
thread_arg->global_count = &global_count;
|
||||
thread_arg->mutex = &mutex;
|
||||
|
||||
if (pthread_create(&threads[ti], NULL, thread_worker, thread_arg) < 0) {
|
||||
THROW_ERROR("pthread_create failed (ti = %d)", ti);
|
||||
}
|
||||
}
|
||||
// Wait for the threads to finish
|
||||
for (int ti = 0; ti < NTHREADS; ti++) {
|
||||
int *ret_val;
|
||||
if (pthread_join(threads[ti], (void **)&ret_val) < 0) {
|
||||
THROW_ERROR("pthread_join failed (ti = %d)", ti);
|
||||
}
|
||||
if (ret_val && *ret_val != 0) {
|
||||
THROW_ERROR("run thread failed (ti = %d) with return val: %d", ti, *ret_val);
|
||||
}
|
||||
}
|
||||
// Check the result
|
||||
if (global_count != NTHREADS) {
|
||||
THROW_ERROR("incorrect global_count (actual = %d, expected = %d)", global_count,
|
||||
NTHREADS);
|
||||
}
|
||||
|
||||
pthread_mutex_destroy(&mutex);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// The test case of waiting condition variable
|
||||
// ============================================================================
|
||||
@ -189,6 +266,7 @@ static int test_mutex_timedlock() {
|
||||
|
||||
static test_case_t test_cases[] = {
|
||||
TEST_CASE(test_mutex_with_concurrent_counter),
|
||||
TEST_CASE(test_robust_mutex_with_concurrent_counter),
|
||||
TEST_CASE(test_mutex_with_cond_wait),
|
||||
TEST_CASE(test_mutex_timedlock),
|
||||
};
|
||||
|
Loading…
Reference in New Issue
Block a user