[vm] Support shared memory (POSIX)

Shaowei Song 2023-06-16 16:52:05 +08:00 committed by volcano
parent fa3f2fb3cc
commit b0de80bd50
22 changed files with 933 additions and 117 deletions

@ -495,6 +495,10 @@ jobs:
- name: Check result - name: Check result
run: docker exec ${{ github.job }} bash -c "cd /root/occlum/demos/python/python_glibc/occlum_instance; cat smvlight.dat" run: docker exec ${{ github.job }} bash -c "cd /root/occlum/demos/python/python_glibc/occlum_instance; cat smvlight.dat"
- name: Run python3.10 multiprocessing demo
run: docker exec ${{ github.job }} bash -c "cd /root/occlum/demos/python/python_glibc/python3.10-multiprocessing; ./install_python3.10.sh;
SGX_MODE=SIM ./run_python3.10_on_occlum.sh"
# Redis test # Redis test
Redis_support_test: Redis_support_test:
runs-on: ubuntu-20.04 runs-on: ubuntu-20.04

@ -1,3 +1,4 @@
occlum_instance/ occlum_instance/
miniconda/ miniconda/
Miniconda3* Miniconda3*
python-occlum

@ -0,0 +1,4 @@
occlum_instance/
miniconda/
Miniconda3*
python-occlum

@ -0,0 +1,12 @@
#!/bin/bash
set -e
script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
# 1. Init occlum workspace
[ -d occlum_instance ] || occlum new occlum_instance
# 2. Install python and dependencies to specified position
[ -f Miniconda3-latest-Linux-x86_64.sh ] || wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
[ -d miniconda ] || bash ./Miniconda3-latest-Linux-x86_64.sh -b -p $script_dir/miniconda
$script_dir/miniconda/bin/conda create --prefix $script_dir/python-occlum -y \
python=3.10.0 numpy scipy scikit-learn pandas Cython

@ -0,0 +1,18 @@
# The Python multiprocessing package provides APIs for process communication and management.
# This demo creates a worker pool and offloads jobs to it.
# Processes communicate through shared memory (POSIX).
import multiprocessing as mp
import time


def job():
    print(1)


start = time.time()

if __name__ == '__main__':
    mp.set_start_method('spawn')
    pool = mp.Pool(processes=4)
    for i in range(4):
        pool.apply(job)
    print("total time {}".format(time.time() - start))

@ -0,0 +1,18 @@
includes:
  - base.yaml
targets:
  - target: /bin
    copy:
      - files:
          - ../python-occlum/bin/python3.10
  # python packages
  - target: /opt
    copy:
      - dirs:
          - ../python-occlum
  # below are python code and data
  - target: /
    copy:
      - from: ..
        files:
          - multiprocessing_demo.py

@ -0,0 +1,28 @@
#!/bin/bash
set -e
BLUE='\033[1;34m'
NC='\033[0m'
script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
python_dir="$script_dir/occlum_instance/image/opt/python-occlum"
rm -rf occlum_instance && occlum new occlum_instance
cd occlum_instance && rm -rf image
copy_bom -f ../python3.10.yaml --root image --include-dir /opt/occlum/etc/template
if [ ! -d $python_dir ];then
echo "Error: cannot stat '$python_dir' directory"
exit 1
fi
new_json="$(jq '.resource_limits.user_space_size = "1000MB" |
.resource_limits.kernel_space_heap_size = "300MB" |
.env.default += ["PYTHONHOME=/opt/python-occlum", "PATH=/bin"]' Occlum.json)" && \
echo "${new_json}" > Occlum.json
occlum build
# Run the python demo
echo -e "${BLUE}occlum run /bin/python3.10 multiprocessing_demo.py${NC}"
occlum run /bin/python3.10 multiprocessing_demo.py

@ -212,7 +212,7 @@ impl Drop for ShmSegment {
assert!(self.process_set.len() == 0); assert!(self.process_set.len() == 0);
USER_SPACE_VM_MANAGER USER_SPACE_VM_MANAGER
.internal() .internal()
.munmap_chunk(&self.chunk, None); .munmap_chunk(&self.chunk, None, false);
} }
} }

@ -6,6 +6,7 @@ use super::vm_perms::VMPerms;
use super::vm_util::*; use super::vm_util::*;
use crate::process::ProcessRef; use crate::process::ProcessRef;
use crate::process::ThreadRef; use crate::process::ThreadRef;
use std::cmp::Ordering; use std::cmp::Ordering;
use std::collections::HashSet; use std::collections::HashSet;
use std::hash::{Hash, Hasher}; use std::hash::{Hash, Hasher};
@ -71,10 +72,10 @@ impl Chunk {
&self.internal &self.internal
} }
pub fn get_vma_for_single_vma_chunk(&self) -> VMArea { pub fn get_vma_for_single_vma_chunk(&self) -> SgxMutexGuard<VMArea> {
match self.internal() { match self.internal() {
ChunkType::SingleVMA(vma) => return vma.lock().unwrap(),
ChunkType::MultiVMA(internal_manager) => unreachable!(), ChunkType::MultiVMA(internal_manager) => unreachable!(),
ChunkType::SingleVMA(vma) => return vma.lock().unwrap().clone(),
} }
} }
@ -98,7 +99,7 @@ impl Chunk {
vm_range.clone(), vm_range.clone(),
*options.perms(), *options.perms(),
options.initializer().backed_file(), options.initializer().backed_file(),
DUMMY_CHUNK_PROCESS_ID, current!().process().pid(),
); );
// Initialize the memory of the new range // Initialize the memory of the new range
unsafe { unsafe {
@ -247,6 +248,13 @@ impl Chunk {
.is_free_range(request_range), .is_free_range(request_range),
} }
} }
pub fn is_shared(&self) -> bool {
match self.internal() {
ChunkType::SingleVMA(vma) => vma.lock().unwrap().is_shared(),
ChunkType::MultiVMA(_) => false,
}
}
} }
#[derive(Debug)] #[derive(Debug)]

@ -2,10 +2,11 @@
// Currently only use simple vector as the base structure. // Currently only use simple vector as the base structure.
// //
// Basically use address-ordered first fit to find free ranges. // Basically use address-ordered first fit to find free ranges.
use std::cmp::Ordering; use super::*;
use super::vm_util::VMMapAddr; use super::vm_util::VMMapAddr;
use super::*;
use std::cmp::Ordering;
const INITIAL_SIZE: usize = 100; const INITIAL_SIZE: usize = 100;
@ -37,7 +38,7 @@ impl VMFreeSpaceManager {
align: usize, align: usize,
addr: VMMapAddr, addr: VMMapAddr,
) -> Result<VMRange> { ) -> Result<VMRange> {
// Record the minimal free range that satisfies the contraints // Record the minimal free range that satisfies the constraints
let mut result_free_range: Option<VMRange> = None; let mut result_free_range: Option<VMRange> = None;
let mut result_idx: Option<usize> = None; let mut result_idx: Option<usize> = None;
let mut free_list = &mut self.free_manager; let mut free_list = &mut self.free_manager;

@ -64,6 +64,7 @@ use std::fmt;
mod chunk; mod chunk;
mod free_space_manager; mod free_space_manager;
mod process_vm; mod process_vm;
mod shm_manager;
mod user_space_vm; mod user_space_vm;
mod vm_area; mod vm_area;
mod vm_chunk_manager; mod vm_chunk_manager;

@ -1,17 +1,18 @@
use super::*; use super::*;
use super::chunk::*; use super::chunk::*;
use super::config;
use super::ipc::SHM_MANAGER;
use super::process::elf_file::{ElfFile, ProgramHeaderExt};
use super::user_space_vm::USER_SPACE_VM_MANAGER; use super::user_space_vm::USER_SPACE_VM_MANAGER;
use super::vm_area::VMArea; use super::vm_area::VMArea;
use super::vm_perms::VMPerms; use super::vm_perms::VMPerms;
use super::vm_util::{ use super::vm_util::{
FileBacked, VMInitializer, VMMapAddr, VMMapOptions, VMMapOptionsBuilder, VMRemapOptions, FileBacked, VMInitializer, VMMapAddr, VMMapOptions, VMMapOptionsBuilder, VMRemapOptions,
}; };
use crate::config;
use crate::ipc::SHM_MANAGER;
use crate::process::elf_file::{ElfFile, ProgramHeaderExt};
use crate::util::sync::rw_lock::RwLockWriteGuard;
use std::collections::HashSet; use std::collections::HashSet;
use util::sync::rw_lock::RwLockWriteGuard;
// Used for heap and stack start address randomization. // Used for heap and stack start address randomization.
const RANGE_FOR_RANDOMIZATION: usize = 256 * 4096; // 1M const RANGE_FOR_RANDOMIZATION: usize = 256 * 4096; // 1M
@ -50,14 +51,14 @@ impl<'a, 'b> ProcessVMBuilder<'a, 'b> {
} }
// Generate a random address within [0, range] // Generate a random address within [0, range]
// Note: This function doesn't gurantee alignment // Note: This function doesn't guarantee alignment
fn get_randomize_offset(range: usize) -> usize { fn get_randomize_offset(range: usize) -> usize {
if cfg!(debug_assertions) { if cfg!(debug_assertions) {
return range; return range;
} }
use crate::misc; use crate::misc;
trace!("entrophy size = {}", range); trace!("entropy size = {}", range);
let mut random_buf: [u8; 8] = [0u8; 8]; // same length as usize let mut random_buf: [u8; 8] = [0u8; 8]; // same length as usize
misc::get_random(&mut random_buf).expect("failed to get random number"); misc::get_random(&mut random_buf).expect("failed to get random number");
let random_num: usize = u64::from_le_bytes(random_buf) as usize; let random_num: usize = u64::from_le_bytes(random_buf) as usize;
@ -74,7 +75,7 @@ impl<'a, 'b> ProcessVMBuilder<'a, 'b> {
.stack_size .stack_size
.unwrap_or(config::LIBOS_CONFIG.process.default_stack_size); .unwrap_or(config::LIBOS_CONFIG.process.default_stack_size);
// Before allocating memory, let's first calcualte how much memory // Before allocating memory, let's first calculate how much memory
// we need in total by iterating the memory layouts required by // we need in total by iterating the memory layouts required by
// all the memory regions // all the memory regions
let elf_layouts: Vec<VMLayout> = self let elf_layouts: Vec<VMLayout> = self
@ -211,7 +212,9 @@ impl<'a, 'b> ProcessVMBuilder<'a, 'b> {
fn handle_error_when_init(&self, chunks: &HashSet<Arc<Chunk>>) { fn handle_error_when_init(&self, chunks: &HashSet<Arc<Chunk>>) {
chunks.iter().for_each(|chunk| { chunks.iter().for_each(|chunk| {
USER_SPACE_VM_MANAGER.internal().munmap_chunk(chunk, None); USER_SPACE_VM_MANAGER
.internal()
.munmap_chunk(chunk, None, false);
}); });
} }
@ -227,7 +230,7 @@ impl<'a, 'b> ProcessVMBuilder<'a, 'b> {
let mut empty_start_offset = 0; let mut empty_start_offset = 0;
let mut empty_end_offset = 0; let mut empty_end_offset = 0;
// Init all loadable segements // Init all loadable segments
elf_file elf_file
.program_headers() .program_headers()
.filter(|segment| segment.loadable()) .filter(|segment| segment.loadable())
@ -309,7 +312,9 @@ impl Drop for ProcessVM {
mem_chunks mem_chunks
.drain_filter(|chunk| chunk.is_single_vma()) .drain_filter(|chunk| chunk.is_single_vma())
.for_each(|chunk| { .for_each(|chunk| {
USER_SPACE_VM_MANAGER.internal().munmap_chunk(&chunk, None); USER_SPACE_VM_MANAGER
.internal()
.munmap_chunk(&chunk, None, false);
}); });
assert!(mem_chunks.len() == 0); assert!(mem_chunks.len() == 0);
@ -351,8 +356,9 @@ impl ProcessVM {
mem_chunks: &mut RwLockWriteGuard<HashSet<ChunkRef>>, mem_chunks: &mut RwLockWriteGuard<HashSet<ChunkRef>>,
) -> Result<Vec<VMArea>> { ) -> Result<Vec<VMArea>> {
// Get all single VMA chunks // Get all single VMA chunks
// Shared chunks shouldn't be merged since they are managed by the shm manager and shared by multiple processes
let mut single_vma_chunks = mem_chunks let mut single_vma_chunks = mem_chunks
.drain_filter(|chunk| chunk.is_single_vma()) .drain_filter(|chunk| chunk.is_single_vma() && !chunk.is_shared())
.collect::<Vec<ChunkRef>>(); .collect::<Vec<ChunkRef>>();
single_vma_chunks.sort_unstable_by(|chunk_a, chunk_b| { single_vma_chunks.sort_unstable_by(|chunk_a, chunk_b| {
chunk_a chunk_a
@ -395,7 +401,7 @@ impl ProcessVM {
for chunk in single_vma_chunks.into_iter().filter_map(|chunk| { for chunk in single_vma_chunks.into_iter().filter_map(|chunk| {
if !chunk.is_single_dummy_vma() { if !chunk.is_single_dummy_vma() {
if chunk.is_single_vma_with_conflict_size() { if chunk.is_single_vma_with_conflict_size() {
let new_vma = chunk.get_vma_for_single_vma_chunk(); let new_vma = chunk.get_vma_for_single_vma_chunk().clone();
merged_vmas.push(new_vma); merged_vmas.push(new_vma);
// Don't insert the merged chunks to mem_chunk list here. It should be updated later. // Don't insert the merged chunks to mem_chunk list here. It should be updated later.

@ -0,0 +1,278 @@
//! Shared memory manager. (POSIX)
use super::*;
use super::vm_manager::InternalVMManager;
use super::vm_util::VMMapOptions;
use crate::process::ThreadStatus;
use rcore_fs::vfs::{FileType, Metadata};
use std::collections::{HashMap, HashSet};
use std::sync::{Arc, Weak};
type InodeId = usize;
/// Shared VM manager.
#[derive(Debug)]
pub struct ShmManager {
// K: Inode id of the shared backing file. V: Chunk shared by processes.
shared_chunks: HashMap<InodeId, ChunkRef>,
}
/// Result types of `mmap()` with `MAP_SHARED`.
#[derive(Clone, Debug)]
pub enum MmapSharedResult {
/// Sharing succeeded; carries the mapped address
Success(usize),
/// Need to create a new shared chunk
NeedCreate,
/// Current shared chunk needs to expand range. (old shared chunk, expand range)
NeedExpand(ChunkRef, VMRange),
/// Current shared chunk needs to be replaced to satisfy new request
NeedReplace(ChunkRef),
}
/// Result types of unmapping a shared memory chunk.
/// This could come from a `munmap()` or `mremap()` request.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum MunmapSharedResult {
/// Current shared chunk is still being shared and used by other processes
StillInUse,
/// All sharing processes have detached; the chunk can be freed
Freeable,
}
impl ShmManager {
pub fn new() -> Self {
Self {
shared_chunks: HashMap::new(),
}
}
pub fn mmap_shared_chunk(&mut self, options: &VMMapOptions) -> Result<MmapSharedResult> {
Self::qualified_for_sharing(options)?;
let backed_file = options.initializer().backed_file().unwrap();
let inode_id = backed_file.metadata().inode;
let offset = backed_file.offset();
let shared_chunk = match self.shared_chunks.get(&inode_id) {
Some(shared_chunk) => shared_chunk,
None => {
return Ok(MmapSharedResult::NeedCreate);
}
};
let mut shared_vma = Self::vma_of(&shared_chunk);
let current = current!();
let current_pid = current.process().pid();
let contained = shared_vma.belong_to(current_pid);
let exclusived = shared_vma.exclusive_by(current_pid);
let addr = {
let sc_addr = shared_vma.start();
let sc_size = shared_vma.size();
let sc_offset = shared_vma
.writeback_file()
.map(|(_, offset)| offset)
.unwrap();
let new_size = *options.size();
let mut target_addr = usize::MAX;
match *options.addr() {
vm_util::VMMapAddr::Any | vm_util::VMMapAddr::Hint(_) => {
if offset == sc_offset && new_size <= sc_size {
target_addr = sc_addr;
} else if offset > sc_offset && offset - sc_offset + new_size <= sc_size {
target_addr = sc_addr + offset - sc_offset;
} else if exclusived {
return Ok(MmapSharedResult::NeedReplace(shared_chunk.clone()));
} else {
return_errno!(EINVAL, "mmap shared chunk failed");
}
}
vm_util::VMMapAddr::Need(addr) | vm_util::VMMapAddr::Force(addr) => {
if addr == sc_addr && offset == sc_offset && new_size <= sc_size {
target_addr = addr;
} else if Self::can_expand_shared_vma(
&shared_vma,
(
&VMRange::new_with_size(addr, new_size)?,
options
.initializer()
.backed_file()
.unwrap()
.writeback_file()
.unwrap(),
),
) {
return Ok(MmapSharedResult::NeedExpand(
shared_chunk.clone(),
VMRange::new_with_size(addr, new_size).unwrap(),
));
} else if exclusived {
return Ok(MmapSharedResult::NeedReplace(shared_chunk.clone()));
} else {
return_errno!(EINVAL, "mmap shared chunk failed");
}
}
}
target_addr
};
Self::apply_new_perms_if_higher(&mut shared_vma, *options.perms());
if !contained {
shared_vma.attach_shared_process(current_pid)?;
current.vm().add_mem_chunk(shared_chunk.clone());
}
Ok(MmapSharedResult::Success(addr))
}
pub fn munmap_shared_chunk(
&mut self,
chunk: &ChunkRef,
unmap_range: &VMRange,
force_unmap: bool,
) -> Result<MunmapSharedResult> {
debug_assert!(chunk.is_shared());
let mut shared_vma = Self::vma_of(chunk);
let shared_range = shared_vma.range();
let current_pid = current!().process().pid();
let partial_unmap = !unmap_range.is_superset_of(shared_range);
// Fail when force-unmapping part of a shared chunk that is still shared by other processes
if force_unmap && (partial_unmap || !shared_vma.exclusive_by(current_pid)) {
return_errno!(EINVAL, "force unmap shared chunk failed");
}
// Treat a partially unmapped shared chunk as still-in-use (do nothing)
if partial_unmap {
return Ok(MunmapSharedResult::StillInUse);
}
if shared_vma.detach_shared_process(current_pid)? {
self.shared_chunks.remove(&Self::inode_id_of(&shared_vma));
Ok(MunmapSharedResult::Freeable)
} else {
Ok(MunmapSharedResult::StillInUse)
}
}
pub fn mprotect_shared_chunk(&self, chunk: &ChunkRef, new_perms: VMPerms) -> Result<()> {
let mut vma = Self::vma_of(chunk);
if !vma.is_shared() {
return_errno!(EINVAL, "not a shared chunk");
}
Self::apply_new_perms_if_higher(&mut vma, new_perms);
Ok(())
}
pub fn create_shared_chunk(
&mut self,
options: &VMMapOptions,
new_chunk: ChunkRef,
) -> Result<usize> {
let backed_file = options.initializer().backed_file().ok_or(errno!(EINVAL))?;
let (inode_id, addr) = {
let mut new_vma = Self::vma_of(&new_chunk);
new_vma.mark_shared();
let inode_id = backed_file.metadata().inode;
debug_assert_eq!(inode_id, Self::inode_id_of(&new_vma));
(inode_id, new_vma.start())
};
self.shared_chunks.insert(inode_id, new_chunk);
Ok(addr)
}
pub fn replace_shared_chunk(&mut self, old_shared_chunk: ChunkRef, new_chunk: ChunkRef) {
debug_assert!(old_shared_chunk.is_shared() && new_chunk.is_shared());
let inode_id = {
let mut new_vma = Self::vma_of(&new_chunk);
let old_vma = Self::vma_of(&old_shared_chunk);
// Inherits access and perms from the old one
new_vma.inherits_access_from(&old_vma);
// Apply higher perms to the whole new range
let new_perms = new_vma.perms();
let old_perms = old_vma.perms();
if new_perms != old_perms {
let perms = new_perms | old_perms;
VMPerms::apply_perms(new_vma.range(), perms);
new_vma.set_perms(perms);
}
let inode_id = Self::inode_id_of(&new_vma);
debug_assert_eq!(inode_id, Self::inode_id_of(&old_vma));
inode_id
};
let replaced = self.shared_chunks.insert(inode_id, new_chunk).unwrap();
debug_assert!(Arc::ptr_eq(&replaced, &old_shared_chunk));
}
// Left: Old shared vma. Right: New vm range, backed file and offset.
fn can_expand_shared_vma(lhs: &VMArea, rhs: (&VMRange, (&FileRef, usize))) -> bool {
debug_assert!(lhs.is_shared());
let (lhs_range, lhs_file, lhs_file_offset) = {
let writeback_file = lhs.writeback_file().unwrap();
(lhs.range(), writeback_file.0, writeback_file.1)
};
let (rhs_range, (rhs_file, rhs_file_offset)) = rhs;
debug_assert!(lhs_range.end() <= rhs_range.start());
if lhs_range.size() == 0 || rhs_range.size() == 0 {
return false;
}
// The two vm ranges must border each other
if lhs_range.end() != rhs_range.start() {
return false;
}
Arc::ptr_eq(lhs_file, rhs_file)
&& rhs_file_offset > lhs_file_offset
&& rhs_file_offset - lhs_file_offset == lhs_range.size()
}
fn qualified_for_sharing(options: &VMMapOptions) -> Result<()> {
if !options.is_shared() {
return_errno!(EINVAL, "not a mmap(MAP_SHARED) request");
}
let backed_file = options.initializer().backed_file().unwrap();
if backed_file.metadata().type_ != FileType::File {
return_errno!(
EINVAL,
"unsupported file type when creating shared mappings"
);
}
Ok(())
}
fn vma_of(chunk: &ChunkRef) -> SgxMutexGuard<VMArea> {
match chunk.internal() {
ChunkType::SingleVMA(vma) => vma.lock().unwrap(),
ChunkType::MultiVMA(_) => unreachable!(),
}
}
/// The associated functions below apply only to shared vmas.
fn inode_id_of(vma: &SgxMutexGuard<VMArea>) -> InodeId {
debug_assert!(vma.is_shared());
vma.writeback_file()
.map(|(file, _)| file.metadata().unwrap().inode)
.unwrap()
}
fn apply_new_perms_if_higher(vma: &mut SgxMutexGuard<VMArea>, new_perms: VMPerms) {
debug_assert!(vma.is_shared());
let old_perms = vma.perms();
let perms = new_perms | old_perms;
if perms == old_perms {
return;
}
VMPerms::apply_perms(vma.range(), perms);
vma.set_perms(perms);
}
}
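The core of ShmManager above is the inode-keyed map of shared chunks: a second mmap(MAP_SHARED) of the same file reuses the existing chunk when the requested window already fits inside it. Below is a minimal standalone sketch of that lookup, with hypothetical names and plain integers in place of Occlum's chunk and VMA types.

use std::collections::HashMap;

type InodeId = u64;

// Decision returned by the registry, loosely mirroring MmapSharedResult.
#[derive(Debug, PartialEq)]
enum Decision {
    Reuse(usize), // map into the existing shared region at this address
    Create,       // no shared region for this inode yet
}

// A toy stand-in for the shared chunk's VMA.
struct Region { addr: usize, size: usize, offset: usize }

// A toy registry keyed by inode id, like ShmManager::shared_chunks.
struct Registry { regions: HashMap<InodeId, Region> }

impl Registry {
    fn mmap_shared(&self, inode: InodeId, offset: usize, size: usize) -> Option<Decision> {
        let region = match self.regions.get(&inode) {
            None => return Some(Decision::Create),
            Some(r) => r,
        };
        // Reuse only if the request falls inside the already-mapped window.
        if offset >= region.offset && offset - region.offset + size <= region.size {
            Some(Decision::Reuse(region.addr + (offset - region.offset)))
        } else {
            None // the real manager would expand or replace the chunk here
        }
    }
}

fn main() {
    let mut regions = HashMap::new();
    regions.insert(7, Region { addr: 0x10_0000, size: 0x2000, offset: 0 });
    let reg = Registry { regions };
    assert_eq!(reg.mmap_shared(7, 0x1000, 0x1000), Some(Decision::Reuse(0x10_1000)));
    assert_eq!(reg.mmap_shared(9, 0, 0x1000), Some(Decision::Create));
    println!("inode-keyed sharing decisions behave as expected");
}

The real manager additionally handles the NeedExpand and NeedReplace cases of MmapSharedResult; the sketch simply returns None where those would apply.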

@ -1,10 +1,12 @@
use super::ipc::SHM_MANAGER;
use super::*; use super::*;
use super::vm_manager::VMManager;
use crate::config::LIBOS_CONFIG;
use crate::ctor::dtor; use crate::ctor::dtor;
use crate::ipc::SHM_MANAGER;
use crate::util::pku_util; use crate::util::pku_util;
use config::LIBOS_CONFIG;
use std::ops::{Deref, DerefMut}; use std::ops::{Deref, DerefMut};
use vm_manager::VMManager;
const RSRV_MEM_PERM: MemPerm = const RSRV_MEM_PERM: MemPerm =
MemPerm::from_bits_truncate(MemPerm::READ.bits() | MemPerm::WRITE.bits()); MemPerm::from_bits_truncate(MemPerm::READ.bits() | MemPerm::WRITE.bits());
@ -16,7 +18,7 @@ impl UserSpaceVMManager {
fn new() -> Result<UserSpaceVMManager> { fn new() -> Result<UserSpaceVMManager> {
let rsrv_mem_size = LIBOS_CONFIG.resource_limits.user_space_size; let rsrv_mem_size = LIBOS_CONFIG.resource_limits.user_space_size;
let vm_range = unsafe { let vm_range = unsafe {
// TODO: Current sgx_alloc_rsrv_mem implmentation will commit all the pages of the desired size, which will consume // TODO: Current sgx_alloc_rsrv_mem implementation will commit all the pages of the desired size, which will consume
// a lot of time. When EDMM is supported, there is no need to commit all the pages at the initialization stage. A function // a lot of time. When EDMM is supported, there is no need to commit all the pages at the initialization stage. A function
// which reserves memory but not commit pages should be provided then. // which reserves memory but not commit pages should be provided then.
let ptr = sgx_alloc_rsrv_mem(rsrv_mem_size); let ptr = sgx_alloc_rsrv_mem(rsrv_mem_size);

@ -1,19 +1,26 @@
use std::ops::{Deref, DerefMut}; use super::*;
use super::vm_perms::VMPerms; use super::vm_perms::VMPerms;
use super::vm_range::VMRange; use super::vm_range::VMRange;
use super::vm_util::FileBacked; use super::vm_util::FileBacked;
use super::*;
use intrusive_collections::rbtree::{Link, RBTree}; use intrusive_collections::rbtree::{Link, RBTree};
use intrusive_collections::{intrusive_adapter, KeyAdapter}; use intrusive_collections::{intrusive_adapter, KeyAdapter};
use std::collections::HashSet;
use std::ops::{Deref, DerefMut};
#[derive(Clone, Debug, Default)] #[derive(Clone, Debug, Default)]
pub struct VMArea { pub struct VMArea {
range: VMRange, range: VMRange,
perms: VMPerms, perms: VMPerms,
file_backed: Option<FileBacked>, file_backed: Option<FileBacked>,
pid: pid_t, access: VMAccess,
}
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum VMAccess {
Private(pid_t),
Shared(HashSet<pid_t>),
} }
impl VMArea { impl VMArea {
@ -27,7 +34,7 @@ impl VMArea {
range, range,
perms, perms,
file_backed, file_backed,
pid, access: VMAccess::Private(pid),
} }
} }
@ -37,7 +44,7 @@ impl VMArea {
vma: &VMArea, vma: &VMArea,
new_range: VMRange, new_range: VMRange,
new_perms: VMPerms, new_perms: VMPerms,
pid: pid_t, access: VMAccess,
) -> Self { ) -> Self {
let new_backed_file = vma.file_backed.as_ref().map(|file| { let new_backed_file = vma.file_backed.as_ref().map(|file| {
let mut new_file = file.clone(); let mut new_file = file.clone();
@ -57,7 +64,12 @@ impl VMArea {
new_file new_file
}); });
Self::new(new_range, new_perms, new_backed_file, pid) Self {
range: new_range,
perms: new_perms,
file_backed: new_backed_file,
access,
}
} }
pub fn perms(&self) -> VMPerms { pub fn perms(&self) -> VMPerms {
@ -68,8 +80,22 @@ impl VMArea {
&self.range &self.range
} }
pub fn pid(&self) -> pid_t { pub fn access(&self) -> &VMAccess {
self.pid &self.access
}
pub fn belong_to(&self, target_pid: pid_t) -> bool {
match &self.access {
VMAccess::Private(pid) => *pid == target_pid,
VMAccess::Shared(pid_set) => pid_set.contains(&target_pid),
}
}
pub fn exclusive_by(&self, target_pid: pid_t) -> bool {
match &self.access {
VMAccess::Private(pid) => *pid == target_pid,
VMAccess::Shared(pid_set) => pid_set.len() == 1 && pid_set.contains(&target_pid),
}
} }
pub fn init_file(&self) -> Option<(&FileRef, usize)> { pub fn init_file(&self) -> Option<(&FileRef, usize)> {
@ -96,7 +122,7 @@ impl VMArea {
self.deref() self.deref()
.subtract(other) .subtract(other)
.into_iter() .into_iter()
.map(|range| Self::inherits_file_from(self, range, self.perms(), self.pid())) .map(|range| Self::inherits_file_from(self, range, self.perms(), self.access().clone()))
.collect() .collect()
} }
@ -109,7 +135,8 @@ impl VMArea {
} }
new_range.unwrap() new_range.unwrap()
}; };
let new_vma = VMArea::inherits_file_from(self, new_range, self.perms(), self.pid()); let new_vma =
VMArea::inherits_file_from(self, new_range, self.perms(), self.access().clone());
Some(new_vma) Some(new_vma)
} }
@ -139,7 +166,7 @@ impl VMArea {
} }
pub fn is_the_same_to(&self, other: &VMArea) -> bool { pub fn is_the_same_to(&self, other: &VMArea) -> bool {
if self.pid() != other.pid() { if self.access() != other.access() {
return false; return false;
} }
@ -175,7 +202,7 @@ impl VMArea {
return false; return false;
} }
// The two VMAs must be owned by the same process // The two VMAs must be owned by the same process
if left.pid() != right.pid() { if left.access() != right.access() {
return false; return false;
} }
// The two VMAs must border with each other // The two VMAs must border with each other
@ -202,6 +229,78 @@ impl VMArea {
} }
} }
} }
/// Flush a file-backed VMA to its file. This has no effect on anonymous VMA.
pub fn flush_backed_file(&self) {
self.flush_backed_file_with_cond(|_| true)
}
/// Same as `flush_backed_file()`, except that an extra condition on the file needs to be satisfied.
pub fn flush_backed_file_with_cond<F: Fn(&FileRef) -> bool>(&self, cond_fn: F) {
let (file, file_offset) = match self.writeback_file() {
None => return,
Some((file_and_offset)) => file_and_offset,
};
let file_writable = file
.access_mode()
.map(|ac| ac.writable())
.unwrap_or_default();
if !file_writable {
return;
}
if !cond_fn(file) {
return;
}
file.write_at(file_offset, unsafe { self.as_slice() });
}
pub fn is_shared(&self) -> bool {
match self.access {
VMAccess::Private(_) => false,
VMAccess::Shared(_) => true,
}
}
pub fn mark_shared(&mut self) {
let access = match self.access {
VMAccess::Private(pid) => VMAccess::Shared(HashSet::from([pid])),
VMAccess::Shared(_) => {
return;
}
};
self.access = access;
}
pub fn shared_process_set(&self) -> Result<&HashSet<pid_t>> {
match &self.access {
VMAccess::Private(_) => Err(errno!(EINVAL, "not a shared vma")),
VMAccess::Shared(pid_set) => Ok(pid_set),
}
}
pub fn attach_shared_process(&mut self, pid: pid_t) -> Result<()> {
match &mut self.access {
VMAccess::Private(_) => Err(errno!(EINVAL, "not a shared vma")),
VMAccess::Shared(pid_set) => {
pid_set.insert(pid);
Ok(())
}
}
}
pub fn detach_shared_process(&mut self, pid: pid_t) -> Result<bool> {
match &mut self.access {
VMAccess::Private(_) => Err(errno!(EINVAL, "not a shared vma")),
VMAccess::Shared(pid_set) => {
pid_set.remove(&pid);
Ok(pid_set.is_empty())
}
}
}
pub fn inherits_access_from(&mut self, vma: &VMArea) {
self.access = vma.access().clone()
}
} }
impl Deref for VMArea { impl Deref for VMArea {
@ -212,6 +311,12 @@ impl Deref for VMArea {
} }
} }
impl Default for VMAccess {
fn default() -> Self {
Self::Private(0)
}
}
#[derive(Clone)] #[derive(Clone)]
pub struct VMAObj { pub struct VMAObj {
link: Link, link: Link,
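To make the new VMAccess bookkeeping easier to follow, here is a minimal standalone sketch (hypothetical names, detached from Occlum's VMArea type) of how a shared mapping tracks the processes attached to it and reports when it becomes freeable.

use std::collections::HashSet;

type Pid = u32;

// Mirrors the commit's VMAccess: a mapping is either private to one process
// or shared by a set of processes.
#[derive(Debug)]
enum Access {
    Private(Pid),
    Shared(HashSet<Pid>),
}

impl Access {
    // Corresponds to VMArea::belong_to().
    fn belongs_to(&self, pid: Pid) -> bool {
        match self {
            Access::Private(p) => *p == pid,
            Access::Shared(set) => set.contains(&pid),
        }
    }

    // Corresponds to VMArea::exclusive_by(): `pid` is the only user.
    fn exclusive_by(&self, pid: Pid) -> bool {
        match self {
            Access::Private(p) => *p == pid,
            Access::Shared(set) => set.len() == 1 && set.contains(&pid),
        }
    }

    // attach/detach model mmap/munmap of a shared chunk by a process.
    // The real code returns an error for private vmas; this sketch ignores them.
    fn attach(&mut self, pid: Pid) {
        if let Access::Shared(set) = self {
            set.insert(pid);
        }
    }

    // Returns true when the last user detaches (the "Freeable" case).
    fn detach(&mut self, pid: Pid) -> bool {
        match self {
            Access::Private(_) => false,
            Access::Shared(set) => {
                set.remove(&pid);
                set.is_empty()
            }
        }
    }
}

fn main() {
    let private = Access::Private(42);
    assert!(private.belongs_to(42) && private.exclusive_by(42));

    let mut shared = Access::Shared(HashSet::from([100]));
    shared.attach(200);
    assert!(shared.belongs_to(200));
    assert!(!shared.exclusive_by(100));
    assert!(!shared.detach(100)); // still in use by 200
    assert!(shared.detach(200)); // last user detached => freeable
    println!("shared mapping is now freeable");
}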

@ -4,12 +4,12 @@ use super::free_space_manager::VMFreeSpaceManager as FreeRangeManager;
use super::vm_area::*; use super::vm_area::*;
use super::vm_perms::VMPerms; use super::vm_perms::VMPerms;
use super::vm_util::*; use super::vm_util::*;
use std::collections::BTreeSet;
use intrusive_collections::rbtree::{Link, RBTree}; use intrusive_collections::rbtree::{Link, RBTree};
use intrusive_collections::Bound; use intrusive_collections::Bound;
use intrusive_collections::RBTreeLink; use intrusive_collections::RBTreeLink;
use intrusive_collections::{intrusive_adapter, KeyAdapter}; use intrusive_collections::{intrusive_adapter, KeyAdapter};
use std::collections::BTreeSet;
/// Memory chunk manager. /// Memory chunk manager.
/// ///
@ -77,13 +77,13 @@ impl ChunkManager {
vmas_cursor.move_next(); // move to the first element of the tree vmas_cursor.move_next(); // move to the first element of the tree
while !vmas_cursor.is_null() { while !vmas_cursor.is_null() {
let vma = vmas_cursor.get().unwrap().vma(); let vma = vmas_cursor.get().unwrap().vma();
if vma.pid() != pid || vma.size() == 0 { if !vma.belong_to(pid) || vma.size() == 0 {
// Skip vmas which doesn't belong to this process // Skip vmas which doesn't belong to this process
vmas_cursor.move_next(); vmas_cursor.move_next();
continue; continue;
} }
Self::flush_file_vma(vma); vma.flush_backed_file();
if !vma.perms().is_default() { if !vma.perms().is_default() {
VMPerms::apply_perms(vma, VMPerms::default()); VMPerms::apply_perms(vma, VMPerms::default());
@ -156,7 +156,7 @@ impl ChunkManager {
while !vmas_cursor.is_null() && vmas_cursor.get().unwrap().vma().start() <= range.end() { while !vmas_cursor.is_null() && vmas_cursor.get().unwrap().vma().start() <= range.end() {
let vma = &vmas_cursor.get().unwrap().vma(); let vma = &vmas_cursor.get().unwrap().vma();
trace!("munmap related vma = {:?}", vma); trace!("munmap related vma = {:?}", vma);
if vma.size() == 0 || current_pid != vma.pid() { if vma.size() == 0 || !vma.belong_to(current_pid) {
vmas_cursor.move_next(); vmas_cursor.move_next();
continue; continue;
} }
@ -169,7 +169,7 @@ impl ChunkManager {
}; };
// File-backed VMA needs to be flushed upon munmap // File-backed VMA needs to be flushed upon munmap
Self::flush_file_vma(&intersection_vma); intersection_vma.flush_backed_file();
if !&intersection_vma.perms().is_default() { if !&intersection_vma.perms().is_default() {
VMPerms::apply_perms(&intersection_vma, VMPerms::default()); VMPerms::apply_perms(&intersection_vma, VMPerms::default());
} }
@ -254,7 +254,7 @@ impl ChunkManager {
{ {
let vma = &vmas_cursor.get().unwrap().vma(); let vma = &vmas_cursor.get().unwrap().vma();
// The old range must be contained in one single VMA // The old range must be contained in one single VMA
if vma.pid() == current_pid && vma.is_superset_of(&old_range) { if vma.belong_to(current_pid) && vma.is_superset_of(&old_range) {
break; break;
} else { } else {
vmas_cursor.move_next(); vmas_cursor.move_next();
@ -284,7 +284,7 @@ impl ChunkManager {
&& containing_vmas.get().unwrap().vma().start() <= protect_range.end() && containing_vmas.get().unwrap().vma().start() <= protect_range.end()
{ {
let mut containing_vma = containing_vmas.get().unwrap().vma().clone(); let mut containing_vma = containing_vmas.get().unwrap().vma().clone();
if containing_vma.pid() != current_pid { if !containing_vma.belong_to(current_pid) {
containing_vmas.move_next(); containing_vmas.move_next();
continue; continue;
} }
@ -332,7 +332,7 @@ impl ChunkManager {
&containing_vma, &containing_vma,
protect_range, protect_range,
new_perms, new_perms,
current_pid, VMAccess::Private(current_pid),
); );
VMPerms::apply_perms(&new_vma, new_vma.perms()); VMPerms::apply_perms(&new_vma, new_vma.perms());
let new_vma = VMAObj::new_vma_obj(new_vma); let new_vma = VMAObj::new_vma_obj(new_vma);
@ -344,7 +344,7 @@ impl ChunkManager {
&containing_vma, &containing_vma,
range, range,
old_perms, old_perms,
current_pid, VMAccess::Private(current_pid),
); );
VMAObj::new_vma_obj(new_vma) VMAObj::new_vma_obj(new_vma)
}; };
@ -369,7 +369,7 @@ impl ChunkManager {
&containing_vma, &containing_vma,
intersection_vma.range().clone(), intersection_vma.range().clone(),
new_perms, new_perms,
current_pid, VMAccess::Private(current_pid),
); );
VMPerms::apply_perms(&new_vma, new_vma.perms()); VMPerms::apply_perms(&new_vma, new_vma.perms());
@ -399,7 +399,7 @@ impl ChunkManager {
None => continue, None => continue,
Some(vma) => vma, Some(vma) => vma,
}; };
Self::flush_file_vma(&vma); vma.flush_backed_file();
} }
Ok(()) Ok(())
} }
@ -409,41 +409,17 @@ impl ChunkManager {
pub fn msync_by_file(&mut self, sync_file: &FileRef) { pub fn msync_by_file(&mut self, sync_file: &FileRef) {
for vma_obj in &self.vmas { for vma_obj in &self.vmas {
let is_same_file = |file: &FileRef| -> bool { Arc::ptr_eq(&file, &sync_file) }; let is_same_file = |file: &FileRef| -> bool { Arc::ptr_eq(&file, &sync_file) };
Self::flush_file_vma_with_cond(&vma_obj.vma(), is_same_file); vma_obj.vma().flush_backed_file_with_cond(is_same_file);
} }
} }
/// Flush a file-backed VMA to its file. This has no effect on anonymous VMA.
pub fn flush_file_vma(vma: &VMArea) {
Self::flush_file_vma_with_cond(vma, |_| true)
}
/// Same as flush_vma, except that an extra condition on the file needs to satisfy.
pub fn flush_file_vma_with_cond<F: Fn(&FileRef) -> bool>(vma: &VMArea, cond_fn: F) {
let (file, file_offset) = match vma.writeback_file() {
None => return,
Some((file_and_offset)) => file_and_offset,
};
let file_writable = file
.access_mode()
.map(|ac| ac.writable())
.unwrap_or_default();
if !file_writable {
return;
}
if !cond_fn(file) {
return;
}
file.write_at(file_offset, unsafe { vma.as_slice() });
}
pub fn find_mmap_region(&self, addr: usize) -> Result<VMRange> { pub fn find_mmap_region(&self, addr: usize) -> Result<VMRange> {
let vma = self.vmas.upper_bound(Bound::Included(&addr)); let vma = self.vmas.upper_bound(Bound::Included(&addr));
if vma.is_null() { if vma.is_null() {
return_errno!(ESRCH, "no mmap regions that contains the address"); return_errno!(ESRCH, "no mmap regions that contains the address");
} }
let vma = vma.get().unwrap().vma(); let vma = vma.get().unwrap().vma();
if vma.pid() != current!().process().pid() || !vma.contains(addr) { if !vma.belong_to(current!().process().pid()) || !vma.contains(addr) {
return_errno!(ESRCH, "no mmap regions that contains the address"); return_errno!(ESRCH, "no mmap regions that contains the address");
} }
@ -451,13 +427,13 @@ impl ChunkManager {
} }
pub fn usage_percentage(&self) -> f32 { pub fn usage_percentage(&self) -> f32 {
let totol_size = self.range.size(); let total_size = self.range.size();
let mut used_size = 0; let mut used_size = 0;
self.vmas self.vmas
.iter() .iter()
.for_each(|vma_obj| used_size += vma_obj.vma().size()); .for_each(|vma_obj| used_size += vma_obj.vma().size());
return used_size as f32 / totol_size as f32; return used_size as f32 / total_size as f32;
} }
fn merge_all_vmas(&mut self) { fn merge_all_vmas(&mut self) {

@ -4,15 +4,15 @@ use super::chunk::{
Chunk, ChunkID, ChunkRef, ChunkType, CHUNK_DEFAULT_SIZE, DUMMY_CHUNK_PROCESS_ID, Chunk, ChunkID, ChunkRef, ChunkType, CHUNK_DEFAULT_SIZE, DUMMY_CHUNK_PROCESS_ID,
}; };
use super::free_space_manager::VMFreeSpaceManager; use super::free_space_manager::VMFreeSpaceManager;
use super::vm_area::VMArea; use super::shm_manager::{MmapSharedResult, MunmapSharedResult, ShmManager};
use super::vm_area::{VMAccess, VMArea};
use super::vm_chunk_manager::ChunkManager; use super::vm_chunk_manager::ChunkManager;
use super::vm_perms::VMPerms; use super::vm_perms::VMPerms;
use super::vm_util::*; use super::vm_util::*;
use crate::process::{ThreadRef, ThreadStatus}; use crate::process::{ThreadRef, ThreadStatus};
use std::ops::Bound::{Excluded, Included};
use crate::util::sync::rw_lock; use std::collections::BTreeSet;
use std::collections::{BTreeSet, HashSet}; use std::ops::Bound::{Excluded, Included};
// Incorrect order of locks could cause deadlock easily. // Incorrect order of locks could cause deadlock easily.
// Don't hold a low-order lock and then try to get a high-order lock. // Don't hold a low-order lock and then try to get a high-order lock.
@ -65,6 +65,24 @@ impl VMManager {
} }
pub fn mmap(&self, options: &VMMapOptions) -> Result<usize> { pub fn mmap(&self, options: &VMMapOptions) -> Result<usize> {
if options.is_shared() {
let res = self.internal().mmap_shared_chunk(options);
match res {
Ok(addr) => {
trace!(
"mmap_shared_chunk success: addr = 0x{:X}, pid = {}",
res.as_ref().unwrap(),
current!().process().pid()
);
return Ok(addr);
}
Err(e) => {
warn!("mmap_shared_chunk failed: {:?}", e);
// Do not return when `mmap_shared_chunk()` fails; try to mmap as a regular chunk below.
}
}
}
let addr = *options.addr(); let addr = *options.addr();
let size = *options.size(); let size = *options.size();
let align = *options.align(); let align = *options.align();
@ -198,7 +216,7 @@ impl VMManager {
for chunk in overlapping_chunks.iter() { for chunk in overlapping_chunks.iter() {
match chunk.internal() { match chunk.internal() {
ChunkType::SingleVMA(_) => { ChunkType::SingleVMA(_) => {
internal_manager.munmap_chunk(chunk, Some(&munmap_range))? internal_manager.munmap_chunk(chunk, Some(&munmap_range), false)?
} }
ChunkType::MultiVMA(manager) => manager ChunkType::MultiVMA(manager) => manager
.lock() .lock()
@ -232,7 +250,11 @@ impl VMManager {
.map(|chunk| chunk.clone()) .map(|chunk| chunk.clone())
}; };
if let Some(overlapping_chunk) = overlapping_chunk { if let Some(overlapping_chunk) = overlapping_chunk {
return internal_manager.munmap_chunk(&overlapping_chunk, Some(&munmap_range)); return internal_manager.munmap_chunk(
&overlapping_chunk,
Some(&munmap_range),
false,
);
} else { } else {
warn!("no overlapping chunks anymore"); warn!("no overlapping chunks anymore");
return Ok(()); return Ok(());
@ -354,7 +376,7 @@ impl VMManager {
} }
ChunkType::SingleVMA(vma) => { ChunkType::SingleVMA(vma) => {
let vma = vma.lock().unwrap(); let vma = vma.lock().unwrap();
ChunkManager::flush_file_vma(&vma); vma.flush_backed_file();
} }
} }
Ok(()) Ok(())
@ -375,7 +397,9 @@ impl VMManager {
.msync_by_file(sync_file); .msync_by_file(sync_file);
} }
ChunkType::SingleVMA(vma) => { ChunkType::SingleVMA(vma) => {
ChunkManager::flush_file_vma_with_cond(&vma.lock().unwrap(), is_same_file); vma.lock()
.unwrap()
.flush_backed_file_with_cond(is_same_file);
} }
}); });
} }
@ -437,7 +461,7 @@ impl VMManager {
let ret_addr = if let Some(mmap_options) = remap_result_option.mmap_options() { let ret_addr = if let Some(mmap_options) = remap_result_option.mmap_options() {
let mmap_addr = self.mmap(mmap_options); let mmap_addr = self.mmap(mmap_options);
// FIXME: For MRemapFlags::MayMove flag, we checked if the prefered range is free when parsing the options. // FIXME: For MRemapFlags::MayMove flag, we checked if the preferred range is free when parsing the options.
// But there is no lock after the checking, thus the mmap might fail. In this case, we should try mmap again. // But there is no lock after the checking, thus the mmap might fail. In this case, we should try mmap again.
if mmap_addr.is_err() && remap_result_option.may_move() == true { if mmap_addr.is_err() && remap_result_option.may_move() == true {
return_errno!( return_errno!(
@ -457,7 +481,7 @@ impl VMManager {
if let Some((munmap_addr, munmap_size)) = remap_result_option.munmap_args() { if let Some((munmap_addr, munmap_size)) = remap_result_option.munmap_args() {
self.munmap(*munmap_addr, *munmap_size) self.munmap(*munmap_addr, *munmap_size)
.expect("Shouln't fail"); .expect("Shouldn't fail");
} }
return Ok(ret_addr); return Ok(ret_addr);
@ -479,7 +503,7 @@ impl VMManager {
let mut mem_chunks = thread.vm().mem_chunks().write().unwrap(); let mut mem_chunks = thread.vm().mem_chunks().write().unwrap();
mem_chunks.iter().for_each(|chunk| { mem_chunks.iter().for_each(|chunk| {
internal_manager.munmap_chunk(&chunk, None); internal_manager.munmap_chunk(&chunk, None, false);
}); });
mem_chunks.clear(); mem_chunks.clear();
@ -487,13 +511,14 @@ impl VMManager {
} }
} }
// Modification on this structure must aquire the global lock. // Modification on this structure must acquire the global lock.
// TODO: Enable fast_default_chunks for faster chunk allocation // TODO: Enable fast_default_chunks for faster chunk allocation
#[derive(Debug)] #[derive(Debug)]
pub struct InternalVMManager { pub struct InternalVMManager {
chunks: BTreeSet<ChunkRef>, // track in-use chunks, use B-Tree for better performance and simplicity (compared with red-black tree) chunks: BTreeSet<ChunkRef>, // track in-use chunks, use B-Tree for better performance and simplicity (compared with red-black tree)
fast_default_chunks: Vec<ChunkRef>, // empty default chunks fast_default_chunks: Vec<ChunkRef>, // empty default chunks
free_manager: VMFreeSpaceManager, free_manager: VMFreeSpaceManager,
shm_manager: ShmManager, // track chunks which are shared by processes
} }
impl InternalVMManager { impl InternalVMManager {
@ -501,10 +526,12 @@ impl InternalVMManager {
let chunks = BTreeSet::new(); let chunks = BTreeSet::new();
let fast_default_chunks = Vec::new(); let fast_default_chunks = Vec::new();
let free_manager = VMFreeSpaceManager::new(vm_range); let free_manager = VMFreeSpaceManager::new(vm_range);
let shm_manager = ShmManager::new();
Self { Self {
chunks, chunks,
fast_default_chunks, fast_default_chunks,
free_manager, free_manager,
shm_manager,
} }
} }
@ -522,26 +549,38 @@ impl InternalVMManager {
// Allocate a chunk with single vma // Allocate a chunk with single vma
pub fn mmap_chunk(&mut self, options: &VMMapOptions) -> Result<ChunkRef> { pub fn mmap_chunk(&mut self, options: &VMMapOptions) -> Result<ChunkRef> {
let new_chunk = self
.new_chunk_with_options(options)
.map_err(|e| errno!(e.errno(), "mmap_chunk failure"))?;
trace!("allocate a new single vma chunk: {:?}", new_chunk);
self.chunks.insert(new_chunk.clone());
Ok(new_chunk)
}
fn new_chunk_with_options(&mut self, options: &VMMapOptions) -> Result<ChunkRef> {
let addr = *options.addr(); let addr = *options.addr();
let size = *options.size(); let size = *options.size();
let align = *options.align(); let align = *options.align();
let free_range = self.find_free_gaps(size, align, addr)?; let free_range = self.find_free_gaps(size, align, addr)?;
let free_chunk = Chunk::new_single_vma_chunk(&free_range, options); let free_chunk = Chunk::new_single_vma_chunk(&free_range, options).map_err(|e| {
if let Err(e) = free_chunk {
// Error when creating chunks. Must return the free space before returning error // Error when creating chunks. Must return the free space before returning error
self.free_manager self.free_manager
.add_range_back_to_free_manager(&free_range); .add_range_back_to_free_manager(&free_range);
return_errno!(e.errno(), "mmap_chunk failure"); e
} })?;
let chunk = Arc::new(free_chunk.unwrap()); Ok(Arc::new(free_chunk))
trace!("allocate a new single vma chunk: {:?}", chunk);
self.chunks.insert(chunk.clone());
Ok(chunk)
} }
// Munmap a chunk // Munmap a chunk
// For Single VMA chunk, a part of the chunk could be munmapped if munmap_range is specified. // For Single VMA chunk, a part of the chunk could be munmapped if munmap_range is specified.
pub fn munmap_chunk(&mut self, chunk: &ChunkRef, munmap_range: Option<&VMRange>) -> Result<()> { // `force_unmap` indicates whether the unmap request comes from a (re)map request with `MAP_FIXED`;
// a shared chunk is handled differently in that case.
pub fn munmap_chunk(
&mut self,
chunk: &ChunkRef,
munmap_range: Option<&VMRange>,
force_unmap: bool,
) -> Result<()> {
trace!( trace!(
"munmap_chunk range = {:?}, munmap_range = {:?}", "munmap_chunk range = {:?}, munmap_range = {:?}",
chunk.range(), chunk.range(),
@ -569,6 +608,15 @@ impl InternalVMManager {
} }
}; };
if chunk.is_shared() {
trace!(
"munmap_shared_chunk, chunk_range: {:?}, munmap_range = {:?}",
chunk.range(),
munmap_range,
);
return self.munmap_shared_chunk(chunk, munmap_range, force_unmap);
}
// Either the munmap range is a subset of the chunk range or the munmap range is // Either the munmap range is a subset of the chunk range or the munmap range is
// a superset of the chunk range. We can handle both cases. // a superset of the chunk range. We can handle both cases.
@ -580,7 +628,7 @@ impl InternalVMManager {
}; };
// File-backed VMA needs to be flushed upon munmap // File-backed VMA needs to be flushed upon munmap
ChunkManager::flush_file_vma(&intersection_vma); intersection_vma.flush_backed_file();
// Reset memory permissions // Reset memory permissions
if !&intersection_vma.perms().is_default() { if !&intersection_vma.perms().is_default() {
@ -625,7 +673,9 @@ impl InternalVMManager {
let new_vma = new_vmas.pop().unwrap(); let new_vma = new_vmas.pop().unwrap();
let new_vma_chunk = Arc::new(Chunk::new_chunk_with_vma(new_vma)); let new_vma_chunk = Arc::new(Chunk::new_chunk_with_vma(new_vma));
self.chunks.insert(new_vma_chunk.clone()); self.chunks.insert(new_vma_chunk.clone());
if current.status() != ThreadStatus::Exited {
current.vm().add_mem_chunk(new_vma_chunk); current.vm().add_mem_chunk(new_vma_chunk);
}
let updated_vma = new_vmas.pop().unwrap(); let updated_vma = new_vmas.pop().unwrap();
self.update_single_vma_chunk(&current, &chunk, updated_vma); self.update_single_vma_chunk(&current, &chunk, updated_vma);
@ -635,18 +685,119 @@ impl InternalVMManager {
Ok(()) Ok(())
} }
pub fn mmap_shared_chunk(&mut self, options: &VMMapOptions) -> Result<usize> {
match self.shm_manager.mmap_shared_chunk(options)? {
MmapSharedResult::Success(addr) => Ok(addr),
MmapSharedResult::NeedCreate => {
let new_chunk = self.mmap_chunk(options)?;
current!().vm().add_mem_chunk(new_chunk.clone());
self.shm_manager
.create_shared_chunk(options, new_chunk.clone())
.map_err(|e| {
let vma = new_chunk.get_vma_for_single_vma_chunk();
// Reset memory permissions
if !vma.perms().is_default() {
VMPerms::apply_perms(&vma, VMPerms::default());
}
// Reset memory contents
unsafe {
let buf = vma.as_slice_mut();
buf.iter_mut().for_each(|b| *b = 0)
}
drop(vma);
self.free_chunk(&new_chunk);
current!().vm().remove_mem_chunk(&new_chunk);
e
})
}
MmapSharedResult::NeedExpand(old_shared_chunk, expand_range) => {
let new_chunk = {
let new_chunk = self.new_chunk_with_options(options)?;
self.merge_two_single_vma_chunks(&old_shared_chunk, &new_chunk)
};
let new_range = *new_chunk.range();
debug_assert_eq!(new_range, expand_range);
self.shm_manager
.replace_shared_chunk(old_shared_chunk, new_chunk);
Ok(new_range.start())
}
MmapSharedResult::NeedReplace(_) => {
return_errno!(EINVAL, "mmap shared chunk failed");
// TODO: Support replacing the shared chunk when necessary,
// e.g., when the current shared chunk is exclusively owned and `remap()`-ed by the same process
}
}
}
pub fn munmap_shared_chunk(
&mut self,
chunk: &ChunkRef,
munmap_range: &VMRange,
force_unmap: bool,
) -> Result<()> {
if !chunk.is_shared() {
return_errno!(EINVAL, "not a shared chunk");
}
if !chunk.range().overlap_with(munmap_range) {
return Ok(());
}
if self
.shm_manager
.munmap_shared_chunk(chunk, munmap_range, force_unmap)?
== MunmapSharedResult::Freeable
{
let vma = chunk.get_vma_for_single_vma_chunk();
// Flush memory contents to backed file
vma.flush_backed_file();
// Reset memory permissions
if !vma.perms().is_default() {
VMPerms::apply_perms(&vma, VMPerms::default());
}
// Reset memory contents
unsafe {
let buf = vma.as_slice_mut();
buf.iter_mut().for_each(|b| *b = 0)
}
drop(vma);
self.free_chunk(chunk);
let current = current!();
if current.status() != ThreadStatus::Exited {
current.vm().remove_mem_chunk(&chunk);
}
}
Ok(())
}
fn update_single_vma_chunk( fn update_single_vma_chunk(
&mut self, &mut self,
current_thread: &ThreadRef, current_thread: &ThreadRef,
old_chunk: &ChunkRef, old_chunk: &ChunkRef,
new_vma: VMArea, new_vma: VMArea,
) { ) -> ChunkRef {
let new_chunk = Arc::new(Chunk::new_chunk_with_vma(new_vma)); let new_chunk = Arc::new(Chunk::new_chunk_with_vma(new_vma));
current_thread current_thread
.vm() .vm()
.replace_mem_chunk(old_chunk, new_chunk.clone()); .replace_mem_chunk(old_chunk, new_chunk.clone());
self.chunks.remove(old_chunk); self.chunks.remove(old_chunk);
self.chunks.insert(new_chunk); self.chunks.insert(new_chunk.clone());
new_chunk
}
fn merge_two_single_vma_chunks(&mut self, lhs: &ChunkRef, rhs: &ChunkRef) -> ChunkRef {
let mut new_vma = {
let lhs_vma = lhs.get_vma_for_single_vma_chunk();
let rhs_vma = rhs.get_vma_for_single_vma_chunk();
debug_assert_eq!(lhs_vma.end(), rhs_vma.start());
let mut new_vma = rhs_vma.clone();
new_vma.set_start(lhs_vma.start());
new_vma
};
self.update_single_vma_chunk(&current!(), lhs, new_vma)
} }
// protect_range should be a sub-range of the chunk range // protect_range should be a sub-range of the chunk range
@ -657,6 +808,17 @@ impl InternalVMManager {
new_perms: VMPerms, new_perms: VMPerms,
) -> Result<()> { ) -> Result<()> {
debug_assert!(chunk.range().is_superset_of(&protect_range)); debug_assert!(chunk.range().is_superset_of(&protect_range));
if chunk.is_shared() {
trace!(
"mprotect_shared_chunk, chunk_range: {:?}, mprotect_range = {:?}",
chunk.range(),
protect_range,
);
// When the protect range hits a shared chunk, the new perms are
// applied to the whole vma in an all-or-nothing manner
return self.shm_manager.mprotect_shared_chunk(chunk, new_perms);
}
let vma = match chunk.internal() { let vma = match chunk.internal() {
ChunkType::MultiVMA(_) => { ChunkType::MultiVMA(_) => {
unreachable!(); unreachable!();
@ -677,6 +839,7 @@ impl InternalVMManager {
return Ok(()); return Ok(());
} }
let current_pid = current!().process().pid();
let same_start = protect_range.start() == containing_vma.start(); let same_start = protect_range.start() == containing_vma.start();
let same_end = protect_range.end() == containing_vma.end(); let same_end = protect_range.end() == containing_vma.end();
match (same_start, same_end) { match (same_start, same_end) {
@ -701,7 +864,7 @@ impl InternalVMManager {
&containing_vma, &containing_vma,
protect_range, protect_range,
new_perms, new_perms,
DUMMY_CHUNK_PROCESS_ID, VMAccess::Private(current_pid),
); );
VMPerms::apply_perms(&new_vma, new_vma.perms()); VMPerms::apply_perms(&new_vma, new_vma.perms());
@ -711,7 +874,7 @@ impl InternalVMManager {
&containing_vma, &containing_vma,
range, range,
old_perms, old_perms,
DUMMY_CHUNK_PROCESS_ID, VMAccess::Private(current_pid),
) )
}; };
@ -732,7 +895,7 @@ impl InternalVMManager {
&containing_vma, &containing_vma,
protect_range, protect_range,
new_perms, new_perms,
DUMMY_CHUNK_PROCESS_ID, VMAccess::Private(current_pid),
); );
VMPerms::apply_perms(&new_vma, new_vma.perms()); VMPerms::apply_perms(&new_vma, new_vma.perms());
@ -800,7 +963,7 @@ impl InternalVMManager {
// //
// Previously, this method is implemented for VMManager and only acquire the internal lock when needed. However, this will make mmap non-atomic. // Previously, this method is implemented for VMManager and only acquire the internal lock when needed. However, this will make mmap non-atomic.
// In multi-thread applications, for example, when a thread calls mmap with MAP_FIXED flag, and that desired memory is mapped already, the libos // In multi-thread applications, for example, when a thread calls mmap with MAP_FIXED flag, and that desired memory is mapped already, the libos
// will first munmap the corresponding memory and then do a mmap with desired address. If the lock is not aquired during the whole process, // will first munmap the corresponding memory and then do a mmap with desired address. If the lock is not acquired during the whole process,
// the unmapped memory might be allocated by other thread who is waiting and acquiring the lock. // the unmapped memory might be allocated by other thread who is waiting and acquiring the lock.
// Thus, in current code, this method is implemented for InternalManager and holds the lock during the whole process. // Thus, in current code, this method is implemented for InternalManager and holds the lock during the whole process.
// Below method "force_mmap_across_multiple_chunks" is the same. // Below method "force_mmap_across_multiple_chunks" is the same.
@ -853,7 +1016,7 @@ impl InternalVMManager {
} }
// Munmap the corresponding single vma chunk // Munmap the corresponding single vma chunk
self.munmap_chunk(&chunk, Some(&target_range))?; self.munmap_chunk(&chunk, Some(&target_range), true)?;
} }
VMMapAddr::Any => unreachable!(), VMMapAddr::Any => unreachable!(),
} }
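As noted in mprotect_chunk above, permission changes on a shared chunk are applied to the whole vma and only ever widen the permissions (see apply_new_perms_if_higher and replace_shared_chunk). A minimal sketch of that widening rule, with hypothetical permission bits standing in for VMPerms:

// Hypothetical permission bits standing in for VMPerms.
const READ: u8 = 0b001;
const WRITE: u8 = 0b010;

// Mirrors apply_new_perms_if_higher(): a shared mapping's permissions are
// only ever widened (old | new), never narrowed, and the merged result is
// applied to the whole vma rather than to the requested sub-range.
fn widen_perms(old: u8, requested: u8) -> u8 {
    let merged = old | requested;
    if merged == old {
        old // nothing to do; current permissions already cover the request
    } else {
        merged
    }
}

fn main() {
    let shared = READ;
    // One process requests READ|WRITE on (part of) the shared region:
    let after = widen_perms(shared, READ | WRITE);
    assert_eq!(after, READ | WRITE);
    // A later READ-only request does not narrow the permissions back:
    assert_eq!(widen_perms(after, READ), READ | WRITE);
    println!("shared-chunk permissions only ever widen");
}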

@ -89,7 +89,7 @@ impl VMRange {
self.start() <= addr && addr < self.end() self.start() <= addr && addr < self.end()
} }
// Returns whether two ranges have non-empty interesection. // Returns whether two ranges have non-empty intersection.
pub fn overlap_with(&self, other: &VMRange) -> bool { pub fn overlap_with(&self, other: &VMRange) -> bool {
let intersection_start = self.start().max(other.start()); let intersection_start = self.start().max(other.start());
let intersection_end = self.end().min(other.end()); let intersection_end = self.end().min(other.end());

@ -3,12 +3,12 @@ use super::*;
use super::vm_area::*; use super::vm_area::*;
use super::vm_perms::VMPerms; use super::vm_perms::VMPerms;
use crate::fs::FileMode; use crate::fs::FileMode;
use std::collections::BTreeSet;
use intrusive_collections::rbtree::{Link, RBTree}; use intrusive_collections::rbtree::{Link, RBTree};
use intrusive_collections::Bound; use intrusive_collections::Bound;
use intrusive_collections::RBTreeLink; use intrusive_collections::RBTreeLink;
use intrusive_collections::{intrusive_adapter, KeyAdapter}; use intrusive_collections::{intrusive_adapter, KeyAdapter};
use rcore_fs::vfs::Metadata;
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub enum VMInitializer { pub enum VMInitializer {
@ -20,7 +20,7 @@ pub enum VMInitializer {
FileBacked { FileBacked {
file: FileBacked, file: FileBacked,
}, },
// For ELF files, there is specical handling to not copy all the contents of the file. This is only used for tracking. // For ELF files, there is special handling to not copy all the contents of the file. This is only used for tracking.
ElfSpecific { ElfSpecific {
elf_file: FileRef, elf_file: FileRef,
}, },
@ -150,6 +150,10 @@ impl FileBacked {
None None
} }
} }
pub fn metadata(&self) -> Metadata {
self.file.metadata().unwrap()
}
} }
#[derive(Clone, Copy, Debug, PartialEq)] #[derive(Clone, Copy, Debug, PartialEq)]
@ -258,6 +262,13 @@ impl VMMapOptions {
pub fn initializer(&self) -> &VMInitializer { pub fn initializer(&self) -> &VMInitializer {
&self.initializer &self.initializer
} }
pub fn is_shared(&self) -> bool {
if let Some(backed_file) = self.initializer.backed_file() {
return backed_file.need_write_back();
}
false
}
} }
#[derive(Clone, Copy, PartialEq)] #[derive(Clone, Copy, PartialEq)]
@ -407,7 +418,7 @@ pub trait VMRemapParser {
// For Linux, writing to either memory vma or the file will update the other two equally. But we won't be able to support this before // For Linux, writing to either memory vma or the file will update the other two equally. But we won't be able to support this before
// we really have paging. Thus, if the old_range is not equal to a recorded vma, we will just return with error. // we really have paging. Thus, if the old_range is not equal to a recorded vma, we will just return with error.
if writeback_file.is_some() && &old_range != vma.range() { if writeback_file.is_some() && &old_range != vma.range() {
return_errno!(EINVAL, "Known limition") return_errno!(EINVAL, "Known limitation")
} }
// Implement mremap as one optional mmap followed by one optional munmap. // Implement mremap as one optional mmap followed by one optional munmap.
@ -449,13 +460,13 @@ pub trait VMRemapParser {
(Some(mmap_opts), ret_addr) (Some(mmap_opts), ret_addr)
} }
(MRemapFlags::MayMove, VMRemapSizeType::Growing, None) => { (MRemapFlags::MayMove, VMRemapSizeType::Growing, None) => {
let prefered_new_range = let preferred_new_range =
VMRange::new_with_size(old_addr + old_size, new_size - old_size)?; VMRange::new_with_size(old_addr + old_size, new_size - old_size)?;
if self.is_free_range(&prefered_new_range) { if self.is_free_range(&preferred_new_range) {
// Don't need to move the old range // Don't need to move the old range
let mmap_ops = VMMapOptionsBuilder::default() let mmap_ops = VMMapOptionsBuilder::default()
.size(prefered_new_range.size()) .size(preferred_new_range.size())
.addr(VMMapAddr::Need(prefered_new_range.start())) .addr(VMMapAddr::Need(preferred_new_range.start()))
.perms(perms) .perms(perms)
.initializer(VMInitializer::DoNothing()) .initializer(VMInitializer::DoNothing())
.build()?; .build()?;
@ -475,9 +486,9 @@ pub trait VMRemapParser {
} }
} }
(MRemapFlags::MayMove, VMRemapSizeType::Growing, Some((backed_file, offset))) => { (MRemapFlags::MayMove, VMRemapSizeType::Growing, Some((backed_file, offset))) => {
let prefered_new_range = let preferred_new_range =
VMRange::new_with_size(old_addr + old_size, new_size - old_size)?; VMRange::new_with_size(old_addr + old_size, new_size - old_size)?;
if self.is_free_range(&prefered_new_range) { if self.is_free_range(&preferred_new_range) {
// Don't need to move the old range // Don't need to move the old range
let vm_initializer_for_new_range = VMInitializer::FileBacked { let vm_initializer_for_new_range = VMInitializer::FileBacked {
file: FileBacked::new( file: FileBacked::new(
@ -487,8 +498,8 @@ pub trait VMRemapParser {
), ),
}; };
let mmap_ops = VMMapOptionsBuilder::default() let mmap_ops = VMMapOptionsBuilder::default()
.size(prefered_new_range.size()) .size(preferred_new_range.size())
.addr(VMMapAddr::Need(prefered_new_range.start())) .addr(VMMapAddr::Need(preferred_new_range.start()))
.perms(perms) .perms(perms)
.initializer(vm_initializer_for_new_range) .initializer(vm_initializer_for_new_range)
.build()?; .build()?;

@ -22,7 +22,7 @@ TESTS ?= env empty hello_world malloc mmap file fs_perms getpid spawn sched pipe
truncate readdir mkdir open stat link symlink chmod chown tls pthread system_info rlimit \ truncate readdir mkdir open stat link symlink chmod chown tls pthread system_info rlimit \
server server_epoll unix_socket cout hostfs cpuid rdtsc device sleep exit_group posix_flock \ server server_epoll unix_socket cout hostfs cpuid rdtsc device sleep exit_group posix_flock \
ioctl fcntl eventfd emulate_syscall access signal sysinfo prctl rename procfs wait \ ioctl fcntl eventfd emulate_syscall access signal sysinfo prctl rename procfs wait \
spawn_attribute exec statfs random umask pgrp vfork mount flock utimes shm epoll brk spawn_attribute exec statfs random umask pgrp vfork mount flock utimes shm epoll brk posix_shm
# Benchmarks: need to be compiled and run by bench-% target # Benchmarks: need to be compiled and run by bench-% target
BENCHES := spawn_and_exit_latency pipe_throughput unix_socket_throughput BENCHES := spawn_and_exit_latency pipe_throughput unix_socket_throughput

test/posix_shm/Makefile (new file, 10 lines)

@ -0,0 +1,10 @@
include ../test_common.mk
EXTRA_C_FLAGS := -Wno-stringop-truncation -Wno-nonnull
EXTRA_LINK_FLAGS := -lrt
BIN_ARGS :=
CUSTOM_PRE_BUILD := 1
custom_pre_build:
@cp /lib/x86_64-linux-gnu/librt.so.1 $(BUILD_DIR)/test/image/opt/occlum/glibc/lib/

test/posix_shm/main.c (new file, 170 lines)

@ -0,0 +1,170 @@
// C test program illustrating POSIX shared-memory API.
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <sys/shm.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include "test.h"
// ============================================================================
// Helper macros
// ============================================================================
#define SHM_OBJ "shm_test" // Name of shared memory object
#define SHM_SIZE 0x1000 // Size (in bytes) of shared memory object
/* Messages to communicate through shared memory */
#define MSG0 "1st Hello"
#define MSG1 "2nd Hello"
#define MSG2 "3rd Hello"
#define MSG3 "4th Hello"
#define MSG_SIZE strlen(MSG0)
// ============================================================================
// Test cases
// ============================================================================
int producer_process() {
// Shared memory file descriptor
int shm_fd;
// Shared memory buffer
void *shm_buf;
// Create the shared memory object
shm_fd = shm_open(SHM_OBJ, O_CREAT | O_RDWR, 0666);
if (shm_fd < 0) {
THROW_ERROR("shm_open failed");
}
// Configure the size of the shared memory object
if (ftruncate(shm_fd, SHM_SIZE) < 0) {
THROW_ERROR("ftruncate error");
}
// Memory map the shared memory object
shm_buf = mmap(NULL, SHM_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, shm_fd, 0);
if (shm_buf == MAP_FAILED) {
THROW_ERROR("mmap(MAP_SHARED) failed");
}
// Fork a child and launch consumer process
pid_t child_pid = vfork();
if (child_pid < 0) {
// THROW_ERROR("Spawn a child process failed");
perror("Spawn a child process failed");
return -1;
} else if (child_pid == 0) {
execl("/bin/posix_shm", NULL, NULL);
THROW_ERROR("exec failed");
}
// Communicate through shared memory
strncpy(shm_buf, MSG0, MSG_SIZE);
printf("[Producer] send %s\n", MSG0);
while (1) {
if (strncmp(shm_buf, MSG1, MSG_SIZE) != 0) {
sleep(1);
continue;
}
printf("[Producer] receive %s\n", MSG1);
strncpy(shm_buf, MSG2, MSG_SIZE);
printf("[Producer] send %s\n", MSG2);
while (1) {
if (strncmp(shm_buf, MSG3, MSG_SIZE) != 0) {
sleep(1);
continue;
}
printf("[Producer] receive %s\n", MSG3);
break;
}
break;
}
// Unmap the shared memory
if (munmap(shm_buf, SHM_SIZE) < 0) {
THROW_ERROR("munmap failed");
}
// Unlink the shared memory object
shm_unlink(SHM_OBJ);
return 0;
}
int consumer_process() {
// Shared memory file descriptor
int shm_fd;
// Shared memory buffer
void *shm_buf;
// Create the shared memory object
shm_fd = shm_open(SHM_OBJ, O_CREAT | O_RDWR, 0666);
if (shm_fd < 0) {
THROW_ERROR("shm_open failed");
}
// Configure the size of the shared memory object
if (ftruncate(shm_fd, SHM_SIZE) < 0) {
THROW_ERROR("ftruncate error");
}
// Memory map the shared memory object
shm_buf = mmap(NULL, SHM_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, shm_fd, 0);
if (shm_buf == MAP_FAILED) {
THROW_ERROR("mmap(MAP_SHARED) failed");
}
while (1) {
if (strncmp(shm_buf, MSG0, MSG_SIZE) != 0) {
sleep(1);
continue;
}
printf("[Consumer] receive %s\n", MSG0);
strncpy(shm_buf, MSG1, MSG_SIZE);
printf("[Consumer] send %s\n", MSG1);
while (1) {
if (strncmp(shm_buf, MSG2, MSG_SIZE) != 0) {
sleep(1);
continue;
}
printf("[Consumer] receive %s\n", MSG2);
strncpy(shm_buf, MSG3, MSG_SIZE);
printf("[Consumer] send %s\n", MSG3);
break;
}
break;
}
// Unmap the shared memory
if (munmap(shm_buf, SHM_SIZE) < 0) {
THROW_ERROR("munmap failed");
}
// Unlink the shared memory object
shm_unlink(SHM_OBJ);
return 0;
}
int test_posix_shm() {
return producer_process();
}
// ============================================================================
// Test suite main
// ============================================================================
static test_case_t test_cases[] = {
TEST_CASE(test_posix_shm),
};
int main(int argc, const char *argv[]) {
if (argc == 1) {
// Producer process
return test_suite_run(test_cases, ARRAY_SIZE(test_cases));
} else {
// Consumer process
return consumer_process();
}
}