From b0de80bd507ec6753deeb1e56af9dfebb759022f Mon Sep 17 00:00:00 2001 From: Shaowei Song <1498430017@qq.com> Date: Fri, 16 Jun 2023 16:52:05 +0800 Subject: [PATCH] [vm] Support shared memory (POSIX) --- .github/workflows/demo_test.yml | 4 + demos/python/python_glibc/.gitignore | 1 + .../python3.10-multiprocessing/.gitignore | 4 + .../install_python3.10.sh | 12 + .../multiprocessing_demo.py | 18 ++ .../python3.10.yaml | 18 ++ .../run_python3.10_on_occlum.sh | 28 ++ src/libos/src/ipc/shm.rs | 2 +- src/libos/src/vm/chunk.rs | 14 +- src/libos/src/vm/free_space_manager.rs | 7 +- src/libos/src/vm/mod.rs | 1 + src/libos/src/vm/process_vm.rs | 30 +- src/libos/src/vm/shm_manager.rs | 278 ++++++++++++++++++ src/libos/src/vm/user_space_vm.rs | 10 +- src/libos/src/vm/vm_area.rs | 129 +++++++- src/libos/src/vm/vm_chunk_manager.rs | 54 +--- src/libos/src/vm/vm_manager.rs | 223 ++++++++++++-- src/libos/src/vm/vm_range.rs | 2 +- src/libos/src/vm/vm_util.rs | 33 ++- test/Makefile | 2 +- test/posix_shm/Makefile | 10 + test/posix_shm/main.c | 170 +++++++++++ 22 files changed, 933 insertions(+), 117 deletions(-) create mode 100644 demos/python/python_glibc/python3.10-multiprocessing/.gitignore create mode 100755 demos/python/python_glibc/python3.10-multiprocessing/install_python3.10.sh create mode 100644 demos/python/python_glibc/python3.10-multiprocessing/multiprocessing_demo.py create mode 100644 demos/python/python_glibc/python3.10-multiprocessing/python3.10.yaml create mode 100755 demos/python/python_glibc/python3.10-multiprocessing/run_python3.10_on_occlum.sh create mode 100644 src/libos/src/vm/shm_manager.rs create mode 100644 test/posix_shm/Makefile create mode 100644 test/posix_shm/main.c diff --git a/.github/workflows/demo_test.yml b/.github/workflows/demo_test.yml index 04e1436b..0f0f5e69 100644 --- a/.github/workflows/demo_test.yml +++ b/.github/workflows/demo_test.yml @@ -495,6 +495,10 @@ jobs: - name: Check result run: docker exec ${{ github.job }} bash -c "cd /root/occlum/demos/python/python_glibc/occlum_instance; cat smvlight.dat" + - name: Run python3.10 multiprocessing demo + run: docker exec ${{ github.job }} bash -c "cd /root/occlum/demos/python/python_glibc/python3.10-multiprocessing; ./install_python3.10.sh; + SGX_MODE=SIM ./run_python3.10_on_occlum.sh" + # Redis test Redis_support_test: runs-on: ubuntu-20.04 diff --git a/demos/python/python_glibc/.gitignore b/demos/python/python_glibc/.gitignore index 4ba7c712..71184ca2 100644 --- a/demos/python/python_glibc/.gitignore +++ b/demos/python/python_glibc/.gitignore @@ -1,3 +1,4 @@ occlum_instance/ miniconda/ Miniconda3* +python-occlum diff --git a/demos/python/python_glibc/python3.10-multiprocessing/.gitignore b/demos/python/python_glibc/python3.10-multiprocessing/.gitignore new file mode 100644 index 00000000..71184ca2 --- /dev/null +++ b/demos/python/python_glibc/python3.10-multiprocessing/.gitignore @@ -0,0 +1,4 @@ +occlum_instance/ +miniconda/ +Miniconda3* +python-occlum diff --git a/demos/python/python_glibc/python3.10-multiprocessing/install_python3.10.sh b/demos/python/python_glibc/python3.10-multiprocessing/install_python3.10.sh new file mode 100755 index 00000000..3268fa31 --- /dev/null +++ b/demos/python/python_glibc/python3.10-multiprocessing/install_python3.10.sh @@ -0,0 +1,12 @@ +#!/bin/bash +set -e +script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" + +# 1. Init occlum workspace +[ -d occlum_instance ] || occlum new occlum_instance + +# 2. 
Install python and dependencies to the specified position
+[ -f Miniconda3-latest-Linux-x86_64.sh ] || wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
+[ -d miniconda ] || bash ./Miniconda3-latest-Linux-x86_64.sh -b -p $script_dir/miniconda
+$script_dir/miniconda/bin/conda create --prefix $script_dir/python-occlum -y \
+python=3.10.0 numpy scipy scikit-learn pandas Cython
diff --git a/demos/python/python_glibc/python3.10-multiprocessing/multiprocessing_demo.py b/demos/python/python_glibc/python3.10-multiprocessing/multiprocessing_demo.py
new file mode 100644
index 00000000..0257f36e
--- /dev/null
+++ b/demos/python/python_glibc/python3.10-multiprocessing/multiprocessing_demo.py
@@ -0,0 +1,18 @@
+# The Python multiprocessing package provides APIs for process communication and management.
+# This demo demonstrates creating a worker pool and offloading jobs.
+# Processes communicate through shared memory (POSIX).
+import multiprocessing as mp
+import time
+
+def job():
+    print(1)
+
+start = time.time()
+
+if __name__ == '__main__':
+    mp.set_start_method('spawn')
+    pool = mp.Pool(processes=4)
+    for i in range(4):
+        pool.apply(job)
+    print("total time {}".format(time.time() - start))
+
diff --git a/demos/python/python_glibc/python3.10-multiprocessing/python3.10.yaml b/demos/python/python_glibc/python3.10-multiprocessing/python3.10.yaml
new file mode 100644
index 00000000..fd2c943b
--- /dev/null
+++ b/demos/python/python_glibc/python3.10-multiprocessing/python3.10.yaml
@@ -0,0 +1,18 @@
+includes:
+  - base.yaml
+targets:
+  - target: /bin
+    copy:
+      - files:
+        - ../python-occlum/bin/python3.10
+  # python packages
+  - target: /opt
+    copy:
+      - dirs:
+        - ../python-occlum
+  # below are python code and data
+  - target: /
+    copy:
+      - from: ..
+        files:
+          - multiprocessing_demo.py
diff --git a/demos/python/python_glibc/python3.10-multiprocessing/run_python3.10_on_occlum.sh b/demos/python/python_glibc/python3.10-multiprocessing/run_python3.10_on_occlum.sh
new file mode 100755
index 00000000..dc23d28d
--- /dev/null
+++ b/demos/python/python_glibc/python3.10-multiprocessing/run_python3.10_on_occlum.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+set -e
+
+BLUE='\033[1;34m'
+NC='\033[0m'
+
+script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+python_dir="$script_dir/occlum_instance/image/opt/python-occlum"
+
+rm -rf occlum_instance && occlum new occlum_instance
+
+cd occlum_instance && rm -rf image
+copy_bom -f ../python3.10.yaml --root image --include-dir /opt/occlum/etc/template
+
+if [ ! -d $python_dir ];then
+    echo "Error: cannot stat '$python_dir' directory"
+    exit 1
+fi
+
+new_json="$(jq '.resource_limits.user_space_size = "1000MB" |
+                .resource_limits.kernel_space_heap_size = "300MB" |
+                .env.default += ["PYTHONHOME=/opt/python-occlum", "PATH=/bin"]' Occlum.json)" && \
+echo "${new_json}" > Occlum.json
+occlum build
+
+# Run the python demo
+echo -e "${BLUE}occlum run /bin/python3.10 multiprocessing_demo.py${NC}"
+occlum run /bin/python3.10 multiprocessing_demo.py
diff --git a/src/libos/src/ipc/shm.rs b/src/libos/src/ipc/shm.rs
index 1c6a3ad6..ddafeaff 100644
--- a/src/libos/src/ipc/shm.rs
+++ b/src/libos/src/ipc/shm.rs
@@ -212,7 +212,7 @@ impl Drop for ShmSegment {
         assert!(self.process_set.len() == 0);
         USER_SPACE_VM_MANAGER
             .internal()
-            .munmap_chunk(&self.chunk, None);
+            .munmap_chunk(&self.chunk, None, false);
     }
 }
diff --git a/src/libos/src/vm/chunk.rs b/src/libos/src/vm/chunk.rs
index fca8f23f..9f58bd36 100644
--- a/src/libos/src/vm/chunk.rs
+++ b/src/libos/src/vm/chunk.rs
@@ -6,6 +6,7 @@ use super::vm_perms::VMPerms;
 use super::vm_util::*;
 use crate::process::ProcessRef;
 use crate::process::ThreadRef;
+
 use std::cmp::Ordering;
 use std::collections::HashSet;
 use std::hash::{Hash, Hasher};
@@ -71,10 +72,10 @@ impl Chunk {
         &self.internal
     }
 
-    pub fn get_vma_for_single_vma_chunk(&self) -> VMArea {
+    pub fn get_vma_for_single_vma_chunk(&self) -> SgxMutexGuard<VMArea> {
         match self.internal() {
+            ChunkType::SingleVMA(vma) => return vma.lock().unwrap(),
             ChunkType::MultiVMA(internal_manager) => unreachable!(),
-            ChunkType::SingleVMA(vma) => return vma.lock().unwrap().clone(),
         }
     }
 
@@ -98,7 +99,7 @@ impl Chunk {
             vm_range.clone(),
             *options.perms(),
             options.initializer().backed_file(),
-            DUMMY_CHUNK_PROCESS_ID,
+            current!().process().pid(),
         );
         // Initialize the memory of the new range
         unsafe {
@@ -247,6 +248,13 @@ impl Chunk {
                 .is_free_range(request_range),
         }
     }
+
+    pub fn is_shared(&self) -> bool {
+        match self.internal() {
+            ChunkType::SingleVMA(vma) => vma.lock().unwrap().is_shared(),
+            ChunkType::MultiVMA(_) => false,
+        }
+    }
 }
 
 #[derive(Debug)]
diff --git a/src/libos/src/vm/free_space_manager.rs b/src/libos/src/vm/free_space_manager.rs
index 569d3a91..7457b311 100644
--- a/src/libos/src/vm/free_space_manager.rs
+++ b/src/libos/src/vm/free_space_manager.rs
@@ -2,10 +2,11 @@
 // Currently only use simple vector as the base structure.
 //
 // Basically use address-ordered first fit to find free ranges.
-use std::cmp::Ordering;
+use super::*;
 
 use super::vm_util::VMMapAddr;
-use super::*;
+
+use std::cmp::Ordering;
 
 const INITIAL_SIZE: usize = 100;
@@ -37,7 +38,7 @@ impl VMFreeSpaceManager {
         align: usize,
         addr: VMMapAddr,
     ) -> Result<VMRange> {
-        // Record the minimal free range that satisfies the contraints
+        // Record the minimal free range that satisfies the constraints
         let mut result_free_range: Option<VMRange> = None;
         let mut result_idx: Option<usize> = None;
         let mut free_list = &mut self.free_manager;
diff --git a/src/libos/src/vm/mod.rs b/src/libos/src/vm/mod.rs
index 3ce22b52..9c64753b 100644
--- a/src/libos/src/vm/mod.rs
+++ b/src/libos/src/vm/mod.rs
@@ -64,6 +64,7 @@ use std::fmt;
 mod chunk;
 mod free_space_manager;
 mod process_vm;
+mod shm_manager;
 mod user_space_vm;
 mod vm_area;
 mod vm_chunk_manager;
diff --git a/src/libos/src/vm/process_vm.rs b/src/libos/src/vm/process_vm.rs
index 8f1d86a5..e20ecc58 100644
--- a/src/libos/src/vm/process_vm.rs
+++ b/src/libos/src/vm/process_vm.rs
@@ -1,17 +1,18 @@
 use super::*;
 use super::chunk::*;
-use super::config;
-use super::ipc::SHM_MANAGER;
-use super::process::elf_file::{ElfFile, ProgramHeaderExt};
 use super::user_space_vm::USER_SPACE_VM_MANAGER;
 use super::vm_area::VMArea;
 use super::vm_perms::VMPerms;
 use super::vm_util::{
     FileBacked, VMInitializer, VMMapAddr, VMMapOptions, VMMapOptionsBuilder, VMRemapOptions,
 };
+use crate::config;
+use crate::ipc::SHM_MANAGER;
+use crate::process::elf_file::{ElfFile, ProgramHeaderExt};
+use crate::util::sync::rw_lock::RwLockWriteGuard;
+
 use std::collections::HashSet;
-use util::sync::rw_lock::RwLockWriteGuard;
 
 // Used for heap and stack start address randomization.
 const RANGE_FOR_RANDOMIZATION: usize = 256 * 4096; // 1M
@@ -50,14 +51,14 @@ impl<'a, 'b> ProcessVMBuilder<'a, 'b> {
     }
 
     // Generate a random address within [0, range]
-    // Note: This function doesn't gurantee alignment
+    // Note: This function doesn't guarantee alignment
     fn get_randomize_offset(range: usize) -> usize {
         if cfg!(debug_assertions) {
             return range;
         }
 
         use crate::misc;
-        trace!("entrophy size = {}", range);
+        trace!("entropy size = {}", range);
         let mut random_buf: [u8; 8] = [0u8; 8]; // same length as usize
         misc::get_random(&mut random_buf).expect("failed to get random number");
         let random_num: usize = u64::from_le_bytes(random_buf) as usize;
@@ -74,7 +75,7 @@ impl<'a, 'b> ProcessVMBuilder<'a, 'b> {
             .stack_size
             .unwrap_or(config::LIBOS_CONFIG.process.default_stack_size);
 
-        // Before allocating memory, let's first calcualte how much memory
+        // Before allocating memory, let's first calculate how much memory
         // we need in total by iterating the memory layouts required by
         // all the memory regions
         let elf_layouts: Vec<VMLayout> = self
@@ -211,7 +212,9 @@ impl<'a, 'b> ProcessVMBuilder<'a, 'b> {
 
     fn handle_error_when_init(&self, chunks: &HashSet<Arc<Chunk>>) {
         chunks.iter().for_each(|chunk| {
-            USER_SPACE_VM_MANAGER.internal().munmap_chunk(chunk, None);
+            USER_SPACE_VM_MANAGER
+                .internal()
+                .munmap_chunk(chunk, None, false);
         });
     }
 
@@ -227,7 +230,7 @@ impl<'a, 'b> ProcessVMBuilder<'a, 'b> {
         let mut empty_start_offset = 0;
         let mut empty_end_offset = 0;
 
-        // Init all loadable segements
+        // Init all loadable segments
         elf_file
             .program_headers()
             .filter(|segment| segment.loadable())
@@ -309,7 +312,9 @@ impl Drop for ProcessVM {
         mem_chunks
             .drain_filter(|chunk| chunk.is_single_vma())
             .for_each(|chunk| {
-                USER_SPACE_VM_MANAGER.internal().munmap_chunk(&chunk, None);
+                USER_SPACE_VM_MANAGER
+                    .internal()
+                    .munmap_chunk(&chunk, None, false);
             });
         assert!(mem_chunks.len() == 0);
 
@@ -351,8 +356,9 @@ impl ProcessVM {
         mem_chunks: &mut RwLockWriteGuard<HashSet<ChunkRef>>,
     ) -> Result<Vec<VMArea>> {
         // Get all single VMA chunks
+        // Shared chunks shouldn't be merged since they are managed by the shm manager and shared by multiple processes
         let mut single_vma_chunks = mem_chunks
-            .drain_filter(|chunk| chunk.is_single_vma())
+            .drain_filter(|chunk| chunk.is_single_vma() && !chunk.is_shared())
            .collect::<Vec<ChunkRef>>();
         single_vma_chunks.sort_unstable_by(|chunk_a, chunk_b| {
             chunk_a
@@ -395,7 +401,7 @@ impl ProcessVM {
         for chunk in single_vma_chunks.into_iter().filter_map(|chunk| {
             if !chunk.is_single_dummy_vma() {
                 if chunk.is_single_vma_with_conflict_size() {
-                    let new_vma = chunk.get_vma_for_single_vma_chunk();
+                    let new_vma = chunk.get_vma_for_single_vma_chunk().clone();
                     merged_vmas.push(new_vma);
 
                     // Don't insert the merged chunks to mem_chunk list here. It should be updated later.
diff --git a/src/libos/src/vm/shm_manager.rs b/src/libos/src/vm/shm_manager.rs
new file mode 100644
index 00000000..669b16f4
--- /dev/null
+++ b/src/libos/src/vm/shm_manager.rs
@@ -0,0 +1,278 @@
+//! Shared memory manager. (POSIX)
+use super::*;
+
+use super::vm_manager::InternalVMManager;
+use super::vm_util::VMMapOptions;
+use crate::process::ThreadStatus;
+
+use rcore_fs::vfs::{FileType, Metadata};
+use std::collections::{HashMap, HashSet};
+use std::sync::{Arc, Weak};
+
+type InodeId = usize;
+
+/// Shared VM manager.
+#[derive(Debug)]
+pub struct ShmManager {
+    // K: Inode id of shared backed file. V: Chunk which is shared by processes.
+    shared_chunks: HashMap<InodeId, ChunkRef>,
+}
+
+/// Result types of `mmap()` with `MAP_SHARED`.
+#[derive(Clone, Debug)]
+pub enum MmapSharedResult {
+    /// Can share successfully
+    Success(usize),
+    /// Need to create a new shared chunk
+    NeedCreate,
+    /// Current shared chunk needs to expand range. (old shared chunk, expand range)
+    NeedExpand(ChunkRef, VMRange),
+    /// Current shared chunk needs to be replaced to satisfy new request
+    NeedReplace(ChunkRef),
+}
+
+/// Result types of unmapping a shared memory chunk.
+/// This could come from a `munmap()` or `mremap()` request.
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub enum MunmapSharedResult {
+    /// Current shared chunk is still being shared and used by other processes
+    StillInUse,
+    /// All shared processes are detached, the chunk can be freed
+    Freeable,
+}
+
+impl ShmManager {
+    pub fn new() -> Self {
+        Self {
+            shared_chunks: HashMap::new(),
+        }
+    }
+
+    pub fn mmap_shared_chunk(&mut self, options: &VMMapOptions) -> Result<MmapSharedResult> {
+        Self::qualified_for_sharing(options)?;
+
+        let backed_file = options.initializer().backed_file().unwrap();
+        let inode_id = backed_file.metadata().inode;
+        let offset = backed_file.offset();
+
+        let shared_chunk = match self.shared_chunks.get(&inode_id) {
+            Some(shared_chunk) => shared_chunk,
+            None => {
+                return Ok(MmapSharedResult::NeedCreate);
+            }
+        };
+        let mut shared_vma = Self::vma_of(&shared_chunk);
+
+        let current = current!();
+        let current_pid = current.process().pid();
+
+        let contained = shared_vma.belong_to(current_pid);
+        let exclusived = shared_vma.exclusive_by(current_pid);
+
+        let addr = {
+            let sc_addr = shared_vma.start();
+            let sc_size = shared_vma.size();
+            let sc_offset = shared_vma
+                .writeback_file()
+                .map(|(_, offset)| offset)
+                .unwrap();
+            let new_size = *options.size();
+            let mut target_addr = usize::MAX;
+            match *options.addr() {
+                vm_util::VMMapAddr::Any | vm_util::VMMapAddr::Hint(_) => {
+                    if offset == sc_offset && new_size <= sc_size {
+                        target_addr = sc_addr;
+                    } else if offset > sc_offset && offset - sc_offset + new_size <= sc_size {
+                        target_addr = sc_addr + offset - sc_offset;
+                    } else if exclusived {
+                        return Ok(MmapSharedResult::NeedReplace(shared_chunk.clone()));
+                    } else {
+                        return_errno!(EINVAL, "mmap shared chunk failed");
+                    }
+                }
+                vm_util::VMMapAddr::Need(addr) | vm_util::VMMapAddr::Force(addr) => {
+                    if addr == sc_addr && offset == sc_offset && new_size <= sc_size {
+                        target_addr = addr;
+                    } else if Self::can_expand_shared_vma(
+                        &shared_vma,
+                        (
+                            &VMRange::new_with_size(addr, new_size)?,
+                            options
+                                .initializer()
+                                .backed_file()
+                                .unwrap()
+                                .writeback_file()
+                                .unwrap(),
+                        ),
+                    ) {
+                        return Ok(MmapSharedResult::NeedExpand(
+                            shared_chunk.clone(),
+                            VMRange::new_with_size(addr, new_size).unwrap(),
+                        ));
+                    } else if exclusived {
+                        return Ok(MmapSharedResult::NeedReplace(shared_chunk.clone()));
+                    } else {
+                        return_errno!(EINVAL, "mmap shared chunk failed");
+                    }
+                }
+            }
+            target_addr
+        };
+
+        Self::apply_new_perms_if_higher(&mut shared_vma, *options.perms());
+        if !contained {
+            shared_vma.attach_shared_process(current_pid)?;
+            current.vm().add_mem_chunk(shared_chunk.clone());
+        }
+
+        Ok(MmapSharedResult::Success(addr))
+    }
+
+    pub fn munmap_shared_chunk(
+        &mut self,
+        chunk: &ChunkRef,
+        unmap_range: &VMRange,
+        force_unmap: bool,
+    ) -> Result<MunmapSharedResult> {
+        debug_assert!(chunk.is_shared());
+        let mut shared_vma = Self::vma_of(chunk);
+        let shared_range = shared_vma.range();
+        let current_pid = current!().process().pid();
+        let partial_unmap = !unmap_range.is_superset_of(shared_range);
+
+        // Fail when force-unmapping part of a shared chunk which is still shared by other processes
+        if force_unmap && (partial_unmap || !shared_vma.exclusive_by(current_pid)) {
+            return_errno!(EINVAL, "force unmap shared chunk failed");
+        }
+
+        // Treat a partially unmapped shared chunk as still in use (do nothing)
+        if partial_unmap {
+            return Ok(MunmapSharedResult::StillInUse);
+        }
+
+        if shared_vma.detach_shared_process(current_pid)? {
+            self.shared_chunks.remove(&Self::inode_id_of(&shared_vma));
+            Ok(MunmapSharedResult::Freeable)
+        } else {
+            Ok(MunmapSharedResult::StillInUse)
+        }
+    }
+
+    pub fn mprotect_shared_chunk(&self, chunk: &ChunkRef, new_perms: VMPerms) -> Result<()> {
+        let mut vma = Self::vma_of(chunk);
+        if !vma.is_shared() {
+            return_errno!(EINVAL, "not a shared chunk");
+        }
+        Self::apply_new_perms_if_higher(&mut vma, new_perms);
+        Ok(())
+    }
+
+    pub fn create_shared_chunk(
+        &mut self,
+        options: &VMMapOptions,
+        new_chunk: ChunkRef,
+    ) -> Result<usize> {
+        let backed_file = options.initializer().backed_file().ok_or(errno!(EINVAL))?;
+        let (inode_id, addr) = {
+            let mut new_vma = Self::vma_of(&new_chunk);
+            new_vma.mark_shared();
+
+            let inode_id = backed_file.metadata().inode;
+            debug_assert_eq!(inode_id, Self::inode_id_of(&new_vma));
+            (inode_id, new_vma.start())
+        };
+
+        self.shared_chunks.insert(inode_id, new_chunk);
+        Ok(addr)
+    }
+
+    pub fn replace_shared_chunk(&mut self, old_shared_chunk: ChunkRef, new_chunk: ChunkRef) {
+        debug_assert!(old_shared_chunk.is_shared() && new_chunk.is_shared());
+        let inode_id = {
+            let mut new_vma = Self::vma_of(&new_chunk);
+            let old_vma = Self::vma_of(&old_shared_chunk);
+            // Inherit access and perms from the old one
+            new_vma.inherits_access_from(&old_vma);
+
+            // Apply higher perms to the whole new range
+            let new_perms = new_vma.perms();
+            let old_perms = old_vma.perms();
+            if new_perms != old_perms {
+                let perms = new_perms | old_perms;
+                VMPerms::apply_perms(new_vma.range(), perms);
+                new_vma.set_perms(perms);
+            }
+
+            let inode_id = Self::inode_id_of(&new_vma);
+            debug_assert_eq!(inode_id, Self::inode_id_of(&old_vma));
+            inode_id
+        };
+
+        let replaced = self.shared_chunks.insert(inode_id, new_chunk).unwrap();
+        debug_assert!(Arc::ptr_eq(&replaced, &old_shared_chunk));
+    }
+
+    // Left: Old shared VMA. Right: New VM range, backed file and offset.
+    fn can_expand_shared_vma(lhs: &VMArea, rhs: (&VMRange, (&FileRef, usize))) -> bool {
+        debug_assert!(lhs.is_shared());
+        let (lhs_range, lhs_file, lhs_file_offset) = {
+            let writeback_file = lhs.writeback_file().unwrap();
+            (lhs.range(), writeback_file.0, writeback_file.1)
+        };
+        let (rhs_range, (rhs_file, rhs_file_offset)) = rhs;
+        debug_assert!(lhs_range.end() <= rhs_range.start());
+        if lhs_range.size() == 0 || rhs_range.size() == 0 {
+            return false;
+        }
+
+        // The two VM ranges must border each other
+        if lhs_range.end() != rhs_range.start() {
+            return false;
+        }
+
+        Arc::ptr_eq(lhs_file, rhs_file)
+            && rhs_file_offset > lhs_file_offset
+            && rhs_file_offset - lhs_file_offset == lhs_range.size()
+    }
+
+    fn qualified_for_sharing(options: &VMMapOptions) -> Result<()> {
+        if !options.is_shared() {
+            return_errno!(EINVAL, "not a mmap(MAP_SHARED) request");
+        }
+        let backed_file = options.initializer().backed_file().unwrap();
+        if backed_file.metadata().type_ != FileType::File {
+            return_errno!(
+                EINVAL,
+                "unsupported file type when creating shared mappings"
+            );
+        }
+        Ok(())
+    }
+
+    fn vma_of(chunk: &ChunkRef) -> SgxMutexGuard<VMArea> {
+        match chunk.internal() {
+            ChunkType::SingleVMA(vma) => vma.lock().unwrap(),
+            ChunkType::MultiVMA(_) => unreachable!(),
+        }
+    }
+
+    /// Associated functions below only apply to shared VMAs.
+
+    fn inode_id_of(vma: &SgxMutexGuard<VMArea>) -> InodeId {
+        debug_assert!(vma.is_shared());
+        vma.writeback_file()
+            .map(|(file, _)| file.metadata().unwrap().inode)
+            .unwrap()
+    }
+
+    fn apply_new_perms_if_higher(vma: &mut SgxMutexGuard<VMArea>, new_perms: VMPerms) {
+        debug_assert!(vma.is_shared());
+        let old_perms = vma.perms();
+        let perms = new_perms | old_perms;
+        if perms == old_perms {
+            return;
+        }
+        VMPerms::apply_perms(vma.range(), perms);
+        vma.set_perms(perms);
+    }
+}
diff --git a/src/libos/src/vm/user_space_vm.rs b/src/libos/src/vm/user_space_vm.rs
index 4a04ac87..ac078340 100644
--- a/src/libos/src/vm/user_space_vm.rs
+++ b/src/libos/src/vm/user_space_vm.rs
@@ -1,10 +1,12 @@
-use super::ipc::SHM_MANAGER;
 use super::*;
+
+use super::vm_manager::VMManager;
+use crate::config::LIBOS_CONFIG;
 use crate::ctor::dtor;
+use crate::ipc::SHM_MANAGER;
 use crate::util::pku_util;
-use config::LIBOS_CONFIG;
+
 use std::ops::{Deref, DerefMut};
-use vm_manager::VMManager;
 
 const RSRV_MEM_PERM: MemPerm =
     MemPerm::from_bits_truncate(MemPerm::READ.bits() | MemPerm::WRITE.bits());
@@ -16,7 +18,7 @@ impl UserSpaceVMManager {
     fn new() -> Result<Self> {
         let rsrv_mem_size = LIBOS_CONFIG.resource_limits.user_space_size;
         let vm_range = unsafe {
-            // TODO: Current sgx_alloc_rsrv_mem implmentation will commit all the pages of the desired size, which will consume
+            // TODO: Current sgx_alloc_rsrv_mem implementation will commit all the pages of the desired size, which will consume
            // a lot of time. When EDMM is supported, there is no need to commit all the pages at the initialization stage. A function
            // which reserves memory but not commit pages should be provided then.
             let ptr = sgx_alloc_rsrv_mem(rsrv_mem_size);
diff --git a/src/libos/src/vm/vm_area.rs b/src/libos/src/vm/vm_area.rs
index f3a92114..f3889ef5 100644
--- a/src/libos/src/vm/vm_area.rs
+++ b/src/libos/src/vm/vm_area.rs
@@ -1,19 +1,26 @@
-use std::ops::{Deref, DerefMut};
+use super::*;
 
 use super::vm_perms::VMPerms;
 use super::vm_range::VMRange;
 use super::vm_util::FileBacked;
-use super::*;
 
 use intrusive_collections::rbtree::{Link, RBTree};
 use intrusive_collections::{intrusive_adapter, KeyAdapter};
+use std::collections::HashSet;
+use std::ops::{Deref, DerefMut};
 
 #[derive(Clone, Debug, Default)]
 pub struct VMArea {
     range: VMRange,
     perms: VMPerms,
     file_backed: Option<FileBacked>,
-    pid: pid_t,
+    access: VMAccess,
+}
+
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum VMAccess {
+    Private(pid_t),
+    Shared(HashSet<pid_t>),
 }
 
 impl VMArea {
@@ -27,7 +34,7 @@ impl VMArea {
             range,
             perms,
             file_backed,
-            pid,
+            access: VMAccess::Private(pid),
         }
     }
 
@@ -37,7 +44,7 @@ impl VMArea {
         vma: &VMArea,
         new_range: VMRange,
         new_perms: VMPerms,
-        pid: pid_t,
+        access: VMAccess,
     ) -> Self {
         let new_backed_file = vma.file_backed.as_ref().map(|file| {
             let mut new_file = file.clone();
@@ -57,7 +64,12 @@ impl VMArea {
             new_file
         });
 
-        Self::new(new_range, new_perms, new_backed_file, pid)
+        Self {
+            range: new_range,
+            perms: new_perms,
+            file_backed: new_backed_file,
+            access,
+        }
     }
 
     pub fn perms(&self) -> VMPerms {
@@ -68,8 +80,22 @@ impl VMArea {
         &self.range
     }
 
-    pub fn pid(&self) -> pid_t {
-        self.pid
+    pub fn access(&self) -> &VMAccess {
+        &self.access
+    }
+
+    pub fn belong_to(&self, target_pid: pid_t) -> bool {
+        match &self.access {
+            VMAccess::Private(pid) => *pid == target_pid,
+            VMAccess::Shared(pid_set) => pid_set.contains(&target_pid),
+        }
+    }
+
+    pub fn exclusive_by(&self, target_pid: pid_t) -> bool {
+        match &self.access {
+            VMAccess::Private(pid) => *pid == target_pid,
+            VMAccess::Shared(pid_set) => pid_set.len() == 1 && pid_set.contains(&target_pid),
+        }
     }
 
     pub fn init_file(&self) -> Option<(&FileRef, usize)> {
@@ -96,7 +122,7 @@ impl VMArea {
         self.deref()
             .subtract(other)
             .into_iter()
-            .map(|range| Self::inherits_file_from(self, range, self.perms(), self.pid()))
+            .map(|range| Self::inherits_file_from(self, range, self.perms(), self.access().clone()))
             .collect()
     }
 
@@ -109,7 +135,8 @@ impl VMArea {
             }
             new_range.unwrap()
         };
-        let new_vma = VMArea::inherits_file_from(self, new_range, self.perms(), self.pid());
+        let new_vma =
+            VMArea::inherits_file_from(self, new_range, self.perms(), self.access().clone());
         Some(new_vma)
     }
 
@@ -139,7 +166,7 @@ impl VMArea {
     }
 
     pub fn is_the_same_to(&self, other: &VMArea) -> bool {
-        if self.pid() != other.pid() {
+        if self.access() != other.access() {
             return false;
         }
 
@@ -175,7 +202,7 @@ impl VMArea {
             return false;
         }
         // The two VMAs must be owned by the same process
-        if left.pid() != right.pid() {
+        if left.access() != right.access() {
             return false;
         }
         // The two VMAs must border with each other
@@ -202,6 +229,78 @@ impl VMArea {
             }
         }
     }
+
+    /// Flush a file-backed VMA to its file. This has no effect on anonymous VMA.
+    pub fn flush_backed_file(&self) {
+        self.flush_backed_file_with_cond(|_| true)
+    }
+
+    /// Same as `flush_backed_file()`, except that an extra condition on the file needs to be satisfied.
+    pub fn flush_backed_file_with_cond<F: Fn(&FileRef) -> bool>(&self, cond_fn: F) {
+        let (file, file_offset) = match self.writeback_file() {
+            None => return,
+            Some(file_and_offset) => file_and_offset,
+        };
+        let file_writable = file
+            .access_mode()
+            .map(|ac| ac.writable())
+            .unwrap_or_default();
+        if !file_writable {
+            return;
+        }
+        if !cond_fn(file) {
+            return;
+        }
+        file.write_at(file_offset, unsafe { self.as_slice() });
+    }
+
+    pub fn is_shared(&self) -> bool {
+        match self.access {
+            VMAccess::Private(_) => false,
+            VMAccess::Shared(_) => true,
+        }
+    }
+
+    pub fn mark_shared(&mut self) {
+        let access = match self.access {
+            VMAccess::Private(pid) => VMAccess::Shared(HashSet::from([pid])),
+            VMAccess::Shared(_) => {
+                return;
+            }
+        };
+        self.access = access;
+    }
+
+    pub fn shared_process_set(&self) -> Result<&HashSet<pid_t>> {
+        match &self.access {
+            VMAccess::Private(_) => Err(errno!(EINVAL, "not a shared vma")),
+            VMAccess::Shared(pid_set) => Ok(pid_set),
+        }
+    }
+
+    pub fn attach_shared_process(&mut self, pid: pid_t) -> Result<()> {
+        match &mut self.access {
+            VMAccess::Private(_) => Err(errno!(EINVAL, "not a shared vma")),
+            VMAccess::Shared(pid_set) => {
+                pid_set.insert(pid);
+                Ok(())
+            }
+        }
+    }
+
+    pub fn detach_shared_process(&mut self, pid: pid_t) -> Result<bool> {
+        match &mut self.access {
+            VMAccess::Private(_) => Err(errno!(EINVAL, "not a shared vma")),
+            VMAccess::Shared(pid_set) => {
+                pid_set.remove(&pid);
+                Ok(pid_set.is_empty())
+            }
+        }
+    }
+
+    pub fn inherits_access_from(&mut self, vma: &VMArea) {
+        self.access = vma.access().clone()
+    }
 }
 
 impl Deref for VMArea {
@@ -212,6 +311,12 @@ impl Deref for VMArea {
     }
 }
 
+impl Default for VMAccess {
+    fn default() -> Self {
+        Self::Private(0)
+    }
+}
+
 #[derive(Clone)]
 pub struct VMAObj {
     link: Link,
diff --git a/src/libos/src/vm/vm_chunk_manager.rs b/src/libos/src/vm/vm_chunk_manager.rs
index ae404a39..5d0a9135 100644
--- a/src/libos/src/vm/vm_chunk_manager.rs
+++ b/src/libos/src/vm/vm_chunk_manager.rs
@@ -4,12 +4,12 @@ use super::free_space_manager::VMFreeSpaceManager as FreeRangeManager;
 use super::vm_area::*;
 use super::vm_perms::VMPerms;
 use super::vm_util::*;
-use std::collections::BTreeSet;
 
 use 
intrusive_collections::rbtree::{Link, RBTree}; use intrusive_collections::Bound; use intrusive_collections::RBTreeLink; use intrusive_collections::{intrusive_adapter, KeyAdapter}; +use std::collections::BTreeSet; /// Memory chunk manager. /// @@ -77,13 +77,13 @@ impl ChunkManager { vmas_cursor.move_next(); // move to the first element of the tree while !vmas_cursor.is_null() { let vma = vmas_cursor.get().unwrap().vma(); - if vma.pid() != pid || vma.size() == 0 { + if !vma.belong_to(pid) || vma.size() == 0 { // Skip vmas which doesn't belong to this process vmas_cursor.move_next(); continue; } - Self::flush_file_vma(vma); + vma.flush_backed_file(); if !vma.perms().is_default() { VMPerms::apply_perms(vma, VMPerms::default()); @@ -156,7 +156,7 @@ impl ChunkManager { while !vmas_cursor.is_null() && vmas_cursor.get().unwrap().vma().start() <= range.end() { let vma = &vmas_cursor.get().unwrap().vma(); trace!("munmap related vma = {:?}", vma); - if vma.size() == 0 || current_pid != vma.pid() { + if vma.size() == 0 || !vma.belong_to(current_pid) { vmas_cursor.move_next(); continue; } @@ -169,7 +169,7 @@ impl ChunkManager { }; // File-backed VMA needs to be flushed upon munmap - Self::flush_file_vma(&intersection_vma); + intersection_vma.flush_backed_file(); if !&intersection_vma.perms().is_default() { VMPerms::apply_perms(&intersection_vma, VMPerms::default()); } @@ -254,7 +254,7 @@ impl ChunkManager { { let vma = &vmas_cursor.get().unwrap().vma(); // The old range must be contained in one single VMA - if vma.pid() == current_pid && vma.is_superset_of(&old_range) { + if vma.belong_to(current_pid) && vma.is_superset_of(&old_range) { break; } else { vmas_cursor.move_next(); @@ -284,7 +284,7 @@ impl ChunkManager { && containing_vmas.get().unwrap().vma().start() <= protect_range.end() { let mut containing_vma = containing_vmas.get().unwrap().vma().clone(); - if containing_vma.pid() != current_pid { + if !containing_vma.belong_to(current_pid) { containing_vmas.move_next(); continue; } @@ -332,7 +332,7 @@ impl ChunkManager { &containing_vma, protect_range, new_perms, - current_pid, + VMAccess::Private(current_pid), ); VMPerms::apply_perms(&new_vma, new_vma.perms()); let new_vma = VMAObj::new_vma_obj(new_vma); @@ -344,7 +344,7 @@ impl ChunkManager { &containing_vma, range, old_perms, - current_pid, + VMAccess::Private(current_pid), ); VMAObj::new_vma_obj(new_vma) }; @@ -369,7 +369,7 @@ impl ChunkManager { &containing_vma, intersection_vma.range().clone(), new_perms, - current_pid, + VMAccess::Private(current_pid), ); VMPerms::apply_perms(&new_vma, new_vma.perms()); @@ -399,7 +399,7 @@ impl ChunkManager { None => continue, Some(vma) => vma, }; - Self::flush_file_vma(&vma); + vma.flush_backed_file(); } Ok(()) } @@ -409,41 +409,17 @@ impl ChunkManager { pub fn msync_by_file(&mut self, sync_file: &FileRef) { for vma_obj in &self.vmas { let is_same_file = |file: &FileRef| -> bool { Arc::ptr_eq(&file, &sync_file) }; - Self::flush_file_vma_with_cond(&vma_obj.vma(), is_same_file); + vma_obj.vma().flush_backed_file_with_cond(is_same_file); } } - /// Flush a file-backed VMA to its file. This has no effect on anonymous VMA. - pub fn flush_file_vma(vma: &VMArea) { - Self::flush_file_vma_with_cond(vma, |_| true) - } - - /// Same as flush_vma, except that an extra condition on the file needs to satisfy. 
-    pub fn flush_file_vma_with_cond<F: Fn(&FileRef) -> bool>(vma: &VMArea, cond_fn: F) {
-        let (file, file_offset) = match vma.writeback_file() {
-            None => return,
-            Some((file_and_offset)) => file_and_offset,
-        };
-        let file_writable = file
-            .access_mode()
-            .map(|ac| ac.writable())
-            .unwrap_or_default();
-        if !file_writable {
-            return;
-        }
-        if !cond_fn(file) {
-            return;
-        }
-        file.write_at(file_offset, unsafe { vma.as_slice() });
-    }
-
     pub fn find_mmap_region(&self, addr: usize) -> Result<VMRange> {
         let vma = self.vmas.upper_bound(Bound::Included(&addr));
         if vma.is_null() {
             return_errno!(ESRCH, "no mmap regions that contains the address");
         }
         let vma = vma.get().unwrap().vma();
-        if vma.pid() != current!().process().pid() || !vma.contains(addr) {
+        if !vma.belong_to(current!().process().pid()) || !vma.contains(addr) {
             return_errno!(ESRCH, "no mmap regions that contains the address");
         }
 
@@ -451,13 +427,13 @@ impl ChunkManager {
     }
 
     pub fn usage_percentage(&self) -> f32 {
-        let totol_size = self.range.size();
+        let total_size = self.range.size();
         let mut used_size = 0;
         self.vmas
             .iter()
             .for_each(|vma_obj| used_size += vma_obj.vma().size());
 
-        return used_size as f32 / totol_size as f32;
+        return used_size as f32 / total_size as f32;
     }
 
     fn merge_all_vmas(&mut self) {
diff --git a/src/libos/src/vm/vm_manager.rs b/src/libos/src/vm/vm_manager.rs
index aebabe71..3649720e 100644
--- a/src/libos/src/vm/vm_manager.rs
+++ b/src/libos/src/vm/vm_manager.rs
@@ -4,15 +4,15 @@ use super::chunk::{
     Chunk, ChunkID, ChunkRef, ChunkType, CHUNK_DEFAULT_SIZE, DUMMY_CHUNK_PROCESS_ID,
 };
 use super::free_space_manager::VMFreeSpaceManager;
-use super::vm_area::VMArea;
+use super::shm_manager::{MmapSharedResult, MunmapSharedResult, ShmManager};
+use super::vm_area::{VMAccess, VMArea};
 use super::vm_chunk_manager::ChunkManager;
 use super::vm_perms::VMPerms;
 use super::vm_util::*;
 use crate::process::{ThreadRef, ThreadStatus};
 
-use std::ops::Bound::{Excluded, Included};
-use crate::util::sync::rw_lock;
-use std::collections::{BTreeSet, HashSet};
+use std::collections::BTreeSet;
+use std::ops::Bound::{Excluded, Included};
 
 // Incorrect order of locks could cause deadlock easily.
 // Don't hold a low-order lock and then try to get a high-order lock.
@@ -65,6 +65,24 @@ impl VMManager {
     }
 
     pub fn mmap(&self, options: &VMMapOptions) -> Result<usize> {
+        if options.is_shared() {
+            let res = self.internal().mmap_shared_chunk(options);
+            match res {
+                Ok(addr) => {
+                    trace!(
+                        "mmap_shared_chunk success: addr = 0x{:X}, pid = {}",
+                        res.as_ref().unwrap(),
+                        current!().process().pid()
+                    );
+                    return Ok(addr);
+                }
+                Err(e) => {
+                    warn!("mmap_shared_chunk failed: {:?}", e);
+                    // Do not return when `mmap_shared_chunk()` fails. Try to mmap as a regular chunk below.
+                }
+            }
+        }
+
         let addr = *options.addr();
         let size = *options.size();
         let align = *options.align();
@@ -198,7 +216,7 @@ impl VMManager {
             for chunk in overlapping_chunks.iter() {
                 match chunk.internal() {
                     ChunkType::SingleVMA(_) => {
-                        internal_manager.munmap_chunk(chunk, Some(&munmap_range))?
+                        internal_manager.munmap_chunk(chunk, Some(&munmap_range), false)?
} ChunkType::MultiVMA(manager) => manager .lock() @@ -232,7 +250,11 @@ impl VMManager { .map(|chunk| chunk.clone()) }; if let Some(overlapping_chunk) = overlapping_chunk { - return internal_manager.munmap_chunk(&overlapping_chunk, Some(&munmap_range)); + return internal_manager.munmap_chunk( + &overlapping_chunk, + Some(&munmap_range), + false, + ); } else { warn!("no overlapping chunks anymore"); return Ok(()); @@ -354,7 +376,7 @@ impl VMManager { } ChunkType::SingleVMA(vma) => { let vma = vma.lock().unwrap(); - ChunkManager::flush_file_vma(&vma); + vma.flush_backed_file(); } } Ok(()) @@ -375,7 +397,9 @@ impl VMManager { .msync_by_file(sync_file); } ChunkType::SingleVMA(vma) => { - ChunkManager::flush_file_vma_with_cond(&vma.lock().unwrap(), is_same_file); + vma.lock() + .unwrap() + .flush_backed_file_with_cond(is_same_file); } }); } @@ -437,7 +461,7 @@ impl VMManager { let ret_addr = if let Some(mmap_options) = remap_result_option.mmap_options() { let mmap_addr = self.mmap(mmap_options); - // FIXME: For MRemapFlags::MayMove flag, we checked if the prefered range is free when parsing the options. + // FIXME: For MRemapFlags::MayMove flag, we checked if the preferred range is free when parsing the options. // But there is no lock after the checking, thus the mmap might fail. In this case, we should try mmap again. if mmap_addr.is_err() && remap_result_option.may_move() == true { return_errno!( @@ -457,7 +481,7 @@ impl VMManager { if let Some((munmap_addr, munmap_size)) = remap_result_option.munmap_args() { self.munmap(*munmap_addr, *munmap_size) - .expect("Shouln't fail"); + .expect("Shouldn't fail"); } return Ok(ret_addr); @@ -479,7 +503,7 @@ impl VMManager { let mut mem_chunks = thread.vm().mem_chunks().write().unwrap(); mem_chunks.iter().for_each(|chunk| { - internal_manager.munmap_chunk(&chunk, None); + internal_manager.munmap_chunk(&chunk, None, false); }); mem_chunks.clear(); @@ -487,13 +511,14 @@ impl VMManager { } } -// Modification on this structure must aquire the global lock. +// Modification on this structure must acquire the global lock. 
// TODO: Enable fast_default_chunks for faster chunk allocation
 #[derive(Debug)]
 pub struct InternalVMManager {
     chunks: BTreeSet<ChunkRef>, // track in-use chunks, use B-Tree for better performance and simplicity (compared with red-black tree)
     fast_default_chunks: Vec<ChunkRef>, // empty default chunks
     free_manager: VMFreeSpaceManager,
+    shm_manager: ShmManager, // track chunks which are shared by processes
 }
 
 impl InternalVMManager {
@@ -501,10 +526,12 @@ impl InternalVMManager {
         let chunks = BTreeSet::new();
         let fast_default_chunks = Vec::new();
         let free_manager = VMFreeSpaceManager::new(vm_range);
+        let shm_manager = ShmManager::new();
         Self {
             chunks,
             fast_default_chunks,
             free_manager,
+            shm_manager,
         }
     }
 
@@ -522,26 +549,38 @@ impl InternalVMManager {
 
     // Allocate a chunk with single vma
     pub fn mmap_chunk(&mut self, options: &VMMapOptions) -> Result<ChunkRef> {
+        let new_chunk = self
+            .new_chunk_with_options(options)
+            .map_err(|e| errno!(e.errno(), "mmap_chunk failure"))?;
+        trace!("allocate a new single vma chunk: {:?}", new_chunk);
+        self.chunks.insert(new_chunk.clone());
+        Ok(new_chunk)
+    }
+
+    fn new_chunk_with_options(&mut self, options: &VMMapOptions) -> Result<ChunkRef> {
         let addr = *options.addr();
         let size = *options.size();
         let align = *options.align();
         let free_range = self.find_free_gaps(size, align, addr)?;
-        let free_chunk = Chunk::new_single_vma_chunk(&free_range, options);
-        if let Err(e) = free_chunk {
+        let free_chunk = Chunk::new_single_vma_chunk(&free_range, options).map_err(|e| {
             // Error when creating chunks. Must return the free space before returning error
             self.free_manager
                 .add_range_back_to_free_manager(&free_range);
-            return_errno!(e.errno(), "mmap_chunk failure");
-        }
-        let chunk = Arc::new(free_chunk.unwrap());
-        trace!("allocate a new single vma chunk: {:?}", chunk);
-        self.chunks.insert(chunk.clone());
-        Ok(chunk)
+            e
+        })?;
+        Ok(Arc::new(free_chunk))
     }
 
     // Munmap a chunk
     // For Single VMA chunk, a part of the chunk could be munmapped if munmap_range is specified.
-    pub fn munmap_chunk(&mut self, chunk: &ChunkRef, munmap_range: Option<&VMRange>) -> Result<()> {
+    // `force_unmap` indicates whether an unmap request came from a (re)map request with `MAP_FIXED`;
+    // the unmapping ends differently when the chunk is being shared.
+    pub fn munmap_chunk(
+        &mut self,
+        chunk: &ChunkRef,
+        munmap_range: Option<&VMRange>,
+        force_unmap: bool,
+    ) -> Result<()> {
         trace!(
             "munmap_chunk range = {:?}, munmap_range = {:?}",
             chunk.range(),
             munmap_range,
@@ -569,6 +608,15 @@ impl InternalVMManager {
             }
         };
 
+        if chunk.is_shared() {
+            trace!(
+                "munmap_shared_chunk, chunk_range: {:?}, munmap_range = {:?}",
+                chunk.range(),
+                munmap_range,
+            );
+            return self.munmap_shared_chunk(chunk, munmap_range, force_unmap);
+        }
+
         // Either the munmap range is a subset of the chunk range or the munmap range is
         // a superset of the chunk range. We can handle both cases.
@@ -580,7 +628,7 @@ impl InternalVMManager {
         };
 
         // File-backed VMA needs to be flushed upon munmap
-        ChunkManager::flush_file_vma(&intersection_vma);
+        intersection_vma.flush_backed_file();
 
         // Reset memory permissions
         if !&intersection_vma.perms().is_default() {
@@ -625,7 +673,9 @@ impl InternalVMManager {
         let new_vma = new_vmas.pop().unwrap();
         let new_vma_chunk = Arc::new(Chunk::new_chunk_with_vma(new_vma));
         self.chunks.insert(new_vma_chunk.clone());
-        current.vm().add_mem_chunk(new_vma_chunk);
+        if current.status() != ThreadStatus::Exited {
+            current.vm().add_mem_chunk(new_vma_chunk);
+        }
 
         let updated_vma = new_vmas.pop().unwrap();
         self.update_single_vma_chunk(&current, &chunk, updated_vma);
@@ -635,18 +685,119 @@ impl InternalVMManager {
         Ok(())
     }
 
+    pub fn mmap_shared_chunk(&mut self, options: &VMMapOptions) -> Result<usize> {
+        match self.shm_manager.mmap_shared_chunk(options)? {
+            MmapSharedResult::Success(addr) => Ok(addr),
+            MmapSharedResult::NeedCreate => {
+                let new_chunk = self.mmap_chunk(options)?;
+                current!().vm().add_mem_chunk(new_chunk.clone());
+                self.shm_manager
+                    .create_shared_chunk(options, new_chunk.clone())
+                    .map_err(|e| {
+                        let vma = new_chunk.get_vma_for_single_vma_chunk();
+                        // Reset memory permissions
+                        if !vma.perms().is_default() {
+                            VMPerms::apply_perms(&vma, VMPerms::default());
+                        }
+                        // Reset memory contents
+                        unsafe {
+                            let buf = vma.as_slice_mut();
+                            buf.iter_mut().for_each(|b| *b = 0)
+                        }
+                        drop(vma);
+
+                        self.free_chunk(&new_chunk);
+                        current!().vm().remove_mem_chunk(&new_chunk);
+                        e
+                    })
+            }
+            MmapSharedResult::NeedExpand(old_shared_chunk, expand_range) => {
+                let new_chunk = {
+                    let new_chunk = self.new_chunk_with_options(options)?;
+                    self.merge_two_single_vma_chunks(&old_shared_chunk, &new_chunk)
+                };
+                let new_range = *new_chunk.range();
+                debug_assert_eq!(new_range, expand_range);
+                self.shm_manager
+                    .replace_shared_chunk(old_shared_chunk, new_chunk);
+                Ok(new_range.start())
+            }
+            MmapSharedResult::NeedReplace(_) => {
+                return_errno!(EINVAL, "mmap shared chunk failed");
+                // TODO: Support replacing the shared chunk when necessary,
+                // e.g., the current shared chunk is exclusively owned and `mremap()`-ed by the same process
+            }
+        }
+    }
+
+    pub fn munmap_shared_chunk(
+        &mut self,
+        chunk: &ChunkRef,
+        munmap_range: &VMRange,
+        force_unmap: bool,
+    ) -> Result<()> {
+        if !chunk.is_shared() {
+            return_errno!(EINVAL, "not a shared chunk");
+        }
+        if !chunk.range().overlap_with(munmap_range) {
+            return Ok(());
+        }
+
+        if self
+            .shm_manager
+            .munmap_shared_chunk(chunk, munmap_range, force_unmap)?
+            == MunmapSharedResult::Freeable
+        {
+            let vma = chunk.get_vma_for_single_vma_chunk();
+            // Flush memory contents to backed file
+            vma.flush_backed_file();
+            // Reset memory permissions
+            if !vma.perms().is_default() {
+                VMPerms::apply_perms(&vma, VMPerms::default());
+            }
+            // Reset memory contents
+            unsafe {
+                let buf = vma.as_slice_mut();
+                buf.iter_mut().for_each(|b| *b = 0)
+            }
+            drop(vma);
+
+            self.free_chunk(chunk);
+            let current = current!();
+            if current.status() != ThreadStatus::Exited {
+                current.vm().remove_mem_chunk(&chunk);
+            }
+        }
+        Ok(())
+    }
+
     fn update_single_vma_chunk(
         &mut self,
         current_thread: &ThreadRef,
         old_chunk: &ChunkRef,
         new_vma: VMArea,
-    ) {
+    ) -> ChunkRef {
         let new_chunk = Arc::new(Chunk::new_chunk_with_vma(new_vma));
         current_thread
             .vm()
             .replace_mem_chunk(old_chunk, new_chunk.clone());
         self.chunks.remove(old_chunk);
-        self.chunks.insert(new_chunk);
+        self.chunks.insert(new_chunk.clone());
+        new_chunk
+    }
+
+    fn merge_two_single_vma_chunks(&mut self, lhs: &ChunkRef, rhs: &ChunkRef) -> ChunkRef {
+        let mut new_vma = {
+            let lhs_vma = lhs.get_vma_for_single_vma_chunk();
+            let rhs_vma = rhs.get_vma_for_single_vma_chunk();
+            debug_assert_eq!(lhs_vma.end(), rhs_vma.start());
+
+            let mut new_vma = rhs_vma.clone();
+            new_vma.set_start(lhs_vma.start());
+            new_vma
+        };
+
+        self.update_single_vma_chunk(&current!(), lhs, new_vma)
     }
 
     // protect_range should a sub-range of the chunk range
@@ -657,6 +808,17 @@ impl InternalVMManager {
         new_perms: VMPerms,
     ) -> Result<()> {
         debug_assert!(chunk.range().is_superset_of(&protect_range));
+        if chunk.is_shared() {
+            trace!(
+                "mprotect_shared_chunk, chunk_range: {:?}, mprotect_range = {:?}",
+                chunk.range(),
+                protect_range,
+            );
+            // When the protect range hits a shared chunk, new perms are
+            // applied in an all-or-nothing manner to the whole vma
+            return self.shm_manager.mprotect_shared_chunk(chunk, new_perms);
+        }
+
         let vma = match chunk.internal() {
             ChunkType::MultiVMA(_) => {
                 unreachable!();
@@ -677,6 +839,7 @@ impl InternalVMManager {
             return Ok(());
         }
 
+        let current_pid = current!().process().pid();
         let same_start = protect_range.start() == containing_vma.start();
         let same_end = protect_range.end() == containing_vma.end();
         match (same_start, same_end) {
@@ -701,7 +864,7 @@ impl InternalVMManager {
                     &containing_vma,
                     protect_range,
                     new_perms,
-                    DUMMY_CHUNK_PROCESS_ID,
+                    VMAccess::Private(current_pid),
                 );
                 VMPerms::apply_perms(&new_vma, new_vma.perms());
 
@@ -711,7 +874,7 @@ impl InternalVMManager {
                     &containing_vma,
                     range,
                     old_perms,
-                    DUMMY_CHUNK_PROCESS_ID,
+                    VMAccess::Private(current_pid),
                 )
             };
 
@@ -732,7 +895,7 @@ impl InternalVMManager {
                     &containing_vma,
                     protect_range,
                     new_perms,
-                    DUMMY_CHUNK_PROCESS_ID,
+                    VMAccess::Private(current_pid),
                 );
                 VMPerms::apply_perms(&new_vma, new_vma.perms());
 
@@ -800,7 +963,7 @@ impl InternalVMManager {
     //
     // Previously, this method is implemented for VMManager and only acquire the internal lock when needed. However, this will make mmap non-atomic.
     // In multi-thread applications, for example, when a thread calls mmap with MAP_FIXED flag, and that desired memory is mapped already, the libos
-    // will first munmap the corresponding memory and then do a mmap with desired address. If the lock is not aquired during the whole process,
+    // will first munmap the corresponding memory and then do a mmap with desired address. If the lock is not acquired during the whole process,
    // the unmapped memory might be allocated by other thread who is waiting and acquiring the lock.
// Thus, in current code, this method is implemented for InternalManager and holds the lock during the whole process. // Below method "force_mmap_across_multiple_chunks" is the same. @@ -853,7 +1016,7 @@ impl InternalVMManager { } // Munmap the corresponding single vma chunk - self.munmap_chunk(&chunk, Some(&target_range))?; + self.munmap_chunk(&chunk, Some(&target_range), true)?; } VMMapAddr::Any => unreachable!(), } diff --git a/src/libos/src/vm/vm_range.rs b/src/libos/src/vm/vm_range.rs index 17c88481..65f35645 100644 --- a/src/libos/src/vm/vm_range.rs +++ b/src/libos/src/vm/vm_range.rs @@ -89,7 +89,7 @@ impl VMRange { self.start() <= addr && addr < self.end() } - // Returns whether two ranges have non-empty interesection. + // Returns whether two ranges have non-empty intersection. pub fn overlap_with(&self, other: &VMRange) -> bool { let intersection_start = self.start().max(other.start()); let intersection_end = self.end().min(other.end()); diff --git a/src/libos/src/vm/vm_util.rs b/src/libos/src/vm/vm_util.rs index 84599c67..0a1e3892 100644 --- a/src/libos/src/vm/vm_util.rs +++ b/src/libos/src/vm/vm_util.rs @@ -3,12 +3,12 @@ use super::*; use super::vm_area::*; use super::vm_perms::VMPerms; use crate::fs::FileMode; -use std::collections::BTreeSet; use intrusive_collections::rbtree::{Link, RBTree}; use intrusive_collections::Bound; use intrusive_collections::RBTreeLink; use intrusive_collections::{intrusive_adapter, KeyAdapter}; +use rcore_fs::vfs::Metadata; #[derive(Clone, Debug)] pub enum VMInitializer { @@ -20,7 +20,7 @@ pub enum VMInitializer { FileBacked { file: FileBacked, }, - // For ELF files, there is specical handling to not copy all the contents of the file. This is only used for tracking. + // For ELF files, there is special handling to not copy all the contents of the file. This is only used for tracking. ElfSpecific { elf_file: FileRef, }, @@ -150,6 +150,10 @@ impl FileBacked { None } } + + pub fn metadata(&self) -> Metadata { + self.file.metadata().unwrap() + } } #[derive(Clone, Copy, Debug, PartialEq)] @@ -258,6 +262,13 @@ impl VMMapOptions { pub fn initializer(&self) -> &VMInitializer { &self.initializer } + + pub fn is_shared(&self) -> bool { + if let Some(backed_file) = self.initializer.backed_file() { + return backed_file.need_write_back(); + } + false + } } #[derive(Clone, Copy, PartialEq)] @@ -407,7 +418,7 @@ pub trait VMRemapParser { // For Linux, writing to either memory vma or the file will update the other two equally. But we won't be able to support this before // we really have paging. Thus, if the old_range is not equal to a recorded vma, we will just return with error. if writeback_file.is_some() && &old_range != vma.range() { - return_errno!(EINVAL, "Known limition") + return_errno!(EINVAL, "Known limitation") } // Implement mremap as one optional mmap followed by one optional munmap. 
@@ -449,13 +460,13 @@ pub trait VMRemapParser { (Some(mmap_opts), ret_addr) } (MRemapFlags::MayMove, VMRemapSizeType::Growing, None) => { - let prefered_new_range = + let preferred_new_range = VMRange::new_with_size(old_addr + old_size, new_size - old_size)?; - if self.is_free_range(&prefered_new_range) { + if self.is_free_range(&preferred_new_range) { // Don't need to move the old range let mmap_ops = VMMapOptionsBuilder::default() - .size(prefered_new_range.size()) - .addr(VMMapAddr::Need(prefered_new_range.start())) + .size(preferred_new_range.size()) + .addr(VMMapAddr::Need(preferred_new_range.start())) .perms(perms) .initializer(VMInitializer::DoNothing()) .build()?; @@ -475,9 +486,9 @@ pub trait VMRemapParser { } } (MRemapFlags::MayMove, VMRemapSizeType::Growing, Some((backed_file, offset))) => { - let prefered_new_range = + let preferred_new_range = VMRange::new_with_size(old_addr + old_size, new_size - old_size)?; - if self.is_free_range(&prefered_new_range) { + if self.is_free_range(&preferred_new_range) { // Don't need to move the old range let vm_initializer_for_new_range = VMInitializer::FileBacked { file: FileBacked::new( @@ -487,8 +498,8 @@ pub trait VMRemapParser { ), }; let mmap_ops = VMMapOptionsBuilder::default() - .size(prefered_new_range.size()) - .addr(VMMapAddr::Need(prefered_new_range.start())) + .size(preferred_new_range.size()) + .addr(VMMapAddr::Need(preferred_new_range.start())) .perms(perms) .initializer(vm_initializer_for_new_range) .build()?; diff --git a/test/Makefile b/test/Makefile index b0ec0f75..dced6048 100644 --- a/test/Makefile +++ b/test/Makefile @@ -22,7 +22,7 @@ TESTS ?= env empty hello_world malloc mmap file fs_perms getpid spawn sched pipe truncate readdir mkdir open stat link symlink chmod chown tls pthread system_info rlimit \ server server_epoll unix_socket cout hostfs cpuid rdtsc device sleep exit_group posix_flock \ ioctl fcntl eventfd emulate_syscall access signal sysinfo prctl rename procfs wait \ - spawn_attribute exec statfs random umask pgrp vfork mount flock utimes shm epoll brk + spawn_attribute exec statfs random umask pgrp vfork mount flock utimes shm epoll brk posix_shm # Benchmarks: need to be compiled and run by bench-% target BENCHES := spawn_and_exit_latency pipe_throughput unix_socket_throughput diff --git a/test/posix_shm/Makefile b/test/posix_shm/Makefile new file mode 100644 index 00000000..d719d7e3 --- /dev/null +++ b/test/posix_shm/Makefile @@ -0,0 +1,10 @@ +include ../test_common.mk + +EXTRA_C_FLAGS := -Wno-stringop-truncation -Wno-nonnull +EXTRA_LINK_FLAGS := -lrt +BIN_ARGS := + +CUSTOM_PRE_BUILD := 1 + +custom_pre_build: + @cp /lib/x86_64-linux-gnu/librt.so.1 $(BUILD_DIR)/test/image/opt/occlum/glibc/lib/ diff --git a/test/posix_shm/main.c b/test/posix_shm/main.c new file mode 100644 index 00000000..d0a45539 --- /dev/null +++ b/test/posix_shm/main.c @@ -0,0 +1,170 @@ +// C test program illustrating POSIX shared-memory API. 
+#define _GNU_SOURCE
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include "test.h"
+
+// ============================================================================
+// Helper macros
+// ============================================================================
+
+#define SHM_OBJ "shm_test" // Name of shared memory object
+#define SHM_SIZE 0x1000    // Size (in bytes) of shared memory object
+
+/* Messages to communicate through shared memory */
+#define MSG0 "1st Hello"
+#define MSG1 "2nd Hello"
+#define MSG2 "3rd Hello"
+#define MSG3 "4th Hello"
+#define MSG_SIZE strlen(MSG0)
+
+// ============================================================================
+// Test cases
+// ============================================================================
+
+int producer_process() {
+    // Shared memory file descriptor
+    int shm_fd;
+    // Shared memory buffer
+    void *shm_buf;
+
+    // Create the shared memory object
+    shm_fd = shm_open(SHM_OBJ, O_CREAT | O_RDWR, 0666);
+    if (shm_fd < 0) {
+        THROW_ERROR("shm_open failed");
+    }
+
+    // Configure the size of the shared memory object
+    if (ftruncate(shm_fd, SHM_SIZE) < 0) {
+        THROW_ERROR("ftruncate error");
+    }
+
+    // Memory map the shared memory object
+    shm_buf = mmap(NULL, SHM_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, shm_fd, 0);
+    if (shm_buf == MAP_FAILED) {
+        THROW_ERROR("mmap(MAP_SHARED) failed");
+    }
+
+    // Fork a child and launch consumer process
+    pid_t child_pid = vfork();
+    if (child_pid < 0) {
+        // THROW_ERROR("Spawn a child process failed");
+        perror("Spawn a child process failed");
+        return -1;
+    } else if (child_pid == 0) {
+        execl("/bin/posix_shm", NULL, NULL);
+        THROW_ERROR("exec failed");
+    }
+
+    // Communicate through shared memory
+    strncpy(shm_buf, MSG0, MSG_SIZE);
+    printf("[Producer] send %s\n", MSG0);
+    while (1) {
+        if (strncmp(shm_buf, MSG1, MSG_SIZE) != 0) {
+            sleep(1);
+            continue;
+        }
+        printf("[Producer] receive %s\n", MSG1);
+        strncpy(shm_buf, MSG2, MSG_SIZE);
+        printf("[Producer] send %s\n", MSG2);
+        while (1) {
+            if (strncmp(shm_buf, MSG3, MSG_SIZE) != 0) {
+                sleep(1);
+                continue;
+            }
+            printf("[Producer] receive %s\n", MSG3);
+            break;
+        }
+        break;
+    }
+
+    // Unmap the shared memory
+    if (munmap(shm_buf, SHM_SIZE) < 0) {
+        THROW_ERROR("munmap failed");
+    }
+
+    // Unlink the shared memory object
+    shm_unlink(SHM_OBJ);
+    return 0;
+}
+
+int consumer_process() {
+    // Shared memory file descriptor
+    int shm_fd;
+    // Shared memory buffer
+    void *shm_buf;
+
+    // Create the shared memory object
+    shm_fd = shm_open(SHM_OBJ, O_CREAT | O_RDWR, 0666);
+    if (shm_fd < 0) {
+        THROW_ERROR("shm_open failed");
+    }
+
+    // Configure the size of the shared memory object
+    if (ftruncate(shm_fd, SHM_SIZE) < 0) {
+        THROW_ERROR("ftruncate error");
+    }
+
+    // Memory map the shared memory object
+    shm_buf = mmap(NULL, SHM_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, shm_fd, 0);
+    if (shm_buf == MAP_FAILED) {
+        THROW_ERROR("mmap(MAP_SHARED) failed");
+    }
+
+    while (1) {
+        if (strncmp(shm_buf, MSG0, MSG_SIZE) != 0) {
+            sleep(1);
+            continue;
+        }
+        printf("[Consumer] receive %s\n", MSG0);
+        strncpy(shm_buf, MSG1, MSG_SIZE);
+        printf("[Consumer] send %s\n", MSG1);
+        while (1) {
+            if (strncmp(shm_buf, MSG2, MSG_SIZE) != 0) {
+                sleep(1);
+                continue;
+            }
+            printf("[Consumer] receive %s\n", MSG2);
+            strncpy(shm_buf, MSG3, MSG_SIZE);
+            printf("[Consumer] send %s\n", MSG3);
+            break;
+        }
+        break;
+    }
+
+    // Unmap the shared memory
+    if (munmap(shm_buf, SHM_SIZE) < 0) {
+        THROW_ERROR("munmap failed");
+    }
+
+    // Unlink the shared memory object
+    shm_unlink(SHM_OBJ);
+    return 0;
+}
+
+int test_posix_shm() {
+    return producer_process();
+}
+
+// ============================================================================
+// Test suite main
+// ============================================================================
+
+static test_case_t test_cases[] = {
+    TEST_CASE(test_posix_shm),
+};
+
+int main(int argc, const char *argv[]) {
+    if (argc == 1) {
+        // Producer process
+        return test_suite_run(test_cases, ARRAY_SIZE(test_cases));
+    } else {
+        // Consumer process
+        return consumer_process();
+    }
+}
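
The freeability rule that this patch builds on — a shared chunk is reclaimed only when the last attached process detaches — can be distilled into a minimal standalone Rust sketch. The types below (`Pid`, the plain `bool` return) are simplified stand-ins for illustration, not the actual LibOS definitions:

use std::collections::HashSet;

type Pid = u32;

enum VMAccess {
    Private(Pid),
    Shared(HashSet<Pid>),
}

impl VMAccess {
    // Attach a process to a shared mapping; a no-op for an already attached PID.
    fn attach(&mut self, pid: Pid) {
        if let VMAccess::Shared(pid_set) = self {
            pid_set.insert(pid);
        }
    }

    // Detach a process; returns true when no process references the mapping anymore,
    // mirroring MunmapSharedResult::Freeable vs. StillInUse above.
    fn detach(&mut self, pid: Pid) -> bool {
        match self {
            VMAccess::Private(p) => *p == pid,
            VMAccess::Shared(pid_set) => {
                pid_set.remove(&pid);
                pid_set.is_empty()
            }
        }
    }
}

fn main() {
    // Producer (pid 100) creates the shared chunk; consumer (pid 101) maps the same inode.
    let mut access = VMAccess::Shared(HashSet::from([100]));
    access.attach(101);
    assert!(!access.detach(100)); // producer unmaps: still in use
    assert!(access.detach(101)); // consumer unmaps: freeable
}

Given the `TESTS ?=` list in test/Makefile above, the new case can presumably be run on its own with `make test TESTS=posix_shm` from the `test/` directory.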