From bdb782560774d132b88b52e364e925df258bde06 Mon Sep 17 00:00:00 2001
From: "Hui, Chunyang"
Date: Sat, 9 Oct 2021 08:48:10 +0000
Subject: [PATCH] Add support for mremap

---
 src/libos/src/lib.rs                   |   2 +
 src/libos/src/vm/chunk.rs              |  37 ++++
 src/libos/src/vm/free_space_manager.rs |   6 +
 src/libos/src/vm/process_vm.rs         |  78 ++++++-
 src/libos/src/vm/vm_area.rs            |  61 ++++++
 src/libos/src/vm/vm_chunk_manager.rs   | 281 ++++++-------------------
 src/libos/src/vm/vm_manager.rs         |  94 ++++++++-
 src/libos/src/vm/vm_util.rs            | 264 ++++++++++++++++++++++-
 test/mmap/main.c                       |   2 -
 9 files changed, 601 insertions(+), 224 deletions(-)

diff --git a/src/libos/src/lib.rs b/src/libos/src/lib.rs
index e2799628..2c3f04c0 100644
--- a/src/libos/src/lib.rs
+++ b/src/libos/src/lib.rs
@@ -22,6 +22,7 @@
 // for std::hint::black_box
 #![feature(test)]
 #![feature(atomic_from_mut)]
+#![feature(btree_drain_filter)]
 #[macro_use]
 extern crate alloc;
@@ -57,6 +58,7 @@
 extern crate serde_json;
 extern crate memoffset;
 extern crate ctor;
 extern crate intrusive_collections;
+extern crate itertools;
 extern crate resolv_conf;
 use sgx_trts::libc;

diff --git a/src/libos/src/vm/chunk.rs b/src/libos/src/vm/chunk.rs
index 603b6fc9..6b939a82 100644
--- a/src/libos/src/vm/chunk.rs
+++ b/src/libos/src/vm/chunk.rs
@@ -19,6 +19,9 @@
 pub type ChunkID = usize;
 pub type ChunkRef = Arc<Chunk>;
 pub struct Chunk {
+    // This range is used for fast checks without taking any locks. However, after an mremap, the size of
+    // this range can differ from the internal VMA range of a single-VMA chunk. This can only be corrected
+    // by getting the internal VMA, creating a new chunk and replacing the old chunk.
     range: VMRange,
     internal: ChunkType,
 }
@@ -67,6 +70,13 @@ impl Chunk {
         &self.internal
     }

+    pub fn get_vma_for_single_vma_chunk(&self) -> VMArea {
+        match self.internal() {
+            ChunkType::MultiVMA(_) => unreachable!(),
+            ChunkType::SingleVMA(vma) => vma.lock().unwrap().clone(),
+        }
+    }
+
     pub fn free_size(&self) -> usize {
         match self.internal() {
             ChunkType::SingleVMA(vma) => 0, // for single VMA chunk, there is no free space
@@ -160,6 +170,33 @@ impl Chunk {
         }
     }

+    pub fn is_single_dummy_vma(&self) -> bool {
+        if let ChunkType::SingleVMA(vma) = &self.internal {
+            vma.lock().unwrap().size() == 0
+        } else {
+            false
+        }
+    }
+
+    // Returns true when the chunk size and the internal VMA size conflict.
+    // This happens when the internal VMA is changed, e.g. by mremap.
+    pub fn is_single_vma_with_conflict_size(&self) -> bool {
+        if let ChunkType::SingleVMA(vma) = &self.internal {
+            vma.lock().unwrap().size() != self.range.size()
+        } else {
+            false
+        }
+    }
+
+    pub fn is_single_vma_chunk_should_be_removed(&self) -> bool {
+        if let ChunkType::SingleVMA(vma) = &self.internal {
+            let vma_size = vma.lock().unwrap().size();
+            vma_size == 0 || vma_size != self.range.size()
+        } else {
+            false
+        }
+    }
+
     pub fn find_mmap_region(&self, addr: usize) -> Result<VMRange> {
         let internal = &self.internal;
         match self.internal() {
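A note on the helpers above: a single-VMA chunk whose cached `range` has gone stale (as described by the comment on `Chunk::range`) can only be repaired by rebuilding the chunk around its internal VMA. A minimal sketch of that repair, assuming a `HashSet<ChunkRef>` chunk set like the `mem_chunks` set used later in this patch (`repair_stale_chunk` is a hypothetical helper, not part of the patch):

    // Hypothetical helper: replace a stale single-VMA chunk with a fresh one
    // whose `range` matches the internal VMA again.
    fn repair_stale_chunk(chunks: &mut HashSet<ChunkRef>, stale: &ChunkRef) {
        if stale.is_single_vma_with_conflict_size() {
            let vma = stale.get_vma_for_single_vma_chunk(); // clone of the internal VMA
            chunks.remove(stale);                           // drop the chunk with the stale range
            chunks.insert(Arc::new(Chunk::new_chunk_with_vma(vma)));
        }
    }
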
diff --git a/src/libos/src/vm/free_space_manager.rs b/src/libos/src/vm/free_space_manager.rs
index 2e19f32b..4a39fb86 100644
--- a/src/libos/src/vm/free_space_manager.rs
+++ b/src/libos/src/vm/free_space_manager.rs
@@ -146,4 +146,10 @@ impl VMFreeSpaceManager {
         trace!("after add range back free list = {:?}", free_list);
         return Ok(());
     }
+
+    pub fn is_free_range(&self, request_range: &VMRange) -> bool {
+        self.free_manager
+            .iter()
+            .any(|free_range| free_range.is_superset_of(request_range))
+    }
 }

diff --git a/src/libos/src/vm/process_vm.rs b/src/libos/src/vm/process_vm.rs
index c8d4570e..5884f924 100644
--- a/src/libos/src/vm/process_vm.rs
+++ b/src/libos/src/vm/process_vm.rs
@@ -1,9 +1,10 @@
 use super::*;

-use super::chunk::{Chunk, ChunkRef};
+use super::chunk::*;
 use super::config;
 use super::process::elf_file::{ElfFile, ProgramHeaderExt};
 use super::user_space_vm::USER_SPACE_VM_MANAGER;
+use super::vm_area::VMArea;
 use super::vm_perms::VMPerms;
 use super::vm_util::{VMInitializer, VMMapAddr, VMMapOptions, VMMapOptionsBuilder, VMRemapOptions};
 use std::collections::HashSet;
@@ -302,6 +303,80 @@ impl ProcessVM {
         self.add_mem_chunk(new_chunk)
     }

+    // Try merging all connecting single VMAs of the process.
+    // This is a very expensive operation.
+    pub fn merge_all_single_vma_chunks(&self) -> Result<Vec<VMArea>> {
+        // Get all single VMA chunks
+        let mut mem_chunks = self.mem_chunks.write().unwrap();
+        let mut single_vma_chunks = mem_chunks
+            .drain_filter(|chunk| chunk.is_single_vma())
+            .collect::<Vec<ChunkRef>>();
+        single_vma_chunks.sort_unstable_by(|chunk_a, chunk_b| {
+            chunk_a
+                .range()
+                .start()
+                .partial_cmp(&chunk_b.range().start())
+                .unwrap()
+        });
+
+        // Try merging connecting VMAs
+        for chunks in single_vma_chunks.windows(2) {
+            let chunk_a = &chunks[0];
+            let chunk_b = &chunks[1];
+            let mut vma_a = match chunk_a.internal() {
+                ChunkType::MultiVMA(_) => {
+                    unreachable!();
+                }
+                ChunkType::SingleVMA(vma) => vma.lock().unwrap(),
+            };
+
+            let mut vma_b = match chunk_b.internal() {
+                ChunkType::MultiVMA(_) => {
+                    unreachable!();
+                }
+                ChunkType::SingleVMA(vma) => vma.lock().unwrap(),
+            };
+
+            if VMArea::can_merge_vmas(&vma_a, &vma_b) {
+                let new_start = vma_a.start();
+                vma_b.set_start(new_start);
+                // Set vma_a to zero size, making it a dummy VMA
+                vma_a.set_end(new_start);
+            }
+        }
+
+        // Remove single dummy VMA chunks
+        single_vma_chunks
+            .drain_filter(|chunk| chunk.is_single_dummy_vma())
+            .collect::<Vec<ChunkRef>>();
+
+        // Get all merged chunks whose VMA and range conflict
+        let merged_chunks = single_vma_chunks
+            .drain_filter(|chunk| chunk.is_single_vma_with_conflict_size())
+            .collect::<Vec<ChunkRef>>();
+
+        // Get the merged VMAs
+        let mut new_vmas = Vec::new();
+        merged_chunks.iter().for_each(|chunk| {
+            let vma = chunk.get_vma_for_single_vma_chunk();
+            new_vmas.push(vma)
+        });
+
+        // Add all merged VMAs back to the mem_chunks list of the process
+        new_vmas.iter().for_each(|vma| {
+            let chunk = Arc::new(Chunk::new_chunk_with_vma(vma.clone()));
+            mem_chunks.insert(chunk);
+        });
+
+        // Add all unchanged single-VMA chunks back to the mem_chunks list
+        while let Some(chunk) = single_vma_chunks.pop() {
+            mem_chunks.insert(chunk);
+        }
+
+        Ok(new_vmas)
+    }
+
     pub fn get_process_range(&self) -> &VMRange {
         USER_SPACE_VM_MANAGER.range()
     }
@@ -487,6 +562,7 @@ impl MMapFlags {
     }
 }

+// TODO: Support MREMAP_DONTUNMAP flag (since Linux 5.7)
 #[derive(Clone, Copy, Debug, PartialEq)]
 pub enum MRemapFlags {
     None,
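To make the merging pass just added above concrete, here is a worked walk-through on made-up addresses, for three adjacent single-VMA chunks with the same pid and permissions:

    // before the pass:  A = [0x10000, 0x11000)  B = [0x11000, 0x13000)  C = [0x13000, 0x14000)
    // window (A, B): vma_b.set_start(A.start); vma_a.set_end(A.start)  -> A becomes a dummy (size 0);
    //                B's VMA is now [0x10000, 0x13000) while B's chunk range is still [0x11000, 0x13000)
    // window (B, C): vma_c.set_start(B.start); vma_b.set_end(B.start)  -> B becomes a dummy too
    // afterwards: A and B are dropped by the is_single_dummy_vma() filter; C's VMA
    // [0x10000, 0x14000) now conflicts with C's chunk range, so C is returned in new_vmas
    // and rebuilt as a fresh chunk by the caller (see VMManager::mremap below).
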
diff --git a/src/libos/src/vm/vm_area.rs b/src/libos/src/vm/vm_area.rs
index 5a349eea..c4a4dc23 100644
--- a/src/libos/src/vm/vm_area.rs
+++ b/src/libos/src/vm/vm_area.rs
@@ -116,9 +116,70 @@ impl VMArea {
         }
     }

+    pub fn is_the_same_to(&self, other: &VMArea) -> bool {
+        if self.pid() != other.pid() {
+            return false;
+        }
+
+        if self.range() != other.range() {
+            return false;
+        }
+
+        if self.perms() != other.perms() {
+            return false;
+        }
+
+        let self_writeback_file = self.writeback_file();
+        let other_writeback_file = other.writeback_file();
+        match (self_writeback_file, other_writeback_file) {
+            (None, None) => return true,
+            (Some(_), None) => return false,
+            (None, Some(_)) => return false,
+            (Some((self_file, self_offset)), Some((other_file, other_offset))) => {
+                Arc::ptr_eq(&self_file, &other_file) && self_offset == other_offset
+            }
+        }
+    }
+
     pub fn set_end(&mut self, new_end: usize) {
         self.range.set_end(new_end);
     }
+
+    pub fn can_merge_vmas(left: &VMArea, right: &VMArea) -> bool {
+        debug_assert!(left.end() <= right.start());
+
+        // Neither of the two VMAs may be a sentry (i.e., have size == 0)
+        if left.size() == 0 || right.size() == 0 {
+            return false;
+        }
+        // The two VMAs must be owned by the same process
+        if left.pid() != right.pid() {
+            return false;
+        }
+        // The two VMAs must border each other
+        if left.end() != right.start() {
+            return false;
+        }
+        // The two VMAs must have the same memory permissions
+        if left.perms() != right.perms() {
+            return false;
+        }
+
+        // If the two VMAs have write-back files, the files must be the same and
+        // the two file regions must be contiguous.
+        let left_writeback_file = left.writeback_file();
+        let right_writeback_file = right.writeback_file();
+        match (left_writeback_file, right_writeback_file) {
+            (None, None) => true,
+            (Some(_), None) => false,
+            (None, Some(_)) => false,
+            (Some((left_file, left_offset)), Some((right_file, right_offset))) => {
+                Arc::ptr_eq(&left_file, &right_file)
+                    && right_offset > left_offset
+                    && right_offset - left_offset == left.size()
+            }
+        }
+    }
 }

 impl Deref for VMArea {
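The last match arm above is the only non-obvious merge rule: two file-backed VMAs merge only if the right mapping continues the backing file exactly where the left one ends. The check in isolation (standalone sketch, not part of the patch):

    // The left VMA maps file bytes [left_off, left_off + left_size); the right
    // VMA must start at file offset left_off + left_size.
    fn file_regions_contiguous(left_off: usize, left_size: usize, right_off: usize) -> bool {
        right_off > left_off && right_off - left_off == left_size
    }

    // e.g. if the left VMA maps file bytes [0x0, 0x4000), the right one must map from 0x4000 on
    assert!(file_regions_contiguous(0x0, 0x4000, 0x4000));
    assert!(!file_regions_contiguous(0x0, 0x4000, 0x5000));
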
diff --git a/src/libos/src/vm/vm_chunk_manager.rs b/src/libos/src/vm/vm_chunk_manager.rs
index 51003153..3021dc4a 100644
--- a/src/libos/src/vm/vm_chunk_manager.rs
+++ b/src/libos/src/vm/vm_chunk_manager.rs
@@ -222,227 +222,41 @@ impl ChunkManager {
         self.munmap_range(munmap_range)
     }

-    pub fn mremap(&mut self, options: &VMRemapOptions) -> Result<usize> {
+    pub fn parse_mremap_options(&mut self, options: &VMRemapOptions) -> Result<VMRemapResult> {
         let old_addr = options.old_addr();
         let old_size = options.old_size();
         let old_range = VMRange::new_with_size(old_addr, old_size)?;
         let new_size = options.new_size();
         let flags = options.flags();
-        let size_type = SizeType::new(&old_size, &new_size);
+        let size_type = VMRemapSizeType::new(&old_size, &new_size);
+        let current_pid = current!().process().pid();

-        return_errno!(ENOSYS, "Under development");
+        // Merge all connecting VMAs here because the old range must correspond to one single VMA
+        self.merge_all_vmas();

-        // Old dead code. Could be used for future development.
-        #[cfg(dev)]
-        {
-            // The old range must be contained in one VMA
-            let idx = self
-                .find_containing_vma_idx(&old_range)
-                .ok_or_else(|| errno!(EFAULT, "invalid range"))?;
-            let containing_vma = &self.vmas[idx];
-            // Get the memory permissions of the old range
-            let perms = containing_vma.perms();
-            // Get the write back file of the old range if there is one.
-            let writeback_file = containing_vma.writeback_file();
-
-            // FIXME: Current implementation for file-backed memory mremap has limitation that if a SUBRANGE of the previous
-            // file-backed mmap with MAP_SHARED is then mremap-ed with MREMAP_MAYMOVE, there will be two vmas that have the same backed file.
-            // For Linux, writing to either memory vma or the file will update the other two equally. But we won't be able to support this before
-            // we really have paging. Thus, if the old_range is not equal to a recorded vma, we will just return with error.
-            if writeback_file.is_some() && &old_range != containing_vma.range() {
-                return_errno!(EINVAL, "Known limition")
-            }
-
-            // Implement mremap as one optional mmap followed by one optional munmap.
-            //
-            // The exact arguments for the mmap and munmap are determined by the values of MRemapFlags,
-            // SizeType and writeback_file. There is a total of 18 combinations among MRemapFlags and
-            // SizeType and writeback_file. As some combinations result in the same mmap and munmap operations,
-            // the following code only needs to match below patterns of (MRemapFlags, SizeType, writeback_file)
-            // and treat each case accordingly.
-
-            // Determine whether need to do mmap. And when possible, determine the returned address
-            let (need_mmap, mut ret_addr) = match (flags, size_type, writeback_file) {
-                (MRemapFlags::None, SizeType::Growing, None) => {
-                    let vm_initializer_for_new_range = VMInitializer::FillZeros();
-                    let mmap_opts = VMMapOptionsBuilder::default()
-                        .size(new_size - old_size)
-                        .addr(VMMapAddr::Need(old_range.end()))
-                        .perms(perms)
-                        .initializer(vm_initializer_for_new_range)
-                        .build()?;
-                    let ret_addr = Some(old_addr);
-                    (Some(mmap_opts), ret_addr)
-                }
-                (MRemapFlags::None, SizeType::Growing, Some((backed_file, offset))) => {
-                    // Update writeback file offset
-                    let new_writeback_file =
-                        Some((backed_file.clone(), offset + containing_vma.size()));
-                    let vm_initializer_for_new_range = VMInitializer::LoadFromFile {
-                        file: backed_file.clone(),
-                        offset: offset + containing_vma.size(), // file-backed mremap should start from the end of previous mmap/mremap file
-                    };
-                    let mmap_opts = VMMapOptionsBuilder::default()
-                        .size(new_size - old_size)
-                        .addr(VMMapAddr::Need(old_range.end()))
-                        .perms(perms)
-                        .initializer(vm_initializer_for_new_range)
-                        .writeback_file(new_writeback_file)
-                        .build()?;
-                    let ret_addr = Some(old_addr);
-                    (Some(mmap_opts), ret_addr)
-                }
-                (MRemapFlags::MayMove, SizeType::Growing, None) => {
-                    let prefered_new_range =
-                        VMRange::new_with_size(old_addr + old_size, new_size - old_size)?;
-                    if self.is_free_range(&prefered_new_range) {
-                        // Don't need to move the old range
-                        let vm_initializer_for_new_range = VMInitializer::FillZeros();
-                        let mmap_ops = VMMapOptionsBuilder::default()
-                            .size(prefered_new_range.size())
-                            .addr(VMMapAddr::Need(prefered_new_range.start()))
-                            .perms(perms)
-                            .initializer(vm_initializer_for_new_range)
-                            .build()?;
-                        (Some(mmap_ops), Some(old_addr))
-                    } else {
-                        // Need to move old range to a new range and init the new range
-                        let vm_initializer_for_new_range =
-                            VMInitializer::CopyFrom { range: old_range };
-                        let mmap_ops = VMMapOptionsBuilder::default()
-                            .size(new_size)
-                            .addr(VMMapAddr::Any)
-                            .perms(perms)
-                            .initializer(vm_initializer_for_new_range)
-                            .build()?;
-                        // Cannot determine the returned address for now, which can only be obtained after calling mmap
-                        let ret_addr = None;
-                        (Some(mmap_ops), ret_addr)
-                    }
-                }
-                (MRemapFlags::MayMove, SizeType::Growing, Some((backed_file, offset))) => {
-                    let prefered_new_range =
-                        VMRange::new_with_size(old_addr + old_size, new_size - old_size)?;
-                    if self.is_free_range(&prefered_new_range) {
-                        // Don't need to move the old range
-                        let vm_initializer_for_new_range = VMInitializer::LoadFromFile {
-                            file: backed_file.clone(),
-                            offset: offset + containing_vma.size(), // file-backed mremap should start from the end of previous mmap/mremap file
-                        };
-                        // Write back file should start from new offset
-                        let new_writeback_file =
-                            Some((backed_file.clone(), offset + containing_vma.size()));
-                        let mmap_ops = VMMapOptionsBuilder::default()
-                            .size(prefered_new_range.size())
-                            .addr(VMMapAddr::Need(prefered_new_range.start()))
-                            .perms(perms)
-                            .initializer(vm_initializer_for_new_range)
-                            .writeback_file(new_writeback_file)
-                            .build()?;
-                        (Some(mmap_ops), Some(old_addr))
-                    } else {
-                        // Need to move old range to a new range and init the new range
-                        let vm_initializer_for_new_range = {
-                            let copy_end = containing_vma.end();
-                            let copy_range = VMRange::new(old_range.start(), copy_end)?;
-                            let reread_file_start_offset = copy_end - containing_vma.start();
-                            VMInitializer::CopyOldAndReadNew {
-                                old_range: copy_range,
-                                file: backed_file.clone(),
-                                offset: reread_file_start_offset,
-                            }
-                        };
-                        let new_writeback_file = Some((backed_file.clone(), *offset));
-                        let mmap_ops = VMMapOptionsBuilder::default()
-                            .size(new_size)
-                            .addr(VMMapAddr::Any)
-                            .perms(perms)
-                            .initializer(vm_initializer_for_new_range)
-                            .writeback_file(new_writeback_file)
-                            .build()?;
-                        // Cannot determine the returned address for now, which can only be obtained after calling mmap
-                        let ret_addr = None;
-                        (Some(mmap_ops), ret_addr)
-                    }
-                }
-                (MRemapFlags::FixedAddr(new_addr), _, None) => {
-                    let vm_initializer_for_new_range =
-                        { VMInitializer::CopyFrom { range: old_range } };
-                    let mmap_opts = VMMapOptionsBuilder::default()
-                        .size(new_size)
-                        .addr(VMMapAddr::Force(new_addr))
-                        .perms(perms)
-                        .initializer(vm_initializer_for_new_range)
-                        .build()?;
-                    let ret_addr = Some(new_addr);
-                    (Some(mmap_opts), ret_addr)
-                }
-                (MRemapFlags::FixedAddr(new_addr), _, Some((backed_file, offset))) => {
-                    let vm_initializer_for_new_range = {
-                        let copy_end = containing_vma.end();
-                        let copy_range = VMRange::new(old_range.start(), copy_end)?;
-                        let reread_file_start_offset = copy_end - containing_vma.start();
-                        VMInitializer::CopyOldAndReadNew {
-                            old_range: copy_range,
-                            file: backed_file.clone(),
-                            offset: reread_file_start_offset,
-                        }
-                    };
-                    let new_writeback_file = Some((backed_file.clone(), *offset));
-                    let mmap_opts = VMMapOptionsBuilder::default()
-                        .size(new_size)
-                        .addr(VMMapAddr::Force(new_addr))
-                        .perms(perms)
-                        .initializer(vm_initializer_for_new_range)
-                        .writeback_file(new_writeback_file)
-                        .build()?;
-                    let ret_addr = Some(new_addr);
-                    (Some(mmap_opts), ret_addr)
-                }
-                _ => (None, Some(old_addr)),
-            };
-
-            let need_munmap = match (flags, size_type) {
-                (MRemapFlags::None, SizeType::Shrinking)
-                | (MRemapFlags::MayMove, SizeType::Shrinking) => {
-                    let unmap_addr = old_addr + new_size;
-                    let unmap_size = old_size - new_size;
-                    Some((unmap_addr, unmap_size))
-                }
-                (MRemapFlags::MayMove, SizeType::Growing) => {
-                    if ret_addr.is_none() {
-                        // We must need to do mmap. Thus unmap the old range
-                        Some((old_addr, old_size))
-                    } else {
-                        // We must choose to reuse the old range. Thus, no need to unmap
-                        None
-                    }
-                }
-                (MRemapFlags::FixedAddr(new_addr), _) => {
-                    let new_range = VMRange::new_with_size(new_addr, new_size)?;
-                    if new_range.overlap_with(&old_range) {
-                        return_errno!(EINVAL, "new range cannot overlap with the old one");
-                    }
-                    Some((old_addr, old_size))
-                }
-                _ => None,
-            };
-
-            // Perform mmap and munmap if needed
-            if let Some(mmap_options) = need_mmap {
-                let mmap_addr = self.mmap(&mmap_options)?;
-
-                if ret_addr.is_none() {
-                    ret_addr = Some(mmap_addr);
-                }
-            }
-
-            if let Some((addr, size)) = need_munmap {
-                self.munmap(addr, size).expect("never fail");
-            }
-
-            debug_assert!(ret_addr.is_some());
-            Ok(ret_addr.unwrap())
-        }
+        let containing_vma = {
+            let bound = old_range.start();
+            // Get the VMA whose start address is smaller than but closest to the old range's start address
+            let mut vmas_cursor = self.vmas.upper_bound_mut(Bound::Included(&bound));
+            while !vmas_cursor.is_null()
+                && vmas_cursor.get().unwrap().vma().start() <= old_range.end()
+            {
+                let vma = &vmas_cursor.get().unwrap().vma();
+                // The old range must be contained in one single VMA
+                if vma.pid() == current_pid && vma.is_superset_of(&old_range) {
+                    break;
+                } else {
+                    vmas_cursor.move_next();
+                    continue;
+                }
+            }
+            if vmas_cursor.is_null() {
+                return_errno!(EFAULT, "old range is not a valid vma range");
+            }
+            vmas_cursor.get().unwrap().vma().clone()
+        };
+
+        return self.parse(options, &containing_vma);
     }

     pub fn mprotect(&mut self, addr: usize, size: usize, new_perms: VMPerms) -> Result<()> {
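The merge_all_vmas called above (added in the next hunk) walks the VMA tree with a cursor, which can be hard to follow. Its effect is the same as this standalone sketch over a sorted list of (start, end, perms) triples (illustration only; the real code also checks pid and write-back files via can_merge_vmas):

    fn merge_sorted(ranges: &mut Vec<(usize, usize, u8)>) {
        let mut i = 0;
        while i + 1 < ranges.len() {
            let (_, a_end, a_perms) = ranges[i];
            let (b_start, b_end, b_perms) = ranges[i + 1];
            if a_end == b_start && a_perms == b_perms {
                ranges[i].1 = b_end;  // extend the left range over the right one
                ranges.remove(i + 1); // drop the right one and retry at the same index
            } else {
                i += 1;
            }
        }
    }
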
@@ -635,13 +449,50 @@
         return used_size as f32 / totol_size as f32;
     }

+    fn merge_all_vmas(&mut self) {
+        let mut vmas_cursor = self.vmas.cursor_mut();
+        vmas_cursor.move_next(); // move to the first element of the tree
+        while !vmas_cursor.is_null() {
+            let vma_a = vmas_cursor.get().unwrap().vma();
+            if vma_a.size() == 0 {
+                vmas_cursor.move_next();
+                continue;
+            }
+
+            // Peek at the next element without moving the cursor
+            let vma_b = vmas_cursor.peek_next().get().unwrap().vma().clone();
+            if VMArea::can_merge_vmas(vma_a, &vma_b) {
+                let merged_vmas = {
+                    let mut new_vma = vma_a.clone();
+                    new_vma.set_end(vma_b.end());
+                    new_vma
+                };
+                let new_obj = VMAObj::new_vma_obj(merged_vmas);
+                vmas_cursor.replace_with(new_obj);
+                // Move the cursor to vma_b
+                vmas_cursor.move_next();
+                let removed_vma = *vmas_cursor.remove().unwrap();
+                debug_assert!(removed_vma.vma().is_the_same_to(&vma_b));
+
+                // The remove operation moves the cursor to the next element. Move it back
+                vmas_cursor.move_prev();
+            } else {
+                // These two VMAs cannot be merged; just move to the next one
+                vmas_cursor.move_next();
+                continue;
+            }
+        }
+    }
+
     // Returns whether the requested range is free
     fn is_free_range(&self, request_range: &VMRange) -> bool {
-        self.range.is_superset_of(request_range)
-            && self
-                .vmas
-                .iter()
-                .any(|vma_obj| vma_obj.vma().range().is_superset_of(request_range) == true)
+        self.free_manager.is_free_range(request_range)
+    }
+}
+
+impl VMRemapParser for ChunkManager {
+    fn is_free_range(&self, request_range: &VMRange) -> bool {
+        self.is_free_range(request_range)
     }
 }

diff --git a/src/libos/src/vm/vm_manager.rs b/src/libos/src/vm/vm_manager.rs
index 3ede121d..f0ab11d9 100644
--- a/src/libos/src/vm/vm_manager.rs
+++ b/src/libos/src/vm/vm_manager.rs
@@ -412,7 +412,87 @@ impl VMManager {
     }

     pub fn mremap(&self, options: &VMRemapOptions) -> Result<usize> {
-        return_errno!(ENOSYS, "Under development");
+        let old_addr = options.old_addr();
+        let old_size = options.old_size();
+        let old_range = VMRange::new_with_size(old_addr, old_size)?;
+        let new_size = options.new_size();
+        let size_type = VMRemapSizeType::new(&old_size, &new_size);
+        let current = current!();
+
+        // Try merging all connecting chunks
+        {
+            let mut merged_vmas = current.vm().merge_all_single_vma_chunks()?;
+            let mut internal_manager = self.internal.lock().unwrap();
+            while let Some(merged_vma) = merged_vmas.pop() {
+                internal_manager.add_new_chunk(&current, merged_vma);
+            }
+            internal_manager.clean_single_vma_chunks();
+        }
+
+        // Determine the chunk of the old range
+        let chunk = {
+            let process_mem_chunks = current.vm().mem_chunks().read().unwrap();
+            let chunk = process_mem_chunks
+                .iter()
+                .find(|&chunk| chunk.range().is_superset_of(&old_range));
+            if chunk.is_none() {
+                return_errno!(ENOMEM, "invalid range");
+            }
+
+            chunk.unwrap().clone()
+        };
+
+        // Parse the mremap options into mmap options and munmap options
+        let remap_result_option = match chunk.internal() {
+            ChunkType::MultiVMA(manager) => manager
+                .lock()
+                .unwrap()
+                .chunk_manager()
+                .parse_mremap_options(options),
+            ChunkType::SingleVMA(vma) => {
+                self.parse_mremap_options_for_single_vma_chunk(options, vma)
+            }
+        }?;
+        trace!("mremap options after parsing = {:?}", remap_result_option);
+
+        let ret_addr = if let Some(mmap_options) = remap_result_option.mmap_options() {
+            let mmap_addr = self.mmap(mmap_options);
+
+            // FIXME: For the MRemapFlags::MayMove flag, we check whether the preferred range is free when
+            // parsing the options. But no lock is held after that check, so the mmap can still fail. In
+            // that case, we should try the mmap again.
+            if mmap_addr.is_err() && remap_result_option.may_move() {
+                return_errno!(
+                    EAGAIN,
+                    "there might still be space for this mremap request"
+                );
+            }
+
+            if remap_result_option.mmap_result_addr().is_none() {
+                mmap_addr.unwrap()
+            } else {
+                remap_result_option.mmap_result_addr().unwrap()
+            }
+        } else {
+            old_addr
+        };
+
+        if let Some((munmap_addr, munmap_size)) = remap_result_option.munmap_args() {
+            self.munmap(*munmap_addr, *munmap_size)
+                .expect("shouldn't fail");
+        }
+
+        return Ok(ret_addr);
+    }
+
+    fn parse_mremap_options_for_single_vma_chunk(
+        &self,
+        options: &VMRemapOptions,
+        chunk_vma: &SgxMutex<VMArea>,
+    ) -> Result<VMRemapResult> {
+        let mut vm_manager = self.internal.lock().unwrap();
+        let chunk_vma = chunk_vma.lock().unwrap();
+        vm_manager.parse(options, &chunk_vma)
     }

     // When process is exiting, free all owned chunks
@@ -711,4 +791,16 @@
             .free_manager
             .find_free_range_internal(size, align, addr);
     }
+
+    pub fn clean_single_vma_chunks(&mut self) {
+        self.chunks
+            .drain_filter(|chunk| chunk.is_single_vma_chunk_should_be_removed())
+            .collect::<Vec<Arc<Chunk>>>();
+    }
+}
+
+impl VMRemapParser for InternalVMManager {
+    fn is_free_range(&self, request_range: &VMRange) -> bool {
+        self.free_manager.is_free_range(request_range)
+    }
 }
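As the FIXME above notes, the MayMove fast path checks the preferred range without holding a lock across the subsequent mmap, so mremap can spuriously fail with EAGAIN. A caller-side sketch of the intended recovery (hypothetical retry loop, assuming the crate's error type exposes an errno() accessor; the real syscall layer may handle this differently):

    fn mremap_with_retry(vm: &VMManager, options: &VMRemapOptions) -> Result<usize> {
        const MAX_RETRIES: usize = 3;
        for _ in 0..MAX_RETRIES {
            match vm.mremap(options) {
                Err(e) if e.errno() == EAGAIN => continue, // raced with another mmap; try again
                other => return other,
            }
        }
        vm.mremap(options) // last attempt; let EAGAIN propagate
    }
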
diff --git a/src/libos/src/vm/vm_util.rs b/src/libos/src/vm/vm_util.rs
index 13caa87b..39827f4d 100644
--- a/src/libos/src/vm/vm_util.rs
+++ b/src/libos/src/vm/vm_util.rs
@@ -195,20 +195,20 @@ impl VMMapOptions {
 }

 #[derive(Clone, Copy, PartialEq)]
-pub enum SizeType {
+pub enum VMRemapSizeType {
     Same,
     Shrinking,
     Growing,
 }

-impl SizeType {
+impl VMRemapSizeType {
     pub fn new(old_size: &usize, new_size: &usize) -> Self {
         if new_size == old_size {
-            SizeType::Same
+            VMRemapSizeType::Same
         } else if new_size < old_size {
-            SizeType::Shrinking
+            VMRemapSizeType::Shrinking
         } else {
-            SizeType::Growing
+            VMRemapSizeType::Growing
         }
     }
 }
@@ -274,3 +274,257 @@ impl VMRemapOptions {
         self.flags
     }
 }
+
+#[derive(Debug)]
+pub struct VMRemapResult {
+    mmap_options: Option<VMMapOptions>,
+    // For the MRemapFlags::MayMove, growing-size case:
+    // If mmap_result_addr is None, we need to mmap a new range and unmap the old range.
+    // If it is Some, the address is already determined: we just mmap right after the old range
+    // and no munmap is needed.
+    mmap_result_addr: Option<usize>,
+    munmap_args: Option<(usize, usize)>, // (munmap_addr, munmap_size)
+    // No lock is held between parsing the mremap options and doing the mmap/munmap. If
+    // MRemapFlags::MayMove is specified, there can be enough free space for the desired address
+    // and size while the options are parsed, but that space may be taken by other threads or
+    // processes by the time the actual mmap is done. In that case, check this field: if true,
+    // the mmap should be attempted again.
+    may_move: bool,
+}
+
+impl VMRemapResult {
+    pub fn new(
+        mmap_options: Option<VMMapOptions>,
+        mmap_result_addr: Option<usize>,
+        munmap_args: Option<(usize, usize)>,
+        may_move: bool,
+    ) -> Self {
+        Self {
+            mmap_options,
+            mmap_result_addr,
+            munmap_args,
+            may_move,
+        }
+    }
+
+    pub fn mmap_options(&self) -> &Option<VMMapOptions> {
+        &self.mmap_options
+    }
+
+    pub fn mmap_result_addr(&self) -> &Option<usize> {
+        &self.mmap_result_addr
+    }
+
+    pub fn munmap_args(&self) -> &Option<(usize, usize)> {
+        &self.munmap_args
+    }
+
+    pub fn may_move(&self) -> bool {
+        self.may_move
+    }
+}
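For orientation, this is how a caller is expected to combine the three fields; compare VMManager::mremap earlier in this patch (consolidated sketch, not a new API):

    fn apply(vm: &VMManager, result: &VMRemapResult, old_addr: usize) -> Result<usize> {
        let ret_addr = if let Some(mmap_options) = result.mmap_options() {
            let mmap_addr = vm.mmap(mmap_options)?; // may fail spuriously; see may_move()
            match result.mmap_result_addr() {
                Some(addr) => *addr, // address was already known while parsing
                None => mmap_addr,   // address only known after the mmap
            }
        } else {
            old_addr // no mmap needed, e.g. a pure shrink
        };
        if let Some((munmap_addr, munmap_size)) = result.munmap_args() {
            vm.munmap(*munmap_addr, *munmap_size)?;
        }
        Ok(ret_addr)
    }
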
+
+pub trait VMRemapParser {
+    fn parse(&self, options: &VMRemapOptions, vma: &VMArea) -> Result<VMRemapResult> {
+        let old_addr = options.old_addr();
+        let old_size = options.old_size();
+        let old_range = VMRange::new_with_size(old_addr, old_size)?;
+        let new_size = options.new_size();
+        let flags = options.flags();
+        let size_type = VMRemapSizeType::new(&old_size, &new_size);
+
+        // Get the memory permissions of the old range
+        let perms = vma.perms();
+        // Get the write-back file of the old range if there is one.
+        let writeback_file = vma.writeback_file();
+
+        // FIXME: The current implementation of file-backed memory mremap has the limitation that if a
+        // SUBRANGE of a previous file-backed mmap with MAP_SHARED is then mremap-ed with MREMAP_MAYMOVE,
+        // there will be two VMAs backed by the same file. On Linux, writing to either VMA or to the file
+        // updates the others equally. We won't be able to support this before we really have paging.
+        // Thus, if the old_range is not equal to a recorded VMA, we just return an error.
+        if writeback_file.is_some() && &old_range != vma.range() {
+            return_errno!(EINVAL, "known limitation")
+        }
+
+        // Implement mremap as one optional mmap followed by one optional munmap.
+        //
+        // The exact arguments for the mmap and munmap are determined by the values of MRemapFlags,
+        // VMRemapSizeType and writeback_file. There are 18 combinations of MRemapFlags, VMRemapSizeType
+        // and writeback_file in total. As some combinations result in the same mmap and munmap operations,
+        // the following code only needs to match the patterns of (MRemapFlags, VMRemapSizeType,
+        // writeback_file) below and treat each case accordingly.
+
+        // Determine whether we need to mmap. And when possible, determine the returned address
+        let (need_mmap, mut ret_addr) = match (flags, size_type, writeback_file) {
+            (MRemapFlags::None, VMRemapSizeType::Growing, None) => {
+                let mmap_opts = VMMapOptionsBuilder::default()
+                    .size(new_size - old_size)
+                    .addr(VMMapAddr::Need(old_range.end()))
+                    .perms(perms)
+                    .initializer(VMInitializer::DoNothing())
+                    .build()?;
+                let ret_addr = Some(old_addr);
+                (Some(mmap_opts), ret_addr)
+            }
+            (MRemapFlags::None, VMRemapSizeType::Growing, Some((backed_file, offset))) => {
+                // Update the write-back file offset
+                let new_writeback_file = Some((backed_file.clone(), offset + vma.size()));
+                let vm_initializer_for_new_range = VMInitializer::LoadFromFile {
+                    file: backed_file.clone(),
+                    // A file-backed mremap should start from the end of the previous mmap/mremap file
+                    offset: offset + vma.size(),
+                };
+                let mmap_opts = VMMapOptionsBuilder::default()
+                    .size(new_size - old_size)
+                    .addr(VMMapAddr::Need(old_range.end()))
+                    .perms(perms)
+                    .initializer(vm_initializer_for_new_range)
+                    .writeback_file(new_writeback_file)
+                    .build()?;
+                let ret_addr = Some(old_addr);
+                (Some(mmap_opts), ret_addr)
+            }
+            (MRemapFlags::MayMove, VMRemapSizeType::Growing, None) => {
+                let prefered_new_range =
+                    VMRange::new_with_size(old_addr + old_size, new_size - old_size)?;
+                if self.is_free_range(&prefered_new_range) {
+                    // We don't need to move the old range
+                    let mmap_ops = VMMapOptionsBuilder::default()
+                        .size(prefered_new_range.size())
+                        .addr(VMMapAddr::Need(prefered_new_range.start()))
+                        .perms(perms)
+                        .initializer(VMInitializer::DoNothing())
+                        .build()?;
+                    (Some(mmap_ops), Some(old_addr))
+                } else {
+                    // We need to move the old range to a new range and init the new range
+                    let vm_initializer_for_new_range = VMInitializer::CopyFrom { range: old_range };
+                    let mmap_ops = VMMapOptionsBuilder::default()
+                        .size(new_size)
+                        .addr(VMMapAddr::Any)
+                        .perms(perms)
+                        .initializer(vm_initializer_for_new_range)
+                        .build()?;
+                    // The returned address cannot be determined yet; it is only known after calling mmap
+                    let ret_addr = None;
+                    (Some(mmap_ops), ret_addr)
+                }
+            }
+            (MRemapFlags::MayMove, VMRemapSizeType::Growing, Some((backed_file, offset))) => {
+                let prefered_new_range =
+                    VMRange::new_with_size(old_addr + old_size, new_size - old_size)?;
+                if self.is_free_range(&prefered_new_range) {
+                    // We don't need to move the old range
+                    let vm_initializer_for_new_range = VMInitializer::LoadFromFile {
+                        file: backed_file.clone(),
+                        // A file-backed mremap should start from the end of the previous mmap/mremap file
+                        offset: offset + vma.size(),
+                    };
+                    // The write-back file should start from the new offset
+                    let new_writeback_file = Some((backed_file.clone(), offset + vma.size()));
+                    let mmap_ops = VMMapOptionsBuilder::default()
+                        .size(prefered_new_range.size())
+                        .addr(VMMapAddr::Need(prefered_new_range.start()))
+                        .perms(perms)
+                        .initializer(vm_initializer_for_new_range)
+                        .writeback_file(new_writeback_file)
+                        .build()?;
+                    (Some(mmap_ops), Some(old_addr))
+                } else {
+                    // We need to move the old range to a new range and init the new range
+                    let vm_initializer_for_new_range = {
+                        let copy_end = vma.end();
+                        let copy_range = VMRange::new(old_range.start(), copy_end)?;
+                        let reread_file_start_offset = copy_end - vma.start();
+                        VMInitializer::CopyOldAndReadNew {
+                            old_range: copy_range,
+                            file: backed_file.clone(),
+                            offset: reread_file_start_offset,
+                        }
+                    };
+                    let new_writeback_file = Some((backed_file.clone(), *offset));
+                    let mmap_ops = VMMapOptionsBuilder::default()
+                        .size(new_size)
+                        .addr(VMMapAddr::Any)
+                        .perms(perms)
+                        .initializer(vm_initializer_for_new_range)
+                        .writeback_file(new_writeback_file)
+                        .build()?;
+                    // The returned address cannot be determined yet; it is only known after calling mmap
+                    let ret_addr = None;
+                    (Some(mmap_ops), ret_addr)
+                }
+            }
+            (MRemapFlags::FixedAddr(new_addr), _, None) => {
+                let vm_initializer_for_new_range = VMInitializer::CopyFrom { range: old_range };
+                let mmap_opts = VMMapOptionsBuilder::default()
+                    .size(new_size)
+                    .addr(VMMapAddr::Force(new_addr))
+                    .perms(perms)
+                    .initializer(vm_initializer_for_new_range)
+                    .build()?;
+                let ret_addr = Some(new_addr);
+                (Some(mmap_opts), ret_addr)
+            }
+            (MRemapFlags::FixedAddr(new_addr), _, Some((backed_file, offset))) => {
+                let vm_initializer_for_new_range = {
+                    let copy_end = vma.end();
+                    let copy_range = VMRange::new(old_range.start(), copy_end)?;
+                    let reread_file_start_offset = copy_end - vma.start();
+                    VMInitializer::CopyOldAndReadNew {
+                        old_range: copy_range,
+                        file: backed_file.clone(),
+                        offset: reread_file_start_offset,
+                    }
+                };
+                let new_writeback_file = Some((backed_file.clone(), *offset));
+                let mmap_opts = VMMapOptionsBuilder::default()
+                    .size(new_size)
+                    .addr(VMMapAddr::Force(new_addr))
+                    .perms(perms)
+                    .initializer(vm_initializer_for_new_range)
+                    .writeback_file(new_writeback_file)
+                    .build()?;
+                let ret_addr = Some(new_addr);
+                (Some(mmap_opts), ret_addr)
+            }
+            _ => (None, Some(old_addr)),
+        };
+
+        let need_munmap = match (flags, size_type) {
+            (MRemapFlags::None, VMRemapSizeType::Shrinking)
+            | (MRemapFlags::MayMove, VMRemapSizeType::Shrinking) => {
+                let unmap_addr = old_addr + new_size;
+                let unmap_size = old_size - new_size;
+                Some((unmap_addr, unmap_size))
+            }
+            (MRemapFlags::MayMove, VMRemapSizeType::Growing) => {
+                if ret_addr.is_none() {
+                    // We definitely need to mmap. Thus unmap the old range
+                    Some((old_addr, old_size))
+                } else {
+                    // We choose to reuse the old range. Thus, there is no need to unmap
+                    None
+                }
+            }
+            (MRemapFlags::FixedAddr(new_addr), _) => {
+                let new_range = VMRange::new_with_size(new_addr, new_size)?;
+                if new_range.overlap_with(&old_range) {
+                    return_errno!(EINVAL, "new range cannot overlap with the old one");
+                }
+                Some((old_addr, old_size))
+            }
+            _ => None,
+        };
+
+        let may_move = matches!(flags, MRemapFlags::MayMove);
+        Ok(VMRemapResult::new(
+            need_mmap,
+            ret_addr,
+            need_munmap,
+            may_move,
+        ))
+    }
+
+    fn is_free_range(&self, request_range: &VMRange) -> bool;
+}

diff --git a/test/mmap/main.c b/test/mmap/main.c
index 7d5ff74f..44010d4d 100644
--- a/test/mmap/main.c
+++ b/test/mmap/main.c
@@ -1276,13 +1276,11 @@ static test_case_t test_cases[] = {
     TEST_CASE(test_munmap_with_null_addr),
     TEST_CASE(test_munmap_with_zero_len),
     TEST_CASE(test_munmap_with_non_page_aligned_len),
-#ifdef MREMAP_SUPPORTED
     TEST_CASE(test_mremap),
     TEST_CASE(test_mremap_subrange),
     TEST_CASE(test_mremap_with_fixed_addr),
     TEST_CASE(test_file_backed_mremap),
    TEST_CASE(test_file_backed_mremap_mem_may_move),
-#endif
     TEST_CASE(test_mprotect_once),
     TEST_CASE(test_mprotect_twice),
     TEST_CASE(test_mprotect_triple),