Improve userspace VM management
Occlum is a single-address-space library OS. Previously, userspace memory was divided per process, and all of a process's memory was allocated when the process was created, which led to a lot of wasted space and complicated configuration. In the current implementation, the whole userspace is managed as a memory pool that consists of chunks. There are two kinds of chunks: (1) Single VMA chunk: a chunk with only one VMA, owned by exactly one process. (2) Multi VMA chunk: a chunk of the default chunk size that can hold many VMAs and can be shared by different processes. This design mainly achieves two goals: (1) Simpler configuration: users don't need to configure process.default_mmap_size anymore, and multiple processes running in the same Occlum instance can use dramatically different amounts of memory. (2) Better performance: two-level management (chunks & VMAs) reduces the time for finding, inserting, deleting, and iterating.
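To make the two-level design concrete, here is a minimal, self-contained Rust sketch. It borrows names from this commit (CHUNK_DEFAULT_SIZE, single vs. multi VMA chunks), but the dispatch policy shown is an illustrative assumption; the actual decision logic lives in the VM manager code in this diff.

    // Illustrative sketch only, not the actual Occlum implementation.
    const CHUNK_DEFAULT_SIZE: usize = 32 * 1024 * 1024; // 32MB, as in chunk.rs

    #[derive(Debug)]
    enum ChunkKind {
        SingleVma { size: usize }, // one VMA, owned by exactly one process
        MultiVma { free: usize },  // many VMAs, shareable by multiple processes
    }

    // Assumed policy: requests of at least the default chunk size get a
    // dedicated single-VMA chunk; smaller requests share multi-VMA chunks.
    fn pick_chunk(request: usize, pool: &mut Vec<ChunkKind>) -> usize {
        if request >= CHUNK_DEFAULT_SIZE {
            pool.push(ChunkKind::SingleVma { size: request });
            return pool.len() - 1;
        }
        // First fit among the existing shared chunks.
        for (idx, chunk) in pool.iter_mut().enumerate() {
            if let ChunkKind::MultiVma { free } = chunk {
                if *free >= request {
                    *free -= request;
                    return idx;
                }
            }
        }
        // No shared chunk has room: add a fresh default-sized chunk.
        pool.push(ChunkKind::MultiVma { free: CHUNK_DEFAULT_SIZE - request });
        pool.len() - 1
    }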
This commit is contained in:
parent 9d63d396db
commit 6dd73c64b5
				| @ -103,7 +103,6 @@ enclave { | ||||
|          */ | ||||
|         public int occlum_ecall_kill(int pid, int sig); | ||||
| 
 | ||||
| 
 | ||||
|         /* | ||||
|          * Broadcast interrupts to LibOS threads. | ||||
|          * | ||||
|  | ||||
42 src/libos/Cargo.lock generated
							| @ -8,11 +8,14 @@ dependencies = [ | ||||
|  "atomic", | ||||
|  "bitflags", | ||||
|  "bitvec", | ||||
|  "ctor", | ||||
|  "derive_builder", | ||||
|  "goblin", | ||||
|  "intrusive-collections", | ||||
|  "itertools", | ||||
|  "lazy_static", | ||||
|  "log", | ||||
|  "memoffset", | ||||
|  "memoffset 0.6.1", | ||||
|  "rcore-fs", | ||||
|  "rcore-fs-devfs", | ||||
|  "rcore-fs-mountfs", | ||||
| @ -110,6 +113,16 @@ dependencies = [ | ||||
|  "bitflags", | ||||
| ] | ||||
| 
 | ||||
| [[package]] | ||||
| name = "ctor" | ||||
| version = "0.1.16" | ||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||||
| checksum = "7fbaabec2c953050352311293be5c6aba8e141ba19d6811862b232d6fd020484" | ||||
| dependencies = [ | ||||
|  "quote", | ||||
|  "syn", | ||||
| ] | ||||
| 
 | ||||
| [[package]] | ||||
| name = "darling" | ||||
| version = "0.10.2" | ||||
| @ -227,6 +240,24 @@ version = "1.0.1" | ||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||||
| checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" | ||||
| 
 | ||||
| [[package]] | ||||
| name = "intrusive-collections" | ||||
| version = "0.9.2" | ||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||||
| checksum = "eb4ed164b4cf1c6bd6e18c097490331a0e58fbb0f39e8f6b5ac7f168006511cd" | ||||
| dependencies = [ | ||||
|  "memoffset 0.5.6", | ||||
| ] | ||||
| 
 | ||||
| [[package]] | ||||
| name = "itertools" | ||||
| version = "0.10.1" | ||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||||
| checksum = "69ddb889f9d0d08a67338271fa9b62996bc788c7796a5c18cf057420aaed5eaf" | ||||
| dependencies = [ | ||||
|  "either", | ||||
| ] | ||||
| 
 | ||||
| [[package]] | ||||
| name = "itoa" | ||||
| version = "0.4.5" | ||||
| @ -258,6 +289,15 @@ dependencies = [ | ||||
|  "cfg-if", | ||||
| ] | ||||
| 
 | ||||
| [[package]] | ||||
| name = "memoffset" | ||||
| version = "0.5.6" | ||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||||
| checksum = "043175f069eda7b85febe4a74abbaeff828d9f8b448515d3151a14a3542811aa" | ||||
| dependencies = [ | ||||
|  "autocfg 1.0.1", | ||||
| ] | ||||
| 
 | ||||
| [[package]] | ||||
| name = "memoffset" | ||||
| version = "0.6.1" | ||||
|  | ||||
| @ -27,6 +27,8 @@ serde = { path = "../../deps/serde-sgx/serde", features = ["derive"] } | ||||
| serde_json = { path = "../../deps/serde-json-sgx" } | ||||
| memoffset = "0.6.1" | ||||
| scroll = { version = "0.10.2", default-features = false } | ||||
| itertools = { version = "0.10.0", default-features = false, features = ["use_alloc"]  } | ||||
| ctor = "0.1" | ||||
| 
 | ||||
| [patch.'https://github.com/apache/teaclave-sgx-sdk.git'] | ||||
| sgx_tstd = { path = "../../deps/rust-sgx-sdk/sgx_tstd" } | ||||
| @ -48,3 +50,4 @@ sgx_tse = { path = "../../deps/rust-sgx-sdk/sgx_tse" } | ||||
| sgx_tcrypto = { path = "../../deps/rust-sgx-sdk/sgx_tcrypto" } | ||||
| sgx_cov = { path = "../../deps/rust-sgx-sdk/sgx_cov", optional = true } | ||||
| goblin = { version = "0.3.4", default-features = false, features = ["elf64", "elf32", "endian_fd"] } | ||||
| intrusive-collections = "0.9" | ||||
|  | ||||
| @ -15,6 +15,7 @@ use crate::util::log::LevelFilter; | ||||
| use crate::util::mem_util::from_untrusted::*; | ||||
| use crate::util::resolv_conf_util::{parse_resolv_conf, write_resolv_conf}; | ||||
| use crate::util::sgx::allow_debug as sgx_allow_debug; | ||||
| use crate::vm::USER_SPACE_VM_MANAGER; | ||||
| use sgx_tse::*; | ||||
| 
 | ||||
| pub static mut INSTANCE_DIR: String = String::new(); | ||||
|  | ||||
| @ -14,7 +14,7 @@ impl MemInfoINode { | ||||
| impl ProcINode for MemInfoINode { | ||||
|     fn generate_data_in_bytes(&self) -> vfs::Result<Vec<u8>> { | ||||
|         let total_ram = USER_SPACE_VM_MANAGER.get_total_size(); | ||||
|         let free_ram = USER_SPACE_VM_MANAGER.get_free_size(); | ||||
|         let free_ram = current!().vm().get_free_size(); | ||||
|         Ok(format!( | ||||
|             "MemTotal:       {} kB\n\ | ||||
|              MemFree:        {} kB\n\ | ||||
|  | ||||
| @ -9,6 +9,7 @@ | ||||
| #![feature(alloc_layout_extra)] | ||||
| #![feature(concat_idents)] | ||||
| #![feature(trace_macros)] | ||||
| #![feature(drain_filter)] | ||||
| // for !Send in rw_lock
 | ||||
| #![feature(negative_impls)] | ||||
| // for may_dangle in rw_lock
 | ||||
| @ -54,6 +55,8 @@ extern crate serde; | ||||
| extern crate serde_json; | ||||
| #[macro_use] | ||||
| extern crate memoffset; | ||||
| extern crate ctor; | ||||
| extern crate intrusive_collections; | ||||
| extern crate resolv_conf; | ||||
| 
 | ||||
| use sgx_trts::libc; | ||||
|  | ||||
| @ -26,7 +26,7 @@ pub fn do_sysinfo() -> Result<sysinfo_t> { | ||||
|     let info = sysinfo_t { | ||||
|         uptime: time::up_time::get().unwrap().as_secs() as i64, // Duration can't be negative
 | ||||
|         totalram: USER_SPACE_VM_MANAGER.get_total_size() as u64, | ||||
|         freeram: USER_SPACE_VM_MANAGER.get_free_size() as u64, | ||||
|         freeram: current!().vm().get_free_size() as u64, | ||||
|         procs: table::get_all_processes().len() as u16, | ||||
|         mem_unit: 1, | ||||
|         ..Default::default() | ||||
|  | ||||
| @ -2,6 +2,7 @@ pub use sgx_trts::libc; | ||||
| pub use sgx_trts::libc::off_t; | ||||
| pub use sgx_types::*; | ||||
| 
 | ||||
| pub use core::intrinsics::unreachable; | ||||
| use std; | ||||
| pub use std::cell::{Cell, RefCell}; | ||||
| pub use std::cmp::{max, min}; | ||||
|  | ||||
| @ -9,6 +9,7 @@ use super::{table, ProcessRef, TermStatus, ThreadRef, ThreadStatus}; | ||||
| use crate::prelude::*; | ||||
| use crate::signal::{KernelSignal, SigNum}; | ||||
| use crate::syscall::CpuContext; | ||||
| use crate::vm::USER_SPACE_VM_MANAGER; | ||||
| 
 | ||||
| pub fn do_exit_group(status: i32, curr_user_ctxt: &mut CpuContext) -> Result<isize> { | ||||
|     if is_vforked_child_process() { | ||||
| @ -103,6 +104,8 @@ fn exit_process(thread: &ThreadRef, term_status: TermStatus) { | ||||
|     }; | ||||
|     // Lock the current process
 | ||||
|     let mut process_inner = process.inner(); | ||||
|     // Clean used VM
 | ||||
|     USER_SPACE_VM_MANAGER.free_chunks_when_exit(thread); | ||||
| 
 | ||||
|     // The parent is the idle process
 | ||||
|     if parent_inner.is_none() { | ||||
| @ -201,6 +204,9 @@ fn exit_process_for_execve( | ||||
| 
 | ||||
|     // Lock the current process
 | ||||
|     let mut process_inner = process.inner(); | ||||
|     // Clean used VM
 | ||||
|     USER_SPACE_VM_MANAGER.free_chunks_when_exit(thread); | ||||
| 
 | ||||
|     let mut new_parent_inner = new_parent_ref.inner(); | ||||
|     let pid = process.pid(); | ||||
| 
 | ||||
|  | ||||
| @ -710,7 +710,7 @@ fn do_syscall(user_context: &mut CpuContext) { | ||||
|             retval | ||||
|         } | ||||
|     }; | ||||
|     trace!("Retval = {:?}", retval); | ||||
|     trace!("Retval = 0x{:x}", retval); | ||||
| 
 | ||||
|     // Put the return value into user_context.rax, except for syscalls that may
 | ||||
|     // modify user_context directly. Currently, there are three such syscalls:
 | ||||
|  | ||||
239 src/libos/src/vm/chunk.rs Normal file
							| @ -0,0 +1,239 @@ | ||||
| use super::*; | ||||
| 
 | ||||
| use super::vm_area::VMArea; | ||||
| use super::vm_chunk_manager::ChunkManager; | ||||
| use super::vm_perms::VMPerms; | ||||
| use super::vm_util::*; | ||||
| use crate::process::ProcessRef; | ||||
| use crate::process::ThreadRef; | ||||
| use std::cmp::Ordering; | ||||
| use std::collections::HashSet; | ||||
| use std::hash::{Hash, Hasher}; | ||||
| 
 | ||||
| // For a single VMA chunk, the vma struct doesn't need to update the pid field, because all the chunks are recorded by the process VM already.
 | ||||
| pub const DUMMY_CHUNK_PROCESS_ID: pid_t = 0; | ||||
| // Default chunk size: 32MB
 | ||||
| pub const CHUNK_DEFAULT_SIZE: usize = 32 * 1024 * 1024; | ||||
| 
 | ||||
| pub type ChunkID = usize; | ||||
| pub type ChunkRef = Arc<Chunk>; | ||||
| 
 | ||||
| pub struct Chunk { | ||||
|     range: VMRange, | ||||
|     internal: ChunkType, | ||||
| } | ||||
| 
 | ||||
| impl Hash for Chunk { | ||||
|     fn hash<H: Hasher>(&self, state: &mut H) { | ||||
|         self.range.hash(state); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| impl Ord for Chunk { | ||||
|     fn cmp(&self, other: &Self) -> Ordering { | ||||
|         self.range.start().cmp(&other.range.start()) | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| impl PartialOrd for Chunk { | ||||
|     fn partial_cmp(&self, other: &Self) -> Option<Ordering> { | ||||
|         Some(self.cmp(other)) | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| impl PartialEq for Chunk { | ||||
|     fn eq(&self, other: &Self) -> bool { | ||||
|         self.range == other.range | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| impl Eq for Chunk {} | ||||
| 
 | ||||
| impl Debug for Chunk { | ||||
|     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||||
|         match self.internal() { | ||||
|             ChunkType::SingleVMA(vma) => write!(f, "Single VMA chunk: {:?}", vma), | ||||
|             ChunkType::MultiVMA(internal_manager) => write!(f, "default chunk: {:?}", self.range()), | ||||
|         } | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| impl Chunk { | ||||
|     pub fn range(&self) -> &VMRange { | ||||
|         &self.range | ||||
|     } | ||||
| 
 | ||||
|     pub fn internal(&self) -> &ChunkType { | ||||
|         &self.internal | ||||
|     } | ||||
| 
 | ||||
|     pub fn free_size(&self) -> usize { | ||||
|         match self.internal() { | ||||
|             ChunkType::SingleVMA(vma) => 0, // for single VMA chunk, there is no free space
 | ||||
|             ChunkType::MultiVMA(internal_manager) => internal_manager.lock().unwrap().free_size(), | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     pub fn new_default_chunk(vm_range: VMRange) -> Result<Self> { | ||||
|         let internal_manager = ChunkInternal::new(vm_range)?; | ||||
|         Ok(Self { | ||||
|             range: vm_range, | ||||
|             internal: ChunkType::MultiVMA(SgxMutex::new(internal_manager)), | ||||
|         }) | ||||
|     } | ||||
| 
 | ||||
|     pub fn new_single_vma_chunk(vm_range: VMRange, options: &VMMapOptions) -> Self { | ||||
|         let writeback_file = options.writeback_file().clone(); | ||||
|         let vm_area = VMArea::new( | ||||
|             vm_range.clone(), | ||||
|             *options.perms(), | ||||
|             writeback_file, | ||||
|             DUMMY_CHUNK_PROCESS_ID, | ||||
|         ); | ||||
|         // Initialize the memory of the new range
 | ||||
|         unsafe { | ||||
|             let buf = vm_range.as_slice_mut(); | ||||
|             options.initializer().init_slice(buf); | ||||
|         } | ||||
|         // Set memory permissions
 | ||||
|         if !options.perms().is_default() { | ||||
|             VMPerms::apply_perms(&vm_area, vm_area.perms()); | ||||
|         } | ||||
|         Self::new_chunk_with_vma(vm_area) | ||||
|     } | ||||
| 
 | ||||
|     pub fn new_chunk_with_vma(vma: VMArea) -> Self { | ||||
|         Self { | ||||
|             range: vma.range().clone(), | ||||
|             internal: ChunkType::SingleVMA(SgxMutex::new(vma)), | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     pub fn add_process(&self, current: &ThreadRef) { | ||||
|         match self.internal() { | ||||
|             ChunkType::SingleVMA(vma) => unreachable!(), | ||||
|             ChunkType::MultiVMA(internal_manager) => { | ||||
|                 internal_manager | ||||
|                     .lock() | ||||
|                     .unwrap() | ||||
|                     .add_process(current.process().pid()); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     pub fn mmap(&self, options: &VMMapOptions) -> Result<usize> { | ||||
|         debug_assert!(!self.is_single_vma()); | ||||
|         trace!("try allocate in chunk: {:?}", self); | ||||
|         let mut internal_manager = if let ChunkType::MultiVMA(internal_manager) = &self.internal { | ||||
|             internal_manager.lock().unwrap() | ||||
|         } else { | ||||
|             unreachable!(); | ||||
|         }; | ||||
|         if internal_manager.chunk_manager.free_size() < options.size() { | ||||
|             return_errno!(ENOMEM, "not enough free size; try other chunks"); | ||||
|         } | ||||
|         return internal_manager.chunk_manager.mmap(options); | ||||
|     } | ||||
| 
 | ||||
|     pub fn try_mmap(&self, options: &VMMapOptions) -> Result<usize> { | ||||
|         debug_assert!(!self.is_single_vma()); | ||||
|         // Try to lock the ChunkManager. If locking fails, just return; the caller will try other chunks.
 | ||||
|         let mut internal_manager = if let ChunkType::MultiVMA(internal_manager) = &self.internal { | ||||
|             internal_manager | ||||
|                 .try_lock() | ||||
|                 .map_err(|_| errno!(EAGAIN, "try other chunks"))? | ||||
|         } else { | ||||
|             unreachable!(); | ||||
|         }; | ||||
|         trace!("get lock, try mmap in chunk: {:?}", self); | ||||
|         if internal_manager.chunk_manager().free_size() < options.size() { | ||||
|             return_errno!(ENOMEM, "not enough free size; try other chunks"); | ||||
|         } | ||||
|         internal_manager.chunk_manager().mmap(options) | ||||
|     } | ||||
| 
 | ||||
|     pub fn is_single_vma(&self) -> bool { | ||||
|         if let ChunkType::SingleVMA(_) = self.internal { | ||||
|             true | ||||
|         } else { | ||||
|             false | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     pub fn find_mmap_region(&self, addr: usize) -> Result<VMRange> { | ||||
|         let internal = &self.internal; | ||||
|         match self.internal() { | ||||
|             ChunkType::SingleVMA(vma) => { | ||||
|                 let vma = vma.lock().unwrap(); | ||||
|                 if vma.contains(addr) { | ||||
|                     return Ok(vma.range().clone()); | ||||
|                 } else { | ||||
|                     return_errno!(ESRCH, "addr not found in this chunk") | ||||
|                 } | ||||
|             } | ||||
|             ChunkType::MultiVMA(internal_manager) => { | ||||
|                 return internal_manager | ||||
|                     .lock() | ||||
|                     .unwrap() | ||||
|                     .chunk_manager | ||||
|                     .find_mmap_region(addr); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| #[derive(Debug)] | ||||
| pub enum ChunkType { | ||||
|     SingleVMA(SgxMutex<VMArea>), | ||||
|     MultiVMA(SgxMutex<ChunkInternal>), | ||||
| } | ||||
| 
 | ||||
| #[derive(Debug)] | ||||
| pub struct ChunkInternal { | ||||
|     chunk_manager: ChunkManager, | ||||
|     process_set: HashSet<pid_t>, | ||||
| } | ||||
| 
 | ||||
| const PROCESS_SET_INIT_SIZE: usize = 5; | ||||
| 
 | ||||
| impl ChunkInternal { | ||||
|     pub fn new(vm_range: VMRange) -> Result<Self> { | ||||
|         let chunk_manager = ChunkManager::from(vm_range.start(), vm_range.size())?; | ||||
| 
 | ||||
|         let mut process_set = HashSet::with_capacity(PROCESS_SET_INIT_SIZE); | ||||
|         process_set.insert(current!().process().pid()); | ||||
|         Ok(Self { | ||||
|             chunk_manager, | ||||
|             process_set, | ||||
|         }) | ||||
|     } | ||||
| 
 | ||||
|     pub fn add_process(&mut self, pid: pid_t) { | ||||
|         self.process_set.insert(pid); | ||||
|     } | ||||
| 
 | ||||
|     pub fn chunk_manager(&mut self) -> &mut ChunkManager { | ||||
|         &mut self.chunk_manager | ||||
|     } | ||||
| 
 | ||||
|     pub fn is_owned_by_current_process(&self) -> bool { | ||||
|         let current_pid = current!().process().pid(); | ||||
|         self.process_set.contains(&current_pid) && self.process_set.len() == 1 | ||||
|     } | ||||
| 
 | ||||
|     pub fn free_size(&self) -> usize { | ||||
|         *self.chunk_manager.free_size() | ||||
|     } | ||||
| 
 | ||||
| // Clean the current process's VMAs when munmapping a MultiVMA chunk; return whether this chunk is fully cleaned
 | ||||
|     pub fn clean_multi_vmas(&mut self) -> bool { | ||||
|         let current_pid = current!().process().pid(); | ||||
|         self.chunk_manager.clean_vmas_with_pid(current_pid); | ||||
|         if self.chunk_manager.is_empty() { | ||||
|             self.process_set.remove(&current_pid); | ||||
|             return true; | ||||
|         } else { | ||||
|             return false; | ||||
|         } | ||||
|     } | ||||
| } | ||||
149 src/libos/src/vm/free_space_manager.rs Normal file
							| @ -0,0 +1,149 @@ | ||||
| // Implements free space management for memory.
 | ||||
| // Currently it only uses a simple vector as the base structure.
 | ||||
| //
 | ||||
| // Address-ordered first fit is used to find free ranges.
 | ||||
| 
 | ||||
| use super::vm_util::VMMapAddr; | ||||
| use super::*; | ||||
| 
 | ||||
| static INITIAL_SIZE: usize = 100; | ||||
| 
 | ||||
| #[derive(Debug, Default)] | ||||
| pub struct VMFreeSpaceManager { | ||||
|     free_manager: Vec<VMRange>, // Address-ordered first fit
 | ||||
| } | ||||
| 
 | ||||
| impl VMFreeSpaceManager { | ||||
|     pub fn new(initial_free_range: VMRange) -> Self { | ||||
|         let mut free_manager = Vec::with_capacity(INITIAL_SIZE); | ||||
|         free_manager.push(initial_free_range); | ||||
| 
 | ||||
|         VMFreeSpaceManager { | ||||
|             free_manager: free_manager, | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     pub fn free_size(&self) -> usize { | ||||
|         self.free_manager | ||||
|             .iter() | ||||
|             .fold(0, |acc, free_range| acc + free_range.size()) | ||||
|     } | ||||
| 
 | ||||
|     // TODO: respect options.align when mmap
 | ||||
|     pub fn find_free_range_internal( | ||||
|         &mut self, | ||||
|         size: usize, | ||||
|         align: usize, | ||||
|         addr: VMMapAddr, | ||||
|     ) -> Result<VMRange> { | ||||
|         // Record the minimal free range that satisfies the constraints
 | ||||
|         let mut result_free_range: Option<VMRange> = None; | ||||
|         let mut result_idx: Option<usize> = None; | ||||
|         let mut free_list = &mut self.free_manager; | ||||
| 
 | ||||
|         trace!("find free range, free list = {:?}", free_list); | ||||
| 
 | ||||
|         for (idx, free_range) in free_list.iter().enumerate() { | ||||
|             let mut free_range = { | ||||
|                 if free_range.size() < size { | ||||
|                     continue; | ||||
|                 } | ||||
|                 unsafe { VMRange::from_unchecked(free_range.start(), free_range.end()) } | ||||
|             }; | ||||
| 
 | ||||
|             match addr { | ||||
|                 // Want a minimal free_range
 | ||||
|                 VMMapAddr::Any => {} | ||||
|                 // Prefer to have free_range.start == addr
 | ||||
|                 VMMapAddr::Hint(addr) => { | ||||
|                     if addr % align == 0 | ||||
|                         && free_range.contains(addr) | ||||
|                         && free_range.end() - addr >= size | ||||
|                     { | ||||
|                         free_range.start = addr; | ||||
|                         free_range.end = addr + size; | ||||
|                         self.free_list_update_range(idx, free_range); | ||||
|                         return Ok(free_range); | ||||
|                     } else { | ||||
|                         // Hint failure, record the result but keep iterating.
 | ||||
|                         if result_free_range == None | ||||
|                             || result_free_range.as_ref().unwrap().size() > free_range.size() | ||||
|                         { | ||||
|                             result_free_range = Some(free_range); | ||||
|                             result_idx = Some(idx); | ||||
|                         } | ||||
|                         continue; | ||||
|                     } | ||||
|                 } | ||||
|                 // Must have free_range.start == addr
 | ||||
|                 VMMapAddr::Need(addr) | VMMapAddr::Force(addr) => { | ||||
|                     if free_range.start() > addr { | ||||
|                         return_errno!(ENOMEM, "not enough memory for fixed mmap"); | ||||
|                     } | ||||
|                     if !free_range.contains(addr) { | ||||
|                         continue; | ||||
|                     } | ||||
|                     if free_range.end() - addr < size { | ||||
|                         return_errno!(ENOMEM, "not enough memory for fixed mmap"); | ||||
|                     } | ||||
|                     free_range.start = addr; | ||||
|                     free_range.end = addr + size; | ||||
|                 } | ||||
|             } | ||||
| 
 | ||||
|             result_free_range = Some(free_range); | ||||
|             result_idx = Some(idx); | ||||
|             break; | ||||
|         } | ||||
| 
 | ||||
|         if result_free_range.is_none() { | ||||
|             return_errno!(ENOMEM, "not enough memory"); | ||||
|         } | ||||
| 
 | ||||
|         let index = result_idx.unwrap(); | ||||
|         let result_free_range = { | ||||
|             let free_range = result_free_range.unwrap(); | ||||
|             let start = align_up(free_range.start(), align); | ||||
|             let end = start + size; | ||||
|             VMRange { start, end } | ||||
|         }; | ||||
| 
 | ||||
|         self.free_list_update_range(index, result_free_range); | ||||
|         trace!("after find free range, free list = {:?}", self.free_manager); | ||||
|         return Ok(result_free_range); | ||||
|     } | ||||
| 
 | ||||
|     fn free_list_update_range(&mut self, index: usize, range: VMRange) { | ||||
|         let mut free_list = &mut self.free_manager; | ||||
|         let ranges_after_subtraction = free_list[index].subtract(&range); | ||||
|         debug_assert!(ranges_after_subtraction.len() <= 2); | ||||
|         if ranges_after_subtraction.len() == 0 { | ||||
|             free_list.remove(index); | ||||
|             return; | ||||
|         } | ||||
|         free_list[index] = ranges_after_subtraction[0]; | ||||
|         if ranges_after_subtraction.len() == 2 { | ||||
|             free_list.insert(index + 1, ranges_after_subtraction[1]); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     pub fn add_range_back_to_free_manager(&mut self, dirty_range: &VMRange) -> Result<()> { | ||||
|         let mut free_list = &mut self.free_manager; | ||||
|         free_list.push(*dirty_range); | ||||
|         // Sort and merge small ranges
 | ||||
|         free_list.sort_unstable_by(|range_a, range_b| range_a.start().cmp(&range_b.start())); | ||||
|         let mut idx = 0; | ||||
|         while idx < free_list.len() - 1 { | ||||
|             let right_range = free_list[idx + 1]; | ||||
|             let mut left_range = &mut free_list[idx]; | ||||
|             if left_range.end() == right_range.start() { | ||||
|                 left_range.set_end(right_range.end()); | ||||
|                 free_list.remove(idx + 1); | ||||
|                 continue; | ||||
|             } | ||||
|             idx += 1; | ||||
|         } | ||||
|         trace!("after add range back free list = {:?}", free_list); | ||||
|         return Ok(()); | ||||
|     } | ||||
| } | ||||
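For context, here is a hypothetical usage sketch of the free-space manager above. The method names and signatures are taken from this diff; the region addresses, the enclosing function, and the error handling are assumed for illustration.

    // Assumed to run inside a function returning Result<()>.
    let region = unsafe { VMRange::from_unchecked(0x10000, 0x20000) }; // 64 KB
    let mut mgr = VMFreeSpaceManager::new(region);
    assert_eq!(mgr.free_size(), 0x10000);

    // Address-ordered first fit: the lowest free range satisfying the size
    // and alignment constraints is chosen.
    let r1 = mgr.find_free_range_internal(0x1000, 0x1000, VMMapAddr::Any)?;
    assert_eq!(r1.start(), 0x10000);

    // Returning a range pushes it back, re-sorts the list, and merges
    // adjacent free ranges, restoring one contiguous free range here.
    mgr.add_range_back_to_free_manager(&r1)?;
    assert_eq!(mgr.free_size(), 0x10000);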
| @ -1,18 +1,79 @@ | ||||
| /* | ||||
| Occlum is a single-address-space library OS. Previously, userspace memory was divided per process, | ||||
| and all of a process's memory was allocated when the process was created, which led to a lot of wasted space and | ||||
| complicated configuration. | ||||
| 
 | ||||
| In the current implementation, the whole userspace is managed as a memory pool that consists of chunks. There | ||||
| are two kinds of chunks: | ||||
| (1) Single VMA chunk: a chunk with only one VMA, owned by exactly one process. | ||||
| (2) Multi VMA chunk: a chunk of the default chunk size that can hold many VMAs and can be shared | ||||
| by different processes. | ||||
| 
 | ||||
| This design mainly achieves two goals: | ||||
| (1) Simpler configuration: users don't need to configure process.default_mmap_size anymore, and multiple processes | ||||
| running in the same Occlum instance can use dramatically different amounts of memory. | ||||
| (2) Better performance: two-level management (chunks & VMAs) reduces the time for finding, inserting, deleting, and iterating. | ||||
| 
 | ||||
| ***************** Chart for Occlum User Space Memory Management *************** | ||||
|  User Space VM Manager | ||||
| ┌──────────────────────────────────────────────────────────────┐ | ||||
| │                            VMManager                         │ | ||||
| │                                                              │ | ||||
| │  Chunks (in use): B-Tree Set                                 │ | ||||
| │  ┌────────────────────────────────────────────────────────┐  │ | ||||
| │  │                      Multi VMA Chunk                   │  │ | ||||
| │  │                     ┌───────────────────────────────┐  │  │ | ||||
| │  │  Single VMA Chunk   │          ChunkManager         │  │  │ | ||||
| │  │  ┌──────────────┐   │                               │  │  │ | ||||
| │  │  │              │   │  VMAs (in use): Red Black Tree│  │  │ | ||||
| │  │  │    VMArea    │   │  ┌─────────────────────────┐  │  │  │ | ||||
| │  │  │              │   │  │                         │  │  │  │ | ||||
| │  │  └──────────────┘   │  │  ┌──────┐ ┌────┐ ┌────┐ │  │  │  │ | ||||
| │  │                     │  │  │ VMA  │ │VMA │ │VMA │ │  │  │  │ | ||||
| │  │  Single VMA Chunk   │  │  └──────┘ └────┘ └────┘ │  │  │  │ | ||||
| │  │  ┌──────────────┐   │  │                         │  │  │  │ | ||||
| │  │  │              │   │  └─────────────────────────┘  │  │  │ | ||||
| │  │  │    VMArea    │   │                               │  │  │ | ||||
| │  │  │              │   │                               │  │  │ | ||||
| │  │  └──────────────┘   │   Free Manager (free)         │  │  │ | ||||
| │  │                     │   ┌────────────────────────┐  │  │  │ | ||||
| │  │  Single VMA Chunk   │   │                        │  │  │  │ | ||||
| │  │  ┌──────────────┐   │   │   VMFreeSpaceManager   │  │  │  │ | ||||
| │  │  │              │   │   │                        │  │  │  │ | ||||
| │  │  │    VMArea    │   │   └────────────────────────┘  │  │  │ | ||||
| │  │  │              │   │                               │  │  │ | ||||
| │  │  └──────────────┘   └───────────────────────────────┘  │  │ | ||||
| │  │                                                        │  │ | ||||
| │  └────────────────────────────────────────────────────────┘  │ | ||||
| │                                                              │ | ||||
| │  Free Manager (free)                                         │ | ||||
| │  ┌────────────────────────────────────────────────────────┐  │ | ||||
| │  │                                                        │  │ | ||||
| │  │                   VMFreeSpaceManager                   │  │ | ||||
| │  │                                                        │  │ | ||||
| │  └────────────────────────────────────────────────────────┘  │ | ||||
| │                                                              │ | ||||
| └──────────────────────────────────────────────────────────────┘ | ||||
| */ | ||||
| 
 | ||||
| use super::*; | ||||
| use fs::{File, FileDesc, FileRef}; | ||||
| use process::{Process, ProcessRef}; | ||||
| use std::fmt; | ||||
| 
 | ||||
| mod chunk; | ||||
| mod free_space_manager; | ||||
| mod process_vm; | ||||
| mod user_space_vm; | ||||
| mod vm_area; | ||||
| mod vm_chunk_manager; | ||||
| mod vm_layout; | ||||
| mod vm_manager; | ||||
| mod vm_perms; | ||||
| mod vm_range; | ||||
| mod vm_util; | ||||
| 
 | ||||
| use self::vm_layout::VMLayout; | ||||
| use self::vm_manager::{VMManager, VMMapOptionsBuilder}; | ||||
| 
 | ||||
| pub use self::process_vm::{MMapFlags, MRemapFlags, MSyncFlags, ProcessVM, ProcessVMBuilder}; | ||||
| pub use self::user_space_vm::USER_SPACE_VM_MANAGER; | ||||
|  | ||||
| @ -1,12 +1,12 @@ | ||||
| use super::*; | ||||
| 
 | ||||
| use super::chunk::{Chunk, ChunkRef}; | ||||
| use super::config; | ||||
| use super::process::elf_file::{ElfFile, ProgramHeaderExt}; | ||||
| use super::user_space_vm::{UserSpaceVMManager, UserSpaceVMRange, USER_SPACE_VM_MANAGER}; | ||||
| use super::vm_manager::{ | ||||
|     VMInitializer, VMManager, VMMapAddr, VMMapOptions, VMMapOptionsBuilder, VMRemapOptions, | ||||
| }; | ||||
| use super::user_space_vm::USER_SPACE_VM_MANAGER; | ||||
| use super::vm_perms::VMPerms; | ||||
| use super::vm_util::{VMInitializer, VMMapAddr, VMMapOptions, VMMapOptionsBuilder, VMRemapOptions}; | ||||
| use std::collections::HashSet; | ||||
| use std::sync::atomic::{AtomicUsize, Ordering}; | ||||
| 
 | ||||
| // Used for heap and stack start address randomization.
 | ||||
| @ -69,9 +69,6 @@ impl<'a, 'b> ProcessVMBuilder<'a, 'b> { | ||||
|         let stack_size = self | ||||
|             .stack_size | ||||
|             .unwrap_or(config::LIBOS_CONFIG.process.default_stack_size); | ||||
|         let mmap_size = self | ||||
|             .mmap_size | ||||
|             .unwrap_or(config::LIBOS_CONFIG.process.default_mmap_size); | ||||
| 
 | ||||
|         // Before allocating memory, let's first calculate how much memory
 | ||||
|         // we need in total by iterating the memory layouts required by
 | ||||
| @ -92,11 +89,10 @@ impl<'a, 'b> ProcessVMBuilder<'a, 'b> { | ||||
|             }) | ||||
|             .collect(); | ||||
| 
 | ||||
|         // TODO: Make heap and stack 16-byte aligned instead of page aligned.
 | ||||
|         // Make heap and stack 16-byte aligned
 | ||||
|         let other_layouts = vec![ | ||||
|             VMLayout::new(heap_size, PAGE_SIZE)?, | ||||
|             VMLayout::new(stack_size, PAGE_SIZE)?, | ||||
|             VMLayout::new(mmap_size, PAGE_SIZE)?, | ||||
|             VMLayout::new(heap_size, 16)?, | ||||
|             VMLayout::new(stack_size, 16)?, | ||||
|         ]; | ||||
|         let process_layout = elf_layouts.iter().chain(other_layouts.iter()).fold( | ||||
|             VMLayout::new_empty(), | ||||
| @ -108,85 +104,61 @@ impl<'a, 'b> ProcessVMBuilder<'a, 'b> { | ||||
| 
 | ||||
|         // Now that we end up with the memory layout required by the process,
 | ||||
|         // let's allocate the memory for the process
 | ||||
|         let process_range = { USER_SPACE_VM_MANAGER.alloc(process_layout)? }; | ||||
|         let process_base = process_range.range().start(); | ||||
|         // Use the vm_manager to manage the whole process VM (including mmap region)
 | ||||
|         let mut vm_manager = VMManager::from(process_base, process_range.range().size())?; | ||||
|         // Note: we do not need to fill zeros of the mmap region.
 | ||||
|         // VMManager will fill zeros (if necessary) on mmap.
 | ||||
| 
 | ||||
|         // Tracker to track the min_start for each part
 | ||||
|         let mut min_start = | ||||
|             process_base + Self::get_randomize_offset(process_range.range().size() >> 3); | ||||
|         let mut chunks = HashSet::new(); | ||||
|         // Init the memory for ELFs in the process
 | ||||
|         let mut elf_ranges = Vec::with_capacity(2); | ||||
|         elf_layouts | ||||
|             .iter() | ||||
|             .zip(self.elfs.iter()) | ||||
|             .map(|(elf_layout, elf_file)| { | ||||
|                 let desired_range = VMRange::new_with_layout(elf_layout, min_start); | ||||
|                 let vm_option = VMMapOptionsBuilder::default() | ||||
|                     .size(desired_range.size()) | ||||
|                     .addr(VMMapAddr::Need(desired_range.start())) | ||||
|                     .size(elf_layout.size()) | ||||
|                     .align(elf_layout.align()) | ||||
|                     .perms(VMPerms::ALL) // set it to read | write | exec for simplicity
 | ||||
|                     .initializer(VMInitializer::DoNothing()) | ||||
|                     .build()?; | ||||
|                 let elf_start = vm_manager.mmap(vm_option)?; | ||||
|                 debug_assert!(desired_range.start == elf_start); | ||||
|                 debug_assert!(elf_start % elf_layout.align() == 0); | ||||
|                 debug_assert!(process_range.range().is_superset_of(&desired_range)); | ||||
|                 Self::init_elf_memory(&desired_range, elf_file)?; | ||||
|                 min_start = desired_range.end(); | ||||
|                 elf_ranges.push(desired_range); | ||||
|                 trace!("elf range = {:?}", desired_range); | ||||
|                 let (elf_range, chunk_ref) = USER_SPACE_VM_MANAGER.alloc(&vm_option)?; | ||||
|                 debug_assert!(elf_range.start() % elf_layout.align() == 0); | ||||
|                 Self::init_elf_memory(&elf_range, elf_file)?; | ||||
|                 trace!("elf range = {:?}", elf_range); | ||||
|                 elf_ranges.push(elf_range); | ||||
|                 chunks.insert(chunk_ref); | ||||
|                 Ok(()) | ||||
|             }) | ||||
|             .collect::<Result<()>>()?; | ||||
| 
 | ||||
|         // Init the heap memory in the process
 | ||||
|         let heap_layout = &other_layouts[0]; | ||||
|         let heap_min_start = min_start + Self::get_randomize_offset(RANGE_FOR_RANDOMIZATION); | ||||
|         let heap_range = VMRange::new_with_layout(heap_layout, heap_min_start); | ||||
|         let vm_option = VMMapOptionsBuilder::default() | ||||
|             .size(heap_range.size()) | ||||
|             .addr(VMMapAddr::Need(heap_range.start())) | ||||
|             .size(heap_layout.size()) | ||||
|             .align(heap_layout.align()) | ||||
|             .perms(VMPerms::READ | VMPerms::WRITE) | ||||
|             .build()?; | ||||
|         let heap_start = vm_manager.mmap(vm_option)?; | ||||
|         debug_assert!(heap_range.start == heap_start); | ||||
|         let (heap_range, chunk_ref) = USER_SPACE_VM_MANAGER.alloc(&vm_option)?; | ||||
|         debug_assert!(heap_range.start() % heap_layout.align() == 0); | ||||
|         trace!("heap range = {:?}", heap_range); | ||||
|         let brk = AtomicUsize::new(heap_range.start()); | ||||
|         min_start = heap_range.end(); | ||||
|         chunks.insert(chunk_ref); | ||||
| 
 | ||||
|         // Init the stack memory in the process
 | ||||
|         let stack_layout = &other_layouts[1]; | ||||
|         let stack_min_start = min_start + Self::get_randomize_offset(RANGE_FOR_RANDOMIZATION); | ||||
|         let stack_range = VMRange::new_with_layout(stack_layout, stack_min_start); | ||||
|         let vm_option = VMMapOptionsBuilder::default() | ||||
|             .size(stack_range.size()) | ||||
|             .addr(VMMapAddr::Need(stack_range.start())) | ||||
|             .size(stack_layout.size()) | ||||
|             .align(heap_layout.align()) | ||||
|             .perms(VMPerms::READ | VMPerms::WRITE) | ||||
|             .build()?; | ||||
|         let stack_start = vm_manager.mmap(vm_option)?; | ||||
|         debug_assert!(stack_range.start == stack_start); | ||||
|         let (stack_range, chunk_ref) = USER_SPACE_VM_MANAGER.alloc(&vm_option)?; | ||||
|         debug_assert!(stack_range.start() % stack_layout.align() == 0); | ||||
|         chunks.insert(chunk_ref); | ||||
|         trace!("stack range = {:?}", stack_range); | ||||
|         min_start = stack_range.end(); | ||||
|         // Note: we do not need to fill zeros for stack
 | ||||
| 
 | ||||
|         debug_assert!(process_range.range().is_superset_of(&heap_range)); | ||||
|         debug_assert!(process_range.range().is_superset_of(&stack_range)); | ||||
| 
 | ||||
|         // Set mmap prefered start address
 | ||||
|         vm_manager.set_mmap_prefered_start_addr(min_start); | ||||
|         let vm_manager = SgxMutex::new(vm_manager); | ||||
| 
 | ||||
|         let mem_chunks = Arc::new(RwLock::new(chunks)); | ||||
|         Ok(ProcessVM { | ||||
|             process_range, | ||||
|             elf_ranges, | ||||
|             heap_range, | ||||
|             stack_range, | ||||
|             brk, | ||||
|             vm_manager, | ||||
|             mem_chunks, | ||||
|         }) | ||||
|     } | ||||
| 
 | ||||
| @ -255,39 +227,83 @@ impl<'a, 'b> ProcessVMBuilder<'a, 'b> { | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| // MemChunks is the structure that tracks all the chunks used by this process.
 | ||||
| type MemChunks = Arc<RwLock<HashSet<ChunkRef>>>; | ||||
| 
 | ||||
| /// The per-process virtual memory
 | ||||
| #[derive(Debug)] | ||||
| pub struct ProcessVM { | ||||
|     vm_manager: SgxMutex<VMManager>, // manage the whole process VM
 | ||||
|     elf_ranges: Vec<VMRange>, | ||||
|     heap_range: VMRange, | ||||
|     stack_range: VMRange, | ||||
|     brk: AtomicUsize, | ||||
|     // Memory safety notes: the process_range field must be the last one.
 | ||||
|     // Memory safety notes: the mem_chunks field must be the last one.
 | ||||
|     //
 | ||||
|     // Rust drops fields in the same order as they are declared. So by making
 | ||||
|     // process_range the last field, we ensure that when all other fields are
 | ||||
|     // mem_chunks the last field, we ensure that when all other fields are
 | ||||
|     // dropped, their drop methods (if provided) can still access the memory
 | ||||
|     // region represented by the process_range field.
 | ||||
|     process_range: UserSpaceVMRange, | ||||
|     // region represented by the mem_chunks field.
 | ||||
|     mem_chunks: MemChunks, | ||||
| } | ||||
| 
 | ||||
| impl Default for ProcessVM { | ||||
|     fn default() -> ProcessVM { | ||||
|         ProcessVM { | ||||
|             process_range: USER_SPACE_VM_MANAGER.alloc_dummy(), | ||||
|             elf_ranges: Default::default(), | ||||
|             heap_range: Default::default(), | ||||
|             stack_range: Default::default(), | ||||
|             brk: Default::default(), | ||||
|             vm_manager: Default::default(), | ||||
|             mem_chunks: Arc::new(RwLock::new(HashSet::new())), | ||||
|         } | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| impl Drop for ProcessVM { | ||||
|     fn drop(&mut self) { | ||||
|         let mut mem_chunks = self.mem_chunks.write().unwrap(); | ||||
|         // There are two cases when this drop is called:
 | ||||
|         // (1) Process exits normally and in the end, drop process VM
 | ||||
|         // (2) During process creation, the process VM is ready but some other error occurs when creating the process, e.g. spawn_attribute is set
 | ||||
|         // to a wrong value
 | ||||
|         //
 | ||||
|         // For the first case, the process VM is cleaned in the exit procedure and nothing is needed here. For the second case, mem_chunks is not empty and should
 | ||||
|         // be cleaned here.
 | ||||
| 
 | ||||
|         // In the first case, the current thread is reset to the idle thread
 | ||||
|         // In the second case, the current thread belongs to the parent process
 | ||||
|         let current = current!(); | ||||
|         if current.tid() != 0 { | ||||
|             mem_chunks | ||||
|                 .drain_filter(|chunk| chunk.is_single_vma()) | ||||
|                 .for_each(|chunk| USER_SPACE_VM_MANAGER.free_chunk(&chunk)) | ||||
|         } | ||||
|         assert!(mem_chunks.len() == 0); | ||||
|         info!("Process VM dropped"); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| impl ProcessVM { | ||||
|     pub fn mem_chunks(&self) -> &MemChunks { | ||||
|         &self.mem_chunks | ||||
|     } | ||||
| 
 | ||||
|     pub fn add_mem_chunk(&self, chunk: ChunkRef) { | ||||
|         let mut mem_chunks = self.mem_chunks.write().unwrap(); | ||||
|         mem_chunks.insert(chunk); | ||||
|     } | ||||
| 
 | ||||
|     pub fn remove_mem_chunk(&self, chunk: &ChunkRef) { | ||||
|         let mut mem_chunks = self.mem_chunks.write().unwrap(); | ||||
|         mem_chunks.remove(chunk); | ||||
|     } | ||||
| 
 | ||||
|     pub fn replace_mem_chunk(&self, old_chunk: &ChunkRef, new_chunk: ChunkRef) { | ||||
|         self.remove_mem_chunk(old_chunk); | ||||
|         self.add_mem_chunk(new_chunk) | ||||
|     } | ||||
| 
 | ||||
|     pub fn get_process_range(&self) -> &VMRange { | ||||
|         self.process_range.range() | ||||
|         USER_SPACE_VM_MANAGER.range() | ||||
|     } | ||||
| 
 | ||||
|     pub fn get_elf_ranges(&self) -> &[VMRange] { | ||||
| @ -335,6 +351,18 @@ impl ProcessVM { | ||||
|         Ok(new_brk) | ||||
|     } | ||||
| 
 | ||||
|     // Get a non-accurate (approximate) free size for the current process
 | ||||
|     pub fn get_free_size(&self) -> usize { | ||||
|         let chunk_free_size = { | ||||
|             let process_chunks = self.mem_chunks.read().unwrap(); | ||||
|             process_chunks | ||||
|                 .iter() | ||||
|                 .fold(0, |acc, chunks| acc + chunks.free_size()) | ||||
|         }; | ||||
|         let free_size = chunk_free_size + USER_SPACE_VM_MANAGER.free_size(); | ||||
|         free_size | ||||
|     } | ||||
| 
 | ||||
|     pub fn mmap( | ||||
|         &self, | ||||
|         addr: usize, | ||||
| @ -346,9 +374,6 @@ impl ProcessVM { | ||||
|     ) -> Result<usize> { | ||||
|         let addr_option = { | ||||
|             if flags.contains(MMapFlags::MAP_FIXED) { | ||||
|                 if !self.process_range.range().contains(addr) { | ||||
|                     return_errno!(EINVAL, "Beyond valid memory range"); | ||||
|                 } | ||||
|                 VMMapAddr::Force(addr) | ||||
|             } else { | ||||
|                 if addr == 0 { | ||||
| @ -360,7 +385,8 @@ impl ProcessVM { | ||||
|         }; | ||||
|         let initializer = { | ||||
|             if flags.contains(MMapFlags::MAP_ANONYMOUS) { | ||||
|                 VMInitializer::FillZeros() | ||||
|                 // There is no need to fill zeros in mmap. Cleaning is done after munmap.
 | ||||
|                 VMInitializer::DoNothing() | ||||
|             } else { | ||||
|                 let file_ref = current!().file(fd)?; | ||||
|                 VMInitializer::LoadFromFile { | ||||
| @ -386,7 +412,7 @@ impl ProcessVM { | ||||
|             .initializer(initializer) | ||||
|             .writeback_file(writeback_file) | ||||
|             .build()?; | ||||
|         let mmap_addr = self.vm_manager.lock().unwrap().mmap(mmap_options)?; | ||||
|         let mmap_addr = USER_SPACE_VM_MANAGER.mmap(&mmap_options)?; | ||||
|         Ok(mmap_addr) | ||||
|     } | ||||
| 
 | ||||
| @ -397,18 +423,12 @@ impl ProcessVM { | ||||
|         new_size: usize, | ||||
|         flags: MRemapFlags, | ||||
|     ) -> Result<usize> { | ||||
|         if let Some(new_addr) = flags.new_addr() { | ||||
|             if !self.process_range.range().contains(new_addr) { | ||||
|                 return_errno!(EINVAL, "new_addr is beyond valid memory range"); | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
|         let mremap_option = VMRemapOptions::new(old_addr, old_size, new_size, flags)?; | ||||
|         self.vm_manager.lock().unwrap().mremap(&mremap_option) | ||||
|         USER_SPACE_VM_MANAGER.mremap(&mremap_option) | ||||
|     } | ||||
| 
 | ||||
|     pub fn munmap(&self, addr: usize, size: usize) -> Result<()> { | ||||
|         self.vm_manager.lock().unwrap().munmap(addr, size) | ||||
|         USER_SPACE_VM_MANAGER.munmap(addr, size) | ||||
|     } | ||||
| 
 | ||||
|     pub fn mprotect(&self, addr: usize, size: usize, perms: VMPerms) -> Result<()> { | ||||
| @ -419,38 +439,21 @@ impl ProcessVM { | ||||
|             align_up(size, PAGE_SIZE) | ||||
|         }; | ||||
|         let protect_range = VMRange::new_with_size(addr, size)?; | ||||
|         if !self.process_range.range().is_superset_of(&protect_range) { | ||||
|             return_errno!(ENOMEM, "invalid range"); | ||||
|         } | ||||
|         let mut mmap_manager = self.vm_manager.lock().unwrap(); | ||||
| 
 | ||||
|         // TODO: support mprotect vm regions in addition to mmap
 | ||||
|         if !mmap_manager.range().is_superset_of(&protect_range) { | ||||
|             warn!("Do not support mprotect memory outside the mmap region yet"); | ||||
|             return Ok(()); | ||||
|         } | ||||
| 
 | ||||
|         mmap_manager.mprotect(addr, size, perms) | ||||
|         return USER_SPACE_VM_MANAGER.mprotect(addr, size, perms); | ||||
|     } | ||||
| 
 | ||||
|     pub fn msync(&self, addr: usize, size: usize) -> Result<()> { | ||||
|         let sync_range = VMRange::new_with_size(addr, size)?; | ||||
|         let mut mmap_manager = self.vm_manager.lock().unwrap(); | ||||
|         mmap_manager.msync_by_range(&sync_range) | ||||
|         return USER_SPACE_VM_MANAGER.msync(addr, size); | ||||
|     } | ||||
| 
 | ||||
|     pub fn msync_by_file(&self, sync_file: &FileRef) { | ||||
|         let mut mmap_manager = self.vm_manager.lock().unwrap(); | ||||
|         mmap_manager.msync_by_file(sync_file); | ||||
|         return USER_SPACE_VM_MANAGER.msync_by_file(sync_file); | ||||
|     } | ||||
| 
 | ||||
|     // Return: a copy of the found region
 | ||||
|     pub fn find_mmap_region(&self, addr: usize) -> Result<VMRange> { | ||||
|         self.vm_manager | ||||
|             .lock() | ||||
|             .unwrap() | ||||
|             .find_mmap_region(addr) | ||||
|             .map(|range_ref| *range_ref) | ||||
|         USER_SPACE_VM_MANAGER.find_mmap_region(addr) | ||||
|     } | ||||
| } | ||||
| 
 | ||||
|  | ||||
| @ -1,62 +1,69 @@ | ||||
| use super::*; | ||||
| use crate::ctor::dtor; | ||||
| use config::LIBOS_CONFIG; | ||||
| use std::ops::{Deref, DerefMut}; | ||||
| use vm_manager::VMManager; | ||||
| 
 | ||||
| /// The virtual memory manager for the entire user space
 | ||||
| pub struct UserSpaceVMManager { | ||||
|     total_size: usize, | ||||
|     free_size: SgxMutex<usize>, | ||||
| } | ||||
| pub struct UserSpaceVMManager(VMManager); | ||||
| 
 | ||||
| impl UserSpaceVMManager { | ||||
|     fn new() -> UserSpaceVMManager { | ||||
|     fn new() -> Result<UserSpaceVMManager> { | ||||
|         let rsrv_mem_size = LIBOS_CONFIG.resource_limits.user_space_size; | ||||
|         UserSpaceVMManager { | ||||
|             total_size: rsrv_mem_size, | ||||
|             free_size: SgxMutex::new(rsrv_mem_size), | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     pub fn alloc(&self, vm_layout: VMLayout) -> Result<UserSpaceVMRange> { | ||||
|         let size = align_up(vm_layout.size(), vm_layout.align()); | ||||
|         let vm_range = unsafe { | ||||
|             let ptr = sgx_alloc_rsrv_mem(size); | ||||
|             // TODO: Current sgx_alloc_rsrv_mem implementation will commit all the pages of the desired size, which will consume
 | ||||
|             // a lot of time. When EDMM is supported, there is no need to commit all the pages at the initialization stage. A function
 | ||||
|             // which reserves memory but not commit pages should be provided then.
 | ||||
|             let ptr = sgx_alloc_rsrv_mem(rsrv_mem_size); | ||||
|             let perm = MemPerm::READ | MemPerm::WRITE; | ||||
|             if ptr.is_null() { | ||||
|                 return_errno!(ENOMEM, "run out of reserved memory"); | ||||
|             } | ||||
|             // Change the page permission to RW (default)
 | ||||
|             assert!(sgx_tprotect_rsrv_mem(ptr, size, perm.bits()) == sgx_status_t::SGX_SUCCESS); | ||||
|             assert!( | ||||
|                 sgx_tprotect_rsrv_mem(ptr, rsrv_mem_size, perm.bits()) == sgx_status_t::SGX_SUCCESS | ||||
|             ); | ||||
| 
 | ||||
|             let addr = ptr as usize; | ||||
|             debug!("allocated rsrv addr is 0x{:x}, len is 0x{:x}", addr, size); | ||||
|             VMRange::from_unchecked(addr, addr + size) | ||||
|             debug!( | ||||
|                 "allocated rsrv addr is 0x{:x}, len is 0x{:x}", | ||||
|                 addr, rsrv_mem_size | ||||
|             ); | ||||
|             VMRange::from_unchecked(addr, addr + rsrv_mem_size) | ||||
|         }; | ||||
| 
 | ||||
|         *self.free_size.lock().unwrap() -= size; | ||||
|         Ok(UserSpaceVMRange::new(vm_range)) | ||||
|     } | ||||
|         let vm_manager = VMManager::init(vm_range)?; | ||||
| 
 | ||||
|     fn add_free_size(&self, user_space_vmrange: &UserSpaceVMRange) { | ||||
|         *self.free_size.lock().unwrap() += user_space_vmrange.range().size(); | ||||
|     } | ||||
| 
 | ||||
|     // The empty range is not added to sub_range
 | ||||
|     pub fn alloc_dummy(&self) -> UserSpaceVMRange { | ||||
|         let empty_user_vm_range = unsafe { VMRange::from_unchecked(0, 0) }; | ||||
|         UserSpaceVMRange::new(empty_user_vm_range) | ||||
|         Ok(UserSpaceVMManager(vm_manager)) | ||||
|     } | ||||
| 
 | ||||
|     pub fn get_total_size(&self) -> usize { | ||||
|         self.total_size | ||||
|         self.range().size() | ||||
|     } | ||||
| } | ||||
| 
 | ||||
|     pub fn get_free_size(&self) -> usize { | ||||
|         *self.free_size.lock().unwrap() | ||||
| // This provides a module teardown function attribute similar to `__attribute__((destructor))` in C/C++; the function will
 | ||||
| // be called after the main function. Static variables are still safe to access at this time.
 | ||||
| #[dtor] | ||||
| fn free_user_space() { | ||||
|     let range = USER_SPACE_VM_MANAGER.range(); | ||||
|     assert!(USER_SPACE_VM_MANAGER.verified_clean_when_exit()); | ||||
|     let addr = range.start() as *const c_void; | ||||
|     let size = range.size(); | ||||
|     info!("free user space VM: {:?}", range); | ||||
|     assert!(unsafe { sgx_free_rsrv_mem(addr, size) == 0 }); | ||||
| } | ||||
| 
 | ||||
| impl Deref for UserSpaceVMManager { | ||||
|     type Target = VMManager; | ||||
| 
 | ||||
|     fn deref(&self) -> &Self::Target { | ||||
|         &self.0 | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| lazy_static! { | ||||
|     pub static ref USER_SPACE_VM_MANAGER: UserSpaceVMManager = UserSpaceVMManager::new(); | ||||
|     pub static ref USER_SPACE_VM_MANAGER: UserSpaceVMManager = UserSpaceVMManager::new().unwrap(); | ||||
| } | ||||
| 
 | ||||
| bitflags! { | ||||
| @ -96,32 +103,3 @@ extern "C" { | ||||
|     //
 | ||||
|     fn sgx_tprotect_rsrv_mem(addr: *const c_void, length: usize, prot: i32) -> sgx_status_t; | ||||
| } | ||||
| 
 | ||||
| #[derive(Debug)] | ||||
| pub struct UserSpaceVMRange { | ||||
|     vm_range: VMRange, | ||||
| } | ||||
| 
 | ||||
| impl UserSpaceVMRange { | ||||
|     fn new(vm_range: VMRange) -> UserSpaceVMRange { | ||||
|         UserSpaceVMRange { vm_range } | ||||
|     } | ||||
| 
 | ||||
|     pub fn range(&self) -> &VMRange { | ||||
|         &self.vm_range | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| impl Drop for UserSpaceVMRange { | ||||
|     fn drop(&mut self) { | ||||
|         let addr = self.vm_range.start() as *const c_void; | ||||
|         let size = self.vm_range.size(); | ||||
|         if size == 0 { | ||||
|             return; | ||||
|         } | ||||
| 
 | ||||
|         USER_SPACE_VM_MANAGER.add_free_size(self); | ||||
|         info!("user space vm free: {:?}", self.vm_range); | ||||
|         assert!(unsafe { sgx_free_rsrv_mem(addr, size) == 0 }); | ||||
|     } | ||||
| } | ||||
|  | ||||
| @ -4,25 +4,40 @@ use super::vm_perms::VMPerms; | ||||
| use super::vm_range::VMRange; | ||||
| use super::*; | ||||
| 
 | ||||
| use intrusive_collections::rbtree::{Link, RBTree}; | ||||
| use intrusive_collections::{intrusive_adapter, KeyAdapter}; | ||||
| 
 | ||||
| #[derive(Clone, Debug, Default)] | ||||
| pub struct VMArea { | ||||
|     range: VMRange, | ||||
|     perms: VMPerms, | ||||
|     writeback_file: Option<(FileRef, usize)>, | ||||
|     pid: pid_t, | ||||
| } | ||||
| 
 | ||||
| impl VMArea { | ||||
|     pub fn new(range: VMRange, perms: VMPerms, writeback_file: Option<(FileRef, usize)>) -> Self { | ||||
|     pub fn new( | ||||
|         range: VMRange, | ||||
|         perms: VMPerms, | ||||
|         writeback_file: Option<(FileRef, usize)>, | ||||
|         pid: pid_t, | ||||
|     ) -> Self { | ||||
|         Self { | ||||
|             range, | ||||
|             perms, | ||||
|             writeback_file, | ||||
|             pid, | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     /// Create a new VMArea object that inherits the write-back file (if any), but has
 | ||||
|     /// a new range and permissions.
 | ||||
|     pub fn inherits_file_from(vma: &VMArea, new_range: VMRange, new_perms: VMPerms) -> Self { | ||||
|     pub fn inherits_file_from( | ||||
|         vma: &VMArea, | ||||
|         new_range: VMRange, | ||||
|         new_perms: VMPerms, | ||||
|         pid: pid_t, | ||||
|     ) -> Self { | ||||
|         let new_writeback_file = vma.writeback_file.as_ref().map(|(file, file_offset)| { | ||||
|             let new_file = file.clone(); | ||||
| 
 | ||||
| @ -36,7 +51,7 @@ impl VMArea { | ||||
|             }; | ||||
|             (new_file, new_file_offset) | ||||
|         }); | ||||
|         Self::new(new_range, new_perms, new_writeback_file) | ||||
|         Self::new(new_range, new_perms, new_writeback_file, pid) | ||||
|     } | ||||
| 
 | ||||
|     pub fn perms(&self) -> VMPerms { | ||||
| @ -47,6 +62,10 @@ impl VMArea { | ||||
|         &self.range | ||||
|     } | ||||
| 
 | ||||
|     pub fn pid(&self) -> pid_t { | ||||
|         self.pid | ||||
|     } | ||||
| 
 | ||||
|     pub fn writeback_file(&self) -> &Option<(FileRef, usize)> { | ||||
|         &self.writeback_file | ||||
|     } | ||||
| @ -59,7 +78,7 @@ impl VMArea { | ||||
|         self.deref() | ||||
|             .subtract(other) | ||||
|             .into_iter() | ||||
|             .map(|range| Self::inherits_file_from(self, range, self.perms())) | ||||
|             .map(|range| Self::inherits_file_from(self, range, self.perms(), self.pid())) | ||||
|             .collect() | ||||
|     } | ||||
| 
 | ||||
| @ -72,7 +91,7 @@ impl VMArea { | ||||
|             } | ||||
|             new_range.unwrap() | ||||
|         }; | ||||
|         let new_vma = VMArea::inherits_file_from(self, new_range, self.perms()); | ||||
|         let new_vma = VMArea::inherits_file_from(self, new_range, self.perms(), self.pid()); | ||||
|         Some(new_vma) | ||||
|     } | ||||
| 
 | ||||
| @ -109,3 +128,56 @@ impl Deref for VMArea { | ||||
|         &self.range | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| #[derive(Clone)] | ||||
| pub struct VMAObj { | ||||
|     link: Link, | ||||
|     vma: VMArea, | ||||
| } | ||||
| 
 | ||||
| impl fmt::Debug for VMAObj { | ||||
|     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||||
|         write!(f, "{:?}", self.vma) | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| // Key adapter for the RBTree, which is sorted by the start address of the vma ranges
 | ||||
| intrusive_adapter!(pub VMAAdapter = Box<VMAObj>: VMAObj { link : Link }); | ||||
| impl<'a> KeyAdapter<'a> for VMAAdapter { | ||||
|     type Key = usize; | ||||
|     fn get_key(&self, vma_obj: &'a VMAObj) -> usize { | ||||
|         vma_obj.vma.range().start() | ||||
|     } | ||||
| } | ||||
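The adapter above is the whole indexing story: the tree keys VMAs by their start address, so point queries and ordered scans are O(log n). A minimal standalone sketch of the same pattern, with a simplified node type rather than the LibOS's actual definitions (assuming intrusive-collections 0.9):

    use intrusive_collections::{intrusive_adapter, Bound, KeyAdapter, RBTree, RBTreeLink};

    struct Node {
        link: RBTreeLink,
        start: usize, // stands in for VMRange::start()
    }

    intrusive_adapter!(NodeAdapter = Box<Node>: Node { link: RBTreeLink });

    impl<'a> KeyAdapter<'a> for NodeAdapter {
        type Key = usize;
        fn get_key(&self, node: &'a Node) -> usize {
            node.start
        }
    }

    fn main() {
        let mut tree = RBTree::new(NodeAdapter::new());
        for start in [0x3000usize, 0x1000, 0x2000] {
            tree.insert(Box::new(Node { link: RBTreeLink::new(), start }));
        }
        // upper_bound(Included(k)) yields the last node with key <= k; this is
        // how the chunk manager finds the first VMA that may contain an address.
        let cursor = tree.upper_bound(Bound::Included(&0x2fff));
        assert_eq!(cursor.get().unwrap().start, 0x2000);
    }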
| 
 | ||||
| impl VMAObj { | ||||
|     pub fn new_vma_obj(vma: VMArea) -> Box<Self> { | ||||
|         Box::new(Self { | ||||
|             link: Link::new(), | ||||
|             vma, | ||||
|         }) | ||||
|     } | ||||
| 
 | ||||
|     pub fn vma(&self) -> &VMArea { | ||||
|         &self.vma | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| impl VMArea { | ||||
|     pub fn new_obj( | ||||
|         range: VMRange, | ||||
|         perms: VMPerms, | ||||
|         writeback_file: Option<(FileRef, usize)>, | ||||
|         pid: pid_t, | ||||
|     ) -> Box<VMAObj> { | ||||
|         Box::new(VMAObj { | ||||
|             link: Link::new(), | ||||
|             vma: VMArea { | ||||
|                 range, | ||||
|                 perms, | ||||
|                 writeback_file, | ||||
|                 pid, | ||||
|             }, | ||||
|         }) | ||||
|     } | ||||
| } | ||||
|  | ||||
654 src/libos/src/vm/vm_chunk_manager.rs Normal file
							| @ -0,0 +1,654 @@ | ||||
| use super::*; | ||||
| 
 | ||||
| use super::free_space_manager::VMFreeSpaceManager as FreeRangeManager; | ||||
| use super::vm_area::*; | ||||
| use super::vm_perms::VMPerms; | ||||
| use super::vm_util::*; | ||||
| use std::collections::BTreeSet; | ||||
| 
 | ||||
| use intrusive_collections::rbtree::{Link, RBTree}; | ||||
| use intrusive_collections::Bound; | ||||
| use intrusive_collections::RBTreeLink; | ||||
| use intrusive_collections::{intrusive_adapter, KeyAdapter}; | ||||
| 
 | ||||
| /// Memory chunk manager.
 | ||||
| ///
 | ||||
| /// A chunk is the memory unit for Occlum. Every chunk of the `default` size is managed by a ChunkManager, which provides
 | ||||
| /// useful memory management APIs such as mmap, munmap, mremap, mprotect, etc.
 | ||||
| /// ChunkManager is built on two data structures: a red-black tree to track vmas in use and a FreeRangeManager to track
 | ||||
| /// free ranges.
 | ||||
| /// For vmas-in-use, there are two sentry vmas with zero length at the front and end of the red-black tree.
 | ||||
| #[derive(Debug, Default)] | ||||
| pub struct ChunkManager { | ||||
|     range: VMRange, | ||||
|     free_size: usize, | ||||
|     vmas: RBTree<VMAAdapter>, | ||||
|     free_manager: FreeRangeManager, | ||||
| } | ||||
| 
 | ||||
| impl ChunkManager { | ||||
|     pub fn from(addr: usize, size: usize) -> Result<Self> { | ||||
|         let range = VMRange::new(addr, addr + size)?; | ||||
|         let vmas = { | ||||
|             let start = range.start(); | ||||
|             let end = range.end(); | ||||
|             let start_sentry = { | ||||
|                 let range = VMRange::new_empty(start)?; | ||||
|                 let perms = VMPerms::empty(); | ||||
|                 // sentry vma shouldn't belong to any process
 | ||||
|                 VMAObj::new_vma_obj(VMArea::new(range, perms, None, 0)) | ||||
|             }; | ||||
|             let end_sentry = { | ||||
|                 let range = VMRange::new_empty(end)?; | ||||
|                 let perms = VMPerms::empty(); | ||||
|                 VMAObj::new_vma_obj(VMArea::new(range, perms, None, 0)) | ||||
|             }; | ||||
|             let mut new_tree = RBTree::new(VMAAdapter::new()); | ||||
|             new_tree.insert(start_sentry); | ||||
|             new_tree.insert(end_sentry); | ||||
|             new_tree | ||||
|         }; | ||||
|         Ok(ChunkManager { | ||||
|             range, | ||||
|             free_size: range.size(), | ||||
|             vmas, | ||||
|             free_manager: FreeRangeManager::new(range.clone()), | ||||
|         }) | ||||
|     } | ||||
| 
 | ||||
|     pub fn range(&self) -> &VMRange { | ||||
|         &self.range | ||||
|     } | ||||
| 
 | ||||
|     pub fn vmas(&self) -> &RBTree<VMAAdapter> { | ||||
|         &self.vmas | ||||
|     } | ||||
| 
 | ||||
|     pub fn free_size(&self) -> &usize { | ||||
|         &self.free_size | ||||
|     } | ||||
| 
 | ||||
|     pub fn is_empty(&self) -> bool { | ||||
|         self.vmas.iter().count() == 2 // only sentry vmas
 | ||||
|     } | ||||
| 
 | ||||
|     pub fn clean_vmas_with_pid(&mut self, pid: pid_t) { | ||||
|         let mut vmas_cursor = self.vmas.cursor_mut(); | ||||
|         vmas_cursor.move_next(); // move to the first element of the tree
 | ||||
|         while !vmas_cursor.is_null() { | ||||
|             let vma = vmas_cursor.get().unwrap().vma(); | ||||
|             if vma.pid() != pid || vma.size() == 0 { | ||||
|                 // Skip sentry vmas and vmas that don't belong to this process
 | ||||
|                 vmas_cursor.move_next(); | ||||
|                 continue; | ||||
|             } | ||||
| 
 | ||||
|             Self::flush_file_vma(vma); | ||||
| 
 | ||||
|             if !vma.perms().is_default() { | ||||
|                 VMPerms::apply_perms(vma, VMPerms::default()); | ||||
|             } | ||||
| 
 | ||||
|             unsafe { | ||||
|                 let buf = vma.as_slice_mut(); | ||||
|                 buf.iter_mut().for_each(|b| *b = 0) | ||||
|             } | ||||
| 
 | ||||
|             self.free_manager.add_range_back_to_free_manager(vma); | ||||
|             self.free_size += vma.size(); | ||||
| 
 | ||||
|             // Remove this vma from vmas list
 | ||||
|             vmas_cursor.remove(); | ||||
|         } | ||||
|     } | ||||
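clean_vmas_with_pid walks the tree with a mutable cursor because CursorMut::remove both detaches the current element and advances to the next one, so removal never invalidates the iteration. A standalone sketch of that cursor pattern, with an invented owner field standing in for the pid (assuming intrusive-collections 0.9):

    use intrusive_collections::{intrusive_adapter, KeyAdapter, RBTree, RBTreeLink};

    struct Item {
        link: RBTreeLink,
        key: usize,
        owner: u32, // stands in for the vma's pid
    }

    intrusive_adapter!(ItemAdapter = Box<Item>: Item { link: RBTreeLink });

    impl<'a> KeyAdapter<'a> for ItemAdapter {
        type Key = usize;
        fn get_key(&self, item: &'a Item) -> usize {
            item.key
        }
    }

    fn main() {
        let mut tree = RBTree::new(ItemAdapter::new());
        for (key, owner) in [(1, 10), (2, 20), (3, 10)] {
            tree.insert(Box::new(Item { link: RBTreeLink::new(), key, owner }));
        }
        // Remove every item of owner 10; remove() advances the cursor itself,
        // so move_next() is only called when the current item is kept.
        let mut cursor = tree.front_mut();
        while !cursor.is_null() {
            if cursor.get().unwrap().owner == 10 {
                cursor.remove();
            } else {
                cursor.move_next();
            }
        }
        assert_eq!(tree.iter().count(), 1);
    }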
| 
 | ||||
|     pub fn mmap(&mut self, options: &VMMapOptions) -> Result<usize> { | ||||
|         let addr = *options.addr(); | ||||
|         let size = *options.size(); | ||||
|         let align = *options.align(); | ||||
| 
 | ||||
|         if let VMMapAddr::Force(addr) = addr { | ||||
|             self.munmap(addr, size)?; | ||||
|         } | ||||
| 
 | ||||
|         // Find and allocate a new range for this mmap request
 | ||||
|         let new_range = self | ||||
|             .free_manager | ||||
|             .find_free_range_internal(size, align, addr)?; | ||||
|         let new_addr = new_range.start(); | ||||
|         let writeback_file = options.writeback_file().clone(); | ||||
|         let current_pid = current!().process().pid(); | ||||
|         let new_vma = VMArea::new(new_range, *options.perms(), writeback_file, current_pid); | ||||
| 
 | ||||
|         // Initialize the memory of the new range
 | ||||
|         unsafe { | ||||
|             let buf = new_vma.as_slice_mut(); | ||||
|             options.initializer().init_slice(buf)?; | ||||
|         } | ||||
|         // Set memory permissions
 | ||||
|         if !options.perms().is_default() { | ||||
|             VMPerms::apply_perms(&new_vma, new_vma.perms()); | ||||
|         } | ||||
|         self.free_size -= new_vma.size(); | ||||
|         // After initializing, we can safely insert the new VMA
 | ||||
|         self.vmas.insert(VMAObj::new_vma_obj(new_vma)); | ||||
|         Ok(new_addr) | ||||
|     } | ||||
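find_free_range_internal belongs to the FreeRangeManager, which this diff does not show. As a rough sketch of what such a first-fit search could look like over a sorted free list (a hypothetical helper, not Occlum's actual implementation):

    // Round x up to a multiple of a; assumes a is a power of two.
    fn align_up(x: usize, a: usize) -> usize {
        (x + a - 1) & !(a - 1)
    }

    // First-fit: return the first free range that can hold an aligned
    // allocation of `size` bytes.
    fn find_free_range(free: &[(usize, usize)], size: usize, align: usize) -> Option<(usize, usize)> {
        for &(start, end) in free {
            let addr = align_up(start, align);
            if addr + size <= end {
                return Some((addr, addr + size));
            }
        }
        None
    }

    fn main() {
        let free = [(0x1000, 0x2000), (0x8000, 0x20000)];
        // 16 KiB cannot fit in the first hole, so the second one is used.
        assert_eq!(find_free_range(&free, 0x4000, 0x1000), Some((0x8000, 0xc000)));
    }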
| 
 | ||||
|     pub fn munmap_range(&mut self, range: VMRange) -> Result<()> { | ||||
|         let bound = range.start(); | ||||
|         let current_pid = current!().process().pid(); | ||||
| 
 | ||||
|         // The cursor to iterate vmas that might intersect with munmap_range.
 | ||||
|         // upper_bound returns the vma whose start address is the greatest one not exceeding the start of the munmap range. Iteration begins from this vma.
 | ||||
|         let mut vmas_cursor = self.vmas.upper_bound_mut(Bound::Included(&bound)); | ||||
|         while !vmas_cursor.is_null() && vmas_cursor.get().unwrap().vma().start() <= range.end() { | ||||
|             let vma = &vmas_cursor.get().unwrap().vma(); | ||||
|             warn!("munmap related vma = {:?}", vma); | ||||
|             if vma.size() == 0 || current_pid != vma.pid() { | ||||
|                 vmas_cursor.move_next(); | ||||
|                 continue; | ||||
|             } | ||||
|             let intersection_vma = match vma.intersect(&range) { | ||||
|                 None => { | ||||
|                     vmas_cursor.move_next(); | ||||
|                     continue; | ||||
|                 } | ||||
|                 Some(intersection_vma) => intersection_vma, | ||||
|             }; | ||||
| 
 | ||||
|             // File-backed VMA needs to be flushed upon munmap
 | ||||
|             Self::flush_file_vma(&intersection_vma); | ||||
|             if !intersection_vma.perms().is_default() { | ||||
|                 VMPerms::apply_perms(&intersection_vma, VMPerms::default()); | ||||
|             } | ||||
| 
 | ||||
|             if vma.range() == intersection_vma.range() { | ||||
|                 // Exact match. Just remove.
 | ||||
|                 vmas_cursor.remove(); | ||||
|             } else { | ||||
|                 // The intersection_vma is a subset of current vma
 | ||||
|                 let mut remain_vmas = vma.subtract(&intersection_vma); | ||||
|                 if remain_vmas.len() == 1 { | ||||
|                     let new_obj = VMAObj::new_vma_obj(remain_vmas.pop().unwrap()); | ||||
|                     vmas_cursor.replace_with(new_obj); | ||||
|                     vmas_cursor.move_next(); | ||||
|                 } else { | ||||
|                     debug_assert!(remain_vmas.len() == 2); | ||||
|                     let vma_left_part = VMAObj::new_vma_obj(remain_vmas.swap_remove(0)); | ||||
|                     vmas_cursor.replace_with(vma_left_part); | ||||
|                     let vma_right_part = VMAObj::new_vma_obj(remain_vmas.pop().unwrap()); | ||||
|                     // The new element will be inserted at the correct position in the tree based on its key automatically.
 | ||||
|                     vmas_cursor.insert(vma_right_part); | ||||
|                 } | ||||
|             } | ||||
| 
 | ||||
|             // Reset the memory of the unmapped range to zero
 | ||||
|             unsafe { | ||||
|                 warn!("intersection vma = {:?}", intersection_vma); | ||||
|                 let buf = intersection_vma.as_slice_mut(); | ||||
|                 buf.iter_mut().for_each(|b| *b = 0) | ||||
|             } | ||||
| 
 | ||||
|             self.free_manager | ||||
|                 .add_range_back_to_free_manager(intersection_vma.range()); | ||||
|             self.free_size += intersection_vma.size(); | ||||
|         } | ||||
|         Ok(()) | ||||
|     } | ||||
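The branches above correspond to the three outcomes of subtracting the unmapped intersection from a VMA: exact match (zero remainders), trim from one side (one remainder), or punch a hole in the middle (two remainders). A tiny standalone model of that subtraction, with ranges as half-open (start, end) pairs and the unmapped part assumed to lie inside the VMA:

    // Subtract an inner sub-range `un` from `vma`; both are half-open ranges.
    fn subtract(vma: (usize, usize), un: (usize, usize)) -> Vec<(usize, usize)> {
        let mut remainders = Vec::new();
        if vma.0 < un.0 {
            remainders.push((vma.0, un.0)); // left part survives
        }
        if un.1 < vma.1 {
            remainders.push((un.1, vma.1)); // right part survives
        }
        remainders
    }

    fn main() {
        let vma = (0x1000, 0x4000);
        assert_eq!(subtract(vma, (0x1000, 0x4000)), vec![]); // exact match: remove
        assert_eq!(subtract(vma, (0x1000, 0x2000)), vec![(0x2000, 0x4000)]); // trim
        assert_eq!(
            subtract(vma, (0x2000, 0x3000)),
            vec![(0x1000, 0x2000), (0x3000, 0x4000)] // hole: split in two
        );
    }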
| 
 | ||||
|     pub fn munmap(&mut self, addr: usize, size: usize) -> Result<()> { | ||||
|         let size = { | ||||
|             if size == 0 { | ||||
|                 return_errno!(EINVAL, "size of munmap must not be zero"); | ||||
|             } | ||||
|             align_up(size, PAGE_SIZE) | ||||
|         }; | ||||
|         let munmap_range = { | ||||
|             let munmap_range = VMRange::new(addr, addr + size)?; | ||||
| 
 | ||||
|             let effective_munmap_range_opt = munmap_range.intersect(&self.range); | ||||
|             if effective_munmap_range_opt.is_none() { | ||||
|                 return Ok(()); | ||||
|             } | ||||
| 
 | ||||
|             let effective_munmap_range = effective_munmap_range_opt.unwrap(); | ||||
|             if effective_munmap_range.empty() { | ||||
|                 return Ok(()); | ||||
|             } | ||||
|             effective_munmap_range | ||||
|         }; | ||||
| 
 | ||||
|         self.munmap_range(munmap_range) | ||||
|     } | ||||
| 
 | ||||
|     pub fn mremap(&mut self, options: &VMRemapOptions) -> Result<usize> { | ||||
|         let old_addr = options.old_addr(); | ||||
|         let old_size = options.old_size(); | ||||
|         let old_range = VMRange::new_with_size(old_addr, old_size)?; | ||||
|         let new_size = options.new_size(); | ||||
|         let flags = options.flags(); | ||||
|         let size_type = SizeType::new(&old_size, &new_size); | ||||
| 
 | ||||
|         return_errno!(ENOSYS, "Under development"); | ||||
| 
 | ||||
|         // Old dead code. Could be used for future development.
 | ||||
|         #[cfg(dev)] | ||||
|         { | ||||
|             // The old range must be contained in one VMA
 | ||||
|             let idx = self | ||||
|                 .find_containing_vma_idx(&old_range) | ||||
|                 .ok_or_else(|| errno!(EFAULT, "invalid range"))?; | ||||
|             let containing_vma = &self.vmas[idx]; | ||||
|             // Get the memory permissions of the old range
 | ||||
|             let perms = containing_vma.perms(); | ||||
|             // Get the write back file of the old range if there is one.
 | ||||
|             let writeback_file = containing_vma.writeback_file(); | ||||
| 
 | ||||
|             // FIXME: The current implementation of file-backed mremap has a limitation: if a SUBRANGE of a previous
 | ||||
|             // file-backed mmap with MAP_SHARED is then mremap-ed with MREMAP_MAYMOVE, there will be two vmas backed by the same file.
 | ||||
|             // On Linux, writing to either vma or the file updates the others as well. We won't be able to support this before
 | ||||
|             // we really have paging. Thus, if the old_range is not equal to a recorded vma, we just return an error.
 | ||||
|             if writeback_file.is_some() && &old_range != containing_vma.range() { | ||||
|                 return_errno!(EINVAL, "Known limitation") | ||||
|             } | ||||
| 
 | ||||
|             // Implement mremap as one optional mmap followed by one optional munmap.
 | ||||
|             //
 | ||||
|             // The exact arguments for the mmap and munmap are determined by the values of MRemapFlags,
 | ||||
|             // SizeType and writeback_file. There is a total of 18 combinations among MRemapFlags and
 | ||||
|             // SizeType and writeback_file. As some combinations result in the same mmap and munmap operations,
 | ||||
|             // the following code only needs to match below patterns of (MRemapFlags, SizeType, writeback_file)
 | ||||
|             // and treat each case accordingly.
 | ||||
| 
 | ||||
|             // Determine whether need to do mmap. And when possible, determine the returned address
 | ||||
|             let (need_mmap, mut ret_addr) = match (flags, size_type, writeback_file) { | ||||
|                 (MRemapFlags::None, SizeType::Growing, None) => { | ||||
|                     let vm_initializer_for_new_range = VMInitializer::FillZeros(); | ||||
|                     let mmap_opts = VMMapOptionsBuilder::default() | ||||
|                         .size(new_size - old_size) | ||||
|                         .addr(VMMapAddr::Need(old_range.end())) | ||||
|                         .perms(perms) | ||||
|                         .initializer(vm_initializer_for_new_range) | ||||
|                         .build()?; | ||||
|                     let ret_addr = Some(old_addr); | ||||
|                     (Some(mmap_opts), ret_addr) | ||||
|                 } | ||||
|                 (MRemapFlags::None, SizeType::Growing, Some((backed_file, offset))) => { | ||||
|                     // Update writeback file offset
 | ||||
|                     let new_writeback_file = | ||||
|                         Some((backed_file.clone(), offset + containing_vma.size())); | ||||
|                     let vm_initializer_for_new_range = VMInitializer::LoadFromFile { | ||||
|                         file: backed_file.clone(), | ||||
|                         offset: offset + containing_vma.size(), // file-backed mremap should start from the end of previous mmap/mremap file
 | ||||
|                     }; | ||||
|                     let mmap_opts = VMMapOptionsBuilder::default() | ||||
|                         .size(new_size - old_size) | ||||
|                         .addr(VMMapAddr::Need(old_range.end())) | ||||
|                         .perms(perms) | ||||
|                         .initializer(vm_initializer_for_new_range) | ||||
|                         .writeback_file(new_writeback_file) | ||||
|                         .build()?; | ||||
|                     let ret_addr = Some(old_addr); | ||||
|                     (Some(mmap_opts), ret_addr) | ||||
|                 } | ||||
|                 (MRemapFlags::MayMove, SizeType::Growing, None) => { | ||||
|                     let prefered_new_range = | ||||
|                         VMRange::new_with_size(old_addr + old_size, new_size - old_size)?; | ||||
|                     if self.is_free_range(&prefered_new_range) { | ||||
|                         // Don't need to move the old range
 | ||||
|                         let vm_initializer_for_new_range = VMInitializer::FillZeros(); | ||||
|                         let mmap_ops = VMMapOptionsBuilder::default() | ||||
|                             .size(prefered_new_range.size()) | ||||
|                             .addr(VMMapAddr::Need(prefered_new_range.start())) | ||||
|                             .perms(perms) | ||||
|                             .initializer(vm_initializer_for_new_range) | ||||
|                             .build()?; | ||||
|                         (Some(mmap_ops), Some(old_addr)) | ||||
|                     } else { | ||||
|                         // Need to move old range to a new range and init the new range
 | ||||
|                         let vm_initializer_for_new_range = | ||||
|                             VMInitializer::CopyFrom { range: old_range }; | ||||
|                         let mmap_ops = VMMapOptionsBuilder::default() | ||||
|                             .size(new_size) | ||||
|                             .addr(VMMapAddr::Any) | ||||
|                             .perms(perms) | ||||
|                             .initializer(vm_initializer_for_new_range) | ||||
|                             .build()?; | ||||
|                         // Cannot determine the returned address for now, which can only be obtained after calling mmap
 | ||||
|                         let ret_addr = None; | ||||
|                         (Some(mmap_ops), ret_addr) | ||||
|                     } | ||||
|                 } | ||||
|                 (MRemapFlags::MayMove, SizeType::Growing, Some((backed_file, offset))) => { | ||||
|                     let prefered_new_range = | ||||
|                         VMRange::new_with_size(old_addr + old_size, new_size - old_size)?; | ||||
|                     if self.is_free_range(&prefered_new_range) { | ||||
|                         // Don't need to move the old range
 | ||||
|                         let vm_initializer_for_new_range = VMInitializer::LoadFromFile { | ||||
|                             file: backed_file.clone(), | ||||
|                             offset: offset + containing_vma.size(), // file-backed mremap should start from the end of previous mmap/mremap file
 | ||||
|                         }; | ||||
|                         // Write back file should start from new offset
 | ||||
|                         let new_writeback_file = | ||||
|                             Some((backed_file.clone(), offset + containing_vma.size())); | ||||
|                         let mmap_ops = VMMapOptionsBuilder::default() | ||||
|                             .size(prefered_new_range.size()) | ||||
|                             .addr(VMMapAddr::Need(prefered_new_range.start())) | ||||
|                             .perms(perms) | ||||
|                             .initializer(vm_initializer_for_new_range) | ||||
|                             .writeback_file(new_writeback_file) | ||||
|                             .build()?; | ||||
|                         (Some(mmap_ops), Some(old_addr)) | ||||
|                     } else { | ||||
|                         // Need to move old range to a new range and init the new range
 | ||||
|                         let vm_initializer_for_new_range = { | ||||
|                             let copy_end = containing_vma.end(); | ||||
|                             let copy_range = VMRange::new(old_range.start(), copy_end)?; | ||||
|                             let reread_file_start_offset = copy_end - containing_vma.start(); | ||||
|                             VMInitializer::CopyOldAndReadNew { | ||||
|                                 old_range: copy_range, | ||||
|                                 file: backed_file.clone(), | ||||
|                                 offset: reread_file_start_offset, | ||||
|                             } | ||||
|                         }; | ||||
|                         let new_writeback_file = Some((backed_file.clone(), *offset)); | ||||
|                         let mmap_ops = VMMapOptionsBuilder::default() | ||||
|                             .size(new_size) | ||||
|                             .addr(VMMapAddr::Any) | ||||
|                             .perms(perms) | ||||
|                             .initializer(vm_initializer_for_new_range) | ||||
|                             .writeback_file(new_writeback_file) | ||||
|                             .build()?; | ||||
|                         // Cannot determine the returned address for now, which can only be obtained after calling mmap
 | ||||
|                         let ret_addr = None; | ||||
|                         (Some(mmap_ops), ret_addr) | ||||
|                     } | ||||
|                 } | ||||
|                 (MRemapFlags::FixedAddr(new_addr), _, None) => { | ||||
|                     let vm_initializer_for_new_range = | ||||
|                         { VMInitializer::CopyFrom { range: old_range } }; | ||||
|                     let mmap_opts = VMMapOptionsBuilder::default() | ||||
|                         .size(new_size) | ||||
|                         .addr(VMMapAddr::Force(new_addr)) | ||||
|                         .perms(perms) | ||||
|                         .initializer(vm_initializer_for_new_range) | ||||
|                         .build()?; | ||||
|                     let ret_addr = Some(new_addr); | ||||
|                     (Some(mmap_opts), ret_addr) | ||||
|                 } | ||||
|                 (MRemapFlags::FixedAddr(new_addr), _, Some((backed_file, offset))) => { | ||||
|                     let vm_initializer_for_new_range = { | ||||
|                         let copy_end = containing_vma.end(); | ||||
|                         let copy_range = VMRange::new(old_range.start(), copy_end)?; | ||||
|                         let reread_file_start_offset = copy_end - containing_vma.start(); | ||||
|                         VMInitializer::CopyOldAndReadNew { | ||||
|                             old_range: copy_range, | ||||
|                             file: backed_file.clone(), | ||||
|                             offset: reread_file_start_offset, | ||||
|                         } | ||||
|                     }; | ||||
|                     let new_writeback_file = Some((backed_file.clone(), *offset)); | ||||
|                     let mmap_opts = VMMapOptionsBuilder::default() | ||||
|                         .size(new_size) | ||||
|                         .addr(VMMapAddr::Force(new_addr)) | ||||
|                         .perms(perms) | ||||
|                         .initializer(vm_initializer_for_new_range) | ||||
|                         .writeback_file(new_writeback_file) | ||||
|                         .build()?; | ||||
|                     let ret_addr = Some(new_addr); | ||||
|                     (Some(mmap_opts), ret_addr) | ||||
|                 } | ||||
|                 _ => (None, Some(old_addr)), | ||||
|             }; | ||||
| 
 | ||||
|             let need_munmap = match (flags, size_type) { | ||||
|                 (MRemapFlags::None, SizeType::Shrinking) | ||||
|                 | (MRemapFlags::MayMove, SizeType::Shrinking) => { | ||||
|                     let unmap_addr = old_addr + new_size; | ||||
|                     let unmap_size = old_size - new_size; | ||||
|                     Some((unmap_addr, unmap_size)) | ||||
|                 } | ||||
|                 (MRemapFlags::MayMove, SizeType::Growing) => { | ||||
|                     if ret_addr.is_none() { | ||||
|                         // We must be moving to a new range via mmap. Thus unmap the old range
 | ||||
|                         Some((old_addr, old_size)) | ||||
|                     } else { | ||||
|                         // We are reusing the old range. Thus, no need to unmap
 | ||||
|                         None | ||||
|                     } | ||||
|                 } | ||||
|                 (MRemapFlags::FixedAddr(new_addr), _) => { | ||||
|                     let new_range = VMRange::new_with_size(new_addr, new_size)?; | ||||
|                     if new_range.overlap_with(&old_range) { | ||||
|                         return_errno!(EINVAL, "new range cannot overlap with the old one"); | ||||
|                     } | ||||
|                     Some((old_addr, old_size)) | ||||
|                 } | ||||
|                 _ => None, | ||||
|             }; | ||||
| 
 | ||||
|             // Perform mmap and munmap if needed
 | ||||
|             if let Some(mmap_options) = need_mmap { | ||||
|                 let mmap_addr = self.mmap(&mmap_options)?; | ||||
| 
 | ||||
|                 if ret_addr.is_none() { | ||||
|                     ret_addr = Some(mmap_addr); | ||||
|                 } | ||||
|             } | ||||
|             if let Some((addr, size)) = need_munmap { | ||||
|                 self.munmap(addr, size).expect("never fail"); | ||||
|             } | ||||
| 
 | ||||
|             debug_assert!(ret_addr.is_some()); | ||||
|             Ok(ret_addr.unwrap()) | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     pub fn mprotect(&mut self, addr: usize, size: usize, new_perms: VMPerms) -> Result<()> { | ||||
|         let protect_range = VMRange::new_with_size(addr, size)?; | ||||
|         let bound = protect_range.start(); | ||||
|         let mut containing_vmas = self.vmas.upper_bound_mut(Bound::Included(&bound)); | ||||
|         if containing_vmas.is_null() { | ||||
|             return_errno!(ENOMEM, "invalid range"); | ||||
|         } | ||||
|         let current_pid = current!().process().pid(); | ||||
| 
 | ||||
|         // If the mprotect range is not a subrange of one vma, it must be a subrange of multiple contiguous vmas.
 | ||||
|         while !containing_vmas.is_null() | ||||
|             && containing_vmas.get().unwrap().vma().start() <= protect_range.end() | ||||
|         { | ||||
|             let mut containing_vma = containing_vmas.get().unwrap().vma().clone(); | ||||
|             if containing_vma.pid() != current_pid { | ||||
|                 containing_vmas.move_next(); | ||||
|                 continue; | ||||
|             } | ||||
| 
 | ||||
|             let old_perms = containing_vma.perms(); | ||||
|             if new_perms == old_perms { | ||||
|                 containing_vmas.move_next(); | ||||
|                 continue; | ||||
|             } | ||||
| 
 | ||||
|             let intersection_vma = match containing_vma.intersect(&protect_range) { | ||||
|                 None => { | ||||
|                     containing_vmas.move_next(); | ||||
|                     continue; | ||||
|                 } | ||||
|                 Some(intersection_vma) => intersection_vma, | ||||
|             }; | ||||
| 
 | ||||
|             if intersection_vma.range() == containing_vma.range() { | ||||
|                 // The whole containing_vma is mprotected
 | ||||
|                 containing_vma.set_perms(new_perms); | ||||
|                 VMPerms::apply_perms(&containing_vma, containing_vma.perms()); | ||||
|                 warn!("containing_vma = {:?}", containing_vma); | ||||
|                 containing_vmas.replace_with(VMAObj::new_vma_obj(containing_vma)); | ||||
|                 containing_vmas.move_next(); | ||||
|                 continue; | ||||
|             } else { | ||||
|                 // A subrange of containing_vma is mprotected
 | ||||
|                 debug_assert!(containing_vma.is_superset_of(&intersection_vma)); | ||||
|                 let mut remain_vmas = containing_vma.subtract(&intersection_vma); | ||||
|                 match remain_vmas.len() { | ||||
|                     2 => { | ||||
|                         // The containing VMA is divided into three VMAs:
 | ||||
|                         // Shrunken old VMA:    [containing_vma.start,     protect_range.start)
 | ||||
|                         // New VMA:             [protect_range.start,      protect_range.end)
 | ||||
|                         // Another new VMA:     [protect_range.end,        containing_vma.end)
 | ||||
|                         let old_end = containing_vma.end(); | ||||
|                         let protect_end = protect_range.end(); | ||||
| 
 | ||||
|                         // Shrunken old VMA
 | ||||
|                         containing_vma.set_end(protect_range.start()); | ||||
| 
 | ||||
|                         // New VMA
 | ||||
|                         let new_vma = VMArea::inherits_file_from( | ||||
|                             &containing_vma, | ||||
|                             protect_range, | ||||
|                             new_perms, | ||||
|                             current_pid, | ||||
|                         ); | ||||
|                         VMPerms::apply_perms(&new_vma, new_vma.perms()); | ||||
|                         let new_vma = VMAObj::new_vma_obj(new_vma); | ||||
| 
 | ||||
|                         // Another new VMA
 | ||||
|                         let new_vma2 = { | ||||
|                             let range = VMRange::new(protect_end, old_end).unwrap(); | ||||
|                             let new_vma = VMArea::inherits_file_from( | ||||
|                                 &containing_vma, | ||||
|                                 range, | ||||
|                                 old_perms, | ||||
|                                 current_pid, | ||||
|                             ); | ||||
|                             VMAObj::new_vma_obj(new_vma) | ||||
|                         }; | ||||
| 
 | ||||
|                         containing_vmas.replace_with(VMAObj::new_vma_obj(containing_vma)); | ||||
|                         containing_vmas.insert(new_vma); | ||||
|                         containing_vmas.insert(new_vma2); | ||||
|                         // In this case, there is no need to check other vmas.
 | ||||
|                         break; | ||||
|                     } | ||||
|                     1 => { | ||||
|                         let remain_vma = remain_vmas.pop().unwrap(); | ||||
|                         if remain_vma.start() == containing_vma.start() { | ||||
|                             // mprotect right side of the vma
 | ||||
|                             containing_vma.set_end(remain_vma.end()); | ||||
|                         } else { | ||||
|                             // mprotect left side of the vma
 | ||||
|                             debug_assert!(remain_vma.end() == containing_vma.end()); | ||||
|                             containing_vma.set_start(remain_vma.start()); | ||||
|                         } | ||||
|                         let new_vma = VMArea::inherits_file_from( | ||||
|                             &containing_vma, | ||||
|                             intersection_vma.range().clone(), | ||||
|                             new_perms, | ||||
|                             current_pid, | ||||
|                         ); | ||||
|                         VMPerms::apply_perms(&new_vma, new_vma.perms()); | ||||
| 
 | ||||
|                         containing_vmas.replace_with(VMAObj::new_vma_obj(containing_vma)); | ||||
|                         containing_vmas.insert(VMAObj::new_vma_obj(new_vma)); | ||||
|                         containing_vmas.move_next(); | ||||
|                         continue; | ||||
|                     } | ||||
|                     _ => unreachable!(), | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
|         Ok(()) | ||||
|     } | ||||
| 
 | ||||
|     /// Sync all shared, file-backed memory mappings in the given range by flushing the
 | ||||
|     /// memory content to its underlying file.
 | ||||
|     pub fn msync_by_range(&mut self, sync_range: &VMRange) -> Result<()> { | ||||
|         if !self.range().is_superset_of(sync_range) { | ||||
|             return_errno!(ENOMEM, "invalid range"); | ||||
|         } | ||||
| 
 | ||||
|         // FIXME: check if sync_range covers unmapped memory
 | ||||
|         for vma_obj in &self.vmas { | ||||
|             let vma = match vma_obj.vma().intersect(sync_range) { | ||||
|                 None => continue, | ||||
|                 Some(vma) => vma, | ||||
|             }; | ||||
|             Self::flush_file_vma(&vma); | ||||
|         } | ||||
|         Ok(()) | ||||
|     } | ||||
| 
 | ||||
|     /// Sync all shared, file-backed memory mappings of the given file by flushing
 | ||||
|     /// the memory content to the file.
 | ||||
|     pub fn msync_by_file(&mut self, sync_file: &FileRef) { | ||||
|         for vma_obj in &self.vmas { | ||||
|             let is_same_file = |file: &FileRef| -> bool { Arc::ptr_eq(&file, &sync_file) }; | ||||
|             Self::flush_file_vma_with_cond(&vma_obj.vma(), is_same_file); | ||||
|         } | ||||
|     } | ||||
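Note that msync_by_file matches mappings by file object identity (Arc::ptr_eq), not by path or contents. A small standalone demonstration of the difference:

    use std::sync::Arc;

    fn main() {
        let f1 = Arc::new(String::from("file"));
        let f2 = f1.clone();                     // same underlying object
        let f3 = Arc::new(String::from("file")); // equal contents, new object
        assert!(Arc::ptr_eq(&f1, &f2));
        assert!(!Arc::ptr_eq(&f1, &f3));
    }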
| 
 | ||||
|     /// Flush a file-backed VMA to its file. This has no effect on anonymous VMAs.
 | ||||
|     pub fn flush_file_vma(vma: &VMArea) { | ||||
|         Self::flush_file_vma_with_cond(vma, |_| true) | ||||
|     } | ||||
| 
 | ||||
|     /// Same as flush_file_vma, except that the backing file must additionally satisfy the given condition.
 | ||||
|     pub fn flush_file_vma_with_cond<F: Fn(&FileRef) -> bool>(vma: &VMArea, cond_fn: F) { | ||||
|         let (file, file_offset) = match vma.writeback_file().as_ref() { | ||||
|             None => return, | ||||
|             Some(file_and_offset) => file_and_offset, | ||||
|         }; | ||||
|         let file_writable = file | ||||
|             .access_mode() | ||||
|             .map(|ac| ac.writable()) | ||||
|             .unwrap_or_default(); | ||||
|         if !file_writable { | ||||
|             return; | ||||
|         } | ||||
|         if !cond_fn(file) { | ||||
|             return; | ||||
|         } | ||||
|         file.write_at(*file_offset, unsafe { vma.as_slice() }); | ||||
|     } | ||||
| 
 | ||||
|     pub fn find_mmap_region(&self, addr: usize) -> Result<VMRange> { | ||||
|         let vma = self.vmas.upper_bound(Bound::Included(&addr)); | ||||
|         if vma.is_null() { | ||||
|             return_errno!(ESRCH, "no mmap region contains the address"); | ||||
|         } | ||||
|         let vma = vma.get().unwrap().vma(); | ||||
|         if vma.pid() != current!().process().pid() || !vma.contains(addr) { | ||||
|             return_errno!(ESRCH, "no mmap region contains the address"); | ||||
|         } | ||||
| 
 | ||||
|         return Ok(vma.range().clone()); | ||||
|     } | ||||
| 
 | ||||
|     pub fn usage_percentage(&self) -> f32 { | ||||
|         let total_size = self.range.size(); | ||||
|         let mut used_size = 0; | ||||
|         self.vmas | ||||
|             .iter() | ||||
|             .for_each(|vma_obj| used_size += vma_obj.vma().size()); | ||||
| 
 | ||||
|         return used_size as f32 / total_size as f32; | ||||
|     } | ||||
| 
 | ||||
|     // Returns whether the requested range is free, i.e., not overlapping any vma in use
 | ||||
|     fn is_free_range(&self, request_range: &VMRange) -> bool { | ||||
|         self.range.is_superset_of(request_range) | ||||
|             && self | ||||
|                 .vmas | ||||
|                 .iter() | ||||
|                 .all(|vma_obj| !vma_obj.vma().range().overlap_with(request_range)) | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| impl Drop for ChunkManager { | ||||
|     fn drop(&mut self) { | ||||
|         assert!(self.is_empty()); | ||||
|         assert!(self.free_size == self.range.size()); | ||||
|         assert!(self.free_manager.free_size() == self.range.size()); | ||||
|     } | ||||
| } | ||||
| @ -8,7 +8,7 @@ pub struct VMLayout { | ||||
| 
 | ||||
| impl VMLayout { | ||||
|     pub fn new(size: usize, align: usize) -> Result<VMLayout> { | ||||
|         if !align.is_power_of_two() || align % PAGE_SIZE != 0 { | ||||
|         if !align.is_power_of_two() { | ||||
|             return_errno!(EINVAL, "invalid layout"); | ||||
|         } | ||||
|         Ok(VMLayout { size, align }) | ||||
|  | ||||
										
											
File diff suppressed because it is too large
							| @ -31,6 +31,26 @@ impl VMPerms { | ||||
|     pub fn is_default(&self) -> bool { | ||||
|         self.bits == Self::DEFAULT.bits | ||||
|     } | ||||
| 
 | ||||
|     pub fn apply_perms(protect_range: &VMRange, perms: VMPerms) { | ||||
|         extern "C" { | ||||
|             pub fn occlum_ocall_mprotect( | ||||
|                 retval: *mut i32, | ||||
|                 addr: *const c_void, | ||||
|                 len: usize, | ||||
|                 prot: i32, | ||||
|             ) -> sgx_status_t; | ||||
|         }; | ||||
| 
 | ||||
|         unsafe { | ||||
|             let mut retval = 0; | ||||
|             let addr = protect_range.start() as *const c_void; | ||||
|             let len = protect_range.size(); | ||||
|             let prot = perms.bits() as i32; | ||||
|             let sgx_status = occlum_ocall_mprotect(&mut retval, addr, len, prot); | ||||
|             assert!(sgx_status == sgx_status_t::SGX_SUCCESS && retval == 0); | ||||
|         } | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| impl Default for VMPerms { | ||||
|  | ||||
| @ -1,6 +1,6 @@ | ||||
| use super::*; | ||||
| 
 | ||||
| #[derive(Clone, Copy, Default, PartialEq)] | ||||
| #[derive(Clone, Copy, Default, Eq, PartialEq, Hash)] | ||||
| pub struct VMRange { | ||||
|     pub(super) start: usize, | ||||
|     pub(super) end: usize, | ||||
| @ -130,7 +130,7 @@ impl VMRange { | ||||
|     pub fn intersect(&self, other: &VMRange) -> Option<VMRange> { | ||||
|         let intersection_start = self.start().max(other.start()); | ||||
|         let intersection_end = self.end().min(other.end()); | ||||
|         if intersection_start > intersection_end { | ||||
|         if intersection_start >= intersection_end { | ||||
|             return None; | ||||
|         } | ||||
|         unsafe { | ||||
|  | ||||
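With the comparison changed from > to >=, ranges that merely touch now produce no intersection instead of an empty VMRange, which would otherwise surface as a zero-length "match" in munmap and mprotect. A standalone model of the fixed semantics, with ranges as half-open (start, end) pairs:

    fn intersect(a: (usize, usize), b: (usize, usize)) -> Option<(usize, usize)> {
        let start = a.0.max(b.0);
        let end = a.1.min(b.1);
        if start >= end {
            // With the old `>` comparison, touching ranges fell through and
            // produced an empty (start == end) range here.
            return None;
        }
        Some((start, end))
    }

    fn main() {
        assert_eq!(intersect((0x1000, 0x2000), (0x2000, 0x3000)), None); // touching
        assert_eq!(
            intersect((0x1000, 0x3000), (0x2000, 0x4000)),
            Some((0x2000, 0x3000))
        );
    }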
							
								
								
									
276 src/libos/src/vm/vm_util.rs Normal file
| @ -0,0 +1,276 @@ | ||||
| use super::*; | ||||
| 
 | ||||
| // use super::vm_area::VMArea;
 | ||||
| // use super::free_space_manager::VMFreeSpaceManager;
 | ||||
| use super::vm_area::*; | ||||
| use super::vm_perms::VMPerms; | ||||
| use std::collections::BTreeSet; | ||||
| 
 | ||||
| use intrusive_collections::rbtree::{Link, RBTree}; | ||||
| use intrusive_collections::Bound; | ||||
| use intrusive_collections::RBTreeLink; | ||||
| use intrusive_collections::{intrusive_adapter, KeyAdapter}; | ||||
| 
 | ||||
| #[derive(Clone, Debug)] | ||||
| pub enum VMInitializer { | ||||
|     DoNothing(), | ||||
|     FillZeros(), | ||||
|     CopyFrom { | ||||
|         range: VMRange, | ||||
|     }, | ||||
|     LoadFromFile { | ||||
|         file: FileRef, | ||||
|         offset: usize, | ||||
|     }, | ||||
|     // For file-backed mremap which may move from old range to new range and read extra bytes from file
 | ||||
|     CopyOldAndReadNew { | ||||
|         old_range: VMRange, | ||||
|         file: FileRef, | ||||
|         offset: usize, // read file from this offset
 | ||||
|     }, | ||||
| } | ||||
| 
 | ||||
| impl Default for VMInitializer { | ||||
|     fn default() -> VMInitializer { | ||||
|         VMInitializer::DoNothing() | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| impl VMInitializer { | ||||
|     pub fn init_slice(&self, buf: &mut [u8]) -> Result<()> { | ||||
|         match self { | ||||
|             VMInitializer::DoNothing() => { | ||||
|                 // Do nothing
 | ||||
|             } | ||||
|             VMInitializer::FillZeros() => { | ||||
|                 for b in buf { | ||||
|                     *b = 0; | ||||
|                 } | ||||
|             } | ||||
|             VMInitializer::CopyFrom { range } => { | ||||
|                 let src_slice = unsafe { range.as_slice() }; | ||||
|                 let copy_len = min(buf.len(), src_slice.len()); | ||||
|                 buf[..copy_len].copy_from_slice(&src_slice[..copy_len]); | ||||
|                 for b in &mut buf[copy_len..] { | ||||
|                     *b = 0; | ||||
|                 } | ||||
|             } | ||||
|             VMInitializer::LoadFromFile { file, offset } => { | ||||
|                 // TODO: make sure that read_at does not move file cursor
 | ||||
|                 let len = file | ||||
|                     .read_at(*offset, buf) | ||||
|                     .cause_err(|_| errno!(EIO, "failed to init memory from file"))?; | ||||
|                 for b in &mut buf[len..] { | ||||
|                     *b = 0; | ||||
|                 } | ||||
|             } | ||||
|             VMInitializer::CopyOldAndReadNew { | ||||
|                 old_range, | ||||
|                 file, | ||||
|                 offset, | ||||
|             } => { | ||||
|                 // TODO: Handle old_range with non-readable subrange
 | ||||
|                 let src_slice = unsafe { old_range.as_slice() }; | ||||
|                 let copy_len = src_slice.len(); | ||||
|                 debug_assert!(copy_len <= buf.len()); | ||||
|                 let read_len = buf.len() - copy_len; | ||||
|                 buf[..copy_len].copy_from_slice(&src_slice[..copy_len]); | ||||
|                 let len = file | ||||
|                     .read_at(*offset, &mut buf[copy_len..]) | ||||
|                     .cause_err(|_| errno!(EIO, "failed to init memory from file"))?; | ||||
|                 for b in &mut buf[(copy_len + len)..] { | ||||
|                     *b = 0; | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|         Ok(()) | ||||
|     } | ||||
| } | ||||
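All of the initializers share one invariant: any tail bytes not covered by the source (old range or file) are zeroed, so a fresh mapping never leaks stale data. A standalone demo of the CopyFrom rule:

    // Copy as much of `src` as fits, then zero the remainder of `buf`.
    fn copy_from(src: &[u8], buf: &mut [u8]) {
        let copy_len = src.len().min(buf.len());
        buf[..copy_len].copy_from_slice(&src[..copy_len]);
        buf[copy_len..].fill(0);
    }

    fn main() {
        let mut buf = [0xffu8; 8];
        copy_from(&[1, 2, 3], &mut buf);
        assert_eq!(buf, [1, 2, 3, 0, 0, 0, 0, 0]);
    }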
| 
 | ||||
| #[derive(Clone, Copy, Debug, PartialEq)] | ||||
| pub enum VMMapAddr { | ||||
|     Any,          // Free to choose any address
 | ||||
|     Hint(usize),  // Prefer the address, but can use other address
 | ||||
|     Need(usize),  // Need to use the address, otherwise report error
 | ||||
|     Force(usize), // Force using the address by munmap first
 | ||||
| } | ||||
| 
 | ||||
| impl Default for VMMapAddr { | ||||
|     fn default() -> VMMapAddr { | ||||
|         VMMapAddr::Any | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| #[derive(Builder, Debug)] | ||||
| #[builder(pattern = "owned", build_fn(skip), no_std)] | ||||
| pub struct VMMapOptions { | ||||
|     size: usize, | ||||
|     align: usize, | ||||
|     perms: VMPerms, | ||||
|     addr: VMMapAddr, | ||||
|     initializer: VMInitializer, | ||||
|     // The content of the VMA can be written back to a given file at a given offset
 | ||||
|     writeback_file: Option<(FileRef, usize)>, | ||||
| } | ||||
| 
 | ||||
| // VMMapOptionsBuilder is generated automatically, except the build function
 | ||||
| impl VMMapOptionsBuilder { | ||||
|     pub fn build(mut self) -> Result<VMMapOptions> { | ||||
|         let size = { | ||||
|             let size = self | ||||
|                 .size | ||||
|                 .ok_or_else(|| errno!(EINVAL, "invalid size for mmap"))?; | ||||
|             if size == 0 { | ||||
|                 return_errno!(EINVAL, "invalid size for mmap"); | ||||
|             } | ||||
|             align_up(size, PAGE_SIZE) | ||||
|         }; | ||||
|         let align = { | ||||
|             let align = self.align.unwrap_or(PAGE_SIZE); | ||||
|             if align == 0 || !align.is_power_of_two() { | ||||
|                 return_errno!(EINVAL, "invalid align for mmap"); | ||||
|             } | ||||
|             align | ||||
|         }; | ||||
|         let perms = self | ||||
|             .perms | ||||
|             .ok_or_else(|| errno!(EINVAL, "perms must be given"))?; | ||||
|         let addr = { | ||||
|             let addr = self.addr.unwrap_or_default(); | ||||
|             match addr { | ||||
|                 // TODO: check addr + size overflow
 | ||||
|                 VMMapAddr::Any => VMMapAddr::Any, | ||||
|                 VMMapAddr::Hint(addr) => { | ||||
|                     let addr = align_down(addr, PAGE_SIZE); | ||||
|                     VMMapAddr::Hint(addr) | ||||
|                 } | ||||
|                 VMMapAddr::Need(addr_) | VMMapAddr::Force(addr_) => { | ||||
|                     if addr_ % align != 0 { | ||||
|                         return_errno!(EINVAL, "unaligned addr for fixed mmap"); | ||||
|                     } | ||||
|                     addr | ||||
|                 } | ||||
|             } | ||||
|         }; | ||||
|         let initializer = match self.initializer.as_ref() { | ||||
|             Some(initializer) => initializer.clone(), | ||||
|             None => VMInitializer::default(), | ||||
|         }; | ||||
|         let writeback_file = self.writeback_file.take().unwrap_or_default(); | ||||
|         Ok(VMMapOptions { | ||||
|             size, | ||||
|             align, | ||||
|             perms, | ||||
|             addr, | ||||
|             initializer, | ||||
|             writeback_file, | ||||
|         }) | ||||
|     } | ||||
| } | ||||
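The build_fn(skip) attribute tells derive_builder to generate only the builder struct and its setters, leaving build() to be written by hand so that validation (size rounding, alignment checks, defaults) can return the LibOS's own error type. A minimal standalone sketch of the same pattern, with made-up field names and a String error in place of Occlum's Result:

    use derive_builder::Builder;

    #[derive(Builder, Debug)]
    #[builder(pattern = "owned", build_fn(skip))]
    struct MapOpts {
        size: usize,
        align: usize,
    }

    // Hand-written build(): validate inputs and fill in defaults.
    impl MapOptsBuilder {
        fn build(self) -> Result<MapOpts, String> {
            let size = self.size.ok_or("size must be given")?;
            let align = self.align.unwrap_or(4096);
            if !align.is_power_of_two() {
                return Err("align must be a power of two".into());
            }
            Ok(MapOpts { size, align })
        }
    }

    fn main() {
        let opts = MapOptsBuilder::default().size(8192).build().unwrap();
        println!("{:?}", opts);
    }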
| 
 | ||||
| impl VMMapOptions { | ||||
|     pub fn size(&self) -> &usize { | ||||
|         &self.size | ||||
|     } | ||||
| 
 | ||||
|     pub fn addr(&self) -> &VMMapAddr { | ||||
|         &self.addr | ||||
|     } | ||||
| 
 | ||||
|     pub fn perms(&self) -> &VMPerms { | ||||
|         &self.perms | ||||
|     } | ||||
| 
 | ||||
|     pub fn align(&self) -> &usize { | ||||
|         &self.align | ||||
|     } | ||||
| 
 | ||||
|     pub fn initializer(&self) -> &VMInitializer { | ||||
|         &self.initializer | ||||
|     } | ||||
| 
 | ||||
|     pub fn writeback_file(&self) -> &Option<(FileRef, usize)> { | ||||
|         &self.writeback_file | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| #[derive(Clone, Copy, PartialEq)] | ||||
| pub enum SizeType { | ||||
|     Same, | ||||
|     Shrinking, | ||||
|     Growing, | ||||
| } | ||||
| 
 | ||||
| impl SizeType { | ||||
|     pub fn new(old_size: &usize, new_size: &usize) -> Self { | ||||
|         if new_size == old_size { | ||||
|             SizeType::Same | ||||
|         } else if new_size < old_size { | ||||
|             SizeType::Shrinking | ||||
|         } else { | ||||
|             SizeType::Growing | ||||
|         } | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| #[derive(Debug)] | ||||
| pub struct VMRemapOptions { | ||||
|     old_addr: usize, | ||||
|     old_size: usize, | ||||
|     new_size: usize, | ||||
|     flags: MRemapFlags, | ||||
| } | ||||
| 
 | ||||
| impl VMRemapOptions { | ||||
|     pub fn new( | ||||
|         old_addr: usize, | ||||
|         old_size: usize, | ||||
|         new_size: usize, | ||||
|         flags: MRemapFlags, | ||||
|     ) -> Result<Self> { | ||||
|         let old_addr = if old_addr % PAGE_SIZE != 0 { | ||||
|             return_errno!(EINVAL, "unaligned old address"); | ||||
|         } else { | ||||
|             old_addr | ||||
|         }; | ||||
|         let old_size = if old_size == 0 { | ||||
|             // TODO: support zero old_size for shareable mappings
 | ||||
|             warn!("old_size of zero is not supported"); | ||||
|             return_errno!(EINVAL, "invalid old size"); | ||||
|         } else { | ||||
|             align_up(old_size, PAGE_SIZE) | ||||
|         }; | ||||
|         if let Some(new_addr) = flags.new_addr() { | ||||
|             if new_addr % PAGE_SIZE != 0 { | ||||
|                 return_errno!(EINVAL, "unaligned new address"); | ||||
|             } | ||||
|         } | ||||
|         let new_size = if new_size == 0 { | ||||
|             return_errno!(EINVAL, "invalid new size"); | ||||
|         } else { | ||||
|             align_up(new_size, PAGE_SIZE) | ||||
|         }; | ||||
|         Ok(Self { | ||||
|             old_addr, | ||||
|             old_size, | ||||
|             new_size, | ||||
|             flags, | ||||
|         }) | ||||
|     } | ||||
| 
 | ||||
|     pub fn old_addr(&self) -> usize { | ||||
|         self.old_addr | ||||
|     } | ||||
| 
 | ||||
|     pub fn old_size(&self) -> usize { | ||||
|         self.old_size | ||||
|     } | ||||
| 
 | ||||
|     pub fn new_size(&self) -> usize { | ||||
|         self.new_size | ||||
|     } | ||||
| 
 | ||||
|     pub fn flags(&self) -> MRemapFlags { | ||||
|         self.flags | ||||
|     } | ||||
| } | ||||
| @ -239,7 +239,6 @@ int occlum_pal_destroy(void) { | ||||
|     } | ||||
| 
 | ||||
|     int ret = 0; | ||||
| 
 | ||||
|     if (pal_interrupt_thread_stop() < 0) { | ||||
|         ret = -1; | ||||
|         PAL_WARN("Cannot stop the interrupt thread: %s", errno2str(errno)); | ||||
|  | ||||
| @ -47,6 +47,10 @@ static int get_a_valid_range_of_hints(size_t *hint_begin, size_t *hint_end) { | ||||
|     if (big_buf == MAP_FAILED) { | ||||
|         THROW_ERROR("mmap failed"); | ||||
|     } | ||||
| 
 | ||||
|     // Dirty the buffer so that a later mapping can verify that munmap cleans the range
 | ||||
|     memset(big_buf, 0xff, big_buf_len); | ||||
| 
 | ||||
|     int ret = munmap(big_buf, big_buf_len); | ||||
|     if (ret < 0) { | ||||
|         THROW_ERROR("munmap failed"); | ||||
| @ -1038,6 +1042,47 @@ int test_mprotect_with_non_page_aligned_size() { | ||||
|     *(char *)buf = 1; | ||||
|     *(char *)(buf + PAGE_SIZE) = 1; | ||||
| 
 | ||||
|     ret = munmap(buf, PAGE_SIZE * 2); | ||||
|     if (ret < 0) { | ||||
|         THROW_ERROR("munmap failed"); | ||||
|     } | ||||
|     return 0; | ||||
| } | ||||
| 
 | ||||
| int test_mprotect_multiple_vmas() { | ||||
|     // Create multiple VMAs with PROT_NONE
 | ||||
|     int flags = MAP_PRIVATE | MAP_ANONYMOUS; | ||||
|     void *buf_a = mmap((void *)HINT_BEGIN, PAGE_SIZE * 2, PROT_NONE, flags, -1, 0); | ||||
|     if (buf_a == MAP_FAILED || buf_a != (void *)HINT_BEGIN) { | ||||
|         THROW_ERROR("mmap failed"); | ||||
|     } | ||||
|     void *buf_b = mmap((void *)(HINT_BEGIN + 2 * PAGE_SIZE), PAGE_SIZE, PROT_NONE, flags, -1, | ||||
|                        0); | ||||
|     if (buf_b == MAP_FAILED || buf_b != (void *)(HINT_BEGIN + 2 * PAGE_SIZE)) { | ||||
|         THROW_ERROR("mmap failed"); | ||||
|     } | ||||
|     void *buf_c = mmap((void *)(HINT_BEGIN + 3 * PAGE_SIZE), PAGE_SIZE * 2, PROT_NONE, flags, | ||||
|                        -1, 0); | ||||
|     if (buf_c == MAP_FAILED || buf_c != (void *)(HINT_BEGIN + 3 * PAGE_SIZE)) { | ||||
|         THROW_ERROR("mmap failed"); | ||||
|     } | ||||
| 
 | ||||
|     // Set a part of the ranges to read-write
 | ||||
|     int ret = mprotect(buf_a + PAGE_SIZE, 3 * PAGE_SIZE, PROT_READ | PROT_WRITE); | ||||
|     if (ret < 0) { | ||||
|         THROW_ERROR("mprotect multiple vmas failed"); | ||||
|     } | ||||
| 
 | ||||
|     // Check if these ranges are writable
 | ||||
|     *(char *)(buf_a + PAGE_SIZE) = 1; | ||||
|     *(char *)(buf_b) = 1; | ||||
|     *(char *)(buf_c) = 1; | ||||
| 
 | ||||
|     ret = munmap(buf_a, PAGE_SIZE * 5); | ||||
|     if (ret < 0) { | ||||
|         THROW_ERROR("munmap multiple vmas failed"); | ||||
|     } | ||||
| 
 | ||||
|     return 0; | ||||
| } | ||||
| 
 | ||||
| @ -1231,11 +1276,13 @@ static test_case_t test_cases[] = { | ||||
|     TEST_CASE(test_munmap_with_null_addr), | ||||
|     TEST_CASE(test_munmap_with_zero_len), | ||||
|     TEST_CASE(test_munmap_with_non_page_aligned_len), | ||||
| #ifdef MREMAP_SUPPORTED | ||||
|     TEST_CASE(test_mremap), | ||||
|     TEST_CASE(test_mremap_subrange), | ||||
|     TEST_CASE(test_mremap_with_fixed_addr), | ||||
|     TEST_CASE(test_file_backed_mremap), | ||||
|     TEST_CASE(test_file_backed_mremap_mem_may_move), | ||||
| #endif | ||||
|     TEST_CASE(test_mprotect_once), | ||||
|     TEST_CASE(test_mprotect_twice), | ||||
|     TEST_CASE(test_mprotect_triple), | ||||
| @ -1243,6 +1290,7 @@ static test_case_t test_cases[] = { | ||||
|     TEST_CASE(test_mprotect_with_invalid_addr), | ||||
|     TEST_CASE(test_mprotect_with_invalid_prot), | ||||
|     TEST_CASE(test_mprotect_with_non_page_aligned_size), | ||||
|     TEST_CASE(test_mprotect_multiple_vmas), | ||||
| }; | ||||
| 
 | ||||
| int main() { | ||||
|  | ||||