Add EDMM support for Legacy Occlum
This commit is contained in:
  parent 28c29c8896
  commit d49b3af0aa

75  src/libos/Cargo.lock (generated)
							| @ -9,7 +9,7 @@ dependencies = [ | ||||
|  "aligned", | ||||
|  "atomic", | ||||
|  "bitflags", | ||||
|  "bitvec", | ||||
|  "bitvec 1.0.1", | ||||
|  "ctor", | ||||
|  "derive_builder", | ||||
|  "goblin", | ||||
| @ -18,6 +18,7 @@ dependencies = [ | ||||
|  "lazy_static", | ||||
|  "log", | ||||
|  "memoffset 0.6.5", | ||||
|  "modular-bitfield", | ||||
|  "rcore-fs", | ||||
|  "rcore-fs-devfs", | ||||
|  "rcore-fs-mountfs", | ||||
| @ -94,7 +95,19 @@ source = "registry+https://github.com/rust-lang/crates.io-index" | ||||
| checksum = "41262f11d771fd4a61aa3ce019fca363b4b6c282fca9da2a31186d3965a47a5c" | ||||
| dependencies = [ | ||||
|  "either", | ||||
|  "radium", | ||||
|  "radium 0.3.0", | ||||
| ] | ||||
| 
 | ||||
| [[package]] | ||||
| name = "bitvec" | ||||
| version = "1.0.1" | ||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||||
| checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" | ||||
| dependencies = [ | ||||
|  "funty", | ||||
|  "radium 0.7.0", | ||||
|  "tap", | ||||
|  "wyz", | ||||
| ] | ||||
| 
 | ||||
| [[package]] | ||||
| @ -206,6 +219,12 @@ version = "0.1.1" | ||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||||
| checksum = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba" | ||||
| 
 | ||||
| [[package]] | ||||
| name = "funty" | ||||
| version = "2.0.0" | ||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||||
| checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" | ||||
| 
 | ||||
| [[package]] | ||||
| name = "goblin" | ||||
| version = "0.5.4" | ||||
| @ -294,6 +313,27 @@ dependencies = [ | ||||
|  "autocfg 1.1.0", | ||||
| ] | ||||
| 
 | ||||
| [[package]] | ||||
| name = "modular-bitfield" | ||||
| version = "0.11.2" | ||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||||
| checksum = "a53d79ba8304ac1c4f9eb3b9d281f21f7be9d4626f72ce7df4ad8fbde4f38a74" | ||||
| dependencies = [ | ||||
|  "modular-bitfield-impl", | ||||
|  "static_assertions 1.1.0", | ||||
| ] | ||||
| 
 | ||||
| [[package]] | ||||
| name = "modular-bitfield-impl" | ||||
| version = "0.11.2" | ||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||||
| checksum = "5a7d5f7076603ebc68de2dc6a650ec331a062a13abaa346975be747bbfa4b789" | ||||
| dependencies = [ | ||||
|  "proc-macro2", | ||||
|  "quote", | ||||
|  "syn", | ||||
| ] | ||||
| 
 | ||||
| [[package]] | ||||
| name = "plain" | ||||
| version = "0.2.3" | ||||
| @ -334,6 +374,12 @@ version = "0.3.0" | ||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||||
| checksum = "def50a86306165861203e7f84ecffbbdfdea79f0e51039b33de1e952358c47ac" | ||||
| 
 | ||||
| [[package]] | ||||
| name = "radium" | ||||
| version = "0.7.0" | ||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||||
| checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" | ||||
| 
 | ||||
| [[package]] | ||||
| name = "rand" | ||||
| version = "0.6.5" | ||||
| @ -479,11 +525,11 @@ dependencies = [ | ||||
| name = "rcore-fs-sefs" | ||||
| version = "0.1.0" | ||||
| dependencies = [ | ||||
|  "bitvec", | ||||
|  "bitvec 0.17.4", | ||||
|  "log", | ||||
|  "rcore-fs", | ||||
|  "spin 0.5.2", | ||||
|  "static_assertions", | ||||
|  "static_assertions 0.3.4", | ||||
|  "uuid", | ||||
| ] | ||||
| 
 | ||||
| @ -719,6 +765,12 @@ version = "0.3.4" | ||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||||
| checksum = "7f3eb36b47e512f8f1c9e3d10c2c1965bc992bd9cdb024fa581e2194501c83d3" | ||||
| 
 | ||||
| [[package]] | ||||
| name = "static_assertions" | ||||
| version = "1.1.0" | ||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||||
| checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" | ||||
| 
 | ||||
| [[package]] | ||||
| name = "strsim" | ||||
| version = "0.9.3" | ||||
| @ -736,6 +788,12 @@ dependencies = [ | ||||
|  "unicode-ident", | ||||
| ] | ||||
| 
 | ||||
| [[package]] | ||||
| name = "tap" | ||||
| version = "1.0.1" | ||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||||
| checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" | ||||
| 
 | ||||
| [[package]] | ||||
| name = "unicode-ident" | ||||
| version = "1.0.3" | ||||
| @ -772,3 +830,12 @@ name = "winapi-x86_64-pc-windows-gnu" | ||||
| version = "0.4.0" | ||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||||
| checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" | ||||
| 
 | ||||
| [[package]] | ||||
| name = "wyz" | ||||
| version = "0.5.1" | ||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||||
| checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed" | ||||
| dependencies = [ | ||||
|  "tap", | ||||
| ] | ||||
|  | ||||
| @ -10,7 +10,7 @@ crate-type = ["staticlib"] | ||||
| [dependencies] | ||||
| atomic = "0.5" | ||||
| bitflags = "1.0" | ||||
| bitvec = { version = "0.17", default-features = false, features = ["alloc"]  } | ||||
| bitvec = { version = "1", default-features = false, features = ["alloc"]  } | ||||
| log = "0.4" | ||||
| aligned = "0.4.1" | ||||
| lazy_static = { version = "1.1.0", features = ["spin_no_std"] } # Implies nightly | ||||
| @ -33,6 +33,7 @@ regex = { git = "https://github.com/mesalock-linux/regex-sgx", default-features | ||||
| goblin = { version = "0.5.4", default-features = false, features = ["elf64", "elf32", "endian_fd"] } | ||||
| intrusive-collections = "0.9" | ||||
| spin = "0.7" | ||||
| modular-bitfield = "0.11.2" | ||||
| 
 | ||||
| [patch.'https://github.com/apache/teaclave-sgx-sdk.git'] | ||||
| sgx_tstd = { path = "../../deps/rust-sgx-sdk/sgx_tstd" } | ||||
|  | ||||
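The Cargo.toml hunk above adds modular-bitfield 0.11 as a new dependency, but none of the hunks in this commit show its call sites. As a purely hypothetical illustration of the crate's API (the struct name and field layout below are invented, not taken from Occlum), a bit-accurate view of an x86 page-fault error code could look like this:

    use modular_bitfield::prelude::*;

    // Hypothetical example: the #[bitfield] macro generates bit-accurate getters and
    // setters at compile time; each `bool` takes one bit, B8/B16 cover multi-bit
    // reserved regions, and the total width must be a whole number of bytes.
    #[bitfield]
    pub struct PfErrorCode {
        present: bool,
        write: bool,
        user: bool,
        reserved_write: bool,
        instruction_fetch: bool,
        protection_key: bool,
        shadow_stack: bool,
        reserved_low: B8,
        sgx: bool,
        reserved_high: B16,
    }

    fn demo() {
        // Builder-style setters and plain getters are generated automatically.
        let code = PfErrorCode::new().with_write(true).with_sgx(true);
        assert!(code.write() && code.sgx());
    }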
| @ -6,10 +6,14 @@ use self::syscall::{handle_syscall_exception, SYSCALL_OPCODE}; | ||||
| use super::*; | ||||
| use crate::signal::{FaultSignal, SigSet}; | ||||
| use crate::syscall::exception_interrupt_syscall_c_abi; | ||||
| use crate::syscall::{CpuContext, FpRegs, SyscallNum}; | ||||
| use aligned::{Aligned, A16}; | ||||
| use core::arch::x86_64::_fxsave; | ||||
| use crate::syscall::{CpuContext, ExtraContext, SyscallNum}; | ||||
| use crate::vm::{enclave_page_fault_handler, USER_SPACE_VM_MANAGER}; | ||||
| use sgx_types::*; | ||||
| use sgx_types::{sgx_exception_type_t, sgx_exception_vector_t}; | ||||
| 
 | ||||
| const ENCLU: u32 = 0xd7010f; | ||||
| const EACCEPT: u32 = 0x5; | ||||
| const EACCEPTCOPY: u32 = 0x7; | ||||
| 
 | ||||
| // Modules for instruction simulation
 | ||||
| mod cpuid; | ||||
| @ -25,14 +29,63 @@ pub fn register_exception_handlers() { | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| fn try_handle_kernel_exception(info: &sgx_exception_info_t) -> i32 { | ||||
|     if info.exception_vector == sgx_exception_vector_t::SGX_EXCEPTION_VECTOR_PF { | ||||
|         let pf_addr = info.exinfo.faulting_address as usize; | ||||
|         // The PF address must be in the user space. Otherwise, keep searching for the exception handler
 | ||||
|         if !USER_SPACE_VM_MANAGER.range().contains(pf_addr) { | ||||
|             SGX_MM_EXCEPTION_CONTINUE_SEARCH | ||||
|         } else { | ||||
|             let rip = info.cpu_context.rip as *const u32; | ||||
|             let rax = info.cpu_context.rax as u32; | ||||
|             // This can happen when two threads both try to EAUG a new page. Thread 1 EAUGs because it first
 | ||||
|             // touches the memory and triggers a #PF. Thread 2 EAUGs because it uses sgx_mm_commit to commit a
 | ||||
|             // new page with EACCEPT and triggers #PF. If Thread 1 first acquires the lock to do EAUG, when Thread 2
 | ||||
|             // acquires the lock, it can't do EAUG again and will fail. The failure will raise a signal.
 | ||||
|             // This signal will eventually be handled here. And the instruction that triggers this exception is EACCEPT/EACCEPTCOPY.
 | ||||
|             // In this case, since the new page is already EAUG-ed, we just need to execute the EACCEPT again. Thus here
 | ||||
|             // just return SGX_MM_EXCEPTION_CONTINUE_EXECUTION
 | ||||
|             if ENCLU == (unsafe { *rip } as u32) & 0xffffff | ||||
|                 && (EACCEPT == rax || EACCEPTCOPY == rax) | ||||
|             { | ||||
|                 return SGX_MM_EXCEPTION_CONTINUE_EXECUTION; | ||||
|             } | ||||
| 
 | ||||
|             // If the faulting code is not the user's code but the #PF address is in the userspace, then it is a
 | ||||
|             // kernel-triggered #PF that we can handle. This can happen, e.g., when a read syscall triggers a #PF on a user buffer
 | ||||
|             info!("kernel code triggers #PF"); | ||||
|             let kernel_triggers = true; | ||||
|             enclave_page_fault_handler(info.cpu_context.rip as usize, info.exinfo, kernel_triggers) | ||||
|                 .expect("handle PF failure"); | ||||
|             SGX_MM_EXCEPTION_CONTINUE_EXECUTION | ||||
|         } | ||||
|     } else { | ||||
|         // Otherwise, we can't handle it. Keep searching for the exception handler
 | ||||
|         error!( | ||||
|             "We can't handle this exception: {:?}", | ||||
|             info.exception_vector | ||||
|         ); | ||||
|         SGX_MM_EXCEPTION_CONTINUE_SEARCH | ||||
|     } | ||||
| } | ||||
| 
 | ||||
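A worked illustration of the opcode check in try_handle_kernel_exception above (the helper below is not part of the commit): ENCLU encodes as the bytes 0F 01 D7, so a 4-byte little-endian load at RIP yields 0x..D7010F, masking with 0xffffff recovers the ENCLU constant, and RAX selects the leaf (EACCEPT = 5, EACCEPTCOPY = 7).

    // Illustrative only: mirrors the check performed on *rip and rax above.
    fn is_pending_eaccept(bytes_at_rip: u32, rax: u32) -> bool {
        const ENCLU: u32 = 0xd7010f;
        const EACCEPT: u32 = 0x5;
        const EACCEPTCOPY: u32 = 0x7;
        (bytes_at_rip & 0xffffff) == ENCLU && (rax == EACCEPT || rax == EACCEPTCOPY)
    }

    // Example: the instruction bytes 0F 01 D7 followed by 0x90 read as a
    // little-endian u32 give 0x90D7010F, so is_pending_eaccept(0x90D7010F, 0x5) == true.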
| #[no_mangle] | ||||
| extern "C" fn handle_exception(info: *mut sgx_exception_info_t) -> i32 { | ||||
|     let mut fpregs = FpRegs::save(); | ||||
|     let info = unsafe { &mut *info }; | ||||
| 
 | ||||
|     // Try to handle a kernel-triggered #PF
 | ||||
|     if !USER_SPACE_VM_MANAGER | ||||
|         .range() | ||||
|         .contains(info.cpu_context.rip as usize) | ||||
|     { | ||||
|         return try_handle_kernel_exception(&info); | ||||
|     } | ||||
| 
 | ||||
|     // User-space-triggered exception
 | ||||
|     unsafe { | ||||
|         exception_interrupt_syscall_c_abi( | ||||
|             SyscallNum::HandleException as u32, | ||||
|             info as *mut _, | ||||
|             &mut fpregs as *mut FpRegs, | ||||
|             info as *mut sgx_exception_info_t as *mut _, | ||||
|         ) | ||||
|     }; | ||||
|     unreachable!(); | ||||
| @ -41,20 +94,22 @@ extern "C" fn handle_exception(info: *mut sgx_exception_info_t) -> i32 { | ||||
| /// Exceptions are handled as a special kind of system call.
 | ||||
| pub fn do_handle_exception( | ||||
|     info: *mut sgx_exception_info_t, | ||||
|     fpregs: *mut FpRegs, | ||||
|     user_context: *mut CpuContext, | ||||
| ) -> Result<isize> { | ||||
|     let info = unsafe { &mut *info }; | ||||
|     check_exception_type(info.exception_type)?; | ||||
|     info!("do handle exception: {:?}", info.exception_vector); | ||||
| 
 | ||||
|     let user_context = unsafe { &mut *user_context }; | ||||
|     *user_context = CpuContext::from_sgx(&info.cpu_context); | ||||
|     user_context.fpregs = fpregs; | ||||
|     let xsave_area = info.xsave_area.as_mut_ptr(); | ||||
|     user_context.extra_context = ExtraContext::Xsave; | ||||
|     user_context.extra_context_ptr = xsave_area; | ||||
| 
 | ||||
|     // Try to do instruction emulation first
 | ||||
|     if info.exception_vector == sgx_exception_vector_t::SGX_EXCEPTION_VECTOR_UD { | ||||
|         // Assume the length of opcode is 2 bytes
 | ||||
|         let ip_opcode = unsafe { *(user_context.rip as *const u16) }; | ||||
|         let ip_opcode: u16 = unsafe { *(user_context.rip as *const u16) }; | ||||
|         if ip_opcode == RDTSC_OPCODE { | ||||
|             return handle_rdtsc_exception(user_context); | ||||
|         } else if ip_opcode == SYSCALL_OPCODE { | ||||
| @ -64,6 +119,23 @@ pub fn do_handle_exception( | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     // Normally, we should only handle a #PF exception with the SGX bit set, which indicates an uncommitted EPC page.
 | ||||
|     // However, it happens that when committing a page whose permissions differ from the default read-write (e.g. RWX), there is a short gap
 | ||||
|     // after EACCEPTCOPY and before the mprotect ocall. And if the user touches memory during this short
 | ||||
|     // gap, the SGX bit will not be set. Thus, here we don't check the SGX bit.
 | ||||
|     if info.exception_vector == sgx_exception_vector_t::SGX_EXCEPTION_VECTOR_PF { | ||||
|         info!("Userspace #PF caught, try handle"); | ||||
|         if enclave_page_fault_handler(info.cpu_context.rip as usize, info.exinfo, false).is_ok() { | ||||
|             info!("#PF handling is done successfully"); | ||||
|             return Ok(0); | ||||
|         } | ||||
| 
 | ||||
|         warn!( | ||||
|             "#PF not handled. Turn to signal. user context = {:?}", | ||||
|             user_context | ||||
|         ); | ||||
|     } | ||||
| 
 | ||||
|     // Then, it must be a "real" exception. Convert it to signal and force delivering it.
 | ||||
|     // The generated signal is SIGBUS, SIGFPE, SIGILL, or SIGSEGV.
 | ||||
|     //
 | ||||
| @ -108,3 +180,21 @@ fn check_exception_type(type_: sgx_exception_type_t) -> Result<()> { | ||||
|     } | ||||
|     Ok(()) | ||||
| } | ||||
| 
 | ||||
| // Based on the Page-Fault Error Code in the Intel manual
 | ||||
| const PF_EXCEPTION_SGX_BIT: u32 = 0x1; | ||||
| const PF_EXCEPTION_RW_BIT: u32 = 0x2; | ||||
| 
 | ||||
| // Return value:
 | ||||
| // True     - SGX bit is set
 | ||||
| // False    - SGX bit is not set
 | ||||
| pub fn check_sgx_bit(exception_error_code: u32) -> bool { | ||||
|     exception_error_code & PF_EXCEPTION_SGX_BIT == PF_EXCEPTION_SGX_BIT | ||||
| } | ||||
| 
 | ||||
| // Return value:
 | ||||
| // True     - write bit is set, #PF caused by write
 | ||||
| // False    - read bit is set, #PF caused by read
 | ||||
| pub fn check_rw_bit(exception_error_code: u32) -> bool { | ||||
|     exception_error_code & PF_EXCEPTION_RW_BIT == PF_EXCEPTION_RW_BIT | ||||
| } | ||||
|  | ||||
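A hypothetical usage sketch of the two predicates above (not part of the commit), combining them to classify a #PF before deciding how to commit the faulting page:

    // Illustrative only: the strings summarize what the two bits mean together.
    fn describe_pf(exception_error_code: u32) -> &'static str {
        match (
            check_sgx_bit(exception_error_code),
            check_rw_bit(exception_error_code),
        ) {
            (true, true) => "#PF on an uncommitted EPC page, caused by a write",
            (true, false) => "#PF on an uncommitted EPC page, caused by a read",
            (false, true) => "regular #PF caused by a write",
            (false, false) => "regular #PF caused by a read",
        }
    }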
| @ -91,7 +91,7 @@ fn get_output_for_vma(vma: &VMArea, heap_or_stack: Option<&str>) -> String { | ||||
|     let perms = vma.perms(); | ||||
| 
 | ||||
|     let (file_path, offset, device_id, inode_num) = { | ||||
|         if let Some((file, offset)) = vma.init_file() { | ||||
|         if let Some((file, offset)) = vma.backed_file() { | ||||
|             let inode_file = file.as_inode_file().unwrap(); | ||||
|             let file_path = inode_file.abs_path(); | ||||
|             let inode_num = inode_file.inode().metadata().unwrap().inode; | ||||
|  | ||||
| @ -2,9 +2,7 @@ pub use self::sgx::sgx_interrupt_info_t; | ||||
| use crate::prelude::*; | ||||
| use crate::process::ThreadRef; | ||||
| use crate::syscall::exception_interrupt_syscall_c_abi; | ||||
| use crate::syscall::{CpuContext, FpRegs, SyscallNum}; | ||||
| use aligned::{Aligned, A16}; | ||||
| use core::arch::x86_64::_fxsave; | ||||
| use crate::syscall::{CpuContext, ExtraContext, SyscallNum}; | ||||
| 
 | ||||
| mod sgx; | ||||
| 
 | ||||
| @ -16,28 +14,23 @@ pub fn init() { | ||||
| } | ||||
| 
 | ||||
| extern "C" fn handle_interrupt(info: *mut sgx_interrupt_info_t) -> i32 { | ||||
|     let mut fpregs = FpRegs::save(); | ||||
|     unsafe { | ||||
|         exception_interrupt_syscall_c_abi( | ||||
|             SyscallNum::HandleInterrupt as u32, | ||||
|             info as *mut _, | ||||
|             &mut fpregs as *mut FpRegs, | ||||
|         ) | ||||
|         exception_interrupt_syscall_c_abi(SyscallNum::HandleInterrupt as u32, info as *mut _) | ||||
|     }; | ||||
|     unreachable!(); | ||||
| } | ||||
| 
 | ||||
| pub fn do_handle_interrupt( | ||||
|     info: *mut sgx_interrupt_info_t, | ||||
|     fpregs: *mut FpRegs, | ||||
|     cpu_context: *mut CpuContext, | ||||
| ) -> Result<isize> { | ||||
|     let info = unsafe { &*info }; | ||||
|     let info = unsafe { &mut *info }; | ||||
|     let context = unsafe { &mut *cpu_context }; | ||||
|     // The cpu context is overridden so that it is as if the syscall was called from where the
 | ||||
|     // interrupt happened
 | ||||
|     *context = CpuContext::from_sgx(&info.cpu_context); | ||||
|     context.fpregs = fpregs; | ||||
|     context.extra_context = ExtraContext::Xsave; | ||||
|     context.extra_context_ptr = info.xsave_area.as_mut_ptr(); | ||||
|     Ok(0) | ||||
| } | ||||
| 
 | ||||
|  | ||||
| @ -1,10 +1,15 @@ | ||||
| use crate::prelude::*; | ||||
| 
 | ||||
| #[repr(C)] | ||||
| #[repr(C, align(64))] | ||||
| #[derive(Default, Clone, Copy)] | ||||
| #[allow(non_camel_case_types)] | ||||
| pub struct sgx_interrupt_info_t { | ||||
|     pub cpu_context: sgx_cpu_context_t, | ||||
|     pub interrupt_valid: uint32_t, | ||||
|     reserved: uint32_t, | ||||
|     pub xsave_size: uint64_t, | ||||
|     pub reserved1: [uint64_t; 4], | ||||
|     pub xsave_area: [uint8_t; 0], | ||||
| } | ||||
| 
 | ||||
| #[allow(non_camel_case_types)] | ||||
|  | ||||
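The new xsave_area: [uint8_t; 0] field is the flexible-array-member idiom: the struct header is followed in memory by xsave_size bytes of XSAVE data, and #[repr(C, align(64))] provides the 64-byte alignment that XSAVE requires. A hypothetical sketch (not the commit's code) of how such a trailing area can be viewed as a byte slice:

    // Illustrative only: the zero-length array marks where the trailing data starts.
    unsafe fn xsave_bytes<'a>(info: *mut sgx_interrupt_info_t) -> &'a mut [u8] {
        let len = (*info).xsave_size as usize;
        core::slice::from_raw_parts_mut((*info).xsave_area.as_mut_ptr(), len)
    }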
| @ -21,8 +21,11 @@ | ||||
| #![feature(test)] | ||||
| #![feature(atomic_from_mut)] | ||||
| #![feature(btree_drain_filter)] | ||||
| #![feature(bench_black_box)] | ||||
| #![feature(arbitrary_enum_discriminant)] | ||||
| // for core::ptr::non_null::NonNull addr() method
 | ||||
| #![feature(strict_provenance)] | ||||
| // for VMArea::can_merge_vmas
 | ||||
| #![feature(is_some_and)] | ||||
| 
 | ||||
| #[macro_use] | ||||
| extern crate alloc; | ||||
| @ -59,6 +62,7 @@ extern crate memoffset; | ||||
| extern crate ctor; | ||||
| extern crate intrusive_collections; | ||||
| extern crate itertools; | ||||
| extern crate modular_bitfield; | ||||
| extern crate resolv_conf; | ||||
| 
 | ||||
| use sgx_trts::libc; | ||||
|  | ||||
| @ -1,6 +1,6 @@ | ||||
| use crate::process::do_vfork::reap_zombie_child_created_with_vfork; | ||||
| use crate::signal::constants::*; | ||||
| use std::intrinsics::atomic_store; | ||||
| use std::intrinsics::atomic_store_seqcst; | ||||
| 
 | ||||
| use super::do_futex::futex_wake; | ||||
| use super::do_vfork::{is_vforked_child_process, vfork_return_to_parent}; | ||||
| @ -61,7 +61,7 @@ fn exit_thread(term_status: TermStatus) { | ||||
|     // Notify a thread, if any, that waits on ctid. See set_tid_address(2) for more info.
 | ||||
|     if let Some(ctid_ptr) = thread.clear_ctid() { | ||||
|         unsafe { | ||||
|             atomic_store(ctid_ptr.as_ptr(), 0); | ||||
|             atomic_store_seqcst(ctid_ptr.as_ptr(), 0); | ||||
|         } | ||||
|         futex_wake(ctid_ptr.as_ptr() as *const i32, 1); | ||||
|     } | ||||
|  | ||||
| @ -1,6 +1,6 @@ | ||||
| use std::collections::hash_map::DefaultHasher; | ||||
| use std::hash::{Hash, Hasher}; | ||||
| use std::intrinsics::atomic_load; | ||||
| use std::intrinsics::atomic_load_seqcst; | ||||
| use std::sync::atomic::{AtomicBool, Ordering}; | ||||
| 
 | ||||
| use crate::prelude::*; | ||||
| @ -258,7 +258,7 @@ impl FutexKey { | ||||
|     } | ||||
| 
 | ||||
|     pub fn load_val(&self) -> i32 { | ||||
|         unsafe { atomic_load(self.0 as *const i32) } | ||||
|         unsafe { atomic_load_seqcst(self.0 as *const i32) } | ||||
|     } | ||||
| 
 | ||||
|     pub fn addr(&self) -> usize { | ||||
|  | ||||
| @ -8,6 +8,7 @@ | ||||
| //! * If `cpu_set[i] == true`, then the i-th CPU core belongs to the set;
 | ||||
| //! * Otherwise, the i-th CPU core is not in the set.
 | ||||
| 
 | ||||
| use bitvec::order::LocalBits as Local; | ||||
| use bitvec::prelude::*; | ||||
| use std::ops::Index; | ||||
| 
 | ||||
| @ -15,7 +16,7 @@ use crate::prelude::*; | ||||
| 
 | ||||
| #[derive(Debug, Clone, PartialEq)] | ||||
| pub struct CpuSet { | ||||
|     bits: BitBox<Local, u8>, | ||||
|     bits: BitBox<u8, Local>, | ||||
| } | ||||
| 
 | ||||
| impl CpuSet { | ||||
| @ -33,14 +34,14 @@ impl CpuSet { | ||||
| 
 | ||||
|     /// Create a CpuSet that consists of all of the CPU cores.
 | ||||
|     pub fn new_full() -> Self { | ||||
|         let mut bits = bitbox![Local, u8; 1; Self::len() * 8]; | ||||
|         let mut bits = bitbox![u8, Local; 1; Self::len() * 8]; | ||||
|         Self::clear_unused(&mut bits); | ||||
|         Self { bits } | ||||
|     } | ||||
| 
 | ||||
|     /// Create a CpuSet that consists of none of the CPU cores.
 | ||||
|     pub fn new_empty() -> Self { | ||||
|         let bits = bitbox![Local, u8; 0; Self::len() * 8]; | ||||
|         let bits = bitbox![u8, Local; 0; Self::len() * 8]; | ||||
|         Self { bits } | ||||
|     } | ||||
| 
 | ||||
| @ -61,7 +62,7 @@ impl CpuSet { | ||||
| 
 | ||||
|     /// Returns the first index of CPUs in set.
 | ||||
|     pub fn first_cpu_idx(&self) -> Option<usize> { | ||||
|         self.iter().position(|&b| b == true) | ||||
|         self.iter().position(|b| b == true) | ||||
|     } | ||||
| 
 | ||||
|     // Returns if the CpuSet is a subset of available cpu set
 | ||||
| @ -75,7 +76,7 @@ impl CpuSet { | ||||
|             return_errno!(EINVAL, "slice is not long enough"); | ||||
|         } | ||||
|         let slice = &slice[..Self::len()]; | ||||
|         let mut bits = BitBox::from_slice(slice); | ||||
|         let mut bits = BitBox::from_bitslice(&BitSlice::from_slice(slice)); | ||||
|         Self::clear_unused(&mut bits); | ||||
| 
 | ||||
|         Ok(Self { bits }) | ||||
| @ -85,11 +86,11 @@ impl CpuSet { | ||||
|     ///
 | ||||
|     /// The last, unused bits in the byte slice are guaranteed to be zero.
 | ||||
|     pub fn as_slice(&self) -> &[u8] { | ||||
|         self.bits.as_slice() | ||||
|         self.bits.as_raw_slice() | ||||
|     } | ||||
| 
 | ||||
|     pub fn as_mut_slice(&mut self) -> &mut [u8] { | ||||
|         self.bits.as_mut_slice() | ||||
|         self.bits.as_raw_mut_slice() | ||||
|     } | ||||
| 
 | ||||
|     /// Returns an iterator that allows accessing the underlying bits.
 | ||||
| @ -102,7 +103,7 @@ impl CpuSet { | ||||
|         self.bits.iter_mut() | ||||
|     } | ||||
| 
 | ||||
|     fn clear_unused(bits: &mut BitSlice<Local, u8>) { | ||||
|     fn clear_unused(bits: &mut BitSlice<u8, Local>) { | ||||
|         let unused_bits = &mut bits[Self::ncores()..(Self::len() * 8)]; | ||||
|         for mut bit in unused_bits { | ||||
|             *bit = false; | ||||
| @ -110,8 +111,8 @@ impl CpuSet { | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| pub type Iter<'a> = bitvec::slice::Iter<'a, Local, u8>; | ||||
| pub type IterMut<'a> = bitvec::slice::IterMut<'a, Local, u8>; | ||||
| pub type Iter<'a> = bitvec::slice::Iter<'a, u8, Local>; | ||||
| pub type IterMut<'a> = bitvec::slice::IterMut<'a, u8, Local>; | ||||
| 
 | ||||
| impl Index<usize> for CpuSet { | ||||
|     type Output = bool; | ||||
|  | ||||
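The CpuSet changes above are the bitvec 0.17 to 1.0 migration: the storage type now comes before the bit order in type parameters and macros, the raw-byte accessors were renamed, and a BitBox is built from a BitSlice view. A minimal standalone sketch of the 1.0 API this relies on (not Occlum code):

    use bitvec::prelude::*;
    use bitvec::order::LocalBits;

    fn bitvec_1_0_demo() {
        // 0.17: bitbox![Local, u8; 0; 16]  ->  1.0: storage type comes first
        let mut bits: BitBox<u8, LocalBits> = bitbox![u8, LocalBits; 0; 16];
        bits.set(3, true);

        // 0.17: as_slice()/as_mut_slice()  ->  1.0: as_raw_slice()/as_raw_mut_slice()
        let raw: &[u8] = bits.as_raw_slice();

        // 0.17: BitBox::from_slice(bytes)  ->  1.0: go through a BitSlice view
        let rebuilt: BitBox<u8, LocalBits> =
            BitBox::from_bitslice(BitSlice::from_slice(raw));

        // Iterators now yield proxy references, so the closure takes `b` by value.
        assert_eq!(rebuilt.iter().position(|b| *b), Some(3));
    }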
| @ -199,7 +199,7 @@ impl siginfo_t { | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| #[derive(Clone, Copy)] | ||||
| #[derive(Clone)] | ||||
| #[repr(C)] | ||||
| pub struct ucontext_t { | ||||
|     pub uc_flags: u64, | ||||
| @ -225,7 +225,8 @@ pub type stack_t = sigaltstack_t; | ||||
| pub struct mcontext_t { | ||||
|     pub inner: CpuContext, | ||||
|     // TODO: the fields should be csgsfs, err, trapno, oldmask, and cr2
 | ||||
|     _unused0: [u64; 5], | ||||
|     // The number should be 5, but 2 of these slots are used to store extra fields in the CpuContext. Thus make it 3.
 | ||||
|     _unused0: [u64; 3], | ||||
|     // TODO: this field should be `fpregs: fpregset_t,`
 | ||||
|     _unused1: usize, | ||||
|     _reserved: [u64; 8], | ||||
|  | ||||
| @ -5,9 +5,8 @@ use super::{SigAction, SigActionFlags, SigDefaultAction, SigSet, Signal}; | ||||
| use crate::lazy_static::__Deref; | ||||
| use crate::prelude::*; | ||||
| use crate::process::{ProcessRef, TermStatus, ThreadRef}; | ||||
| use crate::syscall::{CpuContext, FpRegs}; | ||||
| use crate::syscall::{CpuContext, ExtraContext, FpRegs, XsaveArea}; | ||||
| use aligned::{Aligned, A16}; | ||||
| use core::arch::x86_64::{_fxrstor, _fxsave}; | ||||
| use std::{ptr, slice}; | ||||
| 
 | ||||
| pub fn do_rt_sigreturn(curr_user_ctxt: &mut CpuContext) -> Result<()> { | ||||
| @ -34,11 +33,27 @@ pub fn do_rt_sigreturn(curr_user_ctxt: &mut CpuContext) -> Result<()> { | ||||
|     *curr_user_ctxt = last_ucontext.uc_mcontext.inner; | ||||
| 
 | ||||
|     // Restore the floating point registers to a temp area
 | ||||
|     // The floating point registers would be recoved just
 | ||||
|     // before return to user's code
 | ||||
|     // The floating point registers would be recovered just before returning to the user's code
 | ||||
|     match curr_user_ctxt.extra_context { | ||||
|         ExtraContext::Fpregs => { | ||||
|             // Signal raised by direct syscall
 | ||||
|             // fpregs should be stored on the heap, because the ucontext_t will be freed when this function returns and curr_user_ctxt only stores the pointer
 | ||||
|             let mut fpregs = Box::new(unsafe { FpRegs::from_slice(&last_ucontext.fpregs) }); | ||||
|     curr_user_ctxt.fpregs = Box::into_raw(fpregs); | ||||
|     curr_user_ctxt.fpregs_on_heap = 1; // indicates the fpregs is on heap
 | ||||
|             curr_user_ctxt.extra_context_ptr = Box::into_raw(fpregs) as *mut u8; | ||||
|         } | ||||
|         ExtraContext::Xsave => { | ||||
|             // Signal raised by exception
 | ||||
|             // The xsave_area is stored at a special area reserved on kernel's stack. We can just overwrite this area with the latest user context
 | ||||
|             // Note: Currently, we only restore the fpregs instead of the whole xsave area for sigreturn, because during the
 | ||||
|             // handling path we don't touch other advanced registers. However, in the future, if we have to touch those registers,
 | ||||
|             // we should restore the whole xsave area when sigreturn.
 | ||||
|             let latest_fpregs = unsafe { FpRegs::from_slice(&last_ucontext.fpregs) }; | ||||
|             let xsave_area = | ||||
|                 unsafe { (&mut *(curr_user_ctxt.extra_context_ptr as *mut XsaveArea)) }; | ||||
|             xsave_area.set_fpregs_area(latest_fpregs); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     Ok(()) | ||||
| } | ||||
| 
 | ||||
| @ -261,16 +276,24 @@ fn handle_signals_by_user( | ||||
|         // Save the old sigmask
 | ||||
|         ucontext.uc_sigmask = old_sigmask.to_c(); | ||||
|         // Save the user context
 | ||||
|         ucontext.uc_mcontext.inner = *curr_user_ctxt; | ||||
|         ucontext.uc_mcontext.inner = curr_user_ctxt.clone(); | ||||
| 
 | ||||
|         // Save the floating point registers
 | ||||
|         if curr_user_ctxt.fpregs != ptr::null_mut() { | ||||
|             ucontext | ||||
|                 .fpregs | ||||
|                 .copy_from_slice(unsafe { curr_user_ctxt.fpregs.as_ref().unwrap().as_slice() }); | ||||
|             // Clear the floating point registers, since we do not need to recover is when this syscall return
 | ||||
|             curr_user_ctxt.fpregs = ptr::null_mut(); | ||||
|         if curr_user_ctxt.extra_context_ptr != ptr::null_mut() { | ||||
|             // Signal from exception handling
 | ||||
|             debug_assert!(matches!(curr_user_ctxt.extra_context, ExtraContext::Xsave)); | ||||
|             let fpregs_area = | ||||
|                 unsafe { (&*(curr_user_ctxt.extra_context_ptr as *mut XsaveArea)) }.get_fpregs(); | ||||
|             ucontext.fpregs.copy_from_slice(fpregs_area.as_slice()); | ||||
|             // Clear the floating point registers, since we do not need to recover them when this syscall returns
 | ||||
|             curr_user_ctxt.extra_context_ptr = ptr::null_mut(); | ||||
|         } else { | ||||
|             // Raise the signal with direct syscall
 | ||||
|             debug_assert!( | ||||
|                 matches!(curr_user_ctxt.extra_context, ExtraContext::Fpregs) | ||||
|                     && curr_user_ctxt.extra_context_ptr == ptr::null_mut() | ||||
|             ); | ||||
| 
 | ||||
|             // We need a correct fxsave structure in the buffer,
 | ||||
|             // because the app may modify part of it to update the
 | ||||
|             // floating point state after the signal handler finishes.
 | ||||
|  | ||||
| @ -36,12 +36,12 @@ impl FaultSignal { | ||||
|             // Page fault exception
 | ||||
|             SGX_EXCEPTION_VECTOR_PF => { | ||||
|                 const PF_ERR_FLAG_PRESENT : u32 = 1u32 << 0; | ||||
|                 let code = if info.exinfo.errcd & PF_ERR_FLAG_PRESENT != 0 { | ||||
|                 let code = if info.exinfo.error_code & PF_ERR_FLAG_PRESENT != 0 { | ||||
|                     SEGV_ACCERR | ||||
|                 } else { | ||||
|                     SEGV_MAPERR | ||||
|                 }; | ||||
|                 let addr = Some(info.exinfo.maddr); | ||||
|                 let addr = Some(info.exinfo.faulting_address ); | ||||
|                 (SIGSEGV, code, addr) | ||||
|             }, | ||||
|             // General protection exception
 | ||||
|  | ||||
| @ -7,7 +7,7 @@ | ||||
| //! 3. Preprocess the system call and then call `dispatch_syscall` (in this file)
 | ||||
| //! 4. Call `do_*` to process the system call (in other modules)
 | ||||
| 
 | ||||
| use aligned::{Aligned, A16}; | ||||
| use aligned::{Aligned, A16, A64}; | ||||
| use core::arch::x86_64::{_fxrstor, _fxsave}; | ||||
| use std::any::Any; | ||||
| use std::convert::TryFrom; | ||||
| @ -60,7 +60,7 @@ use crate::signal::{ | ||||
|     do_rt_sigtimedwait, do_sigaltstack, do_tgkill, do_tkill, sigaction_t, siginfo_t, sigset_t, | ||||
|     stack_t, | ||||
| }; | ||||
| use crate::vm::{MMapFlags, MRemapFlags, MSyncFlags, VMPerms}; | ||||
| use crate::vm::{MMapFlags, MRemapFlags, MSyncFlags, MadviceFlags, VMPerms}; | ||||
| use crate::{fs, process, std, vm}; | ||||
| 
 | ||||
| use super::*; | ||||
| @ -122,7 +122,7 @@ macro_rules! process_syscall_table_with_callback { | ||||
|             (Mremap = 25) => do_mremap(old_addr: usize, old_size: usize, new_size: usize, flags: i32, new_addr: usize), | ||||
|             (Msync = 26) => do_msync(addr: usize, size: usize, flags: u32), | ||||
|             (Mincore = 27) => handle_unsupported(), | ||||
|             (Madvise = 28) => handle_unsupported(), | ||||
|             (Madvise = 28) => do_madvice(addr: usize, length: usize, advice: i32), | ||||
|             (Shmget = 29) => do_shmget(key: key_t, size: size_t, shmflg: i32), | ||||
|             (Shmat = 30) => do_shmat(shmid: i32, shmaddr: usize, shmflg: i32), | ||||
|             (Shmctl = 31) => do_shmctl(shmid: i32, cmd: i32, buf: *mut shmids_t), | ||||
| @ -424,8 +424,8 @@ macro_rules! process_syscall_table_with_callback { | ||||
|             // Occlum-specific system calls
 | ||||
|             (SpawnGlibc = 359) => do_spawn_for_glibc(child_pid_ptr: *mut u32, path: *const i8, argv: *const *const i8, envp: *const *const i8, fa: *const SpawnFileActions, attribute_list: *const posix_spawnattr_t), | ||||
|             (SpawnMusl = 360) => do_spawn_for_musl(child_pid_ptr: *mut u32, path: *const i8, argv: *const *const i8, envp: *const *const i8, fdop_list: *const FdOp, attribute_list: *const posix_spawnattr_t), | ||||
|             (HandleException = 361) => do_handle_exception(info: *mut sgx_exception_info_t, fpregs: *mut FpRegs, context: *mut CpuContext), | ||||
|             (HandleInterrupt = 362) => do_handle_interrupt(info: *mut sgx_interrupt_info_t, fpregs: *mut FpRegs, context: *mut CpuContext), | ||||
|             (HandleException = 361) => do_handle_exception(info: *mut sgx_exception_info_t, context: *mut CpuContext), | ||||
|             (HandleInterrupt = 362) => do_handle_interrupt(info: *mut sgx_interrupt_info_t, context: *mut CpuContext), | ||||
|             (MountRootFS = 363) => do_mount_rootfs(key_ptr: *const sgx_key_128bit_t, rootfs_config_ptr: *const user_rootfs_config), | ||||
|         } | ||||
|     }; | ||||
| @ -649,12 +649,10 @@ fn do_syscall(user_context: &mut CpuContext) { | ||||
|             syscall.args[1] = user_context as *mut _ as isize; | ||||
|         } else if syscall_num == SyscallNum::HandleException { | ||||
|             // syscall.args[0] == info
 | ||||
|             // syscall.args[1] == fpregs
 | ||||
|             syscall.args[2] = user_context as *mut _ as isize; | ||||
|             syscall.args[1] = user_context as *mut _ as isize; | ||||
|         } else if syscall.num == SyscallNum::HandleInterrupt { | ||||
|             // syscall.args[0] == info
 | ||||
|             // syscall.args[1] == fpregs
 | ||||
|             syscall.args[2] = user_context as *mut _ as isize; | ||||
|             syscall.args[1] = user_context as *mut _ as isize; | ||||
|         } else if syscall.num == SyscallNum::Sigaltstack { | ||||
|             // syscall.args[0] == new_ss
 | ||||
|             // syscall.args[1] == old_ss
 | ||||
| @ -751,21 +749,27 @@ fn do_sysret(user_context: &mut CpuContext) -> ! { | ||||
|         fn do_exit_task() -> !; | ||||
|     } | ||||
|     if current!().status() != ThreadStatus::Exited { | ||||
|         // Restore the floating point registers
 | ||||
|         // Todo: Is it correct to do fxstor in kernel?
 | ||||
|         let fpregs = user_context.fpregs; | ||||
|         if (fpregs != ptr::null_mut()) { | ||||
|             if user_context.fpregs_on_heap == 1 { | ||||
|                 let fpregs = unsafe { Box::from_raw(user_context.fpregs as *mut FpRegs) }; | ||||
|                 fpregs.restore(); | ||||
|             } else { | ||||
|         if user_context.extra_context_ptr != ptr::null_mut() { | ||||
|             match user_context.extra_context { | ||||
|                 ExtraContext::Fpregs => { | ||||
|                     let fpregs = user_context.extra_context_ptr as *mut FpRegs; | ||||
|                     unsafe { fpregs.as_ref().unwrap().restore() }; | ||||
|                     // The fpregs must be allocated on heap
 | ||||
|                     drop(unsafe { Box::from_raw(user_context.extra_context_ptr as *mut FpRegs) }); | ||||
|                 } | ||||
|                 ExtraContext::Xsave => { | ||||
|                     let xsave_area = user_context.extra_context_ptr; | ||||
|                     unsafe { (&*(xsave_area as *mut XsaveArea)).restore() }; | ||||
|                 } | ||||
|             } | ||||
|             user_context.extra_context_ptr = ptr::null_mut(); | ||||
|         } | ||||
|         unsafe { __occlum_sysret(user_context) } // jump to user space
 | ||||
|     } else { | ||||
|         if user_context.fpregs != ptr::null_mut() && user_context.fpregs_on_heap == 1 { | ||||
|             drop(unsafe { Box::from_raw(user_context.fpregs as *mut FpRegs) }); | ||||
|         if user_context.extra_context_ptr != ptr::null_mut() | ||||
|             && matches!(user_context.extra_context, ExtraContext::Fpregs) | ||||
|         { | ||||
|             drop(unsafe { Box::from_raw(user_context.extra_context_ptr as *mut FpRegs) }); | ||||
|         } | ||||
|         unsafe { do_exit_task() } // exit enclave
 | ||||
|     } | ||||
| @ -828,6 +832,12 @@ fn do_msync(addr: usize, size: usize, flags: u32) -> Result<isize> { | ||||
|     Ok(0) | ||||
| } | ||||
| 
 | ||||
| fn do_madvice(addr: usize, length: usize, advice: i32) -> Result<isize> { | ||||
|     let flags = MadviceFlags::from_i32(advice)?; | ||||
|     vm::do_madvice(addr, length, flags)?; | ||||
|     Ok(0) | ||||
| } | ||||
| 
 | ||||
| fn do_sysinfo(info: *mut sysinfo_t) -> Result<isize> { | ||||
|     check_mut_ptr(info)?; | ||||
|     let info = unsafe { &mut *info }; | ||||
| @ -977,7 +987,6 @@ fn handle_unsupported() -> Result<isize> { | ||||
| /// Floating point registers
 | ||||
| ///
 | ||||
| /// Note. The area is used to save fxsave result
 | ||||
| //#[derive(Clone, Copy)]
 | ||||
| #[repr(C)] | ||||
| pub struct FpRegs { | ||||
|     inner: Aligned<A16, [u8; 512]>, | ||||
| @ -1017,6 +1026,41 @@ impl FpRegs { | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| #[derive(Debug)] | ||||
| #[repr(C)] | ||||
| pub struct XsaveArea { | ||||
|     inner: Aligned<A64, [u8; 4096]>, | ||||
| } | ||||
| 
 | ||||
| impl XsaveArea { | ||||
|     // The first 512 bytes of the xsave area are used for FP registers
 | ||||
|     const FXSAVE_AREA_LEN: usize = 512; | ||||
| 
 | ||||
|     /// Save the current CPU extended (xsave) states into a new XsaveArea instance
 | ||||
|     pub fn save() -> Self { | ||||
|         let mut xsave_area = MaybeUninit::<Self>::uninit(); | ||||
|         unsafe { | ||||
|             save_xregs(xsave_area.as_mut_ptr() as *mut u8); | ||||
|             xsave_area.assume_init() | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     /// Restore the current CPU extended (xsave) states from this XsaveArea instance
 | ||||
|     pub fn restore(&self) { | ||||
|         unsafe { | ||||
|             restore_xregs(self.inner.as_ptr()); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     pub fn get_fpregs(&self) -> FpRegs { | ||||
|         unsafe { FpRegs::from_slice(&self.inner[..Self::FXSAVE_AREA_LEN]) } | ||||
|     } | ||||
| 
 | ||||
|     pub fn set_fpregs_area(&mut self, fpregs: FpRegs) { | ||||
|         self.inner[..Self::FXSAVE_AREA_LEN].copy_from_slice(fpregs.as_slice()) | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| /// Cpu context.
 | ||||
| ///
 | ||||
| /// Note. The definition of this struct must be kept in sync with the assembly
 | ||||
| @ -1042,8 +1086,21 @@ pub struct CpuContext { | ||||
|     pub rsp: u64, | ||||
|     pub rip: u64, | ||||
|     pub rflags: u64, | ||||
|     pub fpregs_on_heap: u64, | ||||
|     pub fpregs: *mut FpRegs, | ||||
|     pub extra_context: ExtraContext, | ||||
|     pub extra_context_ptr: *mut u8, | ||||
| } | ||||
| 
 | ||||
| #[repr(u64)] | ||||
| #[derive(Clone, Copy, Debug)] | ||||
| pub enum ExtraContext { | ||||
|     Fpregs = 0, | ||||
|     Xsave = 1, | ||||
| } | ||||
| 
 | ||||
| impl Default for ExtraContext { | ||||
|     fn default() -> Self { | ||||
|         Self::Fpregs | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| impl CpuContext { | ||||
| @ -1067,8 +1124,8 @@ impl CpuContext { | ||||
|             rsp: src.rsp, | ||||
|             rip: src.rip, | ||||
|             rflags: src.rflags, | ||||
|             fpregs_on_heap: 0, | ||||
|             fpregs: ptr::null_mut(), | ||||
|             extra_context: Default::default(), | ||||
|             extra_context_ptr: ptr::null_mut(), | ||||
|         } | ||||
|     } | ||||
| } | ||||
| @ -1082,14 +1139,15 @@ impl CpuContext { | ||||
| //  pointer that is not safe to use by external modules. In our case, the
 | ||||
| //  FpRegs pointer will not be used actually. So the Rust warning is a
 | ||||
| //  false alarm. We suppress it here.
 | ||||
| pub unsafe fn exception_interrupt_syscall_c_abi( | ||||
|     num: u32, | ||||
|     info: *mut c_void, | ||||
|     fpregs: *mut FpRegs, | ||||
| ) -> u32 { | ||||
| pub unsafe fn exception_interrupt_syscall_c_abi(num: u32, info: *mut c_void) -> u32 { | ||||
|     #[allow(improper_ctypes)] | ||||
|     extern "C" { | ||||
|         pub fn __occlum_syscall_c_abi(num: u32, info: *mut c_void, fpregs: *mut FpRegs) -> u32; | ||||
|         pub fn __occlum_syscall_c_abi(num: u32, info: *mut c_void) -> u32; | ||||
|     } | ||||
|     __occlum_syscall_c_abi(num, info, fpregs) | ||||
|     __occlum_syscall_c_abi(num, info) | ||||
| } | ||||
| 
 | ||||
| extern "C" { | ||||
|     pub fn save_xregs(save_area: *mut u8); | ||||
|     pub fn restore_xregs(save_area: *const u8); | ||||
| } | ||||
|  | ||||
| @ -52,8 +52,8 @@ __occlum_syscall_linux_abi: | ||||
|     // Save the target CPU state when `call __occlum_syscall` is returned in | ||||
|     // a CpuContext struct. The registers are saved in the reverse order of  | ||||
|     // the fields in CpuContext. | ||||
|     pushq $0         // default fpregs is NULL  | ||||
|     pushq $0         // default fpregs is allocated on stack | ||||
|     pushq $0         // default extra_context_ptr is NULL | ||||
|     pushq $0         // default extra_context is floating point registers | ||||
|     pushfq | ||||
|     push %rcx       // save %rip | ||||
|     push %r11       // save %rsp | ||||
|  | ||||
| @ -100,16 +100,9 @@ impl Chunk { | ||||
|             *options.perms(), | ||||
|             options.initializer().backed_file(), | ||||
|             current!().process().pid(), | ||||
|         ); | ||||
|         // Initialize the memory of the new range
 | ||||
|         unsafe { | ||||
|             let buf = vm_range.as_slice_mut(); | ||||
|             options.initializer().init_slice(buf)?; | ||||
|         } | ||||
|         // Set memory permissions
 | ||||
|         if !options.perms().is_default() { | ||||
|             VMPerms::apply_perms(&vm_area, vm_area.perms()); | ||||
|         } | ||||
|         ) | ||||
|         .init_memory(options)?; | ||||
| 
 | ||||
|         Ok(Self::new_chunk_with_vma(vm_area)) | ||||
|     } | ||||
| 
 | ||||
| @ -238,6 +231,30 @@ impl Chunk { | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     pub fn handle_page_fault( | ||||
|         &self, | ||||
|         rip: usize, | ||||
|         pf_addr: usize, | ||||
|         errcd: u32, | ||||
|         kernel_triggers: bool, | ||||
|     ) -> Result<()> { | ||||
|         let internal = &self.internal; | ||||
|         match self.internal() { | ||||
|             ChunkType::SingleVMA(vma) => { | ||||
|                 let mut vma = vma.lock().unwrap(); | ||||
|                 debug_assert!(vma.contains(pf_addr)); | ||||
|                 return vma.handle_page_fault(rip, pf_addr, errcd, kernel_triggers); | ||||
|             } | ||||
|             ChunkType::MultiVMA(internal_manager) => { | ||||
|                 return internal_manager | ||||
|                     .lock() | ||||
|                     .unwrap() | ||||
|                     .chunk_manager | ||||
|                     .handle_page_fault(rip, pf_addr, errcd, kernel_triggers); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     pub fn is_free_range(&self, request_range: &VMRange) -> bool { | ||||
|         match self.internal() { | ||||
|             ChunkType::SingleVMA(_) => false, // single-vma chunk can't be free
 | ||||
|  | ||||
| @ -63,11 +63,13 @@ use std::fmt; | ||||
| 
 | ||||
| mod chunk; | ||||
| mod free_space_manager; | ||||
| mod page_tracker; | ||||
| mod process_vm; | ||||
| mod shm_manager; | ||||
| mod user_space_vm; | ||||
| mod vm_area; | ||||
| mod vm_chunk_manager; | ||||
| mod vm_epc; | ||||
| mod vm_layout; | ||||
| mod vm_manager; | ||||
| mod vm_perms; | ||||
| @ -77,9 +79,12 @@ mod vm_util; | ||||
| use self::vm_layout::VMLayout; | ||||
| 
 | ||||
| pub use self::chunk::{ChunkRef, ChunkType}; | ||||
| pub use self::process_vm::{MMapFlags, MRemapFlags, MSyncFlags, ProcessVM, ProcessVMBuilder}; | ||||
| pub use self::process_vm::{ | ||||
|     MMapFlags, MRemapFlags, MSyncFlags, MadviceFlags, ProcessVM, ProcessVMBuilder, | ||||
| }; | ||||
| pub use self::user_space_vm::USER_SPACE_VM_MANAGER; | ||||
| pub use self::vm_area::VMArea; | ||||
| pub use self::vm_epc::enclave_page_fault_handler; | ||||
| pub use self::vm_manager::MunmapChunkFlag; | ||||
| pub use self::vm_perms::VMPerms; | ||||
| pub use self::vm_range::VMRange; | ||||
| @ -154,4 +159,9 @@ pub fn do_msync(addr: usize, size: usize, flags: MSyncFlags) -> Result<()> { | ||||
|     current!().vm().msync(addr, size) | ||||
| } | ||||
| 
 | ||||
| pub fn do_madvice(addr: usize, length: usize, advice: MadviceFlags) -> Result<()> { | ||||
|     warn!("madvice is not supported. madvice flags:{:?}", advice); | ||||
|     Ok(()) | ||||
| } | ||||
| 
 | ||||
| pub const PAGE_SIZE: usize = 4096; | ||||
|  | ||||

488  src/libos/src/vm/page_tracker.rs (new file)
							| @ -0,0 +1,488 @@ | ||||
| use super::*; | ||||
| 
 | ||||
| use super::user_space_vm::USER_SPACE_VM_MANAGER; | ||||
| use super::vm_util::{GB, KB, MB}; | ||||
| use bitvec::vec::BitVec; | ||||
| use util::sync::RwLock; | ||||
| use vm_epc::EPCMemType; | ||||
| 
 | ||||
| // In SGX v2, there is no upper limit for the size of EPC. If the user configures 1 TB of memory,
 | ||||
| // and we only use one bit to track if the page is committed, that's 1 TB / 4 kB / 8 bit = 32 MB of memory.
 | ||||
| // And the memory footprint will keep the same size during the whole libOS life cycle.
 | ||||
| // In order to track the commit status of a huge number of pages, we use two-level tracking.
 | ||||
| // In the first level, global level, we use `PAGE_CHUNK_UNIT` as the unit size for a page chunk.
 | ||||
| // In the second level, we just use the page size as the unit size, and use one bit to represent if the page is committed.
 | ||||
| // For example, if the user configures 64 TB of memory, when a page is committed, the second-level tracker will mark the corresponding bit as 1.
 | ||||
| // And when all the pages of a whole global page chunk are fully committed, the global level tracker will mark the page chunk as fully committed.
 | ||||
| // And the corresponding tracker can be freed. In this way, we can use just several bytes to represent the commit status of a big chunk of memory.
 | ||||
| // In a worse case, there may be several discrete global page chunks which are not fully committed at the same time,
 | ||||
| // and each of them will take some space in memory. In a memory-intensive case, we can
 | ||||
| // commit the pages by hand to make the global page chunk fully committed and free its page tracker.
 | ||||
| 
 | ||||
| // There are mainly three types of data structure to track the page status, from the top to the bottom:
 | ||||
| // 1. PageChunkManager - Created for the whole user space. This structure is used to manage the global paging status.
 | ||||
| // 2. GlobalPageChunk - Denotes a chunk of pages. The actual unit of the PageChunkManager. It holds the paging status of a memory range. Stored only
 | ||||
| // in the PageChunkManager. A newly created VMA should ask the corresponding GlobalPageChunk for the paging status. When all the pages recorded by
 | ||||
| // a GlobalPageChunk are committed, it will mark itself as "fully committed" and free the inner structure tracking the paging status. All GlobalPageChunk
 | ||||
| // instances record VM ranges of the SAME size.
 | ||||
| // 3. PageTracker - The real tracker of the paging status. Under the hood, it is a bitvec that tracks every page with a bit. There are mainly two types
 | ||||
| // of PageTracker:
 | ||||
| //      * GlobalTracker - Used by a GlobalPageChunk to track the global paging status. All GlobalTrackers record VM ranges of the same size.
 | ||||
| //      * VMATracker - Used by a VMA to track its own paging status. Records a range whose size varies with the VMA.
 | ||||
| // Since the VM operations are mostly performed by VMAs, the VMA tracker updates itself accordingly and also updates the corresponding GlobalTracker.
 | ||||
| 
 | ||||
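A small illustrative sketch of the two-level scheme described above (the helper below is hypothetical and not part of page_tracker.rs): the 4 MB-aligned chunk start is the key into PageChunkManager's map of GlobalPageChunks, and the page index within that chunk is the bit to flip in the chunk's PageTracker.

    // Hypothetical helper: map a user-space address to (global chunk start, bit index),
    // assuming chunks are laid out from the start of the user-space range.
    const PAGE_SIZE: usize = 4096;
    const PAGE_CHUNK_UNIT: usize = 4 * 1024 * 1024; // one GlobalPageChunk covers 4 MB

    fn locate_page(user_space_start: usize, addr: usize) -> (usize, usize) {
        let chunk_start =
            user_space_start + ((addr - user_space_start) / PAGE_CHUNK_UNIT) * PAGE_CHUNK_UNIT;
        let bit_index = (addr - chunk_start) / PAGE_SIZE;
        (chunk_start, bit_index)
    }

    // E.g. with user_space_start = 0x1000_0000, addr = 0x1040_2000 falls in the chunk
    // starting at 0x1040_0000 and maps to bit index 2 of that chunk's tracker.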
| lazy_static! { | ||||
|     pub static ref USER_SPACE_PAGE_CHUNK_MANAGER: RwLock<PageChunkManager> = | ||||
|         RwLock::new(PageChunkManager::new(USER_SPACE_VM_MANAGER.range())); | ||||
| } | ||||
| 
 | ||||
| const PAGE_CHUNK_UNIT: usize = 4 * MB; | ||||
| const PAGE_CHUNK_PAGE_NUM: usize = PAGE_CHUNK_UNIT / PAGE_SIZE; | ||||
| 
 | ||||
| pub struct PageChunkManager { | ||||
|     // The total range that the manager manages.
 | ||||
|     range: VMRange, | ||||
|     // The page chunks
 | ||||
|     inner: HashMap<usize, GlobalPageChunk>, // K: Page chunk start address, V: Global page chunk
 | ||||
| } | ||||
| 
 | ||||
| impl PageChunkManager { | ||||
|     fn new(range: &VMRange) -> Self { | ||||
|         Self { | ||||
|             range: range.clone(), | ||||
|             inner: HashMap::new(), | ||||
|         } | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| #[derive(Debug)] | ||||
| // A chunk of pages. Memory space is precious. Don't put anything unnecessary.
 | ||||
| struct GlobalPageChunk { | ||||
|     fully_committed: bool, | ||||
|     tracker: Option<Arc<RwLock<PageTracker>>>, // if this page chunk is fully committed, the tracker will be set to None.
 | ||||
| } | ||||
| 
 | ||||
| impl GlobalPageChunk { | ||||
|     fn new(tracker: PageTracker) -> Self { | ||||
|         Self { | ||||
|             fully_committed: false, | ||||
|             tracker: Some(Arc::new(RwLock::new(tracker))), | ||||
|         } | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| #[derive(PartialEq, Clone, Debug)] | ||||
| enum TrackerType { | ||||
|     GlobalTracker, // PAGE_CHUNK_UNIT size for global management to track the global paging status
 | ||||
|     VMATracker,    // various size for different vma to track its own paging status
 | ||||
| } | ||||
| 
 | ||||
| // Used for tracking the paging status of global tracker or VMA tracker
 | ||||
| #[derive(Clone)] | ||||
| pub struct PageTracker { | ||||
|     type_: TrackerType, | ||||
|     range: VMRange, | ||||
|     inner: BitVec, | ||||
|     fully_committed: bool, | ||||
| } | ||||
| 
 | ||||
| impl Debug for PageTracker { | ||||
|     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | ||||
|         f.debug_struct("PageTracker") | ||||
|             .field("type", &self.type_) | ||||
|             .field("range", &self.range) | ||||
|             .field("fully committed", &self.fully_committed) | ||||
|             .finish() | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| impl PageTracker { | ||||
|     // Create a new page tracker for GlobalPageChunk.
 | ||||
|     // When a new global tracker is needed, none of the pages are committed.
 | ||||
|     fn new_global_tracker(start_addr: usize) -> Result<Self> { | ||||
|         let range = VMRange::new_with_size(start_addr, PAGE_CHUNK_UNIT)?; | ||||
| 
 | ||||
|         let inner = bitvec![0; PAGE_CHUNK_PAGE_NUM]; | ||||
|         Ok(Self { | ||||
|             type_: TrackerType::GlobalTracker, | ||||
|             range, | ||||
|             inner, | ||||
|             fully_committed: false, | ||||
|         }) | ||||
|     } | ||||
| 
 | ||||
|     pub fn new_vma_tracker(vm_range: &VMRange, epc_type: &EPCMemType) -> Result<Self> { | ||||
|         trace!("new vma tracker, range = {:?}", vm_range); | ||||
|         let page_num = vm_range.size() / PAGE_SIZE; | ||||
|         let new_vma_tracker = match epc_type { | ||||
|             EPCMemType::UserRegion => { | ||||
|                 let mut new_vma_tracker = Self { | ||||
|                     type_: TrackerType::VMATracker, | ||||
|                     range: vm_range.clone(), | ||||
|                     inner: bitvec![0; page_num], | ||||
|                     fully_committed: false, | ||||
|                 }; | ||||
| 
 | ||||
|                 // Skip sentry
 | ||||
|                 if page_num != 0 { | ||||
|                     new_vma_tracker.get_committed_pages_from_global_tracker()?; | ||||
|                 } | ||||
|                 new_vma_tracker | ||||
|             } | ||||
|             EPCMemType::Reserved => { | ||||
|                 // For reserved memory, there is no need to update the global page tracker.
 | ||||
|                 // And there is no GlobalPageChunk for reserved memory.
 | ||||
|                 Self { | ||||
|                     type_: TrackerType::VMATracker, | ||||
|                     range: vm_range.clone(), | ||||
|                     inner: bitvec![1; page_num], | ||||
|                     fully_committed: true, | ||||
|                 } | ||||
|             } | ||||
|             _ => unreachable!(), | ||||
|         }; | ||||
| 
 | ||||
|         Ok(new_vma_tracker) | ||||
|     } | ||||
| 
 | ||||
|     pub fn range(&self) -> &VMRange { | ||||
|         &self.range | ||||
|     } | ||||
| 
 | ||||
|     pub fn is_fully_committed(&self) -> bool { | ||||
|         self.fully_committed | ||||
|     } | ||||
| 
 | ||||
|     pub fn is_reserved_only(&self) -> bool { | ||||
|         !self.fully_committed && self.inner.not_any() | ||||
|     } | ||||
| 
 | ||||
|     pub fn is_partially_committed(&self) -> bool { | ||||
|         !self.fully_committed && self.inner.any() | ||||
|     } | ||||
| 
 | ||||
|     // Get all committed or uncommitted ranges of consecutive pages.
 | ||||
|     // If committed is true, get all committed ranges
 | ||||
|     // If committed is false, get all uncommitted ranges
 | ||||
|     pub fn get_ranges(&self, committed: bool) -> Vec<VMRange> { | ||||
|         if self.is_fully_committed() { | ||||
|             if committed { | ||||
|                 return vec![self.range.clone()]; | ||||
|             } else { | ||||
|                 return Vec::new(); | ||||
|             } | ||||
|         } | ||||
|         if self.is_reserved_only() { | ||||
|             if committed { | ||||
|                 return Vec::new(); | ||||
|             } else { | ||||
|                 return vec![self.range.clone()]; | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
|         let tracker_start_addr = self.range.start(); | ||||
|         let mut ret = Vec::new(); | ||||
|         let mut start = None; | ||||
|         let mut end = None; | ||||
| 
 | ||||
|         for i in 0..self.inner.len() { | ||||
|             if self.inner[i] == committed { | ||||
|                 match (start, end) { | ||||
|                     // Meet committed page for the first time. Update both the start and end marker.
 | ||||
|                     (None, None) => { | ||||
|                         start = Some(i); | ||||
|                         end = Some(i); | ||||
|                         // Reach the end of the tracker. Only one page
 | ||||
|                         if i == self.inner.len() - 1 { | ||||
|                             let committed_range = VMRange::new_with_size( | ||||
|                                 tracker_start_addr + i * PAGE_SIZE, | ||||
|                                 PAGE_SIZE, | ||||
|                             ) | ||||
|                             .unwrap(); | ||||
|                             ret.push(committed_range); | ||||
|                         } | ||||
|                     } | ||||
|                     // Previous pages are committed. Update the end marker.
 | ||||
|                     (Some(s), Some(e)) => { | ||||
|                         end = Some(i); | ||||
|                         // Reach the end of the tracker.
 | ||||
|                         if i == self.inner.len() - 1 { | ||||
|                             let committed_range = VMRange::new_with_size( | ||||
|                                 tracker_start_addr + s * PAGE_SIZE, | ||||
|                                 PAGE_SIZE * (i - s + 1), | ||||
|                             ) | ||||
|                             .unwrap(); | ||||
|                             ret.push(committed_range); | ||||
|                         } | ||||
|                     } | ||||
|                     _ => unreachable!(), | ||||
|                 } | ||||
|             } else { | ||||
|                 match (start, end) { | ||||
|                     (None, None) => { | ||||
|                         // No committed pages.
 | ||||
|                     } | ||||
|                     (Some(s), Some(e)) => { | ||||
|                         // Meet the first uncommitted pages after recording all the previous committed pages.
 | ||||
|                         let committed_range = VMRange::new_with_size( | ||||
|                             tracker_start_addr + s * PAGE_SIZE, | ||||
|                             PAGE_SIZE * (e - s + 1), | ||||
|                         ) | ||||
|                         .unwrap(); | ||||
|                         ret.push(committed_range); | ||||
|                         // Reset markers
 | ||||
|                         start = None; | ||||
|                         end = None; | ||||
|                     } | ||||
|                     _ => { | ||||
|                         unreachable!() | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
|         let total_size = ret.iter().fold(0, |a, b| a + b.size()); | ||||
|         if committed { | ||||
|             trace!("get committed ranges = {:?}", ret); | ||||
|             debug_assert!(total_size == self.inner.count_ones() * PAGE_SIZE); | ||||
|         } else { | ||||
|             trace!("get uncommitted ranges = {:?}", ret); | ||||
|             debug_assert!(total_size == self.inner.count_zeros() * PAGE_SIZE); | ||||
|         } | ||||
| 
 | ||||
|         ret | ||||
|     } | ||||
| 
 | ||||
|     pub fn split_for_new_range(&mut self, new_range: &VMRange) { | ||||
|         debug_assert!(self.range.is_superset_of(new_range)); | ||||
| 
 | ||||
|         let new_start = new_range.start(); | ||||
|         let page_num = new_range.size() / PAGE_SIZE; | ||||
| 
 | ||||
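|         // `split_off` keeps the pages before `split_idx` and returns the tail; truncating the tail to `page_num` leaves exactly the pages that cover `new_range`.
 | ||||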
|         let split_idx = (new_start - self.range.start()) / PAGE_SIZE; | ||||
|         let mut new_inner = self.inner.split_off(split_idx); | ||||
|         new_inner.truncate(page_num); | ||||
| 
 | ||||
|         trace!( | ||||
|             "old range= {:?}, new_start = {:x}, idx = {:?}", | ||||
|             self.range, | ||||
|             new_start, | ||||
|             split_idx | ||||
|         ); | ||||
| 
 | ||||
|         self.inner = new_inner; | ||||
|         if self.inner.all() { | ||||
|             self.fully_committed = true; | ||||
|         } | ||||
| 
 | ||||
|         self.range = *new_range; | ||||
|     } | ||||
| 
 | ||||
|     // Commit memory for the whole current VMA (VMATracker)
 | ||||
|     pub fn commit_whole(&mut self, perms: VMPerms) -> Result<()> { | ||||
|         debug_assert!(self.type_ == TrackerType::VMATracker); | ||||
| 
 | ||||
|         if self.is_fully_committed() { | ||||
|             return Ok(()); | ||||
|         } | ||||
| 
 | ||||
|         // Commit EPC
 | ||||
|         if self.is_reserved_only() { | ||||
|             vm_epc::commit_memory(self.range().start(), self.range().size(), Some(perms)).unwrap(); | ||||
|         } else { | ||||
|             debug_assert!(self.is_partially_committed()); | ||||
|             let uncommitted_ranges = self.get_ranges(false); | ||||
|             for range in uncommitted_ranges { | ||||
|                 vm_epc::commit_memory(range.start(), range.size(), Some(perms)).unwrap(); | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
|         // Update the tracker
 | ||||
|         self.inner.fill(true); | ||||
|         self.fully_committed = true; | ||||
| 
 | ||||
|         self.set_committed_pages_for_global_tracker(self.range().start(), self.range().size()); | ||||
| 
 | ||||
|         Ok(()) | ||||
|     } | ||||
| 
 | ||||
|     // Commit memory of a specific range for the current VMA (VMATracker). The range should be verified by the caller.
 | ||||
|     pub fn commit_range(&mut self, range: &VMRange, new_perms: Option<VMPerms>) -> Result<()> { | ||||
|         debug_assert!(self.type_ == TrackerType::VMATracker); | ||||
|         debug_assert!(self.range().is_superset_of(range)); | ||||
| 
 | ||||
|         vm_epc::commit_memory(range.start(), range.size(), new_perms)?; | ||||
| 
 | ||||
|         self.commit_pages_common(range.start(), range.size()); | ||||
|         self.set_committed_pages_for_global_tracker(range.start(), range.size()); | ||||
| 
 | ||||
|         Ok(()) | ||||
|     } | ||||
| 
 | ||||
|     pub fn commit_memory_and_init_with_file( | ||||
|         &mut self, | ||||
|         range: &VMRange, | ||||
|         file: &FileRef, | ||||
|         file_offset: usize, | ||||
|         new_perms: VMPerms, | ||||
|     ) -> Result<()> { | ||||
|         debug_assert!(self.type_ == TrackerType::VMATracker); | ||||
|         debug_assert!(self.range().is_superset_of(range)); | ||||
| 
 | ||||
|         vm_epc::commit_memory_and_init_with_file( | ||||
|             range.start(), | ||||
|             range.size(), | ||||
|             file, | ||||
|             file_offset, | ||||
|             new_perms, | ||||
|         )?; | ||||
| 
 | ||||
|         self.commit_pages_common(range.start(), range.size()); | ||||
|         self.set_committed_pages_for_global_tracker(range.start(), range.size()); | ||||
| 
 | ||||
|         Ok(()) | ||||
|     } | ||||
| 
 | ||||
|     // The VMATracker gets the page commit status from the global tracker and updates itself.
 | ||||
|     // This should be called when the VMATracker is initialized.
 | ||||
|     fn get_committed_pages_from_global_tracker(&mut self) -> Result<()> { | ||||
|         debug_assert!(self.type_ == TrackerType::VMATracker); | ||||
|         let mut vma_tracker = self; | ||||
|         let mut page_chunk_start = get_page_chunk_start_addr(vma_tracker.range().start()); | ||||
| 
 | ||||
|         let range_end = vma_tracker.range().end(); | ||||
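|         // Walk every page chunk that overlaps this VMA and pull its commit status from the global page chunk manager.
 | ||||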
|         for page_chunk_addr in (page_chunk_start..range_end).step_by(PAGE_CHUNK_UNIT) { | ||||
|             let manager = USER_SPACE_PAGE_CHUNK_MANAGER.read().unwrap(); | ||||
|             if let Some(page_chunk) = manager.inner.get(&page_chunk_addr) { | ||||
|                 if page_chunk.fully_committed { | ||||
|                     // The global page chunk is fully committed. Commit pages for the VMA page chunk.
 | ||||
|                     vma_tracker.commit_pages_common(page_chunk_addr, PAGE_CHUNK_UNIT); | ||||
|                 } else { | ||||
|                     debug_assert!(page_chunk.tracker.is_some()); | ||||
|                     let global_tracker = page_chunk.tracker.as_ref().unwrap().read().unwrap(); | ||||
|                     global_tracker.set_committed_pages_for_vma_tracker(vma_tracker); | ||||
|                 } | ||||
|                 drop(manager); | ||||
|             } else { | ||||
|                 // Not tracking this page chunk. Release the read lock and acquire the write lock for an update.
 | ||||
|                 drop(manager); | ||||
|                 // This page chunk is not tracked by the global tracker. Thus, none of its pages are committed.
 | ||||
|                 let page_chunk = { | ||||
|                     let global_page_tracker = PageTracker::new_global_tracker(page_chunk_addr)?; | ||||
|                     GlobalPageChunk::new(global_page_tracker) | ||||
|                 }; | ||||
| 
 | ||||
|                 // There could be a data race here, but it's fine because the ultimate state is the same.
 | ||||
|                 USER_SPACE_PAGE_CHUNK_MANAGER | ||||
|                     .write() | ||||
|                     .unwrap() | ||||
|                     .inner | ||||
|                     .insert(page_chunk_addr, page_chunk); | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
|         Ok(()) | ||||
|     } | ||||
| 
 | ||||
|     // The VMATracker helps to update the global tracker based on its own paging status.
 | ||||
|     // This should be called whenever the VMATracker updates and needs to sync with the GlobalTracker.
 | ||||
|     fn set_committed_pages_for_global_tracker(&self, commit_start_addr: usize, commit_size: usize) { | ||||
|         debug_assert!(self.type_ == TrackerType::VMATracker); | ||||
| 
 | ||||
|         let commit_end_addr = commit_start_addr + commit_size; | ||||
|         let page_chunk_start_addr = get_page_chunk_start_addr(commit_start_addr); | ||||
|         for page_chunk_addr in (page_chunk_start_addr..commit_end_addr).step_by(PAGE_CHUNK_UNIT) { | ||||
|             let is_global_tracker_fully_committed = { | ||||
|                 // Find the corresponding page chunk
 | ||||
|                 let manager = USER_SPACE_PAGE_CHUNK_MANAGER.read().unwrap(); | ||||
|                 let page_chunk = manager | ||||
|                     .inner | ||||
|                     .get(&page_chunk_addr) | ||||
|                     .expect("this page chunk must exist"); | ||||
| 
 | ||||
|                 // Update the global page tracker
 | ||||
|                 if let Some(global_page_tracker) = &page_chunk.tracker { | ||||
|                     let mut global_tracker = global_page_tracker.write().unwrap(); | ||||
|                     global_tracker.commit_pages_common(commit_start_addr, commit_size); | ||||
|                     global_tracker.fully_committed | ||||
|                 } else { | ||||
|                     // The page tracker is None, so the page chunk is fully committed. Go to the next chunk.
 | ||||
|                     debug_assert!(page_chunk.fully_committed); | ||||
|                     continue; | ||||
|                 } | ||||
|             }; | ||||
| 
 | ||||
|             // Free the global page tracker if fully committed
 | ||||
|             if is_global_tracker_fully_committed { | ||||
|                 // Update the global page chunk manager. We need to acquire the write lock this time. There can be a data race because the lock
 | ||||
|                 // is dropped for a while before being acquired again. But it's fine, because the ultimate state is the same.
 | ||||
|                 let mut manager = USER_SPACE_PAGE_CHUNK_MANAGER.write().unwrap(); | ||||
|                 if let Some(mut page_chunk) = manager.inner.get_mut(&page_chunk_addr) { | ||||
|                     page_chunk.fully_committed = true; | ||||
|                     page_chunk.tracker = None; | ||||
|                 } else { | ||||
|                     warn!( | ||||
|                         "the global page chunk with start addr: 0x{:x} has been freed already", | ||||
|                         page_chunk_addr | ||||
|                     ); | ||||
|                     unreachable!(); | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     // GlobalTracker helps to update VMATracker based on the paging status of itself.
 | ||||
|     // This should be called when the VMATracker inits.
 | ||||
|     fn set_committed_pages_for_vma_tracker(&self, vma_tracker: &mut PageTracker) { | ||||
|         debug_assert!(self.type_ == TrackerType::GlobalTracker); | ||||
|         debug_assert!(vma_tracker.type_ == TrackerType::VMATracker); | ||||
| 
 | ||||
|         let global_tracker = self; | ||||
| 
 | ||||
|         if let Some(intersection_range) = global_tracker.range().intersect(vma_tracker.range()) { | ||||
|             let vma_tracker_page_id = | ||||
|                 (intersection_range.start() - vma_tracker.range().start()) / PAGE_SIZE; | ||||
|             let global_tracker_page_id = | ||||
|                 (intersection_range.start() - global_tracker.range().start()) / PAGE_SIZE; | ||||
|             let page_num = intersection_range.size() / PAGE_SIZE; | ||||
| 
 | ||||
|             vma_tracker.inner[vma_tracker_page_id..vma_tracker_page_id + page_num] | ||||
|                 .copy_from_bitslice( | ||||
|                     &global_tracker.inner | ||||
|                         [global_tracker_page_id..global_tracker_page_id + page_num], | ||||
|                 ); | ||||
|             if vma_tracker.inner.all() { | ||||
|                 vma_tracker.fully_committed = true; | ||||
|             } | ||||
|         } else { | ||||
|             // No intersection range. Why was this called? Weird.
 | ||||
|             unreachable!(); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     // Commit pages for the page tracker itself. This is a common method for both the VMATracker and the GlobalTracker.
 | ||||
|     fn commit_pages_common(&mut self, start_addr: usize, size: usize) { | ||||
|         debug_assert!(!self.fully_committed); | ||||
| 
 | ||||
|         if let Some(intersection_range) = { | ||||
|             let range = VMRange::new_with_size(start_addr, size).unwrap(); | ||||
|             self.range.intersect(&range) | ||||
|         } { | ||||
|             trace!("commit for page tracker: {:?}", self); | ||||
|             let page_start_id = (intersection_range.start() - self.range().start()) / PAGE_SIZE; | ||||
|             let page_num = intersection_range.size() / PAGE_SIZE; | ||||
|             self.inner[page_start_id..page_start_id + page_num].fill(true); | ||||
|             if self.inner.all() { | ||||
|                 self.fully_committed = true; | ||||
|             } | ||||
|         } else { | ||||
|             // No intersecting range. Weird.
 | ||||
|             unreachable!(); | ||||
|         } | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| #[inline(always)] | ||||
| fn get_page_chunk_start_addr(addr: usize) -> usize { | ||||
|     align_down(addr, PAGE_CHUNK_UNIT) | ||||
| } | ||||
| @ -6,7 +6,8 @@ use super::vm_area::VMArea; | ||||
| use super::vm_manager::MunmapChunkFlag; | ||||
| use super::vm_perms::VMPerms; | ||||
| use super::vm_util::{ | ||||
|     FileBacked, VMInitializer, VMMapAddr, VMMapOptions, VMMapOptionsBuilder, VMRemapOptions, | ||||
|     FileBacked, PagePolicy, VMInitializer, VMMapAddr, VMMapOptions, VMMapOptionsBuilder, | ||||
|     VMRemapOptions, | ||||
| }; | ||||
| use crate::config; | ||||
| use crate::ipc::SHM_MANAGER; | ||||
| @ -124,6 +125,8 @@ impl<'a, 'b> ProcessVMBuilder<'a, 'b> { | ||||
|                     .initializer(VMInitializer::ElfSpecific { | ||||
|                         elf_file: elf_file.file_ref().clone(), | ||||
|                     }) | ||||
|                     // We only load loadable segments, so just commit the memory when allocating.
 | ||||
|                     .page_policy(PagePolicy::CommitNow) | ||||
|                     .build() | ||||
|                     .map_err(|e| { | ||||
|                         &self.handle_error_when_init(&chunks); | ||||
| @ -152,6 +155,8 @@ impl<'a, 'b> ProcessVMBuilder<'a, 'b> { | ||||
|             .size(heap_layout.size()) | ||||
|             .align(heap_layout.align()) | ||||
|             .perms(VMPerms::READ | VMPerms::WRITE) | ||||
|             .page_policy(PagePolicy::CommitOnDemand) | ||||
|             // .page_policy(PagePolicy::CommitNow)
 | ||||
|             .build() | ||||
|             .map_err(|e| { | ||||
|                 &self.handle_error_when_init(&chunks); | ||||
| @ -171,8 +176,10 @@ impl<'a, 'b> ProcessVMBuilder<'a, 'b> { | ||||
|         let stack_layout = &other_layouts[1]; | ||||
|         let vm_option = VMMapOptionsBuilder::default() | ||||
|             .size(stack_layout.size()) | ||||
|             .align(heap_layout.align()) | ||||
|             .align(stack_layout.align()) | ||||
|             .perms(VMPerms::READ | VMPerms::WRITE) | ||||
|             // There are cases we can't handle when a #PF happens on the user's stack. Commit the stack memory now.
 | ||||
|             .page_policy(PagePolicy::CommitNow) | ||||
|             .build() | ||||
|             .map_err(|e| { | ||||
|                 &self.handle_error_when_init(&chunks); | ||||
| @ -537,11 +544,26 @@ impl ProcessVM { | ||||
|                 } | ||||
|             } | ||||
|         }; | ||||
| 
 | ||||
|         let page_policy = { | ||||
|             if flags.contains(MMapFlags::MAP_STACK) { | ||||
|                 // With MAP_STACK, the mmapped memory will be used as the user's stack. If not committed, a #PF can occur
 | ||||
|                 // when switching to user space, and we can't handle it correctly.
 | ||||
|                 PagePolicy::CommitNow | ||||
|             } else if !flags.contains(MMapFlags::MAP_ANONYMOUS) { | ||||
|                 // Use the commit-now policy for file-backed mmap. We tried the commit-on-demand policy but didn't get any performance gain.
 | ||||
|                 // However, the path for file-backed mmap with the commit-on-demand policy is ready. We can enable it whenever needed.
 | ||||
|                 PagePolicy::CommitNow | ||||
|             } else { | ||||
|                 PagePolicy::CommitOnDemand | ||||
|             } | ||||
|         }; | ||||
|         let mmap_options = VMMapOptionsBuilder::default() | ||||
|             .size(size) | ||||
|             .addr(addr_option) | ||||
|             .perms(perms) | ||||
|             .initializer(initializer) | ||||
|             .page_policy(page_policy) | ||||
|             .build()?; | ||||
|         let mmap_addr = USER_SPACE_VM_MANAGER.mmap(&mmap_options)?; | ||||
|         Ok(mmap_addr) | ||||
| @ -674,3 +696,33 @@ impl MSyncFlags { | ||||
|         Ok(flags) | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| #[allow(non_camel_case_types)] | ||||
| #[repr(i32)] | ||||
| #[derive(Debug)] | ||||
| pub enum MadviceFlags { | ||||
|     MADV_NORMAL = 0, | ||||
|     MADV_RANDOM = 1, | ||||
|     MADV_SEQUENTIAL = 2, | ||||
|     MADV_WILLNEED = 3, | ||||
|     MADV_DONTNEED = 4, | ||||
| } | ||||
| 
 | ||||
| impl MadviceFlags { | ||||
|     pub fn from_i32(raw: i32) -> Result<Self> { | ||||
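|         // These raw constants mirror the enum discriminants so the match below can pattern-match on the raw syscall argument.
 | ||||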
|         const MADV_NORMAL: i32 = 0; | ||||
|         const MADV_RANDOM: i32 = 1; | ||||
|         const MADV_SEQUENTIAL: i32 = 2; | ||||
|         const MADV_WILLNEED: i32 = 3; | ||||
|         const MADV_DONTNEED: i32 = 4; | ||||
| 
 | ||||
|         match raw { | ||||
|             MADV_NORMAL => Ok(MadviceFlags::MADV_NORMAL), | ||||
|             MADV_RANDOM => Ok(MadviceFlags::MADV_RANDOM), | ||||
|             MADV_SEQUENTIAL => Ok(MadviceFlags::MADV_SEQUENTIAL), | ||||
|             MADV_WILLNEED => Ok(MadviceFlags::MADV_WILLNEED), | ||||
|             MADV_DONTNEED => Ok(MadviceFlags::MADV_DONTNEED), | ||||
|             _ => return_errno!(ENOSYS, "unknown madvice flags"), | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| @ -206,8 +206,8 @@ impl ShmManager { | ||||
|             let old_perms = old_vma.perms(); | ||||
|             if new_perms != old_perms { | ||||
|                 let perms = new_perms | old_perms; | ||||
|                 VMPerms::apply_perms(new_vma.range(), perms); | ||||
|                 new_vma.set_perms(perms); | ||||
|                 new_vma.modify_permissions_for_committed_pages(perms); | ||||
|             } | ||||
| 
 | ||||
|             let inode_id = Self::inode_id_of(&new_vma); | ||||
| @ -279,7 +279,7 @@ impl ShmManager { | ||||
|         if perms == old_perms { | ||||
|             return; | ||||
|         } | ||||
|         VMPerms::apply_perms(vma.range(), perms); | ||||
|         vma.set_perms(perms); | ||||
|         vma.modify_permissions_for_committed_pages(perms); | ||||
|     } | ||||
| } | ||||
|  | ||||
| @ -1,46 +1,50 @@ | ||||
| use super::*; | ||||
| 
 | ||||
| use super::vm_manager::VMManager; | ||||
| use crate::config::LIBOS_CONFIG; | ||||
| use crate::ctor::dtor; | ||||
| use crate::ipc::SHM_MANAGER; | ||||
| use crate::ipc::SYSTEM_V_SHM_MANAGER; | ||||
| use crate::util::pku_util; | ||||
| 
 | ||||
| use std::ops::{Deref, DerefMut}; | ||||
| use vm_epc::SGXPlatform; | ||||
| use vm_manager::VMManager; | ||||
| use vm_perms::VMPerms; | ||||
| 
 | ||||
| const RSRV_MEM_PERM: MemPerm = | ||||
|     MemPerm::from_bits_truncate(MemPerm::READ.bits() | MemPerm::WRITE.bits()); | ||||
| const USER_SPACE_DEFAULT_MEM_PERM: VMPerms = VMPerms::DEFAULT; | ||||
| 
 | ||||
| /// The virtual memory manager for the entire user space
 | ||||
| pub struct UserSpaceVMManager(VMManager); | ||||
| pub struct UserSpaceVMManager { | ||||
|     inner: VMManager, | ||||
|     sgx_platform: SGXPlatform, | ||||
| } | ||||
| 
 | ||||
| impl UserSpaceVMManager { | ||||
|     fn new() -> Result<UserSpaceVMManager> { | ||||
|         let rsrv_mem_size = LIBOS_CONFIG.resource_limits.user_space_size; | ||||
|         let vm_range = unsafe { | ||||
|             // TODO: Current sgx_alloc_rsrv_mem implementation will commit all the pages of the desired size, which will consume
 | ||||
|             // a lot of time. When EDMM is supported, there is no need to commit all the pages at the initialization stage. A function
 | ||||
|             // which reserves memory but not commit pages should be provided then.
 | ||||
|             let ptr = sgx_alloc_rsrv_mem(rsrv_mem_size); | ||||
|             if ptr.is_null() { | ||||
|                 return_errno!(ENOMEM, "run out of reserved memory"); | ||||
|             } | ||||
|         let sgx_platform = SGXPlatform::new(); | ||||
|         let init_size = LIBOS_CONFIG.resource_limits.user_space_init_size; | ||||
|         let max_size = LIBOS_CONFIG.resource_limits.user_space_max_size; | ||||
| 
 | ||||
|             // Without EDMM support and the ReservedMemExecutable is set to 1, the reserved memory will be RWX. And we can't change the reserved memory permission.
 | ||||
|             // With EDMM support, the reserved memory permission is RW by default. And we can change the permissions when needed.
 | ||||
|         let (userspace_vm_range, gap_range) = sgx_platform.alloc_user_space(init_size, max_size)?; | ||||
| 
 | ||||
|             let addr = ptr as usize; | ||||
|             debug!( | ||||
|                 "allocated rsrv addr is 0x{:x}, len is 0x{:x}", | ||||
|                 addr, rsrv_mem_size | ||||
|         info!( | ||||
|             "user space allocated, range = {:?}, gap_range = {:?}", | ||||
|             userspace_vm_range, gap_range | ||||
|         ); | ||||
|             pku_util::pkey_mprotect_userspace_mem(addr, rsrv_mem_size, RSRV_MEM_PERM.bits()); | ||||
|             VMRange::new(addr, addr + rsrv_mem_size)? | ||||
|         }; | ||||
| 
 | ||||
|         let vm_manager = VMManager::init(vm_range)?; | ||||
|         // Use pkey_mprotect to set the whole userspace to R/W permissions. If user specifies a new
 | ||||
|         // permission, the mprotect ocall will update the permission.
 | ||||
|         pku_util::pkey_mprotect_userspace_mem( | ||||
|             &userspace_vm_range, | ||||
|             gap_range.as_ref(), | ||||
|             USER_SPACE_DEFAULT_MEM_PERM, | ||||
|         ); | ||||
| 
 | ||||
|         Ok(UserSpaceVMManager(vm_manager)) | ||||
|         let vm_manager = VMManager::init(userspace_vm_range, gap_range)?; | ||||
| 
 | ||||
|         Ok(Self { | ||||
|             inner: vm_manager, | ||||
|             sgx_platform, | ||||
|         }) | ||||
|     } | ||||
| 
 | ||||
|     pub fn get_total_size(&self) -> usize { | ||||
| @ -52,51 +56,34 @@ impl UserSpaceVMManager { | ||||
| // be called after the main function. Static variables are still safe to access at this time.
 | ||||
| #[dtor] | ||||
| fn free_user_space() { | ||||
|     SHM_MANAGER.clean_when_libos_exit(); | ||||
|     let range = USER_SPACE_VM_MANAGER.range(); | ||||
|     info!("free user space at the end"); | ||||
|     SYSTEM_V_SHM_MANAGER.clean_when_libos_exit(); | ||||
|     let total_user_space_range = USER_SPACE_VM_MANAGER.range(); | ||||
|     let gap_range = USER_SPACE_VM_MANAGER.gap_range(); | ||||
|     assert!(USER_SPACE_VM_MANAGER.verified_clean_when_exit()); | ||||
|     let addr = range.start(); | ||||
|     let size = range.size(); | ||||
|     info!("free user space VM: {:?}", range); | ||||
|     pku_util::clear_pku_when_libos_exit(addr, size, RSRV_MEM_PERM.bits()); | ||||
|     assert!(unsafe { sgx_free_rsrv_mem(addr as *const c_void, size) == 0 }); | ||||
|     let addr = total_user_space_range.start(); | ||||
|     let size = total_user_space_range.size(); | ||||
|     info!("free user space VM: {:?}", total_user_space_range); | ||||
| 
 | ||||
|     pku_util::clear_pku_when_libos_exit( | ||||
|         total_user_space_range, | ||||
|         gap_range.as_ref(), | ||||
|         USER_SPACE_DEFAULT_MEM_PERM, | ||||
|     ); | ||||
| 
 | ||||
|     USER_SPACE_VM_MANAGER | ||||
|         .sgx_platform | ||||
|         .free_user_space(total_user_space_range, gap_range.as_ref()); | ||||
| } | ||||
| 
 | ||||
| impl Deref for UserSpaceVMManager { | ||||
|     type Target = VMManager; | ||||
| 
 | ||||
|     fn deref(&self) -> &Self::Target { | ||||
|         &self.0 | ||||
|         &self.inner | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| lazy_static! { | ||||
|     pub static ref USER_SPACE_VM_MANAGER: UserSpaceVMManager = UserSpaceVMManager::new().unwrap(); | ||||
| } | ||||
| 
 | ||||
| bitflags! { | ||||
|     struct MemPerm: i32 { | ||||
|         const READ  = 1; | ||||
|         const WRITE = 2; | ||||
|         const EXEC  = 4; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| extern "C" { | ||||
|     // Allocate a range of EPC memory from the reserved memory area with RW permission
 | ||||
|     //
 | ||||
|     // Parameters:
 | ||||
|     // Inputs: length [in]: Size of region to be allocated in bytes. Page aligned
 | ||||
|     // Return: Starting address of the new allocated memory area on success; otherwise NULL
 | ||||
|     //
 | ||||
|     fn sgx_alloc_rsrv_mem(length: usize) -> *const c_void; | ||||
| 
 | ||||
|     // Free a range of EPC memory from the reserved memory area
 | ||||
|     //
 | ||||
|     // Parameters:
 | ||||
|     // Inputs: addr[in]: Starting address of region to be freed. Page aligned.
 | ||||
|     //         length[in]: The length of the memory to be freed in bytes.  Page aligned
 | ||||
|     // Return: 0 on success; otherwise -1
 | ||||
|     //
 | ||||
|     fn sgx_free_rsrv_mem(addr: *const c_void, length: usize) -> i32; | ||||
| } | ||||
|  | ||||
| @ -1,19 +1,28 @@ | ||||
| use super::*; | ||||
| 
 | ||||
| use super::page_tracker::PageTracker; | ||||
| use super::vm_epc::EPCMemType; | ||||
| use super::vm_perms::VMPerms; | ||||
| use super::vm_range::VMRange; | ||||
| use super::vm_util::FileBacked; | ||||
| 
 | ||||
| use super::vm_util::{FileBacked, PagePolicy, VMInitializer, VMMapOptions, GB, KB, MB}; | ||||
| use intrusive_collections::rbtree::{Link, RBTree}; | ||||
| use intrusive_collections::{intrusive_adapter, KeyAdapter}; | ||||
| use std::ops::{Deref, DerefMut}; | ||||
| 
 | ||||
| #[derive(Clone, Debug, Default)] | ||||
| // The unit of memory size to commit when a #PF occurs.
 | ||||
| const COMMIT_SIZE_UNIT: usize = 4 * KB; | ||||
| // Commit the whole VMA when this threshold is reached.
 | ||||
| const PF_NUM_THRESHOLD: u64 = 3; | ||||
| 
 | ||||
| #[derive(Clone, Debug)] | ||||
| pub struct VMArea { | ||||
|     range: VMRange, | ||||
|     perms: VMPerms, | ||||
|     file_backed: Option<FileBacked>, | ||||
|     access: VMAccess, | ||||
|     pages: Option<PageTracker>, // Track the paging status of this VMA
 | ||||
|     epc_type: EPCMemType,       // Track the type of the EPC to use specific APIs
 | ||||
|     pf_count: u64, | ||||
| } | ||||
| 
 | ||||
| #[derive(Clone, Debug, Eq, PartialEq)] | ||||
| @ -32,11 +41,47 @@ impl VMArea { | ||||
|         file_backed: Option<FileBacked>, | ||||
|         pid: pid_t, | ||||
|     ) -> Self { | ||||
|         Self { | ||||
|         let epc_type = EPCMemType::new(&range); | ||||
|         let pages = { | ||||
|             match epc_type { | ||||
|                 EPCMemType::Reserved => None, | ||||
|                 EPCMemType::UserRegion => { | ||||
|                     let pages = | ||||
|                         PageTracker::new_vma_tracker(&range, &EPCMemType::UserRegion).unwrap(); | ||||
|                     (!pages.is_fully_committed()).then_some(pages) | ||||
|                 } | ||||
|             } | ||||
|         }; | ||||
| 
 | ||||
|         let new_vma = Self { | ||||
|             range, | ||||
|             perms, | ||||
|             file_backed, | ||||
|             access: VMAccess::Private(pid), | ||||
|             pages, | ||||
|             epc_type, | ||||
|             pf_count: 0, | ||||
|         }; | ||||
|         trace!("new vma = {:?}", new_vma); | ||||
|         new_vma | ||||
|     } | ||||
| 
 | ||||
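|     // Build a VMArea from an existing (possibly already split) page tracker instead of creating a fresh one.
 | ||||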
|     fn new_with_page_tracker( | ||||
|         range: VMRange, | ||||
|         perms: VMPerms, | ||||
|         file_backed: Option<FileBacked>, | ||||
|         access: VMAccess, | ||||
|         pages: Option<PageTracker>, | ||||
|     ) -> VMArea { | ||||
|         let epc_type = EPCMemType::new(&range); | ||||
|         Self { | ||||
|             range, | ||||
|             perms, | ||||
|             file_backed, | ||||
|             access, | ||||
|             pages, | ||||
|             epc_type, | ||||
|             pf_count: 0, | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
| @ -49,30 +94,41 @@ impl VMArea { | ||||
|         access: VMAccess, | ||||
|     ) -> Self { | ||||
|         debug_assert!(vma.is_superset_of(&new_range)); | ||||
|         let new_backed_file = vma.file_backed.as_ref().map(|file| { | ||||
| 
 | ||||
|         let new_backed_file = if let Some(file) = &vma.file_backed { | ||||
|             let mut new_file = file.clone(); | ||||
|             let file_offset = file.offset(); | ||||
| 
 | ||||
|             let new_file_offset = if vma.start() < new_range.start() { | ||||
|                 let vma_offset = new_range.start() - vma.start(); | ||||
|                 file_offset + vma_offset | ||||
|             } else { | ||||
|                 let vma_offset = vma.start() - new_range.start(); | ||||
|                 debug_assert!(file_offset >= vma_offset); | ||||
|                 file_offset - vma_offset | ||||
|             }; | ||||
|             debug_assert!(vma.start() <= new_range.start()); | ||||
|             let new_start_offset = new_range.start() - vma.start(); | ||||
|             let new_file_offset = file_offset + new_start_offset; | ||||
| 
 | ||||
|             new_file.set_offset(new_file_offset); | ||||
|             Some(new_file) | ||||
|         } else { | ||||
|             None | ||||
|         }; | ||||
| 
 | ||||
|             new_file | ||||
|         }); | ||||
|         let new_pages = { | ||||
|             let mut new_pages = vma.pages.clone(); | ||||
| 
 | ||||
|         Self { | ||||
|             range: new_range, | ||||
|             perms: new_perms, | ||||
|             file_backed: new_backed_file, | ||||
|             access, | ||||
|             if let Some(pages) = &mut new_pages { | ||||
|                 pages.split_for_new_range(&new_range); | ||||
|                 if pages.is_fully_committed() { | ||||
|                     None | ||||
|                 } else { | ||||
|                     new_pages | ||||
|                 } | ||||
|             } else { | ||||
|                 None | ||||
|             } | ||||
|         }; | ||||
| 
 | ||||
|         let new_vma = | ||||
|             Self::new_with_page_tracker(new_range, new_perms, new_backed_file, access, new_pages); | ||||
| 
 | ||||
|         trace!("inherits vma: {:?}, create new vma: {:?}", vma, new_vma); | ||||
|         new_vma | ||||
|     } | ||||
| 
 | ||||
|     pub fn perms(&self) -> VMPerms { | ||||
| @ -87,6 +143,13 @@ impl VMArea { | ||||
|         &self.access | ||||
|     } | ||||
| 
 | ||||
|     pub fn get_private_pid(&self) -> Option<pid_t> { | ||||
|         match &self.access { | ||||
|             VMAccess::Private(pid) => Some(*pid), | ||||
|             VMAccess::Shared(_) => None, | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     pub fn belong_to(&self, target_pid: pid_t) -> bool { | ||||
|         match &self.access { | ||||
|             VMAccess::Private(pid) => *pid == target_pid, | ||||
| @ -105,9 +168,199 @@ impl VMArea { | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     pub fn init_file(&self) -> Option<(&FileRef, usize)> { | ||||
|     fn pages(&self) -> &PageTracker { | ||||
|         debug_assert!(!self.is_fully_committed()); | ||||
|         self.pages.as_ref().unwrap() | ||||
|     } | ||||
| 
 | ||||
|     fn pages_mut(&mut self) -> &mut PageTracker { | ||||
|         debug_assert!(!self.is_fully_committed()); | ||||
|         self.pages.as_mut().unwrap() | ||||
|     } | ||||
| 
 | ||||
|     // Get pid for private VMA
 | ||||
|     pub fn pid(&self) -> pid_t { | ||||
|         match self.access { | ||||
|             VMAccess::Private(pid) => pid, | ||||
|             VMAccess::Shared(_) => unreachable!(), | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     pub fn is_reserved_only(&self) -> bool { | ||||
|         if let Some(pages) = &self.pages { | ||||
|             return pages.is_reserved_only(); | ||||
|         } else { | ||||
|             false | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     pub fn is_fully_committed(&self) -> bool { | ||||
|         self.pages.is_none() | ||||
|     } | ||||
| 
 | ||||
|     pub fn is_partially_committed(&self) -> bool { | ||||
|         if let Some(pages) = &self.pages { | ||||
|             return pages.is_partially_committed(); | ||||
|         } else { | ||||
|             false | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
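|     // Commit (for the CommitNow policy) and initialize the memory of a newly created VMA according to the mmap options.
 | ||||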
|     pub fn init_memory(mut self, options: &VMMapOptions) -> Result<Self> { | ||||
|         let mut vm_area = self; | ||||
|         let page_policy = options.page_policy(); | ||||
| 
 | ||||
|         // Commit pages if needed
 | ||||
|         if !vm_area.is_fully_committed() && page_policy == &PagePolicy::CommitNow { | ||||
|             vm_area.pages_mut().commit_whole(VMPerms::DEFAULT)?; | ||||
|             vm_area.pages = None; | ||||
|         } | ||||
| 
 | ||||
|         // Initialize committed memory
 | ||||
|         if vm_area.is_partially_committed() { | ||||
|             let committed = true; | ||||
|             for range in vm_area.pages().get_ranges(committed) { | ||||
|                 vm_area.init_memory_internal(&range, Some(options.initializer()))?; | ||||
|             } | ||||
|         } else if vm_area.is_fully_committed() { | ||||
|             // Initialize the memory of the new range
 | ||||
|             unsafe { | ||||
|                 let buf = vm_area.range().as_slice_mut(); | ||||
|                 options.initializer().init_slice(buf)?; | ||||
|             } | ||||
| 
 | ||||
|             // Set memory permissions
 | ||||
|             if !options.perms().is_default() { | ||||
|                 vm_area.modify_protection_force(None, vm_area.perms()); | ||||
|             } | ||||
|         } | ||||
|         // Do nothing if this vma has no committed memory
 | ||||
| 
 | ||||
|         Ok(vm_area) | ||||
|     } | ||||
| 
 | ||||
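|     // Write back committed pages to the backing file (if writable), restore default permissions, and zero the committed memory.
 | ||||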
|     pub fn flush_and_clean_memory(&self) -> Result<()> { | ||||
|         let (need_flush, file, file_offset) = match self.writeback_file() { | ||||
|             None => (false, None, None), | ||||
|             Some((file_handle, offset)) => { | ||||
|                 if !file_handle.access_mode().unwrap().writable() { | ||||
|                     (false, None, None) | ||||
|                 } else { | ||||
|                     (true, Some(file_handle), Some(offset)) | ||||
|                 } | ||||
|             } | ||||
|         }; | ||||
| 
 | ||||
|         if self.is_fully_committed() { | ||||
|             self.flush_and_clean_internal(self.range(), need_flush, file, file_offset); | ||||
|         } else { | ||||
|             let committed = true; | ||||
|             for range in self.pages().get_ranges(committed) { | ||||
|                 self.flush_and_clean_internal(&range, need_flush, file, file_offset); | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
|         Ok(()) | ||||
|     } | ||||
| 
 | ||||
|     fn flush_and_clean_internal( | ||||
|         &self, | ||||
|         target_range: &VMRange, | ||||
|         need_flush: bool, | ||||
|         file: Option<&FileRef>, | ||||
|         file_offset: Option<usize>, | ||||
|     ) { | ||||
|         trace!("flush and clean committed range: {:?}", target_range); | ||||
|         debug_assert!(self.range().is_superset_of(target_range)); | ||||
|         let buf = unsafe { target_range.as_slice_mut() }; | ||||
|         if !self.perms().is_default() { | ||||
|             self.modify_protection_force(Some(&target_range), VMPerms::default()); | ||||
|         } | ||||
| 
 | ||||
|         if need_flush { | ||||
|             let file_offset = file_offset.unwrap() + (target_range.start() - self.range.start()); | ||||
|             file.unwrap().write_at(file_offset, buf); | ||||
|         } | ||||
| 
 | ||||
|         // Reset the memory to zeros
 | ||||
|         unsafe { | ||||
|             buf.iter_mut().for_each(|b| *b = 0); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
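|     // Apply the new permissions only to ranges that are already committed.
 | ||||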
|     pub fn modify_permissions_for_committed_pages(&self, new_perms: VMPerms) { | ||||
|         if self.is_fully_committed() { | ||||
|             self.modify_protection_force(None, new_perms); | ||||
|         } else if self.is_partially_committed() { | ||||
|             let committed = true; | ||||
|             for range in self.pages().get_ranges(committed) { | ||||
|                 self.modify_protection_force(Some(&range), new_perms); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     pub fn handle_page_fault( | ||||
|         &mut self, | ||||
|         rip: usize, | ||||
|         pf_addr: usize, | ||||
|         errcd: u32, | ||||
|         kernel_triggers: bool, | ||||
|     ) -> Result<()> { | ||||
|         trace!("PF vma = {:?}", self); | ||||
|         if (self.perms() == VMPerms::NONE) | ||||
|             || (!crate::exception::check_rw_bit(errcd) | ||||
|                 && !self.perms().contains(VMPerms::READ)) | ||||
|         { | ||||
|             return_errno!( | ||||
|                 EACCES, | ||||
|                 "Page is set to None permission. This is user-intended" | ||||
|             ); | ||||
|         } | ||||
| 
 | ||||
|         if crate::exception::check_rw_bit(errcd) && !self.perms().contains(VMPerms::WRITE) { | ||||
|             return_errno!( | ||||
|                 EACCES, "Page is set to not contain WRITE permission but this PF is triggered by write. This is user-intended" | ||||
|             ) | ||||
|         } | ||||
| 
 | ||||
|         if rip == pf_addr && !self.perms().contains(VMPerms::EXEC) { | ||||
|             return_errno!( | ||||
|                 EACCES, "Page is set to not contain EXEC permission but this PF is triggered by execution. This is user-intended" | ||||
|             ) | ||||
|         } | ||||
| 
 | ||||
|         if self.is_fully_committed() { | ||||
|             // This VMA has been committed by other threads already. Just return.
 | ||||
|             info!("This vma has been committed by other threads already."); | ||||
|             return Ok(()); | ||||
|         } | ||||
| 
 | ||||
|         if matches!(self.epc_type, EPCMemType::Reserved) { | ||||
|             return_errno!(EINVAL, "reserved memory shouldn't trigger PF"); | ||||
|         } | ||||
| 
 | ||||
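|         // After PF_NUM_THRESHOLD page faults on this VMA, or for a kernel-triggered fault, commit the whole VMA instead of committing incrementally.
 | ||||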
|         if kernel_triggers || self.pf_count >= PF_NUM_THRESHOLD { | ||||
|             return self.commit_current_vma_whole(); | ||||
|         } | ||||
| 
 | ||||
|         self.pf_count += 1; | ||||
|         // The returned commit_size can be 0 when other threads have already committed the PF-containing range but the VMA is not fully committed yet.
 | ||||
|         let commit_size = self.commit_once_for_page_fault(pf_addr).unwrap(); | ||||
| 
 | ||||
|         trace!("page fault commit memory size = {:?}", commit_size); | ||||
| 
 | ||||
|         if commit_size == 0 { | ||||
|             warn!("This PF has been handled by other threads already."); | ||||
|         } | ||||
| 
 | ||||
|         info!("page fault handle success"); | ||||
| 
 | ||||
|         Ok(()) | ||||
|     } | ||||
| 
 | ||||
|     pub fn backed_file(&self) -> Option<(&FileRef, usize)> { | ||||
|         if let Some(file) = &self.file_backed { | ||||
|             Some(file.init_file()) | ||||
|             Some(file.backed_file()) | ||||
|         } else { | ||||
|             None | ||||
|         } | ||||
| @ -147,36 +400,51 @@ impl VMArea { | ||||
|         Some(new_vma) | ||||
|     } | ||||
| 
 | ||||
|     pub fn resize(&mut self, new_size: usize) { | ||||
|         self.range.resize(new_size) | ||||
|     } | ||||
| 
 | ||||
|     pub fn set_start(&mut self, new_start: usize) { | ||||
|         let old_start = self.start(); | ||||
|         self.range.set_start(new_start); | ||||
| 
 | ||||
|         if let Some(file) = self.file_backed.as_mut() { | ||||
|             if !file.need_write_back() { | ||||
|         if new_start == old_start { | ||||
|             return; | ||||
|         } | ||||
| 
 | ||||
|         self.range.set_start(new_start); | ||||
| 
 | ||||
|         if new_start < old_start { | ||||
|             // Extend this VMA
 | ||||
|             let pages = { | ||||
|                 let pages = PageTracker::new_vma_tracker(&self.range, &self.epc_type).unwrap(); | ||||
|                 (!pages.is_fully_committed()).then_some(pages) | ||||
|             }; | ||||
|             self.pages = pages; | ||||
|         } else { | ||||
|             // Shrink this VMA. Split the page tracker for the new, smaller range.
 | ||||
|             debug_assert!(new_start > old_start); | ||||
|             if let Some(pages) = &mut self.pages { | ||||
|                 pages.split_for_new_range(&self.range); | ||||
|                 if pages.is_fully_committed() { | ||||
|                     self.pages = None; | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
|         if let Some(file) = self.file_backed.as_mut() { | ||||
|             // If the updates to the VMA need to write back to a file, then the
 | ||||
|             // file offset must be adjusted according to the new start address.
 | ||||
|             Self::set_file_offset(file, new_start, old_start); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     fn set_file_offset(file: &mut FileBacked, new_start_offset: usize, old_start_offset: usize) { | ||||
|         let offset = file.offset(); | ||||
|             if old_start < new_start { | ||||
|                 file.set_offset(offset + (new_start - old_start)); | ||||
|         if old_start_offset < new_start_offset { | ||||
|             file.set_offset(offset + (new_start_offset - old_start_offset)); | ||||
|         } else { | ||||
|             // The caller must guarantee that the new start makes sense
 | ||||
|                 debug_assert!(offset >= old_start - new_start); | ||||
|                 file.set_offset(offset - (old_start - new_start)); | ||||
|             } | ||||
|             debug_assert!(offset >= old_start_offset - new_start_offset); | ||||
|             file.set_offset(offset - (old_start_offset - new_start_offset)); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     pub fn is_the_same_to(&self, other: &VMArea) -> bool { | ||||
|         if self.access() != other.access() { | ||||
|             return false; | ||||
|         } | ||||
| 
 | ||||
|         if self.range() != other.range() { | ||||
|             return false; | ||||
|         } | ||||
| @ -185,6 +453,10 @@ impl VMArea { | ||||
|             return false; | ||||
|         } | ||||
| 
 | ||||
|         if self.access() != other.access() { | ||||
|             return false; | ||||
|         } | ||||
| 
 | ||||
|         let self_writeback_file = self.writeback_file(); | ||||
|         let other_writeback_file = other.writeback_file(); | ||||
|         match (self_writeback_file, other_writeback_file) { | ||||
| @ -199,6 +471,13 @@ impl VMArea { | ||||
| 
 | ||||
|     pub fn set_end(&mut self, new_end: usize) { | ||||
|         self.range.set_end(new_end); | ||||
|         let pages = if self.range.size() > 0 { | ||||
|             let pages = PageTracker::new_vma_tracker(&self.range, &self.epc_type).unwrap(); | ||||
|             (!pages.is_fully_committed()).then_some(pages) | ||||
|         } else { | ||||
|             None | ||||
|         }; | ||||
|         self.pages = pages; | ||||
|     } | ||||
| 
 | ||||
|     pub fn can_merge_vmas(left: &VMArea, right: &VMArea) -> bool { | ||||
| @ -208,10 +487,6 @@ impl VMArea { | ||||
|         if left.size() == 0 || right.size() == 0 { | ||||
|             return false; | ||||
|         } | ||||
|         // The two VMAs must be owned by the same process
 | ||||
|         if left.access() != right.access() { | ||||
|             return false; | ||||
|         } | ||||
|         // The two VMAs must border with each other
 | ||||
|         if left.end() != right.start() { | ||||
|             return false; | ||||
| @ -220,6 +495,15 @@ impl VMArea { | ||||
|         if left.perms() != right.perms() { | ||||
|             return false; | ||||
|         } | ||||
|         // The two VMAs must be privately owned by the same process.
 | ||||
|         // Return false if either is None, or if both are Some but the two private pids differ.
 | ||||
|         let private_access = left.get_private_pid().zip(right.get_private_pid()); | ||||
|         if private_access.is_none() { | ||||
|             return false; | ||||
|         } | ||||
|         if private_access.is_some_and(|(left_pid, right_pid)| left_pid != right_pid) { | ||||
|             return false; | ||||
|         } | ||||
| 
 | ||||
|         // If the two VMAs have write-back files, the files must be the same and
 | ||||
|         // the two file regions must be continuous.
 | ||||
| @ -238,12 +522,12 @@ impl VMArea { | ||||
|     } | ||||
| 
 | ||||
|     /// Flush a file-backed VMA to its file. This has no effect on anonymous VMA.
 | ||||
|     pub fn flush_backed_file(&self) { | ||||
|         self.flush_backed_file_with_cond(|_| true) | ||||
|     pub fn flush_committed_backed_file(&self) { | ||||
|         self.flush_committed_backed_file_with_cond(|_| true) | ||||
|     } | ||||
| 
 | ||||
|     /// Same as `flush_backed_file()`, except that an extra condition on the file needs to satisfy.
 | ||||
|     pub fn flush_backed_file_with_cond<F: Fn(&FileRef) -> bool>(&self, cond_fn: F) { | ||||
|     /// Same as `flush_committed_backed_file()`, except that an extra condition on the file needs to be satisfied.
 | ||||
|     pub fn flush_committed_backed_file_with_cond<F: Fn(&FileRef) -> bool>(&self, cond_fn: F) { | ||||
|         let (file, file_offset) = match self.writeback_file() { | ||||
|             None => return, | ||||
|             Some((file_and_offset)) => file_and_offset, | ||||
| @ -258,7 +542,16 @@ impl VMArea { | ||||
|         if !cond_fn(file) { | ||||
|             return; | ||||
|         } | ||||
|         if self.is_fully_committed() { | ||||
|             file.write_at(file_offset, unsafe { self.as_slice() }); | ||||
|         } else { | ||||
|             let committed = true; | ||||
|             let vm_range_start = self.range().start(); | ||||
|             for range in self.pages().get_ranges(committed) { | ||||
|                 let file_offset = file_offset + (range.start() - vm_range_start); | ||||
|                 file.write_at(file_offset, unsafe { range.as_slice() }); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     pub fn is_shared(&self) -> bool { | ||||
| @ -310,6 +603,198 @@ impl VMArea { | ||||
|     pub fn inherits_access_from(&mut self, vma: &VMArea) { | ||||
|         self.access = vma.access().clone() | ||||
|     } | ||||
| 
 | ||||
|     // The current implementation with "unwrap()" helps us find errors quickly by panicking directly. Also, restoring the VM state
 | ||||
|     // when this function fails would require some work and is not that simple.
 | ||||
|     // TODO: Return a Result instead of using "unwrap()" in this function.
 | ||||
|     fn modify_protection_force(&self, protect_range: Option<&VMRange>, new_perms: VMPerms) { | ||||
|         let protect_range = protect_range.unwrap_or_else(|| self.range()); | ||||
| 
 | ||||
|         self.epc_type | ||||
|             .modify_protection(protect_range.start(), protect_range.size(), new_perms) | ||||
|             .unwrap() | ||||
|     } | ||||
| 
 | ||||
|     // With an initializer, the memory should be committed already.
 | ||||
|     // Without an initializer, the memory needs to be committed and initialized.
 | ||||
|     fn init_memory_internal( | ||||
|         &mut self, | ||||
|         target_range: &VMRange, | ||||
|         initializer: Option<&VMInitializer>, | ||||
|     ) -> Result<()> { | ||||
|         debug_assert!(self.range().is_superset_of(target_range)); | ||||
|         trace!("init range = {:?}", target_range); | ||||
|         let perms = self.perms(); | ||||
|         if let Some(initializer) = initializer { | ||||
|             match initializer { | ||||
|                 VMInitializer::FileBacked { file } => { | ||||
|                     let (file, offset) = file.backed_file(); | ||||
|                     let vma_range_start = self.range.start(); | ||||
| 
 | ||||
|                     let init_file_offset = offset + (target_range.start() - vma_range_start); | ||||
| 
 | ||||
|                     self.init_file_backed_mem(target_range, &file, init_file_offset, perms)?; | ||||
|                 } | ||||
|                 VMInitializer::DoNothing() => { | ||||
|                     if !self.perms().is_default() { | ||||
|                         self.modify_protection_force(Some(target_range), perms); | ||||
|                     } | ||||
|                 } | ||||
|                 VMInitializer::FillZeros() => { | ||||
|                     unsafe { | ||||
|                         let buf = target_range.as_slice_mut(); | ||||
|                         buf.iter_mut().for_each(|b| *b = 0); | ||||
|                     } | ||||
|                     if !perms.is_default() { | ||||
|                         self.modify_protection_force(Some(target_range), perms); | ||||
|                     } | ||||
|                 } | ||||
|                 _ => todo!(), | ||||
|             } | ||||
|         } else { | ||||
|             // No initializer, #PF triggered.
 | ||||
|             let init_file = self | ||||
|                 .backed_file() | ||||
|                 .map(|(file, offset)| (file.clone(), offset)); | ||||
|             if let Some((file, offset)) = init_file { | ||||
|                 let vma_range_start = self.range.start(); | ||||
| 
 | ||||
|                 let init_file_offset = offset + (target_range.start() - vma_range_start); | ||||
| 
 | ||||
|                 self.pages | ||||
|                     .as_mut() | ||||
|                     .unwrap() | ||||
|                     .commit_memory_and_init_with_file( | ||||
|                         target_range, | ||||
|                         &file, | ||||
|                         init_file_offset, | ||||
|                         perms, | ||||
|                     )?; | ||||
|             } else { | ||||
|                 // PF triggered, no file-backed memory, just modify protection
 | ||||
|                 self.pages | ||||
|                     .as_mut() | ||||
|                     .unwrap() | ||||
|                     .commit_range(target_range, Some(perms))?; | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
|         Ok(()) | ||||
|     } | ||||
| 
 | ||||
|     fn init_file_backed_mem( | ||||
|         &mut self, | ||||
|         target_range: &VMRange, | ||||
|         file: &FileRef, | ||||
|         file_offset: usize, | ||||
|         new_perm: VMPerms, | ||||
|     ) -> Result<()> { | ||||
|         if !file.access_mode().unwrap().readable() { | ||||
|             return_errno!(EBADF, "file is not readable"); | ||||
|         } | ||||
| 
 | ||||
|         let buf = unsafe { target_range.as_slice_mut() }; | ||||
|         let file_size = file.metadata().unwrap().size; | ||||
| 
 | ||||
|         let len = file | ||||
|             .read_at(file_offset, buf) | ||||
|             .map_err(|_| errno!(EACCES, "failed to init memory from file"))?; | ||||
| 
 | ||||
|         if !new_perm.is_default() { | ||||
|             self.modify_protection_force(Some(target_range), new_perm); | ||||
|         } | ||||
| 
 | ||||
|         Ok(()) | ||||
|     } | ||||
| 
 | ||||
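|     // The amount of memory to commit for each page fault.
 | ||||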
|     fn get_commit_once_size(&self) -> usize { | ||||
|         COMMIT_SIZE_UNIT | ||||
|     } | ||||
| 
 | ||||
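|     // Commit up to `commit_once_size` bytes of uncommitted memory, starting from the page containing `pf_addr`. Returns the size actually committed.
 | ||||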
|     fn commit_once_for_page_fault(&mut self, pf_addr: usize) -> Result<usize> { | ||||
|         debug_assert!(!self.is_fully_committed()); | ||||
|         let mut early_return = false; | ||||
|         let mut total_commit_size = 0; | ||||
|         let vma_range_start = self.range.start(); | ||||
|         let permission = self.perms(); | ||||
|         let committed = false; | ||||
|         let mut uncommitted_ranges = self.pages().get_ranges(committed); | ||||
|         let commit_once_size = self.get_commit_once_size(); | ||||
| 
 | ||||
|         for range in uncommitted_ranges | ||||
|             .iter_mut() | ||||
|             .skip_while(|range| !range.contains(pf_addr)) | ||||
|         { | ||||
|             // Skip until we reach the first range that contains pf_addr
 | ||||
|             if total_commit_size == 0 { | ||||
|                 debug_assert!(range.contains(pf_addr)); | ||||
|                 range.set_start(align_down(pf_addr, PAGE_SIZE)); | ||||
|                 range.resize(std::cmp::min(range.size(), commit_once_size)); | ||||
|             } else if range.size() + total_commit_size > commit_once_size { | ||||
|                 // This is not the first commit. Try to commit until reaching commit_once_size
 | ||||
|                 range.resize(commit_once_size - total_commit_size); | ||||
|             } | ||||
| 
 | ||||
|             // We don't handle file-backed memory here
 | ||||
|             debug_assert!(self.backed_file().is_none()); | ||||
|             self.init_memory_internal(&range, None)?; | ||||
| 
 | ||||
|             total_commit_size += range.size(); | ||||
|             if total_commit_size >= commit_once_size { | ||||
|                 break; | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
|         if self.pages().is_fully_committed() { | ||||
|             trace!("vma is fully committed"); | ||||
|             self.pages = None; | ||||
|         } | ||||
| 
 | ||||
|         Ok(total_commit_size) | ||||
|     } | ||||
| 
 | ||||
|     // Only used to handle a PF triggered by the kernel
 | ||||
|     fn commit_current_vma_whole(&mut self) -> Result<()> { | ||||
|         debug_assert!(!self.is_fully_committed()); | ||||
|         debug_assert!(self.backed_file().is_none()); | ||||
| 
 | ||||
|         let mut uncommitted_ranges = self.pages.as_ref().unwrap().get_ranges(false); | ||||
|         for range in uncommitted_ranges { | ||||
|             self.init_memory_internal(&range, None).unwrap(); | ||||
|         } | ||||
|         self.pages = None; | ||||
| 
 | ||||
|         Ok(()) | ||||
|     } | ||||
| 
 | ||||
|     // TODO: We can re-enable this when we support lazy permission extension.
 | ||||
|     #[allow(dead_code)] | ||||
|     fn page_fault_handler_extend_permission(&mut self, pf_addr: usize) -> Result<()> { | ||||
|         let permission = self.perms(); | ||||
| 
 | ||||
|         // This is intended by the application.
 | ||||
|         if permission == VMPerms::NONE { | ||||
|             return_errno!(EPERM, "trying to access PROT_NONE memory"); | ||||
|         } | ||||
| 
 | ||||
|         if self.is_fully_committed() { | ||||
|             self.modify_protection_force(None, permission); | ||||
|             return Ok(()); | ||||
|         } | ||||
| 
 | ||||
|         let committed = true; | ||||
|         let committed_ranges = self.pages().get_ranges(committed); | ||||
|         for range in committed_ranges.iter() { | ||||
|             if !range.contains(pf_addr) { | ||||
|                 continue; | ||||
|             } | ||||
| 
 | ||||
|             self.epc_type | ||||
|                 .modify_protection(range.start(), range.size(), permission)?; | ||||
|         } | ||||
| 
 | ||||
|         Ok(()) | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| impl Deref for VMArea { | ||||
|  | ||||
| @ -83,16 +83,7 @@ impl ChunkManager { | ||||
|                 continue; | ||||
|             } | ||||
| 
 | ||||
|             vma.flush_backed_file(); | ||||
| 
 | ||||
|             if !vma.perms().is_default() { | ||||
|                 VMPerms::apply_perms(vma, VMPerms::default()); | ||||
|             } | ||||
| 
 | ||||
|             unsafe { | ||||
|                 let buf = vma.as_slice_mut(); | ||||
|                 buf.iter_mut().for_each(|b| *b = 0) | ||||
|             } | ||||
|             vma.flush_and_clean_memory().unwrap(); | ||||
| 
 | ||||
|             self.free_manager.add_range_back_to_free_manager(vma); | ||||
|             self.free_size += vma.size(); | ||||
| @ -110,6 +101,7 @@ impl ChunkManager { | ||||
|         if let VMMapAddr::Force(addr) = addr { | ||||
|             self.munmap(addr, size)?; | ||||
|         } | ||||
|         trace!("mmap options = {:?}", options); | ||||
| 
 | ||||
|         // Find and allocate a new range for this mmap request
 | ||||
|         let new_range = self | ||||
| @ -117,27 +109,29 @@ impl ChunkManager { | ||||
|             .find_free_range_internal(size, align, addr)?; | ||||
|         let new_addr = new_range.start(); | ||||
|         let current_pid = current!().process().pid(); | ||||
|         let new_vma = { | ||||
|             let new_vma = VMArea::new( | ||||
|                 new_range, | ||||
|                 *options.perms(), | ||||
|                 options.initializer().backed_file(), | ||||
|                 current_pid, | ||||
|         ); | ||||
|             ) | ||||
|             .init_memory(options); | ||||
| 
 | ||||
|         // Initialize the memory of the new range
 | ||||
|         let buf = unsafe { new_vma.as_slice_mut() }; | ||||
|         let ret = options.initializer().init_slice(buf); | ||||
|         if let Err(e) = ret { | ||||
|             // Return the free range before return with error
 | ||||
|             if new_vma.is_err() { | ||||
|                 let error = new_vma.err().unwrap(); | ||||
|                 error!("init memory failure: {}", error.backtrace()); | ||||
|                 let range = VMRange::new_with_size(new_addr, size).unwrap(); | ||||
|                 self.free_manager | ||||
|                 .add_range_back_to_free_manager(new_vma.range()); | ||||
|             return_errno!(e.errno(), "failed to mmap"); | ||||
|                     .add_range_back_to_free_manager(&range) | ||||
|                     .unwrap(); | ||||
|                 return Err(error); | ||||
|             } | ||||
| 
 | ||||
|         // Set memory permissions
 | ||||
|         if !options.perms().is_default() { | ||||
|             VMPerms::apply_perms(&new_vma, new_vma.perms()); | ||||
|         } | ||||
|             new_vma.unwrap() | ||||
|         }; | ||||
|         trace!("new vma is ready"); | ||||
| 
 | ||||
|         self.free_size -= new_vma.size(); | ||||
|         // After initializing, we can safely insert the new VMA
 | ||||
|         self.vmas.insert(VMAObj::new_vma_obj(new_vma)); | ||||
| @ -168,11 +162,7 @@ impl ChunkManager { | ||||
|                 Some(intersection_vma) => intersection_vma, | ||||
|             }; | ||||
| 
 | ||||
|             // File-backed VMA needs to be flushed upon munmap
 | ||||
|             intersection_vma.flush_backed_file(); | ||||
|             if !&intersection_vma.perms().is_default() { | ||||
|                 VMPerms::apply_perms(&intersection_vma, VMPerms::default()); | ||||
|             } | ||||
|             intersection_vma.flush_and_clean_memory()?; | ||||
| 
 | ||||
|             if vma.range() == intersection_vma.range() { | ||||
|                 // Exact match. Just remove.
 | ||||
| @ -194,13 +184,6 @@ impl ChunkManager { | ||||
|                 } | ||||
|             } | ||||
| 
 | ||||
|             // Reset zero
 | ||||
|             unsafe { | ||||
|                 trace!("intersection vma = {:?}", intersection_vma); | ||||
|                 let buf = intersection_vma.as_slice_mut(); | ||||
|                 buf.iter_mut().for_each(|b| *b = 0) | ||||
|             } | ||||
| 
 | ||||
|             self.free_manager | ||||
|                 .add_range_back_to_free_manager(intersection_vma.range()); | ||||
|             self.free_size += intersection_vma.size(); | ||||
| @ -306,8 +289,7 @@ impl ChunkManager { | ||||
|             if intersection_vma.range() == containing_vma.range() { | ||||
|                 // The whole containing_vma is mprotected
 | ||||
|                 containing_vma.set_perms(new_perms); | ||||
|                 VMPerms::apply_perms(&containing_vma, containing_vma.perms()); | ||||
|                 trace!("containing_vma = {:?}", containing_vma); | ||||
|                 containing_vma.modify_permissions_for_committed_pages(containing_vma.perms()); | ||||
|                 containing_vmas.replace_with(VMAObj::new_vma_obj(containing_vma)); | ||||
|                 containing_vmas.move_next(); | ||||
|                 continue; | ||||
| @ -325,13 +307,13 @@ impl ChunkManager { | ||||
|                         let protect_end = protect_range.end(); | ||||
| 
 | ||||
|                         // New VMA
 | ||||
|                         let new_vma = VMArea::inherits_file_from( | ||||
|                         let mut new_vma = VMArea::inherits_file_from( | ||||
|                             &containing_vma, | ||||
|                             protect_range, | ||||
|                             new_perms, | ||||
|                             VMAccess::Private(current_pid), | ||||
|                         ); | ||||
|                         VMPerms::apply_perms(&new_vma, new_vma.perms()); | ||||
|                         new_vma.modify_permissions_for_committed_pages(new_vma.perms()); | ||||
|                         let new_vma = VMAObj::new_vma_obj(new_vma); | ||||
| 
 | ||||
|                         // Another new VMA
 | ||||
| @ -356,15 +338,16 @@ impl ChunkManager { | ||||
|                         break; | ||||
|                     } | ||||
|                     1 => { | ||||
|                         let remain_vma = remain_vmas.pop().unwrap(); | ||||
|                         let mut remain_vma = remain_vmas.pop().unwrap(); | ||||
| 
 | ||||
|                         let new_vma = VMArea::inherits_file_from( | ||||
|                         let mut new_vma = VMArea::inherits_file_from( | ||||
|                             &containing_vma, | ||||
|                             intersection_vma.range().clone(), | ||||
|                             new_perms, | ||||
|                             VMAccess::Private(current_pid), | ||||
|                         ); | ||||
|                         VMPerms::apply_perms(&new_vma, new_vma.perms()); | ||||
| 
 | ||||
|                         new_vma.modify_permissions_for_committed_pages(new_vma.perms()); | ||||
| 
 | ||||
|                         if remain_vma.start() == containing_vma.start() { | ||||
|                             // mprotect right side of the vma
 | ||||
| @ -374,6 +357,7 @@ impl ChunkManager { | ||||
|                             debug_assert!(remain_vma.end() == containing_vma.end()); | ||||
|                             containing_vma.set_start(remain_vma.start()); | ||||
|                         } | ||||
|                         debug_assert!(containing_vma.range() == remain_vma.range()); | ||||
| 
 | ||||
|                         containing_vmas.replace_with(VMAObj::new_vma_obj(containing_vma)); | ||||
|                         containing_vmas.insert(VMAObj::new_vma_obj(new_vma)); | ||||
| @ -401,7 +385,7 @@ impl ChunkManager { | ||||
|                 None => continue, | ||||
|                 Some(vma) => vma, | ||||
|             }; | ||||
|             vma.flush_backed_file(); | ||||
|             vma.flush_committed_backed_file(); | ||||
|         } | ||||
|         Ok(()) | ||||
|     } | ||||
| @ -409,9 +393,11 @@ impl ChunkManager { | ||||
|     /// Sync all shared, file-backed memory mappings of the given file by flushing
 | ||||
|     /// the memory content to the file.
 | ||||
|     pub fn msync_by_file(&mut self, sync_file: &FileRef) { | ||||
|         for vma_obj in &self.vmas { | ||||
|         let is_same_file = |file: &FileRef| -> bool { Arc::ptr_eq(&file, &sync_file) }; | ||||
|             vma_obj.vma().flush_backed_file_with_cond(is_same_file); | ||||
|         for vma_obj in &self.vmas { | ||||
|             vma_obj | ||||
|                 .vma() | ||||
|                 .flush_committed_backed_file_with_cond(is_same_file); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
| @ -428,6 +414,34 @@ impl ChunkManager { | ||||
|         return Ok(vma.range().clone()); | ||||
|     } | ||||
| 
 | ||||
|     pub fn handle_page_fault( | ||||
|         &mut self, | ||||
|         rip: usize, | ||||
|         pf_addr: usize, | ||||
|         errcd: u32, | ||||
|         kernel_triggers: bool, | ||||
|     ) -> Result<()> { | ||||
|         trace!( | ||||
|             "handle_page_fault chunk manager range = {:?}, free_size = {:?}", | ||||
|             self.range, | ||||
|             self.free_size | ||||
|         ); | ||||
|         let mut vma_cursor = self.vmas.upper_bound_mut(Bound::Included(&pf_addr)); | ||||
|         if vma_cursor.is_null() { | ||||
|             return_errno!(ENOMEM, "no mmap region contains the address"); | ||||
|         } | ||||
|         let vma = vma_cursor.get().unwrap().vma(); | ||||
|         if vma.pid() != current!().process().pid() || !vma.contains(pf_addr) { | ||||
|             return_errno!(ENOMEM, "no mmap region contains the address"); | ||||
|         } | ||||
| 
 | ||||
|         let mut vma = vma.clone(); | ||||
|         vma.handle_page_fault(rip, pf_addr, errcd, kernel_triggers)?; | ||||
|         vma_cursor.replace_with(VMAObj::new_vma_obj(vma)); | ||||
| 
 | ||||
|         Ok(()) | ||||
|     } | ||||
| 
 | ||||
|     pub fn usage_percentage(&self) -> f32 { | ||||
|         let total_size = self.range.size(); | ||||
|         let mut used_size = 0; | ||||
| @ -487,6 +501,7 @@ impl VMRemapParser for ChunkManager { | ||||
| 
 | ||||
| impl Drop for ChunkManager { | ||||
|     fn drop(&mut self) { | ||||
|         info!("drop chunk manager = {:?}", self); | ||||
|         assert!(self.is_empty()); | ||||
|         assert!(self.free_size == self.range.size()); | ||||
|         assert!(self.free_manager.free_size() == self.range.size()); | ||||
|  | ||||
405 src/libos/src/vm/vm_epc.rs Normal file
							| @ -0,0 +1,405 @@ | ||||
| // This file contains EPC related APIs and definitions.
 | ||||
| use super::*; | ||||
| use sgx_trts::emm::{ | ||||
|     AllocAddr, AllocFlags, AllocOptions, EmmAlloc, HandleResult, PageFaultHandler, Perm, | ||||
| }; | ||||
| use sgx_trts::enclave::rsgx_is_supported_EDMM; | ||||
| use std::ptr::NonNull; | ||||
| 
 | ||||
| // Memory Layout for Platforms with EDMM support
 | ||||
| //
 | ||||
| // Addr low -> high
 | ||||
| // |---------------------------------------------||---------------------||--------------------------------------|
 | ||||
| //     Reserved Memory                                Gap Range                User Region Memory
 | ||||
| //    (commit memory when loading the enclave)       (used by SDK)           (commit on demand when PF occurs)
 | ||||
| //
 | ||||
| // For platforms without EDMM support, we only use reserved memory.
 | ||||
| 
 | ||||
| pub enum SGXPlatform { | ||||
|     WithEDMM, | ||||
|     NoEDMM, | ||||
| } | ||||
| 
 | ||||
| #[derive(Clone)] | ||||
| pub enum EPCMemType { | ||||
|     Reserved, | ||||
|     UserRegion, | ||||
| } | ||||
| 
 | ||||
| pub struct ReservedMem; | ||||
| pub struct UserRegionMem; | ||||
| 
 | ||||
| #[repr(C, align(4096))] | ||||
| #[derive(Clone)] | ||||
| struct ZeroPage([u8; PAGE_SIZE]); | ||||
| 
 | ||||
| impl ZeroPage { | ||||
|     fn new() -> Self { | ||||
|         Self([0; PAGE_SIZE]) | ||||
|     } | ||||
| 
 | ||||
|     fn new_page_aligned_vec(size: usize) -> Vec<u8> { | ||||
|         debug_assert!(size % PAGE_SIZE == 0); | ||||
|         let page_num = size / PAGE_SIZE; | ||||
|         let mut page_vec = vec![Self::new(); page_num]; | ||||
| 
 | ||||
|         let ptr = page_vec.as_mut_ptr(); | ||||
| 
 | ||||
|         let size = page_num * std::mem::size_of::<Self>(); | ||||
|         std::mem::forget(page_vec); | ||||
| 
 | ||||
|         unsafe { Vec::from_raw_parts(ptr as *mut u8, size, size) } | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| lazy_static! { | ||||
|     static ref ZERO_PAGE: Vec<u8> = ZeroPage::new_page_aligned_vec(PAGE_SIZE); | ||||
| } | ||||
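As an aside, here is a hedged illustration (not part of the commit) of what `new_page_aligned_vec` provides: the buffer is allocated as `#[repr(C, align(4096))]` `ZeroPage` values and only reinterpreted as bytes, so the resulting `Vec<u8>` starts on a page boundary and spans exactly the requested size.

```rust
// Illustrative check only, assuming the ZeroPage definitions above are in scope.
let buf = ZeroPage::new_page_aligned_vec(4 * PAGE_SIZE);
assert_eq!(buf.as_ptr() as usize % PAGE_SIZE, 0); // starts page-aligned
assert_eq!(buf.len(), 4 * PAGE_SIZE);             // covers the requested size
```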
| 
 | ||||
| pub trait EPCAllocator { | ||||
|     fn alloc(size: usize) -> Result<usize> { | ||||
|         return_errno!(ENOSYS, "operation not supported"); | ||||
|     } | ||||
| 
 | ||||
|     fn alloc_with_addr(addr: usize, size: usize) -> Result<usize> { | ||||
|         return_errno!(ENOSYS, "operation not supported"); | ||||
|     } | ||||
| 
 | ||||
|     fn free(addr: usize, size: usize) -> Result<()> { | ||||
|         return_errno!(ENOSYS, "operation not supported"); | ||||
|     } | ||||
| 
 | ||||
|     fn modify_protection(addr: usize, length: usize, protection: VMPerms) -> Result<()> { | ||||
|         return_errno!(ENOSYS, "operation not supported"); | ||||
|     } | ||||
| 
 | ||||
|     fn mem_type() -> EPCMemType; | ||||
| } | ||||
| 
 | ||||
| impl EPCAllocator for ReservedMem { | ||||
|     fn alloc(size: usize) -> Result<usize> { | ||||
|         let ptr = unsafe { sgx_alloc_rsrv_mem(size) }; | ||||
|         if ptr.is_null() { | ||||
|             return_errno!(ENOMEM, "run out of reserved memory"); | ||||
|         } | ||||
|         Ok(ptr as usize) | ||||
|     } | ||||
| 
 | ||||
|     fn alloc_with_addr(addr: usize, size: usize) -> Result<usize> { | ||||
|         let ptr = unsafe { sgx_alloc_rsrv_mem_ex(addr as *const c_void, size) }; | ||||
|         if ptr.is_null() { | ||||
|             return_errno!(ENOMEM, "can't allocate reserved memory at desired address"); | ||||
|         } | ||||
|         Ok(ptr as usize) | ||||
|     } | ||||
| 
 | ||||
|     fn free(addr: usize, size: usize) -> Result<()> { | ||||
|         let ret = unsafe { sgx_free_rsrv_mem(addr as *const c_void, size) }; | ||||
|         assert!(ret == 0); | ||||
|         Ok(()) | ||||
|     } | ||||
| 
 | ||||
|     fn modify_protection(addr: usize, length: usize, protection: VMPerms) -> Result<()> { | ||||
|         let mut ret_val = 0; | ||||
|         let ret = if rsgx_is_supported_EDMM() { | ||||
|             unsafe { | ||||
|                 sgx_tprotect_rsrv_mem(addr as *const c_void, length, protection.bits() as i32) | ||||
|             } | ||||
|         } else { | ||||
|             // For platforms without EDMM, sgx_tprotect_rsrv_mem is actually useless.
 | ||||
|             // However, at least we can set pages to desired protections in the host kernel page table.
 | ||||
|             unsafe { | ||||
|                 occlum_ocall_mprotect( | ||||
|                     &mut ret_val as *mut i32, | ||||
|                     addr as *const c_void, | ||||
|                     length, | ||||
|                     protection.bits() as i32, | ||||
|                 ) | ||||
|             } | ||||
|         }; | ||||
| 
 | ||||
|         if ret != sgx_status_t::SGX_SUCCESS || ret_val != 0 { | ||||
|             return_errno!(ENOMEM, "reserved memory modify protection failure"); | ||||
|         } | ||||
| 
 | ||||
|         Ok(()) | ||||
|     } | ||||
| 
 | ||||
|     fn mem_type() -> EPCMemType { | ||||
|         EPCMemType::Reserved | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| impl EPCAllocator for UserRegionMem { | ||||
|     fn alloc(size: usize) -> Result<usize> { | ||||
|         let alloc_options = AllocOptions::new() | ||||
|             .set_flags(AllocFlags::COMMIT_ON_DEMAND) | ||||
|             .set_handler(enclave_page_fault_handler_dummy, 0); | ||||
|         let ptr = unsafe { EmmAlloc.alloc(AllocAddr::Any, size, alloc_options) } | ||||
|             .map_err(|e| errno!(Errno::from(e as u32)))?; | ||||
| 
 | ||||
|         Ok(ptr.addr().get()) | ||||
|     } | ||||
| 
 | ||||
|     fn free(addr: usize, size: usize) -> Result<()> { | ||||
|         let ptr = NonNull::<u8>::new(addr as *mut u8).unwrap(); | ||||
|         unsafe { EmmAlloc.dealloc(ptr, size) }.map_err(|e| errno!(Errno::from(e as u32)))?; | ||||
|         Ok(()) | ||||
|     } | ||||
| 
 | ||||
|     fn modify_protection(addr: usize, length: usize, protection: VMPerms) -> Result<()> { | ||||
|         trace!( | ||||
|             "user region modify protection, protection = {:?}, range = {:?}", | ||||
|             protection, | ||||
|             VMRange::new_with_size(addr, length).unwrap() | ||||
|         ); | ||||
|         let ptr = NonNull::<u8>::new(addr as *mut u8).unwrap(); | ||||
|         unsafe { | ||||
|             EmmAlloc.modify_permissions(ptr, length, Perm::from_bits(protection.bits()).unwrap()) | ||||
|         } | ||||
|         .map_err(|e| errno!(Errno::from(e as u32)))?; | ||||
| 
 | ||||
|         Ok(()) | ||||
|     } | ||||
| 
 | ||||
|     fn mem_type() -> EPCMemType { | ||||
|         EPCMemType::UserRegion | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| impl UserRegionMem { | ||||
|     fn commit_memory(start_addr: usize, size: usize) -> Result<()> { | ||||
|         let ptr = NonNull::<u8>::new(start_addr as *mut u8).unwrap(); | ||||
|         unsafe { EmmAlloc.commit(ptr, size) }.map_err(|e| errno!(Errno::from(e as u32)))?; | ||||
|         Ok(()) | ||||
|     } | ||||
| 
 | ||||
|     fn commit_memory_with_new_permission( | ||||
|         start_addr: usize, | ||||
|         size: usize, | ||||
|         new_perms: VMPerms, | ||||
|     ) -> Result<()> { | ||||
|         let ptr = NonNull::<u8>::new(start_addr as *mut u8).unwrap(); | ||||
|         let perm = Perm::from_bits(new_perms.bits()).unwrap(); | ||||
|         if size == PAGE_SIZE { | ||||
|             unsafe { EmmAlloc::commit_with_data(ptr, ZERO_PAGE.as_slice(), perm) } | ||||
|                 .map_err(|e| errno!(Errno::from(e as u32)))?; | ||||
|         } else { | ||||
|             let data = ZeroPage::new_page_aligned_vec(size); | ||||
|             unsafe { EmmAlloc::commit_with_data(ptr, data.as_slice(), perm) } | ||||
|                 .map_err(|e| errno!(Errno::from(e as u32)))?; | ||||
|         } | ||||
|         Ok(()) | ||||
|     } | ||||
| 
 | ||||
|     fn commit_memory_and_init_with_file( | ||||
|         start_addr: usize, | ||||
|         size: usize, | ||||
|         file: &FileRef, | ||||
|         file_offset: usize, | ||||
|         new_perms: VMPerms, | ||||
|     ) -> Result<()> { | ||||
|         let mut data = ZeroPage::new_page_aligned_vec(size); | ||||
|         let len = file | ||||
|             .read_at(file_offset, data.as_mut_slice()) | ||||
|             .map_err(|_| errno!(EACCES, "failed to init memory from file"))?; | ||||
| 
 | ||||
|         let ptr = NonNull::<u8>::new(start_addr as *mut u8).unwrap(); | ||||
|         let perm = Perm::from_bits(new_perms.bits()).unwrap(); | ||||
| 
 | ||||
|         unsafe { EmmAlloc::commit_with_data(ptr, data.as_slice(), perm) } | ||||
|             .map_err(|e| errno!(Errno::from(e as u32)))?; | ||||
|         Ok(()) | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| impl SGXPlatform { | ||||
|     pub fn new() -> Self { | ||||
|         if rsgx_is_supported_EDMM() { | ||||
|             SGXPlatform::WithEDMM | ||||
|         } else { | ||||
|             SGXPlatform::NoEDMM // including SGX simulation mode
 | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     pub fn alloc_user_space( | ||||
|         &self, | ||||
|         init_size: usize, | ||||
|         max_size: usize, | ||||
|     ) -> Result<(VMRange, Option<VMRange>)> { | ||||
|         debug!( | ||||
|             "alloc user space init size = {:?}, max size = {:?}", | ||||
|             init_size, max_size | ||||
|         ); | ||||
|         if matches!(self, SGXPlatform::WithEDMM) && max_size > init_size { | ||||
|             let user_region_size = max_size - init_size; | ||||
| 
 | ||||
|             let reserved_mem_start_addr = ReservedMem::alloc(init_size)?; | ||||
| 
 | ||||
|             let user_region_start_addr = UserRegionMem::alloc(user_region_size)?; | ||||
| 
 | ||||
|             let total_user_space_range = VMRange::new( | ||||
|                 reserved_mem_start_addr, | ||||
|                 user_region_start_addr + user_region_size, | ||||
|             )?; | ||||
|             let gap_range = | ||||
|                 VMRange::new(reserved_mem_start_addr + init_size, user_region_start_addr)?; | ||||
| 
 | ||||
|             info!( | ||||
|                 "allocated user space range is {:?}, gap range is {:?}. reserved_mem range is {:?}, user region range is {:?}", | ||||
|                 total_user_space_range, gap_range, VMRange::new_with_size(reserved_mem_start_addr, init_size), | ||||
|                 VMRange::new_with_size(user_region_start_addr, user_region_size) | ||||
|             ); | ||||
| 
 | ||||
|             Ok((total_user_space_range, Some(gap_range))) | ||||
|         } else { | ||||
|             // For platforms without EDMM support, or when max_size equals init_size, use reserved memory for the whole user space
 | ||||
|             let reserved_mem_start_addr = ReservedMem::alloc(max_size)?; | ||||
|             let total_user_space_range = | ||||
|                 VMRange::new(reserved_mem_start_addr, reserved_mem_start_addr + max_size)?; | ||||
| 
 | ||||
|             info!( | ||||
|                 "allocated user space range is {:?}, gap range is None", | ||||
|                 total_user_space_range | ||||
|             ); | ||||
| 
 | ||||
|             Ok((total_user_space_range, None)) | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     pub fn free_user_space(&self, user_space_range: &VMRange, gap_range: Option<&VMRange>) { | ||||
|         let user_space_ranges = if let Some(gap_range) = gap_range { | ||||
|             user_space_range.subtract(gap_range) | ||||
|         } else { | ||||
|             vec![*user_space_range] | ||||
|         }; | ||||
| 
 | ||||
|         if user_space_ranges.len() == 2 { | ||||
|             debug_assert!(matches!(self, SGXPlatform::WithEDMM)); | ||||
|             let reserved_mem = user_space_ranges[0]; | ||||
|             let user_region_mem = user_space_ranges[1]; | ||||
|             ReservedMem::free(reserved_mem.start(), reserved_mem.size()).unwrap(); | ||||
|             UserRegionMem::free(user_region_mem.start(), user_region_mem.size()).unwrap(); | ||||
|         } else { | ||||
|             // For platforms with EDMM where max_size equals init_size, or for platforms without EDMM, there is no gap range.
 | ||||
|             debug_assert!(user_space_ranges.len() == 1); | ||||
|             let reserved_mem = user_space_ranges[0]; | ||||
|             ReservedMem::free(reserved_mem.start(), reserved_mem.size()).unwrap(); | ||||
|         } | ||||
|     } | ||||
| } | ||||
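To make the EDMM branch of `alloc_user_space` concrete, here is a hedged example with made-up sizes; the real values come from the enclave configuration, not from this snippet.

```rust
// Example values only: 1 GiB is committed up front as reserved memory, and the
// remaining 3 GiB become the user region that is committed on demand on page faults.
let init_size: usize = 1 << 30; // 1 GiB
let max_size: usize = 4 << 30;  // 4 GiB
let user_region_size = max_size - init_size;
assert_eq!(user_region_size, 3 << 30);
// alloc_user_space then returns (total_user_space_range, Some(gap_range)), where
// the gap is the address span left between the two independent allocations.
```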
| 
 | ||||
| impl Debug for EPCMemType { | ||||
|     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | ||||
|         let output_str = match self { | ||||
|             EPCMemType::Reserved => "reserved memory region", | ||||
|             EPCMemType::UserRegion => "user region memory", | ||||
|         }; | ||||
|         write!(f, "{}", output_str) | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| impl EPCMemType { | ||||
|     pub fn new(range: &VMRange) -> Self { | ||||
|         trace!("EPC new range = {:?}", range); | ||||
|         if rsgx_is_supported_EDMM() { | ||||
|             if let Some(gap_range) = USER_SPACE_VM_MANAGER.gap_range() { | ||||
|                 debug_assert!({ | ||||
|                     if range.size() > 0 { | ||||
|                         !gap_range.overlap_with(range) | ||||
|                     } else { | ||||
|                         // Ignore for sentry VMA
 | ||||
|                         true | ||||
|                     } | ||||
|                 }); | ||||
|                 if range.end() <= gap_range.start() { | ||||
|                     EPCMemType::Reserved | ||||
|                 } else { | ||||
|                     debug_assert!(gap_range.end() <= range.start()); | ||||
|                     EPCMemType::UserRegion | ||||
|                 } | ||||
|             } else { | ||||
|                 // There is no gap, which indicates that there is no user region memory
 | ||||
|                 EPCMemType::Reserved | ||||
|             } | ||||
|         } else { | ||||
|             // Only reserved memory
 | ||||
|             EPCMemType::Reserved | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     pub fn modify_protection(&self, addr: usize, length: usize, protection: VMPerms) -> Result<()> { | ||||
|         // PT_GROWSDOWN should only be applied to stack segment or a segment mapped with the MAP_GROWSDOWN flag set.
 | ||||
|         // Since the memory is managed by ourselves, the mprotect ocall shouldn't use this flag. Otherwise, EINVAL will be thrown.
 | ||||
|         let mut prot = protection.clone(); | ||||
|         prot.remove(VMPerms::GROWSDOWN); | ||||
| 
 | ||||
|         match self { | ||||
|             EPCMemType::Reserved => ReservedMem::modify_protection(addr, length, prot), | ||||
|             EPCMemType::UserRegion => UserRegionMem::modify_protection(addr, length, prot), | ||||
|         } | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| pub fn commit_memory(start_addr: usize, size: usize, new_perms: Option<VMPerms>) -> Result<()> { | ||||
|     trace!( | ||||
|         "commit epc: {:?}, new permission: {:?}", | ||||
|         VMRange::new_with_size(start_addr, size).unwrap(), | ||||
|         new_perms | ||||
|     ); | ||||
| 
 | ||||
|     // We should make memory commit and permission change atomic to prevent data races. Thus, if the new perms
 | ||||
|     // are not the default permission (RW), we take a different path that commits with EACCEPTCOPY
 | ||||
|     match new_perms { | ||||
|         Some(perms) if perms != VMPerms::DEFAULT => { | ||||
|             UserRegionMem::commit_memory_with_new_permission(start_addr, size, perms) | ||||
|         } | ||||
|         _ => UserRegionMem::commit_memory(start_addr, size), | ||||
|     } | ||||
| } | ||||
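A hedged usage sketch of the dispatch above; `addr` is assumed to be a page-aligned address inside the user region, and `VMPerms::READ` stands in for any non-default permission.

```rust
// Plain commit: the page comes up with the default RW permission via EACCEPT.
commit_memory(addr, PAGE_SIZE, None)?;
// Commit and change permission in one step: the EACCEPTCOPY path keeps the commit
// and the permission change atomic, matching the comment above.
commit_memory(addr + PAGE_SIZE, PAGE_SIZE, Some(VMPerms::READ))?;
```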
| 
 | ||||
| pub fn commit_memory_and_init_with_file( | ||||
|     start_addr: usize, | ||||
|     size: usize, | ||||
|     file: &FileRef, | ||||
|     file_offset: usize, | ||||
|     new_perms: VMPerms, | ||||
| ) -> Result<()> { | ||||
|     UserRegionMem::commit_memory_and_init_with_file(start_addr, size, file, file_offset, new_perms) | ||||
| } | ||||
| 
 | ||||
| // This is a dummy function for sgx_mm_alloc. The real handler is "enclave_page_fault_handler" shown below.
 | ||||
| extern "C" fn enclave_page_fault_handler_dummy( | ||||
|     pfinfo: &sgx_pfinfo, | ||||
|     private: usize, | ||||
| ) -> HandleResult { | ||||
|     // Don't do anything here. Modifying the registers here can break the PF handling.
 | ||||
|     return HandleResult::Search; | ||||
| } | ||||
| 
 | ||||
| pub fn enclave_page_fault_handler( | ||||
|     rip: usize, | ||||
|     exception_info: sgx_misc_exinfo_t, | ||||
|     kernel_triggers: bool, | ||||
| ) -> Result<()> { | ||||
|     let pf_addr = exception_info.faulting_address as usize; | ||||
|     let pf_errcd = exception_info.error_code; | ||||
|     trace!( | ||||
|         "enclave page fault caught, pf_addr = 0x{:x}, error code = {:?}", | ||||
|         pf_addr, | ||||
|         pf_errcd | ||||
|     ); | ||||
| 
 | ||||
|     USER_SPACE_VM_MANAGER.handle_page_fault(rip, pf_addr, pf_errcd, kernel_triggers)?; | ||||
| 
 | ||||
|     Ok(()) | ||||
| } | ||||
| 
 | ||||
| extern "C" { | ||||
|     fn occlum_ocall_mprotect( | ||||
|         retval: *mut i32, | ||||
|         addr: *const c_void, | ||||
|         len: usize, | ||||
|         prot: i32, | ||||
|     ) -> sgx_status_t; | ||||
| } | ||||
| @ -22,14 +22,16 @@ use std::ops::Bound::{Excluded, Included}; | ||||
| #[derive(Debug)] | ||||
| pub struct VMManager { | ||||
|     range: VMRange, | ||||
|     gap_range: Option<VMRange>, | ||||
|     internal: SgxMutex<InternalVMManager>, | ||||
| } | ||||
| 
 | ||||
| impl VMManager { | ||||
|     pub fn init(vm_range: VMRange) -> Result<Self> { | ||||
|         let internal = InternalVMManager::init(vm_range.clone()); | ||||
|     pub fn init(vm_range: VMRange, gap_range: Option<VMRange>) -> Result<Self> { | ||||
|         let mut internal = InternalVMManager::init(vm_range.clone(), &gap_range); | ||||
|         Ok(VMManager { | ||||
|             range: vm_range, | ||||
|             gap_range: gap_range, | ||||
|             internal: SgxMutex::new(internal), | ||||
|         }) | ||||
|     } | ||||
| @ -38,6 +40,10 @@ impl VMManager { | ||||
|         &self.range | ||||
|     } | ||||
| 
 | ||||
|     pub fn gap_range(&self) -> &Option<VMRange> { | ||||
|         &self.gap_range | ||||
|     } | ||||
| 
 | ||||
|     pub fn internal(&self) -> SgxMutexGuard<InternalVMManager> { | ||||
|         self.internal.lock().unwrap() | ||||
|     } | ||||
| @ -56,8 +62,15 @@ impl VMManager { | ||||
|     } | ||||
| 
 | ||||
|     pub fn verified_clean_when_exit(&self) -> bool { | ||||
|         let gap_size = if let Some(gap) = self.gap_range() { | ||||
|             gap.size() | ||||
|         } else { | ||||
|             0 | ||||
|         }; | ||||
| 
 | ||||
|         let internal = self.internal(); | ||||
|         internal.chunks.len() == 0 && internal.free_manager.free_size() == self.range.size() | ||||
|         internal.chunks.len() == 0 | ||||
|             && internal.free_manager.free_size() + gap_size == self.range.size() | ||||
|     } | ||||
| 
 | ||||
|     pub fn free_chunk(&self, chunk: &ChunkRef) { | ||||
| @ -358,22 +371,19 @@ impl VMManager { | ||||
| 
 | ||||
|         intersect_chunks.iter().for_each(|chunk| { | ||||
|             if let ChunkType::SingleVMA(vma) = chunk.internal() { | ||||
|                 if let Some(intersection_range) = chunk.range().intersect(&reset_range) { | ||||
|                     let mut internal_manager = self.internal(); | ||||
|                     internal_manager.mprotect_single_vma_chunk( | ||||
|                         &chunk, | ||||
|                         intersection_range, | ||||
|                         VMPerms::DEFAULT, | ||||
|                     ); | ||||
| 
 | ||||
|                     unsafe { | ||||
|                         let buf = intersection_range.as_slice_mut(); | ||||
|                         buf.iter_mut().for_each(|b| *b = 0) | ||||
|                 let mut vma = vma.lock().unwrap(); | ||||
|                 if let Some(intersection_vma) = vma.intersect(&reset_range) { | ||||
|                     intersection_vma.flush_and_clean_memory().unwrap(); | ||||
|                 } | ||||
|                 // clear permission for SingleVMA chunk
 | ||||
|                 if vma.perms() != VMPerms::DEFAULT { | ||||
|                     vma.set_perms(VMPerms::default()); | ||||
|                 } | ||||
|             } else { | ||||
|                 // Currently this is only used for heap de-allocation, so the chunk must be a SingleVMA chunk.
 | ||||
|                 unreachable!() | ||||
|             } | ||||
|         }); | ||||
| 
 | ||||
|         Ok(()) | ||||
|     } | ||||
| 
 | ||||
| @ -394,11 +404,11 @@ impl VMManager { | ||||
|         match chunk.internal() { | ||||
|             ChunkType::MultiVMA(manager) => { | ||||
|                 trace!("msync default chunk: {:?}", chunk.range()); | ||||
|                 return manager | ||||
|                 manager | ||||
|                     .lock() | ||||
|                     .unwrap() | ||||
|                     .chunk_manager_mut() | ||||
|                     .msync_by_range(&sync_range); | ||||
|                     .msync_by_range(&sync_range)?; | ||||
|             } | ||||
|             ChunkType::SingleVMA(vma) => { | ||||
|                 // Note: There are rare cases that multiple threads do mprotect or munmap for the same single-vma chunk
 | ||||
| @ -406,7 +416,7 @@ impl VMManager { | ||||
|                 // It is fine here because this function doesn't modify the global chunk list and only operates on the vma
 | ||||
|                 // which is updated in real time.
 | ||||
|                 let vma = vma.lock().unwrap(); | ||||
|                 vma.flush_backed_file(); | ||||
|                 vma.flush_committed_backed_file(); | ||||
|             } | ||||
|         } | ||||
|         Ok(()) | ||||
| @ -429,7 +439,7 @@ impl VMManager { | ||||
|                 ChunkType::SingleVMA(vma) => { | ||||
|                     vma.lock() | ||||
|                         .unwrap() | ||||
|                         .flush_backed_file_with_cond(is_same_file); | ||||
|                         .flush_committed_backed_file_with_cond(is_same_file); | ||||
|                 } | ||||
|             }); | ||||
|     } | ||||
| @ -539,6 +549,41 @@ impl VMManager { | ||||
| 
 | ||||
|         assert!(mem_chunks.len() == 0); | ||||
|     } | ||||
| 
 | ||||
|     pub fn handle_page_fault( | ||||
|         &self, | ||||
|         rip: usize, | ||||
|         pf_addr: usize, | ||||
|         errcd: u32, | ||||
|         kernel_triggers: bool, | ||||
|     ) -> Result<()> { | ||||
|         let current = current!(); | ||||
|         let page_fault_chunk = { | ||||
|             let current_process_mem_chunks = current.vm().mem_chunks().read().unwrap(); | ||||
|             if let Some(page_fault_chunk) = current_process_mem_chunks | ||||
|                 .iter() | ||||
|                 .find(|chunk| chunk.range().contains(pf_addr)) | ||||
|             { | ||||
|                 Some(page_fault_chunk.clone()) | ||||
|             } else { | ||||
|                 None | ||||
|             } | ||||
|         }; | ||||
| 
 | ||||
|         if let Some(page_fault_chunk) = page_fault_chunk { | ||||
|             return page_fault_chunk.handle_page_fault(rip, pf_addr, errcd, kernel_triggers); | ||||
|         } | ||||
| 
 | ||||
|         // System V SHM segments are not tracked by the process VM. Try to find the chunk here.
 | ||||
|         if let Some(page_fault_shm_chunk) = | ||||
|             SYSTEM_V_SHM_MANAGER.get_shm_chunk_containing_addr(pf_addr, current.process().pid()) | ||||
|         { | ||||
|             return page_fault_shm_chunk.handle_page_fault(rip, pf_addr, errcd, kernel_triggers); | ||||
|         } | ||||
| 
 | ||||
|         // This can happen, for example, when the user intends to trigger the SIGSEGV handler by visiting a null pointer.
 | ||||
|         return_errno!(ENOMEM, "can't find the chunk containing the address"); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| // Modification on this structure must acquire the global lock.
 | ||||
| @ -552,11 +597,21 @@ pub struct InternalVMManager { | ||||
| } | ||||
| 
 | ||||
| impl InternalVMManager { | ||||
|     pub fn init(vm_range: VMRange) -> Self { | ||||
|     pub fn init(vm_range: VMRange, gap_range: &Option<VMRange>) -> Self { | ||||
|         let chunks = BTreeSet::new(); | ||||
|         let fast_default_chunks = Vec::new(); | ||||
|         let free_manager = VMFreeSpaceManager::new(vm_range); | ||||
|         let mut free_manager = VMFreeSpaceManager::new(vm_range); | ||||
|         let shm_manager = ShmManager::new(); | ||||
|         if let Some(gap_range) = gap_range { | ||||
|             debug_assert!(vm_range.is_superset_of(&gap_range)); | ||||
|             free_manager | ||||
|                 .find_free_range_internal( | ||||
|                     gap_range.size(), | ||||
|                     PAGE_SIZE, | ||||
|                     VMMapAddr::Force(gap_range.start()), | ||||
|                 ) | ||||
|                 .unwrap(); | ||||
|         } | ||||
|         Self { | ||||
|             chunks, | ||||
|             fast_default_chunks, | ||||
| @ -657,19 +712,7 @@ impl InternalVMManager { | ||||
|             _ => unreachable!(), | ||||
|         }; | ||||
| 
 | ||||
|         // File-backed VMA needs to be flushed upon munmap
 | ||||
|         intersection_vma.flush_backed_file(); | ||||
| 
 | ||||
|         // Reset memory permissions
 | ||||
|         if !&intersection_vma.perms().is_default() { | ||||
|             VMPerms::apply_perms(&intersection_vma, VMPerms::default()); | ||||
|         } | ||||
| 
 | ||||
|         // Reset to zero
 | ||||
|         unsafe { | ||||
|             let buf = intersection_vma.as_slice_mut(); | ||||
|             buf.iter_mut().for_each(|b| *b = 0) | ||||
|         } | ||||
|         intersection_vma.flush_and_clean_memory()?; | ||||
| 
 | ||||
|         let mut new_vmas = vma.subtract(&intersection_vma); | ||||
|         let current = current!(); | ||||
| @ -724,10 +767,10 @@ impl InternalVMManager { | ||||
|                 self.shm_manager | ||||
|                     .create_shared_chunk(options, new_chunk.clone()) | ||||
|                     .map_err(|e| { | ||||
|                         let vma = new_chunk.get_vma_for_single_vma_chunk(); | ||||
|                         let mut vma = new_chunk.get_vma_for_single_vma_chunk(); | ||||
|                         // Reset memory permissions
 | ||||
|                         if !vma.perms().is_default() { | ||||
|                             VMPerms::apply_perms(&vma, VMPerms::default()); | ||||
|                             vma.modify_permissions_for_committed_pages(VMPerms::default()) | ||||
|                         } | ||||
|                         // Reset memory contents
 | ||||
|                         unsafe { | ||||
| @ -777,20 +820,12 @@ impl InternalVMManager { | ||||
|             .shm_manager | ||||
|             .munmap_shared_chunk(chunk, munmap_range, flag)? | ||||
|             == MunmapSharedResult::Freeable | ||||
|         { | ||||
|             // Flush memory contents to backed file and reset memory contents
 | ||||
|             { | ||||
|                 let vma = chunk.get_vma_for_single_vma_chunk(); | ||||
|             // Flush memory contents to backed file
 | ||||
|             vma.flush_backed_file(); | ||||
|             // Reset memory permissions
 | ||||
|             if !vma.perms().is_default() { | ||||
|                 VMPerms::apply_perms(&vma, VMPerms::default()); | ||||
|                 vma.flush_and_clean_memory()?; | ||||
|             } | ||||
|             // Reset memory contents
 | ||||
|             unsafe { | ||||
|                 let buf = vma.as_slice_mut(); | ||||
|                 buf.iter_mut().for_each(|b| *b = 0) | ||||
|             } | ||||
|             drop(vma); | ||||
| 
 | ||||
|             self.free_chunk(chunk); | ||||
|             let current = current!(); | ||||
| @ -855,7 +890,6 @@ impl InternalVMManager { | ||||
|             } | ||||
|             ChunkType::SingleVMA(vma) => vma, | ||||
|         }; | ||||
| 
 | ||||
|         let mut updated_vmas = { | ||||
|             let mut containing_vma = vma.lock().unwrap(); | ||||
|             trace!( | ||||
| @ -865,7 +899,8 @@ impl InternalVMManager { | ||||
|             ); | ||||
|             debug_assert!(chunk.range() == containing_vma.range()); | ||||
| 
 | ||||
|             if containing_vma.perms() == new_perms { | ||||
|             let old_perms = containing_vma.perms(); | ||||
|             if old_perms == new_perms { | ||||
|                 return Ok(()); | ||||
|             } | ||||
| 
 | ||||
| @ -876,7 +911,7 @@ impl InternalVMManager { | ||||
|                 (true, true) => { | ||||
|                     // Exactly the same vma
 | ||||
|                     containing_vma.set_perms(new_perms); | ||||
|                     VMPerms::apply_perms(&containing_vma, containing_vma.perms()); | ||||
|                     containing_vma.modify_permissions_for_committed_pages(new_perms); | ||||
|                     return Ok(()); | ||||
|                 } | ||||
|                 (false, false) => { | ||||
| @ -886,15 +921,13 @@ impl InternalVMManager { | ||||
|                     // remaining old VMA:     [protect_range.end,        containing_vma.end)
 | ||||
| 
 | ||||
|                     let old_end = containing_vma.end(); | ||||
|                     let old_perms = containing_vma.perms(); | ||||
| 
 | ||||
|                     let new_vma = VMArea::inherits_file_from( | ||||
|                     let mut new_vma = VMArea::inherits_file_from( | ||||
|                         &containing_vma, | ||||
|                         protect_range, | ||||
|                         new_perms, | ||||
|                         VMAccess::Private(current_pid), | ||||
|                     ); | ||||
|                     VMPerms::apply_perms(&new_vma, new_vma.perms()); | ||||
|                     new_vma.modify_permissions_for_committed_pages(new_perms); | ||||
| 
 | ||||
|                     let remaining_old_vma = { | ||||
|                         let range = VMRange::new(protect_range.end(), old_end).unwrap(); | ||||
| @ -905,7 +938,6 @@ impl InternalVMManager { | ||||
|                             VMAccess::Private(current_pid), | ||||
|                         ) | ||||
|                     }; | ||||
| 
 | ||||
|                     containing_vma.set_end(protect_range.start()); | ||||
| 
 | ||||
|                     // Put containing_vma last so that it gets updated first.
 | ||||
| @ -913,19 +945,19 @@ impl InternalVMManager { | ||||
|                     updated_vmas | ||||
|                 } | ||||
|                 _ => { | ||||
|                     let new_vma = VMArea::inherits_file_from( | ||||
|                     let mut new_vma = VMArea::inherits_file_from( | ||||
|                         &containing_vma, | ||||
|                         protect_range, | ||||
|                         new_perms, | ||||
|                         VMAccess::Private(current_pid), | ||||
|                     ); | ||||
|                     VMPerms::apply_perms(&new_vma, new_vma.perms()); | ||||
|                     new_vma.modify_permissions_for_committed_pages(new_perms); | ||||
| 
 | ||||
|                     if same_start { | ||||
|                         // Protect range is at left side of the cotaining vma
 | ||||
|                         // Protect range is at left side of the containing vma
 | ||||
|                         containing_vma.set_start(protect_range.end()); | ||||
|                     } else { | ||||
|                         // Protect range is at right side of the cotaining vma
 | ||||
|                         // Protect range is at right side of the containing vma
 | ||||
|                         containing_vma.set_end(protect_range.start()); | ||||
|                     } | ||||
| 
 | ||||
| @ -935,19 +967,16 @@ impl InternalVMManager { | ||||
|                 } | ||||
|             } | ||||
|         }; | ||||
| 
 | ||||
|         let current = current!(); | ||||
|         // First update current vma chunk
 | ||||
|         if updated_vmas.len() > 1 { | ||||
|             let update_vma = updated_vmas.pop().unwrap(); | ||||
|             self.update_single_vma_chunk(¤t, &chunk, update_vma); | ||||
|         } | ||||
| 
 | ||||
|         // Then add new chunks if any
 | ||||
|         updated_vmas.into_iter().for_each(|vma| { | ||||
|             self.add_new_chunk(¤t, vma); | ||||
|         }); | ||||
| 
 | ||||
|         Ok(()) | ||||
|     } | ||||
| 
 | ||||
| @ -964,9 +993,6 @@ impl InternalVMManager { | ||||
|         // Remove from chunks
 | ||||
|         self.chunks.remove(chunk); | ||||
| 
 | ||||
|         // Mprotect the whole chunk to reduce the usage of vma count of host
 | ||||
|         VMPerms::apply_perms(range, VMPerms::DEFAULT); | ||||
| 
 | ||||
|         // Add range back to freespace manager
 | ||||
|         self.free_manager.add_range_back_to_free_manager(range); | ||||
|         Ok(()) | ||||
| @ -1131,6 +1157,7 @@ impl InternalVMManager { | ||||
|             let perms = options.perms().clone(); | ||||
|             let align = options.align().clone(); | ||||
|             let initializer = options.initializer(); | ||||
|             let page_policy = options.page_policy(); | ||||
|             target_contained_ranges | ||||
|                 .iter() | ||||
|                 .map(|range| { | ||||
| @ -1146,6 +1173,7 @@ impl InternalVMManager { | ||||
|                         .initializer(initializer.clone()) | ||||
|                         .addr(addr) | ||||
|                         .size(size) | ||||
|                         .page_policy(*page_policy) | ||||
|                         .build() | ||||
|                         .unwrap() | ||||
|                 }) | ||||
|  | ||||
| @ -39,37 +39,6 @@ impl VMPerms { | ||||
|         self.bits == Self::DEFAULT.bits | ||||
|     } | ||||
| 
 | ||||
|     pub fn apply_perms(protect_range: &VMRange, perms: VMPerms) { | ||||
|         use sgx_trts::enclave::rsgx_is_supported_EDMM; | ||||
| 
 | ||||
|         unsafe { | ||||
|             let mut retval = 0; | ||||
|             let addr = protect_range.start() as *const c_void; | ||||
|             let len = protect_range.size(); | ||||
|             // PT_GROWSDOWN should only be applied to stack segment or a segment mapped with the MAP_GROWSDOWN flag set.
 | ||||
|             // Since the memory are managed by our own, mprotect ocall shouldn't use this flag. Otherwise, EINVAL will be thrown.
 | ||||
|             let mut prot = perms.clone(); | ||||
|             prot.remove(VMPerms::GROWSDOWN); | ||||
| 
 | ||||
|             if rsgx_is_supported_EDMM() { | ||||
|                 // With EDMM support, reserved memory permission should be updated.
 | ||||
|                 let sgx_status = sgx_tprotect_rsrv_mem(addr, len, prot.bits() as i32); | ||||
|                 if sgx_status != sgx_status_t::SGX_SUCCESS { | ||||
|                     panic!("sgx_tprotect_rsrv_mem status {}", sgx_status); | ||||
|                 } | ||||
|             } else { | ||||
|                 // Without EDMM support, reserved memory permission is statically RWX and we only need to do mprotect ocall.
 | ||||
|                 let sgx_status = occlum_ocall_mprotect(&mut retval, addr, len, prot.bits() as i32); | ||||
|                 if sgx_status != sgx_status_t::SGX_SUCCESS || retval != 0 { | ||||
|                     panic!( | ||||
|                         "occlum_ocall_mprotect status {}, retval {}", | ||||
|                         sgx_status, retval | ||||
|                     ); | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     pub fn display(&self) -> String { | ||||
|         let mut str = String::new(); | ||||
|         if self.can_read() { | ||||
| @ -96,23 +65,3 @@ impl Default for VMPerms { | ||||
|         VMPerms::DEFAULT | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| extern "C" { | ||||
|     // Modify the access permissions of the pages in the reserved memory area
 | ||||
|     //
 | ||||
|     // Parameters:
 | ||||
|     // Inputs: addr[in]: Starting address of region which needs to change access
 | ||||
|     //         permission. Page aligned.
 | ||||
|     //         length[in]: The length of the memory to be manipulated in bytes. Page aligned.
 | ||||
|     //         prot[in]: The target memory protection.
 | ||||
|     // Return: sgx_status_t
 | ||||
|     //
 | ||||
|     fn sgx_tprotect_rsrv_mem(addr: *const c_void, length: usize, prot: i32) -> sgx_status_t; | ||||
| 
 | ||||
|     fn occlum_ocall_mprotect( | ||||
|         retval: *mut i32, | ||||
|         addr: *const c_void, | ||||
|         len: usize, | ||||
|         prot: i32, | ||||
|     ) -> sgx_status_t; | ||||
| } | ||||
|  | ||||
| @ -10,6 +10,11 @@ use intrusive_collections::RBTreeLink; | ||||
| use intrusive_collections::{intrusive_adapter, KeyAdapter}; | ||||
| use rcore_fs::vfs::Metadata; | ||||
| 
 | ||||
| pub const GB: usize = 1 << 30; | ||||
| pub const TB: usize = 1 << 40; | ||||
| pub const MB: usize = 1 << 20; | ||||
| pub const KB: usize = 1 << 10; | ||||
| 
 | ||||
| #[derive(Clone, Debug)] | ||||
| pub enum VMInitializer { | ||||
|     DoNothing(), | ||||
| @ -139,7 +144,7 @@ impl FileBacked { | ||||
|         self.write_back | ||||
|     } | ||||
| 
 | ||||
|     pub fn init_file(&self) -> (&FileRef, usize) { | ||||
|     pub fn backed_file(&self) -> (&FileRef, usize) { | ||||
|         (&self.file, self.offset) | ||||
|     } | ||||
| 
 | ||||
| @ -179,6 +184,19 @@ impl VMMapAddr { | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| #[derive(Debug, Copy, Clone, Eq, PartialEq)] | ||||
| pub enum PagePolicy { | ||||
|     ReserveOnly = 0x1,    // Only reserve
 | ||||
|     CommitNow = 0x2,      // Commit all pages at mmap time.
 | ||||
|     CommitOnDemand = 0x4, // Reserve space at mmap time and commit pages in the PF handler. This is the default policy.
 | ||||
| } | ||||
| 
 | ||||
| impl Default for PagePolicy { | ||||
|     fn default() -> PagePolicy { | ||||
|         PagePolicy::CommitOnDemand | ||||
|     } | ||||
| } | ||||
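A hedged sketch of how the new policy plugs into the mmap options; the size and permissions are placeholders, and the setter methods are assumed to follow the derive_builder pattern used by the other fields.

```rust
// Build mmap options whose pages are only reserved at mmap time and committed
// lazily in the page-fault handler (the default policy).
let options = VMMapOptionsBuilder::default()
    .size(16 * KB)
    .perms(VMPerms::DEFAULT)
    .addr(VMMapAddr::Any)
    .page_policy(PagePolicy::CommitOnDemand)
    .build()?;
```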
| 
 | ||||
| #[derive(Builder, Debug)] | ||||
| #[builder(pattern = "owned", build_fn(skip), no_std)] | ||||
| pub struct VMMapOptions { | ||||
| @ -187,6 +205,7 @@ pub struct VMMapOptions { | ||||
|     perms: VMPerms, | ||||
|     addr: VMMapAddr, | ||||
|     initializer: VMInitializer, | ||||
|     page_policy: PagePolicy, | ||||
| } | ||||
| 
 | ||||
| // VMMapOptionsBuilder is generated automatically, except the build function
 | ||||
| @ -232,12 +251,21 @@ impl VMMapOptionsBuilder { | ||||
|             Some(initializer) => initializer.clone(), | ||||
|             None => VMInitializer::default(), | ||||
|         }; | ||||
|         let page_policy = { | ||||
|             match &initializer { | ||||
|                 VMInitializer::CopyFrom { .. } => PagePolicy::CommitNow, | ||||
|                 VMInitializer::CopyOldAndReadNew { .. } => PagePolicy::CommitNow, | ||||
|                 _ => self.page_policy.unwrap_or_default(), | ||||
|             } | ||||
|         }; | ||||
| 
 | ||||
|         Ok(VMMapOptions { | ||||
|             size, | ||||
|             align, | ||||
|             perms, | ||||
|             addr, | ||||
|             initializer, | ||||
|             page_policy, | ||||
|         }) | ||||
|     } | ||||
| } | ||||
| @ -269,6 +297,10 @@ impl VMMapOptions { | ||||
|         } | ||||
|         false | ||||
|     } | ||||
| 
 | ||||
|     pub fn page_policy(&self) -> &PagePolicy { | ||||
|         &self.page_policy | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| #[derive(Clone, Copy, PartialEq)] | ||||
|  | ||||
2 tools/toolchains/dcap_lib/Cargo.lock generated
| @ -25,4 +25,4 @@ dependencies = [ | ||||
| 
 | ||||
| [[package]] | ||||
| name = "sgx_types" | ||||
| version = "1.1.5" | ||||
| version = "1.1.6" | ||||
|  | ||||