Add EDMM support for Legacy Occlum

parent 28c29c8896
commit d49b3af0aa
src/libos/Cargo.lock (generated, 75 changed lines)
@@ -9,7 +9,7 @@ dependencies = [
  "aligned",
  "atomic",
  "bitflags",
- "bitvec",
+ "bitvec 1.0.1",
  "ctor",
  "derive_builder",
  "goblin",
@@ -18,6 +18,7 @@ dependencies = [
  "lazy_static",
  "log",
  "memoffset 0.6.5",
+ "modular-bitfield",
  "rcore-fs",
  "rcore-fs-devfs",
  "rcore-fs-mountfs",
@@ -94,7 +95,19 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "41262f11d771fd4a61aa3ce019fca363b4b6c282fca9da2a31186d3965a47a5c"
 dependencies = [
  "either",
- "radium",
+ "radium 0.3.0",
+]
+
+[[package]]
+name = "bitvec"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c"
+dependencies = [
+ "funty",
+ "radium 0.7.0",
+ "tap",
+ "wyz",
 ]
 
 [[package]]
@@ -206,6 +219,12 @@ version = "0.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba"
+
+[[package]]
+name = "funty"
+version = "2.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c"
 
 [[package]]
 name = "goblin"
 version = "0.5.4"
@@ -294,6 +313,27 @@ dependencies = [
  "autocfg 1.1.0",
 ]
+
+[[package]]
+name = "modular-bitfield"
+version = "0.11.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a53d79ba8304ac1c4f9eb3b9d281f21f7be9d4626f72ce7df4ad8fbde4f38a74"
+dependencies = [
+ "modular-bitfield-impl",
+ "static_assertions 1.1.0",
+]
+
+[[package]]
+name = "modular-bitfield-impl"
+version = "0.11.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5a7d5f7076603ebc68de2dc6a650ec331a062a13abaa346975be747bbfa4b789"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
 
 [[package]]
 name = "plain"
 version = "0.2.3"
@@ -334,6 +374,12 @@ version = "0.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "def50a86306165861203e7f84ecffbbdfdea79f0e51039b33de1e952358c47ac"
+
+[[package]]
+name = "radium"
+version = "0.7.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09"
 
 [[package]]
 name = "rand"
 version = "0.6.5"
@@ -479,11 +525,11 @@ dependencies = [
 name = "rcore-fs-sefs"
 version = "0.1.0"
 dependencies = [
- "bitvec",
+ "bitvec 0.17.4",
  "log",
  "rcore-fs",
  "spin 0.5.2",
- "static_assertions",
+ "static_assertions 0.3.4",
  "uuid",
 ]
 
@@ -719,6 +765,12 @@ version = "0.3.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7f3eb36b47e512f8f1c9e3d10c2c1965bc992bd9cdb024fa581e2194501c83d3"
+
+[[package]]
+name = "static_assertions"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
 
 [[package]]
 name = "strsim"
 version = "0.9.3"
@@ -736,6 +788,12 @@ dependencies = [
  "unicode-ident",
 ]
+
+[[package]]
+name = "tap"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369"
 
 [[package]]
 name = "unicode-ident"
 version = "1.0.3"
@@ -772,3 +830,12 @@ name = "winapi-x86_64-pc-windows-gnu"
 version = "0.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
+
+[[package]]
+name = "wyz"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed"
+dependencies = [
+ "tap",
+]

@@ -10,7 +10,7 @@ crate-type = ["staticlib"]
 [dependencies]
 atomic = "0.5"
 bitflags = "1.0"
-bitvec = { version = "0.17", default-features = false, features = ["alloc"] }
+bitvec = { version = "1", default-features = false, features = ["alloc"] }
 log = "0.4"
 aligned = "0.4.1"
 lazy_static = { version = "1.1.0", features = ["spin_no_std"] } # Implies nightly
@@ -33,6 +33,7 @@ regex = { git = "https://github.com/mesalock-linux/regex-sgx", default-features
 goblin = { version = "0.5.4", default-features = false, features = ["elf64", "elf32", "endian_fd"] }
 intrusive-collections = "0.9"
 spin = "0.7"
+modular-bitfield = "0.11.2"
 
 [patch.'https://github.com/apache/teaclave-sgx-sdk.git']
 sgx_tstd = { path = "../../deps/rust-sgx-sdk/sgx_tstd" }

@@ -6,10 +6,14 @@ use self::syscall::{handle_syscall_exception, SYSCALL_OPCODE};
 use super::*;
 use crate::signal::{FaultSignal, SigSet};
 use crate::syscall::exception_interrupt_syscall_c_abi;
-use crate::syscall::{CpuContext, FpRegs, SyscallNum};
-use aligned::{Aligned, A16};
-use core::arch::x86_64::_fxsave;
+use crate::syscall::{CpuContext, ExtraContext, SyscallNum};
+use crate::vm::{enclave_page_fault_handler, USER_SPACE_VM_MANAGER};
 use sgx_types::*;
+use sgx_types::{sgx_exception_type_t, sgx_exception_vector_t};
+
+const ENCLU: u32 = 0xd7010f;
+const EACCEPT: u32 = 0x5;
+const EACCEPTCOPY: u32 = 0x7;
 
 // Modules for instruction simulation
 mod cpuid;
@@ -25,14 +29,63 @@ pub fn register_exception_handlers() {
     }
 }
 
+fn try_handle_kernel_exception(info: &sgx_exception_info_t) -> i32 {
+    if info.exception_vector == sgx_exception_vector_t::SGX_EXCEPTION_VECTOR_PF {
+        let pf_addr = info.exinfo.faulting_address as usize;
+        // The #PF address must be in user space. Otherwise, keep searching for the exception handler.
+        if !USER_SPACE_VM_MANAGER.range().contains(pf_addr) {
+            SGX_MM_EXCEPTION_CONTINUE_SEARCH
+        } else {
+            let rip = info.cpu_context.rip as *const u32;
+            let rax = info.cpu_context.rax as u32;
+            // This can happen when two threads both try to EAUG a new page. Thread 1 EAUGs because it
+            // first touches the memory and triggers #PF. Thread 2 EAUGs because it uses sgx_mm_commit
+            // to commit a new page with EACCEPT and triggers #PF. If Thread 1 acquires the lock to do
+            // EAUG first, then when Thread 2 acquires the lock, it can't EAUG again and will fail. The
+            // failure raises a signal, which is eventually handled here, and the instruction that
+            // triggered this exception is EACCEPT/EACCEPTCOPY. In this case, since the new page is
+            // already EAUG-ed, we only need to execute the EACCEPT again. Thus, here we
+            // just return SGX_MM_EXCEPTION_CONTINUE_EXECUTION.
+            if ENCLU == (unsafe { *rip } as u32) & 0xffffff
+                && (EACCEPT == rax || EACCEPTCOPY == rax)
+            {
+                return SGX_MM_EXCEPTION_CONTINUE_EXECUTION;
+            }
+
+            // If the triggering code is not the user's code and the #PF address is in user space, then
+            // it is a kernel-triggered #PF that we can handle. This can happen e.g. when a read syscall
+            // triggers a #PF on a user buffer.
+            info!("kernel code triggers #PF");
+            let kernel_triggers = true;
+            enclave_page_fault_handler(info.cpu_context.rip as usize, info.exinfo, kernel_triggers)
+                .expect("handle PF failure");
+            SGX_MM_EXCEPTION_CONTINUE_EXECUTION
+        }
+    } else {
+        // Otherwise, we can't handle it. Keep searching for the exception handler.
+        error!(
+            "We can't handle this exception: {:?}",
+            info.exception_vector
+        );
+        SGX_MM_EXCEPTION_CONTINUE_SEARCH
+    }
+}
+
 #[no_mangle]
 extern "C" fn handle_exception(info: *mut sgx_exception_info_t) -> i32 {
-    let mut fpregs = FpRegs::save();
+    let info = unsafe { &mut *info };
+
+    // Try to handle a kernel-triggered #PF
+    if !USER_SPACE_VM_MANAGER
+        .range()
+        .contains(info.cpu_context.rip as usize)
+    {
+        return try_handle_kernel_exception(&info);
+    }
+
+    // User-space-triggered exception
     unsafe {
         exception_interrupt_syscall_c_abi(
             SyscallNum::HandleException as u32,
-            info as *mut _,
-            &mut fpregs as *mut FpRegs,
+            info as *mut sgx_exception_info_t as *mut _,
         )
     };
     unreachable!();
@@ -41,20 +94,22 @@ extern "C" fn handle_exception(info: *mut sgx_exception_info_t) -> i32 {
 /// Exceptions are handled as a special kind of system calls.
 pub fn do_handle_exception(
     info: *mut sgx_exception_info_t,
-    fpregs: *mut FpRegs,
     user_context: *mut CpuContext,
 ) -> Result<isize> {
     let info = unsafe { &mut *info };
     check_exception_type(info.exception_type)?;
+    info!("do handle exception: {:?}", info.exception_vector);
+
     let user_context = unsafe { &mut *user_context };
     *user_context = CpuContext::from_sgx(&info.cpu_context);
-    user_context.fpregs = fpregs;
+    let xsave_area = info.xsave_area.as_mut_ptr();
+    user_context.extra_context = ExtraContext::Xsave;
+    user_context.extra_context_ptr = xsave_area;
 
     // Try to do instruction emulation first
     if info.exception_vector == sgx_exception_vector_t::SGX_EXCEPTION_VECTOR_UD {
         // Assume the length of the opcode is 2 bytes
-        let ip_opcode = unsafe { *(user_context.rip as *const u16) };
+        let ip_opcode: u16 = unsafe { *(user_context.rip as *const u16) };
         if ip_opcode == RDTSC_OPCODE {
             return handle_rdtsc_exception(user_context);
         } else if ip_opcode == SYSCALL_OPCODE {
@@ -64,6 +119,23 @@ pub fn do_handle_exception(
         }
     }
 
+    // Normally, we should only handle #PF exceptions with the SGX bit set, which indicate
+    // uncommitted EPC. However, when committing a page whose permissions are not the default
+    // read-write (e.g. RWX), there is a short gap after EACCEPTCOPY and before the mprotect
+    // ocall. If the user touches the memory during this short gap, the SGX bit will not be set.
+    // Thus, we don't check the SGX bit here.
+    if info.exception_vector == sgx_exception_vector_t::SGX_EXCEPTION_VECTOR_PF {
+        info!("Userspace #PF caught, try handle");
+        if enclave_page_fault_handler(info.cpu_context.rip as usize, info.exinfo, false).is_ok() {
+            info!("#PF handling is done successfully");
+            return Ok(0);
+        }
+
+        warn!(
+            "#PF not handled. Turn to signal. user context = {:?}",
+            user_context
+        );
+    }
+
     // Then, it must be a "real" exception. Convert it to signal and force delivering it.
     // The generated signal is SIGBUS, SIGFPE, SIGILL, or SIGSEGV.
     //
@@ -108,3 +180,21 @@ fn check_exception_type(type_: sgx_exception_type_t) -> Result<()> {
     }
     Ok(())
 }
+
+// Based on the Page-Fault Error Code in the Intel manual
+const PF_EXCEPTION_SGX_BIT: u32 = 0x1;
+const PF_EXCEPTION_RW_BIT: u32 = 0x2;
+
+// Return value:
+// True - SGX bit is set
+// False - SGX bit is not set
+pub fn check_sgx_bit(exception_error_code: u32) -> bool {
+    exception_error_code & PF_EXCEPTION_SGX_BIT == PF_EXCEPTION_SGX_BIT
+}
+
+// Return value:
+// True - write bit is set, #PF caused by write
+// False - read bit is set, #PF caused by read
+pub fn check_rw_bit(exception_error_code: u32) -> bool {
+    exception_error_code & PF_EXCEPTION_RW_BIT == PF_EXCEPTION_RW_BIT
+}

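Note: the sketch below is not part of the commit; it only illustrates how a #PF path could combine the two helpers above. The bit names follow this commit's constants, and the surrounding function is hypothetical.

    // Hypothetical caller, for illustration only.
    fn classify_pf(error_code: u32) {
        if check_sgx_bit(error_code) {
            // EPC-related fault: commit the page (EAUG + EACCEPT),
            // then simply retry the faulting access.
        } else if check_rw_bit(error_code) {
            // Write to a committed page without write permission.
        } else {
            // Read access fault.
        }
    }
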
@@ -91,7 +91,7 @@ fn get_output_for_vma(vma: &VMArea, heap_or_stack: Option<&str>) -> String {
     let perms = vma.perms();
 
     let (file_path, offset, device_id, inode_num) = {
-        if let Some((file, offset)) = vma.init_file() {
+        if let Some((file, offset)) = vma.backed_file() {
             let inode_file = file.as_inode_file().unwrap();
             let file_path = inode_file.abs_path();
             let inode_num = inode_file.inode().metadata().unwrap().inode;

@@ -2,9 +2,7 @@ pub use self::sgx::sgx_interrupt_info_t;
 use crate::prelude::*;
 use crate::process::ThreadRef;
 use crate::syscall::exception_interrupt_syscall_c_abi;
-use crate::syscall::{CpuContext, FpRegs, SyscallNum};
-use aligned::{Aligned, A16};
-use core::arch::x86_64::_fxsave;
+use crate::syscall::{CpuContext, ExtraContext, SyscallNum};
 
 mod sgx;
 
@@ -16,28 +14,23 @@ pub fn init() {
 }
 
 extern "C" fn handle_interrupt(info: *mut sgx_interrupt_info_t) -> i32 {
-    let mut fpregs = FpRegs::save();
     unsafe {
-        exception_interrupt_syscall_c_abi(
-            SyscallNum::HandleInterrupt as u32,
-            info as *mut _,
-            &mut fpregs as *mut FpRegs,
-        )
+        exception_interrupt_syscall_c_abi(SyscallNum::HandleInterrupt as u32, info as *mut _)
     };
     unreachable!();
 }
 
 pub fn do_handle_interrupt(
     info: *mut sgx_interrupt_info_t,
-    fpregs: *mut FpRegs,
     cpu_context: *mut CpuContext,
 ) -> Result<isize> {
-    let info = unsafe { &*info };
+    let info = unsafe { &mut *info };
     let context = unsafe { &mut *cpu_context };
     // The cpu context is overridden so that it is as if the syscall is called from where the
     // interrupt happened
     *context = CpuContext::from_sgx(&info.cpu_context);
-    context.fpregs = fpregs;
+    context.extra_context = ExtraContext::Xsave;
+    context.extra_context_ptr = info.xsave_area.as_mut_ptr();
     Ok(0)
 }

@@ -1,10 +1,15 @@
 use crate::prelude::*;
 
-#[repr(C)]
+#[repr(C, align(64))]
 #[derive(Default, Clone, Copy)]
 #[allow(non_camel_case_types)]
 pub struct sgx_interrupt_info_t {
     pub cpu_context: sgx_cpu_context_t,
+    pub interrupt_valid: uint32_t,
+    reserved: uint32_t,
+    pub xsave_size: uint64_t,
+    pub reserved1: [uint64_t; 4],
+    pub xsave_area: [uint8_t; 0],
 }
 
 #[allow(non_camel_case_types)]

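Note: `xsave_area` is a zero-length trailing array, the Rust equivalent of C's flexible array member. It adds no size but marks the offset right past the fixed fields, where the SDK lays down an XSAVE buffer of `xsave_size` bytes; XSAVE requires 64-byte alignment, hence `align(64)`. A minimal sketch (the helper is hypothetical, not part of the commit):

    // Sketch only: how the trailing buffer is reached.
    unsafe fn xsave_buf(info: &mut sgx_interrupt_info_t) -> &mut [u8] {
        core::slice::from_raw_parts_mut(
            info.xsave_area.as_mut_ptr(),
            info.xsave_size as usize,
        )
    }
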
@@ -21,8 +21,11 @@
 #![feature(test)]
 #![feature(atomic_from_mut)]
 #![feature(btree_drain_filter)]
-#![feature(bench_black_box)]
 #![feature(arbitrary_enum_discriminant)]
+// for core::ptr::non_null::NonNull addr() method
+#![feature(strict_provenance)]
+// for VMArea::can_merge_vmas
+#![feature(is_some_and)]
 
 #[macro_use]
 extern crate alloc;
@@ -59,6 +62,7 @@ extern crate memoffset;
 extern crate ctor;
 extern crate intrusive_collections;
 extern crate itertools;
+extern crate modular_bitfield;
 extern crate resolv_conf;
 
 use sgx_trts::libc;

@@ -1,6 +1,6 @@
 use crate::process::do_vfork::reap_zombie_child_created_with_vfork;
 use crate::signal::constants::*;
-use std::intrinsics::atomic_store;
+use std::intrinsics::atomic_store_seqcst;
 
 use super::do_futex::futex_wake;
 use super::do_vfork::{is_vforked_child_process, vfork_return_to_parent};
@@ -61,7 +61,7 @@ fn exit_thread(term_status: TermStatus) {
     // Notify a thread, if any, that waits on ctid. See set_tid_address(2) for more info.
     if let Some(ctid_ptr) = thread.clear_ctid() {
         unsafe {
-            atomic_store(ctid_ptr.as_ptr(), 0);
+            atomic_store_seqcst(ctid_ptr.as_ptr(), 0);
         }
         futex_wake(ctid_ptr.as_ptr() as *const i32, 1);
     }

@@ -1,6 +1,6 @@
 use std::collections::hash_map::DefaultHasher;
 use std::hash::{Hash, Hasher};
-use std::intrinsics::atomic_load;
+use std::intrinsics::atomic_load_seqcst;
 use std::sync::atomic::{AtomicBool, Ordering};
 
 use crate::prelude::*;
@@ -258,7 +258,7 @@ impl FutexKey {
     }
 
     pub fn load_val(&self) -> i32 {
-        unsafe { atomic_load(self.0 as *const i32) }
+        unsafe { atomic_load_seqcst(self.0 as *const i32) }
     }
 
     pub fn addr(&self) -> usize {

@@ -8,6 +8,7 @@
 //! * If `cpu_set[i] == true`, then the i-th CPU core belongs to the set;
 //! * Otherwise, the i-th CPU core is not in the set.
 
+use bitvec::order::LocalBits as Local;
 use bitvec::prelude::*;
 use std::ops::Index;
 
@@ -15,7 +16,7 @@ use crate::prelude::*;
 
 #[derive(Debug, Clone, PartialEq)]
 pub struct CpuSet {
-    bits: BitBox<Local, u8>,
+    bits: BitBox<u8, Local>,
 }
 
 impl CpuSet {
@@ -33,14 +34,14 @@ impl CpuSet {
 
     /// Create a CpuSet that consists of all of the CPU cores.
     pub fn new_full() -> Self {
-        let mut bits = bitbox![Local, u8; 1; Self::len() * 8];
+        let mut bits = bitbox![u8, Local; 1; Self::len() * 8];
         Self::clear_unused(&mut bits);
         Self { bits }
     }
 
     /// Create a CpuSet that consists of none of the CPU cores.
     pub fn new_empty() -> Self {
-        let bits = bitbox![Local, u8; 0; Self::len() * 8];
+        let bits = bitbox![u8, Local; 0; Self::len() * 8];
         Self { bits }
     }
 
@@ -61,7 +62,7 @@ impl CpuSet {
 
     /// Returns the first index of CPUs in set.
     pub fn first_cpu_idx(&self) -> Option<usize> {
-        self.iter().position(|&b| b == true)
+        self.iter().position(|b| b == true)
     }
 
     // Returns if the CpuSet is a subset of available cpu set
@@ -75,7 +76,7 @@ impl CpuSet {
             return_errno!(EINVAL, "slice is not long enough");
         }
         let slice = &slice[..Self::len()];
-        let mut bits = BitBox::from_slice(slice);
+        let mut bits = BitBox::from_bitslice(&BitSlice::from_slice(slice));
         Self::clear_unused(&mut bits);
 
         Ok(Self { bits })
@@ -85,11 +86,11 @@ impl CpuSet {
     ///
     /// The last, unused bits in the byte slice are guaranteed to be zero.
     pub fn as_slice(&self) -> &[u8] {
-        self.bits.as_slice()
+        self.bits.as_raw_slice()
     }
 
     pub fn as_mut_slice(&mut self) -> &mut [u8] {
-        self.bits.as_mut_slice()
+        self.bits.as_raw_mut_slice()
     }
 
     /// Returns an iterator that allows accessing the underlying bits.
@@ -102,7 +103,7 @@ impl CpuSet {
         self.bits.iter_mut()
     }
 
-    fn clear_unused(bits: &mut BitSlice<Local, u8>) {
+    fn clear_unused(bits: &mut BitSlice<u8, Local>) {
         let unused_bits = &mut bits[Self::ncores()..(Self::len() * 8)];
         for mut bit in unused_bits {
             *bit = false;
@@ -110,8 +111,8 @@ impl CpuSet {
     }
 }
 
-pub type Iter<'a> = bitvec::slice::Iter<'a, Local, u8>;
-pub type IterMut<'a> = bitvec::slice::IterMut<'a, Local, u8>;
+pub type Iter<'a> = bitvec::slice::Iter<'a, u8, Local>;
+pub type IterMut<'a> = bitvec::slice::IterMut<'a, u8, Local>;
 
 impl Index<usize> for CpuSet {
     type Output = bool;

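Note: the cpu_set edits above are the mechanical fallout of the bitvec 0.17 to 1.x bump in Cargo.toml. In bitvec 1.x the type parameters swapped from (order, store) to (store, order), `Local` became `LocalBits`, raw byte access was renamed to `as_raw_slice`/`as_raw_mut_slice`, and iterators yield proxy references rather than `&bool`. A minimal sketch of the new spellings (illustrative, not part of the commit):

    use bitvec::order::LocalBits as Local;
    use bitvec::prelude::*;

    fn demo() {
        // 0.17: bitbox![Local, u8; 1; 16] -- order first, store second.
        let bits: BitBox<u8, Local> = bitbox![u8, Local; 1; 16];
        // 0.17: bits.as_slice()
        let _raw: &[u8] = bits.as_raw_slice();
        // 0.17: iter() yielded &bool, hence |&b|; 1.x yields a proxy, hence |b|.
        let _first_set = bits.iter().position(|b| *b);
    }
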
@@ -199,7 +199,7 @@ impl siginfo_t {
     }
 }
 
-#[derive(Clone, Copy)]
+#[derive(Clone)]
 #[repr(C)]
 pub struct ucontext_t {
     pub uc_flags: u64,
@@ -225,7 +225,8 @@ pub type stack_t = sigaltstack_t;
 pub struct mcontext_t {
     pub inner: CpuContext,
     // TODO: the fields should be csgsfs, err, trapno, oldmask, and cr2
-    _unused0: [u64; 5],
+    // The number should be 5, but we use two extra slots to store something else in the CpuContext. Thus make it 3.
+    _unused0: [u64; 3],
     // TODO: this field should be `fpregs: fpregset_t,`
     _unused1: usize,
     _reserved: [u64; 8],

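Note: the arithmetic behind `[u64; 5]` shrinking to `[u64; 3]`: on Linux x86-64, mcontext_t occupies 32 u64 words (256 bytes): 18 words of general registers plus rip and rflags, then csgsfs, err, trapno, oldmask, cr2 (5 words), the fpregs pointer (1 word), and 8 reserved words. CpuContext now embeds two extra words (`extra_context` and `extra_context_ptr`), so two words come out of the padding to keep the size and offsets ABI-compatible. A hypothetical compile-time guard (not in the commit):

    // Hypothetical check: mcontext_t must stay 256 bytes so that user-space
    // ucontext_t field offsets still match the Linux ABI.
    const _: () = assert!(core::mem::size_of::<mcontext_t>() == 32 * 8);
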
@@ -5,9 +5,8 @@ use super::{SigAction, SigActionFlags, SigDefaultAction, SigSet, Signal};
 use crate::lazy_static::__Deref;
 use crate::prelude::*;
 use crate::process::{ProcessRef, TermStatus, ThreadRef};
-use crate::syscall::{CpuContext, FpRegs};
+use crate::syscall::{CpuContext, ExtraContext, FpRegs, XsaveArea};
 use aligned::{Aligned, A16};
-use core::arch::x86_64::{_fxrstor, _fxsave};
 use std::{ptr, slice};
 
 pub fn do_rt_sigreturn(curr_user_ctxt: &mut CpuContext) -> Result<()> {
@@ -34,11 +33,27 @@ pub fn do_rt_sigreturn(curr_user_ctxt: &mut CpuContext) -> Result<()> {
     *curr_user_ctxt = last_ucontext.uc_mcontext.inner;
 
     // Restore the floating point registers to a temp area
-    // The floating point registers would be recoved just
-    // before return to user's code
+    // The floating point registers would be recovered just before returning to the user's code
+    match curr_user_ctxt.extra_context {
+        ExtraContext::Fpregs => {
+            // Signal raised by a direct syscall.
+            // fpregs should be stored on the heap, because the ucontext_t will be freed when
+            // this function returns and curr_user_ctxt only stores the pointer.
             let mut fpregs = Box::new(unsafe { FpRegs::from_slice(&last_ucontext.fpregs) });
-    curr_user_ctxt.fpregs = Box::into_raw(fpregs);
-    curr_user_ctxt.fpregs_on_heap = 1; // indicates the fpregs is on heap
+            curr_user_ctxt.extra_context_ptr = Box::into_raw(fpregs) as *mut u8;
+        }
+        ExtraContext::Xsave => {
+            // Signal raised by an exception.
+            // The xsave_area is stored in a special area reserved on the kernel's stack. We can
+            // just overwrite this area with the latest user context.
+            // Note: currently, we only restore the fpregs instead of the whole xsave area for
+            // sigreturn, because the handling path doesn't touch other advanced registers.
+            // However, if we ever touch those registers in the future, the whole xsave area
+            // should be restored on sigreturn.
+            let latest_fpregs = unsafe { FpRegs::from_slice(&last_ucontext.fpregs) };
+            let xsave_area =
+                unsafe { (&mut *(curr_user_ctxt.extra_context_ptr as *mut XsaveArea)) };
+            xsave_area.set_fpregs_area(latest_fpregs);
+        }
+    }
 
     Ok(())
 }
 
@@ -261,16 +276,24 @@ fn handle_signals_by_user(
     // Save the old sigmask
     ucontext.uc_sigmask = old_sigmask.to_c();
     // Save the user context
-    ucontext.uc_mcontext.inner = *curr_user_ctxt;
+    ucontext.uc_mcontext.inner = curr_user_ctxt.clone();
 
     // Save the floating point registers
-    if curr_user_ctxt.fpregs != ptr::null_mut() {
-        ucontext
-            .fpregs
-            .copy_from_slice(unsafe { curr_user_ctxt.fpregs.as_ref().unwrap().as_slice() });
-        // Clear the floating point registers, since we do not need to recover is when this syscall return
-        curr_user_ctxt.fpregs = ptr::null_mut();
+    if curr_user_ctxt.extra_context_ptr != ptr::null_mut() {
+        // Signal from exception handling
+        debug_assert!(matches!(curr_user_ctxt.extra_context, ExtraContext::Xsave));
+        let fpregs_area =
+            unsafe { (&*(curr_user_ctxt.extra_context_ptr as *mut XsaveArea)) }.get_fpregs();
+        ucontext.fpregs.copy_from_slice(fpregs_area.as_slice());
+        // Clear the floating point registers, since we do not need to recover this when this syscall returns
+        curr_user_ctxt.extra_context_ptr = ptr::null_mut();
     } else {
+        // Signal raised by a direct syscall
+        debug_assert!(
+            matches!(curr_user_ctxt.extra_context, ExtraContext::Fpregs)
+                && curr_user_ctxt.extra_context_ptr == ptr::null_mut()
+        );
+
         // We need a correct fxsave structure in the buffer,
         // because the app may modify part of it to update the
         // floating point after the signal handler finished.

@@ -36,12 +36,12 @@ impl FaultSignal {
             // Page fault exception
             SGX_EXCEPTION_VECTOR_PF => {
                 const PF_ERR_FLAG_PRESENT: u32 = 1u32 << 0;
-                let code = if info.exinfo.errcd & PF_ERR_FLAG_PRESENT != 0 {
+                let code = if info.exinfo.error_code & PF_ERR_FLAG_PRESENT != 0 {
                     SEGV_ACCERR
                 } else {
                     SEGV_MAPERR
                 };
-                let addr = Some(info.exinfo.maddr);
+                let addr = Some(info.exinfo.faulting_address);
                 (SIGSEGV, code, addr)
             },
             // General protection exception

@@ -7,7 +7,7 @@
 //! 3. Preprocess the system call and then call `dispatch_syscall` (in this file)
 //! 4. Call `do_*` to process the system call (in other modules)
 
-use aligned::{Aligned, A16};
+use aligned::{Aligned, A16, A64};
 use core::arch::x86_64::{_fxrstor, _fxsave};
 use std::any::Any;
 use std::convert::TryFrom;
@@ -60,7 +60,7 @@ use crate::signal::{
     do_rt_sigtimedwait, do_sigaltstack, do_tgkill, do_tkill, sigaction_t, siginfo_t, sigset_t,
     stack_t,
 };
-use crate::vm::{MMapFlags, MRemapFlags, MSyncFlags, VMPerms};
+use crate::vm::{MMapFlags, MRemapFlags, MSyncFlags, MadviceFlags, VMPerms};
 use crate::{fs, process, std, vm};
 
 use super::*;
@@ -122,7 +122,7 @@ macro_rules! process_syscall_table_with_callback {
         (Mremap = 25) => do_mremap(old_addr: usize, old_size: usize, new_size: usize, flags: i32, new_addr: usize),
         (Msync = 26) => do_msync(addr: usize, size: usize, flags: u32),
         (Mincore = 27) => handle_unsupported(),
-        (Madvise = 28) => handle_unsupported(),
+        (Madvise = 28) => do_madvice(addr: usize, length: usize, advice: i32),
         (Shmget = 29) => do_shmget(key: key_t, size: size_t, shmflg: i32),
         (Shmat = 30) => do_shmat(shmid: i32, shmaddr: usize, shmflg: i32),
         (Shmctl = 31) => do_shmctl(shmid: i32, cmd: i32, buf: *mut shmids_t),
@@ -424,8 +424,8 @@ macro_rules! process_syscall_table_with_callback {
         // Occlum-specific system calls
         (SpawnGlibc = 359) => do_spawn_for_glibc(child_pid_ptr: *mut u32, path: *const i8, argv: *const *const i8, envp: *const *const i8, fa: *const SpawnFileActions, attribute_list: *const posix_spawnattr_t),
         (SpawnMusl = 360) => do_spawn_for_musl(child_pid_ptr: *mut u32, path: *const i8, argv: *const *const i8, envp: *const *const i8, fdop_list: *const FdOp, attribute_list: *const posix_spawnattr_t),
-        (HandleException = 361) => do_handle_exception(info: *mut sgx_exception_info_t, fpregs: *mut FpRegs, context: *mut CpuContext),
-        (HandleInterrupt = 362) => do_handle_interrupt(info: *mut sgx_interrupt_info_t, fpregs: *mut FpRegs, context: *mut CpuContext),
+        (HandleException = 361) => do_handle_exception(info: *mut sgx_exception_info_t, context: *mut CpuContext),
+        (HandleInterrupt = 362) => do_handle_interrupt(info: *mut sgx_interrupt_info_t, context: *mut CpuContext),
         (MountRootFS = 363) => do_mount_rootfs(key_ptr: *const sgx_key_128bit_t, rootfs_config_ptr: *const user_rootfs_config),
     }
 };
@@ -649,12 +649,10 @@ fn do_syscall(user_context: &mut CpuContext) {
         syscall.args[1] = user_context as *mut _ as isize;
     } else if syscall_num == SyscallNum::HandleException {
         // syscall.args[0] == info
-        // syscall.args[1] == fpregs
-        syscall.args[2] = user_context as *mut _ as isize;
+        syscall.args[1] = user_context as *mut _ as isize;
     } else if syscall.num == SyscallNum::HandleInterrupt {
         // syscall.args[0] == info
-        // syscall.args[1] == fpregs
-        syscall.args[2] = user_context as *mut _ as isize;
+        syscall.args[1] = user_context as *mut _ as isize;
     } else if syscall.num == SyscallNum::Sigaltstack {
         // syscall.args[0] == new_ss
         // syscall.args[1] == old_ss
@@ -751,21 +749,27 @@ fn do_sysret(user_context: &mut CpuContext) -> ! {
         fn do_exit_task() -> !;
     }
     if current!().status() != ThreadStatus::Exited {
-        // Restore the floating point registers
-        // Todo: Is it correct to do fxstor in kernel?
-        let fpregs = user_context.fpregs;
-        if (fpregs != ptr::null_mut()) {
-            if user_context.fpregs_on_heap == 1 {
-                let fpregs = unsafe { Box::from_raw(user_context.fpregs as *mut FpRegs) };
-                fpregs.restore();
-            } else {
+        if user_context.extra_context_ptr != ptr::null_mut() {
+            match user_context.extra_context {
+                ExtraContext::Fpregs => {
+                    let fpregs = user_context.extra_context_ptr as *mut FpRegs;
                     unsafe { fpregs.as_ref().unwrap().restore() };
+                    // The fpregs must be allocated on the heap
+                    drop(unsafe { Box::from_raw(user_context.extra_context_ptr as *mut FpRegs) });
                 }
+                ExtraContext::Xsave => {
+                    let xsave_area = user_context.extra_context_ptr;
+                    unsafe { (&*(xsave_area as *mut XsaveArea)).restore() };
+                }
+            }
+            user_context.extra_context_ptr = ptr::null_mut();
         }
         unsafe { __occlum_sysret(user_context) } // jump to user space
     } else {
-        if user_context.fpregs != ptr::null_mut() && user_context.fpregs_on_heap == 1 {
-            drop(unsafe { Box::from_raw(user_context.fpregs as *mut FpRegs) });
+        if user_context.extra_context_ptr != ptr::null_mut()
+            && matches!(user_context.extra_context, ExtraContext::Fpregs)
+        {
+            drop(unsafe { Box::from_raw(user_context.extra_context_ptr as *mut FpRegs) });
         }
         unsafe { do_exit_task() } // exit enclave
     }
@@ -828,6 +832,12 @@ fn do_msync(addr: usize, size: usize, flags: u32) -> Result<isize> {
     Ok(0)
 }
 
+fn do_madvice(addr: usize, length: usize, advice: i32) -> Result<isize> {
+    let flags = MadviceFlags::from_i32(advice)?;
+    vm::do_madvice(addr, length, flags)?;
+    Ok(0)
+}
+
 fn do_sysinfo(info: *mut sysinfo_t) -> Result<isize> {
     check_mut_ptr(info)?;
     let info = unsafe { &mut *info };
@@ -977,7 +987,6 @@ fn handle_unsupported() -> Result<isize> {
 /// Floating point registers
 ///
 /// Note. The area is used to save the fxsave result
-//#[derive(Clone, Copy)]
 #[repr(C)]
 pub struct FpRegs {
     inner: Aligned<A16, [u8; 512]>,
@@ -1017,6 +1026,41 @@ impl FpRegs {
     }
 }
 
+#[derive(Debug)]
+#[repr(C)]
+pub struct XsaveArea {
+    inner: Aligned<A64, [u8; 4096]>,
+}
+
+impl XsaveArea {
+    // The first 512 bytes of the xsave area are used for the FP registers
+    const FXSAVE_AREA_LEN: usize = 512;
+
+    /// Save the current CPU extended states to a new XsaveArea instance
+    pub fn save() -> Self {
+        let mut xsave_area = MaybeUninit::<Self>::uninit();
+        unsafe {
+            save_xregs(xsave_area.as_mut_ptr() as *mut u8);
+            xsave_area.assume_init()
+        }
+    }
+
+    /// Restore the current CPU extended states from this XsaveArea instance
+    pub fn restore(&self) {
+        unsafe {
+            restore_xregs(self.inner.as_ptr());
+        }
+    }
+
+    pub fn get_fpregs(&self) -> FpRegs {
+        unsafe { FpRegs::from_slice(&self.inner[..Self::FXSAVE_AREA_LEN]) }
+    }
+
+    pub fn set_fpregs_area(&mut self, fpregs: FpRegs) {
+        self.inner[..Self::FXSAVE_AREA_LEN].copy_from_slice(fpregs.as_slice())
+    }
+}
+
 /// Cpu context.
 ///
 /// Note. The definition of this struct must be kept in sync with the assembly
@@ -1042,8 +1086,21 @@ pub struct CpuContext {
     pub rsp: u64,
     pub rip: u64,
     pub rflags: u64,
-    pub fpregs_on_heap: u64,
-    pub fpregs: *mut FpRegs,
+    pub extra_context: ExtraContext,
+    pub extra_context_ptr: *mut u8,
+}
+
+#[repr(u64)]
+#[derive(Clone, Copy, Debug)]
+pub enum ExtraContext {
+    Fpregs = 0,
+    Xsave = 1,
+}
+
+impl Default for ExtraContext {
+    fn default() -> Self {
+        Self::Fpregs
+    }
 }
 
 impl CpuContext {
@@ -1067,8 +1124,8 @@ impl CpuContext {
             rsp: src.rsp,
             rip: src.rip,
             rflags: src.rflags,
-            fpregs_on_heap: 0,
-            fpregs: ptr::null_mut(),
+            extra_context: Default::default(),
+            extra_context_ptr: ptr::null_mut(),
         }
     }
 }
@@ -1082,14 +1139,15 @@ impl CpuContext {
 // pointer that is not safe to use by external modules. In our case, the
 // FpRegs pointer will not be used actually. So the Rust warning is a
 // false alarm. We suppress it here.
-pub unsafe fn exception_interrupt_syscall_c_abi(
-    num: u32,
-    info: *mut c_void,
-    fpregs: *mut FpRegs,
-) -> u32 {
+pub unsafe fn exception_interrupt_syscall_c_abi(num: u32, info: *mut c_void) -> u32 {
     #[allow(improper_ctypes)]
     extern "C" {
-        pub fn __occlum_syscall_c_abi(num: u32, info: *mut c_void, fpregs: *mut FpRegs) -> u32;
+        pub fn __occlum_syscall_c_abi(num: u32, info: *mut c_void) -> u32;
     }
-    __occlum_syscall_c_abi(num, info, fpregs)
+    __occlum_syscall_c_abi(num, info)
+}
+
+extern "C" {
+    pub fn save_xregs(save_area: *mut u8);
+    pub fn restore_xregs(save_area: *const u8);
 }

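Note: a minimal sketch of how the new `XsaveArea` API composes. The flow mirrors what `do_rt_sigreturn` does above, but the helper itself is hypothetical, not part of the commit:

    // Hypothetical helper: patch only the legacy x87/SSE state (the first
    // 512 bytes) inside a saved XSAVE area, leaving AVX state untouched.
    fn patch_fp_state(saved: &mut XsaveArea, user_fpregs_bytes: &[u8]) {
        let fpregs = unsafe { FpRegs::from_slice(user_fpregs_bytes) };
        saved.set_fpregs_area(fpregs);
        // A later `saved.restore()` reloads the whole area into the CPU.
    }
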
@@ -52,8 +52,8 @@ __occlum_syscall_linux_abi:
     // Save the target CPU state when `call __occlum_syscall` is returned in
     // a CpuContext struct. The registers are saved in the reverse order of
     // the fields in CpuContext.
-    pushq $0 // default fpregs is NULL
-    pushq $0 // default fpregs is allocated on stack
+    pushq $0 // default extra_context_ptr is NULL
+    pushq $0 // default extra_context is floating point registers
     pushfq
     push %rcx // save %rip
     push %r11 // save %rsp

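Note: `pushq $0` is a valid seed for both new tail fields of CpuContext, because `ExtraContext` is `#[repr(u64)]` with `Fpregs = 0`, and a null `extra_context_ptr` is the "nothing to restore" sentinel that `do_sysret` checks. A hypothetical compile-time guard (not in the commit):

    const _: () = assert!(ExtraContext::Fpregs as u64 == 0);
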
@@ -100,16 +100,9 @@ impl Chunk {
             *options.perms(),
             options.initializer().backed_file(),
             current!().process().pid(),
-        );
-        // Initialize the memory of the new range
-        unsafe {
-            let buf = vm_range.as_slice_mut();
-            options.initializer().init_slice(buf)?;
-        }
-        // Set memory permissions
-        if !options.perms().is_default() {
-            VMPerms::apply_perms(&vm_area, vm_area.perms());
-        }
+        )
+        // Initialize the memory of the new range
+        .init_memory(options)?;
         Ok(Self::new_chunk_with_vma(vm_area))
     }
 
@@ -238,6 +231,30 @@ impl Chunk {
         }
     }
 
+    pub fn handle_page_fault(
+        &self,
+        rip: usize,
+        pf_addr: usize,
+        errcd: u32,
+        kernel_triggers: bool,
+    ) -> Result<()> {
+        let internal = &self.internal;
+        match self.internal() {
+            ChunkType::SingleVMA(vma) => {
+                let mut vma = vma.lock().unwrap();
+                debug_assert!(vma.contains(pf_addr));
+                return vma.handle_page_fault(rip, pf_addr, errcd, kernel_triggers);
+            }
+            ChunkType::MultiVMA(internal_manager) => {
+                return internal_manager
+                    .lock()
+                    .unwrap()
+                    .chunk_manager
+                    .handle_page_fault(rip, pf_addr, errcd, kernel_triggers);
+            }
+        }
+    }
+
     pub fn is_free_range(&self, request_range: &VMRange) -> bool {
         match self.internal() {
             ChunkType::SingleVMA(_) => false, // single-vma chunk can't be free

@@ -63,11 +63,13 @@ use std::fmt;
 
 mod chunk;
 mod free_space_manager;
+mod page_tracker;
 mod process_vm;
 mod shm_manager;
 mod user_space_vm;
 mod vm_area;
 mod vm_chunk_manager;
+mod vm_epc;
 mod vm_layout;
 mod vm_manager;
 mod vm_perms;
@@ -77,9 +79,12 @@ mod vm_util;
 use self::vm_layout::VMLayout;
 
 pub use self::chunk::{ChunkRef, ChunkType};
-pub use self::process_vm::{MMapFlags, MRemapFlags, MSyncFlags, ProcessVM, ProcessVMBuilder};
+pub use self::process_vm::{
+    MMapFlags, MRemapFlags, MSyncFlags, MadviceFlags, ProcessVM, ProcessVMBuilder,
+};
 pub use self::user_space_vm::USER_SPACE_VM_MANAGER;
 pub use self::vm_area::VMArea;
+pub use self::vm_epc::enclave_page_fault_handler;
 pub use self::vm_manager::MunmapChunkFlag;
 pub use self::vm_perms::VMPerms;
 pub use self::vm_range::VMRange;
@@ -154,4 +159,9 @@ pub fn do_msync(addr: usize, size: usize, flags: MSyncFlags) -> Result<()> {
     current!().vm().msync(addr, size)
 }
 
+pub fn do_madvice(addr: usize, length: usize, advice: MadviceFlags) -> Result<()> {
+    warn!("madvice is not supported. madvice flags:{:?}", advice);
+    Ok(())
+}
+
 pub const PAGE_SIZE: usize = 4096;

src/libos/src/vm/page_tracker.rs (new file, 488 lines)
@ -0,0 +1,488 @@
|
|||||||
|
use super::*;
|
||||||
|
|
||||||
|
use super::user_space_vm::USER_SPACE_VM_MANAGER;
|
||||||
|
use super::vm_util::{GB, KB, MB};
|
||||||
|
use bitvec::vec::BitVec;
|
||||||
|
use util::sync::RwLock;
|
||||||
|
use vm_epc::EPCMemType;
|
||||||
|
|
||||||
|
// In SGX v2, there is no upper limit for the size of EPC. If the user configure 1 TB memory,
|
||||||
|
// and we only use one bit to track if the page is committed, that's 1 TB / 4 kB / 8 bit = 32 MB of memory.
|
||||||
|
// And the memory footprint will keep the same size during the whole libOS life cycle.
|
||||||
|
// In order to track the commit status of a huge number of pages, use two level tracking.
|
||||||
|
// In the first level, global level, we use `PAGE_CHUNK_UNIT` as the unit size for a page chunk.
|
||||||
|
// In the second level, we just use the page size as the unit size, and use one bit to represent if the page is committed.
|
||||||
|
// For example, if the user configure 64 TB memory, when a page is committed, the second level tracker will mark the correponding bit as 1.
|
||||||
|
// And when all the pages of a whole global page chunk are fully committed, the global level tracker will mark the page chunk as fully committed.
|
||||||
|
// And the corresponding tracker can be freed. In this way, we can use just several bytes to represent the commit status of a big chunk of memory.
|
||||||
|
// In a worse case, let's say there are several discrete global page chunks which are not not fully committed at the same time.
|
||||||
|
// And each of them will take some space in the memory. Within a memory-intensive case, we can
|
||||||
|
// commit the page by hand and make the global page chunk fully committed and free the page tracker.
|
||||||
|
|
||||||
|
// There are mainly three types of data structure to track the page status, from the top to the bottom:
|
||||||
|
// 1. PageChunkManager - Create for the whole user space. This sructure is used to manage the global paging status.
|
||||||
|
// 2. GlobalPageChunk - Denotes a chunk of pages. The actual unit of the PageChunkManager. It holds the paging status of a memory range. Stored only
|
||||||
|
// in the PageChunkManager. A newly created VMA should ask the corresponding GlobalPageChunk for the paging status. When all the pages recoreded by
|
||||||
|
// GlobalPageChunk are all committed, it will mark itself as "fully committed" and free the inner structure tracking the paging status. All the GlobalPageChunk
|
||||||
|
// records the VM ranges with the SAME size.
|
||||||
|
// 3. PageTracker - The real tracker of the paging status. Under the hood, it is a bitvec that tracks every page with a bit. There are mainly two types
|
||||||
|
// PageTracker:
|
||||||
|
// * GlobalTracker - Used by GlobalPageChunk to track the paging status. All records the VM range with the same size.
|
||||||
|
// * VMATracker - Used by VMA to track its paging status. Records different range size according to the VMA.
|
||||||
|
// Since the VM operations are mostly performed by VMA, the VMA tracker will update itself accordingly. And also update the corresponding GlobalTracker.
|
||||||
|
|
||||||
|
lazy_static! {
|
||||||
|
pub static ref USER_SPACE_PAGE_CHUNK_MANAGER: RwLock<PageChunkManager> =
|
||||||
|
RwLock::new(PageChunkManager::new(USER_SPACE_VM_MANAGER.range()));
|
||||||
|
}
|
||||||
|
|
||||||
|
const PAGE_CHUNK_UNIT: usize = 4 * MB;
|
||||||
|
const PAGE_CHUNK_PAGE_NUM: usize = PAGE_CHUNK_UNIT / PAGE_SIZE;
|
||||||
|
|
||||||
|
pub struct PageChunkManager {
|
||||||
|
// The total range that the manager manages.
|
||||||
|
range: VMRange,
|
||||||
|
// The page chunks
|
||||||
|
inner: HashMap<usize, GlobalPageChunk>, // K: Page chunk start address, V: Global page chunk
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PageChunkManager {
|
||||||
|
fn new(range: &VMRange) -> Self {
|
||||||
|
Self {
|
||||||
|
range: range.clone(),
|
||||||
|
inner: HashMap::new(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
// A chunk of pages. Memory space is precious. Don't put anything unnecessary.
|
||||||
|
struct GlobalPageChunk {
|
||||||
|
fully_committed: bool,
|
||||||
|
tracker: Option<Arc<RwLock<PageTracker>>>, // if this page chunk is fully committed, the tracker will be set to None.
|
||||||
|
}
|
||||||
|
|
||||||
|
impl GlobalPageChunk {
|
||||||
|
fn new(tracker: PageTracker) -> Self {
|
||||||
|
Self {
|
||||||
|
fully_committed: false,
|
||||||
|
tracker: Some(Arc::new(RwLock::new(tracker))),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(PartialEq, Clone, Debug)]
|
||||||
|
enum TrackerType {
|
||||||
|
GlobalTracker, // PAGE_CHUNK_UNIT size for global management to track the global paging status
|
||||||
|
VMATracker, // various size for different vma to track its own paging status
|
||||||
|
}
|
||||||
|
|
||||||
|
// Used for tracking the paging status of global tracker or VMA tracker
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub struct PageTracker {
|
||||||
|
type_: TrackerType,
|
||||||
|
range: VMRange,
|
||||||
|
inner: BitVec,
|
||||||
|
fully_committed: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Debug for PageTracker {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
|
f.debug_struct("PageTracker")
|
||||||
|
.field("type", &self.type_)
|
||||||
|
.field("range", &self.range)
|
||||||
|
.field("fully committed", &self.fully_committed)
|
||||||
|
.finish()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PageTracker {
    // Create a new page tracker for GlobalPageChunk.
    // When a new global tracker is needed, none of the pages are committed.
    fn new_global_tracker(start_addr: usize) -> Result<Self> {
        let range = VMRange::new_with_size(start_addr, PAGE_CHUNK_UNIT)?;

        let inner = bitvec![0; PAGE_CHUNK_PAGE_NUM];
        Ok(Self {
            type_: TrackerType::GlobalTracker,
            range,
            inner,
            fully_committed: false,
        })
    }

    pub fn new_vma_tracker(vm_range: &VMRange, epc_type: &EPCMemType) -> Result<Self> {
        trace!("new vma tracker, range = {:?}", vm_range);
        let page_num = vm_range.size() / PAGE_SIZE;
        let new_vma_tracker = match epc_type {
            EPCMemType::UserRegion => {
                let mut new_vma_tracker = Self {
                    type_: TrackerType::VMATracker,
                    range: vm_range.clone(),
                    inner: bitvec![0; page_num],
                    fully_committed: false,
                };

                // Skip sentry (zero-sized) ranges
                if page_num != 0 {
                    new_vma_tracker.get_committed_pages_from_global_tracker()?;
                }
                new_vma_tracker
            }
            EPCMemType::Reserved => {
                // For reserved memory, there is no need to update the global page tracker.
                // And there is no GlobalPageChunk for reserved memory.
                Self {
                    type_: TrackerType::VMATracker,
                    range: vm_range.clone(),
                    inner: bitvec![1; page_num],
                    fully_committed: true,
                }
            }
            _ => unreachable!(),
        };

        Ok(new_vma_tracker)
    }
    pub fn range(&self) -> &VMRange {
        &self.range
    }

    pub fn is_fully_committed(&self) -> bool {
        self.fully_committed
    }

    pub fn is_reserved_only(&self) -> bool {
        !self.fully_committed && self.inner.not_any()
    }

    pub fn is_partially_committed(&self) -> bool {
        !self.fully_committed && self.inner.any()
    }
    // Get all committed or uncommitted ranges of consecutive pages.
    // If committed is true, get all committed ranges.
    // If committed is false, get all uncommitted ranges.
    pub fn get_ranges(&self, committed: bool) -> Vec<VMRange> {
        if self.is_fully_committed() {
            if committed {
                return vec![self.range.clone()];
            } else {
                return Vec::new();
            }
        }
        if self.is_reserved_only() {
            if committed {
                return Vec::new();
            } else {
                return vec![self.range.clone()];
            }
        }

        let tracker_start_addr = self.range.start();
        let mut ret = Vec::new();
        let mut start = None;
        let mut end = None;

        for i in 0..self.inner.len() {
            if self.inner[i] == committed {
                match (start, end) {
                    // Meet a matching page for the first time. Update both the start and end markers.
                    (None, None) => {
                        start = Some(i);
                        end = Some(i);
                        // Reach the end of the tracker. Only one page.
                        if i == self.inner.len() - 1 {
                            let committed_range = VMRange::new_with_size(
                                tracker_start_addr + i * PAGE_SIZE,
                                PAGE_SIZE,
                            )
                            .unwrap();
                            ret.push(committed_range);
                        }
                    }
                    // The previous pages match. Update the end marker.
                    (Some(s), Some(e)) => {
                        end = Some(i);
                        // Reach the end of the tracker.
                        if i == self.inner.len() - 1 {
                            let committed_range = VMRange::new_with_size(
                                tracker_start_addr + s * PAGE_SIZE,
                                PAGE_SIZE * (i - s + 1),
                            )
                            .unwrap();
                            ret.push(committed_range);
                        }
                    }
                    _ => unreachable!(),
                }
            } else {
                match (start, end) {
                    (None, None) => {
                        // No matching pages yet.
                    }
                    (Some(s), Some(e)) => {
                        // Meet the first non-matching page after recording all the previous matching pages.
                        let committed_range = VMRange::new_with_size(
                            tracker_start_addr + s * PAGE_SIZE,
                            PAGE_SIZE * (e - s + 1),
                        )
                        .unwrap();
                        ret.push(committed_range);
                        // Reset markers
                        start = None;
                        end = None;
                    }
                    _ => {
                        unreachable!()
                    }
                }
            }
        }

        let total_size = ret.iter().fold(0, |a, b| a + b.size());
        if committed {
            trace!("get committed ranges = {:?}", ret);
            debug_assert!(total_size == self.inner.count_ones() * PAGE_SIZE);
        } else {
            trace!("get uncommitted ranges = {:?}", ret);
            debug_assert!(total_size == self.inner.count_zeros() * PAGE_SIZE);
        }

        ret
    }
    pub fn split_for_new_range(&mut self, new_range: &VMRange) {
        debug_assert!(self.range.is_superset_of(new_range));

        let new_start = new_range.start();
        let page_num = new_range.size() / PAGE_SIZE;

        let split_idx = (new_start - self.range.start()) / PAGE_SIZE;
        let mut new_inner = self.inner.split_off(split_idx);
        new_inner.truncate(page_num);

        trace!(
            "old range = {:?}, new_start = {:x}, idx = {:?}",
            self.range,
            new_start,
            split_idx
        );

        self.inner = new_inner;
        if self.inner.all() {
            self.fully_committed = true;
        }

        self.range = *new_range;
    }
    // Commit memory for the whole current VMA (VMATracker)
    pub fn commit_whole(&mut self, perms: VMPerms) -> Result<()> {
        debug_assert!(self.type_ == TrackerType::VMATracker);

        if self.is_fully_committed() {
            return Ok(());
        }

        // Commit EPC
        if self.is_reserved_only() {
            vm_epc::commit_memory(self.range().start(), self.range().size(), Some(perms)).unwrap();
        } else {
            debug_assert!(self.is_partially_committed());
            let uncommitted_ranges = self.get_ranges(false);
            for range in uncommitted_ranges {
                vm_epc::commit_memory(range.start(), range.size(), Some(perms)).unwrap();
            }
        }

        // Update the tracker
        self.inner.fill(true);
        self.fully_committed = true;

        self.set_committed_pages_for_global_tracker(self.range().start(), self.range().size());

        Ok(())
    }
    // Commit memory of a specific range for the current VMA (VMATracker). The range should be verified by the caller.
    pub fn commit_range(&mut self, range: &VMRange, new_perms: Option<VMPerms>) -> Result<()> {
        debug_assert!(self.type_ == TrackerType::VMATracker);
        debug_assert!(self.range().is_superset_of(range));

        vm_epc::commit_memory(range.start(), range.size(), new_perms)?;

        self.commit_pages_common(range.start(), range.size());
        self.set_committed_pages_for_global_tracker(range.start(), range.size());

        Ok(())
    }

    pub fn commit_memory_and_init_with_file(
        &mut self,
        range: &VMRange,
        file: &FileRef,
        file_offset: usize,
        new_perms: VMPerms,
    ) -> Result<()> {
        debug_assert!(self.type_ == TrackerType::VMATracker);
        debug_assert!(self.range().is_superset_of(range));

        vm_epc::commit_memory_and_init_with_file(
            range.start(),
            range.size(),
            file,
            file_offset,
            new_perms,
        )?;

        self.commit_pages_common(range.start(), range.size());
        self.set_committed_pages_for_global_tracker(range.start(), range.size());

        Ok(())
    }
    // The VMATracker gets the page commit status from the global tracker and updates itself.
    // This should be called when the VMATracker inits.
    fn get_committed_pages_from_global_tracker(&mut self) -> Result<()> {
        debug_assert!(self.type_ == TrackerType::VMATracker);
        let mut vma_tracker = self;
        let mut page_chunk_start = get_page_chunk_start_addr(vma_tracker.range().start());

        let range_end = vma_tracker.range().end();
        for page_chunk_addr in (page_chunk_start..range_end).step_by(PAGE_CHUNK_UNIT) {
            let manager = USER_SPACE_PAGE_CHUNK_MANAGER.read().unwrap();
            if let Some(page_chunk) = manager.inner.get(&page_chunk_addr) {
                if page_chunk.fully_committed {
                    // The global page chunk is fully committed. Commit the pages for the VMA tracker.
                    vma_tracker.commit_pages_common(page_chunk_addr, PAGE_CHUNK_UNIT);
                } else {
                    debug_assert!(page_chunk.tracker.is_some());
                    let global_tracker = page_chunk.tracker.as_ref().unwrap().read().unwrap();
                    global_tracker.set_committed_pages_for_vma_tracker(vma_tracker);
                }
                drop(manager);
            } else {
                // Not tracking this page chunk. Release the read lock and acquire the write lock for an update.
                drop(manager);
                // This page chunk is not tracked by the global tracker. Thus none of the pages are committed.
                let page_chunk = {
                    let global_page_tracker = PageTracker::new_global_tracker(page_chunk_addr)?;
                    GlobalPageChunk::new(global_page_tracker)
                };

                // There could be a data race here. But it's fine, because the ultimate state is the same.
                USER_SPACE_PAGE_CHUNK_MANAGER
                    .write()
                    .unwrap()
                    .inner
                    .insert(page_chunk_addr, page_chunk);
            }
        }

        Ok(())
    }
    // The VMATracker helps to update the global tracker based on its own paging status.
    // This should be called whenever the VMATracker updates and needs to sync with the GlobalTracker.
    fn set_committed_pages_for_global_tracker(&self, commit_start_addr: usize, commit_size: usize) {
        debug_assert!(self.type_ == TrackerType::VMATracker);

        let commit_end_addr = commit_start_addr + commit_size;
        let page_chunk_start_addr = get_page_chunk_start_addr(commit_start_addr);
        for page_chunk_addr in (page_chunk_start_addr..commit_end_addr).step_by(PAGE_CHUNK_UNIT) {
            let is_global_tracker_fully_committed = {
                // Find the corresponding page chunk
                let manager = USER_SPACE_PAGE_CHUNK_MANAGER.read().unwrap();
                let page_chunk = manager
                    .inner
                    .get(&page_chunk_addr)
                    .expect("this page chunk must exist");

                // Update the global page tracker
                if let Some(global_page_tracker) = &page_chunk.tracker {
                    let mut global_tracker = global_page_tracker.write().unwrap();
                    global_tracker.commit_pages_common(commit_start_addr, commit_size);
                    global_tracker.fully_committed
                } else {
                    // page_tracker is None, so the page chunk is fully committed. Go to the next chunk.
                    debug_assert!(page_chunk.fully_committed);
                    continue;
                }
            };

            // Free the global page tracker if fully committed
            if is_global_tracker_fully_committed {
                // Update the global page chunk manager. We need to acquire the write lock this time. There can be a data race because the lock
                // is dropped for a while before being acquired again. But it's fine, because the ultimate state is the same.
                let mut manager = USER_SPACE_PAGE_CHUNK_MANAGER.write().unwrap();
                if let Some(mut page_chunk) = manager.inner.get_mut(&page_chunk_addr) {
                    page_chunk.fully_committed = true;
                    page_chunk.tracker = None;
                } else {
                    warn!(
                        "the global page chunk with start addr: 0x{:x} has been freed already",
                        page_chunk_addr
                    );
                    unreachable!();
                }
            }
        }
    }
    // The GlobalTracker helps to update the VMATracker based on its own paging status.
    // This should be called when the VMATracker inits.
    fn set_committed_pages_for_vma_tracker(&self, vma_tracker: &mut PageTracker) {
        debug_assert!(self.type_ == TrackerType::GlobalTracker);
        debug_assert!(vma_tracker.type_ == TrackerType::VMATracker);

        let global_tracker = self;

        if let Some(intersection_range) = global_tracker.range().intersect(vma_tracker.range()) {
            let vma_tracker_page_id =
                (intersection_range.start() - vma_tracker.range().start()) / PAGE_SIZE;
            let global_tracker_page_id =
                (intersection_range.start() - global_tracker.range().start()) / PAGE_SIZE;
            let page_num = intersection_range.size() / PAGE_SIZE;

            vma_tracker.inner[vma_tracker_page_id..vma_tracker_page_id + page_num]
                .copy_from_bitslice(
                    &global_tracker.inner
                        [global_tracker_page_id..global_tracker_page_id + page_num],
                );
            if vma_tracker.inner.all() {
                vma_tracker.fully_committed = true;
            }
        } else {
            // There is no intersecting range; this should never be called. Weird.
            unreachable!();
        }
    }
    // Commit pages for the page tracker itself. This is a common method for both the VMATracker and the GlobalTracker.
    fn commit_pages_common(&mut self, start_addr: usize, size: usize) {
        debug_assert!(!self.fully_committed);

        if let Some(intersection_range) = {
            let range = VMRange::new_with_size(start_addr, size).unwrap();
            self.range.intersect(&range)
        } {
            trace!("commit for page tracker: {:?}", self);
            let page_start_id = (intersection_range.start() - self.range().start()) / PAGE_SIZE;
            let page_num = intersection_range.size() / PAGE_SIZE;
            self.inner[page_start_id..page_start_id + page_num].fill(true);
            if self.inner.all() {
                self.fully_committed = true;
            }
        } else {
            // There is no intersecting range; weird.
            unreachable!();
        }
    }
}

#[inline(always)]
fn get_page_chunk_start_addr(addr: usize) -> usize {
    align_down(addr, PAGE_CHUNK_UNIT)
}
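// A hedged, self-contained model of the `get_ranges` run-length scan above:
// collect the maximal runs of matching bits as (start_page, page_count) pairs.
// The helper is illustrative only and is not part of the real tracker API.
#[cfg(test)]
mod run_scan_model {
    use bitvec::prelude::*;

    fn runs(bits: &BitSlice) -> Vec<(usize, usize)> {
        let mut out = Vec::new();
        let mut start = None;
        for i in 0..bits.len() {
            match (bits[i], start) {
                (true, None) => start = Some(i), // first matching page of a run
                (false, Some(s)) => {
                    out.push((s, i - s)); // run ended; record it
                    start = None;
                }
                _ => {}
            }
        }
        if let Some(s) = start {
            out.push((s, bits.len() - s)); // run reaching the end of the tracker
        }
        out
    }

    #[test]
    fn committed_runs() {
        let bits = bits![0, 1, 1, 0, 1];
        assert_eq!(runs(bits), vec![(1, 2), (4, 1)]);
    }
}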
@ -6,7 +6,8 @@ use super::vm_area::VMArea;
use super::vm_manager::MunmapChunkFlag;
use super::vm_perms::VMPerms;
use super::vm_util::{
    FileBacked, VMInitializer, VMMapAddr, VMMapOptions, VMMapOptionsBuilder, VMRemapOptions,
    FileBacked, PagePolicy, VMInitializer, VMMapAddr, VMMapOptions, VMMapOptionsBuilder,
    VMRemapOptions,
};
use crate::config;
use crate::ipc::SHM_MANAGER;
@ -124,6 +125,8 @@ impl<'a, 'b> ProcessVMBuilder<'a, 'b> {
            .initializer(VMInitializer::ElfSpecific {
                elf_file: elf_file.file_ref().clone(),
            })
            // We only load loadable segments, so just commit the memory when allocating.
            .page_policy(PagePolicy::CommitNow)
            .build()
            .map_err(|e| {
                &self.handle_error_when_init(&chunks);
@ -152,6 +155,8 @@ impl<'a, 'b> ProcessVMBuilder<'a, 'b> {
            .size(heap_layout.size())
            .align(heap_layout.align())
            .perms(VMPerms::READ | VMPerms::WRITE)
            .page_policy(PagePolicy::CommitOnDemand)
            // .page_policy(PagePolicy::CommitNow)
            .build()
            .map_err(|e| {
                &self.handle_error_when_init(&chunks);
@ -171,8 +176,10 @@ impl<'a, 'b> ProcessVMBuilder<'a, 'b> {
        let stack_layout = &other_layouts[1];
        let vm_option = VMMapOptionsBuilder::default()
            .size(stack_layout.size())
            .align(heap_layout.align())
            .align(stack_layout.align())
            .perms(VMPerms::READ | VMPerms::WRITE)
            // There are cases that we can't handle when the #PF happens at the user's stack. Commit the stack memory now.
            .page_policy(PagePolicy::CommitNow)
            .build()
            .map_err(|e| {
                &self.handle_error_when_init(&chunks);
@ -537,11 +544,26 @@ impl ProcessVM {
                }
            }
        };

        let page_policy = {
            if flags.contains(MMapFlags::MAP_STACK) {
                // With MAP_STACK, the mmapped memory will be used as the user's stack. If not committed, a #PF can occur
                // when switching to user space and can't be handled correctly by us.
                PagePolicy::CommitNow
            } else if !flags.contains(MMapFlags::MAP_ANONYMOUS) {
                // Use the commit-now policy for file-backed mmap. We tried the commit-on-demand policy, but didn't get any performance gain at all.
                // However, the path for file-backed mmap with the commit-on-demand policy is ready. We can enable it whenever needed.
                PagePolicy::CommitNow
            } else {
                PagePolicy::CommitOnDemand
            }
        };
        let mmap_options = VMMapOptionsBuilder::default()
            .size(size)
            .addr(addr_option)
            .perms(perms)
            .initializer(initializer)
            .page_policy(page_policy)
            .build()?;
        let mmap_addr = USER_SPACE_VM_MANAGER.mmap(&mmap_options)?;
        Ok(mmap_addr)
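// In short (an illustrative recap of the policy selection above, not new
// logic): MAP_STACK and file-backed mmaps are committed eagerly with
// PagePolicy::CommitNow, while plain anonymous mmaps default to
// PagePolicy::CommitOnDemand and are populated from the #PF handler.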
@ -674,3 +696,33 @@ impl MSyncFlags {
        Ok(flags)
    }
}

#[allow(non_camel_case_types)]
#[repr(i32)]
#[derive(Debug)]
pub enum MadviceFlags {
    MADV_NORMAL = 0,
    MADV_RANDOM = 1,
    MADV_SEQUENTIAL = 2,
    MADV_WILLNEED = 3,
    MADV_DONTNEED = 4,
}

impl MadviceFlags {
    pub fn from_i32(raw: i32) -> Result<Self> {
        const MADV_NORMAL: i32 = 0;
        const MADV_RANDOM: i32 = 1;
        const MADV_SEQUENTIAL: i32 = 2;
        const MADV_WILLNEED: i32 = 3;
        const MADV_DONTNEED: i32 = 4;

        match raw {
            MADV_NORMAL => Ok(MadviceFlags::MADV_NORMAL),
            MADV_RANDOM => Ok(MadviceFlags::MADV_RANDOM),
            MADV_SEQUENTIAL => Ok(MadviceFlags::MADV_SEQUENTIAL),
            MADV_WILLNEED => Ok(MadviceFlags::MADV_WILLNEED),
            MADV_DONTNEED => Ok(MadviceFlags::MADV_DONTNEED),
            _ => return_errno!(ENOSYS, "unknown madvise flags"),
        }
    }
}
@ -206,8 +206,8 @@ impl ShmManager {
        let old_perms = old_vma.perms();
        if new_perms != old_perms {
            let perms = new_perms | old_perms;
            VMPerms::apply_perms(new_vma.range(), perms);
            new_vma.set_perms(perms);
            new_vma.modify_permissions_for_committed_pages(perms);
        }

        let inode_id = Self::inode_id_of(&new_vma);
@ -279,7 +279,7 @@ impl ShmManager {
        if perms == old_perms {
            return;
        }
        VMPerms::apply_perms(vma.range(), perms);
        vma.set_perms(perms);
        vma.modify_permissions_for_committed_pages(perms);
    }
}
@ -1,46 +1,50 @@
use super::*;
use super::vm_manager::VMManager;
use crate::config::LIBOS_CONFIG;
use crate::ctor::dtor;
use crate::ipc::SHM_MANAGER;
use crate::ipc::SYSTEM_V_SHM_MANAGER;
use crate::util::pku_util;

use std::ops::{Deref, DerefMut};
use vm_epc::SGXPlatform;
use vm_manager::VMManager;
use vm_perms::VMPerms;

const RSRV_MEM_PERM: MemPerm =
    MemPerm::from_bits_truncate(MemPerm::READ.bits() | MemPerm::WRITE.bits());
const USER_SPACE_DEFAULT_MEM_PERM: VMPerms = VMPerms::DEFAULT;

/// The virtual memory manager for the entire user space
pub struct UserSpaceVMManager(VMManager);
pub struct UserSpaceVMManager {
    inner: VMManager,
    sgx_platform: SGXPlatform,
}

impl UserSpaceVMManager {
    fn new() -> Result<UserSpaceVMManager> {
        let rsrv_mem_size = LIBOS_CONFIG.resource_limits.user_space_size;
        let vm_range = unsafe {
            // TODO: The current sgx_alloc_rsrv_mem implementation will commit all the pages of the desired size, which will consume
            // a lot of time. When EDMM is supported, there is no need to commit all the pages at the initialization stage. A function
            // which reserves memory but does not commit pages should be provided then.
            let ptr = sgx_alloc_rsrv_mem(rsrv_mem_size);
            if ptr.is_null() {
                return_errno!(ENOMEM, "run out of reserved memory");
            }

            // Without EDMM support, and with ReservedMemExecutable set to 1, the reserved memory will be RWX, and we can't change the reserved memory permission.
            // With EDMM support, the reserved memory permission is RW by default, and we can change the permissions when needed.
            let addr = ptr as usize;
            debug!(
                "allocated rsrv addr is 0x{:x}, len is 0x{:x}",
                addr, rsrv_mem_size
            );
            pku_util::pkey_mprotect_userspace_mem(addr, rsrv_mem_size, RSRV_MEM_PERM.bits());
            VMRange::new(addr, addr + rsrv_mem_size)?
        };

        let vm_manager = VMManager::init(vm_range)?;
        Ok(UserSpaceVMManager(vm_manager))

        let sgx_platform = SGXPlatform::new();
        let init_size = LIBOS_CONFIG.resource_limits.user_space_init_size;
        let max_size = LIBOS_CONFIG.resource_limits.user_space_max_size;

        let (userspace_vm_range, gap_range) = sgx_platform.alloc_user_space(init_size, max_size)?;

        info!(
            "user space allocated, range = {:?}, gap_range = {:?}",
            userspace_vm_range, gap_range
        );

        // Use pkey_mprotect to set the whole userspace to R/W permissions. If the user specifies a new
        // permission, the mprotect ocall will update the permission.
        pku_util::pkey_mprotect_userspace_mem(
            &userspace_vm_range,
            gap_range.as_ref(),
            USER_SPACE_DEFAULT_MEM_PERM,
        );

        let vm_manager = VMManager::init(userspace_vm_range, gap_range)?;

        Ok(Self {
            inner: vm_manager,
            sgx_platform,
        })
    }

    pub fn get_total_size(&self) -> usize {
@ -52,51 +56,34 @@ impl UserSpaceVMManager {
    // be called after the main function. Static variables are still safe to visit at this time.
    #[dtor]
    fn free_user_space() {
        SHM_MANAGER.clean_when_libos_exit();
        let range = USER_SPACE_VM_MANAGER.range();
        assert!(USER_SPACE_VM_MANAGER.verified_clean_when_exit());
        let addr = range.start();
        let size = range.size();
        info!("free user space VM: {:?}", range);
        pku_util::clear_pku_when_libos_exit(addr, size, RSRV_MEM_PERM.bits());
        assert!(unsafe { sgx_free_rsrv_mem(addr as *const c_void, size) == 0 });

        info!("free user space at the end");
        SYSTEM_V_SHM_MANAGER.clean_when_libos_exit();
        let total_user_space_range = USER_SPACE_VM_MANAGER.range();
        let gap_range = USER_SPACE_VM_MANAGER.gap_range();
        assert!(USER_SPACE_VM_MANAGER.verified_clean_when_exit());
        let addr = total_user_space_range.start();
        let size = total_user_space_range.size();
        info!("free user space VM: {:?}", total_user_space_range);
        pku_util::clear_pku_when_libos_exit(
            total_user_space_range,
            gap_range.as_ref(),
            USER_SPACE_DEFAULT_MEM_PERM,
        );

        USER_SPACE_VM_MANAGER
            .sgx_platform
            .free_user_space(total_user_space_range, gap_range.as_ref());
    }

impl Deref for UserSpaceVMManager {
    type Target = VMManager;

    fn deref(&self) -> &Self::Target {
        &self.0
        &self.inner
    }
}

lazy_static! {
    pub static ref USER_SPACE_VM_MANAGER: UserSpaceVMManager = UserSpaceVMManager::new().unwrap();
}

bitflags! {
    struct MemPerm: i32 {
        const READ = 1;
        const WRITE = 2;
        const EXEC = 4;
    }
}

extern "C" {
    // Allocate a range of EPC memory from the reserved memory area with RW permission
    //
    // Parameters:
    //      Inputs: length [in]: Size of region to be allocated in bytes. Page aligned
    //      Return: Starting address of the new allocated memory area on success; otherwise NULL
    //
    fn sgx_alloc_rsrv_mem(length: usize) -> *const c_void;

    // Free a range of EPC memory from the reserved memory area
    //
    // Parameters:
    //      Inputs: addr[in]: Starting address of region to be freed. Page aligned.
    //              length[in]: The length of the memory to be freed in bytes. Page aligned
    //      Return: 0 on success; otherwise -1
    //
    fn sgx_free_rsrv_mem(addr: *const c_void, length: usize) -> i32;
}
@ -1,19 +1,28 @@
use super::*;

use super::page_tracker::PageTracker;
use super::vm_epc::EPCMemType;
use super::vm_perms::VMPerms;
use super::vm_range::VMRange;
use super::vm_util::FileBacked;
use super::vm_util::{FileBacked, PagePolicy, VMInitializer, VMMapOptions, GB, KB, MB};

use intrusive_collections::rbtree::{Link, RBTree};
use intrusive_collections::{intrusive_adapter, KeyAdapter};
use std::ops::{Deref, DerefMut};

#[derive(Clone, Debug, Default)]
// Commit memory size unit when the #PF occurs.
const COMMIT_SIZE_UNIT: usize = 4 * KB;
// Commit the whole VMA when this threshold is reached.
const PF_NUM_THRESHOLD: u64 = 3;
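// Taken together (illustrative summary): a demand-committed VMA pays one #PF
// per COMMIT_SIZE_UNIT (4 KB, a single page), and once pf_count reaches
// PF_NUM_THRESHOLD (3) the remaining pages are committed in one shot instead
// of being faulted in one by one.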
#[derive(Clone, Debug)]
pub struct VMArea {
    range: VMRange,
    perms: VMPerms,
    file_backed: Option<FileBacked>,
    access: VMAccess,
    pages: Option<PageTracker>, // Track the paging status of this VMA
    epc_type: EPCMemType,       // Track the type of the EPC to use specific APIs
    pf_count: u64,
}

#[derive(Clone, Debug, Eq, PartialEq)]
@ -32,11 +41,47 @@ impl VMArea {
        file_backed: Option<FileBacked>,
        pid: pid_t,
    ) -> Self {
        Self {
        let epc_type = EPCMemType::new(&range);
        let pages = {
            match epc_type {
                EPCMemType::Reserved => None,
                EPCMemType::UserRegion => {
                    let pages =
                        PageTracker::new_vma_tracker(&range, &EPCMemType::UserRegion).unwrap();
                    (!pages.is_fully_committed()).then_some(pages)
                }
            }
        };

        let new_vma = Self {
            range,
            perms,
            file_backed,
            access: VMAccess::Private(pid),
            pages,
            epc_type,
            pf_count: 0,
        };
        trace!("new vma = {:?}", new_vma);
        new_vma
    }

    fn new_with_page_tracker(
        range: VMRange,
        perms: VMPerms,
        file_backed: Option<FileBacked>,
        access: VMAccess,
        pages: Option<PageTracker>,
    ) -> VMArea {
        let epc_type = EPCMemType::new(&range);
        Self {
            range,
            perms,
            file_backed,
            access,
            pages,
            epc_type,
            pf_count: 0,
        }
    }
@ -49,30 +94,41 @@ impl VMArea {
        access: VMAccess,
    ) -> Self {
        debug_assert!(vma.is_superset_of(&new_range));
        let new_backed_file = vma.file_backed.as_ref().map(|file| {
            let mut new_file = file.clone();
            let file_offset = file.offset();
            let new_file_offset = if vma.start() < new_range.start() {
                let vma_offset = new_range.start() - vma.start();
                file_offset + vma_offset
            } else {
                let vma_offset = vma.start() - new_range.start();
                debug_assert!(file_offset >= vma_offset);
                file_offset - vma_offset
            };
            new_file.set_offset(new_file_offset);
            new_file
        });

        Self {
            range: new_range,
            perms: new_perms,
            file_backed: new_backed_file,
            access,
        }
    }

        let new_backed_file = if let Some(file) = &vma.file_backed {
            let mut new_file = file.clone();
            let file_offset = file.offset();

            debug_assert!(vma.start() <= new_range.start());
            let new_start_offset = new_range.start() - vma.start();
            let new_file_offset = file_offset + new_start_offset;

            new_file.set_offset(new_file_offset);
            Some(new_file)
        } else {
            None
        };

        let new_pages = {
            let mut new_pages = vma.pages.clone();

            if let Some(pages) = &mut new_pages {
                pages.split_for_new_range(&new_range);
                if pages.is_fully_committed() {
                    None
                } else {
                    new_pages
                }
            } else {
                None
            }
        };

        let new_vma =
            Self::new_with_page_tracker(new_range, new_perms, new_backed_file, access, new_pages);

        trace!("inherits vma: {:?}, create new vma: {:?}", vma, new_vma);
        new_vma
    }

    pub fn perms(&self) -> VMPerms {
@ -87,6 +143,13 @@ impl VMArea {
        &self.access
    }

    pub fn get_private_pid(&self) -> Option<pid_t> {
        match &self.access {
            VMAccess::Private(pid) => Some(*pid),
            VMAccess::Shared(_) => None,
        }
    }

    pub fn belong_to(&self, target_pid: pid_t) -> bool {
        match &self.access {
            VMAccess::Private(pid) => *pid == target_pid,
@ -105,9 +168,199 @@ impl VMArea {
        }
    }

    pub fn init_file(&self) -> Option<(&FileRef, usize)> {
    fn pages(&self) -> &PageTracker {
        debug_assert!(!self.is_fully_committed());
        self.pages.as_ref().unwrap()
    }

    fn pages_mut(&mut self) -> &mut PageTracker {
        debug_assert!(!self.is_fully_committed());
        self.pages.as_mut().unwrap()
    }

    // Get the pid for a private VMA
    pub fn pid(&self) -> pid_t {
        match self.access {
            VMAccess::Private(pid) => pid,
            VMAccess::Shared(_) => unreachable!(),
        }
    }

    pub fn is_reserved_only(&self) -> bool {
        if let Some(pages) = &self.pages {
            return pages.is_reserved_only();
        } else {
            false
        }
    }

    pub fn is_fully_committed(&self) -> bool {
        self.pages.is_none()
    }

    pub fn is_partially_committed(&self) -> bool {
        if let Some(pages) = &self.pages {
            return pages.is_partially_committed();
        } else {
            false
        }
    }

    pub fn init_memory(mut self, options: &VMMapOptions) -> Result<Self> {
        let mut vm_area = self;
        let page_policy = options.page_policy();

        // Commit pages if needed
        if !vm_area.is_fully_committed() && page_policy == &PagePolicy::CommitNow {
            vm_area.pages_mut().commit_whole(VMPerms::DEFAULT)?;
            vm_area.pages = None;
        }

        // Initialize committed memory
        if vm_area.is_partially_committed() {
            let committed = true;
            for range in vm_area.pages().get_ranges(committed) {
                vm_area.init_memory_internal(&range, Some(options.initializer()))?;
            }
        } else if vm_area.is_fully_committed() {
            // Initialize the memory of the new range
            unsafe {
                let buf = vm_area.range().as_slice_mut();
                options.initializer().init_slice(buf)?;
            }

            // Set memory permissions
            if !options.perms().is_default() {
                vm_area.modify_protection_force(None, vm_area.perms());
            }
        }
        // Do nothing if this vma has no committed memory

        Ok(vm_area)
    }

    pub fn flush_and_clean_memory(&self) -> Result<()> {
        let (need_flush, file, file_offset) = match self.writeback_file() {
            None => (false, None, None),
            Some((file_handle, offset)) => {
                if !file_handle.access_mode().unwrap().writable() {
                    (false, None, None)
                } else {
                    (true, Some(file_handle), Some(offset))
                }
            }
        };

        if self.is_fully_committed() {
            self.flush_and_clean_internal(self.range(), need_flush, file, file_offset);
        } else {
            let committed = true;
            for range in self.pages().get_ranges(committed) {
                self.flush_and_clean_internal(&range, need_flush, file, file_offset);
            }
        }

        Ok(())
    }

    fn flush_and_clean_internal(
        &self,
        target_range: &VMRange,
        need_flush: bool,
        file: Option<&FileRef>,
        file_offset: Option<usize>,
    ) {
        trace!("flush and clean committed range: {:?}", target_range);
        debug_assert!(self.range().is_superset_of(target_range));
        let buf = unsafe { target_range.as_slice_mut() };
        if !self.perms().is_default() {
            self.modify_protection_force(Some(&target_range), VMPerms::default());
        }

        if need_flush {
            let file_offset = file_offset.unwrap() + (target_range.start() - self.range.start());
            file.unwrap().write_at(file_offset, buf);
        }

        // Reset to zeros
        unsafe {
            buf.iter_mut().for_each(|b| *b = 0);
        }
    }

    pub fn modify_permissions_for_committed_pages(&self, new_perms: VMPerms) {
        if self.is_fully_committed() {
            self.modify_protection_force(None, new_perms);
        } else if self.is_partially_committed() {
            let committed = true;
            for range in self.pages().get_ranges(committed) {
                self.modify_protection_force(Some(&range), new_perms);
            }
        }
    }

    pub fn handle_page_fault(
        &mut self,
        rip: usize,
        pf_addr: usize,
        errcd: u32,
        kernel_triggers: bool,
    ) -> Result<()> {
        trace!("PF vma = {:?}", self);
        if (self.perms() == VMPerms::NONE)
            || (crate::exception::check_rw_bit(errcd) == false
                && !self.perms().contains(VMPerms::READ))
        {
            return_errno!(
                EACCES,
                "Page is set to None permission. This is user-intended"
            );
        }

        if crate::exception::check_rw_bit(errcd) && !self.perms().contains(VMPerms::WRITE) {
            return_errno!(
                EACCES, "Page is set to not contain WRITE permission but this PF is triggered by write. This is user-intended"
            )
        }

        if rip == pf_addr && !self.perms().contains(VMPerms::EXEC) {
            return_errno!(
                EACCES, "Page is set to not contain EXEC permission but this PF is triggered by execution. This is user-intended"
            )
        }

        if self.is_fully_committed() {
            // This vma has been committed by other threads already. Just return.
            info!("This vma has been committed by other threads already.");
            return Ok(());
        }

        if matches!(self.epc_type, EPCMemType::Reserved) {
            return_errno!(EINVAL, "reserved memory shouldn't trigger PF");
        }

        if kernel_triggers || self.pf_count >= PF_NUM_THRESHOLD {
            return self.commit_current_vma_whole();
        }

        self.pf_count += 1;
        // The returned commit_size can be 0 when other threads have already committed the PF-containing range but the vma is not fully committed yet.
        let commit_size = self.commit_once_for_page_fault(pf_addr).unwrap();

        trace!("page fault commit memory size = {:?}", commit_size);

        if commit_size == 0 {
            warn!("This PF has been handled by other threads already.");
        }

        info!("page fault handle success");

        Ok(())
    }

    pub fn backed_file(&self) -> Option<(&FileRef, usize)> {
        if let Some(file) = &self.file_backed {
            Some(file.init_file())
            Some(file.backed_file())
        } else {
            None
        }
    }
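// An illustrative recap of the #PF ladder in handle_page_fault() above:
// permission violations return EACCES (user-intended faults), an
// already-committed VMA returns early, kernel-triggered faults or
// pf_count >= PF_NUM_THRESHOLD commit the whole VMA, and everything else
// goes through commit_once_for_page_fault().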
@ -147,36 +400,51 @@ impl VMArea {
        Some(new_vma)
    }

    pub fn resize(&mut self, new_size: usize) {
        self.range.resize(new_size)
    }

    pub fn set_start(&mut self, new_start: usize) {
        let old_start = self.start();
        self.range.set_start(new_start);

        if let Some(file) = self.file_backed.as_mut() {
            if !file.need_write_back() {
                return;
            }
            // If the updates to the VMA need to write back to a file, then the
            // file offset must be adjusted according to the new start address.
            let offset = file.offset();
            if old_start < new_start {
                file.set_offset(offset + (new_start - old_start));
            } else {
                // The caller must guarantee that the new start makes sense
                debug_assert!(offset >= old_start - new_start);
                file.set_offset(offset - (old_start - new_start));
            }
        }
    }

    pub fn set_start(&mut self, new_start: usize) {
        let old_start = self.start();
        if new_start == old_start {
            return;
        }

        self.range.set_start(new_start);

        if new_start < old_start {
            // Extend this VMA
            let pages = {
                let pages = PageTracker::new_vma_tracker(&self.range, &self.epc_type).unwrap();
                (!pages.is_fully_committed()).then_some(pages)
            };
            self.pages = pages;
        } else {
            // Split this VMA
            debug_assert!(new_start > old_start);
            if let Some(pages) = &mut self.pages {
                pages.split_for_new_range(&self.range);
                if pages.is_fully_committed() {
                    self.pages = None;
                }
            }
        }

        if let Some(file) = self.file_backed.as_mut() {
            // If the updates to the VMA need to write back to a file, then the
            // file offset must be adjusted according to the new start address.
            Self::set_file_offset(file, new_start, old_start);
        }
    }

    fn set_file_offset(file: &mut FileBacked, new_start_offset: usize, old_start_offset: usize) {
        let offset = file.offset();
        if old_start_offset < new_start_offset {
            file.set_offset(offset + (new_start_offset - old_start_offset));
        } else {
            // The caller must guarantee that the new start makes sense
            debug_assert!(offset >= old_start_offset - new_start_offset);
            file.set_offset(offset - (old_start_offset - new_start_offset));
        }
    }

    pub fn is_the_same_to(&self, other: &VMArea) -> bool {
        if self.access() != other.access() {
            return false;
        }

        if self.range() != other.range() {
            return false;
        }
@ -185,6 +453,10 @@ impl VMArea {
            return false;
        }

        if self.access() != other.access() {
            return false;
        }

        let self_writeback_file = self.writeback_file();
        let other_writeback_file = other.writeback_file();
        match (self_writeback_file, other_writeback_file) {
@ -199,6 +471,13 @@
    pub fn set_end(&mut self, new_end: usize) {
        self.range.set_end(new_end);
        let pages = if self.range.size() > 0 {
            let pages = PageTracker::new_vma_tracker(&self.range, &self.epc_type).unwrap();
            (!pages.is_fully_committed()).then_some(pages)
        } else {
            None
        };
        self.pages = pages;
    }

    pub fn can_merge_vmas(left: &VMArea, right: &VMArea) -> bool {
@ -208,10 +487,6 @@
        if left.size() == 0 || right.size() == 0 {
            return false;
        }
        // The two VMAs must be owned by the same process
        if left.access() != right.access() {
            return false;
        }
        // The two VMAs must border with each other
        if left.end() != right.start() {
            return false;
@ -220,6 +495,15 @@
        if left.perms() != right.perms() {
            return false;
        }
        // The two VMAs must be privately owned by the same process.
        // Return false if either is None, or if both are Some but the two private pids differ.
        let private_access = left.get_private_pid().zip(right.get_private_pid());
        if private_access.is_none() {
            return false;
        }
        if private_access.is_some_and(|(left_pid, right_pid)| left_pid != right_pid) {
            return false;
        }

        // If the two VMAs have write-back files, the files must be the same and
        // the two file regions must be continuous.
@ -238,12 +522,12 @@
    }

    /// Flush a file-backed VMA to its file. This has no effect on anonymous VMA.
    pub fn flush_backed_file(&self) {
    pub fn flush_committed_backed_file(&self) {
        self.flush_backed_file_with_cond(|_| true)
        self.flush_committed_backed_file_with_cond(|_| true)
    }

    /// Same as `flush_backed_file()`, except that an extra condition on the file needs to be satisfied.
    /// Same as `flush_committed_backed_file()`, except that an extra condition on the file needs to be satisfied.
    pub fn flush_backed_file_with_cond<F: Fn(&FileRef) -> bool>(&self, cond_fn: F) {
    pub fn flush_committed_backed_file_with_cond<F: Fn(&FileRef) -> bool>(&self, cond_fn: F) {
        let (file, file_offset) = match self.writeback_file() {
            None => return,
            Some((file_and_offset)) => file_and_offset,
@ -258,7 +542,16 @@
        if !cond_fn(file) {
            return;
        }
        if self.is_fully_committed() {
            file.write_at(file_offset, unsafe { self.as_slice() });
        } else {
            let committed = true;
            let vm_range_start = self.range().start();
            for range in self.pages().get_ranges(committed) {
                let file_offset = file_offset + (range.start() - vm_range_start);
                file.write_at(file_offset, unsafe { range.as_slice() });
            }
        }
    }

    pub fn is_shared(&self) -> bool {
@ -310,6 +603,198 @@ impl VMArea {
    pub fn inherits_access_from(&mut self, vma: &VMArea) {
        self.access = vma.access().clone()
    }

    // The current implementation with "unwrap()" can help us find the error quickly by panicking directly. Also, restoring the VM state
    // when this function fails would require some work and is not that simple.
    // TODO: Return a Result instead of "unwrap()" in this function.
    fn modify_protection_force(&self, protect_range: Option<&VMRange>, new_perms: VMPerms) {
        let protect_range = protect_range.unwrap_or_else(|| self.range());

        self.epc_type
            .modify_protection(protect_range.start(), protect_range.size(), new_perms)
            .unwrap()
    }

    // With an initializer, the memory should be committed already.
    // Without an initializer, the memory needs to be committed and initialized.
    fn init_memory_internal(
        &mut self,
        target_range: &VMRange,
        initializer: Option<&VMInitializer>,
    ) -> Result<()> {
        debug_assert!(self.range().is_superset_of(target_range));
        trace!("init range = {:?}", target_range);
        let perms = self.perms();
        if let Some(initializer) = initializer {
            match initializer {
                VMInitializer::FileBacked { file } => {
                    let (file, offset) = file.backed_file();
                    let vma_range_start = self.range.start();

                    let init_file_offset = offset + (target_range.start() - vma_range_start);

                    self.init_file_backed_mem(target_range, &file, init_file_offset, perms)?;
                }
                VMInitializer::DoNothing() => {
                    if !self.perms().is_default() {
                        self.modify_protection_force(Some(target_range), perms);
                    }
                }
                VMInitializer::FillZeros() => {
                    unsafe {
                        let buf = target_range.as_slice_mut();
                        buf.iter_mut().for_each(|b| *b = 0);
                    }
                    if !perms.is_default() {
                        self.modify_protection_force(Some(target_range), perms);
                    }
                }
                _ => todo!(),
            }
        } else {
            // No initializer, #PF triggered.
            let init_file = self
                .backed_file()
                .map(|(file, offset)| (file.clone(), offset));
            if let Some((file, offset)) = init_file {
                let vma_range_start = self.range.start();

                let init_file_offset = offset + (target_range.start() - vma_range_start);

                self.pages
                    .as_mut()
                    .unwrap()
                    .commit_memory_and_init_with_file(
                        target_range,
                        &file,
                        init_file_offset,
                        perms,
                    )?;
            } else {
                // PF triggered, no file-backed memory, just modify protection
                self.pages
                    .as_mut()
                    .unwrap()
                    .commit_range(target_range, Some(perms))?;
            }
        }

        Ok(())
    }

    fn init_file_backed_mem(
        &mut self,
        target_range: &VMRange,
        file: &FileRef,
        file_offset: usize,
        new_perm: VMPerms,
    ) -> Result<()> {
        if !file.access_mode().unwrap().readable() {
            return_errno!(EBADF, "file is not readable");
        }

        let buf = unsafe { target_range.as_slice_mut() };
        let file_size = file.metadata().unwrap().size;

        let len = file
            .read_at(file_offset, buf)
            .map_err(|_| errno!(EACCES, "failed to init memory from file"))?;

        if !new_perm.is_default() {
            self.modify_protection_force(Some(target_range), new_perm);
        }

        Ok(())
    }

    fn get_commit_once_size(&self) -> usize {
        COMMIT_SIZE_UNIT
    }

    fn commit_once_for_page_fault(&mut self, pf_addr: usize) -> Result<usize> {
        debug_assert!(!self.is_fully_committed());
        let mut early_return = false;
        let mut total_commit_size = 0;
        let vma_range_start = self.range.start();
        let permission = self.perms();
        let committed = false;
        let mut uncommitted_ranges = self.pages().get_ranges(committed);
        let commit_once_size = self.get_commit_once_size();

        for range in uncommitted_ranges
            .iter_mut()
            .skip_while(|range| !range.contains(pf_addr))
        {
            // Skip until we first reach the range which contains the pf_addr
            if total_commit_size == 0 {
                debug_assert!(range.contains(pf_addr));
                range.set_start(align_down(pf_addr, PAGE_SIZE));
                range.resize(std::cmp::min(range.size(), commit_once_size));
            } else if range.size() + total_commit_size > commit_once_size {
                // This is not the first commit. Try to commit until reaching the commit_once_size
                range.resize(commit_once_size - total_commit_size);
            }

            // We don't handle file-backed memory here
            debug_assert!(self.backed_file().is_none());
            self.init_memory_internal(&range, None)?;

            total_commit_size += range.size();
            if total_commit_size >= commit_once_size {
                break;
            }
        }

        if self.pages().is_fully_committed() {
            trace!("vma is fully committed");
            self.pages = None;
        }

        Ok(total_commit_size)
    }

    // Only used to handle a PF triggered by the kernel
    fn commit_current_vma_whole(&mut self) -> Result<()> {
        debug_assert!(!self.is_fully_committed());
        debug_assert!(self.backed_file().is_none());

        let mut uncommitted_ranges = self.pages.as_ref().unwrap().get_ranges(false);
        for range in uncommitted_ranges {
            self.init_memory_internal(&range, None).unwrap();
        }
        self.pages = None;

        Ok(())
    }

    // TODO: We can re-enable this when we support lazily extending permissions.
    #[allow(dead_code)]
    fn page_fault_handler_extend_permission(&mut self, pf_addr: usize) -> Result<()> {
        let permission = self.perms();

        // This is intended by the application.
        if permission == VMPerms::NONE {
            return_errno!(EPERM, "trying to access PROT_NONE memory");
        }

        if self.is_fully_committed() {
            self.modify_protection_force(None, permission);
            return Ok(());
        }

        let committed = true;
        let committed_ranges = self.pages().get_ranges(committed);
        for range in committed_ranges.iter() {
            if !range.contains(pf_addr) {
                continue;
            }

            self.epc_type
                .modify_protection(range.start(), range.size(), permission)?;
        }

        Ok(())
    }
}

impl Deref for VMArea {
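// A worked example of the commit-once path above, assuming the 4 KB
// COMMIT_SIZE_UNIT: in a 64 KB commit-on-demand VMA, the first fault commits
// just the 4 KB page containing pf_addr, the next two faults commit one page
// each, and the fourth fault trips PF_NUM_THRESHOLD so that
// commit_current_vma_whole() commits everything that remains.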
@ -83,16 +83,7 @@ impl ChunkManager {
                continue;
            }

            vma.flush_backed_file();

            if !vma.perms().is_default() {
                VMPerms::apply_perms(vma, VMPerms::default());
            }

            unsafe {
                let buf = vma.as_slice_mut();
                buf.iter_mut().for_each(|b| *b = 0)
            }

            vma.flush_and_clean_memory().unwrap();

            self.free_manager.add_range_back_to_free_manager(vma);
            self.free_size += vma.size();
@ -110,6 +101,7 @@ impl ChunkManager {
        if let VMMapAddr::Force(addr) = addr {
            self.munmap(addr, size)?;
        }
        trace!("mmap options = {:?}", options);

        // Find and allocate a new range for this mmap request
        let new_range = self
@ -117,27 +109,29 @@ impl ChunkManager {
             .find_free_range_internal(size, align, addr)?;
         let new_addr = new_range.start();
         let current_pid = current!().process().pid();
-        let new_vma = VMArea::new(
-            new_range,
-            *options.perms(),
-            options.initializer().backed_file(),
-            current_pid,
-        );
-
-        // Initialize the memory of the new range
-        let buf = unsafe { new_vma.as_slice_mut() };
-        let ret = options.initializer().init_slice(buf);
-        if let Err(e) = ret {
-            // Return the free range before return with error
-            self.free_manager
-                .add_range_back_to_free_manager(new_vma.range());
-            return_errno!(e.errno(), "failed to mmap");
-        }
-
-        // Set memory permissions
-        if !options.perms().is_default() {
-            VMPerms::apply_perms(&new_vma, new_vma.perms());
-        }
+        let new_vma = {
+            let new_vma = VMArea::new(
+                new_range,
+                *options.perms(),
+                options.initializer().backed_file(),
+                current_pid,
+            )
+            .init_memory(options);
+
+            if new_vma.is_err() {
+                let error = new_vma.err().unwrap();
+                error!("init memory failure: {}", error.backtrace());
+                let range = VMRange::new_with_size(new_addr, size).unwrap();
+                self.free_manager
+                    .add_range_back_to_free_manager(&range)
+                    .unwrap();
+                return Err(error);
+            }
+
+            new_vma.unwrap()
+        };
+        trace!("new vma is ready");
         self.free_size -= new_vma.size();
         // After initializing, we can safely insert the new VMA
         self.vmas.insert(VMAObj::new_vma_obj(new_vma));
|
|||||||
Some(intersection_vma) => intersection_vma,
|
Some(intersection_vma) => intersection_vma,
|
||||||
};
|
};
|
||||||
|
|
||||||
// File-backed VMA needs to be flushed upon munmap
|
intersection_vma.flush_and_clean_memory()?;
|
||||||
intersection_vma.flush_backed_file();
|
|
||||||
if !&intersection_vma.perms().is_default() {
|
|
||||||
VMPerms::apply_perms(&intersection_vma, VMPerms::default());
|
|
||||||
}
|
|
||||||
|
|
||||||
if vma.range() == intersection_vma.range() {
|
if vma.range() == intersection_vma.range() {
|
||||||
// Exact match. Just remove.
|
// Exact match. Just remove.
|
||||||
@ -194,13 +184,6 @@ impl ChunkManager {
             }
         }

-        // Reset zero
-        unsafe {
-            trace!("intersection vma = {:?}", intersection_vma);
-            let buf = intersection_vma.as_slice_mut();
-            buf.iter_mut().for_each(|b| *b = 0)
-        }
-
         self.free_manager
             .add_range_back_to_free_manager(intersection_vma.range());
         self.free_size += intersection_vma.size();
@ -306,8 +289,7 @@ impl ChunkManager {
             if intersection_vma.range() == containing_vma.range() {
                 // The whole containing_vma is mprotected
                 containing_vma.set_perms(new_perms);
-                VMPerms::apply_perms(&containing_vma, containing_vma.perms());
-                trace!("containing_vma = {:?}", containing_vma);
+                containing_vma.modify_permissions_for_committed_pages(containing_vma.perms());
                 containing_vmas.replace_with(VMAObj::new_vma_obj(containing_vma));
                 containing_vmas.move_next();
                 continue;
@ -325,13 +307,13 @@ impl ChunkManager {
             let protect_end = protect_range.end();

             // New VMA
-            let new_vma = VMArea::inherits_file_from(
+            let mut new_vma = VMArea::inherits_file_from(
                 &containing_vma,
                 protect_range,
                 new_perms,
                 VMAccess::Private(current_pid),
             );
-            VMPerms::apply_perms(&new_vma, new_vma.perms());
+            new_vma.modify_permissions_for_committed_pages(new_vma.perms());
             let new_vma = VMAObj::new_vma_obj(new_vma);

             // Another new VMA
@ -356,15 +338,16 @@ impl ChunkManager {
                     break;
                 }
                 1 => {
-                    let remain_vma = remain_vmas.pop().unwrap();
+                    let mut remain_vma = remain_vmas.pop().unwrap();

-                    let new_vma = VMArea::inherits_file_from(
+                    let mut new_vma = VMArea::inherits_file_from(
                         &containing_vma,
                         intersection_vma.range().clone(),
                         new_perms,
                         VMAccess::Private(current_pid),
                     );
-                    VMPerms::apply_perms(&new_vma, new_vma.perms());
+
+                    new_vma.modify_permissions_for_committed_pages(new_vma.perms());

                     if remain_vma.start() == containing_vma.start() {
                         // mprotect right side of the vma
@ -374,6 +357,7 @@ impl ChunkManager {
                         debug_assert!(remain_vma.end() == containing_vma.end());
                         containing_vma.set_start(remain_vma.start());
                     }
+                    debug_assert!(containing_vma.range() == remain_vma.range());

                     containing_vmas.replace_with(VMAObj::new_vma_obj(containing_vma));
                     containing_vmas.insert(VMAObj::new_vma_obj(new_vma));
@ -401,7 +385,7 @@ impl ChunkManager {
                 None => continue,
                 Some(vma) => vma,
             };
-            vma.flush_backed_file();
+            vma.flush_committed_backed_file();
         }
         Ok(())
     }
@ -409,9 +393,11 @@ impl ChunkManager {
     /// Sync all shared, file-backed memory mappings of the given file by flushing
     /// the memory content to the file.
     pub fn msync_by_file(&mut self, sync_file: &FileRef) {
-        for vma_obj in &self.vmas {
         let is_same_file = |file: &FileRef| -> bool { Arc::ptr_eq(&file, &sync_file) };
-            vma_obj.vma().flush_backed_file_with_cond(is_same_file);
+        for vma_obj in &self.vmas {
+            vma_obj
+                .vma()
+                .flush_committed_backed_file_with_cond(is_same_file);
         }
     }
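For readers unfamiliar with the closure above: `Arc::ptr_eq` compares the allocations behind two reference-counted handles, not their contents, which is exactly the identity test msync needs. A standalone sketch with a plain `Arc<T>` (assumed to mirror how `FileRef` behaves):

use std::sync::Arc;

// True only when both handles point at the same underlying allocation.
fn is_same_file<T>(a: &Arc<T>, b: &Arc<T>) -> bool {
    Arc::ptr_eq(a, b)
}

fn main() {
    let f1 = Arc::new("file");
    let f2 = Arc::clone(&f1);
    let f3 = Arc::new("file"); // equal contents, but a different allocation
    assert!(is_same_file(&f1, &f2));
    assert!(!is_same_file(&f1, &f3));
}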
@ -428,6 +414,34 @@ impl ChunkManager {
         return Ok(vma.range().clone());
     }

+    pub fn handle_page_fault(
+        &mut self,
+        rip: usize,
+        pf_addr: usize,
+        errcd: u32,
+        kernel_triggers: bool,
+    ) -> Result<()> {
+        trace!(
+            "handle_page_fault chunk manager range = {:?}, free_size = {:?}",
+            self.range,
+            self.free_size
+        );
+        let mut vma_cursor = self.vmas.upper_bound_mut(Bound::Included(&pf_addr));
+        if vma_cursor.is_null() {
+            return_errno!(ENOMEM, "no mmap region contains the address");
+        }
+        let vma = vma_cursor.get().unwrap().vma();
+        if vma.pid() != current!().process().pid() || !vma.contains(pf_addr) {
+            return_errno!(ENOMEM, "no mmap region contains the address");
+        }
+
+        let mut vma = vma.clone();
+        vma.handle_page_fault(rip, pf_addr, errcd, kernel_triggers)?;
+        vma_cursor.replace_with(VMAObj::new_vma_obj(vma));
+
+        Ok(())
+    }
+
     pub fn usage_percentage(&self) -> f32 {
         let total_size = self.range.size();
         let mut used_size = 0;
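The cursor lookup above relies on ordered-map semantics: take the last VMA whose start key is at or below the faulting address, then verify the address is actually contained. A minimal sketch of the same idea with a standard `BTreeMap` (the real code uses an intrusive RB-tree; `Vma` here is a stand-in type):

use std::collections::BTreeMap;
use std::ops::Bound;

struct Vma {
    start: usize,
    end: usize,
}

// Last VMA starting at or below `pf_addr`, if it actually contains the address.
fn find_vma(vmas: &BTreeMap<usize, Vma>, pf_addr: usize) -> Option<&Vma> {
    vmas.range((Bound::Unbounded, Bound::Included(pf_addr)))
        .next_back()
        .map(|(_, vma)| vma)
        .filter(|vma| pf_addr < vma.end)
}

fn main() {
    let mut vmas = BTreeMap::new();
    vmas.insert(0x1000, Vma { start: 0x1000, end: 0x3000 });
    assert_eq!(find_vma(&vmas, 0x2000).unwrap().start, 0x1000);
    assert!(find_vma(&vmas, 0x4000).is_none()); // past the end: not contained
}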
@ -487,6 +501,7 @@ impl VMRemapParser for ChunkManager {

 impl Drop for ChunkManager {
     fn drop(&mut self) {
+        info!("drop chunk manager = {:?}", self);
         assert!(self.is_empty());
         assert!(self.free_size == self.range.size());
         assert!(self.free_manager.free_size() == self.range.size());

405 src/libos/src/vm/vm_epc.rs (new file)
@ -0,0 +1,405 @@
// This file contains EPC related APIs and definitions.
use super::*;
use sgx_trts::emm::{
    AllocAddr, AllocFlags, AllocOptions, EmmAlloc, HandleResult, PageFaultHandler, Perm,
};
use sgx_trts::enclave::rsgx_is_supported_EDMM;
use std::ptr::NonNull;

// Memory layout for platforms with EDMM support
//
// Addr low -> high
// |---------------------------------------------||---------------------||--------------------------------------|
//                Reserved Memory                        Gap Range                  User Region Memory
//    (committed when the enclave is loaded)           (used by SDK)       (committed on demand when a PF occurs)
//
// For platforms without EDMM support, we only use reserved memory.

pub enum SGXPlatform {
    WithEDMM,
    NoEDMM,
}

#[derive(Clone)]
pub enum EPCMemType {
    Reserved,
    UserRegion,
}

pub struct ReservedMem;
pub struct UserRegionMem;

#[repr(C, align(4096))]
#[derive(Clone)]
struct ZeroPage([u8; PAGE_SIZE]);

impl ZeroPage {
    fn new() -> Self {
        Self([0; PAGE_SIZE])
    }

    fn new_page_aligned_vec(size: usize) -> Vec<u8> {
        debug_assert!(size % PAGE_SIZE == 0);
        let page_num = size / PAGE_SIZE;
        let mut page_vec = vec![Self::new(); page_num];

        let ptr = page_vec.as_mut_ptr();

        let size = page_num * std::mem::size_of::<Self>();
        std::mem::forget(page_vec);

        unsafe { Vec::from_raw_parts(ptr as *mut u8, size, size) }
    }
}

lazy_static! {
    static ref ZERO_PAGE: Vec<u8> = ZeroPage::new_page_aligned_vec(PAGE_SIZE);
}
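The `Vec<ZeroPage>` to `Vec<u8>` reinterpretation above exists purely to obtain a page-aligned, zero-filled byte buffer, since the source data for the commit-with-data path must be page aligned. A quick property check, as a sketch (assuming the `ZeroPage` and `PAGE_SIZE` definitions above):

fn main() {
    let buf = ZeroPage::new_page_aligned_vec(3 * PAGE_SIZE);
    assert_eq!(buf.len(), 3 * PAGE_SIZE);
    // The Vec's allocation inherits ZeroPage's #[repr(align(4096))] alignment.
    assert_eq!(buf.as_ptr() as usize % PAGE_SIZE, 0);
    assert!(buf.iter().all(|&b| b == 0));
}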
pub trait EPCAllocator {
    fn alloc(size: usize) -> Result<usize> {
        return_errno!(ENOSYS, "operation not supported");
    }

    fn alloc_with_addr(addr: usize, size: usize) -> Result<usize> {
        return_errno!(ENOSYS, "operation not supported");
    }

    fn free(addr: usize, size: usize) -> Result<()> {
        return_errno!(ENOSYS, "operation not supported");
    }

    fn modify_protection(addr: usize, length: usize, protection: VMPerms) -> Result<()> {
        return_errno!(ENOSYS, "operation not supported");
    }

    fn mem_type() -> EPCMemType;
}

impl EPCAllocator for ReservedMem {
    fn alloc(size: usize) -> Result<usize> {
        let ptr = unsafe { sgx_alloc_rsrv_mem(size) };
        if ptr.is_null() {
            return_errno!(ENOMEM, "run out of reserved memory");
        }
        Ok(ptr as usize)
    }

    fn alloc_with_addr(addr: usize, size: usize) -> Result<usize> {
        let ptr = unsafe { sgx_alloc_rsrv_mem_ex(addr as *const c_void, size) };
        if ptr.is_null() {
            return_errno!(ENOMEM, "can't allocate reserved memory at desired address");
        }
        Ok(ptr as usize)
    }

    fn free(addr: usize, size: usize) -> Result<()> {
        let ret = unsafe { sgx_free_rsrv_mem(addr as *const c_void, size) };
        assert!(ret == 0);
        Ok(())
    }

    fn modify_protection(addr: usize, length: usize, protection: VMPerms) -> Result<()> {
        let mut ret_val = 0;
        let ret = if rsgx_is_supported_EDMM() {
            unsafe {
                sgx_tprotect_rsrv_mem(addr as *const c_void, length, protection.bits() as i32)
            }
        } else {
            // For platforms without EDMM, sgx_tprotect_rsrv_mem is actually useless.
            // However, at least we can set pages to the desired protections in the host kernel page table.
            unsafe {
                occlum_ocall_mprotect(
                    &mut ret_val as *mut i32,
                    addr as *const c_void,
                    length,
                    protection.bits() as i32,
                )
            }
        };

        if ret != sgx_status_t::SGX_SUCCESS || ret_val != 0 {
            return_errno!(ENOMEM, "reserved memory modify protection failure");
        }

        Ok(())
    }

    fn mem_type() -> EPCMemType {
        EPCMemType::Reserved
    }
}

impl EPCAllocator for UserRegionMem {
    fn alloc(size: usize) -> Result<usize> {
        let alloc_options = AllocOptions::new()
            .set_flags(AllocFlags::COMMIT_ON_DEMAND)
            .set_handler(enclave_page_fault_handler_dummy, 0);
        let ptr = unsafe { EmmAlloc.alloc(AllocAddr::Any, size, alloc_options) }
            .map_err(|e| errno!(Errno::from(e as u32)))?;

        Ok(ptr.addr().get())
    }

    fn free(addr: usize, size: usize) -> Result<()> {
        let ptr = NonNull::<u8>::new(addr as *mut u8).unwrap();
        unsafe { EmmAlloc.dealloc(ptr, size) }.map_err(|e| errno!(Errno::from(e as u32)))?;
        Ok(())
    }

    fn modify_protection(addr: usize, length: usize, protection: VMPerms) -> Result<()> {
        trace!(
            "user region modify protection, protection = {:?}, range = {:?}",
            protection,
            VMRange::new_with_size(addr, length).unwrap()
        );
        let ptr = NonNull::<u8>::new(addr as *mut u8).unwrap();
        unsafe {
            EmmAlloc.modify_permissions(ptr, length, Perm::from_bits(protection.bits()).unwrap())
        }
        .map_err(|e| errno!(Errno::from(e as u32)))?;

        Ok(())
    }

    fn mem_type() -> EPCMemType {
        EPCMemType::UserRegion
    }
}
impl UserRegionMem {
    fn commit_memory(start_addr: usize, size: usize) -> Result<()> {
        let ptr = NonNull::<u8>::new(start_addr as *mut u8).unwrap();
        unsafe { EmmAlloc.commit(ptr, size) }.map_err(|e| errno!(Errno::from(e as u32)))?;
        Ok(())
    }

    fn commit_memory_with_new_permission(
        start_addr: usize,
        size: usize,
        new_perms: VMPerms,
    ) -> Result<()> {
        let ptr = NonNull::<u8>::new(start_addr as *mut u8).unwrap();
        let perm = Perm::from_bits(new_perms.bits()).unwrap();
        if size == PAGE_SIZE {
            unsafe { EmmAlloc::commit_with_data(ptr, ZERO_PAGE.as_slice(), perm) }
                .map_err(|e| errno!(Errno::from(e as u32)))?;
        } else {
            let data = ZeroPage::new_page_aligned_vec(size);
            unsafe { EmmAlloc::commit_with_data(ptr, data.as_slice(), perm) }
                .map_err(|e| errno!(Errno::from(e as u32)))?;
        }
        Ok(())
    }

    fn commit_memory_and_init_with_file(
        start_addr: usize,
        size: usize,
        file: &FileRef,
        file_offset: usize,
        new_perms: VMPerms,
    ) -> Result<()> {
        let mut data = ZeroPage::new_page_aligned_vec(size);
        let len = file
            .read_at(file_offset, data.as_mut_slice())
            .map_err(|_| errno!(EACCES, "failed to init memory from file"))?;

        let ptr = NonNull::<u8>::new(start_addr as *mut u8).unwrap();
        let perm = Perm::from_bits(new_perms.bits()).unwrap();

        unsafe { EmmAlloc::commit_with_data(ptr, data.as_slice(), perm) }
            .map_err(|e| errno!(Errno::from(e as u32)))?;
        Ok(())
    }
}
impl SGXPlatform {
    pub fn new() -> Self {
        if rsgx_is_supported_EDMM() {
            SGXPlatform::WithEDMM
        } else {
            SGXPlatform::NoEDMM // including SGX simulation mode
        }
    }

    pub fn alloc_user_space(
        &self,
        init_size: usize,
        max_size: usize,
    ) -> Result<(VMRange, Option<VMRange>)> {
        debug!(
            "alloc user space init size = {:?}, max size = {:?}",
            init_size, max_size
        );
        if matches!(self, SGXPlatform::WithEDMM) && max_size > init_size {
            let user_region_size = max_size - init_size;

            let reserved_mem_start_addr = ReservedMem::alloc(init_size)?;

            let user_region_start_addr = UserRegionMem::alloc(user_region_size)?;

            let total_user_space_range = VMRange::new(
                reserved_mem_start_addr,
                user_region_start_addr + user_region_size,
            )?;
            let gap_range =
                VMRange::new(reserved_mem_start_addr + init_size, user_region_start_addr)?;

            info!(
                "allocated user space range is {:?}, gap range is {:?}. reserved_mem range is {:?}, user region range is {:?}",
                total_user_space_range, gap_range, VMRange::new_with_size(reserved_mem_start_addr, init_size),
                VMRange::new_with_size(user_region_start_addr, user_region_size)
            );

            Ok((total_user_space_range, Some(gap_range)))
        } else {
            // For platforms without EDMM support, or when max_size equals init_size, use reserved memory for the whole user space
            let reserved_mem_start_addr = ReservedMem::alloc(max_size)?;
            let total_user_space_range =
                VMRange::new(reserved_mem_start_addr, reserved_mem_start_addr + max_size)?;

            info!(
                "allocated user space range is {:?}, gap range is None",
                total_user_space_range
            );

            Ok((total_user_space_range, None))
        }
    }

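To make the split concrete, here is a worked example of the arithmetic in `alloc_user_space`, with wholly hypothetical sizes and base addresses (the real values come from the two allocators at runtime):

fn main() {
    const GB: usize = 1 << 30;
    let (init_size, max_size) = (1 * GB, 4 * GB);

    let reserved_start = 0x1000_0000_0000usize; // hypothetical ReservedMem::alloc result
    let user_region_size = max_size - init_size;
    let user_region_start = 0x1080_0000_0000usize; // hypothetical UserRegionMem::alloc result

    let gap = (reserved_start + init_size, user_region_start);
    let total = (reserved_start, user_region_start + user_region_size);

    println!("reserved: {:#x}..{:#x}", reserved_start, reserved_start + init_size);
    println!("gap:      {:#x}..{:#x}", gap.0, gap.1);
    println!("user:     {:#x}..{:#x}", user_region_start, user_region_start + user_region_size);
    println!("total:    {:#x}..{:#x}", total.0, total.1);
}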
    pub fn free_user_space(&self, user_space_range: &VMRange, gap_range: Option<&VMRange>) {
        let user_space_ranges = if let Some(gap_range) = gap_range {
            user_space_range.subtract(gap_range)
        } else {
            vec![*user_space_range]
        };

        if user_space_ranges.len() == 2 {
            debug_assert!(matches!(self, SGXPlatform::WithEDMM));
            let reserved_mem = user_space_ranges[0];
            let user_region_mem = user_space_ranges[1];
            ReservedMem::free(reserved_mem.start(), reserved_mem.size()).unwrap();
            UserRegionMem::free(user_region_mem.start(), user_region_mem.size()).unwrap();
        } else {
            // For platforms with EDMM where max_size equals init_size, or for platforms without EDMM, there is no gap range.
            debug_assert!(user_space_ranges.len() == 1);
            let reserved_mem = user_space_ranges[0];
            ReservedMem::free(reserved_mem.start(), reserved_mem.size()).unwrap();
        }
    }
}

impl Debug for EPCMemType {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let output_str = match self {
            EPCMemType::Reserved => "reserved memory region",
            EPCMemType::UserRegion => "user region memory",
        };
        write!(f, "{}", output_str)
    }
}
impl EPCMemType {
    pub fn new(range: &VMRange) -> Self {
        trace!("EPC new range = {:?}", range);
        if rsgx_is_supported_EDMM() {
            if let Some(gap_range) = USER_SPACE_VM_MANAGER.gap_range() {
                debug_assert!({
                    if range.size() > 0 {
                        !gap_range.overlap_with(range)
                    } else {
                        // Ignore for sentry VMA
                        true
                    }
                });
                if range.end() <= gap_range.start() {
                    EPCMemType::Reserved
                } else {
                    debug_assert!(gap_range.end() <= range.start());
                    EPCMemType::UserRegion
                }
            } else {
                // There is no gap; this indicates that there is no user region memory
                EPCMemType::Reserved
            }
        } else {
            // Only reserved memory
            EPCMemType::Reserved
        }
    }

    pub fn modify_protection(&self, addr: usize, length: usize, protection: VMPerms) -> Result<()> {
        // PT_GROWSDOWN should only be applied to a stack segment or a segment mapped with the MAP_GROWSDOWN flag set.
        // Since the memory is managed by ourselves, the mprotect ocall shouldn't use this flag. Otherwise, EINVAL will be thrown.
        let mut prot = protection.clone();
        prot.remove(VMPerms::GROWSDOWN);

        match self {
            EPCMemType::Reserved => ReservedMem::modify_protection(addr, length, prot),
            EPCMemType::UserRegion => UserRegionMem::modify_protection(addr, length, prot),
        }
    }
}
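A toy check of the classification rule in `EPCMemType::new` (standalone logic only; the gap boundaries are hypothetical): a VMA never straddles the gap, so anything ending at or below the gap start is reserved memory, and anything else must start at or above the gap end.

fn classify(range: (usize, usize), gap: (usize, usize)) -> &'static str {
    if range.1 <= gap.0 {
        "reserved"
    } else {
        // A valid VMA never straddles the gap, so it must lie entirely above it
        assert!(gap.1 <= range.0);
        "user region"
    }
}

fn main() {
    let gap = (0x4000_0000, 0x5000_0000);
    assert_eq!(classify((0x1000_0000, 0x2000_0000), gap), "reserved");
    assert_eq!(classify((0x6000_0000, 0x7000_0000), gap), "user region");
}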
pub fn commit_memory(start_addr: usize, size: usize, new_perms: Option<VMPerms>) -> Result<()> {
    trace!(
        "commit epc: {:?}, new permission: {:?}",
        VMRange::new_with_size(start_addr, size).unwrap(),
        new_perms
    );

    // We should make memory commit and permission change atomic to prevent data races. Thus, if the new perms
    // are not the default permission (RW), we implement a different function by calling EACCEPTCOPY
    match new_perms {
        Some(perms) if perms != VMPerms::DEFAULT => {
            UserRegionMem::commit_memory_with_new_permission(start_addr, size, perms)
        }
        _ => UserRegionMem::commit_memory(start_addr, size),
    }
}

pub fn commit_memory_and_init_with_file(
    start_addr: usize,
    size: usize,
    file: &FileRef,
    file_offset: usize,
    new_perms: VMPerms,
) -> Result<()> {
    UserRegionMem::commit_memory_and_init_with_file(start_addr, size, file, file_offset, new_perms)
}

// This is a dummy function for sgx_mm_alloc. The real handler is "enclave_page_fault_handler" shown below.
extern "C" fn enclave_page_fault_handler_dummy(
    pfinfo: &sgx_pfinfo,
    private: usize,
) -> HandleResult {
    // Don't do anything here. Modifying registers can cause the PF handling to go wrong.
    return HandleResult::Search;
}

pub fn enclave_page_fault_handler(
    rip: usize,
    exception_info: sgx_misc_exinfo_t,
    kernel_triggers: bool,
) -> Result<()> {
    let pf_addr = exception_info.faulting_address as usize;
    let pf_errcd = exception_info.error_code;
    trace!(
        "enclave page fault caught, pf_addr = 0x{:x}, error code = {:?}",
        pf_addr,
        pf_errcd
    );

    USER_SPACE_VM_MANAGER.handle_page_fault(rip, pf_addr, pf_errcd, kernel_triggers)?;

    Ok(())
}

extern "C" {
    fn occlum_ocall_mprotect(
        retval: *mut i32,
        addr: *const c_void,
        len: usize,
        prot: i32,
    ) -> sgx_status_t;
}
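The `commit_memory` dispatch above is worth restating: committing with non-default permissions must happen as a single EACCEPTCOPY-style operation, because a commit-then-mprotect pair would leave a window in which another thread could observe freshly committed RW pages. A sketch of the decision, with stand-in types (not the SDK API):

#[derive(PartialEq)]
enum Perms {
    Default, // RW
    Other,
}

enum CommitPlan {
    PlainCommit,            // EACCEPT path: pages come up with default RW perms
    CommitWithPerms(Perms), // EACCEPTCOPY path: zeroed data + final perms in one step
}

fn plan(new_perms: Option<Perms>) -> CommitPlan {
    match new_perms {
        Some(p) if p != Perms::Default => CommitPlan::CommitWithPerms(p),
        _ => CommitPlan::PlainCommit,
    }
}

fn main() {
    assert!(matches!(plan(None), CommitPlan::PlainCommit));
    assert!(matches!(plan(Some(Perms::Default)), CommitPlan::PlainCommit));
    assert!(matches!(plan(Some(Perms::Other)), CommitPlan::CommitWithPerms(_)));
}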
@ -22,14 +22,16 @@ use std::ops::Bound::{Excluded, Included};
 #[derive(Debug)]
 pub struct VMManager {
     range: VMRange,
+    gap_range: Option<VMRange>,
     internal: SgxMutex<InternalVMManager>,
 }

 impl VMManager {
-    pub fn init(vm_range: VMRange) -> Result<Self> {
-        let internal = InternalVMManager::init(vm_range.clone());
+    pub fn init(vm_range: VMRange, gap_range: Option<VMRange>) -> Result<Self> {
+        let mut internal = InternalVMManager::init(vm_range.clone(), &gap_range);
         Ok(VMManager {
             range: vm_range,
+            gap_range: gap_range,
             internal: SgxMutex::new(internal),
         })
     }
@ -38,6 +40,10 @@ impl VMManager {
         &self.range
     }

+    pub fn gap_range(&self) -> &Option<VMRange> {
+        &self.gap_range
+    }
+
     pub fn internal(&self) -> SgxMutexGuard<InternalVMManager> {
         self.internal.lock().unwrap()
     }
@ -56,8 +62,15 @@ impl VMManager {
     }

     pub fn verified_clean_when_exit(&self) -> bool {
+        let gap_size = if let Some(gap) = self.gap_range() {
+            gap.size()
+        } else {
+            0
+        };
+
         let internal = self.internal();
-        internal.chunks.len() == 0 && internal.free_manager.free_size() == self.range.size()
+        internal.chunks.len() == 0
+            && internal.free_manager.free_size() + gap_size == self.range.size()
     }

     pub fn free_chunk(&self, chunk: &ChunkRef) {
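The adjusted invariant above accounts for the gap: the gap is carved out of the free-space manager at init time and never returned, so an empty VM manager satisfies free + gap == total rather than free == total. In numbers (hypothetical sizes):

fn main() {
    let total = 64usize << 30; // user-space range size (hypothetical)
    let gap = 2usize << 30;    // SDK gap size (hypothetical)
    let free_at_exit = total - gap; // the gap never enters the free manager
    assert_eq!(free_at_exit + gap, total);
}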
@ -358,22 +371,19 @@ impl VMManager {

         intersect_chunks.iter().for_each(|chunk| {
             if let ChunkType::SingleVMA(vma) = chunk.internal() {
-                if let Some(intersection_range) = chunk.range().intersect(&reset_range) {
-                    let mut internal_manager = self.internal();
-                    internal_manager.mprotect_single_vma_chunk(
-                        &chunk,
-                        intersection_range,
-                        VMPerms::DEFAULT,
-                    );
-
-                    unsafe {
-                        let buf = intersection_range.as_slice_mut();
-                        buf.iter_mut().for_each(|b| *b = 0)
-                    }
+                let mut vma = vma.lock().unwrap();
+                if let Some(intersection_vma) = vma.intersect(&reset_range) {
+                    intersection_vma.flush_and_clean_memory().unwrap();
                 }
+                // Clear permissions for the SingleVMA chunk
+                if vma.perms() != VMPerms::DEFAULT {
+                    vma.set_perms(VMPerms::default());
+                }
+            } else {
+                // Currently only used for heap de-allocation; thus it must be a SingleVMA chunk.
+                unreachable!()
             }
         });

         Ok(())
     }
@ -394,11 +404,11 @@ impl VMManager {
         match chunk.internal() {
             ChunkType::MultiVMA(manager) => {
                 trace!("msync default chunk: {:?}", chunk.range());
-                return manager
+                manager
                     .lock()
                     .unwrap()
                     .chunk_manager_mut()
-                    .msync_by_range(&sync_range);
+                    .msync_by_range(&sync_range)?;
             }
             ChunkType::SingleVMA(vma) => {
                 // Note: There are rare cases where multiple threads do mprotect or munmap for the same single-vma chunk

@ -406,7 +416,7 @@ impl VMManager {
                 // It is fine here because this function doesn't modify the global chunk list and only operates on the vma
                 // which is updated in real time.
                 let vma = vma.lock().unwrap();
-                vma.flush_backed_file();
+                vma.flush_committed_backed_file();
             }
         }
         Ok(())
@ -429,7 +439,7 @@ impl VMManager {
             ChunkType::SingleVMA(vma) => {
                 vma.lock()
                     .unwrap()
-                    .flush_backed_file_with_cond(is_same_file);
+                    .flush_committed_backed_file_with_cond(is_same_file);
             }
         });
     }
@ -539,6 +549,41 @@ impl VMManager {

         assert!(mem_chunks.len() == 0);
     }
+
+    pub fn handle_page_fault(
+        &self,
+        rip: usize,
+        pf_addr: usize,
+        errcd: u32,
+        kernel_triggers: bool,
+    ) -> Result<()> {
+        let current = current!();
+        let page_fault_chunk = {
+            let current_process_mem_chunks = current.vm().mem_chunks().read().unwrap();
+            if let Some(page_fault_chunk) = current_process_mem_chunks
+                .iter()
+                .find(|chunk| chunk.range().contains(pf_addr))
+            {
+                Some(page_fault_chunk.clone())
+            } else {
+                None
+            }
+        };
+
+        if let Some(page_fault_chunk) = page_fault_chunk {
+            return page_fault_chunk.handle_page_fault(rip, pf_addr, errcd, kernel_triggers);
+        }
+
+        // System V SHM segments are not tracked by the process VM. Try to find the chunk here.
+        if let Some(page_fault_shm_chunk) =
+            SYSTEM_V_SHM_MANAGER.get_shm_chunk_containing_addr(pf_addr, current.process().pid())
+        {
+            return page_fault_shm_chunk.handle_page_fault(rip, pf_addr, errcd, kernel_triggers);
+        }
+
+        // This can happen, for example, when the user intends to trigger the SIGSEGV handler by visiting a null pointer.
+        return_errno!(ENOMEM, "can't find the chunk containing the address");
+    }
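A condensed sketch of the fallback chain above, with stand-in types and lookups (the names below are assumptions, not the LibOS API): first the faulting process's own chunk list, then the System V SHM registry, otherwise ENOMEM.

const ENOMEM: i32 = 12; // errno value on Linux

struct Chunk;

impl Chunk {
    fn handle_page_fault(&self, _pf_addr: usize) -> Result<(), i32> {
        Ok(())
    }
}

// Stand-ins for the real lookups (process chunk list, SysV SHM registry).
fn find_process_chunk(_pf_addr: usize) -> Option<Chunk> {
    None
}
fn find_sysv_shm_chunk(_pf_addr: usize) -> Option<Chunk> {
    None
}

fn handle(pf_addr: usize) -> Result<(), i32> {
    if let Some(chunk) = find_process_chunk(pf_addr) {
        return chunk.handle_page_fault(pf_addr);
    }
    // SysV SHM mappings live outside the process's own chunk list
    if let Some(chunk) = find_sysv_shm_chunk(pf_addr) {
        return chunk.handle_page_fault(pf_addr);
    }
    Err(ENOMEM) // no chunk contains the address; likely a genuine SIGSEGV
}

fn main() {
    assert_eq!(handle(0xdead_beef), Err(ENOMEM));
}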
 }

 // Modification on this structure must acquire the global lock.
@ -552,11 +597,21 @@ pub struct InternalVMManager {
 }

 impl InternalVMManager {
-    pub fn init(vm_range: VMRange) -> Self {
+    pub fn init(vm_range: VMRange, gap_range: &Option<VMRange>) -> Self {
         let chunks = BTreeSet::new();
         let fast_default_chunks = Vec::new();
-        let free_manager = VMFreeSpaceManager::new(vm_range);
+        let mut free_manager = VMFreeSpaceManager::new(vm_range);
         let shm_manager = ShmManager::new();
+        if let Some(gap_range) = gap_range {
+            debug_assert!(vm_range.is_superset_of(&gap_range));
+            free_manager
+                .find_free_range_internal(
+                    gap_range.size(),
+                    PAGE_SIZE,
+                    VMMapAddr::Force(gap_range.start()),
+                )
+                .unwrap();
+        }
         Self {
             chunks,
             fast_default_chunks,
@ -657,19 +712,7 @@ impl InternalVMManager {
             _ => unreachable!(),
         };

-        // File-backed VMA needs to be flushed upon munmap
-        intersection_vma.flush_backed_file();
-
-        // Reset memory permissions
-        if !&intersection_vma.perms().is_default() {
-            VMPerms::apply_perms(&intersection_vma, VMPerms::default());
-        }
-
-        // Reset to zero
-        unsafe {
-            let buf = intersection_vma.as_slice_mut();
-            buf.iter_mut().for_each(|b| *b = 0)
-        }
-
+        intersection_vma.flush_and_clean_memory()?;

         let mut new_vmas = vma.subtract(&intersection_vma);
         let current = current!();
@ -724,10 +767,10 @@ impl InternalVMManager {
         self.shm_manager
             .create_shared_chunk(options, new_chunk.clone())
             .map_err(|e| {
-                let vma = new_chunk.get_vma_for_single_vma_chunk();
+                let mut vma = new_chunk.get_vma_for_single_vma_chunk();
                 // Reset memory permissions
                 if !vma.perms().is_default() {
-                    VMPerms::apply_perms(&vma, VMPerms::default());
+                    vma.modify_permissions_for_committed_pages(VMPerms::default())
                 }
                 // Reset memory contents
                 unsafe {
@ -777,20 +820,12 @@ impl InternalVMManager {
             .shm_manager
             .munmap_shared_chunk(chunk, munmap_range, flag)?
             == MunmapSharedResult::Freeable
         {
-            let vma = chunk.get_vma_for_single_vma_chunk();
-            // Flush memory contents to backed file
-            vma.flush_backed_file();
-            // Reset memory permissions
-            if !vma.perms().is_default() {
-                VMPerms::apply_perms(&vma, VMPerms::default());
-            }
-            // Reset memory contents
-            unsafe {
-                let buf = vma.as_slice_mut();
-                buf.iter_mut().for_each(|b| *b = 0)
-            }
-            drop(vma);
+            // Flush memory contents to the backed file and reset memory contents
+            {
+                let vma = chunk.get_vma_for_single_vma_chunk();
+                vma.flush_and_clean_memory()?;
+            }

             self.free_chunk(chunk);
             let current = current!();
@ -855,7 +890,6 @@ impl InternalVMManager {
         }
         ChunkType::SingleVMA(vma) => vma,
     };
-
     let mut updated_vmas = {
         let mut containing_vma = vma.lock().unwrap();
         trace!(

@ -865,7 +899,8 @@ impl InternalVMManager {
         );
         debug_assert!(chunk.range() == containing_vma.range());

-        if containing_vma.perms() == new_perms {
+        let old_perms = containing_vma.perms();
+        if old_perms == new_perms {
             return Ok(());
         }

@ -876,7 +911,7 @@ impl InternalVMManager {
             (true, true) => {
                 // Exactly the same vma
                 containing_vma.set_perms(new_perms);
-                VMPerms::apply_perms(&containing_vma, containing_vma.perms());
+                containing_vma.modify_permissions_for_committed_pages(new_perms);
                 return Ok(());
             }
             (false, false) => {

@ -886,15 +921,13 @@ impl InternalVMManager {
                 // remaining old VMA: [protect_range.end, containing_vma.end)

                 let old_end = containing_vma.end();
-                let old_perms = containing_vma.perms();
-
-                let new_vma = VMArea::inherits_file_from(
+                let mut new_vma = VMArea::inherits_file_from(
                     &containing_vma,
                     protect_range,
                     new_perms,
                     VMAccess::Private(current_pid),
                 );
-                VMPerms::apply_perms(&new_vma, new_vma.perms());
+                new_vma.modify_permissions_for_committed_pages(new_perms);

                 let remaining_old_vma = {
                     let range = VMRange::new(protect_range.end(), old_end).unwrap();
@ -905,7 +938,6 @@ impl InternalVMManager {
                         VMAccess::Private(current_pid),
                     )
                 };
-
                 containing_vma.set_end(protect_range.start());

                 // Put containing_vma at last to be updated first.
@ -913,19 +945,19 @@ impl InternalVMManager {
                 updated_vmas
             }
             _ => {
-                let new_vma = VMArea::inherits_file_from(
+                let mut new_vma = VMArea::inherits_file_from(
                     &containing_vma,
                     protect_range,
                     new_perms,
                     VMAccess::Private(current_pid),
                 );
-                VMPerms::apply_perms(&new_vma, new_vma.perms());
+                new_vma.modify_permissions_for_committed_pages(new_perms);

                 if same_start {
-                    // Protect range is at left side of the cotaining vma
+                    // Protect range is at left side of the containing vma
                     containing_vma.set_start(protect_range.end());
                 } else {
-                    // Protect range is at right side of the cotaining vma
+                    // Protect range is at right side of the containing vma
                     containing_vma.set_end(protect_range.start());
                 }

@ -935,19 +967,16 @@ impl InternalVMManager {
                 }
             }
         };
-
         let current = current!();
         // First update current vma chunk
         if updated_vmas.len() > 1 {
             let update_vma = updated_vmas.pop().unwrap();
             self.update_single_vma_chunk(&current, &chunk, update_vma);
         }
-
         // Then add new chunks if any
         updated_vmas.into_iter().for_each(|vma| {
             self.add_new_chunk(&current, vma);
         });
-
         Ok(())
     }

@ -964,9 +993,6 @@ impl InternalVMManager {
         // Remove from chunks
         self.chunks.remove(chunk);

-        // Mprotect the whole chunk to reduce the usage of vma count of host
-        VMPerms::apply_perms(range, VMPerms::DEFAULT);
-
         // Add range back to freespace manager
         self.free_manager.add_range_back_to_free_manager(range);
         Ok(())

@ -1131,6 +1157,7 @@ impl InternalVMManager {
         let perms = options.perms().clone();
         let align = options.align().clone();
         let initializer = options.initializer();
+        let page_policy = options.page_policy();
         target_contained_ranges
             .iter()
             .map(|range| {

@ -1146,6 +1173,7 @@ impl InternalVMManager {
                 .initializer(initializer.clone())
                 .addr(addr)
                 .size(size)
+                .page_policy(*page_policy)
                 .build()
                 .unwrap()
             })
|
@ -39,37 +39,6 @@ impl VMPerms {
|
|||||||
self.bits == Self::DEFAULT.bits
|
self.bits == Self::DEFAULT.bits
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn apply_perms(protect_range: &VMRange, perms: VMPerms) {
|
|
||||||
use sgx_trts::enclave::rsgx_is_supported_EDMM;
|
|
||||||
|
|
||||||
unsafe {
|
|
||||||
let mut retval = 0;
|
|
||||||
let addr = protect_range.start() as *const c_void;
|
|
||||||
let len = protect_range.size();
|
|
||||||
// PT_GROWSDOWN should only be applied to stack segment or a segment mapped with the MAP_GROWSDOWN flag set.
|
|
||||||
// Since the memory are managed by our own, mprotect ocall shouldn't use this flag. Otherwise, EINVAL will be thrown.
|
|
||||||
let mut prot = perms.clone();
|
|
||||||
prot.remove(VMPerms::GROWSDOWN);
|
|
||||||
|
|
||||||
if rsgx_is_supported_EDMM() {
|
|
||||||
// With EDMM support, reserved memory permission should be updated.
|
|
||||||
let sgx_status = sgx_tprotect_rsrv_mem(addr, len, prot.bits() as i32);
|
|
||||||
if sgx_status != sgx_status_t::SGX_SUCCESS {
|
|
||||||
panic!("sgx_tprotect_rsrv_mem status {}", sgx_status);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// Without EDMM support, reserved memory permission is statically RWX and we only need to do mprotect ocall.
|
|
||||||
let sgx_status = occlum_ocall_mprotect(&mut retval, addr, len, prot.bits() as i32);
|
|
||||||
if sgx_status != sgx_status_t::SGX_SUCCESS || retval != 0 {
|
|
||||||
panic!(
|
|
||||||
"occlum_ocall_mprotect status {}, retval {}",
|
|
||||||
sgx_status, retval
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn display(&self) -> String {
|
pub fn display(&self) -> String {
|
||||||
let mut str = String::new();
|
let mut str = String::new();
|
||||||
if self.can_read() {
|
if self.can_read() {
|
||||||
@ -96,23 +65,3 @@ impl Default for VMPerms {
|
|||||||
VMPerms::DEFAULT
|
VMPerms::DEFAULT
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" {
|
|
||||||
// Modify the access permissions of the pages in the reserved memory area
|
|
||||||
//
|
|
||||||
// Parameters:
|
|
||||||
// Inputs: addr[in]: Starting address of region which needs to change access
|
|
||||||
// permission. Page aligned.
|
|
||||||
// length[in]: The length of the memory to be manipulated in bytes. Page aligned.
|
|
||||||
// prot[in]: The target memory protection.
|
|
||||||
// Return: sgx_status_t
|
|
||||||
//
|
|
||||||
fn sgx_tprotect_rsrv_mem(addr: *const c_void, length: usize, prot: i32) -> sgx_status_t;
|
|
||||||
|
|
||||||
fn occlum_ocall_mprotect(
|
|
||||||
retval: *mut i32,
|
|
||||||
addr: *const c_void,
|
|
||||||
len: usize,
|
|
||||||
prot: i32,
|
|
||||||
) -> sgx_status_t;
|
|
||||||
}
|
|
||||||
|
@ -10,6 +10,11 @@ use intrusive_collections::RBTreeLink;
 use intrusive_collections::{intrusive_adapter, KeyAdapter};
 use rcore_fs::vfs::Metadata;

+pub const GB: usize = 1 << 30;
+pub const TB: usize = 1 << 40;
+pub const MB: usize = 1 << 20;
+pub const KB: usize = 1 << 10;
+
 #[derive(Clone, Debug)]
 pub enum VMInitializer {
     DoNothing(),

@ -139,7 +144,7 @@ impl FileBacked {
         self.write_back
     }

-    pub fn init_file(&self) -> (&FileRef, usize) {
+    pub fn backed_file(&self) -> (&FileRef, usize) {
         (&self.file, self.offset)
     }

@ -179,6 +184,19 @@ impl VMMapAddr {
     }
 }

+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+pub enum PagePolicy {
+    ReserveOnly = 0x1,    // Only reserve the address range
+    CommitNow = 0x2,      // Commit all pages at mmap time
+    CommitOnDemand = 0x4, // Reserve at mmap time, commit in the PF handler. This is the default policy.
+}
+
+impl Default for PagePolicy {
+    fn default() -> PagePolicy {
+        PagePolicy::CommitOnDemand
+    }
+}
+
 #[derive(Builder, Debug)]
 #[builder(pattern = "owned", build_fn(skip), no_std)]
 pub struct VMMapOptions {
@ -187,6 +205,7 @@ pub struct VMMapOptions {
     perms: VMPerms,
     addr: VMMapAddr,
     initializer: VMInitializer,
+    page_policy: PagePolicy,
 }

 // VMMapOptionsBuilder is generated automatically, except the build function

@ -232,12 +251,21 @@ impl VMMapOptionsBuilder {
             Some(initializer) => initializer.clone(),
             None => VMInitializer::default(),
         };
+        let page_policy = {
+            match &initializer {
+                VMInitializer::CopyFrom { .. } => PagePolicy::CommitNow,
+                VMInitializer::CopyOldAndReadNew { .. } => PagePolicy::CommitNow,
+                _ => self.page_policy.unwrap_or_default(),
+            }
+        };
+
         Ok(VMMapOptions {
             size,
             align,
             perms,
             addr,
             initializer,
+            page_policy,
         })
     }
 }
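Note the override in the build function above: initializers that copy data into the mapping right away (`CopyFrom`, `CopyOldAndReadNew`) force `CommitNow`, since writing requires committed pages. A hypothetical in-crate usage sketch (builder method names follow the diff; error handling elided; this is not taken from the commit):

// Sketch only: request a mapping whose pages are all committed at mmap time.
let options = VMMapOptionsBuilder::default()
    .size(16 * PAGE_SIZE)
    .align(PAGE_SIZE)
    .perms(VMPerms::READ | VMPerms::WRITE)
    .addr(VMMapAddr::Any)
    .page_policy(PagePolicy::CommitNow)
    .build()?;
assert_eq!(*options.page_policy(), PagePolicy::CommitNow);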
@ -269,6 +297,10 @@ impl VMMapOptions {
         }
         false
     }
+
+    pub fn page_policy(&self) -> &PagePolicy {
+        &self.page_policy
+    }
 }

 #[derive(Clone, Copy, PartialEq)]
2 tools/toolchains/dcap_lib/Cargo.lock (generated)
@ -25,4 +25,4 @@ dependencies = [

 [[package]]
 name = "sgx_types"
-version = "1.1.5"
+version = "1.1.6"