Add support for mremap with file-backed memory

Hui, Chunyang 2021-03-04 06:50:37 +00:00 committed by Tate, Hongliang Tian
parent adc79d3a26
commit 6d37dd2d90
2 changed files with 313 additions and 26 deletions

@@ -7,8 +7,19 @@ use super::vm_perms::VMPerms;
pub enum VMInitializer {
DoNothing(),
FillZeros(),
CopyFrom { range: VMRange },
LoadFromFile { file: FileRef, offset: usize },
CopyFrom {
range: VMRange,
},
LoadFromFile {
file: FileRef,
offset: usize,
},
// For a file-backed mremap, which may move the old range to a new range and read extra bytes from the backing file
CopyOldAndReadNew {
old_range: VMRange,
file: FileRef,
offset: usize, // read file from this offset
},
}
impl Default for VMInitializer {
@@ -45,6 +56,24 @@ impl VMInitializer {
*b = 0;
}
}
VMInitializer::CopyOldAndReadNew {
old_range,
file,
offset,
} => {
// TODO: Handle old_range with non-readable subrange
let src_slice = unsafe { old_range.as_slice() };
let copy_len = src_slice.len();
debug_assert!(copy_len <= buf.len());
let read_len = buf.len() - copy_len;
buf[..copy_len].copy_from_slice(&src_slice[..copy_len]);
let len = file
.read_at(*offset, &mut buf[copy_len..])
.cause_err(|_| errno!(EIO, "failed to init memory from file"))?;
for b in &mut buf[(copy_len + len)..] {
*b = 0;
}
}
}
Ok(())
}
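For readers skimming the diff, here is a self-contained sketch of the buffer layout that the CopyOldAndReadNew arm above produces. It is an illustration only: read_at is a plain callback standing in for FileRef::read_at, and none of these names exist in the patch.

    // Sketch (illustrative, not part of the patch): the new range starts with
    // the old range's bytes, continues with bytes read from the backing file
    // at `offset`, and ends with zeros.
    fn copy_old_and_read_new(
        buf: &mut [u8],                               // the new, larger range
        old_bytes: &[u8],                             // contents of the old range
        read_at: impl Fn(usize, &mut [u8]) -> usize,  // stand-in for FileRef::read_at
        offset: usize,                                // file offset for the new bytes
    ) {
        let copy_len = old_bytes.len();
        debug_assert!(copy_len <= buf.len());
        // Step 1: carry the old contents over to the head of the new range.
        buf[..copy_len].copy_from_slice(old_bytes);
        // Step 2: fill the middle from the file; a short read is allowed.
        let read_len = read_at(offset, &mut buf[copy_len..]);
        // Step 3: zero whatever the file could not provide.
        for b in &mut buf[copy_len + read_len..] {
            *b = 0;
        }
    }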
@@ -325,7 +354,6 @@ impl VMManager {
// After initializing, we can safely insert the new VMA
self.insert_new_vma(insert_idx, new_vma);
Ok(new_addr)
}
@@ -404,67 +432,164 @@ impl VMManager {
} else {
SizeType::Growing
};
// Get the memory permissions of the old range
let perms = {
// The old range must be contained in one VMA
let idx = self
.find_containing_vma_idx(&old_range)
.ok_or_else(|| errno!(EFAULT, "invalid range"))?;
let containing_vma = &self.vmas[idx];
containing_vma.perms()
};
// Get the memory permissions of the old range
let perms = containing_vma.perms();
// Get the write back file of the old range if there is one.
let writeback_file = containing_vma.writeback_file();
// FIXME: The current implementation of file-backed mremap has a limitation: if a SUBRANGE of a previous
// file-backed mmap with MAP_SHARED is then mremap-ed with MREMAP_MAYMOVE, there will be two VMAs backed
// by the same file. On Linux, a write through either VMA or to the file itself is reflected in all of
// them, but we cannot support that before we really have paging. Thus, if the old_range is not equal to
// a recorded VMA, we just return an error.
if writeback_file.is_some() && &old_range != containing_vma.range() {
return_errno!(EINVAL, "known limitation")
}
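// A concrete instance of the unsupported case (addresses are made up for
// illustration): mmap-ing four pages of a file with MAP_SHARED gives one VMA
// for, say, [0x1000, 0x5000); mremap-ing only [0x2000, 0x4000) with
// MREMAP_MAYMOVE would leave two VMAs backed by the same file, whose contents
// cannot be kept coherent without paging. Hence the EINVAL above.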
// Implement mremap as one optional mmap followed by one optional munmap.
//
// The exact arguments for the mmap and munmap are determined by the values of MRemapFlags
// and SizeType. There is a total of 9 combinations between MRemapFlags and SizeType.
// As some combinations result in the same mmap and munmap operations, the following code
// only needs to match four patterns of (MRemapFlags, SizeType) and treat each case
// accordingly.
// The exact arguments for the mmap and munmap are determined by the values of MRemapFlags,
// SizeType and writeback_file. There are 18 possible combinations of MRemapFlags, SizeType
// and writeback_file. As some combinations result in the same mmap and munmap operations,
// the following code only needs to match the patterns of (MRemapFlags, SizeType, writeback_file)
// below and treat each case accordingly.
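// As a quick reference, the arms below cover (the file-backed cases mirror
// the anonymous ones):
//   (None,      Growing, None | Some) -> mmap right after the old range
//   (MayMove,   Growing, None | Some) -> grow in place if free, else move
//   (FixedAddr, _,       None | Some) -> forced mmap at the requested address
// The shrinking and same-size combinations are handled outside this hunk.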
// Determine whether an mmap is needed and, when possible, the address to return.
// TODO: should we fill zeros even when extending a file-backed mapping?
let (need_mmap, mut ret_addr) = match (flags, size_type) {
(MRemapFlags::None, SizeType::Growing) => {
let (need_mmap, mut ret_addr) = match (flags, size_type, writeback_file) {
(MRemapFlags::None, SizeType::Growing, None) => {
let vm_initializer_for_new_range = VMInitializer::FillZeros();
let mmap_opts = VMMapOptionsBuilder::default()
.size(new_size - old_size)
.addr(VMMapAddr::Need(old_range.end()))
.perms(perms)
.initializer(VMInitializer::FillZeros())
.initializer(vm_initializer_for_new_range)
.build()?;
let ret_addr = Some(old_addr);
(Some(mmap_opts), ret_addr)
}
(MRemapFlags::MayMove, SizeType::Growing) => {
(MRemapFlags::None, SizeType::Growing, Some((backed_file, offset))) => {
// Update writeback file offset
let new_writeback_file =
Some((backed_file.clone(), offset + containing_vma.size()));
let vm_initializer_for_new_range = VMInitializer::LoadFromFile {
file: backed_file.clone(),
offset: offset + containing_vma.size(), // the new pages continue where the old mapping's file content ends
};
let mmap_opts = VMMapOptionsBuilder::default()
.size(new_size - old_size)
.addr(VMMapAddr::Need(old_range.end()))
.perms(perms)
.initializer(vm_initializer_for_new_range)
.writeback_file(new_writeback_file)
.build()?;
let ret_addr = Some(old_addr);
(Some(mmap_opts), ret_addr)
}
(MRemapFlags::MayMove, SizeType::Growing, None) => {
let preferred_new_range =
VMRange::new_with_size(old_addr + old_size, new_size - old_size)?;
if self.is_free_range(&preferred_new_range) {
// Don't need to move the old range
let vm_initializer_for_new_range = VMInitializer::FillZeros();
let mmap_ops = VMMapOptionsBuilder::default()
.size(preferred_new_range.size())
.addr(VMMapAddr::Need(preferred_new_range.start()))
.perms(perms)
.initializer(VMInitializer::FillZeros())
.initializer(vm_initializer_for_new_range)
.build()?;
(Some(mmap_ops), Some(old_addr))
} else {
// Need to move old range to a new range and init the new range
let vm_initializer_for_new_range = VMInitializer::CopyFrom { range: old_range };
let mmap_ops = VMMapOptionsBuilder::default()
.size(new_size)
.addr(VMMapAddr::Any)
.perms(perms)
.initializer(VMInitializer::CopyFrom { range: old_range })
.initializer(vm_initializer_for_new_range)
.build()?;
// The return address cannot be determined yet; it is only known after the mmap completes
let ret_addr = None;
(Some(mmap_ops), ret_addr)
}
}
(MRemapFlags::FixedAddr(new_addr), _) => {
(MRemapFlags::MayMove, SizeType::Growing, Some((backed_file, offset))) => {
let preferred_new_range =
VMRange::new_with_size(old_addr + old_size, new_size - old_size)?;
if self.is_free_range(&preferred_new_range) {
// Don't need to move the old range
let vm_initializer_for_new_range = VMInitializer::LoadFromFile {
file: backed_file.clone(),
offset: offset + containing_vma.size(), // the new pages continue where the old mapping's file content ends
};
// The write back file should start from the new offset
let new_writeback_file =
Some((backed_file.clone(), offset + containing_vma.size()));
let mmap_ops = VMMapOptionsBuilder::default()
.size(preferred_new_range.size())
.addr(VMMapAddr::Need(preferred_new_range.start()))
.perms(perms)
.initializer(vm_initializer_for_new_range)
.writeback_file(new_writeback_file)
.build()?;
(Some(mmap_ops), Some(old_addr))
} else {
// Need to move old range to a new range and init the new range
let vm_initializer_for_new_range = {
let copy_end = containing_vma.end();
let copy_range = VMRange::new(old_range.start(), copy_end)?;
let reread_file_start_offset = offset + (copy_end - containing_vma.start()); // account for the VMA's own file offset
VMInitializer::CopyOldAndReadNew {
old_range: copy_range,
file: backed_file.clone(),
offset: reread_file_start_offset,
}
};
let new_writeback_file = Some((backed_file.clone(), *offset));
let mmap_ops = VMMapOptionsBuilder::default()
.size(new_size)
.addr(VMMapAddr::Any)
.perms(perms)
.initializer(vm_initializer_for_new_range)
.writeback_file(new_writeback_file)
.build()?;
// The return address cannot be determined yet; it is only known after the mmap completes
let ret_addr = None;
(Some(mmap_ops), ret_addr)
}
}
(MRemapFlags::FixedAddr(new_addr), _, None) => {
let vm_initializer_for_new_range = VMInitializer::CopyFrom { range: old_range };
let mmap_opts = VMMapOptionsBuilder::default()
.size(new_size)
.addr(VMMapAddr::Force(new_addr))
.perms(perms)
.initializer(VMInitializer::CopyFrom { range: old_range })
.initializer(vm_initializer_for_new_range)
.build()?;
let ret_addr = Some(new_addr);
(Some(mmap_opts), ret_addr)
}
(MRemapFlags::FixedAddr(new_addr), _, Some((backed_file, offset))) => {
let vm_initializer_for_new_range = {
let copy_end = containing_vma.end();
let copy_range = VMRange::new(old_range.start(), copy_end)?;
let reread_file_start_offset = offset + (copy_end - containing_vma.start()); // account for the VMA's own file offset
VMInitializer::CopyOldAndReadNew {
old_range: copy_range,
file: backed_file.clone(),
offset: reread_file_start_offset,
}
};
let new_writeback_file = Some((backed_file.clone(), *offset));
let mmap_opts = VMMapOptionsBuilder::default()
.size(new_size)
.addr(VMMapAddr::Force(new_addr))
.perms(perms)
.initializer(vm_initializer_for_new_range)
.writeback_file(new_writeback_file)
.build()?;
let ret_addr = Some(new_addr);
(Some(mmap_opts), ret_addr)

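The subtle part of the file-backed cases above is the file-offset bookkeeping. The snippet below condenses the two rules with made-up numbers; none of these names appear in the patch.

    // Rule check with made-up numbers: a two-page VMA mapped from file offset 0.
    const PAGE: usize = 4096;

    fn main() {
        let (vma_offset, vma_size) = (0usize, 2 * PAGE);
        // Rule 1: growing (in place or just after the old range) reads and
        // writes back the file where the old mapping's bytes end.
        let grow_offset = vma_offset + vma_size;
        assert_eq!(grow_offset, 2 * PAGE);
        // Rule 2: moving the whole mapping still mirrors the same file region,
        // so the new VMA keeps the old writeback offset.
        let moved_writeback_offset = vma_offset;
        assert_eq!(moved_writeback_offset, 0);
    }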
@@ -1091,6 +1091,166 @@ int test_mprotect_with_non_page_aligned_size() {
return 0;
}
int check_file_first_four_page(char *file_path, int first_page_val, int second_page_val,
int third_page_val, int fourth_page_val) {
int fd = open(file_path, O_RDONLY);
if (fd < 0) {
THROW_ERROR("file open failed");
}
if (check_file_with_repeated_bytes(fd, PAGE_SIZE, first_page_val) < 0) {
THROW_ERROR("unexpected file content");
}
if (check_file_with_repeated_bytes(fd, PAGE_SIZE, second_page_val) < 0) {
THROW_ERROR("unexpected file content");
}
if (check_file_with_repeated_bytes(fd, PAGE_SIZE, third_page_val) < 0) {
THROW_ERROR("unexpected file content\n");
}
if (check_file_with_repeated_bytes(fd, PAGE_SIZE, fourth_page_val) < 0) {
THROW_ERROR("unexpectbed file content");
}
close(fd);
return 0;
}
typedef int (* test_file_backed_mremap_fn_t) (void *, size_t, void **);
static int byte_val_0 = 0xff;
static int byte_val_1 = 0xab;
static int byte_val_2 = 0xcd;
static int byte_val_3 = 0xef;
int file_backed_mremap_simple(void *buf, size_t len, void **new_buf) {
void *expand_buf = mremap(buf, len, 2 * len, 0);
if (expand_buf == MAP_FAILED) {
THROW_ERROR("mremap with big size failed");
}
// Check the value assigned before
if (check_bytes_in_buf(expand_buf, len, byte_val_1) != 0) {
THROW_ERROR("check expand_buf error");
}
// Check the value of the second page, which should be mapped from the file
if (check_bytes_in_buf(expand_buf + len, len, byte_val_0) != 0) {
THROW_ERROR("check expand_buf error");
}
// Assign a new value to the second page
for (int i = len; i < len * 2; i++) { ((char *)expand_buf)[i] = byte_val_2; }
expand_buf = mremap(expand_buf, len * 2, 4 * len, 0);
if (expand_buf == MAP_FAILED) {
THROW_ERROR("mremap with bigger size failed");
}
// The third and fourth pages have not been assigned any new value, so they should still be zero.
if (check_bytes_in_buf((void *)(expand_buf + len * 2), len * 2, 0) != 0) {
THROW_ERROR("check buf content error");
}
// Assign a new value to the fourth page
for (int i = len * 3; i < len * 4; i++) { ((char *)expand_buf)[i] = byte_val_3; }
*new_buf = expand_buf;
return 0;
}
int file_backed_mremap_mem_may_move(void *buf, size_t len, void **new_buf) {
int prot = PROT_READ | PROT_WRITE;
// Allocate a gap buffer right after buf so that mremap cannot expand in place and must move to a new range
unsigned long gap_buf = (unsigned long) buf + len;
assert(gap_buf % PAGE_SIZE == 0);
void *ret = mmap((void *)gap_buf, PAGE_SIZE, prot,
MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, 0, 0);
if ((unsigned long)ret != gap_buf) {
THROW_ERROR("mmap gap_buf with preferred address failed");
}
void *expand_buf = mremap(buf, len, 2 * len, MREMAP_MAYMOVE);
if (expand_buf == MAP_FAILED) {
THROW_ERROR("mremap with big size failed");
}
// Check the value assigned before
if (check_bytes_in_buf(expand_buf, len, byte_val_1) != 0) {
THROW_ERROR("check expand_buf error");
}
// Check the value of the second page, which should be mapped from the file
if (check_bytes_in_buf(expand_buf + len, len, byte_val_0) != 0) {
THROW_ERROR("check expand_buf error");
}
// Assign a new value to the second page
for (int i = len; i < len * 2; i++) { ((char *)expand_buf)[i] = byte_val_2; }
// Mremap to a new fixed address
unsigned long fixed_addr = (unsigned long) expand_buf + 2 * len;
ret = mremap(expand_buf, len * 2, 4 * len, MREMAP_FIXED | MREMAP_MAYMOVE,
(void *)fixed_addr);
if ((unsigned long)ret != fixed_addr) {
THROW_ERROR("mremap with fixed address and more big size failed");
}
// The third and fourth pages have not been assigned any new value, so they should still be zero.
if (check_bytes_in_buf((void *)(fixed_addr + len * 2), len * 2, 0) != 0) {
THROW_ERROR("check buf content error");
}
// Assign a new value to the fourth page
for (int i = len * 3; i < len * 4; i++) { ((char *)fixed_addr)[i] = byte_val_3; }
int rc = munmap((void *)gap_buf, PAGE_SIZE);
if (rc < 0) {
THROW_ERROR("munmap gap_buf failed");
}
*new_buf = (void *)fixed_addr;
return 0;
}
int _test_file_backed_mremap(test_file_backed_mremap_fn_t fn) {
int prot = PROT_READ | PROT_WRITE;
size_t len = PAGE_SIZE;
char *file_path = "/tmp/test";
// O_TRUNC is not supported by Occlum yet, so remove any stale file to start from an empty one.
remove(file_path);
int fd = open(file_path, O_RDWR | O_CREAT | O_NOFOLLOW | O_CLOEXEC | O_TRUNC, 0600);
if (fd < 0) {
THROW_ERROR("open file error");
}
fallocate(fd, 0, 0, len * 4);
fill_file_with_repeated_bytes(fd, len * 2, byte_val_0);
void *buf = mmap(0, len, prot, MAP_SHARED, fd, 0);
if (buf == MAP_FAILED) {
THROW_ERROR("mmap failed");
}
for (int i = 0; i < len; i++) { ((char *)buf)[i] = byte_val_1; }
void *expand_buf = 0;
int ret = fn(buf, len, &expand_buf);
if (ret != 0) {
THROW_ERROR("mremap test failed");
}
int rc = msync((void *)expand_buf, 4 * len, MS_SYNC);
if (rc < 0) {
THROW_ERROR("msync failed");
}
rc = munmap((void *)expand_buf, 4 * len);
if (rc < 0) {
THROW_ERROR("munmap failed");
}
close(fd);
return check_file_first_four_page(file_path, byte_val_1, byte_val_2, 0, byte_val_3);
}
int test_file_backed_mremap() {
return _test_file_backed_mremap(file_backed_mremap_simple);
}
int test_file_backed_mremap_mem_may_move() {
return _test_file_backed_mremap(file_backed_mremap_mem_may_move);
}
// ============================================================================
// Test suite main
// ============================================================================
@@ -1124,6 +1284,8 @@ static test_case_t test_cases[] = {
TEST_CASE(test_mremap),
TEST_CASE(test_mremap_subrange),
TEST_CASE(test_mremap_with_fixed_addr),
TEST_CASE(test_file_backed_mremap),
TEST_CASE(test_file_backed_mremap_mem_may_move),
TEST_CASE(test_mprotect_once),
TEST_CASE(test_mprotect_twice),
TEST_CASE(test_mprotect_triple),