diff --git a/src/libos/src/vm/vm_manager.rs b/src/libos/src/vm/vm_manager.rs index a20906aa..6f8a2065 100644 --- a/src/libos/src/vm/vm_manager.rs +++ b/src/libos/src/vm/vm_manager.rs @@ -7,8 +7,19 @@ use super::vm_perms::VMPerms; pub enum VMInitializer { DoNothing(), FillZeros(), - CopyFrom { range: VMRange }, - LoadFromFile { file: FileRef, offset: usize }, + CopyFrom { + range: VMRange, + }, + LoadFromFile { + file: FileRef, + offset: usize, + }, + // For file-backed mremap which may move from old range to new range and read extra bytes from file + CopyOldAndReadNew { + old_range: VMRange, + file: FileRef, + offset: usize, // read file from this offset + }, } impl Default for VMInitializer { @@ -45,6 +56,24 @@ impl VMInitializer { *b = 0; } } + VMInitializer::CopyOldAndReadNew { + old_range, + file, + offset, + } => { + // TODO: Handle old_range with non-readable subrange + let src_slice = unsafe { old_range.as_slice() }; + let copy_len = src_slice.len(); + debug_assert!(copy_len <= buf.len()); + let read_len = buf.len() - copy_len; + buf[..copy_len].copy_from_slice(&src_slice[..copy_len]); + let len = file + .read_at(*offset, &mut buf[copy_len..]) + .cause_err(|_| errno!(EIO, "failed to init memory from file"))?; + for b in &mut buf[(copy_len + len)..] 
{ + *b = 0; + } + } } Ok(()) } @@ -325,7 +354,6 @@ impl VMManager { // After initializing, we can safely insert the new VMA self.insert_new_vma(insert_idx, new_vma); - Ok(new_addr) } @@ -404,67 +432,164 @@ impl VMManager { } else { SizeType::Growing }; - + // The old range must be contained in one VMA + let idx = self + .find_containing_vma_idx(&old_range) + .ok_or_else(|| errno!(EFAULT, "invalid range"))?; + let containing_vma = &self.vmas[idx]; // Get the memory permissions of the old range - let perms = { - // The old range must be contained in one VMA - let idx = self - .find_containing_vma_idx(&old_range) - .ok_or_else(|| errno!(EFAULT, "invalid range"))?; - let containing_vma = &self.vmas[idx]; - containing_vma.perms() - }; + let perms = containing_vma.perms(); + // Get the write back file of the old range if there is one. + let writeback_file = containing_vma.writeback_file(); + + // FIXME: Current implementation for file-backed memory mremap has limitation that if a SUBRANGE of the previous + // file-backed mmap with MAP_SHARED is then mremap-ed with MREMAP_MAYMOVE, there will be two vmas that have the same backed file. + // For Linux, writing to either memory vma or the file will update the other two equally. But we won't be able to support this before + // we really have paging. Thus, if the old_range is not equal to a recorded vma, we will just return with error. + if writeback_file.is_some() && &old_range != containing_vma.range() { + return_errno!(EINVAL, "Known limition") + } // Implement mremap as one optional mmap followed by one optional munmap. // - // The exact arguments for the mmap and munmap are determined by the values of MRemapFlags - // and SizeType. There is a total of 9 combinations between MRemapFlags and SizeType. - // As some combinations result in the same mmap and munmap operations, the following code - // only needs to match four patterns of (MRemapFlags, SizeType) and treat each case - // accordingly. 
+ // The exact arguments for the mmap and munmap are determined by the values of MRemapFlags, + // SizeType and writeback_file. There is a total of 18 combinations among MRemapFlags and + // SizeType and writeback_file. As some combinations result in the same mmap and munmap operations, + // the following code only needs to match below patterns of (MRemapFlags, SizeType, writeback_file) + // and treat each case accordingly. // Determine whether need to do mmap. And when possible, determine the returned address - // TODO: should fill zeros even when extending a file-backed mapping? - let (need_mmap, mut ret_addr) = match (flags, size_type) { - (MRemapFlags::None, SizeType::Growing) => { + let (need_mmap, mut ret_addr) = match (flags, size_type, writeback_file) { + (MRemapFlags::None, SizeType::Growing, None) => { + let vm_initializer_for_new_range = VMInitializer::FillZeros(); let mmap_opts = VMMapOptionsBuilder::default() .size(new_size - old_size) .addr(VMMapAddr::Need(old_range.end())) .perms(perms) - .initializer(VMInitializer::FillZeros()) + .initializer(vm_initializer_for_new_range) .build()?; let ret_addr = Some(old_addr); (Some(mmap_opts), ret_addr) } - (MRemapFlags::MayMove, SizeType::Growing) => { + (MRemapFlags::None, SizeType::Growing, Some((backed_file, offset))) => { + // Update writeback file offset + let new_writeback_file = + Some((backed_file.clone(), offset + containing_vma.size())); + let vm_initializer_for_new_range = VMInitializer::LoadFromFile { + file: backed_file.clone(), + offset: offset + containing_vma.size(), // file-backed mremap should start from the end of previous mmap/mremap file + }; + let mmap_opts = VMMapOptionsBuilder::default() + .size(new_size - old_size) + .addr(VMMapAddr::Need(old_range.end())) + .perms(perms) + .initializer(vm_initializer_for_new_range) + .writeback_file(new_writeback_file) + .build()?; + let ret_addr = Some(old_addr); + (Some(mmap_opts), ret_addr) + } + (MRemapFlags::MayMove, SizeType::Growing, None) => { 
let prefered_new_range = VMRange::new_with_size(old_addr + old_size, new_size - old_size)?; if self.is_free_range(&prefered_new_range) { + // Don't need to move the old range + let vm_initializer_for_new_range = VMInitializer::FillZeros(); let mmap_ops = VMMapOptionsBuilder::default() .size(prefered_new_range.size()) .addr(VMMapAddr::Need(prefered_new_range.start())) .perms(perms) - .initializer(VMInitializer::FillZeros()) + .initializer(vm_initializer_for_new_range) .build()?; (Some(mmap_ops), Some(old_addr)) } else { + // Need to move old range to a new range and init the new range + let vm_initializer_for_new_range = VMInitializer::CopyFrom { range: old_range }; let mmap_ops = VMMapOptionsBuilder::default() .size(new_size) .addr(VMMapAddr::Any) .perms(perms) - .initializer(VMInitializer::CopyFrom { range: old_range }) + .initializer(vm_initializer_for_new_range) .build()?; // Cannot determine the returned address for now, which can only be obtained after calling mmap let ret_addr = None; (Some(mmap_ops), ret_addr) } } - (MRemapFlags::FixedAddr(new_addr), _) => { + (MRemapFlags::MayMove, SizeType::Growing, Some((backed_file, offset))) => { + let prefered_new_range = + VMRange::new_with_size(old_addr + old_size, new_size - old_size)?; + if self.is_free_range(&prefered_new_range) { + // Don't need to move the old range + let vm_initializer_for_new_range = VMInitializer::LoadFromFile { + file: backed_file.clone(), + offset: offset + containing_vma.size(), // file-backed mremap should start from the end of previous mmap/mremap file + }; + // Write back file should start from new offset + let new_writeback_file = + Some((backed_file.clone(), offset + containing_vma.size())); + let mmap_ops = VMMapOptionsBuilder::default() + .size(prefered_new_range.size()) + .addr(VMMapAddr::Need(prefered_new_range.start())) + .perms(perms) + .initializer(vm_initializer_for_new_range) + .writeback_file(new_writeback_file) + .build()?; + (Some(mmap_ops), Some(old_addr)) + } else { + // 
Need to move old range to a new range and init the new range + let vm_initializer_for_new_range = { + let copy_end = containing_vma.end(); + let copy_range = VMRange::new(old_range.start(), copy_end)?; + let reread_file_start_offset = copy_end - containing_vma.start(); + VMInitializer::CopyOldAndReadNew { + old_range: copy_range, + file: backed_file.clone(), + offset: reread_file_start_offset, + } + }; + let new_writeback_file = Some((backed_file.clone(), *offset)); + let mmap_ops = VMMapOptionsBuilder::default() + .size(new_size) + .addr(VMMapAddr::Any) + .perms(perms) + .initializer(vm_initializer_for_new_range) + .writeback_file(new_writeback_file) + .build()?; + // Cannot determine the returned address for now, which can only be obtained after calling mmap + let ret_addr = None; + (Some(mmap_ops), ret_addr) + } + } + (MRemapFlags::FixedAddr(new_addr), _, None) => { + let vm_initializer_for_new_range = { VMInitializer::CopyFrom { range: old_range } }; let mmap_opts = VMMapOptionsBuilder::default() .size(new_size) .addr(VMMapAddr::Force(new_addr)) .perms(perms) - .initializer(VMInitializer::CopyFrom { range: old_range }) + .initializer(vm_initializer_for_new_range) + .build()?; + let ret_addr = Some(new_addr); + (Some(mmap_opts), ret_addr) + } + (MRemapFlags::FixedAddr(new_addr), _, Some((backed_file, offset))) => { + let vm_initializer_for_new_range = { + let copy_end = containing_vma.end(); + let copy_range = VMRange::new(old_range.start(), copy_end)?; + let reread_file_start_offset = copy_end - containing_vma.start(); + VMInitializer::CopyOldAndReadNew { + old_range: copy_range, + file: backed_file.clone(), + offset: reread_file_start_offset, + } + }; + let new_writeback_file = Some((backed_file.clone(), *offset)); + let mmap_opts = VMMapOptionsBuilder::default() + .size(new_size) + .addr(VMMapAddr::Force(new_addr)) + .perms(perms) + .initializer(vm_initializer_for_new_range) + .writeback_file(new_writeback_file) .build()?; let ret_addr = Some(new_addr); 
(Some(mmap_opts), ret_addr) diff --git a/test/mmap/main.c b/test/mmap/main.c index ad2463b3..81971b39 100644 --- a/test/mmap/main.c +++ b/test/mmap/main.c @@ -1091,6 +1091,166 @@ int test_mprotect_with_non_page_aligned_size() { return 0; } +int check_file_first_four_page(char *file_path, int first_page_val, int second_page_val, + int third_page_val, int fourth_page_val) { + int fd = open(file_path, O_RDONLY); + if (fd < 0) { + THROW_ERROR("file open failed"); + } + if (check_file_with_repeated_bytes(fd, PAGE_SIZE, first_page_val) < 0) { + THROW_ERROR("unexpected file content"); + } + if (check_file_with_repeated_bytes(fd, PAGE_SIZE, second_page_val) < 0) { + THROW_ERROR("unexpected file content"); + } + + if (check_file_with_repeated_bytes(fd, PAGE_SIZE, third_page_val) < 0) { + THROW_ERROR("unexpected file content\n"); + } + + if (check_file_with_repeated_bytes(fd, PAGE_SIZE, fourth_page_val) < 0) { + THROW_ERROR("unexpected file content"); + } + close(fd); + return 0; +} + +typedef int (* test_file_backed_mremap_fn_t) (void *, size_t, void **); + +static int byte_val_0 = 0xff; +static int byte_val_1 = 0xab; +static int byte_val_2 = 0xcd; +static int byte_val_3 = 0xef; + +int file_backed_mremap_simple(void *buf, size_t len, void **new_buf) { + void *expand_buf = mremap(buf, len, 2 * len, 0); + if (expand_buf == MAP_FAILED) { + THROW_ERROR("mremap with big size failed"); + } + // Check the value assigned before + if (check_bytes_in_buf(expand_buf, len, byte_val_1) != 0 ) { + THROW_ERROR("check expand_buf error"); + }; + // Check the value of second page which should be mapped from file + if (check_bytes_in_buf(expand_buf + len, len, byte_val_0) != 0 ) { + THROW_ERROR("check expand_buf error"); + }; + // Assign new value to the second page + for (int i = len; i < len * 2; i++) { ((char *)expand_buf)[i] = byte_val_2; } + + expand_buf = mremap(expand_buf, len * 2, 4 * len, 0); + if (expand_buf == MAP_FAILED) { + THROW_ERROR("mremap with bigger size failed"); + } + // 
Third and fourth page are not assigned any new value, so should still be 0. + if (check_bytes_in_buf((void *)(expand_buf + len * 2), len * 2, 0) != 0) { + THROW_ERROR("check buf content error"); + }; + + // Assign new value to the fourth page + for (int i = len * 3; i < len * 4; i++) { ((char *)expand_buf)[i] = byte_val_3; } + *new_buf = expand_buf; + return 0; +} + +int file_backed_mremap_mem_may_move(void *buf, size_t len, void **new_buf) { + int prot = PROT_READ | PROT_WRITE; + // Allocate a gap buffer to make sure mremap buf must move to a new range + unsigned long gap_buf = (unsigned long) buf + len; + assert(gap_buf % PAGE_SIZE == 0); + void *ret = mmap((void *)gap_buf, PAGE_SIZE, prot, + MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, 0, 0); + if ((unsigned long)ret != gap_buf) { + THROW_ERROR("mmap gap_buf with preferred address failed"); + } + + void *expand_buf = mremap(buf, len, 2 * len, MREMAP_MAYMOVE); + if (expand_buf == MAP_FAILED) { + THROW_ERROR("mremap with big size failed"); + } + // Check the value assigned before + if (check_bytes_in_buf(expand_buf, len, byte_val_1) != 0 ) { + THROW_ERROR("check expand_buf error"); + }; + // Check the value of second page which should be mapped from file + if (check_bytes_in_buf(expand_buf + len, len, byte_val_0) != 0 ) { + THROW_ERROR("check expand_buf error"); + }; + // Assign new value to the second page + for (int i = len; i < len * 2; i++) { ((char *)expand_buf)[i] = byte_val_2; } + + // Mremap to a new fixed address + unsigned long fixed_addr = (unsigned long) expand_buf + 2 * len; + ret = mremap(expand_buf, len * 2, 4 * len, MREMAP_FIXED | MREMAP_MAYMOVE, + (void *)fixed_addr); + if ((unsigned long)ret != fixed_addr) { + THROW_ERROR("mremap with fixed address and more big size failed"); + } + // Third and fourth page are not assigned any new value, so should still be 0. 
+ if (check_bytes_in_buf((void *)(fixed_addr + len * 2), len * 2, 0) != 0) { + THROW_ERROR("check buf content error"); + }; + + // Assign new value to the fourth page + for (int i = len * 3; i < len * 4; i++) { ((char *)fixed_addr)[i] = byte_val_3; } + + int rc = munmap((void *)gap_buf, PAGE_SIZE); + if (rc < 0) { + THROW_ERROR("munmap gap_buf failed"); + } + + *new_buf = (void *)fixed_addr; + return 0; +} + +int _test_file_backed_mremap(test_file_backed_mremap_fn_t fn) { + int prot = PROT_READ | PROT_WRITE; + size_t len = PAGE_SIZE; + char *file_path = "/tmp/test"; + + // O_TRUNC is not supported by Occlum yet. + remove(file_path); + int fd = open(file_path, O_RDWR | O_CREAT | O_NOFOLLOW | O_CLOEXEC | O_TRUNC, 0600); + if (fd < 0) { + THROW_ERROR("open file error"); + } + fallocate(fd, 0, 0, len * 4); + fill_file_with_repeated_bytes(fd, len * 2, byte_val_0); + + void *buf = mmap(0, len, prot, MAP_SHARED, fd, 0); + if (buf == MAP_FAILED) { + THROW_ERROR("mmap failed"); + } + for (int i = 0; i < len; i++) { ((char *)buf)[i] = byte_val_1; } + + void *expand_buf = 0; + int ret = fn(buf, len, &expand_buf); + if (ret != 0) { + THROW_ERROR("mremap test failed"); + } + + int rc = msync((void *)expand_buf, 4 * len, MS_SYNC); + if (rc < 0) { + THROW_ERROR("msync failed"); + } + rc = munmap((void *)expand_buf, 4 * len); + if (rc < 0) { + THROW_ERROR("munmap failed"); + } + + close(fd); + + return check_file_first_four_page(file_path, byte_val_1, byte_val_2, 0, byte_val_3); +} + +int test_file_backed_mremap() { + return _test_file_backed_mremap(file_backed_mremap_simple); +} + +int test_file_backed_mremap_mem_may_move() { + return _test_file_backed_mremap(file_backed_mremap_mem_may_move); +} + // ============================================================================ // Test suite main // ============================================================================ @@ -1124,6 +1284,8 @@ static test_case_t test_cases[] = { TEST_CASE(test_mremap), 
TEST_CASE(test_mremap_subrange), TEST_CASE(test_mremap_with_fixed_addr), + TEST_CASE(test_file_backed_mremap), + TEST_CASE(test_file_backed_mremap_mem_may_move), TEST_CASE(test_mprotect_once), TEST_CASE(test_mprotect_twice), TEST_CASE(test_mprotect_triple),