implement autodep for copydirs

This commit is contained in:
jianfengjiang 2021-09-23 18:25:41 +08:00 committed by Zongmin.Gu
parent aeca3cd5cb
commit f0793b0d30
7 changed files with 153 additions and 15 deletions

@ -94,6 +94,7 @@ dependencies = [
"sha2",
"shellexpand",
"structopt",
"walkdir",
]
[[package]]
@ -347,6 +348,15 @@ version = "0.6.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b"
[[package]]
name = "same-file"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
dependencies = [
"winapi-util",
]
[[package]]
name = "serde"
version = "1.0.130"
@ -496,6 +506,17 @@ version = "0.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5fecdca9a5291cc2b8dcf7dc02453fee791a280f3743cb0905f8822ae463b3fe"
[[package]]
name = "walkdir"
version = "2.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56"
dependencies = [
"same-file",
"winapi",
"winapi-util",
]
[[package]]
name = "wasi"
version = "0.10.2+wasi-snapshot-preview1"

@ -17,3 +17,4 @@ regex = "1.5.4"
shellexpand = "2.1"
elf = "0.0.10"
structopt = "0.3.23"
walkdir = "2"

@ -9,12 +9,12 @@ Bom file is used to describe which files should be copied to the root directory(
`copy_bom` is the tool designed to create directories and symbolic links, and to copy all files and directories defined in a bom file to the root directory. Internally, `copy_bom` uses `rsync` to do the real file operations. `copy_bom` copies each file and directory incrementally, i.e., only changed parts are copied. The permission bits and modification times are preserved. This is done by the `-a` option of `rsync`. `copy_bom` does not guarantee that the image directory exactly matches what the bom file describes (sync behavior), because it never tries to delete old files. To get sync behavior, one can delete the old image directory and copy the files again.
### dependencies
`copy_bom` will analyze all dependencies(shared objects) of each ELF file via the dynamic loader defined in the `.interp` section in the file and automatically copy dependencies to the root directory. Currently, `copy_bom` only copy dependencies with absolute paths. We support only one dependency pattern in the result of dynamic loader.
`copy_bom` will analyze all dependencies (shared objects) of each ELF file. It analyzes dependencies for each user-defined file in the `files` entry as well as for the files in each user-defined directory in the `dirs` entry. For a user-defined ELF file, it reports an error and aborts the program if the dependent shared objects cannot be found. For files in user-defined directories, it only reports a warning if autodep fails. Dependencies are analyzed via the dynamic loader defined in the `.interp` section of the ELF file and are automatically copied to the root directory. If an ELF file has no `.interp` section, `copy_bom` will try to infer the loader, which works if all other ELF files use the same loader. Currently, `copy_bom` only copies dependencies with absolute paths. Only one dependency pattern in the output of the dynamic loader is supported.
- name => path e.g., `libc.so.6 => /lib/x86_64-linux-gnu/libc.so.6`
All dependencies will be copied to the corresponding directory in root directory. For example, if root directory is `image`, then the dependency `/lib64/ld-linux-x86-64.so.2` will be copied to `image/lib/ld-linux-x86-64.so.2`. An entry named `autodep` with value `false` can be added to each file to avoid finding and copying dependencies automatically.
All dependencies will be copied to the corresponding directory in root directory. For example, if root directory is `image`, then the dependency `/lib64/ld-linux-x86-64.so.2` will be copied to `image/lib64/ld-linux-x86-64.so.2`. An entry named `autodep` with value `false` can be added to each file to avoid finding and copying dependencies automatically.
### log
`copy_bom` uses the same log setting as `occlum`. One can set `OCCLUM_LOG_LEVEL=trace` to see all logs printed by `copy_bom`.
`copy_bom` uses the same log setting as `occlum`. One can set `OCCLUM_LOG_LEVEL=trace` to see all logs printed by `copy_bom`. To only view real file operations, `OCCLUM_LOG_LEVEL=info` is a proper level.
### prepare and install
1. Prepare. Since `copy_bom` relies on `rsync` to copy files, `rsync` must be installed first. On Ubuntu, this can be done by `apt install rsync -y`.
@ -52,5 +52,5 @@ The second part in the line indicates where to find shared libraries. All paths
- Environmental variables pointing to an empty value may fail to resolve.
# demos
1. The demos with `copy_bom` are in the `../../demos/bom-demos` directory.
2. Before using these demos, `rsync` and `copy_bom` should be installed. The file `base.yaml` should be copied to `/opt/occlum/etc/template`.
1. We have modified several demos to use `copy_bom`.
2. Before using these demos, `rsync` and `copy_bom` should be installed. There should be at least `base.yaml` and `occlum_elf_loader.config` in `/opt/occlum/etc/template`.

@ -17,6 +17,7 @@ use std::collections::{HashSet, VecDeque};
use std::hash::Hash;
use std::path::PathBuf;
use std::slice::Iter;
use walkdir::WalkDir;
// The whole bom file
#[derive(Debug, Clone, Serialize, Deserialize)]
@ -120,15 +121,29 @@ impl Bom {
// remove redundant operations in each bom management
remove_redundant(&mut bom_managements);
// Since we have different copy options for each bom, we cannot copy all targets together.
let mut bom_managements_iter = bom_managements.into_iter();
let mut bom_managements_iter = bom_managements.iter();
for bom in sorted_boms.into_iter() {
// each bom corresponds to a bom management, so the unwrap will never fail
bom.manage_self(bom_managements_iter.next().unwrap(), dry_run);
}
// Try to autodep for each copydir
if !dry_run {
let mut made_dirs = Vec::new();
let mut copied_shared_objects = Vec::new();
for bom_management in bom_managements.iter() {
let dirs = bom_management.dirs_to_make.clone();
let shared_objects = bom_management.shared_objects_to_copy.clone();
made_dirs.extend(dirs);
copied_shared_objects.extend(shared_objects);
}
for bom_management in bom_managements.iter() {
bom_management.autodep_for_copydirs(&made_dirs, &copied_shared_objects, root_dir);
}
}
}
/// This func will only manage the current bom file without finding included bom files
pub fn manage_self(self, bom_management: BomManagement, dry_run: bool) {
pub fn manage_self(self, bom_management: &BomManagement, dry_run: bool) {
    // Fall back to an empty exclude list when this bom does not set `excludes`.
    let excluded_paths = self.excludes.unwrap_or_default();
    // Hand the real work over to the bom management that belongs to this bom.
    bom_management.manage(dry_run, excluded_paths);
}
@ -386,7 +401,8 @@ impl BomManagement {
let default_loader = infer_default_loader(&files_autodep);
debug!("default loader in autodep: {:?}", default_loader);
for file_autodep in files_autodep.iter() {
let mut shared_objects = find_dependent_shared_objects(file_autodep, &default_loader);
let mut shared_objects =
find_dependent_shared_objects(file_autodep, &default_loader);
for (src, dest) in shared_objects.drain() {
let dest_path = dest_in_root(root_dir, &dest);
// First, we create dir to store the dependency
@ -424,6 +440,93 @@ impl BomManagement {
.iter()
.for_each(|(src, dest)| copy_shared_object(src, dest, dry_run));
}
// Try to analyse and copy dependencies for files in copydirs.
// We do this job after we really copy dirs. This is because rsync will help deal with soft link
// when we copy dirs. soft links pointing to file/dir out of tree will be transformed to the referent file/dir.
// soft links pointing to files(dirs) in tree will be kept.
// So, we can simply skip any soft link when we walk the dir.
// This func will also not take effect if we are with dry run mode.
// `copied_shared_objects` stores shared objects for copyfiles. We use it here to remove redundance.
fn autodep_for_copydirs(
&self,
made_dirs: &Vec<String>,
copied_shared_objects: &Vec<(String, String)>,
root_dir: &str,
) {
let BomManagement { dirs_to_copy, .. } = self;
// get all files in copydirs. filter directories and symlinks
let mut files_in_copied_dirs = Vec::new();
for (src, dest) in dirs_to_copy {
let dirname = PathBuf::from(src)
.file_name()
.unwrap()
.to_string_lossy()
.to_string();
let dest_dir = PathBuf::from(dest)
.join(dirname)
.to_string_lossy()
.to_string();
for entry in WalkDir::new(dest_dir)
.into_iter()
.filter_map(|e| e.ok())
.filter(|entry| entry.file_type().is_file())
{
files_in_copied_dirs.push(entry.path().to_string_lossy().to_string());
}
}
// analyse dependencies for all files
// TODO: fix false-positive warnings
// When we find dependent shared objects for all files in copydir, it may report warning
// if we can't find the shared object. For files in directories, it may be a false-positive case,
// because we may already copy these shared objects when we copy the directory.
// But the loader cannot find these libraries antomatically
// since we don't know how to set the proper LD_LIBRARY_PATH env.
// One possible method to fix this problem is that we don't directly report warning message
// when we can't find dependencies. We return all warning message instead. Before we log these message,
// we can check whether these libraries has already been copied when we copy the directory.
// This method can help avoid most false-positive warnings while not affecting which files to copy.
// User also can avoid these warnings by setting proper LD_LIBRARY_PATH in `/opt/occlum/etc/template/occlum_elf_loader.config`.
let default_loader = infer_default_loader(&files_in_copied_dirs);
let mut all_shared_objects = Vec::new();
for file_path in files_in_copied_dirs.into_iter() {
let shared_objects = find_dependent_shared_objects(&file_path, &default_loader);
all_shared_objects.extend(shared_objects);
}
// We should not copy shared libraries already in image directory.
// This is due to some libraries are in relative path. We will filter these libraries.
let absolute_root_dir = std::fs::canonicalize(root_dir)
.unwrap()
.to_string_lossy()
.to_string();
all_shared_objects = all_shared_objects
.into_iter()
.filter(|(src, _)| !src.starts_with(&absolute_root_dir))
.map(|(src, dest)| {
let dest = dest_in_root(root_dir, &dest);
(src, dest.to_string_lossy().to_string())
})
.collect();
// remove redundancy
let shared_objects =
remove_redundant_items_in_vec(&all_shared_objects, copied_shared_objects.iter());
// create dirs for shared objects
let mut mkdirs = Vec::new();
for (_, shared_object_dest) in shared_objects.iter() {
let shared_object_dir = PathBuf::from(shared_object_dest)
.parent()
.unwrap()
.to_string_lossy()
.to_string();
mkdirs.push(shared_object_dir);
}
let mkdirs = remove_redundant_items_in_vec(&mkdirs, made_dirs.iter());
// do real operations
mkdirs.iter().for_each(|dir| mkdir(dir, false));
shared_objects
.iter()
.for_each(|(src, dest)| copy_shared_object(src, dest, false));
}
}
impl TargetManagement {

@ -6,4 +6,3 @@ pub static CREATE_DIR_ERROR: i32 = -4;
pub static CREATE_SYMLINK_ERROR: i32 = -5;
pub static COPY_DIR_ERROR: i32 = -6;
pub static INCORRECT_HASH_ERROR: i32 = -7;
pub static SHARED_OBJECT_NOT_EXISTS_ERROR: i32 = -8;

@ -6,6 +6,7 @@ extern crate elf;
extern crate env_logger;
extern crate regex;
extern crate shellexpand;
extern crate walkdir;
use bom::Bom;
use env_logger::Env;
use structopt::StructOpt;

@ -1,8 +1,9 @@
use crate::error::{
COPY_DIR_ERROR, COPY_FILE_ERROR, CREATE_DIR_ERROR, CREATE_SYMLINK_ERROR, FILE_NOT_EXISTS_ERROR,
INCORRECT_HASH_ERROR, SHARED_OBJECT_NOT_EXISTS_ERROR,
INCORRECT_HASH_ERROR,
};
use data_encoding::HEXUPPER;
use elf::types::{ET_DYN, ET_EXEC, Type};
use regex::Regex;
use sha2::{Digest, Sha256};
use std::collections::{HashMap, HashSet};
@ -196,6 +197,9 @@ pub fn calculate_file_hash(filename: &str) -> String {
/// and analyze the stdout. We use regex to match the pattern of the loader output.
/// The loader will automatically find all dependencies recursively, i.e., it will also find dependencies
/// for each shared object, so we only need to analyze the top elf file.
/// The flag `exit_when_encountering_errors` is used to indicate the behavior if we can't find dependencies for an elf file.
/// If this flag is set true, the default behavior when encountering autodep errors is to print error message and exit program.
/// Otherwise, we will only print error message.
pub fn find_dependent_shared_objects(
file_path: &str,
default_loader: &Option<(String, String)>,
@ -215,7 +219,11 @@ pub fn find_dependent_shared_objects(
.default_lib_dirs
.get(&occlum_elf_loader)
.cloned();
let mut objects = extract_dependencies_from_output(&file_path, output, default_lib_dirs);
let mut objects = extract_dependencies_from_output(
&file_path,
output,
default_lib_dirs,
);
for item in objects.drain() {
shared_objects.insert(item);
}
@ -277,6 +285,12 @@ fn auto_dynamic_loader(
Err(_) => return None,
Ok(elf_file) => elf_file,
};
// We should only try to find dependencies for dynamic libraries or executables
// relocatable files and core files are not included
match elf_file.ehdr.elftype {
ET_DYN|ET_EXEC => {},
Type(_) => return None,
}
match elf_file.get_section(".interp") {
None => {
// When the elf file does not has interp section
@ -285,7 +299,7 @@ fn auto_dynamic_loader(
if let Some(default_loader) = default_loader {
return Some(default_loader.clone());
} else {
warn!("cannot autodep for file {}. ", filename);
warn!("cannot autodep for file {}. No dynamic loader can be found or inferred.", filename);
return None;
}
}
@ -352,8 +366,7 @@ pub fn extract_dependencies_from_output(
let stderr = String::from_utf8_lossy(&output.stderr).to_string();
// audodep may output error message. We should return this message to user for further checking.
if stderr.trim().len() > 0 {
error!("cannot autodep for {}. {}", file_path, stderr);
std::process::exit(SHARED_OBJECT_NOT_EXISTS_ERROR);
warn!("cannot autodep for {}. stderr: {}", file_path, stderr);
}
for line in stdout.lines() {
let line = line.trim();