From f0793b0d305865ba43f4e861ef12de06a1b8149d Mon Sep 17 00:00:00 2001 From: jianfengjiang Date: Thu, 23 Sep 2021 18:25:41 +0800 Subject: [PATCH] implement autodep for copydirs --- tools/copy_bom/Cargo.lock | 21 +++++++ tools/copy_bom/Cargo.toml | 1 + tools/copy_bom/README.md | 10 ++-- tools/copy_bom/src/bom.rs | 111 ++++++++++++++++++++++++++++++++++-- tools/copy_bom/src/error.rs | 1 - tools/copy_bom/src/main.rs | 1 + tools/copy_bom/src/util.rs | 23 ++++++-- 7 files changed, 153 insertions(+), 15 deletions(-) diff --git a/tools/copy_bom/Cargo.lock b/tools/copy_bom/Cargo.lock index 618f080f..c14b1e63 100644 --- a/tools/copy_bom/Cargo.lock +++ b/tools/copy_bom/Cargo.lock @@ -94,6 +94,7 @@ dependencies = [ "sha2", "shellexpand", "structopt", + "walkdir", ] [[package]] @@ -347,6 +348,15 @@ version = "0.6.25" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "serde" version = "1.0.130" @@ -496,6 +506,17 @@ version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5fecdca9a5291cc2b8dcf7dc02453fee791a280f3743cb0905f8822ae463b3fe" +[[package]] +name = "walkdir" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56" +dependencies = [ + "same-file", + "winapi", + "winapi-util", +] + [[package]] name = "wasi" version = "0.10.2+wasi-snapshot-preview1" diff --git a/tools/copy_bom/Cargo.toml b/tools/copy_bom/Cargo.toml index 7d6cefb6..983c8f84 100644 --- a/tools/copy_bom/Cargo.toml +++ b/tools/copy_bom/Cargo.toml @@ -17,3 +17,4 @@ regex = "1.5.4" shellexpand = "2.1" elf = "0.0.10" structopt = 
"0.3.23" +walkdir = "2" diff --git a/tools/copy_bom/README.md b/tools/copy_bom/README.md index ddea4893..8cce2772 100644 --- a/tools/copy_bom/README.md +++ b/tools/copy_bom/README.md @@ -9,12 +9,12 @@ Bom file is used to describe which files should be copied to the root directory( `copy_bom` is the tool designed to create directories and symbolic links, copy all files and directories defined in a bom file to the root directory. Internally, `copy_bom` will use `rsync` to do the real file operations. `copy_bom` will copy each file and directory incrementally, i.e., only changed parts will be copied. The permission bits and modification times will be reserved. This is done by the `-a` option of `rsync`. `copy_bom` will not ensure the whole image directory as described in bom file (sync behavior) because it will not try to delete old files. To pursue a sync behavior, one can delete the old image directory and copy files again. ### dependencies -`copy_bom` will analyze all dependencies(shared objects) of each ELF file via the dynamic loader defined in the `.interp` section in the file and automatically copy dependencies to the root directory. Currently, `copy_bom` only copy dependencies with absolute paths. We support only one dependency pattern in the result of dynamic loader. +`copy_bom` will analyze all dependencies(shared objects) of each ELF file. `copy_bom` will analyze dependencies for each user-defined file in `files` entry as well as files in user-defined directory in `dirs` entry. For user-defined elf file, it will report error and abort the program if we can't find the dependent shared objects. For files in user-defined directories, we will report warning if autodep fails. We analyze dependencies via the dynamic loader defined in the `.interp` section in elf files and automatically copy dependencies to the root directory. If there's no `.interp` section for an elf file, `copy_bom` will try to infer the loader if all other elf files have the same loader. 
Currently, `copy_bom` only copies dependencies with absolute paths. We support only one dependency pattern in the result of dynamic loader. - name => path e.g., `libc.so.6 => /lib/x86_64-linux-gnu/libc.so.6` -All dependencies will be copied to the corresponding directory in root directory. For example, if root directory is `image`, then the dependency `/lib64/ld-linux-x86-64.so.2` will be copied to `image/lib/ld-linux-x86-64.so.2`. An entry named `autodep` with value `false` can be added to each file to avoid finding and copying dependencies automatically. +All dependencies will be copied to the corresponding directory in root directory. For example, if root directory is `image`, then the dependency `/lib64/ld-linux-x86-64.so.2` will be copied to `image/lib64/ld-linux-x86-64.so.2`. An entry named `autodep` with value `false` can be added to each file to avoid finding and copying dependencies automatically. ### log -`copy_bom` uses the same log setting as `occlum`. One can set `OCCLUM_LOG_LEVEL=trace` to see all logs printed by `copy_bom`. +`copy_bom` uses the same log setting as `occlum`. One can set `OCCLUM_LOG_LEVEL=trace` to see all logs printed by `copy_bom`. To only view real file operations, `OCCLUM_LOG_LEVEL=info` is a proper level. ### prepare and install 1.prepare. Since `copy_bom` relies on `rsync` to copy files. We need to install `rsync` at first. On ubuntu, this can be done by `apt install rsync -y`. @@ -52,5 +52,5 @@ The second part in the line indicates where to find shared libraries. All paths - Environmental variables pointing to an empty value may fail to resolve. # demos -1. The demos with `copy_bom` are in the `../../demos/bom-demos` directory. -2. Before using these demos, `rsync` and `copy_bom` should be installed. The file `base.yaml` should be copied to `/opt/occlum/etc/template`. +1. We have modified several demos to use `copy_bom`. +2. Before using these demos, `rsync` and `copy_bom` should be installed.
There should be at least `base.yaml` and `occlum_elf_loader.config` in `/opt/occlum/etc/template`. diff --git a/tools/copy_bom/src/bom.rs b/tools/copy_bom/src/bom.rs index 2e8d3143..1547a98d 100644 --- a/tools/copy_bom/src/bom.rs +++ b/tools/copy_bom/src/bom.rs @@ -17,6 +17,7 @@ use std::collections::{HashSet, VecDeque}; use std::hash::Hash; use std::path::PathBuf; use std::slice::Iter; +use walkdir::WalkDir; // The whole bom file #[derive(Debug, Clone, Serialize, Deserialize)] @@ -120,15 +121,29 @@ impl Bom { // remove redundant operations in each bom management remove_redundant(&mut bom_managements); // Since we have different copy options for each bom, we cannot copy all targets together. - let mut bom_managements_iter = bom_managements.into_iter(); + let mut bom_managements_iter = bom_managements.iter(); for bom in sorted_boms.into_iter() { // each bom corresponds to a bom management, so the unwrap will never fail bom.manage_self(bom_managements_iter.next().unwrap(), dry_run); } + // Try to autodep for each copydir + if !dry_run { + let mut made_dirs = Vec::new(); + let mut copied_shared_objects = Vec::new(); + for bom_management in bom_managements.iter() { + let dirs = bom_management.dirs_to_make.clone(); + let shared_objects = bom_management.shared_objects_to_copy.clone(); + made_dirs.extend(dirs); + copied_shared_objects.extend(shared_objects); + } + for bom_management in bom_managements.iter() { + bom_management.autodep_for_copydirs(&made_dirs, &copied_shared_objects, root_dir); + } + } } /// This func will only manage the current bom file without finding included bom files - pub fn manage_self(self, bom_management: BomManagement, dry_run: bool) { + pub fn manage_self(self, bom_management: &BomManagement, dry_run: bool) { let excludes = self.excludes.unwrap_or(Vec::new()); bom_management.manage(dry_run, excludes); } @@ -363,7 +378,7 @@ impl BomManagement { ) { let mut files_autodep_in_bom = Vec::new(); for mut target_management in 
target_managements.into_iter() { - // First, we need to resolve environmental variables + // First, we need to resolve environmental variables target_management.resolve_environmental_variables(); let TargetManagement { dirs_to_make, @@ -386,7 +401,8 @@ impl BomManagement { let default_loader = infer_default_loader(&files_autodep); debug!("default loader in autodep: {:?}", default_loader); for file_autodep in files_autodep.iter() { - let mut shared_objects = find_dependent_shared_objects(file_autodep, &default_loader); + let mut shared_objects = + find_dependent_shared_objects(file_autodep, &default_loader); for (src, dest) in shared_objects.drain() { let dest_path = dest_in_root(root_dir, &dest); // First, we create dir to store the dependency @@ -424,6 +440,93 @@ impl BomManagement { .iter() .for_each(|(src, dest)| copy_shared_object(src, dest, dry_run)); } + + // Try to analyse and copy dependencies for files in copydirs. + // We do this job after we really copy dirs. This is because rsync will help deal with soft link + // when we copy dirs. soft links pointing to file/dir out of tree will be transformed to the referent file/dir. + // soft links pointing to files(dirs) in tree will be kept. + // So, we can simply skip any soft link when we walk the dir. + // This func will also not take effect if we are with dry run mode. + // `copied_shared_objects` stores shared objects for copyfiles. We use it here to remove redundancy. + fn autodep_for_copydirs( + &self, + made_dirs: &Vec<String>, + copied_shared_objects: &Vec<(String, String)>, + root_dir: &str, + ) { + let BomManagement { dirs_to_copy, .. } = self; + // get all files in copydirs.
filter directories and symlinks + let mut files_in_copied_dirs = Vec::new(); + for (src, dest) in dirs_to_copy { + let dirname = PathBuf::from(src) + .file_name() + .unwrap() + .to_string_lossy() + .to_string(); + let dest_dir = PathBuf::from(dest) + .join(dirname) + .to_string_lossy() + .to_string(); + for entry in WalkDir::new(dest_dir) + .into_iter() + .filter_map(|e| e.ok()) + .filter(|entry| entry.file_type().is_file()) + { + files_in_copied_dirs.push(entry.path().to_string_lossy().to_string()); + } + } + // analyse dependencies for all files + // TODO: fix false-positive warnings + // When we find dependent shared objects for all files in copydir, it may report warning + // if we can't find the shared object. For files in directories, it may be a false-positive case, + // because we may already copy these shared objects when we copy the directory. + // But the loader cannot find these libraries automatically + // since we don't know how to set the proper LD_LIBRARY_PATH env. + // One possible method to fix this problem is that we don't directly report warning message + // when we can't find dependencies. We return all warning messages instead. Before we log these messages, + // we can check whether these libraries have already been copied when we copy the directory. + // This method can help avoid most false-positive warnings while not affecting which files to copy. + // User also can avoid these warnings by setting proper LD_LIBRARY_PATH in `/opt/occlum/etc/template/occlum_elf_loader.config`. + let default_loader = infer_default_loader(&files_in_copied_dirs); + let mut all_shared_objects = Vec::new(); + for file_path in files_in_copied_dirs.into_iter() { + let shared_objects = find_dependent_shared_objects(&file_path, &default_loader); + all_shared_objects.extend(shared_objects); + } + // We should not copy shared libraries already in image directory. + // This is because some libraries are in relative paths. We will filter these libraries.
+ let absolute_root_dir = std::fs::canonicalize(root_dir) + .unwrap() + .to_string_lossy() + .to_string(); + all_shared_objects = all_shared_objects + .into_iter() + .filter(|(src, _)| !src.starts_with(&absolute_root_dir)) + .map(|(src, dest)| { + let dest = dest_in_root(root_dir, &dest); + (src, dest.to_string_lossy().to_string()) + }) + .collect(); + // remove redundancy + let shared_objects = + remove_redundant_items_in_vec(&all_shared_objects, copied_shared_objects.iter()); + // create dirs for shared objects + let mut mkdirs = Vec::new(); + for (_, shared_object_dest) in shared_objects.iter() { + let shared_object_dir = PathBuf::from(shared_object_dest) + .parent() + .unwrap() + .to_string_lossy() + .to_string(); + mkdirs.push(shared_object_dir); + } + let mkdirs = remove_redundant_items_in_vec(&mkdirs, made_dirs.iter()); + // do real operations + mkdirs.iter().for_each(|dir| mkdir(dir, false)); + shared_objects + .iter() + .for_each(|(src, dest)| copy_shared_object(src, dest, false)); + } } impl TargetManagement { diff --git a/tools/copy_bom/src/error.rs b/tools/copy_bom/src/error.rs index 40ccb2df..35ac1e8d 100644 --- a/tools/copy_bom/src/error.rs +++ b/tools/copy_bom/src/error.rs @@ -6,4 +6,3 @@ pub static CREATE_DIR_ERROR: i32 = -4; pub static CREATE_SYMLINK_ERROR: i32 = -5; pub static COPY_DIR_ERROR: i32 = -6; pub static INCORRECT_HASH_ERROR: i32 = -7; -pub static SHARED_OBJECT_NOT_EXISTS_ERROR: i32 = -8; diff --git a/tools/copy_bom/src/main.rs b/tools/copy_bom/src/main.rs index 6351e28c..c3cf4de6 100644 --- a/tools/copy_bom/src/main.rs +++ b/tools/copy_bom/src/main.rs @@ -6,6 +6,7 @@ extern crate elf; extern crate env_logger; extern crate regex; extern crate shellexpand; +extern crate walkdir; use bom::Bom; use env_logger::Env; use structopt::StructOpt; diff --git a/tools/copy_bom/src/util.rs b/tools/copy_bom/src/util.rs index e9d1267f..cef99da9 100644 --- a/tools/copy_bom/src/util.rs +++ b/tools/copy_bom/src/util.rs @@ -1,8 +1,9 @@ use crate::error::{ 
COPY_DIR_ERROR, COPY_FILE_ERROR, CREATE_DIR_ERROR, CREATE_SYMLINK_ERROR, FILE_NOT_EXISTS_ERROR, - INCORRECT_HASH_ERROR, SHARED_OBJECT_NOT_EXISTS_ERROR, + INCORRECT_HASH_ERROR, }; use data_encoding::HEXUPPER; +use elf::types::{ET_DYN, ET_EXEC, Type}; use regex::Regex; use sha2::{Digest, Sha256}; use std::collections::{HashMap, HashSet}; @@ -196,6 +197,9 @@ pub fn calculate_file_hash(filename: &str) -> String { /// and analyze the stdout. We use regex to match the pattern of the loader output. /// The loader will automatically find all dependencies recursively, i.e., it will also find dependencies /// for each shared object, so we only need to analyze the top elf file. +/// The flag `exit_when_encountering_errors` is used to indicate the behavior if we can't find dependencies for an elf file. +/// If this flag is set true, the default behavior when encountering autodep errors is to print error message and exit program. +/// Otherwise, we will only print error message. pub fn find_dependent_shared_objects( file_path: &str, default_loader: &Option<(String, String)>, @@ -215,7 +219,11 @@ pub fn find_dependent_shared_objects( .default_lib_dirs .get(&occlum_elf_loader) .cloned(); - let mut objects = extract_dependencies_from_output(&file_path, output, default_lib_dirs); + let mut objects = extract_dependencies_from_output( + &file_path, + output, + default_lib_dirs, + ); for item in objects.drain() { shared_objects.insert(item); } @@ -277,6 +285,12 @@ fn auto_dynamic_loader( Err(_) => return None, Ok(elf_file) => elf_file, }; + // We should only try to find dependencies for dynamic libraries or executables + // relocatable files and core files are not included + match elf_file.ehdr.elftype { + ET_DYN|ET_EXEC => {}, + Type(_) => return None, + } match elf_file.get_section(".interp") { None => { // When the elf file does not has interp section @@ -285,7 +299,7 @@ fn auto_dynamic_loader( if let Some(default_loader) = default_loader { return Some(default_loader.clone()); } 
else { - warn!("cannot autodep for file {}. ", filename); + warn!("cannot autodep for file {}. No dynamic loader can be found or inferred.", filename); return None; } } @@ -352,8 +366,7 @@ pub fn extract_dependencies_from_output( let stderr = String::from_utf8_lossy(&output.stderr).to_string(); // audodep may output error message. We should return this message to user for further checking. if stderr.trim().len() > 0 { - error!("cannot autodep for {}. {}", file_path, stderr); - std::process::exit(SHARED_OBJECT_NOT_EXISTS_ERROR); + warn!("cannot autodep for {}. stderr: {}", file_path, stderr); } for line in stdout.lines() { let line = line.trim();