occlum/tools/copy_bom/src/util.rs
2022-09-28 16:51:42 +08:00

619 lines
25 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

use crate::error::{
COPY_DIR_ERROR, COPY_FILE_ERROR, CREATE_DIR_ERROR, CREATE_SYMLINK_ERROR, FILE_NOT_EXISTS_ERROR,
INCORRECT_HASH_ERROR, MISSING_LIBRARY_ERROR, RSYNC_NOT_FOUND_ERROR,
};
use data_encoding::HEXUPPER;
use elf::types::{Type, ET_DYN, ET_EXEC};
use regex::Regex;
use sha2::{Digest, Sha256};
use std::collections::{HashMap, HashSet};
use std::path::PathBuf;
use std::process::{Command, Output, Stdio};
use std::sync::Mutex;
use std::vec;
/// Loader information read from the loader config file.
///
/// The three maps are filled together, one entry per config line:
/// - `loader_paths`: key is the loader file name, value is the loader path on the host.
/// - `ld_library_path_envs`: key is the loader path, value is the `LD_LIBRARY_PATH` value, built by
///   joining the loader directory and the user-provided path with `:`.
/// - `default_lib_dirs`: key is the loader path, value is every non-empty directory parsed from that
///   `LD_LIBRARY_PATH` value (the loader directory comes first).
///
/// We keep `default_lib_dirs` because we may not have the same LD_LIBRARY_PATH in the occlum image.
/// When we use `occlum run`, the loader in the occlum image won't search every directory in
/// LD_LIBRARY_PATH, so libraries found in these `default_lib_dirs` are copied to the loader directory.
#[derive(Debug)]
struct OcclumLoaders {
// loader file name -> loader path on the host
loader_paths: HashMap<String, String>,
// loader path -> LD_LIBRARY_PATH value used when running that loader
ld_library_path_envs: HashMap<String, String>,
// loader path -> directories parsed from the LD_LIBRARY_PATH value
default_lib_dirs: HashMap<String, Vec<String>>,
}
lazy_static! {
    /// Occlum-modified loaders read from `LOADER_CONFIG_FILE`.
    ///
    /// Every non-empty config line is `<loader path> [<library dirs separated by ':'>]`,
    /// with fields separated by whitespace. For each line we record:
    /// - loader file name -> loader path,
    /// - loader path -> `LD_LIBRARY_PATH` (loader directory, then the configured dirs),
    /// - loader path -> the non-empty directories of that `LD_LIBRARY_PATH`.
    ///
    /// A missing or unreadable config file, or a malformed line, only produces a warning;
    /// the affected loaders are simply not registered.
    static ref OCCLUM_LOADERS: OcclumLoaders = {
        const LOADER_CONFIG_FILE: &'static str = "/opt/occlum/etc/template/occlum_elf_loader.config";
        let mut loader_paths = HashMap::new();
        let mut ld_library_path_envs = HashMap::new();
        let mut default_lib_dirs = HashMap::new();
        let config_path = PathBuf::from(LOADER_CONFIG_FILE);
        if !config_path.is_file() {
            // if no given config file is found, we will use the default loader in elf headers
            warn!("fail to find loader config file {}. No loader is set!", LOADER_CONFIG_FILE);
        } else {
            // An unreadable file is treated like an empty one instead of panicking.
            let file_content = std::fs::read_to_string(&config_path).unwrap_or_else(|e| {
                warn!("fail to read loader config file {}. {}", LOADER_CONFIG_FILE, e);
                String::new()
            });
            for line in file_content.lines() {
                // `split_whitespace` skips blank lines and tolerates repeated separators.
                let mut fields = line.split_whitespace();
                // The first field is the loader path.
                let loader_path = match fields.next() {
                    Some(path) => path.to_string(),
                    None => continue,
                };
                let loader_path_buf = PathBuf::from(&loader_path);
                let (loader_file_name, loader_dir) =
                    match (loader_path_buf.file_name(), loader_path_buf.parent()) {
                        (Some(name), Some(dir)) => (
                            name.to_string_lossy().to_string(),
                            dir.to_string_lossy().to_string(),
                        ),
                        _ => {
                            warn!(
                                "ignore invalid loader path {} in {}.",
                                loader_path, LOADER_CONFIG_FILE
                            );
                            continue;
                        }
                    };
                // The loader directory plus the optional second field is LD_LIBRARY_PATH.
                // Without a second field we must not leave a trailing ':' behind.
                let ld_library_path = match fields.next() {
                    Some(user_dirs) => format!("{}:{}", loader_dir, user_dirs),
                    None => loader_dir,
                };
                // parse all directories in LD_LIBRARY_PATH
                let lib_paths: Vec<String> = ld_library_path
                    .split(':')
                    .filter(|dir| !dir.is_empty())
                    .map(|dir| dir.to_string())
                    .collect();
                loader_paths.insert(loader_file_name, loader_path.clone());
                ld_library_path_envs.insert(loader_path.clone(), ld_library_path);
                default_lib_dirs.insert(loader_path, lib_paths);
            }
        }
        debug!("occlum elf loaders: {:?}", loader_paths);
        debug!("occlum ld_library_path envs: {:?}", ld_library_path_envs);
        debug!("default lib dirs: {:?}", default_lib_dirs);
        OcclumLoaders {loader_paths, ld_library_path_envs, default_lib_dirs}
    };
}
// Patterns used to extract dependencies from the output of the dynamic loader (ldd-style result).
// Each pattern exposes the library name through the `name` capture group.
lazy_static! {
/// Matches a resolved dependency line and captures `name` and `path`.
/// Note that the pattern requires a space after the path.
/// pattern: name => path
/// example: libc.so.6 => /lib/x86_64-linux-gnu/libc.so.6
static ref DEPENDENCY_REGEX: Regex = Regex::new(r"^(?P<name>\S+) => (?P<path>\S+) ").unwrap();
/// Matches a dependency line that the loader could not resolve.
/// pattern: name => not found
/// example: libstdc++.so.6 => not found
static ref NOT_FOUND_REGEX: Regex = Regex::new(r"^(?P<name>\S+) => not found").unwrap();
/// Matches an error message reporting a library that could not be loaded.
/// Unlike the two patterns above, it is not anchored to the start of the line.
/// pattern: Error loading shared library name: No such file or directory
/// example: Error loading shared library libstdc++.so.6: No such file or directory
static ref ERROR_LOADING_REGEX: Regex = Regex::new(r"Error loading shared library (?P<name>\S+): No such file or directory").unwrap();
}
lazy_static! {
/// Global set of library names that the loader reported as missing.
/// Filled by `add_missing_library` and read by `lazy_check_missing_libraries`.
static ref MISSING_LIBRARIES: Mutex<HashSet<String>> = Mutex::new(HashSet::new());
}
/// Copy `src` to `dest` by running `rsync -aL`.
///
/// The command is always logged. With `dry_run` set, nothing is executed.
/// If rsync cannot be started, or `deal_with_output` rejects its output,
/// the process exits with `COPY_FILE_ERROR`.
pub fn copy_file(src: &str, dest: &str, dry_run: bool) {
    info!("rsync -aL {} {}", src, dest);
    if dry_run {
        return;
    }
    let mut rsync = Command::new("rsync");
    rsync.arg("-aL").arg(src).arg(dest);
    match rsync.output() {
        Err(e) => {
            error!("copy file {} to {} failed. {}", src, dest, e);
            std::process::exit(COPY_FILE_ERROR);
        }
        Ok(finished) => deal_with_output(finished, COPY_FILE_ERROR),
    }
}
/// Render command-line arguments as one space-separated string for logging.
///
/// Leading and trailing whitespace of the joined string is trimmed, so an
/// empty argument list yields an empty string.
fn format_command_args(args: &[String]) -> String {
    // `join` builds the string in a single pass instead of re-formatting the
    // accumulated result for every argument.
    args.join(" ").trim().to_string()
}
/// Create directory `dest` together with any missing parent directories.
///
/// The equivalent `mkdir -p` command is always logged. With `dry_run` set,
/// nothing is created. On failure the process exits with `CREATE_DIR_ERROR`.
pub fn mkdir(dest: &str, dry_run: bool) {
    info!("mkdir -p {}", dest);
    if dry_run {
        return;
    }
    match std::fs::create_dir_all(dest) {
        Ok(()) => {}
        Err(e) => {
            error!("mkdir {} fails. {}", dest, e);
            std::process::exit(CREATE_DIR_ERROR);
        }
    }
}
/// Create a symbolic link named `linkname` that points to `src`.
///
/// The equivalent `ln -s` command is always logged. With `dry_run` set,
/// nothing is touched. On failure the process exits with `CREATE_SYMLINK_ERROR`.
pub fn create_link(src: &str, linkname: &str, dry_run: bool) {
    info!("ln -s {} {}", src, linkname);
    if dry_run {
        return;
    }
    // Creating a link fails when something already exists at `linkname`,
    // so remove any old file first. A failed removal is deliberately ignored.
    let _ = std::fs::remove_file(linkname);
    match std::os::unix::fs::symlink(src, linkname) {
        Ok(()) => {}
        Err(e) => {
            error!("ln -s {} {} failed. {}", src, linkname, e);
            std::process::exit(CREATE_SYMLINK_ERROR);
        }
    }
}
/// Copy directory `src` to `dest` with rsync, skipping every pattern in `excludes`.
///
/// The command is always logged. With `dry_run` set, nothing is executed.
/// Any failure (rsync cannot be started, or `deal_with_output` rejects its output)
/// makes the process exit with `COPY_DIR_ERROR`.
pub fn copy_dir(src: &str, dest: &str, dry_run: bool, excludes: &Vec<String>) {
    // we should not pass --delete args. Otherwise it will overwrite files in the same place
    // We pass --copy-unsafe-links instead of -L arg. So links point to current directory will be kept.
    let mut args: Vec<String> = vec!["-ar".to_string(), "--copy-unsafe-links".to_string()];
    args.extend(
        excludes
            .iter()
            .map(|pattern| format!("--exclude={}", pattern)),
    );
    info!("rsync {} {} {}", format_command_args(&args), src, dest);
    if dry_run {
        return;
    }
    match Command::new("rsync").args(&args).arg(src).arg(dest).output() {
        // Report rsync failures with the same exit code as a failure to start rsync;
        // CREATE_DIR_ERROR belongs to `mkdir`.
        Ok(output) => deal_with_output(output, COPY_DIR_ERROR),
        Err(e) => {
            error!("copy dir {} to {} failed. {}", src, dest, e);
            std::process::exit(COPY_DIR_ERROR);
        }
    }
}
/// Copy the shared object `src` to `dest`.
///
/// Logs the operation at debug level and delegates the actual copy
/// (including `dry_run` handling) to `copy_file`.
pub fn copy_shared_object(src: &str, dest: &str, dry_run: bool) {
    debug!("copy shared object {} to {}.", src, dest);
    copy_file(src, dest, dry_run)
}
/// Map a destination path (usually absolute) to the matching path below `root_dir`.
///
/// An absolute `dest` loses its leading `/` before being appended to `root_dir`;
/// a relative `dest` is appended unchanged.
pub fn dest_in_root(root_dir: &str, dest: &str) -> PathBuf {
    let requested = PathBuf::from(dest);
    let mut rooted = PathBuf::from(root_dir);
    if requested.is_absolute() {
        rooted.push(requested.strip_prefix("/").unwrap());
    } else {
        rooted.push(requested);
    }
    rooted
}
/// Check that the SHA-256 hash of `filename` equals the expected `hash`.
///
/// `hash` is a hex-encoded digest; the comparison ignores ASCII case so both
/// upper-case and lower-case digests are accepted.
/// On mismatch the actual hash is reported and the process exits with
/// `INCORRECT_HASH_ERROR`.
pub fn check_file_hash(filename: &str, hash: &str) {
    let file_hash = calculate_file_hash(filename);
    // `calculate_file_hash` returns upper-case hex, but hex digests carry no case
    // information, so a lower-case expected value must not be rejected.
    if !file_hash.eq_ignore_ascii_case(hash) {
        error!(
            "The hash value of {} should be {:?}. Please correct it.",
            filename, file_hash
        );
        std::process::exit(INCORRECT_HASH_ERROR);
    }
}
/// Use sha256 to calculate the hash of a file's content.
///
/// Returns the digest as an upper-case hex-encoded string.
/// If the file cannot be opened or read, a message is printed and the process
/// exits with `FILE_NOT_EXISTS_ERROR`.
pub fn calculate_file_hash(filename: &str) -> String {
    let mut file = std::fs::File::open(filename).unwrap_or_else(|e| {
        println!("can not open file {}. {}", filename, e);
        std::process::exit(FILE_NOT_EXISTS_ERROR);
    });
    let mut hasher = Sha256::new();
    // Opening can succeed while reading still fails (e.g. the path is a directory),
    // so report the error instead of panicking.
    // NOTE(review): reuses FILE_NOT_EXISTS_ERROR because no dedicated read-error code is visible here.
    if let Err(e) = std::io::copy(&mut file, &mut hasher) {
        println!("can not read file {}. {}", filename, e);
        std::process::exit(FILE_NOT_EXISTS_ERROR);
    }
    let digest = hasher.finalize();
    HEXUPPER.encode(&digest)
}
/// This is the main function of finding dependent shared objects for an elf file.
/// Currently, we only support dependent shared objects with absolute path.
/// This function works in such a process.
/// It will first analyze the dynamic loader of the file if it has a dynamic loader,
/// which means the file is an elf file. Then, we will use the loader defined in *OCCLUM_LOADERS*
/// to replace the original loader. The modified loader will find dependencies for occlum.
/// We will use the dynamic loader to analyze the dependencies. We run the dynamic loader in command line
/// and analyze the stdout. We use regex to match the pattern of the loader output.
/// The loader will automatically find all dependencies recursively, i.e., it will also find dependencies
/// for each shared object, so we only need to analyze the top elf file.
/// The flag `exit_when_encountering_errors` is used to indicate the behavior if we can't find dependencies for an elf file.
/// If this flag is set true, the default behavior when encountering autodep errors is to print error message and exit program.
/// Otherwise, we will only print error message.
pub fn find_dependent_shared_objects(
file_path: &str,
default_loader: &Option<(String, String)>,
lazy_check_missing_libraries: bool,
) -> HashSet<(String, String)> {
let mut shared_objects = HashSet::new();
// find dependencies for the input file
// first, we find the dynamic loader for the elf file, if we can't find the loader, return empty shared objects
let dynamic_loader = auto_dynamic_loader(file_path, default_loader);
if dynamic_loader.is_none() {
return shared_objects;
}
let (occlum_elf_loader, inlined_elf_loader) = dynamic_loader.unwrap();
shared_objects.insert((occlum_elf_loader.clone(), inlined_elf_loader));
let output = command_output_of_executing_dynamic_loader(&file_path, &occlum_elf_loader);
if let Ok(output) = output {
let default_lib_dirs = OCCLUM_LOADERS
.default_lib_dirs
.get(&occlum_elf_loader)
.cloned();
let mut objects = extract_dependencies_from_output(
&file_path,
output,
default_lib_dirs,
lazy_check_missing_libraries,
);
for item in objects.drain() {
shared_objects.insert(item);
}
}
shared_objects
}
/// Run the given dynamic loader on an elf file and return the command output.
///
/// The loader is invoked as `<dynamic_loader> --list <file_path>`. If a
/// `LD_LIBRARY_PATH` value is configured for the loader in `OCCLUM_LOADERS`,
/// it is set in the environment of the command.
///
/// Returns an error if the command cannot be executed.
fn command_output_of_executing_dynamic_loader(
    file_path: &str,
    dynamic_loader: &str,
) -> Result<Output, std::io::Error> {
    // if the file path has only filename, we need to add a "." directory.
    // For a bare file name `Path::parent` returns `Some("")`, not `None`, so the
    // empty parent must be checked explicitly.
    let file_path_buf = PathBuf::from(file_path);
    let has_no_directory = file_path_buf
        .parent()
        .map_or(true, |dir| dir.as_os_str().is_empty());
    let file_path = if has_no_directory {
        PathBuf::from(".")
            .join(&file_path_buf)
            .to_string_lossy()
            .to_string()
    } else {
        file_path.to_string()
    };
    // build the command to analyze dependencies
    let mut command = Command::new(dynamic_loader);
    command.arg("--list").arg(&file_path);
    match OCCLUM_LOADERS.ld_library_path_envs.get(dynamic_loader) {
        None => {
            debug!("{} --list {}", dynamic_loader, file_path);
        }
        Some(ld_library_path) => {
            debug!(
                "LD_LIBRARY_PATH='{}' {} --list {}",
                ld_library_path, dynamic_loader, file_path
            );
            command.env("LD_LIBRARY_PATH", ld_library_path);
        }
    }
    command.output()
}
/// Try to find the dynamic loader of an elf file automatically.
///
/// Returns `None` when the file cannot be opened as an elf file, or when it is neither
/// a shared object nor an executable (relocatable files and core files are skipped).
/// Otherwise:
/// - if the file has an `.interp` section, the loader is read from that section;
/// - if it has none and `default_loader` is set, the default loader is returned;
/// - if it has none and there is no default loader, a warning is logged and `None` is returned.
///
/// A found loader is returned as `Some((occlum_elf_loader, inlined_elf_loader))`,
/// because the two loaders may not be in the same directory.
fn auto_dynamic_loader(
    filename: &str,
    default_loader: &Option<(String, String)>,
) -> Option<(String, String)> {
    let elf_file = elf::File::open_path(filename).ok()?;
    // Only dynamic libraries and executables can have dependencies to resolve.
    match elf_file.ehdr.elftype {
        ET_DYN | ET_EXEC => (),
        Type(_) => return None,
    }
    if elf_file.get_section(".interp").is_some() {
        return read_loader_from_interp_section(filename);
    }
    // No interp section: fall back to the default loader when there is one.
    match default_loader {
        Some(fallback) => Some(fallback.clone()),
        None => {
            warn!(
                "cannot autodep for file {}. No dynamic loader can be found or inferred.",
                filename
            );
            None
        }
    }
}
fn read_loader_from_interp_section(filename: &str) -> Option<(String, String)> {
let elf_file = match elf::File::open_path(filename) {
Err(_) => return None,
Ok(elf_file) => elf_file,
};
let interp_scan = match elf_file.get_section(".interp") {
None => return None,
Some(section) => section,
};
let interp_data = String::from_utf8_lossy(&interp_scan.data).to_string();
let inlined_elf_loader = interp_data.trim_end_matches("\u{0}"); // this interp_data always with a \u{0} at end
debug!("the loader of {} is {}.", filename, inlined_elf_loader);
let inlined_elf_loader_path = PathBuf::from(inlined_elf_loader);
let loader_file_name = inlined_elf_loader_path
.file_name()
.and_then(|s| s.to_str())
.unwrap();
// If the loader file name is glibc loader or musl loader, we will use occlum-modified loader
let occlum_elf_loader = OCCLUM_LOADERS
.loader_paths
.get(loader_file_name)
.cloned()
.unwrap_or(inlined_elf_loader.to_string());
Some((
occlum_elf_loader.to_string(),
inlined_elf_loader.to_string(),
))
}
/// Try to infer one default loader for all files to autodep.
///
/// Every file whose loader can be read from its `.interp` section contributes that loader.
/// If exactly one distinct loader is found, it is returned as the default loader;
/// otherwise (none, or several different ones) no default loader can be inferred.
pub fn infer_default_loader(files_autodep: &Vec<String>) -> Option<(String, String)> {
    let distinct_loaders: HashSet<(String, String)> = files_autodep
        .iter()
        .filter_map(|filename| read_loader_from_interp_section(filename))
        .collect();
    match distinct_loaders.len() {
        1 => distinct_loaders.into_iter().next(),
        _ => None,
    }
}
/// resolve the results of dynamic loader to extract dependencies
pub fn extract_dependencies_from_output(
file_path: &str,
output: Output,
default_lib_dirs: Option<Vec<String>>,
lazy_check_missing_libraries: bool,
) -> HashSet<(String, String)> {
let mut shared_objects = HashSet::new();
let stdout = String::from_utf8_lossy(&output.stdout).to_string();
debug!("The loader output of {}:\n {}", file_path, stdout);
let stderr = String::from_utf8_lossy(&output.stderr).to_string();
// audodep may output error message. We should return this message to user for further checking.
if stderr.trim().len() > 0 {
warn!("cannot autodep for {}. stderr: {}", file_path, stderr);
if lazy_check_missing_libraries {
for line in stderr.lines() {
if let Some(captures) = ERROR_LOADING_REGEX.captures(line) {
let missing_library = (&captures["name"]).to_string();
add_missing_library(missing_library);
}
}
}
}
for line in stdout.lines() {
let line = line.trim();
// whether the line contains a not found library
if let Some(captures) = NOT_FOUND_REGEX.captures(line) {
if lazy_check_missing_libraries {
let missing_library = (&captures["name"]).to_string();
add_missing_library(missing_library);
}
continue;
}
let captures = DEPENDENCY_REGEX.captures(line);
if let Some(captures) = captures {
let raw_path = (&captures["path"]).to_string();
if let Some(absolute_path) = convert_to_absolute(file_path, &raw_path) {
match default_lib_dirs {
None => {
shared_objects.insert((absolute_path.clone(), absolute_path));
}
Some(ref default_lib_dirs) => {
let file_name = (&captures["name"]).to_string();
let lib_dir_in_host = PathBuf::from(&absolute_path)
.parent()
.unwrap()
.to_string_lossy()
.to_string();
// if the shared object is from one of the default dirs,
// we will copy to the first default dir(the loader dir)
// Otherwise it will be copied to the same dir as its dir in host.
if default_lib_dirs.contains(&lib_dir_in_host) {
let target_dir = default_lib_dirs.first().unwrap();
let target_path = PathBuf::from(target_dir)
.join(file_name)
.to_string_lossy()
.to_string();
shared_objects.insert((absolute_path, target_path));
} else {
shared_objects.insert((absolute_path.clone(), absolute_path));
}
}
}
}
}
}
debug!("find objects: {:?}", shared_objects);
shared_objects
}
/// Convert `raw_path` to an absolute path.
///
/// `raw_path` may already be absolute, in which case it is returned unchanged.
/// Otherwise it is resolved relative to `file_path` with `resolve_relative_path`.
/// Returns `Some(absolute_path)` if the result is absolute, and `None` otherwise.
pub fn convert_to_absolute(file_path: &str, raw_path: &str) -> Option<String> {
    if PathBuf::from(raw_path).is_absolute() {
        return Some(raw_path.to_string());
    }
    // A relative path only helps if resolving it against the file yields an absolute path.
    let resolved = resolve_relative_path(file_path, raw_path);
    if PathBuf::from(&resolved).is_absolute() {
        Some(resolved)
    } else {
        None
    }
}
/// Resolve a path given relative to a file into a path in the file system.
///
/// `relative_path` is joined onto the parent directory of `filename`.
/// When `filename` has no parent, `.` is used as the directory.
pub fn resolve_relative_path(filename: &str, relative_path: &str) -> String {
    let anchor = PathBuf::from(filename);
    let base_dir = match anchor.parent() {
        Some(dir) => dir.to_path_buf(),
        None => PathBuf::from("."),
    };
    base_dir.join(relative_path).to_string_lossy().to_string()
}
/// Find an included bom file in the file system and return its path.
///
/// Candidates are checked in this order, and the first existing file wins:
/// 1. `included_file` in the directory of `bom_file`;
/// 2. `included_file` in each of `included_dirs`. A relative included dir is
///    viewed as relative to the current working directory (where the command is executed).
///
/// If no candidate exists, the process exits with `FILE_NOT_EXISTS_ERROR`.
pub fn find_included_bom_file(
    included_file: &str,
    bom_file: &str,
    included_dirs: &Vec<String>,
) -> String {
    let bom_path = PathBuf::from(bom_file);
    let bom_dir = match bom_path.parent() {
        Some(dir) => dir.to_path_buf(),
        None => PathBuf::from("."),
    };
    let beside_bom_file = std::iter::once(bom_dir.join(included_file));
    // Lazily evaluated, so the current directory is only queried when a dir is actually tried.
    let in_included_dirs = included_dirs.iter().map(|included_dir| {
        std::env::current_dir()
            .unwrap()
            .join(included_dir)
            .join(included_file)
    });
    match beside_bom_file
        .chain(in_included_dirs)
        .find(|candidate| candidate.is_file())
    {
        Some(located) => located.to_string_lossy().to_string(),
        None => {
            // fail to find the bom file
            error!(
                "cannot find included bom file {} in {}.",
                included_file, bom_file
            );
            std::process::exit(FILE_NOT_EXISTS_ERROR)
        }
    }
}
/// Expand environmental variables inside `path`.
///
/// The expansion is done by the third-party crate shellexpand. If it fails,
/// a warning is logged and `path` is returned unchanged.
/// Known limitations: If the environmental variable points to an empty value, the conversion may fail.
pub fn resolve_envs(path: &str) -> String {
    match shellexpand::env(path) {
        Ok(expanded) => expanded.to_string(),
        Err(_) => {
            warn!("{} resolve fails.", path);
            path.to_string()
        }
    }
}
fn deal_with_output(output: Output, error_number: i32) {
let stdout = String::from_utf8_lossy(&output.stdout).to_string();
if stdout.trim().len() > 0 {
debug!("{}", stdout);
}
// if stderr is not None, the operation fails. We should abort the process and output error log.
let stderr = String::from_utf8_lossy(&output.stderr).to_string();
if stderr.trim().len() > 0 {
error!("{}", stderr);
std::process::exit(error_number);
}
}
/// Record `missing_library` in the global set of missing libraries.
///
/// Panics if the lock of the global set cannot be acquired.
fn add_missing_library(missing_library: String) {
    let mut recorded = MISSING_LIBRARIES
        .lock()
        .expect("Acquire lock should not fail");
    recorded.insert(missing_library);
}
/// Check whether a missing library exists somewhere in the image directory.
///
/// Runs `find <image_dir> -name <missing_library>` and returns true if the
/// command printed anything to stdout. Panics if `find` cannot be executed.
fn check_missing_library(missing_library: &str, image_dir: &str) -> bool {
    let output = Command::new("find")
        .arg(image_dir)
        .arg("-name")
        .arg(missing_library)
        .output()
        .expect("find missing library failed");
    let listing = String::from_utf8_lossy(&output.stdout).to_string();
    if listing.is_empty() {
        return false;
    }
    // A non-empty listing always has a first line.
    let first_hit = listing.lines().next().unwrap();
    debug!(
        "find missing library {} in {}",
        missing_library, first_hit
    );
    true
}
/// Check whether every library recorded as missing exists in the image directory.
///
/// If at least one recorded library cannot be found in `image_dir`, the names of
/// all such libraries are printed and the process exits with `MISSING_LIBRARY_ERROR`.
pub fn lazy_check_missing_libraries(image_dir: &str) {
    let still_missing: HashSet<String> = MISSING_LIBRARIES
        .lock()
        .unwrap()
        .iter()
        .filter(|library| !check_missing_library(library, image_dir))
        .cloned()
        .collect();
    if still_missing.is_empty() {
        return;
    }
    println!("copy_bom failed due to following libraries are missing:");
    still_missing
        .iter()
        .for_each(|library| println!("{}", library));
    std::process::exit(MISSING_LIBRARY_ERROR);
}
/// Print a warning if the image directory is not empty.
///
/// The image dir should be empty before running copy_bom so copy_bom can track all files
/// to copy in the bom file. Users can ignore the warning if they are sure the image
/// directory contains correct contents.
/// Nothing is printed when `image_dir` is not a directory or cannot be read.
pub fn warn_on_nonempty_image_dir(image_dir: &str) {
    let image_path = PathBuf::from(image_dir);
    if !image_path.is_dir() {
        return;
    }
    let has_entries = match image_path.read_dir() {
        Ok(mut entries) => entries.next().is_some(),
        Err(_) => false,
    };
    if has_entries {
        println!(
            "WARNING: {} is not an empty directory before running copy_bom.",
            image_dir
        );
    }
}
/// Check that rsync is installed by running `rsync --version` with stdout discarded.
///
/// rsync counts as installed only when the command can be run and exits with code zero.
/// Otherwise a message is printed and the process exits with `RSYNC_NOT_FOUND_ERROR`.
pub fn check_rsync() {
    let probe = Command::new("rsync")
        .arg("--version")
        .stdout(Stdio::null())
        .status();
    let installed = match probe {
        Ok(status) => status.code() == Some(0),
        Err(_) => false,
    };
    if installed {
        return;
    }
    println!("rsync is not installed.");
    std::process::exit(RSYNC_NOT_FOUND_ERROR);
}