[example] Update example with init-ra way

Zheng, Qi 2023-06-29 16:19:34 +08:00 committed by volcano
parent 403d960a37
commit b43cd4233a
11 changed files with 42 additions and 337 deletions

@@ -37,7 +37,7 @@ It is used to encrypt/decrypt the Occlum application RootFS image which is Tenso
 #### Step 2
-Application starts. First it starts the `init` process. This customized [`init`](./init_ra/) requests `ssl_config` and `image_key` from the GRPC-RATLS server through a secure GRPC RATLS connection. Then it uses the `image_key` to decrypt the RootFS where the real application is located, mounts the RootFS, and saves the `ssl_config` to the RootFS as `/etc/tf_ssl.cfg`.
+Application starts. First it starts the `init` process. This customized [`init`](../tools/init_grpc_ratls/) requests `ssl_config` and `image_key` from the GRPC-RATLS server through a secure GRPC RATLS connection. Then it uses the `image_key` to decrypt the RootFS where the real application is located, mounts the RootFS, and saves the `ssl_config` to the RootFS as `/etc/tf_ssl.cfg`.
 
 A detailed description of the above two-step Init-RA operation can be found in [`Init-RA`](../demos/remote_attestation/init_ra_flow/).
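For orientation, the client-side flow after this change boils down to roughly the following sketch (assembled from the `build.sh` and deployment changes shown further down in this commit; the KMS address, instance name and run arguments are illustrative):
```
# Create the Occlum instance with the built-in grpc_ratls init-ra client
occlum new occlum_tf --init-ra grpc_ratls
cd occlum_tf

# Declare which secrets init should fetch (e.g. ssl_config -> /etc/tf_ssl.cfg)
# and which KMS server measurements to verify, by editing init_ra_conf.json
# before building -- build.sh below does this with jq.

# Build the encrypted RootFS; at runtime the init process fetches image_key
# over GRPC-RATLS, decrypts and mounts the RootFS, then writes ssl_config into it.
occlum build --image-key ../image_key

# Point the init-ra client at the GRPC-RATLS KMS server and run the app.
OCCLUM_INIT_RA_KMS_SERVER=localhost:50051 occlum run /bin/tensorflow_model_server ...
```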
@@ -55,9 +55,9 @@ Now users could send inference request with server certificates (`server.crt`).
 There are prebuilt docker images that can be used for the examples, either in the following docker way or the [`kubernetes`](./kubernetes/) way. Users can pull them directly and try the example.
 ```
-docker pull occlum/init_ra_server:0.29.5-ubuntu20.04
-docker pull occlum/tf_demo:0.29.5-ubuntu20.04
-docker pull occlum/tf_demo_client:0.29.5-ubuntu20.04
+docker pull occlum/init_ra_server:0.29.6-ubuntu20.04
+docker pull occlum/tf_demo:0.29.6-ubuntu20.04
+docker pull occlum/tf_demo_client:0.29.6-ubuntu20.04
 ```
 If users want to build or customize the images, please check the part below.

@@ -2,17 +2,8 @@
 set -e
 
 script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
-export INITRA_DIR="${script_dir}/init_ra"
-export RATLS_DIR="${script_dir}/../tools/toolchains/grpc_ratls"
 export TF_DIR="${script_dir}/tf_serving"
 
-function build_ratls()
-{
-    pushd ${RATLS_DIR}
-    ./build.sh
-    popd
-}
-
 function build_tf_serving()
 {
@@ -27,12 +18,22 @@ function build_tf_serving()
     popd
 }
 
-function build_init_ra()
+function update_client_init_ra_conf()
 {
-    pushd ${INITRA_DIR}
-    occlum-cargo clean
-    occlum-cargo build --release
-    popd
+    # Fill in the keys
+    new_json="$(jq '.kms_keys = [ {"key": "ssl_config", "path": "/etc/tf_ssl.cfg"}]' init_ra_conf.json)" && \
+    echo "${new_json}" > init_ra_conf.json
+
+    # Fill in the KMS server measurements.
+    new_json="$(jq ' .ra_config.verify_mr_enclave = "off" |
+        .ra_config.verify_mr_signer = "on" |
+        .ra_config.verify_isv_prod_id = "off" |
+        .ra_config.verify_isv_svn = "off" |
+        .ra_config.verify_config_svn = "off" |
+        .ra_config.verify_enclave_debuggable = "on" |
+        .ra_config.sgx_mrs[0].mr_signer = ''"'`get_mr tf mrsigner`'" |
+        .ra_config.sgx_mrs[0].debuggable = false ' init_ra_conf.json)" && \
+    echo "${new_json}" > init_ra_conf.json
 }
function build_tf_instance() function build_tf_instance()
@@ -40,40 +41,32 @@ function build_tf_instance()
     # generate tf image key
     occlum gen-image-key image_key
 
-    rm -rf occlum_tf && occlum new occlum_tf
+    rm -rf occlum_tf
+    # choose grpc_ratls as init ra kms client
+    occlum new occlum_tf --init-ra grpc_ratls
     pushd occlum_tf
 
     # prepare tf_serving content
     rm -rf image
     copy_bom -f ../tf_serving.yaml --root image --include-dir /opt/occlum/etc/template
 
-    # Try build first to get mrsigner
-    # In our case, client and server use the same sign-key thus also the same mrsigner
-    occlum build
 
     new_json="$(jq '.resource_limits.user_space_size = "7000MB" |
         .resource_limits.kernel_space_heap_size="384MB" |
         .process.default_heap_size = "128MB" |
         .resource_limits.max_num_of_threads = 64 |
         .metadata.debuggable = false |
-        .env.default += ["GRPC_SERVER=localhost:50051"] |
-        .env.untrusted += ["GRPC_SERVER"]' Occlum.json)" && \
+        .env.default += ["OCCLUM_INIT_RA_KMS_SERVER=localhost:50051"] |
+        .env.untrusted += ["OCCLUM_INIT_RA_KMS_SERVER"]' Occlum.json)" && \
     echo "${new_json}" > Occlum.json
 
+    # Update init_ra_conf json file accordingly before occlum build
+    update_client_init_ra_conf
 
     occlum build --image-key ../image_key
 
-    # Get server mrsigner.
-    # Here client and server use the same signer-key thus using client mrsigner directly.
-    jq ' .verify_mr_enclave = "off" |
-        .verify_mr_signer = "on" |
-        .verify_isv_prod_id = "off" |
-        .verify_isv_svn = "off" |
-        .verify_config_svn = "off" |
-        .verify_enclave_debuggable = "on" |
-        .sgx_mrs[0].mr_signer = ''"'`get_mr tf mrsigner`'" |
-        .sgx_mrs[0].debuggable = false ' ../ra_config_template.json > dynamic_config.json
-
-    # prepare init-ra content
-    rm -rf initfs
-    copy_bom -f ../init_ra_client.yaml --root initfs --include-dir /opt/occlum/etc/template
-
-    occlum build -f --image-key ../image_key
 
     occlum package occlum_instance
     popd
@@ -125,9 +118,7 @@ function build_server_instance()
     popd
 }
 
-build_ratls
 build_tf_serving
-build_init_ra
 build_tf_instance
 build_server_instance

@@ -5,7 +5,7 @@ LABEL maintainer="Qi Zheng <huaiqing.zq@antgroup.com>"
 ENV APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=1
 ARG PSW_VERSION=2.17.100.3
 ARG DCAP_VERSION=1.14.100.3
-ARG OCCLUM_VERSION=0.29.5-1
+ARG OCCLUM_VERSION=0.29.6-1
 RUN apt update && DEBIAN_FRONTEND="noninteractive" apt install -y --no-install-recommends gnupg wget ca-certificates jq && \
     echo 'deb [arch=amd64] https://download.01.org/intel-sgx/sgx_repo/ubuntu focal main' | tee /etc/apt/sources.list.d/intel-sgx.list && \
     wget -qO - https://download.01.org/intel-sgx/sgx_repo/ubuntu/intel-sgx-deb.key | apt-key add - && \

@@ -1,94 +0,0 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
[[package]]
name = "init"
version = "0.0.1"
dependencies = [
"libc",
"serde",
"serde_json",
]
[[package]]
name = "itoa"
version = "0.4.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd25036021b0de88a0aff6b850051563c6516d0bf53f8638938edbb9de732736"
[[package]]
name = "libc"
version = "0.2.84"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1cca32fa0182e8c0989459524dc356b8f2b5c10f1b9eb521b7d182c03cf8c5ff"
[[package]]
name = "proc-macro2"
version = "1.0.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e0704ee1a7e00d7bb417d0770ea303c1bccbabf0ef1667dae92b5967f5f8a71"
dependencies = [
"unicode-xid",
]
[[package]]
name = "quote"
version = "1.0.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c3d0b9745dc2debf507c8422de05d7226cc1f0644216dfdfead988f9b1ab32a7"
dependencies = [
"proc-macro2",
]
[[package]]
name = "ryu"
version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e"
[[package]]
name = "serde"
version = "1.0.123"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "92d5161132722baa40d802cc70b15262b98258453e85e5d1d365c757c73869ae"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.123"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9391c295d64fc0abb2c556bad848f33cb8296276b1ad2677d1ae1ace4f258f31"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "serde_json"
version = "1.0.62"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ea1c6153794552ea7cf7cf63b1231a25de00ec90db326ba6264440fa08e31486"
dependencies = [
"itoa",
"ryu",
"serde",
]
[[package]]
name = "syn"
version = "1.0.60"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c700597eca8a5a762beb35753ef6b94df201c81cca676604f547495a0d7f0081"
dependencies = [
"proc-macro2",
"quote",
"unicode-xid",
]
[[package]]
name = "unicode-xid"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564"

@@ -1,11 +0,0 @@
[package]
name = "init"
version = "0.0.1"
build = "build.rs"
authors = ["LI Qing geding.lq@antgroup.com"]
edition = "2021"
[dependencies]
libc = "0.2.84"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"

@@ -1,7 +0,0 @@
fn main() {
    println!("cargo:rustc-link-search=native=/opt/occlum/toolchains/dcap_lib/musl");
    println!("cargo:rustc-link-search=native=/opt/occlum/toolchains/grpc_ratls/musl");
    println!("cargo:rustc-link-lib=dylib=grpc_ratls_client");
    println!("cargo:rustc-link-lib=dylib=hw_grpc_proto");
    println!("cargo:rustc-link-lib=dylib=occlum_dcap")
}

@@ -1,151 +0,0 @@
extern crate libc;
extern crate serde;
extern crate serde_json;

use libc::syscall;
use serde::Deserialize;

use std::env;
use std::error::Error;
use std::fs;
use std::fs::File;
use std::io::{ErrorKind, Read};
use std::ffi::CString;
use std::os::raw::{c_int, c_char};

#[link(name = "grpc_ratls_client")]
extern "C" {
    fn grpc_ratls_get_secret(
        server_addr: *const c_char, // grpc server address+port, such as "localhost:50051"
        config_json: *const c_char, // ratls handshake config json file
        name: *const c_char, // secret name to be requested
        secret_file: *const c_char // secret file to be saved
    ) -> c_int;
}

fn main() -> Result<(), Box<dyn Error>> {
    // Load the configuration from initfs
    const IMAGE_CONFIG_FILE: &str = "/etc/image_config.json";
    let image_config = load_config(IMAGE_CONFIG_FILE)?;

    // Get grpc server address from environment GRPC_SERVER
    let server_addr = CString::new(
        env::var("GRPC_SERVER").unwrap_or("localhost:50051".to_string()))
        .unwrap();
    let config_json = CString::new("dynamic_config.json").unwrap();

    // Get the key of FS image if needed
    let key = match &image_config.image_type[..] {
        "encrypted" => {
            // Get the image encrypted key through RA
            let secret = CString::new("image_key").unwrap();
            let filename = CString::new("/etc/image_key").unwrap();

            let ret = unsafe {
                grpc_ratls_get_secret(
                    server_addr.as_ptr(),
                    config_json.as_ptr(),
                    secret.as_ptr(),
                    filename.as_ptr())
            };

            if ret != 0 {
                println!("grpc_ratls_get_secret failed return {}", ret);
                return Err(Box::new(std::io::Error::last_os_error()));
            }

            const IMAGE_KEY_FILE: &str = "/etc/image_key";
            let key_str = load_key(IMAGE_KEY_FILE)?;
            let mut key: sgx_key_128bit_t = Default::default();
            parse_str_to_bytes(&key_str, &mut key)?;
            Some(key)
        }
        "integrity-only" => None,
        _ => unreachable!(),
    };

    let key_ptr = key
        .as_ref()
        .map(|key| key as *const sgx_key_128bit_t)
        .unwrap_or(std::ptr::null());

    // Get certificate
    let secret = CString::new("ssl_config").unwrap();
    let filename = CString::new("ssl_file").unwrap();

    let ret = unsafe {
        grpc_ratls_get_secret(
            server_addr.as_ptr(),
            config_json.as_ptr(),
            secret.as_ptr(),
            filename.as_ptr())
    };

    if ret != 0 {
        println!("grpc_ratls_get_secret failed return {}", ret);
        return Err(Box::new(std::io::Error::last_os_error()));
    }

    let ssl_secret = fs::read_to_string(filename.into_string().unwrap())
        .expect("Something went wrong reading the file");

    // Mount the image
    const SYS_MOUNT_FS: i64 = 363;
    // User can provide valid path for runtime mount and boot
    // Otherwise, just pass null pointer to do general mount and boot
    let rootfs_config: *const i8 = std::ptr::null();
    let ret = unsafe { syscall(SYS_MOUNT_FS, key_ptr, rootfs_config) };
    if ret < 0 {
        return Err(Box::new(std::io::Error::last_os_error()));
    }

    // Write the secrets to rootfs
    fs::write("/etc/tf_ssl.cfg", ssl_secret.into_bytes())?;

    Ok(())
}

#[allow(non_camel_case_types)]
type sgx_key_128bit_t = [u8; 16];

#[derive(Deserialize, Debug)]
#[serde(deny_unknown_fields)]
struct ImageConfig {
    image_type: String,
}

fn load_config(config_path: &str) -> Result<ImageConfig, Box<dyn Error>> {
    let mut config_file = File::open(config_path)?;
    let config_json = {
        let mut config_json = String::new();
        config_file.read_to_string(&mut config_json)?;
        config_json
    };
    let config: ImageConfig = serde_json::from_str(&config_json)?;
    Ok(config)
}

fn load_key(key_path: &str) -> Result<String, Box<dyn Error>> {
    let mut key_file = File::open(key_path)?;
    let mut key = String::new();
    key_file.read_to_string(&mut key)?;
    Ok(key.trim_end_matches(|c| c == '\r' || c == '\n').to_string())
}

fn parse_str_to_bytes(arg_str: &str, bytes: &mut [u8]) -> Result<(), Box<dyn Error>> {
    let bytes_str_vec = {
        let bytes_str_vec: Vec<&str> = arg_str.split('-').collect();
        if bytes_str_vec.len() != bytes.len() {
            return Err(Box::new(std::io::Error::new(
                ErrorKind::InvalidData,
                "The length or format of Key/MAC string is invalid",
            )));
        }
        bytes_str_vec
    };

    for (byte_i, byte_str) in bytes_str_vec.iter().enumerate() {
        bytes[byte_i] = u8::from_str_radix(byte_str, 16)?;
    }

    Ok(())
}

@@ -1,21 +0,0 @@
includes:
  - base.yaml
targets:
  - target: /bin/
    copy:
      - files:
          - ${INITRA_DIR}/target/x86_64-unknown-linux-musl/release/init
  - target: /lib/
    copy:
      - files:
          - /opt/occlum/toolchains/grpc_ratls/musl/libgrpc_ratls_client.so
          - /opt/occlum/toolchains/grpc_ratls/musl/libhw_grpc_proto.so
          - /opt/occlum/toolchains/dcap_lib/musl/libocclum_dcap.so.0.1.0
  - target: /
    copy:
      - files:
          - dynamic_config.json
  - target: /etc
    copy:
      - dirs:
          - /etc/ssl

@@ -50,18 +50,16 @@ Build Occlum TF examples container images for k8s deployment.
 usage: build.sh [OPTION]...
     -r <container image registry> the container image registry
     -g <tag> container image tag
-    -d <grpc_server_domain> GPRC RA server domain
-    -p <grpc_server_port> GPRC RA server port
 ```
 For example, below command generates three container images.
 ```
-# ./build.sh -r demo -g 0.29.5
+# ./build.sh -r demo -g 0.29.6
 ```
-* **`demo/init_ra_server:0.29.5`** acts as key broker pod.
-* **`demo/tf_demo:0.29.5`** acts as tensorflow serving pod.
-* **`demo/tf_demo_client:0.29.5`** acts as client.
+* **`demo/init_ra_server:0.29.6`** acts as key broker pod.
+* **`demo/tf_demo:0.29.6`** acts as tensorflow serving pod.
+* **`demo/tf_demo_client:0.29.6`** acts as client.
 
 ## How to test
@@ -80,7 +78,7 @@ For example, below command generates three container images.
 ```
 In this case, for inference, "8000MB" SGX EPC memory size is used because the Occlum `user_space_size` is set to "7000MB" in the building stage. The cpu limit "1000m" here restricts the CPU usage of each Occlum inference pod, with the purpose of showing the performance gain from scalability in `benchmark`.
-* Args `"taskset -c 2,3,4,5"` is necessary until the Occlum v1.0 release. The purpose is to limit the CPU cores used by tensorflow serving so that the number of SGX threads used won't exceed the `max_num_of_threads` defined in the building stage.
+* Args `"taskset -c 0-3"` is necessary until the Occlum v1.0 release. The purpose is to limit the CPU cores used by tensorflow serving so that the number of SGX threads used won't exceed the `max_num_of_threads` defined in the building stage.
 
 ### Start the key broker service
@@ -110,7 +108,7 @@ In default, only one replica for the tensorflow serving pod.
 ### Try the inference request
 ```
-$ docker run --rm --network host demo/tf_demo_client:0.29.5 python3 resnet_client_grpc.py --server=localhost:31001 --crt server.crt --image cat.jpg
+$ docker run --rm --network host demo/tf_demo_client:0.29.6 python3 resnet_client_grpc.py --server=localhost:31001 --crt server.crt --image cat.jpg
 ```
 If successful, it prints the classification results.
@@ -120,7 +118,7 @@ If successful, it prints the classification results.
 The command below runs a benchmark test against the tensorflow serving service running in Occlum.
 ```
-$ docker run --rm --network host demo/tf_demo_client:0.29.5 python3 benchmark.py --server localhost:31001 --crt server.crt --cnum 4 --loop 10 --image cat.jpg
+$ docker run --rm --network host demo/tf_demo_client:0.29.6 python3 benchmark.py --server localhost:31001 --crt server.crt --cnum 4 --loop 10 --image cat.jpg
 ```
 Try scaling up the number of tensorflow serving pods; a better `tps` can be achieved.
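One way to scale up, assuming the serving pods are managed by a Kubernetes Deployment (the deployment name below is illustrative, not taken from this repo), is to scale the Deployment and then rerun the benchmark command above:
```
# Scale out the tensorflow serving pods, then rerun benchmark.py
kubectl scale deployment tf-demo-deployment --replicas=4
```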

@@ -20,12 +20,12 @@ spec:
         env:
         - name: PCCS_URL
           value: https://sgx-dcap-server.cn-shanghai.aliyuncs.com/sgx/certification/v3/
-        - name: GRPC_SERVER
+        - name: OCCLUM_INIT_RA_KMS_SERVER
           value: init-ra-server-svc:5000
         args:
         - taskset
         - -c
-        - 2,3,4,5
+        - 0-3
         - occlum
         - run
         - /bin/tensorflow_model_server

@@ -54,7 +54,7 @@ GRPC_SERVER="${grpc_domain}:${grpc_port}"
 docker run --network host \
         --device /dev/sgx/enclave --device /dev/sgx/provision \
         --env PCCS_URL=${pccs_url} \
-        --env GRPC_SERVER="${GRPC_SERVER}" \
+        --env OCCLUM_INIT_RA_KMS_SERVER="${GRPC_SERVER}" \
         ${registry}/tf_demo:${tag} \
         taskset -c 0,1 occlum run /bin/tensorflow_model_server \
         --model_name=resnet --model_base_path=/models/resnet \