[example] unify the container images used for docker and k8s deployment

Zheng, Qi 2022-09-28 10:29:16 +08:00 committed by volcano
parent 27ca93c7ab
commit 115b827f68
8 changed files with 39 additions and 43 deletions

@@ -51,15 +51,26 @@ Extra model_key could be added to protect the models if necessary. (not included
Now users can send inference requests with the server certificate (`server.crt`).
## Get the demo docker images
Prebuilt docker images are available for the examples, either in the following docker way or the [`kubernetes`](./kubernetes/) way. Users can pull them directly and try the example.
```
docker pull occlum/init_ra_server:0.29.0
docker pull occlum/tf_demo:0.29.0
docker pull occlum/tf_demo_client:0.29.0
```
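If the images need to live in a private registry (a hypothetical `registry.example.com` is used below), retag and push them after pulling, for example:
```
docker tag occlum/tf_demo:0.29.0 registry.example.com/tf_demo:0.29.0
docker push registry.example.com/tf_demo:0.29.0
```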
If users want to build or customize the images, please check the following section.
## How to build
Our target is to deploy the demo in separate container images, so a docker build is a necessary step. Thanks to the `docker run in docker` method, this example can be built inside the Occlum development container image.
First, please make sure `docker` is installed successfully on your host. Then start the Occlum container (using version `0.27.0-ubuntu20.04` as an example) as below.
First, please make sure `docker` is installed successfully on your host. Then start the Occlum container (using version `0.29.0-ubuntu20.04` as an example) as below.
```
$ sudo docker run --rm -itd --network host \
-v $(which docker):/usr/bin/docker -v /var/run/docker.sock:/var/run/docker.sock \
occlum/occlum:0.27.0-ubuntu20.04
occlum/occlum:0.29.0-ubuntu20.04
```
All the following steps run in the above container.
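Since the host `docker` CLI and socket are mounted in, a quick optional check confirms the container can reach the host daemon:
```
# docker version
```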
@@ -69,12 +80,9 @@ All the following steps run in the above container.
This step prepares all the content and builds the Occlum images.
```
# ./build_content.sh localhost 50051
# ./build_content.sh
```
Parameters `localhost` and `50051` indicate the network domain and port of the GRPC server.
Users can modify them to match the actual deployment.
Below are the two Occlum images.
* **occlum_server**
@@ -138,3 +146,8 @@ There is an example python based [`inference client`](./client/inception_client.
# cd client
# python3 inception_client.py --server=localhost:9000 --crt ../ssl_configure/server.crt --image cat.jpg
```
Alternatively, you can use the demo client container image to run the inference test.
```
$ docker run --rm --network host <registry>/tf_demo_client:<tag> python3 inception_client.py --server=localhost:9000 --crt server.crt --image cat.jpg
```

@@ -8,9 +8,6 @@ export INITRA_DIR="${script_dir}/init_ra"
export RATLS_DIR="${script_dir}/../demos/ra_tls"
export TF_DIR="${script_dir}/tf_serving"
GRPC_SERVER_DOMAIN=${1:-localhost}
GRPC_SERVER_PORT=${2:-50051}
function build_ratls()
{
rm -rf ${DEP_LIBS_DIR} && mkdir ${DEP_LIBS_DIR}
@@ -62,13 +59,10 @@ function build_tf_instance()
.process.default_heap_size = "128MB" |
.resource_limits.max_num_of_threads = 64 |
.metadata.debuggable = false |
.env.default += ["GRPC_SERVER=localhost:50051"]' Occlum.json)" && \
.env.default += ["GRPC_SERVER=localhost:50051"] |
.env.untrusted += ["GRPC_SERVER"]' Occlum.json)" && \
echo "${new_json}" > Occlum.json
# Update GRPC_SERVER env
GRPC_SERVER="${GRPC_SERVER_DOMAIN}:${GRPC_SERVER_PORT}"
sed -i "s/localhost:50051/$GRPC_SERVER/g" Occlum.json
occlum build --image-key ../image_key
# Get server mrsigner.
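Because `GRPC_SERVER` is now listed in `env.untrusted`, the `localhost:50051` default baked into `Occlum.json` is only a fallback: the host environment can override it when the enclave starts, with no rebuild. A minimal sketch, assuming a hypothetical endpoint `init-ra-server-svc:5000`:
```
# hypothetical endpoint; overrides the localhost:50051 default at start-up
GRPC_SERVER=init-ra-server-svc:5000 occlum run /bin/tensorflow_model_server \
    --model_name=INCEPTION --model_base_path=/model/INCEPTION/INCEPTION
```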
@@ -86,9 +80,6 @@ function build_tf_instance()
rm -rf initfs
copy_bom -f ../init_ra_client.yaml --root initfs --include-dir /opt/occlum/etc/template
# Set GRPC_SERVER_DOMAIN to the hosts
# echo "$IP ${GRPC_SERVER_DOMAIN}" >> initfs/etc/hosts
occlum build -f --image-key ../image_key
occlum package occlum_instance
@@ -140,9 +131,6 @@ function build_server_instance()
rm -rf image
copy_bom -f ../ra_server.yaml --root image --include-dir /opt/occlum/etc/template
# Set GRPC_SERVER_DOMAIN to the hosts
# echo "$IP ${GRPC_SERVER_DOMAIN} " >> image/etc/hosts
occlum build
occlum package occlum_instance

@@ -2,8 +2,6 @@ FROM ubuntu:20.04
LABEL maintainer="Qi Zheng <huaiqing.zq@antgroup.com>"
# Install SGX DCAP and Occlum runtime
ARG PSW_VERSION=2.15.101.1
ARG DCAP_VERSION=1.12.101.1
ENV APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=1
RUN apt update && DEBIAN_FRONTEND="noninteractive" apt install -y --no-install-recommends gnupg wget ca-certificates jq && \
echo 'deb [arch=amd64] https://download.01.org/intel-sgx/sgx_repo/ubuntu focal main' | tee /etc/apt/sources.list.d/intel-sgx.list && \
@@ -11,9 +9,9 @@ RUN apt update && DEBIAN_FRONTEND="noninteractive" apt install -y --no-install-r
echo 'deb [arch=amd64] https://occlum.io/occlum-package-repos/debian focal main' | tee /etc/apt/sources.list.d/occlum.list && \
wget -qO - https://occlum.io/occlum-package-repos/debian/public.key | apt-key add - && \
apt update && \
apt install -y libsgx-uae-service=$PSW_VERSION-focal1 && \
apt install -y libsgx-dcap-ql=$DCAP_VERSION-focal1 && \
apt install -y libsgx-dcap-default-qpl=$DCAP_VERSION-focal1 && \
apt install -y libsgx-uae-service && \
apt install -y libsgx-dcap-ql && \
apt install -y libsgx-dcap-default-qpl && \
apt install -y occlum-runtime && \
apt clean && \
rm -rf /var/lib/apt/lists/*

@@ -1,7 +1,7 @@
#!/bin/bash
# Update PCCS_URL
line=$(grep -n "PCCS_URL" /etc/sgx_default_qcnl.conf | cut -d ":" -f 1)
sed -i "${line}c PCCS_URL=${PCCS_URL}" /etc/sgx_default_qcnl.conf
line=$(grep -n "pccs_url" /etc/sgx_default_qcnl.conf | cut -d ":" -f 1)
sed -i "${line}c \"pccs_url\": \"${PCCS_URL}\"," /etc/sgx_default_qcnl.conf
exec "$@"

@@ -56,12 +56,12 @@ usage: build.sh [OPTION]...
For example, the command below generates three container images.
```
# ./build.sh -r demo -g 0.28.0 -d init-ra-server-svc -p 5000
# ./build.sh -r demo -g 0.29.0
```
* **`demo/init_ra_server:0.28.0`** acts as key broker pod.
* **`demo/tf_demo:0.28.0`** acts as tensorflow serving pod.
* **`demo/tf_demo_client:0.28.0`** acts as client.
* **`demo/init_ra_server:0.29.0`** acts as key broker pod.
* **`demo/tf_demo:0.29.0`** acts as tensorflow serving pod.
* **`demo/tf_demo_client:0.29.0`** acts as client.
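If the k8s cluster pulls images from a remote registry instead of a shared local docker daemon, push the generated images there first, e.g. with the `demo` prefix from above:
```
$ docker push demo/init_ra_server:0.29.0
$ docker push demo/tf_demo:0.29.0
$ docker push demo/tf_demo_client:0.29.0
```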
## How to test
@@ -110,7 +110,7 @@ By default, there is only one replica for the tensorflow serving pod.
### Try the inference request
```
$ docker run --rm --network host sevenzheng/tf_demo_client:0.28.0 python3 inception_client.py --server=localhost:31001 --crt server.crt --image cat.jpg
$ docker run --rm --network host demo/tf_demo_client:0.29.0 python3 inception_client.py --server=localhost:31001 --crt server.crt --image cat.jpg
```
If successful, it prints the classification results.
@@ -120,7 +120,7 @@ If successful, it prints the classification results.
The command below runs a benchmark test against the tensorflow serving service running in Occlum.
```
$ docker run --rm --network host sevenzheng/tf_demo_client:0.28.0 python3 benchmark.py --server localhost:31001 --crt server.crt --cnum 4 --loop 10 --image cat.jpg
$ docker run --rm --network host demo/tf_demo_client:0.29.0 python3 benchmark.py --server localhost:31001 --crt server.crt --cnum 4 --loop 10 --image cat.jpg
```
Try scaling up the number of tensorflow serving pods; better `tps` can be achieved, as sketched below.
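Scaling is a single `kubectl` command; a minimal sketch, assuming the serving Deployment is named `tf-demo-deployment` (check the real name with `kubectl get deployments`):
```
$ kubectl scale deployment tf-demo-deployment --replicas=4
```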

@@ -9,8 +9,6 @@ pip_mirror="-i https://pypi.douban.com/simple"
registry="demo"
tag="latest"
grpc_server_domain="init-ra-server-svc"
grpc_server_port="5000"
function usage {
cat << EOM
@@ -18,20 +16,16 @@ Build Occlum TF examples container images for k8s deployment.
usage: $(basename "$0") [OPTION]...
-r <container image registry> the container image registry
-g <tag> container image tag
-d <grpc_server_domain> GRPC RA server domain
-p <grpc_server_port> GRPC RA server port
-h <usage> usage help
EOM
exit 0
}
function process_args {
while getopts ":r:g:d:p:h" option; do
while getopts ":r:g:h" option; do
case "${option}" in
r) registry=${OPTARG};;
g) tag=${OPTARG};;
d) grpc_server_domain=${OPTARG};;
p) grpc_server_port=${OPTARG};;
h) usage;;
esac
done
@@ -44,13 +38,11 @@ echo "############################"
echo "Build Occlum TF examples container images for k8s deployment"
echo " Container images registry: ${registry}"
echo " Container images tag: ${tag}"
echo " GRPC RA server domain: ${grpc_server_domain}"
echo " GRPC RA server port: ${grpc_server_port}"
echo ""
pushd ${top_dir}
echo "Build Occlum instances first ..."
./build_content.sh ${grpc_server_domain} ${grpc_server_port}
./build_content.sh
echo ""
echo "Build Occlum container images ..."

@@ -20,6 +20,8 @@ spec:
env:
- name: PCCS_URL
value: https://sgx-dcap-server.cn-shanghai.aliyuncs.com/sgx/certification/v3/
- name: GRPC_SERVER
value: init-ra-server-svc:5000
args:
- taskset
- -c
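The `init-ra-server-svc:5000` value assumes a cluster `Service` in front of the key broker pod. If it does not exist yet, one hypothetical way to create it, assuming the broker Deployment is named `init-ra-server`:
```
$ kubectl expose deployment init-ra-server --port=5000 --name=init-ra-server-svc
```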

@@ -17,6 +17,7 @@ usage: $(basename "$0") [OPTION]...
-p <GRPC Server port> default 50051.
-u <PCCS URL> default https://localhost:8081/sgx/certification/v3/.
-r <registry prefix> the registry for the demo container images.
-g <image tag> the container image tag, default "latest".
-h <usage> usage help
EOM
exit 0
@@ -48,10 +49,12 @@ docker run --network host \
sleep 3
echo "Start Tensorflow-Serving on backgound ..."
GRPC_SERVER="${grpc_domain}:${grpc_port}"
docker run --network host \
--device /dev/sgx/enclave --device /dev/sgx/provision \
--env PCCS_URL=${pccs_url} \
--env GRPC_SERVER="${GRPC_SERVER}" \
${registry}/tf_demo:${tag} \
taskset -c 0,1 occlum run /bin/tensorflow_model_server \
--model_name=INCEPTION --model_base_path=/model/INCEPTION/INCEPTION \