Enable Tensorflow_serving in Occlum

Signed-off-by: yuanwu <yuan.wu@intel.com>
Authored by yuanwu on 2021-07-19 07:43:58 +00:00; committed by Zongmin.Gu
parent 68f0a22177
commit d19ff1a8be
17 changed files with 580 additions and 0 deletions

@ -691,3 +691,38 @@ jobs:
    - name: Run Bash test
      run: docker exec bash_test bash -c "cd /root/occlum/demos/bash && SGX_MODE=SIM ./run_bash_demo.sh"

  Tensorflow_serving_test:
    runs-on: ubuntu-18.04
    steps:
    - uses: actions/checkout@v1
      with:
        submodules: true
    - name: Get occlum version
      run: echo "OCCLUM_VERSION=$(grep "Version =" src/pal/include/occlum_version.h | awk '{print $4}')" >> $GITHUB_ENV
    - name: Compile the tensorflow_serving with PIC
      run: |
        cd $GITHUB_WORKSPACE/demos/tensorflow/tensorflow_serving;
        ./build_occlum_tf_serving.sh
    - name: Create container
      run: docker run -itd --name=tf_serving_test -v $GITHUB_WORKSPACE:/root/occlum occlum/occlum:${{ env.OCCLUM_VERSION }}-ubuntu18.04
    - name: Build dependencies
      run: docker exec tf_serving_test bash -c "cd /root/occlum; make submodule"
    - name: Make install
      run: docker exec tf_serving_test bash -c "source /opt/intel/sgxsdk/environment; cd /root/occlum; OCCLUM_RELEASE_BUILD=1 make install"
    - name: Set up environment
      run: docker exec tf_serving_test bash -c "cd /root/occlum/demos/tensorflow/tensorflow_serving; ./prepare_model_and_env.sh"
    - name: Run tf_serving server
      run: docker exec tf_serving_test bash -c "cd /root/occlum/demos/tensorflow/tensorflow_serving; SGX_MODE=SIM ./run_occlum_tf_serving.sh"
    - name: Run tf_serving client
      run: |
        sleep 120;
        docker exec tf_serving_test bash -c "cd /root/occlum/demos/tensorflow/tensorflow_serving/client; ./prepare_client_env.sh; ./benchmark.sh python3 localhost:8500 ../ssl_configure/server.crt"

@ -0,0 +1,41 @@
# TensorFlow Serving With Occlum
TensorFlow Serving is a flexible, high-performance serving system for machine learning models, designed for production environments. This demo presents a secure end-to-end TensorFlow Serving solution on Occlum.
- **Runtime security.** Occlum uses Intel SGX to run the application inside an enclave with encrypted memory.
- **At-rest security.** The model and the TLS key are protected by Occlum's encrypted file system.
- **Communication security.** TLS secures the gRPC communication between server and client.
#### Running TensorFlow Serving in Occlum
The following command downloads the ResNet50 model, converts it to the SavedModel format, and generates a TLS key and certificate for localhost (the server domain name). The client uses server.crt; TF Serving uses server.key and ssl.cfg.
```
./prepare_model_and_env.sh
```
Run TensorFlow Serving in Occlum.
```
./run_occlum_tf_serving.sh
```
***Note:*** By default the demo runs on a single machine. To run the TF Serving server and the client on different machines, modify the domain name in the scripts and regenerate the certificate, as shown below.
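If the client reaches the server under a different hostname, the certificate must be issued for that name. A minimal sketch, assuming a hypothetical hostname `my-tf-server` (`generate_ssl_config.sh` takes the service domain name as its first argument and regenerates the `ssl_configure/` directory):
```
./generate_ssl_config.sh my-tf-server
```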
#### Running the benchmark on the client
Prepare the environment for the client benchmark.
```
cd client
./prepare_client_env.sh
```
Run the benchmark from the client.
```
./benchmark.sh python3 localhost:8500 ../ssl_configure/server.crt
```
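Each concurrent task prints one line per query, followed by a summary. The output below is illustrative only (the latency and tps numbers are made up; the format follows the `format_string` templates in `client/resnet_client_grpc.py`):
```
query: secure channel, task 0, batch 1, loop_idx 0, latency(ms) 35.2, tps: 28.4
...
summary: cnum 1, batch 1, e2e time(s) 7.1, average latency(ms) 35.4, tps: 28.2
```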

@ -0,0 +1,9 @@
#!/bin/sh
echo "Building tf_serving with PIC"
cd docker
./build_tf_serving_with_pic.sh
echo "Creating a temporary container to extract tensorflow_model_server"
docker create --name extract tf_serving_pic:latest
echo "Copying tensorflow_model_server out of the container"
docker cp extract:/usr/local/bin/tensorflow_model_server ../tensorflow_model_server
docker rm -f extract

@ -0,0 +1,21 @@
#!/bin/bash
# Usage: ./benchmark.sh <python> <grpc_url> <server_crt>
python=$1
grpc_url=$2
server_crt=$3
script_dir=$(cd "$(dirname "$0")"; pwd -P)
unset http_proxy https_proxy
# Batch off
$python -u $script_dir/resnet_client_grpc.py -url $grpc_url -crt $server_crt -batch 1 -cnum 1 -loop 200
#$python -u $script_dir/resnet_client_grpc.py -url $grpc_url -crt $server_crt -batch 1 -cnum 16 -loop 125
#$python -u $script_dir/resnet_client_grpc.py -url $grpc_url -crt $server_crt -batch 1 -cnum 32 -loop 100
#$python -u $script_dir/resnet_client_grpc.py -url $grpc_url -crt $server_crt -batch 1 -cnum 48 -loop 75
#$python -u $script_dir/resnet_client_grpc.py -url $grpc_url -crt $server_crt -batch 1 -cnum 64 -loop 50
# Batch on
$python -u $script_dir/resnet_client_grpc.py -url $grpc_url -crt $server_crt -cnum 1 -batch 1 -loop 100
#$python -u $script_dir/resnet_client_grpc.py -url $grpc_url -crt $server_crt -cnum 1 -batch 16 -loop 50
#$python -u $script_dir/resnet_client_grpc.py -url $grpc_url -crt $server_crt -cnum 1 -batch 32 -loop 40
#$python -u $script_dir/resnet_client_grpc.py -url $grpc_url -crt $server_crt -cnum 1 -batch 48 -loop 30
#$python -u $script_dir/resnet_client_grpc.py -url $grpc_url -crt $server_crt -cnum 1 -batch 64 -loop 20

@ -0,0 +1,3 @@
#!/bin/sh
apt-get update
# libGL is required by opencv-python
apt-get install -y libgl1-mesa-glx
pip3 install -r requirements.txt -v

@ -0,0 +1,6 @@
argparse
aiohttp>=3.7.0
grpcio>=1.34.0
opencv-python>=4.4.0
tensorflow>=2.3.0
tensorflow-serving-api>=2.3.0

@ -0,0 +1,162 @@
from __future__ import print_function
import numpy as np
import requests, argparse, time, grpc, cv2, asyncio, functools
import tensorflow as tf
from tensorflow_serving.apis import predict_pb2
from tensorflow_serving.apis import prediction_service_pb2_grpc
from utils import *
class benchmark_engine(object):
    def __init__(self, url, image_flag=None, certificate=None, batch_size=1, concurrent_num=64, response_time=10):
        self.url = url
        self.batch_size = batch_size
        self.response_time = response_time
        self.concurrent_num = concurrent_num
        self.image_flag = image_flag
        self.certificate = certificate
        self.request_signatures = []
        self.request_stubs = []
        self.request_response_list = {}
        self.__prepare__()

    def __prepare__(self):
        for idx in range(self.concurrent_num):
            # get the image array: random data unless a real image is given
            if self.image_flag is None:
                image_np = np.random.randint(0, 255, (self.batch_size, 224, 224, 3), dtype=np.uint8).astype(np.float32)
            else:
                if self.batch_size != 1:
                    print('batch size != 1 is not supported with a real image!')
                    exit()
                image_np = img_to_array(self.image_flag).astype(np.float32)
                image_np.resize((1, 224, 224, 3))
            # create the request
            request = predict_pb2.PredictRequest()
            request.model_spec.name = 'resnet50-v15-fp32'
            request.model_spec.signature_name = 'serving_default'
            request.inputs['input'].CopyFrom(tf.make_tensor_proto(image_np, shape=[self.batch_size, 224, 224, 3]))
            self.request_signatures.append(request)
        return None
    async def __connection__(self, task_idx, loop_num):
        request_signatures = self.request_signatures[task_idx]
        response_list = []
        # create channel
        if self.certificate is None:
            async with grpc.aio.insecure_channel(self.url) as channel:
                stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)
                if loop_num != 0:
                    format_string = 'query: {} channel, task {}, batch {}, loop_idx {}, latency(ms) {:.1f}, tps: {:.1f}'
                    for loop_idx in range(loop_num):
                        start_time = time.time()
                        response = await stub.Predict(request_signatures)
                        stop_time = time.time()
                        latency = stop_time - start_time
                        tps = self.batch_size / latency
                        response_list.append([response, latency])
                        print(format_string.format('insecure', task_idx, self.batch_size, loop_idx, 1000*latency, tps))
                else:
                    format_string = 'query: {} channel, task {}, batch {}, latency(ms) {:.1f}, tps: {:.1f}'
                    while True:
                        start_time = time.time()
                        response = await stub.Predict(request_signatures)
                        stop_time = time.time()
                        latency = stop_time - start_time
                        tps = self.batch_size / latency
                        print(format_string.format('insecure', task_idx, self.batch_size, 1000*latency, tps))
        else:
            creds = grpc.ssl_channel_credentials(root_certificates=open(self.certificate, 'rb').read())
            async with grpc.aio.secure_channel(self.url, creds) as channel:
                stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)
                if loop_num != 0:
                    format_string = 'query: {} channel, task {}, batch {}, loop_idx {}, latency(ms) {:.1f}, tps: {:.1f}'
                    for loop_idx in range(loop_num):
                        start_time = time.time()
                        response = await stub.Predict(request_signatures)
                        stop_time = time.time()
                        latency = stop_time - start_time
                        tps = self.batch_size / latency
                        response_list.append([response, latency])
                        print(format_string.format('secure', task_idx, self.batch_size, loop_idx, 1000*latency, tps))
                else:
                    format_string = 'query: {} channel, task {}, batch {}, latency(ms) {:.1f}, tps: {:.1f}'
                    while True:
                        start_time = time.time()
                        response = await stub.Predict(request_signatures)
                        stop_time = time.time()
                        latency = stop_time - start_time
                        tps = self.batch_size / latency
                        try:
                            proto_msg_to_dict(response)
                        except Exception as e:
                            print('Error response:', e)
                        print(format_string.format('secure', task_idx, self.batch_size, 1000*latency, tps))
        return response_list
    def run(self, loop_num):
        start_time = time.time()
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        connections = []
        self.request_response_list.clear()
        for idx in range(self.concurrent_num):
            connections.append(asyncio.ensure_future(self.__connection__(idx, loop_num)))
        loop.run_until_complete(asyncio.wait(connections))
        loop.close()
        stop_time = time.time()
        response_list = [connections[idx].result() for idx in range(self.concurrent_num)]
        print(proto_msg_to_dict(response_list[0][0][0]))
        request_time = 0
        for c_idx in range(self.concurrent_num):
            if loop_num != 0:
                for l_idx in range(loop_num):
                    request_time += response_list[c_idx][l_idx][1]
        if loop_num != 0:
            e2e_time = stop_time - start_time
            request_num = self.concurrent_num * loop_num
            latency = request_time / request_num
            tps = request_num * self.batch_size / e2e_time
            format_string = 'summary: cnum {}, batch {}, e2e time(s) {}, average latency(ms) {}, tps: {}'
            print(format_string.format(self.concurrent_num, self.batch_size, e2e_time, 1000*latency, tps))
def main():
    benchmark_app = benchmark_engine(args.url, args.img, args.crt, args.batch, args.cnum)
    if args.loop != 0:
        # warm up
        benchmark_app.run(5)
    # start loop
    benchmark_app.run(args.loop)

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-url', type=str, help='gRPC serving URL, e.g. IP:8500')
    parser.add_argument('-img', default=None, type=str, help='Image path')
    parser.add_argument('-crt', default=None, type=str, help='TLS certificate file path')
    parser.add_argument('-batch', default=1, type=int, help='Batch size')
    parser.add_argument('-cnum', default=16, type=int, help='Number of concurrent connections')
    parser.add_argument('-loop', default=200, type=int, help='Number of request loops per connection; 0 loops forever')
    args = parser.parse_args()
    main()

@ -0,0 +1,29 @@
import json, cv2, base64
import numpy as np
from google.protobuf import json_format

def dict_to_json_msg(data):
    return json.dumps(data)

def json_msg_to_dict(json_msg):
    return json.loads(json_msg)

def proto_msg_to_json_msg(proto_data):
    return json_format.MessageToJson(proto_data)

def proto_msg_to_dict(proto_data):
    return json_msg_to_dict(proto_msg_to_json_msg(proto_data))

def img_to_array(img_path):
    img = cv2.imread(img_path)
    return img

def img_array_to_base64(image_array):
    base64_str = base64.b64encode(image_array).decode('utf-8')
    return base64_str

def base64_to_img_array(base64_str):
    img_string = base64.b64decode(base64_str)
    nparr = np.frombuffer(img_string, np.uint8)
    image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
    return image

@ -0,0 +1,51 @@
# Copyright 2018 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
FROM tensorflow/serving:latest-devel as binary_build
WORKDIR /tensorflow-serving
# Build, and install TensorFlow Serving
ARG TF_SERVING_BUILD_OPTIONS="--config=nativeopt"
RUN echo "Building with build options: ${TF_SERVING_BUILD_OPTIONS}"
ARG TF_SERVING_BAZEL_OPTIONS=""
RUN echo "Building with Bazel options: ${TF_SERVING_BAZEL_OPTIONS}"
RUN bazel build -j 8 --color=yes --curses=yes \
    ${TF_SERVING_BAZEL_OPTIONS} \
    --verbose_failures \
    --force_pic \
    --output_filter=DONT_MATCH_ANYTHING \
    ${TF_SERVING_BUILD_OPTIONS} \
    tensorflow_serving/model_servers:tensorflow_model_server && \
    cp bazel-bin/tensorflow_serving/model_servers/tensorflow_model_server \
    /usr/local/bin/
# Build and install TensorFlow Serving API
RUN bazel build -j 8 --color=yes --curses=yes \
    ${TF_SERVING_BAZEL_OPTIONS} \
    --force_pic \
    --verbose_failures \
    --output_filter=DONT_MATCH_ANYTHING \
    ${TF_SERVING_BUILD_OPTIONS} \
    tensorflow_serving/tools/pip_package:build_pip_package && \
    bazel-bin/tensorflow_serving/tools/pip_package/build_pip_package \
    /tmp/pip && \
    pip --no-cache-dir install --upgrade \
    /tmp/pip/tensorflow_serving_api-*.whl && \
    rm -rf /tmp/pip
FROM binary_build as clean_build
# Clean up Bazel cache when done.
RUN bazel clean --expunge --color=yes && \
    rm -rf /root/.cache
CMD ["/bin/bash"]
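After `build_tf_serving_with_pic.sh` (below) builds this image, you can confirm that the PIC-built binary is present before it is copied out. A minimal sketch, assuming the default `latest` tag from the build script:
```
docker run --rm tf_serving_pic:latest ls -l /usr/local/bin/tensorflow_model_server
```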

@ -0,0 +1,18 @@
#!/bin/bash
set -e
if [ -z "$1" ]; then
    tag=latest
else
    tag=$1
fi
# You can drop the http_proxy/https_proxy build args if your network doesn't need a proxy
proxy_server="" # your http proxy server
DOCKER_BUILDKIT=0 docker build \
    -f Dockerfile.devel . \
    -t tf_serving_pic:${tag} \
    --build-arg http_proxy=${proxy_server} \
    --build-arg https_proxy=${proxy_server} \
    --build-arg no_proxy=localhost,127.0.0.0/1

@ -0,0 +1,8 @@
#!/bin/bash
cur_dir=$(pwd -P)
models_abs_dir=${cur_dir}/models
mkdir -p ${models_abs_dir}
# resnet50-v15
mkdir -p ${models_abs_dir}/resnet50-v15-fp32
cd ${models_abs_dir}/resnet50-v15-fp32
wget --no-check-certificate -c https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_8/resnet50_fp32_pretrained_model.pb -O resnet50-v15-fp32.pb

@ -0,0 +1,24 @@
#!/bin/bash
service_domain_name=$1
rm -rf ssl_configure
mkdir ssl_configure
cd ssl_configure
# https://kubernetes.github.io/ingress-nginx/examples/PREREQUISITES/#client-certificate-authentication
openssl req -x509 -sha256 -nodes -days 365 -newkey rsa:2048 -keyout server.key -out server.crt -subj "/CN=${service_domain_name}"
# Generate the TLS config for tensorflow_model_server
## https://stackoverflow.com/questions/59199419/using-tensorflow-model-server-with-ssl-configuration
echo "server_key: '`cat server.key | paste -d "" -s`'" >> ssl.cfg
echo "server_cert: '`cat server.crt | paste -d "" -s`'" >> ssl.cfg
echo "client_verify: false" >> ssl.cfg
sed -i "s/-----BEGIN PRIVATE KEY-----/-----BEGIN PRIVATE KEY-----\\\n/g" ssl.cfg
sed -i "s/-----END PRIVATE KEY-----/\\\n-----END PRIVATE KEY-----/g" ssl.cfg
sed -i "s/-----BEGIN CERTIFICATE-----/-----BEGIN CERTIFICATE-----\\\n/g" ssl.cfg
sed -i "s/-----END CERTIFICATE-----/\\\n-----END CERTIFICATE-----/g" ssl.cfg
echo "Generate server.key server.crt and ssl.cfg successfully!"
#cat ssl.cfg
cd -
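The resulting ssl.cfg is a text-format SSL configuration for tensorflow_model_server. Its rough shape, with the key and certificate bodies elided, is:
```
server_key: '-----BEGIN PRIVATE KEY-----
...
-----END PRIVATE KEY-----'
server_cert: '-----BEGIN CERTIFICATE-----
...
-----END CERTIFICATE-----'
client_verify: false
```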

@ -0,0 +1,4 @@
127.0.0.1 occlum-node
127.0.0.1 localhost
::1 occlum-node
::1 localhost

@ -0,0 +1,102 @@
#
# -*- coding: utf-8 -*-
#
# Copyright (c) 2019 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# SPDX-License-Identifier: EPL-2.0
#
"""Import a model graph and export a SavedModel.
Usage: model_graph_to_saved_model.py [--model_version=y] import_path export_dir
"""
from __future__ import print_function
import sys
from collections import OrderedDict
import tensorflow.compat.v1 as tf
from tensorflow.python.tools.optimize_for_inference_lib import optimize_for_inference
from tensorflow.python.framework import dtypes
INPUTS = 'input'
OUTPUTS = 'predict'
tf.app.flags.DEFINE_integer('model_version', 1, 'Version number of the model.')
tf.app.flags.DEFINE_string('import_path', '', 'Model import path.')
tf.app.flags.DEFINE_string('export_dir', '/tmp', 'Export directory.')
tf.app.flags.DEFINE_string('inputs', INPUTS, 'Input tensor name.')
tf.app.flags.DEFINE_string('outputs', OUTPUTS, 'Output tensor name.')
tf.app.flags.DEFINE_string('dtypes', 'float32', 'Input data type.')
FLAGS = tf.app.flags.FLAGS
def main(_):
    if len(sys.argv) < 2 or sys.argv[-1].startswith('-'):
        print('Usage: model_graph_to_saved_model.py [--model_version=y] import_path export_dir')
        sys.exit(-1)
    if FLAGS.import_path == '':
        print('Please specify the path to the model graph you want to convert to SavedModel format.')
        sys.exit(-1)
    if FLAGS.model_version <= 0:
        print('Please specify a positive value for version number.')
        sys.exit(-1)

    # Import model graph
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=True)) as sess:
        graph_def = tf.GraphDef()
        with tf.gfile.GFile(FLAGS.import_path, 'rb') as input_file:
            input_graph_content = input_file.read()
            graph_def.ParseFromString(input_graph_content)

        # Apply transform optimizations
        # https://www.tensorflow.org/api_docs/python/tf/dtypes/DType
        output_graph = optimize_for_inference(graph_def, [FLAGS.inputs], [FLAGS.outputs], dtypes.float32.as_datatype_enum, True)
        # output_graph = graph_def
        sess.graph.as_default()
        tf.import_graph_def(output_graph, name='')
        # print(sess.graph.get_operations())

        # Replace the signature_def_map.
        in_image = sess.graph.get_tensor_by_name(FLAGS.inputs + ':0')
        inputs = {INPUTS: tf.compat.v1.saved_model.build_tensor_info(in_image)}
        out_classes = sess.graph.get_tensor_by_name(FLAGS.outputs + ':0')
        outputs = {OUTPUTS: tf.compat.v1.saved_model.build_tensor_info(out_classes)}
        signature = tf.saved_model.signature_def_utils.build_signature_def(
            inputs=inputs,
            outputs=outputs,
            method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME
        )

        # Save out the SavedModel
        print('Exporting trained model to', FLAGS.export_dir + '/' + str(FLAGS.model_version))
        builder = tf.saved_model.builder.SavedModelBuilder(FLAGS.export_dir + '/' + str(FLAGS.model_version))
        builder.add_meta_graph_and_variables(
            sess, [tf.saved_model.tag_constants.SERVING],
            signature_def_map={
                tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: signature
            }
        )
        builder.save()
        print('Done!')

if __name__ == '__main__':
    tf.app.run()

@ -0,0 +1,8 @@
#!/bin/bash
apt-get update
apt-get install -y python3-pip
pip3 install --upgrade pip
pip3 install --upgrade tensorflow==2.4
./download_model.sh
python3 ./model_graph_to_saved_model.py --import_path ./models/resnet50-v15-fp32/resnet50-v15-fp32.pb --export_dir ./resnet50-v15-fp32 --model_version 1 --inputs input --outputs predict
./generate_ssl_config.sh localhost

@ -0,0 +1,59 @@
#!/bin/bash
occlum_glibc=/opt/occlum/glibc/lib/
host_libs=/lib/x86_64-linux-gnu/
set -e
ssl_config_file=/bin/ssl_configure/ssl.cfg
model_name=resnet50-v15-fp32
enable_batching=false
rest_api_num_threads=8
session_parallelism=0
parallel_num_threads=2
unset http_proxy https_proxy
# 1. Init Occlum Workspace
rm -rf occlum_instance
mkdir occlum_instance
cd occlum_instance
occlum init
new_json="$(jq '.resource_limits.user_space_size = "7000MB" |
.resource_limits.kernel_space_heap_size="384MB" |
.process.default_heap_size = "128MB" |
.resource_limits.max_num_of_threads = 64 |
.process.default_mmap_size = "6000MB" |
.env.default = [ "OMP_NUM_THREADS=8", "KMP_AFFINITY=verbose,granularity=fine,compact,1,0", "KMP_BLOCKTIME=20", "MKL_NUM_THREADS=8"]' Occlum.json)" && \
echo "${new_json}" > Occlum.json
# 2. Copy files into Occlum Workspace and Build
mkdir -p image/model
cp -rf ../resnet50-v15-fp32 image/model/
cp -rf ../ssl_configure image/bin/
cp ../tensorflow_model_server image/bin
cp ../hosts image/etc/
cp $occlum_glibc/libdl.so.2 image/$occlum_glibc
cp $occlum_glibc/librt.so.1 image/$occlum_glibc
cp $occlum_glibc/libm.so.6 image/$occlum_glibc
cp $occlum_glibc/libutil.so.1 image/$occlum_glibc
cp $occlum_glibc/libpthread.so.0 image/$occlum_glibc
cp $occlum_glibc/libnss_files.so.2 image/$occlum_glibc
cp $occlum_glibc/libnss_compat.so.2 image/$occlum_glibc
occlum build
# 3. Run the TF Serving server
taskset -c 0-1 occlum run /bin/tensorflow_model_server \
--model_name=${model_name} \
--model_base_path=/model/${model_name} \
--port=8500 \
--rest_api_port=8501 \
--enable_model_warmup=true \
--flush_filesystem_caches=false \
--enable_batching=${enable_batching} \
--rest_api_num_threads=${rest_api_num_threads} \
--tensorflow_session_parallelism=${session_parallelism} \
--tensorflow_intra_op_parallelism=${parallel_num_threads} \
--tensorflow_inter_op_parallelism=${parallel_num_threads} \
--ssl_config_file=${ssl_config_file} \
&
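Once the server has warmed up, a quick sanity check from the host is possible. A minimal sketch, assuming TF Serving's standard REST model-status endpoint on the rest_api_port configured above (8501):
```
curl http://localhost:8501/v1/models/resnet50-v15-fp32
```
The gRPC port (8500) is TLS-protected by ssl.cfg, so gRPC queries should go through the benchmark client with `../ssl_configure/server.crt`.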