From d19ff1a8be9d3b89428a740cd51db6c410d7fc2c Mon Sep 17 00:00:00 2001 From: yuanwu Date: Mon, 19 Jul 2021 07:43:58 +0000 Subject: [PATCH] Enable Tensorflow_serving in Occlum Signed-off-by: yuanwu --- .github/workflows/demo_test.yml | 35 ++++ demos/tensorflow/tensorflow_serving/README.md | 41 +++++ .../build_occlum_tf_serving.sh | 9 + .../client/__pycache__/utils.cpython-36.pyc | Bin 0 -> 1333 bytes .../tensorflow_serving/client/benchmark.sh | 21 +++ .../client/prepare_client_env.sh | 3 + .../client/requirements.txt | 6 + .../client/resnet_client_grpc.py | 162 ++++++++++++++++++ .../tensorflow_serving/client/utils.py | 29 ++++ .../docker/Dockerfile.devel | 51 ++++++ .../docker/build_tf_serving_with_pic.sh | 18 ++ .../tensorflow_serving/download_model.sh | 8 + .../tensorflow_serving/generate_ssl_config.sh | 24 +++ demos/tensorflow/tensorflow_serving/hosts | 4 + .../model_graph_to_saved_model.py | 102 +++++++++++ .../prepare_model_and_env.sh | 8 + .../run_occlum_tf_serving.sh | 59 +++++++ 17 files changed, 580 insertions(+) create mode 100644 demos/tensorflow/tensorflow_serving/README.md create mode 100755 demos/tensorflow/tensorflow_serving/build_occlum_tf_serving.sh create mode 100644 demos/tensorflow/tensorflow_serving/client/__pycache__/utils.cpython-36.pyc create mode 100755 demos/tensorflow/tensorflow_serving/client/benchmark.sh create mode 100755 demos/tensorflow/tensorflow_serving/client/prepare_client_env.sh create mode 100644 demos/tensorflow/tensorflow_serving/client/requirements.txt create mode 100644 demos/tensorflow/tensorflow_serving/client/resnet_client_grpc.py create mode 100644 demos/tensorflow/tensorflow_serving/client/utils.py create mode 100644 demos/tensorflow/tensorflow_serving/docker/Dockerfile.devel create mode 100755 demos/tensorflow/tensorflow_serving/docker/build_tf_serving_with_pic.sh create mode 100755 demos/tensorflow/tensorflow_serving/download_model.sh create mode 100755 demos/tensorflow/tensorflow_serving/generate_ssl_config.sh create mode 100644 demos/tensorflow/tensorflow_serving/hosts create mode 100755 demos/tensorflow/tensorflow_serving/model_graph_to_saved_model.py create mode 100755 demos/tensorflow/tensorflow_serving/prepare_model_and_env.sh create mode 100755 demos/tensorflow/tensorflow_serving/run_occlum_tf_serving.sh diff --git a/.github/workflows/demo_test.yml b/.github/workflows/demo_test.yml index 2543d968..d55d0d1b 100644 --- a/.github/workflows/demo_test.yml +++ b/.github/workflows/demo_test.yml @@ -691,3 +691,38 @@ jobs: - name: Run Bash test run: docker exec bash_test bash -c "cd /root/occlum/demos/bash && SGX_MODE=SIM ./run_bash_demo.sh" + + Tensorflow_serving_test: + runs-on: ubuntu-18.04 + steps: + - uses: actions/checkout@v1 + with: + submodules: true + + - name: Get occlum version + run: echo "OCCLUM_VERSION=$(grep "Version =" src/pal/include/occlum_version.h | awk '{print $4}')" >> $GITHUB_ENV + + - name: Compile the tensorflow_serving with PIC + run: | + cd $GITHUB_WORKSPACE/demos/tensorflow/tensorflow_serving; + ./build_occlum_tf_serving.sh + + - name: Create container + run: docker run -itd --name=tf_serving_test -v $GITHUB_WORKSPACE:/root/occlum occlum/occlum:${{ env.OCCLUM_VERSION }}-ubuntu18.04 + + - name: Build dependencies + run: docker exec tf_serving_test bash -c "cd /root/occlum; make submodule" + + - name: Make install + run: docker exec tf_serving_test bash -c "source /opt/intel/sgxsdk/environment; cd /root/occlum; OCCLUM_RELEASE_BUILD=1 make install" + + - name: Set up environment + run: docker exec tf_serving_test bash 
-c "cd /root/occlum/demos/tensorflow/tensorflow_serving; ./prepare_model_and_env.sh" + + - name: Run tf_serving server + run: docker exec tf_serving_test bash -c "cd /root/occlum/demos/tensorflow/tensorflow_serving; SGX_MODE=SIM ./run_occlum_tf_serving.sh" + + - name: Run tf_serving client + run: | + sleep 120; + docker exec tf_serving_test bash -c "cd /root/occlum/demos/tensorflow/tensorflow_serving/client; ./prepare_client_env.sh; ./benchmark.sh python3 localhost:8500 ../ssl_configure/server.crt" diff --git a/demos/tensorflow/tensorflow_serving/README.md b/demos/tensorflow/tensorflow_serving/README.md new file mode 100644 index 00000000..5265164a --- /dev/null +++ b/demos/tensorflow/tensorflow_serving/README.md @@ -0,0 +1,41 @@ +# TensorFlow Serving With Occlum + + + +TensorFlow Serving is a flexible, high-performance serving system for machine learning models, designed for production environments. This demo presents a secure End-to-End TensorFlow serving solution in Occlum. + +- **Runtime security.** Occlum uses the intel SGX to provide an enclave for running applications in encrypted memory. +- **At-Rest security.** Model and TLS key are protected by Occlum encrypted FS. +- **Communication Security.** Use the TLS to secure the gRPC communications. + +#### Executing the Tensorflow serving in Occlum + +The following command will download the Resnet50 model and convert the model format. It also will generate the TLS key and certificates for localhost( server domain name). The server.crt will be used by client. The sever.key and ssl.cfg is used by TF serving. + +``` +./prepare_model_and_env.sh +``` + +Run the Tensorflow Serving in occlum. + +``` +./run_occlum_tf_serving.sh +``` + +***Note:*** The demo runs in the same machine by default. If you want to run TF serving and client in different machines. Please modify the domain name in the scripts. + +#### Executing the benchmark in client + +Prepare the environment for client benchmark. + +``` +cd client +./prepare_client_env.sh +``` + +Run the benchmark test in client. 
+ +``` +./benchmark.sh python3 localhost:8500 ../ssl_configure/server.crt +``` + diff --git a/demos/tensorflow/tensorflow_serving/build_occlum_tf_serving.sh b/demos/tensorflow/tensorflow_serving/build_occlum_tf_serving.sh new file mode 100755 index 00000000..df47c5ec --- /dev/null +++ b/demos/tensorflow/tensorflow_serving/build_occlum_tf_serving.sh @@ -0,0 +1,9 @@ +#!/bin/sh +echo Building tf-serving with pic +cd docker +./build_tf_serving_with_pic.sh +echo Create the tensorflow_model_server +docker create --name extract tf_serving_pic:latest +echo Copy the tensorflow_model_server +docker cp extract:/usr/local/bin/tensorflow_model_server ../tensorflow_model_server +docker rm -f extract diff --git a/demos/tensorflow/tensorflow_serving/client/__pycache__/utils.cpython-36.pyc b/demos/tensorflow/tensorflow_serving/client/__pycache__/utils.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..34067f5eb7aa6053289e66373a66409855042005 GIT binary patch literal 1333 zcma)6O>fgc5Z(1h;y6j0Qp(4Pvys43sj40j0u_)zw3S*;zPPT}NgdhVXm{PJC_R-w z!-YS?ZLgg81xTEjStlU`4z=Z(wRd*kdo#0s*6;h@zWzM5Z9=}2rOSfxOL*id7)BT^ zNYr>5QS)v^7I>?$ODA$DVK#F-HJ=SGC@Ez7?Yv6sh&IaJS zjGmBS;|HLS!+`4USn?_!a#1EKrT7&t^hjtQ9_fQoWK5^z2A9&qz|sz`(k`pZiG)oY zmZ;CFOR=)3m6|kYy#8p~@=B#jxO)vCPCpUX8-E z$g@g?waN?mbaJH!EKgOecx)J!a^xZ#q(!Lkz}}j!fMgTx7Cd3VHuB~h8orHbW5i%S zfvAX%D04(i!|f4~rtTkPQYNG9BYzL+f;{;X!EtNI)++W^GX*l2C3lwy^j8U>DpR2z z3=~nOLW>#+BrI@IKm|suB5wSJh;r8;YAzSY3%RIf(GNH??}uwTUG#*VUTo`DUP855 zU`1YDj^at8&b1ArRq!6pLMjp=lB*5Gb-)nnQ*j6U|B>&)%p7@aF?vI1_}1Ykw^qX^ zdx5Q6XNk;qcXa1$cPFb-&NAI%8G0bq%}c09gR1=|GKL0SrMHRn3^yd%#BBlkjOBoe z`^zkm`=T0;msnm?WsU7ATIa^Pwv;<=P0_aD&7RuLdGf?2tTBumrrN3|+8+vD0yj7t z+O6|S?TK{^gT&6FdhZU7-@JMq?;m|QIu0Ch52m9IP(LxNBDb1=2JITr|45vv2Ym~% z4`Bm|O^=0GgIhd;anLgd$e=e&*GHU>itMTBptE`?usO_`{L^&;`@|H}=3.7.0 +grpcio>=1.34.0 +opencv-python>=4.4.0 +tensorflow>=2.3.0 +tensorflow-serving-api>=2.3.0 diff --git a/demos/tensorflow/tensorflow_serving/client/resnet_client_grpc.py b/demos/tensorflow/tensorflow_serving/client/resnet_client_grpc.py new file mode 100644 index 00000000..f8fc1a05 --- /dev/null +++ b/demos/tensorflow/tensorflow_serving/client/resnet_client_grpc.py @@ -0,0 +1,162 @@ +from __future__ import print_function + +import numpy as np +import requests, argparse, time, grpc, cv2, asyncio, functools + +import tensorflow as tf +from tensorflow_serving.apis import predict_pb2 +from tensorflow_serving.apis import prediction_service_pb2_grpc + +from utils import * + +class benchmark_engine(object): + def __init__(self, url, image_flag=None, certificate=None, batch_size=1, concurrent_num=64, response_time=10): + self.url = url + self.batch_size = batch_size + self.response_time = response_time + self.concurrent_num = concurrent_num + self.image_flag = image_flag + self.certificate = certificate + self.request_signatures = [] + self.request_stubs = [] + self.request_response_list = {} + self.__prepare__() + pass + + def __prepare__(self): + for idx in range(self.concurrent_num): + # get image array + if self.image_flag == None: + image_np = np.random.randint(0, 255, (self.batch_size, 224, 224, 3), dtype=np.uint8).astype(np.float32) + # print('image type: dummy') + else: + if self.batch_size != 1: + print('not support batch n!=1 with image!') + exit() + else: + image_np = img_to_array(self.image_flag).astype(np.float32) + image_np.resize((1, 224, 224, 3)) + # cv2.imshow('',img) + # cv2.waitKey(0) + # cv2.destroyAllWindows() + # print('image type: real') + + # create request + request = 
predict_pb2.PredictRequest() + request.model_spec.name = 'resnet50-v15-fp32' + request.model_spec.signature_name = 'serving_default' + request.inputs['input'].CopyFrom(tf.make_tensor_proto(image_np, shape=[self.batch_size, 224, 224, 3])) + self.request_signatures.append(request) + return None + + async def __connection__(self, task_idx, loop_num): + request_signatures = self.request_signatures[task_idx] + response_list = [] + + # create channel + if self.certificate == None: + async with grpc.aio.insecure_channel(self.url) as channel: + stub = prediction_service_pb2_grpc.PredictionServiceStub(channel) + if loop_num != 0: + format_string = 'query: {} channel, task {}, batch {}, loop_idx {}, latency(ms) {:.1f}, tps: {:.1f}' + for loop_idx in range(loop_num): + start_time = time.time() + response = await stub.Predict(request_signatures) + stop_time = time.time() + latency = stop_time - start_time + tps = self.batch_size / latency + response_list.append([response, latency]) + print(format_string.format('insecure', task_idx, self.batch_size, loop_idx, 1000*latency, tps)) + else: + format_string = 'query: {} channel, task {}, batch {}, latency(ms) {:.1f}, tps: {:.1f}' + while True: + start_time = time.time() + response = await stub.Predict(request_signatures) + stop_time = time.time() + latency = stop_time - start_time + tps = self.batch_size / latency + print(format_string.format('insecure', task_idx, self.batch_size, 1000*latency, tps)) + else: + creds = grpc.ssl_channel_credentials(root_certificates=open(self.certificate, 'rb').read()) + async with grpc.aio.secure_channel(self.url, creds) as channel: + stub = prediction_service_pb2_grpc.PredictionServiceStub(channel) + if loop_num != 0: + format_string = 'query: {} channel, task {}, batch {}, loop_idx {}, latency(ms) {:.1f}, tps: {:.1f}' + for loop_idx in range(loop_num): + start_time = time.time() + response = await stub.Predict(request_signatures) + stop_time = time.time() + latency = stop_time - start_time + tps = self.batch_size / latency + response_list.append([response, latency]) + print(format_string.format('secure', task_idx, self.batch_size, loop_idx, 1000*latency, tps)) + else: + format_string = 'query: {} channel, task {}, batch {}, latency(ms) {:.1f}, tps: {:.1f}' + while True: + start_time = time.time() + response = await stub.Predict(request_signatures) + stop_time = time.time() + latency = stop_time - start_time + tps = self.batch_size / latency + try: + proto_msg_to_dict(response) + except Exception as e: + print('Error response:', e) + print(format_string.format('secure', task_idx, self.batch_size, 1000*latency, tps)) + return response_list + + def run(self, loop_num): + start_time = time.time() + + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + + connections = [] + self.request_response_list.clear() + for idx in range(self.concurrent_num): + connections.append(asyncio.ensure_future(self.__connection__(idx, loop_num))) + + loop.run_until_complete(asyncio.wait(connections)) + loop.close() + + stop_time = time.time() + + response_list = [connections[idx].result() for idx in range(self.concurrent_num)] + print(proto_msg_to_dict(response_list[0][0][0])) + + request_time = 0 + for c_idx in range(self.concurrent_num): + if loop_num != 0: + for l_idx in range(loop_num): + request_time += response_list[c_idx][l_idx][1] + + if loop_num != 0: + e2e_time = stop_time - start_time + request_num = self.concurrent_num * loop_num + latency = request_time / request_num + tps = request_num * self.batch_size / e2e_time + 
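# Summarize the whole run: average per-request latency across all concurrent tasks,
+ # and overall throughput as total images served divided by end-to-end wall-clock time.
+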
format_string = 'summary: cnum {}, batch {}, e2e time(s) {}, average latency(ms) {}, tps: {}'
+            print(format_string.format(self.concurrent_num, self.batch_size, e2e_time, 1000*latency, tps))
+        pass
+
+def main():
+    benchmark_app = benchmark_engine(args.url, args.img, args.crt, args.batch, args.cnum)
+    if args.loop != 0:
+        # warm up
+        benchmark_app.run(5)
+    # start loop
+    benchmark_app.run(args.loop)
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-url', type=str, help='gRPC API Serving URL: IP:8500')
+    parser.add_argument('-img', default=None, type=str, help='Image path')
+    parser.add_argument('-crt', default=None, type=str, help='TLS certificate file path')
+    parser.add_argument('-batch', default=1, type=int, help='Batch size')
+    parser.add_argument('-cnum', default=16, type=int, help='Concurrent connection num')
+    parser.add_argument('-loop', default=200, type=int, help='Requests loop num: 0 (infinite loop)')
+
+    args = parser.parse_args()
+
+    main()
diff --git a/demos/tensorflow/tensorflow_serving/client/utils.py b/demos/tensorflow/tensorflow_serving/client/utils.py
new file mode 100644
index 00000000..24459f97
--- /dev/null
+++ b/demos/tensorflow/tensorflow_serving/client/utils.py
@@ -0,0 +1,29 @@
+import json, cv2, base64
+import numpy as np
+from google.protobuf import json_format
+
+
+def dict_to_json_msg(data):
+    return json.dumps(data)
+
+def json_msg_to_dict(json_msg):
+    return json.loads(json_msg)
+
+def proto_msg_to_json_msg(proto_data):
+    return json_format.MessageToJson(proto_data)
+
+def proto_msg_to_dict(proto_data):
+    return json_msg_to_dict(proto_msg_to_json_msg(proto_data))
+
+def img_to_array(img_path):
+    img = cv2.imread(img_path)
+    return img
+
+def img_array_to_base64(image_array):
+    base64_str = base64.b64encode(image_array).decode('utf-8')
+    return base64_str
+
+def base64_to_img_array(base64_str):
+    imgString = base64.b64decode(base64_str)
+    nparr = np.fromstring(imgString, np.uint8)
+    image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
+    return image
\ No newline at end of file
diff --git a/demos/tensorflow/tensorflow_serving/docker/Dockerfile.devel b/demos/tensorflow/tensorflow_serving/docker/Dockerfile.devel
new file mode 100644
index 00000000..d3391e21
--- /dev/null
+++ b/demos/tensorflow/tensorflow_serving/docker/Dockerfile.devel
@@ -0,0 +1,51 @@
+# Copyright 2018 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
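+
+# Note: the bazel invocations below add --force_pic so that tensorflow_model_server is
+# built as position-independent code; build_occlum_tf_serving.sh later extracts the
+# binary from this image so it can be loaded and run inside Occlum.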
+FROM tensorflow/serving:latest-devel as binary_build +WORKDIR /tensorflow-serving +# Build, and install TensorFlow Serving +ARG TF_SERVING_BUILD_OPTIONS="--config=nativeopt" +RUN echo "Building with build options: ${TF_SERVING_BUILD_OPTIONS}" +ARG TF_SERVING_BAZEL_OPTIONS="" +RUN echo "Building with Bazel options: ${TF_SERVING_BAZEL_OPTIONS}" + +RUN bazel build -j 8 --color=yes --curses=yes \ + ${TF_SERVING_BAZEL_OPTIONS} \ + --verbose_failures \ + --force_pic \ + --output_filter=DONT_MATCH_ANYTHING \ + ${TF_SERVING_BUILD_OPTIONS} \ + tensorflow_serving/model_servers:tensorflow_model_server && \ + cp bazel-bin/tensorflow_serving/model_servers/tensorflow_model_server \ + /usr/local/bin/ + +# Build and install TensorFlow Serving API +RUN bazel build -j 8 --color=yes --curses=yes \ + ${TF_SERVING_BAZEL_OPTIONS} \ + --force_pic \ + --verbose_failures \ + --output_filter=DONT_MATCH_ANYTHING \ + ${TF_SERVING_BUILD_OPTIONS} \ + tensorflow_serving/tools/pip_package:build_pip_package && \ + bazel-bin/tensorflow_serving/tools/pip_package/build_pip_package \ + /tmp/pip && \ + pip --no-cache-dir install --upgrade \ + /tmp/pip/tensorflow_serving_api-*.whl && \ + rm -rf /tmp/pip + +FROM binary_build as clean_build +# Clean up Bazel cache when done. +RUN bazel clean --expunge --color=yes && \ + rm -rf /root/.cache + +CMD ["/bin/bash"] diff --git a/demos/tensorflow/tensorflow_serving/docker/build_tf_serving_with_pic.sh b/demos/tensorflow/tensorflow_serving/docker/build_tf_serving_with_pic.sh new file mode 100755 index 00000000..c711d4af --- /dev/null +++ b/demos/tensorflow/tensorflow_serving/docker/build_tf_serving_with_pic.sh @@ -0,0 +1,18 @@ +#!/bin/bash +set -e + +if [ ! -n "$1" ] ; then + tag=latest +else + tag=$1 +fi + +# You can remove build-arg http_proxy and https_proxy if your network doesn't need it +proxy_server="" # your http proxy server + +DOCKER_BUILDKIT=0 docker build \ + -f Dockerfile.devel . 
\ + -t tf_serving_pic:${tag} \ + --build-arg http_proxy=${proxy_server} \ + --build-arg https_proxy=${proxy_server} \ + --build-arg no_proxy=localhost,127.0.0.0/1 \ diff --git a/demos/tensorflow/tensorflow_serving/download_model.sh b/demos/tensorflow/tensorflow_serving/download_model.sh new file mode 100755 index 00000000..789004af --- /dev/null +++ b/demos/tensorflow/tensorflow_serving/download_model.sh @@ -0,0 +1,8 @@ +cur_dir=`pwd -P` +models_abs_dir=${cur_dir}/models +mkdir ${models_abs_dir} + +# resnet50-v15 +mkdir ${models_abs_dir}/resnet50-v15-fp32 +cd ${models_abs_dir}/resnet50-v15-fp32 +wget --no-check-certificate -c https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_8/resnet50_fp32_pretrained_model.pb -O resnet50-v15-fp32.pb diff --git a/demos/tensorflow/tensorflow_serving/generate_ssl_config.sh b/demos/tensorflow/tensorflow_serving/generate_ssl_config.sh new file mode 100755 index 00000000..53fbafa5 --- /dev/null +++ b/demos/tensorflow/tensorflow_serving/generate_ssl_config.sh @@ -0,0 +1,24 @@ +service_domain_name=$1 + +rm -rf ssl_configure +mkdir ssl_configure +cd ssl_configure + +# https://kubernetes.github.io/ingress-nginx/examples/PREREQUISITES/#client-certificate-authentication +openssl req -x509 -sha256 -nodes -days 365 -newkey rsa:2048 -keyout server.key -out server.crt -subj "/CN=${service_domain_name}" + +# Generate tls configure +## https://stackoverflow.com/questions/59199419/using-tensorflow-model-server-with-ssl-configuration + +echo "server_key: '`cat server.key | paste -d "" -s`'" >> ssl.cfg +echo "server_cert: '`cat server.crt | paste -d "" -s`'" >> ssl.cfg +echo "client_verify: false" >> ssl.cfg + +sed -i "s/-----BEGIN PRIVATE KEY-----/-----BEGIN PRIVATE KEY-----\\\n/g" ssl.cfg +sed -i "s/-----END PRIVATE KEY-----/\\\n-----END PRIVATE KEY-----/g" ssl.cfg +sed -i "s/-----BEGIN CERTIFICATE-----/-----BEGIN CERTIFICATE-----\\\n/g" ssl.cfg +sed -i "s/-----END CERTIFICATE-----/\\\n-----END CERTIFICATE-----/g" ssl.cfg + +echo "Generate server.key server.crt and ssl.cfg successfully!" +#cat ssl.cfg +cd - diff --git a/demos/tensorflow/tensorflow_serving/hosts b/demos/tensorflow/tensorflow_serving/hosts new file mode 100644 index 00000000..a190adc8 --- /dev/null +++ b/demos/tensorflow/tensorflow_serving/hosts @@ -0,0 +1,4 @@ +127.0.0.1 occlum-node +127.0.0.1 localhost +::1 occlum-node +::1 localhost diff --git a/demos/tensorflow/tensorflow_serving/model_graph_to_saved_model.py b/demos/tensorflow/tensorflow_serving/model_graph_to_saved_model.py new file mode 100755 index 00000000..263beb8e --- /dev/null +++ b/demos/tensorflow/tensorflow_serving/model_graph_to_saved_model.py @@ -0,0 +1,102 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: EPL-2.0 +# + +"""Import a model graph and export a SavedModel. 
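+
+The frozen GraphDef (.pb) is loaded, optimized for inference, and re-exported as a
+SavedModel with a single Predict signature under <export_dir>/<model_version>, which is
+the directory layout TensorFlow Serving expects for a servable model.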
+ +Usage: model_graph_to_saved_model.py [--model_version=y] import_path export_dir +""" + +from __future__ import print_function + +import sys +from collections import OrderedDict +import tensorflow.compat.v1 as tf +from tensorflow.python.tools.optimize_for_inference_lib import optimize_for_inference +from tensorflow.python.framework import dtypes + + +INPUTS = 'input' +OUTPUTS = 'predict' + +tf.app.flags.DEFINE_integer('model_version', 1, 'Version number of the model.') +tf.app.flags.DEFINE_string('import_path', '', 'Model import path.') +tf.app.flags.DEFINE_string('export_dir', '/tmp', 'Export directory.') +tf.app.flags.DEFINE_string('inputs', INPUTS, 'Export directory.') +tf.app.flags.DEFINE_string('outputs', OUTPUTS, 'Export directory.') +tf.app.flags.DEFINE_string('dtypes', 'float32', 'Export directory.') +FLAGS = tf.app.flags.FLAGS + + +def main(_): + if len(sys.argv) < 2 or sys.argv[-1].startswith('-'): + print('Usage: model_graph_to_saved_model.py [--model_version=y] import_path export_dir') + sys.exit(-1) + if FLAGS.import_path == '': + print('Please specify the path to the model graph you want to convert to SavedModel format.') + sys.exit(-1) + if FLAGS.model_version <= 0: + print('Please specify a positive value for version number.') + sys.exit(-1) + + # Import model graph + with tf.Session(config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=True)) as sess: + graph_def = tf.GraphDef() + with tf.gfile.GFile(FLAGS.import_path, 'rb') as input_file: + input_graph_content = input_file.read() + graph_def.ParseFromString(input_graph_content) + + # Apply transform optimizations + # https://www.tensorflow.org/api_docs/python/tf/dtypes/DType + output_graph = optimize_for_inference(graph_def, [FLAGS.inputs], [FLAGS.outputs], dtypes.float32.as_datatype_enum, True) + # output_graph = graph_def + + sess.graph.as_default() + tf.import_graph_def(output_graph, name='') + # print(sess.graph.get_operations()) + + # Replace the signature_def_map. 
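+ # Look up the graph's input/output tensors by name, wrap them in TensorInfo protos,
+ # and expose them as a single Predict signature under the default serving key
+ # ('serving_default'), which is the signature name the gRPC client requests.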
+ in_image = sess.graph.get_tensor_by_name(FLAGS.inputs + ':0') + inputs = {INPUTS: tf.compat.v1.saved_model.build_tensor_info(in_image)} + + out_classes = sess.graph.get_tensor_by_name(FLAGS.outputs + ':0') + outputs = {OUTPUTS: tf.compat.v1.saved_model.build_tensor_info(out_classes)} + + signature = tf.saved_model.signature_def_utils.build_signature_def( + inputs=inputs, + outputs=outputs, + method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME + ) + + # Save out the SavedModel + print('Exporting trained model to', FLAGS.export_dir + '/' + str(FLAGS.model_version)) + builder = tf.saved_model.builder.SavedModelBuilder(FLAGS.export_dir + '/' + str(FLAGS.model_version)) + builder.add_meta_graph_and_variables( + sess, [tf.saved_model.tag_constants.SERVING], + signature_def_map={ + tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: signature + } + ) + builder.save() + + print('Done!') + + +if __name__ == '__main__': + tf.app.run() diff --git a/demos/tensorflow/tensorflow_serving/prepare_model_and_env.sh b/demos/tensorflow/tensorflow_serving/prepare_model_and_env.sh new file mode 100755 index 00000000..f22b5b33 --- /dev/null +++ b/demos/tensorflow/tensorflow_serving/prepare_model_and_env.sh @@ -0,0 +1,8 @@ +apt-get update +apt install -y python3-pip +pip3 install --upgrade pip +pip3 install --upgrade tensorflow==2.4 +./download_model.sh +python3 ./model_graph_to_saved_model.py --import_path ./models/resnet50-v15-fp32/resnet50-v15-fp32.pb --export_dir ./resnet50-v15-fp32 --model_version 1 --inputs input --outputs predict +./generate_ssl_config.sh localhost + diff --git a/demos/tensorflow/tensorflow_serving/run_occlum_tf_serving.sh b/demos/tensorflow/tensorflow_serving/run_occlum_tf_serving.sh new file mode 100755 index 00000000..5b78f37b --- /dev/null +++ b/demos/tensorflow/tensorflow_serving/run_occlum_tf_serving.sh @@ -0,0 +1,59 @@ +#!/bin/bash +occlum_glibc=/opt/occlum/glibc/lib/ +host_libs=/lib/x86_64-linux-gnu/ +set -e +ssl_config_file=/bin/ssl_configure/ssl.cfg +model_name=resnet50-v15-fp32 +enable_batching=false +rest_api_num_threads=8 +session_parallelism=0 +parallel_num_threads=2 + + +unset http_proxy https_proxy + + +# 1. Init Occlum Workspace +rm -rf occlum_instance +mkdir occlum_instance +cd occlum_instance +occlum init +new_json="$(jq '.resource_limits.user_space_size = "7000MB" | + .resource_limits.kernel_space_heap_size="384MB" | + .process.default_heap_size = "128MB" | + .resource_limits.max_num_of_threads = 64 | + .process.default_mmap_size = "6000MB" | + .env.default = [ "OMP_NUM_THREADS=8", "KMP_AFFINITY=verbose,granularity=fine,compact,1,0", "KMP_BLOCKTIME=20", "MKL_NUM_THREADS=8"]' Occlum.json)" && \ +echo "${new_json}" > Occlum.json + +# 2. Copy files into Occlum Workspace and Build +mkdir -p image/model +cp -rf ../resnet50-v15-fp32 image/model/ +cp -rf ../ssl_configure image/bin/ +cp ../tensorflow_model_server image/bin +cp ../hosts image/etc/ +cp $occlum_glibc/libdl.so.2 image/$occlum_glibc +cp $occlum_glibc/librt.so.1 image/$occlum_glibc +cp $occlum_glibc/libm.so.6 image/$occlum_glibc +cp $occlum_glibc/libutil.so.1 image/$occlum_glibc +cp $occlum_glibc/libpthread.so.0 image/$occlum_glibc +cp $occlum_glibc/libnss_files.so.2 image/$occlum_glibc +cp $occlum_glibc/libnss_compat.so.2 image/$occlum_glibc + +#occlum build +occlum build +# 3. 
Run TensorFlow Serving in Occlum
+taskset -c 0-1 occlum run /bin/tensorflow_model_server \
+    --model_name=${model_name} \
+    --model_base_path=/model/${model_name} \
+    --port=8500 \
+    --rest_api_port=8501 \
+    --enable_model_warmup=true \
+    --flush_filesystem_caches=false \
+    --enable_batching=${enable_batching} \
+    --rest_api_num_threads=${rest_api_num_threads} \
+    --tensorflow_session_parallelism=${session_parallelism} \
+    --tensorflow_intra_op_parallelism=${parallel_num_threads} \
+    --tensorflow_inter_op_parallelism=${parallel_num_threads} \
+    --ssl_config_file=${ssl_config_file} \
+    &
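+
+# tensorflow_model_server now runs in the background inside the Occlum enclave, serving
+# gRPC with TLS (ssl.cfg) on port 8500 and REST on port 8501. Model warm-up is enabled,
+# so allow some time before pointing the client benchmark at the server.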