Enable Tensorflow_serving in Occlum

Signed-off-by: yuanwu <yuan.wu@intel.com>
Authored by yuanwu on 2021-07-19 07:43:58 +00:00; committed by Zongmin.Gu
parent 68f0a22177
commit d19ff1a8be
17 changed files with 580 additions and 0 deletions

@ -691,3 +691,38 @@ jobs:
    - name: Run Bash test
      run: docker exec bash_test bash -c "cd /root/occlum/demos/bash && SGX_MODE=SIM ./run_bash_demo.sh"

  Tensorflow_serving_test:
    runs-on: ubuntu-18.04
    steps:
    - uses: actions/checkout@v1
      with:
        submodules: true
    - name: Get occlum version
      run: echo "OCCLUM_VERSION=$(grep "Version =" src/pal/include/occlum_version.h | awk '{print $4}')" >> $GITHUB_ENV
    - name: Compile the tensorflow_serving with PIC
      run: |
        cd $GITHUB_WORKSPACE/demos/tensorflow/tensorflow_serving;
        ./build_occlum_tf_serving.sh
    - name: Create container
      run: docker run -itd --name=tf_serving_test -v $GITHUB_WORKSPACE:/root/occlum occlum/occlum:${{ env.OCCLUM_VERSION }}-ubuntu18.04
    - name: Build dependencies
      run: docker exec tf_serving_test bash -c "cd /root/occlum; make submodule"
    - name: Make install
      run: docker exec tf_serving_test bash -c "source /opt/intel/sgxsdk/environment; cd /root/occlum; OCCLUM_RELEASE_BUILD=1 make install"
    - name: Set up environment
      run: docker exec tf_serving_test bash -c "cd /root/occlum/demos/tensorflow/tensorflow_serving; ./prepare_model_and_env.sh"
    - name: Run tf_serving server
      run: docker exec tf_serving_test bash -c "cd /root/occlum/demos/tensorflow/tensorflow_serving; SGX_MODE=SIM ./run_occlum_tf_serving.sh"
    - name: Run tf_serving client
      run: |
        sleep 120;
        docker exec tf_serving_test bash -c "cd /root/occlum/demos/tensorflow/tensorflow_serving/client; ./prepare_client_env.sh; ./benchmark.sh python3 localhost:8500 ../ssl_configure/server.crt"

@ -0,0 +1,41 @@
# TensorFlow Serving With Occlum
TensorFlow Serving is a flexible, high-performance serving system for machine learning models, designed for production environments. This demo presents a secure end-to-end TensorFlow Serving solution on Occlum.
- **Runtime security.** Occlum uses Intel SGX to run the application inside an enclave with encrypted memory.
- **At-rest security.** The model and the TLS key are protected by Occlum's encrypted file system.
- **Communication security.** TLS secures the gRPC communication between server and client.
#### Running TensorFlow Serving in Occlum
The following command downloads the ResNet50 model, converts it to the SavedModel format, and generates a TLS key and certificate for localhost (the server domain name). The client uses server.crt; TF Serving uses server.key and ssl.cfg.
```
./prepare_model_and_env.sh
```
Run TensorFlow Serving in Occlum.
```
./run_occlum_tf_serving.sh
```
***Note:*** By default the demo runs on a single machine. To run the TF Serving server and the client on different machines, modify the domain name in the scripts and regenerate the certificate, as shown below.
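If the client reaches the server under a different hostname, the certificate must be issued for that name. A minimal sketch, assuming a hypothetical hostname `my-tf-server` (`generate_ssl_config.sh` takes the service domain name as its first argument and regenerates the `ssl_configure/` directory):
```
./generate_ssl_config.sh my-tf-server
```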
#### Running the benchmark on the client
Prepare the environment for the client benchmark.
```
cd client
./prepare_client_env.sh
```
Run the benchmark from the client.
```
./benchmark.sh python3 localhost:8500 ../ssl_configure/server.crt
```
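Each concurrent task prints one line per query, followed by a summary. The output below is illustrative only (the latency and tps numbers are made up; the format follows the `format_string` templates in `client/resnet_client_grpc.py`):
```
query: secure channel, task 0, batch 1, loop_idx 0, latency(ms) 35.2, tps: 28.4
...
summary: cnum 1, batch 1, e2e time(s) 7.1, average latency(ms) 35.4, tps: 28.2
```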

@ -0,0 +1,9 @@
#!/bin/sh
echo "Building tf_serving with PIC"
cd docker
./build_tf_serving_with_pic.sh
echo "Creating a temporary container to extract tensorflow_model_server"
docker create --name extract tf_serving_pic:latest
echo "Copying tensorflow_model_server out of the container"
docker cp extract:/usr/local/bin/tensorflow_model_server ../tensorflow_model_server
docker rm -f extract

@ -0,0 +1,21 @@
#!/bin/bash
# Usage: ./benchmark.sh <python> <grpc_url> <server_crt>
python=$1
grpc_url=$2
server_crt=$3
script_dir=$(cd "$(dirname "$0")"; pwd -P)
unset http_proxy https_proxy
# Batch off
$python -u $script_dir/resnet_client_grpc.py -url $grpc_url -crt $server_crt -batch 1 -cnum 1 -loop 200
#$python -u $script_dir/resnet_client_grpc.py -url $grpc_url -crt $server_crt -batch 1 -cnum 16 -loop 125
#$python -u $script_dir/resnet_client_grpc.py -url $grpc_url -crt $server_crt -batch 1 -cnum 32 -loop 100
#$python -u $script_dir/resnet_client_grpc.py -url $grpc_url -crt $server_crt -batch 1 -cnum 48 -loop 75
#$python -u $script_dir/resnet_client_grpc.py -url $grpc_url -crt $server_crt -batch 1 -cnum 64 -loop 50
# Batch on
$python -u $script_dir/resnet_client_grpc.py -url $grpc_url -crt $server_crt -cnum 1 -batch 1 -loop 100
#$python -u $script_dir/resnet_client_grpc.py -url $grpc_url -crt $server_crt -cnum 1 -batch 16 -loop 50
#$python -u $script_dir/resnet_client_grpc.py -url $grpc_url -crt $server_crt -cnum 1 -batch 32 -loop 40
#$python -u $script_dir/resnet_client_grpc.py -url $grpc_url -crt $server_crt -cnum 1 -batch 48 -loop 30
#$python -u $script_dir/resnet_client_grpc.py -url $grpc_url -crt $server_crt -cnum 1 -batch 64 -loop 20

@ -0,0 +1,3 @@
#!/bin/sh
apt-get update
# libGL is required by opencv-python
apt-get install -y libgl1-mesa-glx
pip3 install -r requirements.txt -v

@ -0,0 +1,6 @@
argparse
aiohttp>=3.7.0
grpcio>=1.34.0
opencv-python>=4.4.0
tensorflow>=2.3.0
tensorflow-serving-api>=2.3.0

@ -0,0 +1,162 @@
from __future__ import print_function
import numpy as np
import requests, argparse, time, grpc, cv2, asyncio, functools
import tensorflow as tf
from tensorflow_serving.apis import predict_pb2
from tensorflow_serving.apis import prediction_service_pb2_grpc
from utils import *
class benchmark_engine(object):
    def __init__(self, url, image_flag=None, certificate=None, batch_size=1, concurrent_num=64, response_time=10):
        self.url = url
        self.batch_size = batch_size
        self.response_time = response_time
        self.concurrent_num = concurrent_num
        self.image_flag = image_flag
        self.certificate = certificate
        self.request_signatures = []
        self.request_stubs = []
        self.request_response_list = {}
        self.__prepare__()

    def __prepare__(self):
        for idx in range(self.concurrent_num):
            # get the image array: random data unless a real image is given
            if self.image_flag is None:
                image_np = np.random.randint(0, 255, (self.batch_size, 224, 224, 3), dtype=np.uint8).astype(np.float32)
            else:
                if self.batch_size != 1:
                    print('batch size != 1 is not supported with a real image!')
                    exit()
                image_np = img_to_array(self.image_flag).astype(np.float32)
                image_np.resize((1, 224, 224, 3))
            # create the request
            request = predict_pb2.PredictRequest()
            request.model_spec.name = 'resnet50-v15-fp32'
            request.model_spec.signature_name = 'serving_default'
            request.inputs['input'].CopyFrom(tf.make_tensor_proto(image_np, shape=[self.batch_size, 224, 224, 3]))
            self.request_signatures.append(request)
        return None
    async def __connection__(self, task_idx, loop_num):
        request_signatures = self.request_signatures[task_idx]
        response_list = []
        # create channel
        if self.certificate is None:
            async with grpc.aio.insecure_channel(self.url) as channel:
                stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)
                if loop_num != 0:
                    format_string = 'query: {} channel, task {}, batch {}, loop_idx {}, latency(ms) {:.1f}, tps: {:.1f}'
                    for loop_idx in range(loop_num):
                        start_time = time.time()
                        response = await stub.Predict(request_signatures)
                        stop_time = time.time()
                        latency = stop_time - start_time
                        tps = self.batch_size / latency
                        response_list.append([response, latency])
                        print(format_string.format('insecure', task_idx, self.batch_size, loop_idx, 1000*latency, tps))
                else:
                    format_string = 'query: {} channel, task {}, batch {}, latency(ms) {:.1f}, tps: {:.1f}'
                    while True:
                        start_time = time.time()
                        response = await stub.Predict(request_signatures)
                        stop_time = time.time()
                        latency = stop_time - start_time
                        tps = self.batch_size / latency
                        print(format_string.format('insecure', task_idx, self.batch_size, 1000*latency, tps))
        else:
            creds = grpc.ssl_channel_credentials(root_certificates=open(self.certificate, 'rb').read())
            async with grpc.aio.secure_channel(self.url, creds) as channel:
                stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)
                if loop_num != 0:
                    format_string = 'query: {} channel, task {}, batch {}, loop_idx {}, latency(ms) {:.1f}, tps: {:.1f}'
                    for loop_idx in range(loop_num):
                        start_time = time.time()
                        response = await stub.Predict(request_signatures)
                        stop_time = time.time()
                        latency = stop_time - start_time
                        tps = self.batch_size / latency
                        response_list.append([response, latency])
                        print(format_string.format('secure', task_idx, self.batch_size, loop_idx, 1000*latency, tps))
                else:
                    format_string = 'query: {} channel, task {}, batch {}, latency(ms) {:.1f}, tps: {:.1f}'
                    while True:
                        start_time = time.time()
                        response = await stub.Predict(request_signatures)
                        stop_time = time.time()
                        latency = stop_time - start_time
                        tps = self.batch_size / latency
                        try:
                            proto_msg_to_dict(response)
                        except Exception as e:
                            print('Error response:', e)
                        print(format_string.format('secure', task_idx, self.batch_size, 1000*latency, tps))
        return response_list
    def run(self, loop_num):
        start_time = time.time()
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        connections = []
        self.request_response_list.clear()
        for idx in range(self.concurrent_num):
            connections.append(asyncio.ensure_future(self.__connection__(idx, loop_num)))
        loop.run_until_complete(asyncio.wait(connections))
        loop.close()
        stop_time = time.time()
        response_list = [connections[idx].result() for idx in range(self.concurrent_num)]
        print(proto_msg_to_dict(response_list[0][0][0]))
        request_time = 0
        for c_idx in range(self.concurrent_num):
            if loop_num != 0:
                for l_idx in range(loop_num):
                    request_time += response_list[c_idx][l_idx][1]
        if loop_num != 0:
            e2e_time = stop_time - start_time
            request_num = self.concurrent_num * loop_num
            latency = request_time / request_num
            tps = request_num * self.batch_size / e2e_time
            format_string = 'summary: cnum {}, batch {}, e2e time(s) {}, average latency(ms) {}, tps: {}'
            print(format_string.format(self.concurrent_num, self.batch_size, e2e_time, 1000*latency, tps))
def main():
    benchmark_app = benchmark_engine(args.url, args.img, args.crt, args.batch, args.cnum)
    if args.loop != 0:
        # warm up
        benchmark_app.run(5)
    # start loop
    benchmark_app.run(args.loop)

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-url', type=str, help='gRPC serving URL, e.g. IP:8500')
    parser.add_argument('-img', default=None, type=str, help='Image path')
    parser.add_argument('-crt', default=None, type=str, help='TLS certificate file path')
    parser.add_argument('-batch', default=1, type=int, help='Batch size')
    parser.add_argument('-cnum', default=16, type=int, help='Number of concurrent connections')
    parser.add_argument('-loop', default=200, type=int, help='Number of request loops per connection; 0 loops forever')
    args = parser.parse_args()
    main()

@ -0,0 +1,29 @@
import json, cv2, base64
import numpy as np
from google.protobuf import json_format

def dict_to_json_msg(data):
    return json.dumps(data)

def json_msg_to_dict(json_msg):
    return json.loads(json_msg)

def proto_msg_to_json_msg(proto_data):
    return json_format.MessageToJson(proto_data)

def proto_msg_to_dict(proto_data):
    return json_msg_to_dict(proto_msg_to_json_msg(proto_data))

def img_to_array(img_path):
    img = cv2.imread(img_path)
    return img

def img_array_to_base64(image_array):
    base64_str = base64.b64encode(image_array).decode('utf-8')
    return base64_str

def base64_to_img_array(base64_str):
    img_string = base64.b64decode(base64_str)
    nparr = np.frombuffer(img_string, np.uint8)
    image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
    return image

@ -0,0 +1,51 @@
# Copyright 2018 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
FROM tensorflow/serving:latest-devel as binary_build
WORKDIR /tensorflow-serving
# Build, and install TensorFlow Serving
ARG TF_SERVING_BUILD_OPTIONS="--config=nativeopt"
RUN echo "Building with build options: ${TF_SERVING_BUILD_OPTIONS}"
ARG TF_SERVING_BAZEL_OPTIONS=""
RUN echo "Building with Bazel options: ${TF_SERVING_BAZEL_OPTIONS}"
RUN bazel build -j 8 --color=yes --curses=yes \
    ${TF_SERVING_BAZEL_OPTIONS} \
    --verbose_failures \
    --force_pic \
    --output_filter=DONT_MATCH_ANYTHING \
    ${TF_SERVING_BUILD_OPTIONS} \
    tensorflow_serving/model_servers:tensorflow_model_server && \
    cp bazel-bin/tensorflow_serving/model_servers/tensorflow_model_server \
    /usr/local/bin/
# Build and install TensorFlow Serving API
RUN bazel build -j 8 --color=yes --curses=yes \
    ${TF_SERVING_BAZEL_OPTIONS} \
    --force_pic \
    --verbose_failures \
    --output_filter=DONT_MATCH_ANYTHING \
    ${TF_SERVING_BUILD_OPTIONS} \
    tensorflow_serving/tools/pip_package:build_pip_package && \
    bazel-bin/tensorflow_serving/tools/pip_package/build_pip_package \
    /tmp/pip && \
    pip --no-cache-dir install --upgrade \
    /tmp/pip/tensorflow_serving_api-*.whl && \
    rm -rf /tmp/pip
FROM binary_build as clean_build
# Clean up Bazel cache when done.
RUN bazel clean --expunge --color=yes && \
    rm -rf /root/.cache
CMD ["/bin/bash"]
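After `build_tf_serving_with_pic.sh` (below) builds this image, you can confirm that the PIC-built binary is present before it is copied out. A minimal sketch, assuming the default `latest` tag from the build script:
```
docker run --rm tf_serving_pic:latest ls -l /usr/local/bin/tensorflow_model_server
```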

@ -0,0 +1,18 @@
#!/bin/bash
set -e
if [ -z "$1" ]; then
    tag=latest
else
    tag=$1
fi
# You can drop the http_proxy/https_proxy build args if your network doesn't need a proxy
proxy_server="" # your http proxy server
DOCKER_BUILDKIT=0 docker build \
    -f Dockerfile.devel . \
    -t tf_serving_pic:${tag} \
    --build-arg http_proxy=${proxy_server} \
    --build-arg https_proxy=${proxy_server} \
    --build-arg no_proxy=localhost,127.0.0.0/1

@ -0,0 +1,8 @@
#!/bin/bash
cur_dir=$(pwd -P)
models_abs_dir=${cur_dir}/models
mkdir -p ${models_abs_dir}
# resnet50-v15
mkdir -p ${models_abs_dir}/resnet50-v15-fp32
cd ${models_abs_dir}/resnet50-v15-fp32
wget --no-check-certificate -c https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_8/resnet50_fp32_pretrained_model.pb -O resnet50-v15-fp32.pb

@ -0,0 +1,24 @@
#!/bin/bash
service_domain_name=$1
rm -rf ssl_configure
mkdir ssl_configure
cd ssl_configure
# https://kubernetes.github.io/ingress-nginx/examples/PREREQUISITES/#client-certificate-authentication
openssl req -x509 -sha256 -nodes -days 365 -newkey rsa:2048 -keyout server.key -out server.crt -subj "/CN=${service_domain_name}"
# Generate the TLS config for tensorflow_model_server
## https://stackoverflow.com/questions/59199419/using-tensorflow-model-server-with-ssl-configuration
echo "server_key: '`cat server.key | paste -d "" -s`'" >> ssl.cfg
echo "server_cert: '`cat server.crt | paste -d "" -s`'" >> ssl.cfg
echo "client_verify: false" >> ssl.cfg
sed -i "s/-----BEGIN PRIVATE KEY-----/-----BEGIN PRIVATE KEY-----\\\n/g" ssl.cfg
sed -i "s/-----END PRIVATE KEY-----/\\\n-----END PRIVATE KEY-----/g" ssl.cfg
sed -i "s/-----BEGIN CERTIFICATE-----/-----BEGIN CERTIFICATE-----\\\n/g" ssl.cfg
sed -i "s/-----END CERTIFICATE-----/\\\n-----END CERTIFICATE-----/g" ssl.cfg
echo "Generate server.key server.crt and ssl.cfg successfully!"
#cat ssl.cfg
cd -
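The resulting ssl.cfg is a text-format SSL configuration for tensorflow_model_server. Its rough shape, with the key and certificate bodies elided, is:
```
server_key: '-----BEGIN PRIVATE KEY-----
...
-----END PRIVATE KEY-----'
server_cert: '-----BEGIN CERTIFICATE-----
...
-----END CERTIFICATE-----'
client_verify: false
```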

@ -0,0 +1,4 @@
127.0.0.1 occlum-node
127.0.0.1 localhost
::1 occlum-node
::1 localhost

@ -0,0 +1,102 @@
#
# -*- coding: utf-8 -*-
#
# Copyright (c) 2019 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# SPDX-License-Identifier: EPL-2.0
#
"""Import a model graph and export a SavedModel.
Usage: model_graph_to_saved_model.py [--model_version=y] import_path export_dir
"""
from __future__ import print_function
import sys
from collections import OrderedDict
import tensorflow.compat.v1 as tf
from tensorflow.python.tools.optimize_for_inference_lib import optimize_for_inference
from tensorflow.python.framework import dtypes
INPUTS = 'input'
OUTPUTS = 'predict'
tf.app.flags.DEFINE_integer('model_version', 1, 'Version number of the model.')
tf.app.flags.DEFINE_string('import_path', '', 'Model import path.')
tf.app.flags.DEFINE_string('export_dir', '/tmp', 'Export directory.')
tf.app.flags.DEFINE_string('inputs', INPUTS, 'Input tensor name.')
tf.app.flags.DEFINE_string('outputs', OUTPUTS, 'Output tensor name.')
tf.app.flags.DEFINE_string('dtypes', 'float32', 'Input data type.')
FLAGS = tf.app.flags.FLAGS
def main(_):
    if len(sys.argv) < 2 or sys.argv[-1].startswith('-'):
        print('Usage: model_graph_to_saved_model.py [--model_version=y] import_path export_dir')
        sys.exit(-1)
    if FLAGS.import_path == '':
        print('Please specify the path to the model graph you want to convert to SavedModel format.')
        sys.exit(-1)
    if FLAGS.model_version <= 0:
        print('Please specify a positive value for version number.')
        sys.exit(-1)

    # Import model graph
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=True)) as sess:
        graph_def = tf.GraphDef()
        with tf.gfile.GFile(FLAGS.import_path, 'rb') as input_file:
            input_graph_content = input_file.read()
            graph_def.ParseFromString(input_graph_content)

        # Apply transform optimizations
        # https://www.tensorflow.org/api_docs/python/tf/dtypes/DType
        output_graph = optimize_for_inference(graph_def, [FLAGS.inputs], [FLAGS.outputs], dtypes.float32.as_datatype_enum, True)
        # output_graph = graph_def
        sess.graph.as_default()
        tf.import_graph_def(output_graph, name='')
        # print(sess.graph.get_operations())

        # Replace the signature_def_map.
        in_image = sess.graph.get_tensor_by_name(FLAGS.inputs + ':0')
        inputs = {INPUTS: tf.compat.v1.saved_model.build_tensor_info(in_image)}
        out_classes = sess.graph.get_tensor_by_name(FLAGS.outputs + ':0')
        outputs = {OUTPUTS: tf.compat.v1.saved_model.build_tensor_info(out_classes)}
        signature = tf.saved_model.signature_def_utils.build_signature_def(
            inputs=inputs,
            outputs=outputs,
            method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME
        )

        # Save out the SavedModel
        print('Exporting trained model to', FLAGS.export_dir + '/' + str(FLAGS.model_version))
        builder = tf.saved_model.builder.SavedModelBuilder(FLAGS.export_dir + '/' + str(FLAGS.model_version))
        builder.add_meta_graph_and_variables(
            sess, [tf.saved_model.tag_constants.SERVING],
            signature_def_map={
                tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: signature
            }
        )
        builder.save()
        print('Done!')

if __name__ == '__main__':
    tf.app.run()

@ -0,0 +1,8 @@
#!/bin/bash
apt-get update
apt-get install -y python3-pip
pip3 install --upgrade pip
pip3 install --upgrade tensorflow==2.4
./download_model.sh
python3 ./model_graph_to_saved_model.py --import_path ./models/resnet50-v15-fp32/resnet50-v15-fp32.pb --export_dir ./resnet50-v15-fp32 --model_version 1 --inputs input --outputs predict
./generate_ssl_config.sh localhost

@ -0,0 +1,59 @@
#!/bin/bash
occlum_glibc=/opt/occlum/glibc/lib/
host_libs=/lib/x86_64-linux-gnu/
set -e
ssl_config_file=/bin/ssl_configure/ssl.cfg
model_name=resnet50-v15-fp32
enable_batching=false
rest_api_num_threads=8
session_parallelism=0
parallel_num_threads=2
unset http_proxy https_proxy
# 1. Init Occlum Workspace
rm -rf occlum_instance
mkdir occlum_instance
cd occlum_instance
occlum init
new_json="$(jq '.resource_limits.user_space_size = "7000MB" |
.resource_limits.kernel_space_heap_size="384MB" |
.process.default_heap_size = "128MB" |
.resource_limits.max_num_of_threads = 64 |
.process.default_mmap_size = "6000MB" |
.env.default = [ "OMP_NUM_THREADS=8", "KMP_AFFINITY=verbose,granularity=fine,compact,1,0", "KMP_BLOCKTIME=20", "MKL_NUM_THREADS=8"]' Occlum.json)" && \
echo "${new_json}" > Occlum.json
# 2. Copy files into Occlum Workspace and Build
mkdir -p image/model
cp -rf ../resnet50-v15-fp32 image/model/
cp -rf ../ssl_configure image/bin/
cp ../tensorflow_model_server image/bin
cp ../hosts image/etc/
cp $occlum_glibc/libdl.so.2 image/$occlum_glibc
cp $occlum_glibc/librt.so.1 image/$occlum_glibc
cp $occlum_glibc/libm.so.6 image/$occlum_glibc
cp $occlum_glibc/libutil.so.1 image/$occlum_glibc
cp $occlum_glibc/libpthread.so.0 image/$occlum_glibc
cp $occlum_glibc/libnss_files.so.2 image/$occlum_glibc
cp $occlum_glibc/libnss_compat.so.2 image/$occlum_glibc
occlum build
# 3. Run the TF Serving server
taskset -c 0-1 occlum run /bin/tensorflow_model_server \
--model_name=${model_name} \
--model_base_path=/model/${model_name} \
--port=8500 \
--rest_api_port=8501 \
--enable_model_warmup=true \
--flush_filesystem_caches=false \
--enable_batching=${enable_batching} \
--rest_api_num_threads=${rest_api_num_threads} \
--tensorflow_session_parallelism=${session_parallelism} \
--tensorflow_intra_op_parallelism=${parallel_num_threads} \
--tensorflow_inter_op_parallelism=${parallel_num_threads} \
--ssl_config_file=${ssl_config_file} \
&
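Once the server has warmed up, a quick sanity check from the host is possible. A minimal sketch, assuming TF Serving's standard REST model-status endpoint on the rest_api_port configured above (8501):
```
curl http://localhost:8501/v1/models/resnet50-v15-fp32
```
The gRPC port (8500) is TLS-protected by ssl.cfg, so gRPC queries should go through the benchmark client with `../ssl_configure/server.crt`.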