Add XGBoost demo
This commit is contained in:
parent
ab89421f96
commit
ba3813bea8
@ -7,3 +7,4 @@ This directory contains sample projects that demonstrate how Occlum can be used
|
||||
* `hello_bazel/`: A sample C++ project built with [Bazel](https://bazel.build).
|
||||
* `https_server/`: A HTTPS file server based on [Mongoose Embedded Web Server Library](https://github.com/cesanta/mongoose).
|
||||
* `tensorflow_lite/`: A demo and benchmark of [Tensorflow Lite](https://www.tensorflow.org/lite) inference engine.
|
||||
* `xgboost/`: A demo of [XGBoost](https://xgboost.readthedocs.io/en/latest/).
|
||||
|
5
demos/xgboost/.gitignore
vendored
Normal file
5
demos/xgboost/.gitignore
vendored
Normal file
@ -0,0 +1,5 @@
|
||||
xgboost_src/
|
||||
data/
|
||||
host/
|
||||
occlum_workspace/
|
||||
tmp_*
|
48
demos/xgboost/Makefile
Normal file
48
demos/xgboost/Makefile
Normal file
@ -0,0 +1,48 @@
|
||||
CUR_DIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))
|
||||
XGBOOST_TRACKER := $(realpath $(CUR_DIR)/xgboost_src/dmlc-core/tracker/)
|
||||
BIN := xgboost
|
||||
DATA := data
|
||||
|
||||
.PHONY: all test test-native test-local-cluster clean
|
||||
|
||||
all: occlum_workspace
|
||||
|
||||
$(BIN):
|
||||
cp xgboost_src/$(BIN) .
|
||||
|
||||
$(DATA):
|
||||
rm -rf $(DATA) && mkdir -p $(DATA)
|
||||
cp xgboost_src/demo/binary_classification/agaricus.txt.test $(DATA)
|
||||
cp xgboost_src/demo/binary_classification/agaricus.txt.train $(DATA)
|
||||
cp xgboost_src/demo/binary_classification/mushroom.conf $(DATA)
|
||||
sed -i 's/agaricus/.\/data\/agaricus/g' $(DATA)/mushroom.conf
|
||||
sed -i 's/num_round = 2/num_round = 10/g' $(DATA)/mushroom.conf
|
||||
sed -i '$$a\model_out = "./host/result.model"' $(DATA)/mushroom.conf
|
||||
sed -i '$$a\verbosity = 2' $(DATA)/mushroom.conf
|
||||
sed -i '$$a\nthread = 2' $(DATA)/mushroom.conf
|
||||
|
||||
occlum_workspace: $(BIN) $(DATA)
|
||||
rm -rf occlum_workspace && mkdir occlum_workspace
|
||||
cd occlum_workspace && \
|
||||
occlum init && \
|
||||
jq '.process.default_mmap_size = "96MB"' Occlum.json > temp_Occlum.json && \
|
||||
mv temp_Occlum.json Occlum.json && \
|
||||
cp ../$(BIN) image/bin && \
|
||||
cp ../xgboost_src/lib/libxgboost.so image/lib && \
|
||||
mkdir -p image/$(DATA) && \
|
||||
cp -r ../$(DATA)/* image/$(DATA) && \
|
||||
occlum build
|
||||
|
||||
test: occlum_workspace
|
||||
@./run_xgboost.sh
|
||||
|
||||
test-native: $(BIN) $(DATA)
|
||||
rm -rf host
|
||||
mkdir host
|
||||
./$(BIN) ./$(DATA)/mushroom.conf
|
||||
|
||||
test-local-cluster: occlum_workspace
|
||||
@$(XGBOOST_TRACKER)/dmlc-submit --cluster=local --num-workers=2 --log-level=DEBUG ./run_xgboost.sh
|
||||
|
||||
clean:
|
||||
rm -rf $(BIN) $(DATA) tmp_* host occlum_workspace
|
22
demos/xgboost/README.md
Normal file
22
demos/xgboost/README.md
Normal file
@ -0,0 +1,22 @@
|
||||
# Use XGBoost in SGX with Occlum
|
||||
|
||||
Step 1: Download XGBoost and its dependencies, and then build XGBoost
|
||||
```
|
||||
./build_xgboost.sh
|
||||
```
|
||||
When the script completes, the resulting XGBoost build can be found in the `xgboost_src` directory.
|
||||
|
||||
Step 2: To train a model with XGBoost in a single process inside Occlum, run
|
||||
```
|
||||
make test
|
||||
```
|
||||
|
||||
Step 3: To train a model with a local two-worker XGBoost cluster, run
|
||||
```
|
||||
make test-local-cluster
|
||||
```
|
||||
|
||||
Step 4 (Optional): To train a model with XGBoost in a single process natively on Linux, run
|
||||
```
|
||||
make test-native
|
||||
```
|
42
demos/xgboost/build_xgboost.sh
Executable file
42
demos/xgboost/build_xgboost.sh
Executable file
@ -0,0 +1,42 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
# Install the dependencies
|
||||
apt-get update
|
||||
apt-get install -y python3.5
|
||||
apt-get install -y python3-pip
|
||||
apt-get install -y python3-setuptools
|
||||
apt-get install -y python-pip
|
||||
apt-get install -y python-setuptools
|
||||
pip3 install kubernetes
|
||||
pip install kubernetes
|
||||
|
||||
# Download and build XGBoost
|
||||
rm -rf xgboost_src && mkdir xgboost_src
|
||||
pushd xgboost_src
|
||||
git clone https://github.com/dmlc/xgboost .
|
||||
git checkout 6d5b34d82486cd1d0480c548f5d1953834659bd6
|
||||
git submodule init
|
||||
git submodule update
|
||||
git apply ../patch/xgboost-01.diff
|
||||
pushd rabit
|
||||
git apply ../../patch/rabit-01.diff
|
||||
popd
|
||||
pushd dmlc-core
|
||||
git apply ../../patch/dmlc-core-01.diff
|
||||
popd
|
||||
mkdir build
|
||||
cd build
|
||||
cmake ../ \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-DCMAKE_C_COMPILER=occlum-gcc -DCMAKE_CXX_COMPILER=occlum-g++
|
||||
make -j"$(nproc)"  # cap parallel jobs at the CPU count; a bare -j places no limit on concurrent jobs
|
||||
popd
|
||||
|
||||
# Prepare data
|
||||
pushd xgboost_src/demo/binary_classification
|
||||
python mapfeat.py
|
||||
python mknfold.py agaricus.txt 1
|
||||
popd
|
||||
|
||||
echo "Build XGBoost Success!"
|
18
demos/xgboost/patch/dmlc-core-01.diff
Normal file
18
demos/xgboost/patch/dmlc-core-01.diff
Normal file
@ -0,0 +1,18 @@
|
||||
diff --git a/tracker/dmlc_tracker/local.py b/tracker/dmlc_tracker/local.py
|
||||
index dff7c17..fca0f9d 100644
|
||||
--- a/tracker/dmlc_tracker/local.py
|
||||
+++ b/tracker/dmlc_tracker/local.py
|
||||
@@ -24,6 +24,13 @@ def exec_cmd(cmd, role, taskid, pass_env):
|
||||
|
||||
num_retry = env.get('DMLC_NUM_ATTEMPT', 0)
|
||||
|
||||
+ cmd_str = ''
|
||||
+ for k, v in env.items():
|
||||
+ if str(k)[0:4] == 'DMLC':
|
||||
+ strenv = str(k) + '=' + str(v);
|
||||
+ cmd_str = cmd_str + ' ' + strenv
|
||||
+ cmd = cmd + cmd_str
|
||||
+
|
||||
while True:
|
||||
if os.name == 'nt':
|
||||
ret = subprocess.call(cmd, shell=True, env=env)
|
36
demos/xgboost/patch/rabit-01.diff
Normal file
36
demos/xgboost/patch/rabit-01.diff
Normal file
@ -0,0 +1,36 @@
|
||||
diff --git a/src/allreduce_base.cc b/src/allreduce_base.cc
|
||||
index 143db6e..a6daf20 100644
|
||||
--- a/src/allreduce_base.cc
|
||||
+++ b/src/allreduce_base.cc
|
||||
@@ -486,12 +486,13 @@ AllreduceBase::TryAllreduceTree(void *sendrecvbuf_,
|
||||
// select must return
|
||||
watcher.Poll();
|
||||
// exception handling
|
||||
- for (int i = 0; i < nlink; ++i) {
|
||||
+ //FIXME:workaround for Occlum
|
||||
+ /*for (int i = 0; i < nlink; ++i) {
|
||||
// recive OOB message from some link
|
||||
if (watcher.CheckExcept(links[i].sock)) {
|
||||
return ReportError(&links[i], kGetExcept);
|
||||
}
|
||||
- }
|
||||
+ }*/
|
||||
// read data from childs
|
||||
for (int i = 0; i < nlink; ++i) {
|
||||
if (i != parent_index && watcher.CheckRead(links[i].sock)) {
|
||||
@@ -641,12 +642,13 @@ AllreduceBase::TryBroadcast(void *sendrecvbuf_, size_t total_size, int root) {
|
||||
// select
|
||||
watcher.Poll();
|
||||
// exception handling
|
||||
- for (int i = 0; i < nlink; ++i) {
|
||||
+ //FIXME:workaround for Occlum
|
||||
+ /*for (int i = 0; i < nlink; ++i) {
|
||||
// recive OOB message from some link
|
||||
if (watcher.CheckExcept(links[i].sock)) {
|
||||
return ReportError(&links[i], kGetExcept);
|
||||
}
|
||||
- }
|
||||
+ }*/
|
||||
if (in_link == -2) {
|
||||
// probe in-link
|
||||
for (int i = 0; i < nlink; ++i) {
|
21
demos/xgboost/patch/xgboost-01.diff
Normal file
21
demos/xgboost/patch/xgboost-01.diff
Normal file
@ -0,0 +1,21 @@
|
||||
diff --git a/src/cli_main.cc b/src/cli_main.cc
|
||||
index faa93ad..a201615 100644
|
||||
--- a/src/cli_main.cc
|
||||
+++ b/src/cli_main.cc
|
||||
@@ -339,6 +339,16 @@ int CLIRunTask(int argc, char *argv[]) {
|
||||
printf("Usage: <config>\n");
|
||||
return 0;
|
||||
}
|
||||
+
|
||||
+ //FIXME:workaround for local distributed machine learning on Occlum
|
||||
+ for (int i = 2; i < argc; ++i) {
|
||||
+ char name[128], val[128];
|
||||
+ if (sscanf(argv[i], "%127[^=]=%127s", name, val) == 2) {  // field widths keep writes within name[128]/val[128]
|
||||
+ //LOG(CONSOLE) << "setenv: " << std::string(name) << " = " << std::string(val);
|
||||
+ setenv(std::string(name).c_str(), std::string(val).c_str(), 1);
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
rabit::Init(argc, argv);
|
||||
|
||||
std::vector<std::pair<std::string, std::string> > cfg;
|
13
demos/xgboost/run_xgboost.sh
Executable file
13
demos/xgboost/run_xgboost.sh
Executable file
@ -0,0 +1,13 @@
|
||||
#!/bin/bash
|
||||
|
||||
RED='\033[1;36m'
|
||||
NC='\033[0m'
|
||||
|
||||
# Create a temporary folder and run xgboost demo
|
||||
tmp_dir="tmp_$RANDOM"
|
||||
mkdir -p $tmp_dir
|
||||
cp -a occlum_workspace/. $tmp_dir
|
||||
|
||||
cd $tmp_dir
|
||||
echo -e "${RED}occlum run xgboost $@${NC}"
|
||||
occlum run /bin/xgboost /data/mushroom.conf "$@"  # quoted so forwarded arguments are not re-split or glob-expanded
|
@ -4,7 +4,7 @@
|
||||
<ISVSVN>0</ISVSVN>
|
||||
<StackMaxSize>0x100000</StackMaxSize>
|
||||
<HeapMaxSize>0x2000000</HeapMaxSize>
|
||||
<TCSNum>8</TCSNum>
|
||||
<TCSNum>32</TCSNum>
|
||||
<TCSPolicy>1</TCSPolicy>
|
||||
<DisableDebug>0</DisableDebug>
|
||||
<MiscSelect>0</MiscSelect>
|
||||
|
Loading…
Reference in New Issue
Block a user