Add XGBoost demo
This commit is contained in:
parent
ab89421f96
commit
ba3813bea8
@ -7,3 +7,4 @@ This directory contains sample projects that demonstrate how Occlum can be used
|
|||||||
* `hello_bazel/`: A sample C++ project built with [Bazel](https://bazel.build).
|
* `hello_bazel/`: A sample C++ project built with [Bazel](https://bazel.build).
|
||||||
* `https_server/`: A HTTPS file server based on [Mongoose Embedded Web Server Library](https://github.com/cesanta/mongoose).
|
* `https_server/`: A HTTPS file server based on [Mongoose Embedded Web Server Library](https://github.com/cesanta/mongoose).
|
||||||
* `tensorflow_lite/`: A demo and benchmark of [Tensorflow Lite](https://www.tensorflow.org/lite) inference engine.
|
* `tensorflow_lite/`: A demo and benchmark of [Tensorflow Lite](https://www.tensorflow.org/lite) inference engine.
|
||||||
|
* `xgboost/`: A demo of [XGBoost](https://xgboost.readthedocs.io/en/latest/).
|
||||||
|
5
demos/xgboost/.gitignore
vendored
Normal file
5
demos/xgboost/.gitignore
vendored
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
xgboost_src/
|
||||||
|
data/
|
||||||
|
host/
|
||||||
|
occlum_workspace/
|
||||||
|
tmp_*
|
48
demos/xgboost/Makefile
Normal file
48
demos/xgboost/Makefile
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
# Absolute directory that contains this Makefile (symlinks resolved, no
# trailing slash). Built-in functions are used instead of forking `dirname`.
CUR_DIR := $(patsubst %/,%,$(dir $(realpath $(lastword $(MAKEFILE_LIST)))))
# Directory that holds the dmlc-submit launcher. $(abspath) is used rather
# than $(realpath) because $(realpath) expands to an empty string when the
# path does not exist at parse time, which would turn the launcher path used
# by test-local-cluster into "/dmlc-submit".
XGBOOST_TRACKER := $(abspath $(CUR_DIR)/xgboost_src/dmlc-core/tracker)
# Name of the XGBoost command-line binary.
BIN := xgboost
# Directory that receives the demo data set and its configuration file.
DATA := data
|
||||||
|
|
||||||
|
# Command-style goals; none of these names is a file produced by the build.
.PHONY: all clean test test-native test-local-cluster

# Default goal: assemble and build the Occlum workspace.
all: occlum_workspace
|
||||||
|
|
||||||
|
# Copy the XGBoost binary out of the source tree into the current directory.
# The built binary is a prerequisite so that the copy is refreshed whenever
# XGBoost is rebuilt; if it is missing, make reports that there is no rule to
# make it instead of failing inside cp.
$(BIN): xgboost_src/$(BIN)
	cp $< $@
|
||||||
|
|
||||||
|
# Prepare the demo data directory: copy the agaricus train/test files and
# mushroom.conf from the XGBoost source tree, then adjust the configuration
# (data paths under ./$(DATA), 10 boosting rounds, model written to ./host,
# verbosity 2, 2 threads).
# The directory is assembled under tmp_$(DATA) (matched by the tmp_* pattern
# that `clean` removes) and renamed only after every step has succeeded, so a
# failure cannot leave a partial directory that make would treat as done.
$(DATA):
	rm -rf $@ tmp_$@ && mkdir -p tmp_$@
	cp $(addprefix xgboost_src/demo/binary_classification/,agaricus.txt.test agaricus.txt.train mushroom.conf) tmp_$@
	sed -i 's|agaricus|./$(DATA)/agaricus|g' tmp_$@/mushroom.conf
	sed -i 's/num_round = 2/num_round = 10/g' tmp_$@/mushroom.conf
	sed -i '$$a\model_out = "./host/result.model"' tmp_$@/mushroom.conf
	sed -i '$$a\verbosity = 2' tmp_$@/mushroom.conf
	sed -i '$$a\nthread = 2' tmp_$@/mushroom.conf
	mv tmp_$@ $@
|
||||||
|
|
||||||
|
# Assemble the Occlum workspace: initialise it, set the default mmap size to
# 96MB in Occlum.json, install the binary, libxgboost.so and the data into
# the image, then build the image.
# All work happens in the staging directory tmp_occlum_workspace (matched by
# the tmp_* pattern that `clean` removes), which is renamed to the target only
# after `occlum build` succeeds; a failed run therefore cannot leave behind a
# half-built directory that make would consider up to date.
# NOTE(review): renaming assumes a built workspace is relocatable, which
# run_xgboost.sh already relies on when it copies the workspace before use.
occlum_workspace: $(BIN) $(DATA)
	rm -rf $@ tmp_$@ && mkdir tmp_$@
	cd tmp_$@ && \
		occlum init && \
		jq '.process.default_mmap_size = "96MB"' Occlum.json > temp_Occlum.json && \
		mv temp_Occlum.json Occlum.json && \
		cp ../$(BIN) image/bin && \
		cp ../xgboost_src/lib/libxgboost.so image/lib && \
		mkdir -p image/$(DATA) && \
		cp -r ../$(DATA)/* image/$(DATA) && \
		occlum build
	mv tmp_$@ $@
|
||||||
|
|
||||||
|
# Run the demo once through run_xgboost.sh (which invokes `occlum run`).
test: occlum_workspace
	@./run_xgboost.sh

# Run the same training directly on the host. The configuration written by
# the $(DATA) rule stores the model under ./host, so start from an empty
# host directory.
test-native: $(BIN) $(DATA)
	rm -rf host && mkdir host
	./$(BIN) ./$(DATA)/mushroom.conf

# Start two local workers through dmlc-submit, each executing run_xgboost.sh.
test-local-cluster: occlum_workspace
	@$(XGBOOST_TRACKER)/dmlc-submit \
		--cluster=local \
		--num-workers=2 \
		--log-level=DEBUG \
		./run_xgboost.sh
|
||||||
|
|
||||||
|
# Delete everything this Makefile and run_xgboost.sh generate: the copied
# binary, the data directory, temporary run directories, the host output
# directory and the Occlum workspace. The xgboost_src tree is left alone.
clean:
	$(RM) -r $(BIN) $(DATA) tmp_* host occlum_workspace
|
22
demos/xgboost/README.md
Normal file
22
demos/xgboost/README.md
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
# Use XGBoost in SGX with Occlum
|
||||||
|
|
||||||
|
Step 1: Download XGBoost and its dependencies, and then build XGBoost
|
||||||
|
```
|
||||||
|
./build_xgboost.sh
|
||||||
|
```
|
||||||
|
When the script completes, the resulting XGBoost build can be found in the `xgboost_src` directory.
|
||||||
|
|
||||||
|
Step 2: To train a model with XGBoost in a single process inside Occlum, run
|
||||||
|
```
|
||||||
|
make test
|
||||||
|
```
|
||||||
|
|
||||||
|
Step 3: To train a model with a local XGBoost cluster of two workers, run
|
||||||
|
```
|
||||||
|
make test-local-cluster
|
||||||
|
```
|
||||||
|
|
||||||
|
Step 4 (Optional): To train a model with XGBoost in a single process natively on Linux (outside Occlum), run
|
||||||
|
```
|
||||||
|
make test-native
|
||||||
|
```
|
42
demos/xgboost/build_xgboost.sh
Executable file
42
demos/xgboost/build_xgboost.sh
Executable file
@ -0,0 +1,42 @@
|
|||||||
|
#!/bin/bash
# Install the build dependencies, download XGBoost at a pinned commit, apply
# the Occlum patches, build it with the Occlum toolchain and generate the
# demo data set.
set -e

# The patch/ and xgboost_src/ paths below are relative to this script, so
# work from its directory regardless of where the script was invoked from.
cd "$(dirname "${BASH_SOURCE[0]}")"

# Install the dependencies
apt-get update
apt-get install -y \
    python3.5 \
    python3-pip \
    python3-setuptools \
    python-pip \
    python-setuptools
pip3 install kubernetes
pip install kubernetes

# Download and build XGBoost
rm -rf xgboost_src && mkdir xgboost_src
pushd xgboost_src
git clone https://github.com/dmlc/xgboost .
git checkout 6d5b34d82486cd1d0480c548f5d1953834659bd6
git submodule update --init

# Apply the Occlum workarounds to XGBoost and to its rabit and dmlc-core
# submodules.
git apply ../patch/xgboost-01.diff
pushd rabit
git apply ../../patch/rabit-01.diff
popd
pushd dmlc-core
git apply ../../patch/dmlc-core-01.diff
popd

mkdir build
pushd build
cmake ../ \
    -DCMAKE_BUILD_TYPE=Release \
    -DCMAKE_C_COMPILER=occlum-gcc -DCMAKE_CXX_COMPILER=occlum-g++
# A bare `make -j` places no limit on the number of parallel jobs and can
# exhaust memory; cap it at the number of available processors.
make -j"$(nproc)"
popd
popd

# Prepare data
pushd xgboost_src/demo/binary_classification
python mapfeat.py
python mknfold.py agaricus.txt 1
popd

echo "Build XGBoost Success!"
|
18
demos/xgboost/patch/dmlc-core-01.diff
Normal file
18
demos/xgboost/patch/dmlc-core-01.diff
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
diff --git a/tracker/dmlc_tracker/local.py b/tracker/dmlc_tracker/local.py
|
||||||
|
index dff7c17..fca0f9d 100644
|
||||||
|
--- a/tracker/dmlc_tracker/local.py
|
||||||
|
+++ b/tracker/dmlc_tracker/local.py
|
||||||
|
@@ -24,6 +24,13 @@ def exec_cmd(cmd, role, taskid, pass_env):
|
||||||
|
|
||||||
|
num_retry = env.get('DMLC_NUM_ATTEMPT', 0)
|
||||||
|
|
||||||
|
+ cmd_str = ''
|
||||||
|
+ for k, v in env.items():
|
||||||
|
+ if str(k)[0:4] == 'DMLC':
|
||||||
|
+ strenv = str(k) + '=' + str(v);
|
||||||
|
+ cmd_str = cmd_str + ' ' + strenv
|
||||||
|
+ cmd = cmd + cmd_str
|
||||||
|
+
|
||||||
|
while True:
|
||||||
|
if os.name == 'nt':
|
||||||
|
ret = subprocess.call(cmd, shell=True, env=env)
|
36
demos/xgboost/patch/rabit-01.diff
Normal file
36
demos/xgboost/patch/rabit-01.diff
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
diff --git a/src/allreduce_base.cc b/src/allreduce_base.cc
|
||||||
|
index 143db6e..a6daf20 100644
|
||||||
|
--- a/src/allreduce_base.cc
|
||||||
|
+++ b/src/allreduce_base.cc
|
||||||
|
@@ -486,12 +486,13 @@ AllreduceBase::TryAllreduceTree(void *sendrecvbuf_,
|
||||||
|
// select must return
|
||||||
|
watcher.Poll();
|
||||||
|
// exception handling
|
||||||
|
- for (int i = 0; i < nlink; ++i) {
|
||||||
|
+ //FIXME:workaround for Occlum
|
||||||
|
+ /*for (int i = 0; i < nlink; ++i) {
|
||||||
|
// recive OOB message from some link
|
||||||
|
if (watcher.CheckExcept(links[i].sock)) {
|
||||||
|
return ReportError(&links[i], kGetExcept);
|
||||||
|
}
|
||||||
|
- }
|
||||||
|
+ }*/
|
||||||
|
// read data from childs
|
||||||
|
for (int i = 0; i < nlink; ++i) {
|
||||||
|
if (i != parent_index && watcher.CheckRead(links[i].sock)) {
|
||||||
|
@@ -641,12 +642,13 @@ AllreduceBase::TryBroadcast(void *sendrecvbuf_, size_t total_size, int root) {
|
||||||
|
// select
|
||||||
|
watcher.Poll();
|
||||||
|
// exception handling
|
||||||
|
- for (int i = 0; i < nlink; ++i) {
|
||||||
|
+ //FIXME:workaround for Occlum
|
||||||
|
+ /*for (int i = 0; i < nlink; ++i) {
|
||||||
|
// recive OOB message from some link
|
||||||
|
if (watcher.CheckExcept(links[i].sock)) {
|
||||||
|
return ReportError(&links[i], kGetExcept);
|
||||||
|
}
|
||||||
|
- }
|
||||||
|
+ }*/
|
||||||
|
if (in_link == -2) {
|
||||||
|
// probe in-link
|
||||||
|
for (int i = 0; i < nlink; ++i) {
|
21
demos/xgboost/patch/xgboost-01.diff
Normal file
21
demos/xgboost/patch/xgboost-01.diff
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
diff --git a/src/cli_main.cc b/src/cli_main.cc
|
||||||
|
index faa93ad..a201615 100644
|
||||||
|
--- a/src/cli_main.cc
|
||||||
|
+++ b/src/cli_main.cc
|
||||||
|
@@ -339,6 +339,16 @@ int CLIRunTask(int argc, char *argv[]) {
|
||||||
|
printf("Usage: <config>\n");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
+
|
||||||
|
+  // FIXME: workaround for local distributed machine learning on Occlum: export NAME=VALUE arguments as env vars
+  for (int i = 2; i < argc; ++i) {
+    char name[128], val[128];
+    if (sscanf(argv[i], "%127[^=]=%127s", name, val) == 2) {
+      // The field widths keep both tokens inside their 128-byte buffers.
+      setenv(name, val, 1);
+    }
+  }
|
||||||
|
+
|
||||||
|
rabit::Init(argc, argv);
|
||||||
|
|
||||||
|
std::vector<std::pair<std::string, std::string> > cfg;
|
13
demos/xgboost/run_xgboost.sh
Executable file
13
demos/xgboost/run_xgboost.sh
Executable file
@ -0,0 +1,13 @@
|
|||||||
|
#!/bin/bash
# Run the XGBoost demo with `occlum run` inside a private copy of the Occlum
# workspace. Any arguments are forwarded to xgboost after the config path.

# Stop at the first failure so that `occlum run` is never started from the
# wrong directory when the copy or the cd fails.
set -e

CYAN='\033[1;36m'
NC='\033[0m'

# Create a temporary folder and run the xgboost demo in it. mktemp guarantees
# a unique directory; this script can be started several times at once (the
# Makefile launches it with --num-workers=2), and "tmp_$RANDOM" could collide.
tmp_dir="$(mktemp -d tmp_XXXXXX)"
cp -a occlum_workspace/. "$tmp_dir"

cd "$tmp_dir"
echo -e "${CYAN}occlum run xgboost $*${NC}"
# Quote "$@" so that each forwarded argument stays a single word.
occlum run /bin/xgboost /data/mushroom.conf "$@"
|
@ -4,7 +4,7 @@
|
|||||||
<ISVSVN>0</ISVSVN>
|
<ISVSVN>0</ISVSVN>
|
||||||
<StackMaxSize>0x100000</StackMaxSize>
|
<StackMaxSize>0x100000</StackMaxSize>
|
||||||
<HeapMaxSize>0x2000000</HeapMaxSize>
|
<HeapMaxSize>0x2000000</HeapMaxSize>
|
||||||
<TCSNum>8</TCSNum>
|
<TCSNum>32</TCSNum>
|
||||||
<TCSPolicy>1</TCSPolicy>
|
<TCSPolicy>1</TCSPolicy>
|
||||||
<DisableDebug>0</DisableDebug>
|
<DisableDebug>0</DisableDebug>
|
||||||
<MiscSelect>0</MiscSelect>
|
<MiscSelect>0</MiscSelect>
|
||||||
|
Loading…
Reference in New Issue
Block a user