Update XGBoost to the latest version

Intel has optimized XGBoost in the latest version, and benchmarks show a
clear speedup. In svm training with the Yahoo LTR data, the latest version
is 7.8X faster on the host and 2X faster in Occlum.

Signed-off-by: yuanwu <yuan.wu@intel.com>
This commit is contained in:
yuanwu 2018-02-06 19:01:29 +00:00 committed by Zongmin.Gu
parent 4048686c3b
commit 66b64f8276
5 changed files with 46 additions and 35 deletions

@ -9,11 +9,14 @@ apt-get update
apt-get install -y python3-pip python3-setuptools apt-get install -y python3-pip python3-setuptools
pip3 install kubernetes pip3 install kubernetes
#install the cmake
./install_cmake.sh
# Download and build XGBoost # Download and build XGBoost
rm -rf xgboost_src && mkdir xgboost_src rm -rf xgboost_src && mkdir xgboost_src
pushd xgboost_src pushd xgboost_src
git clone https://github.com/dmlc/xgboost . git clone https://github.com/dmlc/xgboost .
git checkout 6d5b34d82486cd1d0480c548f5d1953834659bd6 git checkout 9e955fb9b06cac32a06c92c4715f749d9d87e932
git submodule init git submodule init
git submodule update git submodule update
git apply ../patch/xgboost-01.diff git apply ../patch/xgboost-01.diff

8
demos/xgboost/install_cmake.sh Executable file

@ -0,0 +1,8 @@
#!/bin/bash
# Download the CMake 3.15.5 source release, build it, and install it.
# Stop at the first command that fails so a broken download or build
# never reaches the install step.
set -e
# Do the download and build in the invoking user's home directory.
cd ~
# Fetch the source tarball from the Kitware GitHub releases page and unpack it;
# the unpack only runs if the download succeeded.
wget https://github.com/Kitware/CMake/releases/download/v3.15.5/cmake-3.15.5.tar.gz && tar xf cmake-3.15.5.tar.gz
cd cmake-3.15.5
# Run the bootstrap script shipped in the source tree (no options passed,
# so its defaults apply).
./bootstrap
# Compile using four parallel jobs.
make -j4
# Install with elevated privileges; this requires sudo to be present.
sudo make install

@ -1,18 +1,18 @@
diff --git a/tracker/dmlc_tracker/local.py b/tracker/dmlc_tracker/local.py diff --git a/tracker/dmlc_tracker/local.py b/tracker/dmlc_tracker/local.py
index dff7c17..fca0f9d 100644 index 6e4af12..09df15e 100644
--- a/tracker/dmlc_tracker/local.py --- a/tracker/dmlc_tracker/local.py
+++ b/tracker/dmlc_tracker/local.py +++ b/tracker/dmlc_tracker/local.py
@@ -24,6 +24,13 @@ def exec_cmd(cmd, role, taskid, pass_env): @@ -26,6 +26,13 @@ def exec_cmd(cmd, num_attempt, role, taskid, pass_env):
num_retry = env.get('DMLC_NUM_ATTEMPT', num_attempt)
num_retry = env.get('DMLC_NUM_ATTEMPT', 0) num_trial = 0
+ cmd_str = '' + cmd_str = ''
+ for k, v in env.items(): + for k, v in env.items():
+ if str(k)[0:4] == 'DMLC': + if str(k)[0:4] == 'DMLC':
+ strenv = str(k) + '=' + str(v); + strenv = str(k) + '=' + str(v);
+ cmd_str = cmd_str + ' ' + strenv + cmd_str = cmd_str + ' ' + strenv
+ cmd = cmd + cmd_str + cmdline = cmdline + ' ' + cmd_str
+ +
logging.debug('num of retry %d',num_retry)
while True: while True:
if os.name == 'nt':
ret = subprocess.call(cmd, shell=True, env=env)

@ -1,36 +1,36 @@
diff --git a/src/allreduce_base.cc b/src/allreduce_base.cc diff --git a/rabit/src/allreduce_base.cc b/rabit/src/allreduce_base.cc
index 143db6e..a6daf20 100644 index d1959eaa..68cd377a 100644
--- a/src/allreduce_base.cc --- a/rabit/src/allreduce_base.cc
+++ b/src/allreduce_base.cc +++ b/rabit/src/allreduce_base.cc
@@ -486,12 +486,13 @@ AllreduceBase::TryAllreduceTree(void *sendrecvbuf_, @@ -551,12 +551,13 @@ AllreduceBase::TryAllreduceTree(void *sendrecvbuf_,
// select must return // select must return
watcher.Poll(); watcher.Poll();
// exception handling // exception handling
- for (int i = 0; i < nlink; ++i) { - for (int i = 0; i < nlink; ++i) {
+ //FIXME:workaround for Occlum + // FIXME:workaround for Occlum
+ /*for (int i = 0; i < nlink; ++i) { + /* for (int i = 0; i < nlink; ++i) {
// recive OOB message from some link // recive OOB message from some link
if (watcher.CheckExcept(links[i].sock)) { if (watcher.CheckExcept(links[i].sock)) {
return ReportError(&links[i], kGetExcept); return ReportError(&links[i], kGetExcept);
} }
- } - }
+ }*/ + } */
// read data from childs // read data from childs
for (int i = 0; i < nlink; ++i) { for (int i = 0; i < nlink; ++i) {
if (i != parent_index && watcher.CheckRead(links[i].sock)) { if (i != parent_index && watcher.CheckRead(links[i].sock)) {
@@ -641,12 +642,13 @@ AllreduceBase::TryBroadcast(void *sendrecvbuf_, size_t total_size, int root) { @@ -731,12 +732,13 @@ AllreduceBase::TryBroadcast(void *sendrecvbuf_, size_t total_size, int root) {
// select // select
watcher.Poll(); watcher.Poll();
// exception handling // exception handling
- for (int i = 0; i < nlink; ++i) { - for (int i = 0; i < nlink; ++i) {
+ //FIXME:workaround for Occlum + // FIXME:workaround for Occlum
+ /*for (int i = 0; i < nlink; ++i) { + /* for (int i = 0; i < nlink; ++i) {
// recive OOB message from some link // recive OOB message from some link
if (watcher.CheckExcept(links[i].sock)) { if (watcher.CheckExcept(links[i].sock)) {
return ReportError(&links[i], kGetExcept); return ReportError(&links[i], kGetExcept);
} }
- } - }
+ }*/ + } */
if (in_link == -2) { if (in_link == -2) {
// probe in-link // probe in-link
for (int i = 0; i < nlink; ++i) { for (int i = 0; i < nlink; ++i) {

@ -1,13 +1,13 @@
diff --git a/src/cli_main.cc b/src/cli_main.cc diff --git a/src/cli_main.cc b/src/cli_main.cc
index faa93ad..a201615 100644 index 5c602f37..d18a77c2 100644
--- a/src/cli_main.cc --- a/src/cli_main.cc
+++ b/src/cli_main.cc +++ b/src/cli_main.cc
@@ -339,6 +339,16 @@ int CLIRunTask(int argc, char *argv[]) { @@ -444,6 +444,16 @@ class CLI {
printf("Usage: <config>\n"); this->PrintHelp();
return 0; exit(1);
} }
+ +
+ //FIXME:workaroud for local distributed machine learning on Occlum + // FIXME:workaroud for local distributed machine learning on Occlum
+ for (int i = 2; i < argc; ++i) { + for (int i = 2; i < argc; ++i) {
+ char name[128], val[128]; + char name[128], val[128];
+ if (sscanf(argv[i], "%[^=]=%s", name, val) == 2) { + if (sscanf(argv[i], "%[^=]=%s", name, val) == 2) {
@ -16,6 +16,6 @@ index faa93ad..a201615 100644
+ } + }
+ } + }
+ +
rabit::Init(argc, argv); for (int i = 0; i < argc; ++i) {
std::string str {argv[i]};
std::vector<std::pair<std::string, std::string> > cfg; if (str == "-h" || str == "--help") {