Update XGBoost to the latest version
Intel has optimized XGBoost in the latest version, and benchmarks show a clear speedup. In SVM training with the Yahoo LTR data, the latest version is 7.8x faster on the host and 2x faster in Occlum. Signed-off-by: yuanwu <yuan.wu@intel.com>
This commit is contained in:
parent
4048686c3b
commit
66b64f8276
@ -9,11 +9,14 @@ apt-get update
|
|||||||
apt-get install -y python3-pip python3-setuptools
|
apt-get install -y python3-pip python3-setuptools
|
||||||
pip3 install kubernetes
|
pip3 install kubernetes
|
||||||
|
|
||||||
|
#install the cmake
|
||||||
|
./install_cmake.sh
|
||||||
|
|
||||||
# Download and build XGBoost
|
# Download and build XGBoost
|
||||||
rm -rf xgboost_src && mkdir xgboost_src
|
rm -rf xgboost_src && mkdir xgboost_src
|
||||||
pushd xgboost_src
|
pushd xgboost_src
|
||||||
git clone https://github.com/dmlc/xgboost .
|
git clone https://github.com/dmlc/xgboost .
|
||||||
git checkout 6d5b34d82486cd1d0480c548f5d1953834659bd6
|
git checkout 9e955fb9b06cac32a06c92c4715f749d9d87e932
|
||||||
git submodule init
|
git submodule init
|
||||||
git submodule update
|
git submodule update
|
||||||
git apply ../patch/xgboost-01.diff
|
git apply ../patch/xgboost-01.diff
|
||||||
|
8
demos/xgboost/install_cmake.sh
Executable file
8
demos/xgboost/install_cmake.sh
Executable file
@ -0,0 +1,8 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
set -e
|
||||||
|
cd ~
|
||||||
|
wget https://github.com/Kitware/CMake/releases/download/v3.15.5/cmake-3.15.5.tar.gz && tar xf cmake-3.15.5.tar.gz
|
||||||
|
cd cmake-3.15.5
|
||||||
|
./bootstrap
|
||||||
|
make -j4
|
||||||
|
sudo make install
|
@ -1,18 +1,18 @@
|
|||||||
diff --git a/tracker/dmlc_tracker/local.py b/tracker/dmlc_tracker/local.py
|
diff --git a/tracker/dmlc_tracker/local.py b/tracker/dmlc_tracker/local.py
|
||||||
index dff7c17..fca0f9d 100644
|
index 6e4af12..09df15e 100644
|
||||||
--- a/tracker/dmlc_tracker/local.py
|
--- a/tracker/dmlc_tracker/local.py
|
||||||
+++ b/tracker/dmlc_tracker/local.py
|
+++ b/tracker/dmlc_tracker/local.py
|
||||||
@@ -24,6 +24,13 @@ def exec_cmd(cmd, role, taskid, pass_env):
|
@@ -26,6 +26,13 @@ def exec_cmd(cmd, num_attempt, role, taskid, pass_env):
|
||||||
|
num_retry = env.get('DMLC_NUM_ATTEMPT', num_attempt)
|
||||||
num_retry = env.get('DMLC_NUM_ATTEMPT', 0)
|
num_trial = 0
|
||||||
|
|
||||||
+ cmd_str = ''
|
+ cmd_str = ''
|
||||||
+ for k, v in env.items():
|
+ for k, v in env.items():
|
||||||
+ if str(k)[0:4] == 'DMLC':
|
+ if str(k)[0:4] == 'DMLC':
|
||||||
+ strenv = str(k) + '=' + str(v);
|
+ strenv = str(k) + '=' + str(v);
|
||||||
+ cmd_str = cmd_str + ' ' + strenv
|
+ cmd_str = cmd_str + ' ' + strenv
|
||||||
+ cmd = cmd + cmd_str
|
+ cmdline = cmdline + ' ' + cmd_str
|
||||||
+
|
+
|
||||||
|
logging.debug('num of retry %d',num_retry)
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
if os.name == 'nt':
|
|
||||||
ret = subprocess.call(cmd, shell=True, env=env)
|
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
diff --git a/src/allreduce_base.cc b/src/allreduce_base.cc
|
diff --git a/rabit/src/allreduce_base.cc b/rabit/src/allreduce_base.cc
|
||||||
index 143db6e..a6daf20 100644
|
index d1959eaa..68cd377a 100644
|
||||||
--- a/src/allreduce_base.cc
|
--- a/rabit/src/allreduce_base.cc
|
||||||
+++ b/src/allreduce_base.cc
|
+++ b/rabit/src/allreduce_base.cc
|
||||||
@@ -486,12 +486,13 @@ AllreduceBase::TryAllreduceTree(void *sendrecvbuf_,
|
@@ -551,12 +551,13 @@ AllreduceBase::TryAllreduceTree(void *sendrecvbuf_,
|
||||||
// select must return
|
// select must return
|
||||||
watcher.Poll();
|
watcher.Poll();
|
||||||
// exception handling
|
// exception handling
|
||||||
@ -18,7 +18,7 @@ index 143db6e..a6daf20 100644
|
|||||||
// read data from childs
|
// read data from childs
|
||||||
for (int i = 0; i < nlink; ++i) {
|
for (int i = 0; i < nlink; ++i) {
|
||||||
if (i != parent_index && watcher.CheckRead(links[i].sock)) {
|
if (i != parent_index && watcher.CheckRead(links[i].sock)) {
|
||||||
@@ -641,12 +642,13 @@ AllreduceBase::TryBroadcast(void *sendrecvbuf_, size_t total_size, int root) {
|
@@ -731,12 +732,13 @@ AllreduceBase::TryBroadcast(void *sendrecvbuf_, size_t total_size, int root) {
|
||||||
// select
|
// select
|
||||||
watcher.Poll();
|
watcher.Poll();
|
||||||
// exception handling
|
// exception handling
|
||||||
|
@ -1,10 +1,10 @@
|
|||||||
diff --git a/src/cli_main.cc b/src/cli_main.cc
|
diff --git a/src/cli_main.cc b/src/cli_main.cc
|
||||||
index faa93ad..a201615 100644
|
index 5c602f37..d18a77c2 100644
|
||||||
--- a/src/cli_main.cc
|
--- a/src/cli_main.cc
|
||||||
+++ b/src/cli_main.cc
|
+++ b/src/cli_main.cc
|
||||||
@@ -339,6 +339,16 @@ int CLIRunTask(int argc, char *argv[]) {
|
@@ -444,6 +444,16 @@ class CLI {
|
||||||
printf("Usage: <config>\n");
|
this->PrintHelp();
|
||||||
return 0;
|
exit(1);
|
||||||
}
|
}
|
||||||
+
|
+
|
||||||
+ // FIXME:workaroud for local distributed machine learning on Occlum
|
+ // FIXME:workaroud for local distributed machine learning on Occlum
|
||||||
@ -16,6 +16,6 @@ index faa93ad..a201615 100644
|
|||||||
+ }
|
+ }
|
||||||
+ }
|
+ }
|
||||||
+
|
+
|
||||||
rabit::Init(argc, argv);
|
for (int i = 0; i < argc; ++i) {
|
||||||
|
std::string str {argv[i]};
|
||||||
std::vector<std::pair<std::string, std::string> > cfg;
|
if (str == "-h" || str == "--help") {
|
||||||
|
Loading…
Reference in New Issue
Block a user