diff --git a/demos/xgboost/download_and_build_xgboost.sh b/demos/xgboost/download_and_build_xgboost.sh index 9616db14..c8975181 100755 --- a/demos/xgboost/download_and_build_xgboost.sh +++ b/demos/xgboost/download_and_build_xgboost.sh @@ -9,11 +9,14 @@ apt-get update apt-get install -y python3-pip python3-setuptools pip3 install kubernetes +#install the cmake +./install_cmake.sh + # Download and build XGBoost rm -rf xgboost_src && mkdir xgboost_src pushd xgboost_src git clone https://github.com/dmlc/xgboost . -git checkout 6d5b34d82486cd1d0480c548f5d1953834659bd6 +git checkout 9e955fb9b06cac32a06c92c4715f749d9d87e932 git submodule init git submodule update git apply ../patch/xgboost-01.diff diff --git a/demos/xgboost/install_cmake.sh b/demos/xgboost/install_cmake.sh new file mode 100755 index 00000000..cc99829c --- /dev/null +++ b/demos/xgboost/install_cmake.sh @@ -0,0 +1,8 @@ +#!/bin/bash +set -e +cd ~ +wget https://github.com/Kitware/CMake/releases/download/v3.15.5/cmake-3.15.5.tar.gz && tar xf cmake-3.15.5.tar.gz +cd cmake-3.15.5 +./bootstrap +make -j4 +sudo make install diff --git a/demos/xgboost/patch/dmlc-core-01.diff b/demos/xgboost/patch/dmlc-core-01.diff index ab2a29c0..4f6d22ea 100644 --- a/demos/xgboost/patch/dmlc-core-01.diff +++ b/demos/xgboost/patch/dmlc-core-01.diff @@ -1,18 +1,18 @@ diff --git a/tracker/dmlc_tracker/local.py b/tracker/dmlc_tracker/local.py -index dff7c17..fca0f9d 100644 +index 6e4af12..09df15e 100644 --- a/tracker/dmlc_tracker/local.py +++ b/tracker/dmlc_tracker/local.py -@@ -24,6 +24,13 @@ def exec_cmd(cmd, role, taskid, pass_env): - - num_retry = env.get('DMLC_NUM_ATTEMPT', 0) +@@ -26,6 +26,13 @@ def exec_cmd(cmd, num_attempt, role, taskid, pass_env): + num_retry = env.get('DMLC_NUM_ATTEMPT', num_attempt) + num_trial = 0 + cmd_str = '' + for k, v in env.items(): + if str(k)[0:4] == 'DMLC': + strenv = str(k) + '=' + str(v); + cmd_str = cmd_str + ' ' + strenv -+ cmd = cmd + cmd_str ++ cmdline = cmdline + ' ' + cmd_str + + logging.debug('num of retry %d',num_retry) + while True: - if os.name == 'nt': - ret = subprocess.call(cmd, shell=True, env=env) diff --git a/demos/xgboost/patch/rabit-01.diff b/demos/xgboost/patch/rabit-01.diff index e710390a..3e4972fb 100644 --- a/demos/xgboost/patch/rabit-01.diff +++ b/demos/xgboost/patch/rabit-01.diff @@ -1,36 +1,36 @@ -diff --git a/src/allreduce_base.cc b/src/allreduce_base.cc -index 143db6e..a6daf20 100644 ---- a/src/allreduce_base.cc -+++ b/src/allreduce_base.cc -@@ -486,12 +486,13 @@ AllreduceBase::TryAllreduceTree(void *sendrecvbuf_, +diff --git a/rabit/src/allreduce_base.cc b/rabit/src/allreduce_base.cc +index d1959eaa..68cd377a 100644 +--- a/rabit/src/allreduce_base.cc ++++ b/rabit/src/allreduce_base.cc +@@ -551,12 +551,13 @@ AllreduceBase::TryAllreduceTree(void *sendrecvbuf_, // select must return watcher.Poll(); // exception handling - for (int i = 0; i < nlink; ++i) { -+ //FIXME:workaround for Occlum -+ /*for (int i = 0; i < nlink; ++i) { ++ // FIXME:workaround for Occlum ++ /* for (int i = 0; i < nlink; ++i) { // recive OOB message from some link if (watcher.CheckExcept(links[i].sock)) { return ReportError(&links[i], kGetExcept); } - } -+ }*/ ++ } */ // read data from childs for (int i = 0; i < nlink; ++i) { if (i != parent_index && watcher.CheckRead(links[i].sock)) { -@@ -641,12 +642,13 @@ AllreduceBase::TryBroadcast(void *sendrecvbuf_, size_t total_size, int root) { +@@ -731,12 +732,13 @@ AllreduceBase::TryBroadcast(void *sendrecvbuf_, size_t total_size, int root) { // select watcher.Poll(); // exception handling - for (int i = 0; i < nlink; ++i) { -+ //FIXME:workaround for Occlum -+ /*for (int i = 0; i < nlink; ++i) { ++ // FIXME:workaround for Occlum ++ /* for (int i = 0; i < nlink; ++i) { // recive OOB message from some link if (watcher.CheckExcept(links[i].sock)) { return ReportError(&links[i], kGetExcept); } - } -+ }*/ ++ } */ if (in_link == -2) { // probe in-link for (int i = 0; i < nlink; ++i) { diff --git a/demos/xgboost/patch/xgboost-01.diff b/demos/xgboost/patch/xgboost-01.diff index 4fec3b01..c78afb20 100644 --- a/demos/xgboost/patch/xgboost-01.diff +++ b/demos/xgboost/patch/xgboost-01.diff @@ -1,21 +1,21 @@ diff --git a/src/cli_main.cc b/src/cli_main.cc -index faa93ad..a201615 100644 +index 5c602f37..d18a77c2 100644 --- a/src/cli_main.cc +++ b/src/cli_main.cc -@@ -339,6 +339,16 @@ int CLIRunTask(int argc, char *argv[]) { - printf("Usage: \n"); - return 0; - } +@@ -444,6 +444,16 @@ class CLI { + this->PrintHelp(); + exit(1); + } + -+ //FIXME:workaroud for local distributed machine learning on Occlum -+ for (int i = 2; i < argc; ++i) { -+ char name[128], val[128]; -+ if (sscanf(argv[i], "%[^=]=%s", name, val) == 2) { -+ //LOG(CONSOLE) << "setenv: " << std::string(name) << " = " << std::string(val); -+ setenv(std::string(name).c_str(), std::string(val).c_str(), 1); ++ // FIXME:workaroud for local distributed machine learning on Occlum ++ for (int i = 2; i < argc; ++i) { ++ char name[128], val[128]; ++ if (sscanf(argv[i], "%[^=]=%s", name, val) == 2) { ++ //LOG(CONSOLE) << "setenv: " << std::string(name) << " = " << std::string(val); ++ setenv(std::string(name).c_str(), std::string(val).c_str(), 1); ++ } + } -+ } + - rabit::Init(argc, argv); - - std::vector > cfg; + for (int i = 0; i < argc; ++i) { + std::string str {argv[i]}; + if (str == "-h" || str == "--help") {