Intel has optimized XGBoost in the latest version, and benchmarks show a clear speedup. In svm training with the Yahoo LTR data, the latest version is 7.8X faster on the host and 2X faster in Occlum. Signed-off-by: yuanwu <yuan.wu@intel.com>
37 lines
1.3 KiB
Diff
37 lines
1.3 KiB
Diff
diff --git a/rabit/src/allreduce_base.cc b/rabit/src/allreduce_base.cc
|
|
index d1959eaa..68cd377a 100644
|
|
--- a/rabit/src/allreduce_base.cc
|
|
+++ b/rabit/src/allreduce_base.cc
|
|
@@ -551,12 +551,13 @@ AllreduceBase::TryAllreduceTree(void *sendrecvbuf_,
|
|
// select must return
|
|
watcher.Poll();
|
|
// exception handling
|
|
- for (int i = 0; i < nlink; ++i) {
|
|
+ // FIXME: workaround for Occlum -- the per-link CheckExcept (OOB) check after Poll() is disabled
|
|
+ /* for (int i = 0; i < nlink; ++i) {
|
|
// recive OOB message from some link
|
|
if (watcher.CheckExcept(links[i].sock)) {
|
|
return ReportError(&links[i], kGetExcept);
|
|
}
|
|
- }
|
|
+ } */
|
|
// read data from childs
|
|
for (int i = 0; i < nlink; ++i) {
|
|
if (i != parent_index && watcher.CheckRead(links[i].sock)) {
|
|
@@ -731,12 +732,13 @@ AllreduceBase::TryBroadcast(void *sendrecvbuf_, size_t total_size, int root) {
|
|
// select
|
|
watcher.Poll();
|
|
// exception handling
|
|
- for (int i = 0; i < nlink; ++i) {
|
|
+ // FIXME: workaround for Occlum -- the per-link CheckExcept (OOB) check after Poll() is disabled
|
|
+ /* for (int i = 0; i < nlink; ++i) {
|
|
// recive OOB message from some link
|
|
if (watcher.CheckExcept(links[i].sock)) {
|
|
return ReportError(&links[i], kGetExcept);
|
|
}
|
|
- }
|
|
+ } */
|
|
if (in_link == -2) {
|
|
// probe in-link
|
|
for (int i = 0; i < nlink; ++i) {
|