[demos] Fix distributed pytorch panic

This commit is contained in:
ClawSeven 2023-03-15 19:36:05 +08:00 committed by volcano
parent 745a7619c9
commit 04083cc200

@@ -34,7 +34,7 @@ function build_instance()
fi fi
new_json="$(jq '.resource_limits.user_space_size = "4000MB" | new_json="$(jq '.resource_limits.user_space_size = "4000MB" |
.resource_limits.kernel_space_heap_size = "256MB" | .resource_limits.kernel_space_heap_size = "400MB" |
.resource_limits.max_num_of_threads = 64 | .resource_limits.max_num_of_threads = 64 |
.env.untrusted += [ "MASTER_ADDR", "MASTER_PORT", "WORLD_SIZE", "RANK", "OMP_NUM_THREADS", "HOME" ] | .env.untrusted += [ "MASTER_ADDR", "MASTER_PORT", "WORLD_SIZE", "RANK", "OMP_NUM_THREADS", "HOME" ] |
.env.default += ["GLOO_DEVICE_TRANSPORT=TCP_TLS"] | .env.default += ["GLOO_DEVICE_TRANSPORT=TCP_TLS"] |