We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent a9a32f3, commit b0c451f (Copy full SHA for b0c451f)
1 file changed
torch/csrc/distributed/c10d/symm_mem/nccl_devcomm_manager.hpp
@@ -63,10 +63,16 @@ class NCCLDevCommManager {
63
}
64
c10::cuda::CUDAGuard guard(device_);
65
ncclDevComm devComm;
66
+
67
+ // NCCL_DEV_COMM_REQUIREMENTS_INITIALIZER is available starting with NCCL 2.29
68
+#ifdef NCCL_DEV_COMM_REQUIREMENTS_INITIALIZER
69
+ ncclDevCommRequirements reqs = NCCL_DEV_COMM_REQUIREMENTS_INITIALIZER;
70
+#else
71
+ // NCCL 2.28 does not provide the initializer macro, so zero-fill the struct instead
72
ncclDevCommRequirements reqs;
- // See example in
- // https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/usage/deviceapi.html#simple-lsa-kernel
73
memset(&reqs, 0, sizeof(ncclDevCommRequirements));
74
+#endif
75
76
// Specifies the number of memory barriers to allocate.
77
reqs.lsaBarrierCount = NCCL_LSA_BARRIER_COUNT;
78
// TODO (kwen2501): Add network barrier count.
0 commit comments