
Commit e56d7de

ROCm: Enable test_distributed_spawn
Fixes ROCm#927
Signed-off-by: Jagadish Krishnamoorthy <jagdish.krishna@gmail.com>
1 parent e3f546a

1 file changed: 0 additions & 18 deletions

torch/testing/_internal/distributed/distributed_test.py
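For context on the hunks below: every deletion removes a `@skip_if_rocm` decorator, while the remaining guards (`@skip_if_lt_x_gpu`, backend checks) stay in place. The following is a minimal sketch of how skip decorators of this kind are typically implemented for multiprocess tests; the names follow `torch.testing._internal` conventions, but the env flag and exit codes are illustrative assumptions, not the verbatim upstream source:

# Illustrative sketch of the two skip decorators seen in this diff.
# Assumes a TEST_WITH_ROCM flag; exit codes are placeholders, since
# spawned test processes signal "skipped" via exit codes rather than
# raising unittest.SkipTest in the parent.
import os
import sys
from functools import wraps

import torch

TEST_WITH_ROCM = os.environ.get("PYTORCH_TEST_WITH_ROCM", "0") == "1"


def skip_if_rocm(func):
    """Skip the wrapped multiprocess test on ROCm builds."""
    @wraps(func)
    def wrapper(*args, **kwargs):
        if TEST_WITH_ROCM:
            sys.exit(4)  # illustrative skip exit code
        return func(*args, **kwargs)
    return wrapper


def skip_if_lt_x_gpu(x):
    """Skip the wrapped test unless at least x GPUs are visible."""
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            if torch.cuda.is_available() and torch.cuda.device_count() >= x:
                return func(*args, **kwargs)
            sys.exit(3)  # illustrative skip exit code
        return wrapper
    return decorator

Removing `@skip_if_rocm` from a test therefore leaves the GPU-count and backend guards as the only gates, which is exactly what this commit does for the tests below.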
@@ -3938,7 +3938,6 @@ def forward(self):
         f"The {BACKEND} backend does not support DistributedDataParallel"
     )
     @skip_if_lt_x_gpu(int(os.environ["WORLD_SIZE"]))
-    @skip_if_rocm
     def test_DistributedDataParallel_non_default_stream(self):
         stream = torch.cuda.Stream(self.rank)
         rank = self.rank
@@ -3977,7 +3976,6 @@ def test_DistributedDataParallel_non_default_stream(self):
         f"The {BACKEND} backend does not support DDP communication hook on CUDA devices"
     )
     @skip_if_lt_x_gpu(int(os.environ["WORLD_SIZE"]))
-    @skip_if_rocm
     def test_ddp_comm_hook_logging(self):
         hooks = [
             default.allreduce_hook,
@@ -4171,7 +4169,6 @@ def _test_ddp_hook_with_optimizer_parity(
         "Issues with async error handling, see https://github.com/pytorch/pytorch/issues/73259"
     )
     @skip_if_lt_x_gpu(2)
-    @skip_if_rocm
     @parametrize("grad_as_bucket_view", [True, False])
     @parametrize("static_graph", [True, False])
     @parametrize("optimize_subset", [True, False])
@@ -4199,7 +4196,6 @@ def test_ddp_hook_with_optimizer_parity_adamw(
         "Issues with async error handling, see https://github.com/pytorch/pytorch/issues/73259"
     )
     @skip_if_lt_x_gpu(2)
-    @skip_if_rocm
     @parametrize("optimize_subset", [True, False])
     def test_ddp_hook_with_optimizer_parity_adam(self, optimize_subset):
         adam_lr = 1e-2
@@ -4220,7 +4216,6 @@ def test_ddp_hook_with_optimizer_parity_adam(self, optimize_subset):
         "Issues with async error handling, see https://github.com/pytorch/pytorch/issues/73259"
     )
     @skip_if_lt_x_gpu(2)
-    @skip_if_rocm
     @parametrize("optimize_subset", [True, False])
     def test_ddp_hook_with_optimizer_parity_sgd(self, optimize_subset):
         sgd_lr = 1e-2
@@ -4298,7 +4293,6 @@ def _test_ddp_hook_parity(self, state, hook):
         f"The {BACKEND} backend does not support DDP communication hook on CUDA devices"
     )
     @skip_if_lt_x_gpu(int(os.environ["WORLD_SIZE"]))
-    @skip_if_rocm
     def test_ddp_hook_parity_allreduce(self):
         self._test_ddp_hook_parity(state=None, hook=default.allreduce_hook)
 
@@ -4307,7 +4301,6 @@ def test_ddp_hook_parity_allreduce(self):
         f"The {BACKEND} backend does not support DDP communication hook on CUDA devices"
     )
     @skip_if_lt_x_gpu(int(os.environ["WORLD_SIZE"]))
-    @skip_if_rocm
     def test_ddp_hook_parity_allreduce_process_group(self):
         # process_group is passed in to both DDP and comm. hook
         world_size = dist.get_world_size()
@@ -4321,7 +4314,6 @@ def test_ddp_hook_parity_allreduce_process_group(self):
         f"The {BACKEND} backend does not support DDP communication hook on CUDA devices"
     )
     @skip_if_lt_x_gpu(int(os.environ["WORLD_SIZE"]))
-    @skip_if_rocm
     def test_ddp_hook_parity_powerSGD(self):
         for warm_start in [True, False]:
             powersgd_state = powerSGD.PowerSGDState(
@@ -4344,7 +4336,6 @@ def test_ddp_hook_parity_powerSGD(self):
         don't support multiprocessing with spawn start method",
     )
     @skip_if_lt_x_gpu(int(os.environ["WORLD_SIZE"]))
-    @skip_if_rocm
     def test_ddp_hook_parity_post_localSGD(self):
         # Although we start run local SGD at iteration 10, since we still use the global process group to run it,
         # the post-LocalSGD actually still allreduces gradients globally for the remaining iterations.
@@ -7191,14 +7182,12 @@ def _test_compute_bucket_assignment_by_size(self, use_logger):
     @require_backend(DistTestCases.backend_feature["gpu"])
     @require_backends_available(DistTestCases.backend_feature["gpu"])
     @skip_if_lt_x_gpu(2)
-    @skip_if_rocm
     def test_compute_bucket_assignment_by_size_sparse_error_without_logger(self):
         self._test_compute_bucket_assignment_by_size(use_logger=False)
 
     @require_backend(DistTestCases.backend_feature["gpu"])
     @require_backends_available(DistTestCases.backend_feature["gpu"])
     @skip_if_lt_x_gpu(2)
-    @skip_if_rocm
     def test_compute_bucket_assignment_by_size_sparse_error_with_logger(self):
         self._test_compute_bucket_assignment_by_size(use_logger=True)
 
@@ -7283,14 +7272,12 @@ def _test_verify_model_across_rank(self, use_logger):
     @require_backend(DistTestCases.backend_feature["gpu"])
     @require_backends_available(DistTestCases.backend_feature["gpu"])
     @skip_if_lt_x_gpu(2)
-    @skip_if_rocm
     def test_verify_model_across_rank_with_logger(self):
         self._test_verify_model_across_rank(use_logger=True)
 
     @require_backend(DistTestCases.backend_feature["gpu"])
     @require_backends_available(DistTestCases.backend_feature["gpu"])
     @skip_if_lt_x_gpu(2)
-    @skip_if_rocm
     def test_verify_model_across_rank_without_logger(self):
         self._test_verify_model_across_rank(use_logger=False)
 
@@ -7314,7 +7301,6 @@ def _run_test_ddp_model_with_diff_params(self, ctx, net, ddp_group, group_gloo):
     @require_backend(DistTestCases.backend_feature["gpu"])
     @require_backends_available(DistTestCases.backend_feature["gpu"])
     @skip_if_lt_x_gpu(2)
-    @skip_if_rocm
     def test_ddp_model_diff_shape_across_ranks(self):
         group_gloo = dist.new_group(
             timeout=timedelta(seconds=60), backend=dist.Backend.GLOO
@@ -7337,7 +7323,6 @@ def test_ddp_model_diff_shape_across_ranks(self):
     @require_backend(DistTestCases.backend_feature["gpu"])
     @require_backends_available(DistTestCases.backend_feature["gpu"])
     @skip_if_lt_x_gpu(2)
-    @skip_if_rocm
     def test_ddp_model_diff_num_params_across_ranks(self):
         group_gloo = dist.new_group(
             timeout=timedelta(seconds=60), backend=dist.Backend.GLOO
@@ -7679,7 +7664,6 @@ def _test_monitored_barrier_allreduce_hang(self, wait_all_ranks):
     @with_nccl_blocking_wait
     @require_backend(DistTestCases.backend_feature["gpu"])
     @require_backends_available(DistTestCases.backend_feature["gpu"])
-    @skip_if_rocm
     @skip_if_lt_x_gpu(int(os.environ["WORLD_SIZE"]))
     def test_monitored_barrier_allreduce_hang(self):
         # tests expected behavior when nonzero rank hangs and we want to
@@ -7689,7 +7673,6 @@ def test_monitored_barrier_allreduce_hang(self):
     @with_nccl_blocking_wait
     @require_backend(DistTestCases.backend_feature["gpu"])
     @require_backends_available(DistTestCases.backend_feature["gpu"])
-    @skip_if_rocm
     @skip_if_lt_x_gpu(int(os.environ["WORLD_SIZE"]))
     def test_monitored_barrier_allreduce_hang_wait_all_ranks(self):
         # tests expected behavior when nonzero rank hangs and we want to
@@ -8024,7 +8007,6 @@ def test_ddp_inference(self):
         f"The {BACKEND} backend does not support DistributedDataParallel"
     )
     @skip_if_lt_x_gpu(2)
-    @skip_if_rocm
     def test_ddp_sync_bn_training_vs_eval(self):
         rank = self.rank
         torch.cuda.set_device(rank)
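With `@skip_if_rocm` removed, these DDP and communication-hook tests now run on ROCm builds instead of skipping, gated only by the remaining GPU-count and backend guards. A typical spawn-harness invocation might look like the following; this is illustrative, since the harness reads BACKEND and WORLD_SIZE from the environment and exact paths and test names depend on the checkout (on ROCm the nccl backend is backed by RCCL):

# Illustrative invocation; names and paths may differ by checkout.
WORLD_SIZE=2 BACKEND=nccl python test/distributed/test_distributed_spawn.py -v TestDistBackendWithSpawn.test_ddp_hook_parity_allreduce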
