🐛 Bug
Running the upstreamed benchmarking scripts with the following command results in an unexpected error.
```sh
python xla/benchmarks/experiment_runner.py \
  --suite-name torchbench \
  --accelerator cuda \
  --xla PJRT --xla None \
  --dynamo openxla --dynamo None \
  --test eval --test train \
  --repeat 30 --iterations-per-run 5 \
  --print-subprocess \
  --no-resume -k moco
```
```
Traceback (most recent call last):
  File "xla/benchmarks/experiment_runner.py", line 601, in <module>
    main()
  File "xla/benchmarks/experiment_runner.py", line 597, in main
    runner.run()
  File "xla/benchmarks/experiment_runner.py", line 65, in run
    self.run_single_experiment(experiment_config, model_config)
  File "xla/benchmarks/experiment_runner.py", line 161, in run_single_experiment
    run_metrics, output = self.timed_run(benchmark_experiment,
  File "xla/benchmarks/experiment_runner.py", line 328, in timed_run
    output = loop()
  File "xla/benchmarks/experiment_runner.py", line 310, in loop
    output = benchmark_model.model_iter_fn(
  File "xla/benchmarks/benchmark_model.py", line 154, in eval
    pred = self.module(*inputs)
  File "torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "torch/nn/modules/module.py", line 1520, in _call_impl
    return forward_call(*args, **kwargs)
  File "torch/nn/parallel/distributed.py", line 1519, in forward
    inputs, kwargs = self._pre_forward(*inputs, **kwargs)
  File "torch/nn/parallel/distributed.py", line 1420, in _pre_forward
    self._sync_buffers()
  File "torch/nn/parallel/distributed.py", line 2040, in _sync_buffers
    self._sync_module_buffers(authoritative_rank)
  File "torch/nn/parallel/distributed.py", line 2044, in _sync_module_buffers
    self._default_broadcast_coalesced(authoritative_rank=authoritative_rank)
  File "torch/nn/parallel/distributed.py", line 2066, in _default_broadcast_coalesced
    self._distributed_broadcast_coalesced(bufs, bucket_size, authoritative_rank)
  File "torch/nn/parallel/distributed.py", line 1981, in _distributed_broadcast_coalesced
    dist._broadcast_coalesced(
RuntimeError: No backend type associated with device type cpu
```
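The failure happens inside DDP's buffer synchronization, which broadcasts module buffers through the active process group. As a minimal sketch (my assumption about the mechanism, not taken from the benchmark scripts), the same `RuntimeError` can be reproduced by initializing a process group whose backend only handles CUDA tensors (e.g. NCCL) and then issuing a collective on a CPU tensor, which is what DDP does when buffers are still on the host:

```python
# Hypothetical single-rank repro, assuming a host with one CUDA device and NCCL available.
import os
import torch
import torch.distributed as dist

os.environ.setdefault("MASTER_ADDR", "127.0.0.1")
os.environ.setdefault("MASTER_PORT", "29500")

# NCCL registers a backend for the "cuda" device type only.
dist.init_process_group("nccl", rank=0, world_size=1)

buf = torch.zeros(4)        # CPU tensor, like a DDP buffer left on the host
dist.broadcast(buf, src=0)  # RuntimeError: No backend type associated with device type cpu
```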
Environment