Skip to content

flaky distributed/tests/test_failed_workers.py::test_gather_then_submit_after_failed_workers #6530

@hendrikmakait

Description

@hendrikmakait

https://github.com/dask/distributed/runs/6796365931?check_suite_focus=true#step:11:1301

_________________ test_gather_then_submit_after_failed_workers _________________
args = (), kwds = {}
@wraps(func)
def inner(*args, **kwds):
with self._recreate_cm():
>           return func(*args, **kwds)
../../../miniconda3/envs/dask-distributed/lib/python3.10/contextlib.py:79: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
distributed/utils_test.py:1212: in test_func
return _run_and_close_tornado(async_fn_outer)
distributed/utils_test.py:473: in _run_and_close_tornado
return asyncio.run(inner_fn())
../../../miniconda3/envs/dask-distributed/lib/python3.10/asyncio/runners.py:44: in run
return loop.run_until_complete(main)
../../../miniconda3/envs/dask-distributed/lib/python3.10/asyncio/base_events.py:646: in run_until_complete
return future.result()
distributed/utils_test.py:470: in inner_fn
return await async_fn(*args, **kwargs)
distributed/utils_test.py:1209: in async_fn_outer
return await asyncio.wait_for(async_fn(), timeout=timeout * 2)
../../../miniconda3/envs/dask-distributed/lib/python3.10/asyncio/tasks.py:445: in wait_for
return fut.result()
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
async def async_fn():
        result = None
with tempfile.TemporaryDirectory() as tmpdir:
            config2 = merge({"temporary-directory": tmpdir}, config)
with dask.config.set(config2):
                workers = []
                s = False
for _ in range(60):
try:
                        s, ws = await start_cluster(
                            nthreads,
                            scheduler,
                            security=security,
                            Worker=Worker,
                            scheduler_kwargs=scheduler_kwargs,
                            worker_kwargs=worker_kwargs,
                        )
except Exception as e:
                        logger.error(
"Failed to start gen_cluster: "
f"{e.__class__.__name__}: {e}; retrying",
                            exc_info=True,
                        )
await asyncio.sleep(1)
else:
                        workers[:] = ws
                        args = [s] + workers
break
if s is False:
raise Exception("Could not start cluster")
if client:
                    c = await Client(
                        s.address,
                        security=security,
                        asynchronous=True,
                        **client_kwargs,
                    )
                    args = [c] + args
try:
                    coro = func(*args, *outer_args, **kwargs)
                    task = asyncio.create_task(coro)
                    coro2 = asyncio.wait_for(asyncio.shield(task), timeout)
                    result = await coro2
                    validate_state(s, *workers)
except asyncio.TimeoutError:
assert task
                    buffer = io.StringIO()
# This stack indicates where the coro/test is suspended
                    task.print_stack(file=buffer)
if cluster_dump_directory:
await dump_cluster_state(
                            s,
                            ws,
                            output_dir=cluster_dump_directory,
                            func_name=func.__name__,
                        )
                    task.cancel()
while not task.cancelled():
await asyncio.sleep(0.01)
# Hopefully, the hang has been caused by inconsistent
# state, which should be much more meaningful than the
# timeout
                    validate_state(s, *workers)
# Remove as much of the traceback as possible; it's
# uninteresting boilerplate from utils_test and asyncio
# and not from the code being tested.
raise asyncio.TimeoutError(
f"Test timeout after {timeout}s.\n"
"========== Test stack trace starts here ==========\n"
f"{buffer.getvalue()}"
                    ) from None
except pytest.xfail.Exception:
raise
except Exception:
if cluster_dump_directory and not has_pytestmark(
                        test_func, "xfail"
                    ):
await dump_cluster_state(
                            s,
                            ws,
                            output_dir=cluster_dump_directory,
                            func_name=func.__name__,
                        )
raise
finally:
if client and c.status not in ("closing", "closed"):
await c._close(fast=s.status == Status.closed)
await end_cluster(s, workers)
await asyncio.wait_for(cleanup_global_workers(), 1)
try:
                    c = await default_client()
except ValueError:
pass
else:
await c._close(fast=True)
def get_unclosed():
return [c for c in Comm._instances if not c.closed()] + [
                        c
for c in _global_clients.values()
if c.status != "closed"
                    ]
try:
                    start = time()
while time() < start + 60:
                        gc.collect()
if not get_unclosed():
break
await asyncio.sleep(0.05)
else:
if allow_unclosed:
print(f"Unclosed Comms: {get_unclosed()}")
else:
>                           raise RuntimeError("Unclosed Comms", get_unclosed())
E                           RuntimeError: ('Unclosed Comms', [<TCP (closed)  local=tcp://127.0.0.1:56326 remote=tcp://127.0.0.1:56354>, <TCP (closed) Scheduler connection to worker local=tcp://127.0.0.1:56326 remote=tcp://127.0.0.1:56349>, <TCP (closed) ConnectionPool local=tcp://127.0.0.1:56331 remote=tcp://127.0.0.1:56326>, <TCP (closed)  local=tcp://127.0.0.1:56326 remote=tcp://127.0.0.1:56331>])
distributed/utils_test.py:1190: RuntimeError
----------------------------- Captured stderr call -----------------------------
2022-06-08 15:30:36,596 - distributed.scheduler - INFO - State start
2022-06-08 15:30:36,600 - distributed.scheduler - INFO - Clear task state
2022-06-08 15:30:36,601 - distributed.scheduler - INFO -   Scheduler at:     tcp://127.0.0.1:56326
2022-06-08 15:30:36,601 - distributed.scheduler - INFO -   dashboard at:           127.0.0.1:56325
2022-06-08 15:30:36,632 - distributed.nanny - INFO -         Start Nanny at: 'tcp://127.0.0.1:56327'
2022-06-08 15:30:36,633 - distributed.nanny - INFO -         Start Nanny at: 'tcp://127.0.0.1:56328'
2022-06-08 15:30:36,633 - distributed.nanny - INFO -         Start Nanny at: 'tcp://127.0.0.1:56329'
2022-06-08 15:30:36,718 - distributed.nanny - INFO -         Start Nanny at: 'tcp://127.0.0.1:56330'
/Users/runner/miniconda3/envs/dask-distributed/lib/python3.10/contextlib.py:142: UserWarning: Creating scratch directories is taking a surprisingly long time. This is often due to running workers on a network file system. Consider specifying a local-directory to point workers to write scratch data to a local disk.
  next(self.gen)
/Users/runner/miniconda3/envs/dask-distributed/lib/python3.10/contextlib.py:142: UserWarning: Creating scratch directories is taking a surprisingly long time. This is often due to running workers on a network file system. Consider specifying a local-directory to point workers to write scratch data to a local disk.
  next(self.gen)
/Users/runner/miniconda3/envs/dask-distributed/lib/python3.10/contextlib.py:142: UserWarning: Creating scratch directories is taking a surprisingly long time. This is often due to running workers on a network file system. Consider specifying a local-directory to point workers to write scratch data to a local disk.
  next(self.gen)
/Users/runner/miniconda3/envs/dask-distributed/lib/python3.10/contextlib.py:142: UserWarning: Creating scratch directories is taking a surprisingly long time. This is often due to running workers on a network file system. Consider specifying a local-directory to point workers to write scratch data to a local disk.
  next(self.gen)
2022-06-08 15:30:48,925 - distributed.worker - INFO -       Start worker at:      tcp://127.0.0.1:56342
2022-06-08 15:30:48,925 - distributed.worker - INFO -          Listening to:      tcp://127.0.0.1:56342
2022-06-08 15:30:48,925 - distributed.worker - INFO -          dashboard at:            127.0.0.1:56346
2022-06-08 15:30:48,926 - distributed.worker - INFO - Waiting to connect to:      tcp://127.0.0.1:56326
2022-06-08 15:30:48,926 - distributed.worker - INFO - -------------------------------------------------
2022-06-08 15:30:48,926 - distributed.worker - INFO -               Threads:                          1
2022-06-08 15:30:48,926 - distributed.worker - INFO -                Memory:                  14.00 GiB
2022-06-08 15:30:48,926 - distributed.worker - INFO -       Local Directory: /var/folders/24/8k48jl6d249_n_qfxwsl6xvm0000gn/T/tmpkxjf1p50/dask-worker-space/worker-cqaesjyc
2022-06-08 15:30:48,926 - distributed.worker - INFO - -------------------------------------------------
2022-06-08 15:30:48,927 - distributed.worker - INFO -       Start worker at:      tcp://127.0.0.1:56343
2022-06-08 15:30:48,927 - distributed.worker - INFO -          Listening to:      tcp://127.0.0.1:56343
2022-06-08 15:30:48,928 - distributed.worker - INFO -          dashboard at:            127.0.0.1:56347
2022-06-08 15:30:48,928 - distributed.worker - INFO - Waiting to connect to:      tcp://127.0.0.1:56326
2022-06-08 15:30:48,928 - distributed.worker - INFO - -------------------------------------------------
2022-06-08 15:30:48,928 - distributed.worker - INFO -               Threads:                          1
2022-06-08 15:30:48,928 - distributed.worker - INFO -                Memory:                  14.00 GiB
2022-06-08 15:30:48,928 - distributed.worker - INFO -       Local Directory: /var/folders/24/8k48jl6d249_n_qfxwsl6xvm0000gn/T/tmpkxjf1p50/dask-worker-space/worker-w7_0em4r
2022-06-08 15:30:48,928 - distributed.worker - INFO - -------------------------------------------------
2022-06-08 15:30:48,931 - distributed.worker - INFO -       Start worker at:      tcp://127.0.0.1:56344
2022-06-08 15:30:48,931 - distributed.worker - INFO -          Listening to:      tcp://127.0.0.1:56344
2022-06-08 15:30:48,932 - distributed.worker - INFO -          dashboard at:            127.0.0.1:56348
2022-06-08 15:30:48,932 - distributed.worker - INFO - Waiting to connect to:      tcp://127.0.0.1:56326
2022-06-08 15:30:48,932 - distributed.worker - INFO - -------------------------------------------------
2022-06-08 15:30:48,932 - distributed.worker - INFO -               Threads:                          1
2022-06-08 15:30:48,932 - distributed.worker - INFO -                Memory:                  14.00 GiB
2022-06-08 15:30:48,932 - distributed.worker - INFO -       Local Directory: /var/folders/24/8k48jl6d249_n_qfxwsl6xvm0000gn/T/tmpkxjf1p50/dask-worker-space/worker-m_k3z9bl
2022-06-08 15:30:48,932 - distributed.worker - INFO - -------------------------------------------------
2022-06-08 15:30:48,934 - distributed.worker - INFO -       Start worker at:      tcp://127.0.0.1:56345
2022-06-08 15:30:48,934 - distributed.worker - INFO -          Listening to:      tcp://127.0.0.1:56345
2022-06-08 15:30:48,935 - distributed.worker - INFO -          dashboard at:            127.0.0.1:56351
2022-06-08 15:30:48,935 - distributed.worker - INFO - Waiting to connect to:      tcp://127.0.0.1:56326
2022-06-08 15:30:48,935 - distributed.worker - INFO - -------------------------------------------------
2022-06-08 15:30:48,935 - distributed.worker - INFO -               Threads:                          1
2022-06-08 15:30:48,935 - distributed.worker - INFO -                Memory:                  14.00 GiB
2022-06-08 15:30:48,935 - distributed.worker - INFO -       Local Directory: /var/folders/24/8k48jl6d249_n_qfxwsl6xvm0000gn/T/tmpkxjf1p50/dask-worker-space/worker-ho22i89o
2022-06-08 15:30:48,935 - distributed.worker - INFO - -------------------------------------------------
2022-06-08 15:30:50,923 - distributed.scheduler - INFO - Register worker <WorkerState 'tcp://127.0.0.1:56342', name: 0, status: init, memory: 0, processing: 0>
2022-06-08 15:30:51,836 - distributed.scheduler - INFO - Starting worker compute stream, tcp://127.0.0.1:56342
2022-06-08 15:30:51,837 - distributed.core - INFO - Starting established connection
2022-06-08 15:30:51,837 - distributed.worker - INFO -         Registered to:      tcp://127.0.0.1:56326
2022-06-08 15:30:51,838 - distributed.worker - INFO - -------------------------------------------------
2022-06-08 15:30:51,839 - distributed.core - INFO - Starting established connection
2022-06-08 15:30:51,841 - distributed.scheduler - INFO - Register worker <WorkerState 'tcp://127.0.0.1:56344', name: 3, status: init, memory: 0, processing: 0>
2022-06-08 15:30:51,842 - distributed.scheduler - INFO - Starting worker compute stream, tcp://127.0.0.1:56344
2022-06-08 15:30:51,843 - distributed.core - INFO - Starting established connection
2022-06-08 15:30:51,843 - distributed.scheduler - INFO - Register worker <WorkerState 'tcp://127.0.0.1:56343', name: 1, status: init, memory: 0, processing: 0>
2022-06-08 15:30:51,843 - distributed.worker - INFO -         Registered to:      tcp://127.0.0.1:56326
2022-06-08 15:30:51,844 - distributed.worker - INFO - -------------------------------------------------
2022-06-08 15:30:51,844 - distributed.scheduler - INFO - Starting worker compute stream, tcp://127.0.0.1:56343
2022-06-08 15:30:51,844 - distributed.core - INFO - Starting established connection
2022-06-08 15:30:51,845 - distributed.scheduler - INFO - Register worker <WorkerState 'tcp://127.0.0.1:56345', name: 2, status: init, memory: 0, processing: 0>
2022-06-08 15:30:51,845 - distributed.core - INFO - Starting established connection
2022-06-08 15:30:51,846 - distributed.scheduler - INFO - Starting worker compute stream, tcp://127.0.0.1:56345
2022-06-08 15:30:51,846 - distributed.core - INFO - Starting established connection
2022-06-08 15:30:51,846 - distributed.worker - INFO -         Registered to:      tcp://127.0.0.1:56326
2022-06-08 15:30:51,847 - distributed.worker - INFO - -------------------------------------------------
2022-06-08 15:30:51,847 - distributed.worker - INFO -         Registered to:      tcp://127.0.0.1:56326
2022-06-08 15:30:51,848 - distributed.nanny - INFO - Closing Nanny at 'tcp://127.0.0.1:56328'.
2022-06-08 15:30:51,849 - distributed.worker - INFO - -------------------------------------------------
2022-06-08 15:30:51,849 - distributed.nanny - INFO - Nanny asking worker to close
2022-06-08 15:30:51,848 - distributed.core - INFO - Starting established connection
2022-06-08 15:30:51,850 - distributed.nanny - INFO - Closing Nanny at 'tcp://127.0.0.1:56329'.
2022-06-08 15:30:51,850 - distributed.nanny - INFO - Nanny asking worker to close
2022-06-08 15:30:51,851 - distributed.core - INFO - Starting established connection
2022-06-08 15:30:51,852 - distributed.nanny - INFO - Closing Nanny at 'tcp://127.0.0.1:56330'.
2022-06-08 15:30:51,852 - distributed.nanny - INFO - Nanny asking worker to close
2022-06-08 15:30:51,852 - distributed.worker - INFO - Stopping worker at tcp://127.0.0.1:56343
2022-06-08 15:30:51,855 - distributed.worker - INFO - Stopping worker at tcp://127.0.0.1:56345
2022-06-08 15:30:51,856 - distributed.worker - INFO - Stopping worker at tcp://127.0.0.1:56344
2022-06-08 15:30:51,859 - distributed.worker - INFO - Connection to scheduler broken. Closing without reporting. ID: Worker-41f4db71-63d9-4b71-9c76-d7a48239fcfb Address tcp://127.0.0.1:56343 Status: Status.closing
2022-06-08 15:30:51,859 - distributed.scheduler - INFO - Remove worker <WorkerState 'tcp://127.0.0.1:56343', name: 1, status: closing, memory: 0, processing: 0>
2022-06-08 15:30:51,859 - distributed.core - INFO - Removing comms to tcp://127.0.0.1:56343
2022-06-08 15:30:51,860 - distributed.worker - INFO - Connection to scheduler broken. Closing without reporting. ID: Worker-4d3fbcdb-6ff5-49f9-9c77-3666b596c0ec Address tcp://127.0.0.1:56345 Status: Status.closing
2022-06-08 15:30:51,861 - distributed.scheduler - INFO - Remove worker <WorkerState 'tcp://127.0.0.1:56344', name: 3, status: closing, memory: 0, processing: 0>
2022-06-08 15:30:51,861 - distributed.core - INFO - Removing comms to tcp://127.0.0.1:56344
2022-06-08 15:30:51,861 - distributed.worker - INFO - Connection to scheduler broken. Closing without reporting. ID: Worker-d74101b3-46ee-4022-a10d-28f0634c6b9d Address tcp://127.0.0.1:56344 Status: Status.closing
2022-06-08 15:30:51,862 - distributed.scheduler - INFO - Remove worker <WorkerState 'tcp://127.0.0.1:56345', name: 2, status: closing, memory: 0, processing: 0>
2022-06-08 15:30:51,862 - distributed.core - INFO - Removing comms to tcp://127.0.0.1:56345
2022-06-08 15:30:52,214 - distributed.utils_test - ERROR - Failed to start gen_cluster: TimeoutError: Nanny start timed out after 15s.; retrying
Traceback (most recent call last):
  File "/Users/runner/work/distributed/distributed/distributed/nanny.py", line 357, in start_unsafe
    response = await self.instantiate()
  File "/Users/runner/work/distributed/distributed/distributed/nanny.py", line 416, in instantiate
    result = await asyncio.wait_for(
  File "/Users/runner/miniconda3/envs/dask-distributed/lib/python3.10/asyncio/tasks.py", line 432, in wait_for
    await waiter
asyncio.exceptions.CancelledError
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
  File "/Users/runner/miniconda3/envs/dask-distributed/lib/python3.10/asyncio/tasks.py", line 456, in wait_for
    return fut.result()
asyncio.exceptions.CancelledError
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
  File "/Users/runner/work/distributed/distributed/distributed/core.py", line 327, in start
    await asyncio.wait_for(self.start_unsafe(), timeout=timeout)
  File "/Users/runner/miniconda3/envs/dask-distributed/lib/python3.10/asyncio/tasks.py", line 458, in wait_for
    raise exceptions.TimeoutError() from exc
asyncio.exceptions.TimeoutError
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
  File "/Users/runner/work/distributed/distributed/distributed/utils_test.py", line 1075, in async_fn
    s, ws = await start_cluster(
  File "/Users/runner/work/distributed/distributed/distributed/utils_test.py", line 919, in start_cluster
    await asyncio.gather(*workers)
  File "/Users/runner/miniconda3/envs/dask-distributed/lib/python3.10/asyncio/tasks.py", line 650, in _wrap_awaitable
    return (yield from awaitable.__await__())
  File "/Users/runner/work/distributed/distributed/distributed/core.py", line 330, in start
    raise asyncio.TimeoutError(
asyncio.exceptions.TimeoutError: Nanny start timed out after 15s.
2022-06-08 15:30:53,224 - distributed.scheduler - INFO - State start
2022-06-08 15:30:53,541 - distributed.scheduler - INFO - Clear task state
2022-06-08 15:30:53,543 - distributed.scheduler - INFO -   Scheduler at:     tcp://127.0.0.1:56356
2022-06-08 15:30:53,543 - distributed.scheduler - INFO -   dashboard at:           127.0.0.1:56355
2022-06-08 15:30:53,582 - distributed.nanny - INFO -         Start Nanny at: 'tcp://127.0.0.1:56357'
2022-06-08 15:30:53,582 - distributed.nanny - INFO -         Start Nanny at: 'tcp://127.0.0.1:56358'
2022-06-08 15:30:53,583 - distributed.nanny - INFO -         Start Nanny at: 'tcp://127.0.0.1:56359'
2022-06-08 15:30:53,646 - distributed.nanny - INFO -         Start Nanny at: 'tcp://127.0.0.1:56360'
2022-06-08 15:30:59,761 - distributed.worker - INFO -       Start worker at:      tcp://127.0.0.1:56369
2022-06-08 15:30:59,761 - distributed.worker - INFO -          Listening to:      tcp://127.0.0.1:56369
2022-06-08 15:30:59,761 - distributed.worker - INFO -          dashboard at:            127.0.0.1:56371
2022-06-08 15:30:59,761 - distributed.worker - INFO - Waiting to connect to:      tcp://127.0.0.1:56356
2022-06-08 15:30:59,762 - distributed.worker - INFO - -------------------------------------------------
2022-06-08 15:30:59,762 - distributed.worker - INFO -               Threads:                          1
2022-06-08 15:30:59,762 - distributed.worker - INFO -                Memory:                  14.00 GiB
2022-06-08 15:30:59,762 - distributed.worker - INFO -       Local Directory: /var/folders/24/8k48jl6d249_n_qfxwsl6xvm0000gn/T/tmpkxjf1p50/dask-worker-space/worker-59vtgvhl
2022-06-08 15:30:59,762 - distributed.worker - INFO - -------------------------------------------------
2022-06-08 15:30:59,771 - distributed.worker - INFO -       Start worker at:      tcp://127.0.0.1:56370
2022-06-08 15:30:59,771 - distributed.worker - INFO -          Listening to:      tcp://127.0.0.1:56370
2022-06-08 15:30:59,771 - distributed.worker - INFO -          dashboard at:            127.0.0.1:56373
2022-06-08 15:30:59,771 - distributed.worker - INFO - Waiting to connect to:      tcp://127.0.0.1:56356
2022-06-08 15:30:59,772 - distributed.worker - INFO - -------------------------------------------------
2022-06-08 15:30:59,772 - distributed.worker - INFO -               Threads:                          1
2022-06-08 15:30:59,772 - distributed.worker - INFO -                Memory:                  14.00 GiB
2022-06-08 15:30:59,772 - distributed.worker - INFO -       Local Directory: /var/folders/24/8k48jl6d249_n_qfxwsl6xvm0000gn/T/tmpkxjf1p50/dask-worker-space/worker-cs6boirh
2022-06-08 15:30:59,772 - distributed.worker - INFO - -------------------------------------------------
2022-06-08 15:30:59,802 - distributed.worker - INFO -       Start worker at:      tcp://127.0.0.1:56375
2022-06-08 15:30:59,803 - distributed.worker - INFO -          Listening to:      tcp://127.0.0.1:56375
2022-06-08 15:30:59,803 - distributed.worker - INFO -          dashboard at:            127.0.0.1:56377
2022-06-08 15:30:59,803 - distributed.worker - INFO - Waiting to connect to:      tcp://127.0.0.1:56356
2022-06-08 15:30:59,803 - distributed.worker - INFO -       Start worker at:      tcp://127.0.0.1:56376
2022-06-08 15:30:59,803 - distributed.worker - INFO - -------------------------------------------------
2022-06-08 15:30:59,803 - distributed.worker - INFO -          Listening to:      tcp://127.0.0.1:56376
2022-06-08 15:30:59,803 - distributed.worker - INFO -               Threads:                          1
2022-06-08 15:30:59,804 - distributed.worker - INFO -                Memory:                  14.00 GiB
2022-06-08 15:30:59,804 - distributed.worker - INFO -       Local Directory: /var/folders/24/8k48jl6d249_n_qfxwsl6xvm0000gn/T/tmpkxjf1p50/dask-worker-space/worker-ntjdomga
2022-06-08 15:30:59,803 - distributed.worker - INFO -          dashboard at:            127.0.0.1:56378
2022-06-08 15:30:59,804 - distributed.worker - INFO - Waiting to connect to:      tcp://127.0.0.1:56356
2022-06-08 15:30:59,804 - distributed.worker - INFO - -------------------------------------------------
2022-06-08 15:30:59,804 - distributed.worker - INFO - -------------------------------------------------
2022-06-08 15:30:59,804 - distributed.worker - INFO -               Threads:                          1
2022-06-08 15:30:59,804 - distributed.worker - INFO -                Memory:                  14.00 GiB
2022-06-08 15:30:59,804 - distributed.worker - INFO -       Local Directory: /var/folders/24/8k48jl6d249_n_qfxwsl6xvm0000gn/T/tmpkxjf1p50/dask-worker-space/worker-ju7fqx6y
2022-06-08 15:30:59,804 - distributed.worker - INFO - -------------------------------------------------
2022-06-08 15:31:02,115 - distributed.scheduler - INFO - Register worker <WorkerState 'tcp://127.0.0.1:56375', name: 1, status: init, memory: 0, processing: 0>
2022-06-08 15:31:02,117 - distributed.scheduler - INFO - Starting worker compute stream, tcp://127.0.0.1:56375
2022-06-08 15:31:02,117 - distributed.core - INFO - Starting established connection
2022-06-08 15:31:02,118 - distributed.worker - INFO -         Registered to:      tcp://127.0.0.1:56356
2022-06-08 15:31:02,118 - distributed.worker - INFO - -------------------------------------------------
2022-06-08 15:31:02,121 - distributed.core - INFO - Starting established connection
2022-06-08 15:31:02,194 - distributed.scheduler - INFO - Register worker <WorkerState 'tcp://127.0.0.1:56370', name: 2, status: init, memory: 0, processing: 0>
2022-06-08 15:31:02,195 - distributed.scheduler - INFO - Starting worker compute stream, tcp://127.0.0.1:56370
2022-06-08 15:31:02,195 - distributed.core - INFO - Starting established connection
2022-06-08 15:31:02,197 - distributed.scheduler - INFO - Register worker <WorkerState 'tcp://127.0.0.1:56369', name: 0, status: init, memory: 0, processing: 0>
2022-06-08 15:31:02,198 - distributed.scheduler - INFO - Starting worker compute stream, tcp://127.0.0.1:56369
2022-06-08 15:31:02,198 - distributed.core - INFO - Starting established connection
2022-06-08 15:31:02,228 - distributed.worker - INFO -         Registered to:      tcp://127.0.0.1:56356
2022-06-08 15:31:02,229 - distributed.worker - INFO - -------------------------------------------------
2022-06-08 15:31:02,230 - distributed.worker - INFO -         Registered to:      tcp://127.0.0.1:56356
2022-06-08 15:31:02,230 - distributed.worker - INFO - -------------------------------------------------
2022-06-08 15:31:02,235 - distributed.scheduler - INFO - Register worker <WorkerState 'tcp://127.0.0.1:56376', name: 3, status: init, memory: 0, processing: 0>
2022-06-08 15:31:02,236 - distributed.scheduler - INFO - Starting worker compute stream, tcp://127.0.0.1:56376
2022-06-08 15:31:02,236 - distributed.core - INFO - Starting established connection
2022-06-08 15:31:02,238 - distributed.core - INFO - Starting established connection
2022-06-08 15:31:02,239 - distributed.core - INFO - Starting established connection
2022-06-08 15:31:02,239 - distributed.worker - INFO -         Registered to:      tcp://127.0.0.1:56356
2022-06-08 15:31:02,239 - distributed.worker - INFO - -------------------------------------------------
2022-06-08 15:31:02,241 - distributed.core - INFO - Starting established connection
2022-06-08 15:31:02,267 - distributed.scheduler - INFO - Receive client connection: Client-01022396-e740-11ec-b081-00505695c07f
2022-06-08 15:31:02,268 - distributed.core - INFO - Starting established connection
2022-06-08 15:31:03,777 - distributed.nanny - INFO - Worker process 45128 was killed by signal 15
2022-06-08 15:31:03,779 - distributed.scheduler - INFO - Remove worker <WorkerState 'tcp://127.0.0.1:56369', name: 0, status: running, memory: 5, processing: 0>
2022-06-08 15:31:03,779 - distributed.core - INFO - Removing comms to tcp://127.0.0.1:56369
2022-06-08 15:31:03,787 - distributed.nanny - WARNING - Restarting worker
2022-06-08 15:31:06,602 - distributed.worker - INFO -       Start worker at:      tcp://127.0.0.1:56408
2022-06-08 15:31:06,602 - distributed.worker - INFO -          Listening to:      tcp://127.0.0.1:56408
2022-06-08 15:31:06,602 - distributed.worker - INFO -          dashboard at:            127.0.0.1:56409
2022-06-08 15:31:06,602 - distributed.worker - INFO - Waiting to connect to:      tcp://127.0.0.1:56356
2022-06-08 15:31:06,603 - distributed.worker - INFO - -------------------------------------------------
2022-06-08 15:31:06,603 - distributed.worker - INFO -               Threads:                          1
2022-06-08 15:31:06,603 - distributed.worker - INFO -                Memory:                  14.00 GiB
2022-06-08 15:31:06,603 - distributed.worker - INFO -       Local Directory: /var/folders/24/8k48jl6d249_n_qfxwsl6xvm0000gn/T/tmpkxjf1p50/dask-worker-space/worker-vulg6gm5
2022-06-08 15:31:06,603 - distributed.worker - INFO - -------------------------------------------------
2022-06-08 15:31:08,440 - distributed.scheduler - INFO - Register worker <WorkerState 'tcp://127.0.0.1:56408', name: 0, status: init, memory: 0, processing: 0>
2022-06-08 15:31:08,442 - distributed.scheduler - INFO - Starting worker compute stream, tcp://127.0.0.1:56408
2022-06-08 15:31:08,442 - distributed.core - INFO - Starting established connection
2022-06-08 15:31:08,454 - distributed.worker - INFO -         Registered to:      tcp://127.0.0.1:56356
2022-06-08 15:31:08,455 - distributed.worker - INFO - -------------------------------------------------
2022-06-08 15:31:08,488 - distributed.core - INFO - Starting established connection
2022-06-08 15:31:08,781 - distributed.worker - ERROR - Worker stream died during communication: tcp://127.0.0.1:56369
Traceback (most recent call last):
  File "/Users/runner/work/distributed/distributed/distributed/comm/tcp.py", line 439, in connect
    stream = await self.client.connect(
  File "/Users/runner/miniconda3/envs/dask-distributed/lib/python3.10/site-packages/tornado/tcpclient.py", line 265, in connect
    addrinfo = await self.resolver.resolve(host, port, af)
  File "/Users/runner/work/distributed/distributed/distributed/comm/tcp.py", line 424, in resolve
    for fam, _, _, _, address in await asyncio.get_running_loop().getaddrinfo(
  File "/Users/runner/miniconda3/envs/dask-distributed/lib/python3.10/asyncio/base_events.py", line 860, in getaddrinfo
    return await self.run_in_executor(
asyncio.exceptions.CancelledError
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
  File "/Users/runner/miniconda3/envs/dask-distributed/lib/python3.10/asyncio/tasks.py", line 456, in wait_for
    return fut.result()
asyncio.exceptions.CancelledError
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
  File "/Users/runner/work/distributed/distributed/distributed/comm/core.py", line 289, in connect
    comm = await asyncio.wait_for(
  File "/Users/runner/miniconda3/envs/dask-distributed/lib/python3.10/asyncio/tasks.py", line 458, in wait_for
    raise exceptions.TimeoutError() from exc
asyncio.exceptions.TimeoutError
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
  File "/Users/runner/work/distributed/distributed/distributed/worker.py", line 3290, in gather_dep
    response = await get_data_from_worker(
  File "/Users/runner/work/distributed/distributed/distributed/worker.py", line 4616, in get_data_from_worker
    return await retry_operation(_get_data, operation="get_data_from_worker")
  File "/Users/runner/work/distributed/distributed/distributed/utils_comm.py", line 381, in retry_operation
    return await retry(
  File "/Users/runner/work/distributed/distributed/distributed/utils_comm.py", line 366, in retry
    return await coro()
  File "/Users/runner/work/distributed/distributed/distributed/worker.py", line 4593, in _get_data
    comm = await rpc.connect(worker)
  File "/Users/runner/work/distributed/distributed/distributed/core.py", line 1202, in connect
    return await connect_attempt
  File "/Users/runner/work/distributed/distributed/distributed/core.py", line 1138, in _connect
    comm = await connect(
  File "/Users/runner/work/distributed/distributed/distributed/comm/core.py", line 315, in connect
    raise OSError(
OSError: Timed out trying to connect to tcp://127.0.0.1:56369 after 5 s
2022-06-08 15:31:08,823 - distributed.nanny - INFO - Worker process 45132 was killed by signal 15
2022-06-08 15:31:08,825 - distributed.scheduler - INFO - Remove worker <WorkerState 'tcp://127.0.0.1:56375', name: 1, status: running, memory: 21, processing: 0>
2022-06-08 15:31:08,825 - distributed.core - INFO - Removing comms to tcp://127.0.0.1:56375
2022-06-08 15:31:08,831 - distributed.scheduler - ERROR - Couldn't gather keys {'sum-e85115fa770066da8e0a1836b8c95026': ['tcp://127.0.0.1:56375']} state: ['waiting'] workers: ['tcp://127.0.0.1:56375']
NoneType: None
2022-06-08 15:31:08,834 - distributed.scheduler - ERROR - Shut down workers that don't have promised key: ['tcp://127.0.0.1:56375'], sum-e85115fa770066da8e0a1836b8c95026
NoneType: None
2022-06-08 15:31:08,837 - distributed.nanny - WARNING - Restarting worker
2022-06-08 15:31:08,842 - distributed.client - WARNING - Couldn't gather 1 keys, rescheduling {'sum-e85115fa770066da8e0a1836b8c95026': ('tcp://127.0.0.1:56375',)}
2022-06-08 15:31:09,724 - distributed.nanny - INFO - Worker process 45139 was killed by signal 15
2022-06-08 15:31:09,726 - distributed.scheduler - INFO - Remove worker <WorkerState 'tcp://127.0.0.1:56370', name: 2, status: running, memory: 21, processing: 0>
2022-06-08 15:31:09,726 - distributed.core - INFO - Removing comms to tcp://127.0.0.1:56370
2022-06-08 15:31:09,729 - distributed.scheduler - ERROR - Couldn't gather keys {'sum-e85115fa770066da8e0a1836b8c95026': ['tcp://127.0.0.1:56370']} state: ['waiting'] workers: ['tcp://127.0.0.1:56370']
NoneType: None
2022-06-08 15:31:09,732 - distributed.scheduler - ERROR - Shut down workers that don't have promised key: ['tcp://127.0.0.1:56370'], sum-e85115fa770066da8e0a1836b8c95026
NoneType: None
2022-06-08 15:31:09,735 - distributed.client - WARNING - Couldn't gather 1 keys, rescheduling {'sum-e85115fa770066da8e0a1836b8c95026': ('tcp://127.0.0.1:56370',)}
2022-06-08 15:31:09,738 - distributed.nanny - WARNING - Restarting worker
2022-06-08 15:31:09,819 - distributed.nanny - INFO - Worker process 45145 was killed by signal 15
2022-06-08 15:31:09,822 - distributed.scheduler - INFO - Remove worker <WorkerState 'tcp://127.0.0.1:56376', name: 3, status: running, memory: 21, processing: 0>
2022-06-08 15:31:09,822 - distributed.core - INFO - Removing comms to tcp://127.0.0.1:56376
2022-06-08 15:31:09,826 - distributed.scheduler - ERROR - Couldn't gather keys {'sum-e85115fa770066da8e0a1836b8c95026': ['tcp://127.0.0.1:56376']} state: ['waiting'] workers: ['tcp://127.0.0.1:56376']
NoneType: None
2022-06-08 15:31:09,828 - distributed.scheduler - ERROR - Shut down workers that don't have promised key: ['tcp://127.0.0.1:56376'], sum-e85115fa770066da8e0a1836b8c95026
NoneType: None
2022-06-08 15:31:09,830 - distributed.client - WARNING - Couldn't gather 1 keys, rescheduling {'sum-e85115fa770066da8e0a1836b8c95026': ('tcp://127.0.0.1:56376',)}
2022-06-08 15:31:09,832 - distributed.nanny - WARNING - Restarting worker
2022-06-08 15:31:09,913 - distributed.scheduler - INFO - Remove client Client-01022396-e740-11ec-b081-00505695c07f
2022-06-08 15:31:09,915 - distributed.scheduler - INFO - Remove client Client-01022396-e740-11ec-b081-00505695c07f
2022-06-08 15:31:09,916 - distributed.scheduler - INFO - Close client connection: Client-01022396-e740-11ec-b081-00505695c07f
2022-06-08 15:31:09,919 - distributed.nanny - INFO - Closing Nanny at 'tcp://127.0.0.1:56357'.
2022-06-08 15:31:09,919 - distributed.nanny - INFO - Nanny asking worker to close
2022-06-08 15:31:09,920 - distributed.nanny - INFO - Closing Nanny at 'tcp://127.0.0.1:56358'.
2022-06-08 15:31:09,921 - distributed.nanny - INFO - Nanny asking worker to close
2022-06-08 15:31:09,921 - distributed.worker - INFO - Stopping worker at tcp://127.0.0.1:56408
2022-06-08 15:31:09,924 - distributed.nanny - INFO - Closing Nanny at 'tcp://127.0.0.1:56359'.
2022-06-08 15:31:09,925 - distributed.worker - INFO - Connection to scheduler broken. Closing without reporting. ID: Worker-234fea19-b5af-4e46-bfe0-ae9c510772c3 Address tcp://127.0.0.1:56408 Status: Status.closing
2022-06-08 15:31:09,925 - distributed.nanny - INFO - Nanny asking worker to close
2022-06-08 15:31:09,926 - distributed.nanny - INFO - Closing Nanny at 'tcp://127.0.0.1:56360'.
2022-06-08 15:31:09,926 - distributed.nanny - INFO - Nanny asking worker to close
2022-06-08 15:31:09,931 - distributed.scheduler - INFO - Remove worker <WorkerState 'tcp://127.0.0.1:56408', name: 0, status: closing, memory: 0, processing: 0>
2022-06-08 15:31:09,931 - distributed.core - INFO - Removing comms to tcp://127.0.0.1:56408
2022-06-08 15:31:09,932 - distributed.scheduler - INFO - Lost all workers
2022-06-08 15:31:13,490 - distributed.worker - INFO - Stopping worker
2022-06-08 15:31:13,491 - distributed.worker - INFO - Closed worker has not yet started: Status.init
2022-06-08 15:31:13,951 - distributed.scheduler - INFO - Scheduler closing...
2022-06-08 15:31:13,952 - distributed.scheduler - INFO - Scheduler closing all comms
2022-06-08 15:31:14,171 - distributed.process - INFO - reaping stray process <SpawnProcess name='Dask Worker process (from Nanny)' pid=45835 parent=12417 started daemon>
2022-06-08 15:31:14,173 - distributed.process - INFO - reaping stray process <SpawnProcess name='Dask Worker process (from Nanny)' pid=45836 parent=12417 started daemon>
2022-06-08 15:31:14,180 - distributed.utils_perf - WARNING - full garbage collections took 31% CPU time recently (threshold: 10%)
2022-06-08 15:31:14,468 - distributed.utils_perf - WARNING - full garbage collections took 31% CPU time recently (threshold: 10%)
2022-06-08 15:31:14,731 - distributed.utils_perf - WARNING - full garbage collections took 32% CPU time recently (threshold: 10%)
2022-06-08 15:31:15,003 - distributed.utils_perf - WARNING - full garbage collections took 32% CPU time recently (threshold: 10%)
2022-06-08 15:31:15,207 - distributed.utils_perf - WARNING - full garbage collections took 33% CPU time recently (threshold: 10%)
2022-06-08 15:31:15,408 - distributed.utils_perf - WARNING - full garbage collections took 34% CPU time recently (threshold: 10%)
2022-06-08 15:31:15,606 - distributed.utils_perf - WARNING - full garbage collections took 34% CPU time recently (threshold: 10%)
2022-06-08 15:31:15,976 - distributed.utils_perf - WARNING - full garbage collections took 34% CPU time recently (threshold: 10%)
2022-06-08 15:31:16,215 - distributed.utils_perf - WARNING - full garbage collections took 34% CPU time recently (threshold: 10%)
2022-06-08 15:31:22,038 - distributed.core - INFO - Event loop was unresponsive in Scheduler for 6.42s.  This is often caused by long-running GIL-holding functions or moving large chunks of data. This can cause timeouts and instability.
2022-06-08 15:31:22,039 - distributed.core - INFO - Event loop was unresponsive in Nanny for 6.06s.  This is often caused by long-running GIL-holding functions or moving large chunks of data. This can cause timeouts and instability.
2022-06-08 15:31:22,422 - distributed.utils_perf - WARNING - full garbage collections took 35% CPU time recently (threshold: 10%)
2022-06-08 15:31:23,467 - distributed.utils_perf - WARNING - full garbage collections took 35% CPU time recently (threshold: 10%)
2022-06-08 15:31:23,681 - distributed.utils_perf - WARNING - full garbage collections took 35% CPU time recently (threshold: 10%)
2022-06-08 15:31:24,005 - distributed.utils_perf - WARNING - full garbage collections took 36% CPU time recently (threshold: 10%)
2022-06-08 15:31:24,367 - distributed.utils_perf - WARNING - full garbage collections took 36% CPU time recently (threshold: 10%)
2022-06-08 15:31:24,638 - distributed.utils_perf - WARNING - full garbage collections took 37% CPU time recently (threshold: 10%)
2022-06-08 15:31:24,903 - distributed.utils_perf - WARNING - full garbage collections took 37% CPU time recently (threshold: 10%)
2022-06-08 15:31:25,182 - distributed.utils_perf - WARNING - full garbage collections took 38% CPU time recently (threshold: 10%)
2022-06-08 15:31:25,474 - distributed.utils_perf - WARNING - full garbage collections took 42% CPU time recently (threshold: 10%)
2022-06-08 15:31:25,762 - distributed.utils_perf - WARNING - full garbage collections took 43% CPU time recently (threshold: 10%)
2022-06-08 15:31:26,021 - distributed.utils_perf - WARNING - full garbage collections took 45% CPU time recently (threshold: 10%)
2022-06-08 15:31:26,386 - distributed.utils_perf - WARNING - full garbage collections took 47% CPU time recently (threshold: 10%)
2022-06-08 15:31:26,631 - distributed.utils_perf - WARNING - full garbage collections took 47% CPU time recently (threshold: 10%)
2022-06-08 15:31:26,844 - distributed.utils_perf - WARNING - full garbage collections took 48% CPU time recently (threshold: 10%)
2022-06-08 15:31:27,055 - distributed.utils_perf - WARNING - full garbage collections took 58% CPU time recently (threshold: 10%)
2022-06-08 15:31:27,256 - distributed.utils_perf - WARNING - full garbage collections took 63% CPU time recently (threshold: 10%)
2022-06-08 15:31:27,456 - distributed.utils_perf - WARNING - full garbage collections took 63% CPU time recently (threshold: 10%)
2022-06-08 15:31:27,731 - distributed.utils_perf - WARNING - full garbage collections took 69% CPU time recently (threshold: 10%)
2022-06-08 15:31:27,991 - distributed.utils_perf - WARNING - full garbage collections took 69% CPU time recently (threshold: 10%)
2022-06-08 15:31:28,252 - distributed.utils_perf - WARNING - full garbage collections took 72% CPU time recently (threshold: 10%)
2022-06-08 15:31:28,487 - distributed.utils_perf - WARNING - full garbage collections took 74% CPU time recently (threshold: 10%)
2022-06-08 15:31:28,702 - distributed.utils_perf - WARNING - full garbage collections took 97% CPU time recently (threshold: 10%)
2022-06-08 15:31:28,923 - distributed.utils_perf - WARNING - full garbage collections took 97% CPU time recently (threshold: 10%)
2022-06-08 15:31:29,150 - distributed.utils_perf - WARNING - full garbage collections took 97% CPU time recently (threshold: 10%)
2022-06-08 15:31:29,375 - distributed.utils_perf - WARNING - full garbage collections took 97% CPU time recently (threshold: 10%)
2022-06-08 15:31:29,594 - distributed.utils_perf - WARNING - full garbage collections took 97% CPU time recently (threshold: 10%)
2022-06-08 15:31:29,801 - distributed.utils_perf - WARNING - full garbage collections took 97% CPU time recently (threshold: 10%)
2022-06-08 15:31:30,001 - distributed.utils_perf - WARNING - full garbage collections took 97% CPU time recently (threshold: 10%)
2022-06-08 15:31:30,267 - distributed.utils_perf - WARNING - full garbage collections took 97% CPU time recently (threshold: 10%)
2022-06-08 15:31:30,527 - distributed.utils_perf - WARNING - full garbage collections took 97% CPU time recently (threshold: 10%)
2022-06-08 15:31:30,852 - distributed.utils_perf - WARNING - full garbage collections took 97% CPU time recently (threshold: 10%)

...
2022-06-08 15:32:09,244 - distributed.utils_perf - WARNING - full garbage collections took 97% CPU time recently (threshold: 10%)
2022-06-08 15:32:09,469 - distributed.utils_perf - WARNING - full garbage collections took 97% CPU time recently (threshold: 10%)
2022-06-08 15:32:09,796 - distributed.utils_perf - WARNING - full garbage collections took 97% CPU time recently (threshold: 10%)
2022-06-08 15:32:10,128 - distributed.utils_perf - WARNING - full garbage collections took 97% CPU time recently (threshold: 10%)
2022-06-08 15:32:10,330 - distributed.utils_perf - WARNING - full garbage collections took 97% CPU time recently (threshold: 10%)
2022-06-08 15:32:10,527 - distributed.utils_perf - WARNING - full garbage collections took 97% CPU time recently (threshold: 10%)
2022-06-08 15:32:10,721 - distributed.utils_perf - WARNING - full garbage collections took 97% CPU time recently (threshold: 10%)
2022-06-08 15:32:11,045 - distributed.utils_perf - WARNING - full garbage collections took 97% CPU time recently (threshold: 10%)
2022-06-08 15:32:11,244 - distributed.utils_perf - WARNING - full garbage collections took 97% CPU time recently (threshold: 10%)
2022-06-08 15:32:11,441 - distributed.utils_perf - WARNING - full garbage collections took 97% CPU time recently (threshold: 10%)
2022-06-08 15:32:11,745 - distributed.utils_perf - WARNING - full garbage collections took 97% CPU time recently (threshold: 10%)
2022-06-08 15:32:12,027 - distributed.utils_perf - WARNING - full garbage collections took 97% CPU time recently (threshold: 10%)
2022-06-08 15:32:12,309 - distributed.utils_perf - WARNING - full garbage collections took 97% CPU time recently (threshold: 10%)
2022-06-08 15:32:12,532 - distributed.utils_perf - WARNING - full garbage collections took 97% CPU time recently (threshold: 10%)
2022-06-08 15:32:12,729 - distributed.utils_perf - WARNING - full garbage collections took 97% CPU time recently (threshold: 10%)
2022-06-08 15:32:12,989 - distributed.utils_perf - WARNING - full garbage collections took 97% CPU time recently (threshold: 10%)
2022-06-08 15:32:13,311 - distributed.utils_perf - WARNING - full garbage collections took 97% CPU time recently (threshold: 10%)
2022-06-08 15:32:13,628 - distributed.utils_perf - WARNING - full garbage collections took 97% CPU time recently (threshold: 10%)
2022-06-08 15:32:13,904 - distributed.utils_perf - WARNING - full garbage collections took 97% CPU time recently (threshold: 10%)
2022-06-08 15:32:13,959 - distributed.scheduler - INFO - Remove worker <WorkerState 'tcp://127.0.0.1:56342', name: 0, status: running, memory: 0, processing: 0>
2022-06-08 15:32:13,959 - distributed.core - INFO - Removing comms to tcp://127.0.0.1:56342
2022-06-08 15:32:13,959 - distributed.worker - INFO - Connection to scheduler broken. Closing without reporting. ID: Worker-c98c8245-dba5-49ae-b23a-0f518fece5f0 Address tcp://127.0.0.1:56342 Status: Status.running
2022-06-08 15:32:13,959 - distributed.scheduler - INFO - Lost all workers
2022-06-08 15:46:52,352 - distributed.utils_perf - WARNING - full garbage collections took 28% CPU time recently (threshold: 10%)
2022-06-08 15:46:53,358 - distributed.utils_perf - WARNING - full garbage collections took 27% CPU time recently (threshold: 10%)
2022-06-08 15:46:54,328 - distributed.utils_perf - WARNING - full garbage collections took 26% CPU time recently (threshold: 10%)
2022-06-08 15:32:13,959 - distributed.worker - INFO - Stopping worker at tcp://127.0.0.1:56342
------------------------------ Captured log call -------------------------------
ERROR    asyncio.events:utils.py:787 
Traceback (most recent call last):
  File "/Users/runner/miniconda3/envs/dask-distributed/lib/python3.10/asyncio/runners.py", line 44, in run
    return loop.run_until_complete(main)
  File "/Users/runner/miniconda3/envs/dask-distributed/lib/python3.10/asyncio/base_events.py", line 646, in run_until_complete
    return future.result()
  File "/Users/runner/work/distributed/distributed/distributed/utils_test.py", line 470, in inner_fn
    return await async_fn(*args, **kwargs)
  File "/Users/runner/work/distributed/distributed/distributed/utils_test.py", line 1209, in async_fn_outer
    return await asyncio.wait_for(async_fn(), timeout=timeout * 2)
  File "/Users/runner/miniconda3/envs/dask-distributed/lib/python3.10/asyncio/tasks.py", line 445, in wait_for
    return fut.result()
  File "/Users/runner/work/distributed/distributed/distributed/utils_test.py", line 1190, in async_fn
    raise RuntimeError("Unclosed Comms", get_unclosed())
RuntimeError: ('Unclosed Comms', [<TCP  local=tcp://127.0.0.1:56326 remote=tcp://127.0.0.1:56354>, <TCP (closed) Scheduler connection to worker local=tcp://127.0.0.1:56326 remote=tcp://127.0.0.1:56349>, <TCP ConnectionPool local=tcp://127.0.0.1:56331 remote=tcp://127.0.0.1:56326>, <TCP  local=tcp://127.0.0.1:56326 remote=tcp://127.0.0.1:56331>])
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
  File "/Users/runner/work/distributed/distributed/distributed/utils.py", line 761, in wrapper
    return await func(*args, **kwargs)
  File "/Users/runner/work/distributed/distributed/distributed/scheduler.py", line 3699, in add_worker
    await self.handle_worker(comm=comm, worker=address, stimulus_id=stimulus_id)
  File "/Users/runner/work/distributed/distributed/distributed/scheduler.py", line 4814, in handle_worker
    await self.handle_stream(comm=comm, extra={"worker": worker})
  File "/Users/runner/work/distributed/distributed/distributed/core.py", line 649, in handle_stream
    msgs = await comm.read()
  File "/Users/runner/work/distributed/distributed/distributed/comm/tcp.py", line 226, in read
    frames_nbytes = await stream.read_bytes(fmt_size)
asyncio.exceptions.CancelledError
- generated xml file: /Users/runner/work/distributed/distributed/reports/pytest.xml -

Metadata

Metadata

Assignees

No one assigned

    Labels

    flaky test — Intermittent failures on CI.

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions