Skip to content

Flaky test test_rearrange #5867

@TomAugspurger

Description

@TomAugspurger
=================================== FAILURES ===================================

________________________ test_rearrange[processes-disk] ________________________

shuffle = 'disk', scheduler = 'processes'

    @pytest.mark.parametrize("shuffle", ["tasks", "disk"])

    @pytest.mark.parametrize("scheduler", ["threads", "processes"])

    def test_rearrange(shuffle, scheduler):

        df = pd.DataFrame({"x": np.random.random(10)})

        ddf = dd.from_pandas(df, npartitions=4)

        ddf2 = ddf.assign(_partitions=ddf.x % 4)

    

        result = rearrange_by_column(ddf2, "_partitions", max_branch=32, shuffle=shuffle)

        assert result.npartitions == ddf.npartitions

        assert set(ddf.dask).issubset(result.dask)

    

        # Every value in exactly one partition

>       a = result.compute(scheduler=scheduler)

dask/dataframe/tests/test_shuffle.py:272: 

_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

dask/base.py:166: in compute

    (result,) = compute(self, traverse=False, **kwargs)

dask/base.py:437: in compute

    results = schedule(dsk, keys, **kwargs)

dask/multiprocessing.py:212: in get

    result = get_async(

dask/local.py:486: in get_async

    raise_exception(exc, tb)

dask/local.py:316: in reraise

    raise exc

dask/local.py:222: in execute_task

    result = _execute_task(task, data)

dask/core.py:119: in _execute_task

    return func(*args2)

dask/dataframe/shuffle.py:669: in shuffle_group_3

    p.append(d, fsync=True)

../../../miniconda/envs/test-environment/lib/python3.8/site-packages/partd/encode.py:25: in append

    self.partd.append(data, **kwargs)

../../../miniconda/envs/test-environment/lib/python3.8/site-packages/partd/file.py:37: in append

    if lock: self.lock.acquire()

../../../miniconda/envs/test-environment/lib/python3.8/site-packages/locket/__init__.py:99: in acquire

    lock.acquire()

_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

>   self._file = open(self._path, "w")

E   FileNotFoundError: [Errno 2] No such file or directory: '/tmp/tmpn5wlil7v.partd/.lock'

I'll look into this a bit. Both failures were on Python 3.8, IIRC.

Metadata

Metadata

Assignees

Labels

tests: Unit tests and/or continuous integration

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions