Skip to content

Flaky test_pivot_table #6729

@TomAugspurger

Description

@TomAugspurger
dask/dataframe/tests/test_reshape.py::test_pivot_table[sum-values2-67-100] FAILED                                                                                                                                                                                                                           [ 67%]
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> traceback >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
values = ['B', 'D'], aggfunc = 'sum'

    @pytest.mark.parametrize("values", ["B", ["B"], ["B", "D"]])
    @pytest.mark.parametrize("aggfunc", ["mean", "sum", "count"])
    def test_pivot_table(values, aggfunc):
        df = pd.DataFrame(
            {
                "A": np.random.choice(list("XYZ"), size=100),
                "B": np.random.randn(100),
                "C": pd.Categorical(np.random.choice(list("abc"), size=100)),
                "D": np.random.randn(100),
            }
        )
        ddf = dd.from_pandas(df, 5)

        res = dd.pivot_table(ddf, index="A", columns="C", values=values, aggfunc=aggfunc)
        exp = pd.pivot_table(df, index="A", columns="C", values=values, aggfunc=aggfunc)
        if aggfunc == "count":
            # dask result cannot be int64 dtype depending on divisions because of NaN
            exp = exp.astype(np.float64)

>       assert_eq(res, exp)

dask/dataframe/tests/test_reshape.py:192:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _dask/dataframe/utils.py:811: in assert_eq
    a = _check_dask(a, check_names=check_names, check_dtypes=check_dtypes)
dask/dataframe/utils.py:754: in _check_dask
    tm.assert_index_equal(dsk.columns, result.columns)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
l = CategoricalIndex(['a', 'b', 'c', 'a', 'b', 'c'], categories=['a', 'b', 'c'], ordered=False, name='C', dtype='category'), r = Index(['a', 'b', 'c', 'a', 'b', 'c'], dtype='object', name='C'), obj = 'MultiIndex level [1]'

    def _check_types(l, r, obj="Index"):
        if exact:
>           assert_class_equal(l, r, exact=exact, obj=obj)
E           AssertionError: MultiIndex level [1] are different
E
E           MultiIndex level [1] classes are not equivalent
E           [left]:  CategoricalIndex(['a', 'b', 'c', 'a', 'b', 'c'], categories=['a', 'b', 'c'], ordered=False, name='C', dtype='category')
E           [right]: Index(['a', 'b', 'c', 'a', 'b', 'c'], dtype='object', name='C')

../../../Envs/dask-dev/lib/python3.8/site-packages/pandas/util/testing.py:611: AssertionError
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> entering PDB >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>

Noticed in #6727. I can take a look later.

Metadata

Metadata

Assignees

No one assigned

    Labels

    dataframe, tests (Unit tests and/or continuous integration)

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions