Skip to content

Failures on GPU CI #8825

@jsignell

Description

@jsignell

Some failures just started cropping up on GPU CI. I thought they might be related to #8818, but the tests passed fine there. However, it looks like it might just be an out-of-memory error, so I'm really not sure what the cause is. Here's an example:

engine = 'cudf', how = 'leftanti', parts = (3, 3)

    @pytest.mark.parametrize("parts", [(3, 3), (3, 1), (1, 3)])
    @pytest.mark.parametrize("how", ["leftsemi", "leftanti"])
    @pytest.mark.parametrize(
        "engine",
        [
            pytest.param(
                "pandas",
                marks=pytest.mark.xfail(
                    reason="Pandas does not support leftsemi or leftanti"
                ),
            ),
            pytest.param("cudf", marks=pytest.mark.gpu),
        ],
    )
    def test_merge_tasks_semi_anti_cudf(engine, how, parts):
        if engine == "cudf":
            # NOTE: engine == "cudf" requires cudf/dask_cudf,
            # will be skipped by non-GPU CI.
    
            cudf = pytest.importorskip("cudf")
            dask_cudf = pytest.importorskip("dask_cudf")
    
        emp = pd.DataFrame(
            {
                "emp_id": np.arange(101, stop=106),
                "name": ["John", "Tom", "Harry", "Rahul", "Sakil"],
                "city": ["Cal", "Mum", "Del", "Ban", "Del"],
                "salary": [50000, 40000, 80000, 60000, 90000],
            }
        )
        skills = pd.DataFrame(
            {
                "skill_id": [404, 405, 406, 407, 408],
                "emp_id": [103, 101, 105, 102, 101],
                "skill_name": ["Dask", "Spark", "C", "Python", "R"],
            }
        )
    
        if engine == "cudf":
>           emp = cudf.from_pandas(emp)

dask/dataframe/tests/test_multi.py:911: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
/opt/conda/envs/dask/lib/python3.9/contextlib.py:79: in inner
    return func(*args, **kwds)
/opt/conda/envs/dask/lib/python3.9/site-packages/cudf/core/dataframe.py:6282: in from_pandas
    return DataFrame.from_pandas(obj, nan_as_null=nan_as_null)
/opt/conda/envs/dask/lib/python3.9/contextlib.py:79: in inner
    return func(*args, **kwds)
/opt/conda/envs/dask/lib/python3.9/site-packages/cudf/core/dataframe.py:4376: in from_pandas
    df[col_name] = column.as_column(
/opt/conda/envs/dask/lib/python3.9/site-packages/cudf/core/column/column.py:1987: in as_column
    data = as_column(
/opt/conda/envs/dask/lib/python3.9/site-packages/cudf/core/column/column.py:1780: in as_column
    col = ColumnBase.from_arrow(arbitrary)
/opt/conda/envs/dask/lib/python3.9/site-packages/cudf/core/column/column.py:302: in from_arrow
    result = libcudf.interop.from_arrow(data, data.column_names)[0]["None"]
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

>   ???
E   RuntimeError: CUDA error encountered at: /workspace/.conda-bld/work/cpp/src/bitmask/null_mask.cu:93: 2 cudaErrorMemoryAllocation out of memory

cudf/_lib/interop.pyx:167: RuntimeError

https://gpuci.gpuopenanalytics.com/job/dask/job/dask/job/prb/job/dask-prb/1554/

ping @charlesbluca @quasiben @rjzamora

Metadata

Metadata

Assignees

No one assigned

    Labels

    tests: Unit tests and/or continuous integration

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions