Skip to content

Add a filter for the numeric_only warning. #9271

Closed
jsignell wants to merge 1 commit into dask:main from
jsignell:filter-warnings
Closed

Add a filter for the numeric_only warning. #9271
jsignell wants to merge 1 commit into dask:main from
jsignell:filter-warnings

Conversation

@jsignell
Copy link
Copy Markdown
Member

@jsignell jsignell commented Jul 14, 2022

This is mostly to see how many remaining failures there are once we filter out the warning.

  • Closes #xxxx
  • Tests added / passed
  • Passes pre-commit run --all-files

It looks like there is still an issue with groupby. So this is what happens if I ignore the warning.

________________________ test_groupby_reduction_split[tasks-split_out] _________________________
[gw5] linux -- Python 3.9.13 /home/julia/conda/envs/dask-upstream/bin/python

keyword = 'split_out'

    @pytest.mark.filterwarnings(
        "ignore:The default value of numeric_only:FutureWarning",
    )
    @pytest.mark.parametrize("keyword", ["split_every", "split_out"])
    def test_groupby_reduction_split(keyword):
        pdf = pd.DataFrame(
            {"a": [1, 2, 6, 4, 4, 6, 4, 3, 7] * 100, "b": [4, 2, 7, 3, 3, 1, 1, 1, 2] * 100}
        )
        ddf = dd.from_pandas(pdf, npartitions=15)
    
        def call(g, m, **kwargs):
            return getattr(g, m)(**kwargs)
    
        # DataFrame
        for m in AGG_FUNCS:
            # nunique is not implemented for DataFrameGroupBy
            # covariance/correlation is not a series aggregation
            if m in ("nunique", "cov", "corr"):
                continue
            res = call(ddf.groupby("b"), m, **{keyword: 2})
>           sol = call(pdf.groupby("b"), m)

dask/dataframe/tests/test_groupby.py:774: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
dask/dataframe/tests/test_groupby.py:765: in call
    return getattr(g, m)(**kwargs)
../conda/envs/dask-upstream/lib/python3.9/site-packages/pandas/core/groupby/groupby.py:2423: in prod
    return self._agg_general(
../conda/envs/dask-upstream/lib/python3.9/site-packages/pandas/core/groupby/groupby.py:1662: in _agg_general
    result = self._cython_agg_general(
../conda/envs/dask-upstream/lib/python3.9/site-packages/pandas/core/groupby/groupby.py:1756: in _cython_agg_general
    new_mgr = data.grouped_reduce(array_func, ignore_failures=ignore_failures)
../conda/envs/dask-upstream/lib/python3.9/site-packages/pandas/core/internals/managers.py:1334: in grouped_reduce
    applied = blk.apply(func)
../conda/envs/dask-upstream/lib/python3.9/site-packages/pandas/core/internals/blocks.py:351: in apply
    result = func(self.values, **kwargs)
../conda/envs/dask-upstream/lib/python3.9/site-packages/pandas/core/groupby/groupby.py:1742: in array_func
    result = self.grouper._cython_operation(
../conda/envs/dask-upstream/lib/python3.9/site-packages/pandas/core/groupby/ops.py:1008: in _cython_operation
    return cy_op.cython_operation(
../conda/envs/dask-upstream/lib/python3.9/site-packages/pandas/core/groupby/ops.py:676: in cython_operation
    return self._cython_op_ndim_compat(
../conda/envs/dask-upstream/lib/python3.9/site-packages/pandas/core/groupby/ops.py:508: in _cython_op_ndim_compat
    return self._call_cython_op(
../conda/envs/dask-upstream/lib/python3.9/site-packages/pandas/core/groupby/ops.py:630: in _call_cython_op
    op_result = maybe_downcast_to_dtype(result, res_dtype)
../conda/envs/dask-upstream/lib/python3.9/site-packages/pandas/core/dtypes/cast.py:294: in maybe_downcast_to_dtype
    converted = maybe_downcast_numeric(result, dtype, do_round)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

result = array([[5.41065251e+185, 4.10018609e+114, 2.58224988e+120,
        1.00000000e+000, 6.53318624e+077]])
dtype = dtype('int64'), do_round = False

    def maybe_downcast_numeric(
        result: ArrayLike, dtype: DtypeObj, do_round: bool = False
    ) -> ArrayLike:
        """
        Subset of maybe_downcast_to_dtype restricted to numeric dtypes.
    
        Parameters
        ----------
        result : ndarray or ExtensionArray
        dtype : np.dtype or ExtensionDtype
        do_round : bool
    
        Returns
        -------
        ndarray or ExtensionArray
        """
        if not isinstance(dtype, np.dtype) or not isinstance(result.dtype, np.dtype):
            # e.g. SparseDtype has no itemsize attr
            return result
    
        def trans(x):
            if do_round:
                return x.round()
            return x
    
        if dtype.kind == result.dtype.kind:
            # don't allow upcasts here (except if empty)
            if result.dtype.itemsize <= dtype.itemsize and result.size:
                return result
    
        if is_bool_dtype(dtype) or is_integer_dtype(dtype):
    
            if not result.size:
                # if we don't have any elements, just astype it
                return trans(result).astype(dtype)
    
            # do a test on the first element, if it fails then we are done
            r = result.ravel()
            arr = np.array([r[0]])
    
            if isna(arr).any():
                # if we have any nulls, then we are done
                return result
    
            elif not isinstance(r[0], (np.integer, np.floating, int, float, bool)):
                # a comparable, e.g. a Decimal may slip in here
                return result
    
            if (
                issubclass(result.dtype.type, (np.object_, np.number))
                and notna(result).all()
            ):
>               new_result = trans(result).astype(dtype)
E               RuntimeWarning: invalid value encountered in cast

../conda/envs/dask-upstream/lib/python3.9/site-packages/pandas/core/dtypes/cast.py:380: RuntimeWarning

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

1 participant