Add a filter for the numeric_only warning. #9271
Closed
Conversation
…lse is broken [test-upstream]
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit. This suggestion is invalid because no changes were made to the code. Suggestions cannot be applied while the pull request is closed. Suggestions cannot be applied while viewing a subset of changes. Only one suggestion per line can be applied in a batch. Add this suggestion to a batch that can be applied as a single commit. Applying suggestions on deleted lines is not supported. You must change the existing code in this line in order to create a valid suggestion. Outdated suggestions cannot be applied. This suggestion has been applied or marked resolved. Suggestions cannot be applied from pending reviews. Suggestions cannot be applied on multi-line comments. Suggestions cannot be applied while the pull request is queued to merge. Suggestion cannot be applied right now. Please check back later.
This is mostly to see how many remaining failures there are once we filter out the warning.
`pre-commit run --all-files`

It looks like there is still an issue with groupby. So this is what happens if I ignore the warning.
________________________ test_groupby_reduction_split[tasks-split_out] _________________________ [gw5] linux -- Python 3.9.13 /home/julia/conda/envs/dask-upstream/bin/python keyword = 'split_out' @pytest.mark.filterwarnings( "ignore:The default value of numeric_only:FutureWarning", ) @pytest.mark.parametrize("keyword", ["split_every", "split_out"]) def test_groupby_reduction_split(keyword): pdf = pd.DataFrame( {"a": [1, 2, 6, 4, 4, 6, 4, 3, 7] * 100, "b": [4, 2, 7, 3, 3, 1, 1, 1, 2] * 100} ) ddf = dd.from_pandas(pdf, npartitions=15) def call(g, m, **kwargs): return getattr(g, m)(**kwargs) # DataFrame for m in AGG_FUNCS: # nunique is not implemented for DataFrameGroupBy # covariance/correlation is not a series aggregation if m in ("nunique", "cov", "corr"): continue res = call(ddf.groupby("b"), m, **{keyword: 2}) > sol = call(pdf.groupby("b"), m) dask/dataframe/tests/test_groupby.py:774: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ dask/dataframe/tests/test_groupby.py:765: in call return getattr(g, m)(**kwargs) ../conda/envs/dask-upstream/lib/python3.9/site-packages/pandas/core/groupby/groupby.py:2423: in prod return self._agg_general( ../conda/envs/dask-upstream/lib/python3.9/site-packages/pandas/core/groupby/groupby.py:1662: in _agg_general result = self._cython_agg_general( ../conda/envs/dask-upstream/lib/python3.9/site-packages/pandas/core/groupby/groupby.py:1756: in _cython_agg_general new_mgr = data.grouped_reduce(array_func, ignore_failures=ignore_failures) ../conda/envs/dask-upstream/lib/python3.9/site-packages/pandas/core/internals/managers.py:1334: in grouped_reduce applied = blk.apply(func) ../conda/envs/dask-upstream/lib/python3.9/site-packages/pandas/core/internals/blocks.py:351: in apply result = func(self.values, **kwargs) ../conda/envs/dask-upstream/lib/python3.9/site-packages/pandas/core/groupby/groupby.py:1742: in array_func result = self.grouper._cython_operation( 
../conda/envs/dask-upstream/lib/python3.9/site-packages/pandas/core/groupby/ops.py:1008: in _cython_operation return cy_op.cython_operation( ../conda/envs/dask-upstream/lib/python3.9/site-packages/pandas/core/groupby/ops.py:676: in cython_operation return self._cython_op_ndim_compat( ../conda/envs/dask-upstream/lib/python3.9/site-packages/pandas/core/groupby/ops.py:508: in _cython_op_ndim_compat return self._call_cython_op( ../conda/envs/dask-upstream/lib/python3.9/site-packages/pandas/core/groupby/ops.py:630: in _call_cython_op op_result = maybe_downcast_to_dtype(result, res_dtype) ../conda/envs/dask-upstream/lib/python3.9/site-packages/pandas/core/dtypes/cast.py:294: in maybe_downcast_to_dtype converted = maybe_downcast_numeric(result, dtype, do_round) _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ result = array([[5.41065251e+185, 4.10018609e+114, 2.58224988e+120, 1.00000000e+000, 6.53318624e+077]]) dtype = dtype('int64'), do_round = False def maybe_downcast_numeric( result: ArrayLike, dtype: DtypeObj, do_round: bool = False ) -> ArrayLike: """ Subset of maybe_downcast_to_dtype restricted to numeric dtypes. Parameters ---------- result : ndarray or ExtensionArray dtype : np.dtype or ExtensionDtype do_round : bool Returns ------- ndarray or ExtensionArray """ if not isinstance(dtype, np.dtype) or not isinstance(result.dtype, np.dtype): # e.g. 
SparseDtype has no itemsize attr return result def trans(x): if do_round: return x.round() return x if dtype.kind == result.dtype.kind: # don't allow upcasts here (except if empty) if result.dtype.itemsize <= dtype.itemsize and result.size: return result if is_bool_dtype(dtype) or is_integer_dtype(dtype): if not result.size: # if we don't have any elements, just astype it return trans(result).astype(dtype) # do a test on the first element, if it fails then we are done r = result.ravel() arr = np.array([r[0]]) if isna(arr).any(): # if we have any nulls, then we are done return result elif not isinstance(r[0], (np.integer, np.floating, int, float, bool)): # a comparable, e.g. a Decimal may slip in here return result if ( issubclass(result.dtype.type, (np.object_, np.number)) and notna(result).all() ): > new_result = trans(result).astype(dtype) E RuntimeWarning: invalid value encountered in cast ../conda/envs/dask-upstream/lib/python3.9/site-packages/pandas/core/dtypes/cast.py:380: RuntimeWarning