6767 CHECK_FREQ ["check_freq" ] = False
6868
6969
70+ def _drop_mean (df , col = None ):
71+ """TODO: In pandas 2.0, mean is implemented for datetimes, but Dask returns None."""
72+ if isinstance (df , pd .DataFrame ):
73+ df .at ["mean" , col ] = np .nan
74+ df .dropna (how = "all" , inplace = True )
75+ elif isinstance (df , pd .Series ):
76+ df .drop (labels = ["mean" ], inplace = True , errors = "ignore" )
77+ else :
78+ raise NotImplementedError ("Expected Series or DataFrame with mean" )
79+ return df
80+
81+
7082def test_dataframe_doc ():
7183 doc = d .add .__doc__
7284 disclaimer = "Some inconsistencies with the Dask version may exist."
@@ -501,7 +513,7 @@ def test_describe(include, exclude, percentiles, subset):
501513
502514 ddf = dd .from_pandas (df , 2 )
503515
504- if PANDAS_GT_110 :
516+ if PANDAS_GT_110 and not PANDAS_GT_200 :
505517 datetime_is_numeric_kwarg = {"datetime_is_numeric" : True }
506518 else :
507519 datetime_is_numeric_kwarg = {}
@@ -520,9 +532,8 @@ def test_describe(include, exclude, percentiles, subset):
520532 ** datetime_is_numeric_kwarg ,
521533 )
522534
523- if "e" in expected and datetime_is_numeric_kwarg :
524- expected .at ["mean" , "e" ] = np .nan
525- expected .dropna (how = "all" , inplace = True )
535+ if "e" in expected and (datetime_is_numeric_kwarg or PANDAS_GT_200 ):
536+ expected = _drop_mean (expected , "e" )
526537
527538 assert_eq (actual , expected )
528539
@@ -532,8 +543,8 @@ def test_describe(include, exclude, percentiles, subset):
532543 expected = df [col ].describe (
533544 include = include , exclude = exclude , ** datetime_is_numeric_kwarg
534545 )
535- if col == "e" and datetime_is_numeric_kwarg :
536- expected . drop ( "mean" , inplace = True )
546+ if col == "e" and ( datetime_is_numeric_kwarg or PANDAS_GT_200 ) :
547+ expected = _drop_mean ( expected )
537548 actual = ddf [col ].describe (
538549 include = include , exclude = exclude , ** datetime_is_numeric_kwarg
539550 )
@@ -560,13 +571,25 @@ def test_describe_without_datetime_is_numeric():
560571 ddf = dd .from_pandas (df , 2 )
561572
562573 # Assert
563- assert_eq (ddf .describe (), df .describe ())
574+ expected = df .describe ()
575+ if PANDAS_GT_200 :
576+ expected = _drop_mean (expected , "e" )
577+
578+ assert_eq (ddf .describe (), expected )
564579
565580 # Check series
566581 for col in ["a" , "c" ]:
567582 assert_eq (df [col ].describe (), ddf [col ].describe ())
568583
569- if PANDAS_GT_110 :
584+ if PANDAS_GT_200 :
585+ expected = _drop_mean (df .e .describe ())
586+ assert_eq (expected , ddf .e .describe ())
587+ with pytest .raises (
588+ TypeError ,
589+ match = "datetime_is_numeric is removed in pandas>=2.0.0" ,
590+ ):
591+ ddf .e .describe (datetime_is_numeric = True )
592+ elif PANDAS_GT_110 :
570593 with pytest .warns (
571594 FutureWarning ,
572595 match = (
@@ -575,10 +598,11 @@ def test_describe_without_datetime_is_numeric():
575598 ):
576599 ddf .e .describe ()
577600 else :
578- assert_eq (df .e .describe (), ddf .e .describe ())
601+ expected = _drop_mean (df .e .describe ())
602+ assert_eq (expected , ddf .e .describe ())
579603 with pytest .raises (
580604 NotImplementedError ,
581- match = "datetime_is_numeric=True is only supported for pandas >= 1.1.0" ,
605+ match = "datetime_is_numeric=True is only supported for pandas >= 1.1.0, < 2.0.0 " ,
582606 ):
583607 ddf .e .describe (datetime_is_numeric = True )
584608
0 commit comments