@@ -1654,6 +1654,7 @@ def get_group(self, key):
16541654
16551655 @_aggregate_docstring ()
16561656 def aggregate (self , arg , split_every = None , split_out = 1 , shuffle = None ):
1657+ column_projection = None
16571658 if isinstance (self .obj , DataFrame ):
16581659 if isinstance (self .by , tuple ) or np .isscalar (self .by ):
16591660 group_columns = {self .by }
@@ -1681,6 +1682,10 @@ def aggregate(self, arg, split_every=None, split_out=1, shuffle=None):
16811682
16821683 spec = _normalize_spec (arg , non_group_columns )
16831684
1685+ # Check if the aggregation involves implicit column projection
1686+ if isinstance (arg , dict ):
1687+ column_projection = group_columns | arg .keys ()
1688+
16841689 elif isinstance (self .obj , Series ):
16851690 if isinstance (arg , (list , tuple , dict )):
16861691 # implementation detail: if self.obj is a series, a pseudo column
@@ -1709,11 +1714,17 @@ def aggregate(self, arg, split_every=None, split_out=1, shuffle=None):
17091714 else :
17101715 levels = 0
17111716
1717+ # Add an explicit `getitem` operation if the groupby
1718+ # aggregation involves implicit column projection.
1719+ # This makes it possible for the column-projection
1720+ # to be pushed into the IO layer
1721+ _obj = self .obj [list (column_projection )] if column_projection else self .obj
1722+
17121723 if not isinstance (self .by , list ):
1713- chunk_args = [self . obj , self .by ]
1724+ chunk_args = [_obj , self .by ]
17141725
17151726 else :
1716- chunk_args = [self . obj ] + self .by
1727+ chunk_args = [_obj ] + self .by
17171728
17181729 if not PANDAS_GT_110 and self .dropna :
17191730 raise NotImplementedError (
0 commit comments