fix(bigframes): include pyopenssl as a dependency (#17362)

tswast · gemini-code-assist[bot] · web-flow · commit 1f6205ee5a37 · 2026-06-03T14:22:07.000-07:00
Also, support pandas 3.0 in various system tests.

Internal issue b/519591816 🦕

---------

Co-authored-by: gemini-code-assist[bot] &lt;176961590+gemini-code-assist[bot]@users.noreply.github.com&gt;
diff --git a/packages/bigframes/bigframes/bigquery/_operations/ai.py b/packages/bigframes/bigframes/bigquery/_operations/ai.py
@@ -61,7 +61,7 @@ def generate(
         >>> import bigframes.pandas as bpd
         >>> import bigframes.bigquery as bbq
         >>> country = bpd.Series(["Japan", "Canada"])
-        >>> bbq.ai.generate(("What's the capital city of ", country, " one word only"))
+        >>> bbq.ai.generate(("What's the capital city of ", country, " one word only")) # doctest: +ELLIPSIS
         0    {'result': 'Tokyo', 'full_response': '{"cand...
         1    {'result': 'Ottawa', 'full_response': '{"can...
         dtype: struct<result: string, full_response: extension<dbjson<JSONArrowType>>, status: string>[pyarrow]
@@ -231,8 +231,8 @@ def generate_int(
 
         >>> import bigframes.pandas as bpd
         >>> import bigframes.bigquery as bbq
-        >>> animal = bpd.Series(["Kangaroo", "Rabbit", "Spider"])
-        >>> bbq.ai.generate_int(("How many legs does a ", animal, " have?"))
+        >>> animal = bpd.Series(["Ostrich", "Rabbit", "Spider"])
+        >>> bbq.ai.generate_int(("How many legs does a ", animal, " have?")) # doctest: +ELLIPSIS
         0    {'result': 2, 'full_response': '{"candidates":...
         1    {'result': 4, 'full_response': '{"candidates":...
         2    {'result': 8, 'full_response': '{"candidates":...
@@ -305,8 +305,8 @@ def generate_double(
 
         >>> import bigframes.pandas as bpd
         >>> import bigframes.bigquery as bbq
-        >>> animal = bpd.Series(["Kangaroo", "Rabbit", "Spider"])
-        >>> bbq.ai.generate_double(("How many legs does a ", animal, " have?"))
+        >>> animal = bpd.Series(["Ostrich", "Rabbit", "Spider"])
+        >>> bbq.ai.generate_double(("How many legs does a ", animal, " have?")) # doctest: +ELLIPSIS
         0    {'result': 2.0, 'full_response': '{"candidates...
         1    {'result': 4.0, 'full_response': '{"candidates...
         2    {'result': 8.0, 'full_response': '{"candidates...
@@ -383,7 +383,7 @@ def generate_embedding(
         >>> import bigframes.pandas as bpd
         >>> import bigframes.bigquery as bbq
         >>> df = bpd.DataFrame({"content": ["apple", "bear", "pear"]})
-        >>> bbq.ai.generate_embedding(
+        >>> bbq.ai.generate_embedding( # doctest: +SKIP
         ...     "project.dataset.model_name",
         ...     df
         ... )
@@ -486,7 +486,7 @@ def generate_text(
         >>> import bigframes.pandas as bpd
         >>> import bigframes.bigquery as bbq
         >>> df = bpd.DataFrame({"prompt": ["write a poem about apples"]})
-        >>> bbq.ai.generate_text(
+        >>> bbq.ai.generate_text( # doctest: +SKIP
         ...     "project.dataset.model_name",
         ...     df
         ... )
@@ -601,7 +601,7 @@ def generate_table(
         >>> # the necessary columns for the model's prompt. For example, a
         >>> # DataFrame with a 'prompt' column for text classification.
         >>> df = bpd.DataFrame({'prompt': ["some text to classify"]})
-        >>> result = bbq.ai.generate_table(
+        >>> result = bbq.ai.generate_table( # doctest: +SKIP
         ...     "project.dataset.model_name",
         ...     data=df,
         ...     output_schema="category STRING"
@@ -708,12 +708,14 @@ def embed(
 
         >>> import bigframes.pandas as bpd
         >>> import bigframes.bigquery as bbq
-        >>> bbq.ai.embed("dog", endpoint="text-embedding-005")
+        >>> bbq.ai.embed("dog", endpoint="text-embedding-005") # doctest: +ELLIPSIS
         0    {'result': array([ 1.78243860e-03, -1.10658340...
+        dtype: struct<result: list<item: double>, status: string>[pyarrow]
 
         >>> s = bpd.Series(['dog'])
-        >>> bbq.ai.embed(s, endpoint='text-embedding-005')
+        >>> bbq.ai.embed(s, endpoint='text-embedding-005') # doctest: +ELLIPSIS
         0    {'result': array([ 1.78243860e-03, -1.10658340...
+        dtype: struct<result: list<item: double>, status: string>[pyarrow]
 
     Args:
         content (str | Series):
@@ -1004,6 +1006,7 @@ def similarity(
         >>> bbq.ai.similarity(df['word'], 'glad', endpoint='text-embedding-005')
         0    0.916601
         1    0.660579
+        Name: word, dtype: Float64
 
     Args:
         content1 (str | Series):
@@ -1082,8 +1085,8 @@ def forecast(
         >>> df = pd.DataFrame({"value": [1, 2, 3], "time": pd.to_datetime(["2020-01-01", "2020-01-02", "2020-01-03"])})
         >>> bpd.options.display.progress_bar = None
         >>> forecasted_pandas_df = df.bigquery.ai.forecast(data_col="value", timestamp_col="time", horizon=2)
-        >>> type(forecasted_pandas_df)
-        <class 'pandas.core.frame.DataFrame'>
+        >>> type(forecasted_pandas_df) # doctest: +ELLIPSIS
+        <class 'pandas...DataFrame'>
 
         Forecast using a BigFrames DataFrame:
 
diff --git a/packages/bigframes/bigframes/bigquery/_operations/struct.py b/packages/bigframes/bigframes/bigquery/_operations/struct.py
@@ -57,5 +57,5 @@ def struct(value: dataframe.DataFrame) -> series.Series:
     block, result_id = block.apply_nary_op(
         block.value_columns, ops.StructOp(column_names=tuple(block.column_labels))
     )
-    block = block.select_column(result_id)
+    block = block.select_column(result_id).with_column_labels([None])
     return series.Series(block)
diff --git a/packages/bigframes/bigframes/core/blocks.py b/packages/bigframes/bigframes/core/blocks.py
@@ -1991,6 +1991,10 @@ def _generate_resample_label(
                 )
             level = level or 0
             col_id = self.index.resolve_level(level)[0]
+            if isinstance(level, int):
+                resample_label = self.index.names[level]
+            else:
+                resample_label = level
             # Reset index to make the resampling level a column, then drop all other index columns.
             # This simplifies processing by focusing solely on the column required for resampling.
             block = self.reset_index(drop=False)
@@ -2009,6 +2013,7 @@ def _generate_resample_label(
                 raise KeyError(f"The grouper name {on} is not found")
 
             col_id = matches[0]
+            resample_label = on
             block = self
         if level is None:
             dtype = self._column_type(col_id)
@@ -2101,6 +2106,7 @@ def _generate_resample_label(
             block.value_columns[0],
             block.value_columns[1],
             op=ops.IntegerLabelToDatetimeOp(freq=freq, label=label, origin=origin),
+            result_label=resample_label,
         )
 
         # After multiple merges, the columns:
diff --git a/packages/bigframes/bigframes/core/compile/polars/compiler.py b/packages/bigframes/bigframes/core/compile/polars/compiler.py
@@ -178,8 +178,59 @@ def _(
             self,
             expression: ex.OpExpression,
         ) -> pl.Expr:
-            # TODO: Complete the implementation
+            import datetime
+
+            import pyarrow as pa
+
             op = expression.op
+
+            # Polars panics on nulls from pandas objects in timezone-aware
+            # datetimes for certain ops. Convert to timezone-naive temporarily
+            # to avoid this issue.
+            # TODO(tswast): Remove workaround when
+            # https://github.com/pola-rs/polars/issues/27862 has been fixed.
+            is_problematic_op = type(op) in (
+                date_ops.YearOp,
+                date_ops.QuarterOp,
+                date_ops.MonthOp,
+                date_ops.DayOp,
+                date_ops.IsoWeekOp,
+            )
+
+            if is_problematic_op and len(expression.inputs) == 1:
+                input_expr = expression.inputs[0]
+                if (
+                    input_expr.is_resolved
+                    and isinstance(input_expr.output_type, pd.ArrowDtype)
+                    and isinstance(
+                        input_expr.output_type.pyarrow_dtype, pa.TimestampType
+                    )
+                    and input_expr.output_type.pyarrow_dtype.tz is not None
+                ):
+                    tz_str = input_expr.output_type.pyarrow_dtype.tz
+                    if tz_str == "UTC":
+                        dummy_tz = datetime.timezone.utc
+                    else:
+                        try:
+                            from zoneinfo import ZoneInfo
+
+                            dummy_tz = ZoneInfo(tz_str)  # type: ignore
+                        except Exception:
+                            dummy_tz = datetime.timezone.utc
+
+                    dummy_val = datetime.datetime(1970, 1, 1, tzinfo=dummy_tz)
+
+                    compiled_input = self.compile_expression(input_expr)
+                    filled_input = compiled_input.fill_null(dummy_val)
+                    compiled_op_with_fill = self.compile_op(op, filled_input)
+
+                    return (
+                        pl.when(compiled_input.is_null())
+                        .then(None)
+                        .otherwise(compiled_op_with_fill)
+                    )
+
+            # TODO: Complete the implementation
             args = tuple(map(self.compile_expression, expression.inputs))
             return self.compile_op(op, *args)
 
diff --git a/packages/bigframes/bigframes/core/indexes/base.py b/packages/bigframes/bigframes/core/indexes/base.py
@@ -325,6 +325,7 @@ def get_loc(self, key) -> typing.Union[int, slice, "bigframes.series.Series"]:
             # Return boolean mask for non-monotonic duplicates
             mask_block = block_with_offsets.select_columns([match_col_id])
             mask_block = mask_block.reset_index(drop=True)
+            mask_block = mask_block.with_column_labels([None])
             result_series = bigframes.series.Series(mask_block)
             return result_series.astype("boolean")
 
diff --git a/packages/bigframes/bigframes/operations/ai.py b/packages/bigframes/bigframes/operations/ai.py
@@ -122,12 +122,10 @@ def map(
             >>> model = llm.GeminiTextGenerator(model_name="gemini-2.5-pro")
 
             >>> df = bpd.DataFrame({"ingredient_1": ["Burger Bun", "Soy Bean"], "ingredient_2": ["Beef Patty", "Bittern"]})
-            >>> df.ai.map("What is the food made from {ingredient_1} and {ingredient_2}? One word only.", model=model, output_schema={"food": "string"})
-              ingredient_1 ingredient_2      food
-            0   Burger Bun   Beef Patty  Burger
-            <BLANKLINE>
-            1     Soy Bean      Bittern    Tofu
-            <BLANKLINE>
+            >>> df.ai.map("What is the food made from {ingredient_1} and {ingredient_2}? One word only.", model=model, output_schema={"food": "string"}) # doctest: +ELLIPSIS
+              ingredient_1 ingredient_2...
+            0   Burger Bun   Beef Patty...
+            1     Soy Bean      Bittern...Tofu
             <BLANKLINE>
             [2 rows x 3 columns]
 
diff --git a/packages/bigframes/bigframes/series.py b/packages/bigframes/bigframes/series.py
@@ -2472,7 +2472,9 @@ def map(
 
         self_df = self.to_frame(name="series")
         result_df = self_df.join(map_df, on="series")
-        return result_df[self.name]
+        result = cast(Series, result_df[self.name])
+        result.name = self.name
+        return result
 
     @validations.requires_ordering()
     def sample(
@@ -2698,7 +2700,7 @@ def _apply_nary_op(
             others, ignore_self=ignore_self, cast_scalars=False
         )
         block, result_id = block.project_expr(op.as_expr(*values))
-        return Series(block.select_column(result_id))
+        return Series(block.select_column(result_id).with_column_labels([None]))
 
     def _apply_binary_aggregation(
         self, other: Series, stat: agg_ops.BinaryAggregateOp
diff --git a/packages/bigframes/bigframes/testing/utils.py b/packages/bigframes/bigframes/testing/utils.py
@@ -93,6 +93,14 @@ def assert_series_equivalent(pd_series: pd.Series, bf_series: bpd.Series, **kwar
 def _normalize_all_nulls(col: pd.Series) -> pd.Series:
     if pd_types.is_float_dtype(col.dtype):
         col = col.astype("float64").astype("Float64")
+    elif col.dtype == "object":
+        if any(isinstance(x, decimal.Decimal) for x in col):
+            pass
+        else:
+            try:
+                col = col.astype("Float64")
+            except (TypeError, ValueError, SystemError):
+                pass
     return col
 
 
diff --git a/packages/bigframes/setup.py b/packages/bigframes/setup.py
@@ -38,7 +38,7 @@
     "fsspec >=2023.3.0",
     "gcsfs >=2023.3.0, !=2025.5.0, !=2026.2.0, !=2026.3.0",
     "geopandas >=0.12.2",
-    "google-auth >=2.15.0,<3.0",
+    "google-auth[pyopenssl] >=2.15.0,<3.0",
     "google-cloud-bigquery[bqstorage,pandas] >=3.36.0",
     # 2.30 needed for arrow support.
     "google-cloud-bigquery-storage >= 2.30.0, < 3.0.0",
@@ -75,6 +75,7 @@
         "pytest-snapshot",
         "google-cloud-bigtable >=2.24.0",
         "google-cloud-pubsub >=2.21.4",
+        "tzdata",
     ],
     # used for local engine
     "polars": ["polars >= 1.21.0"],
diff --git a/packages/bigframes/tests/system/small/functions/test_remote_function.py b/packages/bigframes/tests/system/small/functions/test_remote_function.py
@@ -468,7 +468,12 @@ def add_one(x):
 
     pd_int64_df = scalars_pandas_df[int64_cols]
     pd_int64_df_filtered = pd_int64_df.dropna()
-    pd_result = pd_int64_df_filtered.applymap(add_one)
+
+    # TODO(swast): Remove when pandas 2.1.x+ is the minimum supported.
+    if hasattr(pd_int64_df_filtered, "map"):
+        pd_result = pd_int64_df_filtered.map(add_one)
+    else:
+        pd_result = pd_int64_df_filtered.applymap(add_one)
     # TODO(shobs): Figure why pandas .applymap() changes the dtype, i.e.
     # pd_int64_df_filtered.dtype is Int64Dtype()
     # pd_int64_df_filtered.applymap(lambda x: x).dtype is int64.
@@ -503,7 +508,13 @@ def add_one(x):
 
     pd_int64_df = scalars_pandas_df[int64_cols]
     pd_int64_df_filtered = pd_int64_df[pd_int64_df["int64_col"].notnull()]
-    pd_result = pd_int64_df_filtered.applymap(add_one)
+
+    # TODO(swast): Remove when pandas 2.1.x+ is the minimum supported.
+    if hasattr(pd_int64_df_filtered, "map"):
+        pd_result = pd_int64_df_filtered.map(add_one)
+    else:
+        pd_result = pd_int64_df_filtered.applymap(add_one)
+
     # TODO(shobs): Figure why pandas .applymap() changes the dtype, i.e.
     # pd_int64_df_filtered.dtype is Int64Dtype()
     # pd_int64_df_filtered.applymap(lambda x: x).dtype is int64.
@@ -536,7 +547,13 @@ def add_one(x):
     bf_result = bf_int64_df.applymap(remote_add_one, na_action="ignore").to_pandas()
 
     pd_int64_df = scalars_pandas_df[int64_cols]
-    pd_result = pd_int64_df.applymap(add_one, na_action="ignore")
+
+    # TODO(swast): Remove when pandas 2.1.x+ is the minimum supported.
+    if hasattr(pd_int64_df, "map"):
+        pd_result = pd_int64_df.map(add_one, na_action="ignore")
+    else:
+        pd_result = pd_int64_df.applymap(add_one, na_action="ignore")
+
     # TODO(shobs): Figure why pandas .applymap() changes the dtype, i.e.
     # pd_int64_df_filtered.dtype is Int64Dtype()
     # pd_int64_df_filtered.applymap(lambda x: x).dtype is int64.
diff --git a/packages/bigframes/tests/system/small/test_magics.py b/packages/bigframes/tests/system/small/test_magics.py
@@ -44,7 +44,7 @@ def test_magic_select_lit_to_var(ip):
     assert "dst_var" in ip.user_ns
     result_df = ip.user_ns["dst_var"]
     assert result_df.shape == (1, 1)
-    assert result_df.loc[0, 0] == 3
+    assert result_df.to_pandas().iloc[0, 0] == 3
 
 
 def test_magic_select_lit_dry_run(ip):
@@ -97,4 +97,4 @@ def test_magic_select_interpolate(ip):
     assert "dst_var" in ip.user_ns
     result_df = ip.user_ns["dst_var"]
     assert result_df.shape == (1, 1)
-    assert result_df.loc[0, 0] == 9
+    assert result_df.loc[0, "total"] == 9
diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/snapshots/test_compile_fromrange/test_compile_fromrange/out.sql b/packages/bigframes/tests/unit/core/compile/sqlglot/snapshots/test_compile_fromrange/test_compile_fromrange/out.sql
@@ -60,7 +60,7 @@ WITH `bfcte_0` AS (
 SELECT
   CAST(TIMESTAMP_MICROS(
     CAST(CAST(`bfcol_17` AS BIGNUMERIC) * 7000000 + CAST(UNIX_MICROS(CAST(CAST(`bfcol_8` AS DATE) AS TIMESTAMP)) AS BIGNUMERIC) AS INT64)
-  ) AS DATETIME) AS `bigframes_unnamed_index`,
+  ) AS DATETIME) AS `timestamp_col`,
   `bfcol_11` AS `int64_col`,
   `bfcol_12` AS `int64_too`
 FROM (
@@ -72,4 +72,4 @@ FROM (
 LEFT JOIN `bfcte_5`
   ON `bfcol_17` = `bfcol_13`
 ORDER BY
-  `bfcol_17` ASC NULLS LAST
+  `bfcol_17` ASC NULLS LAST
diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/snapshots/test_dataframe_accessor/test_bigframes_sql_scalar/out.sql b/packages/bigframes/tests/unit/core/compile/sqlglot/snapshots/test_dataframe_accessor/test_bigframes_sql_scalar/out.sql
@@ -1,4 +1,4 @@
 SELECT
   `rowindex`,
   ROUND(`int64_col` + `int64_too`) AS `0`
-FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0`
+FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0`
diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/snapshots/test_dataframe_accessor/test_sql_scalar/out.sql b/packages/bigframes/tests/unit/core/compile/sqlglot/snapshots/test_dataframe_accessor/test_sql_scalar/out.sql
@@ -1,4 +1,4 @@
 SELECT
   `rowindex`,
   ROUND(`int64_col` + `int64_too`) AS `0`
-FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0`
+FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0`
diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/test_compile_fromrange.py b/packages/bigframes/tests/unit/core/compile/sqlglot/test_compile_fromrange.py
@@ -32,4 +32,4 @@ def test_compile_fromrange(compiler_session, snapshot):
     sql, _, _ = df.resample(rule="7s")._block.to_sql_query(
         include_index=True, enable_cache=False
     )
-    snapshot.assert_match(sql, "out.sql")
+    snapshot.assert_match(sql.strip() + "\n", "out.sql")
diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/test_dataframe_accessor.py b/packages/bigframes/tests/unit/core/compile/sqlglot/test_dataframe_accessor.py
@@ -22,6 +22,10 @@
 
 pytest.importorskip("pytest_snapshot")
 
+# Only test on the latest pandas since column naming behavior is slightly
+# different across versions, e.g. unnamed vs 0 for unnamed Series.
+pytest.importorskip("pandas", minversion="3.0.0")
+
 
 def test_sql_scalar(scalar_types_df: bpd.DataFrame, snapshot, monkeypatch):
     session = mock.create_autospec(bigframes.session.Session)
@@ -42,7 +46,7 @@ def to_pandas(series, *, ordered):
     )
 
     session.read_pandas.assert_called_once()
-    snapshot.assert_match(result, "out.sql")
+    snapshot.assert_match(result.strip() + "\n", "out.sql")
 
 
 def test_bigframes_sql_scalar(scalar_types_df: bpd.DataFrame, snapshot):
@@ -57,4 +61,4 @@ def test_bigframes_sql_scalar(scalar_types_df: bpd.DataFrame, snapshot):
     session.read_pandas.assert_not_called()
     # Bigframes implementation returns a bigframes.series.Series
     sql, _, _ = result.to_frame()._to_sql_query(include_index=True)
-    snapshot.assert_match(sql, "out.sql")
+    snapshot.assert_match(sql.strip() + "\n", "out.sql")
diff --git a/packages/bigframes/tests/unit/extensions/core/test_dataframe_accessor.py b/packages/bigframes/tests/unit/extensions/core/test_dataframe_accessor.py
diff --git a/packages/bigframes/tests/unit/test_col.py b/packages/bigframes/tests/unit/test_col.py
diff --git a/packages/bigframes/third_party/bigframes_vendored/pandas/core/frame.py b/packages/bigframes/third_party/bigframes_vendored/pandas/core/frame.py
diff --git a/packages/bigframes/third_party/bigframes_vendored/pandas/core/generic.py b/packages/bigframes/third_party/bigframes_vendored/pandas/core/generic.py
diff --git a/packages/bigframes/third_party/bigframes_vendored/pandas/core/indexes/accessor.py b/packages/bigframes/third_party/bigframes_vendored/pandas/core/indexes/accessor.py
diff --git a/packages/bigframes/third_party/bigframes_vendored/pandas/core/series.py b/packages/bigframes/third_party/bigframes_vendored/pandas/core/series.py
diff --git a/packages/bigframes/third_party/bigframes_vendored/sklearn/decomposition/_mf.py b/packages/bigframes/third_party/bigframes_vendored/sklearn/decomposition/_mf.py

Original file line number	Diff line number	Diff line change
`@@ -57,5 +57,5 @@ def struct(value: dataframe.DataFrame) -> series.Series:`
`57`	`57`	`block, result_id = block.apply_nary_op(`
`58`	`58`	`block.value_columns, ops.StructOp(column_names=tuple(block.column_labels))`
`59`	`59`	`)`
`60`		`- block = block.select_column(result_id)`
	`60`	`+ block = block.select_column(result_id).with_column_labels([None])`
`61`	`61`	`return series.Series(block)`