Skip to content

Commit 3b7c0e2

Browse files
ARROW-16120: [Python] ParquetDataset deprecation: change Deprecation to FutureWarnings
+ suppressing some additional warnings when running the tests Closes #12798 from jorisvandenbossche/ARROW-16120 Authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com> Signed-off-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
1 parent 36a7f11 commit 3b7c0e2

4 files changed

Lines changed: 59 additions & 43 deletions

File tree

python/pyarrow/parquet/__init__.py

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1059,7 +1059,7 @@ def __init__(self, path, open_file_func=partial(open, mode='rb'),
10591059
warnings.warn(
10601060
"ParquetDatasetPiece is deprecated as of pyarrow 5.0.0 and will "
10611061
"be removed in a future version.",
1062-
DeprecationWarning, stacklevel=2)
1062+
FutureWarning, stacklevel=2)
10631063
self._init(
10641064
path, open_file_func, file_options, row_group, partition_keys)
10651065

@@ -1692,7 +1692,7 @@ def __init__(self, path_or_paths, filesystem=None, schema=None,
16921692
"Specifying the 'metadata_nthreads' argument is deprecated as "
16931693
"of pyarrow 8.0.0, and the argument will be removed in a "
16941694
"future version",
1695-
DeprecationWarning, stacklevel=2,
1695+
FutureWarning, stacklevel=2,
16961696
)
16971697
else:
16981698
metadata_nthreads = 1
@@ -1742,7 +1742,7 @@ def __init__(self, path_or_paths, filesystem=None, schema=None,
17421742
"specify it in combination with 'use_legacy_dataet=False', "
17431743
"but in that case you need to specify a pyarrow.Schema "
17441744
"instead of a ParquetSchema.",
1745-
DeprecationWarning, stacklevel=2)
1745+
FutureWarning, stacklevel=2)
17461746
self._schema = schema
17471747

17481748
self.split_row_groups = split_row_groups
@@ -1953,7 +1953,7 @@ def pieces(self):
19531953
" Specify 'use_legacy_dataset=False' while constructing the "
19541954
"ParquetDataset, and then use the '.fragments' attribute "
19551955
"instead."),
1956-
DeprecationWarning, stacklevel=2)
1956+
FutureWarning, stacklevel=2)
19571957
return self._pieces
19581958

19591959
@property
@@ -1967,7 +1967,7 @@ def partitions(self):
19671967
" Specify 'use_legacy_dataset=False' while constructing the "
19681968
"ParquetDataset, and then use the '.partitioning' attribute "
19691969
"instead."),
1970-
DeprecationWarning, stacklevel=2)
1970+
FutureWarning, stacklevel=2)
19711971
return self._partitions
19721972

19731973
@property
@@ -1979,7 +1979,7 @@ def schema(self):
19791979
"ParquetDataset, and then use the '.schema' attribute "
19801980
"instead (which will return an Arrow schema instead of a "
19811981
"Parquet schema)."),
1982-
DeprecationWarning, stacklevel=2)
1982+
FutureWarning, stacklevel=2)
19831983
return self._schema
19841984

19851985
@property
@@ -1989,7 +1989,7 @@ def memory_map(self):
19891989
"""
19901990
warnings.warn(
19911991
_DEPR_MSG.format("ParquetDataset.memory_map", ""),
1992-
DeprecationWarning, stacklevel=2)
1992+
FutureWarning, stacklevel=2)
19931993
return self._metadata.memory_map
19941994

19951995
@property
@@ -1999,7 +1999,7 @@ def read_dictionary(self):
19991999
"""
20002000
warnings.warn(
20012001
_DEPR_MSG.format("ParquetDataset.read_dictionary", ""),
2002-
DeprecationWarning, stacklevel=2)
2002+
FutureWarning, stacklevel=2)
20032003
return self._metadata.read_dictionary
20042004

20052005
@property
@@ -2009,7 +2009,7 @@ def buffer_size(self):
20092009
"""
20102010
warnings.warn(
20112011
_DEPR_MSG.format("ParquetDataset.buffer_size", ""),
2012-
DeprecationWarning, stacklevel=2)
2012+
FutureWarning, stacklevel=2)
20132013
return self._metadata.buffer_size
20142014

20152015
_fs = property(
@@ -2027,7 +2027,7 @@ def fs(self):
20272027
" Specify 'use_legacy_dataset=False' while constructing the "
20282028
"ParquetDataset, and then use the '.filesystem' attribute "
20292029
"instead."),
2030-
DeprecationWarning, stacklevel=2)
2030+
FutureWarning, stacklevel=2)
20312031
return self._metadata.fs
20322032

20332033
_common_metadata = property(
@@ -2041,7 +2041,7 @@ def common_metadata(self):
20412041
"""
20422042
warnings.warn(
20432043
_DEPR_MSG.format("ParquetDataset.common_metadata", ""),
2044-
DeprecationWarning, stacklevel=2)
2044+
FutureWarning, stacklevel=2)
20452045
return self._metadata.common_metadata
20462046

20472047
@property
@@ -2453,7 +2453,7 @@ def pieces(self):
24532453
warnings.warn(
24542454
_DEPR_MSG.format("ParquetDataset.pieces",
24552455
" Use the '.fragments' attribute instead"),
2456-
DeprecationWarning, stacklevel=2)
2456+
FutureWarning, stacklevel=2)
24572457
return list(self._dataset.get_fragments())
24582458

24592459
@property
@@ -2744,7 +2744,7 @@ def read_table(source, columns=None, use_threads=True, metadata=None,
27442744
"Passing 'use_legacy_dataset=True' to get the legacy behaviour is "
27452745
"deprecated as of pyarrow 8.0.0, and the legacy implementation will "
27462746
"be removed in a future version.",
2747-
DeprecationWarning, stacklevel=2)
2747+
FutureWarning, stacklevel=2)
27482748

27492749
if ignore_prefixes is not None:
27502750
raise ValueError(

python/pyarrow/tests/parquet/common.py

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,14 +23,25 @@
2323
import pyarrow as pa
2424
from pyarrow.tests import util
2525

26+
legacy_filter_mark = pytest.mark.filterwarnings(
27+
"ignore:Passing 'use_legacy:FutureWarning"
28+
)
29+
2630
parametrize_legacy_dataset = pytest.mark.parametrize(
2731
"use_legacy_dataset",
28-
[True, pytest.param(False, marks=pytest.mark.dataset)])
32+
[pytest.param(True, marks=legacy_filter_mark),
33+
pytest.param(False, marks=pytest.mark.dataset)]
34+
)
2935
parametrize_legacy_dataset_not_supported = pytest.mark.parametrize(
30-
"use_legacy_dataset", [True, pytest.param(False, marks=pytest.mark.skip)])
36+
"use_legacy_dataset",
37+
[pytest.param(True, marks=legacy_filter_mark),
38+
pytest.param(False, marks=pytest.mark.skip)]
39+
)
3140
parametrize_legacy_dataset_fixed = pytest.mark.parametrize(
32-
"use_legacy_dataset", [pytest.param(True, marks=pytest.mark.xfail),
33-
pytest.param(False, marks=pytest.mark.dataset)])
41+
"use_legacy_dataset",
42+
[pytest.param(True, marks=[pytest.mark.xfail, legacy_filter_mark]),
43+
pytest.param(False, marks=pytest.mark.dataset)]
44+
)
3445

3546
# Marks all of the tests in this module
3647
# Ignore these with pytest ... -m 'not parquet'
@@ -58,7 +69,7 @@ def _read_table(*args, **kwargs):
5869

5970

6071
def _roundtrip_table(table, read_table_kwargs=None,
61-
write_table_kwargs=None, use_legacy_dataset=True):
72+
write_table_kwargs=None, use_legacy_dataset=False):
6273
read_table_kwargs = read_table_kwargs or {}
6374
write_table_kwargs = write_table_kwargs or {}
6475

@@ -70,7 +81,7 @@ def _roundtrip_table(table, read_table_kwargs=None,
7081

7182

7283
def _check_roundtrip(table, expected=None, read_table_kwargs=None,
73-
use_legacy_dataset=True, **write_table_kwargs):
84+
use_legacy_dataset=False, **write_table_kwargs):
7485
if expected is None:
7586
expected = table
7687

@@ -87,7 +98,7 @@ def _check_roundtrip(table, expected=None, read_table_kwargs=None,
8798
assert result.equals(expected)
8899

89100

90-
def _roundtrip_pandas_dataframe(df, write_kwargs, use_legacy_dataset=True):
101+
def _roundtrip_pandas_dataframe(df, write_kwargs, use_legacy_dataset=False):
91102
table = pa.Table.from_pandas(df)
92103
result = _roundtrip_table(
93104
table, write_table_kwargs=write_kwargs,

python/pyarrow/tests/parquet/test_basic.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -795,6 +795,6 @@ def test_read_table_legacy_deprecated(tempdir):
795795
pq.write_table(table, path)
796796

797797
with pytest.warns(
798-
DeprecationWarning, match="Passing 'use_legacy_dataset=True'"
798+
FutureWarning, match="Passing 'use_legacy_dataset=True'"
799799
):
800800
pq.read_table(path, use_legacy_dataset=True)

python/pyarrow/tests/parquet/test_dataset.py

Lines changed: 27 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ def test_parquet_piece_read(tempdir):
5555
path = tempdir / 'parquet_piece_read.parquet'
5656
_write_table(table, path, version='2.6')
5757

58-
with pytest.warns(DeprecationWarning):
58+
with pytest.warns(FutureWarning):
5959
piece1 = pq.ParquetDatasetPiece(path)
6060

6161
result = piece1.read()
@@ -70,7 +70,7 @@ def test_parquet_piece_open_and_get_metadata(tempdir):
7070
path = tempdir / 'parquet_piece_read.parquet'
7171
_write_table(table, path, version='2.6')
7272

73-
with pytest.warns(DeprecationWarning):
73+
with pytest.warns(FutureWarning):
7474
piece = pq.ParquetDatasetPiece(path)
7575
table1 = piece.read()
7676
assert isinstance(table1, pa.Table)
@@ -80,7 +80,7 @@ def test_parquet_piece_open_and_get_metadata(tempdir):
8080
assert table.equals(table1)
8181

8282

83-
@pytest.mark.filterwarnings("ignore:ParquetDatasetPiece:DeprecationWarning")
83+
@pytest.mark.filterwarnings("ignore:ParquetDatasetPiece:FutureWarning")
8484
def test_parquet_piece_basics():
8585
path = '/baz.parq'
8686

@@ -140,7 +140,7 @@ def test_read_partitioned_directory(tempdir, use_legacy_dataset):
140140
_partition_test_for_filesystem(fs, tempdir, use_legacy_dataset)
141141

142142

143-
@pytest.mark.filterwarnings("ignore:'ParquetDataset:DeprecationWarning")
143+
@pytest.mark.filterwarnings("ignore:'ParquetDataset:FutureWarning")
144144
@pytest.mark.pandas
145145
def test_create_parquet_dataset_multi_threaded(tempdir):
146146
fs = LocalFileSystem._get_instance()
@@ -151,7 +151,7 @@ def test_create_parquet_dataset_multi_threaded(tempdir):
151151
manifest = pq.ParquetManifest(base_path, filesystem=fs,
152152
metadata_nthreads=1)
153153
with pytest.warns(
154-
DeprecationWarning, match="Specifying the 'metadata_nthreads'"
154+
FutureWarning, match="Specifying the 'metadata_nthreads'"
155155
):
156156
dataset = pq.ParquetDataset(
157157
base_path, filesystem=fs, metadata_nthreads=16)
@@ -801,14 +801,14 @@ def _test_read_common_metadata_files(fs, base_path):
801801

802802

803803
@pytest.mark.pandas
804-
@pytest.mark.filterwarnings("ignore:'ParquetDataset.schema:DeprecationWarning")
804+
@pytest.mark.filterwarnings("ignore:'ParquetDataset.schema:FutureWarning")
805805
def test_read_common_metadata_files(tempdir):
806806
fs = LocalFileSystem._get_instance()
807807
_test_read_common_metadata_files(fs, tempdir)
808808

809809

810810
@pytest.mark.pandas
811-
@pytest.mark.filterwarnings("ignore:'ParquetDataset.schema:DeprecationWarning")
811+
@pytest.mark.filterwarnings("ignore:'ParquetDataset.schema:FutureWarning")
812812
def test_read_metadata_files(tempdir):
813813
fs = LocalFileSystem._get_instance()
814814

@@ -922,7 +922,7 @@ def read_multiple_files(paths, columns=None, use_threads=True, **kwargs):
922922
result2 = read_multiple_files(paths, metadata=metadata)
923923
assert result2.equals(expected)
924924

925-
with pytest.warns(DeprecationWarning, match="Specifying the 'schema'"):
925+
with pytest.warns(FutureWarning, match="Specifying the 'schema'"):
926926
result3 = pq.ParquetDataset(dirpath, schema=metadata.schema).read()
927927
assert result3.equals(expected)
928928
else:
@@ -968,7 +968,7 @@ def read_multiple_files(paths, columns=None, use_threads=True, **kwargs):
968968
mixed_paths = [bad_apple_path, paths[0]]
969969

970970
with pytest.raises(ValueError):
971-
with pytest.warns(DeprecationWarning, match="Specifying the 'schema'"):
971+
with pytest.warns(FutureWarning, match="Specifying the 'schema'"):
972972
read_multiple_files(mixed_paths, schema=bad_meta.schema)
973973

974974
with pytest.raises(ValueError):
@@ -1014,7 +1014,7 @@ def test_dataset_read_pandas(tempdir, use_legacy_dataset):
10141014
tm.assert_frame_equal(result.reindex(columns=expected.columns), expected)
10151015

10161016

1017-
@pytest.mark.filterwarnings("ignore:'ParquetDataset:DeprecationWarning")
1017+
@pytest.mark.filterwarnings("ignore:'ParquetDataset:FutureWarning")
10181018
@pytest.mark.pandas
10191019
@parametrize_legacy_dataset
10201020
def test_dataset_memory_map(tempdir, use_legacy_dataset):
@@ -1217,7 +1217,7 @@ def test_empty_directory(tempdir, use_legacy_dataset):
12171217
assert result.num_columns == 0
12181218

12191219

1220-
@pytest.mark.filterwarnings("ignore:'ParquetDataset.schema:DeprecationWarning")
1220+
@pytest.mark.filterwarnings("ignore:'ParquetDataset.schema:FutureWarning")
12211221
def _test_write_to_dataset_with_partitions(base_path,
12221222
use_legacy_dataset=True,
12231223
filesystem=None,
@@ -1259,7 +1259,7 @@ def _test_write_to_dataset_with_partitions(base_path,
12591259
use_legacy_dataset=use_legacy_dataset)
12601260
# ARROW-2209: Ensure the dataset schema also includes the partition columns
12611261
if use_legacy_dataset:
1262-
with pytest.warns(DeprecationWarning, match="'ParquetDataset.schema'"):
1262+
with pytest.warns(FutureWarning, match="'ParquetDataset.schema'"):
12631263
dataset_cols = set(dataset.schema.to_arrow_schema().names)
12641264
else:
12651265
# NB schema property is an arrow and not parquet schema
@@ -1409,7 +1409,7 @@ def test_write_to_dataset_no_partitions_s3fs(
14091409
path, use_legacy_dataset, filesystem=fs)
14101410

14111411

1412-
@pytest.mark.filterwarnings("ignore:'ParquetDataset:DeprecationWarning")
1412+
@pytest.mark.filterwarnings("ignore:'ParquetDataset:FutureWarning")
14131413
@pytest.mark.pandas
14141414
@parametrize_legacy_dataset_not_supported
14151415
def test_write_to_dataset_with_partitions_and_custom_filenames(
@@ -1569,6 +1569,7 @@ def test_dataset_read_dictionary(tempdir, use_legacy_dataset):
15691569

15701570
@pytest.mark.dataset
15711571
@pytest.mark.pandas
1572+
@pytest.mark.filterwarnings("ignore:Passing 'use_legacy:FutureWarning")
15721573
def test_read_table_schema(tempdir):
15731574
# test that schema keyword is passed through in read_table
15741575
table = pa.table({'a': pa.array([1, 2, 3], pa.int32())})
@@ -1622,6 +1623,7 @@ def test_dataset_unsupported_keywords():
16221623

16231624

16241625
@pytest.mark.dataset
1626+
@pytest.mark.filterwarnings("ignore:Passing 'use_legacy:FutureWarning")
16251627
def test_dataset_partitioning(tempdir):
16261628
import pyarrow.dataset as ds
16271629

@@ -1669,7 +1671,7 @@ def test_parquet_dataset_new_filesystem(tempdir):
16691671
assert result.equals(table)
16701672

16711673

1672-
@pytest.mark.filterwarnings("ignore:'ParquetDataset:DeprecationWarning")
1674+
@pytest.mark.filterwarnings("ignore:'ParquetDataset:FutureWarning")
16731675
def test_parquet_dataset_partitions_piece_path_with_fsspec(tempdir):
16741676
# ARROW-10462 ensure that on Windows we properly use posix-style paths
16751677
# as used by fsspec
@@ -1693,30 +1695,33 @@ def test_parquet_dataset_deprecated_properties(tempdir):
16931695
pq.write_table(table, path)
16941696
dataset = pq.ParquetDataset(path)
16951697

1696-
with pytest.warns(DeprecationWarning, match="'ParquetDataset.pieces"):
1698+
with pytest.warns(FutureWarning, match="'ParquetDataset.pieces"):
16971699
dataset.pieces
16981700

1699-
with pytest.warns(DeprecationWarning, match="'ParquetDataset.partitions"):
1701+
with pytest.warns(FutureWarning, match="'ParquetDataset.partitions"):
17001702
dataset.partitions
17011703

1702-
with pytest.warns(DeprecationWarning, match="'ParquetDataset.memory_map"):
1704+
with pytest.warns(FutureWarning, match="'ParquetDataset.memory_map"):
17031705
dataset.memory_map
17041706

1705-
with pytest.warns(DeprecationWarning, match="'ParquetDataset.read_dictio"):
1707+
with pytest.warns(FutureWarning, match="'ParquetDataset.read_dictio"):
17061708
dataset.read_dictionary
17071709

1708-
with pytest.warns(DeprecationWarning, match="'ParquetDataset.buffer_size"):
1710+
with pytest.warns(FutureWarning, match="'ParquetDataset.buffer_size"):
17091711
dataset.buffer_size
17101712

1711-
with pytest.warns(DeprecationWarning, match="'ParquetDataset.fs"):
1713+
with pytest.warns(FutureWarning, match="'ParquetDataset.fs"):
17121714
dataset.fs
17131715

1714-
with pytest.warns(DeprecationWarning, match="'ParquetDataset.schema'"):
1716+
with pytest.warns(FutureWarning, match="'ParquetDataset.schema'"):
17151717
dataset.schema
17161718

1719+
with pytest.warns(FutureWarning, match="'ParquetDataset.common_metadata'"):
1720+
dataset.common_metadata
1721+
17171722
dataset2 = pq.ParquetDataset(path, use_legacy_dataset=False)
17181723

1719-
with pytest.warns(DeprecationWarning, match="'ParquetDataset.pieces"):
1724+
with pytest.warns(FutureWarning, match="'ParquetDataset.pieces"):
17201725
dataset2.pieces
17211726

17221727

0 commit comments

Comments (0)