Skip to content

Commit 3bf0617

Browse files
ARROW-15870: [Python] Start to raise deprecation warnings for use_legacy_dataset=True in parquet.read_table
Currently, users can still specify `use_legacy_dataset=True` explicitly to get the old implementation/behaviour. But if we want to remove that implementation at some point (ARROW-15868), we should start deprecating that option, to further nudge people to the new implementation. Closes #12584 from jorisvandenbossche/ARROW-15870 Authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com> Signed-off-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
1 parent 2e314cb commit 3bf0617

11 files changed

Lines changed: 51 additions & 24 deletions

python/pyarrow/parquet.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1908,7 +1908,8 @@ def partitioning(self):
19081908
pyarrow 1.0.0. Among other things, this allows to pass `filters`
19091909
for all columns and not only the partition keys, enables
19101910
different partitioning schemes, etc.
1911-
Set to True to use the legacy behaviour.
1911+
Set to True to use the legacy behaviour (this option is deprecated,
1912+
and the legacy implementation will be removed in a future version).
19121913
ignore_prefixes : list, optional
19131914
Files matching any of these prefixes will be ignored by the
19141915
discovery process if use_legacy_dataset=False.
@@ -2006,6 +2007,12 @@ def read_table(source, columns=None, use_threads=True, metadata=None,
20062007
return dataset.read(columns=columns, use_threads=use_threads,
20072008
use_pandas_metadata=use_pandas_metadata)
20082009

2010+
warnings.warn(
2011+
"Passing 'use_legacy_dataset=True' to get the legacy behaviour is "
2012+
"deprecated as of pyarrow 8.0.0, and the legacy implementation will "
2013+
"be removed in a future version.",
2014+
DeprecationWarning, stacklevel=2)
2015+
20092016
if ignore_prefixes is not None:
20102017
raise ValueError(
20112018
"The 'ignore_prefixes' keyword is only supported when "
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
import pytest
19+
20+
# Marks all of the tests in this module
21+
# Ignore these with pytest ... -m 'not parquet'
22+
pytestmark = [
23+
pytest.mark.parquet,
24+
pytest.mark.filterwarnings(
25+
"ignore:Passing 'use_legacy_dataset=True':DeprecationWarning"
26+
),
27+
]

python/pyarrow/tests/parquet/test_basic.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,9 +45,6 @@
4545
pd = tm = None
4646

4747

48-
pytestmark = pytest.mark.parquet
49-
50-
5148
def test_parquet_invalid_version(tempdir):
5249
table = pa.table({'a': [1, 2, 3]})
5350
with pytest.raises(ValueError, match="Unsupported Parquet format version"):
@@ -593,7 +590,10 @@ def test_read_table_doesnt_warn(datadir, use_legacy_dataset):
593590
pq.read_table(datadir / 'v0.7.1.parquet',
594591
use_legacy_dataset=use_legacy_dataset)
595592

596-
assert len(record) == 0
593+
if use_legacy_dataset:
594+
assert len(record) == 1
595+
else:
596+
assert len(record) == 0
597597

598598

599599
@pytest.mark.pandas
@@ -758,3 +758,15 @@ def test_permutation_of_column_order(tempdir):
758758
names=['a', 'b'])
759759

760760
assert table == table2
761+
762+
763+
def test_read_table_legacy_deprecated(tempdir):
764+
# ARROW-15870
765+
table = pa.table({'a': [1, 2, 3]})
766+
path = tempdir / 'data.parquet'
767+
pq.write_table(table, path)
768+
769+
with pytest.warns(
770+
DeprecationWarning, match="Passing 'use_legacy_dataset=True'"
771+
):
772+
pq.read_table(path, use_legacy_dataset=True)

python/pyarrow/tests/parquet/test_compliant_nested_type.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@
3535
except ImportError:
3636
pd = tm = None
3737

38-
pytestmark = pytest.mark.parquet
3938

4039
# Tests for ARROW-11497
4140
_test_data_simple = [

python/pyarrow/tests/parquet/test_data_types.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,9 +44,6 @@
4444
pd = tm = None
4545

4646

47-
pytestmark = pytest.mark.parquet
48-
49-
5047
# General roundtrip of data types
5148
# -----------------------------------------------------------------------------
5249

python/pyarrow/tests/parquet/test_dataset.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,6 @@
4646
except ImportError:
4747
pd = tm = None
4848

49-
pytestmark = pytest.mark.parquet
50-
5149

5250
@pytest.mark.pandas
5351
def test_parquet_piece_read(tempdir):

python/pyarrow/tests/parquet/test_datetime.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,6 @@
4141
pd = tm = None
4242

4343

44-
pytestmark = pytest.mark.parquet
45-
46-
4744
@pytest.mark.pandas
4845
@parametrize_legacy_dataset
4946
def test_pandas_parquet_datetime_tz(use_legacy_dataset):

python/pyarrow/tests/parquet/test_metadata.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,6 @@
4040
pd = tm = None
4141

4242

43-
pytestmark = pytest.mark.parquet
44-
45-
4643
@pytest.mark.pandas
4744
def test_parquet_metadata_api():
4845
df = alltypes_sample(size=10000)

python/pyarrow/tests/parquet/test_pandas.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,9 +46,6 @@
4646
pd = tm = None
4747

4848

49-
pytestmark = pytest.mark.parquet
50-
51-
5249
@pytest.mark.pandas
5350
def test_pandas_parquet_custom_metadata(tempdir):
5451
df = alltypes_sample(size=10000)

python/pyarrow/tests/parquet/test_parquet_file.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,6 @@
3636
except ImportError:
3737
pd = tm = None
3838

39-
pytestmark = pytest.mark.parquet
40-
4139

4240
@pytest.mark.pandas
4341
def test_pass_separate_metadata():

0 commit comments

Comments
 (0)