-
-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Closed
Description
In #7405 we temporarily pinned sqlalchemy<1.4.0 because deprecation warnings raised inside pandas (xref pandas-dev/pandas#40467) were causing CI here to fail.
We should revisit this and make sure Dask is compatible with newer versions of sqlalchemy. There may be some code changes we need to make to support sqlalchemy>=1.4.0 (see the traceback for test_to_sql below), though I didn't spend much time debugging this.
Example traceback:
_________________________________________________________________ test_to_sql[True-1] __________________________________________________________________
npartitions = 1, parallel = True
@pytest.mark.parametrize("npartitions", (1, 2))
@pytest.mark.parametrize("parallel", (False, True))
def test_to_sql(npartitions, parallel):
df_by_age = df.set_index("age")
df_appended = pd.concat(
[
df,
df,
]
)
ddf = dd.from_pandas(df, npartitions)
ddf_by_age = ddf.set_index("age")
# Simple round trip test: use existing "number" index_col
with tmp_db_uri() as uri:
ddf.to_sql("test", uri, parallel=parallel)
result = read_sql_table("test", uri, "number")
> assert_eq(df, result)
dask/dataframe/io/tests/test_sql.py:422:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
a = name age negish
number
0 Alice 33 -5
1 Bob 40 -3
2 ... 3
3 Dora 16 5
4 Edith 53 0
5 Francis 30 0
6 Garreth 20 0
b = name age negish number__1
number
0 Alice 33 -5 ...4 Edith 53 0 4
5 Francis 30 0 5
6 Garreth 20 0 6
check_names = True, check_dtypes = True, check_divisions = True, check_index = True, kwargs = {}
def assert_eq(
a,
b,
check_names=True,
check_dtypes=True,
check_divisions=True,
check_index=True,
**kwargs,
):
if check_divisions:
assert_divisions(a)
assert_divisions(b)
if hasattr(a, "divisions") and hasattr(b, "divisions"):
at = type(np.asarray(a.divisions).tolist()[0]) # numpy to python
bt = type(np.asarray(b.divisions).tolist()[0]) # scalar conversion
assert at == bt, (at, bt)
assert_sane_keynames(a)
assert_sane_keynames(b)
a = _check_dask(a, check_names=check_names, check_dtypes=check_dtypes)
b = _check_dask(b, check_names=check_names, check_dtypes=check_dtypes)
if not check_index:
a = a.reset_index(drop=True)
b = b.reset_index(drop=True)
if hasattr(a, "to_pandas"):
a = a.to_pandas()
if hasattr(b, "to_pandas"):
b = b.to_pandas()
if isinstance(a, pd.DataFrame):
a = _maybe_sort(a)
b = _maybe_sort(b)
> tm.assert_frame_equal(a, b, **kwargs)
E AssertionError: DataFrame are different
E
E DataFrame shape mismatch
E [left]: (7, 3)
E [right]: (7, 4)
dask/dataframe/utils.py:833: AssertionError
Reactions are currently unavailable