Skip to content

Support for sqlalchemy>= 1.4.0 #7406

@jrbourbeau

Description

@jrbourbeau

In #7405 we temporarily pinned sqlalchemy<1.4.0 to prevent deprecation warnings from being raised inside pandas (xref pandas-dev/pandas#40467), which had caused CI here to start failing.

We should revisit this and make sure Dask is compatible with newer versions of sqlalchemy. There may be some code changes we need to make to support sqlalchemy>=1.4.0 (see the traceback for test_to_sql below), though I didn't spend much time debugging things.

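Example traceback (the frame read back by read_sql_table has an extra `number__1` column, so its shape is (7, 4) instead of the expected (7, 3)):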
_________________________________________________________________ test_to_sql[True-1] __________________________________________________________________

npartitions = 1, parallel = True

    @pytest.mark.parametrize("npartitions", (1, 2))
    @pytest.mark.parametrize("parallel", (False, True))
    def test_to_sql(npartitions, parallel):
        df_by_age = df.set_index("age")
        df_appended = pd.concat(
            [
                df,
                df,
            ]
        )

        ddf = dd.from_pandas(df, npartitions)
        ddf_by_age = ddf.set_index("age")

        # Simple round trip test: use existing "number" index_col
        with tmp_db_uri() as uri:
            ddf.to_sql("test", uri, parallel=parallel)
            result = read_sql_table("test", uri, "number")
>           assert_eq(df, result)

dask/dataframe/io/tests/test_sql.py:422:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

a =            name  age  negish
number
0         Alice   33      -5
1           Bob   40      -3
2 ...  3
3          Dora   16       5
4         Edith   53       0
5       Francis   30       0
6       Garreth   20       0
b =            name  age  negish  number__1
number
0         Alice   33      -5          ...4         Edith   53       0          4
5       Francis   30       0          5
6       Garreth   20       0          6
check_names = True, check_dtypes = True, check_divisions = True, check_index = True, kwargs = {}

    def assert_eq(
        a,
        b,
        check_names=True,
        check_dtypes=True,
        check_divisions=True,
        check_index=True,
        **kwargs,
    ):
        if check_divisions:
            assert_divisions(a)
            assert_divisions(b)
            if hasattr(a, "divisions") and hasattr(b, "divisions"):
                at = type(np.asarray(a.divisions).tolist()[0])  # numpy to python
                bt = type(np.asarray(b.divisions).tolist()[0])  # scalar conversion
                assert at == bt, (at, bt)
        assert_sane_keynames(a)
        assert_sane_keynames(b)
        a = _check_dask(a, check_names=check_names, check_dtypes=check_dtypes)
        b = _check_dask(b, check_names=check_names, check_dtypes=check_dtypes)
        if not check_index:
            a = a.reset_index(drop=True)
            b = b.reset_index(drop=True)
        if hasattr(a, "to_pandas"):
            a = a.to_pandas()
        if hasattr(b, "to_pandas"):
            b = b.to_pandas()
        if isinstance(a, pd.DataFrame):
            a = _maybe_sort(a)
            b = _maybe_sort(b)
>           tm.assert_frame_equal(a, b, **kwargs)
E           AssertionError: DataFrame are different
E
E           DataFrame shape mismatch
E           [left]:  (7, 3)
E           [right]: (7, 4)

dask/dataframe/utils.py:833: AssertionError

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions