# ruff: noqa: CPY001
"""
=======================================
Release Highlights for scikit-learn 1.8
=======================================

.. currentmodule:: sklearn

We are pleased to announce the release of scikit-learn 1.8! Many bug fixes
and improvements were added, as well as some key new features. Below we
detail the highlights of this release. **For an exhaustive list of
all the changes**, please refer to the :ref:`release notes <release_notes_1_8>`.

To install the latest version (with pip)::

    pip install --upgrade scikit-learn

or with conda::

    conda install -c conda-forge scikit-learn

"""

# %%
# Array API support (enables GPU computations)
# ---------------------------------------------
# The progressive adoption of the Python array API standard in scikit-learn
# means that PyTorch and CuPy input arrays are used directly, so computations
# in scikit-learn estimators and functions can run on non-CPU devices such as
# GPUs. This improves performance and makes integration with these libraries
# easier.
#
# In scikit-learn 1.8, several estimators and functions have been updated to
# support array API compatible inputs, for example PyTorch tensors and CuPy
# arrays.
#
# Array API support was added to the following estimators:
# :class:`preprocessing.StandardScaler`,
# :class:`preprocessing.PolynomialFeatures`, :class:`linear_model.RidgeCV`,
# :class:`linear_model.RidgeClassifierCV`, :class:`mixture.GaussianMixture` and
# :class:`calibration.CalibratedClassifierCV`.
#
# Array API support was also added to several metrics in the :mod:`sklearn.metrics`
# module, see :ref:`array_api_supported` for more details.
#
# Please refer to the :ref:`array API support<array_api>` page for instructions
# on using scikit-learn with array API compatible libraries such as PyTorch or CuPy.
# Note: array API support is experimental and must be explicitly enabled in both
# SciPy and scikit-learn.
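#
# As a minimal sketch of how to enable it (assuming a fresh Python session and
# PyTorch with a CUDA device; the environment variable below is SciPy's opt-in
# mechanism):
#
# .. code-block:: python
#
#     import os
#
#     os.environ["SCIPY_ARRAY_API"] = "1"  # opt in to SciPy's array API support
#
#     import torch
#     import sklearn
#     from sklearn.preprocessing import StandardScaler
#
#     sklearn.set_config(array_api_dispatch=True)  # enable dispatch in scikit-learn
#
#     X_gpu = torch.rand(1000, 10, device="cuda")
#     X_scaled = StandardScaler().fit_transform(X_gpu)
#     print(type(X_scaled), X_scaled.device)  # still a torch tensor, on the GPU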
#
# Here is an excerpt showing a feature engineering preprocessor run on the CPU,
# followed by :class:`calibration.CalibratedClassifierCV` wrapping
# :class:`linear_model.RidgeClassifierCV`, both running on a GPU with the help
# of PyTorch:
#
# .. code-block:: python
#
#     ridge_pipeline_gpu = make_pipeline(
#         # Ensure that all features (including categorical features) are
#         # preprocessed on the CPU and mapped to a numerical representation.
#         feature_preprocessor,
#         # Move the results to the GPU and perform the computations there.
#         FunctionTransformer(
#             lambda x: torch.tensor(x.to_numpy().astype(np.float32), device="cuda")
#         ),
#         CalibratedClassifierCV(
#             RidgeClassifierCV(alphas=alphas), method="temperature"
#         ),
#     )
#     with sklearn.config_context(array_api_dispatch=True):
#         cv_results = cross_validate(ridge_pipeline_gpu, features, target)
#
#
# See the `full notebook on Google Colab
# <https://colab.research.google.com/drive/1ztH8gUPv31hSjEeR_8pw20qShTwViGRx?usp=sharing>`_
# for more details. In this particular example, using the Colab GPU instead of a
# single CPU core leads to a 10x speedup, which is quite typical for such workloads.

# %%
# Free-threaded CPython 3.14 support
# ----------------------------------
#
# scikit-learn supports free-threaded CPython; in particular, free-threaded
# wheels are available for all of our supported platforms on Python 3.14.
#
# We would be very interested in user feedback. Here are a few things you can
# try:
#
# - install free-threaded CPython 3.14, run your favourite
#   scikit-learn script and check that nothing breaks unexpectedly.
#   Note that CPython 3.14 (rather than 3.13) is strongly advised because a
#   number of free-threading bugs have been fixed since CPython 3.13.
# - if you use some estimators with an `n_jobs` parameter, try changing the
#   default backend to threading with `joblib.parallel_config` as in the
#   snippet below. This could potentially speed up your code because the
#   default joblib backend is process-based and incurs more overhead than
#   threads.
#
#   .. code-block:: python
#
#       grid_search = GridSearchCV(clf, param_grid=param_grid, n_jobs=4)
#       with joblib.parallel_config(backend="threading"):
#           grid_search.fit(X, y)
#
# - don't hesitate to report any issue or unexpected performance behaviour by
#   opening a `GitHub issue <https://github.com/scikit-learn/scikit-learn/issues/new/choose>`_!
#
# Free-threaded (also known as nogil) CPython is a version of CPython that aims
# to enable efficient multi-threaded use cases by removing the Global
# Interpreter Lock (GIL).
#
# For more details about free-threaded CPython see the `py-free-threading docs
# <https://py-free-threading.github.io>`_, in particular `how to install a
# free-threaded CPython <https://py-free-threading.github.io/installing-cpython/>`_
# and the `ecosystem compatibility tracking <https://py-free-threading.github.io/tracking/>`_.
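#
# To check that you are actually running a free-threaded build with the GIL
# disabled, you can inspect the interpreter at runtime (a small sketch;
# `sys._is_gil_enabled` is available in CPython 3.13+):
#
# .. code-block:: python
#
#     import sys
#
#     print(sys.version)            # mentions "free-threading" for such builds
#     print(sys._is_gil_enabled())  # False when the GIL is actually disabled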
#
# In scikit-learn, one hope with free-threaded Python is to more efficiently
# leverage multi-core CPUs by using thread workers instead of subprocess
# workers for parallel computation when passing `n_jobs>1` in functions or
# estimators. Efficiency gains are expected by removing the need for
# inter-process communication. Be aware that switching the default joblib
# backend and testing that everything works well with free-threaded Python is an
# ongoing long-term effort.

# %%
# Temperature scaling in `CalibratedClassifierCV`
# -----------------------------------------------
# Probability calibration of classifiers with temperature scaling is now available
# in :class:`calibration.CalibratedClassifierCV` by setting `method="temperature"`.
# This method is particularly well suited for multiclass problems because it provides
# (better) calibrated probabilities with a single free parameter. This is in
# contrast to all the other available calibration methods,
# which use a "One-vs-Rest" scheme that adds more parameters for each class.
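#
# Conceptually, temperature scaling learns a single scalar temperature `T > 0`
# and maps the uncalibrated decision scores `z` to `softmax(z / T)`. A minimal
# NumPy sketch of this mapping (scikit-learn's actual implementation may differ
# in details such as how `T` is fitted):
#
# .. code-block:: python
#
#     import numpy as np
#
#     def temperature_scale(z, T):
#         """Map scores of shape (n_samples, n_classes) to probabilities."""
#         z = z / T
#         z -= z.max(axis=1, keepdims=True)  # subtract max for numerical stability
#         exp_z = np.exp(z)
#         return exp_z / exp_z.sum(axis=1, keepdims=True)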

from sklearn.calibration import CalibratedClassifierCV
from sklearn.datasets import make_classification
from sklearn.naive_bayes import GaussianNB

X, y = make_classification(n_classes=3, n_informative=8, random_state=42)
clf = GaussianNB().fit(X, y)
sig = CalibratedClassifierCV(clf, method="sigmoid", ensemble=False).fit(X, y)
ts = CalibratedClassifierCV(clf, method="temperature", ensemble=False).fit(X, y)

# %%
# The following example shows that temperature scaling can produce better calibrated
# probabilities than sigmoid calibration in a multi-class classification problem
# with 3 classes.

import matplotlib.pyplot as plt

from sklearn.calibration import CalibrationDisplay

fig, axes = plt.subplots(
    figsize=(8, 4.5),
    ncols=3,
    sharey=True,
)
for i, c in enumerate(ts.classes_):
    CalibrationDisplay.from_predictions(
        y == c, clf.predict_proba(X)[:, i], name="Uncalibrated", ax=axes[i], marker="s"
    )
    CalibrationDisplay.from_predictions(
        y == c,
        ts.predict_proba(X)[:, i],
        name="Temperature scaling",
        ax=axes[i],
        marker="o",
    )
    CalibrationDisplay.from_predictions(
        y == c, sig.predict_proba(X)[:, i], name="Sigmoid", ax=axes[i], marker="v"
    )
    axes[i].set_title(f"Class {c}")
    axes[i].set_xlabel(None)
    axes[i].set_ylabel(None)
    axes[i].get_legend().remove()
fig.suptitle("Reliability Diagrams per Class")
fig.supxlabel("Mean Predicted Probability")
fig.supylabel("Fraction of Class")
fig.legend(*axes[0].get_legend_handles_labels(), loc=(0.72, 0.5))
plt.subplots_adjust(right=0.7)
_ = fig.show()

# %%
# Efficiency improvements in linear models
# ----------------------------------------
# The fit time has been massively reduced for squared-error-based estimators
# with an L1 penalty: `ElasticNet`, `Lasso`, `MultiTaskElasticNet`,
# `MultiTaskLasso` and their CV variants. The fit time improvement is mainly
# achieved by **gap safe screening rules**. They enable the coordinate descent
# solver to set feature coefficients to zero early on and not look at them
# again. The stronger the L1 penalty, the earlier features can be excluded from
# further updates.

from time import time

from sklearn.datasets import make_regression
from sklearn.linear_model import ElasticNetCV

X, y = make_regression(n_features=10_000, random_state=0)
model = ElasticNetCV()
tic = time()
model.fit(X, y)
toc = time()
print(f"Fitting ElasticNetCV took {toc - tic:.3} seconds.")

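# %%
# The effect of the screening rules is easiest to see through the sparsity of
# the solution: the stronger the L1 penalty, the more coefficients end up at
# exactly zero and can be discarded early. A small, purely illustrative sketch
# on the data above (the `alpha` values are arbitrary):

import numpy as np

from sklearn.linear_model import Lasso

for alpha in (0.5, 5.0, 50.0):
    lasso = Lasso(alpha=alpha).fit(X, y)
    n_nonzero = np.count_nonzero(lasso.coef_)
    print(f"alpha={alpha}: {n_nonzero} non-zero coefficients out of {X.shape[1]}")
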
# %%
# HTML representation of estimators
# ---------------------------------
# Hyperparameters in the dropdown table of the HTML representation now include
# links to the online documentation. Docstring descriptions are also shown as
# tooltips on hover.

from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

clf = make_pipeline(StandardScaler(), LogisticRegression(random_state=0, C=10))

# %%
# Expand the estimator diagram below by clicking on "LogisticRegression" and then on
# "Parameters".

clf

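
# %%
# If you want to reuse this HTML representation outside of a notebook, for
# instance to embed it in a report, :func:`utils.estimator_html_repr` returns
# it as a string (shown here only as a quick sketch):

from sklearn.utils import estimator_html_repr

html_snippet = estimator_html_repr(clf)
print(html_snippet[:80], "...")
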
# %%
# DecisionTreeRegressor with `criterion="absolute_error"`
# --------------------------------------------------------
# :class:`tree.DecisionTreeRegressor` with `criterion="absolute_error"`
# now runs much faster. Its complexity is now `O(n * log(n))` compared to
# `O(n**2)` previously, which allows it to scale to millions of data points.
#
# As an illustration, on a dataset with 100_000 samples and 1 feature, doing a
# single split takes on the order of 100 ms, compared to ~20 seconds before.

import time

from sklearn.datasets import make_regression
from sklearn.tree import DecisionTreeRegressor

X, y = make_regression(n_samples=100_000, n_features=1)
tree = DecisionTreeRegressor(criterion="absolute_error", max_depth=1)

tic = time.time()
tree.fit(X, y)
elapsed = time.time() - tic
print(f"Fit took {elapsed:.2f} seconds")

# %%
# ClassicalMDS
# ------------
# Classical MDS, also known as "Principal Coordinates Analysis" (PCoA)
# or "Torgerson's scaling", is now available within the `sklearn.manifold`
# module as :class:`manifold.ClassicalMDS`. Classical MDS is close to PCA:
# instead of approximating distances, it approximates pairwise scalar products,
# which has an exact analytic solution in terms of an eigendecomposition.
#
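# For reference, the classical construction double-centers the matrix of
# squared pairwise distances and takes the top eigenvectors of the resulting
# Gram matrix. A rough NumPy sketch, where `X` stands for a generic data matrix
# (scikit-learn's implementation may differ in details such as sign conventions
# and the handling of negative eigenvalues):
#
# .. code-block:: python
#
#     import numpy as np
#     from scipy.spatial.distance import cdist
#
#     D = cdist(X, X)                        # pairwise Euclidean distances
#     n = D.shape[0]
#     J = np.eye(n) - np.ones((n, n)) / n    # centering matrix
#     B = -0.5 * J @ (D**2) @ J              # double-centered Gram matrix
#     eigvals, eigvecs = np.linalg.eigh(B)
#     top = np.argsort(eigvals)[::-1][:2]    # keep the two largest eigenvalues
#     embedding = eigvecs[:, top] * np.sqrt(eigvals[top])
#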
# Let's illustrate this new addition by using it on an S-curve dataset to
# get a low-dimensional representation of the data.

import matplotlib.pyplot as plt
from matplotlib import ticker

from sklearn import datasets, manifold

n_samples = 1500
S_points, S_color = datasets.make_s_curve(n_samples, random_state=0)
md_classical = manifold.ClassicalMDS(n_components=2)
S_scaling = md_classical.fit_transform(S_points)

fig = plt.figure(figsize=(8, 4))
ax1 = fig.add_subplot(1, 2, 1, projection="3d")
x, y, z = S_points.T
ax1.scatter(x, y, z, c=S_color, s=50, alpha=0.8)
ax1.set_title("Original S-curve samples", size=16)
ax1.view_init(azim=-60, elev=9)
for axis in (ax1.xaxis, ax1.yaxis, ax1.zaxis):
    axis.set_major_locator(ticker.MultipleLocator(1))

ax2 = fig.add_subplot(1, 2, 2)
x2, y2 = S_scaling.T
ax2.scatter(x2, y2, c=S_color, s=50, alpha=0.8)
ax2.set_title("Classical MDS", size=16)
for axis in (ax2.xaxis, ax2.yaxis):
    axis.set_major_formatter(ticker.NullFormatter())

plt.show()