Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 25 additions & 14 deletions examples/ensemble/plot_stack_predictors.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
# Authors: Guillaume Lemaitre <g.lemaitre58@gmail.com>
# Maria Telenczuk <https://github.com/maikia>
# License: BSD 3 clause

# %%
from sklearn import set_config

set_config(display="diagram")
Expand Down Expand Up @@ -99,11 +99,10 @@ def load_ames_housing():

cat_selector = make_column_selector(dtype_include=object)
num_selector = make_column_selector(dtype_include=np.number)
cat_selector(X)
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can leave those two outputs in. They are not costly to evaluate, and they show the names of the columns.


cat_selector(X)
# %%
num_selector(X)

# %%
# Then, we will need to design preprocessing pipelines which depend on the
# ending regressor. If the ending regressor is a linear model, one needs to
Expand Down Expand Up @@ -175,14 +174,16 @@ def load_ames_housing():
# %%
from sklearn.ensemble import RandomForestRegressor

rf_pipeline = make_pipeline(tree_preprocessor, RandomForestRegressor(random_state=42))
rf_pipeline = make_pipeline(
tree_preprocessor, RandomForestRegressor(n_estimators=25, random_state=42)
)
rf_pipeline

# %%
from sklearn.ensemble import HistGradientBoostingRegressor

gbdt_pipeline = make_pipeline(
tree_preprocessor, HistGradientBoostingRegressor(random_state=0)
tree_preprocessor, HistGradientBoostingRegressor(max_iter=50, random_state=0)
)
gbdt_pipeline

Expand Down Expand Up @@ -213,7 +214,7 @@ def load_ames_housing():

import time
import matplotlib.pyplot as plt
from sklearn.model_selection import cross_validate, cross_val_predict
from sklearn.model_selection import cross_validate, KFold


def plot_regression_results(ax, y_true, y_pred, title, scores, elapsed_time):
Expand Down Expand Up @@ -244,27 +245,37 @@ def plot_regression_results(ax, y_true, y_pred, title, scores, elapsed_time):
fig, axs = plt.subplots(2, 2, figsize=(9, 7))
axs = np.ravel(axs)

cv = KFold(n_splits=3, shuffle=False)

for ax, (name, est) in zip(
axs, estimators + [("Stacking Regressor", stacking_regressor)]
):
start_time = time.time()
score = cross_validate(
est, X, y, scoring=["r2", "neg_mean_absolute_error"], n_jobs=-1, verbose=0
cv_results = cross_validate(
est,
X,
y,
cv=cv,
scoring=["r2", "neg_mean_absolute_error"],
n_jobs=-1,
verbose=0,
return_estimator=True,
)
elapsed_time = time.time() - start_time

y_pred = cross_val_predict(est, X, y, n_jobs=-1, verbose=0)
y_pred = np.zeros_like(y)
for estimator, fold in zip(cv_results["estimator"], cv.split(X)):
y_pred[fold[1]] = estimator.predict(X.iloc[fold[1]])

plot_regression_results(
ax,
y,
y_pred,
name,
(r"$R^2={:.2f} \pm {:.2f}$" + "\n" + r"$MAE={:.2f} \pm {:.2f}$").format(
np.mean(score["test_r2"]),
np.std(score["test_r2"]),
-np.mean(score["test_neg_mean_absolute_error"]),
np.std(score["test_neg_mean_absolute_error"]),
np.mean(cv_results["test_r2"]),
np.std(cv_results["test_r2"]),
-np.mean(cv_results["test_neg_mean_absolute_error"]),
np.std(cv_results["test_neg_mean_absolute_error"]),
),
elapsed_time,
)
Expand Down