From 0d7d738670c2b80a5d898045c5843d405fc5efe2 Mon Sep 17 00:00:00 2001 From: Raghav R V Date: Sat, 4 Jul 2015 22:43:39 +0530 Subject: [PATCH] FIX all the examples to use the new cv classes --- examples/applications/face_recognition.py | 4 +-- examples/calibration/plot_calibration.py | 2 +- .../calibration/plot_calibration_curve.py | 2 +- .../plot_classifier_comparison.py | 2 +- ...e_agglomeration_vs_univariate_selection.py | 6 ++-- .../covariance/plot_covariance_estimation.py | 2 +- .../plot_pca_vs_fa_model_selection.py | 4 +-- .../ensemble/plot_gradient_boosting_oob.py | 8 +++--- examples/ensemble/plot_partial_dependence.py | 2 +- examples/exercises/plot_cv_diabetes.py | 16 +++++++---- examples/exercises/plot_cv_digits.py | 5 ++-- ...lot_permutation_test_for_classification.py | 5 ++-- .../plot_rfe_with_cross_validation.py | 4 +-- examples/feature_stacker.py | 2 +- .../gaussian_process/gp_diabetes_dataset.py | 9 +++--- examples/linear_model/plot_sgd_comparison.py | 2 +- examples/missing_values.py | 2 +- examples/mixture/plot_gmm_classifier.py | 6 ++-- examples/model_selection/README.txt | 3 +- .../model_selection/grid_search_digits.py | 4 +-- .../grid_search_text_feature_extraction.py | 3 +- .../model_selection/plot_confusion_matrix.py | 2 +- .../model_selection/plot_learning_curve.py | 28 ++++++++++++------- .../model_selection/plot_precision_recall.py | 2 +- examples/model_selection/plot_roc.py | 2 +- examples/model_selection/plot_roc_crossval.py | 8 +++--- .../plot_underfitting_overfitting.py | 6 ++-- .../model_selection/plot_validation_curve.py | 2 +- examples/model_selection/randomized_search.py | 3 +- .../neighbors/plot_digits_kde_sampling.py | 2 +- .../plot_rbm_logistic_classification.py | 2 +- examples/plot_cv_predict.py | 2 +- examples/plot_digits_pipe.py | 2 +- examples/plot_kernel_ridge_regression.py | 2 +- examples/svm/plot_rbf_parameters.py | 6 ++-- examples/svm/plot_svm_anova.py | 5 ++-- examples/svm/plot_svm_scale_c.py | 8 +++--- 37 files changed, 96 
insertions(+), 79 deletions(-) diff --git a/examples/applications/face_recognition.py b/examples/applications/face_recognition.py index be466e9532cdf..b79599ecb3a06 100644 --- a/examples/applications/face_recognition.py +++ b/examples/applications/face_recognition.py @@ -31,9 +31,9 @@ import logging import matplotlib.pyplot as plt -from sklearn.cross_validation import train_test_split +from sklearn.model_selection import train_test_split +from sklearn.model_selection import GridSearchCV from sklearn.datasets import fetch_lfw_people -from sklearn.grid_search import GridSearchCV from sklearn.metrics import classification_report from sklearn.metrics import confusion_matrix from sklearn.decomposition import RandomizedPCA diff --git a/examples/calibration/plot_calibration.py b/examples/calibration/plot_calibration.py index 2267f02dd0022..299f924e2a468 100644 --- a/examples/calibration/plot_calibration.py +++ b/examples/calibration/plot_calibration.py @@ -36,7 +36,7 @@ from sklearn.naive_bayes import GaussianNB from sklearn.metrics import brier_score_loss from sklearn.calibration import CalibratedClassifierCV -from sklearn.cross_validation import train_test_split +from sklearn.model_selection import train_test_split n_samples = 50000 diff --git a/examples/calibration/plot_calibration_curve.py b/examples/calibration/plot_calibration_curve.py index 42dc8473e6c30..a37e3158c0c12 100644 --- a/examples/calibration/plot_calibration_curve.py +++ b/examples/calibration/plot_calibration_curve.py @@ -56,7 +56,7 @@ from sklearn.metrics import (brier_score_loss, precision_score, recall_score, f1_score) from sklearn.calibration import CalibratedClassifierCV, calibration_curve -from sklearn.cross_validation import train_test_split +from sklearn.model_selection import train_test_split # Create dataset of classification task with many redundant and few diff --git a/examples/classification/plot_classifier_comparison.py b/examples/classification/plot_classifier_comparison.py index 
7be3c0cacad9f..5c7673a5ff11e 100644 --- a/examples/classification/plot_classifier_comparison.py +++ b/examples/classification/plot_classifier_comparison.py @@ -31,7 +31,7 @@ import numpy as np import matplotlib.pyplot as plt from matplotlib.colors import ListedColormap -from sklearn.cross_validation import train_test_split +from sklearn.model_selection import train_test_split from sklearn.preprocessing import StandardScaler from sklearn.datasets import make_moons, make_circles, make_classification from sklearn.neighbors import KNeighborsClassifier diff --git a/examples/cluster/plot_feature_agglomeration_vs_univariate_selection.py b/examples/cluster/plot_feature_agglomeration_vs_univariate_selection.py index 488db4d64855d..a0152c85baf60 100644 --- a/examples/cluster/plot_feature_agglomeration_vs_univariate_selection.py +++ b/examples/cluster/plot_feature_agglomeration_vs_univariate_selection.py @@ -30,9 +30,9 @@ from sklearn.cluster import FeatureAgglomeration from sklearn.linear_model import BayesianRidge from sklearn.pipeline import Pipeline -from sklearn.grid_search import GridSearchCV from sklearn.externals.joblib import Memory -from sklearn.cross_validation import KFold +from sklearn.model_selection import GridSearchCV +from sklearn.model_selection import KFold ############################################################################### # Generate data @@ -60,7 +60,7 @@ ############################################################################### # Compute the coefs of a Bayesian Ridge with GridSearch -cv = KFold(len(y), 2) # cross-validation generator for model selection +cv = KFold(2) # cross-validation generator for model selection ridge = BayesianRidge() cachedir = tempfile.mkdtemp() mem = Memory(cachedir=cachedir, verbose=1) diff --git a/examples/covariance/plot_covariance_estimation.py b/examples/covariance/plot_covariance_estimation.py index 85e26705b03e9..96f637974ee29 100644 --- a/examples/covariance/plot_covariance_estimation.py +++ 
b/examples/covariance/plot_covariance_estimation.py @@ -49,7 +49,7 @@ from sklearn.covariance import LedoitWolf, OAS, ShrunkCovariance, \ log_likelihood, empirical_covariance -from sklearn.grid_search import GridSearchCV +from sklearn.model_selection import GridSearchCV ############################################################################### diff --git a/examples/decomposition/plot_pca_vs_fa_model_selection.py b/examples/decomposition/plot_pca_vs_fa_model_selection.py index 067bcd7b6b479..89bb707a37699 100644 --- a/examples/decomposition/plot_pca_vs_fa_model_selection.py +++ b/examples/decomposition/plot_pca_vs_fa_model_selection.py @@ -35,8 +35,8 @@ from sklearn.decomposition import PCA, FactorAnalysis from sklearn.covariance import ShrunkCovariance, LedoitWolf -from sklearn.cross_validation import cross_val_score -from sklearn.grid_search import GridSearchCV +from sklearn.model_selection import cross_val_score +from sklearn.model_selection import GridSearchCV ############################################################################### # Create the data diff --git a/examples/ensemble/plot_gradient_boosting_oob.py b/examples/ensemble/plot_gradient_boosting_oob.py index a39f709d36979..39e623f261cca 100644 --- a/examples/ensemble/plot_gradient_boosting_oob.py +++ b/examples/ensemble/plot_gradient_boosting_oob.py @@ -33,8 +33,8 @@ import matplotlib.pyplot as plt from sklearn import ensemble -from sklearn.cross_validation import KFold -from sklearn.cross_validation import train_test_split +from sklearn.model_selection import KFold +from sklearn.model_selection import train_test_split # Generate data (adapted from G. 
Ridgeway's gbm example) @@ -75,10 +75,10 @@ def heldout_score(clf, X_test, y_test): def cv_estimate(n_folds=3): - cv = KFold(n=X_train.shape[0], n_folds=n_folds) + cv = KFold(n_folds=n_folds) cv_clf = ensemble.GradientBoostingClassifier(**params) val_scores = np.zeros((n_estimators,), dtype=np.float64) - for train, test in cv: + for train, test in cv.split(X_train, y_train): cv_clf.fit(X_train[train], y_train[train]) val_scores += heldout_score(cv_clf, X_train[test], y_train[test]) val_scores /= n_folds diff --git a/examples/ensemble/plot_partial_dependence.py b/examples/ensemble/plot_partial_dependence.py index b480e228d3ca3..d4a26166944c3 100644 --- a/examples/ensemble/plot_partial_dependence.py +++ b/examples/ensemble/plot_partial_dependence.py @@ -51,7 +51,7 @@ from mpl_toolkits.mplot3d import Axes3D -from sklearn.cross_validation import train_test_split +from sklearn.model_selection import train_test_split from sklearn.ensemble import GradientBoostingRegressor from sklearn.ensemble.partial_dependence import plot_partial_dependence from sklearn.ensemble.partial_dependence import partial_dependence diff --git a/examples/exercises/plot_cv_diabetes.py b/examples/exercises/plot_cv_diabetes.py index 424c037a5547e..5524d2e3c1334 100644 --- a/examples/exercises/plot_cv_diabetes.py +++ b/examples/exercises/plot_cv_diabetes.py @@ -14,13 +14,17 @@ import numpy as np import matplotlib.pyplot as plt -from sklearn import cross_validation, datasets, linear_model +from sklearn import datasets +from sklearn.linear_model import LassoCV +from sklearn.linear_model import Lasso +from sklearn.model_selection import KFold +from sklearn.model_selection import cross_val_score diabetes = datasets.load_diabetes() X = diabetes.data[:150] y = diabetes.target[:150] -lasso = linear_model.Lasso() +lasso = Lasso() alphas = np.logspace(-4, -.5, 30) scores = list() @@ -28,7 +32,7 @@ for alpha in alphas: lasso.alpha = alpha - this_scores = cross_validation.cross_val_score(lasso, X, y, n_jobs=1) 
+ this_scores = cross_val_score(lasso, X, y, n_jobs=1) scores.append(np.mean(this_scores)) scores_std.append(np.std(this_scores)) @@ -51,15 +55,15 @@ # performs cross-validation on the training data it receives). # We use external cross-validation to see how much the automatically obtained # alphas differ across different cross-validation folds. -lasso_cv = linear_model.LassoCV(alphas=alphas) -k_fold = cross_validation.KFold(len(X), 3) +lasso_cv = LassoCV(alphas=alphas) +k_fold = KFold(3) print("Answer to the bonus question:", "how much can you trust the selection of alpha?") print() print("Alpha parameters maximising the generalization score on different") print("subsets of the data:") -for k, (train, test) in enumerate(k_fold): +for k, (train, test) in enumerate(k_fold.split(X, y)): lasso_cv.fit(X[train], y[train]) print("[fold {0}] alpha: {1:.5f}, score: {2:.5f}". format(k, lasso_cv.alpha_, lasso_cv.score(X[test], y[test]))) diff --git a/examples/exercises/plot_cv_digits.py b/examples/exercises/plot_cv_digits.py index 92f04a935cc5a..a68f92afbdad9 100644 --- a/examples/exercises/plot_cv_digits.py +++ b/examples/exercises/plot_cv_digits.py @@ -12,7 +12,8 @@ import numpy as np -from sklearn import cross_validation, datasets, svm +from sklearn.model_selection import cross_val_score +from sklearn import datasets, svm digits = datasets.load_digits() X = digits.data @@ -25,7 +26,7 @@ scores_std = list() for C in C_s: svc.C = C - this_scores = cross_validation.cross_val_score(svc, X, y, n_jobs=1) + this_scores = cross_val_score(svc, X, y, n_jobs=1) scores.append(np.mean(this_scores)) scores_std.append(np.std(this_scores)) diff --git a/examples/feature_selection/plot_permutation_test_for_classification.py b/examples/feature_selection/plot_permutation_test_for_classification.py index 4df102578c9da..24b999451a067 100644 --- a/examples/feature_selection/plot_permutation_test_for_classification.py +++ b/examples/feature_selection/plot_permutation_test_for_classification.py 
@@ -20,7 +20,8 @@ import matplotlib.pyplot as plt from sklearn.svm import SVC -from sklearn.cross_validation import StratifiedKFold, permutation_test_score +from sklearn.model_selection import StratifiedKFold +from sklearn.model_selection import permutation_test_score from sklearn import datasets @@ -39,7 +40,7 @@ X = np.c_[X, E] svm = SVC(kernel='linear') -cv = StratifiedKFold(y, 2) +cv = StratifiedKFold(2) score, permutation_scores, pvalue = permutation_test_score( svm, X, y, scoring="accuracy", cv=cv, n_permutations=100, n_jobs=1) diff --git a/examples/feature_selection/plot_rfe_with_cross_validation.py b/examples/feature_selection/plot_rfe_with_cross_validation.py index 232aa115c2d77..8b22ab0d54108 100644 --- a/examples/feature_selection/plot_rfe_with_cross_validation.py +++ b/examples/feature_selection/plot_rfe_with_cross_validation.py @@ -10,7 +10,7 @@ import matplotlib.pyplot as plt from sklearn.svm import SVC -from sklearn.cross_validation import StratifiedKFold +from sklearn.model_selection import StratifiedKFold from sklearn.feature_selection import RFECV from sklearn.datasets import make_classification @@ -23,7 +23,7 @@ svc = SVC(kernel="linear") # The "accuracy" scoring is proportional to the number of correct # classifications -rfecv = RFECV(estimator=svc, step=1, cv=StratifiedKFold(y, 2), +rfecv = RFECV(estimator=svc, step=1, cv=StratifiedKFold(2), scoring='accuracy') rfecv.fit(X, y) diff --git a/examples/feature_stacker.py b/examples/feature_stacker.py index d1f9453e28e58..4ce574aa36bca 100644 --- a/examples/feature_stacker.py +++ b/examples/feature_stacker.py @@ -20,7 +20,7 @@ # License: BSD 3 clause from sklearn.pipeline import Pipeline, FeatureUnion -from sklearn.grid_search import GridSearchCV +from sklearn.model_selection import GridSearchCV from sklearn.svm import SVC from sklearn.datasets import load_iris from sklearn.decomposition import PCA diff --git a/examples/gaussian_process/gp_diabetes_dataset.py 
b/examples/gaussian_process/gp_diabetes_dataset.py index 9baaeccf17d6d..d188e268a0959 100644 --- a/examples/gaussian_process/gp_diabetes_dataset.py +++ b/examples/gaussian_process/gp_diabetes_dataset.py @@ -25,7 +25,8 @@ from sklearn import datasets from sklearn.gaussian_process import GaussianProcess -from sklearn.cross_validation import cross_val_score, KFold +from sklearn.model_selection import cross_val_score +from sklearn.model_selection import KFold # Load the dataset from scikit's data sets diabetes = datasets.load_diabetes() @@ -43,9 +44,9 @@ gp.theta0 = gp.theta_ # Given correlation parameter = MLE gp.thetaL, gp.thetaU = None, None # None bounds deactivate MLE -# Perform a cross-validation estimate of the coefficient of determination using -# the cross_validation module using all CPUs available on the machine +# Perform a cross-validated estimate of the coefficient of determination with +# model_selection.cross_val_score (n_jobs=1, i.e. a single CPU is used) K = 20 # folds -R2 = cross_val_score(gp, X, y=y, cv=KFold(y.size, K), n_jobs=1).mean() +R2 = cross_val_score(gp, X, y=y, cv=KFold(K), n_jobs=1).mean() print("The %d-Folds estimate of the coefficient of determination is R2 = %s" % (K, R2)) diff --git a/examples/linear_model/plot_sgd_comparison.py b/examples/linear_model/plot_sgd_comparison.py index 123aff9f06a6b..049a35e3309a4 100644 --- a/examples/linear_model/plot_sgd_comparison.py +++ b/examples/linear_model/plot_sgd_comparison.py @@ -14,7 +14,7 @@ import matplotlib.pyplot as plt from sklearn import datasets -from sklearn.cross_validation import train_test_split +from sklearn.model_selection import train_test_split from sklearn.linear_model import SGDClassifier, Perceptron from sklearn.linear_model import PassiveAggressiveClassifier diff --git a/examples/missing_values.py b/examples/missing_values.py index 59444b36490e3..8a0895f9a589f 100644 --- a/examples/missing_values.py +++ b/examples/missing_values.py @@ -28,7 +28,7 @@ from 
sklearn.ensemble import RandomForestRegressor from sklearn.pipeline import Pipeline from sklearn.preprocessing import Imputer -from sklearn.cross_validation import cross_val_score +from sklearn.model_selection import cross_val_score rng = np.random.RandomState(0) diff --git a/examples/mixture/plot_gmm_classifier.py b/examples/mixture/plot_gmm_classifier.py index cebbe36cada69..201bd7dd3934a 100644 --- a/examples/mixture/plot_gmm_classifier.py +++ b/examples/mixture/plot_gmm_classifier.py @@ -33,7 +33,7 @@ import numpy as np from sklearn import datasets -from sklearn.cross_validation import StratifiedKFold +from sklearn.model_selection import StratifiedKFold from sklearn.externals.six.moves import xrange from sklearn.mixture import GMM @@ -55,9 +55,9 @@ def make_ellipses(gmm, ax): # Break up the dataset into non-overlapping training (75%) and testing # (25%) sets. -skf = StratifiedKFold(iris.target, n_folds=4) +skf = StratifiedKFold(n_folds=4) # Only take the first fold. -train_index, test_index = next(iter(skf)) +train_index, test_index = next(iter(skf.split(iris.data, iris.target))) X_train = iris.data[train_index] diff --git a/examples/model_selection/README.txt b/examples/model_selection/README.txt index 553c6e7d6498e..b35a778b28a7f 100644 --- a/examples/model_selection/README.txt +++ b/examples/model_selection/README.txt @@ -3,5 +3,4 @@ Model Selection ----------------------- -Examples concerning model selection, mostly contained in the -:mod:`sklearn.grid_search` and :mod:`sklearn.cross_validation` modules. +Examples related to the :mod:`sklearn.model_selection` module. 
diff --git a/examples/model_selection/grid_search_digits.py b/examples/model_selection/grid_search_digits.py index c8aec1bab8c0f..e7ae63069d656 100644 --- a/examples/model_selection/grid_search_digits.py +++ b/examples/model_selection/grid_search_digits.py @@ -19,8 +19,8 @@ from __future__ import print_function from sklearn import datasets -from sklearn.cross_validation import train_test_split -from sklearn.grid_search import GridSearchCV +from sklearn.model_selection import train_test_split +from sklearn.model_selection import GridSearchCV from sklearn.metrics import classification_report from sklearn.svm import SVC diff --git a/examples/model_selection/grid_search_text_feature_extraction.py b/examples/model_selection/grid_search_text_feature_extraction.py index 11b690d91e2ef..daf82718d42e1 100644 --- a/examples/model_selection/grid_search_text_feature_extraction.py +++ b/examples/model_selection/grid_search_text_feature_extraction.py @@ -1,3 +1,4 @@ + """ ========================================================== Sample pipeline for text feature extraction and evaluation @@ -56,7 +57,7 @@ from sklearn.feature_extraction.text import CountVectorizer from sklearn.feature_extraction.text import TfidfTransformer from sklearn.linear_model import SGDClassifier -from sklearn.grid_search import GridSearchCV +from sklearn.model_selection import GridSearchCV from sklearn.pipeline import Pipeline print(__doc__) diff --git a/examples/model_selection/plot_confusion_matrix.py b/examples/model_selection/plot_confusion_matrix.py index 771d058e2a4a5..250d71c08c442 100644 --- a/examples/model_selection/plot_confusion_matrix.py +++ b/examples/model_selection/plot_confusion_matrix.py @@ -30,7 +30,7 @@ import matplotlib.pyplot as plt from sklearn import svm, datasets -from sklearn.cross_validation import train_test_split +from sklearn.model_selection import train_test_split from sklearn.metrics import confusion_matrix # import some data to play with diff --git 
a/examples/model_selection/plot_learning_curve.py b/examples/model_selection/plot_learning_curve.py index 7a47fd574635d..c358471e1eebc 100644 --- a/examples/model_selection/plot_learning_curve.py +++ b/examples/model_selection/plot_learning_curve.py @@ -17,11 +17,11 @@ import numpy as np import matplotlib.pyplot as plt -from sklearn import cross_validation from sklearn.naive_bayes import GaussianNB from sklearn.svm import SVC from sklearn.datasets import load_digits -from sklearn.learning_curve import learning_curve +from sklearn.model_selection import learning_curve +from sklearn.model_selection import ShuffleSplit def plot_learning_curve(estimator, title, X, y, ylim=None, cv=None, @@ -48,10 +48,20 @@ def plot_learning_curve(estimator, title, X, y, ylim=None, cv=None, ylim : tuple, shape (ymin, ymax), optional Defines minimum and maximum yvalues plotted. - cv : integer, cross-validation generator, optional - If an integer is passed, it is the number of folds (defaults to 3). - Specific cross-validation objects can be passed, see - sklearn.cross_validation module for the list of possible objects + cv : int, cross-validation generator or an iterable, optional + Determines the cross-validation splitting strategy. + Possible inputs for cv are: + - None, to use the default 3-fold cross-validation, + - integer, to specify the number of folds. + - An object to be used as a cross-validation generator. + - An iterable yielding train/test splits. + + For integer/None inputs, if ``y`` is binary or multiclass, + :class:`StratifiedKFold` is used. If the estimator is not a classifier + or if ``y`` is neither binary nor multiclass, :class:`KFold` is used. + + Refer to the :ref:`User Guide <cross_validation>` for the various + cross-validators that can be used here. n_jobs : integer, optional Number of jobs to run in parallel (default 1). 
@@ -91,16 +101,14 @@ def plot_learning_curve(estimator, title, X, y, ylim=None, cv=None, title = "Learning Curves (Naive Bayes)" # Cross validation with 100 iterations to get smoother mean test and train # score curves, each time with 20% data randomly selected as a validation set. -cv = cross_validation.ShuffleSplit(digits.data.shape[0], n_iter=100, - test_size=0.2, random_state=0) +cv = ShuffleSplit(n_iter=100, test_size=0.2, random_state=0) estimator = GaussianNB() plot_learning_curve(estimator, title, X, y, ylim=(0.7, 1.01), cv=cv, n_jobs=4) title = "Learning Curves (SVM, RBF kernel, $\gamma=0.001$)" # SVC is more expensive so we do a lower number of CV iterations: -cv = cross_validation.ShuffleSplit(digits.data.shape[0], n_iter=10, - test_size=0.2, random_state=0) +cv = ShuffleSplit(n_iter=10, test_size=0.2, random_state=0) estimator = SVC(gamma=0.001) plot_learning_curve(estimator, title, X, y, (0.7, 1.01), cv=cv, n_jobs=4) diff --git a/examples/model_selection/plot_precision_recall.py b/examples/model_selection/plot_precision_recall.py index 6617a6b5d88fa..9f48d93d8b298 100644 --- a/examples/model_selection/plot_precision_recall.py +++ b/examples/model_selection/plot_precision_recall.py @@ -78,7 +78,7 @@ from sklearn import svm, datasets from sklearn.metrics import precision_recall_curve from sklearn.metrics import average_precision_score -from sklearn.cross_validation import train_test_split +from sklearn.model_selection import train_test_split from sklearn.preprocessing import label_binarize from sklearn.multiclass import OneVsRestClassifier diff --git a/examples/model_selection/plot_roc.py b/examples/model_selection/plot_roc.py index 49ae4b5fe5ce7..94aeecab05fd8 100644 --- a/examples/model_selection/plot_roc.py +++ b/examples/model_selection/plot_roc.py @@ -34,7 +34,7 @@ import matplotlib.pyplot as plt from sklearn import svm, datasets from sklearn.metrics import roc_curve, auc -from sklearn.cross_validation import train_test_split +from 
sklearn.model_selection import train_test_split from sklearn.preprocessing import label_binarize from sklearn.multiclass import OneVsRestClassifier diff --git a/examples/model_selection/plot_roc_crossval.py b/examples/model_selection/plot_roc_crossval.py index 0599813653640..91a41ffbaadcb 100644 --- a/examples/model_selection/plot_roc_crossval.py +++ b/examples/model_selection/plot_roc_crossval.py @@ -37,7 +37,7 @@ from sklearn import svm, datasets from sklearn.metrics import roc_curve, auc -from sklearn.cross_validation import StratifiedKFold +from sklearn.model_selection import StratifiedKFold ############################################################################### # Data IO and generation @@ -57,7 +57,7 @@ # Classification and ROC analysis # Run classifier with cross-validation and plot ROC curves -cv = StratifiedKFold(y, n_folds=6) +cv = StratifiedKFold(n_folds=6) classifier = svm.SVC(kernel='linear', probability=True, random_state=random_state) @@ -65,7 +65,7 @@ mean_fpr = np.linspace(0, 1, 100) all_tpr = [] -for i, (train, test) in enumerate(cv): +for i, (train, test) in enumerate(cv.split(X, y)): probas_ = classifier.fit(X[train], y[train]).predict_proba(X[test]) # Compute ROC curve and area the curve fpr, tpr, thresholds = roc_curve(y[test], probas_[:, 1]) @@ -76,7 +76,7 @@ plt.plot([0, 1], [0, 1], '--', color=(0.6, 0.6, 0.6), label='Luck') -mean_tpr /= len(cv) +mean_tpr /= cv.n_splits(X, y) mean_tpr[-1] = 1.0 mean_auc = auc(mean_fpr, mean_tpr) plt.plot(mean_fpr, mean_tpr, 'k--', diff --git a/examples/model_selection/plot_underfitting_overfitting.py b/examples/model_selection/plot_underfitting_overfitting.py index f8958cbffe21b..ff454664c7b18 100644 --- a/examples/model_selection/plot_underfitting_overfitting.py +++ b/examples/model_selection/plot_underfitting_overfitting.py @@ -27,7 +27,7 @@ from sklearn.pipeline import Pipeline from sklearn.preprocessing import PolynomialFeatures from sklearn.linear_model import LinearRegression -from sklearn 
import cross_validation +from sklearn.model_selection import cross_val_score np.random.seed(0) @@ -51,8 +51,8 @@ pipeline.fit(X[:, np.newaxis], y) # Evaluate the models using crossvalidation - scores = cross_validation.cross_val_score(pipeline, - X[:, np.newaxis], y, scoring="mean_squared_error", cv=10) + scores = cross_val_score(pipeline, X[:, np.newaxis], y, + scoring="mean_squared_error", cv=10) X_test = np.linspace(0, 1, 100) plt.plot(X_test, pipeline.predict(X_test[:, np.newaxis]), label="Model") diff --git a/examples/model_selection/plot_validation_curve.py b/examples/model_selection/plot_validation_curve.py index 336e0349eacbf..4b1c5c87ce7ff 100644 --- a/examples/model_selection/plot_validation_curve.py +++ b/examples/model_selection/plot_validation_curve.py @@ -17,7 +17,7 @@ import numpy as np from sklearn.datasets import load_digits from sklearn.svm import SVC -from sklearn.learning_curve import validation_curve +from sklearn.model_selection import validation_curve digits = load_digits() X, y = digits.data, digits.target diff --git a/examples/model_selection/randomized_search.py b/examples/model_selection/randomized_search.py index 0682b24689813..85a16c6f52d55 100644 --- a/examples/model_selection/randomized_search.py +++ b/examples/model_selection/randomized_search.py @@ -26,7 +26,8 @@ from operator import itemgetter from scipy.stats import randint as sp_randint -from sklearn.grid_search import GridSearchCV, RandomizedSearchCV +from sklearn.model_selection import GridSearchCV +from sklearn.model_selection import RandomizedSearchCV from sklearn.datasets import load_digits from sklearn.ensemble import RandomForestClassifier diff --git a/examples/neighbors/plot_digits_kde_sampling.py b/examples/neighbors/plot_digits_kde_sampling.py index 4680a41780aed..ba59fb5ece537 100644 --- a/examples/neighbors/plot_digits_kde_sampling.py +++ b/examples/neighbors/plot_digits_kde_sampling.py @@ -16,7 +16,7 @@ from sklearn.datasets import load_digits from sklearn.neighbors 
import KernelDensity from sklearn.decomposition import PCA -from sklearn.grid_search import GridSearchCV +from sklearn.model_selection import GridSearchCV # load the data digits = load_digits() diff --git a/examples/neural_networks/plot_rbm_logistic_classification.py b/examples/neural_networks/plot_rbm_logistic_classification.py index 9f085036d2013..2b9b15fe3d966 100644 --- a/examples/neural_networks/plot_rbm_logistic_classification.py +++ b/examples/neural_networks/plot_rbm_logistic_classification.py @@ -37,7 +37,7 @@ from scipy.ndimage import convolve from sklearn import linear_model, datasets, metrics -from sklearn.cross_validation import train_test_split +from sklearn.model_selection import train_test_split from sklearn.neural_network import BernoulliRBM from sklearn.pipeline import Pipeline diff --git a/examples/plot_cv_predict.py b/examples/plot_cv_predict.py index 5a9e541949dc9..4657ff816369a 100644 --- a/examples/plot_cv_predict.py +++ b/examples/plot_cv_predict.py @@ -8,7 +8,7 @@ """ from sklearn import datasets -from sklearn.cross_validation import cross_val_predict +from sklearn.model_selection import cross_val_predict from sklearn import linear_model import matplotlib.pyplot as plt diff --git a/examples/plot_digits_pipe.py b/examples/plot_digits_pipe.py index 139ade15ba7c2..cd134fe20ddec 100644 --- a/examples/plot_digits_pipe.py +++ b/examples/plot_digits_pipe.py @@ -25,7 +25,7 @@ from sklearn import linear_model, decomposition, datasets from sklearn.pipeline import Pipeline -from sklearn.grid_search import GridSearchCV +from sklearn.model_selection import GridSearchCV logistic = linear_model.LogisticRegression() diff --git a/examples/plot_kernel_ridge_regression.py b/examples/plot_kernel_ridge_regression.py index 19aeece6658cc..30d2025e2020c 100644 --- a/examples/plot_kernel_ridge_regression.py +++ b/examples/plot_kernel_ridge_regression.py @@ -41,7 +41,7 @@ import numpy as np from sklearn.svm import SVR -from sklearn.grid_search import GridSearchCV 
+from sklearn.model_selection import GridSearchCV from sklearn.learning_curve import learning_curve from sklearn.kernel_ridge import KernelRidge import matplotlib.pyplot as plt diff --git a/examples/svm/plot_rbf_parameters.py b/examples/svm/plot_rbf_parameters.py index ead630e2933ec..eea9c0e6fc621 100644 --- a/examples/svm/plot_rbf_parameters.py +++ b/examples/svm/plot_rbf_parameters.py @@ -74,8 +74,8 @@ from sklearn.svm import SVC from sklearn.preprocessing import StandardScaler from sklearn.datasets import load_iris -from sklearn.cross_validation import StratifiedShuffleSplit -from sklearn.grid_search import GridSearchCV +from sklearn.model_selection import StratifiedShuffleSplit +from sklearn.model_selection import GridSearchCV # Utility function to move the midpoint of a colormap to be around @@ -128,7 +128,7 @@ def __call__(self, value, clip=None): C_range = np.logspace(-2, 10, 13) gamma_range = np.logspace(-9, 3, 13) param_grid = dict(gamma=gamma_range, C=C_range) -cv = StratifiedShuffleSplit(y, n_iter=5, test_size=0.2, random_state=42) +cv = StratifiedShuffleSplit(n_iter=5, test_size=0.2, random_state=42) grid = GridSearchCV(SVC(), param_grid=param_grid, cv=cv) grid.fit(X, y) diff --git a/examples/svm/plot_svm_anova.py b/examples/svm/plot_svm_anova.py index 9ce225bf980e5..45da4c35e0a64 100644 --- a/examples/svm/plot_svm_anova.py +++ b/examples/svm/plot_svm_anova.py @@ -10,7 +10,8 @@ import numpy as np import matplotlib.pyplot as plt -from sklearn import svm, datasets, feature_selection, cross_validation +from sklearn import svm, datasets, feature_selection +from sklearn.model_selection import cross_val_score from sklearn.pipeline import Pipeline ############################################################################### @@ -42,7 +43,7 @@ for percentile in percentiles: clf.set_params(anova__percentile=percentile) # Compute cross-validation score using all CPUs - this_scores = cross_validation.cross_val_score(clf, X, y, n_jobs=1) + this_scores = 
cross_val_score(clf, X, y, n_jobs=1) score_means.append(this_scores.mean()) score_stds.append(this_scores.std()) diff --git a/examples/svm/plot_svm_scale_c.py b/examples/svm/plot_svm_scale_c.py index c977d3d9b997d..ea0ed9b3e555a 100644 --- a/examples/svm/plot_svm_scale_c.py +++ b/examples/svm/plot_svm_scale_c.py @@ -88,8 +88,8 @@ import matplotlib.pyplot as plt from sklearn.svm import LinearSVC -from sklearn.cross_validation import ShuffleSplit -from sklearn.grid_search import GridSearchCV +from sklearn.model_selection import ShuffleSplit +from sklearn.model_selection import GridSearchCV from sklearn.utils import check_random_state from sklearn import datasets @@ -128,8 +128,8 @@ # To get nice curve, we need a large number of iterations to # reduce the variance grid = GridSearchCV(clf, refit=False, param_grid=param_grid, - cv=ShuffleSplit(n=n_samples, train_size=train_size, - n_iter=250, random_state=1)) + cv=ShuffleSplit(train_size=train_size, n_iter=250, + random_state=1)) grid.fit(X, y) scores = [x[1] for x in grid.grid_scores_]