jnothman · waterponey · Oct 13, 2016 · Oct 13, 2016 · Oct 13, 2016 · Oct 13, 2016
diff --git a/Makefile b/Makefile
@@ -26,9 +26,6 @@ clean: clean-ctags
 
 in: inplace # just a shortcut
 inplace:
-	# to avoid errors in 0.15 upgrade
-	rm -f sklearn/utils/sparsefuncs*.so
-	rm -f sklearn/utils/random*.so
 	$(PYTHON) setup.py build_ext -i
 
 test-code: in

diff --git a/README.rst b/README.rst
@@ -78,15 +78,15 @@ Development
 
 We welcome new contributors of all experience levels. The scikit-learn
 community goals are to be helpful, welcoming, and effective. The
-`Contributor's Guide <http://scikit-learn.org/stable/developers/index.html>`_ 
+`Development Guide <http://scikit-learn.org/stable/developers/index.html>`_ 
 has detailed information about contributing code, documentation, tests, and
 more. We've included some basic information in this README.
 
 Important links
 ~~~~~~~~~~~~~~~
 
 - Official source code repo: https://github.com/scikit-learn/scikit-learn
-- Download releases: http://sourceforge.net/projects/scikit-learn/files/
+- Download releases: https://pypi.python.org/pypi/scikit-learn
 - Issue tracker: https://github.com/scikit-learn/scikit-learn/issues
 
 Source code
@@ -158,4 +158,4 @@ Communication
 - Mailing list: https://mail.python.org/mailman/listinfo/scikit-learn
 - IRC channel: ``#scikit-learn`` at ``irc.freenode.net``
 - Stack Overflow: http://stackoverflow.com/questions/tagged/scikit-learn
-- Website: http://scikit-learn.org
+- Website: http://scikit-learn.org
diff --git a/appveyor.yml b/appveyor.yml
@@ -36,6 +36,16 @@ environment:
 
 
 install:
+  # If there is a newer build queued for the same PR, cancel this one.
+  # The AppVeyor 'rollout builds' option is supposed to serve the same
+  # purpose but is problematic because it tends to cancel builds pushed
+  # directly to master instead of just PR builds.
+  # credits: JuliaLang developers.
+  - ps: if ($env:APPVEYOR_PULL_REQUEST_NUMBER -and $env:APPVEYOR_BUILD_NUMBER -ne ((Invoke-RestMethod `
+        https://ci.appveyor.com/api/projects/$env:APPVEYOR_ACCOUNT_NAME/$env:APPVEYOR_PROJECT_SLUG/history?recordsNumber=50).builds | `
+        Where-Object pullRequestId -eq $env:APPVEYOR_PULL_REQUEST_NUMBER)[0].buildNumber) { `
+        throw "There are newer queued builds for this pull request, failing early." }
+
   # Install Python (from the official .msi of http://python.org) and pip when
   # not already installed.
   - "powershell ./build_tools/appveyor/install.ps1"

diff --git a/benchmarks/bench_lof.py b/benchmarks/bench_lof.py
@@ -0,0 +1,119 @@
+"""
+============================
+LocalOutlierFactor benchmark
+============================
+
+A test of LocalOutlierFactor on classical anomaly detection datasets.
+
+"""
+
+from time import time
+import numpy as np
+import matplotlib.pyplot as plt
+from sklearn.neighbors import LocalOutlierFactor
+from sklearn.metrics import roc_curve, auc
+from sklearn.datasets import fetch_kddcup99, fetch_covtype, fetch_mldata
+from sklearn.preprocessing import LabelBinarizer
+from sklearn.utils import shuffle as sh
+
+print(__doc__)
+
+np.random.seed(2)
+
+# datasets available: ['http', 'smtp', 'SA', 'SF', 'shuttle', 'forestcover']
+datasets = ['shuttle']
+
+novelty_detection = True  # if False, training set polluted by outliers
+
+for dataset_name in datasets:
+    # loading and vectorization
+    print('loading data')
+    if dataset_name in ['http', 'smtp', 'SA', 'SF']:
+        dataset = fetch_kddcup99(subset=dataset_name, shuffle=True,
+                                 percent10=False)
+        X = dataset.data
+        y = dataset.target
+
+    if dataset_name == 'shuttle':
+        dataset = fetch_mldata('shuttle')
+        X = dataset.data
+        y = dataset.target
+        X, y = sh(X, y)
+        # we remove data with label 4
+        # normal data are then those of class 1
+        s = (y != 4)
+        X = X[s, :]
+        y = y[s]
+        y = (y != 1).astype(int)
+
+    if dataset_name == 'forestcover':
+        dataset = fetch_covtype(shuffle=True)
+        X = dataset.data
+        y = dataset.target
+        # normal data are those with attribute 2
+        # abnormal those with attribute 4
+        s = (y == 2) + (y == 4)
+        X = X[s, :]
+        y = y[s]
+        y = (y != 2).astype(int)
+
+    print('vectorizing data')
+
+    if dataset_name == 'SF':
+        lb = LabelBinarizer()
+        lb.fit(X[:, 1])
+        x1 = lb.transform(X[:, 1])
+        X = np.c_[X[:, :1], x1, X[:, 2:]]
+        y = (y != 'normal.').astype(int)
+
+    if dataset_name == 'SA':
+        lb = LabelBinarizer()
+        lb.fit(X[:, 1])
+        x1 = lb.transform(X[:, 1])
+        lb.fit(X[:, 2])
+        x2 = lb.transform(X[:, 2])
+        lb.fit(X[:, 3])
+        x3 = lb.transform(X[:, 3])
+        X = np.c_[X[:, :1], x1, x2, x3, X[:, 4:]]
+        y = (y != 'normal.').astype(int)
+
+    if dataset_name == 'http' or dataset_name == 'smtp':
+        y = (y != 'normal.').astype(int)
+
+    n_samples, n_features = np.shape(X)
+    n_samples_train = n_samples // 2
+    n_samples_test = n_samples - n_samples_train
+
+    X = X.astype(float)
+    X_train = X[:n_samples_train, :]
+    X_test = X[n_samples_train:, :]
+    y_train = y[:n_samples_train]
+    y_test = y[n_samples_train:]
+
+    if novelty_detection:
+        X_train = X_train[y_train == 0]
+        y_train = y_train[y_train == 0]
+
+    print('LocalOutlierFactor processing...')
+    model = LocalOutlierFactor(n_neighbors=20)
+    tstart = time()
+    model.fit(X_train)
+    fit_time = time() - tstart
+    tstart = time()
+
+    scoring = -model.decision_function(X_test)  # the lower, the more normal
+    predict_time = time() - tstart
+    fpr, tpr, thresholds = roc_curve(y_test, scoring)
+    AUC = auc(fpr, tpr)
+    plt.plot(fpr, tpr, lw=1,
+             label=('ROC for %s (area = %0.3f, train-time: %0.2fs, '
+                    'test-time: %0.2fs)' % (dataset_name, AUC, fit_time,
+                                            predict_time)))
+
+plt.xlim([-0.05, 1.05])
+plt.ylim([-0.05, 1.05])
+plt.xlabel('False Positive Rate')
+plt.ylabel('True Positive Rate')
+plt.title('Receiver operating characteristic')
+plt.legend(loc="lower right")
+plt.show()
diff --git a/circle.yml b/circle.yml
@@ -6,7 +6,8 @@ dependencies:
   # Check whether the doc build is required, install build dependencies and
   # run sphinx to build the doc.
   override:
-    - ./build_tools/circle/build_doc.sh
+    - ./build_tools/circle/build_doc.sh:
+        timeout: 3600 # seconds
 test:
   # Grep error on the documentation
   override:

diff --git a/doc/conf.py b/doc/conf.py
@@ -35,6 +35,7 @@
     'numpy_ext.numpydoc',
     'sphinx.ext.linkcode', 'sphinx.ext.doctest',
     'sphinx_gallery.gen_gallery',
+    'sphinx_issues',
 ]
 
 # pngmath / imgmath compatibility layer for different sphinx versions
@@ -269,6 +270,13 @@ def make_carousel_thumbs(app, exception):
             sphinx_gallery.gen_rst.scale_image(image, c_thumb, max_width, 190)
 
 
+# Config for sphinx_issues
+
+issues_uri = 'https://github.com/scikit-learn/scikit-learn/issues/{issue}'
+issues_github_path = 'scikit-learn/scikit-learn'
+issues_user_uri = 'https://github.com/{user}'
+
+
 def setup(app):
     # to hide/show the prompt in code examples:
     app.add_javascript('js/copybutton.js')

diff --git a/doc/documentation.rst b/doc/documentation.rst
@@ -64,7 +64,7 @@ Documentation of scikit-learn 0.19.dev0
           <!-- row -->
             <div class="row-fluid">
                 <div class="span4 box">
-                    <h2><a href="developers/index.html">Contributing</a></h2>
+                    <h2><a href="developers/index.html">Development</a></h2>
                             <blockquote>Information on how to contribute. This also
                             contains useful information for advanced users, for example
                             how to build their own estimators.

diff --git a/doc/faq.rst b/doc/faq.rst
@@ -248,10 +248,13 @@ Python processes for parallel computing. Unfortunately this is a violation of
 the POSIX standard and therefore some software editors like Apple refuse to
 consider the lack of fork-safety in Accelerate / vecLib as a bug.
 
-In Python 3.4+ it is now possible to configure ``multiprocessing`` to use the
-'forkserver' or 'spawn' start methods (instead of the default 'fork') to manage
-the process pools. This makes it possible to not be subject to this issue
-anymore.
+In Python 3.4+ it is now possible to configure ``multiprocessing`` to
+use the 'forkserver' or 'spawn' start methods (instead of the default
+'fork') to manage the process pools. To work around this issue when
+using scikit-learn, you can set the JOBLIB_START_METHOD environment
+variable to 'forkserver'. However, the user should be aware that using
+the 'forkserver' method prevents joblib.Parallel from calling functions
+interactively defined in a shell session.
 
 If you have custom code that uses ``multiprocessing`` directly instead of using
 it via joblib you can enable the 'forkserver' mode globally for your

diff --git a/doc/modules/classes.rst b/doc/modules/classes.rst
@@ -186,6 +186,7 @@ Splitter Functions
    :template: function.rst
 
    model_selection.train_test_split
+   model_selection.check_cv
 
 Hyper-parameter optimizers
 --------------------------
@@ -201,6 +202,13 @@ Hyper-parameter optimizers
    model_selection.ParameterGrid
    model_selection.ParameterSampler
 
+
+.. autosummary::
+   :toctree: generated/
+   :template: function.rst
+
+   model_selection.fit_grid_point
+
 Model validation
 ----------------
 
@@ -315,7 +323,6 @@ Samples generator
    decomposition.PCA
    decomposition.IncrementalPCA
    decomposition.ProjectedGradientNMF
-   decomposition.RandomizedPCA
    decomposition.KernelPCA
    decomposition.FactorAnalysis
    decomposition.FastICA
@@ -560,7 +567,6 @@ From text
 
   gaussian_process.GaussianProcessRegressor
   gaussian_process.GaussianProcessClassifier
-  gaussian_process.GaussianProcess
 
 Kernels:
 
@@ -957,7 +963,6 @@ See the :ref:`metrics` section of the user guide for further details.
 
    mixture.GaussianMixture
    mixture.BayesianGaussianMixture
-   mixture.DPGMM
 
 
 .. _multiclass_ref:
@@ -1051,7 +1056,8 @@ See the :ref:`metrics` section of the user guide for further details.
    neighbors.LSHForest
    neighbors.DistanceMetric
    neighbors.KernelDensity
-
+   neighbors.LocalOutlierFactor
+
 .. autosummary::
    :toctree: generated/
    :template: function.rst
@@ -1349,3 +1355,67 @@ Low-level methods
    utils.estimator_checks.check_estimator
    utils.resample
    utils.shuffle
+
+
+Recently deprecated
+===================
+
+To be removed in 0.19
+---------------------
+
+.. autosummary::
+   :toctree: generated/
+   :template: deprecated_class.rst
+
+   lda.LDA
+   qda.QDA
+
+.. autosummary::
+   :toctree: generated/
+   :template: deprecated_function.rst
+
+   datasets.load_lfw_pairs
+   datasets.load_lfw_people
+
+
+To be removed in 0.20
+---------------------
+
+.. autosummary::
+   :toctree: generated/
+   :template: deprecated_class.rst
+
+   grid_search.ParameterGrid
+   grid_search.ParameterSampler
+   grid_search.GridSearchCV
+   grid_search.RandomizedSearchCV
+   cross_validation.LeaveOneOut
+   cross_validation.LeavePOut
+   cross_validation.KFold
+   cross_validation.LabelKFold
+   cross_validation.LeaveOneLabelOut
+   cross_validation.LeavePLabelOut
+   cross_validation.LabelShuffleSplit
+   cross_validation.StratifiedKFold
+   cross_validation.ShuffleSplit
+   cross_validation.StratifiedShuffleSplit
+   cross_validation.PredefinedSplit
+   decomposition.RandomizedPCA
+   gaussian_process.GaussianProcess
+   mixture.GMM
+   mixture.DPGMM
+   mixture.VBGMM
+
+
+.. autosummary::
+   :toctree: generated/
+   :template: deprecated_function.rst
+
+   grid_search.fit_grid_point
+   learning_curve.learning_curve
+   learning_curve.validation_curve
+   cross_validation.cross_val_predict
+   cross_validation.cross_val_score
+   cross_validation.check_cv
+   cross_validation.permutation_test_score
+   cross_validation.train_test_split