Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions doc/modules/model_evaluation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -743,8 +743,9 @@ with a svm classifier in a binary class problem::
>>> est = svm.LinearSVC(random_state=0)
>>> est.fit(X, y)
LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
intercept_scaling=1, loss='l2', max_iter=1000, multi_class='ovr',
penalty='l2', random_state=0, tol=0.0001, verbose=0)
intercept_scaling=1, loss='squared_hinge', max_iter=1000,
multi_class='ovr', penalty='l2', random_state=0, tol=0.0001,
verbose=0)
>>> pred_decision = est.decision_function([[-2], [3], [0.5]])
>>> pred_decision # doctest: +ELLIPSIS
array([-2.18..., 2.36..., 0.09...])
Expand All @@ -760,8 +761,9 @@ with a svm classifier in a multiclass problem::
>>> est = svm.LinearSVC()
>>> est.fit(X, Y)
LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
intercept_scaling=1, loss='l2', max_iter=1000, multi_class='ovr',
penalty='l2', random_state=None, tol=0.0001, verbose=0)
intercept_scaling=1, loss='squared_hinge', max_iter=1000,
multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
verbose=0)
>>> pred_decision = est.decision_function([[-1], [2], [3]])
>>> y_true = [0, 2, 3]
>>> hinge_loss(y_true, pred_decision, labels) #doctest: +ELLIPSIS
Expand Down
5 changes: 3 additions & 2 deletions doc/modules/svm.rst
Original file line number Diff line number Diff line change
Expand Up @@ -129,8 +129,9 @@ two classes, only one model is trained::
>>> lin_clf = svm.LinearSVC()
>>> lin_clf.fit(X, Y) # doctest: +NORMALIZE_WHITESPACE
LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
intercept_scaling=1, loss='l2', max_iter=1000, multi_class='ovr',
penalty='l2', random_state=None, tol=0.0001, verbose=0)
intercept_scaling=1, loss='squared_hinge', max_iter=1000,
multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
verbose=0)
>>> dec = lin_clf.decision_function([[1]])
>>> dec.shape[1]
4
Expand Down
2 changes: 1 addition & 1 deletion sklearn/feature_extraction/tests/test_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -715,7 +715,7 @@ def test_count_vectorizer_pipeline_grid_selection():

parameters = {
'vect__ngram_range': [(1, 1), (1, 2)],
'svc__loss': ('l1', 'l2')
'svc__loss': ('hinge', 'squared_hinge')
}

# find the best parameters for both the feature extraction and the
Expand Down
10 changes: 6 additions & 4 deletions sklearn/metrics/classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -1452,8 +1452,9 @@ def hinge_loss(y_true, pred_decision, labels=None, sample_weight=None):
>>> est = svm.LinearSVC(random_state=0)
>>> est.fit(X, y)
LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
intercept_scaling=1, loss='l2', max_iter=1000, multi_class='ovr',
penalty='l2', random_state=0, tol=0.0001, verbose=0)
intercept_scaling=1, loss='squared_hinge', max_iter=1000,
multi_class='ovr', penalty='l2', random_state=0, tol=0.0001,
verbose=0)
>>> pred_decision = est.decision_function([[-2], [3], [0.5]])
>>> pred_decision # doctest: +ELLIPSIS
array([-2.18..., 2.36..., 0.09...])
Expand All @@ -1467,8 +1468,9 @@ def hinge_loss(y_true, pred_decision, labels=None, sample_weight=None):
>>> est = svm.LinearSVC()
>>> est.fit(X, Y)
LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
intercept_scaling=1, loss='l2', max_iter=1000, multi_class='ovr',
penalty='l2', random_state=None, tol=0.0001, verbose=0)
intercept_scaling=1, loss='squared_hinge', max_iter=1000,
multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
verbose=0)
>>> pred_decision = est.decision_function([[-1], [2], [3]])
>>> y_true = [0, 2, 3]
>>> hinge_loss(y_true, pred_decision, labels) #doctest: +ELLIPSIS
Expand Down
79 changes: 44 additions & 35 deletions sklearn/svm/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -608,50 +608,60 @@ def _get_liblinear_solver_type(multi_class, penalty, loss, dual):
solver to use.
"""

# Nested dicts containing level 1: available loss functions,
# level 2: available penalties for the given loss function,
# level 3: whether the dual solver is available for the specified
# combination of loss function and penalty.
_solver_type_dict = {
'PL2_LLR_D0': 0, # L2 penalty, logistic regression
'PL2_LL2_D1': 1, # L2 penalty, L2 loss, dual form
'PL2_LL2_D0': 2, # L2 penalty, L2 loss, primal form
'PL2_LL1_D1': 3, # L2 penalty, L1 Loss, dual form
'MC_SVC': 4, # Multi-class Support Vector Classification
'PL1_LL2_D0': 5, # L1 penalty, L2 Loss, primal form
'PL1_LLR_D0': 6, # L1 penalty, logistic regression
'PL2_LLR_D1': 7, # L2 penalty, logistic regression, dual form
'PL2_LSE_D0': 11, # L2 penalty, squared epsilon-insensitive loss, primal form
'PL2_LSE_D1': 12, # L2 penalty, squared epsilon-insensitive loss, dual form
'PL2_LEI_D1': 13, # L2 penalty, epsilon-insensitive loss, dual form
'logistic_regression': {
'l1': {False: 6},
'l2': {False: 0, True: 7}},
'hinge' : {
'l2' : {True: 3}},
'squared_hinge': {
'l1': {False : 5},
'l2': {False: 2, True: 1}},
'epsilon_insensitive': {
'l2': {True: 13}},
'squared_epsilon_insensitive': {
'l2': {False: 11, True: 12}},
'crammer_singer': 4
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm having a hard time parsing the meaning of this... care to insert a comment explaining the structure?

}


if multi_class == 'crammer_singer':
solver_type = 'MC_SVC'
elif multi_class == 'ovr':
solver_type = "P%s_L%s_D%d" % (
penalty.upper(), loss.upper(), int(dual))
else:
return _solver_type_dict[multi_class]
elif multi_class != 'ovr':
raise ValueError("`multi_class` must be one of `ovr`, "
"`crammer_singer`, got %r" % multi_class)
if not solver_type in _solver_type_dict:
if penalty.upper() == 'L1' and loss.upper() == 'L1':
error_string = ("The combination of penalty='l1' "
"and loss='l1' is not supported.")
elif penalty.upper() == 'L2' and loss.upper() == 'L1':
# this has to be in primal
error_string = ("penalty='l2' and loss='l1' is "
"only supported when dual='true'.")

_solver_pen = _solver_type_dict.get(loss, None)
if _solver_pen is None:
error_string = ("Loss %s is not supported" % loss)
else:
_solver_dual = _solver_pen.get(penalty, None)
if _solver_dual is None:
error_string = ("The combination of penalty='%s'"
"and loss='%s' is not supported"
% (loss, penalty))
else:
# only PL1 in dual remains
error_string = ("penalty='l1' is only supported "
"when dual='false'.")
raise ValueError('Unsupported set of arguments: %s, '
solver_num = _solver_dual.get(dual, None)
if solver_num is None:
error_string = ("loss='%s' and penalty='%s'"
"are not supported when dual=%s"
% (loss, penalty, dual))
else:
return solver_num
raise ValueError('Unsupported set of arguments: %s, '
'Parameters: penalty=%r, loss=%r, dual=%r'
% (error_string, penalty, loss, dual))
return _solver_type_dict[solver_type]


def _fit_liblinear(X, y, C, fit_intercept, intercept_scaling, class_weight,
penalty, dual, verbose, max_iter, tol,
random_state=None, multi_class='ovr', loss='lr',
epsilon=0.1):
random_state=None, multi_class='ovr',
loss='logistic_regression', epsilon=0.1):
"""Used by Logistic Regression (and CV) and LinearSVC.

Preprocessing is done in this function before supplying it to liblinear.
Expand Down Expand Up @@ -712,10 +722,9 @@ def _fit_liblinear(X, y, C, fit_intercept, intercept_scaling, class_weight,
If `crammer_singer` is chosen, the options loss, penalty and dual will
be ignored.

loss : str, {'lr', 'l1', 'l2', 'ei'}
The loss function. 'l1' is the hinge loss while 'l2' is the squared
hinge loss, 'lr' is the Logistic loss and 'ei' is the epsilon-insensitive
loss.
loss : str, {'logistic_regression', 'hinge', 'squared_hinge',
'epsilon_insensitive', 'squared_epsilon_insensitive'}
The loss function used to fit the model.

epsilon : float, optional (default=0.1)
Epsilon parameter in the epsilon-insensitive loss function. Note
Expand All @@ -734,7 +743,7 @@ def _fit_liblinear(X, y, C, fit_intercept, intercept_scaling, class_weight,
n_iter_ : int
Maximum number of iterations run across all classes.
"""
if loss is not 'ei':
if loss not in ['epsilon_insensitive', 'squared_epsilon_insensitive']:
enc = LabelEncoder()
y_ind = enc.fit_transform(y)
classes_ = enc.classes_
Expand Down
39 changes: 27 additions & 12 deletions sklearn/svm/classes.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import warnings
import numpy as np

from .base import _fit_liblinear, BaseSVC, BaseLibSVM
Expand Down Expand Up @@ -25,9 +26,10 @@ class LinearSVC(BaseEstimator, LinearClassifierMixin,
C : float, optional (default=1.0)
Penalty parameter C of the error term.

loss : string, 'l1' or 'l2' (default='l2')
Specifies the loss function. 'l1' is the hinge loss (standard SVM)
while 'l2' is the squared hinge loss.
loss : string, 'hinge' or 'squared_hinge' (default='squared_hinge')
Specifies the loss function. 'hinge' is the standard SVM loss
(used e.g. by the SVC class) while 'squared_hinge' is the
square of the hinge loss.

penalty : string, 'l1' or 'l2' (default='l2')
Specifies the norm used in the penalization. The 'l2'
Expand Down Expand Up @@ -141,7 +143,7 @@ class frequencies.

"""

def __init__(self, penalty='l2', loss='l2', dual=True, tol=1e-4, C=1.0,
def __init__(self, penalty='l2', loss='squared_hinge', dual=True, tol=1e-4, C=1.0,
multi_class='ovr', fit_intercept=True, intercept_scaling=1,
class_weight=None, verbose=0, random_state=None, max_iter=1000):
self.penalty = penalty
Expand Down Expand Up @@ -180,11 +182,20 @@ def fit(self, X, y):

X, y = check_X_y(X, y, accept_sparse='csr', dtype=np.float64, order="C")
self.classes_ = np.unique(y)

if self.loss in ('l1', 'l2'):
# convert for backwards compatibility
loss = {'l1': 'hinge', 'l2': 'squared_hinge'}.get(self.loss)
warnings.warn("loss='l1' (resp. loss='l2') is deprecated and will" +
"be removed before version 1.0. Please use loss='hinge'" +
"(resp. loss='squared_hinge') instead", DeprecationWarning)
else:
loss = self.loss
self.coef_, self.intercept_, self.n_iter_ = _fit_liblinear(
X, y, self.C, self.fit_intercept, self.intercept_scaling,
self.class_weight, self.penalty, self.dual, self.verbose,
self.max_iter, self.tol, self.random_state, self.multi_class,
self.loss
loss
)

if self.multi_class == "crammer_singer" and len(self.classes_) == 2:
Expand Down Expand Up @@ -212,7 +223,8 @@ class LinearSVR(LinearModel, RegressorMixin):
Penalty parameter C of the error term. The penalty is a squared
l2 penalty. The bigger this parameter, the less regularization is used.

loss : string, 'l1' or 'l2' (default='l2')
loss : string, 'epsilon_insensitive' or 'squared_epsilon_insensitive'
(default='epsilon_insensitive')
Specifies the loss function. 'epsilon_insensitive' is the standard SVR
loss while 'squared_epsilon_insensitive' is the square of the
epsilon-insensitive loss.

Expand Down Expand Up @@ -288,9 +300,9 @@ class LinearSVR(LinearModel, RegressorMixin):
various loss functions and regularization regimes.
"""

def __init__(self, epsilon=0.0, tol=1e-4, C=1.0, loss='l1', fit_intercept=True,
intercept_scaling=1., dual=True, verbose=0, random_state=None,
max_iter=1000):
def __init__(self, epsilon=0.0, tol=1e-4, C=1.0, loss='epsilon_insensitive',
fit_intercept=True, intercept_scaling=1., dual=True, verbose=0,
random_state=None, max_iter=1000):
self.tol = tol
self.C = C
self.epsilon = epsilon
Expand Down Expand Up @@ -324,11 +336,11 @@ def fit(self, X, y):
% self.C)

X, y = check_X_y(X, y, accept_sparse='csr', dtype=np.float64, order="C")
loss = {'l1': 'ei', 'l2': 'se'}.get(self.loss)
penalty = 'l2' # SVR only accepts L2 penalty
self.coef_, self.intercept_, self.n_iter_ = _fit_liblinear(
X, y, self.C, self.fit_intercept, self.intercept_scaling,
None, 'l2', self.dual, self.verbose,
self.max_iter, self.tol, self.random_state, loss=loss,
None, penalty, self.dual, self.verbose,
self.max_iter, self.tol, self.random_state, loss=self.loss,
epsilon=self.epsilon)
self.coef_ = self.coef_.ravel()

Expand Down Expand Up @@ -687,6 +699,9 @@ class SVR(BaseLibSVM, RegressorMixin):
Support Vector Machine for regression implemented using libsvm
using a parameter to control the number of support vectors.

LinearSVR
Scalable Linear Support Vector Machine for regression
implemented using liblinear.
"""
def __init__(self, kernel='rbf', degree=3, gamma=0.0, coef0=0.0, tol=1e-3,
C=1.0, epsilon=0.1, shrinking=True, cache_size=200,
Expand Down
9 changes: 5 additions & 4 deletions sklearn/svm/tests/test_svm.py
Original file line number Diff line number Diff line change
Expand Up @@ -459,14 +459,15 @@ def test_linearsvc_parameters():
"""
# generate list of possible parameter combinations
params = [(dual, loss, penalty) for dual in [True, False]
for loss in ['l1', 'l2', 'lr'] for penalty in ['l1', 'l2']]
for loss in ['hinge', 'squared_hinge', 'logistic_regression']
for penalty in ['l1', 'l2']]

X, y = make_classification(n_samples=5, n_features=5)

for dual, loss, penalty in params:
clf = svm.LinearSVC(penalty=penalty, loss=loss, dual=dual)
if (loss == 'l1' and penalty == 'l1') or (
loss == 'l1' and penalty == 'l2' and not dual) or (
if (loss == 'hinge' and penalty == 'l1') or (
loss == 'hinge' and penalty == 'l2' and not dual) or (
penalty == 'l1' and dual):
assert_raises(ValueError, clf.fit, X, y)
else:
Expand All @@ -486,7 +487,7 @@ def test_linearsvc():
assert_array_almost_equal(clf.intercept_, [0], decimal=3)

# the same with l1 penalty
clf = svm.LinearSVC(penalty='l1', dual=False, random_state=0).fit(X, Y)
clf = svm.LinearSVC(penalty='l1', loss='l2', dual=False, random_state=0).fit(X, Y)
assert_array_equal(clf.predict(T), true_result)

# l2 penalty with dual formulation
Expand Down