Warnings related to deprecation of pos_label

jnothman · jnothman · commit 31eafddf6484 · 2013-11-25T23:24:15.000+11:00
diff --git a/sklearn/metrics/metrics.py b/sklearn/metrics/metrics.py
@@ -1394,7 +1394,7 @@ def precision_recall_fscore_support(y_true, y_pred, beta=1.0, labels='compat',
         When `labels` is None, all labels in `y_true` and `y_pred` are used in
         sorted order. By default, binary classification is handled specially
         for backwards compatibility, but this feature will be removed in
-        version 0.16.
+        release 0.16.
 
     average : string, [None (default), 'micro', 'macro', 'samples', 'weighted']
         If ``None``, the scores for each class are returned. Otherwise,
@@ -1471,9 +1471,19 @@ def precision_recall_fscore_support(y_true, y_pred, beta=1.0, labels='compat',
     y_type, y_true, y_pred = _check_clf_targets(y_true, y_pred)
     present_labels = unique_labels(y_true, y_pred)
 
+    if pos_label != '!deprecated':
+        warnings.warn('The `pos_label` parameter to precision, recall and '
+                      'F-score is deprecated, and will be removed in release '
+                      '0.16. The `labels` parameter may be used instead.',
+                      DeprecationWarning)
+
     if not isinstance(labels, np.ndarray) and labels == 'compat':
         if y_type == 'binary' and (average is not None and
                                    pos_label is not None):
+            warnings.warn('From release 0.16, binary classification will not '
+                          'be handled specially for precision, recall and '
+                          'F-score. Instead, specify a single positive label '
+                          'with the `labels` parameter.', FutureWarning)
 
             if pos_label == '!deprecated':
                 pos_label = 1
@@ -1491,6 +1501,14 @@ def precision_recall_fscore_support(y_true, y_pred, beta=1.0, labels='compat',
     else:
         n_labels = len(labels)
         labels = np.hstack([labels, np.setdiff1d(present_labels, labels)])
+        if n_labels == 2 and len(labels) == 2 and (pos_label is not None and
+                                                   average is not None):
+            warnings.warn('Precision, recall and F-score behaviour has '
+                          'changed: providing two classes to the `labels` '
+                          'parameter no longer returns results only for the '
+                          'positive label. Use `labels=[positive_label]` for '
+                          'former behaviour, or `labels=None` for all labels '
+                          'present in the data to be considered equally.')
 
     ### Calculate tp_sum, pred_sum, true_sum ###
 
diff --git a/sklearn/metrics/tests/test_metrics.py b/sklearn/metrics/tests/test_metrics.py
@@ -553,6 +553,7 @@ def test_auc_score_non_binary_class():
                              y_pred)
 
 
+@ignore_warnings
 def test_precision_recall_f1_score_binary():
     """Test Precision Recall and F1 Score for binary classification task"""
     y_true, y_pred, _ = make_prediction(binary=True)
@@ -679,6 +680,7 @@ def test_average_precision_score_tied_values():
     assert_not_equal(average_precision_score(y_true, y_score), 1.)
 
 
+@ignore_warnings
 def test_precision_recall_fscore_support_errors():
     y_true, y_pred, _ = make_prediction(binary=True)
 
@@ -737,7 +739,8 @@ def test_precision_recall_f1_score_multiclass():
     assert_array_equal(s, [24, 31, 20])
 
     # averaging tests
-    ps = precision_score(y_true, y_pred, pos_label=1, average='micro')
+    ps = assert_warns(DeprecationWarning, precision_score,
+                      y_true, y_pred, pos_label=1, average='micro')
     assert_array_almost_equal(ps, 0.53, 2)
 
     rs = recall_score(y_true, y_pred, average='micro')
@@ -780,6 +783,7 @@ def test_precision_recall_f1_score_multiclass():
     assert_array_equal(s, [24, 20, 31])
 
 
+@ignore_warnings
 def test_precision_recall_f1_score_multiclass_pos_label_none():
     """Test Precision Recall and F1 Score for multiclass classification task
 
@@ -1118,6 +1122,7 @@ def test_r2_one_case_error():
     assert_raises(ValueError, r2_score, [0], [0])
 
 
+@ignore_warnings
 def test_symmetry():
     """Test the symmetry of score and loss functions"""
     y_true, y_pred, _ = make_prediction(binary=True)
@@ -1155,6 +1160,7 @@ def test_symmetry():
                             zero_one_score(y_pred, y_true))
 
 
+@ignore_warnings
 def test_sample_order_invariance():
     y_true, y_pred, _ = make_prediction(binary=True)
 
@@ -1169,6 +1175,7 @@ def test_sample_order_invariance():
                                     % name)
 
 
+@ignore_warnings
 def test_format_invariance_with_1d_vectors():
     y1, y2, _ = make_prediction(binary=True)
 
@@ -1243,6 +1250,7 @@ def test_format_invariance_with_1d_vectors():
             assert_raises(ValueError, metric, y1_row, y2_row)
 
 
+@ignore_warnings
 def test_invariance_string_vs_numbers_labels():
     """Ensure that classification metrics with string labels"""
     y1, y2, _ = make_prediction(binary=True)
@@ -1285,6 +1293,7 @@ def test_invariance_string_vs_numbers_labels():
         assert_raises(ValueError, metrics, y1_str, y2_str)
 
 
+@ignore_warnings
 def test_clf_single_sample():
     """Non-regression test: scores should work with a single sample.
 
@@ -1948,6 +1957,47 @@ def test_prf_warnings():
                      'being set to 0.0 due to no true samples.')
 
 
+def test_prf_pos_label_deprecation_warnings():
+    with warnings.catch_warnings(record=True) as record:
+        warnings.simplefilter('always')
+        # need deprecation warning as long as pos_label is explicitly set
+        recall_score([1, 2, 3, 2], [2, 2, 1, 3], pos_label=None)
+        assert_equal(str(record.pop().message),
+                     'The `pos_label` parameter to precision, recall and '
+                     'F-score is deprecated, and will be removed in release '
+                     '0.16. The `labels` parameter may be used instead.')
+        recall_score([1, 2, 3, 2], [2, 2, 1, 3], pos_label=1)
+        assert_equal(str(record.pop().message),
+                     'The `pos_label` parameter to precision, recall and '
+                     'F-score is deprecated, and will be removed in release '
+                     '0.16. The `labels` parameter may be used instead.')
+
+        # warning that default binary behaviour will be removed in the future
+        recall_score([1, 2, 1], [2, 2, 1], average='macro')
+        assert_equal(str(record.pop().message),
+                     'From release 0.16, binary classification will not be '
+                     'handled specially for precision, recall and F-score. '
+                     'Instead, specify a single positive label with the '
+                     '`labels` parameter.')
+
+        # but no warning for the following
+        recall_score([1, 2, 1], [2, 2, 1], average=None)
+        assert_equal(len(record), 0)
+        recall_score([1, 2, 1], [2, 2, 1], labels=[2], average='macro')
+        assert_equal(len(record), 0)
+
+        # warning that behaviour has changed when labels is specified as binary
+        # for binary data, with pos_label non-None and average non-None
+        recall_score([1, 2, 1], [2, 2, 1], labels=[1, 2], average='macro')
+        assert_equal(str(record.pop().message),
+                     'Precision, recall and F-score behaviour has changed: '
+                     'providing two classes to the `labels` parameter no '
+                     'longer returns results only for the positive label. '
+                     'Use `labels=[positive_label]` for former behaviour, '
+                     'or `labels=None` for all labels present in the data '
+                     'to be considered equally.')
+
+
 def test__check_clf_targets():
     """Check that _check_clf_targets correctly merges target types, squeezes
     output and fails if input lengths differ."""