@@ -1390,3 +1390,56 @@ def ndcg_score(y_true, y_score, k=None, sample_weight=None, ignore_ties=False):
13901390 _check_dcg_target_type (y_true )
13911391 gain = _ndcg_sample_scores (y_true , y_score , k = k , ignore_ties = ignore_ties )
13921392 return np .average (gain , weights = sample_weight )
1393+
1394+
def lorenz_curve(y_true, y_pred, sample_weight=None,
                 ascending_predictions=True,
                 normalize=True,
                 return_gini=False):
    """Compute the Lorenz curve of ``y_true`` ranked by ``y_pred``.

    Samples are ordered by their predicted value, then the sample weights
    and the (weighted) target values are accumulated along that ordering.

    Parameters
    ----------
    y_true : array-like
        Observed target values. Must be non-negative.

    y_pred : array-like
        Predicted values, used only to rank the samples.

    sample_weight : array-like, default=None
        Per-sample weights. When None, every sample has weight 1. When
        given, each sample contributes ``sample_weight * y_true`` to the
        cumulated target, so a weight of ``k`` acts like ``k`` repetitions
        of the sample.

    ascending_predictions : bool, default=True
        If True, samples are ranked from lowest to highest prediction;
        otherwise the ranking is reversed.

    normalize : bool, default=True
        If True, both cumulative sums are divided by their last element so
        that they are reported as fractions of the total. No guard is
        applied against a total of zero.

    return_gini : bool, default=False
        If True, also return ``1 - 2 * auc(cumulated_samples,
        cumulated_target)``. Requires ``normalize=True`` and
        ``ascending_predictions=True``.

    Returns
    -------
    cumulated_samples : ndarray
        Cumulative sum of the sample weights in ranked order.

    cumulated_target : ndarray
        Cumulative sum of the (weighted) target values in ranked order.

    gini : float
        Only returned when ``return_gini=True``.

    Raises
    ------
    ValueError
        If ``y_true`` contains a negative value, or if ``return_gini=True``
        is combined with ``normalize=False`` or
        ``ascending_predictions=False``.
    """
    y_true = check_array(y_true, ensure_2d=False)
    y_pred = check_array(y_pred, ensure_2d=False)
    check_consistent_length(y_true, y_pred)
    y_true_min = y_true.min()
    if y_true_min < 0:
        raise ValueError("lorenz_curve is only defined for regression problems"
                         " with non-negative target values. Observed minimum"
                         " target value is %f" % y_true_min)
    if sample_weight is None:
        sample_weight = np.ones(len(y_true), dtype=np.float64)
        # Unit weights: the weighted target is the target itself. Keeping
        # y_true as is preserves its dtype when normalize=False.
        weighted_target = y_true
    else:
        sample_weight = check_array(sample_weight, ensure_2d=False)
        check_consistent_length(y_true, sample_weight)
        # A sample with weight w must count w times on the target axis too,
        # not only on the sample axis.
        weighted_target = y_true * sample_weight

    # Rank the samples based on y_pred
    ranking = np.argsort(y_pred)
    if not ascending_predictions:
        ranking = ranking[::-1]

    # Accumulate the sample weights and weighted target values
    cumulated_samples = np.cumsum(sample_weight[ranking])
    cumulated_target = np.cumsum(weighted_target[ranking])

    # Normalize to report fractions instead of absolute values.
    # Normalization is necessary to compute the Gini index from
    # the area under the Lorenz curve
    if normalize:
        # Out-of-place division: an in-place `/=` cannot store the float
        # result in an integer cumulative sum and raises a casting error.
        cumulated_samples = cumulated_samples / cumulated_samples[-1]
        cumulated_target = cumulated_target / cumulated_target[-1]

    if return_gini:
        if not normalize or not ascending_predictions:
            raise ValueError("Gini coefficient requires normalize=True"
                             " and ascending_predictions=True")
        gini = 1 - 2 * auc(cumulated_samples, cumulated_target)
        return cumulated_samples, cumulated_target, gini
    return cumulated_samples, cumulated_target
1439+
1440+
def gini_score(y_true, y_pred, sample_weight=None):
    """Return the Gini score derived from the normalized Lorenz curve.

    The Lorenz curve is computed with ascending predictions and
    normalization enabled; the score is one minus twice the area under
    that curve as computed by ``auc``.

    Parameters
    ----------
    y_true : array-like
        Observed target values, forwarded to ``lorenz_curve``.

    y_pred : array-like
        Predicted values, forwarded to ``lorenz_curve``.

    sample_weight : array-like, default=None
        Per-sample weights, forwarded to ``lorenz_curve``.

    Returns
    -------
    gini : float
        ``1 - 2 * auc(cumulated_weights, cumulated_values)``.
    """
    sample_fraction, target_fraction = lorenz_curve(
        y_true,
        y_pred,
        sample_weight=sample_weight,
        ascending_predictions=True,
        normalize=True,
    )
    area_under_lorenz = auc(sample_fraction, target_fraction)
    return 1 - 2 * area_under_lorenz
0 commit comments