@@ -593,11 +593,11 @@ def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True,
593593 sample_weight = np .ones (X .shape [0 ])
594594
595595 # If class_weights is a dict (provided by the user), the weights
596- # are assigned to the original labels. If it is "auto ", then
596+ # are assigned to the original labels. If it is "balanced", then
597597 # the class_weights are assigned after masking the labels one-vs-rest (OvR).
598598 le = LabelEncoder ()
599599
600- if isinstance (class_weight , dict ):
600+ if isinstance (class_weight , dict ) or multi_class == 'multinomial' :
601601 if solver == "liblinear" :
602602 if classes .size == 2 :
603603 # Reconstruct the weights with keys 1 and -1
@@ -609,7 +609,7 @@ def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True,
609609 "solver cannot handle multiclass with "
610610 "class_weight of type dict. Use the lbfgs, "
611611 "newton-cg or sag solvers or set "
612- "class_weight='auto '" )
612+ "class_weight='balanced '" )
613613 else :
614614 class_weight_ = compute_class_weight (class_weight , classes , y )
615615 sample_weight *= class_weight_ [le .fit_transform (y )]
@@ -622,20 +622,21 @@ def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True,
622622 mask = (y == pos_class )
623623 y_bin = np .ones (y .shape , dtype = np .float64 )
624624 y_bin [~ mask ] = - 1.
625+ # y_bin (+1 for pos_class, -1 otherwise) is passed to compute_class_weight below
626+
627+ # 'auto' is deprecated and will be removed in 0.19
628+ if class_weight in ("auto" , "balanced" ):
629+ class_weight_ = compute_class_weight (class_weight , mask_classes ,
630+ y_bin )
631+ sample_weight *= class_weight_ [le .fit_transform (y_bin )]
625632
626633 else :
627634 lbin = LabelBinarizer ()
628- Y_bin = lbin .fit_transform (y )
629- if Y_bin .shape [1 ] == 1 :
630- Y_bin = np .hstack ([1 - Y_bin , Y_bin ])
631- w0 = np .zeros ((Y_bin .shape [1 ], n_features + int (fit_intercept )),
635+ Y_binarized = lbin .fit_transform (y )
636+ if Y_binarized .shape [1 ] == 1 :
637+ Y_binarized = np .hstack ([1 - Y_binarized , Y_binarized ])
638+ w0 = np .zeros ((Y_binarized .shape [1 ], n_features + int (fit_intercept )),
632639 order = 'F' )
633- mask_classes = classes
634-
635- if class_weight == "auto" :
636- class_weight_ = compute_class_weight (class_weight , mask_classes ,
637- y_bin )
638- sample_weight *= class_weight_ [le .fit_transform (y_bin )]
639640
640641 if coef is not None :
641642 # it must work both giving the bias term and not
@@ -664,7 +665,7 @@ def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True,
664665 if multi_class == 'multinomial' :
665666 # fmin_l_bfgs_b and newton-cg accepts only ravelled parameters.
666667 w0 = w0 .ravel ()
667- target = Y_bin
668+ target = Y_binarized
668669 if solver == 'lbfgs' :
669670 func = lambda x , * args : _multinomial_loss_grad (x , * args )[0 :2 ]
670671 elif solver == 'newton-cg' :
@@ -1534,9 +1535,18 @@ def fit(self, X, y, sample_weight=None):
15341535 if self .class_weight and not (isinstance (self .class_weight , dict ) or
15351536 self .class_weight in
15361537 ['balanced' , 'auto' ]):
1538+ # 'auto' is deprecated and will be removed in 0.19
15371539 raise ValueError ("class_weight provided should be a "
15381540 "dict or 'balanced'" )
15391541
1542+ # compute the class weights for the entire dataset y
1543+ if self .class_weight in ("auto" , "balanced" ):
1544+ classes = np .unique (y )
1545+ class_weight = compute_class_weight (self .class_weight , classes , y )
1546+ class_weight = dict (zip (classes , class_weight ))
1547+ else :
1548+ class_weight = self .class_weight
1549+
15401550 path_func = delayed (_log_reg_scoring_path )
15411551
15421552 # The SAG solver releases the GIL so it's more efficient to use
@@ -1548,7 +1558,7 @@ def fit(self, X, y, sample_weight=None):
15481558 fit_intercept = self .fit_intercept , penalty = self .penalty ,
15491559 dual = self .dual , solver = self .solver , tol = self .tol ,
15501560 max_iter = self .max_iter , verbose = self .verbose ,
1551- class_weight = self . class_weight , scoring = self .scoring ,
1561+ class_weight = class_weight , scoring = self .scoring ,
15521562 multi_class = self .multi_class ,
15531563 intercept_scaling = self .intercept_scaling ,
15541564 random_state = self .random_state ,
@@ -1620,7 +1630,7 @@ def fit(self, X, y, sample_weight=None):
16201630 fit_intercept = self .fit_intercept , coef = coef_init ,
16211631 max_iter = self .max_iter , tol = self .tol ,
16221632 penalty = self .penalty , copy = False ,
1623- class_weight = self . class_weight ,
1633+ class_weight = class_weight ,
16241634 multi_class = self .multi_class ,
16251635 verbose = max (0 , self .verbose - 1 ),
16261636 random_state = self .random_state ,
0 commit comments