|
12 | 12 | # License: BSD Style. |
13 | 13 |
|
14 | 14 | import numpy as np |
| 15 | +import scipy.sparse |
15 | 16 |
|
16 | 17 | from ..base import BaseEstimator, RegressorMixin, ClassifierMixin |
17 | 18 | from .sgd_fast import Hinge, Log, ModifiedHuber, SquaredLoss, Huber |
@@ -47,39 +48,41 @@ def predict(self, X): |
47 | 48 | return safe_sparse_dot(X, self.coef_.T) + self.intercept_ |
48 | 49 |
|
49 | 50 | @staticmethod |
50 | | - def _center_data(X, y, fit_intercept, normalize=False): |
| 51 | + def _center_data(X, y, fit_intercept, normalize=False, copy=False): |
51 | 52 | """ |
52 | 53 | Centers data to have mean zero along axis 0. This is here because |
53 | 54 | nearly all linear models will want their data to be centered. |
54 | 55 | """ |
55 | | - import scipy.sparse # importing scipy.sparse just for this is overkill |
56 | 56 | if fit_intercept: |
57 | 57 | if scipy.sparse.issparse(X): |
58 | | - Xmean = np.zeros(X.shape[1]) |
59 | | - Xstd = np.ones(X.shape[1]) |
| 58 | + X_mean = np.zeros(X.shape[1]) |
| 59 | + X_std = np.ones(X.shape[1]) |
60 | 60 | else: |
61 | | - Xmean = X.mean(axis=0) |
62 | | - X = X - Xmean |
| 61 | + if copy: |
| 62 | + X = X.copy() |
| 63 | + |
| 64 | + X_mean = X.mean(axis=0) |
| 65 | + X = X - X_mean |
63 | 66 | if normalize: |
64 | | - Xstd = X.std(axis=0) |
65 | | - Xstd[Xstd==0] = 1 |
66 | | - X = X / Xstd |
| 67 | + X_std = np.sqrt(np.sum(X ** 2, axis=0)) |
| 68 | + X_std[X_std==0] = 1 |
| 69 | + X = X / X_std |
67 | 70 | else: |
68 | | - Xstd = np.ones(X.shape[1]) |
69 | | - ymean = y.mean() |
70 | | - y = y - ymean |
| 71 | + X_std = np.ones(X.shape[1]) |
| 72 | + y_mean = y.mean() |
| 73 | + y = y - y_mean |
71 | 74 | else: |
72 | | - Xmean = np.zeros(X.shape[1]) |
73 | | - Xstd = np.ones(X.shape[1]) |
74 | | - ymean = 0. |
75 | | - return X, y, Xmean, ymean, Xstd |
| 75 | + X_mean = np.zeros(X.shape[1]) |
| 76 | + X_std = np.ones(X.shape[1]) |
| 77 | + y_mean = 0. |
| 78 | + return X, y, X_mean, y_mean, X_std |
76 | 79 |
|
77 | | - def _set_intercept(self, Xmean, ymean, Xstd): |
| 80 | + def _set_intercept(self, X_mean, y_mean, X_std): |
78 | 81 | """Set the intercept_ |
79 | 82 | """ |
80 | 83 | if self.fit_intercept: |
81 | | - self.coef_ = self.coef_ / Xstd |
82 | | - self.intercept_ = ymean - np.dot(Xmean, self.coef_.T) |
| 84 | + self.coef_ = self.coef_ / X_std |
| 85 | + self.intercept_ = y_mean - np.dot(X_mean, self.coef_.T) |
83 | 86 | else: |
84 | 87 | self.intercept_ = 0 |
85 | 88 |
|
@@ -132,12 +135,13 @@ def fit(self, X, y, **params): |
132 | 135 | X = np.asanyarray(X) |
133 | 136 | y = np.asanyarray(y) |
134 | 137 |
|
135 | | - X, y, Xmean, ymean, Xstd = LinearModel._center_data(X, y, self.fit_intercept, self.normalize) |
| 138 | + X, y, X_mean, y_mean, X_std = LinearModel._center_data(X, y, |
| 139 | + self.fit_intercept, self.normalize) |
136 | 140 |
|
137 | 141 | self.coef_, self.residues_, self.rank_, self.singular_ = \ |
138 | 142 | np.linalg.lstsq(X, y) |
139 | 143 |
|
140 | | - self._set_intercept(Xmean, ymean, Xstd) |
| 144 | + self._set_intercept(X_mean, y_mean, X_std) |
141 | 145 | return self |
142 | 146 |
|
143 | 147 | ## |
|
0 commit comments