Skip to content

Commit 295dc3c

Browse files
committed
Added parameter normalize to omp.py
Some changes in Linear_Model
1 parent a23365c commit 295dc3c

2 files changed

Lines changed: 31 additions & 32 deletions

File tree

scikits/learn/linear_model/base.py

Lines changed: 25 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@
1212
# License: BSD Style.
1313

1414
import numpy as np
15+
import scipy.sparse
1516

1617
from ..base import BaseEstimator, RegressorMixin, ClassifierMixin
1718
from .sgd_fast import Hinge, Log, ModifiedHuber, SquaredLoss, Huber
@@ -47,39 +48,41 @@ def predict(self, X):
4748
return safe_sparse_dot(X, self.coef_.T) + self.intercept_
4849

4950
@staticmethod
50-
def _center_data(X, y, fit_intercept, normalize=False):
51+
def _center_data(X, y, fit_intercept, normalize=False, copy=False):
5152
"""
5253
Centers data to have mean zero along axis 0. This is here because
5354
nearly all linear models will want their data to be centered.
5455
"""
55-
import scipy.sparse # importing scipy.sparse just for this is overkill
5656
if fit_intercept:
5757
if scipy.sparse.issparse(X):
58-
Xmean = np.zeros(X.shape[1])
59-
Xstd = np.ones(X.shape[1])
58+
X_mean = np.zeros(X.shape[1])
59+
X_std = np.ones(X.shape[1])
6060
else:
61-
Xmean = X.mean(axis=0)
62-
X = X - Xmean
61+
if copy:
62+
X = X.copy()
63+
64+
X_mean = X.mean(axis=0)
65+
X = X - X_mean
6366
if normalize:
64-
Xstd = X.std(axis=0)
65-
Xstd[Xstd==0] = 1
66-
X = X / Xstd
67+
X_std = np.sqrt(np.sum(X ** 2, axis=0))
68+
X_std[X_std==0] = 1
69+
X = X / X_std
6770
else:
68-
Xstd = np.ones(X.shape[1])
69-
ymean = y.mean()
70-
y = y - ymean
71+
X_std = np.ones(X.shape[1])
72+
y_mean = y.mean()
73+
y = y - y_mean
7174
else:
72-
Xmean = np.zeros(X.shape[1])
73-
Xstd = np.ones(X.shape[1])
74-
ymean = 0.
75-
return X, y, Xmean, ymean, Xstd
75+
X_mean = np.zeros(X.shape[1])
76+
X_std = np.ones(X.shape[1])
77+
y_mean = 0.
78+
return X, y, X_mean, y_mean, X_std
7679

77-
def _set_intercept(self, Xmean, ymean, Xstd):
80+
def _set_intercept(self, X_mean, y_mean, X_std):
7881
"""Set the intercept_
7982
"""
8083
if self.fit_intercept:
81-
self.coef_ = self.coef_ / Xstd
82-
self.intercept_ = ymean - np.dot(Xmean, self.coef_.T)
84+
self.coef_ = self.coef_ / X_std
85+
self.intercept_ = y_mean - np.dot(X_mean, self.coef_.T)
8386
else:
8487
self.intercept_ = 0
8588

@@ -132,12 +135,13 @@ def fit(self, X, y, **params):
132135
X = np.asanyarray(X)
133136
y = np.asanyarray(y)
134137

135-
X, y, Xmean, ymean, Xstd = LinearModel._center_data(X, y, self.fit_intercept, self.normalize)
138+
X, y, X_mean, y_mean, X_std = LinearModel._center_data(X, y,
139+
self.fit_intercept, self.normalize)
136140

137141
self.coef_, self.residues_, self.rank_, self.singular_ = \
138142
np.linalg.lstsq(X, y)
139143

140-
self._set_intercept(Xmean, ymean, Xstd)
144+
self._set_intercept(X_mean, y_mean, X_std)
141145
return self
142146

143147
##

scikits/learn/linear_model/omp.py

Lines changed: 6 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -497,11 +497,8 @@ def fit(self, X, y, Gram=None, Xy=None, overwrite_x=False,
497497
X = np.atleast_2d(X)
498498
y = np.atleast_1d(y)
499499

500-
X, y, Xmean, ymean = LinearModel._center_data(X, y, self.fit_intercept)
501-
if self.normalize:
502-
norms = np.sqrt(np.sum(X ** 2, axis=0))
503-
nonzeros = np.flatnonzero(norms)
504-
X[:, nonzeros] /= norms[nonzeros]
500+
X, y, X_mean, y_mean, X_std = self._center_data(X, y, self.fit_intercept,
501+
self.normalize, not(overwrite_x))
505502
if Gram is not None:
506503
Gram = np.atleast_2d(Gram)
507504

@@ -520,11 +517,11 @@ def fit(self, X, y, Gram=None, Xy=None, overwrite_x=False,
520517
if not overwrite_xy:
521518
Xy = Xy.copy()
522519
if self.normalize:
523-
Xy /= norms
520+
Xy /= X_std
524521

525522
if self.normalize:
526-
Gram /= norms
527-
Gram /= norms[:, np.newaxis]
523+
Gram /= X_std
524+
Gram /= X_std[:, np.newaxis]
528525

529526
norms_sq = np.sum((y ** 2), axis=0) if eps is not None else None
530527
self.coef_ = orthogonal_mp_gram(Gram, Xy, self.n_nonzero_coefs,
@@ -538,7 +535,5 @@ def fit(self, X, y, Gram=None, Xy=None, overwrite_x=False,
538535
precompute_gram=precompute_gram,
539536
overwrite_x=overwrite_x).T
540537

541-
if self.normalize:
542-
self.coef_ /= norms
543-
self._set_intercept(Xmean, ymean)
538+
self._set_intercept(X_mean, y_mean, X_std)
544539
return self

0 commit comments

Comments
 (0)