Skip to content

Commit 0a47b76

Browse files
committed
Added tests, increased coverage.
1 parent 8dfd0bf commit 0a47b76

2 files changed

Lines changed: 57 additions & 49 deletions

File tree

sklearn/preprocessing/data.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ def __init__(self, copy=True, with_centering=True, with_scaling=True,
8686

8787
def _check_array(self, X, copy):
8888
"""Makes sure centering is not enabled for sparse matrices."""
89-
X = check_arrays(X, copy=copy, sparse_format="csr")[0]
89+
X = check_arrays(X, copy=copy)[0]
9090
if warn_if_not_float(X, estimator=self):
9191
X = X.astype(np.float)
9292
if sparse.issparse(X):
@@ -152,11 +152,6 @@ def inverse_transform(self, X, copy=None):
152152
copy = copy if copy is not None else self.copy
153153
X = self._check_array(X, copy)
154154
if sparse.issparse(X):
155-
if not sparse.isspmatrix_csr(X) and not sparse.isspmatrix_csc(X):
156-
X = X.tocsr()
157-
copy = False
158-
if copy:
159-
X = X.copy()
160155
if self.with_scaling:
161156
if isinstance(X, sparse.csr_matrix):
162157
inplace_csr_column_scale(X, self.scale_)

sklearn/preprocessing/tests/test_data.py

Lines changed: 56 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,30 @@ def test_scaler_2d_arrays():
118118
assert_true(X_scaled is not X)
119119

120120

121+
def test_standard_scaler_zero_variance_features():
    """Check StandardScaler on toy data with zero variance features."""
    # The first two columns are constant across the training samples.
    X = [[0., 1., +0.5],
         [0., 1., -0.1],
         [0., 1., +1.1]]
    scaler = StandardScaler()
    X_trans = scaler.fit_transform(X)
    # Constant columns are expected to map to all zeros.
    X_expected = [[0., 0., 0.],
                  [0., 0., -1.22474487],
                  [0., 0., 1.22474487]]
    assert_array_almost_equal(X_trans, X_expected)
    # Round trip: inverse_transform must recover the training data.
    X_trans_inv = scaler.inverse_transform(X_trans)
    assert_array_almost_equal(X, X_trans_inv, decimal=4)

    # Unseen data whose formerly-constant columns now vary; the expected
    # values show those columns shifted by the training mean but not rescaled.
    X_new = [[+0., 2., 0.5],
             [-1., 1., 0.0],
             [+0., 1., 1.5]]
    X_trans_new = scaler.transform(X_new)
    X_expected_new = [[+0., 1., 0.],
                      [-1., 0., -1.02062073],
                      [+0., 0., 2.04124145]]
    assert_array_almost_equal(X_trans_new, X_expected_new, decimal=4)
143+
144+
121145
def test_min_max_scaler_iris():
122146
X = iris.data
123147
scaler = MinMaxScaler()
@@ -761,8 +785,8 @@ def test_robust_scaler_1d():
761785
assert_array_almost_equal(np.median(X_scaled, axis=0), 0.0)
762786

763787

764-
def test_scaler_2d_arrays():
765-
"""Test scaling of 2d array along first axis"""
788+
def test_robust_scaler_2d_arrays():
789+
"""Test robust scaling of 2d array along first axis"""
766790
rng = np.random.RandomState(0)
767791
X = rng.randn(4, 5)
768792
X[:, 0] = 0.0 # first feature is always of zero
@@ -855,6 +879,12 @@ def test_robust_scaler_zero_variance_features():
855879
X_trans_inv = scaler.inverse_transform(X_trans)
856880
assert_array_almost_equal(X, X_trans_inv)
857881

882+
X_trans_new = scaler.transform(X_new)
883+
X_expected_new = [[+0., 1., +0.],
884+
[-1., 0., -0.52083],
885+
[+0., 0., +1.04166]]
886+
assert_array_almost_equal(X_trans_new, X_expected_new, decimal=3)
887+
858888

859889
def test_robust_scaler_int():
860890
# test that scaler converts integer input to floating
@@ -976,36 +1006,6 @@ def test_maxabs_scaler_zero_variance_features():
9761006
assert_array_almost_equal(X_trans_new, X_expected_new, decimal=2)
9771007

9781008

979-
def test_maxabs_scaler_zero_variance_features():
980-
"""Check MaxAbsScaler on toy data with zero variance features"""
981-
X = [[0., 1., +0.5],
982-
[0., 1., -0.3],
983-
[0., 1., +1.5],
984-
[0., 0., +0.0]]
985-
986-
X_new = [[+0., 2., 0.5],
987-
[-1., 1., 0.0],
988-
[+0., 1., 1.5]]
989-
990-
# default params
991-
scaler = MaxAbsScaler()
992-
X_trans = scaler.fit_transform(X)
993-
X_expected = [[0., 1., 1.0 / 3.0],
994-
[0., 1., -0.2],
995-
[0., 1., 1.0],
996-
[0., 0., 0.0]]
997-
assert_array_almost_equal(X_trans, X_expected)
998-
X_trans_inv = scaler.inverse_transform(X_trans)
999-
assert_array_almost_equal(X, X_trans_inv)
1000-
1001-
X_trans_new = scaler.transform(X_new)
1002-
X_expected_new = [[+0., 2.0, 1.0 / 3.0],
1003-
[-1., 1.0, 0.0],
1004-
[+0., 1.0, 1.0]]
1005-
1006-
assert_array_almost_equal(X_trans_new, X_expected_new, decimal=2)
1007-
1008-
10091009
def test_maxabs_scaler_sparse_1D():
10101010
rng = np.random.RandomState(42)
10111011
A = rng.randn(40)
@@ -1026,11 +1026,6 @@ def test_maxabs_scaler_sparse_data():
10261026
[0., 1., -1.5, 0],
10271027
[0., 0., +0.0, -2]]
10281028

1029-
X_expected = [[0., 1., 1.0 / 3.0, -0.5],
1030-
[0., 1., -0.2, -0.25],
1031-
[0., 1., -1.0, 0.0],
1032-
[0., 0., 0.0, -1.0]]
1033-
10341029
X_csr = sparse.csr_matrix(X)
10351030
X_csc = sparse.csc_matrix(X)
10361031

@@ -1057,7 +1052,6 @@ def test_maxabs_scaler_large_negative_value():
10571052
[0., 1., -100.0, 0.0],
10581053
[0., 0., +0.0, -2.0]]
10591054

1060-
# default params
10611055
scaler = MaxAbsScaler()
10621056
X_trans = scaler.fit_transform(X)
10631057
X_expected = [[0., 1., 0.005, -0.5],
@@ -1076,7 +1070,7 @@ def test_center_property():
10761070
[0., 0.5, -1.4]]
10771071

10781072
scaler = StandardScaler()
1079-
X_trans = scaler.fit_transform(X)
1073+
scaler.fit_transform(X)
10801074
means = [0.0, 0.5, -0.4]
10811075
assert_array_almost_equal(scaler.center_, means)
10821076
assert_array_almost_equal(scaler.mean_, means) # deprecated parameter
@@ -1089,7 +1083,26 @@ def test_scale_property():
10891083
[0., 4.0, -1.7]]
10901084

10911085
scaler = StandardScaler()
1092-
X_trans = scaler.fit_transform(X)
1093-
means = [1.0, 1.632993, 0.90921211]
1094-
assert_array_almost_equal(scaler.scale_, means)
1095-
assert_array_almost_equal(scaler.std_, means) # deprecated parameter
1086+
scaler.fit_transform(X)
1087+
std = [1.0, 1.632993, 0.90921211]
1088+
assert_array_almost_equal(scaler.scale_, std)
1089+
assert_array_almost_equal(scaler.std_, std) # deprecated parameter
1090+
1091+
1092+
def test_throws_exceptions_on_unallowed_sparse():
    """Check that the Scalers throw on unallowed matrix formats.

    Each scaler class in the loop is expected to raise ``TypeError`` when
    fitted on a DIA sparse matrix and ``ValueError`` when asked to transform
    one after being fitted on dense data.
    """
    X = [[0., 2.0, +0.5],
         [0., 0.0, -0.3],
         [0., 4.0, -1.7]]
    X_dia = sparse.dia_matrix(X)

    for Scaler in (MinMaxScaler, RobustScaler):
        # Instantiate the class under iteration. Hard-coding MinMaxScaler
        # here would test it twice and leave RobustScaler unexercised.
        scaler = Scaler()
        assert_raises(TypeError, scaler.fit, X_dia)
        # Fit on the dense data so that transform() gets past the
        # fitted-state requirement and reaches its input validation.
        scaler.fit(X)
        assert_raises(ValueError, scaler.transform, X_dia)

0 commit comments

Comments
 (0)