Skip to content

Commit 3546217

Browse files
committed
multiple editing
1 parent 17c9196 commit 3546217

4 files changed

Lines changed: 24 additions & 31 deletions

File tree

sklearn/decomposition/pca.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -176,11 +176,14 @@ class PCA(_BasePCA):
176176
0 < n_components < min(X.shape)
177177
randomized :
178178
run randomized SVD by the method of Halko et al.
179+
lobpcg :
180+
run lobpcg_svd by LOBPCG of Knyazev 2001
179181
180182
.. versionadded:: 0.18.0
181183
182184
tol : float >= 0, optional (default .0)
183-
Tolerance for singular values computed by svd_solver == 'arpack'.
185+
Tolerance for singular values computed by svd_solver == 'arpack'
186+
For svd_solver == 'lobpcg', tol must be reasonable, not .0!
184187
185188
.. versionadded:: 0.18.0
186189
@@ -270,6 +273,8 @@ class PCA(_BasePCA):
270273
"A randomized algorithm for the decomposition of matrices".
271274
Applied and Computational Harmonic Analysis, 30(1), 47-68.`
272275
276+
For svd_solver == 'lobpcg', see: lobpcg_svd
277+
273278
274279
Examples
275280
--------

sklearn/decomposition/tests/test_pca.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -613,8 +613,9 @@ def test_pca_score_with_different_solvers():
613613
digits = datasets.load_digits()
614614
X_digits = digits.data
615615

616+
# the PCA default tol=.0 may break lobpcg_svd
616617
pca_dict = {svd_solver: PCA(n_components=30, svd_solver=svd_solver,
617-
random_state=0)
618+
random_state=0, tol=1e-4)
618619
for svd_solver in solver_list}
619620

620621
for pca in pca_dict.values():
@@ -631,6 +632,8 @@ def test_pca_score_with_different_solvers():
631632
assert_almost_equal(score_dict['full'], score_dict['arpack'])
632633
assert_almost_equal(score_dict['full'], score_dict['randomized'],
633634
decimal=3)
635+
assert_almost_equal(score_dict['full'], score_dict['lobpcg'],
636+
decimal=3)
634637

635638

636639
def test_pca_zero_noise_variance_edge_cases():
@@ -716,9 +719,10 @@ def check_pca_float_dtype_preservation(svd_solver):
716719
X_64 = np.random.RandomState(0).rand(1000, 4).astype(np.float64)
717720
X_32 = X_64.astype(np.float32)
718721

722+
# the PCA default tol=.0 may break lobpcg_svd
719723
pca_64 = PCA(n_components=3, svd_solver=svd_solver,
720-
random_state=0).fit(X_64)
721-
pca_32 = PCA(n_components=3, svd_solver=svd_solver,
724+
random_state=0, tol=1-10).fit(X_64)
725+
pca_32 = PCA(n_components=3, tol=1-5, svd_solver=svd_solver,
722726
random_state=0).fit(X_32)
723727

724728
assert pca_64.components_.dtype == np.float64
@@ -736,10 +740,11 @@ def check_pca_int_dtype_upcast_to_double(svd_solver):
736740
X_i64 = X_i64.astype(np.int64)
737741
X_i32 = X_i64.astype(np.int32)
738742

743+
# the PCA default tol=.0 may break lobpcg_svd
739744
pca_64 = PCA(n_components=3, svd_solver=svd_solver,
740-
random_state=0).fit(X_i64)
745+
random_state=0, tol=1-10).fit(X_i64)
741746
pca_32 = PCA(n_components=3, svd_solver=svd_solver,
742-
random_state=0).fit(X_i32)
747+
random_state=0, tol=1-5).fit(X_i32)
743748

744749
assert pca_64.components_.dtype == np.float64
745750
assert pca_32.components_.dtype == np.float64

sklearn/decomposition/tests/test_truncated_svd.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -203,8 +203,9 @@ def test_singular_values():
203203
random_state=rng).fit(X)
204204
rpca = TruncatedSVD(n_components=2, algorithm='randomized',
205205
random_state=rng).fit(X)
206+
# the PCA default tol=.0 may break lobpcg_svd
206207
lpca = TruncatedSVD(n_components=2, algorithm='lobpcg',
207-
random_state=rng).fit(X)
208+
random_state=rng, tol=1e-10).fit(X)
208209
assert_array_almost_equal(apca.singular_values_, rpca.singular_values_, 12)
209210
assert_array_almost_equal(apca.singular_values_, lpca.singular_values_, 12)
210211

@@ -238,8 +239,9 @@ def test_singular_values():
238239
random_state=rng)
239240
rpca = TruncatedSVD(n_components=3, algorithm='randomized',
240241
random_state=rng)
242+
# the PCA default tol=.0 may break lobpcg_svd
241243
lpca = TruncatedSVD(n_components=3, algorithm='lobpcg',
242-
random_state=rng)
244+
random_state=rng, tol=1e-8)
243245
X_apca = apca.fit_transform(X)
244246
X_rpca = rpca.fit_transform(X)
245247
X_lpca = rpca.fit_transform(X)

sklearn/utils/lobpcg.py

Lines changed: 4 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -11,33 +11,18 @@
1111

1212
import numpy as np
1313

14-
from numpy.testing import assert_allclose
15-
from scipy._lib.six import xrange
1614
from scipy.linalg import inv, eigh, cho_factor, cho_solve, cholesky
1715
from scipy.sparse.linalg import aslinearoperator, LinearOperator
1816

1917
__all__ = ['lobpcg']
2018

2119

22-
def pause():
23-
# Used only when verbosity level > 10.
24-
input()
25-
26-
2720
def save(ar, fileName):
2821
# Used only when verbosity level > 10.
2922
from numpy import savetxt
3023
savetxt(fileName, ar, precision=8)
3124

3225

33-
def _assert_symmetric(M, rtol=1e-5, atol=1e-8):
34-
assert_allclose(M.T.conj(), M, rtol=rtol, atol=atol)
35-
36-
37-
##
38-
# 21.05.2007, c
39-
40-
4126
def as2d(ar):
4227
"""
4328
If the input array is 2D return it, if it is 1D, append a dimension,
@@ -271,8 +256,6 @@ def lobpcg(A, X,
271256
raise ValueError('expected rank-2 array for argument X')
272257

273258
n, sizeX = blockVectorX.shape
274-
if sizeX > n:
275-
raise ValueError('X column dimension exceeds the row dimension')
276259

277260
A = _makeOperator(A, (n, n))
278261
B = _makeOperator(B, (n, n))
@@ -336,8 +319,7 @@ def lobpcg(A, X,
336319
try:
337320
# gramYBY is a Cholesky factor from now on...
338321
gramYBY = cho_factor(gramYBY)
339-
# E722 do not use bare except
340-
except:
322+
except linearlyDependentConstraints:
341323
raise ValueError('cannot handle linearly dependent constraints')
342324

343325
_applyConstraints(blockVectorX, gramYBY, blockVectorBY, blockVectorY)
@@ -383,7 +365,9 @@ def lobpcg(A, X,
383365
blockVectorAP = None
384366
blockVectorBP = None
385367

386-
for iterationNumber in xrange(maxIterations):
368+
iterationNumber = -1
369+
while iterationNumber < maxIterations:
370+
iterationNumber += 1
387371
if verbosityLevel > 0:
388372
print('iteration %d' % iterationNumber)
389373

@@ -505,9 +489,7 @@ def lobpcg(A, X,
505489

506490
if verbosityLevel > 10:
507491
print(eigBlockVector)
508-
pause()
509492

510-
##
511493
# Compute Ritz vectors.
512494
if iterationNumber > 0:
513495
eigBlockVectorX = eigBlockVector[:sizeX]
@@ -534,7 +516,6 @@ def lobpcg(A, X,
534516
print(pp)
535517
print(app)
536518
print(bpp)
537-
pause()
538519

539520
blockVectorX = np.dot(blockVectorX, eigBlockVectorX) + pp
540521
blockVectorAX = np.dot(blockVectorAX, eigBlockVectorX) + app

0 commit comments

Comments
 (0)