1313from collections import defaultdict
1414import matplotlib .pyplot as plt
1515from sklearn .datasets import fetch_lfw_people
16- from sklearn .decomposition import IncrementalPCA , RandomizedPCA , PCA
16+ from sklearn .decomposition import IncrementalPCA , PCA
1717
1818
1919def plot_results (X , y , label ):
@@ -37,7 +37,6 @@ def plot_feature_times(all_times, batch_size, all_components, data):
3737 plot_results (all_components , all_times ['pca' ], label = "PCA" )
3838 plot_results (all_components , all_times ['ipca' ],
3939 label = "IncrementalPCA, bsize=%i" % batch_size )
40- plot_results (all_components , all_times ['rpca' ], label = "RandomizedPCA" )
4140 plt .legend (loc = "upper left" )
4241 plt .suptitle ("Algorithm runtime vs. n_components\n \
4342 LFW, size %i x %i" % data .shape )
@@ -50,7 +49,6 @@ def plot_feature_errors(all_errors, batch_size, all_components, data):
5049 plot_results (all_components , all_errors ['pca' ], label = "PCA" )
5150 plot_results (all_components , all_errors ['ipca' ],
5251 label = "IncrementalPCA, bsize=%i" % batch_size )
53- plot_results (all_components , all_errors ['rpca' ], label = "RandomizedPCA" )
5452 plt .legend (loc = "lower left" )
5553 plt .suptitle ("Algorithm error vs. n_components\n "
5654 "LFW, size %i x %i" % data .shape )
@@ -61,7 +59,6 @@ def plot_feature_errors(all_errors, batch_size, all_components, data):
6159def plot_batch_times (all_times , n_features , all_batch_sizes , data ):
6260 plt .figure ()
6361 plot_results (all_batch_sizes , all_times ['pca' ], label = "PCA" )
64- plot_results (all_batch_sizes , all_times ['rpca' ], label = "RandomizedPCA" )
6562 plot_results (all_batch_sizes , all_times ['ipca' ], label = "IncrementalPCA" )
6663 plt .legend (loc = "lower left" )
6764 plt .suptitle ("Algorithm runtime vs. batch_size for n_components %i\n \
@@ -92,11 +89,9 @@ def fixed_batch_size_comparison(data):
9289 all_errors = defaultdict (list )
9390 for n_components in all_features :
9491 pca = PCA (n_components = n_components )
95- rpca = RandomizedPCA (n_components = n_components , random_state = 1999 )
9692 ipca = IncrementalPCA (n_components = n_components , batch_size = batch_size )
9793 results_dict = {k : benchmark (est , data ) for k , est in [('pca' , pca ),
98- ('ipca' , ipca ),
99- ('rpca' , rpca )]}
94+ ('ipca' , ipca )]}
10095
10196 for k in sorted (results_dict .keys ()):
10297 all_times [k ].append (results_dict [k ]['time' ])
@@ -116,7 +111,8 @@ def variable_batch_size_comparison(data):
116111 all_times = defaultdict (list )
117112 all_errors = defaultdict (list )
118113 pca = PCA (n_components = n_components )
119- rpca = RandomizedPCA (n_components = n_components , random_state = 1999 )
114+ rpca = PCA (n_components = n_components , svd_solver = 'randomized' ,
115+ random_state = 1999 )
120116 results_dict = {k : benchmark (est , data ) for k , est in [('pca' , pca ),
121117 ('rpca' , rpca )]}
122118
@@ -138,8 +134,6 @@ def variable_batch_size_comparison(data):
138134 all_errors ['ipca' ].append (results_dict ['ipca' ]['error' ])
139135
140136 plot_batch_times (all_times , n_components , batch_sizes , data )
141- # RandomizedPCA error is always worse (approx 100x) than other PCA
142- # tests
143137 plot_batch_errors (all_errors , n_components , batch_sizes , data )
144138
145139faces = fetch_lfw_people (resize = .2 , min_faces_per_person = 5 )
0 commit comments