@@ -235,7 +235,7 @@ def get_results_mice_imputation_includingy(X_incomplete, y):
235235n_labels = ['Full Data' , 'Chained Imputer' , 'Mice Imputer' , 'Mice Imputer with y' ]
236236colors = ['r' , 'orange' , 'b' , 'purple' ]
237237width = 0.3
238- plt .figure (figsize = (24 , 16 ))
238+ plt .figure (figsize = (12 , 16 ))
239239
240240plt1 = plt .subplot (211 )
241241for j in n :
@@ -275,6 +275,23 @@ def get_results_full_data(X_train, X_test, y_train, y_test):
275275
276276 return mse_full
277277
def get_results_single_imputation(X_train, X_test, y_train, y_test):
    """Return the test-set MSE of a linear model fitted on singly-imputed data.

    Missing values are filled with a ChainedImputer drawing a single
    imputation after 99 burn-in rounds; features are then standardized
    (statistics learned on the training split only) before fitting an
    ordinary least-squares regression.
    """
    # Single imputation: one draw after the burn-in period, fixed seed
    # so repeated simulation runs are comparable.
    imputer = ChainedImputer(n_burn_in=99, n_imputations=1, random_state=0)
    train_filled = imputer.fit_transform(X_train)
    test_filled = imputer.transform(X_test)

    # Standardize using training statistics; apply the same transform to test.
    scaler = StandardScaler()
    train_scaled = scaler.fit_transform(train_filled)
    test_scaled = scaler.transform(test_filled)

    # Fit on the training split and score predictions on the held-out split.
    estimator = LinearRegression()
    estimator.fit(train_scaled, y_train)
    predictions = estimator.predict(test_scaled)

    return mse(y_test, predictions)
294+
278295# Perform pipeline for i in m
279296# Approach 1: pool the mse values of the m datasets
280297def get_results_multiple_imputation_approach1 (X_train , X_test , y_train , y_test ):
@@ -358,32 +375,35 @@ def perform_simulation(dataset, X_incomplete, nsim = 10):
358375 y_test = y [test_indices ]
359376
360377 mse_full = get_results_full_data (X_full_train , X_full_test , y_train , y_test )
378+ mse_single = get_results_single_imputation (X_incomplete_train , X_incomplete_test , y_train , y_test )
361379 mse_approach1 = get_results_multiple_imputation_approach1 (X_incomplete_train , X_incomplete_test , y_train , y_test )
362380 mse_approach2 = get_results_multiple_imputation_approach2 (X_incomplete_train , X_incomplete_test , y_train , y_test )
363381
364- outcome .append ((mse_full , mse_approach1 , mse_approach2 ))
382+ outcome .append ((mse_full , mse_single , mse_approach1 , mse_approach2 ))
365383
366384 return np .mean (outcome , axis = 0 ), np .std (outcome , axis = 0 )
367385
# Execute: simulate MCAR missingness on the Boston data and collect the
# mean and standard deviation of the MSE for each imputation strategy.
print("Executing Example 2 MCAR Missingness")
Boston_X_incomplete_MCAR = ampute(X_scaled, mech="MCAR")
mse_means, mse_std = perform_simulation(load_boston(),
                                        Boston_X_incomplete_MCAR, nsim=10)

# Plot results: one horizontal bar per strategy, in the same order as the
# tuples appended inside perform_simulation.
n_situations = 4
n = np.arange(n_situations)
n_labels = ['Full Data', 'Single Imputation', 'MI Average MSE',
            'MI Average Predictions']
colors = ['r', 'orange', 'green', 'yellow']

plt.figure(figsize=(12, 6))
ax1 = plt.subplot(111)
for j in n:
    # Simulation standard deviation shown as horizontal error bars.
    ax1.barh(j, mse_means[j], xerr=mse_std[j],
             color=colors[j], alpha=0.6, align='center')

ax1.set_title('MCAR Missingness')
ax1.set_yticks(n)
ax1.set_xlabel('Mean Squared Error')
# Invert so the first strategy ('Full Data') appears at the top.
ax1.invert_yaxis()
ax1.set_yticklabels(n_labels)

plt.show()
0 commit comments