-
Notifications
You must be signed in to change notification settings - Fork 61
Expand file tree
/
Copy pathSimpleDataFrame.py
More file actions
150 lines (126 loc) · 4.93 KB
/
SimpleDataFrame.py
File metadata and controls
150 lines (126 loc) · 4.93 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
import PySimpleGUI as sg
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
def read_table():
    """Ask the user for a CSV/text file and load it into a DataFrame.

    The user picks a file, then answers two yes/no prompts: whether the
    first row of the file holds column names, and whether rows containing
    NaN values should be dropped.

    Returns:
        A tuple ``(df, data, header_list, fn)`` where ``df`` is the loaded
        DataFrame, ``data`` is the same rows as a list of lists,
        ``header_list`` is the list of column names and ``fn`` is the base
        name of the chosen file. ``None`` is returned when no file was
        chosen or the file could not be read.
    """
    import os  # local import: only needed here to derive the file's base name

    sg.set_options(auto_size_buttons=True)
    filename = sg.popup_get_file(
        'Dataset to read',
        title='Dataset to read',
        no_window=True,
        file_types=(("CSV Files", "*.csv"), ("Text Files", "*.txt")))
    # Treat any falsy result ('' or None, for instance) as "no file selected"
    # and stop before asking further questions about a file that does not exist.
    if not filename:
        return None

    colnames_prompt = sg.popup_yes_no('Does this file have column names already?')
    nan_prompt = sg.popup_yes_no('Drop NaN entries?')
    fn = os.path.basename(filename)

    try:
        if colnames_prompt == 'Yes':
            # read_csv consumes the first row as the header, so every row
            # left in the frame is real data and none must be skipped.
            df = pd.read_csv(filename, sep=',', engine='python')
        else:
            df = pd.read_csv(filename, sep=',', engine='python', header=None)
            # Creates column names for each column ('column0', 'column1', etc)
            df.columns = ['column' + str(x) for x in range(df.shape[1])]
        # NaN drop?
        if nan_prompt == 'Yes':
            df = df.dropna()
    except Exception as exc:
        # Best-effort UI boundary: report the failure instead of crashing.
        sg.popup_error('Error reading file', exc)
        return None

    header_list = list(df.columns)
    # Built after the optional dropna so the table shown to the user matches
    # the DataFrame used for the statistics and the plot.
    data = df.values.tolist()
    return (df, data, header_list, fn)
def show_table(data, header_list, fn):
    """Display the dataset rows in a table window.

    Args:
        data: Rows to display, as a list of lists.
        header_list: Column headings shown above the rows.
        fn: Text used as the window title.

    The window is read once and then closed; at most 25 rows are visible
    at a time.
    """
    visible_rows = min(25, len(data))
    table = sg.Table(
        values=data,
        headings=header_list,
        font='Helvetica',
        pad=(25, 25),
        display_row_numbers=False,
        auto_size_columns=True,
        num_rows=visible_rows,
    )
    table_window = sg.Window(fn, [[table]], grab_anywhere=False)
    # The event and values are not needed; the call only waits for the user.
    table_window.read()
    table_window.close()
def show_stats(df):
    """Display the descriptive statistics of ``df`` in a table window.

    The output of ``df.describe()`` is transposed so that each described
    column becomes one row, labelled in a leading 'Feature' column.

    Args:
        df: DataFrame to summarise.
    """
    stats = df.describe().T
    header_list = ['Feature'] + list(stats.columns)
    # Prefix every row of statistics with the name of the feature it describes.
    data = [
        [feature] + row
        for feature, row in zip(stats.index, stats.values.tolist())
    ]
    layout = [
        [sg.Table(values=data,
                  headings=header_list,
                  font='Helvetica',
                  pad=(10, 10),
                  display_row_numbers=False,
                  auto_size_columns=True,
                  num_rows=min(25, len(data)))]
    ]
    window = sg.Window("Statistics", layout, grab_anywhere=False)
    # The event and values are not needed; the call only waits for the user.
    window.read()
    window.close()
def plot_fig(df, x_index=3, y_index=5):
    """Show a scatter plot of two DataFrame columns in a PySimpleGUI window.

    Args:
        df: DataFrame holding the data to plot.
        x_index: Position of the column plotted on the x axis
            (default 3, i.e. the fourth column).
        y_index: Position of the column plotted on the y axis
            (default 5, i.e. the sixth column).

    If either position is out of range for ``df``, an error popup is shown
    and nothing is plotted.
    """
    columns = list(df.columns)
    try:
        x = columns[x_index]
        y = columns[y_index]
    except IndexError:
        sg.popup_error(
            'Cannot plot: the dataset has {} column(s), but column positions '
            '{} and {} were requested'.format(len(columns), x_index, y_index))
        return

    # ------------------------------- Matplotlib helper code -----------------------
    def draw_figure(canvas, figure):
        """Embed ``figure`` in the Tk ``canvas`` and return the FigureCanvasTkAgg."""
        figure_canvas_agg = FigureCanvasTkAgg(figure, canvas)
        figure_canvas_agg.draw()
        figure_canvas_agg.get_tk_widget().pack(side='top', fill='both', expand=1)
        return figure_canvas_agg

    fig = plt.figure(dpi=100)
    try:
        axes = fig.add_subplot(111)
        axes.scatter(df[x], df[y], color='blue', edgecolor='k')
        axes.set_xlabel(x)
        axes.set_ylabel(y)

        # ------------------------------- GUI code -------------------------------
        # define the window layout
        layout = [[sg.Text('Plot of {} vs. {}'.format(x, y))],
                  [sg.Canvas(key='-CANVAS-',
                             size=(700, 500),
                             pad=(15, 15))],
                  [sg.Button('Ok')]]
        # create the form and show it without the plot
        window = sg.Window('Plot',
                           layout,
                           size=(800, 600),
                           finalize=True,
                           element_justification='center',
                           font='Helvetica 18')
        try:
            # add the plot to the window, then wait for the user
            draw_figure(window['-CANVAS-'].TKCanvas, fig)
            window.read()
        finally:
            window.close()
    finally:
        # Figures created through pyplot stay registered until explicitly
        # closed; release this one so repeated plots do not accumulate.
        plt.close(fig)
def main():
    """Run the interactive workflow: load a dataset, then optionally show it.

    After the dataset is loaded the user is asked, in turn, whether to show
    the raw table, the descriptive statistics and a scatter plot. Nothing
    further happens when no dataset was loaded.
    """
    loaded = read_table()
    # read_table() returns None when the user cancels or the file cannot be
    # read; unpacking that would raise TypeError, so stop quietly instead.
    if loaded is None:
        return
    df, data, header_list, fn = loaded

    # Show data?
    show_prompt = sg.popup_yes_no('Show the dataset?')
    if show_prompt == 'Yes':
        show_table(data, header_list, fn)
    # Show stats?
    stats_prompt = sg.popup_yes_no('Show the descriptive stats?')
    if stats_prompt == 'Yes':
        show_stats(df)
    # Show a plot?
    plot_prompt = sg.popup_yes_no('Show a scatter plot?')
    if plot_prompt == 'Yes':
        plot_fig(df)
# Start the interactive workflow only when this file is run as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()