generated from fastai/nbdev_template
-
Notifications
You must be signed in to change notification settings - Fork 29
Expand file tree
/
Copy pathlogistic_regression.py
More file actions
158 lines (128 loc) · 4.54 KB
/
logistic_regression.py
File metadata and controls
158 lines (128 loc) · 4.54 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
# AUTOGENERATED! DO NOT EDIT! File to edit: 04_logistic_regression.ipynb (unless otherwise specified).
__all__ = ['load_data', 'plot_data', 'sigmoid', 'predict', 'decision_boundary', 'classify', 'cost_function', 'train',
'update_weights', 'get_accuracy']
# Cell
import numpy as np
import pandas as pd
import altair as alt
from tqdm import tqdm
def load_data(file_name):
    """
    Load the CSV file located at `file_name` into a
    Pandas DataFrame and return it.
    """
    return pd.read_csv(file_name)
# Cell
def plot_data(data):
    """
    Build and return an Altair scatter chart of `data`:
    hours studied (x) vs. hours slept (y), colored by the
    pass/fail label.
    """
    chart = alt.Chart(data).mark_point(size=100, filled=True)
    encoded = chart.encode(
        alt.X("studied"),
        alt.Y("slept", scale=alt.Scale(domain=[0, 11])),
        alt.Color("passed:N"),
    )
    return encoded
# Cell
def sigmoid(z):
    """Logistic function: map `z` (scalar or array) into (0, 1)."""
    exp_neg = np.exp(-z)
    return 1.0 / (1 + exp_neg)
# Cell
def predict(examples, parameters):
    """
    Return P(y=1) for every row of `examples` under `parameters`,
    i.e. sigmoid(examples @ parameters), as an array of probabilities.
    """
    return sigmoid(examples @ parameters)
# Cell
def decision_boundary(prob):
    """
    Threshold a probability at 0.5: return class 1 when
    `prob` >= 0.5, otherwise class 0.
    """
    if prob >= 0.5:
        return 1
    return 0
# Cell
def classify(predictions):
    """
    Map an array of probabilities (`predictions`) to hard class
    labels by applying `decision_boundary` element-wise.
    Return an N-element array of 0s (False) and 1s (True).
    """
    return np.vectorize(decision_boundary)(predictions)
# Cell
def cost_function(examples, labels, parameters):
    """
    Compute the binary cross-entropy (log loss) cost.
    (NOTE: the previous docstring called this Mean Absolute Error,
    but the implementation is and always was cross-entropy.)
    `examples`: array of shape (n_examples, n_features),
    `labels`: array of 0/1 labels with shape (n_examples, 1),
    `parameters`: weight vector w of shape (n_features, 1).
    Return the scalar average cost over all examples.
    """
    observations = len(labels)
    predictions = predict(examples, parameters)
    # Clip so np.log never sees exactly 0 or 1 — sigmoid saturates
    # to those values for large |z|, which would yield -inf/nan.
    eps = 1e-15
    predictions = np.clip(predictions, eps, 1 - eps)
    # Error contribution when label = 1
    class1_cost = -labels * np.log(predictions)
    # Error contribution when label = 0
    class2_cost = -(1 - labels) * np.log(1 - predictions)
    # Average the summed per-example costs
    cost = (class1_cost + class2_cost).sum() / observations
    return cost
# Cell
def train(examples, labels, parameters, learning_rate, iterations):
    """
    Fit the logistic-regression weights with batch gradient descent.
    `examples`: training examples,
    `labels`: class labels, i.e. 0 or 1,
    `parameters`: initial weight vector w,
    `learning_rate`: step size of the gradient descent,
    `iterations`: number of gradient-descent steps.
    Return the fitted parameters and the list of per-iteration costs.
    """
    cost_history = []
    for step in tqdm(range(1, iterations + 1)):
        parameters = update_weights(examples, labels, parameters, learning_rate)
        # Record the cost so convergence can be audited afterwards.
        cost = cost_function(examples, labels, parameters)
        cost_history.append(cost)
        # Periodic progress report
        if step % 1000 == 0:
            print("iter: {:d}, cost: {:.4f}".format(step, cost))
    return parameters, cost_history
# Cell
def update_weights(examples, labels, parameters, learning_rate):
    """
    Perform one gradient-descent step on the logistic-regression
    parameters.
    `examples`: array of examples with shape (n_examples, n_features),
    `labels`: array of 0/1 labels with shape (n_examples, 1),
    `parameters`: weight vector with shape (n_features, 1),
    `learning_rate`: step size of the descent.
    Return a NEW parameter vector. The previous implementation used
    the in-place `parameters -= gradient`, which silently mutated the
    caller's array — contradicting its own "return the new vector"
    contract; this version updates out-of-place.
    """
    n = len(examples)
    predictions = predict(examples, parameters)
    # X.T @ (predictions - labels) yields one partial derivative per
    # feature, aggregated over all observations; divide by n to get
    # the mean gradient of the cross-entropy cost.
    gradient = np.dot(examples.T, predictions - labels) / n
    # Out-of-place update so the caller's array is left untouched.
    return parameters - learning_rate * gradient
# Cell
def get_accuracy(predicted_labels, actual_labels):
    """
    Measure the accuracy of the model: the fraction of
    `predicted_labels` that match `actual_labels`, as a
    float in [0, 1].
    """
    mismatches = np.count_nonzero(predicted_labels - actual_labels)
    return 1.0 - mismatches / len(predicted_labels)