-
Notifications
You must be signed in to change notification settings - Fork 6
Expand file tree
/
Copy pathmcc.cpp
More file actions
202 lines (156 loc) · 5.91 KB
/
mcc.cpp
File metadata and controls
202 lines (156 loc) · 5.91 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
/******************* MCC EXAMPLE ******************/
// Last update: 13/08/2020
//
// In this example we show how you can use easyDAG to
// build a more complex pipeline for the evaluation of
// the Matthews Correlation Coefficient (MCC).
// We create a series of functions starting from the
// implementation provided in the "scorer" project
// (https://github.com/Nico-Curti/scorer).
// We start from two arrays of labels (true labels and
// predicted labels) and we create a pipeline including
// each required step for the evaluation of the MCC
// starting from the Confusion Matrix of the problem.
// Please see the "scorer" project for more details.
//
/***************************************************/
#include <set>
#include <cmath>
#include <vector>
#include <memory>
#include <numeric>
#include <iostream>
#include <algorithm>
#include <stdexcept>
#include <easyDAG.hpp>
// Entry point of the example: builds the MCC pipeline on a small hard-coded
// data set whose labels take the values 0, 1 and 2.
int main ()
{
// Number of samples stored in each of the two label arrays below.
const int Nlabels = 12;
// Number of distinct classes. Deliberately left unset here: it is written by
// get_classes, which captures it by reference.
int Nclass;
// Ground-truth labels, one per sample.
// NOTE(review): allocated with new[] and never released inside main.
int * y_true = new int[Nlabels];
y_true[0] = 2; y_true[1] = 0; y_true[2] = 2; y_true[3] = 2; y_true[4] = 0; y_true[5] = 1;
y_true[6] = 1; y_true[7] = 2; y_true[8] = 2; y_true[9] = 0; y_true[10] = 1; y_true[11] = 2;
// Predicted labels, aligned index-by-index with y_true.
// NOTE(review): allocated with new[] and never released inside main.
int * y_pred = new int[Nlabels];
y_pred[0] = 0; y_pred[1] = 0; y_pred[2] = 2; y_pred[3] = 1; y_pred[4] = 0; y_pred[5] = 2;
y_pred[6] = 1; y_pred[7] = 0; y_pred[8] = 2; y_pred[9] = 0; y_pred[10] = 2; y_pred[11] = 2;
// Collect the distinct labels that occur in either label array.
//
// Parameters
//   lbl_true, n_true : first label array and its length
//   lbl_pred, n_pred : second label array and its length
//
// Returns a new[]-allocated array holding the distinct labels in ascending
// order, converted to float; the caller owns it.
// Side effect: the number of distinct labels is stored in the captured Nclass.
auto get_classes = [&] (const int * lbl_true, const int * lbl_pred, const int & n_true, const int & n_pred)
{
  // A single ordered set performs both the de-duplication and the sorting.
  std :: set < int > labels (lbl_true, lbl_true + n_true);
  labels.insert(lbl_pred, lbl_pred + n_pred);

  Nclass = static_cast < int >(labels.size());

  float * unique_labels = new float[labels.size()];
  // Each int label is implicitly converted to float while being copied.
  std :: copy(labels.begin(), labels.end(), unique_labels);

  return unique_labels;
};
// Build the confusion matrix of a classification problem.
//
// Parameters
//   lbl_true : n_lbl reference labels (select the row)
//   lbl_pred : n_lbl predicted labels (select the column)
//   n_lbl    : number of samples
//   classes  : Nclass distinct class values; the position of a label in this
//              array is its row/column index
//   Nclass   : number of classes
//
// Returns a new[]-allocated, row-major Nclass x Nclass array of counts
// (stored as float); the caller owns it.
// Throws std::out_of_range when a label is not listed in `classes`; such a
// label would otherwise index past the end of the matrix.
auto get_confusion_matrix = [] (const int * lbl_true, const int * lbl_pred, const int & n_lbl, const float * classes, const int & Nclass)
{
  const float * first = classes;
  const float * last  = classes + Nclass;

  // Position of `label` inside `classes`, or Nclass when it is absent.
  const auto index_of = [first, last] (const int label)
  {
    return static_cast < int >(std :: find(first, last, static_cast < float >(label)) - first);
  };

  // The trailing () value-initialises every counter to zero. The buffer is held
  // by a unique_ptr so that it is released if the validation below throws.
  std :: unique_ptr < float[] > counts (new float[Nclass * Nclass]());

  for (int i = 0; i < n_lbl; ++i)
  {
    const int row = index_of(lbl_true[i]);
    const int col = index_of(lbl_pred[i]);

    if (row == Nclass || col == Nclass)
      throw std :: out_of_range("get_confusion_matrix: label not found in classes");

    ++ counts[row * Nclass + col];
  }

  // Ownership is handed over to the caller as a raw pointer, as before.
  return counts.release();
};
// Extract the main diagonal of the confusion matrix.
//
// Parameters
//   confusion_matrix : row-major Nclass x Nclass array
//   Nclass           : number of classes
//
// Returns a new[]-allocated array of Nclass values; the caller owns it.
auto get_TP = [] (const float * confusion_matrix, const int & Nclass)
{
  float * diagonal = new float[Nclass];

  // In row-major storage consecutive diagonal entries are Nclass + 1 apart.
  const int stride = Nclass + 1;
  int flat = 0;
  for (int k = 0; k < Nclass; ++k, flat += stride)
    diagonal[k] = confusion_matrix[flat];

  return diagonal;
};
// Sum, for every row of the confusion matrix, the entries off the diagonal.
//
// Parameters
//   confusion_matrix : row-major Nclass x Nclass array
//   Nclass           : number of classes
//
// Returns a new[]-allocated array of Nclass values; the caller owns it.
auto get_FN = [] (const float * confusion_matrix, const int & Nclass)
{
  float * off_diagonal = new float[Nclass];

  for (int r = 0; r < Nclass; ++r)
  {
    const float * row = confusion_matrix + r * Nclass;

    // Entries to the left of the diagonal ...
    float left = 0.f;
    for (int c = 0; c < r; ++c)
      left += row[c];

    // ... and entries to the right of it.
    float right = 0.f;
    for (int c = r + 1; c < Nclass; ++c)
      right += row[c];

    off_diagonal[r] = left + right;
  }

  return off_diagonal;
};
// Sum, for every column of the confusion matrix, the entries off the diagonal.
//
// Parameters
//   confusion_matrix : row-major Nclass x Nclass array
//   Nclass           : number of classes
//
// Returns a new[]-allocated array of Nclass values; the caller owns it.
auto get_FP = [] (const float * confusion_matrix, const int & Nclass)
{
  float * column_totals = new float[Nclass];

  for (int col = 0; col < Nclass; ++col)
  {
    float off_diagonal = 0.f;
    for (int row = 0; row < Nclass; ++row)
    {
      // The diagonal entry does not contribute to the total.
      if (row == col) continue;
      off_diagonal += confusion_matrix[row * Nclass + col];
    }
    column_totals[col] = off_diagonal;
  }

  return column_totals;
};
// Element-wise sum of the TP and FP arrays.
//
// Parameters
//   TP, FP : arrays of Nclass values each
//   Nclass : number of classes
//
// Returns a new[]-allocated array of Nclass values; the caller owns it.
auto get_TOP = [] (const float * TP, const float * FP, const int & Nclass)
{
  float * totals = new float[Nclass];

  for (int k = 0; k < Nclass; ++k)
    totals[k] = TP[k] + FP[k];

  return totals;
};
// Element-wise sum of the TP and FN arrays.
//
// Parameters
//   TP, FN : arrays of Nclass values each
//   Nclass : number of classes
//
// Returns a new[]-allocated array of Nclass values; the caller owns it.
auto get_P = [] (const float * TP, const float * FN, const int & Nclass)
{
  float * totals = new float[Nclass];

  for (int k = 0; k < Nclass; ++k)
    totals[k] = TP[k] + FN[k];

  return totals;
};
// Overall (multi-class) Matthews Correlation Coefficient.
//
// Parameters
//   confusion_matrix : row-major Nclass x Nclass array; only its diagonal is read
//   TOP              : Nclass values, as produced by get_TOP (TP + FP)
//   P                : Nclass values, as produced by get_P (TP + FN)
//   Nclass           : number of classes
//
// Returns cov_x_y / sqrt(cov_x_x * cov_y_y), with s the sum of TOP and
//   cov_x_x = sum_k TOP[k] * (s - TOP[k])
//   cov_y_y = sum_k P[k]   * (s - P[k])
//   cov_x_y = sum_k (C[k][k] * s - P[k] * TOP[k])
// When the denominator is not positive the coefficient is undefined; 0 is
// returned in that case (the usual convention) instead of NaN.
auto get_overall_MCC = [] (const float * confusion_matrix, const float * TOP, const float * P, const int & Nclass)
{
  const float s = std :: accumulate(TOP, TOP + Nclass, 0.f);

  float cov_x_y = 0.f;
  float cov_x_x = 0.f;
  float cov_y_y = 0.f;

  for (int i = 0; i < Nclass; ++i)
  {
    cov_x_x += TOP[i] * (s - TOP[i]);
    cov_y_y += P[i] * (s - P[i]);
    cov_x_y += confusion_matrix[i * Nclass + i] * s - P[i] * TOP[i];
  }

  const float denominator_sq = cov_y_y * cov_x_x;

  // Happens e.g. when every sample is predicted as (or belongs to) one class:
  // dividing would yield 0 / 0.
  if (denominator_sq <= 0.f)
    return 0.f;

  return cov_x_y / std :: sqrt(denominator_sq);
};
// ---- Pipeline inputs -------------------------------------------------------
// Each raw value is wrapped in an easyDAG InputVariable so that it can be passed
// as an argument to the Task objects below.
// NOTE(review): set_name presumably attaches the label shown for the node in the
// graphviz output — confirm against the easyDAG documentation.
auto yt = InputVariable(y_true);
yt.set_name(y_true);
auto yp = InputVariable(y_pred);
yp.set_name(y_pred);
auto n_labels = InputVariable(Nlabels);
n_labels.set_name(Nlabels);
// Created empty: its value is only known once get_classes has run (see below).
auto n_class = InputVariable < int >();
n_class.set_name(Nclass);
// Compute the unique classes; both label arrays have the same length, hence
// n_labels is passed twice.
Task classes(get_classes, yt, yp, n_labels, n_labels);
classes.set_name(classes);
// get_classes writes Nclass as a side effect, and that value must be available
// before it is stored in n_class, so this step is requested immediately.
// NOTE(review): presumably eval() runs the task and operator() yields its
// result (discarded here) — confirm against the easyDAG documentation.
classes.eval();
classes();
n_class.set(Nclass);
// Compute the confusion matrix
Task confusion_matrix(get_confusion_matrix, yt, yp, n_labels, classes, n_class);
confusion_matrix.set_name(confusion_matrix);
// Compute the True Positive (diagonal of the confusion matrix)
Task TP(get_TP, confusion_matrix, n_class);
TP.set_name(TP);
// Compute the False Positive (off-diagonal column sums)
Task FP(get_FP, confusion_matrix, n_class);
FP.set_name(FP);
// Compute the False Negative (off-diagonal row sums)
Task FN(get_FN, confusion_matrix, n_class);
FN.set_name(FN);
// Compute the Test outcome positive (TP + FP)
Task TOP(get_TOP, TP, FP, n_class);
TOP.set_name(TOP);
// Compute the Condition positive or support (TP + FN)
Task P(get_P, TP, FN, n_class);
P.set_name(P);
// Compute the Matthews Correlation Coefficient
Task MCC(get_overall_MCC, confusion_matrix, TOP, P, n_class);
MCC.set_name(MCC);
// Request the final value of the pipeline.
// NOTE(review): presumably this also triggers the tasks MCC depends on — confirm.
MCC.eval();
auto res = MCC();
std :: cout << "Matthews Correlation Coefficient: " << res << std :: endl;
// Write the graph of the pipeline to standard output under the title
// "DOT graph:".
std :: cout << std :: endl << "DOT graph:" << std :: endl;
MCC.graphviz(std :: cout, "matthews_coefficient");
// NOTE(review): the new[]-allocated arrays (y_true, y_pred and the arrays
// returned by the lambdas) are not released before returning.
return 0;
}