NMODL generates OpenACC code from the Gfluct3.mod mechanism that gives incorrect results. This leads to test failures when the NEURON test suite is run using CoreNEURON and NMODL, i.e.
cmake .. -DNRN_ENABLE_TESTS=ON -DNRN_ENABLE_CORENEURON=ON -DCORENRN_ENABLE_GPU=ON -DCORENRN_ENABLE_NMODL=ON
cmake --build . --parallel
ctest -j 8
The testcorenrn_gf::compare_results test fails because the output spikes from testcorenrn_gf::coreneuron_gpu_online and testcorenrn_gf::coreneuron_gpu_offline do not match NEURON and the reference file.
The following diff, which was inspired by comparing the NMODL and MOD2C translations of this .mod file, fixes the testcorenrn_gf test for NMODL+GPU. This might not be a minimal patch, and the part related to #680 should be considered separately and discussed in that issue.
--- generated/Gfluct3.cpp 2021-05-31 14:27:33.065048143 +0200
+++ modified/Gfluct3.cpp 2021-05-31 14:27:13.992794000 +0200
@@ -462,9 +462,10 @@
static inline void net_send_buffering(NetSendBuffer_t* nsb, int type, int vdata_index, int weight_index, int point_index, double t, double flag) {
- int i = nsb->_cnt;
- nsb->_cnt++;
- if(nsb->_cnt >= nsb->_size) {
+ int i = 0;
+ #pragma acc atomic capture
+ i = nsb->_cnt++;
+ if(i >= nsb->_size) {
printf("Error : netsend buffer size (%d) exceeded\n", nsb->_cnt);
coreneuron_abort();
}
@@ -546,8 +547,10 @@
net_receive_kernel_Gfluct3(t, point_process, inst, nt, ml, weight_index, flag);
}
}
-
+ #pragma acc wait(nt->stream_id)
NetSendBuffer_t* nsb = ml->_net_send_buffer;
+ #pragma acc update self(nsb->_cnt) if(nt->compute_gpu)
+ update_net_send_buffer_on_host(nt, nsb);
for (int i=0; i < nsb->_cnt; i++) {
int type = nsb->_sendtype[i];
int tid = nt->id;
@@ -559,7 +562,7 @@
net_sem_from_gpu(type, vdata_index, weight_index, tid, point_index, t, flag);
}
nsb->_cnt = 0;
-
+ #pragma acc update device(nsb->_cnt) if (nt->compute_gpu)
nrb->_displ_cnt = 0;
nrb->_cnt = 0;
}
@@ -630,11 +633,27 @@
inst->amp_i[id] = inst->std_i[id] * sqrt((1.0 - exptrap_in_1));
}
if ((inst->tau_e[id] != 0.0) || (inst->tau_i[id] != 0.0)) {
- net_send_buffering(ml->_net_send_buffer, 0, inst->tqitem[3*pnodecount+id], -1, inst->point_process[1*pnodecount+id], nt->_t+inst->h[id], 1.0);
+ net_send_buffering(ml->_net_send_buffer, 0, inst->tqitem[3*pnodecount+id], 0, inst->point_process[1*pnodecount+id], nt->_t+inst->h[id], 1.0);
}
}
}
}
+ #pragma acc wait(nt->stream_id)
+ NetSendBuffer_t* nsb = ml->_net_send_buffer;
+ #pragma acc update self(nsb->_cnt) if(nt->compute_gpu)
+ update_net_send_buffer_on_host(nt, nsb);
+ for (int i=0; i < nsb->_cnt; i++) {
+ int type = nsb->_sendtype[i];
+ int tid = nt->id;
+ double t = nsb->_nsb_t[i];
+ double flag = nsb->_nsb_flag[i];
+ int vdata_index = nsb->_vdata_index[i];
+ int weight_index = nsb->_weight_index[i];
+ int point_index = nsb->_pnt_index[i];
+ net_sem_from_gpu(type, vdata_index, weight_index, tid, point_index, t, flag);
+ }
+ nsb->_cnt = 0;
+ #pragma acc update device(nsb->_cnt) if (nt->compute_gpu)
}
cc: @pramodk @iomaganaris
NMODL generates OpenACC code from the Gfluct3.mod mechanism that gives incorrect results. This leads to test failures when the NEURON test suite is run using CoreNEURON and NMODL, i.e.
cmake .. -DNRN_ENABLE_TESTS=ON -DNRN_ENABLE_CORENEURON=ON -DCORENRN_ENABLE_GPU=ON -DCORENRN_ENABLE_NMODL=ON cmake --build . --parallel ctest -j 8The
testcorenrn_gf::compare_resultstest fails because the output spikes fromtestcorenrn_gf::coreneuron_gpu_onlineandtestcorenrn_gf::coreneuron_gpu_offlinedo not match NEURON and the reference file.The following diff, which was inspired by comparing the NMODL and MOD2C translations of this
.modfile, fixes thetestcorenrn_gftest for NMODL+GPU. This might not be a minimal patch, and the part related to #680 should be considered separately and discussed in that issue.cc: @pramodk @iomaganaris