NMODL does not produce correct results with OpenACC+GPU

NMODL generates OpenACC code from the [Gfluct3.mod](https://github.com/neuronsimulator/testcorenrn/blob/7b781b3a7ac1e0b785f897e4db5c3bf84c5127d2/mod/Gfluct3.mod) mechanism that gives incorrect results. This leads to test failures when the NEURON test suite is run using CoreNEURON and NMODL, i.e.
```sh
cmake ..  -DNRN_ENABLE_TESTS=ON -DNRN_ENABLE_CORENEURON=ON -DCORENRN_ENABLE_GPU=ON -DCORENRN_ENABLE_NMODL=ON
cmake --build . --parallel
ctest -j 8
```

The `testcorenrn_gf::compare_results` test fails because the output spikes from `testcorenrn_gf::coreneuron_gpu_online` and `testcorenrn_gf::coreneuron_gpu_offline` do not match NEURON and the reference file.

The following diff, which was inspired by comparing the NMODL and MOD2C translations of this `.mod` file, fixes the `testcorenrn_gf` test for NMODL+GPU. This might not be a **minimal** patch, and the part related to https://github.com/BlueBrain/nmodl/issues/680 should be considered separately and discussed in that issue.
```diff
--- generated/Gfluct3.cpp	2021-05-31 14:27:33.065048143 +0200
+++ modified/Gfluct3.cpp	2021-05-31 14:27:13.992794000 +0200
@@ -462,9 +462,10 @@
​
​
     static inline void net_send_buffering(NetSendBuffer_t* nsb, int type, int vdata_index, int weight_index, int point_index, double t, double flag)  {
-        int i = nsb->_cnt;
-        nsb->_cnt++;
-        if(nsb->_cnt >= nsb->_size) {
+        int i = 0;
+    #pragma acc atomic capture
+        i = nsb->_cnt++;
+        if(i >= nsb->_size) {
             printf("Error : netsend buffer size (%d) exceeded\n", nsb->_cnt);
             coreneuron_abort();
         }
@@ -546,8 +547,10 @@
                     net_receive_kernel_Gfluct3(t, point_process, inst, nt, ml, weight_index, flag);
                 }
             }
-
+            #pragma acc wait(nt->stream_id)
             NetSendBuffer_t* nsb = ml->_net_send_buffer;
+            #pragma acc update self(nsb->_cnt) if(nt->compute_gpu)
+            update_net_send_buffer_on_host(nt, nsb);
             for (int i=0; i < nsb->_cnt; i++) {
                 int type = nsb->_sendtype[i];
                 int tid = nt->id;
@@ -559,7 +562,7 @@
                 net_sem_from_gpu(type, vdata_index, weight_index, tid, point_index, t, flag);
             }
             nsb->_cnt = 0;
-
+            #pragma acc update device(nsb->_cnt) if (nt->compute_gpu)
             nrb->_displ_cnt = 0;
             nrb->_cnt = 0;
         }
@@ -630,11 +633,27 @@
                         inst->amp_i[id] = inst->std_i[id] * sqrt((1.0 - exptrap_in_1));
                     }
                     if ((inst->tau_e[id] != 0.0) || (inst->tau_i[id] != 0.0)) {
-                        net_send_buffering(ml->_net_send_buffer, 0, inst->tqitem[3*pnodecount+id], -1, inst->point_process[1*pnodecount+id], nt->_t+inst->h[id], 1.0);
+                        net_send_buffering(ml->_net_send_buffer, 0, inst->tqitem[3*pnodecount+id], 0, inst->point_process[1*pnodecount+id], nt->_t+inst->h[id], 1.0);
                     }
                 }
             }
         }
+        #pragma acc wait(nt->stream_id)
+        NetSendBuffer_t* nsb = ml->_net_send_buffer;
+        #pragma acc update self(nsb->_cnt) if(nt->compute_gpu)
+        update_net_send_buffer_on_host(nt, nsb);
+        for (int i=0; i < nsb->_cnt; i++) {
+            int type = nsb->_sendtype[i];
+            int tid = nt->id;
+            double t = nsb->_nsb_t[i];
+            double flag = nsb->_nsb_flag[i];
+            int vdata_index = nsb->_vdata_index[i];
+            int weight_index = nsb->_weight_index[i];
+            int point_index = nsb->_pnt_index[i];
+            net_sem_from_gpu(type, vdata_index, weight_index, tid, point_index, t, flag);
+        }
+        nsb->_cnt = 0;
+        #pragma acc update device(nsb->_cnt) if (nt->compute_gpu)
     }
```

cc: @pramodk @iomaganaris 

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

NMODL does not produce correct results with OpenACC+GPU #675

Metadata

Assignees

Labels

Type

Fields

Projects

Milestone

Relationships

Development

Uh oh!

NMODL does not produce correct results with OpenACC+GPU #675

Description

Metadata

Metadata

Assignees

Labels

Type

Fields

Projects

Milestone

Relationships

Development

Issue actions