Hi,
I am running the following code using PGI Openacc compiler on ubuntu 14.04.
Run as pgcc -V15.5 -acc fitzhugh_allSynapseTypes.c ; ./a.out
Enter grid size : 6
For testing purposes, for a loop of size 6x6, I am giving the same initial condition for every iteration of the loop and essentially should get the same result in the output for each iteration.
Serial code (without -acc during compilation ) gives the right result, whereas the accelerator gives the right result for all but the last four rows in the output.
fitzhugh_allSynapseTypes.c
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#define N_EQ1 2
#define NP1 8
#define COUPLING_THRESHOLD 0. //
#define THRESHOLD_SLOPE 100. //
#pragma acc routine seq
double boltzmann(const double V, const double V_0, const double k)
{
return 1./(1.+exp(-k*(V-V_0)));
}
#pragma acc routine seq
void derivs_one_electricExcitatoryAndInhibitoryConnections(double* y, double* dxdt, const double* p)
{
dxdt[0] = p[7]*(y[0]-y[0]*y[0]*y[0])-y[1]+p[0]; // x' = m (x-x^3)-y+I
dxdt[1] = p[1]*(boltzmann(y[0], p[2], p[3])-y[1]); // y' = eps * (Bfun(x, x_0, k_2) - y)
}
/*
This is different from integrate_n_rk4 in that it returns both V and X whereas integrate_n_rk4 returns just V
*/
void integrate_one_rk4(double* y, const double* params, double* output, const double dt, const unsigned N, const unsigned stride)
{
unsigned i, j, k;
double dt2, dt6;
double y1[N_EQ1], y2[N_EQ1], k1[N_EQ1], k2[N_EQ1], k3[N_EQ1], k4[N_EQ1];
dt2 = dt/2.; dt6 = dt/6.;
for(j=0; j<N_EQ1; j++) output[j] = y[j];
for(i=1; i<N; i++)
{
for(j=0; j<stride; j++)
{
derivs_one_electricExcitatoryAndInhibitoryConnections(y, k1, params);
for(k=0; k<N_EQ1; k++) y1[k] = y[k]+k1[k]*dt2;
derivs_one_electricExcitatoryAndInhibitoryConnections(y1, k2, params);
for(k=0; k<N_EQ1; k++) y2[k] = y[k]+k2[k]*dt2;
derivs_one_electricExcitatoryAndInhibitoryConnections(y2, k3, params);
for(k=0; k<N_EQ1; k++) y2[k] = y[k]+k3[k]*dt;
derivs_one_electricExcitatoryAndInhibitoryConnections(y2, k4, params);
for(k=0; k<N_EQ1; k++) y[k] += dt6*(k1[k]+2.*(k2[k]+k3[k])+k4[k]);
}
for(j=0; j<N_EQ1; j++) output[N_EQ1*i+j] = y[j];
}
}
/*
Computes the following derivatives for each neuron and also adds the input from other neurons :
x' = m (x-x^3)-y+I
y' = eps * (Bfun(x, x_0, k_2) - y)
n => number of neurons
y => array with structure x0,y0,x1,y1,x2,y2,...x{n-1},y{n-1} for the above equations
dydt => derivatives of y i.e., x0',y0',x1',y1',x2',y2'...x{n-1}',y{n-1}'
p => parameter array storing NP1 parameters for each neuron i.e., p[0] to p[NP1-1] for x0',y0' followed by
p[NP] to p[2*NP-1] for x1',y1' and so on
coupling_strengths_type => array of size n*n storing coupling strengths in the order (0->0),(0->1),(0->2) ... (0->n-1) followed by
(1->0),(1->1),(1->2),(1->3)...(1->n-1) and so on
xi'_coupled = xi' + (E0_chlorine - xi) * ( coupling_strengths_chlorine(0->i)*bm_factor(x0) + coupling_strength_chlorine(1->i)*bm_factor(x1) + ... )
+ (E0_potassium - xi) * ( coupling_strengths_potassium(0->i)*bm_factor(x0) + coupling_strength_potassium(1->i)*bm_factor(x1) + ... )
+ (E0_sodium - xi) * ( coupling_strengths_sodium(0->i)*bm_factor(x0) + coupling_strength_sodium(1->i)*bm_factor(x1) + ... )
+ (x0 - xi) * coupling_strengths_chlorine(0->i) + (x1 - xi) * coupling_strengths_chlorine(1->i) + ... ;
yi'_coupled = yi'
bm_factor here uses different k, x0 than in the computation of uncoupled y' ;
A neuron j will influence neuron i only if it's voltage is above the threshold obtained from the boltzmann function; See bolzmann.py
TODO : Instead of using three different arrays, should use a linked list for the coupling strengths; For each neuron 'i', the correspoinding linked list will contain nodes each having the presynaptic neuron number, synaptic strenght, and type of channel (or the channel's equilibrium potential); This improves performance from O(n^2) to O(E) where E is no.of edges or synaptic connections
*/
#pragma acc routine seq
void derivs_n_variablecoupling_electricExcitatoryAndInhibitoryConnections(const unsigned n, double* y, double* dydt,
const double* p, const double* coupling_strengths_chlorine, const double* coupling_strengths_potassium,
const double* coupling_strengths_sodium, const double* coupling_strengths_electric,
double* bm_factor)
{
unsigned i, j;
for(i=0;i <n; i++){
bm_factor[i] = boltzmann(y[i*N_EQ1], COUPLING_THRESHOLD, THRESHOLD_SLOPE);
derivs_one_electricExcitatoryAndInhibitoryConnections(y+i*N_EQ1, dydt+i*N_EQ1, p+i*NP1);
}
for(i=0; i<n; i++)
{
double bm_factor_sum = 0;
for(j=0;j<n;j++){
//TODO : p[4] is E_Cl; p[5] is E_K; p[6] is E_Na;
dydt[i*N_EQ1] += ((p[4+i*NP1]-y[i*N_EQ1])*coupling_strengths_chlorine[j*n+i] +
(p[5+i*NP1]-y[i*N_EQ1])*coupling_strengths_potassium[j*n+i] +
(p[6+i*NP1]-y[i*N_EQ1])*coupling_strengths_sodium[j*n+i] )
* bm_factor[j]
+ (y[j*N_EQ1]-y[i*N_EQ1])*coupling_strengths_electric[j*n+i] ;
}
}
}
/*
This method finds the rk4 approximation of the curve for a system of 'n' neurons obeying the following equations :
x' = m (x-x^3)-y+I
y' = eps * (Bfun(x, x_0, k_2) - y)
networkSize => number of neurons
y => array with structure x0,y0,x1,y1,x2,y2,...x{n-1},y{n-1} for the above equations
dydt => derivatives of y i.e., x0',y0',x1',y1',x2',y2'...x{n-1}',y{n-1}'
p => parameter array storing NP1 parameters for each neuron i.e., p[0] to p[NP1-1] for x0',y0' followed by
p[NP] to p[2*NP-1] for x1',y1' and so on
coupling_strengths => array of size n*n storing coupling strengths in the order (0->0),(0->1),(0->2) ... (0->n-1) followed by
(1->0),(1->1),(1->2),(1->3)...(1->n-1) and so on
output => array of size n*N in the following order x0,x1,...x{n-1} representing initial point on the curve
followed by x0-1,x1-1,...x{n-1}-1 representing the next point and so on
dt => step size
N => number of points to compute
stride => number of strides to make while computing each of the N points
also in the output for each time point 1 to N, check crossings for each neuron;
for each neuron store all the crossings and also the number of crossings present;
compute and return phase differences of all cells w.r.t the first cell phase differences
*/
#pragma acc routine seq
void integrate_n_rk4_phasedifferences_electricExcitatoryAndInhibitoryConnections(const unsigned networkSize,
const double* initial_state, const double* p,
const double* coupling_strengths_chlorine, const double* coupling_strengths_potassium,
const double* coupling_strengths_sodium, const double* coupling_strengths_electric,
double* output, double* phase_differences_output, unsigned* numberOfPhaseDifferences,
double* current_state, double* temp_state, double*k1, double*k2, double*k3, double*k4,
int* lastCrossings, int* numberOfCrossingsToEdit, double *bm_factor,
const double dt, const unsigned N, const unsigned stride, const unsigned initial_states_start) {
unsigned i, j, m, point_dimension = networkSize*N_EQ1;
int threshhold = 0, allCellsHadZeroPhaseBefore = 0, numberOfCrossingsUnedited = 0, phase_differences_output_index=0;
for(m=0; m<point_dimension; m++){
current_state[m]=initial_state[m+initial_states_start];
if(m%2==0) {
output[m/2]=current_state[m];
lastCrossings[m/2]=N+1;
numberOfCrossingsToEdit[m/2] = 0;
}
}
for(i=1; i<N; i++)
{
for(j=0; j<stride; j++)
{
derivs_n_variablecoupling_electricExcitatoryAndInhibitoryConnections(networkSize, current_state, k1, p, coupling_strengths_chlorine, coupling_strengths_potassium, coupling_strengths_sodium, coupling_strengths_electric, bm_factor);
for(m=0; m<point_dimension; m++)
temp_state[m] = current_state[m]+k1[m]*dt/2;
derivs_n_variablecoupling_electricExcitatoryAndInhibitoryConnections(networkSize, temp_state, k2, p, coupling_strengths_chlorine, coupling_strengths_potassium, coupling_strengths_sodium, coupling_strengths_electric,bm_factor);
for(m=0; m<point_dimension; m++)
temp_state[m] = current_state[m]+k2[m]*dt/2;
derivs_n_variablecoupling_electricExcitatoryAndInhibitoryConnections(networkSize, temp_state, k3, p, coupling_strengths_chlorine, coupling_strengths_potassium, coupling_strengths_sodium, coupling_strengths_electric, bm_factor);
for(m=0; m<point_dimension; m++)
temp_state[m] = current_state[m]+k3[m]*dt;
derivs_n_variablecoupling_electricExcitatoryAndInhibitoryConnections(networkSize, temp_state, k4, p, coupling_strengths_chlorine, coupling_strengths_potassium, coupling_strengths_sodium, coupling_strengths_electric, bm_factor);
for(m=0; m<point_dimension; m++)
current_state[m] += (k1[m]+2.*(k2[m]+k3[m])+k4[m])*dt/6;
}
for(m=0; m<point_dimension; m++){
if(m%2==0) {
//Outputs only voltages of each cell.
output[i*point_dimension/2+m/2]=current_state[m];
// computing the crossings here itself
if(m==0){ // first cell
if(current_state[m]>=threshhold && output[(i-1)*point_dimension/2+m/2] < threshhold){
//implies a crossing in the first cell
if(!allCellsHadZeroPhaseBefore){
int allCellsHadZeroPhaseBefore1 = 1;
for(j=1;j<point_dimension/2;j++){
if(i<lastCrossings[j]) {
allCellsHadZeroPhaseBefore1=0;
}
}
allCellsHadZeroPhaseBefore = allCellsHadZeroPhaseBefore1;
}
if(allCellsHadZeroPhaseBefore){
for(j=1;j<point_dimension/2;j++){
phase_differences_output[phase_differences_output_index*(point_dimension/2 -1) + j-1] = i - lastCrossings[j];
numberOfCrossingsToEdit[j]=numberOfCrossingsToEdit[j]+1;
}
phase_differences_output_index++;
}
}
} else{ // cells other than first cell
if(current_state[m]>=threshhold && output[(i-1)*point_dimension/2+m/2] < threshhold){
if(!allCellsHadZeroPhaseBefore){
lastCrossings[m/2]=i;
}else{
j=numberOfCrossingsToEdit[m/2];
while(j>0){
phase_differences_output[(phase_differences_output_index-j)*(point_dimension/2-1)+m/2-1]/=i-lastCrossings[m/2];
j--;
}
lastCrossings[m/2]=i;
numberOfCrossingsToEdit[m/2]=0;
}
}
}
}
}
}
for(m=0; m<point_dimension; m++){
if(m%2==0) {
if(numberOfCrossingsToEdit[m/2]>numberOfCrossingsUnedited){
numberOfCrossingsUnedited = numberOfCrossingsToEdit[m/2];
}
}
}
//number of the phase differences in the array
*numberOfPhaseDifferences = phase_differences_output_index - numberOfCrossingsUnedited;
}
/*
Same as integrate_n_rk4_phasedifferences_electricExcitatoryAndInhibitoryConnections
No need to return output array, so uses a cyclic array for output
*/
#pragma acc routine seq
void integrate_n_rk4_phasedifferences_nooutput_electricExcitatoryAndInhibitoryConnections(const unsigned networkSize,
const double* initial_state, const double* p,
const double* coupling_strengths_chlorine, const double* coupling_strengths_potassium,
const double* coupling_strengths_sodium, const double* coupling_strengths_electric,
double* output, double* phase_differences_output, unsigned* numberOfPhaseDifferences,
double* current_state, double* temp_state, double*k1, double*k2, double*k3, double*k4,
int* lastCrossings, int* numberOfCrossingsToEdit, double *bm_factor,
const double dt, const unsigned N, const unsigned stride, const unsigned initial_states_start) {
unsigned i, j, m, point_dimension = networkSize*N_EQ1;
int threshhold = 0, allCellsHadZeroPhaseBefore = 0, numberOfCrossingsUnedited = 0, phase_differences_output_index=0;
for(m=0; m<point_dimension; m++){
current_state[m]=initial_state[m+initial_states_start];
if(m%2==0) {
output[m/2]=current_state[m];
lastCrossings[m/2]=N+1;
numberOfCrossingsToEdit[m/2] = 0;
}
}
for(i=1; i<N; i++)
{
for(j=0; j<stride; j++)
{
derivs_n_variablecoupling_electricExcitatoryAndInhibitoryConnections(networkSize, current_state, k1, p, coupling_strengths_chlorine, coupling_strengths_potassium, coupling_strengths_sodium, coupling_strengths_electric, bm_factor);
for(m=0; m<point_dimension; m++)
temp_state[m] = current_state[m]+k1[m]*dt/2;
derivs_n_variablecoupling_electricExcitatoryAndInhibitoryConnections(networkSize, temp_state, k2, p, coupling_strengths_chlorine, coupling_strengths_potassium, coupling_strengths_sodium, coupling_strengths_electric,bm_factor);
for(m=0; m<point_dimension; m++)
temp_state[m] = current_state[m]+k2[m]*dt/2;
derivs_n_variablecoupling_electricExcitatoryAndInhibitoryConnections(networkSize, temp_state, k3, p, coupling_strengths_chlorine, coupling_strengths_potassium, coupling_strengths_sodium, coupling_strengths_electric, bm_factor);
for(m=0; m<point_dimension; m++)
temp_state[m] = current_state[m]+k3[m]*dt;
derivs_n_variablecoupling_electricExcitatoryAndInhibitoryConnections(networkSize, temp_state, k4, p, coupling_strengths_chlorine, coupling_strengths_potassium, coupling_strengths_sodium, coupling_strengths_electric, bm_factor);
for(m=0; m<point_dimension; m++)
current_state[m] += (k1[m]+2.*(k2[m]+k3[m])+k4[m])*dt/6;
}
for(m=0; m<point_dimension; m++){
if(m%2==0) {
//Outputs only voltages of each cell.
output[(i%2)*point_dimension/2+m/2]=current_state[m];
// computing the crossings here itself
if(m==0){ // first cell
if(current_state[m]>=threshhold && output[((i-1)%2)*point_dimension/2+m/2] < threshhold){
//implies a crossing in the first cell
if(!allCellsHadZeroPhaseBefore){
int allCellsHadZeroPhaseBefore1 = 1;
for(j=1;j<point_dimension/2;j++){
if(i<lastCrossings[j]) {
allCellsHadZeroPhaseBefore1=0;
}
}
allCellsHadZeroPhaseBefore = allCellsHadZeroPhaseBefore1;
}
if(allCellsHadZeroPhaseBefore){
for(j=1;j<point_dimension/2;j++){
phase_differences_output[phase_differences_output_index*(point_dimension/2 -1) + j-1] = i - lastCrossings[j];
numberOfCrossingsToEdit[j]=numberOfCrossingsToEdit[j]+1;
}
phase_differences_output_index++;
}
}
} else{ // cells other than first cell
if(current_state[m]>=threshhold && output[((i-1)%2)*point_dimension/2+m/2] < threshhold){
if(!allCellsHadZeroPhaseBefore){
lastCrossings[m/2]=i;
}else{
j=numberOfCrossingsToEdit[m/2];
while(j>0){
phase_differences_output[(phase_differences_output_index-j)*(point_dimension/2-1)+m/2-1]/=i-lastCrossings[m/2];
j--;
}
lastCrossings[m/2]=i;
numberOfCrossingsToEdit[m/2]=0;
}
}
}
}
}
}
for(m=0; m<point_dimension; m++){
if(m%2==0) {
if(numberOfCrossingsToEdit[m/2]>numberOfCrossingsUnedited){
numberOfCrossingsUnedited = numberOfCrossingsToEdit[m/2];
}
}
}
//number of the phase differences in the array
*numberOfPhaseDifferences = phase_differences_output_index - numberOfCrossingsUnedited;
}
/*
for an array of initial conditions, do everything in integrate_n_rk4_phasedifferences and return just the last point in phase differences trajectory for each initial condition
*/
void sweeptraces_electricExcitatoryAndInhibitoryConnections(const long networkSize, const double* initial_states_array, const double* p,
const double* coupling_strengths_chlorine, const double* coupling_strengths_potassium,
const double* coupling_strengths_sodium, const double* coupling_strengths_electric,
double* trajectory_targets_phasedifferences,
const long noOfInitialStates, const double dt, const long N, const long stride) {
int l,m, point_dimension = networkSize*N_EQ1;
//double output[N*networkSize]; not working when 'output' copied privately since size exceeds 2.3GB for all threads together; As of pgcc v15.5
//double output[N*networkSize*noOfInitialStates];
//experimenting with stateless computations; not using full output array since we are only using phase_difference array; also making it private since
// size won't be greater than 2.3GB anymore
double output[2*networkSize];
//double phase_differences_output[(networkSize-1)*(N/2+1)];
double phase_differences_output[(networkSize-1)*(N/2+1)*noOfInitialStates];
unsigned numberOfPhaseDifferences=0;
double current_state[point_dimension], temp_state[point_dimension], k1[point_dimension], k2[point_dimension], k3[point_dimension], k4[point_dimension];
int lastCrossings[point_dimension/2], numberOfCrossingsToEdit[point_dimension/2];
double bm_factor[networkSize];
struct timeval start_time, stop_time, elapsed_time;
gettimeofday(&start_time,NULL);
/*
In order to avoid memory issues, the number of threads, regulated here by num_gangs(16) vector_length(128), could also be dynamic, based on the size of N*networkSize*noOfInitialStates .
Also, for very large resolutions, say 150x150, rather than clogging memory and thereby slowing down everything, it is instead better to run in multiple batches of say, 50x50 -
something that makes a reasonable use of GPU memory.
*/
/*#pragma omp parallel for \
private(phase_differences_output,numberOfPhaseDifferences),\
private(current_state,temp_state,k1,k2,k3,k4,lastCrossings,numberOfCrossingsToEdit),\
private(bm_factor)*/
//copy(output)
/*
private(output) ; not working for >2.3GB
num_gangs(16) vector_length(128) ; Can set the maximum no.of parallel threads with this; Could adopt dynamically based on sizes but not worth the wait.
Rather, if memory requirement's too great to be fulfilled, split the input array and run in batches
private(phase_differences_output,numberOfPhaseDifferences),\
*/
#pragma acc parallel loop independent ,\
pcopyin(initial_states_array[0:point_dimension*noOfInitialStates],p[0:NP1*networkSize],coupling_strengths_chlorine[0:networkSize*networkSize],coupling_strengths_potassium[0:networkSize*networkSize],coupling_strengths_sodium[0:networkSize*networkSize],coupling_strengths_electric[0:networkSize*networkSize]), \
copy(trajectory_targets_phasedifferences[0:(networkSize-1)*noOfInitialStates]), \
private(current_state,temp_state,k1,k2,k3,k4,lastCrossings,numberOfCrossingsToEdit),\
private(bm_factor),\
private(output,numberOfPhaseDifferences),\
copy(phase_differences_output)
for (l=0;l<noOfInitialStates;l++){
/*
integrate_n_rk4_phasedifferences_nooutput(networkSize,initial_states_array, p, coupling_strengths,
output+l*N*networkSize, phase_differences_output, &numberOfPhaseDifferences,
current_state,temp_state,k1,k2,k3,k4,
lastCrossings,numberOfCrossingsToEdit,bm_factor,
dt, N, stride, l*point_dimension);
*/
integrate_n_rk4_phasedifferences_nooutput_electricExcitatoryAndInhibitoryConnections(networkSize,initial_states_array, p,
coupling_strengths_chlorine, coupling_strengths_potassium, coupling_strengths_sodium, coupling_strengths_electric,
output, phase_differences_output+l*(networkSize-1)*(N/2+1), &numberOfPhaseDifferences,
current_state,temp_state,k1,k2,k3,k4,
lastCrossings,numberOfCrossingsToEdit,bm_factor,
dt, N, stride, l*point_dimension);
for(m=0;m<networkSize-1;m++){
if(numberOfPhaseDifferences>0)
trajectory_targets_phasedifferences[l*(networkSize-1)+m]=
phase_differences_output[(numberOfPhaseDifferences-1)*(networkSize-1)+m];
}
}
printf("\n");
for(l=0; l<noOfInitialStates;l++){
for(m=0;m<networkSize-1;m++)printf("%f ",trajectory_targets_phasedifferences[l*(networkSize-1)+m]);
printf("\n");
}
fflush(stdout);
gettimeofday(&stop_time,NULL);
timersub(&stop_time, &start_time, &elapsed_time); // Unix time subtract routine
printf("Total time was %f seconds.\n", elapsed_time.tv_sec+elapsed_time.tv_usec/1000000.0);
}
int main(){
// test case for 6 cells
double *initial_states_array;
double p[] = {0.4,0.3,0.,10.,-1.5,-1.2,1.5,1.,
0.4,0.3,0.,10.,-1.5,-1.2,1.5,1.,
0.4,0.3,0.,10.,-1.5,-1.2,1.5,1.,
0.4,0.3,0.,10.,-1.5,-1.2,1.5,1.,
0.4,0.3,0.,10.,-1.5,-1.2,1.5,1.,
0.4,0.3,0.,10.,-1.5,-1.2,1.5,1};
double coupling_strengths_chlorine[] = {0.,0.001,0.001,0.001,0.001,0.,
0.,0.001,0.001,0.001,0.001,0.,
0.,0.001,0.001,0.001,0.001,0.,
0.,0.001,0.001,0.001,0.001,0.,
0.,0.001,0.001,0.001,0.001,0.,
0.,0.001,0.001,0.001,0.001,0.};
double *trajectory_targets_phasedifferences;
void *ptr ;
int i,j,n, networkSize = 6;
printf("Enter size of grid\n");
scanf("%d",&n);
trajectory_targets_phasedifferences = (double*)malloc(5*n*n*sizeof(double));
ptr = malloc(networkSize*N_EQ1*n*n*sizeof(double));
if(!ptr)
{
printf("malloc returns NULL\n");
return 1;
}
initial_states_array = (double*)ptr;
double initial_state[] = {-0.026541128437339426, 0.07017957749966305, -0.026541128437339426, 0.07017957749966305, -0.026541128437339426, 0.07017957749966305, -0.026541128437339426,
0.07017957749966305, -0.026541128437339426, 0.07017957749966305, -0.026541128437339426, 0.07017957749966305};
for(i=0;i<n*n;i++){
for(j=0;j<networkSize*N_EQ1;j++){
initial_states_array[i*networkSize*N_EQ1+j]=initial_state[j];
}
}
sweeptraces_electricExcitatoryAndInhibitoryConnections(networkSize, initial_states_array, p,
coupling_strengths_chlorine, coupling_strengths_chlorine, coupling_strengths_chlorine,
coupling_strengths_chlorine,
trajectory_targets_phasedifferences,n*n,
(double)3/1000, 1006, 30);
}
Output with serial code :
pgcc -V15.5 fitzhugh_allSynapseTypes.c; ./a.out
Enter size of grid
6
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
Total time was 2.549745 seconds.
Output with accelerator code :
pgcc -V15.5 -acc fitzhugh_allSynapseTypes.c; ./a.out
Enter size of grid
6
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
0.010684 0.010684 0.010684 0.010684 1.000000
5.000000 5.000000 5.000000 5.000000 1.000000
5.000000 5.000000 5.000000 5.000000 1.000000
5.000000 5.000000 5.000000 5.000000 1.000000
5.000000 5.000000 5.000000 5.000000 1.000000
Total time was 16.210918 seconds.
Last four rows in the above accelerator output are inaccurate. I have printed and verified that the input to each iteration is the same. And the dimensions of copy(trajectory_targets_phasedifferences[0:(networkSize-1)*noOfInitialStates]) also look fine.
Can you please help me understand what’s going wrong?