Ok, it took 12 minutes to do it, but nvcc finally built the release with debug info. The segfault is happening at line 83 in my main.
9
10 //#define EMULATION_MODE
11 //#define use_export // uncomment this if project is built using a compiler that
12 // supports the C++ keyword "export". If not using such a
13 // compiler, be sure not to add cuLsoda.cc to the target, or
14 // you will get redefinition errors.
15
16 #include <stdio.h>
17 #include <math.h>
18 #include "cuLsoda_kernel.cu"
19
20
21 int main(void) /* Main program */
22 {
23
24 /* Local variables */
25 double *t, *y/*[3]*/;
26 int *jt;
27 int *neq, *liw, *lrw;
28 double *atol/*[3]*/;
29 int *itol, *iopt;
30 double *rtol;
31 int *iout;
32 double *tout;
33 int *itask, *iwork/*[23]*/;
34 double *rwork/*[70]*/;
35 int *istate;
36 /* End Local Block */
37
38 cudaMallocHost((void**)&t,sizeof(double));
39 cudaMallocHost((void**)&y,sizeof(double)*3);
40 cudaMallocHost((void**)&jt,sizeof(int));
41 cudaMallocHost((void**)&neq,sizeof(int));
42 cudaMallocHost((void**)&liw,sizeof(int));
43 cudaMallocHost((void**)&lrw,sizeof(int));
44 cudaMallocHost((void**)&atol,sizeof(double)*13);
45 cudaMallocHost((void**)&itol,sizeof(int));
46 cudaMallocHost((void**)&iopt,sizeof(int));
47 cudaMallocHost((void**)&rtol,sizeof(double));
48 cudaMallocHost((void**)&iout,sizeof(int));
49 cudaMallocHost((void**)&tout,sizeof(double));
50 cudaMallocHost((void**)&itask,sizeof(int));
51 cudaMallocHost((void**)&iwork,sizeof(int)*23);
52 cudaMallocHost((void**)&rwork,sizeof(double)*70);
53 cudaMallocHost((void**)&istate,sizeof(int));
54
55 /* Pointers to Device versions of Local variables */
56 double *_Dt;
57 double *_Dy; // [3]
58 int *_Djt;
59 int *_Dneq;
60 int *_Dliw;
61 int *_Dlrw;
62 double *_Datol; //[3]
63 int *_Ditol;
64 int *_Diopt;
65 double *_Drtol;
66 int *_Diout;
67 double *_Dtout;
68 int *_Ditask;
69 int *_Diwork; // [23]
70 double *_Drwork; // [70]
71 int *_Distate;
72 /* End Pointer Block */
73
74
75
76 /* Method instantiations for Derivative and Jacobian functions to send to template */
77 myFex fex;
78 myJex jex;
79
80
81 /* Assignment of initial values to locals */
82 *neq = 3;
83 y[0] = 1.;
84 y[1] = 0.;
85 y[2] = 0.;
86 *t = 0.;
87 *tout = .4;
88 *itol = 2;
89 *rtol = 1e-4;
90 atol[0] = 1e-6;
91 atol[1] = 1e-10;
92 atol[2] = 1e-6;
93 *itask = 1;
94 *istate = 1;
95 *iopt = 0;
96 *lrw = 70;
97 *liw = 23;
98 *jt = 2;
99
100 /* Allocate device memory for each of the pointers, and copy the values from local to device */
101 cudaMalloc((void**)&_Dt,sizeof(double)); cudaMemcpy(_Dt,t,sizeof(double),cudaMemcpyHostToDevice);
102 cudaMalloc((void**)&_Dy,sizeof(double)*3); cudaMemcpy(_Dy,y,sizeof(double)*3,cudaMemcpyHostToDevice);
103 cudaMalloc((void**)&_Djt,sizeof(int)); cudaMemcpy(_Djt,jt,sizeof(int),cudaMemcpyHostToDevice);
104 cudaMalloc((void**)&_Dneq,sizeof(int)); cudaMemcpy(_Dneq,neq,sizeof(int),cudaMemcpyHostToDevice);
105 cudaMalloc((void**)&_Dliw,sizeof(int)); cudaMemcpy(_Dliw,liw,sizeof(int),cudaMemcpyHostToDevice);
106 cudaMalloc((void**)&_Dlrw,sizeof(int)); cudaMemcpy(_Dlrw,lrw,sizeof(int),cudaMemcpyHostToDevice);
107 cudaMalloc((void**)&_Datol,sizeof(double)*13); cudaMemcpy(_Datol,atol,sizeof(double)*13,cudaMemcpyHostToDevice);
108 cudaMalloc((void**)&_Ditol,sizeof(int)); cudaMemcpy(_Ditol,itol,sizeof(int),cudaMemcpyHostToDevice);
109 cudaMalloc((void**)&_Diopt,sizeof(int)); cudaMemcpy(_Diopt,iopt,sizeof(int),cudaMemcpyHostToDevice);
110 cudaMalloc((void**)&_Drtol,sizeof(double)); cudaMemcpy(_Drtol,rtol,sizeof(double),cudaMemcpyHostToDevice);
111 cudaMalloc((void**)&_Diout,sizeof(int)); cudaMemcpy(_Diout,iout,sizeof(int),cudaMemcpyHostToDevice);
112 cudaMalloc((void**)&_Dtout,sizeof(double)); cudaMemcpy(_Dtout,tout,sizeof(double),cudaMemcpyHostToDevice);
113 cudaMalloc((void**)&_Ditask,sizeof(int)); cudaMemcpy(_Ditask,itask,sizeof(int),cudaMemcpyHostToDevice);
114 cudaMalloc((void**)&_Diwork,sizeof(int)*23); cudaMemcpy(_Diwork,iwork,sizeof(int)*23,cudaMemcpyHostToDevice);
115 cudaMalloc((void**)&_Drwork,sizeof(double)*70); cudaMemcpy(_Drwork,rwork,sizeof(double)*70,cudaMemcpyHostToDevice);
116 cudaMalloc((void**)&_Distate,sizeof(int)); cudaMemcpy(_Distate,istate,sizeof(int),cudaMemcpyHostToDevice);
117 /* End Allocation and Copy Block */
118
119 for (*iout = 1; *iout <= 12; ++*iout) {
120
121 cuLsoda<<<1,1>>>(fex, _Dneq, _Dy, _Dt, _Dtout, _Ditol, _Drtol, _Datol, _Ditask, _Distate, _Diopt, _Drwork, _Dlrw, _Diwork, _Dliw, jex, _Djt);
122
123 /* Copy memory back from Device to Host */
124 cudaMemcpy(t,_Dt,sizeof(double),cudaMemcpyDeviceToHost);
125 cudaMemcpy(y,_Dy,sizeof(double)*3,cudaMemcpyDeviceToHost);
126 cudaMemcpy(jt,_Djt,sizeof(int),cudaMemcpyDeviceToHost);
127 cudaMemcpy(neq,_Dneq,sizeof(int),cudaMemcpyDeviceToHost);
128 cudaMemcpy(liw,_Dliw,sizeof(int),cudaMemcpyDeviceToHost);
129 cudaMemcpy(lrw,_Dlrw,sizeof(int),cudaMemcpyDeviceToHost);
130 cudaMemcpy(atol,_Datol,sizeof(double)*13,cudaMemcpyDeviceToHost);
131 cudaMemcpy(itol,_Ditol,sizeof(int),cudaMemcpyDeviceToHost);
132 cudaMemcpy(iopt,_Diopt,sizeof(int),cudaMemcpyDeviceToHost);
133 cudaMemcpy(rtol,_Drtol,sizeof(double),cudaMemcpyDeviceToHost);
134 cudaMemcpy(tout,_Dtout,sizeof(double),cudaMemcpyDeviceToHost);
135 cudaMemcpy(itask,_Ditask,sizeof(int),cudaMemcpyDeviceToHost);
136 cudaMemcpy(iwork,_Diwork,sizeof(int)*23,cudaMemcpyDeviceToHost);
137 cudaMemcpy(rwork,_Drwork,sizeof(double)*70,cudaMemcpyDeviceToHost);
138 cudaMemcpy(istate,_Distate,sizeof(int),cudaMemcpyDeviceToHost);
139 /* End Copy Block */
140
141
142 printf("Exit: \t\t\tAt t =\t%G\ty = \t%g\t%g\t%g\n", *t, y[0], y[1], y[2]);
143 if (istate < 0) {
144 goto L80;
145 }
146 /* L40: */
147 *tout *= 10.;
148 cudaMemcpy(_Dtout,tout,sizeof(double),cudaMemcpyHostToDevice);
149 }
150 printf("Number of Steps: %i\nNo. f-s: %i\nNo. J-s = %i\nMethod Last Used = %i\nLast switch was at t = %g\n",iwork[10],iwork[11],iwork[12],iwork[18],rwork[14]);
151
152 L80:
153 printf( "STOP istate is < 0\n");
154 return 0;
155 } /* MAIN__ */
I don’t get what is wrong here. Can someone enlighten me?
Thanks,
Paul