This is a simplified version of the code:
[codebox]
#include <stdio.h>
#include <stdlib.h>
/* File-scope host array: 64 rows of 3 floats each. */
float ins[64][3];
/*
 * Debug kernel: every thread whose global index is below npart prints npart.
 *
 * Launch layout: 1D grid of 1D blocks; one thread per particle.
 * Shared memory: none.
 *
 * dt    - time step; accepted but not used by this simplified kernel.
 * npart - number of particles; threads with a global index >= npart do nothing.
 *
 * Device-side printf needs a device and toolkit that support formatted output
 * from kernels (compute capability 2.0 or newer). The output is buffered and
 * only shown once the host reaches a synchronising call.
 */
__global__ void function_4(double dt, int npart)
{
    const int i = blockIdx.x * blockDim.x + threadIdx.x;

    /* Guard the grid tail: the grid may hold more threads than particles. */
    if (i < npart)
        printf( " ============================ npart %d \n",npart);
}
/* Reports a failed CUDA runtime call on stderr; returns true when err is cudaSuccess. */
static bool cuda_ok(cudaError_t err, const char *what)
{
    if (err == cudaSuccess)
        return true;
    fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(err));
    return false;
}

/*
 * Host driver: allocates an input and an output device buffer holding three
 * floats per particle, uploads a zero-filled host buffer, launches function_4
 * with one thread per particle, and copies the output buffer back.
 *
 * Returns 0 on success and 1 if any allocation, copy or the launch fails.
 */
int main ( )
{
    const double dt = 1e-11;
    const int npart = 600;
    /* Buffer size in BYTES: three floats per particle. */
    const size_t nbytes = 3 * (size_t)npart * sizeof(float);
    const int device = 0;

    float *d_input = 0, *d_output = 0;
    float *h_data = 0;
    int status = 1;

    /* Round the block count up so the last, partially filled block is launched too. */
    const dim3 block(16);
    const dim3 grid((npart + block.x - 1) / block.x);

    if (!cuda_ok(cudaSetDevice(device), "cudaSetDevice"))
        goto cleanup;
    if (!cuda_ok(cudaMalloc((void**)&d_input, nbytes), "cudaMalloc(d_input)"))
        goto cleanup;
    if (!cuda_ok(cudaMalloc((void**)&d_output, nbytes), "cudaMalloc(d_output)"))
        goto cleanup;

    /* The kernel never writes d_output; zero it so the copy back is well defined. */
    if (!cuda_ok(cudaMemset(d_output, 0, nbytes), "cudaMemset(d_output)"))
        goto cleanup;

    /* Zero-filled host staging buffer, so no uninitialised memory is uploaded. */
    h_data = (float*)calloc(3 * (size_t)npart, sizeof(float));
    if (!h_data) {
        fprintf(stderr, "host allocation of %lu bytes failed\n", (unsigned long)nbytes);
        goto cleanup;
    }

    if (!cuda_ok(cudaMemcpy(d_input, h_data, nbytes, cudaMemcpyHostToDevice),
                 "cudaMemcpy(host to device)"))
        goto cleanup;

    /***************** execute kernel *****************/
    function_4<<<grid,block>>>(dt,npart);
    /* A launch returns no status itself; configuration errors show up here. */
    if (!cuda_ok(cudaGetLastError(), "function_4 launch"))
        goto cleanup;

    /* This blocking copy waits for the kernel and reports any execution error. */
    if (!cuda_ok(cudaMemcpy(h_data, d_output, nbytes, cudaMemcpyDeviceToHost),
                 "cudaMemcpy(device to host)"))
        goto cleanup;

    status = 0;

cleanup:
    /* cudaFree and free both accept a null pointer, so no guards are needed. */
    cudaFree(d_output);
    cudaFree(d_input);
    free(h_data);
    return status;
}
[/codebox]
And the corresponding debugger output:
[codebox]
…
…
35 dim3 block(16);
(gdb) s
dim3 (this=0x7ffff5059cc0, x=16, y=1, z=1) at /SATA/500G/CUDA/cuda_30/cuda/bin/../include/vector_types.h:479
479 __host__ __device__ dim3(unsigned int x = 1, unsigned int y = 1, unsigned int z = 1) : x(x), y(y), z(z) {}
(gdb) s
main () at main_cuda_function.cu:36
36 dim3 grid( npart/block.x );
(gdb) s
dim3 (this=0x7ffff5059cb0, x=37, y=1, z=1) at /SATA/500G/CUDA/cuda_30/cuda/bin/../include/vector_types.h:479
479 __host__ __device__ dim3(unsigned int x = 1, unsigned int y = 1, unsigned int z = 1) : x(x), y(y), z(z) {}
(gdb) s
main () at main_cuda_function.cu:41
41 function_4<<<grid,block>>>(dt,npart);
(gdb) s
function_4__entry (__cuda_0=9.9999999999999994e-12, __cuda_1=600) at main_cuda_function.cu:7
7 {
(gdb) s
__device_stub__Z10function_4di (__par0=9.9999999999999994e-12, __par1=600) at /tmp/tmpxft_00000dd7_00000000-1_main_cuda_function.cudafe1.stub.c:10
10 /tmp/tmpxft_00000dd7_00000000-1_main_cuda_function.cudafe1.stub.c: No such file or directory.
in /tmp/tmpxft_00000dd7_00000000-1_main_cuda_function.cudafe1.stub.c
(gdb) s
cudaLaunch (entry=0x40b9ec "UH\211åH\203ì\020ò\017\021Eø\211}ô\213}ôò\017\020EøèpÿÿÿÉÃUH\211åH\213=\n½ ")
at /SATA/500G/CUDA/cuda_30/cuda/bin/../include/cuda_runtime.h:714
714 return cudaLaunch((const char*)entry);
(gdb) s
715 }
…
…
[/codebox]