Hello! I was testing Parallel Nsight Eclipse Edition with a simple program:
#include <stdio.h>
#include <stdlib.h>
#define N 20
/*
 * Store `value` into each of the first `n` elements of `vector`.
 * Nothing is written when `n` is zero or negative.
 */
void cpu_fill( float *vector, float value, int n ){
    float *cursor = vector;
    int remaining = n;
    while( remaining > 0 ){
        *cursor = value;
        ++cursor;
        --remaining;
    }
}
/*
 * Print the first `n` elements of `vector` to standard output,
 * each formatted as " %f |" with no trailing newline.
 */
void cpu_show( float *vector, int n ){
    int idx = 0;
    while( idx < n ){
        printf(" %f |", vector[idx] );
        ++idx;
    }
}
/*
 * Element-wise vector addition: c[i] = a[i] + b[i] for every i in [0, N).
 *
 * a, b : input vectors, read at indices 0 .. N-1
 * c    : output vector, written at indices 0 .. N-1
 *
 * All three pointers are dereferenced inside the kernel, so they must be
 * addressable from device code and hold at least N floats each.
 *
 * Launch layout: 1D grid of 1D blocks, any size. Each thread starts at its
 * global index (block offset + thread offset) and advances by the total
 * number of launched threads, so every element is handled exactly once
 * regardless of the <<<blocks, threads>>> configuration. The previous
 * version started from blockIdx.x alone, which was only correct when each
 * block had a single thread.
 */
__global__ void add( float *a, float *b, float *c ){
    int stride = blockDim.x * gridDim.x;
    for( int tid = blockIdx.x * blockDim.x + threadIdx.x; tid < N; tid += stride ){
        c[tid] = a[tid] + b[tid];
    }
}
/*
 * Terminate the program with a diagnostic on stderr when a CUDA runtime
 * call reports anything other than cudaSuccess. `what` names the failed step.
 */
static void check_cuda( cudaError_t status, const char *what ){
    if( status != cudaSuccess ){
        fprintf( stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString( status ) );
        exit( EXIT_FAILURE );
    }
}

/*
 * Fill two host vectors of N floats, add them on the GPU with the `add`
 * kernel, copy the result back and print it.
 * Returns 0 on success; exits with EXIT_FAILURE if any CUDA call fails.
 */
int main( void ){
    const size_t bytes = N * sizeof( float );

    /** CPU vectors **/
    float a[N], b[N], c[N];
    cpu_fill( a, 2, N );
    cpu_fill( b, 3, N );

    /** Device vectors **/
    float *dev_a = NULL, *dev_b = NULL, *dev_c = NULL;
    check_cuda( cudaMalloc( ( void** )&dev_a, bytes ), "cudaMalloc(dev_a)" );
    check_cuda( cudaMalloc( ( void** )&dev_b, bytes ), "cudaMalloc(dev_b)" );
    check_cuda( cudaMalloc( ( void** )&dev_c, bytes ), "cudaMalloc(dev_c)" );

    /** Fill the device vectors **/
    check_cuda( cudaMemcpy( dev_a, a, bytes, cudaMemcpyHostToDevice ), "copy a to device" );
    check_cuda( cudaMemcpy( dev_b, b, bytes, cudaMemcpyHostToDevice ), "copy b to device" );

    /** Execute the kernel **/
    add<<<10,1>>>(dev_a, dev_b, dev_c);
    /* A launch returns no status itself: a bad configuration is reported by
       cudaGetLastError(), a fault during execution by the next sync. */
    check_cuda( cudaGetLastError(), "add kernel launch" );
    check_cuda( cudaDeviceSynchronize(), "add kernel execution" );

    /** Copy the dev_c vector of sums **/
    check_cuda( cudaMemcpy( c, dev_c, bytes, cudaMemcpyDeviceToHost ), "copy c to host" );
    cpu_show( c, N );

    check_cuda( cudaFree( dev_a ), "cudaFree(dev_a)" );
    check_cuda( cudaFree( dev_b ), "cudaFree(dev_b)" );
    check_cuda( cudaFree( dev_c ), "cudaFree(dev_c)" );
    return 0;
}
So, I put a breakpoint on line 17 ( int tid = blockIdx.x; ), but the debugger did not stop there. Despite this, I continued debugging, stepping over line by line, but when the debugger reached line 33 ( cudaMalloc ), an error occurred: “Can not parse XML OS data; XML support was disabled at compile time”.
What are the steps for debugging? I watched the video (NVIDIA CUDA - Introduction to NVIDIA Nsight, Eclipse Edition by David Goodwin - YouTube), in which he builds a debug project and then starts debugging.
Am I doing it wrong?
P.S.: I just want to debug a kernel, i.e. a CUDA function, and see how the threads work.