Unexpected behavior of cuda-gdb with Tesla M2050

Hello,

cuda-gdb is not working properly for me on my 64-bit Ubuntu system with a Tesla M2050 and CUDA 3.2.

I’ve followed the instructions in the cuda-gdb manual included with CUDA but am not getting the same results.

Here is the example code from the manual:

[codebox]#include <stdio.h>
#include <stdlib.h>

// Simple 8-bit bit reversal Compute test

#define N 256

__global__ void bitreverse(void *data) {
    unsigned int *idata = (unsigned int*)data;
    extern __shared__ int array[];

    array[threadIdx.x] = idata[threadIdx.x];

    array[threadIdx.x] = ((0xf0f0f0f0 & array[threadIdx.x]) >> 4) |
                         ((0x0f0f0f0f & array[threadIdx.x]) << 4);
    array[threadIdx.x] = ((0xcccccccc & array[threadIdx.x]) >> 2) |
                         ((0x33333333 & array[threadIdx.x]) << 2);
    array[threadIdx.x] = ((0xaaaaaaaa & array[threadIdx.x]) >> 1) |
                         ((0x55555555 & array[threadIdx.x]) << 1);

    idata[threadIdx.x] = array[threadIdx.x];
}

int main(void) {
    void *d = NULL;
    int i;
    unsigned int idata[N], odata[N];

    for (i = 0; i < N; i++)
        idata[i] = (unsigned int)i;

    cudaMalloc((void**)&d, sizeof(int)*N);
    cudaMemcpy(d, idata, sizeof(int)*N, cudaMemcpyHostToDevice);

    bitreverse<<<1, N, N*sizeof(int)>>>(d);

    cudaMemcpy(odata, d, sizeof(int)*N, cudaMemcpyDeviceToHost);

    for (i = 0; i < N; i++)
        printf("%u -> %u\n", idata[i], odata[i]);

    cudaFree((void*)d);
    return 0;
}[/codebox]
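In case it is relevant: the code above is exactly as in the manual, so none of the CUDA calls check their return values. If it would help, I can add a check like the sketch below after the kernel launch and report what it prints; this is my own addition (using the CUDA 3.2 runtime API), not part of the manual example.

[codebox]cudaError_t err = cudaGetLastError();   // error from the launch itself
if (err != cudaSuccess)
    printf("launch failed: %s\n", cudaGetErrorString(err));

err = cudaThreadSynchronize();          // error raised while the kernel ran
if (err != cudaSuccess)
    printf("kernel failed: %s\n", cudaGetErrorString(err));[/codebox]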

I compile it with the following command:

nvcc test.cu -g -G -gencode arch=compute_20,code=sm_20 -o test

then I start cuda-gdb on the binary, set a breakpoint at line 21 just like in the example, and run the program.
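For reference, the session looks roughly like this (I set the breakpoint by file and line; the exact form of the break command I typed may have differed slightly):

[codebox]$ cuda-gdb test
(cuda-gdb) break test.cu:21
(cuda-gdb) run[/codebox]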

The program does stop, but the debugger does not seem to be able to focus on the running kernel. When it stops I get this output:

[Switching to Thread 140533119010592 (LWP 23724)]

Breakpoint 1, bitreverse (__cuda_0=0xf800000000) at test.cu:22

22 }

instead of the expected

Breakpoint 1 at 0x1e30910: file bitreverse.cu, line 21.

[Launch of CUDA Kernel 0 on Device 0]

[Switching to CUDA Kernel 0 (<<<(0,0),(0,0,0)>>>)]

as in the manual.

Using:

info cuda threads

reports back

“Focus not set on any running CUDA kernel.”

What am I missing here?

Many thanks, Carlos T
