I wrote a simple example that writes to an invalid memory location in order to test cuda-memcheck, but it doesn't seem to work. Is there a mistake in my approach?
// Test kernel: each thread stores its own threadIdx.x into dev_array at that
// same index, provided the index is below len.
//
// Only threadIdx.x is used to form the index, so the block index plays no part
// in which element a thread writes.
// The guard compares against len only; nothing here knows the real size of the
// allocation behind dev_array, so a len larger than the allocation results in
// writes past its end.
__global__ void write_out_of_bound(int *dev_array, const int len) {
    const int idx = threadIdx.x;
    if (idx >= len) {
        return;  // thread index falls outside the range the caller asked for
    }
    dev_array[idx] = idx;
}
// Host driver for a deliberate out-of-bounds write, meant to be run under
// cuda-memcheck: the device buffer holds 64 ints, but the kernel is launched
// with 128 threads and told len = 128, so threads 64..127 store past the end
// of the allocation.
//
// CHECK is defined outside this snippet; it is presumably an error-reporting
// wrapper around a cudaError_t result -- confirm against its definition.
int main(int argc, char **argv)
{
    (void)argc;  // command-line arguments are not used
    (void)argv;

    const int alloc_elems = 64;    // number of ints actually allocated
    const int launch_elems = 128;  // threads launched and len passed; intentionally > alloc_elems

    int *dev_array = NULL;
    // The allocation result was previously ignored; a failed cudaMalloc would
    // otherwise hand an invalid pointer to the kernel.
    CHECK(cudaMalloc((void**)&dev_array, sizeof(int) * alloc_elems));

    write_out_of_bound<<<1, launch_elems>>>(dev_array, launch_elems);
    // A kernel launch returns no status itself; launch errors are read here.
    CHECK(cudaGetLastError());
    // Errors raised while the kernel executes surface at the next synchronizing call.
    CHECK(cudaDeviceSynchronize());

    // Release the device buffer (reached only if CHECK returns control after
    // the calls above).
    CHECK(cudaFree(dev_array));
    return 0;
}
The command line:
nvcc -g -G test.cu -o test
cuda-memcheck ./test
My device information is as follows: