Hi!
This is my first post here. I’m also a newbie as regards CUDA programming. The problem I have is that when I compile the program below and run it I get the following output:
0 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
when it should simply output 0 1 2 3 4 5 6 … etc; it is as if only the first thread had done it’s job. There’s probably a very simple and stupid mistake, if someone could illuminate me I’d be very grateful. Thank you!
BTW, I can run the examples in the SDK, for example BlackScholes and Binomial and some others I’ve tried seem to run just fine. I have an XFX Geforce 8800GT 256MB.
#include <stdlib.h>
#include <stdio.h>
#include <cutil.h>
#define N_DIV 32
__global__ void createTestGrid_kernel(int* d_TestGrid)
{
d_TestGrid[threadIdx.x]=threadIdx.x;
}
int main( int argc, char** argv)
{
CUT_DEVICE_INIT();
int *h_TestGrid;
int *d_TestGrid;
h_TestGrid=(int *)malloc(N_DIV*sizeof(int));
int i;
for (i=0;i<N_DIV;i++){
h_TestGrid[i]=-1;
}
CUDA_SAFE_CALL( cudaMalloc((void **)&d_TestGrid,N_DIV*sizeof(int)));
dim3 threads(N_DIV,1,1); dim3 grid(1,1,1);
CUDA_SAFE_CALL( cudaMemcpy(d_TestGrid, h_TestGrid, N_DIV*sizeof(int), cudaMemcpyHostToDevice));
createTestGrid_kernel<<<threads,grid>>>(d_TestGrid);
CUDA_SAFE_CALL( cudaThreadSynchronize() );
CUDA_SAFE_CALL( cudaMemcpy(h_TestGrid, d_TestGrid, N_DIV*sizeof(int), cudaMemcpyDeviceToHost));
CUDA_SAFE_CALL( cudaFree(d_TestGrid));
FILE *output;
fopen_s(&output,"output.txt","w");
for (i=0;i<N_DIV;i++){
fprintf_s(output, "%i ",h_TestGrid[i]);
}
fclose(output);
free(h_TestGrid);
CUT_EXIT(argc, argv);
}