hi,
I’m new to CUDA and I’m getting unexpected results from a simple application. The application is extremely simple, yet I can’t find the bug.
The kernel copies one array of ints to another array of ints.
I am using CUDA Wizard 2.0 in Visual Studio 2005 on Windows XP Pro (32-bit).
[codebox]__global__ void coppy(int *in,int *out, int size);[/codebox]
If I run it in EmuRelease mode it works fine (I know EmuRelease uses operating-system threads, which are heavier than GPU threads), but if I run it in Release mode it prints huge, seemingly random numbers (the same numbers on every run).
Here is the code:
[codebox]
// Element-wise copy kernel: each thread copies at most one int from `in`
// to `out`.
//
//   in   - source array (read)
//   out  - destination array (written)
//   size - number of elements to copy
//
// The element index is the 1D global thread index
// (blockDim.x * blockIdx.x + threadIdx.x); threads whose index is >= size
// do nothing, so the launch may contain more threads than elements.
__global__ void coppy(int *in, int *out, int size)
{
    int i = blockDim.x * blockIdx.x + threadIdx.x;
    if (i < size)
        out[i] = in[i];
}
// Host driver: fills a 10-element host array with 0..9, uploads it, runs the
// coppy kernel to duplicate it into a second device buffer, downloads the
// result and prints it.
//
// Returns 0 on success (and, as in the original, when InitCUDA() fails);
// returns 1 if the kernel launch or its execution reports an error.
int main(int argc, char** argv)
{
    if (!InitCUDA()) {
        return 0;
    }

    const int count = 10;
    const size_t bytes = sizeof(int) * count;

    int h_in[count], h_out[count] = {0};
    int *d_in = 0, *d_out = 0;

    // NOTE(review): CUDA_SAFE_CALL comes from a header not shown here. If it
    // only checks errors in debug builds, failures in Release go unnoticed --
    // confirm its definition.
    CUDA_SAFE_CALL(cudaMalloc((void**)&d_in, bytes));
    CUDA_SAFE_CALL(cudaMalloc((void**)&d_out, bytes));

    for (int i = 0; i < count; i++)
        h_in[i] = i;

    CUDA_SAFE_CALL(cudaMemcpy(d_in, h_in, bytes, cudaMemcpyHostToDevice));

    coppy<<<1, 256>>>(d_in, d_out, count);

    // A kernel launch returns no status itself: launch failures are reported
    // by cudaGetLastError(), execution failures by the following sync.
    // Without this check a failed launch leaves d_out unwritten and the
    // program prints whatever the device buffer happened to contain.
    cudaError_t err = cudaGetLastError();
    if (err == cudaSuccess)
        err = cudaThreadSynchronize();
    if (err != cudaSuccess) {
        fprintf(stderr, "coppy kernel failed: %s\n", cudaGetErrorString(err));
        cudaFree(d_out);
        cudaFree(d_in);
        return 1;
    }

    CUDA_SAFE_CALL(cudaMemcpy(h_out, d_out, bytes, cudaMemcpyDeviceToHost));

    printf("\n\n");
    for (int i = 0; i < count; i++)
        printf("\nh_out[%d]= %d ", i, h_out[i]);

    // Keep the console window open until the user presses Enter.
    // (The original scanf("%d") had no destination argument, which is
    // undefined behaviour.)
    getchar();

    CUDA_SAFE_CALL(cudaFree(d_out));
    CUDA_SAFE_CALL(cudaFree(d_in));
    return 0;
}
[/codebox]
Can anyone help me find this bug??
I also did the exercises from NVIDIA's CUDA U, which didn’t use the wizard (they were already configured); there I wrote the following kernel and it worked fine:
[codebox]
// In-place sum over groups of 4 consecutive ints using XOR-partner exchange.
//
//   vect - array of at least N ints; modified in place
//   sum  - receives vect[0] after the reduction
//   N    - number of valid elements in vect
//
// For each step (bit = 2, then 1) a thread adds its own element and the
// element at index (idxx ^ bit), then stores the result back to its own slot.
// After both steps every element of a complete 4-element group holds that
// group's total, and *sum is set to the total of the group containing
// index 0. Indices >= N are never read or written; a missing partner simply
// contributes nothing.
//
// Precondition (assumed, TODO confirm against the launch site): blockDim.x is
// a multiple of 4 so that each group of 4 lies within one block --
// __syncthreads() only synchronises threads of the same block.
__global__ void vectSum(int *vect, int *sum, int N)
{
    int idxx = blockIdx.x * blockDim.x + threadIdx.x;

    for (int bit = 4 / 2; bit > 0; bit = bit / 2)
    {
        int partner = idxx ^ bit;
        bool active = (idxx < N) && (partner < N);

        int t = 0;
        if (active)
            t = vect[idxx] + vect[partner];
        // Barriers stay outside the conditional so every thread of the block
        // reaches them: first all reads finish, then all writes finish.
        __syncthreads();
        if (active)
            vect[idxx] = t;
        __syncthreads();
    }

    // Only global thread 0 publishes the result. It belongs to the block that
    // reduced vect[0], so the barrier above guarantees the value is final.
    // Letting every thread write (as before) raced with that reduction when
    // more than one block was launched.
    if (idxx == 0 && N > 0)
        *sum = vect[0];
}
[/codebox]
HELP!!! :">