I am trying to use global device pointer vars to be used on any device and global subroutines calls. The follow example works fine in creating and performing operations over the device arrays d_a, d_b and d_c by calling sum_device() routine. The problem arises when I try to access these variables globally by calling alter_data without parameters. Follow the code:
__global__ void sum_device(double *d_a, double *d_b, double *d_c)
{ int tid;
tid=threadIdx.x+blockIdx.x*blockDim.x;
int offset=blockDim.x * gridDim.x;
while(tid<SIZE)
{ d_c[tid]=d_a[tid]+d_b[tid];
d_a[tid]=tid;
d_b[tid]=tid+1;
tid+=offset;
}
}
__global__ void alter_data(void)
{ int tid;
tid=threadIdx.x+blockIdx.x*blockDim.x;
int offset=blockDim.x * gridDim.x;
while(tid<SIZE)
{ d_Sc[tid]=2.0;
d_Sa[tid]=0.0;
d_Sb[tid]=1.0;
tid+=offset;
}
}
int main( int argc, char **argv)
{ double *a, *b, *c;
// host allocation memory
a = (double *)malloc(sizeof(double)*SIZE);
b = (double *)malloc(sizeof(double)*SIZE);
c = (double *)malloc(sizeof(double)*SIZE);
if ( (a || b || c) == NULL)
{ cout<<"Error: Not enough memory on host."<<endl;
exit(0);
}
init_host_data(a, b, c);
cout<<"Initial Values before calls:"<<endl;
print_vectors(a, b, c);
copy_host_to_device(a, b);
sum_device<<<1,SIZE>>>(d_a, d_b, d_c);
copy_device_to_host(a, b, c);
cout<<endl<<"Final Values after sum call:"<<endl;
print_vectors(a, b, c);
checkCudaErrors(cudaMemcpyToSymbol(d_Sa, &d_a, sizeof(double)*SIZE));
checkCudaErrors(cudaMemcpyToSymbol(d_Sb, &d_b, sizeof(double)*SIZE));
checkCudaErrors(cudaMemcpyToSymbol(d_Sc, &d_c, sizeof(double)*SIZE));
alter_data<<<1,SIZE>>>();
checkCudaErrors(cudaMemcpyFromSymbol(a, d_Sa, sizeof(double)*SIZE));
checkCudaErrors(cudaMemcpyFromSymbol(b, d_Sb, sizeof(double)*SIZE));
checkCudaErrors(cudaMemcpyFromSymbol(c, d_Sc, sizeof(double)*SIZE));
cout<<endl<<"Final Values after modifying vars:"<<endl;
print_vectors(a, b, c);
return 0;
}
Form some reason the changes at the variables promoted by the alter_data<<<1,SIZE>>>() routine or the copy back to the host are not being properly done, L36-L43…
The code compiles fine.
Any help is appreciated.
PS.: when I point out the mouse over the cudaMemcpyToSymbol and cudaMemcpyFromSymbol functions the VS shows the older deprecated functions calls usage mode respectively…
My system: Window 8.1, VS 2010 Express and CUDA 5.5.