Global device pointer access using cudaMemcpyToSymbol and cudaMemcpyFromSymbol

cesarcx · November 20, 2013, 11:39am

I am trying to use global device pointer variables that can be accessed from any device or global routine. The following example works fine when creating and operating on the device arrays d_a, d_b and d_c through the sum_device() routine. The problem arises when I try to access these variables globally by calling alter_data without parameters. The code follows:

/*
 * sum_device: for every index below SIZE handled by this thread, stores
 * d_a[i] + d_b[i] into d_c[i], then overwrites d_a[i] with i and d_b[i]
 * with i + 1.
 *
 * Indexing is a grid-stride loop: each thread starts at its flat global
 * index and advances by the total number of launched threads, so any
 * 1-D launch configuration covers the whole range.
 *
 * d_a, d_b, d_c: arrays indexed from 0 to SIZE-1 (SIZE is defined outside
 *                this snippet).
 */
__global__ void sum_device(double *d_a, double *d_b, double *d_c)
{
  const int stride = blockDim.x * gridDim.x;   // total threads in the grid

  for (int idx = threadIdx.x + blockIdx.x * blockDim.x; idx < SIZE; idx += stride)
  {
    d_c[idx] = d_a[idx] + d_b[idx];
    // The inputs are refilled only after their old values were consumed above.
    d_a[idx] = idx;
    d_b[idx] = idx + 1;
  }
}

/*
 * alter_data: parameterless kernel that writes through the global pointers
 * d_Sc, d_Sa and d_Sb, setting elements 0..SIZE-1 to 2.0, 0.0 and 1.0
 * respectively.
 *
 * The pointers are dereferenced as-is, so they must hold valid device
 * addresses before this kernel is launched.
 */
__global__ void alter_data(void)
{
  const int stride = blockDim.x * gridDim.x;   // total threads in the grid

  // Grid-stride loop over the SIZE elements.
  for (int idx = threadIdx.x + blockIdx.x * blockDim.x; idx < SIZE; idx += stride)
  {
    d_Sc[idx] = 2.0;
    d_Sa[idx] = 0.0;
    d_Sb[idx] = 1.0;
  }
}

/*
 * Host driver: allocates three host vectors, runs sum_device on the device
 * arrays d_a/d_b/d_c, then tries to publish those device pointers through the
 * symbols d_Sa/d_Sb/d_Sc so that alter_data() can be launched without
 * arguments.
 *
 * Returns 0 on completion; exits with EXIT_FAILURE if a host allocation fails.
 */
int main( int argc, char **argv)
{ double *a, *b, *c;

// host allocation memory

  a = (double *)malloc(sizeof(double)*SIZE);
  b = (double *)malloc(sizeof(double)*SIZE);
  c = (double *)malloc(sizeof(double)*SIZE);

  // Test every pointer on its own: (a || b || c) == NULL is true only when
  // all three allocations failed, so a partial failure would go unnoticed.
  if (a == NULL || b == NULL || c == NULL)
  { cout<<"Error: Not enough memory on host."<<endl;
    free(a);              // free(NULL) is a no-op
    free(b);
    free(c);
    exit(EXIT_FAILURE);   // signal the failure instead of exiting with status 0
  }
  init_host_data(a, b, c);
  cout<<"Initial Values before calls:"<<endl;
  print_vectors(a, b, c);
  copy_host_to_device(a, b);
  sum_device<<<1,SIZE>>>(d_a, d_b, d_c);
  copy_device_to_host(a, b, c);
  cout<<endl<<"Final Values after sum call:"<<endl;
  print_vectors(a, b, c);
  // NOTE(review): d_Sa/d_Sb/d_Sc are single pointer variables, yet the byte
  // count below is that of a whole SIZE-element array, read starting at the
  // address of the host pointer variable. This size mismatch is the problem
  // under discussion; the calls are kept exactly as posted.
  checkCudaErrors(cudaMemcpyToSymbol(d_Sa, &d_a, sizeof(double)*SIZE));
  checkCudaErrors(cudaMemcpyToSymbol(d_Sb, &d_b, sizeof(double)*SIZE));
  checkCudaErrors(cudaMemcpyToSymbol(d_Sc, &d_c, sizeof(double)*SIZE));
  alter_data<<<1,SIZE>>>();
  // NOTE(review): these calls read from the pointer symbols themselves, not
  // from the arrays the pointers refer to. Kept as posted.
  checkCudaErrors(cudaMemcpyFromSymbol(a, d_Sa, sizeof(double)*SIZE));
  checkCudaErrors(cudaMemcpyFromSymbol(b, d_Sb, sizeof(double)*SIZE));
  checkCudaErrors(cudaMemcpyFromSymbol(c, d_Sc, sizeof(double)*SIZE));
  cout<<endl<<"Final Values after modifying vars:"<<endl;
  print_vectors(a, b, c);

  // Release the host buffers allocated above.
  free(a);
  free(b);
  free(c);
  return 0;
}

For some reason, either the changes made to the variables by the alter_data<<<1,SIZE>>>() kernel or the copy back to the host are not being done properly.

The code compiles fine.

Any help is appreciated.

PS: when I hover the mouse over the cudaMemcpyToSymbol and cudaMemcpyFromSymbol functions, Visual Studio shows the signatures of the older, deprecated versions of these calls…

My system: Windows 8.1, VS 2010 Express and CUDA 5.5.

jgonzac · November 20, 2013, 2:04pm

Are you trying to write into a constant variable within a kernel?
Constant variables are read-only once the kernel is executing.
Kernels without parameters don’t make sense to me.

cesarcx · November 20, 2013, 2:55pm

I am so sorry… I forgot to show how the variables are declared. They are declared as:

// Pointer variables that live on the device (declared with __device__), so
// device code can read and write them without receiving them as arguments.
__device__ double *d_Sa;
__device__ double *d_Sb;
__device__ double *d_Sc;

// Ordinary host-side pointer variables; main() passes them to the kernels
// as the device array arguments.
double *d_a;
double *d_b;
double *d_c;

I believe that the global variables d_Sa, d_Sb and d_Sc are all read-write variables or am I wrong?

The problem I am trying to solve is how to access global read/write pointer variables (pointers to double) on the device without passing them as parameters to the device functions. I have many global variables in an equivalent host code that are accessed by many subroutines, and I would like to do the same on the GPU, but I can’t figure out how to do it…

cesarcx · November 20, 2013, 4:12pm

Here is what I have so far in order to make it run:

/*
 * alter_data_once_again: device-side helper that writes through the global
 * pointers d_Sc, d_Sa and d_Sb, setting elements 0..SIZE-1 to 5.0, 3.0 and
 * 4.0 respectively.
 *
 * It computes its own flat thread index and uses a grid-stride loop, so it
 * is meant to be called by every thread of the launching kernel.
 */
__device__ void alter_data_once_again(void)
{
  const int stride = blockDim.x * gridDim.x;   // total threads in the grid

  for (int idx = threadIdx.x + blockIdx.x * blockDim.x; idx < SIZE; idx += stride)
  {
    d_Sc[idx] = 5.0;
    d_Sa[idx] = 3.0;
    d_Sb[idx] = 4.0;
  }
}


/*
 * alter_data: stores the three array arguments into the global pointers
 * d_Sa/d_Sb/d_Sc, fills the arrays through those globals (0.0, 1.0, 2.0),
 * and finally calls alter_data_once_again(), which accesses the same globals
 * without receiving any argument.
 *
 * d_a, d_b, d_c: device arrays indexed from 0 to SIZE-1.
 */
__global__ void alter_data(double *d_a, double *d_b, double *d_c)
{
  // Every thread performs these stores; all of them write the same values.
  d_Sa = d_a;
  d_Sb = d_b;
  d_Sc = d_c;

  const int stride = blockDim.x * gridDim.x;   // total threads in the grid

  // Grid-stride loop over the SIZE elements.
  for (int idx = threadIdx.x + blockIdx.x * blockDim.x; idx < SIZE; idx += stride)
  {
    d_Sc[idx] = 2.0;
    d_Sa[idx] = 0.0;
    d_Sb[idx] = 1.0;
  }

  alter_data_once_again();
}

/*
 * Host driver: allocates three host vectors, runs sum_device on the device
 * arrays d_a/d_b/d_c, then launches alter_data with the same device pointers
 * and prints the vectors after each step.
 *
 * Returns 0 on completion; exits with EXIT_FAILURE if a host allocation fails.
 */
int main( int argc, char **argv)
{ double *a, *b, *c;

// host allocation memory

  a = (double *)malloc(sizeof(double)*SIZE);
  b = (double *)malloc(sizeof(double)*SIZE);
  c = (double *)malloc(sizeof(double)*SIZE);

  // Test every pointer on its own: (a || b || c) == NULL is true only when
  // all three allocations failed, so a partial failure would go unnoticed.
  if (a == NULL || b == NULL || c == NULL)
  { cout<<"Error: Not enough memory on host."<<endl;
    free(a);              // free(NULL) is a no-op
    free(b);
    free(c);
    exit(EXIT_FAILURE);   // signal the failure instead of exiting with status 0
  }
  init_host_data(a, b, c);
  cout<<"Initial Values before calls:"<<endl;
  print_vectors(a, b, c);
  copy_host_to_device(a, b);
  sum_device<<<1,SIZE>>>(d_a, d_b, d_c);
  copy_device_to_host(a, b, c);
  cout<<endl<<"Final Values after sum call:"<<endl;
  print_vectors(a, b, c);
  // The kernel itself stores these pointers into d_Sa/d_Sb/d_Sc.
  alter_data<<<1,SIZE>>>(d_a, d_b, d_c);
  copy_device_to_host(a, b, c);
  cout<<endl<<"Final Values after modifying vars:"<<endl;
  print_vectors(a, b, c);

  // Release the host buffers allocated above.
  free(a);
  free(b);
  free(c);
  return 0;
}

That solved it, but I still had to pass the variables to a kernel in order to copy their addresses… Is there no way to make this work using cudaMemcpyToSymbol? Does this mean that I have to call a global function, pass it all my desired global variables, and copy all their addresses there?

cesarcx · November 21, 2013, 9:15am

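The following code now works correctly. No parameters are passed to the kernel alter_data(). The fix was to copy only the pointer value into each symbol (sizeof(d_a) bytes instead of sizeof(double)*SIZE) and to read the results back through the ordinary device pointers rather than with cudaMemcpyFromSymbol: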

// Pointer variables that live on the device (declared with __device__);
// main() fills them with cudaMemcpyToSymbol and alter_data() dereferences them.
__device__ double *d_Sa;
__device__ double *d_Sb;
__device__ double *d_Sc;
// Ordinary host-side pointer variables; main() passes them to sum_device and
// copies their values into the symbols above.
double *d_a;
double *d_b;
double *d_c;

/*
 * alter_data: parameterless kernel that writes through the global pointers
 * d_Sa, d_Sb and d_Sc, setting elements 0..SIZE-1 to 0.0, 1.0 and 2.0
 * respectively.
 *
 * The pointers are dereferenced as-is, so the host must have stored valid
 * device addresses in them before the launch.
 */
__global__ void alter_data(void)
{
  const int stride = blockDim.x * gridDim.x;   // total threads in the grid

  // Grid-stride loop over the SIZE elements.
  for (int idx = threadIdx.x + blockIdx.x * blockDim.x; idx < SIZE; idx += stride)
  {
    d_Sa[idx] = 0.0;
    d_Sb[idx] = 1.0;
    d_Sc[idx] = 2.0;
  }
}


/*
 * Host driver: allocates three host vectors, runs sum_device on the device
 * arrays d_a/d_b/d_c, copies the three device pointer values into the symbols
 * d_Sa/d_Sb/d_Sc, and launches the parameterless alter_data() kernel, which
 * reaches the arrays through those symbols.
 *
 * Returns 0 on completion; exits with EXIT_FAILURE if a host allocation fails.
 */
int main( int argc, char **argv)
{ double *a, *b, *c;

// host allocation memory

  a = (double *)malloc(sizeof(double)*SIZE);
  b = (double *)malloc(sizeof(double)*SIZE);
  c = (double *)malloc(sizeof(double)*SIZE);

  // Test every pointer on its own: (a || b || c) == NULL is true only when
  // all three allocations failed, so a partial failure would go unnoticed.
  if (a == NULL || b == NULL || c == NULL)
  { cout<<"Error: Not enough memory on host."<<endl;
    free(a);              // free(NULL) is a no-op
    free(b);
    free(c);
    exit(EXIT_FAILURE);   // signal the failure instead of exiting with status 0
  }
  init_host_data(a, b, c);
  cout<<"Initial Values before calls:"<<endl;
  print_vectors(a, b, c);
  copy_host_to_device(a, b);
  sum_device<<<1,SIZE>>>(d_a, d_b, d_c);
  // A kernel launch returns no status; query it explicitly so that a bad
  // launch configuration is reported here rather than surfacing later.
  checkCudaErrors(cudaGetLastError());
  copy_device_to_host(a, b, c);
  cout<<endl<<"Final Values after sum call:"<<endl;
  print_vectors(a, b, c);
  // Copy the pointer VALUE held in each host variable into the matching
  // symbol: the source is the address of the host pointer and the byte
  // count is the size of one pointer, not the size of the array.
  checkCudaErrors(cudaMemcpyToSymbol(d_Sa, &d_a, sizeof(d_a)));
  checkCudaErrors(cudaMemcpyToSymbol(d_Sb, &d_b, sizeof(d_b)));
  checkCudaErrors(cudaMemcpyToSymbol(d_Sc, &d_c, sizeof(d_c)));
  alter_data<<<1,SIZE>>>();
  checkCudaErrors(cudaGetLastError());
  // The arrays are read back through d_a/d_b/d_c, which refer to the same
  // memory as the symbols now do.
  copy_device_to_host(a, b, c);
  cout<<endl<<"Final Values after modifying vars:"<<endl;
  print_vectors(a, b, c);

  // Release the host buffers allocated above.
  free(a);
  free(b);
  free(c);
  return 0;
}

Topic		Replies	Views
how to use global device struct variables in device functions CUDA Programming and Performance	4	9274	May 19, 2011
How to use cudaMemcpyFromSymbol with global device variable? CUDA Programming and Performance	1	1104	December 8, 2013
Global arrays? CUDA Programming and Performance	24	10871	August 18, 2010
The most basic problem,ask for help CUDA Programming and Performance	5	2162	February 2, 2009
__device__ variables and arrays CUDA Programming and Performance	8	15522	August 16, 2014
device memory declared Globally not passed in CUDA Programming and Performance	1	1322	March 31, 2011
__constant__ Strangeness CUDA Programming and Performance	5	2556	July 21, 2008
How can Iget the pointer to the device memory var CUDA Programming and Performance	9	4902	October 31, 2007
accessing __device__ global variables CUDA Programming and Performance	2	1487	July 28, 2008
__constant__ and __device__ memory access CUDA Programming and Performance	4	6063	April 10, 2012

Global device pointer access using cudaMemcpyToSymbol and cudaMemcpyFromSymbol

Related topics