Hello,
I am new to both CUDA and parallel programming, and I am learning (well trying to teach myself) CUDA by applying it to plasma simulation. My question is, can the value in a device variable be copied to a host variable and displayed? I have written
#include <stdio.h>
#include <cuda.h>
device int x1; //declare a variable on the device
//initialize device variable
global void Initialze( int *x2 ){ x1 = 99; *x2 = x1; }
//increment device variable
global void Increment( int *x2 ){ x1 += x1; *x2 = x1; }
int main(int argc, char **argv){
int blksz, //block size, threads per block
nblk, //number of blocks per grid
*val, //host var to display value of a device var
*x2; //a device var
//execution configuration
blksz = 1;
nblk = 1;
//allocate for device variable
cudaMalloc((void **) &x2, sizeof(int));
//allocate for host var
val = (int *)malloc( sizeof(int));
//initialize the device variable and display it
Initialze <<< nblk, blksz >>> ( x2 );
cudaMemcpy(&val, &x2, sizeof(int), cudaMemcpyDeviceToHost);
fprintf(stdout, "%d \n", *val);
//increment the device variable and display it
Increment <<< nblk, blksz >>> ( x2 );
cudaMemcpy(&val, &x2, sizeof(int), cudaMemcpyDeviceToHost);
fprintf(stdout, "%d \n", *val );
return 0;
}//end main
The output I expected was
C:\>mylesson1.exe
99
100
But instead I got
C:\>mylesson1.exe
3555488
3555488
Or some other such random value. Can anyone tell me what I am doing wrong or what concept I am missing (or misunderstanding)? Thnx.