Copying struct from host to device using cudaMemcpy

I am having a problem copying struct data from the host to the device in CUDA.
The following is the code snippet:

// A 2D point with double-precision coordinates.
// Plain aggregate of two doubles, so it can be copied byte-wise
// between host and device with cudaMemcpy.
struct point
{
     double x;   // first coordinate
     double y;   // second coordinate
};

// Forward declaration so the kernel is visible at its launch site in main().
__global__ void MyFunc(point* d_a);

// Aborts the program with a readable message when a CUDA runtime call fails.
// `what` names the call for the diagnostic.
static void checkCuda(cudaError_t status, const char* what)
{
   if (status != cudaSuccess)
   {
      fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
      exit(EXIT_FAILURE);
   }
}

// Copies one point to the device, lets MyFunc overwrite it there, copies it
// back and prints the resulting coordinates.
// Returns 0 on success; exits with EXIT_FAILURE if any allocation or CUDA
// call fails.
int main()
{
   point * a = (point*)malloc(sizeof(point));
   if (a == NULL)
   {
      fprintf(stderr, "host allocation failed\n");
      return EXIT_FAILURE;
   }
   a->x=10.0;
   a->y=10.0;

   point * d_a = NULL;
   // cudaMalloc stores the new device address THROUGH its first argument, so
   // it needs the address of d_a. Passing d_a itself (cast to void**) hands
   // it an uninitialized pointer value and d_a is never set.
   checkCuda(cudaMalloc((void**)&d_a,sizeof(point)), "cudaMalloc");

   // cudaMemcpy takes plain destination/source pointers; no cast is needed.
   checkCuda(cudaMemcpy(d_a,a,sizeof(point),cudaMemcpyHostToDevice),
             "cudaMemcpy (host to device)");

   dim3 dimblock(16,16);
   dim3 dimgrid(1,1);

   MyFunc<<<dimgrid,dimblock>>>(d_a);
   // A kernel launch returns nothing; launch errors must be queried explicitly.
   checkCuda(cudaGetLastError(), "MyFunc launch");

   // Blocking copy: completes only after the kernel above has finished, and
   // also reports any error raised while the kernel was executing.
   checkCuda(cudaMemcpy(a,d_a,sizeof(point),cudaMemcpyDeviceToHost),
             "cudaMemcpy (device to host)");
   printf("%lf %lf\n",a->x,a->y);

   checkCuda(cudaFree(d_a), "cudaFree");
   free(a);
   return 0;
}

// Kernel: sets both coordinates of *d_a to 100.0.
// d_a must point to a point in device-accessible memory.
// Only the thread with threadIdx.x == 0 and threadIdx.y == 0 performs the
// write; every other thread returns immediately.
__global__ void MyFunc(point* d_a)
{
     const bool isFirstThread = (threadIdx.x == 0) && (threadIdx.y == 0);
     if (!isFirstThread)
     {
        return;
     }

     d_a->x = 100.0;
     d_a->y = 100.0;
}

The x and y fields of point a should have been changed to 100. Instead, they are still 10, as initialized. What is happening here? Please help.

In the allocation of d_a take the address with an “&” to actually form a double pointer:

cudaMalloc((void**)&d_a,sizeof(point));

and remove the nonsensical casts to (void**) from the cudaMemcpy()s.

In general, check the return codes of CUDA calls for errors.

Use %f as the printf format string for double variables. %lf is also accepted for double since C99 (the "l" has no effect there), whereas %Lf is for long double.

And declare kernels before they are invoked (or use prototypes).