Hi there,
I have code similar to this one
float *d_C;
int size = 10;
struct variable_struct
{
float *var_ENTHALPY_device;
float *var_U_VELOCITY_device;
float *only_device;
};
variable_struct *variables_device;
__global__ void VecAdd(float* d_C, struct variable_struct *variables_device);
void main()
{
float *host_array = new float[10];
float *host_array_debug = new float[10];
float *host_array_debug2 = new float[10];
float *ENTHALPY_host = new float[10];
float *U_VELOCITY_host = new float[10];
cudaSetDevice(1);
for(int i=0; i<10;i++)
{
host_array[i] = 1;
ENTHALPY_host[i] = 2;
U_VELOCITY_host[i] = 3;
debug_non_point[i] = 4;
}
cudaMalloc((void**)&d_C, size*sizeof(float));
cudaMemcpy(d_C, host_array, size*sizeof(float), cudaMemcpyHostToDevice);
struct variable_struct variables_host;
struct variable_struct *variables_host_debug;
//copy into host structure
cudaMalloc( (void**)&(variables_host.var_ENTHALPY_device), size*sizeof(float));
cudaMemcpy( variables_host.var_ENTHALPY_device, ENTHALPY_host, size*sizeof(float), cudaMemcpyHostToDevice );
cudaMalloc( (void**)&(variables_host.var_U_VELOCITY_device), size*sizeof(float));
cudaMemcpy( variables_host.var_U_VELOCITY_device, U_VELOCITY_host, size*sizeof(float), cudaMemcpyHostToDevice );
cudaMalloc( (void**)&(variables_host.only_device), size*sizeof(float));
//copy host structure into device structure
cudaMalloc((void**)&(variables_device), sizeof(variable_struct));
cudaMemcpy( variables_device, &variables_host, sizeof(variable_struct), cudaMemcpyHostToDevice );
// Invoke kernel
VecAdd<<<1, 10>>>(d_C, variables_device);
// Copy result from device memory to host memory
cudaMemcpy(host_array_debug, d_C, size*sizeof(float), cudaMemcpyDeviceToHost);
cudaMemcpy(host_array_debug2, variables_device->var_U_VELOCITY_device, size*sizeof(float), cudaMemcpyDeviceToHost);
}
__global__ void VecAdd(float* d_C, struct variable_struct *variables_device)
{
int tx = threadIdx.x;
variables_device->only_device[tx] = variables_device->var_ENTHALPY_device[tx];
d_C[tx] = variables_device->only_device[tx];
}
I can use my struct on the GPU the only thing that I can’t get my head around is this line
cudaMemcpy(host_array_debug2, variables_device->var_U_VELOCITY_device, size*sizeof(float), cudaMemcpyDeviceToHost);
how can I directly access the data from the array from within the struct without copying it to an intermediate array?
thanks in advance