Hey everyone, thanks for reading.
I’m still fairly new to CUDA, but I have written a program in which cudaMemcpy isn’t working. Here is an example of the code:
#include <iostream>
#include <cutil.h>
// Particle data for the drift: six raw pointers, one per coordinate
// (x, p_x, y, p_y, t, p_t). The struct only stores the pointers; it does not
// allocate or release whatever they are later made to point at.
struct Particle_drift {
    double *x, *p_x;
    double *y, *p_y;
    double *t, *p_t;

    // A default-constructed instance starts with every pointer null.
    Particle_drift()
        : x(0), p_x(0),
          y(0), p_y(0),
          t(0), p_t(0)
    {
    }
};
// Records particle data as it passes through a quadrupole: six raw pointers,
// one per coordinate (x, p_x, y, p_y, t, p_t). There is no constructor, so
// the members are not initialised by a plain default declaration.
struct Particle_quadrupole {
    double *x, *p_x;
    double *y, *p_y;
    double *t, *p_t;
};
// First-order transport maps for all the various apparatus. Every map is a
// 6x6 matrix of doubles; the struct holds seven of them in total
// (drift, two per bend variant, quadrupole, sextupole).
struct transport_map1 {
    double drift[6][6];

    // Pair of matrices shared by the s-bend and r-bend entries.
    struct bend {
        double edge_focus[6][6];
        double dipole[6][6];
    };
    bend sbend;
    bend rbend;

    double quadrupole[6][6];
    double sextupole[6][6];
};
// Kernel (excerpt; the author elided part of the body with "..." lines).
// Each thread derives a flat index `tid` from its block and thread indices,
// combines x[tid] and p_x[tid] with entries taken from p_R, and stores the
// result back into x[tid].
//
// Parameters:
//   x, p_x                        - arrays read at index tid; x is also written.
//   y, p_y, t, p_t                - not referenced in the visible lines.
//   p_device_particle_quadrupole  - not referenced in the visible lines.
//   p_R                           - pointer to the transport map struct.
//
// NOTE(review): no element count is passed in and there is no `tid < n`
// guard, so any thread beyond the array length would access x/p_x out of
// bounds - confirm the launch configuration matches the particle count
// exactly, or add a length parameter and a guard.
__global__ void kernel(double* x, double*p_x, double* y, double* p_y, double* t, double* p_t, Particle_quadrupole* p_device_particle_quadrupole, transport_map1* p_R){
// Flat 1D global thread index.
const unsigned int tid = blockIdx.x * blockDim.x + threadIdx.x;
double dummy;
// NOTE(review): p_R is a transport_map1*, so p_R[0] is a transport_map1
// object and `p_R[0][0]` indexes a struct that declares no operator[].
// Presumably one of the member matrices was intended (e.g.
// p_R->drift[0][0]) - confirm which map applies here.
// The statement below has no terminating ';' before the elided section.
dummy = x[tid] * p_R[0][0] + p_x[tid] * p_R[0][1]
...
...Insert more calculations here
...
// Write the accumulated value back over the input coordinate.
x[tid] = dummy;
}
// Host driver (excerpt; initialisation, the kernel launch and the end of the
// function are elided by the author with "..." lines). The visible part
// requests six device arrays of `size` bytes, one buffer of size*6 bytes and
// one of Rsize bytes, then issues host-to-device copies into them.
int main(){
Particle_drift device_particle_drift, host_particle_drift;
Particle_quadrupole device_particle_quadrupole, host_particle_quadrupole;
transport_map1 R;
...
...Stuff gets initialised and stuff here
...
// NOTE(review): p_device_particle_drift is declared twice in the same scope
// (second time pointing at host_particle_drift), and likewise
// p_device_particle_quadrupole below. Presumably the second of each pair was
// meant to be a separate host-side pointer - confirm the intended names,
// because it decides which struct receives the device addresses.
Particle_drift* p_device_particle_drift = &device_particle_drift;
Particle_drift* p_device_particle_drift = &host_particle_drift;
Particle_quadrupole* p_device_particle_quadrupole = &device_particle_quadrupole;
Particle_quadrupole* p_device_particle_quadrupole = &host_particle_quadrupole;
transport_map1* p_R = &R;
// no_particles is not declared in the visible code.
size_t size = no_particles*sizeof(double);
// 7 matrices of 6x6 doubles, matching the members of transport_map1.
size_t Rsize = 6*6*7*sizeof(double);
// Each call stores the newly allocated address into the corresponding
// pointer member of the struct that p_device_particle_drift refers to.
cutilSafeCall(cudaMalloc((void**)&p_device_particle_drift->x, size));
cutilSafeCall(cudaMalloc((void**)&p_device_particle_drift->p_x, size));
cutilSafeCall(cudaMalloc((void**)&p_device_particle_drift->y, size));
cutilSafeCall(cudaMalloc((void**)&p_device_particle_drift->p_y, size));
cutilSafeCall(cudaMalloc((void**)&p_device_particle_drift->t, size));
cutilSafeCall(cudaMalloc((void**)&p_device_particle_drift->p_t, size));
// NOTE(review): this overwrites the pointer variable itself (it no longer
// refers to the local struct). The buffer is size*6 raw bytes, while
// Particle_quadrupole is a struct of six pointers; nothing visible fills in
// those pointer members on the device - confirm how the kernel is meant to
// use this argument.
cutilSafeCall(cudaMalloc((void**)&p_device_particle_quadrupole, size*6));
// NOTE(review): this overwrites p_R, which until now pointed at the host
// object R; afterwards p_R holds the address written by cudaMalloc.
cutilSafeCall(cudaMalloc((void**)&p_R, Rsize));
cudaError_t retval;
// NOTE(review): this copy uses size/6 bytes whereas the five copies below
// use size - confirm which is intended. p_particle is not declared in the
// visible code; verify that p_particle->x is a valid host pointer to at
// least the number of bytes being copied.
retval = cudaMemcpy(p_device_particle_drift->x, p_particle->x, size/6, cudaMemcpyHostToDevice);
if (retval != cudaSuccess)
{
// NOTE(review): cout/endl are used unqualified; this needs a using
// declaration/directive that is not visible here (or std:: qualification).
cout << "cudaMemcpy error at p_device_particle_drift: " << cudaGetErrorString(retval) << "value " << retval << endl;
}
cutilSafeCall(cudaMemcpy(p_device_particle_drift->p_x, p_particle->p_x, size, cudaMemcpyHostToDevice));
cutilSafeCall(cudaMemcpy(p_device_particle_drift->y, p_particle->y, size, cudaMemcpyHostToDevice));
cutilSafeCall(cudaMemcpy(p_device_particle_drift->p_y, p_particle->p_y, size, cudaMemcpyHostToDevice));
cutilSafeCall(cudaMemcpy(p_device_particle_drift->t, p_particle->t, size, cudaMemcpyHostToDevice));
cutilSafeCall(cudaMemcpy(p_device_particle_drift->p_t, p_particle->p_t, size, cudaMemcpyHostToDevice));
// NOTE(review): p_Rhost, matrixRsize, p_T, p_Thost and matrixTsize are not
// declared in the visible code. The allocation for p_R above used Rsize, not
// matrixRsize, and no allocation for p_T is visible - confirm the byte
// counts match the allocations.
cutilSafeCall(cudaMemcpy(p_R, p_Rhost, matrixRsize, cudaMemcpyHostToDevice));
cutilSafeCall(cudaMemcpy(p_T, p_Thost, matrixTsize, cudaMemcpyHostToDevice));
...kernel is executed...
...Program continues...
When I run the code, I get an error from my own error handling, where cudaGetErrorString(retval) is "Unknown Error" and its value is 30.
If anyone has any idea why it’s not copying from the host to the device, please let me know.
Many Thanks,
Phill