I believe I have an issue writing to global memory, although the runtime only gives me an error when I read the memory back:
cudaSafeCall() Runtime API error in file <i:/Nvidia/CUDA/NVIDIA GPU Computing SDK/C/src/nrtc/nrtc.cu>, line 130 : unknown error.
[codebox]
// Allocate one device buffer per per-particle array and upload the host contents.
cutilSafeCall(cudaMalloc((void**)&dLocation, sizeof(float3) * numparts));
cutilSafeCall(cudaMemcpy(dLocation, Location, sizeof(float3) * numparts, cudaMemcpyHostToDevice));
cutilSafeCall(cudaMalloc((void**)&dVelocity, sizeof(float3) * numparts));
cutilSafeCall(cudaMemcpy(dVelocity, Velocity, sizeof(float3) * numparts, cudaMemcpyHostToDevice));
cutilSafeCall(cudaMalloc((void**)&dForce, sizeof(float3) * numparts));
cutilSafeCall(cudaMemcpy(dForce, Force, sizeof(float3) * numparts, cudaMemcpyHostToDevice));
particleCalculations<<<NUM_BLOCKS, NUM_THREADS>>>(dLocation,dVelocity, dForce, dGravity, dMass, dTimeP, dnumparts, dpdens, ddebug);
// A kernel launch returns no status and runs asynchronously. Check the launch
// itself first, then block until the kernel has finished so that a fault raised
// inside the kernel is reported on the synchronize line instead of on whichever
// cudaMemcpy happens to come next. This replaces Sleep(500), which neither
// guarantees that the kernel has completed nor reports any error.
cutilSafeCall(cudaGetLastError());
cutilSafeCall(cudaThreadSynchronize()); // spelled cudaDeviceSynchronize() on CUDA 4.0 and later
// Download the results. Before the synchronize above was added, the kernel's
// error was reported by the first of these copies that was not commented out.
cutilSafeCall(cudaMemcpy(debug, ddebug, sizeof(float) * NUM_THREADS*NUM_BLOCKS, cudaMemcpyDeviceToHost));
cutilSafeCall(cudaMemcpy(Location, dLocation, sizeof(float3) * numparts, cudaMemcpyDeviceToHost));
cutilSafeCall(cudaMemcpy(Velocity, dVelocity, sizeof(float3) * numparts, cudaMemcpyDeviceToHost)); //[/codebox]
I assume I’m doing nothing wrong here, even though it works if I comment all of these lines out.
It also works if the device doesn’t change the memory, so I assume that’s where my problem lies.
[codebox]global static void particleCalculations(float3* gLocation,
float3* gVelocity,
float3* gForce,
const float* pGravity,
const float* pMass,
const float* pTimeP,
const int* pnumparts,
const int* ppdens, float* debug)
{
float3 Location, Velocity, Force;
const int tid = threadIdx.x + NUM_THREADS*blockIdx.x;
const int numparts = *pnumparts;
const int pdens = *ppdens;
const float Gravity = *pGravity;
const float Mass = *pMass;
const float TimeP = *pTimeP;
float d, TForce;
float ax, ay, az, at;
int icount = tid;
int pcount=0;
__syncthreads();
for( int icount = threadIdx.x + NUM_THREADS*blockIdx.x; icount<numparts; icount = icount+NUM_THREADS*NUM_BLOCKS)
//do
{
Location = gLocation[icount];
Velocity = gVelocity[icount];
Location = gForce[icount];
Force.x=0.0f;
Force.y=0.0f;
Force.z=0.0f;[/codebox]
The kernel continues for quite some time. Then, to write the results back into the global arrays (gX):
[codebox] Velocity.x = Velocity.x+Force.x/Mass*TimeP;
Velocity.y = Velocity.y+Force.y/Mass*TimeP;
Velocity.z = Velocity.z+Force.z/Mass*TimeP;
__syncthreads();
gLocation[icount] = Location;
gVelocity[icount] = Velocity;
gForce[icount] = Force;
icount = icount+NUM_THREADS*NUM_BLOCKS;
}[/codebox]
My first assumption was that the device could not handle reading from and writing to a global pointer at the same time on multiple threads, so I renamed the variables and added per-thread arrays.
I’m fairly stuck, and every time I try something I have to reset my graphics card due to corrupt memory. It’s not pretty.
Thanks