Hi.
I have CUDA code like this:
// Per-thread update step: each thread computes a force, turns it into an
// acceleration, and advances one entry of velocities and positions.
//
// velocities: read and then overwritten at index objectId*8+positionId.
// positions:  written (not read) at the same index.
//
// With the launch shown in callKernel (32 blocks of 8x32 threads) objectId
// ranges over 0..1023 and the index over 0..8191.
// NOTE(review): there is no bounds check, so both arrays must hold at least
// that many doubles for the launch in use -- confirm against the allocation.
// NOTE(review): Mass, time and position are not declared anywhere in this
// snippet; presumably they are defined elsewhere or were elided -- confirm.
__global__ void kernel(double* velocities,double* positions)
{
// Object index: blockIdx.x selects a group of blockDim.y objects,
// threadIdx.y selects the object within that group.
int objectId=blockDim.y*blockIdx.x+threadIdx.y;
// Entry within the object; the hard-coded stride 8 used below only matches
// blockDim.x when the block is launched 8 threads wide in x.
int positionId=threadIdx.x;
int ty=threadIdx.y; // not used in the lines shown (the elided force code may use it)
double force=0;
//calculate force
// (the actual force computation is omitted from the post)
double accelerate=0;
// Skip the division when Mass is zero; accelerate then stays 0.
if(Mass!=0)
accelerate=force/Mass;
double velocity=velocities[objectId*8+positionId];
velocity=velocity+ time * accelerate;
velocities[objectId*8+positionId]=velocity;
// The new position is built from the already-updated velocity.
positions[objectId*8+positionId]=velocity*time +position;
}
// Host-side launcher for kernel. The parameter list and the kernel arguments
// are elided ("…") in the post.
void callKernel(…)
{
dim3 DimGrid(32,1,1); // 32 blocks along x
dim3 DimThread(8,32,1); // 8 x 32 = 256 threads per block
kernel<<<DimGrid,DimThread>>>(…);
// NOTE(review): nothing here checks for launch errors or waits for the kernel
// to finish. Kernel launches are asynchronous, so any timing taken around this
// call needs a synchronization point -- confirm how the 8 s / 1 s were measured.
}
My problem is that when I run this code, it takes 8 seconds to complete the task, but when I remove lines 20 and 21, or change the kernel code like this:
// Variant of the kernel above that differs only by the unconditional
// "accelerate=4;" assignment: the velocity/position update is the same, but
// the acceleration no longer depends on the computed force.
//
// velocities: read and then overwritten at index objectId*8+positionId.
// positions:  written (not read) at the same index.
// NOTE(review): Mass, time and position are not declared anywhere in this
// snippet; presumably they are defined elsewhere or were elided -- confirm.
__global__ void kernel(double* velocities,double* positions)
{
// Object index: blockIdx.x selects a group of blockDim.y objects,
// threadIdx.y selects the object within that group.
int objectId=blockDim.y*blockIdx.x+threadIdx.y;
// Entry within the object; the hard-coded stride 8 used below only matches
// blockDim.x when the block is launched 8 threads wide in x.
int positionId=threadIdx.x;
int ty=threadIdx.y; // not used in the lines shown (the elided force code may use it)
double force=0;
//calculate force
// (the actual force computation is omitted from the post)
double accelerate=0;
if(Mass!=0)
accelerate=force/Mass;
// NOTE(review): this overwrite discards the value derived from force, so
// nothing that is stored depends on the force calculation any more. The
// compiler is then likely free to remove that calculation as dead code, which
// would explain why this variant runs much faster -- confirm by profiling or
// by inspecting the generated code.
accelerate=4; // any number (4 is an example)
double velocity=velocities[objectId*8+positionId];
velocity=velocity+ time * accelerate;
velocities[objectId*8+positionId]=velocity;
// The new position is built from the already-updated velocity.
positions[objectId*8+positionId]=velocity*time +position;
}
it takes less than 1 second to execute. It’s very annoying, and I don’t know how to reduce the run time of the first version. Please help me.