I am getting a strange error when attempting to run on the GPU. I am using separate .cu files and compiling with the following flags:
nvcc --gpu-architecture=compute_20 --gpu-code=sm_20 -c --relocatable-device-code=true *.cu
nvcc --gpu-architecture=compute_20 --gpu-code=sm_20 --device-link *.o --output-file link.o
g++ -c *.cpp
g++ -o /home/ben/Executables/LCDPLD/GPULCDPLD *.o -L/usr/local/cuda-5.0/lib -lcuda -lcudart -lcurand
Any variables that appear undefined in the kernel below are declared in constant memory in cudafunctions.h.
The kernel seems to fail when it writes TempDist2 back to the global-memory array ggrid_r (I found this by commenting out lines of code and recompiling).
#include "cudafunctions.h"
/*
 * GPULCD - for every grid point, store the smallest value of
 * (distance from the grid point to an atom centre) - (that atom's Radius),
 * taken over atoms 0 .. (gAtomSize - gExtraAtomSize - 1) of gAtoms.
 *
 * Launch layout: 3D grid of 3D blocks, one thread per grid point. Threads whose
 * (x, y, z) index falls outside gGridPoints do nothing, so the launch may
 * over-cover the grid.
 *
 * Parameters:
 *   ggrid_r     - output array; element [z*Nx*Ny + y*Nx + x] receives the result
 *                 for grid point (x, y, z), where N* = gGridPoints.*.
 *                 NOTE(review): must hold at least Nx*Ny*Nz floats - confirm
 *                 against the host-side allocation.
 *   gExtraAtoms - not referenced by this kernel body; kept so the signature
 *                 stays compatible with existing callers.
 *
 * gGridPoints, gGridSize, gGridShift, gAtoms, gAtomSize, gExtraAtomSize,
 * gUnitCellVectors and gInverseUnitCellVectors are declared outside this block
 * (in cudafunctions.h, per the accompanying description).
 */
__global__ void GPULCD(float* ggrid_r, ATOMS* gExtraAtoms)
{
    const int tidx = blockIdx.x * blockDim.x + threadIdx.x;
    const int tidy = blockIdx.y * blockDim.y + threadIdx.y;
    const int tidz = blockIdx.z * blockDim.z + threadIdx.z;

    // Threads past the edge of the grid have no output element to write.
    if (tidx >= gGridPoints.x || tidy >= gGridPoints.y || tidz >= gGridPoints.z)
        return;

    // Flat row-major index, x fastest. Algebraically the same as
    // z*Nx*Ny + y*Nx + x, but evaluated in size_t so the product cannot
    // overflow a 32-bit int on large grids.
    const size_t position =
        (static_cast<size_t>(tidz) * static_cast<size_t>(gGridPoints.y) + static_cast<size_t>(tidy))
            * static_cast<size_t>(gGridPoints.x)
        + static_cast<size_t>(tidx);

    // Cartesian coordinates of this grid point.
    const float posx = tidx * gGridSize.x / gGridPoints.x + gGridShift.x;
    const float posy = tidy * gGridSize.y / gGridPoints.y + gGridShift.y;
    const float posz = tidz * gGridSize.z / gGridPoints.z + gGridShift.z;

    // Starting upper bound: a grid point for which every atom yields a value
    // above 5000 is reported as 5000.
    float nearest = 5000.0f;

    for (int i = 0; i < (gAtomSize - gExtraAtomSize); i++)
    {
        // Displacement from atom i to the grid point, Cartesian.
        float dx = posx - gAtoms[i].CartPosition.x;
        float dy = posy - gAtoms[i].CartPosition.y;
        float dz = posz - gAtoms[i].CartPosition.z;

        // Transform the displacement with gInverseUnitCellVectors.
        float fx = gInverseUnitCellVectors.ax * dx + gInverseUnitCellVectors.bx * dy + gInverseUnitCellVectors.cx * dz;
        float fy = gInverseUnitCellVectors.ay * dx + gInverseUnitCellVectors.by * dy + gInverseUnitCellVectors.cy * dz;
        float fz = gInverseUnitCellVectors.az * dx + gInverseUnitCellVectors.bz * dy + gInverseUnitCellVectors.cz * dz;

        // Remove the nearest whole number from each component
        // (presumably a periodic-image wrap - confirm with the host code).
        fx -= rintf(fx);
        fy -= rintf(fy);
        fz -= rintf(fz);

        // Transform back with gUnitCellVectors.
        dx = gUnitCellVectors.ax * fx + gUnitCellVectors.bx * fy + gUnitCellVectors.cx * fz;
        dy = gUnitCellVectors.ay * fx + gUnitCellVectors.by * fy + gUnitCellVectors.cy * fz;
        dz = gUnitCellVectors.az * fx + gUnitCellVectors.bz * fy + gUnitCellVectors.cz * fz;

        // Plain multiplication instead of pow(x, 2); explicit single-precision
        // sqrtf so the arithmetic does not depend on overload resolution.
        const float surfaceDist = sqrtf(dx * dx + dy * dy + dz * dz) - gAtoms[i].Radius;

        nearest = fminf(nearest, surfaceDist);
    }

    ggrid_r[position] = nearest;
}
This is the error from cuda-gdb:
[Launch of CUDA Kernel 42 (GPULCD<<<(1,2,3),(8,8,8)>>>) on Device 0]
Program received signal CUDA_EXCEPTION_6, Warp Misaligned Address.
[Switching focus to CUDA kernel 42, grid 3, block (0,0,0), thread (0,4,1), device 0, sm 2, warp 3, lane 0]
0x000000000830d4a8 in GPULCD(float*, ATOMS*)<<<(1,2,3),(8,8,8)>>> ()