Hi,
I’m trying to implement a path tracer in CUDA, and I’m seeing very weird memory errors in the CUDA debugger/memcheck. It’s a bidirectional path tracer, so there are lots of kernel functions, and the one causing the errors is the ray-triangle intersection. I’m using the vector functions defined in helper_math.h from the CUDA samples.
The errors are not always the same; here is the code, followed by some examples:
// Ray/triangle intersection test (Moller-Trumbore formulation).
//
// Parameters:
//   triangle - triangle whose vertices A, B and C each expose a float3 `position`.
//   r        - ray with origin `r.o` and direction `r.d`.
//   u, v     - outputs: barycentric weights of the hit point for vertices B and C
//              (the weight of A is 1 - u - v). They are written as soon as they
//              are computed, so after a miss they may hold the values of a
//              rejected candidate and must not be used.
//   face     - output, written ONLY on a hit: true when the determinant is
//              positive. It is left untouched on every miss path.
//              NOTE(review): presumably "true" means the ray hits the front
//              side for the project's winding order - confirm against callers.
//
// Returns the ray parameter t (> EPSILON) such that the hit point is
// r.o + t * r.d, or -1.0f when there is no intersection.
inline __device__ float intersect(const Triangle& triangle, const Ray& r, float& u, float& v, bool& face)
{
    // Miss sentinel, kept as a float literal so no double constant is involved.
    const float kMiss = -1.0f;

    // Edge vectors of the two edges sharing vertex A.
    const float3 e1 = triangle.B.position - triangle.A.position;
    const float3 e2 = triangle.C.position - triangle.A.position;

    // Begin calculating the determinant - P is also used for the u parameter.
    const float3 P = cross(r.d, e2);

    // A determinant near zero means the ray lies in the plane of the triangle
    // or is parallel to it.
    const float det = dot(e1, P);
    if (det > -EPSILON && det < EPSILON)
        return kMiss;

    // Vector from vertex A to the ray origin.
    const float3 T = r.o - triangle.A.position;

    // u parameter and its bound test.
    // NOTE(review): u is rejected above 1 - EPSILON while u + v below is
    // rejected above 1; confirm that this asymmetry is intended.
    u = dot(T, P) / det;
    if (u < EPSILON || u > 1.0f - EPSILON)
        return kMiss;

    // Q is shared by the v parameter and the ray parameter t.
    const float3 Q = cross(T, e1);

    // v parameter and its bound test.
    v = dot(r.d, Q) / det;
    if (v < EPSILON || u + v > 1.0f)
        return kMiss;

    // Ray parameter of the intersection with the triangle's plane.
    const float t = dot(e2, Q) / det;
    if (t > EPSILON) {
        // Hit in front of the ray origin: report the sign of the determinant.
        face = det > 0.0f;
        return t;
    }

    // Intersection is behind (or too close to) the ray origin: no hit.
    return kMiss;
}
EXAMPLE #1:
This one happens in the function “float3 operator-(float3 a, float3 b)” which is called from intersect.
CUDA Memory Checker detected 32 threads caused an access violation:
Launch Parameters
CUcontext = 23f7553c750
CUstream = 23f77b8eb70
CUmodule = 23f0313dcd0
CUfunction = 23f0325dfa0
FunctionName = _ZN12CuBDPTKernel16evalContributionEy
GridId = 40
gridDim = {32,64,1}
blockDim = {8,4,1}
sharedSize = 256
Parameters:
surface = 6
Parameters (raw):
0x00000006 0x00000000
GPU State:
Address Size Type Mem Block Thread blockIdx threadIdx PC Source
23f7bfff158 4 adr ld g 0 0 {0,0,0} {0,0,0} Zmi6float3S+000778 c:\projects\ilmjru\gpugi\common\inc\helper_math.h:581
23f7bfff158 4 adr ld g 0 1 {0,0,0} {1,0,0} Zmi6float3S+000778 c:\projects\ilmjru\gpugi\common\inc\helper_math.h:581
23f7bfff158 4 adr ld g 0 2 {0,0,0} {2,0,0} Zmi6float3S+000778 c:\projects\ilmjru\gpugi\common\inc\helper_math.h:581
23f7bfff158 4 adr ld g 0 3 {0,0,0} {3,0,0} Zmi6float3S+000778 c:\projects\ilmjru\gpugi\common\inc\helper_math.h:581
23f7bfff158 4 adr ld g 0 4 {0,0,0} {4,0,0} Zmi6float3S+000778 c:\projects\ilmjru\gpugi\common\inc\helper_math.h:581
23f7bfff158 4 adr ld g 0 5 {0,0,0} {5,0,0} Zmi6float3S+000778 c:\projects\ilmjru\gpugi\common\inc\helper_math.h:581
23f7bfff158 4 adr ld g 0 6 {0,0,0} {6,0,0} Zmi6float3S+000778 c:\projects\ilmjru\gpugi\common\inc\helper_math.h:581
23f7bfff158 4 adr ld g 0 7 {0,0,0} {7,0,0} Zmi6float3S+000778 c:\projects\ilmjru\gpugi\common\inc\helper_math.h:581
23f7bfff158 4 adr ld g 0 8 {0,0,0} {0,1,0} Zmi6float3S+000778 c:\projects\ilmjru\gpugi\common\inc\helper_math.h:581
23f7bfff158 4 adr ld g 0 9 {0,0,0} {1,1,0} Zmi6float3S+000778 c:\projects\ilmjru\gpugi\common\inc\helper_math.h:581
23f7bfff158 4 adr ld g 0 10 {0,0,0} {2,1,0} Zmi6float3S+000778 c:\projects\ilmjru\gpugi\common\inc\helper_math.h:581
23f7bfff158 4 adr ld g 0 11 {0,0,0} {3,1,0} Zmi6float3S+000778 c:\projects\ilmjru\gpugi\common\inc\helper_math.h:581
23f7bfff158 4 adr ld g 0 12 {0,0,0} {4,1,0} Zmi6float3S+000778 c:\projects\ilmjru\gpugi\common\inc\helper_math.h:581
23f7bfff158 4 adr ld g 0 13 {0,0,0} {5,1,0} Zmi6float3S+000778 c:\projects\ilmjru\gpugi\common\inc\helper_math.h:581
23f7bfff158 4 adr ld g 0 14 {0,0,0} {6,1,0} Zmi6float3S+000778 c:\projects\ilmjru\gpugi\common\inc\helper_math.h:581
23f7bfff158 4 adr ld g 0 15 {0,0,0} {7,1,0} Zmi6float3S+000778 c:\projects\ilmjru\gpugi\common\inc\helper_math.h:581
23f7bfff158 4 adr ld g 0 16 {0,0,0} {0,2,0} Zmi6float3S+000778 c:\projects\ilmjru\gpugi\common\inc\helper_math.h:581
23f7bfff158 4 adr ld g 0 17 {0,0,0} {1,2,0} Zmi6float3S+000778 c:\projects\ilmjru\gpugi\common\inc\helper_math.h:581
23f7bfff158 4 adr ld g 0 18 {0,0,0} {2,2,0} Zmi6float3S+000778 c:\projects\ilmjru\gpugi\common\inc\helper_math.h:581
23f7bfff158 4 adr ld g 0 19 {0,0,0} {3,2,0} Zmi6float3S+000778 c:\projects\ilmjru\gpugi\common\inc\helper_math.h:581
23f7bfff158 4 adr ld g 0 20 {0,0,0} {4,2,0} Zmi6float3S+000778 c:\projects\ilmjru\gpugi\common\inc\helper_math.h:581
23f7bfff158 4 adr ld g 0 21 {0,0,0} {5,2,0} Zmi6float3S+000778 c:\projects\ilmjru\gpugi\common\inc\helper_math.h:581
23f7bfff158 4 adr ld g 0 22 {0,0,0} {6,2,0} Zmi6float3S+000778 c:\projects\ilmjru\gpugi\common\inc\helper_math.h:581
23f7bfff158 4 adr ld g 0 23 {0,0,0} {7,2,0} Zmi6float3S+000778 c:\projects\ilmjru\gpugi\common\inc\helper_math.h:581
23f7bfff158 4 adr ld g 0 24 {0,0,0} {0,3,0} Zmi6float3S+000778 c:\projects\ilmjru\gpugi\common\inc\helper_math.h:581
23f7bfff158 4 adr ld g 0 25 {0,0,0} {1,3,0} Zmi6float3S+000778 c:\projects\ilmjru\gpugi\common\inc\helper_math.h:581
23f7bfff158 4 adr ld g 0 26 {0,0,0} {2,3,0} Zmi6float3S+000778 c:\projects\ilmjru\gpugi\common\inc\helper_math.h:581
23f7bfff158 4 adr ld g 0 27 {0,0,0} {3,3,0} Zmi6float3S+000778 c:\projects\ilmjru\gpugi\common\inc\helper_math.h:581
23f7bfff158 4 adr ld g 0 28 {0,0,0} {4,3,0} Zmi6float3S+000778 c:\projects\ilmjru\gpugi\common\inc\helper_math.h:581
23f7bfff158 4 adr ld g 0 29 {0,0,0} {5,3,0} Zmi6float3S+000778 c:\projects\ilmjru\gpugi\common\inc\helper_math.h:581
23f7bfff158 4 adr ld g 0 30 {0,0,0} {6,3,0} Zmi6float3S+000778 c:\projects\ilmjru\gpugi\common\inc\helper_math.h:581
23f7bfff158 4 adr ld g 0 31 {0,0,0} {7,3,0} Zmi6float3S+000778 c:\projects\ilmjru\gpugi\common\inc\helper_math.h:581
Summary of access violations:
c:\projects\ilmjru\gpugi\common\inc\helper_math.h(581): error MemoryChecker: #misaligned=0 #invalidAddress=32
Memory Checker detected 32 access violations.
error = access violation on load (global memory)
gridid = 40
blockIdx = {0,0,0}
threadIdx = {0,0,0}
address = 0x23f7bfff158
accessSize = 4
EXAMPLE #2:
This one is happening in shared memory, and I don’t even use shared memory, so I’m guessing that some local variables are placed there because there is no more register space. The violation happens in the function “void* memset(void *dest, int c, size_t n)”, which is reported inside my intersect function; I never call it myself, so I don’t know why it’s being called.
CUDA Memory Checker detected 31 threads caused an access violation:
Launch Parameters
CUcontext = 23f7553c750
CUstream = 23f77b8eb70
CUmodule = 23f0313dcd0
CUfunction = 23f0325dfa0
FunctionName = _ZN12CuBDPTKernel16evalContributionEy
GridId = 40
gridDim = {32,64,1}
blockDim = {8,4,1}
sharedSize = 256
Parameters:
surface = 6
Parameters (raw):
0x00000006 0x00000000
GPU State:
Address Size Type Mem Block Thread blockIdx threadIdx PC Source
23f00fff185 1 adr st s 18 1 {18,0,0} {1,0,0} _ZN10CuGeometry9intersectERK8TriangleRK3RayRfS6_Rb+000d30 c:\program files\nvidia gpu computing toolkit\cuda\v8.0\include\device_functions.hpp:422
23f00fff185 1 adr st s 18 2 {18,0,0} {2,0,0} _ZN10CuGeometry9intersectERK8TriangleRK3RayRfS6_Rb+000d30 c:\program files\nvidia gpu computing toolkit\cuda\v8.0\include\device_functions.hpp:422
23f00fff185 1 adr st s 18 3 {18,0,0} {3,0,0} _ZN10CuGeometry9intersectERK8TriangleRK3RayRfS6_Rb+000d30 c:\program files\nvidia gpu computing toolkit\cuda\v8.0\include\device_functions.hpp:422
23f00fff185 1 adr st s 18 4 {18,0,0} {4,0,0} _ZN10CuGeometry9intersectERK8TriangleRK3RayRfS6_Rb+000d30 c:\program files\nvidia gpu computing toolkit\cuda\v8.0\include\device_functions.hpp:422
23f00fff185 1 adr st s 18 5 {18,0,0} {5,0,0} _ZN10CuGeometry9intersectERK8TriangleRK3RayRfS6_Rb+000d30 c:\program files\nvidia gpu computing toolkit\cuda\v8.0\include\device_functions.hpp:422
23f00fff185 1 adr st s 18 6 {18,0,0} {6,0,0} _ZN10CuGeometry9intersectERK8TriangleRK3RayRfS6_Rb+000d30 c:\program files\nvidia gpu computing toolkit\cuda\v8.0\include\device_functions.hpp:422
23f00fff185 1 adr st s 18 7 {18,0,0} {7,0,0} _ZN10CuGeometry9intersectERK8TriangleRK3RayRfS6_Rb+000d30 c:\program files\nvidia gpu computing toolkit\cuda\v8.0\include\device_functions.hpp:422
23f00fff185 1 adr st s 18 8 {18,0,0} {0,1,0} _ZN10CuGeometry9intersectERK8TriangleRK3RayRfS6_Rb+000d30 c:\program files\nvidia gpu computing toolkit\cuda\v8.0\include\device_functions.hpp:422
23f00fff185 1 adr st s 18 9 {18,0,0} {1,1,0} _ZN10CuGeometry9intersectERK8TriangleRK3RayRfS6_Rb+000d30 c:\program files\nvidia gpu computing toolkit\cuda\v8.0\include\device_functions.hpp:422
23f00fff185 1 adr st s 18 10 {18,0,0} {2,1,0} _ZN10CuGeometry9intersectERK8TriangleRK3RayRfS6_Rb+000d30 c:\program files\nvidia gpu computing toolkit\cuda\v8.0\include\device_functions.hpp:422
23f00fff185 1 adr st s 18 11 {18,0,0} {3,1,0} _ZN10CuGeometry9intersectERK8TriangleRK3RayRfS6_Rb+000d30 c:\program files\nvidia gpu computing toolkit\cuda\v8.0\include\device_functions.hpp:422
23f00fff185 1 adr st s 18 12 {18,0,0} {4,1,0} _ZN10CuGeometry9intersectERK8TriangleRK3RayRfS6_Rb+000d30 c:\program files\nvidia gpu computing toolkit\cuda\v8.0\include\device_functions.hpp:422
23f00fff185 1 adr st s 18 13 {18,0,0} {5,1,0} _ZN10CuGeometry9intersectERK8TriangleRK3RayRfS6_Rb+000d30 c:\program files\nvidia gpu computing toolkit\cuda\v8.0\include\device_functions.hpp:422
23f00fff185 1 adr st s 18 14 {18,0,0} {6,1,0} _ZN10CuGeometry9intersectERK8TriangleRK3RayRfS6_Rb+000d30 c:\program files\nvidia gpu computing toolkit\cuda\v8.0\include\device_functions.hpp:422
23f00fff185 1 adr st s 18 15 {18,0,0} {7,1,0} _ZN10CuGeometry9intersectERK8TriangleRK3RayRfS6_Rb+000d30 c:\program files\nvidia gpu computing toolkit\cuda\v8.0\include\device_functions.hpp:422
23f00fff185 1 adr st s 18 16 {18,0,0} {0,2,0} _ZN10CuGeometry9intersectERK8TriangleRK3RayRfS6_Rb+000d30 c:\program files\nvidia gpu computing toolkit\cuda\v8.0\include\device_functions.hpp:422
23f00fff185 1 adr st s 18 17 {18,0,0} {1,2,0} _ZN10CuGeometry9intersectERK8TriangleRK3RayRfS6_Rb+000d30 c:\program files\nvidia gpu computing toolkit\cuda\v8.0\include\device_functions.hpp:422
23f00fff185 1 adr st s 18 18 {18,0,0} {2,2,0} _ZN10CuGeometry9intersectERK8TriangleRK3RayRfS6_Rb+000d30 c:\program files\nvidia gpu computing toolkit\cuda\v8.0\include\device_functions.hpp:422
23f00fff185 1 adr st s 18 19 {18,0,0} {3,2,0} _ZN10CuGeometry9intersectERK8TriangleRK3RayRfS6_Rb+000d30 c:\program files\nvidia gpu computing toolkit\cuda\v8.0\include\device_functions.hpp:422
23f00fff185 1 adr st s 18 20 {18,0,0} {4,2,0} _ZN10CuGeometry9intersectERK8TriangleRK3RayRfS6_Rb+000d30 c:\program files\nvidia gpu computing toolkit\cuda\v8.0\include\device_functions.hpp:422
23f00fff185 1 adr st s 18 21 {18,0,0} {5,2,0} _ZN10CuGeometry9intersectERK8TriangleRK3RayRfS6_Rb+000d30 c:\program files\nvidia gpu computing toolkit\cuda\v8.0\include\device_functions.hpp:422
23f00fff185 1 adr st s 18 22 {18,0,0} {6,2,0} _ZN10CuGeometry9intersectERK8TriangleRK3RayRfS6_Rb+000d30 c:\program files\nvidia gpu computing toolkit\cuda\v8.0\include\device_functions.hpp:422
23f00fff185 1 adr st s 18 23 {18,0,0} {7,2,0} _ZN10CuGeometry9intersectERK8TriangleRK3RayRfS6_Rb+000d30 c:\program files\nvidia gpu computing toolkit\cuda\v8.0\include\device_functions.hpp:422
23f00fff185 1 adr st s 18 24 {18,0,0} {0,3,0} _ZN10CuGeometry9intersectERK8TriangleRK3RayRfS6_Rb+000d30 c:\program files\nvidia gpu computing toolkit\cuda\v8.0\include\device_functions.hpp:422
23f00fff185 1 adr st s 18 25 {18,0,0} {1,3,0} _ZN10CuGeometry9intersectERK8TriangleRK3RayRfS6_Rb+000d30 c:\program files\nvidia gpu computing toolkit\cuda\v8.0\include\device_functions.hpp:422
23f00fff185 1 adr st s 18 26 {18,0,0} {2,3,0} _ZN10CuGeometry9intersectERK8TriangleRK3RayRfS6_Rb+000d30 c:\program files\nvidia gpu computing toolkit\cuda\v8.0\include\device_functions.hpp:422
23f00fff185 1 adr st s 18 27 {18,0,0} {3,3,0} _ZN10CuGeometry9intersectERK8TriangleRK3RayRfS6_Rb+000d30 c:\program files\nvidia gpu computing toolkit\cuda\v8.0\include\device_functions.hpp:422
23f00fff185 1 adr st s 18 28 {18,0,0} {4,3,0} _ZN10CuGeometry9intersectERK8TriangleRK3RayRfS6_Rb+000d30 c:\program files\nvidia gpu computing toolkit\cuda\v8.0\include\device_functions.hpp:422
23f00fff185 1 adr st s 18 29 {18,0,0} {5,3,0} _ZN10CuGeometry9intersectERK8TriangleRK3RayRfS6_Rb+000d30 c:\program files\nvidia gpu computing toolkit\cuda\v8.0\include\device_functions.hpp:422
23f00fff185 1 adr st s 18 30 {18,0,0} {6,3,0} _ZN10CuGeometry9intersectERK8TriangleRK3RayRfS6_Rb+000d30 c:\program files\nvidia gpu computing toolkit\cuda\v8.0\include\device_functions.hpp:422
23f00fff185 1 adr st s 18 31 {18,0,0} {7,3,0} _ZN10CuGeometry9intersectERK8TriangleRK3RayRfS6_Rb+000d30 c:\program files\nvidia gpu computing toolkit\cuda\v8.0\include\device_functions.hpp:422
Summary of access violations:
c:\program files\nvidia gpu computing toolkit\cuda\v8.0\include\device_functions.hpp(422): error MemoryChecker: #misaligned=0 #invalidAddress=32
Memory Checker detected 31 access violations.
error = access violation on store (shared memory)
gridid = 40
blockIdx = {18,0,0}
threadIdx = {1,0,0}
address = 0x23f00fff185
accessSize = 1
EXAMPLE #3:
Similar to the first example. It’s happening in the function “float dot(float3 a, float3 b)”, which is called from intersect. Again, it’s in shared memory, which I don’t use.
CUDA Memory Checker detected 31 threads caused an access violation:
Launch Parameters
CUcontext = 23f7553c750
CUstream = 23f77b8eb70
CUmodule = 23f0313dcd0
CUfunction = 23f0325dfa0
FunctionName = _ZN12CuBDPTKernel16evalContributionEy
GridId = 40
gridDim = {32,64,1}
blockDim = {8,4,1}
sharedSize = 256
Parameters:
surface = 6
Parameters (raw):
0x00000006 0x00000000
GPU State:
Address Size Type Mem Block Thread blockIdx threadIdx PC Source
23f00fff160 4 adr st s 38 1 {6,1,0} {1,0,0} Z3dot6float3S+000470 c:\projects\ilmjru\gpugi\common\inc\helper_math.h:1248
23f00fff160 4 adr st s 38 2 {6,1,0} {2,0,0} Z3dot6float3S+000470 c:\projects\ilmjru\gpugi\common\inc\helper_math.h:1248
23f00fff160 4 adr st s 38 3 {6,1,0} {3,0,0} Z3dot6float3S+000470 c:\projects\ilmjru\gpugi\common\inc\helper_math.h:1248
23f00fff160 4 adr st s 38 4 {6,1,0} {4,0,0} Z3dot6float3S+000470 c:\projects\ilmjru\gpugi\common\inc\helper_math.h:1248
23f00fff160 4 adr st s 38 5 {6,1,0} {5,0,0} Z3dot6float3S+000470 c:\projects\ilmjru\gpugi\common\inc\helper_math.h:1248
23f00fff160 4 adr st s 38 6 {6,1,0} {6,0,0} Z3dot6float3S+000470 c:\projects\ilmjru\gpugi\common\inc\helper_math.h:1248
23f00fff160 4 adr st s 38 7 {6,1,0} {7,0,0} Z3dot6float3S+000470 c:\projects\ilmjru\gpugi\common\inc\helper_math.h:1248
23f00fff160 4 adr st s 38 8 {6,1,0} {0,1,0} Z3dot6float3S+000470 c:\projects\ilmjru\gpugi\common\inc\helper_math.h:1248
23f00fff160 4 adr st s 38 9 {6,1,0} {1,1,0} Z3dot6float3S+000470 c:\projects\ilmjru\gpugi\common\inc\helper_math.h:1248
23f00fff160 4 adr st s 38 10 {6,1,0} {2,1,0} Z3dot6float3S+000470 c:\projects\ilmjru\gpugi\common\inc\helper_math.h:1248
23f00fff160 4 adr st s 38 11 {6,1,0} {3,1,0} Z3dot6float3S+000470 c:\projects\ilmjru\gpugi\common\inc\helper_math.h:1248
23f00fff160 4 adr st s 38 12 {6,1,0} {4,1,0} Z3dot6float3S+000470 c:\projects\ilmjru\gpugi\common\inc\helper_math.h:1248
23f00fff160 4 adr st s 38 13 {6,1,0} {5,1,0} Z3dot6float3S+000470 c:\projects\ilmjru\gpugi\common\inc\helper_math.h:1248
23f00fff160 4 adr st s 38 14 {6,1,0} {6,1,0} Z3dot6float3S+000470 c:\projects\ilmjru\gpugi\common\inc\helper_math.h:1248
23f00fff160 4 adr st s 38 15 {6,1,0} {7,1,0} Z3dot6float3S+000470 c:\projects\ilmjru\gpugi\common\inc\helper_math.h:1248
23f00fff160 4 adr st s 38 16 {6,1,0} {0,2,0} Z3dot6float3S+000470 c:\projects\ilmjru\gpugi\common\inc\helper_math.h:1248
23f00fff160 4 adr st s 38 17 {6,1,0} {1,2,0} Z3dot6float3S+000470 c:\projects\ilmjru\gpugi\common\inc\helper_math.h:1248
23f00fff160 4 adr st s 38 18 {6,1,0} {2,2,0} Z3dot6float3S+000470 c:\projects\ilmjru\gpugi\common\inc\helper_math.h:1248
23f00fff160 4 adr st s 38 19 {6,1,0} {3,2,0} Z3dot6float3S+000470 c:\projects\ilmjru\gpugi\common\inc\helper_math.h:1248
23f00fff160 4 adr st s 38 20 {6,1,0} {4,2,0} Z3dot6float3S+000470 c:\projects\ilmjru\gpugi\common\inc\helper_math.h:1248
23f00fff160 4 adr st s 38 21 {6,1,0} {5,2,0} Z3dot6float3S+000470 c:\projects\ilmjru\gpugi\common\inc\helper_math.h:1248
23f00fff160 4 adr st s 38 22 {6,1,0} {6,2,0} Z3dot6float3S+000470 c:\projects\ilmjru\gpugi\common\inc\helper_math.h:1248
23f00fff160 4 adr st s 38 23 {6,1,0} {7,2,0} Z3dot6float3S+000470 c:\projects\ilmjru\gpugi\common\inc\helper_math.h:1248
23f00fff160 4 adr st s 38 24 {6,1,0} {0,3,0} Z3dot6float3S+000470 c:\projects\ilmjru\gpugi\common\inc\helper_math.h:1248
23f00fff160 4 adr st s 38 25 {6,1,0} {1,3,0} Z3dot6float3S+000470 c:\projects\ilmjru\gpugi\common\inc\helper_math.h:1248
23f00fff160 4 adr st s 38 26 {6,1,0} {2,3,0} Z3dot6float3S+000470 c:\projects\ilmjru\gpugi\common\inc\helper_math.h:1248
23f00fff160 4 adr st s 38 27 {6,1,0} {3,3,0} Z3dot6float3S+000470 c:\projects\ilmjru\gpugi\common\inc\helper_math.h:1248
23f00fff160 4 adr st s 38 28 {6,1,0} {4,3,0} Z3dot6float3S+000470 c:\projects\ilmjru\gpugi\common\inc\helper_math.h:1248
23f00fff160 4 adr st s 38 29 {6,1,0} {5,3,0} Z3dot6float3S+000470 c:\projects\ilmjru\gpugi\common\inc\helper_math.h:1248
23f00fff160 4 adr st s 38 30 {6,1,0} {6,3,0} Z3dot6float3S+000470 c:\projects\ilmjru\gpugi\common\inc\helper_math.h:1248
23f00fff160 4 adr st s 38 31 {6,1,0} {7,3,0} Z3dot6float3S+000470 c:\projects\ilmjru\gpugi\common\inc\helper_math.h:1248
Summary of access violations:
c:\projects\ilmjru\gpugi\common\inc\helper_math.h(1248): error MemoryChecker: #misaligned=0 #invalidAddress=32
Memory Checker detected 31 access violations.
error = access violation on store (shared memory)
gridid = 40
blockIdx = {6,1,0}
threadIdx = {1,0,0}
address = 0x23f00fff160
accessSize = 4
EXAMPLE #4:
This one is a misaligned store in global memory, caused by the memset function mentioned above.
CUDA Memory Checker detected 32 threads caused an access violation:
Launch Parameters
CUcontext = 23f7553c750
CUstream = 23f77b8eb70
CUmodule = 23f0313dcd0
CUfunction = 23f0325dfa0
FunctionName = _ZN12CuBDPTKernel16evalContributionEy
GridId = 40
gridDim = {32,64,1}
blockDim = {8,4,1}
sharedSize = 256
Parameters:
surface = 6
Parameters (raw):
0x00000006 0x00000000
GPU State:
Address Size Type Mem Block Thread blockIdx threadIdx PC Source
23f7bfff181 1 mis st g 48 0 {16,1,0} {0,0,0} _ZN10CuGeometry9intersectERK8TriangleRK3RayRfS6_Rb+000d30 c:\program files\nvidia gpu computing toolkit\cuda\v8.0\include\device_functions.hpp:422
23f7bfff181 1 mis st g 48 1 {16,1,0} {1,0,0} _ZN10CuGeometry9intersectERK8TriangleRK3RayRfS6_Rb+000d30 c:\program files\nvidia gpu computing toolkit\cuda\v8.0\include\device_functions.hpp:422
23f7bfff181 1 mis st g 48 2 {16,1,0} {2,0,0} _ZN10CuGeometry9intersectERK8TriangleRK3RayRfS6_Rb+000d30 c:\program files\nvidia gpu computing toolkit\cuda\v8.0\include\device_functions.hpp:422
23f7bfff181 1 mis st g 48 3 {16,1,0} {3,0,0} _ZN10CuGeometry9intersectERK8TriangleRK3RayRfS6_Rb+000d30 c:\program files\nvidia gpu computing toolkit\cuda\v8.0\include\device_functions.hpp:422
23f7bfff181 1 mis st g 48 4 {16,1,0} {4,0,0} _ZN10CuGeometry9intersectERK8TriangleRK3RayRfS6_Rb+000d30 c:\program files\nvidia gpu computing toolkit\cuda\v8.0\include\device_functions.hpp:422
23f7bfff181 1 mis st g 48 5 {16,1,0} {5,0,0} _ZN10CuGeometry9intersectERK8TriangleRK3RayRfS6_Rb+000d30 c:\program files\nvidia gpu computing toolkit\cuda\v8.0\include\device_functions.hpp:422
23f7bfff181 1 mis st g 48 6 {16,1,0} {6,0,0} _ZN10CuGeometry9intersectERK8TriangleRK3RayRfS6_Rb+000d30 c:\program files\nvidia gpu computing toolkit\cuda\v8.0\include\device_functions.hpp:422
23f7bfff181 1 mis st g 48 7 {16,1,0} {7,0,0} _ZN10CuGeometry9intersectERK8TriangleRK3RayRfS6_Rb+000d30 c:\program files\nvidia gpu computing toolkit\cuda\v8.0\include\device_functions.hpp:422
23f7bfff181 1 mis st g 48 8 {16,1,0} {0,1,0} _ZN10CuGeometry9intersectERK8TriangleRK3RayRfS6_Rb+000d30 c:\program files\nvidia gpu computing toolkit\cuda\v8.0\include\device_functions.hpp:422
23f7bfff181 1 mis st g 48 9 {16,1,0} {1,1,0} _ZN10CuGeometry9intersectERK8TriangleRK3RayRfS6_Rb+000d30 c:\program files\nvidia gpu computing toolkit\cuda\v8.0\include\device_functions.hpp:422
23f7bfff181 1 mis st g 48 10 {16,1,0} {2,1,0} _ZN10CuGeometry9intersectERK8TriangleRK3RayRfS6_Rb+000d30 c:\program files\nvidia gpu computing toolkit\cuda\v8.0\include\device_functions.hpp:422
23f7bfff181 1 mis st g 48 11 {16,1,0} {3,1,0} _ZN10CuGeometry9intersectERK8TriangleRK3RayRfS6_Rb+000d30 c:\program files\nvidia gpu computing toolkit\cuda\v8.0\include\device_functions.hpp:422
23f7bfff181 1 mis st g 48 12 {16,1,0} {4,1,0} _ZN10CuGeometry9intersectERK8TriangleRK3RayRfS6_Rb+000d30 c:\program files\nvidia gpu computing toolkit\cuda\v8.0\include\device_functions.hpp:422
23f7bfff181 1 mis st g 48 13 {16,1,0} {5,1,0} _ZN10CuGeometry9intersectERK8TriangleRK3RayRfS6_Rb+000d30 c:\program files\nvidia gpu computing toolkit\cuda\v8.0\include\device_functions.hpp:422
23f7bfff181 1 mis st g 48 14 {16,1,0} {6,1,0} _ZN10CuGeometry9intersectERK8TriangleRK3RayRfS6_Rb+000d30 c:\program files\nvidia gpu computing toolkit\cuda\v8.0\include\device_functions.hpp:422
23f7bfff181 1 mis st g 48 15 {16,1,0} {7,1,0} _ZN10CuGeometry9intersectERK8TriangleRK3RayRfS6_Rb+000d30 c:\program files\nvidia gpu computing toolkit\cuda\v8.0\include\device_functions.hpp:422
23f7bfff181 1 mis st g 48 16 {16,1,0} {0,2,0} _ZN10CuGeometry9intersectERK8TriangleRK3RayRfS6_Rb+000d30 c:\program files\nvidia gpu computing toolkit\cuda\v8.0\include\device_functions.hpp:422
23f7bfff181 1 mis st g 48 17 {16,1,0} {1,2,0} _ZN10CuGeometry9intersectERK8TriangleRK3RayRfS6_Rb+000d30 c:\program files\nvidia gpu computing toolkit\cuda\v8.0\include\device_functions.hpp:422
23f7bfff181 1 mis st g 48 18 {16,1,0} {2,2,0} _ZN10CuGeometry9intersectERK8TriangleRK3RayRfS6_Rb+000d30 c:\program files\nvidia gpu computing toolkit\cuda\v8.0\include\device_functions.hpp:422
23f7bfff181 1 mis st g 48 19 {16,1,0} {3,2,0} _ZN10CuGeometry9intersectERK8TriangleRK3RayRfS6_Rb+000d30 c:\program files\nvidia gpu computing toolkit\cuda\v8.0\include\device_functions.hpp:422
23f7bfff181 1 mis st g 48 20 {16,1,0} {4,2,0} _ZN10CuGeometry9intersectERK8TriangleRK3RayRfS6_Rb+000d30 c:\program files\nvidia gpu computing toolkit\cuda\v8.0\include\device_functions.hpp:422
23f7bfff181 1 mis st g 48 21 {16,1,0} {5,2,0} _ZN10CuGeometry9intersectERK8TriangleRK3RayRfS6_Rb+000d30 c:\program files\nvidia gpu computing toolkit\cuda\v8.0\include\device_functions.hpp:422
23f7bfff181 1 mis st g 48 22 {16,1,0} {6,2,0} _ZN10CuGeometry9intersectERK8TriangleRK3RayRfS6_Rb+000d30 c:\program files\nvidia gpu computing toolkit\cuda\v8.0\include\device_functions.hpp:422
23f7bfff181 1 mis st g 48 23 {16,1,0} {7,2,0} _ZN10CuGeometry9intersectERK8TriangleRK3RayRfS6_Rb+000d30 c:\program files\nvidia gpu computing toolkit\cuda\v8.0\include\device_functions.hpp:422
23f7bfff181 1 mis st g 48 24 {16,1,0} {0,3,0} _ZN10CuGeometry9intersectERK8TriangleRK3RayRfS6_Rb+000d30 c:\program files\nvidia gpu computing toolkit\cuda\v8.0\include\device_functions.hpp:422
23f7bfff181 1 mis st g 48 25 {16,1,0} {1,3,0} _ZN10CuGeometry9intersectERK8TriangleRK3RayRfS6_Rb+000d30 c:\program files\nvidia gpu computing toolkit\cuda\v8.0\include\device_functions.hpp:422
23f7bfff181 1 mis st g 48 26 {16,1,0} {2,3,0} _ZN10CuGeometry9intersectERK8TriangleRK3RayRfS6_Rb+000d30 c:\program files\nvidia gpu computing toolkit\cuda\v8.0\include\device_functions.hpp:422
23f7bfff181 1 mis st g 48 27 {16,1,0} {3,3,0} _ZN10CuGeometry9intersectERK8TriangleRK3RayRfS6_Rb+000d30 c:\program files\nvidia gpu computing toolkit\cuda\v8.0\include\device_functions.hpp:422
23f7bfff181 1 mis st g 48 28 {16,1,0} {4,3,0} _ZN10CuGeometry9intersectERK8TriangleRK3RayRfS6_Rb+000d30 c:\program files\nvidia gpu computing toolkit\cuda\v8.0\include\device_functions.hpp:422
23f7bfff181 1 mis st g 48 29 {16,1,0} {5,3,0} _ZN10CuGeometry9intersectERK8TriangleRK3RayRfS6_Rb+000d30 c:\program files\nvidia gpu computing toolkit\cuda\v8.0\include\device_functions.hpp:422
23f7bfff181 1 mis st g 48 30 {16,1,0} {6,3,0} _ZN10CuGeometry9intersectERK8TriangleRK3RayRfS6_Rb+000d30 c:\program files\nvidia gpu computing toolkit\cuda\v8.0\include\device_functions.hpp:422
23f7bfff181 1 mis st g 48 31 {16,1,0} {7,3,0} _ZN10CuGeometry9intersectERK8TriangleRK3RayRfS6_Rb+000d30 c:\program files\nvidia gpu computing toolkit\cuda\v8.0\include\device_functions.hpp:422
Summary of access violations:
c:\program files\nvidia gpu computing toolkit\cuda\v8.0\include\device_functions.hpp(422): error MemoryChecker: #misaligned=32 #invalidAddress=0
Memory Checker detected 32 access violations.
error = misaligned store (global memory)
gridid = 40
blockIdx = {16,1,0}
threadIdx = {0,0,0}
address = 0x23f7bfff181
accessSize = 1
There are a bunch of other variants, but they all happen in the intersect function, always with local variables. The structs I use (Triangle, etc.) are all defined with 16-byte alignment (“alignas(16)”).
This is a school project and the deadline is very close. I’ve been working on this non-stop for two days, but I have no idea why it happens. Could you guys help me out, please?
Thanks in advance,
David