Hi everybody,
First, sorry for my English; I am a French CUDA user.
I have an invalid read error in my code that I am unable to solve…
Consider the following kernel (simplified for brevity; the strange behavior still occurs with this simplified code):
// Simplified reproduction kernel: derives a flat index into Output from the
// 2D thread coordinates and performs a single read from Output at that index.
// Only OutputWidth, ScaleFactor, SampleOffset and Output are used in this
// snippet; the remaining parameters (InteriorLower, InteriorUpperX/Y,
// InputWidth, InputHeight, OutputHeight, Stencil, RhoSamples, SupportSize,
// SupportWidth, Input, OutputOffset) are not referenced here.
// NOTE(review): `i` and `aux` are used below but never declared in this
// snippet, and WINDOWRADIUS is defined elsewhere -- presumably omitted when
// the code was simplified; confirm against the full source.
__global__ void kernel2(int InteriorLower, int InteriorUpperX, int InteriorUpperY, int InputWidth,
int InputHeight, int OutputWidth, int OutputHeight, const int *Stencil, const float *RhoSamples,
int SupportSize, int SupportWidth, int ScaleFactor, const float * Input, float * Output,
int OutputOffset, int SampleOffset) {
int iy, ix, y, x;
// Block and thread coordinates of this thread in x and y.
int bX = blockIdx.x,
tX = threadIdx.x,
bY = blockIdx.y,
tY = threadIdx.y;
// Global 2D coordinates of this thread across the whole grid.
int xx = bX * blockDim.x + tX,
yy = bY * blockDim.y + tY;
// Shift the global coordinates by WINDOWRADIUS.
// NOTE(review): if WINDOWRADIUS > 0, threads with xx or yy below it get
// negative ix/iy -- confirm the value of WINDOWRADIUS.
iy = yy - WINDOWRADIUS;
ix = xx - WINDOWRADIUS;
// Scale the shifted coordinates by ScaleFactor and subtract SampleOffset.
y = ScaleFactor * iy - SampleOffset;
x = ScaleFactor * ix - SampleOffset;
// Flat index with row stride OutputWidth, multiplied by 3 (presumably three
// interleaved values per pixel -- confirm).
// NOTE(review): nothing here checks that x and y lie inside
// [0, OutputWidth) x [0, OutputHeight), so i may be negative or past the end
// of the Output allocation for some threads.
i = 3 * (x + OutputWidth * y);
// NOTE(review): aux is never used after this load, so the compiler is likely
// free to remove the read as dead code; that would explain why cuda-memcheck
// reports nothing for this variant -- confirm by inspecting the generated
// PTX/SASS.
aux = Output[i];
}
With the kernel above, cuda-memcheck reports no error.
However, my actual code is the following (it differs only by the added last line):
// Variant of the reproduction kernel that reads Output at a flat index derived
// from the 2D thread coordinates and then writes the same value back to the
// same location.
// Only OutputWidth, ScaleFactor, SampleOffset and Output are used in this
// snippet; the remaining parameters are not referenced here.
// NOTE(review): `i` and `aux` are used below but never declared in this
// snippet, and WINDOWRADIUS is defined elsewhere -- presumably omitted when
// the code was simplified; confirm against the full source.
__global__ void kernel2(int InteriorLower, int InteriorUpperX, int InteriorUpperY, int InputWidth,
int InputHeight, int OutputWidth, int OutputHeight, const int *Stencil, const float *RhoSamples,
int SupportSize, int SupportWidth, int ScaleFactor, const float * Input, float * Output,
int OutputOffset, int SampleOffset) {
int iy, ix, y, x;
// Block and thread coordinates of this thread in x and y.
int bX = blockIdx.x,
tX = threadIdx.x,
bY = blockIdx.y,
tY = threadIdx.y;
// Global 2D coordinates of this thread across the whole grid.
int xx = bX * blockDim.x + tX,
yy = bY * blockDim.y + tY;
// Shift the global coordinates by WINDOWRADIUS.
// NOTE(review): if WINDOWRADIUS > 0, threads with xx or yy below it get
// negative ix/iy -- confirm the value of WINDOWRADIUS.
iy = yy - WINDOWRADIUS;
ix = xx - WINDOWRADIUS;
// Scale the shifted coordinates by ScaleFactor and subtract SampleOffset.
y = ScaleFactor * iy - SampleOffset;
x = ScaleFactor * ix - SampleOffset;
// Flat index with row stride OutputWidth, multiplied by 3 (presumably three
// interleaved values per pixel -- confirm).
// NOTE(review): nothing here checks that x and y lie inside
// [0, OutputWidth) x [0, OutputHeight), so i may be negative or past the end
// of the Output allocation for some threads.
i = 3 * (x + OutputWidth * y);
// Read the current value at the computed index.
aux = Output[i];
// Write the value back unchanged.
// NOTE(review): this store makes aux live, so the load above presumably can
// no longer be removed by the compiler and is actually executed; an
// out-of-range i would then surface as an invalid read -- confirm by adding a
// bounds guard on x and y before the access.
Output[i] = aux;
}
Here, using cuda-memcheck, I get the following error:
========= CUDA-MEMCHECK
========= Invalid read of size 4
========= at 0x000003f0 in sinterp.cu:717:kernel2(int, int, int, int, int, int, int, int const *, float const *, int, int, int, float const *, float*, int, int)
========= by thread (0,8,0) in block (2,0,0)
========= Address 0x03110dcc is out of bounds
...
The line reported in the error (717) corresponds to:
aux = Output[i];
in the kernel above.
So I get no error at this line in the first case, but I do get one in the second case… I don’t see why the last line
Output[i] = aux;
would affect the previous one…
Any ideas?
Thank you all in advance.
Bastien.