Hi all…
I’m completely at a loss as to why the code below works one way and not the other. I have a gray image that I’ve copied into uchar4 memory allocated with cudaMallocPitch, and I have then bound that memory to a 2D texture.
From what I’ve read in the books I have on launching kernels, I would expect the code to take an image from the bound pitched memory and copy it into the memory I will display using OpenGL. However, it does not work and I’m not certain why. Any help — a link to a good tutorial explaining why this doesn’t work, or suggestions on what is wrong — is greatly appreciated! (Now the code)
display8bitImage (from the CUDA 5.5 Sobel Filter example code)
// display8bitImage: converts the first channel of the bound 2D texture `tex`
// into an opaque gray RGBA image in `original`.
//
// Launch layout: rows are distributed over blockIdx.x and the columns of a
// row over threadIdx.x. Both loops stride by the launch size, so any 1D
// grid/block works; `<<<h, N>>>` gives exactly one block per image row.
// blockIdx.y / threadIdx.y are not used for indexing.
//
//   original - destination image; row r starts at (char *)original + r*Pitch
//   Pitch    - destination row stride in BYTES
//   w, h     - image width and height in pixels
//   fscale   - multiplier applied to the sampled value before clamping to [0, 255]
__global__ void
display8bitImage(uchar4 *original, unsigned int Pitch, int w, int h, float fscale)
{
    // Bounding the row loop by h keeps blocks beyond the image height from
    // writing (or sampling) outside the image.
    for (int row = blockIdx.x; row < h; row += gridDim.x)
    {
        // Pitched addressing: step in bytes, then reinterpret as pixels.
        uchar4 *rowPtr = (uchar4 *)(((char *) original) + (size_t) row*Pitch);

        for (int i = threadIdx.x; i < w; i += blockDim.x)
        {
            // Sampled with texel (unnormalized) coordinates.
            const uchar4 texel = tex2D(tex, (float) i, (float) row);
            const unsigned char gray =
                (unsigned char) min(max(texel.x * fscale, 0.f), 255.f);

            // One 4-byte store instead of four read-modify-write component stores.
            rowPtr[i] = make_uchar4(gray, gray, gray, 255);
        }
    }
}
DOES WORK …
// Grid = ih blocks (1D), block = 384 threads (1D): the kernel uses blockIdx.x as the
// row index and strides threadIdx.x across the columns, so every row gets one block.
display8bitImage<<<ih, 384>>>(pos, d_pitchBytes, iw, ih, fScale);
The following DOES NOT WORK …
// Tile edge lengths used to build the launch dimensions below.
#define BLOCKXDIM 16
#define BLOCKYDIM 16
....
// Describe the texels as uchar4 and bind the pitched buffer as an iw x ih 2D texture.
cudaChannelFormatDesc desc = cudaCreateChannelDesc<uchar4>();
checkCudaErrors(cudaBindTexture2D(0, &tex, d_pitchMem, &desc, iw, ih, d_pitchBytes));
// NOTE(review): in kernel<<<grid, block>>> the FIRST argument is the grid (number of
// blocks) and the SECOND is the block (threads per block). As written the two are
// swapped: the grid is fixed at 16x16 blocks while the thread count per block grows
// with the image size (iDivUp is presumably a ceiling division - helper not shown).
// For large images that product can exceed the per-block thread limit and the launch
// fails; checking cudaGetLastError() after the launch would reveal this.
dim3 numBlocks(BLOCKXDIM,BLOCKYDIM);
dim3 numThreadsPerBlock(iDivUp(iw,BLOCKXDIM),iDivUp(ih,BLOCKYDIM));
// NOTE(review): display8bitImage indexes rows with blockIdx.x only, so with
// gridDim.x == 16 at most rows 0..15 are ever written, regardless of the image height.
display8bitImage<<<numBlocks,numThreadsPerBlock>>>(pos, d_pitchBytes, iw, ih, fScale);
The following example does not work either, even though from what I can gather from the literature it seems like it should…
// Describe the texels as uchar4 and bind the pitched buffer as an iw x ih 2D texture.
cudaChannelFormatDesc desc = cudaCreateChannelDesc<uchar4>();
checkCudaErrors(cudaBindTexture2D(0, &tex, d_pitchMem, &desc, iw, ih, d_pitchBytes));
// NOTE(review): grid and block are swapped relative to kernel<<<grid, block>>>.
// For a one-thread-per-pixel 2D kernel the usual layout is a fixed block of
// BLOCKXDIM x BLOCKYDIM threads and a grid of iDivUp(iw,BLOCKXDIM) x
// iDivUp(ih,BLOCKYDIM) blocks (iDivUp presumably rounds up - helper not shown).
// As written, threads per block scales with the image and can exceed the
// per-block thread limit, in which case the launch fails.
dim3 numBlocks(BLOCKXDIM,BLOCKYDIM);
dim3 numThreadsPerBlock(iDivUp(iw,BLOCKXDIM),iDivUp(ih,BLOCKYDIM));
// No error check follows the launch; cudaGetLastError() would report a bad configuration.
display8bitImage2<<<numBlocks,numThreadsPerBlock>>>(pos, d_pitchBytes, iw, ih, fScale);
with display8bitImage2
// display8bitImage2: one-thread-per-pixel variant that converts the first
// channel of the bound 2D texture `tex` into an opaque gray RGBA image.
//
// Launch layout: 2D grid of 2D blocks covering at least w x h threads, e.g.
//   dim3 block(16, 16);
//   dim3 grid((w + 15) / 16, (h + 15) / 16);
// Threads that fall outside the image exit without touching memory.
//
//   original - destination image; row r starts at (char *)original + r*Pitch
//              (same pitched addressing as display8bitImage)
//   Pitch    - destination row stride in BYTES
//   w, h     - image width and height in pixels
//   fscale   - multiplier applied to the sampled value before clamping to [0, 255]
__global__ void
display8bitImage2(uchar4 *original, unsigned int Pitch, int w, int h, float fscale)
{
    // Global pixel coordinates. The pitch is a byte stride for addressing
    // memory and must not be folded into the row index.
    const int ix = threadIdx.x + blockDim.x*blockIdx.x;
    const int iy = threadIdx.y + blockDim.y*blockIdx.y;

    // Grid tail guard: the grid rarely divides the image evenly.
    if (ix >= w || iy >= h)
    {
        return;
    }

    // Sample with texel (unnormalized) coordinates, matching how
    // display8bitImage reads the same texture reference.
    const uchar4 texel = tex2D(tex, (float) ix, (float) iy);
    const unsigned char gray =
        (unsigned char) min(max(texel.x * fscale, 0.f), 255.0f);

    // Pitched addressing: step to the row in bytes, then index pixels.
    uchar4 *rowPtr = (uchar4 *)(((char *) original) + (size_t) iy*Pitch);

    // Replicate gray into R, G, B and set alpha (.w) to fully opaque.
    rowPtr[ix] = make_uchar4(gray, gray, gray, 255);
}