Hi
I’m a newcomer to CUDA — I’ve read a book and have jumped in at the deep end. :)
I’m struggling to read image data from a GL_TEXTURE_RECTANGLE texture. The information online is pretty mixed up due to SDK changes over the years. I think what I have done would work on a normal 2D texture — does anyone have any ideas about what would need to be changed?
The GL_TEXTURE_RECTANGLE is RGBA float.
My texture reference:
// File-scope texture reference (legacy pre-texture-object API) for the mapped GL image.
// NOTE(review): GL_TEXTURE_RECTANGLE is addressed with unnormalized texel coordinates;
// texRef's defaults (normalized = 0, point filtering) match that — confirm they are not
// overridden elsewhere before binding.
texture<float4, cudaTextureType2D, cudaReadModeElementType> texRef;
My simple kernel:
// Copies the RGBA float image bound to texRef into a tightly packed,
// interleaved device buffer: dev_ouput[(y * width + x) * 4 + c].
// Expects a 2D launch whose grid covers at least width x height threads;
// threads past the image edge are masked off by the bounds check.
__global__ void kernel(float *dev_ouput, int width, int height)
{
    int x = threadIdx.x + blockIdx.x * blockDim.x;
    int y = threadIdx.y + blockIdx.y * blockDim.y;
    // >= not >: x == width is already one column past the last valid texel,
    // so the old `x > width` test let an out-of-range row/column through.
    if (x >= width || y >= height) return;
    // Index by the image width, NOT by blockDim.x * gridDim.x: when the grid
    // is rounded up past the image size the grid width exceeds `width`, and
    // the old offset both wrote past the end of the buffer and left gaps.
    int offset = x + y * width;
    // Unnormalized integer coordinates; with the default point filtering the
    // coordinate is truncated, so (x, y) addresses the texel directly.
    float4 pixel = tex2D(texRef, x, y);
    dev_ouput[offset * 4 + 0] = pixel.x;
    dev_ouput[offset * 4 + 1] = pixel.y;
    dev_ouput[offset * 4 + 2] = pixel.z;
    dev_ouput[offset * 4 + 3] = pixel.w;
}
And my code that calls it:
// Map the GL rectangle texture into CUDA, read it through texRef, and copy
// the pixels back into the host buffer `dataptr`.
cudaGraphicsResource *resource;
cudaArray* array;
// NOTE(review): every one of these calls returns a cudaError_t; check them all.
// Registration in particular can fail if the GL context is not current or the
// texture is incomplete, and an early sticky error makes every later call
// fail with no visible symptom — which matches "empty buffer, no errors".
cudaError_t err;
err = cudaGraphicsGLRegisterImage(&resource, textureID, GL_TEXTURE_RECTANGLE, cudaGraphicsRegisterFlagsReadOnly);
if (err != cudaSuccess) fprintf(stderr, "register: %s\n", cudaGetErrorString(err));
err = cudaGraphicsMapResources( 1, &resource, NULL );
if (err != cudaSuccess) fprintf(stderr, "map: %s\n", cudaGetErrorString(err));
err = cudaGraphicsSubResourceGetMappedArray (&array, resource, 0, 0);
if (err != cudaSuccess) fprintf(stderr, "get array: %s\n", cudaGetErrorString(err));
err = cudaBindTextureToArray(texRef, array);
if (err != cudaSuccess) fprintf(stderr, "bind: %s\n", cudaGetErrorString(err));
//create buffer on device to store output
float *dev_ouput;
cudaMalloc((void**)&dev_ouput, renderedWidth * renderedHeight * sizeof(float) * 4);
// Round the grid up with a ceiling divide so sizes that are not multiples of
// 16 are still fully covered; the old `renderedWidth/16` truncated and left
// the right/bottom edge of the image unprocessed.
dim3 threads(16, 16);
dim3 grids((renderedWidth + threads.x - 1) / threads.x,
           (renderedHeight + threads.y - 1) / threads.y);
kernel<<<grids, threads>>>(dev_ouput, renderedWidth, renderedHeight);
// Kernel launches are asynchronous and report nothing directly: launch-config
// errors surface via cudaGetLastError(), execution errors at the next
// synchronizing call (the cudaMemcpy below blocks, so check again after it).
err = cudaGetLastError();
if (err != cudaSuccess) fprintf(stderr, "launch: %s\n", cudaGetErrorString(err));
err = cudaMemcpy(dataptr, dev_ouput, renderedWidth * renderedHeight * sizeof(float) * 4, cudaMemcpyDeviceToHost);
if (err != cudaSuccess) fprintf(stderr, "copy back: %s\n", cudaGetErrorString(err));
cudaFree(dev_ouput);
cudaUnbindTexture(texRef);
cudaGraphicsUnmapResources( 1, &resource, NULL );
cudaGraphicsUnregisterResource(resource);
The result is an empty buffer, but no CUDA errors are reported — and if, in the kernel, I just write fixed values instead of the texture fetch, the buffer comes back with exactly the values I expect.
Any ideas anyone?