16-bit image through OpenGL & PBO

Hello,

My task is simple: display a 16-bits-per-channel image through OpenGL and apply some basic image processing. I use textures and a PBO. Everything looks fine, but the texture binding fails. I would appreciate any kind of help!

Here is the relevant part of the source:

...

// CUDA_FreeArray is not shown in this excerpt; presumably it releases the CUDA array
// allocated by a previous CUDA_MallocArray call -- TODO confirm.
cutilSafeCall( CUDA_FreeArray() );

// Allocates the CUDA array (width x height) and copies memNeeded bytes from imgbuffer into it.
cutilSafeCall( CUDA_MallocArray(&imgbuffer, width, height, memNeeded) );

//imgbuffer is the host-side image data: RGBA, 16 bits per channel

//memNeeded is the exact size of the image data in bytes

//the image data has been checked and is correct (author's statement)

//Creating GL texture...

	glEnable(GL_TEXTURE_2D);

		glGenTextures(1, &gl_Tex);

		glBindTexture(GL_TEXTURE_2D, gl_Tex);

		// Clamp texture coordinates in both directions.
		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP);

		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP);

		// Nearest-neighbour sampling for both magnification and minification.
		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);

		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);

		// 16-bit RGBA internal format, filled from 16-bit unsigned RGBA source data.
		// In the visible code the PBO is bound only further below, so imgbuffer is
		// presumably read here as a client-memory pointer -- confirm no unpack buffer is bound earlier.
		// NOTE(review): this uses w/h while the CUDA array above uses width/height -- confirm they are equal.
		glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16, w, h, 0, GL_RGBA, GL_UNSIGNED_SHORT, imgbuffer);

	//Texture created...

	//Creating PBO...

		glGenBuffers(1, &gl_PBO);

		// The PBO stays bound as the pixel-unpack buffer after this point in the excerpt.
		glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, gl_PBO);

		// w * h * 8 bytes: 4 channels x 2 bytes per channel for every pixel.
		glBufferData(GL_PIXEL_UNPACK_BUFFER_ARB, w * h * 8, imgbuffer, GL_STREAM_COPY);

		CUT_CHECK_ERROR_GL();

	//PBO created...

///this seems ok

// Per-frame path: the timer started here is stopped below, just before FPS().
cutStartTimer(hTimer);

	

	TColor *d_dst = NULL;

	// Map the PBO to obtain a device pointer the kernel can write to.
	// NOTE(review): no cudaGLRegisterBufferObject(gl_PBO) call is visible in this excerpt;
	// the buffer must be registered before it can be mapped -- confirm it happens elsewhere.
	cutilSafeCall( cudaGLMapBufferObject((void**)&d_dst, gl_PBO) );

	// The message is Hungarian for "error before bind".
	CUT_CHECK_ERROR("Bind hiba elott");

	cutilSafeCall( CUDA_Bind2TextureArray()); // <-- fails here with cudaErrorInvalidTexture; the program exits with return value -1

	// The message is Hungarian for "bind error".
	CUT_CHECK_ERROR("Bind hiba");

	// Launches the Copy kernel, which receives the mapped PBO pointer as its destination.
	cuda_Copy(d_dst, w, h);

	cutilSafeCall( CUDA_UnbindTexture()	 );

	// Unmap the PBO so OpenGL can use it again.
	cutilSafeCall( cudaGLUnmapBufferObject(gl_PBO) );

	// BUFFER_DATA is not shown; presumably it yields a byte offset (0) into the bound
	// pixel-unpack buffer, so the texture is updated from the PBO -- TODO confirm.
	glTexSubImage2D( GL_TEXTURE_2D, 0, 0, 0, w, h, GL_RGBA, GL_UNSIGNED_SHORT, BUFFER_DATA(0) );

	// One oversized triangle; it covers the square [-1,1] x [-1,1], and texture
	// coordinate t runs from 1 at y = -1 to 0 at y = 1 (assuming no transform is applied -- confirm).
	glBegin(GL_TRIANGLES);

	   

		glTexCoord2f(0, 1); glVertex2f(-1, -1);

		glTexCoord2f(2, 1); glVertex2f(3, -1);

		glTexCoord2f(0, -1); glVertex2f(-1, 3);

	glEnd();

	// Wait for GL to finish before swapping, so the timer includes the rendering work.
	glFinish();

	glutSwapBuffers();

	cutStopTimer(hTimer);

	FPS();

	// Request another redraw.
	glutPostRedisplay();

The relevant part of the .cu file is here:

// Texture reference for 4 x 16-bit data: 2D, element type ushort4, and fetches
// return normalized floats (cudaReadModeNormalizedFloat).

texture<ushort4, 2, cudaReadModeNormalizedFloat> texImage;

// Channel format: four unsigned channels of 16 bits each; used both to allocate
// a_Src and to bind it to texImage.
cudaChannelFormatDesc shortIntTex = cudaCreateChannelDesc(16,16,16,16,cudaChannelFormatKindUnsigned);

// CUDA array holding the source image; allocated in CUDA_MallocArray and bound
// to texImage in CUDA_Bind2TextureArray.

cudaArray *a_Src;

//#include "FrameDecoder_copy_kernel.cu"

// Image-processing kernel launched by cuda_Copy on a 2D grid of
// BLOCKDIM_X x BLOCKDIM_Y thread blocks. The body was elided by the author.
//   dst    - destination buffer (the mapped PBO pointer at the call site)
//   imageW - image width passed by the launcher
//   imageH - image height passed by the launcher
// NOTE(review): this excerpt has two closing braces for a single opening brace;
// presumably the first one closes a construct inside the elided body -- confirm
// against the full source.
__global__ void Copy(

	TColor *dst,

	int imageW,

	int imageH

){

   //do some image processing; per the author this part of the code is never reached

	}

}

/**
 * Host-side launcher for the Copy kernel.
 *
 * Launch geometry: BLOCKDIM_X x BLOCKDIM_Y threads per block, with the grid
 * sized by iDivUp over the image width and height (presumably a ceiling
 * division, so partial tiles at the borders are still covered -- confirm).
 *
 * @param d_dst   Device pointer the kernel receives as its destination.
 * @param imageW  Image width forwarded to the kernel.
 * @param imageH  Image height forwarded to the kernel.
 *
 * The launch is not followed by an error check here; callers are expected to
 * check for launch errors themselves.
 */
extern "C" void
cuda_Copy(TColor *d_dst, int imageW, int imageH)
{
	const dim3 blockShape(BLOCKDIM_X, BLOCKDIM_Y);
	const dim3 gridShape(
		iDivUp(imageW, BLOCKDIM_X),
		iDivUp(imageH, BLOCKDIM_Y));

	Copy<<<gridShape, blockShape>>>(d_dst, imageW, imageH);
}

/**
 * Binds the file-scope CUDA array a_Src to the texture reference texImage,
 * using the shortIntTex channel descriptor.
 *
 * @return The status reported by cudaBindTextureToArray.
 *
 * Reported by the author to fail with cudaErrorInvalidTexture.
 */
extern "C"
cudaError_t CUDA_Bind2TextureArray()
{
	const cudaError_t bindStatus =
		cudaBindTextureToArray(texImage, a_Src, shortIntTex);

	return bindStatus;
}

/**
 * Releases the binding of the file-scope texture reference texImage.
 *
 * @return The status reported by cudaUnbindTexture.
 */
extern "C"
cudaError_t CUDA_UnbindTexture()
{
	const cudaError_t unbindStatus = cudaUnbindTexture(texImage);

	return unbindStatus;
}

/**
 * Allocates the file-scope CUDA array a_Src with the shortIntTex channel
 * format and uploads the host image into it.
 *
 * @param h_Source   Address of the host pointer to the source pixel data.
 * @param imageW     Array width passed to cudaMallocArray.
 * @param imageH     Array height passed to cudaMallocArray.
 * @param memNeeded  Number of bytes copied from *h_Source into the array.
 *                   NOTE(review): presumably imageW * imageH * sizeof(ushort4)
 *                   -- confirm at the call site.
 *
 * @return cudaSuccess when both the allocation and the copy succeed;
 *         cudaErrorInvalidValue for a null source pointer or a negative byte
 *         count; otherwise the error reported by cudaMallocArray or
 *         cudaMemcpyToArray.
 */
extern "C"
cudaError_t CUDA_MallocArray( unsigned char **h_Source, int imageW, int imageH, int memNeeded)
{
	// Reject arguments that would otherwise be dereferenced or converted to a
	// huge unsigned byte count.
	if (!h_Source || !*h_Source || memNeeded < 0)
		return cudaErrorInvalidValue;

	cudaError_t error = cudaMallocArray(&a_Src, &shortIntTex, imageW, imageH);

	// Do not let the copy overwrite an allocation failure: report it and skip
	// the copy into an array that was never created.
	if (error != cudaSuccess)
		return error;

	// On a copy failure the array stays allocated; this function does not free it.
	error = cudaMemcpyToArray(a_Src, 0, 0,
							  *h_Source, memNeeded,
							  cudaMemcpyHostToDevice);

	return error;
}

The Visual Studio debugger output shows this line after the binding call:

“Microsoft C++ exception: cudaError at memory location 0x0012ea3c…”

I can provide more source if requested.

Thanks a lot,

Peter Bako