I’m testing CUDA interoperability with different OpenGL texture formats, and I have no idea how to process 16-bit (half-precision) floats.
My kernel function
// Tag type used to pick the invertChannels overload for an N-channel texel.
template <int N>
struct ChannelCount {};

// Inverts the only channel of a 1-channel texel.
template <class T, class M>
__device__ __forceinline__ void invertChannels(T& out, const T& in, M max, ChannelCount<1>) {
	out.x = max - in.x;
}

// Inverts both channels of a 2-channel texel.
template <class T, class M>
__device__ __forceinline__ void invertChannels(T& out, const T& in, M max, ChannelCount<2>) {
	invertChannels(out, in, max, ChannelCount<1>());
	out.y = max - in.y;
}

// Inverts x, y and z of a 4-channel texel and copies w through unchanged.
template <class T, class M>
__device__ __forceinline__ void invertChannels(T& out, const T& in, M max, ChannelCount<4>) {
	invertChannels(out, in, max, ChannelCount<2>());
	out.z = max - in.z;
	out.w = in.w;
}

// Inverts every texel of a 2D surface in place: each inverted channel becomes
// `max - channel`; for 4-channel texels the w channel is copied unchanged.
//
// Template parameters:
//   T - texel type read from and written to the surface (e.g. float4, uchar4).
//   C - number of channels in T; 1, 2 and 4 are supported. Any other value fails
//       to compile instead of silently writing an uninitialized texel.
//   M - type of the per-channel maximum value.
//
// Parameters:
//   s      - surface object that is both read and written.
//   texDim - surface extent in texels (x = width, y = height).
//   size   - multiplier turning the texel column into the x coordinate passed to
//            surf2Dread/surf2Dwrite (the callers pass the texel size in bytes).
//   max    - value each inverted channel is subtracted from.
//
// Launch layout: 2D grid of 2D blocks, one thread per texel. Threads outside
// texDim do nothing, so the grid may be rounded up past the surface extent.
//
// The channel work is dispatched through overloads rather than a fall-through
// switch: a switch on C still compiles every case, so 1- and 2-component texel
// types (which have no .z/.w members) could not be instantiated at all.
template <class T, int C, class M>
__global__ void invert(cudaSurfaceObject_t s, dim3 texDim, char size, M max) {
	unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
	unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;
	if (x < texDim.x && y < texDim.y) {
		T data = surf2Dread<T>(s, x * size, y);
		T inverted;
		invertChannels(inverted, data, max, ChannelCount<C>());
		surf2Dwrite(inverted, s, x * size, y);
	}
}
And I call it like this:
// One thread per texel, launched in 32x32 thread tiles.
dim3 texDim(width, height);
dim3 thread(32, 32);
// Round the grid up (ceil-division) so a partial tile at the right/bottom edge is
// still covered. Truncating division skipped those texels and produced an empty,
// invalid grid for textures smaller than one tile. The kernel's bounds check
// discards the surplus threads.
dim3 block((texDim.x + thread.x - 1) / thread.x, (texDim.y + thread.y - 1) / thread.y);
// for GL_RGBA32F
invert<float4, 4, float><<< block, thread >>>(surface, texDim, 16, 1.f);
// for GL_RGBA8
invert<uchar4, 4, unsigned char><<< block, thread >>>(surface, texDim, 4, 255);
How can I make it work with, e.g., GL_RGBA16F? There is no struct like “halffloat4”, nor a “halffloat” type.
I’ve tried:
// Attempt for GL_RGBA16F: still instantiates the kernel with float4 texels and a
// float maximum, but passes 8 instead of 16 as the per-texel x multiplier.
invert<float4, 4, float><<< block, thread >>>(surface, texDim, 8, 1.f);
But it only produces weird green lines.