I’m testing interoperability with different OpenGL texture formats, and I have no idea how to process 16-bit (half-precision) floats.
My kernel function
/**
 * Inverts the colour channels of a 2D surface in place, one texel per thread.
 *
 * Template parameters:
 *   T - vector type handed to surf2Dread / surf2Dwrite. Every switch case is
 *       compiled regardless of C, so T must expose .x, .y, .z and .w.
 *   C - number of channels to invert: 1 -> x; 2 -> x, y; 4 -> x, y, z
 *       (w is passed through unchanged). Any other value leaves the texel
 *       untouched.
 *   M - type of the per-channel maximum.
 *
 * Parameters:
 *   s      - surface object that is read and then overwritten.
 *   texDim - texture extent in texels (x = width, y = height); threads
 *            outside this extent do nothing.
 *   size   - byte stride between horizontally adjacent texels. The surface
 *            functions take the x coordinate in bytes, so this is expected
 *            to equal the size of one stored texel.
 *   max    - value each inverted channel is subtracted from.
 *
 * Launch layout: 2D grid of 2D blocks covering at least texDim.x by texDim.y
 * threads. No shared memory is used.
 */
template <class T, int C, class M>
__global__ void invert(cudaSurfaceObject_t s, dim3 texDim, char size, M max) {
    const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
    const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;

    // Surplus threads of a rounded-up grid fall outside the texture.
    if (x >= texDim.x || y >= texDim.y) {
        return;
    }

    const T data = surf2Dread<T>(s, x * size, y);

    // Start from the source texel so that channels which are not inverted
    // (alpha, or everything above C) are written back unchanged instead of
    // being left uninitialized.
    T inverted = data;

    switch (C) {
    case 4:
        inverted.z = max - data.z;
        // fall through
    case 2:
        inverted.y = max - data.y;
        // fall through
    case 1:
        inverted.x = max - data.x;
        break;
    default:
        break;
    }

    surf2Dwrite(inverted, s, x * size, y);
}
And I call it like this:
// Launch configuration: one thread per texel, 32x32 threads per block.
dim3 texDim(width, height);
dim3 thread(32, 32);
// Round the block count up so that textures whose width or height is not a
// multiple of the block size are fully covered (and so that textures smaller
// than one block still get a non-empty grid). The bounds check inside the
// kernel discards the surplus threads.
dim3 block((texDim.x + thread.x - 1) / thread.x,
           (texDim.y + thread.y - 1) / thread.y);
// for GL_RGBA32F
invert<float4, 4, float><<< block, thread >>>(surface, texDim, 16, 1.f);
// for GL_RGBA8
invert<uchar4, 4, unsigned char><<< block, thread >>>(surface, texDim, 4, 255);
How can I make it work with, e.g., GL_RGBA16F? There is no struct like “halffloat4”, nor is there a “halffloat” type.
I’ve tried:
// NOTE(review): float4 is 16 bytes, but the texel stride passed here is 8, so
// each thread starts reading every 8 bytes yet fetches 16. Presumably each
// access then straddles two GL_RGBA16F texels and treats 16-bit half data as
// 32-bit floats -- confirm against the actual surface format.
invert<float4, 4, float><<< block, thread >>>(surface, texDim, 8, 1.f);
But it only produces weird green lines.