And here is the code:
// Blends two source frames into a panorama, one output pixel per thread.
//
// Launch layout: 2D grid of 2D blocks. Thread (x, y) produces output pixel
// (x, y); threads that fall outside width x height return without touching
// memory, so the grid may overshoot the image.
//
// Parameters:
//   width, height  - extent of the output in pixels. `width` is also the row
//                    stride (in elements) used to index every map array.
//   mapX0, mapY0   - per-output-pixel source coordinates into frame0. They are
//                    truncated to int32_t before the fetch (no interpolation).
//   mapX1, mapY1   - same, for frame1.
//   mapW0, mapW1   - per-output-pixel blend weights. A frame is only sampled
//                    where its weight is strictly positive; otherwise it
//                    contributes 0.
//   frameWidth     - not used by this kernel; kept so the signature stays
//                    compatible with existing callers.
//   frame0, frame1 - pointers to the surface object handles of the two source
//                    frames. They are dereferenced on the device.
//   pano           - pointer to the surface object handle of the output.
//
// NOTE(review): the kernel reads and writes one uint8_t per pixel, so all three
// surfaces are presumably single-channel 8-bit - confirm against the host code
// that creates them.
// NOTE(review): frame0 / frame1 / pano must point to device-accessible memory
// because they are dereferenced inside the kernel - confirm against the caller.
// NOTE(review): surface accesses use the default boundary mode, so map
// coordinates outside the source surface are not guarded here.
__global__ void remap(
    const size_t width,
    const size_t height,
    const float * mapX0,
    const float * mapY0,
    const float * mapX1,
    const float * mapY1,
    const float * mapW0,
    const float * mapW1,
    const size_t frameWidth,
    const CUsurfObject * frame0,
    const CUsurfObject * frame1,
    CUsurfObject * pano
)
{
    // Widen before multiplying so the product is computed in size_t rather
    // than in 32-bit unsigned arithmetic.
    const size_t x{ size_t( blockIdx.x ) * blockDim.x + threadIdx.x };
    const size_t y{ size_t( blockIdx.y ) * blockDim.y + threadIdx.y };
    if( x >= width or y >= height )
    {
        return;
    }

    const size_t index{ x + y * width };
    const float w0{ mapW0[ index ] };
    const float w1{ mapW1[ index ] };

    // Samples stay 0 for a frame whose weight is not positive.
    uint8_t c0{}, c1{};

    if( w0 > 0.f )
    {
        const int32_t X0{ int32_t( mapX0[ index ] ) };
        const int32_t Y0{ int32_t( mapY0[ index ] ) };
        // The surface functions take the handle by value, hence the
        // dereference. The x coordinate of a surface access is in bytes.
        surf2Dread( & c0, * frame0, X0 * int32_t( sizeof( uint8_t ) ), Y0 );
    }
    if( w1 > 0.f )
    {
        const int32_t X1{ int32_t( mapX1[ index ] ) };
        const int32_t Y1{ int32_t( mapY1[ index ] ) };
        surf2Dread( & c1, * frame1, X1 * int32_t( sizeof( uint8_t ) ), Y1 );
    }

    // Clamp to the representable range before narrowing: converting an
    // out-of-range float to uint8_t is undefined. Adding 0.5 rounds to nearest
    // so weights that sum to just under 1 do not darken the result.
    const float blended{ c0 * w0 + c1 * w1 };
    const uint8_t c{ uint8_t( fminf( fmaxf( blended, 0.f ), 255.f ) + 0.5f ) };

    surf2Dwrite( c, * pano, int32_t( x * sizeof( uint8_t ) ), int32_t( y ) );
}