Convert nv12 to rgb, without alpha channel

Hi,

In the Video Codec SDK, you provide an API to convert NV12 to BGRA:

// Prototype quoted by the poster as the SDK's existing NV12 conversion entry point.
// NOTE(review): judging by the names, dpNv12/dpBgra are device pointers and the pitches are
// row strides in bytes; iMatrix presumably selects the colour matrix — confirm against the SDK.
void Nv12ToBgra32(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);

But I don’t want the alpha channel

So I created the following union:

// One packed 24-bit pixel with no alpha channel.
// `v` exposes the pixel as a single uchar3 so it can be stored with one assignment;
// `c` names the same three bytes individually, in b, g, r order.
union BGR24 {
    uchar3 v;
    struct {
        uint8_t b, g, r;  // the terminating ';' was missing, which made the union ill-formed
    } c;
};

And I modified the launcher and YuvToRgbKernel as follows:

// Host-side launcher: selects the conversion matrix via SetMatYuv2Rgb(iMatrix), then runs
// YuvToRgbKernel over the frame. The kernel is instantiated with BGR24 / uchar3, so despite
// the function's name the destination receives 3-byte pixels with no alpha byte.
// Launch shape: blocks of 32x2 threads; the grid is sized from nWidth/nHeight so that each
// block spans 64 columns and 4 rows of pixels (the kernel handles a 2x2 quad per thread).
// The launch uses the default stream and performs no error checking.
void Nv12ToBgra32(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix) {
    SetMatYuv2Rgb(iMatrix);

    const dim3 threadsPerBlock(32, 2);
    const dim3 blocksPerGrid((nWidth + 63) / 32 / 2, (nHeight + 3) / 2 / 2);

    YuvToRgbKernel<uchar2, BGR24, uchar3><<<blocksPerGrid, threadsPerBlock>>>(
        dpNv12, nNv12Pitch, dpBgra, nBgraPitch, nWidth, nHeight);
}

/**
 * Converts a YUV frame to packed RGB, one 2x2 pixel quad per thread.
 *
 * Template parameters:
 *   YuvUnitx2 - vector type holding two horizontally adjacent source samples (.x, .y).
 *   Rgb       - destination pixel type; sizeof(Rgb) is the byte distance between pixels
 *               and Rgb::v is the value that gets stored.
 *   RgbIntx2  - type the destination pointer is cast to for each store. In this version
 *               one pixel is stored per assignment, so it must match the type of Rgb::v.
 *
 * Parameters:
 *   pYuv, nYuvPitch - source base pointer and row stride in bytes. The chroma pair for
 *                     luma rows y, y+1 is read from row (nHeight + y / 2), i.e. the chroma
 *                     rows are expected to follow the nHeight luma rows with the same pitch.
 *   pRgb, nRgbPitch - destination base pointer and row stride in bytes.
 *   nWidth, nHeight - frame size in pixels. A quad that does not fit entirely inside the
 *                     frame is skipped, so the last column/row of an odd-sized frame is
 *                     left unwritten.
 *
 * Launch layout: 2D grid and block; thread (tx, ty) handles the quad whose top-left pixel
 * is (2 * tx, 2 * ty) in global thread coordinates. No shared memory is used.
 */
template<class YuvUnitx2, class Rgb, class RgbIntx2>
__global__ static void YuvToRgbKernel(uint8_t *pYuv, int nYuvPitch, uint8_t *pRgb, int nRgbPitch, int nWidth, int nHeight) {
    // Top-left pixel of the 2x2 quad owned by this thread.
    int x = (threadIdx.x + blockIdx.x * blockDim.x) * 2;
    int y = (threadIdx.y + blockIdx.y * blockDim.y) * 2;
    if (x + 1 >= nWidth || y + 1 >= nHeight) {
        return;
    }

    uint8_t *pSrc = pYuv + x * sizeof(YuvUnitx2) / 2 + y * nYuvPitch;
    uint8_t *pDst = pRgb + x * sizeof(Rgb) + y * nRgbPitch;

    // Two luma samples from each of the two rows of the quad.
    YuvUnitx2 l0 = *(YuvUnitx2 *)pSrc;
    YuvUnitx2 l1 = *(YuvUnitx2 *)(pSrc + nYuvPitch);
    // pSrc already includes y rows, so this lands on row (nHeight + y / 2): the chroma pair
    // shared by all four pixels of the quad.
    YuvUnitx2 ch = *(YuvUnitx2 *)(pSrc + (nHeight - y / 2) * nYuvPitch);

    // Horizontally adjacent destination pixels are sizeof(Rgb) bytes apart. The previous
    // "+ 1" advanced by a single byte, so the second store overlapped the first pixel and
    // the real second pixel was never written.
    *(RgbIntx2 *)pDst = YuvToRgbForPixel<Rgb>(l0.x, ch.x, ch.y).v;
    *(RgbIntx2 *)(pDst + sizeof(Rgb)) = YuvToRgbForPixel<Rgb>(l0.y, ch.x, ch.y).v;
    *(RgbIntx2 *)(pDst + nRgbPitch) = YuvToRgbForPixel<Rgb>(l1.x, ch.x, ch.y).v;
    *(RgbIntx2 *)(pDst + nRgbPitch + sizeof(Rgb)) = YuvToRgbForPixel<Rgb>(l1.y, ch.x, ch.y).v;
}

but this does not work…

I am not familiar with nv12.

Any advice?

Have you solved this problem? I have the same issue with converting NV12 to BGR24.