I am trying to use NPP's warp-affine function (`nppiWarpAffine`) to perform an image transformation, but for some coefficients I keep getting the `NPP_WRONG_INTERSECTION_QUAD_WARNING` status. The same coefficients work fine in OpenCV, and when I calculate the transformed quad myself it clearly intersects the destination ROI. The problem happens in all the CUDA releases I tried (6.5, 7.0 and 7.5). Below is some simple code that demonstrates the problem. Any help will be appreciated.

```
#include <cuda_runtime.h>
#include <npp.h>
#include <stdio.h>
int main()
{
const int width = 200, height = 170;
const int out_width = 64, out_height = 64;
unsigned char *dSrc, *dDst;
cudaMalloc<unsigned char>(&dSrc,3*width*height*sizeof(Npp32f));
cudaMalloc<unsigned char>(&dDst,3*out_width*out_height*sizeof(Npp32f));
NppiSize srcSize = {width, height};
NppiSize dstSize = {out_width, out_height};
NppiRect srcRoi = {0,0,width, height};
NppiRect dstRoi = {0,0,out_width, out_height};
double coeffs[2][3];
coeffs[0][0]=0.967700;
coeffs[0][1]=0.523475;
coeffs[0][2]=-90.066200;
coeffs[1][0]=0.444953;
coeffs[1][1]=-1.138470;
coeffs[1][2]=30.769278;
int outImgSz = out_width*out_height;
int srcImgSz = width*height;
const Npp32f * pSrc[3];
Npp32f * pDst[3];
pSrc[0] = (Npp32f*) (dSrc);
pSrc[1] = (Npp32f*) (pSrc[0] + srcImgSz);
pSrc[2] = (Npp32f*) (pSrc[0] + 2*srcImgSz);
pDst[0] = (Npp32f*) dDst;
pDst[1] = (Npp32f*) (pDst[0] + outImgSz);
pDst[2] = (Npp32f*) (pDst[0] + 2*outImgSz);
int rval = nppiWarpAffine_32f_P3R (pSrc, srcSize, width*sizeof(Npp32f), srcRoi, pDst, out_width*sizeof(Npp32f), dstRoi, coeffs, NPPI_INTER_CUBIC);
if(NPP_NO_ERROR != rval)
{
fprintf(stderr, "NPP error %d\n", rval);
exit(1);
}
cudaFree(dSrc);
cudaFree(dDst);
return 0;
}
```