I’m having all kinds of trouble getting nppiWarpAffine to work. I haven’t witnessed it writing a single byte to the destination array with any of the settings I’ve tried. Here’s the simplest test case I could come up with:
/* Image dimensions used by the test, in pixels. */
#define ROWS 256
#define COLS 256
/* Size in bytes of one ROWS x COLS single-channel float image.
 * Parenthesized so the macro expands safely inside larger expressions
 * (e.g. `x / BYTES`), and written with sizeof(float) instead of a bare 4. */
#define BYTES (ROWS * COLS * sizeof(float))
/* Report a failed CUDA runtime call on stderr.
 * Returns nonzero when `err` is anything other than cudaSuccess. */
static int cuda_failed(cudaError_t err, const char *what) {
    if (err == cudaSuccess)
        return 0;
    fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(err));
    return 1;
}

/*
 * Minimal nppiWarpAffine_32f_C1R test.
 *
 * Fills a ROWS x COLS float source image with 1, a destination image with 0,
 * warps source into destination with the coefficient matrix
 * {{1,0,0},{0,1,0}}, then prints the NPP status and the first two
 * destination pixels.
 *
 * Returns the NPP status from the warp call, or 1 if a CUDA runtime call
 * failed before a status could be obtained.
 */
int main(void) {
    void *src = NULL, *dst = NULL;
    NppiRect src_roi, dst_roi;
    NppiSize src_size;
    /* static: keeps the 256 KiB host buffer off the stack. */
    static float localdata[ROWS * COLS];
    double coeffs[2][3] = {{1, 0, 0}, {0, 1, 0}};
    /* Both images are densely packed, so the row pitch is one row of floats. */
    const int step = (int)(COLS * sizeof(float));
    int rval = 1, i;

    if (cuda_failed(cudaMalloc(&src, BYTES), "cudaMalloc(src)"))
        goto cleanup;
    if (cuda_failed(cudaMalloc(&dst, BYTES), "cudaMalloc(dst)"))
        goto cleanup;

    /* Source image: every pixel is 1. */
    for (i = 0; i < ROWS * COLS; i++)
        localdata[i] = 1;
    if (cuda_failed(cudaMemcpy(src, localdata, BYTES, cudaMemcpyHostToDevice),
                    "cudaMemcpy(src <- host)"))
        goto cleanup;

    /* Destination image: every pixel is 0, so any write by NPP is visible. */
    for (i = 0; i < ROWS * COLS; i++)
        localdata[i] = 0;
    if (cuda_failed(cudaMemcpy(dst, localdata, BYTES, cudaMemcpyHostToDevice),
                    "cudaMemcpy(dst <- host)"))
        goto cleanup;

    /* Source and destination ROIs both cover the whole image. */
    src_roi.x = 0;
    src_roi.y = 0;
    src_roi.width = COLS;
    src_roi.height = ROWS;
    dst_roi.x = 0;
    dst_roi.y = 0;
    dst_roi.width = COLS;
    dst_roi.height = ROWS;
    src_size.width = COLS;
    src_size.height = ROWS;

    /* NPPI_INTER_NN replaces the original magic constant 1. */
    rval = nppiWarpAffine_32f_C1R((Npp32f *) src, src_size, step, src_roi,
                                  (Npp32f *) dst, step, dst_roi,
                                  coeffs, NPPI_INTER_NN);
    printf("%d\n", rval);
    if (rval != NPP_NO_ERROR)
        fprintf(stderr, "nppiWarpAffine_32f_C1R returned NPP status %d\n", rval);

    /* The blocking copy also surfaces any asynchronous error raised by the
     * device work that preceded it. */
    if (cuda_failed(cudaMemcpy(localdata, dst, BYTES, cudaMemcpyDeviceToHost),
                    "cudaMemcpy(host <- dst)")) {
        if (rval == NPP_NO_ERROR)
            rval = 1;
        goto cleanup;
    }
    printf("%f %f\n", localdata[0], localdata[1]);

cleanup:
    /* cudaFree accepts a null pointer, so this is safe on every exit path. */
    cudaFree(dst);
    cudaFree(src);
    return rval;
}
As far as I understand, this identity transform should simply copy src to dst, but dst always stays at whatever I initialized it to. The command I’m using to compile is:
nvcc -o nppi_test nppi_test.cu -L/usr/local/cuda/lib64 -I/usr/local/cuda/include -lnpp -lcudart
Can anyone tell me what I’m doing wrong?