A problem with cuMemcpy2DUnaligned()

My code is:
// Strided device-to-device copy test: every float of the source buffer is
// written to every second float slot of the destination buffer using
// cuMemcpy2DUnaligned(), then the destination is read back to the host.
int status = 0;
int nWidthTest = 672;
int nInViewTest = 1208;
int nOutView = 604;

// Source buffer: nInViewTest * nWidthTest floats, zero-filled.
CUdeviceptr d_dataTemp2;
status = cuMemAlloc(&d_dataTemp2, nInViewTest * nWidthTest * sizeof(float));
status = cuMemsetD32(d_dataTemp2, 0, nInViewTest * nWidthTest);

// Destination buffer: same size as the source. It must be cleared as well,
// because the strided copy below only writes every other float and would
// otherwise leave the gaps uninitialized.
CUdeviceptr d_dataTemp3;
status = cuMemAlloc(&d_dataTemp3, nInViewTest * nWidthTest * sizeof(float));
status = cuMemsetD32(d_dataTemp3, 0, nInViewTest * nWidthTest);

// Describe the copy as a 2D transfer whose "rows" are a single float wide:
// source rows are packed (pitch = 1 float), destination rows are spaced two
// floats apart (pitch = 2 floats).
CUDA_MEMCPY2D copyParam;
memset(&copyParam, 0, sizeof(copyParam));
copyParam.srcMemoryType = CU_MEMORYTYPE_DEVICE;
copyParam.srcDevice = d_dataTemp2;
copyParam.srcPitch = 1 * sizeof(float);
copyParam.dstMemoryType = CU_MEMORYTYPE_DEVICE;
copyParam.dstDevice = d_dataTemp3;
copyParam.dstPitch = 2 * sizeof(float);
copyParam.WidthInBytes = 1 * sizeof(float);
// NOTE(review): Height is nOutView * nWidthTest = 405888 rows here. Such a
// tall, 4-byte-wide copy may exceed what the driver accepts for a 2D copy --
// confirm against the cuMemcpy2D / cuMemcpy2DUnaligned documentation.
copyParam.Height = nOutView * nWidthTest;
status = cuMemcpy2DUnaligned(&copyParam);

// Copy the strided result back to the host. pfOut must hold at least
// nOutView * nWidthTest * 2 floats.
memset(pfOut, 0, nOutView * nWidthTest * 2 * sizeof(float));
status = cuMemcpyDtoH(pfOut, d_dataTemp3, nOutView * nWidthTest * 2 * sizeof(float));
After running this, status is 1 (CUDA_ERROR_INVALID_VALUE).
The result is wrong, and I want to know why.
When I set nWidthTest to 16, the result is correct.
My question is: do copyParam.WidthInBytes and copyParam.Height have any special restrictions?