Wrong output when copy array with cuda

Hi. I’m new in CUDA and now i’m trying to write kernel which copies image from one memory to another
for educational purposes.
So, that’s how i do it:

inline bool cudaAllocMapped( void** cpuPtr, void** gpuPtr, size_t size )
{
	if( !cpuPtr || !gpuPtr || size == 0 )
		return false;
	if( CUDA_FAILED(cudaHostAlloc(cpuPtr, size, cudaHostAllocMapped)) )
		return false;

	if( CUDA_FAILED(cudaHostGetDevicePointer(gpuPtr, *cpuPtr, 0)) )
		return false;

    memset(*cpuPtr, 0, size);
	return true;
}


__global__ void gpuCopy(uchar1* input, uchar1* output, size_t outputWidth, size_t outputHeight)
{
    const int x =  blockIdx.x * blockDim.x + threadIdx.x;
    const int y =  blockIdx.y * blockDim.y + threadIdx.y;

    if (x < 0 || x > 1920
            || y < 0 || y > 1080)
    {
        return;
    }
    const int inputPosition = y * 1920 + x;

    output[inputPosition * 3].x = input[inputPosition * 3].x;
    output[inputPosition * 3 + 1].x = input[inputPosition * 3 + 1].x;
    output[inputPosition * 3 + 2].x = input[inputPosition * 3 + 2].x;
}


cudaError_t cudaCopy(uchar1* input, uchar1* output, size_t outputWidth, size_t outputHeight, cudaStream_t stream)
{
    if (!input)
    {
        return cudaErrorInvalidDevicePointer;
    }
    if (outputWidth == 0 || outputHeight == 0)
    {
        return cudaErrorInvalidValue;
    }
    const dim3 blockDim(8, 8);
    const dim3 gridDim(iDivUp(outputWidth, blockDim.x), iDivUp(outputHeight, blockDim.y));
    gpuCopy<<<gridDim, blockDim, 0, stream>>>(input, output, outputWidth, outputHeight);
    return cudaSuccess;
}

void* cpuPtr, gpuPtr;
cudaAllocMapped(&cpuPtr, &gpuPtr, 1920 * 1080 * 3);
Mat img = cv::imread(“image.tif”);
cuda::GpuMat imgCuda;
imgCuda.upload(img);
cudaCopy((uchar1
)imgCuda.data, (uchar1*)gpuPtr, 1920, 1080, 0);
cudaDeviceSynchronize();
Mat out(1080, 1920, img.type(), cpuPtr);
imwrite(“image_copy.tif”, out);

But copy is note same as original image.

Original:


After copy:

What is wrong here?