Copying an ID3D11Texture2D into a uint8_t* device buffer fails

I want to read an ID3D11Texture2D from CUDA (using only the GPU, with no CPU round trip).
Here is my code:

auto imgz = 320;
    D3D11_TEXTURE2D_DESC desc;
    ZeroMemory(&desc, sizeof(D3D11_TEXTURE2D_DESC));
    desc.Width = imgz;
    desc.Height = imgz;
    desc.MipLevels = 1;
    desc.ArraySize = 1;
    desc.Format = DXGI_FORMAT_B8G8R8A8_UNORM;
    desc.SampleDesc.Count = 1;
    desc.SampleDesc.Quality = 0;
    desc.Usage = D3D11_USAGE_DEFAULT; 
    desc.BindFlags = D3D11_BIND_RENDER_TARGET; 
    desc.CPUAccessFlags = 0;
    desc.MiscFlags = 0;

    D3D11_SUBRESOURCE_DATA data;
    unsigned char* out = (unsigned char*)malloc(imgz * imgz * 4 * sizeof(unsigned char));
    for (int i = 0; i < imgz * imgz; i++) {
        out[i * 4 + 2] = 255;
    }
    data.pSysMem = out; // Default data will be full red
    data.SysMemPitch = imgz * 4;
    data.SysMemSlicePitch = 0;

    ID3D11Texture2D* StagingTexture;
    winrt::com_ptr<ID3D11DeviceContext> m_d3dContext;
    auto d3dDevice = CreateD3DDevice();
    auto dxgiDevice = d3dDevice.as<IDXGIDevice>();
    auto m_device = CreateDirect3DDevice(dxgiDevice.get());

    d3dDevice = GetDXGIInterfaceFromObject<ID3D11Device>(m_device);
    d3dDevice->GetImmediateContext(m_d3dContext.put());
    d3dDevice.get()->CreateTexture2D(&desc, &data, &StagingTexture);
    /*
    D3D11_MAPPED_SUBRESOURCE MappedTex;
    m_d3dContext->Map(StagingTexture, 0, D3D11_MAP_READ, 0, &MappedTex);
    m_d3dContext->Unmap(StagingTexture, 0);
    cv::Mat c = cv::Mat(imgz, imgz, CV_8UC4, MappedTex.pData, imgz * 4);
    cv::imshow("aaaaa", c);
    cv::waitKey(1);
    */
    cudaGraphicsResource* CUDAResource;
    cudaArray* textPtr;
    uint8_t* gpuArr;
    CUDA_CHECK(cudaGraphicsD3D11RegisterResource(&CUDAResource, StagingTexture, cudaGraphicsRegisterFlagsNone));
    //CUDA_CHECK(cudaGraphicsResourceSetMapFlags(CUDAResource, cudaGraphicsMapFlagsWriteDiscard));
    
    CUDA_CHECK(cudaGraphicsMapResources(1, &CUDAResource, 0));
        CUDA_CHECK(cudaGraphicsSubResourceGetMappedArray(&textPtr, CUDAResource, 0, 0));
        CUDA_CHECK(cudaMemcpyToArray(textPtr, 0, 0, gpuArr, sizeof(uint8_t) * imgz* imgz * 4, cudaMemcpyDeviceToDevice)); //<< error here : invalid argument
    CUDA_CHECK(cudaGraphicsUnmapResources(1, &CUDAResource, 0));

The call to cudaMemcpyToArray fails with the error "invalid argument".
I have been stuck on this for many days.
I hope someone can help. Thanks!

You might wish to study the CUDA sample codes that demonstrate D3D11 interop.