Use CudaArray

Hello,

I would like to use a cudaArray in different cudaStream. I would like to know if my program is possible.

I malloc my cudaArray with a size of my image to process on it a sobel filter with different stream.
Then I bind my cudaArray to a 2d texture.
Then I fill a part of my cudaArray with the image data (for example the 64 first line) with a stream and I process the kernel on that piece of image.

I would like to know it that is possible to process a kernel on a part of cudaArray whereas the other part of the cudaArray is not filled?

I made a example

u_int8_t *u8_PtImageHost;
    u_int8_t *u8_PtImageDevice;

    u_int8_t *u8_ptDataOutHost;
    u_int8_t *u8_ptDataOutDevice;

    u_int8_t u8_Used[NB_STREAM];


    u8_ptDataOutHost = (u_int8_t *)malloc(WIDTH*HEIGHT*sizeof(u_int8_t));
    checkCudaErrors(cudaMalloc((void**)&u8_ptDataOutDevice,WIDTH*HEIGHT*sizeof(u_int8_t)));

    u8_PtImageHost = (u_int8_t *)malloc(WIDTH*HEIGHT*sizeof(u_int8_t));
    checkCudaErrors(cudaMalloc((void**)&u8_PtImageDevice,WIDTH*HEIGHT*sizeof(u_int8_t)));


    cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<unsigned char>();
    checkCudaErrors(cudaMallocArray(&Array_PatchsMaxDevice, &channelDesc,WIDTH,HEIGHT ));
    checkCudaErrors(cudaBindTextureToArray(Image,Array_PatchsMaxDevice));


    int blockh = HEIGHT/NB_STREAM;

    for(int i=0;i<NB_STREAM;i++)
    {
        cudaSetDevice(0);
        cudaStreamCreate(&Stream[i]);
    }

    for(int i=0;i<NB_STREAM;i++)
    {
        printf("i: %d\n",i);
        if(i == 0)
        {

            int yy =0;
            int localHeight  = blockh+1;
            checkCudaErrors(cudaMemcpy2DToArrayAsync( Array_PatchsMaxDevice,
                                                      0,
                                                      0,
                                                      u8_PtImageDevice,
                                                      WIDTH,
                                                      WIDTH,
                                                      blockh,
                                                      cudaMemcpyHostToDevice  ,
                                                      Stream[i]));

            dim3 threads(BLOC_X,BLOC_Y);
            dim3 blocks(ceil(WIDTH/BLOC_X),ceil(localHeight/BLOC_Y));
            SobelKernel<<<blocks,threads,0,Stream[i]>>>(u8_ptDataOutDevice,0,WIDTH,localHeight-1);
            u8_Used[i] = 1;

        }else{

            int ioffset =  WIDTH*(HEIGHT/NB_STREAM  );

            int localHeight  = min(HEIGHT - (blockh +1 +blockh*i),blockh);

            printf("ioffset: %d\n",ioffset);
            checkCudaErrors(cudaMemcpy2DToArrayAsync( Array_PatchsMaxDevice,
                                                      0,
                                                      ioffset*i,
                                                      &u8_PtImageDevice[ioffset*i],
                            WIDTH,
                            WIDTH,
                            localHeight,
                            cudaMemcpyHostToDevice  ,
                            Stream[i])); ##### HERE THE ERROR


            dim3 threads(BLOC_X,BLOC_Y);
            dim3 blocks(ceil(WIDTH/BLOC_X),ceil(localHeight/BLOC_Y));
            SobelKernel<<<blocks,threads,0,Stream[i]>>>(u8_ptDataOutDevice,ioffset,WIDTH,localHeight);

            u8_Used[i] = 1;
            if(localHeight<blockh-1)
            {
                break;
            }
        }
    }



    for(int i=0;i<NB_STREAM;i++)
    {
        cudaStreamSynchronize(Stream[i]);
    }


    checkCudaErrors(cudaMemcpy(u8_ptDataOutHost,u8_ptDataOutDevice,WIDTH*HEIGHT*sizeof(u_int8_t),cudaMemcpyDeviceToHost));


    int iioffset = 0;
    int jjoffset = 0;

    for(int i=iioffset;i<iioffset+25;i++)
    {
        for(int j=jjoffset;j<jjoffset+25;j++)
        {
            printf(" %d",u8_ptDataOutHost[j*WIDTH+i]);
            //            std::cout << (int)u8_ptDataOutHost[j*WIDTH+i] <<" ";
        }
        printf(" \n");
    }

On the line with the ####, I get that error:

…/Sobel_Stream/cuda_interface.cu(125) : CUDA Runtime API error 11: invalid argument.

It is due to the arogument: ioffset*i