I’m currently working on an image processing algorithm in which I have allocated an image-sized (1280*1024) buffer of type bool to store the status of each pixel of the image after processing. The following is a snippet of my code:
//In main
bool * maskLoc;
cudaMallocManaged(&maskLoc, (1280*1024));
//parent Kernel call
parent<<< , >>>(maskLoc);
__global__ parent(bool* maskLoc)
{
-------inside parent Kernel
… some processing
//child Kernel call
child<<<1,9>>>(maskLoc);
cudaDeviceSynchronize();
} // end parent kernel
__global__ child(bool* maskLoc)
{
-------inside Kernel
… some processing
__syncthreads();
bool* ptrMask;
ptrMask = (maskLoc + ((Y-coord)*IMAGE_WIDTH)+ X-coord);
*ptrMask = 1;
}// end of child kernel
(Note: X-coord and Y-coord have different values in each child thread)
I’m not able to access maskLoc in the child kernel.
Can I pass maskLoc (since it is in unified memory) to the child kernel and use it there, or is there some other way to solve this issue?
Any help is appreciated…!!!