Boxfilter using texture on a cudaMallocManaged buffer

Hi,

I am trying to use some of the code from the boxFilter sample to perform filtering on an image that is stored in a buffer allocated by cudaMallocManaged.

Code to allocate buffer:

cudaMallocManaged(&buffer, width * height * 3 * sizeof(float));

Code to initialize texture:

void initTexture(int width, int height, void *pImage)
{
    int size = width * height * sizeof(float);

    // copy image data to array
    cudaChannelFormatDesc channelDesc;
    channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);

    cudaMallocArray(&d_array, &channelDesc, width, height);
    cudaMemcpyToArray(d_array, 0, 0, pImage, size, cudaMemcpyDeviceToDevice);

    cudaMallocArray(&d_tempArray,   &channelDesc, width, height);

    // set texture parameters
    tex.addressMode[0] = cudaAddressModeClamp;
    tex.addressMode[1] = cudaAddressModeClamp;
    tex.filterMode = cudaFilterModeLinear;
    tex.normalized = false;

    // Bind the array to the texture
    cudaBindTextureToArray(tex, d_array, channelDesc);
}

Code to run filter in x:

__global__ void
d_boxfilter_x_tex(float *od, int w, int h, int r)
{
    float scale = 1.0f / (float)((r << 1) + 1);
    unsigned int y = blockIdx.x*blockDim.x + threadIdx.x;

    float t = 0.0f;

    // sum the initial window centered at x = 0 (clamp addressing handles x < 0)
    for (int x = -r; x <= r; x++)
    {
        t += tex2D(tex, x, y);
        printf("%f\n", t);   // debug output; %f matches the float argument
    }

    od[y * w] = t * scale;

    // slide the window across the row: add the entering sample, drop the leaving one
    for (int x = 1; x < w; x++)
    {
        t += tex2D(tex, x + r, y);
        t -= tex2D(tex, x - r - 1, y);
        od[y * w + x] = t * scale;
    }
}

When I run it like this, my image is really dark, with red colors highlighting the textures within the image. If I run it as follows instead, the image comes out in the proper colors:

__device__ void
d_boxfilter_x(float *id, float *od, int w, int h, int r)
{
    float scale = 1.0f / (float)((r << 1) + 1);
    float t;

    // do left edge
    t = id[0] * r;

    for (int x = 0; x < (r + 1); x++)
    {
        t += id[x];
    }

    od[0] = t * scale;

    for (int x = 1; x < (r + 1); x++)
    {
        t += id[x + r];
        t -= id[0];
        od[x] = t * scale;
    }

    // main loop
    for (int x = (r + 1); x < w - r; x++)
    {
        t += id[x + r];
        t -= id[x - r - 1];
        od[x] = t * scale;
    }

    // do right edge
    for (int x = w - r; x < w; x++)
    {
        t += id[w - 1];
        t -= id[x - r - 1];
        od[x] = t * scale;
    }
}

I think it is related to the way the cudaMallocManaged buffer is being mapped to texture memory, but I am not sure. Does anyone know what could be going on?

Hi,

Thanks for your question.
We are checking this issue internally and will update you soon.

Thank you, AastaLLL.

Hi,

Sorry for the late reply.

Would you mind sharing a simple, compilable source with us so we can check it directly for you?
By the way, have you checked whether this issue also occurs with memory from a regular cudaMalloc?
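For reference, a minimal sketch of such a test, assuming the same single-channel float layout that initTexture() expects (h_image, d_output, and radius are placeholder names, and the launch configuration assumes height is a multiple of 64):

// Hypothetical test: swap cudaMallocManaged for a plain cudaMalloc, copy the
// host image in explicitly, then reuse the same initTexture() and kernel.
float *d_buffer = NULL;
size_t size = width * height * sizeof(float);   // single channel, matching initTexture()

cudaMalloc(&d_buffer, size);
cudaMemcpy(d_buffer, h_image, size, cudaMemcpyHostToDevice);

initTexture(width, height, d_buffer);
d_boxfilter_x_tex<<<height / 64, 64>>>(d_output, width, height, radius);
cudaDeviceSynchronize();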

Thanks.

Hi AastaLLL,

I looked at this a little bit more. Currently I am using cudaHostAlloc and cudaMallocManaged, and I am able to perform all of the processing just by using pointers. Will this also be the case with texture memory, or do we need a memcpy? If a memcpy is required, this may not be appropriate for our application.
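For reference, this is roughly what I would like to do instead of a copy: bind the 2D texture reference directly to the linear buffer with cudaBindTexture2D. This is only a sketch, assuming the texture-reference API from the sample and that the row pitch satisfies the device's texturePitchAlignment:

// Hypothetical: bind the texture reference straight to the linear
// (cudaMallocManaged) buffer instead of copying it into a cudaArray.
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);

size_t offset = 0;
size_t pitch  = width * sizeof(float);  // must meet cudaDeviceProp::texturePitchAlignment

cudaBindTexture2D(&offset, tex, buffer, channelDesc, width, height, pitch);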

Thanks.

Hi,

Do you mean your program works correctly when using a pointer?
If so, that is expected, since most of our functions only pass the pointer rather than the whole memory buffer.
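For example, with unified memory the managed pointers can be handed to a kernel directly and the driver migrates the pages on demand. A rough sketch (boxfilter_x_global, output, and radius are hypothetical names, wrapping the __device__ d_boxfilter_x you posted):

// Hypothetical wrapper: one thread per row, each row filtered by the
// existing __device__ d_boxfilter_x, reading and writing managed memory.
__global__ void boxfilter_x_global(float *id, float *od, int w, int h, int r)
{
    int y = blockIdx.x * blockDim.x + threadIdx.x;

    if (y < h)
        d_boxfilter_x(&id[y * w], &od[y * w], w, h, r);
}

// No cudaMemcpy: the cudaMallocManaged pointers are passed as-is.
boxfilter_x_global<<<(height + 63) / 64, 64>>>(buffer, output, width, height, radius);
cudaDeviceSynchronize();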

Thanks.