Linear Memory Texture

Hi All,

I am attaching linear memory to a 1D CUDA texture, reading it in a kernel, and writing the values back out. [This is just a simple test using the driver API.]

Here is the kernel code:

// 1D texture reference read with tex1Dfetch(). Elements are fetched as raw
// uchar4 values (cudaReadModeElementType), i.e. without conversion to float.
// The host side looks this reference up by the name "tex1d".
texture<uchar4, 1, cudaReadModeElementType> tex1d;

/**
 * Copies texels from tex1d into g_odata, four bytes per texel.
 *
 * Launch layout: 1D grid of 1D blocks, one thread per texel.
 *
 * @param g_odata  Output buffer; must hold at least 4 * size bytes.
 * @param size     Number of uchar4 texels to copy (not a byte count).
 *                 Threads whose index is >= size do nothing, so the grid
 *                 may safely be rounded up past the element count.
 */
extern "C"
__global__ void
testCopyKernel(char* g_odata, int size)
{
    const int x = blockIdx.x * blockDim.x + threadIdx.x;

    // Guard the tail of the grid: without this, surplus threads would write
    // past the end of g_odata.
    if (x >= size) {
        return;
    }

    const uchar4 temp = tex1Dfetch(tex1d, x);

    // Unpack the four channels into consecutive output bytes.
    g_odata[4 * x]     = temp.x;
    g_odata[4 * x + 1] = temp.y;
    g_odata[4 * x + 2] = temp.z;
    g_odata[4 * x + 3] = temp.w;
}

This is not giving me the correct result. The host-side steps are as follows:

  1. Load the kernel function, i.e. the CUfunction (call it m_CuFunc)

  2. Load the image
    // Load the image; cutLoadPPM4ub fills in h_data, width and height.
    cutilCheckError(cutLoadPPM4ub(image_path, &h_data, &width, &height));

    // Byte size of the image buffer: four unsigned-char channels per pixel.
    const unsigned int bytesPerPixel = 4 * sizeof(unsigned char);
    unsigned int size = width * height * bytesPerPixel;

  3. Allocate the device memory (r_data = result data, d_data = linear memory attached to the texture)
    // Two device buffers of `size` bytes each:
    //   r_data - receives the kernel output
    //   d_data - holds the input image and is later bound to the texture
    CUdeviceptr r_data = 0;
    CUdeviceptr d_data = 0;

    cutilDrvSafeCall(cuMemAlloc(&r_data, size));
    cutilDrvSafeCall(cuMemAlloc(&d_data, size));

  4. Copy the loaded image data to the device (from h_data to d_data)
    cutilDrvSafeCall (cuMemcpyHtoD(d_data, h_data, size));

  5. Set the texture parameters
    // Look up the module's texture reference by the name it has in the
    // kernel source and bind the linear device buffer d_data to it.
    CUtexref cu_texref;
    // Receives the byte offset that must be applied to fetches. For a base
    // address returned by cuMemAlloc the driver documents this offset as 0.
    unsigned int byteOffset;
    cutilDrvSafeCall(cuModuleGetTexRef(&cu_texref, cuModule, "tex1d"));
    cutilDrvSafeCall(cuTexRefSetAddress(&byteOffset, cu_texref, d_data, size));

    // With the driver API the element layout has to be set explicitly on the
    // texref; it must match the kernel's texture<uchar4, 1, ...> declaration:
    // four 8-bit unsigned components per element.
    cutilDrvSafeCall(cuTexRefSetFormat(cu_texref, CU_AD_FORMAT_UNSIGNED_INT8, 4));

    // The kernel reads with cudaReadModeElementType, so fetches must return
    // the raw integers instead of values promoted to normalized floats.
    cutilDrvSafeCall(cuTexRefSetFlags(cu_texref, CU_TRSF_READ_AS_INTEGER));

  6. Set the kernel parameters and launch the kernel
    // Pack the kernel arguments into the function's parameter buffer in
    // declaration order: (char* g_odata, int size). Each argument is placed
    // at an offset rounded up to its own alignment.
    int offset = 0;

    // Argument 0: output pointer (device address of r_data).
    void* ptr = (void*)(size_t)r_data;
    offset = (offset + __alignof(ptr) - 1) & ~(__alignof(ptr) - 1);
    cutilDrvSafeCallNoSync(cuParamSetv(m_CuFunc, offset, &ptr, sizeof(ptr)));
    offset += sizeof(ptr);

    // Argument 1: element count. The kernel receives the number of pixels
    // (width * height), not the byte size. Alignment and size are taken from
    // the value actually passed rather than from the unrelated byte-count
    // variable `size`.
    int num_pixels = (int)(width * height);
    offset = (offset + __alignof(num_pixels) - 1) & ~(__alignof(num_pixels) - 1);
    cutilDrvSafeCall(cuParamSeti(m_CuFunc, offset, num_pixels));
    offset += sizeof(num_pixels);

    // Total size of the packed parameter buffer, then attach the texref.
    cutilDrvSafeCall(cuParamSetSize(m_CuFunc, offset));
    cutilDrvSafeCall(cuParamSetTexRef(m_CuFunc, CU_PARAM_TR_DEFAULT, cu_texref));

    // One thread per pixel, 512 threads per 1D block.
    int block_size = 512;
    cutilDrvSafeCall(cuFuncSetBlockShape(m_CuFunc, block_size, 1, 1));

    // NOTE(review): this division truncates, so when num_pixels is not a
    // multiple of block_size the last (num_pixels % block_size) pixels are
    // never processed. Rounding the grid up is only safe if the kernel
    // checks its index against the element count before writing.
    int grid_width = num_pixels / block_size;

    cutilDrvSafeCall(cuLaunchGrid(m_CuFunc, grid_width, 1));

  7. Copy the data from the device to the host and save the image
    unsigned char* h_odata = (unsigned char*) malloc( size);
    cutilDrvSafeCall( cuMemcpyDtoH( h_odata, r_data, size) );

cutilCheckError( cutSavePPM4ub(output_filename, h_odata, width, height));

CUDA is not reporting any error :( …

Please let me know if there is an issue with tex1Dfetch or with uchar4 texture access.