Hi All,
I am binding linear device memory to a 1D CUDA texture, reading it in a kernel, and writing the data back out. [This is just a small sample test using the driver API.]
Here is the kernel code:
// 1D texture reference of uchar4 texels, read back unconverted
// (cudaReadModeElementType). The host binds it by the name "tex1d".
texture<uchar4, 1, cudaReadModeElementType> tex1d;

// Copies `size` texels from tex1d into g_odata, 4 bytes per texel
// (x, y, z, w in that order).
//
//   g_odata : output buffer; must hold at least 4 * size bytes.
//   size    : number of uchar4 texels to copy.
//
// Launch layout: 1D grid of 1D blocks. The grid-stride loop makes the kernel
// correct for any grid size >= 1: threads past `size` do nothing, and texels
// beyond gridDim.x * blockDim.x are picked up on later iterations.
extern "C"
__global__ void
testCopyKernel(char* g_odata, int size)
{
    const int stride = gridDim.x * blockDim.x;
    for (int x = blockIdx.x * blockDim.x + threadIdx.x; x < size; x += stride) {
        // tex1Dfetch takes an integer texel index into the bound linear memory.
        const uchar4 temp = tex1Dfetch(tex1d, x);
        g_odata[4 * x]     = temp.x;
        g_odata[4 * x + 1] = temp.y;
        g_odata[4 * x + 2] = temp.z;
        g_odata[4 * x + 3] = temp.w;
    }
}
This does not give me the correct result. The host-side steps are:
-
Load the kernel function, i.e. get a CUfunction handle (let's call it m_CuFunc).
-
Load the image
// Load the PPM; h_data, width and height are passed by address as outputs.
// (The "4ub" suffix presumably means 4 unsigned bytes per pixel - confirm against cutil.)
cutilCheckError(cutLoadPPM4ub(image_path, &h_data, &width, &height));
// Image size in bytes: 4 one-byte channels for each of the width * height pixels.
unsigned int size = 4 * sizeof(unsigned char) * width * height;
- Allocate the device memory (r_data = result data, d_data = linear memory that is attached to the texture)
// Two device buffers of `size` bytes each:
//   r_data - receives the kernel output,
//   d_data - holds the input image and is later bound to the texture.
CUdeviceptr r_data = 0;
CUdeviceptr d_data = 0;
cutilDrvSafeCall(cuMemAlloc(&r_data, size));
cutilDrvSafeCall(cuMemAlloc(&d_data, size));
-
Copy the loaded image data to the device (from h_data to d_data)
// Upload the host image (h_data) into the device buffer d_data.
cutilDrvSafeCall(cuMemcpyHtoD(d_data, h_data, size));
-
Set the texture parameters
// Look up the module's texture reference "tex1d" and bind the linear buffer
// d_data (size bytes) to it.
CUtexref cu_texref;
unsigned int byteOffset;
cutilDrvSafeCall(cuModuleGetTexRef(&cu_texref, cuModule, "tex1d"));
// With the driver API the texel layout has to be described explicitly; it must
// match the kernel's texture<uchar4, 1, cudaReadModeElementType> declaration:
// 4 channels of unsigned 8-bit integers, returned as integers rather than
// promoted to floating point. The format is set before the address is bound.
cutilDrvSafeCall(cuTexRefSetFormat(cu_texref, CU_AD_FORMAT_UNSIGNED_INT8, 4));
cutilDrvSafeCall(cuTexRefSetFlags(cu_texref, CU_TRSF_READ_AS_INTEGER));
// byteOffset receives the offset that must be added to texel fetches when the
// base address is not suitably aligned; it is not used further here.
cutilDrvSafeCall(cuTexRefSetAddress(&byteOffset, cu_texref, d_data, size));
-
Set kernel parameters & Launch the kernel
int offset = 0;
void* ptr = (void*)(size_t)r_data;
offset = (offset + __alignof(ptr) - 1) & ~(__alignof(ptr) - 1); // adjust offset to meet alignment requirement
cutilDrvSafeCallNoSync(cuParamSetv( m_CuFunc, offset, &ptr, sizeof(ptr)));
offset += sizeof(ptr);
offset = (offset + __alignof(size) - 1) & ~(__alignof(size) - 1);
cutilDrvSafeCall(cuParamSeti( m_CuFunc, offset, (width * height)));
offset += sizeof(size);
cutilDrvSafeCall(cuParamSetSize( m_CuFunc, offset));
cutilDrvSafeCall(cuParamSetTexRef( m_CuFunc, CU_PARAM_TR_DEFAULT, cu_texref));
int block_size = 512;
cutilDrvSafeCall(cuFuncSetBlockShape( m_CuFunc, block_size, 1, 1 ));
int grid_width = (width*height)/(block_size);
cutilDrvSafeCall(cuLaunchGrid( m_CuFunc, grid_width, 1 ));
- Copy the data from device to host and save the image
unsigned char* h_odata = (unsigned char*) malloc( size);
cutilDrvSafeCall( cuMemcpyDtoH( h_odata, r_data, size) );
cutilCheckError( cutSavePPM4ub(output_filename, h_odata, width, height));
CUDA is not reporting any error :(
Please let me know if there is an issue with how I am using tex1Dfetch or with uchar4 texture access.