Hi everyone, I have a problem using templates with the driver API. How should I call the kernel if it is a template? And where exactly should I use CUdeviceptr (in the kernel, on the host, or in both)?
template <class T, CUDPPOperator op>
global void compactData(T *oData,
const unsigned int *iValidFlags,
const T *iData)
{
…
}
runTest()
{
…
CU_SAFE_CALL( cuFuncSetBlockShape( transform, numThreads, 1, 1 ));
CU_SAFE_CALL( cuFuncSetSharedSize( transform, sizeSharedMemory) );
int offset = 0;
CU_SAFE_CALL(cuParamSeti(transform, offset, outArray)); offset += sizeof(outArray);
CU_SAFE_CALL(cuParamSeti(transform, offset, isValid); offset += sizeof(isValid);
CU_SAFE_CALL(cuParamSeti(transform, offset, const inArray); offset += sizeof(inArray);
CU_SAFE_CALL(cuParamSetSize(transform, offset));
CU_SAFE_CALL( cuLaunchGrid( transform, numBlocks, 1) );
…
}
Any help is appreciated… Thanks…