I have this problem with in my code:
no instance of overloaded function “cub::BlockLoad<InputT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH>::Load [with InputT=__nv_bool *, BLOCK_DIM_X=240, ITEMS_PER_THREAD=1, ALGORITHM=cub::BLOCK_LOAD_DIRECT, BLOCK_DIM_Y=1, BLOCK_DIM_Z=1, PTX_ARCH=600]” matches the argument list
template<int blockthreads>
__global__ static void cudaSOSInRegion (
unsigned int * message,
const unsigned int * region,
bool * connexionMatrix, unsigned int * result,
const int totalNbFanals,
const unsigned int regionLength,
const unsigned int memoryEffect)
{
unsigned long idx = blockIdx.x * blockDim.x
+ threadIdx.x;
if (idx < totalNbFanals)
{
// Specialize BlockStore for a 1D block of 256 threads owning 1 boolean items each
typedef cub::BlockLoad<bool*, blockthreads, 1> BlockLoad;
// Allocate shared memory for BlockStore
__shared__ typename BlockLoad::TempStorage temp_storage;
// Obtain a segment of consecutive items that are blocked across threads
bool thread_data[1];
unsigned long messageId = message[blockIdx.y];
unsigned long partie1 = (messageId
* totalNbFanals)
+ (blockIdx.x * blockDim.x);
BlockLoad(temp_storage).Load(
connexionMatrix + partie1,
thread_data);
if (thread_data[0] > 0)
{
atomicAdd(result + idx,
1
+ (messageId == idx)
* memoryEffect);
}
}
}
These are the parameters according to CUB: cub::BlockLoad< InputT, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH > Class Template Reference
InputT The data type to read into (which must be convertible from the input iterator’s value type).
BLOCK_DIM_X The thread block length in threads along the X dimension
ITEMS_PER_THREAD The number of consecutive items partitioned onto each thread.
Do you know what could be the problem ?
Thanks you