I’m diving into CUDA at the moment and am trying to use dynamic parallelism on my remote machine, which runs Ubuntu 18.04 LTS and has 4 Tesla V100 GPUs.
My code looks as follows (“slightly” modified):
// Upper bound of the left-set loop in executeMatching; also used there as the
// left length passed to hammingDistance and in main as the left allocation count.
#define LSIZE 5
// Upper bound of the right-set loop in executeMatching; also used there as the
// right length passed to hammingDistance and in main as the right allocation count.
#define RSIZE 5
// Not referenced by the visible code — presumably the number of bools in one
// descriptor; TODO confirm against the elided parts.
#define LENGTH 5
// ...
// Adds the Hamming distance between two boolean sequences to *out.
//
// Parameters:
//   left, right    - sequences to compare; read from device code.
//   size_l, size_r - number of elements in left and right.
//   out            - accumulator. The count of differing positions is ADDED to
//                    the value already stored there, so the caller must
//                    initialise it. Overwritten with -1 when the sizes differ.
//
// Every thread that runs this kernel walks the whole input and updates *out
// without atomics, so it is meant to be launched as <<<1, 1>>>.
__global__ void hammingDistance(const bool* left, const size_t size_l, const bool* right, const size_t size_r, int* out)
{
    // Sequences of different length have no Hamming distance: report -1.
    if (size_l != size_r) {
        *out = -1;
        return;
    }

    // Count locally and touch global memory once instead of once per element.
    int mismatches = 0;
    // size_t index: matches the type of size_l, so no signed/unsigned
    // comparison and no overflow for lengths above INT_MAX.
    for (size_t i = 0; i < size_l; ++i) {
        mismatches += left[i] ^ right[i];  // bool XOR is 1 exactly when the elements differ
    }
    *out += mismatches;
}
// Kernel that pairs every entry of the left descriptor set with every entry of
// the right descriptor set and launches one single-thread hammingDistance
// child grid per pair (a kernel launch from device code).
//
// Parameters:
//   leftDescriptorSet  - indexed 0 .. LSIZE-1; each entry is handed on as a bool*.
//   rightDescriptorSet - indexed 0 .. RSIZE-1; each entry is handed on as a bool*.
//
// Both tables are dereferenced inside the kernel, so they (and the descriptors
// they point to) must live in memory that device code can read.
__global__ void executeMatching(bool** leftDescriptorSet, bool** rightDescriptorSet)
{
for (size_t iLeft = 0; iLeft < LSIZE; ++iLeft) {
bool* lDesc = leftDescriptorSet[iLeft];
for (size_t iRight = 0; iRight < RSIZE; ++iRight) {
bool* rDesc = rightDescriptorSet[iRight];
// Fresh zero-initialised accumulator for this pair, allocated from device code.
// NOTE(review): no matching delete is visible in this block — confirm the
// elided code releases it, otherwise every pair leaks one int.
int* sum = new int(0);
// NOTE(review): LSIZE / RSIZE (the set sizes) are passed as the descriptor
// lengths and LENGTH is unused here — confirm this is intended.
// NOTE(review): kernel launches are asynchronous, so *sum is presumably not
// final right after this line — confirm the elided code waits for the child
// grid before reading it.
hammingDistance<<<1, 1>>>(lDesc, LSIZE, rDesc, RSIZE, sum);
// ...
}
}
}
// ...
// Host entry point: allocates the example descriptor sets, runs
// executeMatching on them and waits for the GPU to finish.
//
// Returns 0 on success and 1 when an allocation, the launch or the kernel
// execution fails.
int main() {
    // ...
    // example data
    // Flat managed storage: LSIZE (resp. RSIZE) descriptors of LENGTH bools each.
    // NOTE(review): executeMatching passes LSIZE / RSIZE as the descriptor
    // length; these equal LENGTH today — keep them in sync.
    bool *dev_aSetPtr = nullptr, *dev_bSetPtr = nullptr;
    // executeMatching indexes its arguments as tables of descriptor pointers and
    // dereferences them on the device. Passing &dev_aSetPtr would hand it the
    // address of a host stack variable holding a single pointer, so real tables
    // are built in managed memory instead.
    bool **dev_aSet = nullptr, **dev_bSet = nullptr;

    if (cudaMallocManaged(&dev_aSetPtr, (size_t)LSIZE * LENGTH * sizeof(bool)) != cudaSuccess ||
        cudaMallocManaged(&dev_bSetPtr, (size_t)RSIZE * LENGTH * sizeof(bool)) != cudaSuccess ||
        cudaMallocManaged(&dev_aSet, LSIZE * sizeof(bool*)) != cudaSuccess ||
        cudaMallocManaged(&dev_bSet, RSIZE * sizeof(bool*)) != cudaSuccess) {
        // cudaFree(nullptr) is a no-op, so partial allocations are handled too.
        cudaFree(dev_aSetPtr);
        cudaFree(dev_bSetPtr);
        cudaFree(dev_aSet);
        cudaFree(dev_bSet);
        return 1;
    }

    // Entry i of each table points at the i-th descriptor in the flat storage.
    for (size_t i = 0; i < LSIZE; ++i) {
        dev_aSet[i] = dev_aSetPtr + i * LENGTH;
    }
    for (size_t i = 0; i < RSIZE; ++i) {
        dev_bSet[i] = dev_bSetPtr + i * LENGTH;
    }
    // ...
    executeMatching<<<1, 1>>>(dev_aSet, dev_bSet);
    // A launch reports configuration errors through cudaGetLastError(); faults
    // inside the kernel only surface at the next synchronising call.
    cudaError_t status = cudaGetLastError();
    if (status == cudaSuccess) {
        status = cudaDeviceSynchronize();
    }
    // ...
    cudaFree(dev_aSet);
    cudaFree(dev_bSet);
    cudaFree(dev_aSetPtr);
    cudaFree(dev_bSetPtr);
    return status == cudaSuccess ? 0 : 1;
}
When compiling using
/usr/bin/nvcc /home/tibor/cuda_hm/hamming_matcher.cu -o /home/tibor/cuda_hm/hamming_matcher -gencode arch=compute_70,code=sm_70 -rdc=true
I keep getting the following linker errors:
nvlink error : Undefined reference to 'cudaGetParameterBufferV2' in '/tmp/tmpxft_00006ca8_00000000-10_hamming_matcher.o'
nvlink error : Undefined reference to 'cudaLaunchDeviceV2' in '/tmp/tmpxft_00006ca8_00000000-10_hamming_matcher.o'
The terminal process terminated with exit code: 255
Is there something wrong with my CUDA Toolkit installation?