I built OpenCV 3.4.20-dev with CUDA 11.8 and the OpenCV extra modules (opencv_contrib) using CMake, and the build completed without problems. The library works in general, but I cannot use cuda::HOG; it fails with the error below:
OpenCV(3.4.20-dev) Error: Gpu API call (invalid device symbol) in cv::cuda::device::hog::set_up_constants, file C:\Users\Hiran\Documents\GitHub\opencv\modules\cudaobjdetect\src\cuda\hog.cu, line 107
OpenCV(3.4.20-dev) C:\Users\Hiran\Documents\GitHub\opencv\modules\cudaobjdetect\src\cuda\hog.cu:107: error: (-217:Gpu API call) invalid device symbol in function 'cv::cuda::device::hog::set_up_constants'
I set CUDA_ARCH_BIN to 8.6 because my GPU is an RTX 3070. I also tried building with CUDA 11.6, but the same error occurs.
This is the code in hog.cu that the error points to:
// Uploads the HOG layout parameters to device symbols.
//
// Each argument is copied with cudaMemcpyToSymbolAsync, on `stream`, into the
// symbol that carries the same name with a `c` prefix (cnbins,
// cblock_stride_x, ...). Those symbols are declared outside this function
// (presumably as __constant__ variables -- confirm against their
// declarations). Four values derived from the arguments are uploaded too:
//
//   cblock_hist_size     = nbins * ncells_block_x * ncells_block_y
//   cblock_hist_size_2up = power_2up(cblock_hist_size)
//   cdescr_width         = nblocks_win_x * cblock_hist_size
//   cdescr_size          = cdescr_width * nblocks_win_y
//
// Every copy is wrapped in cudaSafeCall. This function issues no explicit
// synchronization on `stream`.
void set_up_constants(int nbins,
                      int block_stride_x, int block_stride_y,
                      int nblocks_win_x, int nblocks_win_y,
                      int ncells_block_x, int ncells_block_y,
                      const cudaStream_t& stream)
{
    // Plain integer arithmetic on the arguments; computed up front so the
    // uploads below read as one uniform sequence.
    const int hist_len  = nbins * ncells_block_x * ncells_block_y;
    const int row_len   = nblocks_win_x * hist_len;
    const int total_len = row_len * nblocks_win_y;

    // Parameters passed in by the caller.
    cudaSafeCall(cudaMemcpyToSymbolAsync(cnbins,          &nbins,          sizeof(int), 0, cudaMemcpyHostToDevice, stream));
    cudaSafeCall(cudaMemcpyToSymbolAsync(cblock_stride_x, &block_stride_x, sizeof(int), 0, cudaMemcpyHostToDevice, stream));
    cudaSafeCall(cudaMemcpyToSymbolAsync(cblock_stride_y, &block_stride_y, sizeof(int), 0, cudaMemcpyHostToDevice, stream));
    cudaSafeCall(cudaMemcpyToSymbolAsync(cnblocks_win_x,  &nblocks_win_x,  sizeof(int), 0, cudaMemcpyHostToDevice, stream));
    cudaSafeCall(cudaMemcpyToSymbolAsync(cnblocks_win_y,  &nblocks_win_y,  sizeof(int), 0, cudaMemcpyHostToDevice, stream));
    cudaSafeCall(cudaMemcpyToSymbolAsync(cncells_block_x, &ncells_block_x, sizeof(int), 0, cudaMemcpyHostToDevice, stream));
    cudaSafeCall(cudaMemcpyToSymbolAsync(cncells_block_y, &ncells_block_y, sizeof(int), 0, cudaMemcpyHostToDevice, stream));

    // Derived sizes.
    cudaSafeCall(cudaMemcpyToSymbolAsync(cblock_hist_size, &hist_len, sizeof(int), 0, cudaMemcpyHostToDevice, stream));

    // power_2up is defined elsewhere in the file; presumably it rounds up to
    // a power of two -- confirm against its definition.
    const int hist_len_2up = power_2up(hist_len);
    cudaSafeCall(cudaMemcpyToSymbolAsync(cblock_hist_size_2up, &hist_len_2up, sizeof(int), 0, cudaMemcpyHostToDevice, stream));

    cudaSafeCall(cudaMemcpyToSymbolAsync(cdescr_width, &row_len,   sizeof(int), 0, cudaMemcpyHostToDevice, stream));
    cudaSafeCall(cudaMemcpyToSymbolAsync(cdescr_size,  &total_len, sizeof(int), 0, cudaMemcpyHostToDevice, stream));
}