OpenTLD with CUDA on Jetson Xavier NX

I'm using the OpenTLD library with CUDA 10.2.89 and OpenCV 4.5.2 on a Jetson Xavier NX, following the instructions here, which use CUDA to speed it up (the original code targeted OpenCV 3.0 and CUDA 4, both very old). However, when I run the tracking section of OpenTLD, the program aborts with this error: terminate called after throwing an instance of

'thrust::system::system_error'
   what(): parallel_for failed: cudaErrorInvalidValue: invalid argument
Aborted (core dumped)

I think the faulty part is in the following code:

// Record the start tick, then wrap the current frame in a GpuMat.
// NOTE(review): presumably `img` is a host cv::Mat, in which case this constructor uploads it — confirm.
getCPUTick(&procInit);
cv::cuda::GpuMat gpuImg(img);
// d_inWinIndices is a pointer: it must be printed with %p. Passing a pointer to %d is
// undefined behavior and prints a truncated value on 64-bit targets.
printf("d_inWinIndices : %p - numWindows : %d\n", static_cast<void *>(d_inWinIndices), numWindows);
createIndexArray(d_inWinIndices, numWindows); // error in here

// Each filter compacts d_inWinIndices in place and updates numInWins to the survivor count.
// NOTE(review): the dynamic_cast results are dereferenced without a null check — confirm the
// filters are always the Cu* implementations on this code path.
int numInWins = numWindows;
dynamic_cast<CuVarianceFilter *>(varianceFilter)->filter(gpuImg, d_inWinIndices, numInWins);
dynamic_cast<CuEnsembleClassifier *>(ensembleClassifier)->filter(gpuImg, d_inWinIndices, numInWins);

// Copy the surviving window indices back to the host and report a failed copy instead of
// silently continuing with stale data in qualifiedWins.
cudaError_t copyStatus = cudaMemcpy(qualifiedWins, d_inWinIndices, numInWins * sizeof(int), cudaMemcpyDeviceToHost);
if(copyStatus != cudaSuccess)
    fprintf(stderr, "cudaMemcpy of qualified window indices failed: %s\n", cudaGetErrorString(copyStatus));

The function in which the error occurs is shown here:

#undef __SSE2__

#include "CUDA.h"
#include <thrust/sequence.h>
#include <thrust/remove.h>
#include <thrust/device_ptr.h>
// #include <opencv2/gpu/gpu.hpp>
#include <opencv2/core.hpp>
#include <opencv2/core/cuda.hpp>

/**
 * Fills idxArr with the ascending sequence 0, 1, ..., n-1 using thrust::sequence.
 *
 * @param idxArr  Raw pointer that is wrapped with thrust::device_pointer_cast, so it must
 *                refer to device memory with room for at least n ints. A host pointer or
 *                an undersized allocation makes the Thrust call fail at runtime.
 * @param n       Number of indices to write. Values <= 0 are treated as "nothing to do";
 *                a negative count would otherwise form an inverted iterator range.
 *
 * NOTE(review): "parallel_for failed: cudaErrorInvalidValue" raised from this call with a
 * valid pointer and count is presumably a build problem (binary not compiled for the
 * device's compute capability, e.g. sm_72 on Xavier NX) rather than a logic error — confirm
 * the nvcc -gencode / CUDA_ARCH settings.
 */
void createIndexArray(int * idxArr, int n) {
    printf("n : %d\n", n);
    printf("________function________ : createIndexArray1\n");

    // An empty or negative range has no indices to generate; never hand it to Thrust.
    if (n <= 0) {
        return;
    }
    // A null destination can only fail inside Thrust with an opaque exception; report it here.
    if (!idxArr) {
        fprintf(stderr, "createIndexArray: idxArr is null (n = %d)\n", n);
        return;
    }

    thrust::device_ptr<int> dev_ptr;
    printf("________function________ : createIndexArray2\n");
    dev_ptr = thrust::device_pointer_cast(idxArr);
    printf("________function________ : createIndexArray3\n");
    // May throw thrust::system::system_error if the underlying launch fails.
    thrust::sequence(dev_ptr, dev_ptr + n);
    printf("________function________ : createIndexArray4\n");
}

/**
 * Kernel: marks candidate windows whose variance is below minVar.
 *
 * Launch layout: flat 1D grid; thread idx = blockDim.x * blockIdx.x + threadIdx.x handles
 * entry idx of d_inWinIndices, and threads with idx >= numInWins do nothing.
 *
 * @param integralImg          Integral image, accessed as integralImg(row, col).
 * @param integralImg_squared  Integral image of squared values, accessed the same way.
 * @param windows_d            Window table; window i starts at windows_d[i * TLD_WINDOW_SIZE]
 *                             and its first four ints are read as x, y, width, height.
 * @param d_inWinIndices       In/out list of window indices; a rejected entry is overwritten
 *                             with -1 (the caller is expected to compact the list afterwards).
 * @param numInWins            Number of valid entries in d_inWinIndices.
 * @param minVar               Rejection threshold: windows with variance < minVar are marked.
 *
 * NOTE(review): a window with width * height == 0 divides by zero here; the resulting NaN
 * compares false against minVar, so such a window is kept — confirm that cannot occur.
 */
__global__ void __cudaVarianceFilter(cv::cuda::PtrStep<int> integralImg, cv::cuda::PtrStep<unsigned long long> integralImg_squared,
                                     int * windows_d, int * d_inWinIndices, int numInWins, float minVar)
{
    int idx = blockDim.x * blockIdx.x + threadIdx.x;

    // Trace once per launch. Printing from every thread serializes the kernel and
    // overflows the device printf buffer when thousands of windows are processed.
    if(idx == 0)
        printf("________function________ : __cudaVarianceFilter\n");

    if(idx < numInWins) {
        int winIdx = d_inWinIndices[idx];
        int * win = &windows_d[winIdx * TLD_WINDOW_SIZE];

        // Corner coordinates are shifted by one.
        // NOTE(review): presumably because the integral images carry an extra leading row/column — confirm.
        int x1 = win[0]+1;
        int y1 = win[1]+1;
        int w = win[2];
        int h = win[3];
        int x2 = x1 + w - 1;
        int y2 = y1 + h - 1;
        float area = w * h;

        // Four-corner lookups give the sum and the sum of squares over the window.
        float mX  = (integralImg(y2, x2) - integralImg(y1-1, x2) - integralImg(y2, x1-1) + integralImg(y1-1, x1-1)) / area;
        unsigned long long l = (integralImg_squared(y2, x2) - integralImg_squared(y1-1, x2) - integralImg_squared(y2, x1-1) + integralImg_squared(y1-1, x1-1));
        float mX2 = l / area;
        // variance = E[X^2] - (E[X])^2
        float variance = mX2 - mX * mX;

        if(variance < minVar)
            d_inWinIndices[idx] = -1;
    }
}

/**
 * Host wrapper: runs the variance filter kernel over the candidate windows and compacts
 * the index list so that only surviving windows remain.
 *
 * @param integralImg          Integral image passed to the kernel.
 * @param integralImg_squared  Integral image of squared values passed to the kernel.
 * @param windows_d            Window table passed to the kernel.
 * @param d_inWinIndices       In/out index list (wrapped with thrust::device_pointer_cast,
 *                             so it must be device memory). Entries the kernel set to -1 are
 *                             removed with thrust::remove_if.
 * @param numInWins            In: number of candidate indices. Out: number of survivors.
 * @param minVar               Variance threshold forwarded to the kernel.
 */
void cudaVarianceFilter(cv::cuda::GpuMat integralImg, cv::cuda::GpuMat integralImg_squared,
                               int * windows_d, int * d_inWinIndices, int &numInWins, float minVar)
{
    printf("________function________ : cudaVarianceFilter\n");

    // Nothing to filter. A zero-sized grid is an invalid launch configuration, so do not launch.
    if(numInWins <= 0) {
        numInWins = 0;
        return;
    }

    // Integer ceil-division: one thread per candidate window, rounded up to whole blocks.
    const int threadsPerBlock = VAR_FILT_BLOCK_SIZE;
    dim3 gridSize((numInWins + threadsPerBlock - 1) / threadsPerBlock);
    dim3 blockSize(threadsPerBlock);
    __cudaVarianceFilter<<<gridSize, blockSize>>>(integralImg, integralImg_squared, windows_d, d_inWinIndices, numInWins, minVar);
    cudaCheckErrors(0);

    // Wait for the kernel to finish before compacting. The event is destroyed afterwards;
    // creating one per call without destroying it leaks a driver resource on every call.
    cudaEvent_t finished;
    cudaEventCreate(&finished);
    cudaEventRecord(finished);
    cudaEventSynchronize(finished);
    cudaEventDestroy(finished);

    // Drop the entries the kernel marked as rejected and report how many remain.
    thrust::device_ptr<int> idxArr = thrust::device_pointer_cast(d_inWinIndices);
    thrust::device_ptr<int> end = thrust::remove_if(idxArr, idxArr + numInWins, is_negative());
    numInWins = end - idxArr;
}

My English is not good, so please forgive any mistakes. If you know of a solution to this problem, please help me. Thank you, and have a nice day!

Hi,

We found a similar issue below, please give it a look to see if it can give you some hints.

It looks like the error is caused by the thrust library.
Have you tried the source on another platform before?

Would you mind helping to check if the error occurs in the following two lines?

thrust::device_ptr<int> idxArr = thrust::device_pointer_cast(d_inWinIndices);
thrust::device_ptr<int> end = thrust::remove_if(idxArr, idxArr + numInWins, is_negative());

Thanks.