I am working with CUDA and Thrust and am running into some problems. I want to use std::vector inside a CUDA kernel, but I get an error saying that this is not allowed. I therefore switched to the Thrust library, but I face the same problem there: a Thrust vector cannot be used inside a CUDA kernel either. My main aim is to use Thrust's resize inside a CUDA kernel. I am posting my code below; please help me.
Here is a description of my code. In my project structure, the main class Detector inherits from the class Managed, which allocates the memory for the main class object as shown below. new is an overloaded operator function.
// Base class that routes `new` / `delete` of derived objects through
// cudaMallocManaged / cudaFree, so the object itself lives in memory obtained
// from the CUDA managed allocator.
//
// Only the `len` bytes of the object are allocated here; storage that a member
// allocates on its own (for example the buffer owned by a std::vector member)
// does not come from this operator.
//
// CUDA_CHECK_RETURN is defined outside this excerpt; it is used here the same
// way for both calls so that a failing allocation and a failing release are
// reported consistently.
class Managed
{
public:
    // Allocates `len` bytes with cudaMallocManaged and returns the pointer.
    void *operator new(size_t len) {
        // Initialised so that a failed allocation can never hand back an
        // indeterminate pointer, whatever CUDA_CHECK_RETURN does on error.
        void *ptr = NULL;
        CUDA_CHECK_RETURN(cudaMallocManaged(&ptr, len));
        // cudaDeviceSynchronize();
        return ptr;
    }

    // Releases memory previously returned by operator new.
    void operator delete(void *ptr) {
        // cudaDeviceSynchronize();
        // The result of cudaFree was previously discarded; check it like the
        // allocation above.
        CUDA_CHECK_RETURN(cudaFree(ptr));
    }
};
// Detector derives from Managed, so `new Detector` obtains the memory for the
// object itself through Managed::operator new (cudaMallocManaged).
// NOTE(review): other members used by the functions in this file (Hog, rescale,
// width_, height_, depth_, interval, scale) are not declared in this excerpt —
// presumably omitted from the post; confirm against the real header.
class Detector : public Managed
{
public:
Detector() {};
// NOTE(review): Managed::operator new only allocates the Detector object; the
// element storage of these std::vector members is allocated by the vectors
// themselves. std::vector member functions are host code, and the reported
// illegal-address fault is on a bits_.resize(...) call executed inside a kernel.
// Presumably these need to become device-accessible buffers sized on the host —
// confirm the intended design.
std::vector<uint8_t> bits_;
std::vector<Level> levels_;
// NOTE(review): declared with one parameter here, while the out-of-class
// definition shown later takes (Detector *detecobj, Detector *ptr) — verify
// which signature is intended.
void dpp(Detector *detecobj);
};
// Device-side global pointer to a Detector. Nothing in the visible code assigns
// it, and dpp, DetectKernal, Hog and rescale each take a parameter that is also
// named `ptr`, which hides this global inside those functions.
// NOTE(review): confirm whether this global is still needed.
__device__ Detector *ptr;
In main() we create the object and pass the object of the class to the kernel. Other than this, we are not using any other memory allocation. A few variables are also declared as managed. Almost all member functions, variables and pointers are declared inside the class only.
// File-scope variables declared __managed__, both initialised to zero.
// They are not referenced anywhere in the visible code.
// NOTE(review): presumably shared between host and device code elsewhere —
// confirm where they are read and written.
__device__ __managed__ char ch = 0;
__device__ __managed__ double ticks = 0;
// Program entry point: creates one Detector, runs dpp on it, and releases it.
//
// argc / argv are accepted but not used in the visible code.
// Returns 0 after dpp has returned and the Detector has been released.
int main(int argc, char *argv[])
{
    vector<Rect> detections;
    vector<Detection> detections1, detections2;

    // Detector derives from Managed, so this `new` goes through
    // Managed::operator new (cudaMallocManaged).
    Detector *detectobj = new Detector;
    detectobj->dpp(detectobj);

    // The object was never released before; `delete` runs the destructor and
    // hands the memory back through Managed::operator delete (cudaFree).
    delete detectobj;
    return 0;
}
Function:
// Launches DetectKernal with a single block of a single thread and waits for
// it to finish.
//
// detecobj: object passed to the kernel; when it is NULL nothing is launched.
// ptr:      second object forwarded unchanged to the kernel.
//
// Both the launch and the subsequent synchronisation are checked with
// CUDA_CHECK_RETURN (defined outside this excerpt), so a bad launch
// configuration or a fault raised while the kernel runs is reported here
// instead of being silently dropped.
void Detector::dpp(Detector *detecobj ,Detector *ptr)
{
    if (detecobj == NULL)
        return;

    printf("DetectKernal.......\n");
    DetectKernal<<<1, 1>>>(detecobj, ptr);

    // A kernel launch returns no status itself; launch errors are picked up
    // with cudaGetLastError().
    CUDA_CHECK_RETURN(cudaGetLastError());

    // Errors raised during kernel execution surface at the next synchronising
    // call, so its return value must not be discarded.
    CUDA_CHECK_RETURN(cudaDeviceSynchronize());
}
Kernel:
// Kernel entry point. Each thread computes its flat global index, prints it,
// and the threads with index 0 and 1 call Hog on `detecobj`, forwarding both
// pointers unchanged.
__global__ void DetectKernal(Detector *detecobj , Detector *ptr)
{
    const int globalIdx = threadIdx.x + blockDim.x * blockIdx.x; //TO DO
    printf("DetectKernal: Idenx is %d\n", globalIdx);

    // Threads outside the first two indices have nothing to do.
    if (globalIdx >= 2)
        return;

    detecobj->Hog(detecobj, ptr);
}
Hog Function:
// Iterates over `detecobj->interval` levels; for level i it stores
// 2^(-i) in the member `scale`, prints it, and calls rescale when the value
// lies in (0, 1].
//
// detecobj: object whose `interval` bounds the loop and on which rescale is
//           invoked.
// ptr:      forwarded unchanged to rescale.
//
// NOTE(review): `scale` is not declared in this excerpt; it is assigned the
// result of pow() and compared against 0.0 and 1.0, so it is treated here as a
// floating-point member — confirm its declared type.
__host__ __device__ void Detector::Hog( Detector *detecobj ,Detector *ptr)
{
    for (int i = 0; i < detecobj->interval; ++i)
    {
        scale = pow(2.0, -static_cast<double>(i) / 1);
        // Printed with %f: the value comes from pow(), and passing a
        // floating-point argument to %d does not print the scale.
        printf("Inside Hog() : scale is %f \n", scale);

        //empty image check
        if (scale <= 0.0)
            continue;

        // Same scale
        if (scale > 0.0 && scale <= 1.0)
        {
            detecobj->rescale(detecobj, ptr); //TO DO
        }
    }
}
The Hog function above calls the rescale function:
// Computes the scaled dimensions of `detecobj` using the member `scale`,
// stores them (and the unchanged depth) in `ptr`, prints them, and — on the
// host only — resizes ptr->bits_ to width * height * depth.
//
// detecobj: source of width_, height_ and depth_.
// ptr:      destination whose width_, height_, depth_ (and, on the host,
//           bits_) are updated.
//
// std::vector::resize is host code: it cannot run inside a kernel, and the
// reported "Warp Illegal Address" fault is on exactly that call. The resize is
// therefore compiled only into the host pass of this __host__ __device__
// function. In the device pass the dimensions are still updated, but bits_ is
// left untouched and a diagnostic is printed; its storage has to be sized on
// the host before the kernel is launched.
// NOTE(review): for real device-side use bits_ presumably has to become a
// device-accessible buffer rather than a std::vector — confirm the design.
__host__ __device__ void Detector::rescale(Detector *detecobj, Detector *ptr )
{
    const int width = ceil(detecobj->width_ * scale);
    const int height = ceil(detecobj->height_ * scale);

    ptr->width_ = width;
    ptr->height_ = height;
    ptr->depth_ = detecobj->depth_;
    printf(" width is = ,height is = depth_ is = %d %d %d \n",ptr->width_ ,ptr->height_,ptr->depth_);

#ifndef __CUDA_ARCH__
    // Host pass: the vector can be resized normally.
    ptr->bits_.resize(width * height * detecobj->depth_); //TO DO
#else
    // Device pass: no reallocation is possible here; report it instead of
    // executing host-only code on the GPU.
    printf("rescale: bits_ cannot be resized in device code; size it on the host before the launch\n");
#endif
}
ptr->bits_.resize(width * height * detecobj->depth_);
I get the error described below on the line above. My major concern is calling vector methods (such as resize and swap) inside a CUDA kernel/device function.
CUDA Exception: Warp Illegal Address The exception was triggered at PC 0x979b98 (detector.cu:47)Program received signal CUDA_EXCEPTION_14, Warp Illegal Address.