Can we use cudaMallocManaged to allocate memory for a smart pointer?


I am trying to write a class that calls cudaMallocManaged in its constructor. Can we allocate the memory for a smart pointer member this way?

/// Deleter for arrays created with `new DType[n]`.
///
/// Suitable as the custom deleter of a `std::shared_ptr<DType>` that owns an
/// array, since the default deleter of `std::shared_ptr<DType>` would call
/// `delete` instead of `delete[]`.
template <typename DType> struct deleter {
    /// Releases the array `ptr` points to with `delete[]`.
    /// A null `ptr` is accepted (`delete[]` on a null pointer does nothing).
    void operator()(DType const *ptr) const { delete[] ptr; }
};

/// Deleter that hands a pointer back to the CUDA runtime through `cudaFree`.
template <typename DType> struct cudaDeleter {
    /// Passes `ptr` to `cudaFree`. The status returned by `cudaFree` is ignored.
    ///
    /// `cudaFree` takes a `void *`, so the constness of `ptr` is removed with
    /// named casts instead of a C-style cast.
    void operator()(DType const *ptr) const {
        cudaFree(const_cast<void *>(static_cast<void const *>(ptr)));
    }
};

/// Array wrapper holding two buffers of `DType` behind `std::shared_ptr`s:
/// `h_array` and `uvm_array`.
///
/// Only the two constructors are defined in the visible code; the remaining
/// member functions are declared here and defined elsewhere.
template <class DType> class RamArray_UVM {
  public:
    /// Creates an object without allocating any buffer.
    RamArray_UVM();
    /// Creates an object sized for `len` elements.
    RamArray_UVM(int len);
    // void upload(); // upload
    // void download();
    void create(int len); // create a new array and fill random
    void create(int len, std::vector<DType> &vec);
    void fill_random_h();
    void fill_zero_h();
    void fill_default_one();
    void reset();

    /// Buffer owned through a `shared_ptr`; the deleter is supplied by
    /// whoever assigns the pointer.
    std::shared_ptr<DType> h_array;
    // std::shared_ptr<DType> d_array;
    /// Second buffer, also owned through a `shared_ptr` with a caller-supplied
    /// deleter (presumably the unified-memory copy - confirm against the
    /// constructor).
    std::shared_ptr<DType> uvm_array;
    // std::unique_ptr<DType, decltype(cudaFree)> uvm_array;
    // DType* uvm_array;

    /// Number of elements.
    int len;

    /// Buffer size in bytes (`len * sizeof(DType)`).
    size_t size;
};

/// Default constructor: creates an empty object.
///
/// `len` and `size` are set to zero so they never hold indeterminate values;
/// both smart pointers start out null through their own default constructors.
template <typename DType>
RamArray_UVM<DType>::RamArray_UVM() : len(0), size(0) {}

// Sizing constructor: records the element count, allocates `h_array` with
// new[], and then tries to allocate `uvm_array` with cudaMallocManaged.
template <typename DType> RamArray_UVM<DType>::RamArray_UVM(int _len) {
    len = _len;
    // Buffer size in bytes.
    size = len * sizeof(DType);
    // new[] storage, released with delete[] through the `deleter` functor.
    h_array = std::shared_ptr<DType>(new DType[len], deleter<DType>());
    // d_array = std::shared_ptr<DType>(nullptr, cudaDeleter<DType>());
    // Null shared_ptr that only carries the cudaFree-based deleter.
    uvm_array = std::shared_ptr<DType>(nullptr, cudaDeleter<DType>());
    // checkCudaError(cudaMalloc((void **)&uvm_array, size));
    // uvm_array = std::unique_ptr<DType, decltype(cudaFree)>(nullptr, cudaFree);
    // NOTE(review): this is the call the question is about. `&uvm_array` is the
    // address of a std::shared_ptr<DType>, not the address of a raw DType*.
    checkCudaError(cudaMallocManaged(&uvm_array, size));

I get a compile error when building this code.

What might be the problem?

This is not really a CUDA problem. cudaMallocManaged(&uvm_array, size) is not how to construct a shared_ptr with len elements: the function expects the address of a raw pointer, and &uvm_array is the address of the shared_ptr object itself. You need an allocator (see the C++ named requirement "Allocator") and std::allocate_shared (see the reference page for std::allocate_shared / std::allocate_shared_for_overwrite) to create the shared_ptr.

#include <cstddef>
#include <limits>
#include <memory>
#include <new>

/// Minimal allocator that serves requests from CUDA managed memory.
///
/// Storage is obtained with `cudaMallocManaged` and returned with `cudaFree`.
/// The allocator keeps no state, so all instances compare equal and storage
/// obtained through one instance may be released through any other.
template<class T>
struct Alloc{
    using value_type = T;

    Alloc() = default;

    /// Rebinding constructor: lets an `Alloc<U>` be converted to an `Alloc<T>`.
    template <class U> Alloc(const Alloc<U>&) noexcept {}

    /// Allocates uninitialized storage for `n` objects of type `T`.
    ///
    /// @return pointer to the storage, or `nullptr` when `n` is zero.
    /// @throws std::bad_array_new_length if `n * sizeof(T)` would overflow.
    /// @throws std::bad_alloc if `cudaMallocManaged` reports a failure.
    T* allocate(std::size_t n)
    {
        if (n == 0) {
            // cudaMallocManaged rejects a zero-byte request, so do not ask.
            return nullptr;
        }
        if (n > std::numeric_limits<std::size_t>::max() / sizeof(T)) {
            throw std::bad_array_new_length();
        }
        T* ptr = nullptr;
        // Any status other than cudaSuccess (which has the value 0) is a failure.
        if (cudaMallocManaged(&ptr, sizeof(T) * n) != 0) {
            throw std::bad_alloc();
        }
        return ptr;
    }

    /// Releases storage previously returned by `allocate`.
    /// The element count is not needed by `cudaFree` and is ignored.
    void deallocate(T* p, std::size_t /*n*/) noexcept
    {
        cudaFree(p);
    }
};

/// All `Alloc` instances are interchangeable, whatever their value type.
template <class T, class U>
bool operator==(const Alloc<T>&, const Alloc<U>&) noexcept { return true; }

template <class T, class U>
bool operator!=(const Alloc<T>&, const Alloc<U>&) noexcept { return false; }

// Example: create a shared_ptr that owns `numElements` ints whose storage
// comes from Alloc<int>.
int main(){
    const int numElements = 100;
    // The array form of std::allocate_shared (element type `int[]` plus an
    // element count) requires C++20.
    std::shared_ptr<int[]> foo = std::allocate_shared<int[]>(Alloc<int>{}, numElements);
    return 0;
}
1 Like