CUDA copying nested structures

I have a struct that looks similar to this one:

/**
 * Basic wrapper around a device buffer holding `size` elements of type T.
 *
 * All constructors are host-only and allocate the buffer with cudaMalloc.
 * Every CUDA runtime call is wrapped in CUDA_ASSERT, which is defined
 * elsewhere (presumably it reports a failing cudaError_t - verify).
 *
 * NOTE(review): no destructor or assignment operator is visible in this
 * extract, so nothing here releases `data`; confirm the full class does.
 */
template <class T>
struct array {
    /**
     * Allocates device storage for `s` elements. The storage is left
     * uninitialized.
     *
     * @param s number of elements to allocate
     */
    __host__ array(uint64_t s) : data(nullptr), size(s) {
        CUDA_ASSERT(cudaMalloc(reinterpret_cast<void**>(&data), size * sizeof(T)));
    }

    /**
     * Allocates device storage for `s` elements and initializes each one
     * with a copy of `fill`.
     *
     * The copies are built in a temporary host buffer and then transferred
     * to the device with a single cudaMemcpy.
     *
     * @param s    number of elements to allocate
     * @param fill value copy-constructed into every element
     */
    __host__ array(uint64_t s, const T& fill) : data(nullptr), size(s) {
        CUDA_ASSERT(cudaMalloc(reinterpret_cast<void**>(&data), size * sizeof(T)));

        // Raw, unconstructed host storage used as a staging buffer.
        T* staging = static_cast<T*>(::operator new(size * sizeof(T)));

        // Copy-construct the fill value into every slot. Index from the base
        // pointer so that neither `staging` nor `size` is modified: both are
        // still needed for the memcpy and the deallocation below.
        for (uint64_t i = 0; i < size; ++i) {
            new (static_cast<void*>(staging + i)) T(fill);
        }

        // Copy the whole staging buffer over to the device.
        CUDA_ASSERT(cudaMemcpy(data, staging, size * sizeof(T), cudaMemcpyHostToDevice));

        // Release the raw host storage through the pointer that
        // ::operator new returned. The staged elements' destructors are not
        // run; only their storage is freed.
        ::operator delete(static_cast<void*>(staging));
    }

    /**
     * Copy constructor: allocates a new device buffer of the same size as
     * `other` and copies its contents device-to-device.
     *
     * @param other array whose contents are duplicated
     */
    __host__ array(const array<T>& other) : data(nullptr), size(other.size) {
        CUDA_ASSERT(cudaMalloc(reinterpret_cast<void**>(&data), size * sizeof(T)));
        CUDA_ASSERT(cudaMemcpy(data, other.data, size * sizeof(T), cudaMemcpyDeviceToDevice));
    }

private:
    T* data;        // pointer filled in by cudaMalloc
    uint64_t size;  // number of T elements in `data`
};

It's a basic array wrapper that I'm writing for my little library. Everything works fine until I try to create an array of arrays:

array<array<int>> bleh(3, array<int>(3)); // expected result: an outer array of 3 elements, each a copy of a 3-int array (3x3 ints overall)

Instead, I have the outer array working properly, and the inner one is a nullptr - how can I fix this?
Note that I extracted the above code from my actual code, where all the ::operator new/delete calls are abstracted away behind an allocator struct.

Solved, I had some incorrect pointer math in my other code.

This topic was automatically closed 14 days after the last reply. New replies are no longer allowed.