Preventing the copy of thrust device_vector to device

So I have a helper class (creatively named “BetterVector”) that is designed to be passed back and forth from host and device, with most of its functionality accessible from either side (a significant flaw of device_vector). However, kernels fail with a non-descriptive allocation error.
From the stack trace, it appears to trigger sometimes on the copy constructor, and sometimes on the deconstructor, and I’m not entirely sure why it changes. I figured it was the device_vector data member having a host-only constructor and deconstructor, which I used the following post to utilize a union to prevent the calling of these functions, but the issue still persists. If any of you have any suggestions, it would be greatly appreciated.

main.cu testing file:

#include <abstract/BetterVector.cuh>

struct thrust_functor {
    abstract::BetterVector<int> vector;

    explicit thrust_functor(const abstract::BetterVector<int> &vector) : vector(vector) {}

    __host__ void operator()(int i) {
        printf("Thrust functor index %d: %d\n", i, (int) vector[i]);
    }
};

__global__ void baseCudaPrint(abstract::BetterVector<int>* ptr) {
    const size_t i = blockIdx.x * blockDim.x + threadIdx.x;
    abstract::BetterVector<int> vector = *ptr;
    printf("Cuda kernel index %zu: %d\n", i, (int) vector[i]);
}


int main() {
    abstract::BetterVector<int> vector({1, 2, 3, 4});
    for (int i = 0; i < 4; i++) {
        printf("Host index %d: %d\n", i, (int) vector[i]);
    }
    printf("\n");

    abstract::BetterVector<int>* devVectorPtr;
    cudaMalloc(&devVectorPtr, sizeof(abstract::BetterVector<int>));
    cudaMemcpy(devVectorPtr, &vector, 1, cudaMemcpyHostToDevice);
    baseCudaPrint<<<1, vector.size()>>>(devVectorPtr);
    cudaDeviceSynchronize();
    cudaFree(devVectorPtr);
    printf("\n");

    thrust::counting_iterator<int> first(0);
    thrust::counting_iterator<int> last = first + vector.size();
    thrust::for_each(thrust::host, first, last, thrust_functor(vector));
    cudaDeviceSynchronize();
    printf("\n");
}

abstract/BetterVector.cuh:

#include <thrust/device_vector.h>
#include <thrust/device_ptr.h>
#include <thrust/functional.h>

namespace abstract {
template<typename T>
    struct equal_to : public thrust::unary_function<T, bool> {
        T lhs;

        __device__ __host__ explicit equal_to(T lhs) : lhs(lhs) {}

        __device__ __host__ bool operator()(T rhs) {
            return lhs == rhs;
        }
    };
template<typename T, typename VecType = thrust::device_vector<T>>
class BetterVector {
protected:
    typename VecType::pointer raw;
    size_t cachedSize;
    union {
        VecType vector;
    };

public:

    __host__ BetterVector() : vector(), raw(vector.data()), cachedSize(0) {}

    __host__ explicit BetterVector(size_t size) : vector(size), raw(vector.data()), cachedSize(size) {}

    __host__ explicit BetterVector(VecType vec) : vector(vec), raw(vector.data()), cachedSize(vec.size()) {}

    __host__ explicit BetterVector(std::vector<T> vec) : vector(vec), raw(vector.data()), cachedSize(vec.size()) {}

    __host__ __device__ BetterVector(const BetterVector &otherVec) :
#ifndef __CUDA_ARCH__
            vector(otherVec.vector),
#endif
            cachedSize(otherVec.cachedSize), raw(otherVec.raw) {}


    __host__ __device__ virtual ~BetterVector() {
#ifndef __CUDA_ARCH__
        vector.~VecType();
#endif
    }

    __host__ __device__ typename VecType::const_reference operator[](size_t index) const {
#ifdef __CUDA_ARCH__
        return raw[index];
#else
        return vector[index];
#endif
    }

    __host__ __device__ size_t size() const {
#ifdef __CUDA_ARCH__
        return cachedSize;
#else
        return vector.size();
#endif
    }
}