cudaMemcpyFromSymbol Access violation reading location error

Goubermouche · August 27, 2022, 12:30pm

Hey, I have a struct that looks something like this:

template<class T>
struct MyArray {
    T* data;
    int elementCount;

    __host__ MyArray<T> UploadToDevice() {
        Array3D<T> device = *this;
        COMPUTE_SAFE(cudaMalloc((void**)&device.data, sizeof(T) * elementCount));
        COMPUTE_SAFE(cudaMemcpy(device.data, data, sizeof(T) * elementCount, cudaMemcpyHostToDevice));
        return device;
    }

    __host__ MyArray<T> UploadToHost() {
        Array3D<T> host = *this;
        COMPUTE_SAFE(cudaMalloc((void**)&host.data, sizeof(T) * elementCount));
        COMPUTE_SAFE(cudaMemcpy(host.data, data, sizeof(T) * elementCount, cudaMemcpyDeviceToHost));
        return host;
    }
}

After initializing an instance of the struct I’m trying to copy it to the GPU and then back to host, but I get an Access violation reading location error. This is what my upload and download code looks like:

__device__ MyArray<float> deviceArray; 

void Test() {
    MyArray<float> host; // zero initialized, element count was also correctly set
    // Copy to the GPU
    MyArray<float> device = host.UploadToDevice();
    COMPUTE_SAFE(cudaMemcpyToSymbol(deviceArray, &device, sizeof(device)));
    // Copy back from the GPU to host
    host = device.UploadToHost();
    COMPUTE_SAFE(cudaMemcpyFromSymbol(&host, deviceArray, sizeof(host), 0, cudaMemcpyDeviceToHost));

    printf("%.2f", host.data[0]); // expected output is '0.00' 
}

After the printf statement gets called an Access violation reading location error gets raised. Any ideas about what could be going wrong here? Thanks in advance :smile: .

Goubermouche · August 28, 2022, 9:49am

Alright, so I’ve managed to do this with 1 instance of MyArray, but whenever I try allocating another one I get an Invalid argument error.

I have added two new utility methods to the MyArray struct:

template <class S> 
__host__ void UploadToDevice(Array3D<T>& device, const S& symbol) {
	COMPUTE_SAFE(cudaMalloc((void**)&device.data, sizeof(T) * elementCount));
	COMPUTE_SAFE(cudaMemcpy(device.data, data, sizeof(T) * elementCount, cudaMemcpyHostToDevice));
	COMPUTE_SAFE(cudaMemcpyToSymbol(symbol, &device, sizeof(device)));
}

template <class S>
__host__ void UploadToHost(Array3D<T>& host, const S& symbol) {
	COMPUTE_SAFE(cudaMemcpy(&host.data, data, sizeof(T) * elementCount, cudaMemcpyDeviceToHost));
	COMPUTE_SAFE(cudaMemcpyFromSymbol(&host, symbol, sizeof(void*), 0, cudaMemcpyDeviceToHost));
}

I then use them to upload my data into the GPU:

__device__ MyArray<float> symbol1;
__device__ MyArray<float> symbol2;

void Test() {
    MyArray<float> host1;
    MyArray<float> host2;

    MyArray<float> device1;
    MyArray<float> device2;

    // Initialize the host arrays...

    // Upload to the GPU
    host1.UploadToDevice(device1, symbol1);
    host2.UploadToDevice(device2, symbol2);

    // Upload back to the CPU
    device1.UploadToHost(host1, symbol1);
    device2.UploadToHost(host2, symbol2);
}

With this approach I can allocate a single instance of MyArray, but as soon as I call a second upload call (host2.UploadToDevice(device2, symbol2)) I get an error originating from the first line of the UploadToHost method that tells me that I’m providing an invalid argument - Any ideas about what could be going wrong here? Thanks in advance.

Robert_Crovella · August 29, 2022, 10:46pm

I don’t wish to assemble my own testable example from your bits and pieces. If you want to provide a short, complete example that I can copy, paste, compile, and run, and observe the error, without having to add anything or change anything, I’ll take a look as time permits.

if that’s a problem for some reason, perhaps someone else will be able to spot something.

Goubermouche · August 30, 2022, 11:51am

Absolutely, I’ll remember to provide an example of my problem next time, as the problem allows. For anyone else stumbling upon this topic - Robert’s answer here solved my issue.