cudaMalloc and Valgrind

Kevincav · October 10, 2013, 4:39pm

I ran the simple test case below, and valgrind seems to blow up as soon as I call cudaMalloc. It also reports far more memory in use than I had expected. Is this normal?

#include <iostream>
#include <cstdlib>
#include <ctime>

#define SIZE 1000

// Doubles one element of `array` per thread, in place.
// Each thread touches only the element at its own threadIdx.x, so block
// indices are ignored and no bounds check is performed: the caller must
// launch no more threads per block than `array` has elements.
__global__ void test (int *array) {
    const unsigned int slot = threadIdx.x;
    array[slot] = array[slot] * 2;
}

// Prints the SIZE entries of `values` to stdout, ten tab-terminated values per row.
static void printArray(const int *values) {
    for (int row = 0; row < SIZE / 10; row++) {
        for (int col = 0; col < 10; col++)
            std::cout << values[row * 10 + col] << '\t';
        std::cout << std::endl;
    }
}

// Returns true when `status` is cudaSuccess; otherwise reports the failed
// step `what` together with the runtime's error string on stderr.
static bool cudaOk(cudaError_t status, const char *what) {
    if (status == cudaSuccess)
        return true;
    std::cerr << what << " failed: " << cudaGetErrorString(status) << std::endl;
    return false;
}

// Fills a host array with 0..SIZE-1, prints it, doubles every element on the
// device with the `test` kernel, copies the result back and prints it again.
// Returns 0 on success and EXIT_FAILURE if any CUDA runtime call fails.
int main (int argc, char *argv[]) {
    (void) argc;   // command-line arguments are not used
    (void) argv;

    // Must be an array of SIZE ints: the original `int h_array;` declared a
    // scalar, which cannot be indexed or used as a SIZE-element copy buffer.
    int h_array[SIZE];
    const size_t bytes = SIZE * sizeof(int);

    for (int i = 0; i < SIZE; i++)
        h_array[i] = i;
    printArray(h_array);
    std::cout << std::endl;

    int *d_array = NULL;
    if (!cudaOk(cudaMalloc((void **) &d_array, bytes), "cudaMalloc"))
        return EXIT_FAILURE;

    bool ok = cudaOk(cudaMemcpy(d_array, h_array, bytes, cudaMemcpyHostToDevice),
                     "cudaMemcpy (host to device)");
    if (ok) {
        // One block of SIZE threads, one element per thread.
        test<<<1,SIZE>>>(d_array);
        // A launch does not return a status; a bad configuration is only
        // visible through cudaGetLastError().
        ok = cudaOk(cudaGetLastError(), "test kernel launch");
    }
    if (ok) {
        // The blocking copy back also reports errors raised while the kernel ran.
        ok = cudaOk(cudaMemcpy(h_array, d_array, bytes, cudaMemcpyDeviceToHost),
                    "cudaMemcpy (device to host)");
    }

    // Release the device buffer on every path, including the failure paths.
    if (!cudaOk(cudaFree(d_array), "cudaFree"))
        ok = false;
    if (!ok)
        return EXIT_FAILURE;

    printArray(h_array);
    return 0;
}

MutantJohn · October 11, 2013, 8:04am

I think it is. I’ve always gotten funky output from valgrind when running CUDA code, so I just trust myself XD