Hello,
I am new to the CUDA world and I have a problem that I don't understand.
I read in the documentation that cudaFree just frees the memory but doesn't
overwrite it with zeros.
I wrote a small program which fills the global memory of the GPU with some
values and frees the memory afterwards.
Then it allocates memory again and prints it out.
My problem now is that on Linux I get the values that were written to the memory before,
but on Windows 10 I just get zeros.
I am working with Visual Studio 2013. My GPU is a GeForce GT 525M.
CUDA version 7.5.
This is my program:
#include <stdio.h>
#include <stdlib.h>

#include "cuda_runtime.h"
#include "device_launch_parameters.h"
// Buffer sizes in bytes. Every macro is parenthesized so that it expands
// correctly inside larger expressions (e.g. `x / ARRAY_SIZE`).
// NOTE(review): ARRAY_a and ARRAY_b previously read 102410241024 and
// 10241024512; the multiplication operators appear to have been lost when the
// code was pasted. They are restored here as products of 1024 - confirm.
#define ARRAY_SIZE (1024 * 1024)        // size of one scan chunk
#define ARRAY_a (1024 * 1024 * 1024)    // size of the buffer filled with 'A'
#define ARRAY_b (1024 * 1024 * 512)     // size of the buffer filled with 'B'
using namespace std;
int main()
{
char* a_device, *a_host, *b_device, *b_host;
// write 'A's to ARRAY_a Bytes of Graphic Card
if (NULL == (a_host = (char*)malloc(ARRAY_a)))
{
printf("a_host = malloc failed\n");
exit(-1);
}
for (int i = 0; i < ARRAY_a; i++)
{
a_host[i] = 'A';
}
if (cudaErrorMemoryAllocation == cudaMalloc((void**)&a_device, ARRAY_a))
{
printf(" cudaMalloc(&a_device,...) failed\n");
exit(-2);
}
if (cudaSuccess != cudaMemcpy(a_device, a_host, ARRAY_a, cudaMemcpyHostToDevice))
{
printf("cudaMemcpy(a_device, a_host, ARRAY_a, cudaMemcpyHostToDevice) FAILED\n");
exit(-10);
}
// free a_host
free(a_host);
// write 'B's to another ARRAY_b Bytes of Graphic Card
if (NULL == (b_host = (char*)malloc(ARRAY_b)))
{
printf("b_host = malloc failed\n");
exit(-3);
}
for (int i = 0; i < ARRAY_b; i++)
{
b_host[i] = 'B';
}
if (cudaErrorMemoryAllocation == cudaMalloc((void**)&b_device, ARRAY_b))
{
printf(" cudaMalloc(&b_device,...) failed\n");
exit(-4);
}
if (cudaSuccess != cudaMemcpy(b_device, b_host, ARRAY_b, cudaMemcpyHostToDevice))
{
printf("cudaMemcpy(a_device, a_host, ARRAY_a, cudaMemcpyHostToDevice) FAILED\n");
exit(-11);
}
//free b_host and the memory on the Graphic card
free(b_host);
cudaFree(a_device);
cudaFree(b_device);
unsigned long int l = -1;
// Allocates char Arrays of size ARRAY_SIZE, checks if the value is unsimilar to zero and prints it in this case.
// The memory isnt freed, so it does this untill there is no global memory left.
//
while (1)
{
l++;
char* c_host;
if (NULL == (c_host = (char*)malloc(ARRAY_SIZE)))
{
printf("malloc failed, %ld Memory Allocated\n", l*ARRAY_SIZE);
exit(-5);
}
char* c_device;
if (cudaErrorMemoryAllocation == cudaMalloc((void**)&c_device, ARRAY_SIZE))
{
printf("Cudamalloc failed, %ld Memory Allocated\n", l*ARRAY_SIZE);
exit(-6);
}
if (cudaSuccess != cudaMemcpy(c_host, c_device, ARRAY_SIZE, cudaMemcpyDeviceToHost))
{
printf("cudaMemcpy(c_host, c_device, ARRAY_SIZE, cudaMemcpyDeviceToHost) FAILED\n");
exit(-12);
}
for (int i = 0; i < ARRAY_SIZE; i++)
{
if (c_host[i] != 0)
{
printf("%c ", c_host[i]);
}
}
printf("\n");
printf("%p\n", c_device);
printf("-----------------------------------------------------------------\n");
free(c_host);
}
return 0;
}
Does anybody have an idea why it just prints out zeros on Windows?
Thanks, Matthias