Hi, guys,
I wrote a test program for the use of mapped memory. However, the it failed and I don’t know why.
Here is the source code:
#include "cuda_runtime.h"
#include <stdlib.h>
#include <stdio.h>
int main(void) {
unsigned int *h_array, *d_array, *h_array_test;
int cudaError;
int num = 5;
int size = num * sizeof(unsigned int);
cudaSetDevice(0);
// set device flag
cudaSetDeviceFlags( cudaDeviceMapHost );
// allocate pinned memory
cudaError=cudaHostAlloc( (void**) &(h_array), size, cudaHostAllocMapped );
if (cudaError)
printf ("Failed to allocate pinned memory \n");
// get device ptr
cudaError=cudaHostGetDevicePointer( (void**) &(d_array), h_array, 0 );
if (cudaError)
printf ("Failed to get device pointer \n");
// initialize test data
for (int i = 0; i < num; i ++)
{
h_array[i] = i;
}
// output array in host memory
printf("array in host memory:\n");
for (int i = 0; i < num; i ++)
{
printf("%d: %d\n", i, h_array[i]);
}
printf("\n");
// output array in device memory
//cudaMemcpy(d_array, h_array, size, cudaMemcpyHostToDevice);
h_array_test = (unsigned int*)malloc(size);
cudaError=cudaMemcpy( h_array_test, d_array, size, cudaMemcpyDeviceToHost );
if (cudaError)
printf ("Failed to copy device memory \n");
printf("array in device memory:\n");
for (int i = 0; i < num; i ++)
{
printf("%d: %d\n", i, h_array_test[i]);
}
printf("\n");
}
And the result is as follows:
array in host memory:
0: 0
1: 1
2: 2
3: 3
4: 4
Failed to copy device memory
array in device memory:
0: -1163005939
1: -1163005939
2: -1163005939
3: -1163005939
4: -1163005939
Apparently, the mapping was not sucessful. Anyone can help me out? Lots of thanks!
BTW: my GPU is Quadro FX3800 and the compute capability is 1.3, which supports page-locked memory mapping