Problem CudaMallocHost

Hi everybody,
While trying to use the cudaMallocHost function I encountered a problem: if I allocate host memory using cudaMallocHost in my main and then pass the pointer to a function in a .cu file where I execute my cudaMemcpy, the whole program crashes, but if I replace the cudaMallocHost with a simple malloc, everything works fine… Do you have any idea how to solve this problem?


cudaError_t cudaMallocHost (void ** ptr, size_t size )

Check the returned cudaError_t; maybe it’s not possible to allocate this amount of data in pinned memory.

Unfortunately, cudaMallocHost doesn’t return any error.

I guess the problem is in something you haven’t described. Based on your description, the following test case seems to work fine for me:

$ cat t842.cpp
#include <cuda_runtime.h>
#define DSIZE 100000
void cudaTest(int *data, size_t dsize);

// Host driver for the repro: allocates DSIZE ints of pinned (page-locked) host
// memory, fills them with 0..DSIZE-1, and hands the buffer to cudaTest().
// Returns 0 on success, 1 if the pinned allocation fails.
int main(){

  int *data;
  // cudaMallocHost reports failure only through its return value; touching
  // `data` after a failed allocation would crash, so bail out early.
  if (cudaMallocHost(&data, DSIZE*sizeof(int)) != cudaSuccess) return 1;
  for (int i = 0; i < DSIZE; i++){data[i] = i;}
  cudaTest(data, DSIZE);
  // Pinned memory must be released with cudaFreeHost, not free().
  cudaFreeHost(data);
  return 0;
}

$ cat t842.cu
#include <stdio.h>

#define DSIZE 10
// Debug kernel: prints the first `dsize` elements of `data` using device-side
// printf. There is no per-thread indexing, so every launched thread prints the
// whole range; this example launches it with <<<1,1>>>.
//   data  - pointer the kernel dereferences; must be accessible from the device
//   dsize - number of elements to print
__global__ void test(int *data, size_t dsize){
  // Index with size_t to match `dsize` (avoids a signed/unsigned comparison);
  // cast back to int only for the %d format specifier.
  for (size_t i = 0; i < dsize; i++) printf("data[%d] = %d\n", (int)i, data[i]);
}

// Copies the leading elements of a host buffer to the device and prints them
// from a single-thread kernel.
//   data  - host buffer holding at least `dsize` ints
//   dsize - number of ints available in `data`
// At most DSIZE elements (this file's macro) are copied and printed. Any CUDA
// error is reported on stderr; the function then returns without printing data.
void cudaTest(int *data, size_t dsize){

  // Clamp to the caller's element count so that dsize < DSIZE cannot read past
  // the end of `data` or write past the end of the device allocation.
  size_t count = (dsize < (size_t)DSIZE) ? dsize : (size_t)DSIZE;

  int *d_data;
  cudaError_t err = cudaMalloc(&d_data, dsize*sizeof(int));
  if (err != cudaSuccess){
    fprintf(stderr, "cudaMalloc failed: %s\n", cudaGetErrorString(err));
    return;
  }

  err = cudaMemcpy(d_data, data, count*sizeof(int), cudaMemcpyHostToDevice);
  if (err == cudaSuccess){
    test<<<1,1>>>(d_data, count);
    // Launch-configuration errors are only visible through cudaGetLastError().
    err = cudaGetLastError();
  }
  if (err == cudaSuccess){
    // Kernel launches are asynchronous: wait for completion so the device
    // printf buffer is flushed before the caller (and the process) exits.
    err = cudaDeviceSynchronize();
  }
  if (err != cudaSuccess){
    fprintf(stderr, "cudaTest failed: %s\n", cudaGetErrorString(err));
  }

  cudaFree(d_data);
}
$ nvcc -o t842 t842.cpp t842.cu
$ ./t842
data[0] = 0
data[1] = 1
data[2] = 2
data[3] = 3
data[4] = 4
data[5] = 5
data[6] = 6
data[7] = 7
data[8] = 8
data[9] = 9

@mons91, are you performing a DeviceToDevice memcpy?

If so, you would need to use cudaHostGetDevicePointer() to obtain a device pointer to the cudaMallocHost() allocated memory.