Bug in cudaMemcpy with double.

When I try to do:
cudaMemcpy(b_h, a_d, sizeof(double)*N, cudaMemcpyDeviceToHost);
When b_h and a_d are double, it does not work (b_h does not receive the contents of a_d).
But when b_h and a_d are float, it works.
Has anyone else had the same problem?

Could you provide a simple code example that demonstrates this?

In my experience, cudaMemcpy works for both “float” and “double”.

// incrementArray.cu

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

#include <cuda.h>

// Host reference implementation: adds 1.0 to each of the first N elements of a.
//   a - host array holding at least N doubles; modified in place
//   N - number of elements to increment; nothing happens when N <= 0
void incrementArrayOnHost(double *a, int N)
{
    for (int i = 0; i < N; i++) {
        a[i] = a[i] + 1.0;
    }
}


// Device kernel: adds 1.0 to each of the first N elements of a, one element
// per thread.
//   a - device array holding at least N doubles; modified in place
//   N - number of valid elements in a
// Expects a 1D grid of 1D blocks with at least N threads in total.
// Uses no shared memory.
// Double precision requires a device of compute capability 1.3 or higher and
// a build that targets it.
__global__ void incrementArrayOnDevice(double *a, int N)
{
    int idx = blockIdx.x*blockDim.x + threadIdx.x;

    // The grid may contain more threads than elements; surplus threads must not write.
    if (idx < N) {
        a[idx] = a[idx] + 1.0;
    }
}



// Aborts the program with a diagnostic when a CUDA runtime call has failed.
//   status - value returned by the CUDA runtime call
//   what   - short description of the operation, used in the error message
static void checkCuda(cudaError_t status, const char *what)
{
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

// Increments an array of N doubles once on the host and once on the device,
// then verifies that both results agree.
// Returns 0 on success; exits with EXIT_FAILURE if an allocation or any CUDA
// call fails.
int main(void)
{
    double *a_h, *b_h; // pointers to host memory
    double *a_d;       // pointer to device memory
    int i, N = 1000;
    size_t size = N*sizeof(double);

    // allocate arrays on host
    a_h = (double *)malloc(size);
    b_h = (double *)malloc(size);
    if (a_h == NULL || b_h == NULL) {
        fprintf(stderr, "host allocation of %lu bytes failed\n", (unsigned long)size);
        free(a_h);
        free(b_h);
        return EXIT_FAILURE;
    }

    // allocate array on device
    checkCuda(cudaMalloc((void **) &a_d, size), "cudaMalloc");

    // initialization of host data
    for (i = 0; i < N; i++) a_h[i] = (double)i;

    // copy data from host to device
    checkCuda(cudaMemcpy(a_d, a_h, size, cudaMemcpyHostToDevice), "host-to-device copy");

    // do calculation on host
    incrementArrayOnHost(a_h, N);

    // do calculation on device:
    // Part 1 of 2. Compute execution configuration (ceiling division so every
    // element gets a thread). The tiny block size is kept from the original
    // demo; a multiple of 32 is preferable for real workloads.
    int blockSize = 4;
    int nBlocks = (N + blockSize - 1) / blockSize;

    // Part 2 of 2. Call incrementArrayOnDevice kernel
    incrementArrayOnDevice <<< nBlocks, blockSize >>> (a_d, N);

    // A kernel launch returns no status itself. Without this check a kernel
    // that never ran is indistinguishable from a copy that returned stale data.
    checkCuda(cudaGetLastError(), "kernel launch");

    // Retrieve result from device and store in b_h. This copy blocks until the
    // kernel has finished, so it also reports errors raised while it executed.
    checkCuda(cudaMemcpy(b_h, a_d, size, cudaMemcpyDeviceToHost), "device-to-host copy");

    // check results
    for (i = 0; i < N; i++) assert(a_h[i] == b_h[i]);

    // cleanup: device memory must be released with cudaFree, not free
    free(a_h);
    free(b_h);
    checkCuda(cudaFree(a_d), "cudaFree");

    return 0;
}


It is likely that the kernel isn’t actually running, either because you are running on a compute capability 1.1 card, or because you aren’t building for compute capability 1.3 (passing -arch=sm_13 to nvcc).

Yes, I am running on a compute capability 1.1 card.

What is the problem?

Compute capability 1.1 cards don’t support double precision; that’s the problem.