Dynamic allocation and cudaMemcpy3D

I don't seem to be able to use cudaMemcpy3D to fill a 3D cudaArray from host memory when the host buffer is dynamically allocated. The following code initializes the buffer, copies it to the device, copies it back, and checks it. If the buffer is allocated statically or on the stack, it works. If it's malloc'd (or cudaHostAlloc'd, or cudaMalloc'd, for that matter), it doesn't. Any ideas?

#include <cstdlib>
#include <cstring>
#include <iostream>

#include <cuda.h>

#include <cutil_inline.h>

#include <cutil_math.h>

#define SIZE 3

// float buf;

main()

{

cudaArray *d_array;

// float buf;

float buf = (float )malloc(SIZESIZESIZE*sizeof(float));

for (int i = 0; i < SIZESIZESIZE; i++)

 buf[i] = i;

cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);

cudaExtent bufSize = {SIZE, SIZE, SIZE};

cutilSafeCall(cudaMalloc3DArray(&d_array, &channelDesc, bufSize));

cudaMemcpy3DParms sendParams = {0};

sendParams.srcPtr = make_cudaPitchedPtr(&buf, SIZE*sizeof(float), SIZE, SIZE);

sendParams.dstArray = d_array;

sendParams.extent = bufSize;

sendParams.kind = cudaMemcpyHostToDevice;

cutilSafeCall(cudaMemcpy3D(&sendParams));

memset(buf, -1, SIZESIZESIZE*sizeof(float));

cudaMemcpy3DParms readParams = {0};

readParams.dstPtr = make_cudaPitchedPtr(&buf, SIZE*sizeof(float), SIZE, SIZE);

readParams.srcArray = d_array;

readParams.extent = bufSize;

readParams.kind = cudaMemcpyDeviceToHost;

cutilSafeCall(cudaMemcpy3D(&readParams));

for (int i = 0; i < SIZESIZESIZE; i++)

 if (buf[i] != i)

 {

    std::cerr << "bad - " << i << "\n";

    exit(1);

 }

std::cerr << “good\n”;

}

I don't seem to be able to use cudaMemcpy3D to fill a 3D cudaArray from host memory when the host buffer is dynamically allocated. The following code initializes the buffer, copies it to the device, copies it back, and checks it. If the buffer is allocated statically or on the stack, it works. If it's malloc'd (or cudaHostAlloc'd, or cudaMalloc'd, for that matter), it doesn't. Any ideas?

#include <iostream>

#include <cuda.h>

#include <cutil_inline.h>

#include <cutil_math.h>

#define SIZE 3

// float buf;

main()

{

cudaArray *d_array;

// float buf;

float buf = (float )malloc(SIZESIZESIZE*sizeof(float));

for (int i = 0; i < SIZESIZESIZE; i++)

 buf[i] = i;

cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);

cudaExtent bufSize = {SIZE, SIZE, SIZE};

cutilSafeCall(cudaMalloc3DArray(&d_array, &channelDesc, bufSize));

cudaMemcpy3DParms sendParams = {0};

sendParams.srcPtr = make_cudaPitchedPtr(&buf, SIZE*sizeof(float), SIZE, SIZE);

sendParams.dstArray = d_array;

sendParams.extent = bufSize;

sendParams.kind = cudaMemcpyHostToDevice;

cutilSafeCall(cudaMemcpy3D(&sendParams));

memset(buf, -1, SIZESIZESIZE*sizeof(float));

cudaMemcpy3DParms readParams = {0};

readParams.dstPtr = make_cudaPitchedPtr(&buf, SIZE*sizeof(float), SIZE, SIZE);

readParams.srcArray = d_array;

readParams.extent = bufSize;

readParams.kind = cudaMemcpyDeviceToHost;

cutilSafeCall(cudaMemcpy3D(&readParams));

for (int i = 0; i < SIZESIZESIZE; i++)

 if (buf[i] != i)

 {

    std::cerr << "bad - " << i << "\n";

    exit(1);

 }

std::cerr << “good\n”;

}

Arrgghh… never mind.

Arrgghh… never mind.