Hello all,
Bit of a problem with cudaMemcpy3D. I get an “invalid argument” error when running the following code (MacBook 9400M w/ CUDA 2.0):
[codebox]#include <stdio.h>
#define W 84 //width
#define H 87 //height
#define D 16 //depth
#define TEST(EXPR) if(EXPR != cudaSuccess){ printf(“Error @ %s: %s!\n”, #EXPR, cudaGetErrorString(cudaGetLastError())); }
float* C; // Host pointer
struct cudaPitchedPtr Ch; // Wrapper for host pointer
struct cudaPitchedPtr Cd; // Device pointer
struct cudaExtent extent; // Array dimensions
struct cudaMemcpy3DParms cpyC = {0}; // Copy structure for use with cudaMemcpy3D
int main(int argc, char* argv){
// Allocate host memory
TEST(cudaMallocHost((void**)&C, sizeof(float) * W * H * D));
// Allocate memory on device
extent.width = W;
extent.height = H;
extent.depth = D * sizeof(float);
TEST(cudaMalloc3D(&Cd, extent));
// Init host pitch pointer manually
Ch.ptr = C;
Ch.pitch = extent.depth;
Ch.xsize = extent.width;
Ch.ysize = extent.height;
// Init copy parameters structure
cpyC.extent = extent;
cpyC.srcPtr = Ch;
cpyC.dstPtr = Cd;
cpyC.kind = cudaMemcpyHostToDevice;
// Fails with: "invalid argument!" error message
TEST(cudaMemcpy3D(&cpyC));
cudaFreeHost©;
cudaFree(Cd.ptr);
return 0;
}[/codebox]
If W = 64 it works however if W < 64 I get an “unspecified launch error”.
Works fine in emulation mode however.
Any ideas?
Thanks :)