When running the following code, I get an "invalid argument" error from cudaMemcpy3D in InitializeAndBindInsert3DTexture:
// File-scope 3D texture reference over float texels, read back as the stored
// element type. Its parameters are set and it is bound to insert3DArray in
// InitializeAndBindInsert3DTexture.
texture<float, 3, cudaReadModeElementType> insert3DTexture;
// CUDA array that backs insert3DTexture; allocated with cudaMalloc3DArray in
// InitializeAndBindInsert3DTexture.
// NOTE(review): presumably released by FreeInsert3DArray() (not shown here) - confirm.
cudaArray *insert3DArray;
/**
 * Allocates the global 3D CUDA array insert3DArray, fills it from a volume held
 * in linear device memory, and binds it to the texture reference insert3DTexture.
 *
 * @param d_Input   Device pointer to the source volume. It is described to
 *                  cudaMemcpy3D as tightly packed rows of dataWidth floats
 *                  (pitch = sizeof(float) * dataWidth) with dataHeight rows per slice.
 * @param dataSize  Volume dimensions in elements (dataWidth, dataHeight, dataDepth).
 *
 * NOTE(review): per the CUDA runtime documentation, cudaMalloc3DArray only
 * creates a 3D array when all three extents are non-zero; a zero height or
 * depth yields a 1D/2D array instead. Callers should pass a genuinely 3D size.
 *
 * Errors are reported through CheckCUDAError() after each runtime call, as in
 * the rest of this file.
 */
void InitializeAndBindInsert3DTexture(float *d_Input, DataSize dataSize) {
    // Single-channel 32-bit float texels.
    cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);
    CheckCUDAError();

    // Texture parameters: clamp out-of-range coordinates on ALL three axes
    // (the texture is 3D, so the z axis is configured explicitly as well),
    // use unnormalized coordinates and linear filtering.
    insert3DTexture.addressMode[0] = cudaAddressModeClamp;
    insert3DTexture.addressMode[1] = cudaAddressModeClamp;
    insert3DTexture.addressMode[2] = cudaAddressModeClamp;
    insert3DTexture.normalized = false;
    insert3DTexture.filterMode = cudaFilterModeLinear;

    // Allocate the 3D array. For a cudaArray the extent is given in elements.
    cudaExtent volumeSize = make_cudaExtent(dataSize.dataWidth, dataSize.dataHeight, dataSize.dataDepth);
    cudaMalloc3DArray(&insert3DArray, &channelDesc, volumeSize);
    CheckCUDAError();

    // Describe the copy: linear device memory -> 3D array.
    cudaMemcpy3DParms copyParams = {0};
    // Pitch is in bytes; xsize/ysize describe the logical row length and row count.
    copyParams.srcPtr = make_cudaPitchedPtr((void*)d_Input, sizeof(float)*dataSize.dataWidth, dataSize.dataWidth, dataSize.dataHeight);
    copyParams.dstArray = insert3DArray;
    copyParams.extent = volumeSize;
    copyParams.kind = cudaMemcpyDeviceToDevice;
    // cudaMemcpy3D takes a pointer to the parameter struct.
    cudaMemcpy3D(&copyParams);
    CheckCUDAError();

    // Bind the array to the 3D texture
    cudaBindTextureToArray(insert3DTexture, insert3DArray, channelDesc);
    CheckCUDAError();
}
/**
 * Host wrapper that launches the Insert3D kernel over the input volume.
 *
 * The input volume is first copied into a 3D array and bound to a texture via
 * InitializeAndBindInsert3DTexture(); after the kernel has finished the texture
 * is unbound and the array is freed again.
 *
 * Launch layout: 8x8x8 threads per block. The grid is two-dimensional: x covers
 * the input width, and the y dimension holds blocksInY * blocksInZ blocks.
 * blocksInY and its reciprocal are passed to the kernel, presumably so it can
 * recover the separate y/z block indices - confirm against the kernel.
 *
 * @param d_OutputData    Device pointer to the output volume.
 * @param outputDataSize  Dimensions of the output volume.
 * @param d_InputData     Device pointer to the input volume (linear memory).
 * @param inputDataSize   Dimensions of the input volume; determines the grid size.
 * @param startPosWidth   Start position along the width, forwarded to the kernel.
 * @param startPosHeight  Start position along the height, forwarded to the kernel.
 * @param startPosDepth   Start position along the depth, forwarded to the kernel.
 */
void Insert3D(float *d_OutputData, DataSize outputDataSize, float *d_InputData, DataSize inputDataSize, int startPosWidth, int startPosHeight, int startPosDepth) {
    InitializeAndBindInsert3DTexture(d_InputData, inputDataSize);

    // Set dimensions of the inserting kernel
    const int threadsInX = 8;
    const int threadsInY = 8;
    const int threadsInZ = 8;

    // Ceil-divide so that partially filled blocks at the volume edges are covered.
    const int blocksInX = (inputDataSize.dataWidth + threadsInX - 1) / threadsInX;
    const int blocksInY = (inputDataSize.dataHeight + threadsInY - 1) / threadsInY;
    const int blocksInZ = (inputDataSize.dataDepth + threadsInZ - 1) / threadsInZ;

    dim3 dimBlock = dim3(threadsInX, threadsInY, threadsInZ);
    dim3 dimGrid = dim3(blocksInX, blocksInY*blocksInZ);

    Insert3D<<<dimGrid, dimBlock>>>(d_OutputData, outputDataSize, startPosWidth, startPosHeight, startPosDepth, inputDataSize, blocksInY, 1.0f/(float)blocksInY);
    // Catches launch-time problems (e.g. an invalid launch configuration).
    CheckCUDAError();

    // Wait for the kernel to finish before tearing down the texture, and check
    // again: faults raised while the kernel runs only surface at a
    // synchronizing call, so they would otherwise be reported at a later,
    // unrelated API call.
    cudaThreadSynchronize();
    CheckCUDAError();

    UnbindInsert3DTexture();
    FreeInsert3DArray();
}
/**
 * Host entry point: runs Insert3D on the device for volumes held in host memory.
 *
 * Both host volumes are uploaded to newly allocated device buffers, Insert3D()
 * is invoked on them, the output buffer is copied back into h_Output, and the
 * device buffers are released. CheckCUDAError() is called after every CUDA
 * runtime call made here.
 *
 * @param h_Output        Host output volume; uploaded before and overwritten after the call.
 * @param outputWidth     Output volume width.
 * @param outputHeight    Output volume height.
 * @param outputDepth     Output volume depth.
 * @param h_Input         Host input volume.
 * @param inputWidth      Input volume width.
 * @param inputHeight     Input volume height.
 * @param inputDepth      Input volume depth.
 * @param startPosWidth   Start position along the width, forwarded to Insert3D.
 * @param startPosHeight  Start position along the height, forwarded to Insert3D.
 * @param startPosDepth   Start position along the depth, forwarded to Insert3D.
 */
void CUDAInsert3D(float *h_Output, int outputWidth, int outputHeight, int outputDepth, float *h_Input, int inputWidth, int inputHeight, int inputDepth, int startPosWidth, int startPosHeight, int startPosDepth) {
    // Describe the input volume; dataSize is filled in by EstimateDataSize.
    DataSize inputDims;
    inputDims.dataWidth = inputWidth;
    inputDims.dataHeight = inputHeight;
    inputDims.dataDepth = inputDepth;
    inputDims.dataSize = EstimateDataSize(inputDims, sizeof(float));

    // Describe the output volume in the same way.
    DataSize outputDims;
    outputDims.dataWidth = outputWidth;
    outputDims.dataHeight = outputHeight;
    outputDims.dataDepth = outputDepth;
    outputDims.dataSize = EstimateDataSize(outputDims, sizeof(float));

    // Device-side buffers for both volumes.
    float *devInput;
    float *devOutput;
    cudaMalloc((void**)&devInput, inputDims.dataSize);
    CheckCUDAError();
    cudaMalloc((void**)&devOutput, outputDims.dataSize);
    CheckCUDAError();

    // Upload both host volumes (the output is uploaded too, so its existing
    // contents are present on the device when the kernel runs).
    cudaMemcpy(devInput, h_Input, inputDims.dataSize, cudaMemcpyHostToDevice);
    CheckCUDAError();
    cudaMemcpy(devOutput, h_Output, outputDims.dataSize, cudaMemcpyHostToDevice);
    CheckCUDAError();

    // Run the insertion on the device.
    Insert3D(devOutput, outputDims, devInput, inputDims, startPosWidth, startPosHeight, startPosDepth);

    // Download the result into the caller's output buffer.
    cudaMemcpy(h_Output, devOutput, outputDims.dataSize, cudaMemcpyDeviceToHost);
    CheckCUDAError();

    // Release the device buffers.
    cudaFree(devInput);
    CheckCUDAError();
    cudaFree(devOutput);
    CheckCUDAError();
}
Does anyone have a suggestion as to why I receive this error?
/D