I am trying to do some ray tracing through volume sets and I am having trouble getting my data up to CUDA without blowing up the max texture size. I am also not sure if I am doing it correctly either since I can’t find a good demo.
My largest data set is around 6 megs and I can’t seem to get it to load. I am thinking I will have to break it up into smaller pieces and cull out some of the data but I was not sure if there was an easier way to do this than I am doing. nNumVolumeData is 1669 below and I am getting an “invalid argument” error which I have found goes away when I reduce the size.
I am also not sure if I am setting up the array correctly either. I am speaking about the volume data below. It is a 2D array on the CUDA side but I am passing it a 1D array from the host side and I am not sure if that is the correct syntax for what I am trying to achieve.
I am also getting severe performance issues, but I am pretty sure it is because my data is too large.
Any ideas?
[codebox]
texture<float4, 1, cudaReadModeElementType> minMaxDTex;
texture<uint, 2, cudaReadModeElementType> volumeDataDTex
extern “C”
void cuda_contrast_raytrace(void* targetSurface, int width, int height, size_t pitch,
//CUDAVolumeBlock* pVolBlocks, int nNumBlocks,
unsigned int* pVolumeData, int nNumVolData, float* pMinMax, int nNumFloatMinMax,
CUDAMatrix4x4 invViewMatrix, CUDAMatrix4x4 projMatrix, float t)
{
cudaError_t error = cudaSuccess;
CUDAMatrix4x4* pInvViewMatrix;
cudaMalloc((void**)&pInvViewMatrix, sizeof(CUDAMatrix4x4));
cudaMemcpy(pInvViewMatrix, &invViewMatrix, sizeof(CUDAMatrix4x4), cudaMemcpyHostToDevice);
CUDAMatrix4x4* pProjMatrix;
cudaMalloc((void**)&pProjMatrix, sizeof(CUDAMatrix4x4));
cudaMemcpy(pProjMatrix, &projMatrix, sizeof(CUDAMatrix4x4), cudaMemcpyHostToDevice);
int nNumBlocks = nNumFloatMinMax/(4*2);
// Setup volumeDataDTex
cudaArray* volumeTexArray = 0;
cudaMallocArray(&volumeTexArray, &volumeDataDTex.channelDesc, INT_COUNT, nNumVolData);
error = cudaGetLastError();
if (error != cudaSuccess) {
OutputDebugString("volumeDataDTex: cudaMallocArray() failed");
OutputDebugStringA(cudaGetErrorString(error));
}
cudaMemcpy2DToArray(volumeTexArray, 0, 0, pVolumeData, INT_COUNT * sizeof(uint), INT_COUNT * sizeof(uint), nNumVolData, cudaMemcpyHostToDevice);
error = cudaGetLastError();
if (error != cudaSuccess) {
OutputDebugString("volumeDataDTex: cudaMemcpy2DToArray() failed");
OutputDebugStringA(cudaGetErrorString(error));
}
cudaBindTextureToArray(volumeDataDTex, volumeTexArray);
error = cudaGetLastError();
if (error != cudaSuccess) {
OutputDebugString("volumeDataDTex: cudaBindTextureToArray() failed");
OutputDebugStringA(cudaGetErrorString(error));
}
// Setup minMaxDTex
cudaArray* minMaxTexArray = 0;
cudaMallocArray(&minMaxTexArray, &minMaxDTex.channelDesc, nNumFloatMinMax/4, 1);
error = cudaGetLastError();
if (error != cudaSuccess) {
OutputDebugString("minMaxDTex: cudaMallocArray() failed");
OutputDebugStringA(cudaGetErrorString(error));
}
cudaMemcpyToArray(minMaxTexArray, 0,0, pMinMax, nNumFloatMinMax * sizeof(float), cudaMemcpyHostToDevice);
error = cudaGetLastError();
if (error != cudaSuccess) {
OutputDebugString("minMaxDTex: cudaMemcpyToArray() failed");
OutputDebugStringA(cudaGetErrorString(error));
}
cudaBindTextureToArray(minMaxDTex, minMaxTexArray);
error = cudaGetLastError();
if (error != cudaSuccess) {
OutputDebugString("minMaxDTex: cudaBindTextureToArray() failed");
OutputDebugStringA(cudaGetErrorString(error));
}
dim3 dimBlock(16, 16);
dim3 dimGrid( (width + dimBlock.x - 1) / dimBlock.x,
(height + dimBlock.y - 1) / dimBlock.y);
cuda_kernel_ContrastRaytrace<<<dimGrid,dimBlock>>>( (unsigned char*)targetSurface, width, height, pitch, nNumBlocks, pInvViewMatrix, pProjMatrix, t);
error = cudaGetLastError();
if (error != cudaSuccess) {
OutputDebugString("cuda_kernel_ivus_raycast() failed to launch error");
}
cudaFree(pInvViewMatrix);
cudaFree(pProjMatrix);
cudaFreeArray(minMaxTexArray);
cudaUnbindTexture(volumeDataDTex);
cudaFreeArray(volumeTexArray);
cudaUnbindTexture(volumeDataDTex);
}[/codebox]