Hello and a happy new year,
last time i tried to write into a GL_TEXTURE_2D and finally I could manage to write into it. I tried to write the wrong data type (index out of bounds). This time I try to write into a GL_TEXTURE_3D.
This is how i generate the texture:
volumeDimension = dim3(10, 10, 10);
gl::GenTextures(1, &volumeTexture);
gl::BindTexture(gl::TEXTURE_3D, volumeTexture);
gl::TexImage3D(gl::TEXTURE_3D, 0, gl::RGBA, volumeDimension.x, volumeDimension.y, volumeDimension.z, 0, gl::RGBA, gl::UNSIGNED_BYTE, nullptr);
gl::BindTexture(gl::TEXTURE_3D, 0);
Then i initialize cuda:
gpu::error::cuda::check(cudaSetDevice(0));
gpu::error::cuda::check(cudaGLSetGLDevice(0));
gpu::error::cuda::check(cudaGraphicsGLRegisterImage(&volumeResource, volumeTexture, gl::TEXTURE_3D, cudaGraphicsRegisterFlagsSurfaceLoadStore));
Then in every frame i do a cuda pass:
gpu::error::cuda::check(cudaGraphicsMapResources(1, &volumeResource));
cudaArray_t volumeArray;
gpu::error::cuda::check(cudaGraphicsSubResourceGetMappedArray(&volumeArray, volumeResource, 0, 0));
cudaResourceDesc volumeDescription;
memset(&volumeDescription, 0, sizeof(volumeDescription));
volumeDescription.resType = cudaResourceTypeArray;
volumeDescription.res.array.array = volumeArray;
cudaSurfaceObject_t volumeSurface;
gpu::error::cuda::check(cudaCreateSurfaceObject(&volumeSurface, &volumeDescription));
gpu::functions::fillVolume(volumeSampler, volumeDimension);
gpu::error::cuda::check(cudaDestroySurfaceObject(volumeSurface));
gpu::error::cuda::check(cudaGraphicsUnmapResources(1, &volumeResource));
The Cuda Function (gpu::functions…) looks like this:
void fillVolume(cudaSurfaceObject_t volumeSurface, dim3 textureDimension)
{
dim3 threads(8, 8, 8);
dim3 blocks(static_cast<unsigned int>(std::ceil(static_cast<double>(textureDimension.x) / static_cast<double>(threads.x))), static_cast<unsigned int>(std::ceil(static_cast<double>(textureDimension.y) / static_cast<double>(threads.y))), static_cast<unsigned int>(std::ceil(static_cast<double>(textureDimension.z) / static_cast<double>(threads.z))));
kernels::fillVolume<<< blocks, threads >>>(volumeSurface, textureDimension);
}
And the kernel looks like this:
__global__ void fillVolume(cudaSurfaceObject_t volumeSurface, dim3 textureDimension)
{
unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;
unsigned int z = blockIdx.z * blockDim.z + threadIdx.z;
if(x < textureDimension.x && y < textureDimension.y && z < textureDimension.z)
{
uchar4 data = make_uchar4(0xff, 0xff, 0xff, 0xff);
surf3Dwrite(data, volumeSurface, x * sizeof(uchar4), y, z);
}
}
The cuda-memcheck report is telling this:
========= Invalid __global__ read of size 4
========= at 0x000003a0 in g:\tools\nvidia cuda sdk\toolkit\include\/surface_indirect_functions.h:1635:gpu::kernels::fillVolume(__int64, dim3)
========= by thread (0,0,0) in block (1,1,1)
========= Address 0x00000009 is misaligned
I mean, it´s plausible that 0x00000009 seems to be misaligned but what can I do about it? It should be the address of my surface.
Has somebody a clue what i might missed this time?
Thank you so far,
Markus