cudaSurfaceObject_t and misaligned address

Hello and a happy new year,

last time i tried to write into a GL_TEXTURE_2D and finally I could manage to write into it. I tried to write the wrong data type (index out of bounds). This time I try to write into a GL_TEXTURE_3D.

This is how i generate the texture:

volumeDimension = dim3(10, 10, 10);
gl::GenTextures(1, &volumeTexture);
gl::BindTexture(gl::TEXTURE_3D, volumeTexture);
gl::TexImage3D(gl::TEXTURE_3D, 0, gl::RGBA, volumeDimension.x, volumeDimension.y, volumeDimension.z, 0, gl::RGBA, gl::UNSIGNED_BYTE, nullptr);
gl::BindTexture(gl::TEXTURE_3D, 0);

Then i initialize cuda:

gpu::error::cuda::check(cudaSetDevice(0));
gpu::error::cuda::check(cudaGLSetGLDevice(0));

gpu::error::cuda::check(cudaGraphicsGLRegisterImage(&volumeResource, volumeTexture, gl::TEXTURE_3D, cudaGraphicsRegisterFlagsSurfaceLoadStore));

Then in every frame i do a cuda pass:

gpu::error::cuda::check(cudaGraphicsMapResources(1, &volumeResource));

cudaArray_t volumeArray;
gpu::error::cuda::check(cudaGraphicsSubResourceGetMappedArray(&volumeArray, volumeResource, 0, 0));

cudaResourceDesc volumeDescription;
memset(&volumeDescription, 0, sizeof(volumeDescription));
volumeDescription.resType = cudaResourceTypeArray;
volumeDescription.res.array.array = volumeArray;

cudaSurfaceObject_t volumeSurface;
gpu::error::cuda::check(cudaCreateSurfaceObject(&volumeSurface, &volumeDescription));

gpu::functions::fillVolume(volumeSampler, volumeDimension);

gpu::error::cuda::check(cudaDestroySurfaceObject(volumeSurface));

gpu::error::cuda::check(cudaGraphicsUnmapResources(1, &volumeResource));

The Cuda Function (gpu::functions…) looks like this:

void fillVolume(cudaSurfaceObject_t volumeSurface, dim3 textureDimension)
{
	dim3 threads(8, 8, 8);
	dim3 blocks(static_cast<unsigned int>(std::ceil(static_cast<double>(textureDimension.x) / static_cast<double>(threads.x))), static_cast<unsigned int>(std::ceil(static_cast<double>(textureDimension.y) / static_cast<double>(threads.y))), static_cast<unsigned int>(std::ceil(static_cast<double>(textureDimension.z) / static_cast<double>(threads.z))));

	kernels::fillVolume<<< blocks, threads >>>(volumeSurface, textureDimension);
}

And the kernel looks like this:

__global__ void fillVolume(cudaSurfaceObject_t volumeSurface, dim3 textureDimension)
{
	unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
	unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;
	unsigned int z = blockIdx.z * blockDim.z + threadIdx.z;

	if(x < textureDimension.x && y < textureDimension.y && z < textureDimension.z)
	{
		uchar4 data = make_uchar4(0xff, 0xff, 0xff, 0xff);
		surf3Dwrite(data, volumeSurface, x * sizeof(uchar4), y, z);
	}
}

The cuda-memcheck report is telling this:

========= Invalid __global__ read of size 4
=========     at 0x000003a0 in g:\tools\nvidia cuda sdk\toolkit\include\/surface_indirect_functions.h:1635:gpu::kernels::fillVolume(__int64, dim3)
=========     by thread (0,0,0) in block (1,1,1)
=========     Address 0x00000009 is misaligned

I mean, it´s plausible that 0x00000009 seems to be misaligned but what can I do about it? It should be the address of my surface.

Has somebody a clue what i might missed this time?

Thank you so far,

Markus

It turned out, that the created cudaSurfaceObject_t has the value 1074790400 (c++ side when created)

when the debugger steps into the fillVolume function a value of 1 got passed into the function… i have no idea why the value changes from 1074790400 to 1

Oh god… i found it… volumeS -> CMD + Spacebar completed the input to volumeSampler… that is actually the sampler object for the texture… when i pass the correct volumeSURFACE it works…

Thanks anyway!