Hi,
I’m trying to bind a linear memory to my cuda texture object. My kernel function will fetch from the memory and update it directly. I will launch the kernel for a couple of times. However, I noticed that in each kernel launch, the value fetched from the kernel is kept the same, meaning it is not updated after write operation in previous kernel launch. I also wrote a simple test program to show that:
kernel function:
__global__ void kernelFunction(float* output, int width, int height, int depth, cudaTextureObject_t tex)
{
int x = blockIdx.x * blockDim.x + threadIdx.x;
int y = blockIdx.y * blockDim.y + threadIdx.y;
int z = blockIdx.z * blockDim.z + threadIdx.z;
if (x < width && y < height && z < depth)
{
if(x == 2 && y == 2 && z == 2){
float value = tex3D<float>(tex, x, y, z);
printf("prev: %f\n", value);
output[z * width * height + y * width + x] = value + 1;
}
}
}
texture creation:
int width = 128;
int height = 128;
int depth = 128;
size_t size = width * height * depth * sizeof(float);
float* inputData = (float*)malloc(size);
// Allocate device memory for input data
float* d_inputData;
cudaMalloc((void**)&d_inputData, size);
// Set to 0s
cudaMemset(d_inputData, 0, size);
// Create texture object
cudaExtent extent = make_cudaExtent(width, height, depth);
cudaResourceDesc resDesc;
memset(&resDesc, 0, sizeof(resDesc));
resDesc.resType = cudaResourceTypeLinear;
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
// Set texture parameters
cudaTextureDesc texDesc;
memset(&texDesc, 0, sizeof(texDesc));
texDesc.addressMode[0] = cudaAddressModeWrap;
texDesc.addressMode[1] = cudaAddressModeWrap;
texDesc.addressMode[2] = cudaAddressModeWrap;
texDesc.filterMode = cudaFilterModePoint;
texDesc.readMode = cudaReadModeElementType;
texDesc.normalizedCoords = 0;
resDesc.res.linear.devPtr = d_inputData;
resDesc.res.linear.desc = channelDesc;
resDesc.res.linear.sizeInBytes = size;
// Bind texture to linear memory
cudaTextureObject_t tex;
cudaCreateTextureObject(&tex, &resDesc, &texDesc, NULL);
My kernel launch:
dim3 blockSize(8, 8, 8);
dim3 gridSize((width + blockSize.x - 1) / blockSize.x, (height + blockSize.y - 1) / blockSize.y, (depth + blockSize.z - 1) / blockSize.z);
for(int i = 0; i < 5; i++){
kernelFunction<<<gridSize, blockSize>>>(d_inputData, width, height, depth, tex);
cudaDeviceSynchronize(); // Ensure kernel execution is complete
}
Based on the above code, the print statement within the kernel function generates the following message:
$ ./test
prev: 0.000000
prev: 0.000000
prev: 0.000000
prev: 0.000000
prev: 0.000000
Value at (2,2,2): 1.000000
We can see that in each iteration it fetches 0, which should not be the case after first launch. And this problem results in my final value 1 instead of 5. Are there anything wrong? I believe there should be a valid way to bind linear array to texture and receive update in each iteration. Thank everyone in advance.