Hi,
I just try to increase a value of byte’s array(unsigned char* data) in the position “index”.
I want to use atomicCAS, which supports only 32-bit values, for it.
My project is x64.
When I write:
data = {3,3,3,3};
index = 2;
device void CallFunction(unsigned char* data, unsigned int index)
{
unsigned int* dword_ptr = (unsigned int*)&(data[(index / 4) * 4]);
unsigned char byte_pos = index % 4;
unsigned int readback, old_value, new_value;
unsigned char byte_value;
old_value = dword_ptr;
byte_value = (old_value & (0xFF << byte_pos8)) >> byte_pos8;
byte_value = byte_value < 254 ? ++byte_value : 255;
new_value = (byte_value << (byte_pos8)) | (old_value & ~(0xFF << (byte_pos*8)));
readback = atomicCAS(dword_ptr, old_value, new_value);
}
But in the result I get data = {3, 3, 255, 3}.
It should be data = {3, 3, 4, 3}
What is wrong with my function?
Thanks!
your code as posted seems to work for me. The error is likely in something you haven’t shown.
$ cat t764.cu
#include <stdio.h>
__device__ unsigned char data[4] = {3,3,3,3};
__device__ void CallFunction(unsigned char* data, unsigned int index)
{
unsigned int* dword_ptr = (unsigned int*)&(data[(index / 4) * 4]);
unsigned char byte_pos = index % 4;
unsigned int readback, old_value, new_value;
unsigned char byte_value;
old_value = *dword_ptr;
byte_value = (old_value & (0xFF << byte_pos*8)) >> byte_pos*8;
byte_value = byte_value < 254 ? ++byte_value : 255;
new_value = (byte_value << (byte_pos*8)) | (old_value & ~(0xFF << (byte_pos*8)));
readback = atomicCAS(dword_ptr, old_value, new_value);
}
__global__ void tkernel(){
int index = 2;
CallFunction(data, index);
printf("data[0] = %d\n", data[0]);
printf("data[1] = %d\n", data[1]);
printf("data[2] = %d\n", data[2]);
printf("data[3] = %d\n", data[3]);
}
int main(){
tkernel<<<1,1>>>();
cudaDeviceSynchronize();
return 0;
}
$ nvcc -o t764 t764.cu
t764.cu(8): warning: variable "readback" was set but never used
t764.cu(8): warning: variable "readback" was set but never used
[bob@cluster1 misc]$ cuda-memcheck ./t764
========= CUDA-MEMCHECK
data[0] = 3
data[1] = 3
data[2] = 4
data[3] = 3
========= ERROR SUMMARY: 0 errors
$
Perhaps you have multiple threads stepping on each other. Impossible to tell without seeing a complete code.
Thanks for your response!
You are right. But I just found that the problem in my additional condition
do
{
readback = atomicCAS(dword_ptr, old_value, new_value);
} while (readback != old_value);
In my case the value is not equal until the new_value = 255.
I don’t understand the cause of it.
It still works for me, even with that modification.