I wrote some pieces of code using the modulo operators, and noticed that these, especially in comparisons, give weird results.
Example:
#include <stdio.h>
#include <assert.h>
#include <math.h>
#define BLOCKSIZE 512
__global__ void kfinddiv(int *d_a, int k)
{
long long idx = gridDim.x*blockIdx.y+blockDim.x*blockIdx.x+threadIdx.x;
if (idx+2 < k/2)
{
if (k%(idx+2) == 0)
*d_a = idx+2;
}
}
int main()
{
int input;
scanf("%d", &input);
dim3 dimGrid(int(ceil(float(input/BLOCKSIZE))), int(ceil(float(input/BLOCKSIZE))));
dim3 dimBlock(BLOCKSIZE);
printf("%d\n", int(ceil(float(input/BLOCKSIZE))));
int *d_a;
int *h_a;
h_a = (int*)malloc(sizeof(int));
cudaMalloc(&d_a, sizeof(int));
*h_a = 0;
cudaMemcpy(d_a, h_a, sizeof(int), cudaMemcpyHostToDevice);
kfinddiv<<< dimGrid, dimBlock >>>(d_a, input);
cudaMemcpy(h_a, d_a, sizeof(int), cudaMemcpyDeviceToHost);
printf("%d\n", *h_a);
return 0;
}
This works fine when compiled with -deviceemu, but as soon as it starts running on the real deal the modulo seems to be wrongly evaluated.
Does anyone know whether, or when this will be fixed. (I’m 100% sure it is the modulo operator: add an if(idx == 0 && k%(idx+2)) at the end and use an even number as imput and you’ll see what I mean. then try removing the modulo portion)