I’m attempting to implement RC4 encryption and have run into a bit of an issue. For some reason, CUDA doesn’t seem to be executing the following line:
a[idx*256+y]=a[idx*256+y] ^ S[(S[i] + S[j]) % 255];
Here a and S are both char arrays. I can access a and overwrite every char, for example with 'N' (a[idx*256+y]='N'), and that works. The full line also works fine in emulation mode (I know, I know, emulation mode isn't worth anything).
I’ve been looking for a solution all weekend to no avail. BTW I’m running the latest dev drivers.
Any help would be greatly appreciated.
Full Code
/*
 * RC4 kernel: each thread with idx < N XORs one N_DATASIZE-byte record of `a`
 * with an RC4 keystream derived from key b[idx], in place. Because the
 * operation is a keystream XOR, the same kernel both encrypts and decrypts.
 *
 * Parameters:
 *   a            buffer of records; thread idx processes bytes
 *                [idx*N_DATASIZE, (idx+1)*N_DATASIZE).
 *   b            one key per record; b[idx][k] is read for k < string_size[idx].
 *   N            number of records to process; threads with idx >= N do nothing.
 *   string_size  key length in bytes for each record.
 *
 * Launch layout: 1-D grid of 1-D blocks, one thread per record
 * (idx = blockIdx.x * blockDim.x + threadIdx.x).
 *
 * A record whose key length is 0 is left untouched, since RC4 is undefined
 * for an empty key and `i % keysize` would divide by zero.
 *
 * NOTE(review): if `string` is std::string, its operator[] is a host function
 * and its character buffer lives in host memory, so it cannot be used from
 * device code. Presumably the keys should be passed as a flat device char
 * array instead; confirm against the caller.
 */
__global__ void rc4(char a[N_DATASIZE*N_OBJECTS], string b[N_OBJECTS], int N, size_t *string_size)
{
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= N)
        return;

    int keysize = string_size[idx];
    if (keysize <= 0)
        return;  /* empty key: nothing sensible to do, and avoids modulo by zero */

    unsigned char S[256];  /* per-thread RC4 state permutation */
    unsigned char temp;
    int i, j;

    /* KSA: start from the identity permutation, then mix in the key. */
    for (i = 0; i < 256; i++)
        S[i] = (unsigned char)i;

    for (i = j = 0; i < 256; i++) {
        /* The key byte is cast to unsigned char so that bytes >= 0x80 cannot
         * make j negative; the index is reduced mod 256 (not 255) so that
         * every slot of S, including S[255], is reachable. */
        j = (j + (unsigned char)b[idx][i % keysize] + S[i]) & 0xFF;
        /* SWAP */
        temp = S[i];
        S[i] = S[j];
        S[j] = temp;
    }

    /* PRGA: generate one keystream byte per data byte and XOR it in. */
    i = j = 0;
    /* Record stride is N_DATASIZE (matching the loop bound and the declared
     * buffer size), not a hard-coded 256. */
    char *record = a + (size_t)idx * N_DATASIZE;
    for (int y = 0; y < N_DATASIZE; y++) {
        i = (i + 1) & 0xFF;
        j = (j + S[i]) & 0xFF;
        /* SWAP */
        temp = S[i];
        S[i] = S[j];
        S[j] = temp;
        record[y] = record[y] ^ S[(S[i] + S[j]) & 0xFF];
    }
}