Hi,
i’m learning CUDA and now i’m trying to write program for parallel computing Hamming distance. When i run my kernel in more than one block, it returns incorrect solution and i can’t figure out why.
This is my kernel:
__global__ void hammingDistance(char* text, char* pattern, unsigned int* distance, unsigned int textLength, unsigned int patternLength, unsigned int maxError, unsigned int * array1, unsigned int * array2) {
unsigned int* ptr1;
unsigned int* ptr2;
unsigned int* swap;
ptr1 = array1;
ptr2 = array2;
unsigned int threadID = threadIdx.x + blockIdx.x*blockDim.x;
unsigned int offset = 0;
while (threadID+offset < textLength+1) {
*(ptr1+threadID+offset) = 0;
offset += blockDim.x*gridDim.x;
}
*(ptr2) = maxError+1;
int column = threadID+1;
for (unsigned int row = 0; row < patternLength; row++) {
offset = 0;
while (column+offset < textLength+1) {
if (pattern[row] == text[column+offset-1])
*(ptr2 + column + offset) = *(ptr1 + column + offset - 1);
else
*(ptr2 + column + offset) = *(ptr1 + column + offset - 1) + 1;
offset += blockDim.x*gridDim.x;
}
swap = ptr1;
ptr1 = ptr2;
ptr2 = swap;
if (row == 0 && threadID == 0) *ptr2 = maxError+1;
}
offset = 0;
while (threadID+offset < textLength) {
distance[threadID+offset] = *(ptr1+threadID+offset+1);
offset += blockDim.x*gridDim.x;
}
}
Will please somebody help me? What is wrong? I have Geforce G210.
Sem2.zip (44.4 KB)