For example, here’s the output of my program executed on small sequences:
0 -2 -4 -6 -8 -10 -12 -14 -16 -18 -20 -22 -24 -26 -28 -30 -32 -34 -36 -38 -40 -42 -44 -46 -48
-2 -1 -3 -5 -5 -7 -9 -11 -13 -15 -17 -19 -21 -23 -25 -27 -29 -31 -33 -35 -37 -39 -41 -43 -45
-4 -3 -2 -4 -4 -6 -8 -10 -12 -12 -14 -16 -18 -20 -22 -24 -26 -28 -30 -32 -34 -36 -38 -40 -42
-6 -5 -4 -3 -3 -5 -7 -9 -11 -11 -13 -15 -17 -17 -19 -21 -23 -25 -27 -29 -31 -33 -35 -37 -39
-8 -7 -6 -5 -4 -4 -6 -8 -10 -12 -12 -14 -14 -16 -18 -18 -20 -22 -24 -26 -28 -30 -32 -34 -36
-10 -9 -8 -7 -6 -5 -5 -7 -7 -9 -11 -13 -15 -15 -17 -19 -19 -19 -21 -23 -25 -27 -29 -31 -33
-12 -11 -10 -9 -8 -7 -6 -6 -8 -8 -10 -12 -12 -14 -16 -16 -18 -20 -20 -22 -24 -24 -26 -28 -30
-14 -11 -10 -9 -10 -7 -6 -5 -7 -9 -7 -9 -11 -13 -13 -15 -17 -19 -19 -21 -23 -25 -25 -27 -27
-16 -13 -12 -11 -10 -9 -8 -7 -4 -6 -8 -8 -10 -12 -14 -14 -16 -16 -18 -20 -20 -22 -24 -26 -28
-18 -15 -14 -13 -10 -11 -10 -9 -6 -3 -5 -7 -9 -9 -11 -13 -15 -17 -17 -17 -19 -21 -23 -23 -25
-20 -17 -16 -15 -12 -11 -12 -11 -8 -5 -4 -6 -6 -8 -10 -10 -12 -14 -16 -18 -18 -18 -20 -22 -24
-22 -19 -18 -17 -14 -13 -12 -13 -10 -7 -6 -5 -7 -5 -7 -9 -11 -13 -15 -15 -17 -19 -19 -19 -21
-24 -21 -20 -19 -16 -15 -14 -13 -12 -9 -8 -7 -4 -6 -6 -6 -8 -10 -12 -14 -16 -16 -18 -20 -20
-26 -23 -20 -19 -18 -15 -14 -13 -14 -11 -8 -7 -6 -5 -5 -7 -7 -9 -9 -11 -13 -15 -17 -19 -19
-28 -25 -22 -21 -20 -17 -16 -15 -12 -13 -10 -9 -8 -7 -6 -6 -8 -6 -8 -10 -10 -12 -14 -16 -18
-30 -27 -24 -23 -20 -19 -18 -17 -14 -11 -12 -11 -10 -7 -8 -7 -7 -8 -7 -7 -9 -11 -13 -13 -15
-32 -29 -26 -23 -22 -19 -18 -17 -16 -13 -10 -11 -12 -9 -6 -8 -8 -8 -7 -8 -8 -10 -12 -14 -12
-34 -31 -28 -25 -24 -21 -20 -19 -16 -15 -12 -11 -12 -11 -8 -7 -9 -7 -9 -8 -7 -9 -9 -11 -13
-36 -33 -30 -27 -26 -23 -22 -21 -18 -17 -14 -13 -10 -12 -10 -7 -6 -8 -8 -10 -9 -6 -8 -10 -12
-38 -35 -32 -29 -28 -25 -22 -21 -20 -19 -16 -13 -12 -11 -11 -9 -8 -7 -7 -9 -11 -8 -7 -9 -9
0 -2 -4 -6 -8 -10 -12 -14 -16 -18 -20 -22 -24 -26 -28 -30 -32 -34 -36 -38 -40 -42 -44 -46 -48
-2 -1 -3 -5 -5 -7 -9 -11 -13 -15 -17 -19 -21 -23 -25 -27 -29 -33 -35 -35 -37 -39 -41 -43 -45
-4 -3 -2 -4 -4 -6 -8 -10 -12 -12 -14 -16 -18 -20 -22 -24 -26 -5 -7 -9 -11 -13 -15 -17 -19
-6 -5 -4 -3 -3 -5 -7 -9 -11 -11 -13 -15 -17 -17 -19 -21 -23 -4 -6 -6 -8 -10 -12 -14 -16
-8 -7 -6 -5 -4 -4 -6 -8 -10 -12 -12 -14 -14 -16 -18 -18 -20 -6 -5 -7 -7 -7 -9 -11 -13
-10 -9 -8 -7 -6 -5 -5 -7 -7 -9 -11 -13 -15 -15 -17 -19 -19 -8 -7 -6 -6 -8 -6 -8 -10
-12 -11 -10 -9 -8 -7 -6 -6 -8 -8 -10 -12 -12 -14 -16 -16 -18 -10 -9 -8 -7 -5 -7 -7 -9
-14 -11 -10 -9 -10 -7 -6 -5 -7 -9 -7 -9 -11 -13 -13 -15 -17 -12 -9 -10 -9 -7 -6 -8 -6
-16 -13 -12 -11 -10 -9 -8 -7 -4 -6 -8 -8 -10 -12 -14 -14 -16 -14 -11 -10 -9 -9 -6 -7 -8
-18 -15 -14 -13 -10 -11 -10 -9 -6 -3 -5 -7 -9 -9 -11 -13 -15 -16 -13 -10 -11 -10 -8 -5 -7
-20 -17 -16 -15 -12 -11 -12 -11 -8 -5 -4 -6 -6 -8 -10 -10 -12 -14 -15 -12 -11 -10 -10 -7 -6
-22 -19 -18 -17 -14 -13 -12 -13 -10 -7 -6 -5 -7 -5 -7 -9 -11 -13 -15 -14 -13 -12 -11 -9 -8
-24 -21 -20 -19 -16 -15 -14 -13 -12 -9 -8 -7 -4 -6 -6 -6 -8 -10 -12 -14 -15 -12 -13 -11 -10
-26 -23 -20 -19 -18 -15 -14 -13 -14 -11 -8 -7 -6 -5 -5 -7 -7 -9 -9 -11 -13 -14 -13 -13 -10
-28 -25 -22 -21 -20 -17 -16 -15 -12 -13 -10 -9 -8 -7 -6 -6 -8 -6 -8 -10 -10 -12 -13 -14 -12
-30 -27 -24 -23 -20 -19 -18 -17 -14 -11 -12 -11 -10 -7 -8 -7 -7 -8 -7 -7 -9 -11 -13 -12 -14
-32 -29 -26 -23 -22 -19 -18 -17 -16 -13 -10 -11 -12 -9 -6 -8 -8 -8 -7 -8 -8 -10 -12 -14 -11
-34 -31 -28 -25 -24 -21 -20 -19 -16 -15 -12 -11 -12 -11 -8 -7 -9 -7 -9 -8 -7 -9 -9 -11 -13
-36 -33 -30 -27 -26 -23 -22 -21 -18 -17 -14 -13 -10 -12 -10 -7 -6 -8 -8 -10 -9 -6 -8 -10 -12
-38 -35 -32 -29 -28 -25 -22 -21 -20 -19 -16 -13 -12 -11 -11 -9 -8 -7 -7 -9 -11 -8 -7 -9 -9
diference between matrices : 1286
The first matrix is the good reference computed on CPU, the second is computed on GPU.
Now I run exactly the same code again :
...
2 times the same matrix
...
diference between matrices : 0
I don’t know what’s going on :(
When I execute the code on larger sequences (then the matrix is 10000x10000 sized), the difference between matrices is never zero.
I probably missed something in the code but why is this working sometimes with small sequences??
Here’s my code:
// Matrices are stored in row-major order:
// M(row, col) = *(M.elements + row * M.stride + col)
typedef struct {
int width;
int height;
int* elements;
} Matrix;
// acceder a l'element A[row][col]
__device__ int GetElement(Matrix A, int row, int col) {
return A.elements[row*A.width + col];
}
// definir l'element A[row][col] avec value
__device__ void SetElement(Matrix A, int row, int col, int value) {
A.elements[row*A.width + col] = value;
}
//penalite si gap
__device__ int p_on_device(char a, char b) {
if (a == b)
return 1;
else
return -1;
}
//calcul de similarite global et local
__device__ int sim_global_on_device(int a, int b, int c, char s, char t) {
int maximum = max(a - G, b + p_on_device(s, t));
maximum = max(maximum, c - G);
return maximum;
}
__global__ void AlignmentScoreKernel(Matrix score, charArray s, charArray t, int max_diag_size, int nbr_diag, int* data_diag) {
//IDs du thread courant
int blockID = blockIdx.x;
int threadID = threadIdx.x;
int index = blockID*BLOCK_SIZE + threadID;
//taille de la diagonale
int diag_size = 1;
//valeur de la diagonale
int value_diag = 0;
//calcul de la matrice
for(int d=0; d<nbr_diag; d++) {
//printf("hehe I am %d %d %d\n", blockID, threadID, index);
//calcul des indices de la case courante de la matrice
int di, dj;
if(d < s.length) {
di = d + 1;
dj = 1;
}
else {
di = s.length;
dj = d - s.length + 2;
}
di -= index;
dj += index;
if(index < diag_size) {
//printf("valeurs pour le calcul de value_diag %d %d %d\n", GetElement(score, di, dj-1), GetElement(score, di-1, dj-1), GetElement(score, di-1, dj));
//calcul de l'antidiagonale (un element par thread)
value_diag = sim_global_on_device(GetElement(score, di, dj-1),
GetElement(score, di-1, dj-1),
GetElement(score, di-1, dj),
s.values[di-1],
t.values[dj-1]);
SetElement(score, di, dj, value_diag);
}
//mise a jour de la taille de la diagonale
//on augmente la taille tant qu'on n'a pas atteint le maximum
if(diag_size < max_diag_size) {
if( ((d < s.length) && (s.length <= t.length)) || ((d < t.length-1) && (s.length > t.length)) ) {
diag_size++;
}
}
//on diminue la taille quand on entre dans le coin inferieur droit de la matrice
if( ((s.length >= t.length) && (d >= s.length-1)) || ((s.length < t.length) && (d >= t.length-1)) ) {
diag_size--;
}
__syncthreads();
}
}