Hello everyone,
I am a student at the University of Granada (Spain), and we are trying to work on protein alignment.
The problem is that I have written code that works well when I simulate it on the CPU, but when I execute it on the GPU it returns the same results three times; that is, it writes the (correct) results for the last protein you introduce three times.
The code I have written is (in general outline, as pseudocode) the following:
hmmsearch
main_cuda(){
struct plan7 hmm;
while (leer dsq ){
nseq++
}
int *All_mx = malloc (nseq * sizeof(int));
All_mx = ..../* some data */
int *All_L = malloc (nseq * sizeof(int));
All_L = ..../* some diferent data. a Vector, that contains lengths */
P7Vitebi_cuda(All_mx, All_L,hmm,nseq );
}
//////////
viterbi_cuda
P7Viterbi cuda(int All_mx,int All_L,struct plan7 hmm,int nseq){
int*xmx_d = cudamalloc ( 5 * M sizeof (int) );
intmmx_d = cudamalloc ( L * M sizeof (int) );
intdmx_d = cudamalloc ( L * M sizeof (int) );
intimx_d = cudamalloc ( L * M *sizeof (int) );
int *All_mx_d = cudaMalloc (nseq *sizeof (int));
cudaMemCpy(All_mx_d,All_mx, HostToDevice);
int *All_L_d = cudaMalloc (nseq *sizeof (int));
cudaMemCpy(All_L_d,All_L, HostToDevice);
Viterbi_kernel<<<dimgrid, dimbloq>>> (nseq,xmx_d,mmx_d,imx_d,dmx_d,All_mx_d,All_L);
cudaMemCpy(All_mx,All_mx_d, DeviceToHost);
print ( All_mx )
}
//////////////
Viterbi_kernel(int nseq,int xmx_d,int mmx_d,int *imx_d,int *dmx_d,int All_mx_d,int All_L){
int j = threadIdx.x;
xmx = ... /* some data */
mmx = ... /* some data */
dmx = ... /* some data */
imx = ... /* some data */
int L=All_L_d[j];
int *ptr = All_mx + L;
for (t = 0 to nseq )
ptr [t] = xmx[t];
ptr [t+1] = mmx[t];
ptr [t+2] = dmx[t];
ptr [t+3] = imx[t];
}
=================================
Could I perhaps have forgotten something like __syncthreads()?
For example, I have tried writing __syncthreads(); below these instructions, but it doesn’t work.
int L=All_L_d[j];
__syncthreads();
....
for (t = 0 to nseq ){
ptr [t] = xmx[t];
ptr [t+1] = mmx[t];
ptr [t+2] = dmx[t];
ptr [t+3] = imx[t];
__syncthreads();
}
========================
Any ideas? Am I writing the data through ptr correctly? I mean, could the problem be in other calculations…?
thank you. :ph34r:
Pedro