 a=1;e=t1;

``````	l4=P4[e]%length;

l3=P4[e]/length;

S5=S1[l4];

S6=S2[l3];
int t=S3[e];

////printf("\n\n S5[e]====%c\tS6[]=====%c",S5,S6);

////printf("\n\n e======%d\tp4[e]====%d\tS3[]=====%d",e,P4[e],S3[e]);

l=0;

//printf("\n\n e======%d\tp4[e]====%d",e,P4[e]);

if(S3[e]>=1)//checks whether sequence characters match if mismatch occurs next element is accessed

{	l4=0;

do

{

for(h=0;h<s;h++)//CHECKING P4 ARRAY for top digonal and left elment

{

if(P4[h]==(P4[e]-1))

b=h;//left element

else if(P4[h]==(P4[e]-length))

c=h;//top element

else if(P4[h]==(P4[e]-(length+1)))

d=h;//digonal element

}//4th four

//Comparing top diagonal and left element to find greatest of them
if(S3[b]==S3[d]==S3[c]||S3[d]==S3[e]-1)

g=d;

else if(S3[b]>S3[d]&&S3[b]>S3[c])

g=b;

else if(S3[c]>S3[d])

g=c;

else
g=d;

e=g;

//printf("\n\n e======%d\tS3[e]====%d\t p4[E]==%d\tl4====%d\tl===%d\tt==%d",e,S3[e],P4[e],l4,l,t);
if(S3[e]<1 || t<=1)
break;

l=P4[e]%length;

l4=P4[e]/length;
if(S1[l]==S2[l4])

{
S6[a]=S2[l4];

S5[a]=S1[l];
}
else
{
S6[a]='_';

S5[a]='_';
}
a++;

l4=0,l=0;b=0;c=0;d=0;

t--;

}while(S3[e]>=1 || t>=1);

S5[a]='\0';

S6[a]='\0';

printf("\n\nThe alignment is ===%s\nThe alignment is ===%s",S5,S6);
*S5=NULL;

*S6=NULL;

}//1st if

}//1st for
``````

This code is for the Smith-Waterman algorithm, in which two character sequences are compared to find regions of similarity. I have already scored the matrix, but now I have to recover the matched strings (the traceback). There are two arrays, S3 and P4: S3 stores the scores in sorted order, and P4 stores the original position of each element before sorting. The for loop finds the top, left and diagonal elements of a given position. I want to convert this code to CUDA, but I cannot do it successfully. Could someone please help me? I am attaching my code to this post; please have a look at it.
SMWH.cu (20 KB)

You might want to read this.

Hi, thanks for the help, but I have already read this paper and similar ones. The problem is that they all discuss the initialisation and scoring phases of Smith-Waterman; none of them mentions anything about the traceback phase. Also, the work you linked was tested on smaller sequences, whereas I am testing my program on arrays with dimensions larger than 8000*8000.

I have written a CUDA version of the code I posted. It works fine with a 4000*4000 array, using 400 threads per block and 4000*4000/400 blocks, but as soon as I change the number of array elements this configuration doesn't work. I also use 400 threads per block for the initialisation phase, and there it works perfectly for any array length. It is only the traceback phase that does not work. I am pasting the CUDA code I developed below.

this is the cuda code

``````
// Trace: parallel search of P4 for the three traceback neighbours of cell e.
//
// P4[i] holds the original (pre-sort) linear matrix position of sorted
// element i. For the current element e, the kernel looks for the sorted
// indices whose original positions are:
//     P4[e] - 1            -> left neighbour     -> written to hd[0]
//     P4[e] - length       -> top neighbour      -> written to hd[1]
//     P4[e] - (length + 1) -> diagonal neighbour -> written to hd[2]
//
// Parameters:
//   P4     device array of length*length ints (original positions)
//   length row length of the score matrix
//   hd     device array of 3 ints receiving the results
//   e      index into P4 of the element being traced back from
//
// Launch: any 1D grid / 1D block. The grid-stride loop covers all
// length*length elements regardless of the block count, so the host may cap
// the number of blocks at the device's grid-size limit instead of launching
// length*length/threads blocks.
//
// NOTE(review): the slot order left/top/diagonal is assumed to match the
// host-side b/c/d = ha[0]/ha[1]/ha[2] -- confirm against the caller.
// NOTE(review): a slot is left untouched when no element matches (e.g. for a
// cell on the first row or column); the host should pre-fill hd, for example
// with cudaMemset(hd, 0xFF, 3*sizeof(int)), and check for -1 after the copy.
// NOTE(review): assumes every position occurs at most once in P4, so each
// slot has a single writer -- confirm.
__global__ void Trace(int *P4,int length,int *hd, int e)
{
    // Position of the current cell; identical for every thread, read once.
    const int target = P4[e];

    // 64-bit arithmetic so length*length and the global index cannot overflow.
    const size_t total  = (size_t)length * (size_t)length;
    const size_t stride = (size_t)gridDim.x * blockDim.x;

    for (size_t idx = (size_t)blockIdx.x * blockDim.x + threadIdx.x;
         idx < total;
         idx += stride)
    {
        const int pos = P4[idx];

        if (pos == target - 1)
            hd[0] = (int)idx;   // left element
        else if (pos == target - length)
            hd[1] = (int)idx;   // top element
        else if (pos == target - (length + 1))
            hd[2] = (int)idx;   // diagonal element
    }
}
``````

It replaces this code block:

``````			/*for(h=0;h<s;h++)//CHECKING P4 ARRAY for top digonal and left elment s= number of array elements

{

if(P4[h]==(P4[e]-1))

b=h;//left element

else if(P4[h]==(P4[e]-length))

c=h;//top element

else if(P4[h]==(P4[e]-(length+1)))

d=h;//digonal element

}//4th four

*/
``````

in the code

if(S3[e]>=1)//checks whether sequence characters match if mismatch occurs next element is accessed

``````	{	l4=0;

do

{

/*for(h=0;h<s;h++)//CHECKING P4 ARRAY for top digonal and left elment

{

if(P4[h]==(P4[e]-1))

b=h;//left element

else if(P4[h]==(P4[e]-length))

c=h;//top element

else if(P4[h]==(P4[e]-(length+1)))

d=h;//digonal element

}//4th four

*/

CUDA_SAFE_CALL(cudaMalloc((void**)&hd,sizeof(int)*3));

CUDA_SAFE_CALL(cudaMalloc((void**)&P4D,sizeof(int)*length*length));

CUDA_SAFE_CALL(cudaMemcpy(P4D,P4,sizeof(int)*length*length,cudaMemcpyHostToDevice));

			CUT_SAFE_CALL(cutCreateTimer(&timer6));

CUT_SAFE_CALL(cutStartTimer(timer6));

//for(int i=0;i<=numblocks;i++)

Trace<<<numblocks,LE>>>(P4D,length,hd,e);

cudaFree(P4D);

CUDA_SAFE_CALL(cudaMemcpy(ha,hd,sizeof(int)*3,cudaMemcpyDeviceToHost));

			//for(n=0;n<3;n++)

//printf("\nh==%d",ha[n]);

cudaFree(hd);

b=ha;c=ha;d=ha;
``````

// printf("\n\nb====%d\tc=====%d\td======%d",b,c,d);

``````			//Comparing top diagonal and left element to find greatest of them

if(S3[b]==S3[d]==S3[c]||S3[d]==S3[e]-1)

g=d;

else if(S3[b]>S3[d]&&S3[b]>S3[c])

g=b;

else if(S3[c]>S3[d])

g=c;

else

g=d;

e=g;

if(S3[e]<1 || t<=1)

break;

l=P4[e]%length;

l4=P4[e]/length;

if(S1[l]==S2[l4])

{

S6[a]=S2[l4];

S5[a]=S1[l];

}

else

{

S6[a]='_';

S5[a]='_';

}

a++;

l4=0,l=0;b=0;c=0;d=0;

t--;

}while(S3[e]>1 || t>1);

S5[a]='\0';

S6[a]='\0';

printf("\n\nThe alignment is ===%s\nThe alignment is ===%s",S5,S6);

*S5=NULL;

*S6=NULL;

}//1st if

else

continue;

}//1st for
``````

I am also trying to sort this array, but it doesn't seem to work: the values get duplicated once the array is larger than 24*24.

Please help.