PLease debug the code!

#include

#include<cuda.h>

#include

#include

#define SIZE 100000

using namespace std;

global void multiply(char no1,int len1,char no2,int len2,char result)

{

int carry=0;

int idx=blockIdx.x*blockDim.x+threadIdx.x;

if(idx<len2)

{

for(int i=0;i<len1;i++)

{

   int temp=(carry+result[idx+i]+no2[idx]*no1[i])%10;

   carry=(carry+result[idx+i]+no2[idx]*no1[i])/10;

   result[idx+i]=temp;

}

if(carry!=0)

   result[idx+len1]=carry;

}

}

global void convertcti(char no,int len)

{

int idx=blockIdx.x*blockDim.x+threadIdx.x;

if(idx<len)

no[idx]-='0';

}

global void convertitc(char no,int len)

{

int idx=blockIdx.x*blockDim.x+threadIdx.x;

if(idx<len)

	no[idx]+='0';

}

global void reverse(char no,int len)

{

int idx=blockIdx.x*blockDim.x+threadIdx.x;

if(idx<(len/2))

{

	int temp=no[idx];

	no[idx]=no[len-idx-1];

	no[len-idx-1]=temp;

}

}

int main()

{

char *no1,*no2,*result;

no1=new char;

no2=new char;

result=new char;

cin>>no1>>no2;

int len1=strlen(no1);

int len2=strlen(no2);

char *no1_d,*no2_d,*result_d;

for(int i=0;i<SIZE;i++)

          result[i]=0;

dim3 threadsperblockn1(8,1);

dim3 numblocksn1(len1/8+(len1%8==0?0:1),1);

cudaMalloc((void**)&no1_d,sizeof(no1));

cudaMemcpy(no1_d,no1,sizeof(no1),cudaMemcpyHostToDevice);

convertcti<<<numblocksn1,threadsperblockn1>>>(no1_d,len1);

reverse<<<numblocksn1,threadsperblockn1>>>(no1_d,len1);

cudaThreadSynchronize();

dim3 threadsperblockn2(8,1);

dim3 numblocksn2(len2/8+(len2%8==0?0:1),1);

cudaMalloc((void**)&no2_d,sizeof(no2));

cudaMemcpy(no2_d,no2,sizeof(no2),cudaMemcpyHostToDevice);

convertcti<<<numblocksn2,threadsperblockn2>>>(no2_d,len2);

reverse<<<numblocksn2,threadsperblockn2>>>(no2_d,len2);

cudaThreadSynchronize();

//Only for checking

cudaMemcpy(no1,no1_d,sizeof(no1),cudaMemcpyDeviceToHost);

cout<<“ANS”<<endl;

for(int i=0;i<len1;i++)

   cout<<(int)no1[i]<<" ";

cout<<endl;

cudaMemcpy(no2,no2_d,sizeof(no2),cudaMemcpyDeviceToHost);

 cout<<"ANS"<<endl;

for(int i=0;i<len2;i++)

   cout<<(int)no2[i]<<" ";

cout<<endl;

//Only for checking

cudaMalloc((void**)&result_d,sizeof(result));

cudaMemcpy(result_d,result,sizeof(result),cudaMemcpyHostToDevice);

multiply<<<numblocksn2,threadsperblockn2>>>(no1_d,len1,no2_d,len2,result_d);

cudaThreadSynchronize();

cudaMemcpy(result,result_d,sizeof(result),cudaMemcpyDeviceToHost);

cout<<“RESULT”<<endl;

for(int i=0;i<(len1+len2);i++)

   cout<<(int)result[i]<<" ";

cout<<endl;

getchar();

getchar();

}

Hi

the above code ,reverses the 2 given numbers taken as strings, multiplies them using oldschool math and then prints it. The code works fine as long as the ans is at max 4digits, if the answer is >=10000, all digits after the 5th is printed as 0.

I am unable to get my mistake .PLS help!

PS> I have printed the final answer in reverse.

Did you check the value returned by sizeof(no1)

Thanks a lot for pointing out the blunder!