# residual is increasing instead of decreasing

Hello ,
I am trying to solve Ax=b using conjugate gradient method.
I checked the nvidia example and because my matrix A is not a sparse matrix , I used the cublasSgemv function.

The problem is that the residual first increase , then decreases and then increases again with the kast value being of 10^6.

``````#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <iostream>
#include <cublas_v2.h>

using namespace std;

int main() {

int Rows = 80 ,Cols = 64;
int Total = Rows * Cols;

//allocate host memory
float * A,
* b,
* x;

A = (float *) malloc( Total  * Total * sizeof(float) );

b = (float *) malloc( Total * sizeof(float) );

x = (float *) malloc( Total * sizeof(float) );

//allocate device memory
float * d_A,
* d_x,
* d_r,
* d_p,
* d_Ax;

cudaMalloc( (void **)&d_A, Total * Total * sizeof(float) );
cudaMalloc( (void **)&d_Ax, Total  * sizeof(float) );
cudaMalloc( (void **)&d_x, Total  * sizeof(float) );
cudaMalloc( (void **)&d_r, Total * sizeof(float));
cudaMalloc( (void **)&d_p, Total * sizeof(float));

FILE * fileA;
fileA = fopen( "Asymmetric", "rb" );
fread( A, sizeof(float), Total * Total , fileA );
fclose( fileA );

//copy A to device
cudaMemcpy(d_A, A, Total *  Total * sizeof(float), cudaMemcpyHostToDevice);

FILE * fileB;
fileB = fopen( "B", "rb" );
fread( b, sizeof(float), Total  ,fileB );
fclose( fileB );

//copy b to device
cudaMemcpy(d_r, b, Total * sizeof(float), cudaMemcpyHostToDevice);

const float tol = 1e-5f;
const int max_iter = 10000;
float a, bb, na, r0, r1,dot;
float alpha, beta, alpham1;

for (int i = 0; i < Total; i++)
x[i] = 0.0;

//copy to host
cudaMemcpy(d_x, x, Total * sizeof(float), cudaMemcpyHostToDevice);

/* Get handle to the CUBLAS context */
cublasHandle_t cublasHandle = 0;
cublasStatus_t cublasStatus;
cublasStatus = cublasCreate(&cublasHandle);

cublasStatus;

alpha = 1.0;
alpham1 = -1.0;
beta = 0.0;
r0 = 0.;

//  Solve Ax = b
cublasSgemv(cublasHandle , CUBLAS_OP_T , Total  , Total , &alpha , d_A ,  Total  ,  d_x ,1 , &beta , d_Ax , 1 );

cublasSaxpy(cublasHandle,  Total  , &alpham1, d_Ax, 1, d_r, 1);
cublasStatus = cublasSdot(cublasHandle, Total , d_r, 1, d_r, 1, &r1);

int k = 1;

while (r1 > tol*tol && k <= max_iter)
{
if (k > 1)
{
bb = r1 / r0;
cublasStatus = cublasSscal(cublasHandle, Total, &bb, d_p, 1);
cublasStatus = cublasSaxpy(cublasHandle,  Total , &alpha, d_r, 1, d_p, 1);

}
else
{
cublasStatus = cublasScopy(cublasHandle,  Total , d_r, 1, d_p, 1);
}

cublasSgemv(cublasHandle , CUBLAS_OP_T , Total  , Total ,  &alpha , d_A ,  Total  ,  d_p , 1 , &beta , d_Ax , 1 );

cublasStatus = cublasSdot(cublasHandle,  Total , d_p, 1, d_Ax, 1, &dot);

a = r1 / dot;

cublasStatus = cublasSaxpy(cublasHandle,  Total , &a, d_p, 1, d_x, 1);

na = -a;

cublasStatus = cublasSaxpy(cublasHandle,  Total , &na, d_Ax, 1, d_r, 1);

r0 = r1;
cublasStatus = cublasSdot(cublasHandle,  Total , d_r, 1, d_r, 1, &r1);

cout << "\niteration = "<<k<<" , residual = "<<sqrt(r1)<<endl;
k++;
}

cudaMemcpy(x, d_x, Total * sizeof(float), cudaMemcpyDeviceToHost);

cublasDestroy(cublasHandle);

free( b );
free( A );
free( x );

cudaFree( d_A );
cudaFree( d_Ax );
cudaFree( d_x );
cudaFree( d_r );

return 0;

}
``````

Thanks!

PS: I can’t upload the A and B matrices , I don’t know how.

Is A a symmetric positive definite matrix?

Yes.

Do you have any ideas for that?

Thanks!

I am not sure about the alpham1 = -1 .
Why the example in nvida uses -1 and not 1?

Also, printing the “dot” values ,I can see that they have values in the order of ^34.
And the last value is nan and the x vector I am taking has nan values.

Hmm , it seems I have something wrong in the generation of the SPDM matrix.

I now used curand and cublas functions to produce it and the residual is decreasing!
Ok!