Hey
This is code that compiles and runs on my machine. It obviously needs cuSOLVER. Don’t pay attention to how the matrix is initialised; it is written this way to give the most faithful example of what I do.
I don’t know whether the data in the matrix matters, so I am including it too.
Thanks for helping
#include <cstdio>
#include <iostream>

#include "cuda_runtime.h"
#include "cusolverDn.h"
#include "device_launch_parameters.h"
/*
 * Solves the dense linear system J * x = M on the GPU with cuSOLVER:
 * LU factorisation with partial pivoting (cusolverDnDgetrf) followed by a
 * triangular solve (cusolverDnDgetrs). J is the 29 x 29 matrix whose rows are
 * temp_j0 .. temp_j28 and M is the right-hand side Host_M.
 *
 * On success the solution is copied back into Host_M, printed to stdout, and
 * the program waits for an integer on stdin before exiting.
 *
 * Returns 0 on success and 1 if any CUDA / cuSOLVER call fails or if the
 * factorisation or the solve reports a non-zero `info`.
 *
 * All variables that carry an initialiser are declared before the first
 * `goto`, so no jump to the cleanup label crosses an initialisation.
 */
int main()
{
    int i_bus = 16;
    int npq = 14;
    const int n = i_bus - 1 + npq;  // system dimension (29)
    const size_t vector_bytes = (size_t)n * sizeof(double);
    const size_t matrix_bytes = (size_t)n * (size_t)n * sizeof(double);

    double Host_M[29]={8e+006,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; //i_bus-1+npq
    double temp_j0[29]={2e+008,0,0,0,0,0,0,0,-2e+008,0,0,0,0,0,0,0,0,0,0,0,0,0,-10000,0,0,0,0,0,0};
    double temp_j1[29]={0,4e+008,-2e+008,-2e+008,0,0,0,0,0,0,0,0,0,0,0,20000,-10000,-10000,0,0,0,0,0,0,0,0,0,0,0};
    double temp_j2[29]={0,-2e+008,4e+008,0,0,0,0,0,0,0,0,0,0,0,0,-10000,20000,0,0,0,0,0,0,0,0,0,0,0,0};
    double temp_j3[29]={0,-2e+008,0,6e+008,-2e+008,0,0,-2e+008,0,0,0,0,0,0,0,-10000,0,30000,-10000,0,0,-10000,0,0,0,0,0,0,0};
    double temp_j4[29]={0,0,0,-2e+008,4e+008,0,0,0,0,0,0,-2e+008,0,0,0,0,0,-10000,20000,0,0,0,0,0,0,-10000,0,0,0};
    double temp_j5[29]={0,0,0,0,0,6e+008,-2e+008,-2e+008,0,0,-2e+008,0,0,0,0,0,0,0,0,30000,-10000,-10000,0,0,-10000,0,0,0,0};
    double temp_j6[29]={0,0,0,0,0,-2e+008,4e+008,0,0,-2e+008,0,0,0,0,0,0,0,0,0,-10000,20000,0,0,-10000,0,0,0,0,0};
    double temp_j7[29]={0,0,0,-2e+008,0,-2e+008,0,6e+008,0,0,0,0,-2e+008,0,0,0,0,-10000,0,-10000,0,30000,0,0,0,0,-10000,0,0};
    double temp_j8[29]={-2e+008,0,0,0,0,0,0,0,4e+008,0,0,0,-2e+008,0,0,0,0,0,0,0,0,0,20000,0,0,0,-10000,0,0};
    double temp_j9[29]={0,0,0,0,0,0,-2e+008,0,0,4e+008,0,0,0,-2e+008,0,0,0,0,0,0,-10000,0,0,20000,0,0,0,-10000,0};
    double temp_j10[29]={0,0,0,0,0,-2e+008,0,0,0,0,2e+008,0,0,0,0,0,0,0,0,-10000,0,0,0,0,10000,0,0,0,0};
    double temp_j11[29]={0,0,0,0,-2e+008,0,0,0,0,0,0,2e+008,0,0,0,0,0,0,-10000,0,0,0,0,0,0,10000,0,0,0};
    double temp_j12[29]={0,0,0,0,0,0,0,-2e+008,-2e+008,0,0,0,4e+008,0,0,0,0,0,0,0,0,-10000,-10000,0,0,0,20000,0,0};
    double temp_j13[29]={0,0,0,0,0,0,0,0,0,-2e+008,0,0,0,4e+008,-2e+008,0,0,0,0,0,0,0,0,-10000,0,0,0,20000,-10000};
    double temp_j14[29]={0,0,0,0,0,0,0,0,0,0,0,0,0,-2e+008,2e+008,0,0,0,0,0,0,0,0,0,0,0,0,-10000,10000};
    double temp_j15[29]={-0,-4e+008,2e+008,2e+008,-0,-0,-0,-0,-0,-0,-0,-0,-0,-0,-0,20000,-10000,-10000,0,0,0,0,0,0,0,0,0,0,0};
    double temp_j16[29]={-0,2e+008,-4e+008,-0,-0,-0,-0,-0,-0,-0,-0,-0,-0,-0,-0,-10000,20000,0,0,0,0,0,0,0,0,0,0,0,0};
    double temp_j17[29]={-0,2e+008,-0,-6e+008,2e+008,-0,-0,2e+008,-0,-0,-0,-0,-0,-0,-0,-10000,0,30000,-10000,0,0,-10000,0,0,0,0,0,0,0};
    double temp_j18[29]={-0,-0,-0,2e+008,-4e+008,-0,-0,-0,-0,-0,-0,2e+008,-0,-0,-0,0,0,-10000,20000,0,0,0,0,0,0,-10000,0,0,0};
    double temp_j19[29]={-0,-0,-0,-0,-0,-6e+008,2e+008,2e+008,-0,-0,2e+008,-0,-0,-0,-0,0,0,0,0,30000,-10000,-10000,0,0,-10000,0,0,0,0};
    double temp_j20[29]={-0,-0,-0,-0,-0,2e+008,-4e+008,-0,-0,2e+008,-0,-0,-0,-0,-0,0,0,0,0,-10000,20000,0,0,-10000,0,0,0,0,0};
    double temp_j21[29]={-0,-0,-0,2e+008,-0,2e+008,-0,-6e+008,-0,-0,-0,-0,2e+008,-0,-0,0,0,-10000,0,-10000,0,30000,0,0,0,0,-10000,0,0};
    double temp_j22[29]={2e+008,-0,-0,-0,-0,-0,-0,-0,-4e+008,-0,-0,-0,2e+008,-0,-0,0,0,0,0,0,0,0,20000,0,0,0,-10000,0,0};
    double temp_j23[29]={-0,-0,-0,-0,-0,-0,2e+008,-0,-0,-4e+008,-0,-0,-0,2e+008,-0,0,0,0,0,0,-10000,0,0,20000,0,0,0,-10000,0};
    double temp_j24[29]={-0,-0,-0,-0,-0,2e+008,-0,-0,-0,-0,-2e+008,-0,-0,-0,-0,0,0,0,0,-10000,0,0,0,0,10000,0,0,0,0};
    double temp_j25[29]={-0,-0,-0,-0,2e+008,-0,-0,-0,-0,-0,-0,-2e+008,-0,-0,-0,0,0,0,-10000,0,0,0,0,0,0,10000,0,0,0};
    double temp_j26[29]={-0,-0,-0,-0,-0,-0,-0,2e+008,2e+008,-0,-0,-0,-4e+008,-0,-0,0,0,0,0,0,0,-10000,-10000,0,0,0,20000,0,0};
    double temp_j27[29]={-0,-0,-0,-0,-0,-0,-0,-0,-0,2e+008,-0,-0,-0,-4e+008,2e+008,0,0,0,0,0,0,0,0,-10000,0,0,0,20000,-10000};
    double temp_j28[29]={-0,-0,-0,-0,-0,-0,-0,-0,-0,-0,-0,-0,-0,2e+008,-2e+008,0,0,0,0,0,0,0,0,0,0,0,0,-10000,10000};

    // Row table: Temp_j[r] points at row r of J. Kept on the stack so there is
    // nothing to free for it.
    double* Temp_j[29] = {
        temp_j0,  temp_j1,  temp_j2,  temp_j3,  temp_j4,  temp_j5,
        temp_j6,  temp_j7,  temp_j8,  temp_j9,  temp_j10, temp_j11,
        temp_j12, temp_j13, temp_j14, temp_j15, temp_j16, temp_j17,
        temp_j18, temp_j19, temp_j20, temp_j21, temp_j22, temp_j23,
        temp_j24, temp_j25, temp_j26, temp_j27, temp_j28
    };

    double* Host_J = new double[(size_t)n * (size_t)n];

    // Every resource starts out null so the cleanup code can release
    // unconditionally, whatever step failed.
    double* device_M = NULL;
    double* device_J = NULL;
    double* device_work = NULL;
    int* device_info = NULL;
    int* device_Ipiv = NULL;
    cusolverDnHandle_t handle = NULL;
    cusolverStatus_t status = CUSOLVER_STATUS_SUCCESS;
    cudaError_t cudaStatus = cudaSuccess;
    int work_size = 0;
    int info = 0;
    int exit_code = 1;  // pessimistic default; set to 0 only after a full success
    int test = 0;

    // cuSOLVER dense routines read matrices in column-major order with leading
    // dimension lda, so element (row, col) must be stored at [col * n + row].
    // A row-major copy would make getrf/getrs operate on the transpose of J.
    for (int row = 0; row < n; row++) {
        for (int col = 0; col < n; col++) {
            Host_J[(size_t)col * n + row] = Temp_j[row][col];
        }
    }

    cudaStatus = cudaSetDevice(0);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaSetDevice failed! Do you have a CUDA-capable GPU installed?");
        goto Error;
    }

    cudaStatus = cudaMalloc(&device_J, matrix_bytes);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMalloc failed");
        goto Error;
    }
    cudaStatus = cudaMalloc(&device_M, vector_bytes);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudamalloc failed");
        goto Error;
    }
    cudaStatus = cudaMalloc(&device_info, sizeof(int));
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMalloc failed");
        goto Error;
    }
    cudaStatus = cudaMalloc(&device_Ipiv, (size_t)n * sizeof(int));
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMalloc failed");
        goto Error;
    }

    cudaStatus = cudaMemcpy(device_J, Host_J, matrix_bytes, cudaMemcpyHostToDevice);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMemcpy failed");
        goto Error;
    }
    cudaStatus = cudaMemcpy(device_M, Host_M, vector_bytes, cudaMemcpyHostToDevice);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMemcpy failed");
        goto Error;
    }

    status = cusolverDnCreate(&handle);
    if (status != CUSOLVER_STATUS_SUCCESS) {
        fprintf(stderr, "solve fail");
        goto Error;
    }

    // Ask cuSOLVER how many doubles of scratch space the factorisation needs.
    status = cusolverDnDgetrf_bufferSize(handle, n, n, device_J, n, &work_size);
    if (status != CUSOLVER_STATUS_SUCCESS) {
        fprintf(stderr, "buffersize failed");
        goto Error;
    }
    cudaStatus = cudaMalloc(&device_work, (size_t)work_size * sizeof(double));
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMalloc failed");
        goto Error;
    }

    // In-place LU factorisation of J; pivots go to device_Ipiv.
    status = cusolverDnDgetrf(handle, n, n, device_J, n, device_work, device_Ipiv, device_info);
    if (status != CUSOLVER_STATUS_SUCCESS) {
        fprintf(stderr, "Factorisation LU failed");
        goto Error;
    }
    cudaStatus = cudaMemcpy(&info, device_info, sizeof(int), cudaMemcpyDeviceToHost);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "memcpy fail");
        goto Error;
    }
    if (info != 0) {
        // info < 0: bad argument; info > 0: U(info, info) is exactly zero.
        // Either way the factors must not be fed to getrs.
        fprintf(stderr, "Error:LU factorization failed\n");
        goto Error;
    }

    // Solve J * x = M using the factors; the solution overwrites device_M.
    status = cusolverDnDgetrs(handle, CUBLAS_OP_N, n, 1, device_J, n, device_Ipiv, device_M, n, device_info);
    if (status != CUSOLVER_STATUS_SUCCESS) {
        fprintf(stderr, "résolution failed");
        goto Error;
    }
    cudaStatus = cudaMemcpy(&info, device_info, sizeof(int), cudaMemcpyDeviceToHost);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "memcpy fail");
        goto Error;
    }
    if (info != 0) {
        fprintf(stderr, "Error:solved fail\n");
        goto Error;
    }

    // Bring the solution back to the host; without this the result of the
    // solve is never observable.
    cudaStatus = cudaMemcpy(Host_M, device_M, vector_bytes, cudaMemcpyDeviceToHost);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMemcpy failed");
        goto Error;
    }
    for (int k = 0; k < n; k++) {
        std::cout << "x[" << k << "] = " << Host_M[k] << "\n";
    }

    // Keep the console window open until the user types something.
    std::cin >> test;
    exit_code = 0;

Error:
    // Shared cleanup for the success and the failure paths. cudaFree accepts a
    // null pointer, so resources that were never allocated are skipped safely.
    if (handle != NULL) {
        cusolverDnDestroy(handle);
    }
    cudaFree(device_work);
    cudaFree(device_M);
    cudaFree(device_J);
    cudaFree(device_Ipiv);
    cudaFree(device_info);
    delete[] Host_J;
    return exit_code;
}