I am trying to solve the problem A*X=B,
where A =
1 2
2 1
and B =
8 7
7 8
Therefore, X should be
2 3
3 2
I am implementing the following code:
#define IDX2C(i,j,ld) ((j*ld)+i) // square matrix
void matrixPrintf(char* format, float* matrix,int dim1,int dim2)
{
for (int i=0;i<dim1;i++)
{
for (int j=0;j<dim2;j++)
printf(format,matrix[IDX2C(i,j,dim1)]);
printf("\n");
}
};
int main(int argc, char **argv)
{
float A[] = { 1, 2, 2, 1 };
float B[] = { 8, 7, 7, 8 };
float* d_A, * d_B;
cudaMalloc((void **) &d_A, 4*sizeof(float));
cudaMemcpy(d_A, A, 4*sizeof(float), cudaMemcpyHostToDevice);
cudaMalloc((void **) &d_B, 4*sizeof(float));
cudaMemcpy(d_B, B, 4*sizeof(float), cudaMemcpyHostToDevice);
printf("A = \n");
matrixPrintf("%8.2g ",A,2,2);
printf("B = \n");
matrixPrintf("%8.2g ",B,2,2);
// initialize cublas
cublasHandle_t handle;
cublasCreate(&handle);
const float alpha = 1;
cublasStatus_t status =
cublasStrsm(handle, CUBLAS_SIDE_LEFT, CUBLAS_FILL_MODE_UPPER,
CUBLAS_OP_N, CUBLAS_DIAG_NON_UNIT, 2, 2, &alpha,
d_A, 2, d_B, 3);
if (status != CUBLAS_STATUS_SUCCESS)
{
printf("cublasTgemm returned error code %d, line(%d)\n", status, __LINE__);
exit(EXIT_FAILURE);
}
std::fill(B,B+4,float(0));
cudaDeviceSynchronize();
cudaMemcpy(B, d_B, 4*sizeof(float), cudaMemcpyDeviceToHost);
cudaFree(d_A);
cudaFree(d_B);
printf("B(final) = \n");
matrixPrintf("%8.2g ",B,2,2);
return 0;
}
My results are:
A =
1 2
2 1
B =
8 7
7 8
B(final) =
-6 7
7 8
Can any one see what I am doing wrong?
Thanks -