Some errors in CUDA matrix programming

tronorinoyeong · May 19, 2018, 8:34pm

[b]Please find the inverse matrix of A.

Please verify the inverse of A by multiplying A by its inverse.
The product should be a 1000*1000 identity matrix.[/b]

My CUDA code below does not build, and I am not sure how to fix it.

#include <conio.h>
#include <cuda_runtime.h>
#include <cublas_v2.h>
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <string>
#include <time.h>
#include <windows.h>  

#define BLOCK_SIZE 16
#define NUM_THREADS 256

static cudaEvent_t cu_TimerStart;
static cudaEvent_t cu_TimerStop;

/*
 * Starts the file-level GPU timer.
 *
 * Creates the cu_TimerStart / cu_TimerStop events and records cu_TimerStart
 * (no stream argument is passed, so the default one is used). Must be paired
 * with d_CUDATimerStop(), which destroys both events.
 *
 * All three calls are always issued, as before; the first failure, if any,
 * is reported on stderr instead of being silently dropped.
 */
void d_CUDATimerStart(void)
{
	const cudaError_t errCreateStart = cudaEventCreate(&cu_TimerStart);
	const cudaError_t errCreateStop = cudaEventCreate(&cu_TimerStop);
	const cudaError_t errRecord = cudaEventRecord(cu_TimerStart);

	cudaError_t firstError = errCreateStart;
	if (firstError == cudaSuccess)
	{
		firstError = errCreateStop;
	}
	if (firstError == cudaSuccess)
	{
		firstError = errRecord;
	}

	if (firstError != cudaSuccess)
	{
		fprintf(stderr, "d_CUDATimerStart: %s\n", cudaGetErrorString(firstError));
	}
}

/*
 * Stops the file-level GPU timer and returns the elapsed time.
 *
 * Records cu_TimerStop, waits for it to complete, measures the time between
 * cu_TimerStart and cu_TimerStop, and destroys both events.
 *
 * Returns the elapsed time in milliseconds, or 0.0f if any of the event
 * calls failed (the failure is reported on stderr). Previously the result
 * was an uninitialized value in that case.
 */
float d_CUDATimerStop(void)
{
	float ms = 0.0f;

	cudaError_t err = cudaEventRecord(cu_TimerStop);

	if (err == cudaSuccess)
	{
		/* Block the host until the stop event has actually been reached. */
		err = cudaEventSynchronize(cu_TimerStop);
	}

	if (err == cudaSuccess)
	{
		err = cudaEventElapsedTime(&ms, cu_TimerStart, cu_TimerStop);
	}

	if (err != cudaSuccess)
	{
		fprintf(stderr, "d_CUDATimerStop: %s\n", cudaGetErrorString(err));
		ms = 0.0f;
	}

	/* Always release the events, even after a failure. */
	cudaEventDestroy(cu_TimerStart);
	cudaEventDestroy(cu_TimerStop);

	return ms;
}

/*
 * Inverts an n x n single-precision matrix on the GPU with cuBLAS
 * (batched LU factorization followed by batched inversion, batch size 1).
 *
 * Parameters:
 *   L - host pointer to n*n floats, stored contiguously with leading
 *       dimension n. cuBLAS interprets the buffer as column-major; because
 *       inverse(transpose(A)) == transpose(inverse(A)), a row-major input
 *       yields a row-major inverse, so the caller's layout is preserved.
 *   n - matrix dimension, must be > 0.
 *
 * Returns a malloc'd host buffer of n*n floats holding the inverse (the
 * caller must free() it), or NULL on any failure: invalid arguments,
 * CUDA/cuBLAS error, singular matrix, or host allocation failure. A
 * diagnostic is written to stderr in every failure case.
 *
 * The elapsed time of the factorization + inversion is printed to stdout.
 */
float* d_GetInv(float* L, int n)
{
	if (L == NULL || n <= 0)
	{
		fprintf(stderr, "d_GetInv: invalid input (null matrix or non-positive size).\n");
		return NULL;
	}

	cublasHandle_t cu_cublasHandle = NULL;

	float** adL = NULL;      /* device array holding the single pointer dL */
	float** adC = NULL;      /* device array holding the single pointer dC */
	float* dL = NULL;        /* device copy of the input; overwritten by its LU factors */
	float* dC = NULL;        /* device buffer receiving the inverse */
	int* dLUPivots = NULL;   /* pivot indices produced by the factorization */
	int* dLUInfo = NULL;     /* [0] = getrf status, [1] = getri status */
	float* res = NULL;
	int hLUInfo[2] = { 0, 0 };

	/* Compute the byte count in size_t so n*n cannot overflow int. */
	const size_t szA = (size_t)n * (size_t)n * sizeof(float);

	const bool handleOk = (cublasCreate(&cu_cublasHandle) == CUBLAS_STATUS_SUCCESS);
	bool ok = handleOk;
	if (!ok)
	{
		fprintf(stderr, "d_GetInv: cublasCreate failed.\n");
	}

	if (ok)
	{
		ok = cudaMalloc(&adL, sizeof(float*)) == cudaSuccess
			&& cudaMalloc(&adC, sizeof(float*)) == cudaSuccess
			&& cudaMalloc(&dL, szA) == cudaSuccess
			&& cudaMalloc(&dC, szA) == cudaSuccess
			&& cudaMalloc(&dLUPivots, (size_t)n * sizeof(int)) == cudaSuccess
			&& cudaMalloc(&dLUInfo, 2 * sizeof(int)) == cudaSuccess;
		if (!ok)
		{
			fprintf(stderr, "d_GetInv: device allocation failed: %s\n", cudaGetErrorString(cudaGetLastError()));
		}
	}

	if (ok)
	{
		ok = cudaMemcpy(dL, L, szA, cudaMemcpyHostToDevice) == cudaSuccess
			&& cudaMemcpy(adL, &dL, sizeof(float*), cudaMemcpyHostToDevice) == cudaSuccess
			&& cudaMemcpy(adC, &dC, sizeof(float*), cudaMemcpyHostToDevice) == cudaSuccess;
		if (!ok)
		{
			fprintf(stderr, "d_GetInv: host-to-device copy failed: %s\n", cudaGetErrorString(cudaGetLastError()));
		}
	}

	if (ok)
	{
		d_CUDATimerStart();

		/* In-place LU factorization of dL. */
		const cublasStatus_t stFactor = cublasSgetrfBatched(cu_cublasHandle, n, adL, n, dLUPivots, dLUInfo, 1);
		const cudaError_t syncFactor = cudaDeviceSynchronize();

		cublasStatus_t stInvert = CUBLAS_STATUS_SUCCESS;
		cudaError_t syncInvert = cudaSuccess;
		if (stFactor == CUBLAS_STATUS_SUCCESS && syncFactor == cudaSuccess)
		{
			/* Out-of-place inversion from the LU factors into dC. Its status goes
			   to dLUInfo[1] so the factorization status in dLUInfo[0] survives. */
			stInvert = cublasSgetriBatched(cu_cublasHandle, n, (const float **)adL, n, dLUPivots, adC, n, dLUInfo + 1, 1);
			syncInvert = cudaDeviceSynchronize();
		}

		/* Always stop the timer so the events created by the start call are released. */
		const float timed = d_CUDATimerStop();

		ok = stFactor == CUBLAS_STATUS_SUCCESS && syncFactor == cudaSuccess
			&& stInvert == CUBLAS_STATUS_SUCCESS && syncInvert == cudaSuccess;
		if (ok)
		{
			printf("\ncublas inverse in: %.5f ms.\n", timed);
		}
		else
		{
			fprintf(stderr, "d_GetInv: cuBLAS factorization/inversion failed.\n");
		}
	}

	if (ok)
	{
		ok = cudaMemcpy(hLUInfo, dLUInfo, sizeof(hLUInfo), cudaMemcpyDeviceToHost) == cudaSuccess;
		if (!ok)
		{
			fprintf(stderr, "d_GetInv: could not read back the status values.\n");
		}
		else if (hLUInfo[0] != 0 || hLUInfo[1] != 0)
		{
			/* A non-zero info value means the matrix could not be factorized/inverted. */
			fprintf(stderr, "d_GetInv: matrix is singular (getrf info = %d, getri info = %d).\n", hLUInfo[0], hLUInfo[1]);
			ok = false;
		}
	}

	if (ok)
	{
		res = (float*)malloc(szA);
		if (res == NULL)
		{
			fprintf(stderr, "d_GetInv: host allocation failed.\n");
		}
		else if (cudaMemcpy(res, dC, szA, cudaMemcpyDeviceToHost) != cudaSuccess)
		{
			fprintf(stderr, "d_GetInv: device-to-host copy failed.\n");
			free(res);
			res = NULL;
		}
	}

	/* Single cleanup path; pointers that were never allocated are still NULL. */
	cudaFree(adL);
	cudaFree(adC);
	cudaFree(dL);
	cudaFree(dC);
	cudaFree(dLUPivots);
	cudaFree(dLUInfo);

	if (handleOk)
	{
		cublasDestroy(cu_cublasHandle);
	}

	return res;
}

/*
 * Tiled matrix product c = a * b for n x n float matrices.
 *
 * Indexing is row-major: element (r, col) of a is a[r * lda + col], and
 * likewise for b/ldb and c/ldc (leading dimensions are in elements).
 *
 * Launch layout: each block computes one BLOCK_SIZE x BLOCK_SIZE tile of c,
 * with threadIdx.x selecting the column and threadIdx.y the row inside the
 * tile. The shared tiles are indexed directly by threadIdx, so the kernel
 * assumes blockDim == (BLOCK_SIZE, BLOCK_SIZE) and a grid of at least
 * ceil(n / BLOCK_SIZE) blocks in x and y. Out-of-range threads load zeros
 * and skip the final store, so n need not be a multiple of BLOCK_SIZE.
 *
 * Shared memory: two static BLOCK_SIZE x BLOCK_SIZE float tiles.
 *
 * Each dot product is accumulated with Kahan (compensated) summation.
 */
__global__ static void matMultCUDA(const float* a, size_t lda, const float* b, size_t ldb, float* c, size_t ldc, int n)
{
	__shared__ float matA[BLOCK_SIZE][BLOCK_SIZE];
	__shared__ float matB[BLOCK_SIZE][BLOCK_SIZE];
	const int tidc = threadIdx.x;
	const int tidr = threadIdx.y;
	const int bidc = blockIdx.x * BLOCK_SIZE;
	const int bidr = blockIdx.y * BLOCK_SIZE;
	const int row = tidr + bidr;   /* row of c owned by this thread */
	const int col = tidc + bidc;   /* column of c owned by this thread */
	int i, j;

	float results = 0.0f;   /* running compensated sum */
	float comp = 0.0f;      /* running compensation (lost low-order bits) */

	for (j = 0; j < n; j += BLOCK_SIZE)
	{
		/* Stage one tile of a and one tile of b; pad with zeros past the edge. */
		if (row < n && tidc + j < n)
		{
			matA[tidr][tidc] = a[row * lda + tidc + j];
		}
		else
		{
			matA[tidr][tidc] = 0.0f;
		}

		if (tidr + j < n && col < n)
		{
			matB[tidr][tidc] = b[(tidr + j) * ldb + col];
		}
		else
		{
			matB[tidr][tidc] = 0.0f;
		}

		/* All loads must be visible before any thread reads the tiles. */
		__syncthreads();

		for (i = 0; i < BLOCK_SIZE; i++)
		{
			/* Kahan step. The original code then overwrote `results` with the
			   bare product, discarding the accumulated sum; that line is gone. */
			float t;
			comp -= matA[tidr][i] * matB[i][tidc];
			t = results - comp;
			comp = (t - results) + comp;
			results = t;
		}

		/* Do not overwrite the tiles while other threads are still reading them. */
		__syncthreads();
	}

	if (row < n && col < n)
	{
		c[row * ldc + col] = results;
	}
}

/*
 * Host reference matrix product: c = a * b for n x n float matrices.
 *
 * Element (r, col) of a is read from a[r * lda + col]; b and c are indexed
 * the same way with ldb and ldc. Each output element is accumulated in a
 * double and narrowed to float when stored.
 */
void matmult(const float* a, int lda, const float* b, int ldb, float* c, int ldc, int n)
{
	for (int row = 0; row < n; ++row)
	{
		const float* aRow = a + row * lda;
		float* cRow = c + row * ldc;

		for (int col = 0; col < n; ++col)
		{
			double acc = 0;

			for (int inner = 0; inner < n; ++inner)
			{
				/* The product is formed in float, then added to the double sum. */
				const float term = aRow[inner] * b[inner * ldb + col];
				acc += term;
			}

			cRow[col] = static_cast<float>(acc);
		}
	}
}

/*
 * Reads a matrix size n, fills an n x n matrix with pseudo-random values,
 * inverts it with d_GetInv(), and multiplies the matrix by its inverse on
 * the host with matmult(). Prints the matrix, its inverse and the product
 * (which should be close to the identity matrix).
 *
 * Returns 0 on success, 1 on invalid input, allocation failure or a failed
 * inversion.
 */
int main()
{
	int n = 0;
	printf("Please input matrix number :");
	if (scanf("%d", &n) != 1 || n <= 0)
	{
		fprintf(stderr, "Invalid matrix size.\n");
		return 1;
	}

	/* Element count in size_t so n*n cannot overflow int. */
	const size_t dim = (size_t)n;
	const size_t count = dim * dim;

	float* L = (float*)malloc(count * sizeof(float));
	float* c = (float*)malloc(count * sizeof(float));
	if (L == NULL || c == NULL)
	{
		fprintf(stderr, "Out of host memory.\n");
		free(L);
		free(c);
		return 1;
	}

	size_t i, j;
	for (i = 0; i < dim; i++)
	{
		for (j = 0; j < dim; j++)
		{
			/* The divisor is formed in float: RAND_MAX * RAND_MAX as an int
			   product overflows on platforms where RAND_MAX is 2^31 - 1. */
			L[i * dim + j] = (float)rand() / RAND_MAX + (float)rand() / ((float)RAND_MAX * RAND_MAX);
			printf("%.1f\t", L[i * dim + j]);
		}
		printf("\n");
	}

	float* inv = d_GetInv(L, n);
	if (inv == NULL)
	{
		fprintf(stderr, "Matrix inversion failed.\n");
		free(L);
		free(c);
		return 1;
	}
	printf("\n");

	for (i = 0; i < dim; i++)
	{
		for (j = 0; j < dim; j++)
		{
			printf("%.1f\t", inv[i * dim + j]);
		}
		printf("\n");
	}

	/* Verification: L * inv should be approximately the identity matrix. */
	matmult(L, n, inv, n, c, n, n);
	printf("\n");

	for (i = 0; i < dim; i++)
	{
		for (j = 0; j < dim; j++)
		{
			printf("%f\t", c[i * dim + j]);
		}
		printf("\n");
	}

	printf("\n");
	printf("Done.");
	_getch();

	free(inv);
	free(c);
	free(L);

	return 0;
}

Error 58 error LNK1120: 4 unresolved externals

Error 54 error LNK2019: unresolved external symbol cublasCreate_v2 referenced in function “float * __cdecl d_GetInv(float *,int)” (?d_GetInv@@YAPEAMPEAMH@Z)

Error 55 error LNK2019: unresolved external symbol cublasDestroy_v2 referenced in function “float * __cdecl d_GetInv(float *,int)” (?d_GetInv@@YAPEAMPEAMH@Z)

Error 56 error LNK2019: unresolved external symbol cublasSgetrfBatched referenced in function “float * __cdecl d_GetInv(float *,int)” (?d_GetInv@@YAPEAMPEAMH@Z)

Error 57 error LNK2019: unresolved external symbol cublasSgetriBatched referenced in function “float * __cdecl d_GetInv(float *,int)” (?d_GetInv@@YAPEAMPEAMH@Z)

61 IntelliSense: identifier “__syncthreads” is undefined

60 IntelliSense: identifier “blockIdx” is undefined

59 IntelliSense: identifier “threadIdx” is undefined

Please provide your opinion and suggestion
thus I will be able to improve my computing skills

Robert_Crovella · May 19, 2018, 10:11pm

you need to link against cublas library

instructions are here:

[url]visual studio 2010 - how to link library (e.g. CUBLAS, CUSPARSE) for CUDA on windows - Stack Overflow[/url]

the intellisense errors can be ignored

tronorinoyeong · May 20, 2018, 12:55pm

I cannot understand

Can you write some detail examples over here

Thank and please help

Robert_Crovella · May 20, 2018, 2:44pm

Make sure you have opened the project that you want to work on.
Select View…Property Pages (from the menu). A new dialog box will open up.
On the left hand side of this dialog box, select Linker to open up its sub-menu
Under linker, select Input
Now, on the pane on the right, observe the first item which is “Additional Dependencies”. I believe cudart.lib should already be present there.
Click to the right of cudart.lib. You can now type in new libraries to be added. Type a space (to separate from cudart.lib) and type cublas.lib
Now click “Apply” in the lower right corner of the dialog box.

Then rebuild your project.

tronorinoyeong · May 21, 2018, 1:50am

I am using Visual Studio 2013 with CUDA 9.1 - that is why I cannot find:

Property Pages
Linker
Additional Dependencies

Can you understand my problem - and please assist

Robert_Crovella · May 21, 2018, 2:11am

VS 2013 has:

Property Pages
Linker
Additional Dependencies

see here:

[url]https://docs.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2013/669zx6zc(v=vs.120)[/url]

you can also get to the project property pages by right clicking on the project itself in the left hand project browser pane. Then select properties at the bottom of the pop-up menu.

This is standard Visual Studio usage and navigation, not anything specific to CUDA.

tronorinoyeong · May 21, 2018, 7:10pm

I cannot get that to work anyway, so I give up on this case.

Topic		Replies	Views
Cuda programming code cannot run in old version CUDA Programming and Performance	4	1005	June 22, 2018
Matrix inverse usng linear system solver through cublas , cublasCreate exception or something else CUDA Programming and Performance	1	4592	June 16, 2013
unresolved external symbol _main referenced in function ___tmainCRTStartup CUDA Programming and Performance	7	9308	February 22, 2011
Undefined reference to library CUDA Programming and Performance	4	4445	November 18, 2007
Possible VS2010 integration bug? CUDA Programming and Performance	0	693	March 7, 2013
Visual Studio 2010: Link error CUDA Programming and Performance	4	7878	March 11, 2011
Generate a submatrix CUDA Programming and Performance	3	1358	March 6, 2015
add other projects at CUDA solution(VS2005) CUDA Programming and Performance	7	4019	April 23, 2009
CUDA Matrix Example CUDA Programming and Performance	5	8295	March 4, 2010
Texture references problem CUDA Programming and Performance	6	1830	September 17, 2016

Some errors in CUDA matrix programming

Related topics