Some errors in CUDA matrix programming

[b]Please find the inverse matrix of A.

Please verify the inverse by multiplying A by its inverse.
The product should be a 1000*1000 identity matrix. [/b]

My CUDA code below does not build, and I am not sure how to fix it.

#include <conio.h>
#include <cuda_runtime.h>
#include <cublas_v2.h>
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <string>
#include <time.h>
#include <windows.h>  

// Edge length of the square shared-memory tiles used by matMultCUDA; the kernel
// indexes those tiles with threadIdx.x / threadIdx.y and steps by this amount.
#define BLOCK_SIZE 16
// NOTE(review): not referenced anywhere in the visible code - confirm whether it is still needed.
#define NUM_THREADS 256

// Event pair shared by d_CUDATimerStart() (creates both, records the start) and
// d_CUDATimerStop() (records the stop, reads the elapsed time, destroys both).
static cudaEvent_t cu_TimerStart;
static cudaEvent_t cu_TimerStop;

/*
 * Starts the file-level GPU timer: creates the start/stop events and records
 * the start event on the default stream.
 *
 * Every call must be paired with one d_CUDATimerStop(), which destroys the
 * events created here. Failures are reported on stderr instead of being
 * silently dropped; the function itself has no return value.
 */
void d_CUDATimerStart(void)
{
	cudaError_t err = cudaEventCreate(&cu_TimerStart);

	if (err == cudaSuccess)
	{
		err = cudaEventCreate(&cu_TimerStop);
	}
	if (err == cudaSuccess)
	{
		err = cudaEventRecord(cu_TimerStart);
	}

	if (err != cudaSuccess)
	{
		fprintf(stderr, "d_CUDATimerStart failed: %s\n", cudaGetErrorString(err));
	}
}

/*
 * Stops the file-level GPU timer started by d_CUDATimerStart().
 *
 * Records the stop event, waits for it to complete, and returns the time
 * between the two events in milliseconds. Both events are destroyed before
 * returning. If any step fails the error is reported on stderr and 0.0f is
 * returned (the original returned an uninitialised value in that case).
 */
float d_CUDATimerStop(void)
{
	float ms = 0.0f;

	cudaError_t err = cudaEventRecord(cu_TimerStop);

	if (err == cudaSuccess)
	{
		// Block the host until the stop event has really been reached on the GPU.
		err = cudaEventSynchronize(cu_TimerStop);
	}
	if (err == cudaSuccess)
	{
		err = cudaEventElapsedTime(&ms, cu_TimerStart, cu_TimerStop);
	}

	if (err != cudaSuccess)
	{
		fprintf(stderr, "d_CUDATimerStop failed: %s\n", cudaGetErrorString(err));
		ms = 0.0f;
	}

	cudaEventDestroy(cu_TimerStart);
	cudaEventDestroy(cu_TimerStop);

	return ms;
}

/* Reports a failed CUDA runtime call on stderr; returns 1 on success, 0 on failure. */
static int d_CudaOk(cudaError_t err, const char* what)
{
	if (err == cudaSuccess)
	{
		return 1;
	}
	fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(err));
	return 0;
}

/* Reports a failed cuBLAS call on stderr; returns 1 on success, 0 on failure. */
static int d_CublasOk(cublasStatus_t status, const char* what)
{
	if (status == CUBLAS_STATUS_SUCCESS)
	{
		return 1;
	}
	fprintf(stderr, "cuBLAS error in %s: status %d\n", what, (int)status);
	return 0;
}

/*
 * Inverts the n x n matrix L on the GPU with cuBLAS (batched LU factorisation
 * followed by batched inversion, batch size 1).
 *
 * L    host pointer to n*n floats; it is only read.
 * n    matrix order, must be > 0.
 *
 * Returns a malloc()'d host buffer of n*n floats holding the inverse (the
 * caller must free() it), or NULL if the arguments are invalid, any
 * CUDA/cuBLAS call fails, or cuBLAS reports that the matrix could not be
 * factorised/inverted (e.g. it is singular). A diagnostic is written to
 * stderr in every failure case.
 *
 * Storage order: cuBLAS is column-major, so a row-major L is seen as its
 * transpose. Because inv(A^T) == inv(A)^T, the buffer that comes back is the
 * inverse in the same storage order as the input.
 *
 * Links against cublas (cublas.lib on Windows).
 */
float* d_GetInv(float* L, int n)
{
	if (L == NULL || n <= 0)
	{
		fprintf(stderr, "d_GetInv: invalid arguments (L=%p, n=%d)\n", (void*)L, n);
		return NULL;
	}

	cublasHandle_t cu_cublasHandle = NULL;

	float** adL = NULL;      // device array holding the single pointer dL
	float** adC = NULL;      // device array holding the single pointer dC
	float* dL = NULL;        // device copy of L, overwritten by its LU factors
	float* dC = NULL;        // device buffer receiving the inverse
	int* dLUPivots = NULL;   // pivot indices produced by the factorisation
	int* dLUInfo = NULL;     // per-matrix status written by cuBLAS
	float* res = NULL;
	int hInfo = 0;

	// Widen before multiplying so n * n cannot overflow int.
	const size_t szA = (size_t)n * (size_t)n * sizeof(float);

	int ok = d_CublasOk(cublasCreate(&cu_cublasHandle), "cublasCreate");

	ok = ok && d_CudaOk(cudaMalloc(&adL, sizeof(float*)), "cudaMalloc(adL)");
	ok = ok && d_CudaOk(cudaMalloc(&adC, sizeof(float*)), "cudaMalloc(adC)");
	ok = ok && d_CudaOk(cudaMalloc(&dL, szA), "cudaMalloc(dL)");
	ok = ok && d_CudaOk(cudaMalloc(&dC, szA), "cudaMalloc(dC)");
	ok = ok && d_CudaOk(cudaMalloc(&dLUPivots, (size_t)n * sizeof(int)), "cudaMalloc(dLUPivots)");
	ok = ok && d_CudaOk(cudaMalloc(&dLUInfo, sizeof(int)), "cudaMalloc(dLUInfo)");

	ok = ok && d_CudaOk(cudaMemcpy(dL, L, szA, cudaMemcpyHostToDevice), "cudaMemcpy(dL)");
	// The batched API takes device-resident arrays of matrix pointers.
	ok = ok && d_CudaOk(cudaMemcpy(adL, &dL, sizeof(float*), cudaMemcpyHostToDevice), "cudaMemcpy(adL)");
	ok = ok && d_CudaOk(cudaMemcpy(adC, &dC, sizeof(float*), cudaMemcpyHostToDevice), "cudaMemcpy(adC)");

	if (ok)
	{
		d_CUDATimerStart();

		// Step 1: LU factorisation in place. The 4-byte status read-back below
		// is inside the timed region; its cost is negligible next to the solve.
		ok = d_CublasOk(cublasSgetrfBatched(cu_cublasHandle, n, adL, n, dLUPivots, dLUInfo, 1), "cublasSgetrfBatched")
			&& d_CudaOk(cudaDeviceSynchronize(), "cudaDeviceSynchronize(getrf)")
			&& d_CudaOk(cudaMemcpy(&hInfo, dLUInfo, sizeof(int), cudaMemcpyDeviceToHost), "cudaMemcpy(info, getrf)");

		if (ok && hInfo != 0)
		{
			fprintf(stderr, "d_GetInv: LU factorisation failed (info = %d); the matrix is singular or an argument is invalid\n", hInfo);
			ok = 0;
		}

		// Step 2: build the inverse from the LU factors (out of place, into dC).
		if (ok)
		{
			ok = d_CublasOk(cublasSgetriBatched(cu_cublasHandle, n, (const float **)adL, n, dLUPivots, adC, n, dLUInfo, 1), "cublasSgetriBatched")
				&& d_CudaOk(cudaDeviceSynchronize(), "cudaDeviceSynchronize(getri)")
				&& d_CudaOk(cudaMemcpy(&hInfo, dLUInfo, sizeof(int), cudaMemcpyDeviceToHost), "cudaMemcpy(info, getri)");

			if (ok && hInfo != 0)
			{
				fprintf(stderr, "d_GetInv: inversion failed (info = %d); the matrix is singular\n", hInfo);
				ok = 0;
			}
		}

		// Always stop the timer once started so its events are destroyed.
		float timed = d_CUDATimerStop();

		if (ok)
		{
			printf("\ncublas inverse in: %.5f ms.\n", timed);
		}
	}

	if (ok)
	{
		res = (float*)malloc(szA);
		if (res == NULL)
		{
			fprintf(stderr, "d_GetInv: out of host memory (%lu bytes)\n", (unsigned long)szA);
		}
		else if (!d_CudaOk(cudaMemcpy(res, dC, szA, cudaMemcpyDeviceToHost), "cudaMemcpy(res)"))
		{
			free(res);
			res = NULL;
		}
	}

	// cudaFree(NULL) is a no-op, so this is safe after a partial setup.
	cudaFree(adL);
	cudaFree(adC);
	cudaFree(dL);
	cudaFree(dC);
	cudaFree(dLUPivots);
	cudaFree(dLUInfo);

	if (cu_cublasHandle != NULL)
	{
		cublasDestroy(cu_cublasHandle);
	}

	return res;
}

/*
 * Tiled matrix multiply: c = a * b for n x n matrices addressed as
 * element(row, col) = base[row * ld + col].
 *
 * Launch requirements:
 *   - blockDim must be (BLOCK_SIZE, BLOCK_SIZE): each thread loads exactly one
 *     element of each BLOCK_SIZE x BLOCK_SIZE shared-memory tile.
 *   - the grid must cover the output, i.e. at least ceil(n / BLOCK_SIZE)
 *     blocks in both x (columns) and y (rows).
 *   - static shared memory: 2 * BLOCK_SIZE * BLOCK_SIZE floats per block.
 *
 * a, b   input matrices (device pointers), leading dimensions lda / ldb
 * c      output matrix (device pointer), leading dimension ldc
 * n      matrix order; need not be a multiple of BLOCK_SIZE
 *
 * The dot product is accumulated with compensated (Kahan) summation to reduce
 * float rounding error. Building with fast-math style options may let the
 * compiler simplify the compensation away.
 *
 * Fix over the previous version: a stray
 *   results = matA[tidr][i] * matB[i][tidc];
 * after the compensated update overwrote the running sum on every iteration,
 * so each output element ended up as a single product instead of the full
 * dot product.
 */
__global__ static void matMultCUDA(const float* a, size_t lda, const float* b, size_t ldb, float* c, size_t ldc, int n)
{
	__shared__ float matA[BLOCK_SIZE][BLOCK_SIZE];
	__shared__ float matB[BLOCK_SIZE][BLOCK_SIZE];

	const int tidc = threadIdx.x;
	const int tidr = threadIdx.y;
	const int row = blockIdx.y * BLOCK_SIZE + tidr;   // output row of this thread
	const int col = blockIdx.x * BLOCK_SIZE + tidc;   // output column of this thread

	float results = 0.0f;   // running sum
	float comp = 0.0f;      // running compensation for lost low-order bits

	for (int j = 0; j < n; j += BLOCK_SIZE)
	{
		// Stage one tile of a and one tile of b; pad with zeros past the edge
		// so the inner loop needs no bounds checks.
		matA[tidr][tidc] = (row < n && tidc + j < n) ? a[row * lda + tidc + j] : 0.0f;
		matB[tidr][tidc] = (tidr + j < n && col < n) ? b[(tidr + j) * ldb + col] : 0.0f;

		// Every thread of the block reaches this barrier: the loop bounds
		// depend only on n, never on the thread index.
		__syncthreads();

		for (int i = 0; i < BLOCK_SIZE; i++)
		{
			comp -= matA[tidr][i] * matB[i][tidc];
			const float t = results - comp;
			comp = (t - results) + comp;
			results = t;
		}

		// Do not overwrite the tiles while other threads are still reading them.
		__syncthreads();
	}

	if (row < n && col < n)
	{
		c[row * ldc + col] = results;
	}
}

/*
 * CPU reference matrix multiply: c = a * b for n x n matrices stored as
 * element(row, col) = base[row * ld + col].
 *
 * a, lda   left operand and its leading dimension
 * b, ldb   right operand and its leading dimension
 * c, ldc   output matrix and its leading dimension
 * n        matrix order
 *
 * Each dot product is summed in a double accumulator and narrowed to float
 * only when it is stored.
 */
void matmult(const float* a, int lda, const float* b, int ldb, float* c, int ldc, int n)
{
	for (int row = 0; row < n; ++row)
	{
		const float* aRow = a + row * lda;
		float* cRow = c + row * ldc;

		for (int col = 0; col < n; ++col)
		{
			double acc = 0;

			for (int k = 0; k < n; ++k)
			{
				acc += aRow[k] * b[k * ldb + col];
			}

			cRow[col] = acc;
		}
	}
}

/*
 * Demo driver: reads the matrix order n, fills an n x n matrix with
 * pseudo-random values, inverts it on the GPU via d_GetInv(), multiplies the
 * original by the inverse on the CPU, and prints all three matrices. The last
 * one should be close to the identity.
 *
 * Returns 0 on success and 1 if the input is invalid, memory cannot be
 * allocated, or the inversion fails.
 */
int main()
{
	int n = 0;
	printf("Please input matrix number :");
	if (scanf("%d", &n) != 1 || n <= 0)
	{
		fprintf(stderr, "\nInvalid matrix size: expected a positive integer.\n");
		return 1;
	}

	// Index and size arithmetic is done in size_t so n * n cannot overflow int.
	const size_t dim = (size_t)n;
	const size_t count = dim * dim;

	float* L = (float*)malloc(count * sizeof(float));
	float* c = (float*)malloc(count * sizeof(float));
	if (L == NULL || c == NULL)
	{
		fprintf(stderr, "\nOut of host memory for a %d x %d matrix.\n", n, n);
		free(L);
		free(c);
		return 1;
	}

	size_t i, j;
	for (i = 0; i < dim; i++)
	{
		for (j = 0; j < dim; j++)
		{
			// The divisor is formed in float: RAND_MAX * RAND_MAX as an int
			// product overflows on platforms where RAND_MAX is 2^31 - 1.
			L[i * dim + j] = (float)rand() / RAND_MAX + (float)rand() / ((float)RAND_MAX * (float)RAND_MAX);
			printf("%.1f\t", L[i * dim + j]);
		}
		printf("\n");
	}

	float* inv = d_GetInv(L, n);
	if (inv == NULL)
	{
		fprintf(stderr, "\nMatrix inversion failed.\n");
		free(L);
		free(c);
		return 1;
	}
	printf("\n");

	for (i = 0; i < dim; i++)
	{
		for (j = 0; j < dim; j++)
		{
			printf("%.1f\t", inv[i * dim + j]);
		}
		printf("\n");
	}

	// Verification: L * inv should be (approximately) the identity matrix.
	matmult(L, n, inv, n, c, n, n);
	printf("\n");

	for (i = 0; i < dim; i++)
	{
		for (j = 0; j < dim; j++)
		{
			printf("%f\t", c[i * dim + j]);
		}
		printf("\n");
	}

	printf("\n");
	printf("Done.");

	free(inv);
	free(c);
	free(L);

	// Keep the console window open until a key is pressed.
	_getch();

	return 0;
}

Error 58 error LNK1120: 4 unresolved externals

Error 54 error LNK2019: unresolved external symbol cublasCreate_v2 referenced in function “float * __cdecl d_GetInv(float *,int)” (?d_GetInv@@YAPEAMPEAMH@Z)

Error 55 error LNK2019: unresolved external symbol cublasDestroy_v2 referenced in function “float * __cdecl d_GetInv(float *,int)” (?d_GetInv@@YAPEAMPEAMH@Z)

Error 56 error LNK2019: unresolved external symbol cublasSgetrfBatched referenced in function “float * __cdecl d_GetInv(float *,int)” (?d_GetInv@@YAPEAMPEAMH@Z)

Error 57 error LNK2019: unresolved external symbol cublasSgetriBatched referenced in function “float * __cdecl d_GetInv(float *,int)” (?d_GetInv@@YAPEAMPEAMH@Z)

61 IntelliSense: identifier “__syncthreads” is undefined

60 IntelliSense: identifier “blockIdx” is undefined

59 IntelliSense: identifier “threadIdx” is undefined

Please share your opinions and suggestions
so that I can improve my computing skills.

You need to link against the cuBLAS library.

instructions are here:

https://stackoverflow.com/questions/13570285/how-to-link-library-e-g-cublas-cusparse-for-cuda-on-windows/13588857#13588857

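The IntelliSense errors can be ignored: IntelliSense does not know CUDA built-ins such as threadIdx, blockIdx and __syncthreads, but nvcc compiles them without problems.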

I cannot understand

Can you write some detail examples over here

Thank and please help

Make sure you have opened the project that you want to work on.
Select View…Property Pages (from the menu) A new dialog box will open up.
On the left hand side of this dialog box, select Linker to open up its sub-menu
Under linker, select Input
Now, on the pane on the right, observe the first item which is “Additional Dependencies”. I believe cudart.lib should already be present there.
Click to the right of cudart.lib. You can now type in new libraries to be added. Type a space (to separate from cudart.lib) and type cublas.lib
Now click “Apply” in the lower right corner of the dialog box.

Then rebuild your project.

I am using Visual Studio 2013 with CUDA 9.1 - that is why I do not have:

Property Pages
Linker
Additional Dependencies

Can you understand my problem - and please assist

VS 2013 has:

Property Pages
Linker
Additional Dependencies

see here:

https://docs.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2013/669zx6zc(v=vs.120)

you can also get to the project property pages by right clicking on the project itself in the left hand project browser pane. Then select properties at the bottom of the pop-up menu.

This is standard Visual Studio navigation, not anything specific to CUDA.

I cannot get that to work anyway, so I give up on this case.