Hi, I am tring to use perfkit in TX1 to monitor hardware performance counter, I put the following file in a library.
main.cu
libNvPmApi.Core.so
NvPmApi.h
NvPmApi.interfaceManager.Linux.h
NvPmApi.Manager.h
the following is the code,which in mian.cu
#include <stdio.h>
#define NVPM_INITGUID
#include "NvPmApi.Manager.h"
#define S_OK 0x00000000
static NvPmApiManager S_NVPMManager;
extern NvPmApiManager *GetNvPmApiManager(){return &S_NVPMManager;}
const NvPmApi *GetNvPmApi(){return S_NVPMManager.Api();}
__global__
void saxpy(int n, float a, float *x, float *y)
{
int i = blockIdx.x*blockDim.x + threadIdx.x;
if (i < n) y[i] = a*x[i] + y[i];
}
int main(void)
{
if(GetNvPmApiManager()->Construct(/home/ubuntu/Desktop/cuda_example/libNvPmApi.Core.so) != S_OK)
{
return false; // This is an error condition
}
NVPMRESULT nvResult;
if((nvResult = GetNvPmApi()->Init()) != NVPM_OK)
{
return false; // This is an error condition
}
int N = 1<<20;
float *x, *y, *d_x, *d_y;
x = (float*)malloc(N*sizeof(float));
y = (float*)malloc(N*sizeof(float));
cudaMalloc(&d_x, N*sizeof(float));
cudaMalloc(&d_y, N*sizeof(float));
for (int i = 0; i < N; i++) {
x[i] = 1.0f;
y[i] = 2.0f;
}
cudaMemcpy(d_x, x, N*sizeof(float), cudaMemcpyHostToDevice);
cudaMemcpy(d_y, y, N*sizeof(float), cudaMemcpyHostToDevice);
// Perform SAXPY on 1M elements
saxpy<<<(N+255)/256, 256>>>(N, 2.0f, d_x, d_y);
cudaMemcpy(y, d_y, N*sizeof(float), cudaMemcpyDeviceToHost);
float maxError = 0.0f;
for (int i = 0; i < N; i++)
maxError = max(maxError, abs(y[i]-4.0f));
printf("Max error: %f\n", maxError);
cudaFree(d_x);
cudaFree(d_y);
free(x);
free(y);
}
when I excude the code in terminal with the following command line
$ nvcc -o main main.cu
but the terminal said that
main.cu(20):error: expected and expression
I think it is something wrong with introduce the dynamic link library in this file
I am just a freshman using perfkit,could you tell me how to solve this problem?
Many thanks for helping!