Hello,
I’m trying to compile a library using openCL with cmake and FindOpenCL.
I have these files:
oclVectorMul.cpp
oclVectorMul.h
test.cpp
So I use FindOpenCL and I make my own CMakeLists.txt:
[codebox]cmake_minimum_required(VERSION 2.6 FATAL_ERROR)
project(oclVectorMul)
set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR})
set(LIBRARY_OUTPUT_PATH lib/${CMAKE_BUILD_TYPE})
include(FindOpenCL)
Recherche d’OpenCL
find_package(OpenCL REQUIRED)
include_directories(${OPENCL_INCLUDE_DIR})
Recherche des includes du sdk
include_directories(~/NVIDIA_GPU_Computing_SDK/shared/inc)
link_directories(~/NVIDIA_GPU_Computing_SDK/shared/lib)
include_directories(~/NVIDIA_GPU_Computing_SDK/OpenCL/common/inc)
link_directories(~/NVIDIA_GPU_Computing_SDK/OpenCL/common/lib)
Include de la lib
include_directories(include)
file(
GLOB_RECURSE
source_files
src/*
include/*
)
add_library(
oclVectorMul
STATIC
${source_files}
)
target_link_libraries(
oclVectorMul
${OPENCL_LIBRARIES}
libshrutil.a
liboclUtil.a )
[/codebox]
It works fine. The library oclVectorMul.a is create.
But after that, I try to compile a test.cpp file using oclVectorMul.a. So I include the header and I create a new folder with the test.cpp file and a CMakeLists.txt to compile this file.
[codebox]cmake_minimum_required(VERSION 2.6)
#Configuration du projet
project(test)
set(EXECUTABLE_OUTPUT_PATH bin/${CMAKE_BUILD_TYPE})
#Inclusion de la bibliothèque oclVectorMul
include_directories(…/include)
link_directories(…/lib)
#Configuration de l’exécutable
file(
GLOB_RECURSE
source_files
src/*
)
add_executable(
test
${source_files}
)
#Configuration de l’édition de liens
target_link_libraries(
test
oclVectorMul
)
[/codebox]
But here, it doesn’t work. I have these errors when I type “make”:
[codebox]Scanning dependencies of target test
[100%] Building CXX object CMakeFiles/test.dir/src/test.cpp.o
Linking CXX executable bin/test
…/lib/liboclVectorMul.a(oclVectorMul.cpp.o): In function `liboclvecmul::Cleanup(int)':
oclVectorMul.cpp:(.text+0x20): undefined reference to `shrLog’
oclVectorMul.cpp:(.text+0x80): undefined reference to `clReleaseKernel’
oclVectorMul.cpp:(.text+0x98): undefined reference to `clReleaseProgram’
oclVectorMul.cpp:(.text+0xb0): undefined reference to `clReleaseCommandQueue’
oclVectorMul.cpp:(.text+0xc8): undefined reference to `clReleaseContext’
oclVectorMul.cpp:(.text+0xe0): undefined reference to `clReleaseMemObject’
oclVectorMul.cpp:(.text+0xf8): undefined reference to `clReleaseMemObject’
oclVectorMul.cpp:(.text+0x133): undefined reference to `shrLog’
oclVectorMul.cpp:(.text+0x14e): undefined reference to `shrLog’
…/lib/liboclVectorMul.a(oclVectorMul.cpp.o): In function `liboclvecmul::multiplie(int, char**)':
oclVectorMul.cpp:(.text+0x17c): undefined reference to `shrCheckCmdLineFlag’
oclVectorMul.cpp:(.text+0x18c): undefined reference to `shrSetLogFileName’
oclVectorMul.cpp:(.text+0x1ab): undefined reference to `shrRoundUp’
oclVectorMul.cpp:(.text+0x1fd): undefined reference to `shrFillArray’
oclVectorMul.cpp:(.text+0x21c): undefined reference to `clCreateContextFromType’
oclVectorMul.cpp:(.text+0x251): undefined reference to `shrLog’
oclVectorMul.cpp:(.text+0x27c): undefined reference to `clGetContextInfo’
oclVectorMul.cpp:(.text+0x2ba): undefined reference to `clGetContextInfo’
oclVectorMul.cpp:(.text+0x2f8): undefined reference to `shrLog’
oclVectorMul.cpp:(.text+0x322): undefined reference to `clCreateCommandQueue’
oclVectorMul.cpp:(.text+0x357): undefined reference to `shrLog’
oclVectorMul.cpp:(.text+0x36b): undefined reference to `shrDeltaT’
oclVectorMul.cpp:(.text+0x398): undefined reference to `clCreateBuffer’
oclVectorMul.cpp:(.text+0x3a9): undefined reference to `shrDeltaT’
oclVectorMul.cpp:(.text+0x3e1): undefined reference to `clCreateBuffer’
oclVectorMul.cpp:(.text+0x42a): undefined reference to `shrLog’
oclVectorMul.cpp:(.text+0x447): undefined reference to `shrFindFilePath’
oclVectorMul.cpp:(.text+0x464): undefined reference to `oclLoadProgSource’
oclVectorMul.cpp:(.text+0x48c): undefined reference to `clCreateProgramWithSource’
oclVectorMul.cpp:(.text+0x4c1): undefined reference to `shrLog’
oclVectorMul.cpp:(.text+0x4f2): undefined reference to `clBuildProgram’
oclVectorMul.cpp:(.text+0x526): undefined reference to `shrLog’
oclVectorMul.cpp:(.text+0x546): undefined reference to `clCreateKernel’
oclVectorMul.cpp:(.text+0x57b): undefined reference to `shrLog’
oclVectorMul.cpp:(.text+0x5a0): undefined reference to `clSetKernelArg’
oclVectorMul.cpp:(.text+0x5c1): undefined reference to `clSetKernelArg’
oclVectorMul.cpp:(.text+0x5ec): undefined reference to `clSetKernelArg’
oclVectorMul.cpp:(.text+0x617): undefined reference to `clSetKernelArg’
oclVectorMul.cpp:(.text+0x655): undefined reference to `shrLog’
oclVectorMul.cpp:(.text+0x669): undefined reference to `shrDeltaT’
oclVectorMul.cpp:(.text+0x6ab): undefined reference to `clEnqueueNDRangeKernel’
oclVectorMul.cpp:(.text+0x6bb): undefined reference to `shrDeltaT’
oclVectorMul.cpp:(.text+0x6f6): undefined reference to `shrLog’
oclVectorMul.cpp:(.text+0x70a): undefined reference to `shrDeltaT’
oclVectorMul.cpp:(.text+0x758): undefined reference to `clEnqueueReadBuffer’
oclVectorMul.cpp:(.text+0x768): undefined reference to `shrDeltaT’
oclVectorMul.cpp:(.text+0x7a3): undefined reference to `shrLog’
collect2: ld a retourné 1 code d’état d’exécution
make[2]: *** [bin/test] Erreur 1
make[1]: *** [CMakeFiles/test.dir/all] Erreur 2
make: *** [all] Erreur 2
[/codebox]
As I don’t make the link beetwen oclVectorMul.a and the sdk. But when I compile my openCL code as an application, with the main function in the oclVectorMul.cpp, it works.
Someone could help me?
Thanks a lot.
Here is the content of oclVectorMul.cpp
[codebox]//************************************************************
//
// Multiply a vector by a float data
//
//************************************************************
// common SDK header for standard utilities and system libs
#include <oclUtils.h>
#include “oclVectorMul.h”
namespace liboclvecmul {
// Name of the file with the source code for the computation kernel
// ************************************************************
const char* cSourceFile = “VectorMul.cl”;
// Host buffers for demo
// ************************************************************
void *src, *dst; // Host buffers for OpenCL test
// OpenCL Vars
cl_context cxGPUContext; // OpenCL context
cl_command_queue cqCommandQue; // OpenCL command que
cl_device_id* cdDevices; // OpenCL device list
cl_program cpProgram; // OpenCL program
cl_kernel ckKernel; // OpenCL kernel
cl_mem cmDevSrc; // OpenCL device source buffer
cl_mem cmDevDst; // OpenCL device destination buffer
size_t szGlobalWorkSize; // 1D var for Total # of work items
size_t szLocalWorkSize; // 1D var for # of work items in the work group
size_t szParmDataBytes; // Byte size of context information
size_t szKernelLength; // Byte size of kernel code
cl_int ciErr1, ciErr2; // Error code var
char* cPathAndName = NULL; // var for full paths to data, src, etc.
char* cSourceCL = NULL; // Buffer to hold source for compilation
// demo config vars
int iNumElements = 8388608;//16777216;//11444777; // Length of float arrays to process (odd # for illustration)
float value = 10.0f;
shrBOOL bQuickTest = shrFALSE;
// Forward Declarations
// ************************************************************
void Cleanup (int iExitCode);
// Main function
// ************************************************************
int multiplie(int argc, char **argv)
{
// get command line arg for quick test, if provided
bQuickTest = shrCheckCmdLineFlag(argc, (const char**)argv, "noprompt");
// start logs
shrSetLogFileName ("oclVectorMul.txt");
// shrLog(LOGBOTH, 0.0, “%s Starting…\n\n# of float elements per Array \t= %u\n”, argv[0], iNumElements);
// set and log Global and Local work size dimensions
szLocalWorkSize = 256;
szGlobalWorkSize = shrRoundUp((int)szLocalWorkSize, iNumElements); // rounded up to the nearest multiple of the LocalWorkSize
// shrLog(LOGBOTH, 0.0, “Global Work Size \t\t= %u\nLocal Work Size \t\t= %u\n# of Work Groups \t\t= %u\n\n”,
// szGlobalWorkSize, szLocalWorkSize, (szGlobalWorkSize % szLocalWorkSize + szGlobalWorkSize/szLocalWorkSize));
// Allocate and initialize host arrays
src = (void *)malloc(sizeof(cl_float) * szGlobalWorkSize);
dst = (void *)malloc(sizeof(cl_float) * szGlobalWorkSize);
shrFillArray((float*)src, iNumElements);
// shrLog(LOGBOTH, 0.0, “Allocate and Init Host Mem…\n”);
// Create the OpenCL context on a GPU device
cxGPUContext = clCreateContextFromType(0, CL_DEVICE_TYPE_GPU, NULL, NULL, &ciErr1);
// shrLog(LOGBOTH, 0.0, “clCreateContextFromType…\n”);
if (ciErr1 != CL_SUCCESS)
{
shrLog(LOGBOTH, 0.0, "Error in clCreateContextFromType, near Line %u in file %u", __LINE__, __FILE__);
Cleanup(EXIT_FAILURE);
}
// Get the list of GPU devices associated with context
ciErr1 = clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, 0, NULL, &szParmDataBytes);
cdDevices = (cl_device_id*)malloc(szParmDataBytes);
ciErr1 |= clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, szParmDataBytes, cdDevices, NULL);
// shrLog(LOGBOTH, 0.0, “clGetContextInfo…\n”);
if (ciErr1 != CL_SUCCESS)
{
shrLog(LOGBOTH, 0.0, "Error in clGetContextInfo, near Line %u in file %u", __LINE__, __FILE__);
Cleanup(EXIT_FAILURE);
}
// Create a command-queue
cqCommandQue = clCreateCommandQueue(cxGPUContext, cdDevices[0], 0, &ciErr1);
// shrLog(LOGBOTH, 0.0, “clCreateCommandQueue…\n”);
if (ciErr1 != CL_SUCCESS)
{
shrLog(LOGBOTH, 0.0, "Error in clCreateCommandQueue, near Line %u in file %u", __LINE__, __FILE__);
Cleanup(EXIT_FAILURE);
}
// Allocate the OpenCL source and result buffer memory objects on the device GMEM, and copy the data to the device
shrDeltaT(0);
cmDevSrc = clCreateBuffer(cxGPUContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(cl_float) * szGlobalWorkSize, src, &ciErr1);
double memCpyHDTime = shrDeltaT(0);
cmDevDst = clCreateBuffer(cxGPUContext, CL_MEM_WRITE_ONLY, sizeof(cl_float) * szGlobalWorkSize, NULL, &ciErr2);
ciErr1 |= ciErr2;
// shrLog(LOGBOTH, 0.0, “clCreateBuffer…\n”);
if (ciErr1 != CL_SUCCESS)
{
shrLog(LOGBOTH, 0.0, "Error in clCreateBuffer, near Line %u in file %u", __LINE__, __FILE__);
Cleanup(EXIT_FAILURE);
}
// Read the OpenCL kernel in from source file
cPathAndName = shrFindFilePath(cSourceFile, argv[0]);
cSourceCL = oclLoadProgSource(cPathAndName, "", &szKernelLength);
// shrLog(LOGBOTH, 0.0, “oclLoadProgSource (%s)…\n”, cSourceFile);
// Create the program
cpProgram = clCreateProgramWithSource(cxGPUContext, 1, (const char **)&cSourceCL, &szKernelLength, &ciErr1);
// shrLog(LOGBOTH, 0.0, “clCreateProgramWithSource…\n”);
if (ciErr1 != CL_SUCCESS)
{
shrLog(LOGBOTH, 0.0, "Error in clCreateProgramWithSource, near Line %u in file %u", __LINE__, __FILE__);
Cleanup(EXIT_FAILURE);
}
// Build the program
ciErr1 = clBuildProgram(cpProgram, 0, NULL, NULL, NULL, NULL);
// shrLog(LOGBOTH, 0.0, “clBuildProgram…\n”);
if (ciErr1 != CL_SUCCESS)
{
shrLog(LOGBOTH, 0.0, "Error in clBuildProgram, near Line %u in file %u", __LINE__, __FILE__);
Cleanup(EXIT_FAILURE);
}
// Create the kernel
ckKernel = clCreateKernel(cpProgram, "VectorMul", &ciErr1);
// shrLog(LOGBOTH, 0.0, “clCreateKernel…\n”);
if (ciErr1 != CL_SUCCESS)
{
shrLog(LOGBOTH, 0.0, "Error in clCreateKernel, near Line %u in file %u", __LINE__, __FILE__);
Cleanup(EXIT_FAILURE);
}
// Set the Argument values
ciErr1 = clSetKernelArg(ckKernel, 0, sizeof(cl_mem), (void*)&cmDevSrc);
ciErr1 |= clSetKernelArg(ckKernel, 1, sizeof(cl_float), (void*)&value);
ciErr1 |= clSetKernelArg(ckKernel, 2, sizeof(cl_mem), (void*)&cmDevDst);
ciErr1 |= clSetKernelArg(ckKernel, 3, sizeof(cl_int), (void*)&iNumElements);
// shrLog(LOGBOTH, 0.0, “clSetKernelArg…\n”);
if (ciErr1 != CL_SUCCESS)
{
shrLog(LOGBOTH, 0.0, "Error in clSetKernelArg, near Line %u in file %u", __LINE__, __FILE__);
Cleanup(EXIT_FAILURE);
}
// Launch kernel
shrDeltaT(0);
ciErr1 = clEnqueueNDRangeKernel(cqCommandQue, ckKernel, 1, NULL, &szGlobalWorkSize, &szLocalWorkSize, 0, NULL, NULL);
double computeTime = shrDeltaT(0);
// shrLog(LOGBOTH, 0.0, “clEnqueueNDRangeKernel…\n”);
if (ciErr1 != CL_SUCCESS)
{
shrLog(LOGBOTH, 0.0, "Error in clEnqueueNDRangeKernel, near Line %u in file %u", __LINE__, __FILE__);
Cleanup(EXIT_FAILURE);
}
// Synchronous/blocking read of results, and check accumulated errors
shrDeltaT(0);
ciErr1 = clEnqueueReadBuffer(cqCommandQue, cmDevDst, CL_TRUE, 0, sizeof(cl_float) * szGlobalWorkSize, dst, 0, NULL, NULL);
double memCpyDHTime = shrDeltaT(0);
// shrLog(LOGBOTH, 0.0, “clEnqueueReadBuffer…\n\n”);
if (ciErr1 != CL_SUCCESS)
{
shrLog(LOGBOTH, 0.0, "Error in clEnqueueReadBuffer, near Line %u in file %u", __LINE__, __FILE__);
Cleanup(EXIT_FAILURE);
}
printf("\nMultiplication\n\n\t*%d datas\n\t*%d Global Work Size\n\t*%d Local Work Size\n", iNumElements, (int)szGlobalWorkSize, (int)szLocalWorkSize);
printf("\n=============================================\n");
printf("Time to copy datas HOST -> DEVICE : %.3f ms\n", (memCpyHDTime*1000));
printf("Time to compute : %.3f ms\n", (computeTime*1000));
printf("Time to copy datas DEVICE -> HOST : %.3f ms\n", (memCpyDHTime*1000));
printf("--------------------------------------------\n");
printf("Total time : %.3f ms\n", ((memCpyHDTime+computeTime+memCpyDHTime)*1000));
printf("=============================================\n\n");
float *srcf, *dstf;
srcf = (float*)src;
dstf = (float*)dst;
for (int i=0; i<iNumElements; i++)
{
if (srcf[i]*10 != dstf[i])
printf("Error at indice %d\n", i);
}
// Cleanup and leave
Cleanup (EXIT_SUCCESS);
}
void Cleanup (int iExitCode)
{
// Cleanup allocated objects
shrLog(LOGBOTH, 0.0, "\nStarting Cleanup...\n\n");
if(cdDevices)free(cdDevices);
if(cPathAndName)free(cPathAndName);
if(cSourceCL)free(cSourceCL);
if(ckKernel)clReleaseKernel(ckKernel);
if(cpProgram)clReleaseProgram(cpProgram);
if(cqCommandQue)clReleaseCommandQueue(cqCommandQue);
if(cxGPUContext)clReleaseContext(cxGPUContext);
if(cmDevSrc)clReleaseMemObject(cmDevSrc);
if(cmDevDst)clReleaseMemObject(cmDevDst);
// Free host memory
free(src);
free (dst);
// finalize logs and leave
if (bQuickTest)
{
shrLog(LOGBOTH | CLOSELOG, 0.0, "oclVectorMul Ending...\n");
}
else
{
shrLog(LOGBOTH | CLOSELOG, 0.0, "oclVectorMul Ending...\nPress Enter to Exit\n");
getchar();
}
exit (iExitCode);
}
} // namespace
[/codebox]
oclVectorMul.h:
[codebox]#ifndef OCLVECTORMUL
#define OCLVECTORMUL
namespace liboclvecmul {
int multiplie(int argc, char **argv);
}
[/codebox]
test.cpp
[codebox]#include “oclVectorMul.h”
int main(int argc, char **argv)
{
return liboclvecmul::multiplie(argc,argv);;
}
[/codebox]