CMake & OpenCL: how to make a library

Hello,

I’m trying to compile a library that uses OpenCL with CMake and FindOpenCL.

I have these files:

oclVectorMul.cpp

oclVectorMul.h

test.cpp

So I use FindOpenCL and I make my own CMakeLists.txt:

[codebox]cmake_minimum_required(VERSION 2.6 FATAL_ERROR)

project(oclVectorMul)

# Search the project root for FindOpenCL.cmake.
set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR})

set(LIBRARY_OUTPUT_PATH lib/${CMAKE_BUILD_TYPE})

# Locate OpenCL. find_package() loads FindOpenCL.cmake by itself, so a
# separate include(FindOpenCL) is not needed.
find_package(OpenCL REQUIRED)
include_directories(${OPENCL_INCLUDE_DIR})

# NVIDIA GPU Computing SDK headers and utility libraries.
# CMake does not expand "~", so the home directory is read from the environment.
set(GPU_SDK_DIRECTORY "$ENV{HOME}/NVIDIA_GPU_Computing_SDK")
include_directories(${GPU_SDK_DIRECTORY}/shared/inc)
link_directories(${GPU_SDK_DIRECTORY}/shared/lib)
include_directories(${GPU_SDK_DIRECTORY}/OpenCL/common/inc)
link_directories(${GPU_SDK_DIRECTORY}/OpenCL/common/lib)

# Headers of this library.
include_directories(include)

file(
  GLOB_RECURSE
  source_files
  src/*
  include/*
)

add_library(
  oclVectorMul
  STATIC
  ${source_files}
)

# A static library is only archived, not linked: the dependencies below are
# recorded for targets of this same CMake project. An executable built by a
# separate project must link OpenCL and the SDK libraries itself.
target_link_libraries(
  oclVectorMul
  ${OPENCL_LIBRARIES}
  libshrutil.a
  liboclUtil.a
)

[/codebox]

It works fine: the library liboclVectorMul.a is created.

After that, I try to compile a test.cpp file that uses oclVectorMul.a. I include the header, and I created a new folder containing test.cpp and a CMakeLists.txt to compile it.

[codebox]cmake_minimum_required(VERSION 2.6)

# Project configuration
project(test)

set(EXECUTABLE_OUTPUT_PATH bin/${CMAKE_BUILD_TYPE})

# Headers and archive of the oclVectorMul library (built by the other project)
include_directories(../include)
link_directories(../lib)

# Executable
file(
  GLOB_RECURSE
  source_files
  src/*
)

add_executable(
  test
  ${source_files}
)

# Link step.
# NOTE: only oclVectorMul is linked here. OpenCL and the SDK utility
# libraries it depends on are missing, which is what produces the
# undefined references reported in this post.
target_link_libraries(
  test
  oclVectorMul
)

[/codebox]

But here, it doesn’t work. I have these errors when I type “make”:

[codebox]Scanning dependencies of target test

[100%] Building CXX object CMakeFiles/test.dir/src/test.cpp.o

Linking CXX executable bin/test

…/lib/liboclVectorMul.a(oclVectorMul.cpp.o): In function `liboclvecmul::Cleanup(int)’:

oclVectorMul.cpp:(.text+0x20): undefined reference to `shrLog’

oclVectorMul.cpp:(.text+0x80): undefined reference to `clReleaseKernel’

oclVectorMul.cpp:(.text+0x98): undefined reference to `clReleaseProgram’

oclVectorMul.cpp:(.text+0xb0): undefined reference to `clReleaseCommandQueue’

oclVectorMul.cpp:(.text+0xc8): undefined reference to `clReleaseContext’

oclVectorMul.cpp:(.text+0xe0): undefined reference to `clReleaseMemObject’

oclVectorMul.cpp:(.text+0xf8): undefined reference to `clReleaseMemObject’

oclVectorMul.cpp:(.text+0x133): undefined reference to `shrLog’

oclVectorMul.cpp:(.text+0x14e): undefined reference to `shrLog’

…/lib/liboclVectorMul.a(oclVectorMul.cpp.o): In function `liboclvecmul::multiplie(int, char**)’:

oclVectorMul.cpp:(.text+0x17c): undefined reference to `shrCheckCmdLineFlag’

oclVectorMul.cpp:(.text+0x18c): undefined reference to `shrSetLogFileName’

oclVectorMul.cpp:(.text+0x1ab): undefined reference to `shrRoundUp’

oclVectorMul.cpp:(.text+0x1fd): undefined reference to `shrFillArray’

oclVectorMul.cpp:(.text+0x21c): undefined reference to `clCreateContextFromType’

oclVectorMul.cpp:(.text+0x251): undefined reference to `shrLog’

oclVectorMul.cpp:(.text+0x27c): undefined reference to `clGetContextInfo’

oclVectorMul.cpp:(.text+0x2ba): undefined reference to `clGetContextInfo’

oclVectorMul.cpp:(.text+0x2f8): undefined reference to `shrLog’

oclVectorMul.cpp:(.text+0x322): undefined reference to `clCreateCommandQueue’

oclVectorMul.cpp:(.text+0x357): undefined reference to `shrLog’

oclVectorMul.cpp:(.text+0x36b): undefined reference to `shrDeltaT’

oclVectorMul.cpp:(.text+0x398): undefined reference to `clCreateBuffer’

oclVectorMul.cpp:(.text+0x3a9): undefined reference to `shrDeltaT’

oclVectorMul.cpp:(.text+0x3e1): undefined reference to `clCreateBuffer’

oclVectorMul.cpp:(.text+0x42a): undefined reference to `shrLog’

oclVectorMul.cpp:(.text+0x447): undefined reference to `shrFindFilePath’

oclVectorMul.cpp:(.text+0x464): undefined reference to `oclLoadProgSource’

oclVectorMul.cpp:(.text+0x48c): undefined reference to `clCreateProgramWithSource’

oclVectorMul.cpp:(.text+0x4c1): undefined reference to `shrLog’

oclVectorMul.cpp:(.text+0x4f2): undefined reference to `clBuildProgram’

oclVectorMul.cpp:(.text+0x526): undefined reference to `shrLog’

oclVectorMul.cpp:(.text+0x546): undefined reference to `clCreateKernel’

oclVectorMul.cpp:(.text+0x57b): undefined reference to `shrLog’

oclVectorMul.cpp:(.text+0x5a0): undefined reference to `clSetKernelArg’

oclVectorMul.cpp:(.text+0x5c1): undefined reference to `clSetKernelArg’

oclVectorMul.cpp:(.text+0x5ec): undefined reference to `clSetKernelArg’

oclVectorMul.cpp:(.text+0x617): undefined reference to `clSetKernelArg’

oclVectorMul.cpp:(.text+0x655): undefined reference to `shrLog’

oclVectorMul.cpp:(.text+0x669): undefined reference to `shrDeltaT’

oclVectorMul.cpp:(.text+0x6ab): undefined reference to `clEnqueueNDRangeKernel’

oclVectorMul.cpp:(.text+0x6bb): undefined reference to `shrDeltaT’

oclVectorMul.cpp:(.text+0x6f6): undefined reference to `shrLog’

oclVectorMul.cpp:(.text+0x70a): undefined reference to `shrDeltaT’

oclVectorMul.cpp:(.text+0x758): undefined reference to `clEnqueueReadBuffer’

oclVectorMul.cpp:(.text+0x768): undefined reference to `shrDeltaT’

oclVectorMul.cpp:(.text+0x7a3): undefined reference to `shrLog’

collect2: ld a retourné 1 code d’état d’exécution

make[2]: *** [bin/test] Erreur 1

make[1]: *** [CMakeFiles/test.dir/all] Erreur 2

make: *** [all] Erreur 2

[/codebox]

It looks as if the link between oclVectorMul.a and the SDK is not being made. But when I compile my OpenCL code as an application, with the main function in oclVectorMul.cpp, it works.

Could someone help me?

Thanks a lot.

Here is the content of oclVectorMul.cpp

[codebox]//************************************************************


//

// Multiply a vector by a float data

//

//************************************************************


// common SDK header for standard utilities and system libs

#include <oclUtils.h>

// Public declaration of liboclvecmul::multiplie (plain ASCII quotes are required here)
#include "oclVectorMul.h"

namespace liboclvecmul {

// Name of the file with the source code for the computation kernel
// ************************************************************
const char* cSourceFile = "VectorMul.cl";

// Host buffers for demo
// ************************************************************
void *src, *dst;                // Host buffers for OpenCL test

// OpenCL Vars (namespace-scope objects, so all of them start out zero-initialized;
// Cleanup() relies on that to skip handles that were never created)
cl_context cxGPUContext;        // OpenCL context
cl_command_queue cqCommandQue;  // OpenCL command queue
cl_device_id* cdDevices;        // OpenCL device list
cl_program cpProgram;           // OpenCL program
cl_kernel ckKernel;             // OpenCL kernel
cl_mem cmDevSrc;                // OpenCL device source buffer
cl_mem cmDevDst;                // OpenCL device destination buffer
size_t szGlobalWorkSize;        // 1D var for Total # of work items
size_t szLocalWorkSize;         // 1D var for # of work items in the work group
size_t szParmDataBytes;         // Byte size of context information
size_t szKernelLength;          // Byte size of kernel code
cl_int ciErr1, ciErr2;          // Error code var
char* cPathAndName = NULL;      // var for full paths to data, src, etc.
char* cSourceCL = NULL;         // Buffer to hold source for compilation

// demo config vars
int iNumElements = 8388608;     // Length of float arrays to process
float value = 10.0f;            // Factor every element is multiplied by (kernel argument 1)
shrBOOL bQuickTest = shrFALSE;  // Set from the "noprompt" command-line flag

// Forward Declarations
// ************************************************************
void Cleanup (int iExitCode);

// Main function

// ************************************************************


int multiplie(int argc, char **argv)

{

// get command line arg for quick test, if provided

bQuickTest = shrCheckCmdLineFlag(argc, (const char**)argv, "noprompt");

// start logs

shrSetLogFileName ("oclVectorMul.txt");

// shrLog(LOGBOTH, 0.0, “%s Starting…\n\n# of float elements per Array \t= %u\n”, argv[0], iNumElements);

// set and log Global and Local work size dimensions

szLocalWorkSize = 256;

szGlobalWorkSize = shrRoundUp((int)szLocalWorkSize, iNumElements);  // rounded up to the nearest multiple of the LocalWorkSize

// shrLog(LOGBOTH, 0.0, “Global Work Size \t\t= %u\nLocal Work Size \t\t= %u\n# of Work Groups \t\t= %u\n\n”,

// szGlobalWorkSize, szLocalWorkSize, (szGlobalWorkSize % szLocalWorkSize + szGlobalWorkSize/szLocalWorkSize));

// Allocate and initialize host arrays

src = (void *)malloc(sizeof(cl_float) * szGlobalWorkSize);

dst = (void *)malloc(sizeof(cl_float) * szGlobalWorkSize);

shrFillArray((float*)src, iNumElements);

// shrLog(LOGBOTH, 0.0, “Allocate and Init Host Mem…\n”);

// Create the OpenCL context on a GPU device

cxGPUContext = clCreateContextFromType(0, CL_DEVICE_TYPE_GPU, NULL, NULL, &ciErr1);

// shrLog(LOGBOTH, 0.0, “clCreateContextFromType…\n”);

if (ciErr1 != CL_SUCCESS)

{

    shrLog(LOGBOTH, 0.0, "Error in clCreateContextFromType, near Line %u in file %u", __LINE__, __FILE__);

    Cleanup(EXIT_FAILURE);

}

// Get the list of GPU devices associated with context

ciErr1 = clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, 0, NULL, &szParmDataBytes);

cdDevices = (cl_device_id*)malloc(szParmDataBytes);

ciErr1 |= clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, szParmDataBytes, cdDevices, NULL);

// shrLog(LOGBOTH, 0.0, “clGetContextInfo…\n”);

if (ciErr1 != CL_SUCCESS)

{

    shrLog(LOGBOTH, 0.0, "Error in clGetContextInfo, near Line %u in file %u", __LINE__, __FILE__);

    Cleanup(EXIT_FAILURE);

}

// Create a command-queue

cqCommandQue = clCreateCommandQueue(cxGPUContext, cdDevices[0], 0, &ciErr1);

// shrLog(LOGBOTH, 0.0, “clCreateCommandQueue…\n”);

if (ciErr1 != CL_SUCCESS)

{

    shrLog(LOGBOTH, 0.0, "Error in clCreateCommandQueue, near Line %u in file %u", __LINE__, __FILE__);

    Cleanup(EXIT_FAILURE);

}

// Allocate the OpenCL source and result buffer memory objects on the device GMEM, and copy the data to the device

shrDeltaT(0);

cmDevSrc = clCreateBuffer(cxGPUContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(cl_float) * szGlobalWorkSize, src, &ciErr1);

double memCpyHDTime = shrDeltaT(0);

cmDevDst = clCreateBuffer(cxGPUContext, CL_MEM_WRITE_ONLY, sizeof(cl_float) * szGlobalWorkSize, NULL, &ciErr2);

ciErr1 |= ciErr2;

// shrLog(LOGBOTH, 0.0, “clCreateBuffer…\n”);

if (ciErr1 != CL_SUCCESS)

{

    shrLog(LOGBOTH, 0.0, "Error in clCreateBuffer, near Line %u in file %u", __LINE__, __FILE__);

    Cleanup(EXIT_FAILURE);

}

// Read the OpenCL kernel in from source file

cPathAndName = shrFindFilePath(cSourceFile, argv[0]);

cSourceCL = oclLoadProgSource(cPathAndName, "", &szKernelLength);

// shrLog(LOGBOTH, 0.0, “oclLoadProgSource (%s)…\n”, cSourceFile);

// Create the program

cpProgram = clCreateProgramWithSource(cxGPUContext, 1, (const char **)&cSourceCL, &szKernelLength, &ciErr1);

// shrLog(LOGBOTH, 0.0, “clCreateProgramWithSource…\n”);

if (ciErr1 != CL_SUCCESS)

{

    shrLog(LOGBOTH, 0.0, "Error in clCreateProgramWithSource, near Line %u in file %u", __LINE__, __FILE__);

    Cleanup(EXIT_FAILURE);

}

// Build the program

ciErr1 = clBuildProgram(cpProgram, 0, NULL, NULL, NULL, NULL);

// shrLog(LOGBOTH, 0.0, “clBuildProgram…\n”);

if (ciErr1 != CL_SUCCESS)

{

    shrLog(LOGBOTH, 0.0, "Error in clBuildProgram, near Line %u in file %u", __LINE__, __FILE__);

    Cleanup(EXIT_FAILURE);

}

// Create the kernel

ckKernel = clCreateKernel(cpProgram, "VectorMul", &ciErr1);

// shrLog(LOGBOTH, 0.0, “clCreateKernel…\n”);

if (ciErr1 != CL_SUCCESS)

{

    shrLog(LOGBOTH, 0.0, "Error in clCreateKernel, near Line %u in file %u", __LINE__, __FILE__);

    Cleanup(EXIT_FAILURE);

}

// Set the Argument values

ciErr1 = clSetKernelArg(ckKernel, 0, sizeof(cl_mem), (void*)&cmDevSrc);

ciErr1 |= clSetKernelArg(ckKernel, 1, sizeof(cl_float), (void*)&value);

ciErr1 |= clSetKernelArg(ckKernel, 2, sizeof(cl_mem), (void*)&cmDevDst);

ciErr1 |= clSetKernelArg(ckKernel, 3, sizeof(cl_int), (void*)&iNumElements);

// shrLog(LOGBOTH, 0.0, “clSetKernelArg…\n”);

if (ciErr1 != CL_SUCCESS)

{

    shrLog(LOGBOTH, 0.0, "Error in clSetKernelArg, near Line %u in file %u", __LINE__, __FILE__);

    Cleanup(EXIT_FAILURE);

}

// Launch kernel

shrDeltaT(0);

ciErr1 = clEnqueueNDRangeKernel(cqCommandQue, ckKernel, 1, NULL, &szGlobalWorkSize, &szLocalWorkSize, 0, NULL, NULL);

double computeTime = shrDeltaT(0);

// shrLog(LOGBOTH, 0.0, “clEnqueueNDRangeKernel…\n”);

if (ciErr1 != CL_SUCCESS)

{

    shrLog(LOGBOTH, 0.0, "Error in clEnqueueNDRangeKernel, near Line %u in file %u", __LINE__, __FILE__);

    Cleanup(EXIT_FAILURE);

}

// Synchronous/blocking read of results, and check accumulated errors

shrDeltaT(0);

ciErr1 = clEnqueueReadBuffer(cqCommandQue, cmDevDst, CL_TRUE, 0, sizeof(cl_float) * szGlobalWorkSize, dst, 0, NULL, NULL);

double memCpyDHTime = shrDeltaT(0);

// shrLog(LOGBOTH, 0.0, “clEnqueueReadBuffer…\n\n”);

if (ciErr1 != CL_SUCCESS)

{

    shrLog(LOGBOTH, 0.0, "Error in clEnqueueReadBuffer, near Line %u in file %u", __LINE__, __FILE__);

    Cleanup(EXIT_FAILURE);

}

printf("\nMultiplication\n\n\t*%d datas\n\t*%d Global Work Size\n\t*%d Local Work Size\n", iNumElements, (int)szGlobalWorkSize, (int)szLocalWorkSize);

printf("\n=============================================\n");

printf("Time to copy datas HOST -> DEVICE : %.3f ms\n", (memCpyHDTime*1000));

printf("Time to compute                   : %.3f ms\n", (computeTime*1000));

printf("Time to copy datas DEVICE -> HOST : %.3f ms\n", (memCpyDHTime*1000));

printf("--------------------------------------------\n");

printf("Total time                        : %.3f ms\n", ((memCpyHDTime+computeTime+memCpyDHTime)*1000));

printf("=============================================\n\n");

float *srcf, *dstf;

srcf = (float*)src;

dstf = (float*)dst;

for (int i=0; i<iNumElements; i++)

{

	if (srcf[i]*10 != dstf[i])

		printf("Error at indice %d\n", i);

}

// Cleanup and leave

Cleanup (EXIT_SUCCESS);

}

// Frees the host allocations, releases every OpenCL object that was created,
// closes the log and terminates the process.
//   iExitCode - status handed to exit()
// Unless the quick-test flag is set, waits for Enter before exiting.
void Cleanup (int iExitCode)
{
    shrLog(LOGBOTH, 0.0, "\nStarting Cleanup...\n\n");

    // free() accepts null pointers, so these need no guard
    free(cdDevices);
    free(cPathAndName);
    free(cSourceCL);

    // OpenCL handles are released only when they are non-null
    if (ckKernel != NULL)
    {
        clReleaseKernel(ckKernel);
    }
    if (cpProgram != NULL)
    {
        clReleaseProgram(cpProgram);
    }
    if (cqCommandQue != NULL)
    {
        clReleaseCommandQueue(cqCommandQue);
    }
    if (cxGPUContext != NULL)
    {
        clReleaseContext(cxGPUContext);
    }
    if (cmDevSrc != NULL)
    {
        clReleaseMemObject(cmDevSrc);
    }
    if (cmDevDst != NULL)
    {
        clReleaseMemObject(cmDevDst);
    }

    // Host buffers
    free(src);
    free(dst);

    // Final log line; interactive runs additionally wait for a key press
    if (!bQuickTest)
    {
        shrLog(LOGBOTH | CLOSELOG, 0.0, "oclVectorMul Ending...\nPress Enter to Exit\n");
        getchar();
    }
    else
    {
        shrLog(LOGBOTH | CLOSELOG, 0.0, "oclVectorMul Ending...\n");
    }

    exit (iExitCode);
}

} // namespace

[/codebox]

oclVectorMul.h:

[codebox]#ifndef OCLVECTORMUL

#define OCLVECTORMUL

// Public interface of the oclVectorMul library.
namespace liboclvecmul {

// Runs the OpenCL vector multiplication.
//   argc, argv - the caller's command line; the implementation looks for the
//                "noprompt" flag and uses argv[0] to locate VectorMul.cl.
// In the implementation shown in oclVectorMul.cpp every path ends in
// Cleanup(), which calls exit(), so the call does not return to the caller.
int multiplie(int argc, char **argv);

}

#endif

[/codebox]

test.cpp

[codebox]#include "oclVectorMul.h"

// Minimal driver: forwards the command line to the library entry point.
int main(int argc, char **argv)
{
    return liboclvecmul::multiplie(argc, argv);
}

[/codebox]

I forgot to add the SDK (and OpenCL) to the CMakeLists.txt for the test.cpp file.

So it must look like this:

[codebox]cmake_minimum_required(VERSION 2.6)

# Project configuration
project(test)

# Search the project root for FindOpenCL.cmake.
set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR})

set(EXECUTABLE_OUTPUT_PATH bin/${CMAKE_BUILD_TYPE})

# Locate OpenCL. find_package() loads FindOpenCL.cmake by itself, so a
# separate include(FindOpenCL) is not needed.
find_package(OpenCL REQUIRED)
include_directories(${OPENCL_INCLUDE_DIR})

# NVIDIA GPU Computing SDK headers and utility libraries.
# CMake does not expand "~", so the home directory is read from the environment.
set(GPU_SDK_DIRECTORY "$ENV{HOME}/NVIDIA_GPU_Computing_SDK")
include_directories(${GPU_SDK_DIRECTORY}/shared/inc)
link_directories(${GPU_SDK_DIRECTORY}/shared/lib)
include_directories(${GPU_SDK_DIRECTORY}/OpenCL/common/inc)
link_directories(${GPU_SDK_DIRECTORY}/OpenCL/common/lib)

# Headers and archive of the oclVectorMul library (built by the other project)
include_directories(../include)
link_directories(../lib)

# Executable
file(
  GLOB_RECURSE
  source_files
  src/*
)

add_executable(
  test
  ${source_files}
)

# Link step: oclVectorMul comes first, followed by the libraries it needs.
# The static archive was built by a separate project, so its dependencies
# have to be repeated here.
target_link_libraries(
  test
  oclVectorMul
  ${OPENCL_LIBRARIES}
  libshrutil.a
  liboclUtil.a
)[/codebox]

I hope it could help someone!

(Please in the future just append to the original thread.)

Here is a CMakeLists.txt that should work when all files are in the same directory (I switched to SDK 3.0-beta, and your code doesn’t compile against it, so I’m not able to fully check it) - from there it should not be hard to split the library and the executable into separate directories:

cmake_minimum_required(VERSION 2.6 FATAL_ERROR)

project(test)

# Search the project root for FindOpenCL.cmake.
set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR})

find_package(OpenCL REQUIRED)
include_directories(${OPENCL_INCLUDE_DIR})

# Change line below to point to your GPU SDK installation path!!!
set(GPU_SDK_DIRECTORY "/opt/gpu-sdk")

include_directories(${GPU_SDK_DIRECTORY}/shared/inc)
include_directories(${GPU_SDK_DIRECTORY}/OpenCL/common/inc)

add_library(
  oclVectorMul
  STATIC
  oclVectorMul.cpp
  )

# Attach the dependencies to the library that actually uses them. CMake then
# places them after liboclVectorMul.a on the link line of every consumer,
# which single-pass linkers need in order to resolve the archive's symbols.
# oclUtil is listed before shrutil so that any shrutil symbols it needs are
# resolved by the archive that follows it.
target_link_libraries(
  oclVectorMul
  ${GPU_SDK_DIRECTORY}/OpenCL/common/lib/liboclUtil.a
  ${GPU_SDK_DIRECTORY}/shared/lib/libshrutil.a
  ${OPENCL_LIBRARIES}
  )

add_executable(
  test
  test.cpp
  )

# The SDK and OpenCL libraries are pulled in transitively through oclVectorMul.
target_link_libraries(
  test
  oclVectorMul
  )