There is not yet a forum for OpenCL (so I am posting this thread here; it is, even so, related to CUDA)

luisgo · January 8, 2018, 3:16pm

Dear All

I am porting an OpenCL program to CUDA, but first I have to get it working in OpenCL.

The OpenCL examples I found on the NVIDIA site all gave errors (two errors) in Visual Studio 2015:

exception.h not found (is it an NVIDIA or a Microsoft include?)
identifier “RUNTIME_EXCEPTION” is undefined

The original projects are for Visual Studio 2008.

Thanks

Luis Gonçalves

luisgo · January 8, 2018, 5:09pm

Another question: to run OpenCL in Visual Studio 2015, does the project need to be a CUDA project?

Thanks

Luis Gonçalves

Robert_Crovella · January 8, 2018, 5:25pm

It should not be necessary to make a CUDA project.

You should be able to drop an OpenCL code into a new project in VS, then provide the include path to the cl.h header file, and provide the linker path to the OpenCL library. That’s all that should be needed.

Robert_Crovella · January 8, 2018, 6:46pm

On VS 2015, on a system with CUDA 8 and a proper GPU install, I did the following:

start a new empty “general” project - console application
set the build configuration to x64 Release
in the project explorer window, select Source Files (folder). Right-click, select Add…C++ file (.cpp)
double-click on the new source file (Source.cpp)
paste in an OpenCL program, such as:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

#include <CL/cl.h>

/* Turn the macro argument into a string literal without expanding it first. */
#define STRINGIFY(s) #s

/*
 * Terminate the program with a diagnostic unless the OpenCL status `e`
 * equals CL_SUCCESS.
 *
 * - `e` is parenthesized and evaluated exactly once, so compound expressions
 *   such as CL_SUCCEED(flags & 1) compare the whole expression.
 * - The check does not rely on assert(), which is compiled out when NDEBUG is
 *   defined (the usual setting for Release builds), so failures are never
 *   silently ignored.
 * - The macro is still a void expression, so it can be used anywhere the
 *   previous assert-based form could.
 */
#define CL_SUCCEED(e) \
    ((void)((e) == CL_SUCCESS || \
            (fprintf(stderr, "%s:%d: OpenCL status check failed: %s != CL_SUCCESS\n", \
                     __FILE__, __LINE__, #e), \
             abort(), 0)))

/*
 * OpenCL C source of the `vector_add` kernel, kept as one C string.
 *
 * STRINGIFY turns everything between its parentheses into a string literal.
 * The C preprocessor removes the `//` comments and collapses the line breaks
 * before stringification, so the resulting one-line string is still valid
 * kernel source.
 *
 * Kernel contract: each work-item reads its global index i in dimension 0 and
 * stores A[i] + B[i] into C[i]. There is no bounds check, so the host must not
 * launch more work-items than the buffers have elements.
 */
const char *kernel_cl = STRINGIFY(
__kernel void vector_add(__global const int *A, __global const int *B, __global int *C) {

    // Get the index of the current element to be processed
    int i = get_global_id(0);

    // Do the operation
    C[i] = A[i] + B[i];
});

int main(void) {
    printf("started running\n");

    // Create the two input vectors
    int i;
    const int LIST_SIZE = 1024;
    int *A = (int*)malloc(sizeof(int)*LIST_SIZE);
    int *B = (int*)malloc(sizeof(int)*LIST_SIZE);
    for(i = 0; i < LIST_SIZE; i++) {
        A[i] = i;
        B[i] = LIST_SIZE - i;
    }

    // Load the kernel source code into the array source_str
    const char *source_str = kernel_cl;
    size_t source_size;
    source_size = strlen(source_str);
    printf("kernel loading done\n");
    // Get platform and device information
    cl_device_id device_id = NULL;
    cl_uint ret_num_devices;
    cl_uint ret_num_platforms;

cl_int ret = clGetPlatformIDs(0, NULL, &ret_num_platforms);
    cl_platform_id *platforms = NULL;
    platforms = (cl_platform_id*)malloc(ret_num_platforms*sizeof(cl_platform_id));

    ret = clGetPlatformIDs(ret_num_platforms, platforms, NULL);
    if (ret != CL_SUCCESS) {printf("ret at %d is %d\n", __LINE__, ret); CL_SUCCEED(ret);}

    ret = clGetDeviceIDs( platforms[0], CL_DEVICE_TYPE_ALL, 1,
            &device_id, &ret_num_devices);
    if (ret != CL_SUCCESS) {printf("ret at %d is %d\n", __LINE__, ret); CL_SUCCEED(ret);}
    // Create an OpenCL context
    cl_context context = clCreateContext( NULL, 1, &device_id, NULL, NULL, &ret);
    if (ret != CL_SUCCESS) {printf("ret at %d is %d\n", __LINE__, ret); CL_SUCCEED(ret);}

    // Create a command queue
    cl_command_queue command_queue = clCreateCommandQueue(context, device_id, 0, &ret);
    if (ret != CL_SUCCESS) {printf("ret at %d is %d\n", __LINE__, ret); CL_SUCCEED(ret);}

    // Create memory buffers on the device for each vector
    cl_mem a_mem_obj = clCreateBuffer(context, CL_MEM_READ_ONLY,
            LIST_SIZE * sizeof(int), NULL, &ret);
    cl_mem b_mem_obj = clCreateBuffer(context, CL_MEM_READ_ONLY,
            LIST_SIZE * sizeof(int), NULL, &ret);
    cl_mem c_mem_obj = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
            LIST_SIZE * sizeof(int), NULL, &ret);

    // Copy the lists A and B to their respective memory buffers
    ret = clEnqueueWriteBuffer(command_queue, a_mem_obj, CL_TRUE, 0,
            LIST_SIZE * sizeof(int), A, 0, NULL, NULL);
    if (ret != CL_SUCCESS) {printf("ret at %d is %d\n", __LINE__, ret); CL_SUCCEED(ret);}

    ret = clEnqueueWriteBuffer(command_queue, b_mem_obj, CL_TRUE, 0,
            LIST_SIZE * sizeof(int), B, 0, NULL, NULL);
    if (ret != CL_SUCCESS) {printf("ret at %d is %d\n", __LINE__, ret); CL_SUCCEED(ret);}

    printf("before building\n");
    // Create a program from the kernel source
    cl_program program = clCreateProgramWithSource(context, 1,
            (const char **)&source_str, (const size_t *)&source_size, &ret);
    if (ret != CL_SUCCESS) {printf("ret at %d is %d\n", __LINE__, ret); CL_SUCCEED(ret);}

    // Build the program
    ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);
    if (ret != CL_SUCCESS) {printf("ret at %d is %d\n", __LINE__, ret); CL_SUCCEED(ret);}

    printf("after building\n");
    // Create the OpenCL kernel
    cl_kernel kernel = clCreateKernel(program, "vector_add", &ret);
    if (ret != CL_SUCCESS) {printf("ret at %d is %d\n", __LINE__, ret); CL_SUCCEED(ret);}

    // Set the arguments of the kernel
    ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&a_mem_obj);
    if (ret != CL_SUCCESS) {printf("ret at %d is %d\n", __LINE__, ret); CL_SUCCEED(ret);}

    ret = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&b_mem_obj);
    if (ret != CL_SUCCESS) {printf("ret at %d is %d\n", __LINE__, ret); CL_SUCCEED(ret);}

    ret = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&c_mem_obj);
    if (ret != CL_SUCCESS) {printf("ret at %d is %d\n", __LINE__, ret); CL_SUCCEED(ret);}

    printf("before execution\n");
    // Execute the OpenCL kernel on the list
    size_t global_item_size = LIST_SIZE; // Process the entire lists
    size_t local_item_size = 64; // Divide work items into groups of 64
    ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL,
            &global_item_size, &local_item_size, 0, NULL, NULL);
 printf("after execution\n");
    // Read the memory buffer C on the device to the local variable C
    int *C = (int*)malloc(sizeof(int)*LIST_SIZE);
    ret = clEnqueueReadBuffer(command_queue, c_mem_obj, CL_TRUE, 0,
            LIST_SIZE * sizeof(int), C, 0, NULL, NULL);
 printf("after copying\n");
    // Display the result to the screen
    for(i = 0; i < 10; i++)
        printf("%d + %d = %d\n", A[i], B[i], C[i]);

    // Clean up
    ret = clFlush(command_queue);
    ret = clFinish(command_queue);
    ret = clReleaseKernel(kernel);
    ret = clReleaseProgram(program);
    ret = clReleaseMemObject(a_mem_obj);
    ret = clReleaseMemObject(b_mem_obj);
    ret = clReleaseMemObject(c_mem_obj);
    ret = clReleaseCommandQueue(command_queue);
    ret = clReleaseContext(context);
    free(A);
    free(B);
    free(C);
    getchar();
    return 0;
}

In project properties…C/C++…General…Additional Include Directories, add the path to your CL/cl.h file, for me I added C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v8.0\include
In project properties…Linker…Input…Additional Dependencies, add your OpenCL lib, for me I added OpenCL.lib
In project properties…Linker…General…Additional Library Directories, add the path to your opencl lib, for me I added C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v8.0\lib\x64

Then build the code and run it.

Topic		Replies	Views
OpenCL and Ubuntu 10.10 CUDA Programming and Performance	7	80075	January 25, 2011
Simple OpenCL Example from OpenCL_GettingStartedLinux.pdf wrong output - only zeros CUDA Programming and Performance	3	22283	July 11, 2010
CL_BUILD_PROGRAM_FAILURE CUDA Programming and Performance	6	21900	February 5, 2012
openCL Newbie CUDA Programming and Performance	17	5424	February 8, 2012
OpenCL & VC++ from ATI to nVidia CUDA Programming and Performance	8	5845	April 6, 2010
Cuda OpenCL comparison cuda, openCL, nvidia CUDA Programming and Performance	19	42614	November 1, 2012
OpenCL example code doesn't compile (CUDA 6.0 + Ubuntu 12.04.5) CUDA Setup and Installation	9	7260	August 16, 2017
How to make OpenCL works on cluster with Nvidia Tesla? CUDA Setup and Installation	3	2424	March 23, 2014
How to add a cuda-file to VS 2015 c++ solution CUDA Programming and Performance	28	23908	August 20, 2017
Problem with get_global_id(1); CUDA Programming and Performance	5	4016	May 19, 2014

There is not yet a forum for OpenCL (so I am posting this thread here; it is, even so, related to CUDA)

Related topics