I installed Ubuntu 22.04 desktop from the official site. I selected minimal install. I disabled the downloading of updates. I noted that the kernel version is 5.19.0-40-generic. I set the runlevel to 3 (`sudo systemctl set-default multi-user.target`). I enabled the SSH server. I installed build-essential (`sudo apt install build-essential`). `gcc --version` showed 11.3.
I followed the instructions in the linux install guide to prepare the machine for runfile install. I then rebooted the machine. I happen to be using a GTX 970 GPU.
I downloaded the latest nvidia cuda toolkit runfile installer and installed it (CUDA 12.1, driver 530.30.02).
I noted that the install was successful as reported by the installer, and ran `nvidia-smi`, which showed the expected output.
After that I successfully compiled and ran an OpenCL program that I happened to have lying around. Here is the full console session at that point:
$ cat t1.cpp
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <iostream>
#include <vector>
#include <string>
#include <cstring>
#include <stddef.h>
#define CL_TARGET_OPENCL_VERSION 120
#include <CL/cl.h>
using namespace std;
// testpp stringifies its whole argument list, which lets the OpenCL C source
// below be written inline without quoting each line.
#define testpp(...)#__VA_ARGS__
// pp is the OpenCL C program text that main() passes to clCreateProgramWithSource.
// It defines two kernels:
//   innerproduct - each work-item writes out[id] = inp1[id] * inp2[id].
//   ppp          - the kernel main() actually launches; it enqueues innerproduct
//                  over ndrange_1D(5, 1) on get_default_queue() via enqueue_kernel.
//                  Its local `id` is computed but never used.
// NOTE(review): enqueue_kernel / ndrange_t / get_default_queue look like the
// OpenCL C device-side enqueue built-ins; no "-cl-std" build option is passed
// in main(), so confirm the target driver accepts them by default.
const char* pp = testpp(
kernel void innerproduct
(
global double* inp1,
global double* inp2,
global double* out
)
{
int id = get_global_id(0);
out[id] = inp1[id] * inp2[id];
}
__kernel void ppp
(
__global double* inp,
__global double* ppnum,
__global double* out
)
{
int id = get_global_id(0);
ndrange_t ndrange = ndrange_1D(5, 1);
enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange,^ { innerproduct(inp, ppnum, out); });
}
);
// Host-side data used by main(): targetinp and tarpp are copied into the
// buffers bound to kernel arguments 0 and 1 of ppp; oup backs the buffer bound
// to argument 2 and receives the values read back after the kernel finishes.
double targetinp[5] = { 5,7,0,4,6 };
double tarpp[5] = { 1,5,0,0,3 };
double oup[5];
int main()
{
cl_int err = 0;
cl_uint numPlatforms;
cl_platform_id platform = NULL;
int ans;
err = clGetPlatformIDs(0, NULL, &numPlatforms);
if (err != CL_SUCCESS)
{
printf("Error: Getting Platforms\n");
return EXIT_FAILURE;
}
if (numPlatforms > 0)
{
cl_platform_id* platforms = (cl_platform_id*)malloc(numPlatforms * sizeof(cl_platform_id));
err = clGetPlatformIDs(numPlatforms, platforms, NULL);
if (err != CL_SUCCESS)
{
printf("Error: Getting Platform Ids.(clGetPlatformIDs)\n");
return -1;
}
for (unsigned int i = 0; i < numPlatforms; ++i)
{
char pbuff[100];
err = clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, sizeof(pbuff), pbuff, NULL);
//platform = platforms[i];
cout << i << ":" << pbuff << "\n";
}
cout << "select platform: ";
cin >> ans;
platform = platforms[ans];
free(platforms);
}
else
{
cout << "invalid platform";
return EXIT_FAILURE;
}
cl_uint num_devices = 0;
cl_device_id* devices = NULL;
cl_device_id device = NULL;
err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 0, devices, &num_devices);
if (num_devices == 0) //no GPU available.
{
cout << "No GPU device available." << endl;
return EXIT_FAILURE;
}
else
{
devices = (cl_device_id*)malloc(num_devices * sizeof(cl_device_id));
err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, num_devices, devices, NULL);
for (unsigned int i = 0; i < num_devices; ++i)
{
char pbuff[100];
err = clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(pbuff), pbuff, NULL);
cout << i << ":" << pbuff << "\n";
}
cout << "select device: ";
cin >> ans;
device = devices[ans];
free(devices);
}
cl_context context = nullptr;
context = clCreateContext(NULL, 1, &device, NULL, NULL, &err);
cl_command_queue commandQueue = nullptr;
commandQueue = clCreateCommandQueue(context, device, 0, &err);
size_t ppsize[] = { strlen(pp) };
//cout << context;
cl_program pprog = clCreateProgramWithSource(context, 1, &pp, ppsize, &err);
if (err != CL_SUCCESS)
{
printf("Error: Loading Binary into cl_program (clCreateProgramWithBinary)\n");
return EXIT_FAILURE;
}
//cout << pprog;
err = clBuildProgram(pprog, 1, &device, NULL, NULL, NULL);
if (err != CL_SUCCESS)
{
if (err == CL_BUILD_PROGRAM_FAILURE) {
// Determine the size of the log
size_t log_size;
clGetProgramBuildInfo(pprog, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
// Allocate memory for the log
char* log = (char*)malloc(log_size);
// Get the log
clGetProgramBuildInfo(pprog, device, CL_PROGRAM_BUILD_LOG, log_size, log, NULL);
// Print the log
printf("%s\n", log);
}
cout << err;
printf("Error: Building Program (clBuildingProgram)\n");
return EXIT_FAILURE;
}
cl_kernel testkernel = clCreateKernel(pprog, "ppp", &err);
if (err != CL_SUCCESS)
{
size_t log_size;
clGetProgramBuildInfo(pprog, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
// Allocate memory for the log
char* log = (char*)malloc(log_size);
// Get the log
clGetProgramBuildInfo(pprog, device, CL_PROGRAM_BUILD_LOG, log_size, log, NULL);
// Print the log
printf("%s\n", log);
printf("Error: Creating Kernel from program.(clCreateKernel)\n");
return EXIT_FAILURE;
}
cl_mem inply = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(double) * 5, &targetinp, &err);
cl_mem preg = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(double) * 5, &tarpp, &err);
cl_mem ouply = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(double) * 5, &oup, &err);
err = clSetKernelArg(testkernel, 0, sizeof(cl_mem), (void*)&inply);
err = clSetKernelArg(testkernel, 1, sizeof(cl_mem), (void*)&preg);
err = clSetKernelArg(testkernel, 2, sizeof(cl_mem), (void*)&ouply);
size_t globalThreads = 1;
size_t localThreads = 1;
err = clEnqueueNDRangeKernel(commandQueue, testkernel, 1, NULL, &globalThreads, &localThreads, 0, NULL, NULL);
if (err != CL_SUCCESS)
{
printf("Error: Enqueueing kernel\n");
return EXIT_FAILURE;
}
err = clFinish(commandQueue);
if (err != CL_SUCCESS)
{
printf("Error: Finish command queue\n");
return EXIT_FAILURE;
}
err = clEnqueueReadBuffer(commandQueue, ouply, CL_TRUE, 0, sizeof(double) * 5, &oup, 0, NULL, NULL);
for (int i = 0; i < 5; i++)
{
cout << oup[i] << "\n";
}
}
$ g++ t1.cpp -o t1 -I/usr/local/cuda/include -L/usr/local/cuda/lib64 -lOpenCL
$ ./t1
0:NVIDIA Corporation
select platform: 0
0:NVIDIA GeForce GTX 970
select device: 0
5
35
0
0
18
$
So I can imagine some possibilities. I’m not saying categorically that it is one of these problems; I am just guessing here:
- You are using a kernel version other than the one I reported for default Ubuntu 22.04, and there is a problem with the NVIDIA driver in that kernel. I don’t know for sure of any specific problems, but it is not unheard of for a particular NVIDIA driver to not work correctly on the newest linux kernel version.
- The driver install is broken, perhaps because you allowed the kernel to be updated at some point after the original driver install, and did not provide a proper method (e.g. DKMS) to maintain an updated GPU driver install. I acknowledge that if you ran `nvidia-smi` and got sensible output, it would seem that the driver install is OK, but that is not a blanket guarantee.
- You installed the necessary NVIDIA bits using some method/installer not provided directly by NVIDIA. For example you installed canonical/ubuntu packages to create your NVIDIA GPU/OpenCL environment, instead of using the CUDA runfile installer the way I did. I don’t know for sure that this is a problem or that there is any problem with canonical/ubuntu packages, but it may be a possibility.
And of course there may be other reasons. These are just guesses. But my test case shows that with a careful install of Ubuntu 22.04, OpenCL on NVIDIA platform is usable.