Regression? NVIDIA OpenCL ICD stops working in Ubuntu 22.04

I have two Ubuntu boxes running 22.04, and neither of them lists the NVIDIA GPU in clinfo. The boxes are:

Box 1:
RTX 2060
CUDA 11.3
driver 515.86.01

Box 2:
RTX 3090
CUDA 11.3
driver 525.105.17

Both boxes can list the GPU via nvidia-smi, both have /etc/OpenCL/vendors/nvidia.icd, and libnvidia-opencl.so.1 is installed on the system as part of libnvidia-compute-???
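
In case it helps anyone reproduce this, the symptom reduces to the ICD loader not reporting an NVIDIA platform. A minimal sketch of that check (roughly the first thing clinfo does; the file name icd_check.cpp and the build line are just placeholders, and it only assumes the CL/cl.h header from a CUDA toolkit plus the OpenCL ICD loader library) is:

// icd_check.cpp (placeholder name): list every OpenCL platform the ICD loader can see,
// plus how many GPU devices each platform exposes.
// Build with something like: g++ icd_check.cpp -o icd_check -I/usr/local/cuda/include -lOpenCL
#include <cstdio>
#include <vector>

#define CL_TARGET_OPENCL_VERSION 120
#include <CL/cl.h>

int main()
{
    cl_uint numPlatforms = 0;
    if (clGetPlatformIDs(0, NULL, &numPlatforms) != CL_SUCCESS || numPlatforms == 0)
    {
        printf("No OpenCL platforms found (the ICD loader sees nothing)\n");
        return 1;
    }
    std::vector<cl_platform_id> platforms(numPlatforms);
    clGetPlatformIDs(numPlatforms, platforms.data(), NULL);
    for (cl_uint i = 0; i < numPlatforms; ++i)
    {
        char vendor[256] = {0};
        clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, sizeof(vendor), vendor, NULL);
        // count the GPU devices the platform exposes; CL_DEVICE_NOT_FOUND means zero
        cl_uint numDevices = 0;
        if (clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU, 0, NULL, &numDevices) != CL_SUCCESS)
            numDevices = 0;
        printf("platform %u: %s, GPU devices: %u\n", i, vendor, numDevices);
    }
    return 0;
}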

My lab also has over a dozen other Linux boxes running Ubuntu 18.04 and 20.04 with various generations of NVIDIA cards; some even have the same driver versions (515/525), but none of them have this issue.

Is this a known regression? Please let me know if there is a fix for this.

Thanks

I have similar problems. On 22.04, all OpenCL kernels failed to compile.

I installed Ubuntu 22.04 desktop from the official site, selecting the minimal install and disabling the download of updates during installation. I noted that the kernel version is 5.19.0-40-generic. I set the runlevel to 3 (sudo systemctl set-default multi-user.target), enabled the SSH server, and installed build-essential (sudo apt install build-essential); gcc --version reported 11.3.

I followed the instructions in the Linux install guide to prepare the machine for a runfile install, then rebooted the machine. I happen to be using a GTX 970 GPU.

I downloaded the latest NVIDIA CUDA toolkit runfile installer and installed it (CUDA 12.1, driver 530.30.02).

The installer reported that the install was successful, and nvidia-smi showed the expected output.

After that, I successfully compiled and ran an OpenCL program that I happened to have lying around. Here is the full console session at that point:

$ cat t1.cpp
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <iostream>
#include <vector>
#include <string>
#include <cstring>
#include <stddef.h>

#define CL_TARGET_OPENCL_VERSION 120
#include <CL/cl.h>
using namespace std;


#define testpp(...)#__VA_ARGS__
const char* pp = testpp(
   kernel void innerproduct
   (
    global double* inp1,
    global double* inp2,
    global double* out
   )
   {
    int id = get_global_id(0);
    out[id] = inp1[id] * inp2[id];
   }

   __kernel void ppp
   (
    __global double* inp,
    __global double* ppnum,
    __global double* out
   )
   {
    int id = get_global_id(0);
    ndrange_t ndrange = ndrange_1D(5, 1);
    enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange,^ { innerproduct(inp, ppnum, out); });

   }

);
double targetinp[5] = { 5,7,0,4,6 };
double tarpp[5] = { 1,5,0,0,3 };
double oup[5];

int main()
{
   cl_int err = 0;
   cl_uint numPlatforms;
   cl_platform_id platform = NULL;
   int ans;
   err = clGetPlatformIDs(0, NULL, &numPlatforms);
   if (err != CL_SUCCESS)
   {
    printf("Error: Getting Platforms\n");
    return EXIT_FAILURE;
   }
   if (numPlatforms > 0)
   {
    cl_platform_id* platforms = (cl_platform_id*)malloc(numPlatforms * sizeof(cl_platform_id));
    err = clGetPlatformIDs(numPlatforms, platforms, NULL);
    if (err != CL_SUCCESS)
    {
     printf("Error: Getting Platform Ids.(clGetPlatformIDs)\n");
     return -1;
    }
    for (unsigned int i = 0; i < numPlatforms; ++i)
    {
     char pbuff[100];
     err = clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, sizeof(pbuff), pbuff, NULL);
     //platform = platforms[i];
     cout << i << ":" << pbuff << "\n";
    }
    cout << "select platform: ";
    cin >> ans;
    platform = platforms[ans];
    free(platforms);
   }
   else
   {
    cout << "invalid platform";
    return EXIT_FAILURE;
   }
   cl_uint num_devices = 0;
   cl_device_id* devices = NULL;
   cl_device_id device = NULL;

   err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 0, devices, &num_devices);
   if (num_devices == 0) //no GPU available.
   {
    cout << "No GPU device available." << endl;
    return EXIT_FAILURE;
   }
   else
   {
    devices = (cl_device_id*)malloc(num_devices * sizeof(cl_device_id));
    err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, num_devices, devices, NULL);
    for (unsigned int i = 0; i < num_devices; ++i)
    {
     char pbuff[100];
     err = clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(pbuff), pbuff, NULL);
     cout << i << ":" << pbuff << "\n";
    }
    cout << "select device: ";
    cin >> ans;
    device = devices[ans];
    free(devices);

   }

   cl_context context = nullptr;
   context = clCreateContext(NULL, 1, &device, NULL, NULL, &err);

   cl_command_queue commandQueue = nullptr;
   commandQueue = clCreateCommandQueue(context, device, 0, &err);

   size_t ppsize[] = { strlen(pp) };
   //cout << context;
   cl_program pprog = clCreateProgramWithSource(context, 1, &pp, ppsize, &err);
   if (err != CL_SUCCESS)
   {
    printf("Error: Loading Binary into cl_program (clCreateProgramWithBinary)\n");
    return EXIT_FAILURE;
   }
   //cout << pprog;

   err = clBuildProgram(pprog, 1, &device, NULL, NULL, NULL);
   if (err != CL_SUCCESS)
   {
    if (err == CL_BUILD_PROGRAM_FAILURE) {
     // Determine the size of the log
     size_t log_size;
     clGetProgramBuildInfo(pprog, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);

     // Allocate memory for the log
     char* log = (char*)malloc(log_size);

     // Get the log
     clGetProgramBuildInfo(pprog, device, CL_PROGRAM_BUILD_LOG, log_size, log, NULL);
     // Print the log
     printf("%s\n", log);
    }
    cout << err;
    printf("Error: Building Program (clBuildingProgram)\n");

    return EXIT_FAILURE;
   }

   cl_kernel testkernel = clCreateKernel(pprog, "ppp", &err);
   if (err != CL_SUCCESS)
   {
    size_t log_size;
    clGetProgramBuildInfo(pprog, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);

    // Allocate memory for the log
    char* log = (char*)malloc(log_size);

    // Get the log
    clGetProgramBuildInfo(pprog, device, CL_PROGRAM_BUILD_LOG, log_size, log, NULL);
    // Print the log
    printf("%s\n", log);
    printf("Error: Creating Kernel from program.(clCreateKernel)\n");
    return EXIT_FAILURE;
   }

   cl_mem inply = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(double) * 5, &targetinp, &err);
   cl_mem preg = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(double) * 5, &tarpp, &err);
   cl_mem ouply = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(double) * 5, &oup, &err);

   err = clSetKernelArg(testkernel, 0, sizeof(cl_mem), (void*)&inply);
   err = clSetKernelArg(testkernel, 1, sizeof(cl_mem), (void*)&preg);
   err = clSetKernelArg(testkernel, 2, sizeof(cl_mem), (void*)&ouply);
   size_t globalThreads = 1;
   size_t localThreads = 1;
   err = clEnqueueNDRangeKernel(commandQueue, testkernel, 1, NULL, &globalThreads, &localThreads, 0, NULL, NULL);
   if (err != CL_SUCCESS)
   {
    printf("Error: Enqueueing kernel\n");
    return EXIT_FAILURE;
   }

   err = clFinish(commandQueue);
   if (err != CL_SUCCESS)
   {
    printf("Error: Finish command queue\n");
    return EXIT_FAILURE;
   }

   err = clEnqueueReadBuffer(commandQueue, ouply, CL_TRUE, 0, sizeof(double) * 5, &oup, 0, NULL, NULL);
   for (int i = 0; i < 5; i++)
   {
    cout << oup[i] << "\n";
   }
}
$ g++ t1.cpp -o t1 -I/usr/local/cuda/include -L/usr/local/cuda/lib64 -lOpenCL
$ ./t1
0:NVIDIA Corporation
select platform: 0
0:NVIDIA GeForce GTX 970
select device: 0
5
35
0
0
18
$

So I can imagine some possibilities. I’m not saying categorically that it is one of these problems; I am just guessing here:

  • You are using a kernel version other than the one I reported for default Ubuntu 22.04, and there is a problem with the NVIDIA driver on that kernel. I don’t know of any specific problems for certain, but it is not unheard of for a particular NVIDIA driver to not work correctly on the newest Linux kernel version.
  • The driver install is broken, perhaps because you allowed the kernel to be updated at some point after the original driver install and did not provide a proper method (e.g. DKMS) to keep the GPU driver install up to date. I acknowledge that if you ran nvidia-smi and got sensible output, it would seem that the driver install is OK, but that is not a blanket guarantee (see the sketch after this list for one way to cross-check from the OpenCL side).
  • You installed the necessary NVIDIA bits using some method or installer not provided directly by NVIDIA; for example, you installed Canonical/Ubuntu packages to create your NVIDIA GPU/OpenCL environment instead of using the CUDA runfile installer the way I did. I don’t know for sure that this is a problem, or that there is any problem with the Canonical/Ubuntu packages, but it may be a possibility.
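
As a cross-check related to the second item: when the NVIDIA platform does show up, you can ask the OpenCL user-mode library itself which driver it belongs to and compare that against nvidia-smi. This is just a sketch under that assumption (the file name and build line are mine; on NVIDIA, CL_DRIVER_VERSION normally contains the driver version string, e.g. 530.30.02):

// clver.cpp (placeholder name): print the device, driver, and OpenCL version strings
// reported by the user-mode OpenCL library, to compare against nvidia-smi.
// Build with something like: g++ clver.cpp -o clver -I/usr/local/cuda/include -lOpenCL
#include <cstdio>
#include <vector>

#define CL_TARGET_OPENCL_VERSION 120
#include <CL/cl.h>

int main()
{
    cl_uint numPlatforms = 0;
    clGetPlatformIDs(0, NULL, &numPlatforms);
    if (numPlatforms == 0) return 1;
    std::vector<cl_platform_id> platforms(numPlatforms);
    clGetPlatformIDs(numPlatforms, platforms.data(), NULL);
    for (cl_uint p = 0; p < numPlatforms; ++p)
    {
        cl_uint numDevices = 0;
        if (clGetDeviceIDs(platforms[p], CL_DEVICE_TYPE_GPU, 0, NULL, &numDevices) != CL_SUCCESS)
            continue;   // no GPU devices on this platform
        std::vector<cl_device_id> devices(numDevices);
        clGetDeviceIDs(platforms[p], CL_DEVICE_TYPE_GPU, numDevices, devices.data(), NULL);
        for (cl_uint d = 0; d < numDevices; ++d)
        {
            char name[256] = {0}, drv[256] = {0}, ver[256] = {0};
            clGetDeviceInfo(devices[d], CL_DEVICE_NAME, sizeof(name), name, NULL);
            clGetDeviceInfo(devices[d], CL_DRIVER_VERSION, sizeof(drv), drv, NULL);
            clGetDeviceInfo(devices[d], CL_DEVICE_VERSION, sizeof(ver), ver, NULL);
            // the driver version here should match what nvidia-smi reports
            printf("%s | driver %s | %s\n", name, drv, ver);
        }
    }
    return 0;
}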

And of course there may be other reasons; these are just guesses. But my test case shows that, with a careful install of Ubuntu 22.04, OpenCL on the NVIDIA platform is usable.
