CL / GL interop. clCreateFromGLBuffer return CL_OUT_OF_RESOURCES on GeForce GTX 1070 (Linux with Nvi...

Guillaume75 · July 15, 2017, 1:28pm

Hello

I’ve got a weird issue trying to run the following code.
It works well when I disable CL GL sharing or running on CPU.

Nvidia oclSimpleGL works well with CL GL interop.

Any ideas?

Guillaume

This is part of gnuradio, gr-fosphor.

/*

cl.c
OpenCL base routines
Copyright (C) 2013-2014 Sylvain Munaut
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/.
*/

/*! \addtogroup cl

@{
*/

/*! \file cl.c

\brief OpenCL base routines
*/

#include <ctype.h>
#include <errno.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>

#include “cl_platform.h”
#include “cl_compat.h”

#if defined(APPLE) || defined(MACOSX)

include <OpenGL/OpenGL.h>

include <OpenGL/gl.h>

#elif defined(_WIN32)

include <windows.h>

include <wingdi.h>

#else

include <GL/glx.h>

#endif

#include “cl.h”
#include “gl.h”
#include “private.h”
#include “resource.h”

struct fosphor_cl_features
{
#define FLG_CL_GL_SHARING (1<<0)
#define FLG_CL_NVIDIA_SM11 (1<<1)
#define FLG_CL_OPENCL_11 (1<<2)
#define FLG_CL_LOCAL_ATOMIC_EXT (1<<3)

cl_device_type type;
char vendor[128];
unsigned long local_mem;
int flags;
int wg_size;
int wg_size_dim[2];

};

struct fosphor_cl_state
{
cl_platform_id pl_id;
cl_device_id dev_id;
cl_context ctx;
cl_command_queue cq;

/* Features */
struct fosphor_cl_features feat;

/* FFT */
cl_mem		mem_fft_in;
cl_mem		mem_fft_out;
cl_mem		mem_fft_win;

cl_program	prog_fft;
cl_kernel	kern_fft;

float		*fft_win;
int		fft_win_updated;

/* Display */
cl_mem		mem_waterfall;
cl_mem		mem_histogram;
cl_mem		mem_spectrum;

cl_program	prog_display;
cl_kernel	kern_display;

/* Histogram range */
float		histo_scale;
float		histo_offset;

/* State */
int		waterfall_pos;
enum {
	CL_BOOTING = 0,
	CL_PENDING,
	CL_READY,
} state;

};

/* -------------------------------------------------------------------------- /
/ Helpers / Internal API /
/ -------------------------------------------------------------------------- */

#define MAX_PLATFORMS 16
#define MAX_DEVICES 16

#define CL_ERR_CHECK(v, msg)
if ((v) != CL_SUCCESS) {
fprintf(stderr, “[!] CL Error (%d, %s:%d): %s\n”,
(v), FILE, LINE, msg);
goto error;
}

static int
cl_device_query(cl_device_id dev_id, struct fosphor_cl_features *feat)
{
char txt[2048];
cl_int err;
int has_nv_attr;

memset(feat, 0x00, sizeof(struct fosphor_cl_features));

/* Device type */
err = clGetDeviceInfo(dev_id, CL_DEVICE_TYPE, sizeof(cl_device_type), &feat->type, NULL);
if (err != CL_SUCCESS)
	return -1;

/* Vendor */
err = clGetDeviceInfo(dev_id, CL_DEVICE_VENDOR, sizeof(feat->vendor)-1, &feat->vendor, NULL);
if (err != CL_SUCCESS)
	return -1;

/* Local memory size */
err = clGetDeviceInfo(dev_id, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(cl_ulong), &feat->local_mem, NULL);
if (err != CL_SUCCESS)
	return -1;

/* CL/GL extension */
err = clGetDeviceInfo(dev_id, CL_DEVICE_EXTENSIONS, sizeof(txt)-1, txt, NULL);
if (err != CL_SUCCESS)
	return -1;

txt = 0;

/*  Check for CL/GL sharing */
if (strstr(txt, "cl_khr_gl_sharing") || strstr(txt, "cl_APPLE_gl_sharing"))
	feat->flags |= FLG_CL_GL_SHARING;

/* Check for NV attributes */
has_nv_attr = !!strstr(txt, "cl_nv_device_attribute_query");

/* Check for cl_khr_local_int32_base_atomics extension */
if (strstr(txt, "cl_khr_local_int32_base_atomics"))
	feat->flags |= FLG_CL_LOCAL_ATOMIC_EXT;

/* Check OpenCL 1.1 compat */
err = clGetDeviceInfo(dev_id, CL_DEVICE_VERSION, sizeof(txt)-1, txt, NULL);
if (err != CL_SUCCESS)
	return -1;

txt = 0;

if (!memcmp(txt, "OpenCL 1.", 9) && txt[9] >= '1')
	feat->flags |= FLG_CL_OPENCL_11;

/* Check if a NVidia SM11 architecture */
if (has_nv_attr) {
	cl_uint nv_maj, nv_min;

	err = clGetDeviceInfo(dev_id, CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV,
	                      sizeof(cl_uint), &nv_maj, NULL);
	if (err != CL_SUCCESS)
		return -1;

	err = clGetDeviceInfo(dev_id, CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV,
	                      sizeof(cl_uint), &nv_min, NULL);
	if (err != CL_SUCCESS)
		return -1;

	if ((nv_maj == 1) && (nv_min == 1))
		feat->flags |= FLG_CL_NVIDIA_SM11;
}

#ifdef APPLE
else if (!(feat->flags & (FLG_CL_OPENCL_11 | FLG_CL_LOCAL_ATOMIC_EXT)))
{
/*
* OSX doesn’t allow query of NV attributes even on NVidia
* cards so we just assume any non-opencl 1.1 nvidia card
* without cl_khr_local_int32_base_atomics extension
* that does OpenCL is a SM1.1 one
*/
err = clGetDeviceInfo(dev_id, CL_DEVICE_VENDOR, sizeof(txt)-1, txt, NULL);
if (err != CL_SUCCESS)
return -1;

	txt = 0;

	if (!!strstr(txt, "NVIDIA"))
		feat->flags |= FLG_CL_NVIDIA_SM11;
}

#endif

return 0;

}

static int
cl_device_score(cl_device_id dev_id, struct fosphor_cl_features *feat)
{
int rv, score = 0;

/* Query device */
rv = cl_device_query(dev_id, feat);
if (rv)
	return rv;

/* Check compatibility */
if (!(feat->flags & (FLG_CL_NVIDIA_SM11 | FLG_CL_OPENCL_11 | FLG_CL_LOCAL_ATOMIC_EXT)))
	return -1;

/* Prefer device with CL/GL sharing */
if (feat->flags & FLG_CL_GL_SHARING)
	score += 500;

/* Prefer GPU (preferrably NVidia / AMD) */
if (feat->type == CL_DEVICE_TYPE_GPU)
{
	char vendor;
	int i;

	score += 1000;

	for (i=0; i<sizeof(feat->vendor); i++)
		vendor[i] = tolower(feat->vendor[i]);

	if (strstr(vendor, "nvidia") ||
	    strstr(vendor, "advanced micro devices") ||
	    strstr(vendor, "amd"))
		score += 500;
}

/* Bigger local mem */
score += (feat->local_mem < (1<<20)) ? (feat->local_mem >> 11) : (1 << 9);

return score;

}

static int
cl_find_device(cl_platform_id *pl_id_p, cl_device_id *dev_id_p,
struct fosphor_cl_features *feat)
{
cl_platform_id pl_list[MAX_PLATFORMS], pl_id;
cl_device_id dev_list[MAX_DEVICES], dev_id;
cl_uint pl_count, dev_count, i, j;
cl_int err;
int score = -1;

/* Scan each platforms */
err = clGetPlatformIDs(MAX_PLATFORMS, pl_list, &pl_count);
CL_ERR_CHECK(err, "Unable to fetch platform IDs");

for (i=0; i<pl_count; i++)
{
	/* Scan all devices */
	err = clGetDeviceIDs(pl_list[i], CL_DEVICE_TYPE_ALL, MAX_DEVICES, dev_list, &dev_count);
	if (err != CL_SUCCESS)
	{
		fprintf(stderr, "[w] CL Error (%d, %s:%d): "
			"Unable to fetch device IDs for platform %d. Skipping.\n",
			err, __FILE__, __LINE__, i);
		continue;
	}

	for (j=0; j<dev_count; j++)
	{
		struct fosphor_cl_features feat_cur;
		int s = cl_device_score(dev_list[j], &feat_cur);
		if (s > score) {
			pl_id  = pl_list[i];
			dev_id = dev_list[j];
			memcpy(feat, &feat_cur, sizeof(struct fosphor_cl_features));
			score = s;
		}
	}
}

/* Did we get a good fit ? */
if (score >= 0) {
	*pl_id_p  = pl_id;
	*dev_id_p = dev_id;
	err = 0;
} else {
	err = -ENODEV;
}

error:
return err;
}

static cl_program
cl_load_program(cl_device_id dev_id, cl_context ctx,
const char *resource_name, const char *opts,
cl_int *err_ptr)
{
cl_program prog = NULL;
const char *src;
cl_int err;

/* Grab resource */
src = resource_get(resource_name, NULL);
if (!src) {
	fprintf(stderr, "[!] Unable to load non-existent resource '%s'\n", resource_name);
	err = CL_INVALID_VALUE;
	goto error;
}

/* Create the program from sources */
prog = clCreateProgramWithSource(ctx, 1, (const char **)&src, NULL, &err);
CL_ERR_CHECK(err, "Failed to create program");

    /* Build it */
    err = clBuildProgram(prog, 0, NULL, opts, NULL, NULL);

#ifndef DEBUG_CL
if (err != CL_SUCCESS)
#endif
{
size_t len;
const int txt_buf_len = 1024 * 1024;
char *txt_buf;

	txt_buf = malloc(txt_buf_len);
            clGetProgramBuildInfo(prog, dev_id, CL_PROGRAM_BUILD_LOG, txt_buf_len, txt_buf, &len);
            fprintf(stderr, "Build log for '%s':\n%s\n\n---\n", resource_name, txt_buf);
	free(txt_buf);
    }

CL_ERR_CHECK(err, "Failed to build program");

#ifdef DEBUG_CL
{
size_t bin_len;
char name_buf[256];
char *bin_buf;

	snprintf(name_buf, 256, "prog_%s.bin", resource_name);

	clGetProgramInfo(prog, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &bin_len, NULL);
	fprintf(stderr, "Binary length for '%s': %d\n\n", resource_name, (int)bin_len);

	bin_buf = malloc(bin_len);

	clGetProgramInfo(prog, CL_PROGRAM_BINARIES, sizeof(char *), &bin_buf, NULL );

	FILE *fh = fopen(name_buf, "wb");
	if (fwrite(bin_buf, bin_len, 1, fh) != 1)
		fprintf(stderr, "[w] Binary write failed\n");
	fclose(fh);

	free(bin_buf);
}

#endif

/* All good */
return prog;

/* Error path */

error:
if (prog)
clReleaseProgram(prog);

if (err_ptr)
	*err_ptr = err;

return NULL;

}

static int
cl_queue_clear_buffers(struct fosphor *self)
{
struct fosphor_cl_state *cl = self->cl;
float noise_floor, color[4] = {0.0f, 0.0f, 0.0f, 0.0f};
size_t img_origin[3] = {0.0f, 0.0f, 0.0f}, img_region[3];
cl_int err;

/* Configure noise floor to the bottom of the scale */
noise_floor = - self->power.offset;

/* Init spectrum to noise floor */
err = clEnqueueFillBuffer(cl->cq,
	cl->mem_spectrum,
	&noise_floor, sizeof(float),
	0,
	2 * 2 * sizeof(cl_float) * FOSPHOR_FFT_LEN,
	0, NULL, NULL
);
CL_ERR_CHECK(err, "Unable to queue clear of spectrum buffer");

/* Init the waterfall image to noise floor */
color[0] = noise_floor;

img_region[0] = FOSPHOR_FFT_LEN;
img_region[1] = 1024;
img_region[2] = 1;

err = clEnqueueFillImage(cl->cq,
	cl->mem_waterfall,
	color,
	img_origin, img_region,
	0, NULL, NULL
);
CL_ERR_CHECK(err, "Unable to queue clear of waterfall image");

/* Init the histogram image to all 0.0f values */
color[0] = 0.0f;

img_region[0] = FOSPHOR_FFT_LEN;
img_region[1] = 128;
img_region[2] = 1;

err = clEnqueueFillImage(cl->cq,
	cl->mem_histogram,
	color,
	img_origin, img_region,
	0, NULL, NULL
);
CL_ERR_CHECK(err, "Unable to queue clear of histogram image");

/* Need to finish because our patterns are on the stack */
clFinish(cl->cq);

return 0;

/* Error path */

error:
return err;
}

static int
cl_init_buffers_gl(struct fosphor *self)
{
struct fosphor_cl_state *cl = self->cl;
cl_int err;

/* GL shared objects */
	/* Waterfall texture */
cl->mem_waterfall = clCreateFromGLTexture(cl->ctx,
	CL_MEM_WRITE_ONLY, GL_TEXTURE_2D, 0,
	fosphor_gl_get_shared_id(self, GL_ID_TEX_WATERFALL),
	&err
);
CL_ERR_CHECK(err, "Unable to share waterfall texture into OpenCL context");

	/* Histogram texture */
cl->mem_histogram = clCreateFromGLTexture(cl->ctx,
	CL_MEM_READ_WRITE, GL_TEXTURE_2D, 0,
	fosphor_gl_get_shared_id(self, GL_ID_TEX_HISTOGRAM),
	&err
);
CL_ERR_CHECK(err, "Unable to share histogram texture into OpenCL context");

	/* Spectrum VBO */
cl->mem_spectrum = clCreateFromGLBuffer(cl->ctx,
	CL_MEM_WRITE_ONLY,
	fosphor_gl_get_shared_id(self, GL_ID_VBO_SPECTRUM),
	&err
);
CL_ERR_CHECK(err, "Unable to share spectrum VBO into OpenCL context");

/* All done */
err = 0;

error:
return err;
}

static int
cl_init_buffers_nogl(struct fosphor *self)
{
struct fosphor_cl_state *cl = self->cl;
cl_image_format img_fmt;
cl_image_desc img_desc;
cl_int err;

/* Common settings */
img_fmt.image_channel_order = CL_R;
img_fmt.image_channel_data_type = CL_FLOAT;

img_desc.image_type = CL_MEM_OBJECT_IMAGE2D;
img_desc.image_width = FOSPHOR_FFT_LEN;
img_desc.image_depth = 0;
img_desc.image_array_size = 0;
img_desc.image_row_pitch = 0;
img_desc.image_slice_pitch = 0;
img_desc.num_mip_levels = 0;
img_desc.num_samples = 0;
img_desc.buffer = NULL;

/* Waterfall texture */
img_desc.image_height = 1024;

cl->mem_waterfall = clCreateImage(
	cl->ctx,
	CL_MEM_WRITE_ONLY,
	&img_fmt,
	&img_desc,
	NULL,
	&err
);
CL_ERR_CHECK(err, "Unable to create waterfall image");

/* Histogram texture */
img_desc.image_height = 128;

cl->mem_histogram = clCreateImage(
	cl->ctx,
	CL_MEM_READ_WRITE,
	&img_fmt,
	&img_desc,
	NULL,
	&err
);
CL_ERR_CHECK(err, "Unable to create histogram image");

/* Spectrum VBO */
cl->mem_spectrum = clCreateBuffer(
	cl->ctx,
	CL_MEM_READ_WRITE,
	2 * 2 * sizeof(cl_float) * FOSPHOR_FFT_LEN,
	NULL,
	&err
);
CL_ERR_CHECK(err, "Unable to create spectrum VBO buffer");

/* All done */
err = 0;

error:
return err;
}

static int
cl_do_init(struct fosphor *self)
{
struct fosphor_cl_state *cl = self->cl;
cl_context_properties ctx_props[7];
const char *disp_opts;
cl_int err;

/* Setup some options */
if ((cl->feat.type == CL_DEVICE_TYPE_GPU) &&
    (cl->feat.flags & FLG_CL_GL_SHARING))
{
	/* Only use CLGL sharing with GPU. Most CPU impl of it will
	 * just fail with float textures */
	self->flags |= FLG_FOSPHOR_USE_CLGL_SHARING;
}

/* Context */
ctx_props[0] = 0;

if (self->flags & FLG_FOSPHOR_USE_CLGL_SHARING)
{
	/* Setup context properties */

#if defined(APPLE) || defined(MACOSX)

		/* OSX variant */
	ctx_props[0] = CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE;
	ctx_props[1] = (cl_context_properties) CGLGetShareGroup(CGLGetCurrentContext());
	ctx_props[2] = 0;

#elif defined(_WIN32)

		/* Win 32 variant */
	ctx_props[0] = CL_GL_CONTEXT_KHR;
	ctx_props[1] = (cl_context_properties) wglGetCurrentContext();
	ctx_props[2] = CL_WGL_HDC_KHR;
	ctx_props[3] = (cl_context_properties) wglGetCurrentDC();
	ctx_props[4] = CL_CONTEXT_PLATFORM;
	ctx_props[5] = (cl_context_properties) cl->pl_id;
	ctx_props[6] = 0;

#else

		/* Linux variant */
	ctx_props[0] = CL_GL_CONTEXT_KHR;
	ctx_props[1] = (cl_context_properties) glXGetCurrentContext();
	ctx_props[2] = CL_GLX_DISPLAY_KHR;
	ctx_props[3] = (cl_context_properties) glXGetCurrentDisplay();
	ctx_props[4] = CL_CONTEXT_PLATFORM;
	ctx_props[5] = (cl_context_properties) cl->pl_id;
	ctx_props[6] = 0;

#endif

	/* Attempt to create context */
	cl->ctx = clCreateContext(ctx_props, 1, &cl->dev_id, NULL, NULL, &err);
	if (err != CL_SUCCESS) {
		/* Failed, we'll retry again without CL/GL sharing */
		fprintf(stderr, "[w] CL Error (%d, %s:%d): "
			"Unable to create context with CL/GL sharing, retrying without\n",
			err, __FILE__, __LINE__);

		self->flags &= ~FLG_FOSPHOR_USE_CLGL_SHARING;
		ctx_props[0] = 0;
	}
}

if (!(self->flags & FLG_FOSPHOR_USE_CLGL_SHARING))
{
	cl->ctx = clCreateContext(ctx_props, 1, &cl->dev_id, NULL, NULL, &err);
	CL_ERR_CHECK(err, "Unable to create context");
}

/* Command Queue */
cl->cq = clCreateCommandQueue(cl->ctx, cl->dev_id, 0, &err);
CL_ERR_CHECK(err, "Unable to create command queue");

/* FFT buffers */
cl->mem_fft_in = clCreateBuffer(cl->ctx,
	CL_MEM_READ_ONLY,
	2 * sizeof(cl_float) * FOSPHOR_FFT_LEN * FOSPHOR_FFT_MAX_BATCH,
	NULL,
	&err
);
CL_ERR_CHECK(err, "Unable to allocate FFT input buffer");

cl->mem_fft_out = clCreateBuffer(cl->ctx,
	CL_MEM_READ_WRITE,
	2 * sizeof(cl_float) * FOSPHOR_FFT_LEN * FOSPHOR_FFT_MAX_BATCH,
	NULL,
	&err
);
CL_ERR_CHECK(err, "Unable to allocate FFT output buffer");

cl->mem_fft_win = clCreateBuffer(cl->ctx,
	CL_MEM_READ_ONLY,
	2 * sizeof(cl_float) * FOSPHOR_FFT_LEN,
	NULL,
	&err
);
CL_ERR_CHECK(err, "Unable to allocate FFT window buffer");

/* FFT program/kernels */
cl->prog_fft = cl_load_program(cl->dev_id, cl->ctx, "fft.cl", NULL, &err);
if (!cl->prog_fft)
	goto error;

cl->kern_fft = clCreateKernel(cl->prog_fft, "fft1D_1024", &err);
CL_ERR_CHECK(err, "Unable to create FFT kernel");

/* Configure static FFT kernel args */
err  = clSetKernelArg(cl->kern_fft, 0, sizeof(cl_mem), &cl->mem_fft_in);
err |= clSetKernelArg(cl->kern_fft, 1, sizeof(cl_mem), &cl->mem_fft_out);
err |= clSetKernelArg(cl->kern_fft, 2, sizeof(cl_mem), &cl->mem_fft_win);

CL_ERR_CHECK(err, "Unable to configure FFT kernel");

/* Display kernel result memory objects */
if (self->flags & FLG_FOSPHOR_USE_CLGL_SHARING)
	err = cl_init_buffers_gl(self);
else
	err = cl_init_buffers_nogl(self);

if (err != CL_SUCCESS)
	goto error;

/* Display program/kernel */
if (cl->feat.flags & FLG_CL_NVIDIA_SM11)
	disp_opts = "-DUSE_NV_SM11_ATOMICS";
else if (!(cl->feat.flags & FLG_CL_OPENCL_11))
	disp_opts = "-DUSE_EXT_ATOMICS";
else
	disp_opts = NULL;

cl->prog_display = cl_load_program(cl->dev_id, cl->ctx, "display.cl", disp_opts, &err);
if (!cl->prog_display)
	goto error;

cl->kern_display = clCreateKernel(cl->prog_display, "display", &err);
CL_ERR_CHECK(err, "Unable to create display kernel");

/* Configure static display kernel args */
cl_uint fft_log2_len = FOSPHOR_FFT_LEN_LOG;
cl_float histo_t0r   = 16.0f;
cl_float histo_t0d   = 1024.0f;
cl_float live_alpha  = 0.002f;

err  = clSetKernelArg(cl->kern_display,  0, sizeof(cl_mem),   &cl->mem_fft_out);
err |= clSetKernelArg(cl->kern_display,  1, sizeof(cl_int),   &fft_log2_len);

err |= clSetKernelArg(cl->kern_display,  3, sizeof(cl_mem),   &cl->mem_waterfall);

err |= clSetKernelArg(cl->kern_display,  5, sizeof(cl_mem),   &cl->mem_histogram);
err |= clSetKernelArg(cl->kern_display,  6, sizeof(cl_mem),   &cl->mem_histogram);
err |= clSetKernelArg(cl->kern_display,  7, sizeof(cl_float), &histo_t0r);
err |= clSetKernelArg(cl->kern_display,  8, sizeof(cl_float), &histo_t0d);

err |= clSetKernelArg(cl->kern_display, 11, sizeof(cl_mem),   &cl->mem_spectrum);
err |= clSetKernelArg(cl->kern_display, 12, sizeof(cl_float), &live_alpha);

CL_ERR_CHECK(err, "Unable to configure display kernel");

/* All done */
err = 0;

error:
return err;
}

static void
cl_do_release(struct fosphor_cl_state *cl)
{
if (cl->kern_display)
clReleaseKernel(cl->kern_display);

if (cl->prog_display)
	clReleaseProgram(cl->prog_display);

if (cl->mem_spectrum)
	clReleaseMemObject(cl->mem_spectrum);

if (cl->mem_histogram)
	clReleaseMemObject(cl->mem_histogram);

if (cl->mem_waterfall)
	clReleaseMemObject(cl->mem_waterfall);

if (cl->kern_fft)
	clReleaseKernel(cl->kern_fft);

if (cl->prog_fft)
	clReleaseProgram(cl->prog_fft);

if (cl->mem_fft_win)
	clReleaseMemObject(cl->mem_fft_win);

if (cl->mem_fft_out)
	clReleaseMemObject(cl->mem_fft_out);

if (cl->mem_fft_in)
	clReleaseMemObject(cl->mem_fft_in);

if (cl->cq)
	clReleaseCommandQueue(cl->cq);

if (cl->ctx)
	clReleaseContext(cl->ctx);

}

/* -------------------------------------------------------------------------- /
/ Exposed API /
/ -------------------------------------------------------------------------- */

int
fosphor_cl_init(struct fosphor *self)
{
struct fosphor_cl_state *cl;
char dev_name[128];
cl_int err;

/* Allocate structure */
cl = malloc(sizeof(struct fosphor_cl_state));
if (!cl)
	return -ENOMEM;

self->cl = cl;

memset(cl, 0, sizeof(struct fosphor_cl_state));

cl->state = CL_BOOTING;

/* Find suitable device */
if (cl_find_device(&cl->pl_id, &cl->dev_id, &cl->feat)) {
	fprintf(stderr, "[!] No suitable OpenCL device found\n");
	goto error;
}

/* Report selected device */
err = clGetDeviceInfo(cl->dev_id, CL_DEVICE_NAME, sizeof(dev_name)-1, dev_name, NULL);
CL_ERR_CHECK(err, "Unable to fetch device name");

fprintf(stderr, "[+] Selected device: %s\n", dev_name);

/* Setup compatibility layer for this platform */
cl_compat_init();
cl_compat_check_platform(cl->pl_id);

/* Initialize selected platform / device */
err = cl_do_init(self);
if (err)
	goto error;

/* Done */
return 0;

/* Error path */

error:
fosphor_cl_release(self);

return -EIO;

}

void
fosphor_cl_release(struct fosphor *self)
{
struct fosphor_cl_state *cl = self->cl;

/* Safety */
if (!cl)
	return;

/* Release all allocated OpenCL resources */
cl_do_release(cl);

/* Release structure */
free(cl);

/* Nothing left */
self->cl = NULL;

}

int
fosphor_cl_process(struct fosphor *self,
void *samples, int len)
{
struct fosphor_cl_state *cl = self->cl;

cl_int err;
size_t local[2], global[2];
int n_spectra = len / FOSPHOR_FFT_LEN;

/* Validate batch size */
if (len & ((FOSPHOR_FFT_MULT_BATCH*FOSPHOR_FFT_LEN)-1))
	return -EINVAL;

if (len > (FOSPHOR_FFT_LEN * FOSPHOR_FFT_MAX_BATCH))
	return -EINVAL;

/* Copy new window if needed */
if (cl->fft_win_updated) {
	err = clEnqueueWriteBuffer(
		cl->cq,
		cl->mem_fft_win,
		CL_FALSE,
		0, sizeof(cl_float) * FOSPHOR_FFT_LEN, cl->fft_win,
		0, NULL, NULL
	);
	CL_ERR_CHECK(err, "Unable to copy data to FFT window buffer");

	cl->fft_win_updated = 0;
}

/* Copy samples data */
err = clEnqueueWriteBuffer(
	cl->cq,
	cl->mem_fft_in,
	CL_FALSE,
	0, 2 * sizeof(cl_float) * len, samples,
	0, NULL, NULL
);
CL_ERR_CHECK(err, "Unable to copy data to FFT input buffer");

/* Execute FFT kernel */
global[0] = FOSPHOR_FFT_LEN / 8;
global[1] = n_spectra;

local[0] = global[0];
local[1] = 1;

err = clEnqueueNDRangeKernel(cl->cq, cl->kern_fft, 2, NULL, global, local, 0, NULL, NULL);
CL_ERR_CHECK(err, "Unable to queue FFT kernel execution");

/* Capture all GL objects */
if ((cl->state != CL_PENDING) && (self->flags & FLG_FOSPHOR_USE_CLGL_SHARING)) {
	cl_mem objs[3];

	objs[0] = cl->mem_waterfall;
	objs[1] = cl->mem_histogram;
	objs[2] = cl->mem_spectrum;

	err = clEnqueueAcquireGLObjects(cl->cq, 3, objs, 0, NULL, NULL);
	CL_ERR_CHECK(err, "Unable to acquire GL objects");
}

/* If this is the first run, make sure to pre-clear the buffers */
if (cl->state == CL_BOOTING) {
	err = cl_queue_clear_buffers(self);
	if (err != CL_SUCCESS)
		goto error;
}

/* Configure display kernel */
err  = 0;
err |= clSetKernelArg(cl->kern_display,  2, sizeof(cl_int),   &n_spectra);
err |= clSetKernelArg(cl->kern_display,  4, sizeof(cl_int),   &cl->waterfall_pos);
err |= clSetKernelArg(cl->kern_display,  9, sizeof(cl_float), &cl->histo_scale);
err |= clSetKernelArg(cl->kern_display, 10, sizeof(cl_float), &cl->histo_offset);
CL_ERR_CHECK(err, "Unable to configure display kernel");

/* Execute display kernel */
global[0] = FOSPHOR_FFT_LEN;
global[1] = 16;
local[0] = 16;
local[1] = 16;

err = clEnqueueNDRangeKernel(cl->cq, cl->kern_display, 2, NULL, global, local, 0, NULL, NULL);
CL_ERR_CHECK(err, "Unable to queue display kernel execution");

/* Advance waterfall */
cl->waterfall_pos = (cl->waterfall_pos + n_spectra) & 1023;

/* New state */
cl->state = CL_PENDING;

return 0;

error:
return -EIO;
}

int
fosphor_cl_finish(struct fosphor *self)
{
struct fosphor_cl_state *cl = self->cl;

cl_int err;

/* Check if we really need to do anything */
if (cl->state == CL_READY)
	return 0;

/* If no data was processed, we may need to finish the boot */
if (cl->state == CL_BOOTING) {
	/* Acquire GL objects if needed */
	if (self->flags & FLG_FOSPHOR_USE_CLGL_SHARING)
	{
		cl_mem objs[3];

		objs[0] = cl->mem_waterfall;
		objs[1] = cl->mem_histogram;
		objs[2] = cl->mem_spectrum;

		err = clEnqueueAcquireGLObjects(cl->cq, 3, objs, 0, NULL, NULL);
		CL_ERR_CHECK(err, "Unable to acquire GL objects");
	}

	/* Clear the buffers */
	err = cl_queue_clear_buffers(self);
	if (err != CL_SUCCESS)
		goto error;
}

/* Act depending on current mode */
if (self->flags & FLG_FOSPHOR_USE_CLGL_SHARING)
{
	/* If we use CL/GL sharing, we need to release the objects */
	cl_mem objs[3];

	objs[0] = cl->mem_waterfall;
	objs[1] = cl->mem_histogram;
	objs[2] = cl->mem_spectrum;

	err = clEnqueueReleaseGLObjects(cl->cq, 3, objs, 0, NULL, NULL);
	CL_ERR_CHECK(err, "Unable to release GL objects");
}
else
{
	/* If we don't use CL/GL sharing, we need to fetch the results */
	size_t img_origin[3] = { 0, 0, 0 };
	size_t img_region[3] = { 1024, 0, 1 };

		/* Waterfall */
	img_region[1] = 1024;

	err = clEnqueueReadImage(cl->cq,
		cl->mem_waterfall,
		CL_FALSE,
		img_origin,
		img_region,
		0,
		0,
		self->img_waterfall,
		0, NULL, NULL
	);
	CL_ERR_CHECK(err, "Unable to queue readback of waterfall image");

		/* Histogram */
	img_region[1] = 128;

	err = clEnqueueReadImage(cl->cq,
		cl->mem_histogram,
		CL_FALSE,
		img_origin,
		img_region,
		0,
		0,
		self->img_histogram,
		0, NULL, NULL
	);
	CL_ERR_CHECK(err, "Unable to queue readback of histogram image");

		/* Live spectrum */
	err = clEnqueueReadBuffer(cl->cq,
		cl->mem_spectrum,
		CL_FALSE,
		0,
		2 * 2 * sizeof(cl_float) * FOSPHOR_FFT_LEN,
		self->buf_spectrum,
		0, NULL, NULL
	);
	CL_ERR_CHECK(err, "Unable to queue readback of spectrum buffer");
}

/* Ensure CL is done */
clFinish(cl->cq);

/* New state */
cl->state = CL_READY;

return 1;

error:
return -EIO;
}

void
fosphor_cl_load_fft_window(struct fosphor *self, float *win)
{
struct fosphor_cl_state *cl = self->cl;

cl->fft_win = win;
cl->fft_win_updated = 1;

}

int
fosphor_cl_get_waterfall_position(struct fosphor *self)
{
struct fosphor_cl_state *cl = self->cl;

return cl->waterfall_pos;

}

void
fosphor_cl_set_histogram_range(struct fosphor *self,
float scale, float offset)
{
struct fosphor_cl_state *cl = self->cl;

cl->histo_scale  = scale * 128.0f;
cl->histo_offset = offset;

}

/*! @} */

adanowitz · July 20, 2017, 1:20pm

I’m running into the same issue with a GTX1050. Have you made any headway? Do you know if it worked with older versions of the driver?

adanowitz · July 20, 2017, 1:34pm

In the meantime, I’ve found a workaround here: gr-fosphor CL_INVALID_VALUE in clEnqueueFillBuffer call

Guillaume75 · July 20, 2017, 3:56pm

Hello adanowitz,

No infortunatly I keep stuck on it!
I workarounded the same way for make it work.

I didn’t test it with driver older than 375.
Newer are not working as well.

Guillaume

Topic		Replies	Views
troubles with CL/GL texture interoperability clCreateFromGLTextureXD fail but clCreateFromGLBuffer w CUDA Programming and Performance	3	1723	November 10, 2010
OpenCL Sample Code \| DD STOSIC. Simplified, straightforward, OpenCL source code. CUDA Programming and Performance	2	18665	February 17, 2011
EGL and OpenCL interop fails Linux	5	1342	April 6, 2020
OpenCL Driver BUG? clEnqueueReadImage fails to copy data from Image Object CUDA Programming and Performance	1	4288	April 6, 2011
OpenCL CL_OUT_OF_RESOURCES on event.wait() CUDA Programming and Performance	63	25084	November 19, 2010
Intel Opencl sample astonishing godrays implementation CUDA Programming and Performance	8	19344	November 29, 2010
OpenCL-GL interop context crashes on Linux with segmentation fault (driver 390.25). Linux	3	1488	October 14, 2021
performance question CUDA Programming and Performance	9	9930	August 4, 2010
Cannot create context for CPU CUDA Programming and Performance	16	31580	December 8, 2009
clGetEventProfilingInfo() zero end time cannot get kernel's end time while start is correct CUDA Programming and Performance	23	6084	November 14, 2010

CL / GL interop. clCreateFromGLBuffer return CL_OUT_OF_RESOURCES on GeForce GTX 1070 (Linux with Nvi...

include <OpenGL/OpenGL.h>

include <OpenGL/gl.h>

include <windows.h>

include <wingdi.h>

include <GL/glx.h>

Related topics