The build of the kernel now works with ATI SDK for a CPU device, I do not know why… really, I did not change anything… (just tried another kernel that worked, and came back to the first kernel). Don’t ask me…
Yes, I already saw the Khronos c++ bindings, but at fist glance i do not like it much either.
If your are interested and would like to give me feed back, you’ll find bellow my wrappers (It is just a first draft for my test purpose…) and a small test.
To switch betwwen CPU and GPU device, you have for now to uncomment the corresponding line
#include "my_opencl.h"
#include <iostream>
using namespace std;
int main()
{
typedef float value_type;
try
{
string s("__kernel void vec_add(int n, __global const float *X0, __global const float *X1, __global float *Y) { int i=get_global_id(0); Y[i]=X0[i]+X1[i]; }");
ocl_kernel kern = gpu_kernel(s,"vec_add");
int n=10;
value_type *X=new float[n], *Y=new float[n], *Z=new float[n];
for (int i=0; i<n; ++i) { X[i]=i; Y[i]=i; Z[i]=0; }
ocl_rvector<value_type> bX=gpu_rvector<value_type>(n);
ocl_rvector<value_type> bY=gpu_rvector<value_type>(n);
ocl_wvector<value_type> bZ=gpu_wvector<value_type>(n);
gpu_write(&X[0],bX);
gpu_write(&Y[0],bY);
gpu_exec(n,bX,bY,kern,bZ);
gpu_read(bZ,&Z[0]);
for (int i=0; i<n; ++i) cout << Z[i] << " ";
} catch (ocl_error &e) { cout << e.value() << " " << e.what() << endl; }
}
#ifndef GENIAL_OPENCL_H
#define GENIAL_OPENCL_H
#include <CL/cl.h>
#include <assert.h>
#include <string>
#include <vector>
#include <fstream>
using namespace std;
class ocl_error : public std::exception
{
private:
int err;
string msg;
public:
inline ocl_error() : err(CL_SUCCESS) {}
inline ocl_error(int e) : err(e) {}
inline ocl_error( const char *s) : err(CL_SUCCESS), msg(s) {}
inline ocl_error( const string &s) : err(CL_SUCCESS), msg(s) {}
inline ocl_error(int e, const char *s) : err(e) , msg(s) {}
inline ocl_error(int e, const string &s) : err(e) , msg(s) {}
inline ~ocl_error() throw() {}
inline int value() const { return err; }
inline const string &message() const { return msg; }
virtual const char *what() const throw() { return message().c_str(); }
};
inline void ocl_assert(cl_int err) { assert(err==CL_SUCCESS); }
inline void ocl_check(cl_int err) { if (err==CL_SUCCESS) return; throw ocl_error(err); }
inline void ocl_check(cl_int err,const char *msg) { if (err==CL_SUCCESS) return; throw ocl_error(err,msg); }
inline void ocl_check(bool b ) { if (b) return; throw ocl_error(); }
inline void ocl_check(bool b,const char *msg) { if (b) return; throw ocl_error(msg); }
template<class V>
class ocl_buffer
{
public:
typedef ocl_buffer self;
typedef V value_type;
typedef cl_mem id_type;
typedef cl_int int_type;
private:
int_type sz;
id_type mem;
public:
ocl_buffer(cl_context context, int n, cl_mem_flags flags=CL_MEM_READ_WRITE) : sz(n) { cl_int err; mem=clCreateBuffer(context,flags,nelms()*sizeof(value_type),NULL,&err); ocl_check(err); }
~ocl_buffer() { clReleaseMemObject(mem); }
id_type &id() { return mem; }
const id_type &id() const { return mem; }
int_type nelms() const { return sz; }
cl_uint dim() const { return 1; }
};
template<class V>
class ocl_vector : public ocl_buffer<V>
{
public:
typedef ocl_vector self;
typedef ocl_buffer<V> base;
typedef typename base::value_type value_type;
typedef typename base::int_type int_type;
public:
ocl_vector(cl_context context, int n, cl_mem_flags flags=CL_MEM_READ_WRITE) : base(context,n,flags) { }
};
template<class V> struct ocl_rvector : public ocl_vector<V> { typedef ocl_rvector self; typedef ocl_vector<V> base; ocl_rvector (cl_context context, int n) : base(context,n,CL_MEM_READ_ONLY ) {} };
template<class V> struct ocl_wvector : public ocl_vector<V> { typedef ocl_wvector self; typedef ocl_vector<V> base; ocl_wvector (cl_context context, int n) : base(context,n,CL_MEM_WRITE_ONLY) {} };
template<class V> struct ocl_rwvector : public ocl_vector<V> { typedef ocl_rwvector self; typedef ocl_vector<V> base; ocl_rwvector(cl_context context, int n) : base(context,n,CL_MEM_READ_WRITE) {} };
template<class V>
class ocl_matrix : public ocl_buffer<V>
{
public:
typedef ocl_matrix self;
typedef ocl_buffer<V> base;
typedef typename base::value_type value_type;
typedef typename base::int_type int_type;
private:
int_type s0,s1;
public:
ocl_matrix(cl_context context, int m,int n, cl_mem_flags flags=CL_MEM_READ_WRITE) : base(context,m*n,flags), s0(m), s1(n) { }
int_type nrows() const { return s0; }
int_type ncols() const { return s1; }
int_type dim() const { return 2; }
};
template<class V> struct ocl_rmatrix : public ocl_matrix<V> { typedef ocl_rmatrix self; typedef ocl_matrix<V> base; ocl_rmatrix (cl_context context, int m,int n) : base(context,m,n,CL_MEM_READ_ONLY ) {} };
template<class V> struct ocl_wmatrix : public ocl_matrix<V> { typedef ocl_wmatrix self; typedef ocl_matrix<V> base; ocl_wmatrix (cl_context context, int m,int n) : base(context,m,n,CL_MEM_WRITE_ONLY) {} };
template<class V> struct ocl_rwmatrix : public ocl_matrix<V> { typedef ocl_rwmatrix self; typedef ocl_matrix<V> base; ocl_rwmatrix(cl_context context, int m,int n) : base(context,m,n,CL_MEM_READ_WRITE) {} };
class ocl_kernel
{
public:
typedef ocl_kernel self;
typedef cl_uint dim_type;
typedef size_t size_type;
typedef size_type gws_type[3];
private:
cl_kernel kern;
dim_type d;
gws_type gws;
public:
ocl_kernel(cl_program prog, const char *name) : d(0) { init(prog,name); }
ocl_kernel(cl_program prog, const string &name) : d(0) { init(prog,name.c_str()); }
~ocl_kernel() { clReleaseKernel(id()); }
cl_kernel id() const { return kern; }
dim_type &dim() { return d; }
const dim_type &dim() const { return d; }
gws_type &global_work_size() { return gws; }
const gws_type &global_work_size() const { return gws; }
self &set_arg(int i, int x) { ocl_check(clSetKernelArg(id(),i,sizeof(int),(void*)&x)); return *this; }
template<class V> self &set_arg(int i, const ocl_vector<V> &x) { return set_buffer(i,x); }
template<class V> self &set_arg(int i, const ocl_matrix<V> &x) { dim()=__max(dim(),x.dim()); gws[0]=__max(gws[0],x.nrows());gws[1]=__max(gws[1],x.ncols()); ocl_check(clSetKernelArg(id(),i,sizeof(typename ocl_matrix<V>::id_type),(void*)&(x.id()))); return *this; }
private:
void init(cl_program prog, const char *name) { gws[0]=0; gws[1]=0; gws[2]=0; cl_int err; kern = clCreateKernel(prog, name, &err); ocl_check(err); }
template<class Buf> self &set_buffer(int i, const Buf &x) { dim()=__max(dim(),x.dim()); gws[0]=__max(gws[0],x.nelms()); ocl_check(clSetKernelArg(id(),i,sizeof(typename Buf::id_type),(void*)&(x.id()))); return *this; }
};
class ocl_devices : public vector<cl_device_id>
{
public:
typedef ocl_devices self;
public:
ocl_devices(cl_context context)
{
size_t sz;
ocl_check(clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &sz));
resize(sz/sizeof(cl_device_id));
ocl_check(clGetContextInfo(context, CL_CONTEXT_DEVICES, sz, &(*this)[0], NULL));
}
};
class ocl_program
{
public:
typedef ocl_program self;
private:
cl_context cont;
cl_program prog;
public:
ocl_program(cl_context context) : cont(context), prog(0) { }
ocl_program(cl_context context, const char *s) : cont(context) { init(s); }
ocl_program(cl_context context, const string &s) : cont(context) { init(s.c_str()); }
ocl_program(const char *fname, cl_context context) : cont(context) { load(fname); }
ocl_program(const string &fname, cl_context context) : cont(context) { load(fname); }
~ocl_program() { clReleaseProgram(id()); }
cl_context context_id() const { return cont; }
cl_program id () const { return prog; }
self &load(const string &fname) { return load(fname.c_str()); }
self &load(const char *fname)
{
ifstream is(fname);
ocl_check(is.is_open(),"File not found");
string src=string(istreambuf_iterator<char>(is),istreambuf_iterator<char>());
return init(&src[0]);
}
ocl_kernel kernel(const char *name) { return ocl_kernel(id(),name); }
ocl_kernel kernel(const string &name) { return ocl_kernel(id(),name); }
private:
self &init(const char *s) { cl_int err; prog = clCreateProgramWithSource(context_id(), 1, &s, NULL, &err); ocl_check(err); build(); return *this; }
self &build()
{
cl_int err=clBuildProgram(id(), 0, NULL, NULL, NULL, NULL);
//ocl_check(err);
if (err != CL_SUCCESS)
{
size_t n;
cl_device_id dev= ocl_devices(context_id())[0];
ocl_check(clGetProgramBuildInfo(id(), dev, CL_PROGRAM_BUILD_LOG, 0, NULL, &n));
string s(n,'
#ifndef GENIAL_OPENCL_H
#define GENIAL_OPENCL_H
#include <CL/cl.h>
#include <assert.h>
#include
#include
#include
using namespace std;
class ocl_error : public std::exception
{
private:
int err;
string msg;
public:
inline ocl_error() : err(CL_SUCCESS) {}
inline ocl_error(int e) : err(e) {}
inline ocl_error( const char *s) : err(CL_SUCCESS), msg(s) {}
inline ocl_error( const string &s) : err(CL_SUCCESS), msg(s) {}
inline ocl_error(int e, const char *s) : err(e) , msg(s) {}
inline ocl_error(int e, const string &s) : err(e) , msg(s) {}
inline ~ocl_error() throw() {}
inline int value() const { return err; }
inline const string &message() const { return msg; }
virtual const char *what() const throw() { return message().c_str(); }
};
inline void ocl_assert(cl_int err) { assert(err==CL_SUCCESS); }
inline void ocl_check(cl_int err) { if (err==CL_SUCCESS) return; throw ocl_error(err); }
inline void ocl_check(cl_int err,const char *msg) { if (err==CL_SUCCESS) return; throw ocl_error(err,msg); }
inline void ocl_check(bool b ) { if (b) return; throw ocl_error(); }
inline void ocl_check(bool b,const char *msg) { if (b) return; throw ocl_error(msg); }
template
class ocl_buffer
{
public:
typedef ocl_buffer self;
typedef V value_type;
typedef cl_mem id_type;
typedef cl_int int_type;
private:
int_type sz;
id_type mem;
public:
ocl_buffer(cl_context context, int n, cl_mem_flags flags=CL_MEM_READ_WRITE) : sz(n) { cl_int err; mem=clCreateBuffer(context,flags,nelms()*sizeof(value_type),NULL,&err); ocl_check(err); }
~ocl_buffer() { clReleaseMemObject(mem); }
id_type &id() { return mem; }
const id_type &id() const { return mem; }
int_type nelms() const { return sz; }
cl_uint dim() const { return 1; }
};
template
class ocl_vector : public ocl_buffer
{
public:
typedef ocl_vector self;
typedef ocl_buffer<V> base;
typedef typename base::value_type value_type;
typedef typename base::int_type int_type;
public:
ocl_vector(cl_context context, int n, cl_mem_flags flags=CL_MEM_READ_WRITE) : base(context,n,flags) { }
};
template struct ocl_rvector : public ocl_vector { typedef ocl_rvector self; typedef ocl_vector base; ocl_rvector (cl_context context, int n) : base(context,n,CL_MEM_READ_ONLY ) {} };
template struct ocl_wvector : public ocl_vector { typedef ocl_wvector self; typedef ocl_vector base; ocl_wvector (cl_context context, int n) : base(context,n,CL_MEM_WRITE_ONLY) {} };
template struct ocl_rwvector : public ocl_vector { typedef ocl_rwvector self; typedef ocl_vector base; ocl_rwvector(cl_context context, int n) : base(context,n,CL_MEM_READ_WRITE) {} };
template
class ocl_matrix : public ocl_buffer
{
public:
typedef ocl_matrix self;
typedef ocl_buffer<V> base;
typedef typename base::value_type value_type;
typedef typename base::int_type int_type;
private:
int_type s0,s1;
public:
ocl_matrix(cl_context context, int m,int n, cl_mem_flags flags=CL_MEM_READ_WRITE) : base(context,m*n,flags), s0(m), s1(n) { }
int_type nrows() const { return s0; }
int_type ncols() const { return s1; }
int_type dim() const { return 2; }
};
template struct ocl_rmatrix : public ocl_matrix { typedef ocl_rmatrix self; typedef ocl_matrix base; ocl_rmatrix (cl_context context, int m,int n) : base(context,m,n,CL_MEM_READ_ONLY ) {} };
template struct ocl_wmatrix : public ocl_matrix { typedef ocl_wmatrix self; typedef ocl_matrix base; ocl_wmatrix (cl_context context, int m,int n) : base(context,m,n,CL_MEM_WRITE_ONLY) {} };
template struct ocl_rwmatrix : public ocl_matrix { typedef ocl_rwmatrix self; typedef ocl_matrix base; ocl_rwmatrix(cl_context context, int m,int n) : base(context,m,n,CL_MEM_READ_WRITE) {} };
class ocl_kernel
{
public:
typedef ocl_kernel self;
typedef cl_uint dim_type;
typedef size_t size_type;
typedef size_type gws_type[3];
private:
cl_kernel kern;
dim_type d;
gws_type gws;
public:
ocl_kernel(cl_program prog, const char *name) : d(0) { init(prog,name); }
ocl_kernel(cl_program prog, const string &name) : d(0) { init(prog,name.c_str()); }
~ocl_kernel() { clReleaseKernel(id()); }
cl_kernel id() const { return kern; }
dim_type &dim() { return d; }
const dim_type &dim() const { return d; }
gws_type &global_work_size() { return gws; }
const gws_type &global_work_size() const { return gws; }
self &set_arg(int i, int x) { ocl_check(clSetKernelArg(id(),i,sizeof(int),(void*)&x)); return *this; }
template<class V> self &set_arg(int i, const ocl_vector<V> &x) { return set_buffer(i,x); }
template<class V> self &set_arg(int i, const ocl_matrix<V> &x) { dim()=__max(dim(),x.dim()); gws[0]=__max(gws[0],x.nrows());gws[1]=__max(gws[1],x.ncols()); ocl_check(clSetKernelArg(id(),i,sizeof(typename ocl_matrix<V>::id_type),(void*)&(x.id()))); return *this; }
private:
void init(cl_program prog, const char *name) { gws[0]=0; gws[1]=0; gws[2]=0; cl_int err; kern = clCreateKernel(prog, name, &err); ocl_check(err); }
template<class Buf> self &set_buffer(int i, const Buf &x) { dim()=__max(dim(),x.dim()); gws[0]=__max(gws[0],x.nelms()); ocl_check(clSetKernelArg(id(),i,sizeof(typename Buf::id_type),(void*)&(x.id()))); return *this; }
};
class ocl_devices : public vector<cl_device_id>
{
public:
typedef ocl_devices self;
public:
ocl_devices(cl_context context)
{
size_t sz;
ocl_check(clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &sz));
resize(sz/sizeof(cl_device_id));
ocl_check(clGetContextInfo(context, CL_CONTEXT_DEVICES, sz, &(*this)[0], NULL));
}
};
class ocl_program
{
public:
typedef ocl_program self;
private:
cl_context cont;
cl_program prog;
public:
ocl_program(cl_context context) : cont(context), prog(0) { }
ocl_program(cl_context context, const char *s) : cont(context) { init(s); }
ocl_program(cl_context context, const string &s) : cont(context) { init(s.c_str()); }
ocl_program(const char *fname, cl_context context) : cont(context) { load(fname); }
ocl_program(const string &fname, cl_context context) : cont(context) { load(fname); }
~ocl_program() { clReleaseProgram(id()); }
cl_context context_id() const { return cont; }
cl_program id () const { return prog; }
self &load(const string &fname) { return load(fname.c_str()); }
self &load(const char *fname)
{
ifstream is(fname);
ocl_check(is.is_open(),"File not found");
string src=string(istreambuf_iterator<char>(is),istreambuf_iterator<char>());
return init(&src[0]);
}
ocl_kernel kernel(const char *name) { return ocl_kernel(id(),name); }
ocl_kernel kernel(const string &name) { return ocl_kernel(id(),name); }
private:
self &init(const char *s) { cl_int err; prog = clCreateProgramWithSource(context_id(), 1, &s, NULL, &err); ocl_check(err); build(); return *this; }
self &build()
{
cl_int err=clBuildProgram(id(), 0, NULL, NULL, NULL, NULL);
//ocl_check(err);
if (err != CL_SUCCESS)
{
size_t n;
cl_device_id dev= ocl_devices(context_id())[0];
ocl_check(clGetProgramBuildInfo(id(), dev, CL_PROGRAM_BUILD_LOG, 0, NULL, &n));
string s(n,'\0');
ocl_check(clGetProgramBuildInfo(id(), dev, CL_PROGRAM_BUILD_LOG, n, &s[0], NULL));
throw ocl_error(err,s);
}
return *this;
}
};
class ocl_file
{
private:
string fname;
public:
ocl_file() : fname() {}
ocl_file(const char *s) : fname(s) {}
ocl_file(const string &s) : fname(s) {}
string &name() { return fname; }
const string &name() const { return fname; }
};
void operator>>(const ocl_file &file, ocl_program &prog) { prog.load(file.name()); }
class ocl_command_queue
{
public:
typedef ocl_command_queue self;
private:
cl_command_queue cmd_queue;
public:
ocl_command_queue(cl_context context, cl_device_id device) { cl_int err; cmd_queue = clCreateCommandQueue(context, device, 0, &err); ocl_check(err); }
~ocl_command_queue() { clReleaseCommandQueue(cmd_queue); }
cl_command_queue id() const { return cmd_queue; }
template<class V> void write(const V *p, ocl_buffer<V> &X) { ocl_check(clEnqueueWriteBuffer(id(),X.id(),CL_FALSE,0,sizeof(V)*X.nelms(), p, 0, NULL, NULL)); }
template<class V> void read (const ocl_buffer<V> &X, V *p) { ocl_check(clEnqueueReadBuffer (id(),X.id(),CL_TRUE ,0,sizeof(V)*X.nelms(), p, 0, NULL, NULL)); }
void operator()(const ocl_kernel &kern) { ocl_check(clEnqueueNDRangeKernel(id(),kern.id(), kern.dim(), NULL, &kern.global_work_size()[0], NULL, 0, NULL, NULL)); }
template< class Y0> void operator()( ocl_kernel &kern, Y0 &y0) { eval(0,kern,y0); }
template<class X0 ,class Y0> void operator()(const X0 &x0 , ocl_kernel &kern, Y0 &y0) { set_input(0,kern,x0 ); eval(1,kern,y0); }
template<class X0,class X1 ,class Y0> void operator()(const X0 &x0,const X1 &x1 , ocl_kernel &kern, Y0 &y0) { set_input(0,kern,x0,x1 ); eval(2,kern,y0); }
template<class X0,class X1,class X2 ,class Y0> void operator()(const X0 &x0,const X1 &x1,const X2 &x2 , ocl_kernel &kern, Y0 &y0) { set_input(0,kern,x0,x1,x2 ); eval(3,kern,y0); }
template<class X0,class X1,class X2,class X3,class Y0> void operator()(const X0 &x0,const X1 &x1,const X2 &x2,const X3 &x3, ocl_kernel &kern, Y0 &y0) { set_input(0,kern,x0,x1,x2,x3); eval(4,kern,y0); }
private:
template<class V> void set_input_arg (int i,ocl_kernel &kern, const V &x) { kern.set_arg(i,x); }
template<class V> void set_output_arg(int i,ocl_kernel &kern, const V &y) { kern.set_arg(i,y); }
template<class V> void get_output_arg(int i,ocl_kernel &kern, const V &y) { }
template<class X0 > void set_input (int i,ocl_kernel &kern, const X0 &x0 ) { set_input_arg (i,kern,x0); }
template<class X0,class X1 > void set_input (int i,ocl_kernel &kern, const X0 &x0,const X1 &x1 ) { set_input_arg (i,kern,x0); set_input_arg(i+1,kern,x1); }
template<class X0,class X1,class X2 > void set_input (int i,ocl_kernel &kern, const X0 &x0,const X1 &x1,const X2 &x2 ) { set_input_arg (i,kern,x0); set_input_arg(i+1,kern,x1); set_input_arg(i+2,kern,x2); }
template<class X0,class X1,class X2,class X3> void set_input (int i,ocl_kernel &kern, const X0 &x0,const X1 &x1,const X2 &x2,const X3 &x3) { set_input_arg (i,kern,x0); set_input_arg(i+1,kern,x1); set_input_arg(i+2,kern,x2); set_input_arg(i+3,kern,x3); }
template<class Y0 > void set_output(int i,ocl_kernel &kern, const Y0 &y0 ) { set_output_arg(i,kern,y0); }
template<class V0> void get_output(int i, ocl_kernel &kern, V0 &y0) { }
template<class Y0> void eval(int i, ocl_kernel &kern, Y0 &y0) { set_output(i,kern,y0); (*this)(kern); get_output(i,kern,y0); }
};
class ocl_context
{
public:
typedef ocl_context self;
private:
cl_context context;
public:
ocl_context() : context(0) {}
ocl_context(cl_device_type dev)
{
cl_int err;
context = clCreateContextFromType(0, dev, NULL, NULL, &err);
ocl_check(err);
}
ocl_context(self &r) : context(r.context) { r.context=0; }
~ocl_context() { destroy(); }
self &operator=(self &r) { destroy(); context=r.id(); r.context=0; return *this; }
cl_context id () const { return context; }
ocl_devices devices () const { return ocl_devices(id()); }
ocl_command_queue command_queue() const { return ocl_command_queue(id(),devices()[0]); }
template<class V> ocl_rvector <V> rvector (int n) const { return ocl_rvector <V>(id(),n); }
template<class V> ocl_wvector <V> wvector (int n) const { return ocl_wvector <V>(id(),n); }
template<class V> ocl_rwvector<V> rwvector(int n) const { return ocl_rwvector<V>(id(),n); }
template<class V> ocl_rmatrix <V> rmatrix (int m,int n) const { return ocl_rmatrix <V>(id(),m,n); }
template<class V> ocl_wmatrix <V> wmatrix (int m,int n) const { return ocl_wmatrix <V>(id(),m,n); }
template<class V> ocl_rwmatrix<V> rwmatrix(int m,int n) const { return ocl_rwmatrix<V>(id(),m,n); }
ocl_program program(const char *s) const { return ocl_program(id(),s); }
ocl_program program(const string &s) const { return ocl_program(id(),s); }
ocl_program load_program(const char *fname) const { ocl_file is(fname); ocl_program prog(id()); is>>prog; return prog; }
ocl_program load_program(const string &fname) const { ocl_file is(fname); ocl_program prog(id()); is>>prog; return prog; }
ocl_kernel kernel(const char *src, const char *kname) const { return program(src).kernel(kname); }
ocl_kernel kernel(const string &src, const string &kname) const { return program(src).kernel(kname); }
ocl_kernel load_kernel(const char *fname, const char *kname) const { return load_program(fname).kernel(kname); }
ocl_kernel load_kernel(const string &fname, const string &kname) const { return load_program(fname).kernel(kname); }
private:
void destroy() { if (id()!=0) clReleaseContext(id()); }
};
struct ocl_gpu : public ocl_context { typedef ocl_gpu self; typedef ocl_context base; ocl_gpu() : base(CL_DEVICE_TYPE_GPU) {} };
struct ocl_cpu : public ocl_context { typedef ocl_cpu self; typedef ocl_context base; ocl_cpu() : base(CL_DEVICE_TYPE_CPU) {} };
static ocl_context ocl_current_context = ocl_cpu();
//static ocl_context ocl_current_context = ocl_gpu();
static ocl_command_queue ocl_current_command_queue = ocl_current_context.command_queue();
inline void gpu_set(ocl_context &context) { ocl_current_context=context; ocl_current_command_queue=ocl_current_context.command_queue(); }
template ocl_rvector gpu_rvector (int n) { return ocl_current_context.rvector (n); }
template ocl_wvector gpu_wvector (int n) { return ocl_current_context.wvector (n); }
template ocl_rwvector gpu_rwvector(int n) { return ocl_current_context.rwvector(n); }
template ocl_rmatrix gpu_rmatrix (int m,int n) { return ocl_current_context.rmatrix (m,n); }
template ocl_wmatrix gpu_wmatrix (int m,int n) { return ocl_current_context.wmatrix (m,n); }
template ocl_rwmatrix gpu_rwmatrix(int m,int n) { return ocl_current_context.rwmatrix(m,n); }
inline ocl_program gpu_program(const char *src) { return ocl_current_context.program(src); }
inline ocl_program gpu_program(const string &src) { return ocl_current_context.program(src); }
inline ocl_program gpu_load_program(const char *fname) { return ocl_current_context.load_program(fname); }
inline ocl_program gpu_load_program(const string &fname) { return ocl_current_context.load_program(fname); }
inline ocl_kernel gpu_kernel(const char *src, const char *kname) { return ocl_current_context.kernel(src,kname); }
inline ocl_kernel gpu_kernel(const string &src, const string &kname) { return ocl_current_context.kernel(src,kname); }
inline ocl_kernel gpu_load_kernel(const char *fname, const char *kname) { return ocl_current_context.load_kernel(fname,kname); }
inline ocl_kernel gpu_load_kernel(const string &fname, const string &kname) { return ocl_current_context.load_kernel(fname,kname); }
template void gpu_write(const V *p, ocl_buffer &X) { ocl_current_command_queue.write(p,X); }
template void gpu_read (const ocl_buffer &X, V *p) { ocl_current_command_queue.read (X,p); }
template< class Y0> void gpu_exec( ocl_kernel &kern, Y0 &y0) { return ocl_current_command_queue( kern,y0); }
template<class X0 ,class Y0> void gpu_exec(const X0 &x0 , ocl_kernel &kern, Y0 &y0) { return ocl_current_command_queue(x0 ,kern,y0); }
template<class X0,class X1 ,class Y0> void gpu_exec(const X0 &x0,const X1 &x1 , ocl_kernel &kern, Y0 &y0) { return ocl_current_command_queue(x0,x1 ,kern,y0); }
template<class X0,class X1,class X2 ,class Y0> void gpu_exec(const X0 &x0,const X1 &x1,const X2 &x2 , ocl_kernel &kern, Y0 &y0) { return ocl_current_command_queue(x0,x1,x2 ,kern,y0); }
template<class X0,class X1,class X2,class X3,class Y0> void gpu_exec(const X0 &x0,const X1 &x1,const X2 &x2,const X3 &x3, ocl_kernel &kern, Y0 &y0) { return ocl_current_command_queue(x0,x1,x2,x3,kern,y0); }
#endif
');
ocl_check(clGetProgramBuildInfo(id(), dev, CL_PROGRAM_BUILD_LOG, n, &s[0], NULL));
throw ocl_error(err,s);
}
return *this;
}
};
class ocl_file
{
private:
string fname;
public:
ocl_file() : fname() {}
ocl_file(const char *s) : fname(s) {}
ocl_file(const string &s) : fname(s) {}
string &name() { return fname; }
const string &name() const { return fname; }
};
void operator>>(const ocl_file &file, ocl_program &prog) { prog.load(file.name()); }
class ocl_command_queue
{
public:
typedef ocl_command_queue self;
private:
cl_command_queue cmd_queue;
public:
ocl_command_queue(cl_context context, cl_device_id device) { cl_int err; cmd_queue = clCreateCommandQueue(context, device, 0, &err); ocl_check(err); }
~ocl_command_queue() { clReleaseCommandQueue(cmd_queue); }
cl_command_queue id() const { return cmd_queue; }
template<class V> void write(const V *p, ocl_buffer<V> &X) { ocl_check(clEnqueueWriteBuffer(id(),X.id(),CL_FALSE,0,sizeof(V)*X.nelms(), p, 0, NULL, NULL)); }
template<class V> void read (const ocl_buffer<V> &X, V *p) { ocl_check(clEnqueueReadBuffer (id(),X.id(),CL_TRUE ,0,sizeof(V)*X.nelms(), p, 0, NULL, NULL)); }
void operator()(const ocl_kernel &kern) { ocl_check(clEnqueueNDRangeKernel(id(),kern.id(), kern.dim(), NULL, &kern.global_work_size()[0], NULL, 0, NULL, NULL)); }
template< class Y0> void operator()( ocl_kernel &kern, Y0 &y0) { eval(0,kern,y0); }
template<class X0 ,class Y0> void operator()(const X0 &x0 , ocl_kernel &kern, Y0 &y0) { set_input(0,kern,x0 ); eval(1,kern,y0); }
template<class X0,class X1 ,class Y0> void operator()(const X0 &x0,const X1 &x1 , ocl_kernel &kern, Y0 &y0) { set_input(0,kern,x0,x1 ); eval(2,kern,y0); }
template<class X0,class X1,class X2 ,class Y0> void operator()(const X0 &x0,const X1 &x1,const X2 &x2 , ocl_kernel &kern, Y0 &y0) { set_input(0,kern,x0,x1,x2 ); eval(3,kern,y0); }
template<class X0,class X1,class X2,class X3,class Y0> void operator()(const X0 &x0,const X1 &x1,const X2 &x2,const X3 &x3, ocl_kernel &kern, Y0 &y0) { set_input(0,kern,x0,x1,x2,x3); eval(4,kern,y0); }
private:
template<class V> void set_input_arg (int i,ocl_kernel &kern, const V &x) { kern.set_arg(i,x); }
template<class V> void set_output_arg(int i,ocl_kernel &kern, const V &y) { kern.set_arg(i,y); }
template<class V> void get_output_arg(int i,ocl_kernel &kern, const V &y) { }
template<class X0 > void set_input (int i,ocl_kernel &kern, const X0 &x0 ) { set_input_arg (i,kern,x0); }
template<class X0,class X1 > void set_input (int i,ocl_kernel &kern, const X0 &x0,const X1 &x1 ) { set_input_arg (i,kern,x0); set_input_arg(i+1,kern,x1); }
template<class X0,class X1,class X2 > void set_input (int i,ocl_kernel &kern, const X0 &x0,const X1 &x1,const X2 &x2 ) { set_input_arg (i,kern,x0); set_input_arg(i+1,kern,x1); set_input_arg(i+2,kern,x2); }
template<class X0,class X1,class X2,class X3> void set_input (int i,ocl_kernel &kern, const X0 &x0,const X1 &x1,const X2 &x2,const X3 &x3) { set_input_arg (i,kern,x0); set_input_arg(i+1,kern,x1); set_input_arg(i+2,kern,x2); set_input_arg(i+3,kern,x3); }
template<class Y0 > void set_output(int i,ocl_kernel &kern, const Y0 &y0 ) { set_output_arg(i,kern,y0); }
template<class V0> void get_output(int i, ocl_kernel &kern, V0 &y0) { }
template<class Y0> void eval(int i, ocl_kernel &kern, Y0 &y0) { set_output(i,kern,y0); (*this)(kern); get_output(i,kern,y0); }
};
class ocl_context
{
public:
typedef ocl_context self;
private:
cl_context context;
public:
ocl_context() : context(0) {}
ocl_context(cl_device_type dev)
{
cl_int err;
context = clCreateContextFromType(0, dev, NULL, NULL, &err);
ocl_check(err);
}
ocl_context(self &r) : context(r.context) { r.context=0; }
~ocl_context() { destroy(); }
self &operator=(self &r) { destroy(); context=r.id(); r.context=0; return *this; }
cl_context id () const { return context; }
ocl_devices devices () const { return ocl_devices(id()); }
ocl_command_queue command_queue() const { return ocl_command_queue(id(),devices()[0]); }
template<class V> ocl_rvector <V> rvector (int n) const { return ocl_rvector <V>(id(),n); }
template<class V> ocl_wvector <V> wvector (int n) const { return ocl_wvector <V>(id(),n); }
template<class V> ocl_rwvector<V> rwvector(int n) const { return ocl_rwvector<V>(id(),n); }
template<class V> ocl_rmatrix <V> rmatrix (int m,int n) const { return ocl_rmatrix <V>(id(),m,n); }
template<class V> ocl_wmatrix <V> wmatrix (int m,int n) const { return ocl_wmatrix <V>(id(),m,n); }
template<class V> ocl_rwmatrix<V> rwmatrix(int m,int n) const { return ocl_rwmatrix<V>(id(),m,n); }
ocl_program program(const char *s) const { return ocl_program(id(),s); }
ocl_program program(const string &s) const { return ocl_program(id(),s); }
ocl_program load_program(const char *fname) const { ocl_file is(fname); ocl_program prog(id()); is>>prog; return prog; }
ocl_program load_program(const string &fname) const { ocl_file is(fname); ocl_program prog(id()); is>>prog; return prog; }
ocl_kernel kernel(const char *src, const char *kname) const { return program(src).kernel(kname); }
ocl_kernel kernel(const string &src, const string &kname) const { return program(src).kernel(kname); }
ocl_kernel load_kernel(const char *fname, const char *kname) const { return load_program(fname).kernel(kname); }
ocl_kernel load_kernel(const string &fname, const string &kname) const { return load_program(fname).kernel(kname); }
private:
void destroy() { if (id()!=0) clReleaseContext(id()); }
};
struct ocl_gpu : public ocl_context { typedef ocl_gpu self; typedef ocl_context base; ocl_gpu() : base(CL_DEVICE_TYPE_GPU) {} };
struct ocl_cpu : public ocl_context { typedef ocl_cpu self; typedef ocl_context base; ocl_cpu() : base(CL_DEVICE_TYPE_CPU) {} };
static ocl_context ocl_current_context = ocl_cpu();
//static ocl_context ocl_current_context = ocl_gpu();
static ocl_command_queue ocl_current_command_queue = ocl_current_context.command_queue();
inline void gpu_set(ocl_context &context) { ocl_current_context=context; ocl_current_command_queue=ocl_current_context.command_queue(); }
template<class V> ocl_rvector <V> gpu_rvector (int n) { return ocl_current_context.rvector <V>(n); }
template<class V> ocl_wvector <V> gpu_wvector (int n) { return ocl_current_context.wvector <V>(n); }
template<class V> ocl_rwvector<V> gpu_rwvector(int n) { return ocl_current_context.rwvector<V>(n); }
template<class V> ocl_rmatrix <V> gpu_rmatrix (int m,int n) { return ocl_current_context.rmatrix <V>(m,n); }
template<class V> ocl_wmatrix <V> gpu_wmatrix (int m,int n) { return ocl_current_context.wmatrix <V>(m,n); }
template<class V> ocl_rwmatrix<V> gpu_rwmatrix(int m,int n) { return ocl_current_context.rwmatrix<V>(m,n); }
inline ocl_program gpu_program(const char *src) { return ocl_current_context.program(src); }
inline ocl_program gpu_program(const string &src) { return ocl_current_context.program(src); }
inline ocl_program gpu_load_program(const char *fname) { return ocl_current_context.load_program(fname); }
inline ocl_program gpu_load_program(const string &fname) { return ocl_current_context.load_program(fname); }
inline ocl_kernel gpu_kernel(const char *src, const char *kname) { return ocl_current_context.kernel(src,kname); }
inline ocl_kernel gpu_kernel(const string &src, const string &kname) { return ocl_current_context.kernel(src,kname); }
inline ocl_kernel gpu_load_kernel(const char *fname, const char *kname) { return ocl_current_context.load_kernel(fname,kname); }
inline ocl_kernel gpu_load_kernel(const string &fname, const string &kname) { return ocl_current_context.load_kernel(fname,kname); }
template<class V> void gpu_write(const V *p, ocl_buffer<V> &X) { ocl_current_command_queue.write(p,X); }
template<class V> void gpu_read (const ocl_buffer<V> &X, V *p) { ocl_current_command_queue.read (X,p); }
template< class Y0> void gpu_exec( ocl_kernel &kern, Y0 &y0) { return ocl_current_command_queue( kern,y0); }
template<class X0 ,class Y0> void gpu_exec(const X0 &x0 , ocl_kernel &kern, Y0 &y0) { return ocl_current_command_queue(x0 ,kern,y0); }
template<class X0,class X1 ,class Y0> void gpu_exec(const X0 &x0,const X1 &x1 , ocl_kernel &kern, Y0 &y0) { return ocl_current_command_queue(x0,x1 ,kern,y0); }
template<class X0,class X1,class X2 ,class Y0> void gpu_exec(const X0 &x0,const X1 &x1,const X2 &x2 , ocl_kernel &kern, Y0 &y0) { return ocl_current_command_queue(x0,x1,x2 ,kern,y0); }
template<class X0,class X1,class X2,class X3,class Y0> void gpu_exec(const X0 &x0,const X1 &x1,const X2 &x2,const X3 &x3, ocl_kernel &kern, Y0 &y0) { return ocl_current_command_queue(x0,x1,x2,x3,kern,y0); }
#endif