I’m trying to do some work with image memory, except everything I do returns the image texture as zeros. I’ve been working on this for far too many hours. Help! Why can’t I get the read_imagef() functions to return anything useful?
Here is my complete code worked into a simple example:
#include <stdio.h>
#include <assert.h>
#include <sys/sysctl.h>
#include <sys/stat.h>
#include <stdlib.h>
#include <stdio.h>
#include <OpenCL/OpenCL.h>
#pragma mark -
#pragma mark Utilities
char * load_program_source(const char *filename)
{
struct stat statbuf;
FILE *fh;
char *source;
fh = fopen(filename, "r");
if (fh == 0)
return 0;
stat(filename, &statbuf);
source = (char *) malloc(statbuf.st_size + 1);
fread(source, statbuf.st_size, 1, fh);
source[statbuf.st_size] = '
#include <stdio.h>
#include <assert.h>
#include <sys/sysctl.h>
#include <sys/stat.h>
#include <stdlib.h>
#include <stdio.h>
#include <OpenCL/OpenCL.h>
#pragma mark -
#pragma mark Utilities
char * load_program_source(const char *filename)
{
struct stat statbuf;
FILE *fh;
char *source;
fh = fopen(filename, "r");
if (fh == 0)
return 0;
stat(filename, &statbuf);
source = (char *) malloc(statbuf.st_size + 1);
fread(source, statbuf.st_size, 1, fh);
source[statbuf.st_size] = '\0';
return source;
}
#pragma mark -
#pragma mark Main OpenCL Routine
int runCL(char * a, char * b, float * results, int n)
{
cl_program program[1];
cl_kernel kernel[2];
cl_command_queue cmd_queue;
cl_context context;
cl_device_id cpu = NULL, device = NULL;
cl_int err = 0;
size_t returned_size = 0;
cl_mem a_mem, b_mem, ans_mem;
#pragma mark Device Information
{
// List all devices
cl_uint num_dev;
cl_device_id dev_list[5];
// Find the CPU CL device
err = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_CPU, 1, &device, NULL);
assert(err == CL_SUCCESS);
assert(device);
// Get some information about the returned device
cl_char vendor_name[1024] = {0};
cl_char device_name[1024] = {0};
err = clGetDeviceInfo(device, CL_DEVICE_VENDOR, sizeof(vendor_name),
vendor_name, &returned_size);
err |= clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(device_name),
device_name, &returned_size);
assert(err == CL_SUCCESS);
printf("Connecting to %s %s...\n", vendor_name, device_name);
}
#pragma mark Context and Command Queue
{
// Now create a context to perform our calculation with the
// specified device
context = clCreateContext(0, 1, &device, NULL, NULL, &err);
assert(err == CL_SUCCESS);
// And also a command queue for the context
cmd_queue = clCreateCommandQueue(context, device, 0, NULL);
}
#pragma mark Program and Kernel Creation
{
// Load the program source from disk
// The kernel/program is the project directory and in Xcode the executable
// is set to launch from that directory hence we use a relative path
const char * filename = "example.cl";
char *program_source = load_program_source(filename);
program[0] = clCreateProgramWithSource(context, 1, (const char**)&program_source,
NULL, &err);
assert(err == CL_SUCCESS);
err = clBuildProgram(program[0], 1, &device, "-cl-fast-relaxed-math", NULL, NULL);
assert(err == CL_SUCCESS);
// Now create the kernel "objects" that we want to use in the example file
kernel[0] = clCreateKernel(program[0], "add", &err);
}
#pragma mark Memory Allocation
{
cl_image_format imgFmt;
imgFmt.image_channel_order = CL_A;
imgFmt.image_channel_data_type = CL_UNORM_INT8;
// Allocate memory on the device to hold our data and store the results into
a_mem = clCreateImage2D(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, &imgFmt,
10, 10, sizeof(char) * 10, a, &err);
assert(err == CL_SUCCESS);
b_mem = clCreateImage2D(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, &imgFmt,
10, 10, sizeof(char) * 10, b, &err);
assert(err == CL_SUCCESS);
// Results array
ans_mem = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(float)*100, NULL, NULL);
// Get all of the stuff written and allocated
clFinish(cmd_queue);
}
#pragma mark Kernel Arguments
{
// Now setup the arguments to our kernel
err = clSetKernelArg(kernel[0], 0, sizeof(cl_mem), &a_mem);
err |= clSetKernelArg(kernel[0], 1, sizeof(cl_mem), &b_mem);
err |= clSetKernelArg(kernel[0], 2, sizeof(cl_mem), &ans_mem);
assert(err == CL_SUCCESS);
}
#pragma mark Execution and Read
{
// Run the calculation by enqueuing it and forcing the
// command queue to complete the task
size_t global_work_size = n;
err = clEnqueueNDRangeKernel(cmd_queue, kernel[0], 1, NULL,
&global_work_size, NULL, 0, NULL, NULL);
assert(err == CL_SUCCESS);
clFinish(cmd_queue);
// Once finished read back the results from the answer
// array into the results array
err = clEnqueueReadBuffer(cmd_queue, ans_mem, CL_TRUE, 0, sizeof(float)*100,
results, 0, NULL, NULL);
assert(err == CL_SUCCESS);
clFinish(cmd_queue);
}
#pragma mark Teardown
{
clReleaseMemObject(a_mem);
clReleaseMemObject(b_mem);
clReleaseMemObject(ans_mem);
clReleaseCommandQueue(cmd_queue);
clReleaseContext(context);
}
return CL_SUCCESS;
}
int main (int argc, const char * argv) {
// Problem size
int n = 100;
int i;
// Allocate some memory and a place for the results
char * a = (char *)malloc(n*sizeof(char));
char * b = (char *)malloc(n*sizeof(char));
float * results = (float *)malloc(n*sizeof(float));
// Fill in the values
for(i=0;i<n;i++){
a[i] = (char)i;
b[i] = (char)n-i;
results[i] = 0.f;
}
// Do the OpenCL calculation
runCL(a, b, results, n);
// Print out some results. For this example the values of all elements
// should be the same as the value of n
for(i=0;i<n && i<32;i++) printf("%f\n",results[i]);
// Free up memory
free(a);
free(b);
free(results);
return 0;
}
';
return source;
}
#pragma mark -
#pragma mark Main OpenCL Routine
int runCL(char * a, char * b, float * results, int n)
{
cl_program program[1];
cl_kernel kernel[2];
cl_command_queue cmd_queue;
cl_context context;
cl_device_id cpu = NULL, device = NULL;
cl_int err = 0;
size_t returned_size = 0;
cl_mem a_mem, b_mem, ans_mem;
#pragma mark Device Information
{
// List all devices
cl_uint num_dev;
cl_device_id dev_list[5];
// Find the CPU CL device
err = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_CPU, 1, &device, NULL);
assert(err == CL_SUCCESS);
assert(device);
// Get some information about the returned device
cl_char vendor_name[1024] = {0};
cl_char device_name[1024] = {0};
err = clGetDeviceInfo(device, CL_DEVICE_VENDOR, sizeof(vendor_name),
vendor_name, &returned_size);
err |= clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(device_name),
device_name, &returned_size);
assert(err == CL_SUCCESS);
printf("Connecting to %s %s...\n", vendor_name, device_name);
}
#pragma mark Context and Command Queue
{
// Now create a context to perform our calculation with the
// specified device
context = clCreateContext(0, 1, &device, NULL, NULL, &err);
assert(err == CL_SUCCESS);
// And also a command queue for the context
cmd_queue = clCreateCommandQueue(context, device, 0, NULL);
}
#pragma mark Program and Kernel Creation
{
// Load the program source from disk
// The kernel/program is the project directory and in Xcode the executable
// is set to launch from that directory hence we use a relative path
const char * filename = "example.cl";
char *program_source = load_program_source(filename);
program[0] = clCreateProgramWithSource(context, 1, (const char**)&program_source,
NULL, &err);
assert(err == CL_SUCCESS);
err = clBuildProgram(program[0], 1, &device, "-cl-fast-relaxed-math", NULL, NULL);
assert(err == CL_SUCCESS);
// Now create the kernel "objects" that we want to use in the example file
kernel[0] = clCreateKernel(program[0], "add", &err);
}
#pragma mark Memory Allocation
{
cl_image_format imgFmt;
imgFmt.image_channel_order = CL_A;
imgFmt.image_channel_data_type = CL_UNORM_INT8;
// Allocate memory on the device to hold our data and store the results into
a_mem = clCreateImage2D(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, &imgFmt,
10, 10, sizeof(char) * 10, a, &err);
assert(err == CL_SUCCESS);
b_mem = clCreateImage2D(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, &imgFmt,
10, 10, sizeof(char) * 10, b, &err);
assert(err == CL_SUCCESS);
// Results array
ans_mem = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(float)*100, NULL, NULL);
// Get all of the stuff written and allocated
clFinish(cmd_queue);
}
#pragma mark Kernel Arguments
{
// Now setup the arguments to our kernel
err = clSetKernelArg(kernel[0], 0, sizeof(cl_mem), &a_mem);
err |= clSetKernelArg(kernel[0], 1, sizeof(cl_mem), &b_mem);
err |= clSetKernelArg(kernel[0], 2, sizeof(cl_mem), &ans_mem);
assert(err == CL_SUCCESS);
}
#pragma mark Execution and Read
{
// Run the calculation by enqueuing it and forcing the
// command queue to complete the task
size_t global_work_size = n;
err = clEnqueueNDRangeKernel(cmd_queue, kernel[0], 1, NULL,
&global_work_size, NULL, 0, NULL, NULL);
assert(err == CL_SUCCESS);
clFinish(cmd_queue);
// Once finished read back the results from the answer
// array into the results array
err = clEnqueueReadBuffer(cmd_queue, ans_mem, CL_TRUE, 0, sizeof(float)*100,
results, 0, NULL, NULL);
assert(err == CL_SUCCESS);
clFinish(cmd_queue);
}
#pragma mark Teardown
{
clReleaseMemObject(a_mem);
clReleaseMemObject(b_mem);
clReleaseMemObject(ans_mem);
clReleaseCommandQueue(cmd_queue);
clReleaseContext(context);
}
return CL_SUCCESS;
}
int main (int argc, const char * argv[]) {
// Problem size
int n = 100;
int i;
// Allocate some memory and a place for the results
char * a = (char *)malloc(n*sizeof(char));
char * b = (char *)malloc(n*sizeof(char));
float * results = (float *)malloc(n*sizeof(float));
// Fill in the values
for(i=0;i<n;i++){
a[i] = (char)i;
b[i] = (char)n-i;
results[i] = 0.f;
}
// Do the OpenCL calculation
runCL(a, b, results, n);
// Print out some results. For this example the values of all elements
// should be the same as the value of n
for(i=0;i<n && i<32;i++) printf("%f\n",results[i]);
// Free up memory
free(a);
free(b);
free(results);
return 0;
}
__kernel void
add(read_only image2d_t a,
read_only image2d_t b,
__global float *answer)
{
int gid = get_global_id(0);
const sampler_t samp = CLK_NORMALIZED_COORDS_FALSE |
CLK_ADDRESS_CLAMP_TO_EDGE |
CLK_FILTER_NEAREST;
float4 img = read_imagef(a, samp, (float2)(gid%10, gid/10));
answer[gid] = (read_imagef(a, samp, (float2)(gid%10, gid/10))).x +
(read_imagef(b, samp, (float2)(gid%10, gid/10))).x;
printf("%10f\n", img.x);
}