I’m trying to run my program, and it worked at first with these args. If I change the size of unifiedSrcValid, it may (or may not) return the CL_OUT_OF_RESOURCES error. I can’t find a reason why it wouldn’t work. I’m running on a GTX 285, so I should have plenty of GPU memory. My machine is a Mac Pro with 16 GB of RAM running 10.6.3. It gives the same error if I run it on the CPU vs the GPU.
Does anyone else get this error (or another error) with this code? Am I doing anything wrong?
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <OpenCL/opencl.h>
#define TRUE 1
#define FALSE 0
////////////////////////////////////////////////////////////////////////////////
struct oclWarper {
cl_command_queue queue;
cl_context context;
cl_kernel kern;
cl_device_id dev;
int srcWidth;
int srcHeight;
int dstWidth;
int dstHeight;
unsigned int xyChSize;
cl_channel_order xyChOrder;
unsigned int imgChSize;
cl_channel_order imgChOrder;
cl_channel_type imageFormat;
};
// Simple compute kernel which computes the square of an input array
//
const char *KernelSource = "\n"
"__kernel void resamp(read_only image2d_t srcCoords,\n"
"read_only image2d_t srcReal,\n"
"read_only image2d_t srcImag,\n"
"__constant float *fUnifiedSrcDensity,\n"
"__constant int *nUnifiedSrcValid,\n"
"__constant char *useBandSrcValid,\n"
"__constant int *nBandSrcValid,\n"
"__global char *dstReal,\n"
"__global char *dstImag,\n"
"__constant float *fDstNoDataReal,\n"
"__constant float *dstDensity,\n"
"__constant int *nDstValid,\n"
"const int bandNum)\n"
"{}\n";
#define handleErr(err) if((err) != CL_SUCCESS) { \
printf("Error at file %s line %d; Err val: %d\n", __FILE__, __LINE__, err); \
printCLErr(err); \
while(1){}\
return err; \
}
void printCLErr(cl_int err)
{
switch (err)
{
case CL_SUCCESS:
printf("CL_SUCCESS\n");
break;
case CL_DEVICE_NOT_FOUND:
printf("CL_DEVICE_NOT_FOUND\n");
break;
case CL_DEVICE_NOT_AVAILABLE:
printf("CL_DEVICE_NOT_AVAILABLE\n");
break;
case CL_COMPILER_NOT_AVAILABLE:
printf("CL_COMPILER_NOT_AVAILABLE\n");
break;
case CL_MEM_OBJECT_ALLOCATION_FAILURE:
printf("CL_MEM_OBJECT_ALLOCATION_FAILURE\n");
break;
case CL_OUT_OF_RESOURCES:
printf("CL_OUT_OF_RESOURCES\n");
break;
case CL_OUT_OF_HOST_MEMORY:
printf("CL_OUT_OF_HOST_MEMORY\n");
break;
case CL_PROFILING_INFO_NOT_AVAILABLE:
printf("CL_PROFILING_INFO_NOT_AVAILABLE\n");
break;
case CL_MEM_COPY_OVERLAP:
printf("CL_MEM_COPY_OVERLAP\n");
break;
case CL_IMAGE_FORMAT_MISMATCH:
printf("CL_IMAGE_FORMAT_MISMATCH\n");
break;
case CL_IMAGE_FORMAT_NOT_SUPPORTED:
printf("CL_IMAGE_FORMAT_NOT_SUPPORTED\n");
break;
case CL_BUILD_PROGRAM_FAILURE:
printf("CL_BUILD_PROGRAM_FAILURE\n");
break;
case CL_MAP_FAILURE:
printf("CL_MAP_FAILURE\n");
break;
case CL_INVALID_VALUE:
printf("CL_INVALID_VALUE\n");
break;
case CL_INVALID_DEVICE_TYPE:
printf("CL_INVALID_DEVICE_TYPE\n");
break;
case CL_INVALID_PLATFORM:
printf("CL_INVALID_PLATFORM\n");
break;
case CL_INVALID_DEVICE:
printf("CL_INVALID_DEVICE\n");
break;
case CL_INVALID_CONTEXT:
printf("CL_INVALID_CONTEXT\n");
break;
case CL_INVALID_QUEUE_PROPERTIES:
printf("CL_INVALID_QUEUE_PROPERTIES\n");
break;
case CL_INVALID_COMMAND_QUEUE:
printf("CL_INVALID_COMMAND_QUEUE\n");
break;
case CL_INVALID_HOST_PTR:
printf("CL_INVALID_HOST_PTR\n");
break;
case CL_INVALID_MEM_OBJECT:
printf("CL_INVALID_MEM_OBJECT\n");
break;
case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR:
printf("CL_INVALID_IMAGE_FORMAT_DESCRIPTOR\n");
break;
case CL_INVALID_IMAGE_SIZE:
printf("CL_INVALID_IMAGE_SIZE\n");
break;
case CL_INVALID_SAMPLER:
printf("CL_INVALID_SAMPLER\n");
break;
case CL_INVALID_BINARY:
printf("CL_INVALID_BINARY\n");
break;
case CL_INVALID_BUILD_OPTIONS:
printf("CL_INVALID_BUILD_OPTIONS\n");
break;
case CL_INVALID_PROGRAM:
printf("CL_INVALID_PROGRAM\n");
break;
case CL_INVALID_PROGRAM_EXECUTABLE:
printf("CL_INVALID_PROGRAM_EXECUTABLE\n");
break;
case CL_INVALID_KERNEL_NAME:
printf("CL_INVALID_KERNEL_NAME\n");
break;
case CL_INVALID_KERNEL_DEFINITION:
printf("CL_INVALID_KERNEL_DEFINITION\n");
break;
case CL_INVALID_KERNEL:
printf("CL_INVALID_KERNEL\n");
break;
case CL_INVALID_ARG_INDEX:
printf("CL_INVALID_ARG_INDEX\n");
break;
case CL_INVALID_ARG_VALUE:
printf("CL_INVALID_ARG_VALUE\n");
break;
case CL_INVALID_ARG_SIZE:
printf("CL_INVALID_ARG_SIZE\n");
break;
case CL_INVALID_KERNEL_ARGS:
printf("CL_INVALID_KERNEL_ARGS\n");
break;
case CL_INVALID_WORK_DIMENSION:
printf("CL_INVALID_WORK_DIMENSION\n");
break;
case CL_INVALID_WORK_GROUP_SIZE:
printf("CL_INVALID_WORK_GROUP_SIZE\n");
break;
case CL_INVALID_WORK_ITEM_SIZE:
printf("CL_INVALID_WORK_ITEM_SIZE\n");
break;
case CL_INVALID_GLOBAL_OFFSET:
printf("CL_INVALID_GLOBAL_OFFSET\n");
break;
case CL_INVALID_EVENT_WAIT_LIST:
printf("CL_INVALID_EVENT_WAIT_LIST\n");
break;
case CL_INVALID_EVENT:
printf("CL_INVALID_EVENT\n");
break;
case CL_INVALID_OPERATION:
printf("CL_INVALID_OPERATION\n");
break;
case CL_INVALID_GL_OBJECT:
printf("CL_INVALID_GL_OBJECT\n");
break;
case CL_INVALID_BUFFER_SIZE:
printf("CL_INVALID_BUFFER_SIZE\n");
break;
case CL_INVALID_MIP_LEVEL:
printf("CL_INVALID_MIP_LEVEL\n");
break;
case CL_INVALID_GLOBAL_WORK_SIZE:
printf("CL_INVALID_GLOBAL_WORK_SIZE\n");
break;
}
}
////////////////////////////////////////////////////////////////////////////////
int main(int argc, char** argv)
{
cl_device_id device_id; // compute device id
cl_context context; // compute context
cl_command_queue commands; // compute command queue
cl_program program; // compute program
cl_kernel kernel; // compute kernel
struct oclWarper warperStack;
struct oclWarper *warper = &warperStack;
cl_image_format imgFmt;
warper->srcWidth = 2703;
warper->srcHeight = 2685;
warper->dstWidth = 2248;
warper->dstHeight = 3086;
warper->xyChSize = 4;
warper->xyChOrder = 4277;
warper->imgChOrder = 4280;
warper->imageFormat = 4306;
warper->imgChSize = 1;
cl_int err = CL_SUCCESS;
size_t numSrcPx = warper->srcWidth * warper->srcHeight;
int validSrcSz = sizeof(int) * (1 + (numSrcPx >> 5));
size_t numDstPx = warper->dstWidth * warper->dstHeight;
cl_mem xy, unifiedSrcDensityCL, unifiedSrcValidCL;
cl_mem dstDensityCL, dstValidCL, dstNoDataRealCL;
cl_mem useBandSrcValidCL, nBandSrcValidCL;
cl_mem srcImag, srcReal;
cl_mem dstReal, dstImag;
size_t ceil_runs[2];
size_t group_size[2];
int bandNum = 0;
// Connect to a compute device
//
int gpu = 0;
err = clGetDeviceIDs(NULL, gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU, 1, &device_id, NULL);
if (err != CL_SUCCESS)
{
printf("Error: Failed to create a device group!\n");
return EXIT_FAILURE;
}
// Create a compute context
//
context = clCreateContext(0, 1, &device_id, NULL, NULL, &err);
if (!context)
{
printf("Error: Failed to create a compute context!\n");
return EXIT_FAILURE;
}
// Create a command commands
//
commands = clCreateCommandQueue(context, device_id, 0, &err);
if (!commands)
{
printf("Error: Failed to create a command commands!\n");
return EXIT_FAILURE;
}
// Create the compute program from the source buffer
//
program = clCreateProgramWithSource(context, 1, (const char **) & KernelSource, NULL, &err);
if (!program)
{
printf("Error: Failed to create compute program!\n");
return EXIT_FAILURE;
}
// Build the program executable
//
err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
if (err != CL_SUCCESS)
{
size_t len;
char buffer[2048];
printf("Error: Failed to build program executable!\n");
clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
printf("%s\n", buffer);
exit(1);
}
// Create the compute kernel in the program we wish to run
//
kernel = clCreateKernel(program, "resamp", &err);
if (!kernel || err != CL_SUCCESS)
{
printf("Error: Failed to create compute kernel!\n");
printCLErr(err);
exit(1);
}
//************************************************************
***
warper->context = context;
warper->queue = commands;
warper->dev = device_id;
warper->kern = kernel;
//Copy coord data to the device
imgFmt.image_channel_order = warper->xyChOrder;
imgFmt.image_channel_data_type = CL_FLOAT;
xy = clCreateImage2D(warper->context, CL_MEM_READ_ONLY, &imgFmt,
(size_t) warper->dstWidth, (size_t) warper->dstHeight,
(size_t) sizeof(float) * warper->xyChSize * warper->dstWidth,
NULL, &err);
handleErr(err);
//Set up argument
handleErr(err = clSetKernelArg(warper->kern, 0, sizeof(cl_mem), &xy));
//Set up image vars
unifiedSrcDensityCL = clCreateBuffer(warper->context, CL_MEM_READ_ONLY,
sizeof(float) * numSrcPx, NULL, &err);
handleErr(err);
//Copy unifiedSrcValid if it exists
if (FALSE) {
//Alloc dummy device RAM
unifiedSrcValidCL = clCreateBuffer(warper->context, CL_MEM_READ_ONLY, 1, NULL, &err);
handleErr(err);
} else {
//Alloc & copy all validity data
unifiedSrcValidCL = clCreateBuffer(warper->context, CL_MEM_READ_ONLY,
validSrcSz, NULL, &err);
handleErr(err);
}
//Make a fake image so we don't have a NULL pointer
useBandSrcValidCL = clCreateBuffer(warper->context, CL_MEM_READ_ONLY, 1, NULL, &err);
handleErr(err);
nBandSrcValidCL = clCreateBuffer(warper->context, CL_MEM_READ_ONLY, 1, NULL, &err);
handleErr(err);
//Set up arguments
handleErr(err = clSetKernelArg(warper->kern, 3, sizeof(cl_mem), &unifiedSrcDensityCL));
handleErr(err = clSetKernelArg(warper->kern, 4, sizeof(cl_mem), &unifiedSrcValidCL));
handleErr(err = clSetKernelArg(warper->kern, 5, sizeof(cl_mem), &useBandSrcValidCL));
handleErr(err = clSetKernelArg(warper->kern, 6, sizeof(cl_mem), &nBandSrcValidCL));
//Set up image vars
imgFmt.image_channel_order = warper->imgChOrder;
imgFmt.image_channel_data_type = warper->imageFormat;
srcReal = clCreateImage2D(warper->context, CL_MEM_READ_ONLY, &imgFmt,
(size_t) warper->srcWidth, (size_t) warper->srcHeight,
warper->srcWidth * warper->imgChSize * sizeof(char),
NULL, &err);
handleErr(err);
srcImag = clCreateImage2D(warper->context, CL_MEM_READ_ONLY, &imgFmt,
1, 1, warper->imgChSize * sizeof(char), NULL, &err);
handleErr(err);
//Set up per-band arguments
handleErr(err = clSetKernelArg(warper->kern, 1, sizeof(cl_mem), &srcReal));
handleErr(err = clSetKernelArg(warper->kern, 2, sizeof(cl_mem), &srcImag));
//Make dummy memory
dstReal = clCreateBuffer(warper->context, CL_MEM_READ_WRITE,
numDstPx * warper->imgChSize * sizeof(char), NULL, &err);
handleErr(err);
dstImag = clCreateBuffer(warper->context, CL_MEM_READ_WRITE, 1, NULL, &err);
handleErr(err);
//Set up per-band arguments
handleErr(err = clSetKernelArg(warper->kern, 7, sizeof(cl_mem), &dstReal));
handleErr(err = clSetKernelArg(warper->kern, 8, sizeof(cl_mem), &dstImag));
//Make dummy memory
dstNoDataRealCL = clCreateBuffer(warper->context, CL_MEM_READ_ONLY, 1, NULL, &err);
handleErr(err);
dstDensityCL = clCreateBuffer(warper->context, CL_MEM_READ_ONLY, 1, NULL, &err);
handleErr(err);
dstValidCL = clCreateBuffer(warper->context, CL_MEM_READ_ONLY, 1, NULL, &err);
handleErr(err);
//Set up arguments
handleErr(err = clSetKernelArg(warper->kern, 9, sizeof(cl_mem), &dstNoDataRealCL));
handleErr(err = clSetKernelArg(warper->kern, 10, sizeof(cl_mem), &dstDensityCL));
handleErr(err = clSetKernelArg(warper->kern, 11, sizeof(cl_mem), &dstValidCL));
handleErr(err = clSetKernelArg(warper->kern, 12, sizeof(int), &bandNum));
ceil_runs[0] = 1;
ceil_runs[1] = 1;
group_size[0] = 1;
group_size[1] = 1;
handleErr(err = clEnqueueNDRangeKernel(warper->queue, warper->kern, 2, NULL,
ceil_runs, group_size, 0, NULL, NULL));
return 0;
}