Incorrect write_imagef() output in OpenCL kernel

I ran into a problem using write_imagef() in an OpenCL kernel; no matter what values I pass as the output color, the value that gets written has 1.0 in every color channel.

I reduced the problem down to the following self-contained example. It creates a 512x512 image (CL_RGBA, CL_UNORM_INT8), runs a kernel that writes float4(1,0,1,1) to pixel (0,0) of the image, and reads the image back to host memory.

Some things I’ve tried:

  • Checking for runtime errors – every function returns CL_SUCCESS.
  • Running on an Intel GPU – works correctly.
  • Running on an Intel CPU – works correctly.
  • Changing the image format to CL_UNSIGNED_INT8 and using write_imageui() instead of write_imagef() – works correctly.

So, is the bug mine or the driver’s? Any help either way would be greatly appreciated. Thanks!

Here’s the output when I run on my NVIDIA GPU:

CL_PLATFORM_NAME    = NVIDIA CUDA
CL_PLATFORM_VERSION = OpenCL 1.1 CUDA 4.2.1
CL_PLATFORM_VENDOR  = NVIDIA Corporation
CL_DEVICE_NAME      = GeForce GT 650M
CL_DRIVER_VERSION   = 310.70
pixels[0]: expected=0xFFFF00FF, actual=0xFFFFFFFF

And here’s the source code (also posted at https://gist.github.com/4378846):

#include <CL/cl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

const char *writeImagefTestSrc = "__kernel void writeImagefTest(__write_only image2d_t outImg)

{

	write_imagef(outImg, (int2)(0,0), (float4)(1,0,1,1));

}";

int main(int , char **)
{
	cl_int clError = CL_SUCCESS;

	cl_uint numPlatforms = 0;
	cl_platform_id platforms[32];
	clGetPlatformIDs(32,platforms,&numPlatforms);
	cl_platform_id oclPlatform = platforms[0];
	cl_context_properties contextProperties[] = {
		CL_CONTEXT_PLATFORM, (cl_context_properties)oclPlatform,
		0
	};
	cl_context oclContext = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_GPU, NULL, NULL, &clError);
	cl_uint numDevices = 0;
	cl_device_id devices[32];
	clError = clGetContextInfo(oclContext, CL_CONTEXT_NUM_DEVICES, sizeof(cl_uint), &numDevices, NULL);
	clError = clGetContextInfo(oclContext, CL_CONTEXT_DEVICES, sizeof(devices), devices, NULL);
	cl_device_id oclDevice = devices[0];

	char strBuf[256];
	strBuf[255] = 0;
	clGetPlatformInfo(oclPlatform, CL_PLATFORM_NAME,    255, strBuf, NULL); printf("CL_PLATFORM_NAME    = %s
", strBuf);
	clGetPlatformInfo(oclPlatform, CL_PLATFORM_VERSION, 255, strBuf, NULL); printf("CL_PLATFORM_VERSION = %s
", strBuf);
	clGetPlatformInfo(oclPlatform, CL_PLATFORM_VENDOR,  255, strBuf, NULL); printf("CL_PLATFORM_VENDOR  = %s
", strBuf);
	clGetDeviceInfo(oclDevice,     CL_DEVICE_NAME,      255, strBuf, NULL); printf("CL_DEVICE_NAME      = %s
", strBuf);
	clGetDeviceInfo(oclDevice,     CL_DRIVER_VERSION,   255, strBuf, NULL); printf("CL_DRIVER_VERSION   = %s
", strBuf);

	cl_command_queue oclQueue = clCreateCommandQueue(oclContext, oclDevice, CL_QUEUE_PROFILING_ENABLE, &clError);
	cl_program writeImagefTestPgm = clCreateProgramWithSource(oclContext, 1, &writeImagefTestSrc, NULL, &clError);
	clError = clBuildProgram(writeImagefTestPgm, 1, &oclDevice, "-Werror -cl-std=CL1.1", NULL, NULL);
	cl_kernel writeImagefTestKnl = clCreateKernel(writeImagefTestPgm, "writeImagefTest", &clError);
	cl_image_format imageFormat;
	imageFormat.image_channel_data_type = CL_UNORM_INT8;
	imageFormat.image_channel_order = CL_RGBA;
	const size_t imageWidth = 512, imageHeight = 512;
	cl_mem image = clCreateImage2D(oclContext, CL_MEM_WRITE_ONLY, &imageFormat, imageWidth, imageHeight, 0, NULL, &clError);
	size_t imageRowPitch = 0, imageSlicePitch = 0;
	clError = clGetImageInfo(image, CL_IMAGE_ROW_PITCH, sizeof(size_t), &imageRowPitch, NULL);
	clError = clGetImageInfo(image, CL_IMAGE_SLICE_PITCH, sizeof(size_t), &imageSlicePitch, NULL);

	clError = clSetKernelArg(writeImagefTestKnl, 0, sizeof(cl_mem), &image);
	clError = clEnqueueTask(oclQueue, writeImagefTestKnl, 0,NULL, NULL);
	size_t origin[3] = {0,0,0};
	size_t region[3] = {imageWidth,imageHeight,1};
	uint32_t *pixels = (uint32_t*)malloc(imageRowPitch*imageHeight);
	memset(pixels,0xCD,imageRowPitch*imageHeight);
	clError = clEnqueueReadImage(oclQueue, image, CL_TRUE, origin, region, imageRowPitch, imageSlicePitch, pixels, 0,NULL, NULL);
	clError = clFinish(oclQueue);
	printf("pixels[0]: expected=0xFFFF00FF, actual=0x%08X
", pixels[0]);
	return 0;
}

The problem still occurs with the new 310.90 drivers.

This is still a problem with 314.22 Drivers

I realized that when I read from a cl_unorm_int8 image and write this value to another cl_unorm_int8 image I get correct results.

write_imagef(outImg, (int2)(0,0), read_imagef(input, sampler, coord));

This a bit awkward and should be addressed by the development team…