read and write Bmp file using image memory objects

hello everyone,

I'm trying to develop a simple program (using clCreateImage2D and read/write_image) that reads a BMP from a file, processes it in a kernel and then saves the result to a BMP file. Unfortunately, when I run the program the output file is empty, and the program reports no errors. I have no idea what is causing the problem.

what is wrong with my code ?

// sorry for my poor english ;).

vec.cpp

[codebox]#include <oclUtils.h>

// File-scope state shared by main().
// Repairs of forum-formatting damage: the kernel file name uses plain ASCII
// quotes, and the work sizes are declared as 2-element arrays (they are
// passed to clEnqueueNDRangeKernel with work_dim = 2).

const char* cSourceFile = "kernel.cl"; // OpenCL C source file holding the "copy" kernel

// OpenCL Vars

cl_context cxGPUContext; // OpenCL context

cl_command_queue cqCommandQue; // OpenCL command que

cl_device_id* cdDevices; // OpenCL device list

cl_program cpProgram; // OpenCL program

cl_kernel ckKernel; // OpenCL kernel

// NOTE(review): the three buffers below are not referenced by the main() shown
// in this post; they look like leftovers from a vector-add sample.

cl_mem cmDevSrcA; // OpenCL device source buffer A

cl_mem cmDevSrcB; // OpenCL device source buffer B

cl_mem cmDevDst; // OpenCL device destination buffer

size_t szGlobalWorkSize[2] = {512, 512}; // global work size: one work-item per pixel of a 512x512 image

size_t szLocalWorkSize[2] = {16, 16}; // work-group size

size_t szParmDataBytes; // Byte size of context information

size_t szKernelLength; // Byte size of kernel code

cl_int ciErr1, ciErr2; // Error code var

char* cPathAndName = NULL; // var for full paths to data, src, etc.

char* cSourceCL = NULL; // Buffer to hold source for compilation

shrBOOL bNoPrompt = shrFALSE; // set from the "noprompt" command line flag

// Main function

// ************************************************************


// Program entry point (first draft posted in the thread).
//
// Sequence: open in.bmp / wynik.bmp, allocate two host buffers, create a GPU
// context and command queue, wrap both buffers as 512x512 CL_RGBA /
// CL_UNSIGNED_INT8 images, build the "copy" kernel from kernel.cl, upload,
// run, read back and write wynik.bmp. OpenCL failures are only logged;
// execution always continues with the next step.
//
// Only the markup-damaged malloc line has been repaired. The logic problems
// marked NOTE(review) are deliberately left as posted, because they are what
// the replies in this thread diagnose.
int main(int argc, char **argv)

{

// get command line arg for quick test, if provided

bNoPrompt = shrCheckCmdLineFlag(argc, (const char**)argv, "noprompt");

// start logs

shrSetLogFileName ("vec.txt");

// NOTE(review): the two FILE handles are stored in void pointers and then
// overwritten by the malloc results below, so the files are never read,
// written through these handles, or closed.

void *image = fopen("in.bmp", "rb");

void *image2 = fopen("wynik.bmp", "wb");

// NOTE(review): nothing is ever read into these buffers, so the "input image"
// is uninitialized memory. The malloc results are not checked either.

image = (void *)malloc(8 * (512*512*3+54));

image2 = (void *)malloc(8 * (512*512*3+54));

// Create the OpenCL context on a GPU device

cxGPUContext = clCreateContextFromType(0, CL_DEVICE_TYPE_GPU, NULL, NULL, &ciErr1);

shrLog(LOGBOTH, 0.0, "clCreateContextFromType...\n"); 

if (ciErr1 != CL_SUCCESS)

	{shrLog(LOGBOTH, 0.0, "Error in clCreateContextFromType, Line %u in file %s !!!\n\n", __LINE__, __FILE__);}

// Get the list of GPU devices associated with context
// (first call asks for the byte size of the list, second call fills it)

ciErr1 = clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, 0, NULL, &szParmDataBytes);

cdDevices = (cl_device_id*)malloc(szParmDataBytes);

ciErr1 |= clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, szParmDataBytes, cdDevices, NULL);

shrLog(LOGBOTH, 0.0, "clGetContextInfo...\n"); 

if (ciErr1 != CL_SUCCESS)

	{shrLog(LOGBOTH, 0.0, "Error in clGetContextInfo, Line %u in file %s !!!\n\n", __LINE__, __FILE__);}

// Create a command-queue on the first device of the context

cqCommandQue = clCreateCommandQueue(cxGPUContext, cdDevices[0], 0, &ciErr1);

shrLog(LOGBOTH, 0.0, "clCreateCommandQueue...\n"); 

if (ciErr1 != CL_SUCCESS)

	{shrLog(LOGBOTH, 0.0, "Error in clCreateCommandQueue, Line %u in file %s !!!\n\n", __LINE__, __FILE__);}

// Create the two 2D image objects (source and result)

size_t width = 512;

size_t height = 512;

size_t rowpitch = 0;	// 0 = rows are tightly packed in the host buffer

// Four 8-bit unsigned channels per pixel, i.e. 4 bytes per pixel in host memory.

cl_image_format format;

format.image_channel_order = CL_RGBA;

format.image_channel_data_type = CL_UNSIGNED_INT8;

cl_mem_flags flags;

flags = CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR;

cl_mem myClImage = clCreateImage2D(

	       cxGPUContext,            

	       flags,     

	       &format,       

	       width,       

	       height,        

	       rowpitch,      

	       image,         

	       &ciErr1     

   ); 

cl_mem_flags flags2;

flags2 = CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR;

cl_mem myClImage2 = clCreateImage2D(

	       cxGPUContext,            // a valid OpenCL context

	       flags2,                // option flags                  [1]

	       &format,              // image format properties       [2]

	       width,               // width of the image in pixels

	       height,              // height of the image in pixels

	       rowpitch,            // scan-line pitch in bytes      [3]

	       image2,                // pointer to the image data

	       &ciErr2                // on return, the result code

   ); 

ciErr1 |= ciErr2;

// NOTE(review): the log text says clCreateBuffer, but the calls above are clCreateImage2D.

shrLog(LOGBOTH, 0.0, "clCreateBuffer...\n"); 

if (ciErr1 != CL_SUCCESS)

	{shrLog(LOGBOTH, 0.0, "Error in clCreateBuffer, Line %u in file %s !!!\n\n", __LINE__, __FILE__);}

// Read the OpenCL kernel in from source file

shrLog(LOGBOTH, 0.0, "oclLoadProgSource (%s)...\n", cSourceFile); 

cPathAndName = shrFindFilePath(cSourceFile, argv[0]);

cSourceCL = oclLoadProgSource(cPathAndName, "", &szKernelLength);

// Create the program

cpProgram = clCreateProgramWithSource(cxGPUContext, 1, (const char **)&cSourceCL, &szKernelLength, &ciErr1);

shrLog(LOGBOTH, 0.0, "clCreateProgramWithSource...\n"); 

if (ciErr1 != CL_SUCCESS)

	{shrLog(LOGBOTH, 0.0, "Error in clCreateProgramWithSource, Line %u in file %s !!!\n\n", __LINE__, __FILE__);}

// Build the program

ciErr1 = clBuildProgram(cpProgram, 0, NULL, NULL, NULL, NULL);

shrLog(LOGBOTH, 0.0, "clBuildProgram...\n"); 

if (ciErr1 != CL_SUCCESS)

	{shrLog(LOGBOTH, 0.0, "Error in clBuildProgram, Line %u in file %s !!!\n\n", __LINE__, __FILE__);}

// Create the kernel

ckKernel = clCreateKernel(cpProgram, "copy", &ciErr1);

shrLog(LOGBOTH, 0.0, "clCreateKernel (copy)...\n"); 

if (ciErr1 != CL_SUCCESS)

	{shrLog(LOGBOTH, 0.0, "Error in clCreateKernel, Line %u in file %s !!!\n\n", __LINE__, __FILE__);}

// Set the Argument values: arg 0 = input image, arg 1 = output image
// NOTE(review): the log text below says "0 - 3", but only arguments 0 and 1 are set.

ciErr1 = clSetKernelArg(ckKernel, 0, sizeof(cl_mem), (void*)&myClImage);

ciErr1 |= clSetKernelArg(ckKernel, 1, sizeof(cl_mem), (void*)&myClImage2);

shrLog(LOGBOTH, 0.0, "clSetKernelArg 0 - 3...\n\n"); 

if (ciErr1 != CL_SUCCESS)

	{shrLog(LOGBOTH, 0.0, "Error in clSetKernelArg, Line %u in file %s !!!\n\n", __LINE__, __FILE__);}

// --------------------------------------------------------

// Start Core sequence... copy input data to GPU, compute, copy results back

const size_t origin[] = {0, 0, 0};

// NOTE(review): a region of {1, 1, 1} transfers a single pixel; the whole
// picture would need {width, height, 1}.

const size_t region[] = {1, 1, 1};

ciErr1 = clEnqueueWriteImage (

						cqCommandQue,

						myClImage,	//	cl_mem image,

						CL_TRUE, //	  	cl_bool blocking_write,

						origin,//	  	const size_t origin[3],

						region,	//	  	const size_t region[3],

						0,	//	  	size_t row_pitch,

						0,	//	  	size_t slice_pitch,

						image,	//	  	void *ptr,

						0,	//	  	cl_uint num_events_in_wait_list,

						NULL, 	//	  	const cl_event *event_wait_list,

						NULL 	//	  	cl_event *event)

	);

// NOTE(review): the message names clEnqueueReadImage, but the call checked here is clEnqueueWriteImage.

if (ciErr1 != CL_SUCCESS)

	{shrLog(LOGBOTH, 0.0, "Error in clEnqueueReadImage, Line %u in file %s !!!\n\n", __LINE__, __FILE__);}

// Launch kernel: 2-D range, work sizes taken from the file-scope variables

ciErr1 = clEnqueueNDRangeKernel(cqCommandQue, ckKernel, 2, NULL, szGlobalWorkSize, szLocalWorkSize, 0, NULL, NULL);

shrLog(LOGBOTH, 0.0, "clEnqueueNDRangeKernel (copy)...\n"); 

if (ciErr1 != CL_SUCCESS)

	{shrLog(LOGBOTH, 0.0, "leRROR Error in clEnqueueNDRangeKernel, Line %u in file %s !!!\n\n", __LINE__, __FILE__);}

//collect results (blocking read, same single-pixel region as the upload)

ciErr1 = clEnqueueReadImage (

						cqCommandQue,

						myClImage2,	//		cl_mem image,

						CL_TRUE, 	//	  	cl_bool blocking_read,

						origin,		//	  	const size_t origin[3],

						region,		//	  	const size_t region[3],

						0,			//	  	size_t row_pitch,

						0,			//	  	size_t slice_pitch,

						image2,		//	  	void *ptr,

						0,			//	  	cl_uint num_events_in_wait_list,

						NULL, 		//	  	const cl_event *event_wait_list,

						NULL 		//	  	cl_event *event)

	);

if (ciErr1 != CL_SUCCESS)

	{shrLog(LOGBOTH, 0.0, "Error in clEnqueueReadImage, Line %u in file %s !!!\n\n", __LINE__, __FILE__);}

// NOTE(review): sizeof(8*(512*512*3+54)) is the size of an int expression, so
// only sizeof(int) bytes are written, not the buffer. No BMP header is
// written, and nk is neither checked for NULL nor closed.

FILE *nk = fopen("wynik.bmp", "wb");

fwrite(image2, 1, sizeof(8*(512*512*3+54)), nk);

shrLog(LOGBOTH, 0.0, "END \n\n");

}[/codebox]

and kernel.cl file

[codebox]// OpenCL C kernel: copies one pixel per work-item from imageIn to imageOut.
// The pixel position is the work-item's 2-D global id. The input is sampled
// with unnormalized integer coordinates, clamped addressing and nearest
// filtering (no interpolation).
// Fix: CLK_FILTER_NEAREST had been split across two lines by the forum
// formatting, which made the kernel source invalid.
__kernel void copy(__read_only image2d_t imageIn,__write_only image2d_t imageOut)

{

const sampler_t sampler=CLK_NORMALIZED_COORDS_FALSE|CLK_ADDRESS_CLAMP|CLK_FILTER_NEAREST;

// x / y position of this work-item's pixel

int gid0 = get_global_id(0);

int gid1 = get_global_id(1);

uint4 pixel;

// read the four unsigned-integer channels and write them back unchanged

pixel=read_imageui(imageIn,sampler,(int2)(gid0,gid1));

write_imageui (imageOut,(int2)(gid0,gid1),pixel);

}[/codebox]
in.bmp (768 KB)

I have only had a brief look at your code, so I'm not completely sure what you want to do. But you set the region to {1,1,1} for both the write and the read of the picture, which means that you are going to transfer only 1x1x1 pixel…
You should set the region to {image_width, image_height, image_depth}, where depth is 1 for a 2D image.

Thank you for your response. I changed the region values to {512,512,1}, but it is still not working.

I just want to read an image from the computer's disk, process it using OpenCL, and then write the result back to disk, but it appears that I have serious problems with that.

Any help and examples will be greatly appreciated ;).

my actual code:

[codebox]#include <oclUtils.h>

// File-scope state shared by main().
// Repair of forum-formatting damage: the kernel file name uses plain ASCII
// quotes (the curly quotes in the post are not valid in a string literal).

const char* cSourceFile = "kernel.cl"; // OpenCL C source file holding the "copy" kernel

// OpenCL Vars

cl_context cxGPUContext; // OpenCL context

cl_command_queue cqCommandQue; // OpenCL command que

cl_device_id* cdDevices; // OpenCL device list

cl_program cpProgram; // OpenCL program

cl_kernel ckKernel; // OpenCL kernel

// NOTE(review): the three buffers below are not referenced by the main() shown
// in this post; they look like leftovers from a vector-add sample.

cl_mem cmDevSrcA; // OpenCL device source buffer A

cl_mem cmDevSrcB; // OpenCL device source buffer B

cl_mem cmDevDst; // OpenCL device destination buffer

size_t szParmDataBytes; // Byte size of context information

size_t szKernelLength; // Byte size of kernel code

cl_int ciErr1, ciErr2; // Error code var

char* cPathAndName = NULL; // var for full paths to data, src, etc.

char* cSourceCL = NULL; // Buffer to hold source for compilation

// Main function

// Program entry point (second draft posted in the thread).
//
// Sequence: open in.bmp, create a GPU context and command queue, create two
// 512x512 CL_RGBA / CL_UNSIGNED_INT8 images without a host pointer, build the
// "copy" kernel from kernel.cl, upload, run, read back and write wynik.bmp.
// No return code is inspected in this draft: ciErr1 is overwritten by each
// call and ciErr2 is never read.
int main(int argc, char **argv)

{

// Earlier attempt at loading the bitmap, left disabled by the author:

// FILE *input = fopen(“in.bmp”, “rb”);

// unsigned char tab [512*512+54];

// //fseek (input, 54, SEEK_SET);

//

// int i, j;

// for (i=0; i < (512*512+54); i++)

// {

//

// fread (&tab[i],1,3,input);

//

// }

// fclose(input);

//

// void * image;

// image = (void *)tab;

//

// unsigned char tab2 [512*512+54];

// void * image2;

// image2 = (void *)tab2;

// NOTE(review): image is made to point at the FILE object itself. Nothing is
// read from the file; the FILE object is later handed to clEnqueueWriteImage
// as if it were pixel data. The stream is also never closed.

FILE * in = fopen("in.bmp", "rb");

void *image;

image = (void*)in;

// NOTE(review): 512*512+54 bytes are allocated, but the blocking read at the
// end of this function fills a 512x512 region of 4-byte pixels (512*512*4
// bytes), so this buffer is too small for it.

void *image2;

image2 = (void *)malloc(512*512+54);	

// Create the OpenCL context on a GPU device

cxGPUContext = clCreateContextFromType(0, CL_DEVICE_TYPE_GPU, NULL, NULL, &ciErr1);

// Get the list of GPU devices associated with context
// (first call asks for the byte size of the list, second call fills it)

ciErr1 = clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, 0, NULL, &szParmDataBytes);

cdDevices = (cl_device_id*)malloc(szParmDataBytes);

ciErr1 |= clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, szParmDataBytes, cdDevices, NULL);

// Create a command-queue on the first device of the context

cqCommandQue = clCreateCommandQueue(cxGPUContext, cdDevices[0], 0, &ciErr1);

// Create the two 2D image objects (source and result)

size_t width = 512;

size_t height = 512;

size_t rowpitch = 0;

// Four 8-bit unsigned channels per pixel, i.e. 4 bytes per pixel in host memory.

cl_image_format format;

format.image_channel_order = CL_RGBA;

format.image_channel_data_type = CL_UNSIGNED_INT8;

cl_mem_flags flags;

// CL_MEM_USE_HOST_PTR was dropped in this draft, so no host pointer is passed below.

flags = CL_MEM_READ_ONLY;// | CL_MEM_USE_HOST_PTR;

cl_mem myClImage = clCreateImage2D(

	cxGPUContext,

	flags,

	&format,

	width,

	height,

	rowpitch,

	0,//image,

	&ciErr1

);

cl_mem_flags flags2;

flags2 = CL_MEM_WRITE_ONLY ;//| CL_MEM_USE_HOST_PTR;

cl_mem myClImage2 = clCreateImage2D(

	cxGPUContext, // a valid OpenCL context

	flags2, // option flags [1]

	&format, // image format properties [2]

	width, // width of the image in pixels

	height, // height of the image in pixels

	rowpitch, // scan-line pitch in bytes [3]

	0,//image2, // pointer to the image data

	&ciErr2 // on return, the result code

);

// Read the OpenCL kernel in from source file

cPathAndName = shrFindFilePath(cSourceFile, argv[0]);

cSourceCL = oclLoadProgSource(cPathAndName, "", &szKernelLength);

// Create the program

cpProgram = clCreateProgramWithSource(cxGPUContext, 1, (const char **)&cSourceCL, &szKernelLength, &ciErr1);

// Build the program

ciErr1 = clBuildProgram(cpProgram, 0, NULL, NULL, NULL, NULL);

// Create the kernel

ckKernel = clCreateKernel(cpProgram, "copy", &ciErr1);

// Set the Argument values: arg 0 = input image, arg 1 = output image

ciErr1 = clSetKernelArg(ckKernel, 0, sizeof(cl_mem), (void*)&myClImage);

ciErr1 |= clSetKernelArg(ckKernel, 1, sizeof(cl_mem), (void*)&myClImage2);

// --------------------------------------------------------

// Start Core sequence… copy input data to GPU, compute, copy results back

size_t origin[3];

origin[0] = 0;

origin[1] = 0;

origin[2] = 0;

// NOTE(review): the OpenCL region order is {width, height, depth}; height and
// width are swapped here, which only goes unnoticed because both are 512.

size_t region[3];

region[0] = height;

region[1] = width; 

region[2] = 1;

// Blocking upload of the whole region from the host pointer `image`.

ciErr1 = clEnqueueWriteImage (

	cqCommandQue,

	myClImage, // cl_mem image,

	CL_TRUE, // cl_bool blocking_write,

	origin,// const size_t origin[3],

	region, // const size_t region[3],

	0,// width * sizeof(3), ??// size_t row_pitch,

	0, // size_t slice_pitch,

	image, // void *ptr,

	0, // cl_uint num_events_in_wait_list,

	NULL, // const cl_event *event_wait_list,

	NULL // cl_event *event)

);

// Launch kernel: one work-item per pixel, 16x16 work-groups

size_t szGlobalWorkSize[2];

size_t szLocalWorkSize[2];

szGlobalWorkSize[0] = 512;

szGlobalWorkSize[1] = 512;

szLocalWorkSize[0] = 16;

szLocalWorkSize[1] = 16;

ciErr1 = clEnqueueNDRangeKernel(cqCommandQue, ckKernel, 2, NULL, szGlobalWorkSize, szLocalWorkSize, 0, NULL, NULL);

//collect results (blocking read of the whole region into image2)

ciErr1 = clEnqueueReadImage (

	cqCommandQue,

	myClImage2, // cl_mem image,

	CL_TRUE, // cl_bool blocking_read,

	origin, // const size_t origin[3],

	region, // const size_t region[3],

	0, // width * sizeof(3) ??,//, // size_t row_pitch,

	0, // size_t slice_pitch,

	image2, // void *ptr,

	0, // cl_uint num_events_in_wait_list,

	NULL, // const cl_event *event_wait_list,

	NULL // cl_event *event)

);

// NOTE(review): sizeof(512*512+54) is the size of an int expression, so only
// sizeof(int) bytes are written, not the buffer. No BMP header is written,
// and nk is neither checked for NULL nor closed.

FILE *nk = fopen("wynik.bmp", "wb");

fwrite(image2, 1, sizeof(512*512+54), nk);

}[/codebox]

kernel code remains the same.

I have never used fwrite, but I think you are using it the wrong way.
size_t fwrite ( const void * ptr, size_t size, size_t count, FILE * stream ); where size is the size of one element and count is the number of elements.

So in your case size should be sizeof(char), because your image format is CL_UNSIGNED_INT8 (i.e. an integer coded on 8 bits), and count should be the number of elements: 512*512+54.
Right now you are telling fwrite that your elements are 1 byte in size and that it should copy sizeof(512*512+54) = sizeof(int) = 4 elements…
Just try: fwrite(image2, sizeof(char), 512*512+54, nk)
By the way, why are you copying 512*512 **+54**? Your image is only of size 512*512.

my bad, sizeof() was unnecessary.

So now I read the BMP header (54 bytes) into one buffer, and then read the BMP data (512*512*3 bytes, because of the 3 color channels) into another.

The kernel sets the values of the R and G channels to zero. Unfortunately, my output looks like a CRT screen: RGB vertical stripes overlaid on the output image (outputt.bmp is attached; the thumbnail shows a zoomed part of the image).

The cl_image_format seems to be OK and there are no errors either, so I'm out of ideas on how to solve it.

kernel.cl

[codebox]

// OpenCL C kernel: copies one pixel per work-item from imageIn to imageOut
// with the first two channels (x and y, i.e. R and G for a CL_RGBA image)
// set to zero.
// The pixel position is the work-item's 2-D global id. The input is sampled
// with unnormalized integer coordinates, clamped addressing and nearest
// filtering (no interpolation).
// Fix: CLK_FILTER_NEAREST had been split across two lines by the forum
// formatting, which made the kernel source invalid.
__kernel void copy(__read_only image2d_t imageIn,__write_only image2d_t imageOut)

{

const sampler_t sampler=CLK_NORMALIZED_COORDS_FALSE|CLK_ADDRESS_CLAMP|CLK_FILTER_NEAREST;

// x / y position of this work-item's pixel

int gid0 = get_global_id(0);

int gid1 = get_global_id(1);

uint4 pixel;

pixel=read_imageui(imageIn,sampler,(int2)(gid0,gid1));

// clear the first two channels, keep z and w as read

pixel.x = 0;

pixel.y = 0;

write_imageui (imageOut,(int2)(gid0,gid1),pixel);

}[/codebox]

vec.cpp

[codebox]

#include <oclUtils.h>

// File-scope state shared by main().
// Repair of forum-formatting damage: the kernel file name uses plain ASCII
// quotes (the curly quotes in the post are not valid in a string literal).

const char* cSourceFile = "kernel.cl"; // OpenCL C source file holding the "copy" kernel

// OpenCL Vars

cl_context cxGPUContext; // OpenCL context

cl_command_queue cqCommandQue; // OpenCL command que

cl_device_id* cdDevices; // OpenCL device list

cl_program cpProgram; // OpenCL program

cl_kernel ckKernel; // OpenCL kernel

// NOTE(review): the three buffers below are not referenced by the main() shown
// in this post; they look like leftovers from a vector-add sample.

cl_mem cmDevSrcA; // OpenCL device source buffer A

cl_mem cmDevSrcB; // OpenCL device source buffer B

cl_mem cmDevDst; // OpenCL device destination buffer

size_t szGlobalWorkSize[2]; // global work size, filled in by main() before the launch

size_t szLocalWorkSize[2]; // work-group size, filled in by main() before the launch

size_t szParmDataBytes; // Byte size of context information

size_t szKernelLength; // Byte size of kernel code

cl_int ciErr1, ciErr2; // Error code var

char* cPathAndName = NULL; // var for full paths to data, src, etc.

char* cSourceCL = NULL; // Buffer to hold source for compilation

shrBOOL bNoPrompt = shrFALSE; // set from the "noprompt" command line flag

// Main function

// ************************************************************


// Program entry point (third draft posted in the thread).
//
// Sequence: read the 54-byte BMP header and 3*512*512 bytes of pixel data from
// in.bmp, create a GPU context and command queue, create two 512x512 CL_RGBA /
// CL_UNSIGNED_INT8 images, build the "copy" kernel from kernel.cl, upload the
// pixel buffer, run the kernel, read the result back and write header plus
// 3*512*512 result bytes to outputt.bmp. OpenCL failures are only logged;
// execution always continues with the next step.
int main(int argc, char **argv)

{

// get command line arg for quick test, if provided

bNoPrompt = shrCheckCmdLineFlag(argc, (const char**)argv, "noprompt");

// start logs

shrSetLogFileName ("vec.txt");

// NOTE(review): result receives the fread return values but is never checked,
// and input is not tested for NULL.

size_t result=0;

unsigned char header [54];

FILE *input = fopen("in.bmp", "rb");

result = fread(header,1,54,input); 

// The stream is already at offset 54 if the fread above returned 54 bytes.

fseek (input, 54, SEEK_SET);

// NOTE(review): the buffer has room for 4 bytes per pixel, but only 3 bytes
// per pixel are read from the file, packed back to back at its start. The
// last 512*512 bytes stay uninitialized.

unsigned char *tab;

tab = (unsigned char*)malloc(512*512*4);

result = fread(tab, 1, 3*512*512, input);

fclose(input);

void * image= (unsigned char *)tab;

// Host buffer that receives the result image (4 bytes per pixel).

unsigned char *tab2;

tab2 = (unsigned char*)malloc(512*512*4);

void * image2= (unsigned char *)tab2;

// Create the OpenCL context on a GPU device

cxGPUContext = clCreateContextFromType(0, CL_DEVICE_TYPE_GPU, NULL, NULL, &ciErr1);

shrLog(LOGBOTH, 0.0, "clCreateContextFromType...\n"); 

if (ciErr1 != CL_SUCCESS)

	{shrLog(LOGBOTH, 0.0, "Error in clCreateContextFromType, Line %u in file %s !!!\n\n", __LINE__, __FILE__);}

// Get the list of GPU devices associated with context
// (first call asks for the byte size of the list, second call fills it)

ciErr1 = clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, 0, NULL, &szParmDataBytes);

cdDevices = (cl_device_id*)malloc(szParmDataBytes);

ciErr1 |= clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, szParmDataBytes, cdDevices, NULL);

shrLog(LOGBOTH, 0.0, "clGetContextInfo...\n"); 

if (ciErr1 != CL_SUCCESS)

	{shrLog(LOGBOTH, 0.0, "Error in clGetContextInfo, Line %u in file %s !!!\n\n", __LINE__, __FILE__);}

// Create a command-queue on the first device of the context

cqCommandQue = clCreateCommandQueue(cxGPUContext, cdDevices[0], 0, &ciErr1);

shrLog(LOGBOTH, 0.0, "clCreateCommandQueue...\n"); 

if (ciErr1 != CL_SUCCESS)

	{shrLog(LOGBOTH, 0.0, "Error in clCreateCommandQueue, Line %u in file %s !!!\n\n", __LINE__, __FILE__);}

// Create the two 2D image objects (source and result)

size_t width = 512;

size_t height = 512;

size_t rowpitch = 0;

// Four 8-bit unsigned channels per pixel, i.e. 4 bytes per pixel in host memory.

cl_image_format format;

format.image_channel_order = CL_RGBA;

format.image_channel_data_type = CL_UNSIGNED_INT8;

cl_mem_flags flags;

flags = CL_MEM_READ_ONLY; 

shrLog(LOGBOTH, 0.0, "clCreateImage 1...\n"); 

// Input image: no host pointer here, the data is uploaded later with clEnqueueWriteImage.

cl_mem myClImage = clCreateImage2D(

			cxGPUContext,            

			flags,     

			&format,       

			width,       

			height,        

			rowpitch,      

	 		0,          

			&ciErr1     

   ); 

if (ciErr1 != CL_SUCCESS)

	{shrLog(LOGBOTH, 0.0, "Error in clCreateImage2d 1, Line %u in file %s !!!\n\n", __LINE__, __FILE__);}

shrLog(LOGBOTH, 0.0, "clCreateImage 2...\n"); 

cl_mem_flags flags2;

flags2 = CL_MEM_WRITE_ONLY; 

// Output image: written by the kernel, read back with clEnqueueReadImage.

cl_mem myClImage2 = clCreateImage2D(

	       cxGPUContext,          

	       flags2,               

	       &format,            

	       width,             

	       height,            

	       rowpitch,      

	       0,//image2,              

	       &ciErr1                 

   ); 

//ciErr1 |= ciErr2;

	if (ciErr1 != CL_SUCCESS)

	{shrLog(LOGBOTH, 0.0, "Error in clCreateImage2D 2, Line %u in file %s !!!\n\n", __LINE__, __FILE__);}

// Read the OpenCL kernel in from source file

shrLog(LOGBOTH, 0.0, "oclLoadProgSource (%s)...\n", cSourceFile); 

cPathAndName = shrFindFilePath(cSourceFile, argv[0]);

cSourceCL = oclLoadProgSource(cPathAndName, "", &szKernelLength);

// Create the program

cpProgram = clCreateProgramWithSource(cxGPUContext, 1, (const char **)&cSourceCL, &szKernelLength, &ciErr1);

shrLog(LOGBOTH, 0.0, "clCreateProgramWithSource...\n"); 

if (ciErr1 != CL_SUCCESS)

	{shrLog(LOGBOTH, 0.0, "Error in clCreateProgramWithSource, Line %u in file %s !!!\n\n", __LINE__, __FILE__);}

// Build the program

ciErr1 = clBuildProgram(cpProgram, 0, NULL, NULL, NULL, NULL);

shrLog(LOGBOTH, 0.0, "clBuildProgram...\n"); 

if (ciErr1 != CL_SUCCESS)

	{shrLog(LOGBOTH, 0.0, "Error in clBuildProgram, Line %u in file %s !!!\n\n", __LINE__, __FILE__);}

// Create the kernel

ckKernel = clCreateKernel(cpProgram, "copy", &ciErr1);

shrLog(LOGBOTH, 0.0, "clCreateKernel (copy)...\n"); 

if (ciErr1 != CL_SUCCESS)

	{shrLog(LOGBOTH, 0.0, "Error in clCreateKernel, Line %u in file %s !!!\n\n", __LINE__, __FILE__);}

// Set the Argument values: arg 0 = input image, arg 1 = output image

ciErr1 = clSetKernelArg(ckKernel, 0, sizeof(cl_mem), (void*)&myClImage);

shrLog(LOGBOTH, 0.0, "clSetKernelArg 0...\n"); 

if (ciErr1 != CL_SUCCESS)

	{shrLog(LOGBOTH, 0.0, "Error in clSetKernelArg, Line %u in file %s !!!\n\n", __LINE__, __FILE__);}

ciErr1 = clSetKernelArg(ckKernel, 1, sizeof(cl_mem), (void*)&myClImage2);

shrLog(LOGBOTH, 0.0, "clSetKernelArg 1...\n"); 

if (ciErr1 != CL_SUCCESS)

	{shrLog(LOGBOTH, 0.0, "Error in clSetKernelArg, Line %u in file %s !!!\n\n", __LINE__, __FILE__);}

// --------------------------------------------------------

// Start Core sequence... copy input data to GPU, compute, copy results back

size_t origin[3];

origin[0] = 0;

origin[1] = 0;

origin[2] = 0;

// Whole picture: {width, height, 1}.

size_t region[3];

region[0] = width;

region[1] = height; 

region[2] = 1;

shrLog(LOGBOTH, 0.0, "clEnqueueWriteImage...\n"); 

// NOTE(review): the upload treats the host buffer as 4 bytes per pixel (row
// pitch = width*4), while the bytes read from the file are 3 per pixel. From
// the second pixel on, the channels of each image pixel therefore come from
// the wrong file bytes.

ciErr1 =  clEnqueueWriteImage  (

	cqCommandQue,	 

	myClImage,	 

	CL_TRUE,	 

	origin, 	 

	region,		 

	width*sizeof(char)*4,		//size_t input_row_pitch,	

	0,		//width*sizeof(char)*height,//size_t input_slice_pitch,

	image,		//const void * ptr,

	0,		 

	NULL,		 

	NULL		 

);

if (ciErr1 != CL_SUCCESS)

	{shrLog(LOGBOTH, 0.0, "Error in clEnqueueWriteImage, Line %u in file %s !!!\n\n", __LINE__, __FILE__);}

// work sizes for the launch: one work-item per pixel, 16x16 work-groups

szGlobalWorkSize[0] = 512;

szGlobalWorkSize[1] = 512;

szLocalWorkSize[0] = 16;

szLocalWorkSize[1] = 16;

// Launch kernel

ciErr1 = clEnqueueNDRangeKernel(cqCommandQue, ckKernel, 2, NULL, szGlobalWorkSize, szLocalWorkSize, 0, NULL, NULL);

// Wait for the kernel to finish; the enqueue result is only checked afterwards.

clFinish(cqCommandQue);

// NOTE(review): the log text says VectorAdd, but the kernel launched is "copy".

shrLog(LOGBOTH, 0.0, "clEnqueueNDRangeKernel (VectorAdd)...\n"); 

if (ciErr1 != CL_SUCCESS)

	{shrLog(LOGBOTH, 0.0, "leRROR Error in clEnqueueNDRangeKernel, Line %u in file %s !!!\n\n", __LINE__, __FILE__);}

shrLog(LOGBOTH, 0.0, "clEnqueueReadImage ...\n"); 

// Blocking read of the whole result image into image2 (row pitch 0 = tightly
// packed rows of 4-byte pixels).

ciErr1 = clEnqueueReadImage  (

	cqCommandQue,		 

	myClImage2,		 

	CL_TRUE,		 

	origin,			 

	region,			 

	0,		//width*sizeof(char), 	//size_t row_pitch,

	0, 			//width*sizeof(char)*height,

	image2,			//void *ptr,

	0,			 

	NULL,		 

	NULL			 

);

if (ciErr1 != CL_SUCCESS)

	{shrLog(LOGBOTH, 0.0, "Error in clEnqueueReadImage, Line %u in file %s !!!\n\n", __LINE__, __FILE__);}

// NOTE(review): the original 54-byte header is reused unchanged, and only the
// first 3*512*512 bytes of the 4-byte-per-pixel result are written after it.
// nk is not checked for NULL and the fwrite results are ignored.

FILE *nk = fopen("outputt.bmp", "wb");

fwrite(header,1,54,nk);

fwrite(image2, 1, (3*512*512), nk);

fclose(nk);

shrLog(LOGBOTH, 0.0, "Finish success\n\n");

}

[/codebox]
zoom.jpg
outputt.bmp (768 KB)

zoom.jpg

It looks to me that your image in the BMP file is 24 bits while your image object is 32 bits. You need to transform the 24 bits bitmap to 32 bits before copying it into the image object.

that’s right :). thank you for solving my problem :thumbsup: .

any clue how to cast this 24b to 32b? :confused:

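–> You can do it with GIMP: add an alpha channel and export the picture as a 32-bit RGBA BMP. Alternatively, expand the data in your program: copy the 3 bytes of each pixel read from the file into a 4-byte slot and set the fourth byte (alpha) to 255, then drop that byte again before writing a 24-bit file.

–> Also pay attention to the element type of your array (a char is not an int) and to the byte order of the buffer (a 24-bit BMP stores each pixel as B, G, R) ;)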