get_global_id(1) returning incorrect values

Hello everyone,

I was working on an OpenCL application using the Apple platform and I wanted to test it on Windows. I booted the Windows partition (not a VM) on my MacBook Pro and, after porting the code over, I noticed that some of the kernels were not working correctly. It seems that the get_global_id function does not return the correct value when I pass “1” as the argument. I made a small sample program that reproduces the error.

#include <iostream>

#include <..\cl.hpp>

#include <map>

#include <vector>

#include <iterator>

// OpenCL C source for the single test kernel; it is compiled at run time.
//
// Each work-item stores get_global_id(1) into the slot selected by its local
// ids: outVector[get_local_id(1)*10 + get_local_id(0)]. The `size` argument
// is accepted but never read by the kernel.
//
// The text is written as adjacent string literals with explicit "\n" escapes
// instead of backslash line splicing: a splice followed by a blank line
// leaves the literal unterminated, and real newlines keep the line numbers
// reported in the OpenCL build log meaningful.
std::string source(
    "__kernel void test(__global float* outVector,\n"
    "                   int size)\n"
    "{\n"
    "    outVector[get_local_id(1)*10+get_local_id(0)] = get_global_id(1);\n"
    "}\n");

int main (int argc, const char * argv[])

{

        std::vector<cl::Platform> platforms;

	cl::Platform::get(&platforms);

	std::vector<cl::Device> devices;

	platforms[0].getDevices(CL_DEVICE_TYPE_ALL, &devices);

	cl::Context context(devices);

	cl::CommandQueue queue(context, devices[0]);

	cl::Program::Sources sources(1, std::make_pair(source.c_str(), source.size()));

	cl::Program program(context, sources);

	std::string build_options("");

	program.build(devices,build_options.c_str());

	std::vector<cl::Kernel> program_kernels;

	program.createKernels(&program_kernels);

	std::map<std::string, cl::Kernel> kernels_found;

	std::string kernel_name;

	for ( int i = 0; i < program_kernels.size(); ++i )

	{

	  kernel_name = program_kernels[i].getInfo<CL_KERNEL_FUNCTION_NAME>();

	  std::cout << "Kernel found: " << kernel_name << std::endl;

	  kernels_found[kernel_name] = program_kernels[i];

	}

	std::vector<float> values(100);

	cl::Buffer out_vector(context,CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE, sizeof(float) * 100, &values[0]);

	cl::Kernel test= kernels_found["test"];

	test.setArg(0, out_vector);

	test.setArg(1, 100);	

	queue.enqueueNDRangeKernel(test,0,cl::NDRange(10,10), cl::NDRange(10,10));

	queue.enqueueReadBuffer(out_vector,CL_TRUE, 0,sizeof(float)*100, &values[0]);

	queue.finish();

	std::vector<float>::iterator it;

	std::ostream_iterator<float> out_it(std::cout, "\t");

	for(it = values.begin(); it != values.end(); it += 10)

	{

	  std::copy(it, it+10, out_it);

	  std::cout << std::endl;

	}

	return 0;

}

Output:

Kernel found: test

3.43597e+009    3.43597e+009    3.43597e+009    3.43597e+009    3.43597e+009    3.43597e+009    3.43597e+009    3.43597e+009    3.43597e+009    3.4359

7e+009

3.43597e+009    3.43597e+009    3.43597e+009    3.43597e+009    3.43597e+009    3.43597e+009    3.43597e+009    3.43597e+009    3.43597e+009    3.4359

7e+009

3.43597e+009    3.43597e+009    3.43597e+009    3.43597e+009    3.43597e+009    3.43597e+009    3.43597e+009    3.43597e+009    3.43597e+009    3.4359

7e+009

3.43597e+009    3.43597e+009    3.43597e+009    3.43597e+009    3.43597e+009    3.43597e+009    3.43597e+009    3.43597e+009    3.43597e+009    3.4359

7e+009

3.43597e+009    3.43597e+009    3.43597e+009    3.43597e+009    3.43597e+009    3.43597e+009    3.43597e+009    3.43597e+009    3.43597e+009    3.4359

7e+009

3.43597e+009    3.43597e+009    3.43597e+009    3.43597e+009    3.43597e+009    3.43597e+009    3.43597e+009    3.43597e+009    3.43597e+009    3.4359

7e+009

3.43597e+009    3.43597e+009    3.43597e+009    3.43597e+009    3.43597e+009    3.43597e+009    3.43597e+009    3.43597e+009    3.43597e+009    3.4359

7e+009

3.43597e+009    3.43597e+009    3.43597e+009    3.43597e+009    3.43597e+009    3.43597e+009    3.43597e+009    3.43597e+009    3.43597e+009    3.4359

7e+009

3.43597e+009    3.43597e+009    3.43597e+009    3.43597e+009    3.43597e+009    3.43597e+009    3.43597e+009    3.43597e+009    3.43597e+009    3.4359

7e+009

3.43597e+009    3.43597e+009    3.43597e+009    3.43597e+009    3.43597e+009    3.43597e+009    3.43597e+009    3.43597e+009    3.43597e+009    3.4359

7e+009

Press any key to continue . . .

Extra Info:

Machine: Apple Macbook Pro

Operating System: Windows 7 Professional (BOOTCAMP Partition)

IDE: Visual Studio 2010 Professional

GPU: GeForce 9400M

CUDA Toolkit: 4.0 RC2 (April 2011)

Driver: Notebook Developer Drivers for WinVista and Win7 (270.28)

Platform Version: OpenCL 1.0 CUDA 4.0.1

The get_global_id(0) and get_local_id functions work as expected. It seems like a driver issue. I haven’t found anything on the internet about this problem. Does anyone know of a workaround that would not require me to manually calculate the global id from the local and group ids?

I also noticed that the build fails if the __constant qualifier is used in the parameters of a kernel declaration. This seems to go against the OpenCL specifications.

Thanks,

Umar Arshad

You are not testing return values of the enqueue* and finish() methods. Please, try to verify these are really returning CL_SUCCESS.

Hey FlaviusV,

All functions are returning CL_SUCCESS. I also defined __CL_ENABLE_EXCEPTIONS and no functions are throwing any exceptions. Here is the updated code. It is printing the same output.

#define __CL_ENABLE_EXCEPTIONS

#include <iostream>

#include "cl.hpp"

#include <map>

#include <vector>

#include <iterator>

// OpenCL C source for the single test kernel; it is compiled at run time.
//
// Each work-item stores get_global_id(1) into the slot selected by its local
// ids: outVector[get_local_id(1)*10 + get_local_id(0)]. The `size` argument
// is accepted but never read by the kernel.
//
// The text is written as adjacent string literals with explicit "\n" escapes
// instead of backslash line splicing: a splice followed by a blank line
// leaves the literal unterminated, and real newlines keep the line numbers
// reported in the OpenCL build log meaningful.
std::string source(
    "__kernel void test(__global float* outVector,\n"
    "                   int size)\n"
    "{\n"
    "    outVector[get_local_id(1)*10+get_local_id(0)] = get_global_id(1);\n"
    "}\n");

int main (int argc, const char * argv[])

{

        std::vector<cl::Platform> platforms;

        cl::Platform::get(&platforms);

std::vector<cl::Device> devices;

        platforms[0].getDevices(CL_DEVICE_TYPE_ALL, &devices);

        cl::Context context(devices);

        cl::CommandQueue queue(context, devices[0]);

cl::Program::Sources sources(1, std::make_pair(source.c_str(), source.size()));

        cl::Program program(context, sources);

        std::string build_options("");

        program.build(devices,build_options.c_str());

        std::vector<cl::Kernel> program_kernels;

        program.createKernels(&program_kernels);

        std::map<std::string, cl::Kernel> kernels_found;

std::string kernel_name;

        for ( int i = 0; i < program_kernels.size(); ++i )

        {

          kernel_name = program_kernels[i].getInfo<CL_KERNEL_FUNCTION_NAME>();

          std::cout << "Kernel found: " << kernel_name << std::endl;

          kernels_found[kernel_name] = program_kernels[i];

        }

std::vector<float> values(100);

        cl::Buffer out_vector(context,CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE, sizeof(float) * 100, &values[0]);

cl::Kernel test= kernels_found["test"];

        test.setArg(0, out_vector);

        test.setArg(1, 100);    

	if(queue.enqueueNDRangeKernel(test,0,cl::NDRange(10,10), cl::NDRange(10,10)) != CL_SUCCESS)

		std::cerr << "Problem Enqueueing Kernel" << std::endl;

        if(queue.enqueueReadBuffer(out_vector,CL_TRUE, 0,sizeof(float)*100, &values[0]) != CL_SUCCESS)

		std::cerr << "Problem Enqueueing Read Buffer" << std::endl;

	if(queue.finish() != CL_SUCCESS)

		std::cerr << "Problem with the finish command" << std::endl;

std::vector<float>::iterator it;

        std::ostream_iterator<float> out_it(std::cout, "\t");

        for(it = values.begin(); it != values.end(); it += 10)

        {

          std::copy(it, it+10, out_it);

          std::cout << std::endl;

        }

return 0;

}

Thanks,

Umar Arshad