Hey folks!
I tried to run a small “Hello World” program, which is showing “1.234” as output if it is successful.
It is only writing this number within a helloWorld kernel to global memory and should make a copy into a host variable.
Unfortunately I get 0.0000 as output, and I don’t know, what’s wrong with this code snippet.
Do you see the what is wrong?
helloWorld.cl
__kernel void helloWorld( __global float *x )
{
(*x) = 1.234;
return;
};
helloWorld.cpp
#include <CL/cl.h>
#include <CL/cl_platform.h>
#include <string>
#include <cstdlib>
#include <cstdio>
char* sourceToString( const char* file, size_t* kernelLength )
{
FILE* fileHandle = fopen( file, "rb" );
size_t sourceLength;
if( fileHandle == 0 )
{
return NULL;
}
fseek( fileHandle, 0, SEEK_END );
sourceLength = ftell( fileHandle );
fseek( fileHandle, 0, SEEK_SET );
char* string = (char *)malloc( sourceLength + 1 );
if( fread( ( string ), sourceLength, 1, fileHandle ) != 1)
{
fclose( fileHandle );
free( string );
return 0;
}
fclose( fileHandle );
string[sourceLength] = '
#include <CL/cl.h>
#include <CL/cl_platform.h>
char* sourceToString( const char* file, size_t* kernelLength )
{
FILE* fileHandle = fopen( file, "rb" );
size_t sourceLength;
if( fileHandle == 0 )
{
return NULL;
}
fseek( fileHandle, 0, SEEK_END );
sourceLength = ftell( fileHandle );
fseek( fileHandle, 0, SEEK_SET );
char* string = (char *)malloc( sourceLength + 1 );
if( fread( ( string ), sourceLength, 1, fileHandle ) != 1)
{
fclose( fileHandle );
free( string );
return 0;
}
fclose( fileHandle );
string[sourceLength] = '\0';
return string;
}
int main()
{
const unsigned int BlockSize = 1;
const unsigned int Blocks = 1;
const unsigned int Dimension = Blocks * BlockSize;
float* srcA = (float*)malloc(sizeof(cl_float) * Dimension );
srcA[0] = 1.0;
cl_context hContext = clCreateContextFromType( 0, CL_DEVICE_TYPE_GPU, 0, 0, 0 );
size_t nContextDescriptorSize;
clGetContextInfo( hContext, CL_CONTEXT_DEVICES, 0, 0, &nContextDescriptorSize );
cl_device_id aDevices = (cl_device_id)malloc( nContextDescriptorSize );
clGetContextInfo( hContext, CL_CONTEXT_DEVICES, nContextDescriptorSize, aDevices, 0 );
cl_command_queue hCmdQueue;
hCmdQueue = clCreateCommandQueue( hContext, aDevices[ 0 ], 0, 0 );
cl_mem hDeviceMemX = clCreateBuffer( hContext, CL_MEM_WRITE_ONLY , Dimension * sizeof( cl_float ), 0, 0 );
size_t kernelLength;
char *cSourceCL = sourceToString( “./helloWorld.cl”, &kernelLength);
cl_program hProgram = clCreateProgramWithSource( hContext, 1, (const char **)&cSourceCL, &kernelLength, 0 );
clBuildProgram( hProgram, 0, 0, 0, 0, 0 );
cl_kernel hKernel = clCreateKernel( hProgram, “helloWorld”, 0 );
clSetKernelArg( hKernel, 0, sizeof( cl_mem ), (void*)&hDeviceMemX );
clEnqueueNDRangeKernel( hCmdQueue, hKernel, 1, 0, &Dimension, &BlockSize, 0, 0, 0 );
clEnqueueReadBuffer( hCmdQueue, hDeviceMemX, CL_TRUE, 0, Dimension * sizeof( cl_float ), srcA, 0, 0, 0 );
printf( “\n\n%f\n\n”, (*srcA) );
clReleaseMemObject( hDeviceMemX );
delete srcA;
return 0;
}
';
return string;
}
int main()
{
const unsigned int BlockSize = 1;
const unsigned int Blocks = 1;
const unsigned int Dimension = Blocks * BlockSize;
float* srcA = (float*)malloc(sizeof(cl_float) * Dimension );
srcA[0] = 1.0;
cl_context hContext = clCreateContextFromType( 0, CL_DEVICE_TYPE_GPU, 0, 0, 0 );
size_t nContextDescriptorSize;
clGetContextInfo( hContext, CL_CONTEXT_DEVICES, 0, 0, &nContextDescriptorSize );
cl_device_id *aDevices = (cl_device_id*)malloc( nContextDescriptorSize );
clGetContextInfo( hContext, CL_CONTEXT_DEVICES, nContextDescriptorSize, aDevices, 0 );
cl_command_queue hCmdQueue;
hCmdQueue = clCreateCommandQueue( hContext, aDevices[ 0 ], 0, 0 );
cl_mem hDeviceMemX = clCreateBuffer( hContext, CL_MEM_WRITE_ONLY , Dimension * sizeof( cl_float ), 0, 0 );
size_t kernelLength;
char *cSourceCL = sourceToString( "./helloWorld.cl", &kernelLength);
cl_program hProgram = clCreateProgramWithSource( hContext, 1, (const char **)&cSourceCL, &kernelLength, 0 );
clBuildProgram( hProgram, 0, 0, 0, 0, 0 );
cl_kernel hKernel = clCreateKernel( hProgram, "helloWorld", 0 );
clSetKernelArg( hKernel, 0, sizeof( cl_mem ), (void*)&hDeviceMemX );
clEnqueueNDRangeKernel( hCmdQueue, hKernel, 1, 0, &Dimension, &BlockSize, 0, 0, 0 );
clEnqueueReadBuffer( hCmdQueue, hDeviceMemX, CL_TRUE, 0, Dimension * sizeof( cl_float ), srcA, 0, 0, 0 );
printf( "\n\n%f\n\n", (*srcA) );
clReleaseMemObject( hDeviceMemX );
delete[] srcA;
return 0;
}