Hi!
I’m just trying to run a few of the OpenCL samples on my PPC machine. Unfortunately, the MersenneTwister sample segfaults. The reason is that it reads a memory dump generated for a little-endian machine, which is interpreted incorrectly on my machine. Some of these values are used as bounds in a for loop that writes into an array, and eventually into unallocated memory… As I don’t understand the code well enough, I can’t provide a patch that fixes the problem properly. However, the following patch makes the sample run without segfaulting on my machine, but it doesn’t pass the test yet. Can anyone help me out?
Cheers,
Ingo
diff -pur NVIDIA_GPU_Computing_SDK/OpenCL/src/oclMersenneTwister/src/oclMersenneTwister.cpp IBM_patched-NVIDIA_GPU_Computing_SDK/OpenCL/src/oclMersenneTwister/src/oclMersenneTwister.cpp
--- NVIDIA_GPU_Computing_SDK/OpenCL/src/oclMersenneTwister/src/oclMersenneTwister.cpp 2010-03-31 16:20:20.000000000 +0200
+++ IBM_patched-NVIDIA_GPU_Computing_SDK/OpenCL/src/oclMersenneTwister/src/oclMersenneTwister.cpp 2010-03-31 19:14:12.000000000 +0200
@@ -33,6 +33,14 @@ extern "C" void RandomRef(float *h_Rand,
extern "C" void BoxMullerRef(float *h_Rand, int nPerRng);
#endif
+inline void endian_swap(unsigned int& x)
+{
+ x = (x>>24) |
+ ((x<<8) & 0x00FF0000) |
+ ((x>>8) & 0x0000FF00) |
+ (x<<24);
+}
+
///////////////////////////////////////////////////////////////////////////////
//Load twister configurations
///////////////////////////////////////////////////////////////////////////////
@@ -58,8 +66,15 @@ void loadMTGPU(const char *fname,
oclCheckError(0, 1);
}
- for (unsigned int i = 0; i < size; i++)
+ for (unsigned int i = 0; i < size; i++) {
fread(&h_MT[i], sizeof(mt_struct_stripped), 1, fd);
+
+ for( int j = 0; j < 4; j++) {
+ unsigned int x = *((int*)(h_MT+i) + j);
+ endian_swap(x);
+ *((int*)(h_MT+i) + j) = x;
+ }
+ }
fclose(fd);
for(unsigned int i = 0; i < size; i++)
@@ -73,7 +88,7 @@ int main(int argc, const char **argv)
{
cl_context cxGPUContext; // OpenCL context
cl_command_queue cqCommandQueue[MAX_GPU_COUNT]; // OpenCL command que
- cl_platform_id cpPlatform; // OpenCL platform
+ cl_platform_id cpPlatform = NULL; // OpenCL platform
cl_uint nDevice; // OpenCL device count
cl_device_id* cdDevices; // OpenCL device list
cl_program cpProgram; // OpenCL program
@@ -94,12 +109,12 @@ int main(int argc, const char **argv)
shrLog("Get platforms...\n");
ciErr1 = oclGetPlatformID(&cpPlatform);
oclCheckError(ciErr1, CL_SUCCESS);
-
+
shrLog("Get devices...\n");
- ciErr1 = clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_GPU, 0, NULL, &nDevice);
+ ciErr1 = clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_ALL, 0, NULL, &nDevice);
oclCheckError(ciErr1, CL_SUCCESS);
cdDevices = (cl_device_id *)malloc(nDevice * sizeof(cl_device_id) );
- ciErr1 = clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_GPU, nDevice, cdDevices, NULL);
+ ciErr1 = clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_ALL, nDevice, cdDevices, NULL);
oclCheckError(ciErr1, CL_SUCCESS);
shrLog("Create context...\n");
diff -pur NVIDIA_GPU_Computing_SDK/OpenCL/src/oclMersenneTwister/src/oclMersenneTwister_gold.cpp IBM_patched-NVIDIA_GPU_Computing_SDK/OpenCL/src/oclMersenneTwister/src/oclMersenneTwister_gold.cpp
--- NVIDIA_GPU_Computing_SDK/OpenCL/src/oclMersenneTwister/src/oclMersenneTwister_gold.cpp 2010-03-31 16:20:20.000000000 +0200
+++ IBM_patched-NVIDIA_GPU_Computing_SDK/OpenCL/src/oclMersenneTwister/src/oclMersenneTwister_gold.cpp 2010-03-31 19:12:32.000000000 +0200
@@ -20,6 +20,14 @@
static mt_struct MT[MT_RNG_COUNT];
static uint32_t state[MT_NN];
+inline void endian_swap(unsigned int& x)
+{
+ x = (x>>24) |
+ ((x<<8) & 0x00FF0000) |
+ ((x>>8) & 0x0000FF00) |
+ (x<<24);
+}
+
extern "C" void initMTRef(const char *fname){
FILE* fd = 0;
@@ -47,6 +55,11 @@ extern "C" void initMTRef(const char *fn
{
oclCheckError(0, 1);
}
+ for( int j = 0; j < 16; j++) {
+ unsigned int x = *((int*)(MT+i) + j);
+ endian_swap(x);
+ *((int*)(MT+i) + j) = x;
+ }
}
fclose(fd);