Can someone test my code, while I have CC 1.2?

erkan.tairi · April 28, 2013, 12:05pm

I have the following code for Mandelbrot set calculation. After some tweaking, I managed to remove some of the errors regarding the function call. Now I have a few errors about double precision operations. Because my GPU has compute capability 1.2, I cannot use double precision, because, as far as I know, support for it was added in compute capability 1.3. Therefore, I would be glad if someone who has a GPU with CC 1.3 or greater could compile my code on their computer and tell me the result. Below you can find the error message that I get when running my program, along with the messages from -Minfo, how I try to compile, and my code.

How I compile:

pgcc -acc -Minfo=accel -ta=nvidia,cc12 -lm -o mandopenacc mandopenacc.c

Messages:

mandelbrot:
34, Generating present_or_copyout(m[0:400][0:600])
Generating NVIDIA code
Double precision operations disable compute capability 1.0 kernel
35, Loop is parallelizable
36, Loop is parallelizable
Accelerator kernel generated
35, #pragma acc loop gang /* blockIdx.y */
36, #pragma acc loop gang, vector(128) /* blockIdx.x threadIdx.x */
39, Double precision operations disable compute capability 1.2 kernel
40, Double precision operations disable compute capability 1.2 kernel
41, Double precision operations disable compute capability 1.2 kernel
45, Loop carried scalar dependence for ‘newRe’ at line 47
Scalar last value needed after loop for ‘newRe’ at line 66
Loop carried scalar dependence for ‘newIm’ at line 48
Scalar last value needed after loop for ‘newIm’ at line 66
Inner sequential loop scheduled on accelerator
47, Double precision operations disable compute capability 1.2 kernel
48, Double precision operations disable compute capability 1.2 kernel
50, Double precision operations disable compute capability 1.2 kernel
51, Double precision operations disable compute capability 1.2 kernel
53, Double precision operations disable compute capability 1.2 kernel
66, Double precision operations disable compute capability 1.2 kernel
67, Double precision operations disable compute capability 1.2 kernel

Error that I get after running the program:

etairi@Progex:~/Projects/mandelbrot$ ./mandopenacc 20000 > out1.ppm
call to cuModuleLoadData returned error 200: Invalid image

Code:

// mandopenacc.c
// to compile: pgcc -acc -Minfo=accel -ta=nvidia,cc12 -o mandopenacc mandopenacc.c
// usage: ./mandopenacc <no_of_iterations> > output.ppm

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#include <openacc.h>

typedef struct {
	int r, g, b;
} rgb;


void mandelbrot(int niterations, rgb **m)
{
	int w = 600, h = 400, x, y, i; 
	// each iteration, it calculates: newz = oldz*oldz + p, 
	// where p is the current pixel, and oldz stars at the origin
	double pr, pi;                   // real and imaginary part of the pixel p
	double newRe, newIm, oldRe, oldIm;   // real and imaginary parts of new and old z
	double zoom = 1, moveX = -0.5, moveY = 0; // you can change these to zoom and change position
	
	//loop through every pixel
	#pragma acc kernels loop private(y,x,i,newRe,newIm,oldRe,oldIm)
	for(y = 0; y < h; y++) {
		for(x = 0; x < w; x++) {
			// calculate the initial real and imaginary part of z, 
			// based on the pixel location and zoom and position values
			pr = 1.5 * (x - w / 2) / (0.5 * zoom * w) + moveX;
	        	pi = (y - h / 2) / (0.5 * zoom * h) + moveY;
	        	newRe = newIm = oldRe = oldIm = 0; //these should start at 0,0
	        	// i will represent the number of iterations
	        	// start the iteration process
	        	for(i = 0; i < niterations; i++) {
            			// remember value of previous iteration
            			oldRe = newRe;
            			oldIm = newIm;
            			// the actual iteration, the real and imaginary part are calculated
            			newRe = oldRe * oldRe - oldIm * oldIm + pr;
            			newIm = 2 * oldRe * oldIm + pi;
            			// if the point is outside the circle with radius 2: stop
            			if((newRe * newRe + newIm * newIm) > 4) break;
        		}
        
       			if(i == niterations)
			{
				m[y][x].r = 0;
				m[y][x].g = 0;
				m[y][x].b = 0;
			}
			else
			{
				// normalized iteration count method for proper coloring
				double z = sqrt(newRe * newRe + newIm * newIm);
				int brightness = 256. * log(1.75 + i - log(log(z))) / log((double)niterations);
				m[y][x].r = brightness;
				m[y][x].g = brightness;
				m[y][x].b = 255;
			}

    		}
	}

}

/*
 * Entry point: parse the iteration count from argv[1], render the image and
 * write it to stdout as a binary PPM (P6). Diagnostics and the elapsed CPU
 * time go to stderr so they never end up inside the redirected image.
 *
 * Returns 0 on success; exits with status 1 on bad arguments, allocation
 * failure or a failed write.
 */
int main(int argc, char *argv[])
{
	enum { WIDTH = 600, HEIGHT = 400 };	/* same fixed size that mandelbrot() renders */
	int niterations, i, j, status = 0;
	long requested;
	char *end;
	rgb **m;
	clock_t begin, finish;
	double time_spent;

	if(argc != 2)
	{
		fprintf(stderr, "Usage: %s <no_of_iterations> > output.ppm\n", argv[0]);
		exit(1);
	}

	/* strtol instead of atoi so that non-numeric input is rejected rather
	 * than silently read as 0; the int round-trip rejects values that do
	 * not fit in niterations. */
	requested = strtol(argv[1], &end, 10);
	if(end == argv[1] || *end != '\0' || requested < 1 || (long)(int)requested != requested)
	{
		fprintf(stderr, "%s: iteration count must be a positive integer, got '%s'\n", argv[0], argv[1]);
		exit(1);
	}
	niterations = (int)requested;

	m = malloc(HEIGHT * sizeof *m);
	if(m == NULL)
	{
		fprintf(stderr, "%s: out of memory\n", argv[0]);
		exit(1);
	}
	for(i = 0; i < HEIGHT; i++)
	{
		m[i] = malloc(WIDTH * sizeof *m[i]);
		if(m[i] == NULL)
		{
			fprintf(stderr, "%s: out of memory\n", argv[0]);
			exit(1);
		}
	}

	/* the measured interval covers both the computation and the output */
	begin = clock();
	mandelbrot(niterations, m);

	printf("P6\n# AUTHOR: Erkan Tairi\n");
	printf("%d %d\n255\n",600,400);

	for(i = 0; i < HEIGHT; i++) {
		for(j = 0; j < WIDTH; j++) {
			fputc((unsigned char)m[i][j].r, stdout);
			fputc((unsigned char)m[i][j].g, stdout);
			fputc((unsigned char)m[i][j].b, stdout);
		}
	}

	/* a full disk or closed pipe would otherwise leave a truncated image unnoticed */
	if(fflush(stdout) != 0 || ferror(stdout))
	{
		fprintf(stderr, "%s: error while writing the image\n", argv[0]);
		status = 1;
	}

	finish = clock();

	time_spent = (double)(finish - begin) / CLOCKS_PER_SEC;
	fprintf(stderr, "Elapsed time: %.2lf seconds.\n", time_spent);

	for(i = 0; i < HEIGHT; i++)
		free(m[i]);
	free(m);

	return status;
}

MatColgrove · April 29, 2013, 10:13pm

Hi erkant,

Alternatively, you can compile to single precision.

% cat mandlebrot.c 
// mandopenacc.c
// to compile: pgcc -acc -Minfo=accel -ta=nvidia,cc12 -o mandopenacc mandopenacc.c
// usage: ./mandopenacc <no_of_iterations> > output.ppm

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#ifdef _OPENACC
#include <openacc.h>
#endif

#ifdef SINGLE
#define REAL float
#define LOG logf
#define SQRT sqrtf
#else
#define REAL double
#define LOG log
#define SQRT sqrt
#endif

typedef struct {
  int r, g, b;
} rgb;


void mandelbrot(int niterations, rgb **m)
{
  int w = 600, h = 400, x, y, i;
  // each iteration, it calculates: newz = oldz*oldz + p,
  // where p is the current pixel, and oldz stars at the origin
  REAL pr, pi;                   // real and imaginary part of the pixel p
  REAL newRe, newIm, oldRe, oldIm;   // real and imaginary parts of new and old z
  REAL zoom = 1, moveX = -0.5, moveY = 0; // you can change these to zoom and change position
   
  //loop through every pixel
#pragma acc kernels loop private(newRe,newIm,oldRe,oldIm)
  for(y = 0; y < h; y++) {
    for(x = 0; x < w; x++) {
      // calculate the initial real and imaginary part of z,
      // based on the pixel location and zoom and position values
      pr = 1.5 * (x - w / 2) / (0.5 * zoom * w) + moveX;
      pi = (y - h / 2) / (0.5 * zoom * h) + moveY;
      newRe = newIm = oldRe = oldIm = 0; //these should start at 0,0
      // i will represent the number of iterations
      // start the iteration process
      for(i = 0; i < niterations; i++) {
	// remember value of previous iteration
	oldRe = newRe;
	oldIm = newIm;
	// the actual iteration, the real and imaginary part are calculated
	newRe = oldRe * oldRe - oldIm * oldIm + pr;
	newIm = 2 * oldRe * oldIm + pi;
	// if the point is outside the circle with radius 2: stop
	if((newRe * newRe + newIm * newIm) > 4) break;
      }
       
      if(i == niterations)
	{
	  m[y][x].r = 0;
	  m[y][x].g = 0;
	  m[y][x].b = 0;
	}
      else
	{
	  // normalized iteration count method for proper coloring
	  REAL z = SQRT(newRe * newRe + newIm * newIm);
	  int brightness = 256. * LOG(1.75 + i - LOG(LOG(z))) / LOG((REAL)niterations);
	  m[y][x].r = brightness;
	  m[y][x].g = brightness;
	  m[y][x].b = 255;
	}

    }
  }

}

/*
 * Entry point: parse the iteration count from argv[1], render the image and
 * write it to stdout as a binary PPM (P6). Diagnostics and the elapsed CPU
 * time go to stderr so they never end up inside the redirected image.
 *
 * Returns 0 on success; exits with status 1 on bad arguments, allocation
 * failure or a failed write.
 */
int main(int argc, char *argv[])
{
  enum { WIDTH = 600, HEIGHT = 400 };  /* same fixed size that mandelbrot() renders */
  int niterations, i, j, status = 0;
  long requested;
  char *end;
  rgb **m;
  clock_t begin, finish;
  /* host-side timing is kept in double regardless of the precision chosen
   * for the accelerator arithmetic */
  double time_spent;

  if(argc != 2)
    {
      fprintf(stderr, "Usage: %s <no_of_iterations> > output.ppm\n", argv[0]);
      exit(1);
    }

  /* strtol instead of atoi so that non-numeric input is rejected rather
   * than silently read as 0; the int round-trip rejects values that do
   * not fit in niterations. */
  requested = strtol(argv[1], &end, 10);
  if(end == argv[1] || *end != '\0' || requested < 1 || (long)(int)requested != requested)
    {
      fprintf(stderr, "%s: iteration count must be a positive integer, got '%s'\n", argv[0], argv[1]);
      exit(1);
    }
  niterations = (int)requested;

  m = malloc(HEIGHT * sizeof *m);
  if(m == NULL)
    {
      fprintf(stderr, "%s: out of memory\n", argv[0]);
      exit(1);
    }
  for(i = 0; i < HEIGHT; i++)
    {
      m[i] = malloc(WIDTH * sizeof *m[i]);
      if(m[i] == NULL)
        {
          fprintf(stderr, "%s: out of memory\n", argv[0]);
          exit(1);
        }
    }

  /* the measured interval covers both the computation and the output */
  begin = clock();
  mandelbrot(niterations, m);

  printf("P6\n# AUTHOR: Erkan Tairi\n");
  printf("%d %d\n255\n",600,400);

  for(i = 0; i < HEIGHT; i++) {
    for(j = 0; j < WIDTH; j++) {
      fputc((unsigned char)m[i][j].r, stdout);
      fputc((unsigned char)m[i][j].g, stdout);
      fputc((unsigned char)m[i][j].b, stdout);
    }
  }

  /* a full disk or closed pipe would otherwise leave a truncated image unnoticed */
  if(fflush(stdout) != 0 || ferror(stdout))
    {
      fprintf(stderr, "%s: error while writing the image\n", argv[0]);
      status = 1;
    }

  finish = clock();

  time_spent = (double)(finish - begin) / CLOCKS_PER_SEC;
  fprintf(stderr, "Elapsed time: %.2lf seconds.\n", time_spent);

  for(i = 0; i < HEIGHT; i++)
    free(m[i]);
  free(m);

  return status;
}
% pgcc mandlebrot.c -acc -Minfo=accel -V13.5 -ta=nvidia,4.2,cc12 -DSINGLE -Mfcon
mandelbrot:
     38, Generating present_or_copyout(m[0:400][0:600])
         Generating NVIDIA code
         Generating compute capability 1.2 binary
     39, Loop is parallelizable
     40, Loop is parallelizable
         Accelerator kernel generated
         39, #pragma acc loop gang /* blockIdx.y */
         40, #pragma acc loop gang, vector(128) /* blockIdx.x threadIdx.x */
     48, Loop carried scalar dependence for 'newRe' at line 50
         Scalar last value needed after loop for 'newRe' at line 68
         Loop carried scalar dependence for 'newIm' at line 51
         Scalar last value needed after loop for 'newIm' at line 68
         Inner sequential loop scheduled on accelerator
     57, Accelerator restriction: induction variable live-out from loop: i
% a.out 1024 > output.ppm
Elapsed time: 0.43 seconds.

Mat

Topic		Replies	Views
Compiler failed to translate accelerator region Legacy PGI Compilers	9	6799	June 26, 2013
Could not find GPU binary file Legacy PGI Compilers	2	3348	April 21, 2012
Help with 'double precision' CUDA Programming and Performance	8	2975	July 29, 2008
How to compile existing C/C++ project w/ NVIDIA GPU? Legacy PGI Compilers	11	9531	August 3, 2012
double precision Legacy PGI Compilers	6	18689	July 15, 2009
error for a simple OPENACC program Legacy PGI Compilers	23	11960	May 16, 2013
Not enough compute capability? Legacy PGI Compilers	2	2306	October 13, 2010
Differences between Host and Accel Code Legacy PGI Compilers	1	2419	September 6, 2011
cuModuleLoadData error 209 Legacy PGI Compilers	7	16343	February 10, 2015
using cuda libraries with OpenACC Legacy PGI Compilers	1	6064	July 13, 2012

Can someone test my code, while I have CC 1.2?

Related topics