Hi again :-)
Thank you very much for answering to my previous questions. At this time, I have a problem with compiling error.
pgc++ -acc -w -ta=nvidia:cuda6.0 -fast -Minfo=accel -time -O2 `pkg-config --cflags opencv` -c -o main2.o main2.cpp
Image::Image(cv::Mat):
31, Generating enter data copyin(this[:1])
Generating enter data create(img_data[:length])
41, Generating update device(this[:1])
Generating update device(img_data[:length])
Image::~Image():
48, Generating exit data delete(img_data[:length])
Generating exit data delete(this[:1])
PGCC-S-0155-Accelerator region ignored; see -Minfo messages (main2.cpp: 65)
Image::integralImage():
65, Generating copyout(ii[:ii_length])
Accelerator region ignored
70, Accelerator restriction: function/procedure calls are not supported
80, Accelerator restriction: unsupported call to '_ZNSolsEi'
Timing stats:
init 16 millisecs 8%
parser 167 millisecs 83%
schedule 17 millisecs 8%
Total time 200 millisecs
PGCC/x86 Linux 14.7-0: compilation completed with severe errors
make: *** [main2.o] Error 2
I am trying to implement integral image function within my Image class. What if I remove the pragmas in the integral image function. It compiles with no problem.
#include <stdlib.h>
#include <iostream>
#include <opencv2/opencv.hpp>
#include "/opt/pgi/linux86-64/2014/include/openacc.h"
using namespace std;
using namespace cv;
class Image
{
private:
int row;
int col;
int length;
int *img_data;
public:
Image()
{
}
;
Image(Mat img)
{
row = img.rows;
col = img.cols;
length = row * col;
img_data = (int*) malloc(sizeof(int) * length);
#pragma acc enter data copyin(this)
#pragma acc enter data create(img_data[0:length])
uchar* tmp_data = img.data;
for (int i = 0; i < length; i++)
{
img_data[i] = (int) tmp_data[i];
}
#pragma acc update device(this)
#pragma acc update device(img_data[0:length])
}
~Image()
{
delete[] img_data;
#pragma acc exit data delete(img_data[0:length])
#pragma acc exit data delete(this)
}
int* integralImage()
{
int ii_width = this->col+1;
int ii_height =this->row +1;
int ii_length = ii_width*ii_height;
int* ii = (int*) malloc(sizeof(int)*ii_length);
#pragma acc data copyout(ii[0:ii_length])
#pragma acc kernels loop
for(int i = 0; i< ii_height;i++)
{
for(int j = 0; j<ii_width; j++)
{
if(i==0 || j==0)
{
ii[ii_width*i+j] = 0;
}else
{
int val = img_data[(ii_width-1)*(i-1)+(j-1)];
ii[ii_width*i+j] =val + ii[ii_width*(i-1)+j]+ii[ii_width*i+(j-1)]-ii[ii_width*(i-1)+(j-1)];
}
cout<<ii[ii_width*i+j]<<" ";
}
cout<<endl;
}
return ii;
}
int * get_img()
{
return img_data;
}
};
int main(int argc, char *argv[])
{
// Verify that the class Image will work with OpenACC.
// for more discussion about why the C++ classes MUST be trivially
// copyable, see
// http://www.drdobbs.com/parallel/cuda-unifying-hostdevice-interactions-wi/240161436
if (__has_trivial_copy(Image) == true)
{
cout << "Congrat! Image will work with OpenACC copy operations "
<< endl;
}
else
{
cout << "OOPS! Image will NOT work with OpenACC copy operations "
<< endl;
return 1;
}
Mat img = imread("/home/dongyoung/Downloads/test.jpg",
CV_LOAD_IMAGE_GRAYSCALE);
Image foo(img);
foo.integralImage();
cout << "Nicely done! :D" << endl;
return 0;
}
Please review my code to find any problems.
Many Thanks,
DK