Hi :-)
I saw some examples on the web and modified one of them a little. Now it gives errors like “FATAL ERROR: variable in data clause is partially present on the device: name=(unknown)”.
The following is my current code:
#include <stdlib.h>
#include
#include <opencv2/opencv.hpp>
#include “/opt/pgi/linux86-64/2014/include/openacc.h”
using namespace std;
using namespace cv;
// Image: copies the pixel bytes of an OpenCV Mat into a table of int rows
// (img_data) and issues OpenACC enter/exit data directives for the object and
// for that table.
// NOTE(review): this listing does not compile as shown; several subscripts
// appear to be missing (possibly lost when the code was posted) -- see the
// notes on the individual lines and confirm against the real file.
class Image
{
private:
int row;
int col;
// row * col, i.e. the total number of pixels.
int length;
// Table of `row` pointers allocated in the constructor.
int **img_data;
// Private default constructor: code outside the class cannot default-construct.
Image()
{
}
;
public:
// Builds the int table from img.rows x img.cols bytes read from img.data.
// NOTE(review): indexing img.data as col * i + j assumes one byte per pixel
// stored contiguously -- confirm for the Mat passed in.
Image(Mat img)
{
row = img.rows;
col = img.cols;
length = row * col;
img_data = (int**) malloc(sizeof(int*) * row);
for (int i = 0; i < row; i++)
{
// NOTE(review): `img_data _=` is not valid C++; presumably this was
// `img_data[i] = ...` (one buffer of `col` ints per row) -- confirm.
img_data _= (int*) malloc(sizeof(int) * col);
}
uchar* tmp_data = img.data;
for (int i = 0; i < row; i++)
{
for (int j = 0; j < col; j++)
{
// NOTE(review): as written this assigns an int to an int* slot; presumably
// `img_data[i][j] = ...` was intended -- confirm.
img_data[j] = (int) tmp_data[col * i + j];
}
}
// The data clause below gives both extents as `length` (= row * col), while
// the host allocation above is `row` pointers of `col` ints each.
#pragma acc enter data pcreate(this)
#pragma acc update device(this)
#pragma acc enter data pcreate(img_data[0:length][0:length])
}
// Releases the host table and issues the matching exit data directives.
~Image()
{
for (int i = 0; i < row; i++)
{
// NOTE(review): this deletes the same pointer on every iteration and uses
// `delete` on memory obtained from malloc; presumably a per-row release of
// img_data[i] was intended -- confirm.
delete img_data;
}
delete img_data;
// These directives run after the host memory has already been released and
// again use `length` for both extents.
#pragma acc exit data delete(img_data[0:length][0:length])
#pragma acc exit data delete(this)
}
// Returns the row-pointer table; the object keeps ownership of it.
int ** get_img()
{
return img_data;
}
};
// Loads a grayscale image, wraps it in an Image, and prints the pixel values.
// NOTE(review): `char argv[]` is not the standard signature (`char *argv[]`);
// an asterisk may have been lost when the code was posted -- confirm.
int main(int argc, char argv[])
{
Mat img = imread(“/home/dongyoung/Downloads/test.jpg”,
CV_LOAD_IMAGE_GRAYSCALE);
Image foo(img);
// NOTE(review): the declared type is int* but the cast is int**; presumably
// `int** img_data` was intended -- confirm.
int* img_data = (int**) malloc(sizeof(int*) * img.rows);
for (int i = 0; i < img.rows; i++)
{
// NOTE(review): as written this overwrites img_data on every iteration;
// presumably `img_data[i] = ...` was intended -- confirm.
img_data = (int*) malloc(sizeof(int) * img.cols);
}
// img_data is re-pointed at the table owned by foo, so the buffers allocated
// above are no longer referenced after this line.
img_data = foo.get_img();
for (int i = 0; i < img.rows; i++)
{
for (int j = 0; j < img.cols; j++)
{
// NOTE(review): the index does not depend on i; presumably
// `img_data[i][j]` was intended -- confirm.
cout << img_data[j] << " ";
}
cout << endl;
}
cout << “Nicely done! :D” << endl;
return 0;
}
The outputs are:
246 244 243 230 208
243 241 239 224 199
242 239 235 217 188
242 240 235 215 184
244 242 236 215 183
Nicely done! :D
FATAL ERROR: variable in data clause is partially present on the device: name=(unknown)
file:/home/dongyoung/workspace_parallel/test2/main2.cpp _ZN5ImageD1Ev line:67
(null) lives at 0x1a4bb80 size 200 not present
Present table dump for device[1]: NVIDIA Tesla GPU 1, compute capability 3.5
host:0x1a4bb80 device:0x700300200 size:200 presentcount:1 line:55 name:(null)
host:0x7fff242f0d10 device:0x700300000 size:24 presentcount:1 line:55 name:T24595880
Thanks in advance,
DK
Hi DK,
When you see a NULL pointer not present error, it typically means that you haven’t attached (associated) some dynamically allocated variable with its parent. In this case, most likely the elements of the “img_data” pointer array aren’t getting updated to point to the new “col”-sized arrays. Note that you’re using “length” instead of “row” and “col”, so the size of “img_data” is not the same as the host copy. I think this is the core problem.
I don’t have CV installed so I removed the CV stuff and wrote up the following example. You can add CV back in.
I like to match the “enter data” pragma to where the mallocs occur. It’s not necessary, but I prefer it as a matter of style.
Note, using 2-D arrays in C/C++ is technically not allowed in OpenACC since data must be contiguous. Though, the OpenACC committee is looking at extending the standard. PGI currently allows it as an extension. However, if you are planning on passing the image data to a CUDA C routine, you may need to linearize the data into a contiguous 1-D array.
Let me know if you have questions,
Mat
% cat test.cpp
#include <stdlib.h>
#include <iostream>
#include "openacc.h"
using namespace std;
/**
 * A row x col table of ints that is mirrored on the OpenACC device.
 *
 * The constructor allocates the host table with malloc, creates the matching
 * device data with "enter data" directives, fills the host table with the
 * values 0 .. row*col-1 in row-major order and pushes them to the device.
 * The destructor removes the device data and then frees the host table.
 *
 * The object owns raw host memory and device mappings, so copying is disabled
 * (a copy would free the same rows twice).
 */
class Image
{
private:
    int row;          // number of rows
    int col;          // number of ints in each row
    int length;       // row * col, the total element count
    int **img_data;   // `row` pointers, each to a buffer of `col` ints

    // Declared but intentionally not defined: an Image must be built with
    // explicit dimensions and must not be copied or assigned.
    Image();
    Image(const Image &);
    Image &operator=(const Image &);

public:
    /**
     * Allocates and initialises the table on the host and on the device.
     *
     * @param _rows number of rows
     * @param _cols number of ints per row
     */
    Image(int _rows, int _cols)
    {
        row = _rows;
        col = _cols;
        length = row * col;

        img_data = static_cast<int **>(malloc(sizeof(int *) * row));
        // Copy the object itself (row, col, length, img_data) to the device
        // before the rows are created.
#pragma acc enter data copyin(this)
        for (int i = 0; i < row; i++)
        {
            img_data[i] = static_cast<int *>(malloc(sizeof(int) * col));
        }
        // Extents must match the host allocation: `row` pointers of `col` ints.
#pragma acc enter data create(img_data[0:row][0:col])

        for (int i = 0; i < row; i++)
        {
            for (int j = 0; j < col; j++)
            {
                img_data[i][j] = (i * col) + j;
            }
        }
        // The host values were written after the device rows were created, so
        // push them across explicitly.
#pragma acc update device (img_data[0:row][0:col])
    }

    /**
     * Removes the device data, then releases the host table.
     *
     * The exit data directives come first because the clause
     * img_data[0:row][0:col] is expressed in terms of the host row pointers,
     * which must still be valid when the directive runs.
     */
    ~Image()
    {
#pragma acc exit data delete(img_data[0:row][0:col])
#pragma acc exit data delete(this)
        // The memory came from malloc, so it is released with free, not delete[].
        for (int i = 0; i < row; i++)
        {
            free(img_data[i]);
        }
        free(img_data);
    }

    /** @return the host row-pointer table; the Image keeps ownership. */
    int **get_img()
    {
        return img_data;
    }

    /** @return the number of rows. */
    int get_row() const
    {
        return row;
    }

    /** @return the number of ints per row. */
    int get_col() const
    {
        return col;
    }
};
/**
 * Demo driver: builds a 10 x 12 Image, copies its device-resident contents
 * into a separate host buffer inside an OpenACC parallel loop, prints the
 * buffer one row per line, and releases the buffer before exiting.
 *
 * @return 0 on completion
 */
int main(int argc, char *argv[])
{
    Image foo(10,12);
    const int cols = foo.get_col();
    const int rows = foo.get_row();

    // img_d is the table owned by foo; the loop below declares it `present`,
    // i.e. it relies on the device data set up by the Image constructor.
    int** img_d = foo.get_img();

    // img_h is a separate host buffer that receives the values via copyout.
    int** img_h = static_cast<int**>(malloc(sizeof(int*) * rows));
    for (int i = 0; i < rows; i++)
    {
        img_h[i] = static_cast<int*>(malloc(sizeof(int) * cols));
    }

#pragma acc parallel loop copyout(img_h[0:rows][0:cols]) present(img_d)
    for (int i = 0; i < rows; i++)
    {
#pragma acc loop
        for (int j = 0; j < cols; j++)
        {
            img_h[i][j] = img_d[i][j];
        }
    }

    for (int i = 0; i < rows; i++)
    {
        for (int j = 0; j < cols; j++)
        {
            cout << img_h[i][j] << " ";
        }
        cout << endl;
    }

    // Release the staging buffer; img_d is not freed here because foo owns it.
    for (int i = 0; i < rows; i++)
    {
        free(img_h[i]);
    }
    free(img_h);

    cout << "Nicely done! :D" << endl;
    return 0;
}
% pgcpp -acc -Minfo=accel test.cpp ; a.out
main:
86, Generating copyout(img_h[:rows][:cols])
Generating present(img_d[:][:])
Accelerator kernel generated
89, #pragma acc loop gang /* blockIdx.x */
92, #pragma acc loop vector(256) /* threadIdx.x */
Interchanging generated strip mine loop outwards
Interchanging generated vector loop outwards
86, Generating Tesla code
92, Loop is parallelizable
Image::Image(int, int):
25, Generating enter data copyin(this[:1])
31, Generating enter data create(img_data[:row][:col])
43, Generating update device(img_data[:row][:col])
Image::~Image():
54, Generating exit data delete(img_data[:row][:col])
Generating exit data delete(this[:1])
0 1 2 3 4 5 6 7 8 9 10 11
12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27 28 29 30 31 32 33 34 35
36 37 38 39 40 41 42 43 44 45 46 47
48 49 50 51 52 53 54 55 56 57 58 59
60 61 62 63 64 65 66 67 68 69 70 71
72 73 74 75 76 77 78 79 80 81 82 83
84 85 86 87 88 89 90 91 92 93 94 95
96 97 98 99 100 101 102 103 104 105 106 107
108 109 110 111 112 113 114 115 116 117 118 119
Nicely done! :D
Many thanks to you Mat!
I didn’t notice that I had made a huge mistake in the data copyin part. I was testing the code with both a 1D array and a 2D array for an image.
Thanks again :)
DK