Hi LS,
The easiest thing to do is use CUDA Unified Memory by adding the flag “-ta=tesla:managed”. This will have the CUDA driver manage the memory for you so you don’t need to manage it yourself. The caveats are that Unified Memory is still considered experimental, only allows you to use as much memory as is available on your device, may be slow (but not always), and is only available for dynamic data, so you’d need to modify your code to dynamically allocate “materials”.
Hopefully the OpenACC 3.0 standard will add better support for aggregates with dynamic data members, but for now if you manage the data yourself, you’ll need to add a bit more code.
When the class or structure contains fixed size data (i.e. the size is known at compile time), the compiler can add some special case code to handle multi-dimensional arrays in OpenACC data clauses. However, when the class contains dynamic data members, the compiler doesn’t have enough information, so the user needs to manually add code which populates the device array with the device pointers to the classes.
First, I’d recommend moving the unstructured data region which creates the “this” pointer into the class constructor. For dynamic data members, have their “enter data” in the same method where they are allocated. (For “b” this would be in the constructor as well).
In the main code when you allocate each element of “materials”, you need to capture the device “this” pointer and then assign it to the device copy of “materials”. I prefer to capture them into an array and then call “acc_memcpy_device” to update the device copy of “materials”, but you could do it one at a time as well (albeit slower).
You’ll also want to add update routines to the mymaterial class. The compiler won’t know how to perform a deep update of the dynamic data members so you’ll need to add this code yourself.
Hope this helps,
Mat
Note that I modified your code so that the size of each “b” is variable.
% cat test2.cpp
#include <iostream>
#ifdef _OPENACC
#include <openacc.h>
#endif
#ifndef N
#define N 20
#endif
using namespace std;
// A material record that keeps a copy of itself on the OpenACC device.
//
// The object owns a dynamically sized host array `b` of `n` doubles. The
// constructor places both the object and its array on the device, and the
// destructor removes them again, so the device copy lives exactly as long
// as the host object. When compiled without OpenACC the pragmas are ignored
// and the class behaves as a plain host-side container.
class mymaterial {
public:
    int n;      // number of elements in b
    double* b;  // host array of n doubles, owned by this object

    // Allocates b with n_ elements, initializes b[i] = i, then creates the
    // device copies of the object and of b[0:n].
    mymaterial(int n_) {
        n = n_;
        b = new double[n];
        for (int i = 0; i < n; ++i) {
            b[i] = i;
        }
        // Done in the same method that allocates b so the host and device
        // allocations are always created together.
        #pragma acc enter data pcopyin(this, b[0:n])
    }

    // Copying is disabled. A copy would share the same `b` pointer, so both
    // destructors would call delete[] on it and run "exit data delete" for
    // the same host addresses.
    mymaterial(const mymaterial&) = delete;
    mymaterial& operator=(const mymaterial&) = delete;

    // Removes the device copies of b and of the object, then frees the host
    // array.
    ~mymaterial() {
        #pragma acc exit data delete(b[0:n], this)
        delete [] b;
    }

#ifdef _OPENACC
    // Deep-update helpers: the update directives below name the dynamic
    // member b explicitly, with its extent n.

    // Copies b[0:n] from the device to the host.
    void update_self() {
        #pragma acc update self(b[0:n])
    }

    // Copies b[0:n] from the host to the device.
    void update_device() {
        #pragma acc update device(b[0:n])
    }
#endif
};
int main () {
mymaterial** materials = new mymaterial*[N];
#ifdef _OPENACC
size_t devptr[N];
#endif
for(int i=0;i<N;++i){
materials[i] = new mymaterial(i+1);
#ifdef _OPENACC
devptr[i] = (size_t) acc_deviceptr(materials[i]);
#endif
}
#ifdef _OPENACC
#pragma acc enter data create(materials[0:N][0:1]) copyin(devptr[0:N])
acc_memcpy_device(acc_deviceptr(materials),acc_deviceptr(devptr),sizeof(size_t)*N);
#endif
#pragma acc kernels loop independent present(materials)
for(int i=0;i<N;++i){
mymaterial* mat = materials[i];
for(int j=0;j<mat->n;++j){
mat->b[j] *= 2;
}
}
#ifdef _OPENACC
for(int i=0;i<N;++i){
materials[i]->update_self();
}
#endif
for(int i=0;i<N;++i){
for(int j=0;j<materials[i]->n;++j){
cout << materials[i]->b[j] << ",";
}
cout << endl;
delete materials[i];
}
#pragma acc exit data delete(materials[0:N][0:1],devptr)
}
% pgc++ test2.cpp -acc -Minfo=accel; a.out
main:
57, Generating enter data copyin(devptr[:])
Generating enter data create(materials[:20][:1])
Generating present(materials[:][:])
61, Loop is parallelizable
Accelerator kernel generated
Generating Tesla code
61, #pragma acc loop gang, vector(32) /* blockIdx.x threadIdx.x */
63, Loop is parallelizable
83, Generating exit data delete(devptr[:],materials[:20][:1])
mymaterial::mymaterial(int):
21, Generating enter data copyin(b[:n],this[:1])
mymaterial::~mymaterial():
25, Generating exit data delete(this[:1],b[:n])
mymaterial::update_self():
32, Generating update self(b[:n])
0,
0,2,
0,2,4,
0,2,4,6,
0,2,4,6,8,
0,2,4,6,8,10,
0,2,4,6,8,10,12,
0,2,4,6,8,10,12,14,
0,2,4,6,8,10,12,14,16,
0,2,4,6,8,10,12,14,16,18,
0,2,4,6,8,10,12,14,16,18,20,
0,2,4,6,8,10,12,14,16,18,20,22,
0,2,4,6,8,10,12,14,16,18,20,22,24,
0,2,4,6,8,10,12,14,16,18,20,22,24,26,
0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,
0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,
0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32,
0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32,34,
0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32,34,36,
0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32,34,36,38,