Hello everyone.
I am having some trouble with this function (declared with #pragma acc routine):
/**
 * Counts how many feasibility checks fail for the candidate pair
 * (node1, node2).
 *
 * The counter starts at 0 and is incremented once for every failed check, so
 * the pair is feasible only when the result is 0:
 *  - the pair must be in range (node1 < n1, node2 < n2) and both nodes must
 *    still be unmatched (core_1 / core_2 hold NULL_NODE); if not, the counter
 *    is incremented once and no further check is run;
 *  - the node comparator nf must accept the two node attributes;
 *  - node1 must not have more in-edges or more out-edges than node2;
 *  - every edge of node1 towards an already matched node must have a
 *    counterpart in g2 whose attribute is accepted by ef;
 *  - every edge of node2 towards an already matched node must have a
 *    counterpart in g1.
 *
 * NOTE(review): everything read here (core_1, core_2, g1, g2, nf, ef) is
 * reached through `this`; when the routine is called from a device compute
 * region those objects presumably have to be accessible on the device --
 * confirm the data mapping at the call site.
 *
 * @param node1 candidate node of the first graph
 * @param node2 candidate node of the second graph
 * @return 0 if no check failed, otherwise the number of failed checks
 */
#pragma acc routine
template<typename Node1, typename Node2, typename Edge1, typename Edge2,
		typename NodeComparisonFunctor, typename EdgeComparisonFunctor>
int VF3ParallelSubState<Node1, Node2, Edge1, Edge2, NodeComparisonFunctor,
		EdgeComparisonFunctor>::ParallelIsFeasiblePair(node_id node1,
		node_id node2) {
	int isFeasible = 0;
	if ((node1 < n1) && (node2 < n2) && (core_1[node1] == NULL_NODE)
			&& (core_2[node2] == NULL_NODE)) {
		if (!nf(g1->GetNodeAttr(node1), g2->GetNodeAttr(node2)))
			isFeasible++;

		// Query each degree once; the values are reused both for the degree
		// comparison and as the trip counts of the loops below.
		const int oec1 = g1->OutEdgeCount(node1);
		const int iec1 = g1->InEdgeCount(node1);
		const int oec2 = g2->OutEdgeCount(node2);
		const int iec2 = g2->InEdgeCount(node2);
		if (iec1 > iec2 || oec1 > oec2)
			isFeasible++;

		const Edge1 *restrict vecEdgesOut1 = g1->GetOutEdgeAttrVec(node1);
		const Edge1 *restrict vecEdgesIn1 = g1->GetInEdgeAttrVec(node1);
		const node_id *restrict vecNodesOut1 = g1->GetOutEdgeNodeVec(node1);
		const node_id *restrict vecNodesIn1 = g1->GetInEdgeNodeVec(node1);
		const node_id *restrict vecNodesOut2 = g2->GetOutEdgeNodeVec(node2);
		const node_id *restrict vecNodesIn2 = g2->GetInEdgeNodeVec(node2);

		// Check the 'out' edges of node1
#pragma acc loop independent reduction(+:isFeasible)
		for (int i = 0; i < oec1; i++) {
			Edge1 eattr1 = vecEdgesOut1[i];
			node_id other1 = vecNodesOut1[i];
			// Only neighbours that are already matched constrain the pair.
			if (core_1[other1] != NULL_NODE) {
				Edge2 eattr2;
				node_id other2 = core_1[other1];
				// ef is evaluated only when HasEdge succeeded (short-circuit).
				if (!g2->HasEdge(node2, other2, eattr2) || !ef(eattr1, eattr2))
					isFeasible++;
			}
		}

		// Check the 'in' edges of node1
#pragma acc loop independent reduction(+:isFeasible)
		for (int i = 0; i < iec1; i++) {
			Edge1 eattr1 = vecEdgesIn1[i];
			node_id other1 = vecNodesIn1[i];
			if (core_1[other1] != NULL_NODE) {
				Edge2 eattr2;
				node_id other2 = core_1[other1];
				if (!g2->HasEdge(other2, node2, eattr2) || !ef(eattr1, eattr2))
					isFeasible++;
			}
		}

		// Check the 'out' edges of node2
#pragma acc loop independent reduction(+:isFeasible)
		for (int i = 0; i < oec2; i++) {
			node_id other2 = vecNodesOut2[i];
			if (core_2[other2] != NULL_NODE) {
				node_id other1 = core_2[other2];
				if (!g1->HasEdge(node1, other1))
					isFeasible++;
			}
		}

		// Check the 'in' edges of node2
#pragma acc loop independent reduction(+:isFeasible)
		for (int i = 0; i < iec2; i++) {
			node_id other2 = vecNodesIn2[i];
			if (core_2[other2] != NULL_NODE) {
				node_id other1 = core_2[other2];
				if (!g1->HasEdge(other1, node1))
					isFeasible++;
			}
		}
	} else
		isFeasible++;
	return isFeasible;
}
This acc routine is called from another piece of code, shown below:
/**
 * Evaluates, for the node of the first graph selected by order[core_len],
 * every node i of the second graph in [0, n2) and records the outcome in a
 * newly allocated array.
 *
 * For each i the result is:
 *  - i + 1            if class_2[i] equals the class of the current node and
 *                     ParallelIsFeasiblePair() returns 0;
 *  - (node_id) -1     if the class matches but the pair is not feasible;
 *  - 0                if the class does not match.
 *
 * @param prova output: set to a malloc'ed array of n2 entries, or left null
 *              when the allocation fails. The buffer is not released here,
 *              so the caller is presumably expected to free() it.
 *
 * NOTE(review): the compute region reads class_2 and, through
 * ParallelIsFeasiblePair(), further members reached via `this`. Only prova is
 * named in a data clause, so those members presumably need to be made
 * device-accessible elsewhere (explicit data directives or managed memory)
 * -- confirm.
 */
template<typename Node1, typename Node2, typename Edge1, typename Edge2,
		typename NodeComparisonFunctor, typename EdgeComparisonFunctor>
void VF3ParallelSubState<Node1, Node2, Edge1, Edge2, NodeComparisonFunctor,
		EdgeComparisonFunctor>::ParallelNextPairFromS0(node_id* restrict &prova) {
	node_id curr_n1 = order[core_len];
	int c = class_1[curr_n1];
	prova = static_cast<node_id*>(malloc(sizeof(node_id) * n2));
	// Never hand a null buffer to the data clause / kernel.
	if (!prova)
		return;
	// Every element of prova is written below, so the uninitialized host
	// buffer does not need to be uploaded: copyout is sufficient.
#pragma acc parallel loop copyout(prova[:n2])
	for (int i = 0; i < n2; i++) {
		if (class_2[i] == c) {
			if (ParallelIsFeasiblePair(curr_n1, static_cast<node_id>(i)) == 0) {
				prova[i] = static_cast<node_id>(i) + 1;
			} else {
				prova[i] = static_cast<node_id>(-1);
			}
		} else
			prova[i] = static_cast<node_id>(0);
	}
}
When I run it, I get the following error:
Accelerator Kernel Timing data
/home/christian/eclipse-workspace/ParallelVF3/src/../include/vf3_parallel_sub_state.hpp
_ZN19VF3ParallelSubStateIii5EmptyS0_18EqualityComparatorIiiES1_IS0_S0_EE22ParallelNextPairFromS0ERrPt NVIDIA devicenum=0
time(us): 14
518: compute region reached 1 time
518: kernel launched 1 time
grid: [1] block: [128]
device time(us): total=0 max=0 min=0 avg=0
518: data region reached 2 times
34: kernel launched 1 time
grid: [1] block: [128]
elapsed time(us): total=820 max=820 min=820 avg=820
518: data copyin transfers: 2
device time(us): total=14 max=11 min=3 avg=7
call to cuMemFreeHost returned error 700: Illegal address during kernel execution
Makefile:38: set di istruzioni per l'obiettivo "run_cuda" non riuscito
make: *** [run_cuda] Errore 1
I don’t understand what causes this error. Can you help me, please?