Hi everybody,

I’m a beginner in CUDA programming. I want to run my code (an A* search algorithm) in parallel on the GPU in order to speed it up compared with the CPU version. This is my code:

=====================================================================================================

/// A* search over mainGraph from startRef towards endRef.
///
/// Search nodes come from m_nodePool and are ordered through m_openList.
/// Each node stores its accumulated `cost` and `total` (cost + heuristic).
/// If the open list runs empty before endRef is popped, the returned path
/// ends at the visited node with the smallest heuristic seen so far.
///
/// @param startRef  Id of the start node.
/// @param endRef    Id of the goal node.
/// @param startPos  Start position; copied into the start node.
/// @param endPos    Goal position, used for the heuristic and the final leg.
/// @param path      [out] Receives node ids (each offset by refBase), start first.
/// @param pathCount [out] Number of ids written to path.
/// @param maxPath   Capacity of path; never more than this many ids are written.
/// @return Accumulated cost stored on the last node of the returned path,
///         or 0 if the start node could not be obtained from the pool.
float Navigation::findPath(dtPolyRef startRef, dtPolyRef endRef, const float* startPos, const float* endPos, dtPolyRef* path, int &pathCount, const int maxPath)
{
    dtAssert(m_nodePool);
    dtAssert(m_openList);

    pathCount = 0;

    m_nodePool->clear();
    m_openList->clear();

    dtNode* startNode = m_nodePool->getNode(startRef);
    // getNode() is null-checked for neighbours below; apply the same guard
    // here instead of dereferencing a null start node.
    if (!startNode)
        return 0.0f;

    dtVcopy(startNode->pos, startPos);
    startNode->pidx = 0;
    startNode->cost = 0;
    startNode->total = dtVdist(startPos, endPos) * H_SCALE;
    startNode->id = startRef;
    startNode->flags = DT_NODE_OPEN;
    m_openList->push(startNode);

    // Fallback target: the node with the smallest heuristic seen so far.
    dtNode* lastBestNode = startNode;
    float lastBestNodeCost = startNode->total;

    while (!m_openList->empty())
    {
        // Remove node from open list and put it in closed list.
        dtNode* bestNode = m_openList->pop();
        bestNode->flags &= ~DT_NODE_OPEN;
        bestNode->flags |= DT_NODE_CLOSED;

        // Reached the goal, stop searching.
        if (bestNode->id == endRef)
        {
            lastBestNode = bestNode;
            break;
        }

        Graph::Node* graphNode = &mainGraph.nodes[bestNode->id];

        // Id of the node we arrived from (0 when bestNode has no parent).
        dtPolyRef parentRef = 0;
        if (bestNode->pidx)
            parentRef = m_nodePool->getNodeAtIdx(bestNode->pidx)->id;

        for (unsigned int i = 0; i < graphNode->numEdges; i++)
        {
            const dtPolyRef neighbourRef = graphNode->edges[i].targetNodeId;

            // Skip invalid ids and do not expand back to where we came from.
            if (!neighbourRef || neighbourRef == parentRef)
                continue;

            dtNode* neighbourNode = m_nodePool->getNode(neighbourRef);
            if (!neighbourNode)
                continue;

            // If the node is visited the first time, take its position from the edge.
            if (neighbourNode->flags == 0)
                dtVcopy(neighbourNode->pos, graphNode->edges[i].pos);

            // Calculate cost and heuristic.
            const float curCost = dtVdist(bestNode->pos, neighbourNode->pos);
            float cost = bestNode->cost + curCost;
            float heuristic = 0.0f;
            if (neighbourRef == endRef)
            {
                // Goal node: add the final leg from the neighbour's position
                // to endPos. Measuring it from bestNode->pos would count the
                // step to the neighbour twice.
                cost += dtVdist(neighbourNode->pos, endPos);
            }
            else
            {
                heuristic = dtVdist(neighbourNode->pos, endPos) * H_SCALE;
            }
            const float total = cost + heuristic;

            // The node is already in the open list, or already processed,
            // and the new result is not better: skip.
            if ((neighbourNode->flags & (DT_NODE_OPEN | DT_NODE_CLOSED)) && total >= neighbourNode->total)
                continue;

            // Add or update the node (a closed node gets re-opened).
            neighbourNode->pidx = m_nodePool->getNodeIdx(bestNode);
            neighbourNode->id = neighbourRef;
            neighbourNode->flags = (neighbourNode->flags & ~DT_NODE_CLOSED);
            neighbourNode->cost = cost;
            neighbourNode->total = total;

            if (neighbourNode->flags & DT_NODE_OPEN)
            {
                // Already in open, update node location.
                m_openList->modify(neighbourNode);
            }
            else
            {
                // Put the node in open list.
                neighbourNode->flags |= DT_NODE_OPEN;
                m_openList->push(neighbourNode);
            }

            // Update nearest node to target so far.
            if (heuristic < lastBestNodeCost)
            {
                lastBestNodeCost = heuristic;
                lastBestNode = neighbourNode;
            }
        }
    }

    const float cost = lastBestNode->cost;

    // Reverse the parent links so the chain can be walked from the start node.
    dtNode* prev = 0;
    dtNode* node = lastBestNode;
    do
    {
        dtNode* next = m_nodePool->getNodeAtIdx(node->pidx);
        node->pidx = m_nodePool->getNodeIdx(prev);
        prev = node;
        node = next;
    }
    while (node);

    // Store path. The capacity is checked before every write, so
    // maxPath <= 0 writes nothing instead of overrunning the buffer.
    node = prev;
    int n = 0;
    while (node && n < maxPath)
    {
        path[n++] = node->id + refBase;
        node = m_nodePool->getNodeAtIdx(node->pidx);
    }
    pathCount = n;

    return cost;
}

=====================================================================================================

Can anybody help me?

Thanks a lot.