Hello everyone!
I’m stuck with a problem.
I am very new to CUDA, however I’m going to try implement the Aho-Corasick algorithm on the GPU. This algorithm work such in a way such that it take a dictionary of words, and make a tree structure of these words with one character in each node, springing child nodes for every new character that does not already exist in the tree. See image for example.
I have attempted to combine a C++ Tutorial for Trie C++ Trie Impementation
with the SDK example: new_delete, whith the container.hpp code.
After rewriting the turorial to a template form, and writing it similarly to the Container.hpp i am stuck with an odd problem!
1> C:\projects\PatternRecognition\PatternRecognition>“C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v4.1\bin\nvcc.exe” -gencode=arch=compute_20,code="sm_20,compute_20" --use-local-env --cl-version 2010 -ccbin “C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\bin” -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v4.1\include" -G0 --keep-dir “Debug” -maxrregcount=0 --machine 32 --compile -I. -g -Xcompiler "/EHsc /nologo /Od /Zi /MDd " -o “Debug\trie.hpp.obj” “C:\projects\PatternRecognition\PatternRecognition\trie.hpp”
1> nvcc fatal : Don’t know what to do with ‘C:/projects/PatternRecognition/PatternRecognition/trie.hpp’
1>C:\Program Files (x86)\MSBuild\Microsoft.Cpp\v4.0\BuildCustomizations\CUDA 4.1.targets(361,9): error MSB3721: The command ““C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v4.1\bin\nvcc.exe” -gencode=arch=compute_20,code="sm_20,compute_20" --use-local-env --cl-version 2010 -ccbin “C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\bin” -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v4.1\include” -G0 --keep-dir “Debug” -maxrregcount=0 --machine 32 --compile -I. -g -Xcompiler “/EHsc /nologo /Od /Zi /MDd " -o “Debug\trie.hpp.obj” “C:\projects\PatternRecognition\PatternRecognition\trie.hpp”” exited with code -1.
1>
1>Build FAILED.
[b]Can anyone please attempt to see WHY it does not understand what to do with my file?
To me it looks exactly like the container.hpp file.[/b]
[i] Im running Windows 7 64bit
visual studio 2010 ultimate
CUDA 4.1, SDK 4.1 Nsight 2.1
Intel i7 920
Nvidia GPU #1 = GeForce 580 GTX
Nvidia GPU #2 = GeForce 460 GTX[/i]
The Rewritten tutorial code:
#ifndef _TRIE_H_
#define _TRIE_H_
#include "container.hpp"
template<class T>
class Node{
private:
T content;
bool marker;
Vector<Node*> children;
public:
__device__
Node<T>() {
content = ' ';
marker = false;
children = new Vector<Node*>(30); //test with 30 children
}
__device__
~Node<T>() {
//delete children;
}
__device__
virtual T getContent() {return content;}
__device__
virtual void setContent(char c) {content =c;}
__device__
virtual bool wordMarker(){return marker;}
__device__
virtual void setWordMarker(){ marker = true;}
__device__
virtual Node* findChild(T c){
for(int i=0; i< children.getSize(); i++){
if(children[i].content() == c){
return &children[i];
}
return;
}
}
__device__
virtual void appendChild(Node* child) {children.push(child);}
__device__
virtual Vector<Node*> getChildren() {return children;}
};
template<class T>
class Trie{
private:
Node* root;
public:
__device__
Trie(){ root = new Node<T>();}
__device__
~Trie(){ delete [] root;}
__device__
virtual void addWord(char* s){
Node<T>* current = root;
if(strlen(s) == 0){
current->setWordMarker();
return;
}
for(int i=0; i< strlen(s); i++){
Node<T>*child = current->findChild(s[i]);
if(child != NULL){
current = child;
}
else{
Node<T>* tmp = new Node<T>();
tmp->setContent(s[i]);
current->appendChild(tmp);
current = tmp;
}
if(i == strlen(s) -1)
current->setWordMarker();
}
}
__device__
virtual bool searchWord(char* s){
Node<T>* current = root;
while ( current != NULL )
{
for ( int i = 0; i < strlen(s); i++ )
{
Node<T>* tmp = current->findChild(s[i]);
if ( tmp == NULL )
return false;
current = tmp;
}
if ( current->wordMarker() )
return true;
else
return false;
}
return false;
}
__device__
virtual void deleteWord(char* s){
//Do something
}
};
#endif
The new_delete example from CUDA SDK 4.1: (container.hpp)
/*
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
#ifndef _mContainer_H_
#define _mContainer_H_
#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
typedef unsigned long long int pointer_int_t;
#else
typedef unsigned int pointer_int_t;
#endif
/////////////////////////////////////////////////////////////////////////////
//
// mContainer parent class.
//
////////////////////////////////////////////////////////////////////////////
template<class T>
class mContainer {
public:
__device__
mContainer() {;}
__device__
virtual void push(T e) = 0;
__device__
virtual bool pop(T &e) = 0;
};
/////////////////////////////////////////////////////////////////////////////
//
// Vector class derived from mContainer class using linear memory as data storage
//
////////////////////////////////////////////////////////////////////////////
template<class T>
class Vector : public mContainer<T> {
public:
// Constructor, data is allocated on the heap
__device__
Vector(int max_size) : m_top(-1) {
m_data = new T[max_size];
}
// Constructor, data uses preallocated buffer via placement new
__device__
Vector(int max_size, T* preallocated_buffer) : m_top(-1) {
m_data = new (preallocated_buffer) T[max_size];
}
__device__
~Vector() {
delete [] m_data;
}
__device__
virtual
void push(T e) {
// Atomically increment the top idx
int idx = atomicAdd(&(this->m_top), 1);
m_data[idx+1] = e;
atomicAdd(&(this->m_size), 1);
}
__device__
virtual
bool pop(T &e) {
if( m_top >= 0 ) {
// Atomically decrement the top idx
int idx = atomicAdd( &(this->m_top), -1 );
if( idx >= 0 ) {
e = m_data[idx];
atomicAdd( &(this->m_size), -1 ); //update num_elements
return true;
}
}
return false;
}
__device__
virtual
int getSize(){
return this->m_size;
}
private:
int m_size;
T* m_data;
int m_top;
};
/////////////////////////////////////////////////////////////////////////////
//
// Stack classes derived from mContainer class using singly linked lists as data storage.
//
////////////////////////////////////////////////////////////////////////////
template<class T>
class SingleLinkElement {
public:
__device__
SingleLinkElement(T e) : m_data(e), m_next(0) {
}
__device__
void setNext(SingleLinkElement<T>* next) {
m_next = next;
}
__device__
SingleLinkElement<T>* getNext() {
return m_next;
}
__device__
T operator() (){
return m_data;
}
private:
T m_data;
SingleLinkElement<T>* m_next;
};
template<class T>
class Stack : public mContainer<T> {
public:
__device__
Stack() {
m_top = 0;
}
__device__
virtual
void push(T e) {
// Create new List element on the device heap
SingleLinkElement<T>* newElement = new SingleLinkElement<T>(e);
// Atomic exchange to safely update the top of stack pointer and get the old top pointer
SingleLinkElement<T>* old_top = (SingleLinkElement<T>*) atomicExch( (pointer_int_t*)&(this->m_top),
(pointer_int_t)newElement );
newElement->setNext(old_top);
}
__device__
virtual
bool pop(T &e) {
// Get pointer to top element
SingleLinkElement<T>* curr_top;
SingleLinkElement<T>* next;
SingleLinkElement<T>* old_top;
curr_top = 0;
old_top = this->m_top;
while( old_top != 0 && old_top != curr_top ) {
curr_top = old_top;
// Otherwise get pointer to the next element in the list
next = curr_top->getNext();
// Update the top pointer to this one but ONLY if no other thread concurrently changed top in the meantime.
// This is implemented with atomic compare and swap, comparing top with the value we read first.
// If there was a change they are not equal and we try again.
old_top = (SingleLinkElement<T>*) atomicCAS( (pointer_int_t*)&(this->m_top),
(pointer_int_t)curr_top,
(pointer_int_t)next );
}
if( old_top != 0 ) {
// Return the top element
e = (*old_top)();
delete old_top;
return true;
} else {
// Stack is empty
return false;
}
}
protected:
// Pointer to the top of the stack
SingleLinkElement<T>* m_top;
};
#endif