Trie implementation for GPU Implementing a Trie structure for GPU

Hello everyone!

I’m stuck with a problem.

I am very new to CUDA, however I’m going to try implement the Aho-Corasick algorithm on the GPU. This algorithm work such in a way such that it take a dictionary of words, and make a tree structure of these words with one character in each node, springing child nodes for every new character that does not already exist in the tree. See image for example.

I have attempted to combine a C++ Tutorial for Trie C++ Trie Impementation

with the SDK example: new_delete, whith the container.hpp code.

After rewriting the turorial to a template form, and writing it similarly to the Container.hpp i am stuck with an odd problem!

1> C:\projects\PatternRecognition\PatternRecognition>“C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v4.1\bin\nvcc.exe” -gencode=arch=compute_20,code="sm_20,compute_20" --use-local-env --cl-version 2010 -ccbin “C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\bin” -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v4.1\include" -G0 --keep-dir “Debug” -maxrregcount=0 --machine 32 --compile -I. -g -Xcompiler "/EHsc /nologo /Od /Zi /MDd " -o “Debug\trie.hpp.obj” “C:\projects\PatternRecognition\PatternRecognition\trie.hpp”

1> nvcc fatal : Don’t know what to do with ‘C:/projects/PatternRecognition/PatternRecognition/trie.hpp’

1>C:\Program Files (x86)\MSBuild\Microsoft.Cpp\v4.0\BuildCustomizations\CUDA 4.1.targets(361,9): error MSB3721: The command ““C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v4.1\bin\nvcc.exe” -gencode=arch=compute_20,code="sm_20,compute_20" --use-local-env --cl-version 2010 -ccbin “C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\bin” -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v4.1\include” -G0 --keep-dir “Debug” -maxrregcount=0 --machine 32 --compile -I. -g -Xcompiler “/EHsc /nologo /Od /Zi /MDd " -o “Debug\trie.hpp.obj” “C:\projects\PatternRecognition\PatternRecognition\trie.hpp”” exited with code -1.

1>

1>Build FAILED.

[b]Can anyone please attempt to see WHY it does not understand what to do with my file?

To me it looks exactly like the container.hpp file.[/b]

[i] Im running Windows 7 64bit

visual studio 2010 ultimate

CUDA 4.1, SDK 4.1 Nsight 2.1

Intel i7 920

Nvidia GPU #1 = GeForce 580 GTX

Nvidia GPU #2 = GeForce 460 GTX[/i]

The Rewritten tutorial code:

#ifndef _TRIE_H_

#define _TRIE_H_

#include "container.hpp"

template<class T>

class Node{

private:

	T content;

	bool marker;

	Vector<Node*> children;

public:

	__device__

		Node<T>() {

			content = ' ';

			marker = false;

			children = new Vector<Node*>(30); //test with 30 children

	}

	__device__

		~Node<T>() {

			//delete children;	

	}

	__device__

		virtual T getContent() {return content;}

	__device__

		virtual void setContent(char c) {content =c;}

	__device__

		virtual bool wordMarker(){return marker;}

	__device__

		virtual void setWordMarker(){ marker = true;}

	__device__

		virtual Node* findChild(T c){

			for(int i=0; i< children.getSize(); i++){

				if(children[i].content() == c){

					return &children[i];

				}

				return;

		}

	}

	__device__

		virtual void appendChild(Node* child) {children.push(child);}

	__device__

		virtual Vector<Node*> getChildren() {return children;}

};

template<class T>

class Trie{

private:

	Node* root;

public:

	__device__

		Trie(){	root = new Node<T>();}

	__device__

		~Trie(){ delete [] root;}

	__device__

		virtual void addWord(char* s){

			Node<T>* current = root;

			if(strlen(s) == 0){

				current->setWordMarker();

				return;

			}

			for(int i=0; i< strlen(s); i++){

				Node<T>*child = current->findChild(s[i]);

				if(child != NULL){

					current = child;

				}

				else{

					Node<T>* tmp = new Node<T>();

					tmp->setContent(s[i]);

					current->appendChild(tmp);

					current = tmp;

				}

				if(i == strlen(s) -1)

					current->setWordMarker();

			}

	}

	__device__

	virtual bool searchWord(char* s){

			Node<T>* current = root;

			while ( current != NULL )

			{

				for ( int i = 0; i < strlen(s); i++ )

				{

					Node<T>* tmp = current->findChild(s[i]);

					if ( tmp == NULL )

						return false;

					current = tmp;

				}

				if ( current->wordMarker() )

					return true;

				else

					return false;

			}

			return false;

	}

	__device__

		virtual void deleteWord(char* s){

			//Do something

	}

};

#endif

The new_delete example from CUDA SDK 4.1: (container.hpp)

/*

* Copyright 1993-2010 NVIDIA Corporation.  All rights reserved.

*

* Please refer to the NVIDIA end user license agreement (EULA) associated

* with this source code for terms and conditions that govern your use of

* this software. Any use, reproduction, disclosure, or distribution of

* this software and related documentation outside the terms of the EULA

* is strictly prohibited.

*

*/

#ifndef _mContainer_H_

#define _mContainer_H_

#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)

typedef unsigned long long int pointer_int_t;

#else

typedef unsigned int pointer_int_t;

#endif

/////////////////////////////////////////////////////////////////////////////

//

// mContainer parent class.

//

////////////////////////////////////////////////////////////////////////////

template<class T>

class mContainer {

public:

	__device__

	mContainer() {;}

	__device__

	virtual void push(T e) = 0;

	__device__

	virtual bool pop(T &e) = 0;

};

/////////////////////////////////////////////////////////////////////////////

//

// Vector class derived from mContainer class using linear memory as data storage

//

////////////////////////////////////////////////////////////////////////////

template<class T>

class Vector : public mContainer<T> {

public:

	// Constructor, data is allocated on the heap

	__device__

	Vector(int max_size) :  m_top(-1) {

		m_data = new T[max_size];

	}

	// Constructor, data uses preallocated buffer via placement new

	__device__

	Vector(int max_size, T* preallocated_buffer) :  m_top(-1) {

		m_data = new (preallocated_buffer) T[max_size];

	}

	__device__

	~Vector() {

		delete [] m_data;

	}

	__device__

	virtual

	void push(T e) {

		// Atomically increment the top idx

		int idx = atomicAdd(&(this->m_top), 1);

		m_data[idx+1] = e;

		atomicAdd(&(this->m_size), 1);

	}

	__device__

	virtual

	bool pop(T &e) {

		if( m_top >= 0 ) {

			// Atomically decrement the top idx

			int idx = atomicAdd( &(this->m_top), -1 );

			if( idx >= 0 ) {

				e = m_data[idx];

				atomicAdd( &(this->m_size), -1 ); //update num_elements

				return true;

			}

		}

		return false;

		

	}

	__device__

		virtual

		int getSize(){

			return this->m_size;

	}

private:

	int m_size;

	T* m_data;

	int m_top;

};

/////////////////////////////////////////////////////////////////////////////

//

// Stack classes derived from mContainer class using singly linked lists as data storage.

//

////////////////////////////////////////////////////////////////////////////

template<class T>

class SingleLinkElement {

public:

	__device__

	SingleLinkElement(T e) : m_data(e), m_next(0) {

	}

	__device__

	void setNext(SingleLinkElement<T>* next) {

		m_next = next;

	}

	__device__

	SingleLinkElement<T>* getNext() {

		return m_next;

	}

	__device__

	T operator() (){

		return m_data;

	}

private:

	T m_data;

	SingleLinkElement<T>* m_next;

};

template<class T>

class Stack : public mContainer<T> {

public:

	__device__

	Stack() {

		m_top = 0;

	}

	__device__

	virtual 	

	void push(T e) {

		// Create new List element on the device heap

		SingleLinkElement<T>* newElement = new SingleLinkElement<T>(e);

		// Atomic exchange to safely update the top of stack pointer and get the old top pointer 

		SingleLinkElement<T>* old_top = (SingleLinkElement<T>*) atomicExch( (pointer_int_t*)&(this->m_top),

                                                                                    (pointer_int_t)newElement );

		newElement->setNext(old_top);

	}

	__device__

	virtual

	bool pop(T &e) {

		// Get pointer to top element

		SingleLinkElement<T>* curr_top;

		SingleLinkElement<T>* next;

		SingleLinkElement<T>* old_top;

		curr_top = 0;

		old_top = this->m_top;

		while( old_top != 0 && old_top != curr_top ) {

			curr_top = old_top;

			// Otherwise get pointer to the next element in the list

			next = curr_top->getNext();

		

			// Update the top pointer to this one but ONLY if no other thread concurrently changed top in the meantime.

			// This is implemented with atomic compare and swap, comparing top with the value we read first. 

			// If there was a change they are not equal and we try again.

			old_top = (SingleLinkElement<T>*) atomicCAS( (pointer_int_t*)&(this->m_top), 

                                                                     (pointer_int_t)curr_top, 

                                                                     (pointer_int_t)next );

		}

		

		if( old_top != 0 ) {

			// Return the top element 

			e = (*old_top)();

			delete old_top;

			return true;

		} else {

			// Stack is empty

			return false;

		}

	}

protected:

	// Pointer to the top of the stack

	SingleLinkElement<T>* m_top;

};

#endif

I created a new project, with new files(only copied over the content) and the problem has disappeared…

Really weird.

Now i got a million other syntax errors instead to be solved External Image

Any help is still appreciated, if anyone see problems in the code, or has experience with this sort of programming, guides etc.