Trie implementation for GPU Implementing a Trie structure for GPU

Hello everyone!

I’m stuck with a problem.

I am very new to CUDA, however I’m going to try implement the Aho-Corasick algorithm on the GPU. This algorithm work such in a way such that it take a dictionary of words, and make a tree structure of these words with one character in each node, springing child nodes for every new character that does not already exist in the tree. See image for example.

I have attempted to combine a C++ Tutorial for Trie C++ Trie Impementation

with the SDK example: new_delete, whith the container.hpp code.

After rewriting the turorial to a template form, and writing it similarly to the Container.hpp i am stuck with an odd problem!

1> C:\projects\PatternRecognition\PatternRecognition>“C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v4.1\bin\nvcc.exe” -gencode=arch=compute_20,code=“sm_20,compute_20” --use-local-env --cl-version 2010 -ccbin “C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\bin” -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v4.1\include" -G0 --keep-dir “Debug” -maxrregcount=0 --machine 32 --compile -I. -g -Xcompiler "/EHsc /nologo /Od /Zi /MDd " -o “Debug\trie.hpp.obj” “C:\projects\PatternRecognition\PatternRecognition\trie.hpp”

1> nvcc fatal : Don’t know what to do with ‘C:/projects/PatternRecognition/PatternRecognition/trie.hpp’

1>C:\Program Files (x86)\MSBuild\Microsoft.Cpp\v4.0\BuildCustomizations\CUDA 4.1.targets(361,9): error MSB3721: The command ““C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v4.1\bin\nvcc.exe” -gencode=arch=compute_20,code=“sm_20,compute_20” --use-local-env --cl-version 2010 -ccbin “C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\bin” -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v4.1\include” -G0 --keep-dir “Debug” -maxrregcount=0 --machine 32 --compile -I. -g -Xcompiler “/EHsc /nologo /Od /Zi /MDd " -o “Debug\trie.hpp.obj” “C:\projects\PatternRecognition\PatternRecognition\trie.hpp”” exited with code -1.

1>

1>Build FAILED.

[b]Can anyone please attempt to see WHY it does not understand what to do with my file?

To me it looks exactly like the container.hpp file.[/b]

[i] Im running Windows 7 64bit

visual studio 2010 ultimate

CUDA 4.1, SDK 4.1 Nsight 2.1

Intel i7 920

Nvidia GPU #1 = GeForce 580 GTX

Nvidia GPU #2 = GeForce 460 GTX[/i]

The Rewritten tutorial code:

#ifndef _TRIE_H_

#define _TRIE_H_

#include "container.hpp"

template<class T>

class Node{

private:

	T content;

	bool marker;

	Vector<Node*> children;

public:

	__device__

		Node<T>() {

			content = ' ';

			marker = false;

			children = new Vector<Node*>(30); //test with 30 children

	}

	__device__

		~Node<T>() {

			//delete children;	

	}

	__device__

		virtual T getContent() {return content;}

	__device__

		virtual void setContent(char c) {content =c;}

	__device__

		virtual bool wordMarker(){return marker;}

	__device__

		virtual void setWordMarker(){ marker = true;}

	__device__

		virtual Node* findChild(T c){

			for(int i=0; i< children.getSize(); i++){

				if(children[i].content() == c){

					return &children[i];

				}

				return;

		}

	}

	__device__

		virtual void appendChild(Node* child) {children.push(child);}

	__device__

		virtual Vector<Node*> getChildren() {return children;}

};

template<class T>

class Trie{

private:

	Node* root;

public:

	__device__

		Trie(){	root = new Node<T>();}

	__device__

		~Trie(){ delete [] root;}

	__device__

		virtual void addWord(char* s){

			Node<T>* current = root;

			if(strlen(s) == 0){

				current->setWordMarker();

				return;

			}

			for(int i=0; i< strlen(s); i++){

				Node<T>*child = current->findChild(s[i]);

				if(child != NULL){

					current = child;

				}

				else{

					Node<T>* tmp = new Node<T>();

					tmp->setContent(s[i]);

					current->appendChild(tmp);

					current = tmp;

				}

				if(i == strlen(s) -1)

					current->setWordMarker();

			}

	}

	__device__

	virtual bool searchWord(char* s){

			Node<T>* current = root;

			while ( current != NULL )

			{

				for ( int i = 0; i < strlen(s); i++ )

				{

					Node<T>* tmp = current->findChild(s[i]);

					if ( tmp == NULL )

						return false;

					current = tmp;

				}

				if ( current->wordMarker() )

					return true;

				else

					return false;

			}

			return false;

	}

	__device__

		virtual void deleteWord(char* s){

			//Do something

	}

};

#endif

The new_delete example from CUDA SDK 4.1: (container.hpp)

/*

* Copyright 1993-2010 NVIDIA Corporation.  All rights reserved.

*

* Please refer to the NVIDIA end user license agreement (EULA) associated

* with this source code for terms and conditions that govern your use of

* this software. Any use, reproduction, disclosure, or distribution of

* this software and related documentation outside the terms of the EULA

* is strictly prohibited.

*

*/

#ifndef _mContainer_H_

#define _mContainer_H_

#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)

typedef unsigned long long int pointer_int_t;

#else

typedef unsigned int pointer_int_t;

#endif

/////////////////////////////////////////////////////////////////////////////

//

// mContainer parent class.

//

////////////////////////////////////////////////////////////////////////////

template<class T>

class mContainer {

public:

	__device__

	mContainer() {;}

	__device__

	virtual void push(T e) = 0;

	__device__

	virtual bool pop(T &e) = 0;

};

/////////////////////////////////////////////////////////////////////////////

//

// Vector class derived from mContainer class using linear memory as data storage

//

////////////////////////////////////////////////////////////////////////////

template<class T>

class Vector : public mContainer<T> {

public:

	// Constructor, data is allocated on the heap

	__device__

	Vector(int max_size) :  m_top(-1) {

		m_data = new T[max_size];

	}

	// Constructor, data uses preallocated buffer via placement new

	__device__

	Vector(int max_size, T* preallocated_buffer) :  m_top(-1) {

		m_data = new (preallocated_buffer) T[max_size];

	}

	__device__

	~Vector() {

		delete [] m_data;

	}

	__device__

	virtual

	void push(T e) {

		// Atomically increment the top idx

		int idx = atomicAdd(&(this->m_top), 1);

		m_data[idx+1] = e;

		atomicAdd(&(this->m_size), 1);

	}

	__device__

	virtual

	bool pop(T &e) {

		if( m_top >= 0 ) {

			// Atomically decrement the top idx

			int idx = atomicAdd( &(this->m_top), -1 );

			if( idx >= 0 ) {

				e = m_data[idx];

				atomicAdd( &(this->m_size), -1 ); //update num_elements

				return true;

			}

		}

		return false;

		

	}

	__device__

		virtual

		int getSize(){

			return this->m_size;

	}

private:

	int m_size;

	T* m_data;

	int m_top;

};

/////////////////////////////////////////////////////////////////////////////

//

// Stack classes derived from mContainer class using singly linked lists as data storage.

//

////////////////////////////////////////////////////////////////////////////

template<class T>

class SingleLinkElement {

public:

	__device__

	SingleLinkElement(T e) : m_data(e), m_next(0) {

	}

	__device__

	void setNext(SingleLinkElement<T>* next) {

		m_next = next;

	}

	__device__

	SingleLinkElement<T>* getNext() {

		return m_next;

	}

	__device__

	T operator() (){

		return m_data;

	}

private:

	T m_data;

	SingleLinkElement<T>* m_next;

};

template<class T>

class Stack : public mContainer<T> {

public:

	__device__

	Stack() {

		m_top = 0;

	}

	__device__

	virtual 	

	void push(T e) {

		// Create new List element on the device heap

		SingleLinkElement<T>* newElement = new SingleLinkElement<T>(e);

		// Atomic exchange to safely update the top of stack pointer and get the old top pointer 

		SingleLinkElement<T>* old_top = (SingleLinkElement<T>*) atomicExch( (pointer_int_t*)&(this->m_top),

                                                                                    (pointer_int_t)newElement );

		newElement->setNext(old_top);

	}

	__device__

	virtual

	bool pop(T &e) {

		// Get pointer to top element

		SingleLinkElement<T>* curr_top;

		SingleLinkElement<T>* next;

		SingleLinkElement<T>* old_top;

		curr_top = 0;

		old_top = this->m_top;

		while( old_top != 0 && old_top != curr_top ) {

			curr_top = old_top;

			// Otherwise get pointer to the next element in the list

			next = curr_top->getNext();

		

			// Update the top pointer to this one but ONLY if no other thread concurrently changed top in the meantime.

			// This is implemented with atomic compare and swap, comparing top with the value we read first. 

			// If there was a change they are not equal and we try again.

			old_top = (SingleLinkElement<T>*) atomicCAS( (pointer_int_t*)&(this->m_top), 

                                                                     (pointer_int_t)curr_top, 

                                                                     (pointer_int_t)next );

		}

		

		if( old_top != 0 ) {

			// Return the top element 

			e = (*old_top)();

			delete old_top;

			return true;

		} else {

			// Stack is empty

			return false;

		}

	}

protected:

	// Pointer to the top of the stack

	SingleLinkElement<T>* m_top;

};

#endif

I created a new project, with new files(only copied over the content) and the problem has disappeared…

Really weird.

Now i got a million other syntax errors instead to be solved ;)

Any help is still appreciated, if anyone see problems in the code, or has experience with this sort of programming, guides etc.