Problem with an if() condition in a kernel function

hi

In the code below (marked with a line of asterisks to make it easy to spot), everything works perfectly when I compile with the if() condition commented out. But when I uncomment it, I get a compile-time error that looks like this: Error: External calls are not supported (found non-inlined call to _ZSteqIwSt11char_traitsIwESaIwEEbRKSbIT_T0_T1_ES8_)

Is it just because of the if() condition, or am I missing something else?

P.S. FYI, I have declared the function prototype in ScoreHolder.h. The name of this file is ScoreHolder.cu.

[codebox]#include “ScoreHolder.h”

#include “ScoringFunctions.h”

#include

#include

#include

#include “Timer.h”

#include

#include “thrust/sort.h”

#include “Node.h”

#include

#include <stdlib.h>

//#include “Typedefs.h”

//Is this form of method declaration needed here???

global void node2nodeScorer(std::wstring* dev_tnode_atts,std::wstring* dev_tnode_vals,std::wstring* dev_dnode_atts,std::wstring* dev_dnode_vals,int *count);

namespace
{
	// Reports whether the file named by `path` can be opened for reading.
	// The wide path is narrowed element by element into a std::string because
	// the stream is opened through a narrow C string.
	bool doesFileExist (const std::wstring& path)
	{
		const std::string narrowPath (path.begin(), path.end());

		std::ifstream probe;
		probe.open (narrowPath.c_str());
		if (probe.fail())
		{
			return false;
		}

		probe.close();
		return true;
	}

	// File name of the jar that computes the semantic node scores.
	const std::wstring semanticNodeScorerPath (L"SemanticNodeScorer.jar");
}

// Collects the unique id of every node in `graph`, in iteration order, into a
// malloc()-allocated array holding graph->countNodes() entries.
// The caller owns the returned buffer and must release it with free().
// For a graph without nodes the returned pointer may be NULL (malloc(0)).
unsigned long* ScoreHolder::map2vec(Graph_ptr& graph)
{
	const size_t nodeCount = graph->countNodes();

	// Size the buffer by the element type. sizeof(unsigned long*) only matches
	// on platforms where a pointer and an unsigned long have the same width.
	unsigned long* ids = static_cast<unsigned long*>(malloc(nodeCount*sizeof(unsigned long)));
	if (ids == NULL && nodeCount != 0)
	{
		std::cout<<"Error while allocating the node id array";
		exit(1);
	}

	// Stop at nodeCount as well, so that a mismatch between countNodes() and
	// the iteration range can never write past the end of the buffer.
	size_t next = 0;
	for (Graph::NodeIterator node = graph->getFirstNode(); node != graph->getLastNode() && next < nodeCount; ++node)
	{
		ids[next] = (*node).second->getUniqueId();
		++next;
	}

	return ids;
}

// Copies one side of every entry of `attributes` into a freshly new[]-allocated
// array of attributes.size() strings, following the map's iteration order.
//   flag == 1 : the first member (the key) of each entry is copied
//   flag == 2 : the second member (the value) of each entry is copied
//   otherwise : the array is returned with every element left empty
// The caller owns the returned array.
std::wstring* ScoreHolder::map2string_array(const CUBRC::AttributeValueMap& attributes,int flag)
{
	std::wstring* const strings = new std::wstring[attributes.size()];

	const bool wantKeys = (flag == 1);
	const bool wantValues = (flag == 2);
	if (!wantKeys && !wantValues)
	{
		return strings;
	}

	std::wstring* slot = strings;
	for (CUBRC::AttributeValueMap::const_iterator entry = attributes.begin(); entry != attributes.end(); ++entry, ++slot)
	{
		if (wantKeys)
		{
			*slot = (*entry).first;
		}
		else
		{
			*slot = (*entry).second;
		}
	}

	return strings;
}

// Flattens one side of every node's attribute map in `graph` into a single
// new[]-allocated array of std::wstring.
//
// Layout: the array consists of graph->countNodes() consecutive slots of
// `attsPerNode` strings, where `attsPerNode` is the attribute count of the first
// node. The k-th node in iteration order fills [k*attsPerNode, (k+1)*attsPerNode).
// A node with fewer attributes leaves the rest of its slot empty; surplus
// attributes of a larger node are dropped so that no write leaves the array.
//
// `flag` is forwarded to map2string_array() (1 = keys, 2 = values).
//
// Returns a zero-length array for a graph without nodes. The caller owns the
// returned array and releases it with delete[].
std::wstring* ScoreHolder::getMapAttributes (Graph_ptr& graph,int flag)
{
	const size_t nodeCount = graph->countNodes();
	if (nodeCount == 0)
	{
		// Nothing to flatten, and there is no first node to take a size from.
		return new std::wstring[0];
	}

	const size_t attsPerNode = (*graph->getFirstNode()).second->getAttributes().size();

	// new[] reports failure by throwing, so no NULL check is needed afterwards.
	std::wstring* const final_atts = new std::wstring[nodeCount*attsPerNode];

	size_t nodeIndex = 0;
	for (Graph::NodeIterator node = graph->getFirstNode();
		 node != graph->getLastNode() && nodeIndex < nodeCount;
		 ++node, ++nodeIndex)
	{
		const CUBRC::AttributeValueMap& attributes ((*node).second->getAttributes());

		// map2string_array allocates the per-node array; it is released below
		// once its contents have been copied into the flat array.
		std::wstring* const nodeStrings = map2string_array(attributes,flag);

		const size_t available = attributes.size();
		const size_t toCopy = (available < attsPerNode) ? available : attsPerNode;

		std::wstring* const slot = final_atts + nodeIndex*attsPerNode;
		for (size_t i = 0; i < toCopy; ++i)
		{
			slot[i] = nodeStrings[i];
		}

		delete[] nodeStrings;
	}

	return final_atts;
}

//************************************************************


global void node2nodeScorer(std::wstring* dev_tnode_atts,std::wstring* dev_tnode_vals,std::wstring* dev_dnode_atts,std::wstring* dev_dnode_vals,int *count)

{

int idx=blockIdx.x*blockDim.x+threadIdx.x;

for(int i=0;i<5;i++)

{

	if(dev_dnode_atts[idx]==dev_tnode_atts[blockIdx.x*blockDim.x

+i])

	{

		//Below is the if condition under consideration!!

                    //if(dev_dnode_vals[idx]==dev_tnode_vals[blockIdx.x*blockDim.x

+i])

		{

			count[blockIdx.x*blockDim.x]++;

		}

	}

}

count[0]=0;

count[1]=1;

count[2]=2;

}

ScoreHolder::ScoreHolder (Graph_ptr& templateGraph, Graph_ptr& dataGraph)

{

int* count=(int*)malloc(3*sizeof(int));

int* count_new;

//std::cout<<"hope she clls";

//count_new=(int*)malloc(3*sizeof(int));

for (int i=0;i<3;i++)

{

	count[i]=0;

}

std::wstring* tnode_atts=new std::wstring[50];

std::wstring* tnode_vals=new std::wstring[50];

std::wstring* dnode_atts=new std::wstring[2500];

std::wstring* dnode_vals=new std::wstring[2500];

std::wstring* dev_tnode_atts;

std::wstring* dev_tnode_vals;

std::wstring* dev_dnode_atts;

std::wstring* dev_dnode_vals;

Timer t (L"Computation of the node to node scores", true);

initializeScores (templateGraph, dataGraph);

unsigned long* tnodeids=map2vec(templateGraph);

unsigned long* dnodeids=map2vec(dataGraph);

//thrust::sort(dnodeids,dnodeids+500);

tnode_atts=getMapAttributes(templateGraph,1);

tnode_vals=getMapAttributes(templateGraph,2);

dnode_atts=getMapAttributes(dataGraph,1);

dnode_vals=getMapAttributes(dataGraph,2);

cudaMalloc((void**)&dev_tnode_atts,50*(sizeof(std::wstring)));

cudaMalloc((void**)&dev_tnode_vals,50*(sizeof(std::wstring)));

cudaMalloc((void**)&dev_dnode_atts,2500*(sizeof(std::wstring)));

cudaMalloc((void**)&dev_dnode_vals,2500*(sizeof(std::wstring)));

cudaMalloc((void**)&count_new,3*(sizeof(int)));

cudaMemcpy(dev_tnode_atts,tnode_atts,50*(sizeof(std::wstring

)),cudaMemcpyHostToDevice);

cudaMemcpy(dev_tnode_vals,tnode_vals,50*(sizeof(std::wstring

)),cudaMemcpyHostToDevice);

cudaMemcpy(dev_dnode_atts,dnode_atts,2500*(sizeof(std::wstri

ng)),cudaMemcpyHostToDevice);

cudaMemcpy(dev_dnode_vals,dnode_vals,2500*(sizeof(std::wstri

ng)),cudaMemcpyHostToDevice);

cudaMemcpy(count_new,count,3*(sizeof(int)),cudaMemcpyHostToD

evice);

int numBlocks=10;

int numThreadsPerBlock=500;

dim3 dimGrid(numBlocks);

dim3 dimBlock(numThreadsPerBlock);

node2nodeScorer<<< dimGrid,dimBlock >>>(dev_tnode_atts,dev_tnode_vals,dev_dnode_atts,dev_dnode_v

als,count_new);

cudaMemcpy(count,count_new,3*sizeof(int),cudaMemcpyDeviceToH

ost);

for (int i=0;i<3;i++)

{

	std::wcout<<std::endl<<count[i];

}

exit(1);

for(int i=0;i<2500;i++)

{

	//f++;

	//std::cout<<"Count is "<<f<<" ";

	//std::wcout<<dnode_vals[i]<<std::endl;

	if((i+1)%5==0)

		std::wcout<<std::endl;

}

std::cout<<"Back home";

exit(1);

}

//Calculate the node to node scores.

/*ScoreHolder::ScoreHolder (Graph_ptr& templateGraph, Graph_ptr& dataGraph)

{

Timer t (L"Computation of the node to node scores", true);

initializeScores (templateGraph, dataGraph);

for (Graph::NodeIterator templateNode = templateGraph->getFirstNode();

	  templateNode != templateGraph->getLastNode(); ++templateNode)

{

	for (Graph::NodeIterator dataGraphNode = dataGraph->getFirstNode();

		dataGraphNode != dataGraph->getLastNode(); ++dataGraphNode)

	{

		const unsigned long templateNodeIndex ((*templateNode).second->getUniqueId());

		const unsigned long dataGraphNodeIndex ((*dataGraphNode).second->getUniqueId());

		//std::cout<<std::endl<<"Below is the main 2 b paralellized function";

		//std::cout<<"\nHere";

		float score (ScoringFunctions::getScore((*templateNode).second, (*dataGraphNode).second));

		m_scores[templateNodeIndex][dataGraphNodeIndex].nodeToNodeSc

ore = score;

		//std::cout<<"\n"<<score;

		//std::cout<<"\nScore for template node "<<templateNodeIndex<<" and data node "<<dataGraphNodeIndex<<" is "<<score;

	}

}

t.printTimeInfo(true);

}*/

// Looks up the stored score pair for a (template node, data graph node) pair.
// The unique ids of the two nodes are used directly as row and column index
// into m_scores; the result is a reference to the element held by m_scores.
Structures::ScorePair&
ScoreHolder::getScore (const Node_ptr& templateNode, const Node_ptr& dataGraphNode)
{
	const unsigned long templateIndex = templateNode->getUniqueId();
	const unsigned long dataGraphIndex = dataGraphNode->getUniqueId();

	return m_scores[templateIndex][dataGraphIndex];
}

//Resets m_scores to a grid with one row per template graph node and one column
//per data graph node, every cell holding a default-constructed ScorePair.
void ScoreHolder::initializeScores (Graph_ptr& templateGraph, Graph_ptr& dataGraph)
{
	const unsigned long rowCount = templateGraph->countNodes();
	const unsigned long columnCount = dataGraph->countNodes();

	// Build the replacement in a fresh vector and swap it in, so the storage
	// previously held by m_scores is released when the temporary goes away.
	std::vector<std::vector<Structures::ScorePair> > freshRows (rowCount);
	m_scores.swap(freshRows);

	for (std::vector<std::vector<Structures::ScorePair> >::iterator row = m_scores.begin();
		 row != m_scores.end(); ++row)
	{
		// Same swap approach for each row.
		std::vector<Structures::ScorePair> freshColumns (columnCount);
		row->swap(freshColumns);
	}
}

// Calculate the node to node scores from the java application.
//
// After sizing m_scores, and only if both graph files can be opened, this
// constructor writes the java command line to executeScores.bat, runs that file
// through system(), and reads scores.txt. The score file is consumed as records
// of three lines: template node id, data graph node id, score. A record is
// stored in m_scores[..][..].nodeToNodeScore when all three lines are non-empty,
// the score parses as a float and both ids resolve to nodes.
//
//   templateGraphPath, dataGraphFilePath : files passed to the jar via -t / -d
//   owlFileDirectory                     : directory passed via -o
//   getUriInfo                           : when false, " -dontGetUriInfo " is appended
//
// NOTE(review): the paths are wrapped in double quotes but not escaped, and a
// scores.txt left over from an earlier run is indistinguishable from a fresh
// one; verify both against how callers use this.
ScoreHolder::ScoreHolder (Graph_ptr& templateGraph,
					  Graph_ptr& dataGraph,
					  const std::wstring& templateGraphPath,
					  const std::wstring& dataGraphFilePath,
					  const std::wstring& owlFileDirectory,
					  bool getUriInfo)
{
	initializeScores (templateGraph, dataGraph);

	Timer t (L"Semantic computation of the node to node scores", true);

	// This is where we call the java code to get the scores.
	if (doesFileExist (templateGraphPath) &&
		doesFileExist (dataGraphFilePath))
	{
		// The command goes through a file because system() only accepts char*
		// arguments, not wchar_t.
		// TODO: revisit - running a .bat file is a problem on unix platforms.
		std::wofstream cmd;
		cmd.open("executeScores.bat");

		cmd << L"java -Xms10m -Xmx1024m -jar " << semanticNodeScorerPath
			<< L" -t " << L"\"" << templateGraphPath << L"\""
			<< L" -d " << L"\"" << dataGraphFilePath << L"\""
			<< L" -o " << L"\"" << owlFileDirectory << L"\""
			<< L" -f " << L"\"" << L"scores.txt" << L"\"";

		if (!getUriInfo)
		{
			cmd << L" -dontGetUriInfo ";
		}

		cmd.close();

		// Execute the bat file to run our java stuff
		system("executeScores.bat");

		// Open the score file only if it exists; a missing file and a file that
		// cannot be opened are reported the same way below.
		std::wifstream scoreFile;
		if (doesFileExist (std::wstring(L"scores.txt")))
		{
			scoreFile.open ("scores.txt");
		}

		if (scoreFile.is_open())
		{
			std::wstring templateId, dataId, scoreAsString;

			// Driving the loop with the getline results ends it on any stream
			// failure, not only on end of file, so it cannot spin forever.
			while (std::getline(scoreFile, templateId) &&
				   std::getline(scoreFile, dataId) &&
				   std::getline(scoreFile, scoreAsString))
			{
				// Ensure that the parameters have non zero length.
				if (templateId.empty() || dataId.empty() || scoreAsString.empty())
				{
					continue;
				}

				// If this doesn't work, then the string was not a floating point value.
				float score (0);
				std::wistringstream iss (scoreAsString);
				if (!(iss >> score))
				{
					continue;
				}

				Node_ptr templateNode = templateGraph->getNodeById(templateId);
				Node_ptr dataGraphNode = dataGraph->getNodeById(dataId);

				if (templateNode.get() != NULL && dataGraphNode.get() != NULL)
				{
					m_scores[templateNode->getUniqueId()][dataGraphNode->getUniqueId()].nodeToNodeScore = score;
				}
			}

			scoreFile.close();
		}
		else
		{
			std::wcout << std::endl << L"\nThe java scores file does not exist, quitting.";
		}
	}

	t.printTimeInfo(true);
}

[/codebox]

People, I just realized that if() conditions are not the issue in kernel functions. I'm guessing the comparison between wstrings that I'm doing is causing the problem. Does anybody have an idea about how I can compare wstrings in the kernel function without using strcmp() and the other string functions, which can only be used on the host?

A good old-fashioned for loop? :)

A for loop in what sense? Comparing each character in the string?

Yeah, isn’t that how one usually implements strcmp()?

I’m ignoring the issue of whether this will run efficiently on CUDA for the moment. I don’t understand your problem well enough to comment there. But fundamentally, strcmp is a loop. Depending on your problem, you might turn that loop into a parallel one where each thread in a block reads a different character, or whatever.

But there is no string comparison function in CUDA provided for you, so you are going to have to write something simple yourself.