Hello,
I am currently working on a project that involves CUDA. I have very little CUDA experience, so this problem is confusing me a bit.
The problem is that I am using the GPU for a hash table (currently I am just using 1 block and 1 thread…want to get this to work first). Anyway, I can get the data to copy to the GPU to initialize everything. However, when I read in a file and try to insert the values into the hash table, the table I copy back from the GPU to the CPU isn’t being updated. In fact, it looks like unintialized data. Any help would be appreciated. Thanks.
various:
const int TABLESIZE = 50; // ( 1024 * 1024 * 1 );
const int BLOCKS = 512;
struct HashEntry
{
char key[51];
int keysize;
int values[512];
int count;
};
main function:
int main( void )
{
HashEntry HashTable[TABLESIZE]; //= new HashEntry[TABLESIZE];
HashEntry *GPUtable;
long unsigned int tablesize = TABLESIZE * sizeof( HashEntry );
ifstream inFile( "input.txt" );
if( inFile.fail() )
{
std::cout << "ERROR: cannot open if.txt!" << std::endl;
exit(1);
}
cudaMalloc( (void**) &GPUtable, tablesize );
cudaMemcpy( GPUtable, HashTable, tablesize, cudaMemcpyHostToDevice );
initializeTable2<<<1, 1>>>( GPUtable );
cudaMemcpy( HashTable, GPUtable, tablesize, cudaMemcpyDeviceToHost );
cudaFree( GPUtable );
buildHashTable( HashTable, tablesize, inFile );
cudaFree( GPUtable );
return 0;
}
buildHashTable function:
void buildHashTable( HashEntry Table[], int tablesize, ifstream& inFile )
{
double duration;
int aKeySize;
int val;
std::string key;
char* aKey;
HashEntry *GPUtable;
HashEntry Table2[TABLESIZE];
cudaMalloc( (void**) &GPUtable, tablesize );
cudaMemcpy( GPUtable, Table, tablesize, cudaMemcpyHostToDevice );
Timer timer;
timer.start();
while( inFile >> key >> val )
{
aKey = (char *)key.c_str();
aKeySize = key.length();
// Only have one block and one thread do the insert to avoid
// race conditions
insert<<<1, 1>>>( GPUtable, aKey, aKeySize, &val );
}
duration = timer.stop();
timer.printTime( duration );
int value;
cudaMemcpy( &Table2, GPUtable, tablesize, cudaMemcpyDeviceToHost );
for( int i = 0; i < 50; i++ )
{
key = Table2[i].key;
value = Table2[i].values[0];
std::cout << i << ": key = " << key << ", value = " << value
<< ", count = " << Table2[i].count << std::endl;
}
cudaFree( GPUtable );
}
initializeTable2 function:
__global__ void initializeTable2( HashEntry *Table )
{
int lowerBound = blockIdx.x * BLOCKS;
int upperBound = ( blockIdx.x + 1 ) * BLOCKS;
char *empty = "empty1";
for( int i = 0; i < 50 && i < TABLESIZE; i++ )
{
Table[i].count = 5;
stringCopy( Table[i].key, empty, 5 );
Table[i].keysize = 0;
for( int j = 0; j < 512; j++ )
{
Table[i].values[j] = -1;
}
}
//__syncthreads();
}
insert function:
__global__ void insert( HashEntry *Table, const char *key, int keysize, int *value )
{
int index;
int *res;
int i = 0;
Table[1].count++;
for( i = 0; i < 25 ; i++ )
{
//Table[1].count++;
hashFunction( key, keysize, i, &index );
if( Table[0].count == 0 )
{
stringCopy( Table[0].key, (char *) key, keysize );
Table[0].values[ Table[0].count ] = *value;
Table[0].count++;
break;
}
else if( Table[index].count < 512 )
{
stringCompare( Table[index].key, Table[index].keysize, (char *) key, keysize, res );
if( *res == 1 )
{
Table[index].values[ Table[index].count ] = *value;
Table[index].count++;
break;
}
}
}
}
stringCopy function:
__device__ void stringCopy( char *dest, char *src, int srclength )
{
for( int i = 0; i < srclength && i < 50; i++ )
{
*dest = *src;
dest++;
src++;
}
*dest = '\0'; //Null terminator
}
I did not include all functions because right now I am not actually using my hash function, I am just trying to hard code values into the hash table to return to the CPU. Thanks again!