Hello people,
I think I’ve found a bug in the toolkit/driver. I suppose it’s a bug since it’s a very strange behavior, that I can’t explain otherwise. I’ve tried to isolate it the smallest possible code, and here I’m making a complete “bug report” (as suggested by the guidelines of this forum) to help solving it.
OPERATING SYSTEM
Ubuntu 7.10 Desktop i386; kernel version: 2.6.22-14-generic
SYNOPSIS
Two subsequent memory fetches (for linear memory), from the same texture and with the same coordinate, return different results.
The incriminated lines are:
uint2 wip0 = tex1Dfetch( table6_B, 15829 );
uint2 wip1 = tex1Dfetch( table6_B, h1 );
wip0 and wip1 are different in cases when h1 = 15829.
Note: when compiled in device emulation, the results are correct.
DETAILED DESCRIPTION
Full source code is attached at the end of this message. I’ve tries to provide the smallest possible code that reproduces the bug.
I compile and launch the program with the commands (the source code is ‘main.cu’):
nvcc main.cu --run
and
nvcc main.cu --run --device-emulation
CUDA TOOLKIT VERSION
Version 1.1 for Ubuntu 7.04. I’ve tried installing version 2.0beta for RH5, but on my distribution it is plagued by even worse bugs (completely senseless texture fetches).
CUDA SDK VERSION
None. I’ve not installed the SDK.
COMPILER
gcc/g++ 4.1.3
SYSTEM
Sony Vario AR51E: Intel Centrino Core 2 Duo @ 2Ghz, 2GB of RAM, NVidia GeForce 8400M GT with 128 MB of integrated memory.
SOURCE CODE
#include <cuda.h>
#include <cuda_runtime.h>
#include <stdio.h>
#include <stdlib.h>
#include <memory.h>
#define FOR(i,n) for (int i=0; i<(n); i++)
typedef unsigned long long ullong;
typedef unsigned int uint;
__constant__ uint hashSize[17];
__constant__ uint factors[17][2][3];
void* tableData[17][2];
texture< uint2, 1 > table0_A, table0_B;
texture< uint2, 1 > table1_A, table1_B;
texture< uint2, 1 > table2_A, table2_B;
texture< uint2, 1 > table3_A, table3_B;
texture< uint2, 1 > table4_A, table4_B;
texture< uint2, 1 > table5_A, table5_B;
texture< uint2, 1 > table6_A, table6_B;
texture< uint2, 1 > table7_A, table7_B;
texture< uint2, 1 > table8_A, table8_B;
texture< uint2, 1 > table9_A, table9_B;
texture< uint2, 1 > table10_A, table10_B;
texture< uint2, 1 > table11_A, table11_B;
texture< uint2, 1 > table12_A, table12_B;
texture< uint2, 1 > table13_A, table13_B;
texture< uint2, 1 > table14_A, table14_B;
texture< uint2, 1 > table15_A, table15_B;
texture< uint2, 1 > table16_A, table16_B;
__device__ uint debugInfo[8];
__device__ uint hash(uint level, uint table, uint x, uint y, uint z)
{
ullong i = (ullong) x;
ullong j = (ullong) y;
ullong k = (ullong) z;
i = i * (ullong) factors[level][table][0];
j = j * (ullong) factors[level][table][1];
k = k * (ullong) factors[level][table][2];
return (uint)(i ^ j ^ k) % hashSize[level];
}
__global__ void testIt()
{
uint h0 = hash( 6, 0, 0, 0, 33 );
uint h1 = hash( 6, 1, 0, 0, 33 );
uint2 wip0 = tex1Dfetch( table6_B, 15829 );
uint2 wip1 = tex1Dfetch( table6_B, h1 );
debugInfo[0] = h1;
debugInfo[1] = wip0.x;
debugInfo[2] = wip0.y;
debugInfo[3] = wip1.x;
debugInfo[4] = wip1.y;
}
void myInit()
{
uint hostHashSize[17];
uint hostFactors[17][2][3];
FOR(level,17)
{
char filename[64];
FILE* file;
uint tableDataSize;
void* hostTableData;
sprintf( filename, "../data/level%d.cuckoo", level );
file = fopen( filename, "rb" );
fread( &(hostHashSize[level]), sizeof(uint), 1, file );
fread( hostFactors[level][0], sizeof(uint), 3, file );
fread( hostFactors[level][1], sizeof(uint), 3, file );
tableDataSize = sizeof(uint2) * hostHashSize[level];
for (int table=0; table<2; table++)
{
hostTableData = malloc( tableDataSize );
fread( hostTableData, 1, tableDataSize, file );
cudaMalloc( &(tableData[level][table]), tableDataSize != 0 ? tableDataSize : 1 );
cudaMemcpy( tableData[level][table], hostTableData, tableDataSize, cudaMemcpyHostToDevice );
free( hostTableData );
}
switch (level)
{
case 0: cudaBindTexture( NULL, table0_A, tableData[level][0], tableDataSize ); cudaBindTexture( NULL, table0_B, tableData[level][1], tableDataSize );break;
case 1: cudaBindTexture( NULL, table1_A, tableData[level][0], tableDataSize ); cudaBindTexture( NULL, table1_B, tableData[level][1], tableDataSize );break;
case 2: cudaBindTexture( NULL, table2_A, tableData[level][0], tableDataSize ); cudaBindTexture( NULL, table2_B, tableData[level][1], tableDataSize );break;
case 3: cudaBindTexture( NULL, table3_A, tableData[level][0], tableDataSize ); cudaBindTexture( NULL, table3_B, tableData[level][1], tableDataSize );break;
case 4: cudaBindTexture( NULL, table4_A, tableData[level][0], tableDataSize ); cudaBindTexture( NULL, table4_B, tableData[level][1], tableDataSize );break;
case 5: cudaBindTexture( NULL, table5_A, tableData[level][0], tableDataSize ); cudaBindTexture( NULL, table5_B, tableData[level][1], tableDataSize );break;
case 6: cudaBindTexture( NULL, table6_A, tableData[level][0], tableDataSize ); cudaBindTexture( NULL, table6_B, tableData[level][1], tableDataSize );break;
case 7: cudaBindTexture( NULL, table7_A, tableData[level][0], tableDataSize ); cudaBindTexture( NULL, table7_B, tableData[level][1], tableDataSize );break;
case 8: cudaBindTexture( NULL, table8_A, tableData[level][0], tableDataSize ); cudaBindTexture( NULL, table8_B, tableData[level][1], tableDataSize );break;
case 9: cudaBindTexture( NULL, table9_A, tableData[level][0], tableDataSize ); cudaBindTexture( NULL, table9_B, tableData[level][1], tableDataSize );break;
case 10: cudaBindTexture( NULL, table10_A, tableData[level][0], tableDataSize ); cudaBindTexture( NULL, table10_B, tableData[level][1], tableDataSize );break;
case 11: cudaBindTexture( NULL, table11_A, tableData[level][0], tableDataSize ); cudaBindTexture( NULL, table11_B, tableData[level][1], tableDataSize );break;
case 12: cudaBindTexture( NULL, table12_A, tableData[level][0], tableDataSize ); cudaBindTexture( NULL, table12_B, tableData[level][1], tableDataSize );break;
case 13: cudaBindTexture( NULL, table13_A, tableData[level][0], tableDataSize ); cudaBindTexture( NULL, table13_B, tableData[level][1], tableDataSize );break;
case 14: cudaBindTexture( NULL, table14_A, tableData[level][0], tableDataSize ); cudaBindTexture( NULL, table14_B, tableData[level][1], tableDataSize );break;
case 15: cudaBindTexture( NULL, table15_A, tableData[level][0], tableDataSize ); cudaBindTexture( NULL, table15_B, tableData[level][1], tableDataSize );break;
case 16: cudaBindTexture( NULL, table16_A, tableData[level][0], tableDataSize ); cudaBindTexture( NULL, table16_B, tableData[level][1], tableDataSize );break;
}
fclose( file );
}
cudaMemcpyToSymbol( hashSize, hostHashSize, sizeof(uint) * 17 );
cudaMemcpyToSymbol( factors, hostFactors, sizeof(uint) * 17 * 2 * 3 );
}
void myRender()
{
uint hostDebugInfo[8];
testIt<<< 1, 1 >>>();
cudaThreadSynchronize();
cudaMemcpyFromSymbol( hostDebugInfo, debugInfo, sizeof(uint) * 8 );
printf( "%u %u %u %u %u\t\n", hostDebugInfo[0], hostDebugInfo[1], hostDebugInfo[2], hostDebugInfo[3], hostDebugInfo[4] );
}
void myShutdown()
{
FOR(level,17)
{
cudaFree( tableData[level][0] );
cudaFree( tableData[level][1] );
}
}
int main()
{
myInit();
FOR(i,10)
myRender();
myShutdown();
return 0;
}