I’m trying to pass two textures to one of my global functions, but doing so gets a compiler error that I cannot trace down.
An abstract of the code would be:
__global__ void ApplyEfrcSplines(texture<int2, 1, cudaReadModeElementType> ErfcIndexTable,
texture<float4, 1, cudaReadModeElementType> ErfcCoeffsTable,
float* r2, float* results)
{
int i, cidx;
int2 eidx;
float tr2;
float4 ecoef;
unsigned int i16r2, iSr2;
i = threadIdx.x + blockIdx.x*gridDim.x;
while (i < 256*37) {
tr2 = r2[i];
i16r2 = trunc((float)16.0*tr2);
iSr2 = i16r2 >> 5;
eidx = tex1Dfetch(ErfcIndexTable, iSr2);
cidx = eidx.x + (i16r2 >> eidx.y);
ecoef = tex1Dfetch(ErfcCoeffsTable, cidx);
results[i] = (ecoef.x * tr2) + ecoef.y + (ecoef.z / tr2) + (ecoef.w / (tr2*tr2));
i += blockDim.x*gridDim.x;
}
}
int main()
{
float* testpts;
float* results;
float* gpu_testpts;
float* gpu_results;
int2* ErfcIndexTable;
int2* gpu_ErfcIndexTable;
float4* ErfcCoeffsTable;
float4* gpu_ErfcCoeffsTable;
texture<int2, 1, cudaReadModeElementType> texErfcIndexTable;
texture<float4, 1, cudaReadModeElementType> texErfcCoeffsTable;
// Compute the indexing and coefficients tables
ErfcIndexTable = MakeIndexTable();
ErfcCoeffsTable = MakeCoeffsTable();
// Make a list of r2 test points
testpts = MakeTestPoints();
results = (float*)malloc(256*37*sizeof(float));
// Allocate memory on the GPU and upload the computed textures
cudaMallocAdvise((void**)&gpu_ErfcIndexTable, 256*sizeof(int2), "main", "ErfcIndexTable");
cudaMallocAdvise((void**)&gpu_ErfcCoeffsTable, 256*sizeof(float4), "main",
"ErfcCoeffsTable");
cudaMallocAdvise((void**)&gpu_testpts, 256*37*sizeof(float), "main", "ErfcIndexTable");
cudaMallocAdvise((void**)&gpu_results, 256*37*sizeof(float), "main", "ErfcIndexTable");
cudaMemUpload(gpu_ErfcIndexTable, ErfcIndexTable, 256*sizeof(int2), "main",
"ErfcIndexTable");
cudaMemUpload(gpu_ErfcCoeffsTable, ErfcCoeffsTable, 256*sizeof(float4), "main",
"ErfcCoeffsTable");
cudaMemUpload(gpu_testpts, testpts, 256*37*sizeof(float), "main", "testpts");
// CHECK
int i;
for (i = 0; i < 256*37; i++) {
printf("%12.6lf\n", testpts[i]);
}
// END CHECK
// Bind textures
texErfcIndexTable.normalized = 0;
texErfcIndexTable.filterMode = cudaFilterModePoint;
texErfcIndexTable.addressMode[0] = cudaAddressModeClamp;
texErfcIndexTable.channelDesc.x = 32;
texErfcIndexTable.channelDesc.y = 32;
texErfcIndexTable.channelDesc.z = 0;
texErfcIndexTable.channelDesc.w = 0;
cudaBindTexture(NULL, texErfcIndexTable, gpu_ErfcIndexTable, 256*sizeof(int2));
texErfcCoeffsTable.normalized = 0;
texErfcCoeffsTable.filterMode = cudaFilterModePoint;
texErfcCoeffsTable.addressMode[0] = cudaAddressModeClamp;
texErfcCoeffsTable.channelDesc.x = 32;
texErfcCoeffsTable.channelDesc.y = 32;
texErfcCoeffsTable.channelDesc.z = 32;
texErfcCoeffsTable.channelDesc.w = 32;
cudaBindTexture(NULL, texErfcCoeffsTable, gpu_ErfcCoeffsTable, 256*sizeof(float4));
// CHECK
ApplyEfrcSplines<<<128, 128>>>(texErfcIndexTable, texErfcCoeffsTable, gpu_testpts,
gpu_results);
cudaMemDownload(results, gpu_results, 256*37*sizeof(float), "main", "results");
// END CHECK
// Evaluate the results
EvaluateErfcSplines(ErfcIndexTable, ErfcCoeffsTable, 0.4, 20.0);
return 0;
}
Ignoring for the moment what MakeIndexTable, MakeCoeffsTable, and some of the (OK, OK, lame) functions I have for encapsulating CUDA memory allocation and error checking are, I think that the relevant code is in there.
And, the error I’m getting is:
In file included from tmpxft_00002f86_00000000-4_Driver.cudafe1.stub.c:2:
/tmp/tmpxft_00002f86_00000000-4_Driver.cudafe1.stub.c:11: error: expected ‘,’ or ‘…’ before ‘::’ token
In file included from tmpxft_00002f86_00000000-4_Driver.cudafe1.stub.c:2:
/tmp/tmpxft_00002f86_00000000-4_Driver.cudafe1.stub.c: In function ‘void _device_stub__Z16ApplyEfrcSplines7textureI4int2Li1EL19cudaTextureReadMode0EES_I6float4Li1ELS1_0EEPfS5(_Z7textureI4int2Li1EL19cudaTextureReadMode0EE&)’:
/tmp/tmpxft_00002f86_00000000-4_Driver.cudafe1.stub.c:11: error: ‘::__par0’ has not been declared
/tmp/tmpxft_00002f86_00000000-4_Driver.cudafe1.stub.c:11: error: ‘::__par0’ has not been declared
/tmp/tmpxft_00002f86_00000000-4_Driver.cudafe1.stub.c:11: error: ‘::__par1’ has not been declared
/tmp/tmpxft_00002f86_00000000-4_Driver.cudafe1.stub.c:11: error: ‘::__par1’ has not been declared
/tmp/tmpxft_00002f86_00000000-4_Driver.cudafe1.stub.c:11: error: ‘__par2’ was not declared in this scope
/tmp/tmpxft_00002f86_00000000-4_Driver.cudafe1.stub.c:11: error: ‘__par3’ was not declared in this scope
Any help on this would be great. I’m working on one of the early scientific computing successes of NVIDIA’s CUDA (AMBER pmemd), and if I can get these textures in order my estimates (from simulating the same work with garbage numbers in the existing code) are that it will be a new leap forward in speed.