I’m trying to compile a CUDA program that throws “error: call can not be configured”. I’m new to CUDA programming and am not able to figure out what the problem could be. I searched the forums for similar problems, but had no luck.
Following is the CUDA program in brief:
[codebox]#define BLOCK_SIZE 16
int getline(FILE *fp, char line, int max);
void compareSeq(char *fragseqlist_d, int pitch, int *matches_d, int fragcount);
int main(int argc, char *argv)
{
char **fragseqlist;
int *matches;
char *fragseqlist_d;
int *matches_d;
long fragcount = -1;
int i, width, pitch;
dim3 dimGrid, dimBlock;
/* Code to assign string array to fragseqlist */
/* Allocate host memory */
matches = (int *) malloc((fragcount+1) * sizeof(int));
/* Allocate device memory */
cudaMalloc((void **) &matches_d, (fragcount+1) * sizeof(int));
cudaMemset(matches_d, 0, (fragcount+1) * sizeof(int));
width = 37;
cudaMallocPitch((void **) &fragseqlist_d, (size_t *) &pitch, width * sizeof(char), (fragcount+1));
/* Copy memory from host to device */
cudaMemcpy2D(fragseqlist_d, pitch, fragseqlist, pitch, width * sizeof(char), fragcount+1, cudaMemcpyHostToDevice);
/* Call device function */
dimBlock.x = BLOCK_SIZE;
dimBlock.y = BLOCK_SIZE;
dimGrid.x = (fragcount+1) / BLOCK_SIZE;
dimGrid.y = (fragcount+1) / BLOCK_SIZE;
compareSeq<<<dimGrid, dimBlock>>>(fragseqlist_d, pitch, matches_d, fragcount);
/* Copy device memory back to host memory */
cudaMemcpy(matches, matches_d, (fragcount+1) * sizeof(int), cudaMemcpyDeviceToHost);
/* Free resources */
for (i = 0; i <= fragcount; ++i)
free(fragseqlist[i]);
free(fragseqlist);
free(matches);
cudaFree(fragseqlist_d);
cudaFree(matches_d);
return 0;
}
int getline(FILE *fp, char s, int lim)
{
// some code
}
global void compareSeq(char *fragseqlist_d, int pitch, int *matches_d, int fragcount)
{
// code for parallel execution
}[/codebox]
I’m running CUDA 2.1 on Ubuntu 8.04.
I’ll really appreciate any help you could offer. Thanks in advance!