I'm trying to write my very first CUDA application in C. I've been building it up in small pieces, compiling after each change, to avoid piling up mistakes and to make sure I always have working code.
The issue I'm having is that as soon as I try to invoke a kernel, or even just define a kernel that uses threadIdx.x, the nvcc compiler complains that threadIdx is undeclared (first use in this function).
I'm including <cuda_runtime_api.h>, and I've also tried including <cuda.h>. Here is my complete source code:
[codebox]
/**
 * I am a simple program designed to run
 * on the GPU
 *
 * @author Martin Dale Lyness martin.lyness@gmail.com
 */
#include <cuda.h>
#include <cuda_runtime_api.h>
#include <stdlib.h>
#include <stdio.h>
#include <time.h>

#include "fp_shared.h"
/* Forward declaration of the program entry point, defined further down in
 * this file.
 * NOTE(review): the standard signature for main takes `char **argv`; the
 * `void **` form is kept here only so that it matches the definition. */
int main(int argc, void ** argv);
/**
 * A GPU kernel intended to compute the average of a set of exactly 9 points,
 * effectively downsampling a data set.
 *
 * Build note: this file must be compiled as CUDA source (give it a .cu
 * extension, e.g. `nvcc main.cu`). nvcc hands .c files straight to the host
 * C compiler, which knows nothing about __global__, blockIdx, blockDim or
 * threadIdx -- hence the "undeclared" errors.
 *
 * NOTE: the body is still a stub. It only derives the flat global thread
 * index for a 1D launch and writes nothing to pointsNew.
 *
 * @param resolution size parameter supplied by the caller (not used yet)
 * @param points     device pointer to the input values (not used yet)
 * @param pointsNew  device pointer to the output values (not used yet)
 */
__global__ void ComputeAverage(int resolution, float * points, float * pointsNew) {
    /* Flat global index of this thread in a 1D grid of 1D blocks. */
    int idx = blockIdx.x * blockDim.x + threadIdx.x;

    /* Nothing consumes idx yet; the cast silences the unused-variable warning.
     * Once real work is added, guard it with a bounds check on idx, because
     * the grid size rarely divides the data size evenly. */
    (void) idx;
}
/**
 * Reports the outcome of a CUDA runtime call.
 *
 * @param err  status returned by the CUDA runtime call
 * @param what short description of the call, used in the error message
 * @return 1 when err is cudaSuccess; otherwise prints the CUDA error string
 *         to stderr and returns 0
 */
static int cudaCallSucceeded(cudaError_t err, const char *what) {
    if (err == cudaSuccess) {
        return 1;
    }
    fprintf(stderr, "CUDA Error (%s): %s\n", what, cudaGetErrorString(err));
    return 0;
}

/**
 * Program entry point.
 *
 * Reads up to DIMENSIONS floats from INPUT_FILE_NAME into host memory,
 * allocates matching device buffers, copies the input to the device and
 * prints how long each phase took. The kernel launch is still disabled and
 * nothing is written to OUTPUT_FILE_NAME yet.
 *
 * @param argc unused
 * @param argv unused
 * @return EXIT_SUCCESS when every step succeeded, EXIT_FAILURE otherwise
 */
int main(int argc, void ** argv) {
    /* Everything is declared and initialised up front so the `goto cleanup`
     * error path never jumps over an initialisation (an error in C++, which
     * is what nvcc uses for .cu files). */
    const size_t n_points = (size_t) (DIMENSIONS);
    const size_t n_averages = n_points / 9;
    float *points = NULL, *pointsNew = NULL;
    float *d_points = NULL, *d_pointsNew = NULL;
    FILE *inputFp = NULL;
    size_t n_read = 0;
    time_t secondsStart, secondsRead, secondsInit, secondsGPU;
    int status = EXIT_FAILURE;

    (void) argc;
    (void) argv;

    printf("Hello, I am going to downsample data file '%s'\n", INPUT_FILE_NAME);
    time(&secondsStart);

    /* calloc zero-fills the input buffer, so a short input file never leaves
     * uninitialised values to be copied to the device. */
    points = (float*) calloc(n_points, sizeof(float));
    pointsNew = (float*) malloc(sizeof(float) * n_averages);
    if ((n_points > 0 && points == NULL) || (n_averages > 0 && pointsNew == NULL)) {
        fprintf(stderr, "Could not allocate host memory\n");
        goto cleanup;
    }

    /* The file is only read, so plain "r" is enough ("r+" also demands write
     * permission). */
    inputFp = fopen(INPUT_FILE_NAME, "r");
    if (inputFp == NULL) {
        perror("Input file doesn't exist yet, run input generator first!");
        goto cleanup;
    }
    /* Stop at the end of the buffer or at the first value that fails to
     * parse. Testing feof() before reading would overrun the buffer on long
     * files and spin forever on malformed input. */
    while (n_read < n_points && fscanf(inputFp, "%f", &points[n_read]) == 1) {
        n_read++;
    }
    fclose(inputFp);
    if (n_read < n_points) {
        fprintf(stderr, "Warning: read only %lu of %lu values, the rest are zero\n",
                (unsigned long) n_read, (unsigned long) n_points);
    }
    time(&secondsRead);

    if (!cudaCallSucceeded(cudaMalloc((void**) &d_points, sizeof(float) * n_points),
                           "cudaMalloc d_points")) {
        goto cleanup;
    }
    if (!cudaCallSucceeded(cudaMalloc((void**) &d_pointsNew, sizeof(float) * n_averages),
                           "cudaMalloc d_pointsNew")) {
        goto cleanup;
    }
    if (!cudaCallSucceeded(cudaMemcpy(d_points, points, sizeof(float) * n_points,
                                      cudaMemcpyHostToDevice),
                           "cudaMemcpy host to device")) {
        goto cleanup;
    }
    time(&secondsInit);

    /* The kernel launch stays disabled, as in the original. When enabling it,
     * note that the execution configuration is <<<blocks, threads per block>>>
     * (the original had the two swapped) and that a block may hold at most
     * 1024 threads, so DIMENSIONS / 2 is not a usable block size in general.
     * One thread per output value would look like this:
     *
     *   int threads_per_block = 256;
     *   int n_blocks = (int) ((n_averages + threads_per_block - 1) / threads_per_block);
     *   ComputeAverage<<< n_blocks, threads_per_block >>> (DIMENSIONS, d_points, d_pointsNew);
     *
     * followed by checks of cudaGetLastError() and cudaDeviceSynchronize(). */
    time(&secondsGPU);

    printf("All done, check out file '%s'\n", OUTPUT_FILE_NAME);
    printf("\tStats\n");
    /* Each phase is measured from the end of the previous one; difftime()
     * avoids printing a time_t through an int format specifier. */
    printf("Took %.0f seconds to read file to memory...\n", difftime(secondsRead, secondsStart));
    printf("Took %.0f seconds to initialize GPU memory...\n", difftime(secondsInit, secondsRead));
    printf("Took %.0f seconds to execute GPU calculations...\n", difftime(secondsGPU, secondsInit));
    status = EXIT_SUCCESS;

cleanup:
    /* free(NULL) and cudaFree(NULL) are both no-ops, so this is safe no
     * matter how far the program got. */
    free(points);
    free(pointsNew);
    (void) cudaCallSucceeded(cudaFree(d_points), "cudaFree d_points");
    (void) cudaCallSucceeded(cudaFree(d_pointsNew), "cudaFree d_pointsNew");
    return status;
}
[/codebox]
Compiling this with `nvcc main.c` outputs:
main.c: In function 'ComputeAverage':
main.c:19: error: 'blockIdx' undeclared (first use in this function)
main.c:19: error: (Each undeclared identifier is reported only once
main.c:19: error: for each function it appears in.)
main.c:19: error: 'blockDim' undeclared (first use in this function)
main.c:19: error: 'threadIdx' undeclared (first use in this function)
I am almost certainly missing something simple. I found threadIdx defined in one of the header files included in the CUDA distribution (I think it was device_types.h or something similar), but including that didn't fix the issue either, and I have never seen any other headers included in a source file for a CUDA application.