[Solved] Error when specifying macro definitions for use during preprocessing or compilation.

Does anyone know how to solve this problem?
If I remove “-DN=1024”, it compiles correctly.
Compiling error:

/usr/local/cuda/bin/../targets/x86_64-linux/include/cuda.h(6121): error: expected a ")"

/usr/local/cuda/bin/../targets/x86_64-linux/include/cuda.h(6155): error: expected a ")"

/usr/local/cuda/bin/../targets/x86_64-linux/include/cuda.h(6189): error: expected a ")"

/usr/local/cuda/bin/../targets/x86_64-linux/include/cuda.h(6344): error: expected a ")"

/usr/local/cuda/bin/../targets/x86_64-linux/include/cuda.h(6380): error: expected a ")"

/usr/local/cuda/bin/../targets/x86_64-linux/include/cuda.h(6415): error: expected a ")"

6 errors detected in the compilation of "/tmp/tmpxft_00001279_00000000-9_vd_tex.cpp1.ii".

Makefile:

vd_tex:vd_tex.cu
        nvcc -DN=1024 -o vd_tex vd_tex.cu

vd_tex.cu

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <cuda.h>
#ifndef N
    #define N (2048)
#endif

texture<float> tex_a;
texture<float> tex_b;
texture<float> tex_c;

// CUDA kernel. Each thread takes care of one element of c
__global__ void vecAdd(float *c)
{
    // Get our global thread ID
    int id = blockIdx.x*blockDim.x+threadIdx.x;
    // Make sure we do not go out of bounds
   // if (id < N) {
		
		c[id] = tex1Dfetch(tex_a,id) + tex1Dfetch(tex_b,id);
//	}
        
}

int main( int argc, char* argv[] )
{
    // Size of vectors
    //int n = 10000;
	
    // Host input vectors
    float *h_a;
    float *h_b;
    //Host output vector
    float *h_c;
	
    // Device input vectors
    float *d_a;
    float *d_b;
    //Device output vector
    float *d_c;
	
    // Size, in bytes, of each vector
    size_t bytes = N*sizeof(float);
	
    // Allocate memory for each vector on host
    h_a = (float*)malloc(bytes);
    h_b = (float*)malloc(bytes);
    h_c = (float*)malloc(bytes);
	// Allocate memory for each vector on GPU
    cudaMalloc(&d_a, bytes);
    cudaMalloc(&d_b, bytes);
    cudaMalloc(&d_c, bytes);
	
    int i;
    // Initialize vectors on host
    for( i = 0; i < N; i++ ) {
        h_a[i] = sin(i)*sin(i);
        h_b[i] = cos(i)*cos(i);
		//h_c[i] = 0.0f;
    }
	// bind to texture memory
	cudaBindTexture( NULL, tex_a,
					 d_a,
					 bytes );
	cudaBindTexture( NULL, tex_b,
					 d_b,
					 bytes );
	cudaBindTexture( NULL, tex_c,
					 d_c,
					 bytes );
    // Copy host vectors to device
    cudaMemcpy( d_a, h_a, bytes, cudaMemcpyHostToDevice);
    cudaMemcpy( d_b, h_b, bytes, cudaMemcpyHostToDevice);
	
	
    int blockSize, gridSize;
	
    // Number of threads in each thread block
    blockSize = 1024;
	
    // Number of thread blocks in grid
    gridSize = (int)ceil((float)N/blockSize);
	
    // Execute the kernel
    vecAdd<<<gridSize, blockSize>>>(d_c);
	
    // Copy array back to host
    cudaMemcpy( h_c, d_c, bytes, cudaMemcpyDeviceToHost );
	
    // Sum up vector c and print result divided by n, this should equal 1 within error
    float sum = 0;
    for(i=0; i<N; i++)
        sum += h_c[i];
    printf("final result: %f\n", sum/N);
	
    // Release device memory
    cudaFree(d_a);
    cudaFree(d_b);
    cudaFree(d_c);
	
    // Release host memory
    free(h_a);
    free(h_b);
    free(h_c);
	return 0;
}

For codes that use the runtime API, it’s not necessary to include cuda.h like this:

#include <cuda.h>

simply delete that line and the problem should go away. If you don’t wish to make that change:

The -DN=1024 switch is clashing with various lines in cuda.h which have a function prototype that uses a variable named N, such as this one:

CUresult CUDAAPI cuMemsetD8(CUdeviceptr dstDevice, unsigned char uc, size_t N);
                                                                            ^

Either don’t use this particular compile switch, or change your N variable to something else, like NN, which doesn’t appear in any CUDA headers, and use -DNN=1024 instead.

$ cat t691.cu
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <cuda.h>
#ifndef NN
    #define NN (2048)
#endif

texture<float> tex_a;
texture<float> tex_b;
texture<float> tex_c;

// CUDA kernel. Each thread takes care of one element of c
__global__ void vecAdd(float *c)
{
    // Get our global thread ID
    int id = blockIdx.x*blockDim.x+threadIdx.x;
    // Make sure we do not go out of bounds
   // if (id < NN) {

                c[id] = tex1Dfetch(tex_a,id) + tex1Dfetch(tex_b,id);
//      }

}

int main( int argc, char* argv[] )
{
    // Size of vectors
    //int n = 10000;

    // Host input vectors
    float *h_a;
    float *h_b;
    //Host output vector
    float *h_c;

    // Device input vectors
    float *d_a;
    float *d_b;
    //Device output vector
    float *d_c;

    // Size, in bytes, of each vector
    size_t bytes = NN*sizeof(float);

    // Allocate memory for each vector on host
    h_a = (float*)malloc(bytes);
    h_b = (float*)malloc(bytes);
    h_c = (float*)malloc(bytes);
        // Allocate memory for each vector on GPU
    cudaMalloc(&d_a, bytes);
    cudaMalloc(&d_b, bytes);
    cudaMalloc(&d_c, bytes);

    int i;
    // Initialize vectors on host
    for( i = 0; i < NN; i++ ) {
        h_a[i] = sin(i)*sin(i);
        h_b[i] = cos(i)*cos(i);
                //h_c[i] = 0.0f;
    }
        // bind to texture memory
        cudaBindTexture( NULL, tex_a,
                                         d_a,
                                         bytes );
        cudaBindTexture( NULL, tex_b,
                                         d_b,
                                         bytes );
        cudaBindTexture( NULL, tex_c,
                                         d_c,
                                         bytes );
    // Copy host vectors to device
    cudaMemcpy( d_a, h_a, bytes, cudaMemcpyHostToDevice);
    cudaMemcpy( d_b, h_b, bytes, cudaMemcpyHostToDevice);

int blockSize, gridSize;

    // Number of threads in each thread block
    blockSize = 1024;

    // Number of thread blocks in grid
    gridSize = (int)ceil((float)NN/blockSize);

    // Execute the kernel
    vecAdd<<<gridSize, blockSize>>>(d_c);

    // Copy array back to host
    cudaMemcpy( h_c, d_c, bytes, cudaMemcpyDeviceToHost );

    // Sum up vector c and print result divided by n, this should equal 1 within error
    float sum = 0;
    for(i=0; i<NN; i++)
        sum += h_c[i];
    printf("final result: %f\n", sum/NN);

    // Release device memory
    cudaFree(d_a);
    cudaFree(d_b);
    cudaFree(d_c);

    // Release host memory
    free(h_a);
    free(h_b);
    free(h_c);
        return 0;
}
$ nvcc -DNN=1024 -o t691 t691.cu
$

Thank you.
Your answer solved my problem.