Hi,
I am trying to compile an application that compiled fine under CUDA 3.2. Now I have updated to 4.0 and things seem broken. This is what happens when trying to compile something that depends on the Intel TBB libraries (NOTE: the "amd64" in the paths is meaningless here; it comes from the PARSEC setup I am using and has nothing to do with the problem):
nvcc -L/home/jrbcast/parsec-2.1_cuda/pkgs/libs/gsl/inst/amd64-linux.icc/lib -L/usr/local/cuda/lib64 -lcuda -L/home/jrbcast/parsec-2.1_cuda/pkgs/libs/imagick/inst/amd64-linux.icc/lib -L/home/jrbcast/intel/composerxe-2011.3.174/lib64 -L/home/jrbcast/intel/composerxe-2011.3.174/lib -L/home/jrbcast/parsec-2.1_cuda/pkgs/apps/ferret/obj/amd64-linux.icc/parsec/lib -lrt -lm -lgsl -lgslcblas -lstdc++ -lcuda -lcudart -lcublas -lcass -lgsl -lgslcblas -lpthread -ltbb -I/home/jrbcast/parsec-2.1_cuda/pkgs/apps/ferret/src/include/ -c /home/jrbcast/parsec-2.1_cuda/pkgs/apps/ferret/obj/amd64-linux.icc/benchmark/ferret-tbb6.cu -o /home/jrbcast/parsec-2.1_cuda/pkgs/apps/ferret/obj/amd64-linux.icc/parsec/obj/ferret-tbb6.o
/home/jrbcast/intel/composerxe-2011.3.174/tbb/include/tbb/atomic.h: In member function ‘I tbb::internal::atomic_impl_with_arithmetic<I, D, StepType>::fetch_and_add(D)’:
/home/jrbcast/intel/composerxe-2011.3.174/tbb/include/tbb/atomic.h:223: error: expected primary-expression before ‘)’ token
/home/jrbcast/intel/composerxe-2011.3.174/tbb/include/tbb/atomic.h:223: error: expected ‘;’
/home/jrbcast/intel/composerxe-2011.3.174/tbb/include/tbb/atomic.h:223: error: ‘__T3’ has not been declared
make: *** [/home/jrbcast/parsec-2.1_cuda/pkgs/apps/ferret/obj/amd64-linux.icc/parsec/obj/ferret-tbb6.o] Error 1
Any hints?
Regards,
I am facing exactly the same problem. I tried to find a minimal program that reproduces the error and ended up with
#include "tbb/atomic.h"
int main() {
return 0;
}
Compiling it on my Ubuntu 10.04 box with
nvcc -v -o tbb -I /usr/local/tbb30_20110427oss/include/ tbb.cu
I get
#$ _SPACE_=
#$ _CUDART_=cudart
#$ _HERE_=/usr/local/cuda/bin
#$ _THERE_=/usr/local/cuda/bin
#$ _TARGET_SIZE_=64
#$ TOP=/usr/local/cuda/bin/..
#$ LD_LIBRARY_PATH=/usr/local/cuda/bin/../lib:/usr/local/cuda/bin/../extools/lib:/home/bauke/local/lib:/usr/local/cuda/lib64:/usr/local/intel/composerxe-2011.1.107/composerxe-2011.1.107/compiler/lib/intel64:/usr/local/intel/composerxe-2011.1.107/composerxe-2011.1.107/mkl/lib/intel64:/usr/local/intel/composerxe-2011.1.107/composerxe-2011.1.107/compiler/lib/intel64:/usr/local/intel/composerxe-2011.1.107/composerxe-2011.1.107/mpirt/lib/intel64:/usr/local/intel/composerxe-2011.1.107/composerxe-2011.1.107/ipp/../compiler/lib/intel64:/usr/local/intel/composerxe-2011.1.107/composerxe-2011.1.107/ipp/lib/intel64:/usr/local/intel/composerxe-2011.1.107/composerxe-2011.1.107/compiler/lib/intel64:/usr/local/intel/composerxe-2011.1.107/composerxe-2011.1.107/mkl/lib/intel64:/usr/local/intel/composerxe-2011.1.107/composerxe-2011.1.107/tbb/lib/intel64//cc4.1.0_libc2.4_kernel2.6.16.21
#$ PATH=/usr/local/cuda/bin/../open64/bin:/usr/local/cuda/bin:/usr/local/n1ge6/bin/lx24-amd64:/home/bauke/local/bin:/usr/local/cuda/bin:/usr/local/intel/composerxe-2011.1.107/composerxe-2011.1.107/bin/intel64:/usr/local/intel/composerxe-2011.1.107/composerxe-2011.1.107/mpirt/bin/intel64:/usr/local/texlive/2010/bin/x86_64-linux:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/texlive/2010/texmf/doc/info
#$ INCLUDES="-I/usr/local/cuda/bin/../include" "-I/usr/local/cuda/bin/../include/cudart"
#$ LIBRARIES= "-L/usr/local/cuda/bin/../lib64" -lcudart
#$ CUDAFE_FLAGS=
#$ OPENCC_FLAGS=
#$ PTXAS_FLAGS=
#$ gcc -D__CUDA_ARCH__=100 -E -x c++ -DCUDA_NO_SM_13_DOUBLE_INTRINSICS -DCUDA_FLOAT_MATH_FUNCTIONS -DCUDA_NO_SM_11_ATOMIC_INTRINSICS -DCUDA_NO_SM_12_ATOMIC_INTRINSICS -D__CUDACC__ -C -I"/usr/local/tbb30_20110427oss/include/" "-I/usr/local/cuda/bin/../include" "-I/usr/local/cuda/bin/../include/cudart" -include "cuda_runtime.h" -m64 -o "/tmp/tmpxft_00002eb4_00000000-4_tbb.cpp1.ii" "tbb.cu"
#$ cudafe --m64 --gnu_version=40403 -tused --no_remove_unneeded_entities --gen_c_file_name "/tmp/tmpxft_00002eb4_00000000-1_tbb.cudafe1.c" --stub_file_name "/tmp/tmpxft_00002eb4_00000000-1_tbb.cudafe1.stub.c" --gen_device_file_name "/tmp/tmpxft_00002eb4_00000000-1_tbb.cudafe1.gpu" --include_file_name "/tmp/tmpxft_00002eb4_00000000-3_tbb.fatbin.c" "/tmp/tmpxft_00002eb4_00000000-4_tbb.cpp1.ii"
#$ gcc -D__CUDA_ARCH__=100 -E -x c -DCUDA_NO_SM_13_DOUBLE_INTRINSICS -DCUDA_FLOAT_MATH_FUNCTIONS -DCUDA_NO_SM_11_ATOMIC_INTRINSICS -DCUDA_NO_SM_12_ATOMIC_INTRINSICS -D__CUDACC__ -C -D__CUDA_FTZ -I"/usr/local/tbb30_20110427oss/include/" "-I/usr/local/cuda/bin/../include" "-I/usr/local/cuda/bin/../include/cudart" -m64 -o "/tmp/tmpxft_00002eb4_00000000-5_tbb.cpp2.i" "/tmp/tmpxft_00002eb4_00000000-1_tbb.cudafe1.gpu"
#$ cudafe --m64 --gnu_version=40403 --c --gen_c_file_name "/tmp/tmpxft_00002eb4_00000000-6_tbb.cudafe2.c" --stub_file_name "/tmp/tmpxft_00002eb4_00000000-6_tbb.cudafe2.stub.c" --gen_device_file_name "/tmp/tmpxft_00002eb4_00000000-6_tbb.cudafe2.gpu" --include_file_name "/tmp/tmpxft_00002eb4_00000000-3_tbb.fatbin.c" "/tmp/tmpxft_00002eb4_00000000-5_tbb.cpp2.i"
#$ gcc -D__CUDA_ARCH__=100 -E -x c -DCUDA_NO_SM_13_DOUBLE_INTRINSICS -DCUDA_FLOAT_MATH_FUNCTIONS -DCUDA_NO_SM_11_ATOMIC_INTRINSICS -DCUDA_NO_SM_12_ATOMIC_INTRINSICS -D__CUDABE__ -D__CUDA_FTZ -I"/usr/local/tbb30_20110427oss/include/" "-I/usr/local/cuda/bin/../include" "-I/usr/local/cuda/bin/../include/cudart" -m64 -o "/tmp/tmpxft_00002eb4_00000000-7_tbb.cpp3.i" "/tmp/tmpxft_00002eb4_00000000-6_tbb.cudafe2.gpu"
#$ filehash -s " " "/tmp/tmpxft_00002eb4_00000000-7_tbb.cpp3.i" > "/tmp/tmpxft_00002eb4_00000000-8_tbb.hash"
#$ gcc -E -x c++ -D__CUDACC__ -C -I"/usr/local/tbb30_20110427oss/include/" "-I/usr/local/cuda/bin/../include" "-I/usr/local/cuda/bin/../include/cudart" -include "cuda_runtime.h" -m64 -o "/tmp/tmpxft_00002eb4_00000000-9_tbb.cpp4.ii" "tbb.cu"
#$ cudafe++ --m64 --gnu_version=40403 --parse_templates --gen_c_file_name "/tmp/tmpxft_00002eb4_00000000-1_tbb.cudafe1.cpp" --stub_file_name "/tmp/tmpxft_00002eb4_00000000-1_tbb.cudafe1.stub.c" "/tmp/tmpxft_00002eb4_00000000-9_tbb.cpp4.ii"
#$ nvopencc -TARG:compute_10 -m64 -OPT:ftz=1 -CG:ftz=1 -CG:prec_div=0 -CG:prec_sqrt=0 "/tmp/tmpxft_00002eb4_00000000-10_tbb" "/tmp/tmpxft_00002eb4_00000000-7_tbb.cpp3.i" -o "/tmp/tmpxft_00002eb4_00000000-2_tbb.ptx"
#$ ptxas -arch=sm_10 -m64 "/tmp/tmpxft_00002eb4_00000000-2_tbb.ptx" -o "/tmp/tmpxft_00002eb4_00000000-11_tbb.sm_10.cubin"
#$ fatbinary --create="/tmp/tmpxft_00002eb4_00000000-3_tbb.fatbin" --key="1abd7ece1f0930d3" --ident="tbb.cu" -cuda "--image=profile=compute_10,file=/tmp/tmpxft_00002eb4_00000000-2_tbb.ptx" "--image=profile=sm_10,file=/tmp/tmpxft_00002eb4_00000000-11_tbb.sm_10.cubin" --embedded-fatbin="/tmp/tmpxft_00002eb4_00000000-3_tbb.fatbin.c"
#$ rm /tmp/tmpxft_00002eb4_00000000-3_tbb.fatbin
#$ gcc -D__CUDA_ARCH__=100 -E -x c++ -DCUDA_NO_SM_13_DOUBLE_INTRINSICS -DCUDA_FLOAT_MATH_FUNCTIONS -DCUDA_NO_SM_11_ATOMIC_INTRINSICS -DCUDA_NO_SM_12_ATOMIC_INTRINSICS -D__CUDA_FTZ -I"/usr/local/tbb30_20110427oss/include/" "-I/usr/local/cuda/bin/../include" "-I/usr/local/cuda/bin/../include/cudart" -m64 -o "/tmp/tmpxft_00002eb4_00000000-12_tbb.ii" "/tmp/tmpxft_00002eb4_00000000-1_tbb.cudafe1.cpp"
#$ gcc -c -x c++ -I"/usr/local/tbb30_20110427oss/include/" "-I/usr/local/cuda/bin/../include" "-I/usr/local/cuda/bin/../include/cudart" -fpreprocessed -m64 -o "/tmp/tmpxft_00002eb4_00000000-13_tbb.o" "/tmp/tmpxft_00002eb4_00000000-12_tbb.ii"
/usr/local/tbb30_20110427oss/include/tbb/atomic.h: In member function ‘I tbb::internal::atomic_impl_with_arithmetic<I, D, StepType>::fetch_and_add(D)’:
/usr/local/tbb30_20110427oss/include/tbb/atomic.h:231: error: ‘__T3’ has not been declared
# --error 0x1 --
I would like to switch from CUDA 3.2 to 4.0. This error, however, is a real show-stopper for me because my applications use both CUDA and Intel Threading Building Blocks. Any hints how to fix this?
Hi,
I finally found the solution. I don't know the exact reason, but it seems that nvcc no longer automatically detects whether you are dealing with C or C++ code. Therefore, you must pass the language flag explicitly to work around the problem:
Try this: nvcc --x c++ …
Good luck,
Jose.
I am facing exactly the same problem. I tried to find a minimal program that reproduces the error and ended up with
#include "tbb/atomic.h"
int main() {
return 0;
}
Compiling it on my Ubuntu 10.04 box with
nvcc -v -o tbb -I /usr/local/tbb30_20110427oss/include/ tbb.cu
I get
#$ _SPACE_=
#$ _CUDART_=cudart
#$ _HERE_=/usr/local/cuda/bin
#$ _THERE_=/usr/local/cuda/bin
#$ _TARGET_SIZE_=64
#$ TOP=/usr/local/cuda/bin/..
#$ LD_LIBRARY_PATH=/usr/local/cuda/bin/../lib:/usr/local/cuda/bin/../extools/lib:/home/bauke/local/lib:/usr/local/cuda/lib64:/usr/local/intel/composerxe-2011.1.107/composerxe-2011.1.107/compiler/lib/intel64:/usr/local/intel/composerxe-2011.1.107/composerxe-2011.1.107/mkl/lib/intel64:/usr/local/intel/composerxe-2011.1.107/composerxe-2011.1.107/compiler/lib/intel64:/usr/local/intel/composerxe-2011.1.107/composerxe-2011.1.107/mpirt/lib/intel64:/usr/local/intel/composerxe-2011.1.107/composerxe-2011.1.107/ipp/../compiler/lib/intel64:/usr/local/intel/composerxe-2011.1.107/composerxe-2011.1.107/ipp/lib/intel64:/usr/local/intel/composerxe-2011.1.107/composerxe-2011.1.107/compiler/lib/intel64:/usr/local/intel/composerxe-2011.1.107/composerxe-2011.1.107/mkl/lib/intel64:/usr/local/intel/composerxe-2011.1.107/composerxe-2011.1.107/tbb/lib/intel64//cc4.1.0_libc2.4_kernel2.6.16.21
#$ PATH=/usr/local/cuda/bin/../open64/bin:/usr/local/cuda/bin:/usr/local/n1ge6/bin/lx24-amd64:/home/bauke/local/bin:/usr/local/cuda/bin:/usr/local/intel/composerxe-2011.1.107/composerxe-2011.1.107/bin/intel64:/usr/local/intel/composerxe-2011.1.107/composerxe-2011.1.107/mpirt/bin/intel64:/usr/local/texlive/2010/bin/x86_64-linux:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/texlive/2010/texmf/doc/info
#$ INCLUDES="-I/usr/local/cuda/bin/../include" "-I/usr/local/cuda/bin/../include/cudart"
#$ LIBRARIES= "-L/usr/local/cuda/bin/../lib64" -lcudart
#$ CUDAFE_FLAGS=
#$ OPENCC_FLAGS=
#$ PTXAS_FLAGS=
#$ gcc -D__CUDA_ARCH__=100 -E -x c++ -DCUDA_NO_SM_13_DOUBLE_INTRINSICS -DCUDA_FLOAT_MATH_FUNCTIONS -DCUDA_NO_SM_11_ATOMIC_INTRINSICS -DCUDA_NO_SM_12_ATOMIC_INTRINSICS -D__CUDACC__ -C -I"/usr/local/tbb30_20110427oss/include/" "-I/usr/local/cuda/bin/../include" "-I/usr/local/cuda/bin/../include/cudart" -include "cuda_runtime.h" -m64 -o "/tmp/tmpxft_00002eb4_00000000-4_tbb.cpp1.ii" "tbb.cu"
#$ cudafe --m64 --gnu_version=40403 -tused --no_remove_unneeded_entities --gen_c_file_name "/tmp/tmpxft_00002eb4_00000000-1_tbb.cudafe1.c" --stub_file_name "/tmp/tmpxft_00002eb4_00000000-1_tbb.cudafe1.stub.c" --gen_device_file_name "/tmp/tmpxft_00002eb4_00000000-1_tbb.cudafe1.gpu" --include_file_name "/tmp/tmpxft_00002eb4_00000000-3_tbb.fatbin.c" "/tmp/tmpxft_00002eb4_00000000-4_tbb.cpp1.ii"
#$ gcc -D__CUDA_ARCH__=100 -E -x c -DCUDA_NO_SM_13_DOUBLE_INTRINSICS -DCUDA_FLOAT_MATH_FUNCTIONS -DCUDA_NO_SM_11_ATOMIC_INTRINSICS -DCUDA_NO_SM_12_ATOMIC_INTRINSICS -D__CUDACC__ -C -D__CUDA_FTZ -I"/usr/local/tbb30_20110427oss/include/" "-I/usr/local/cuda/bin/../include" "-I/usr/local/cuda/bin/../include/cudart" -m64 -o "/tmp/tmpxft_00002eb4_00000000-5_tbb.cpp2.i" "/tmp/tmpxft_00002eb4_00000000-1_tbb.cudafe1.gpu"
#$ cudafe --m64 --gnu_version=40403 --c --gen_c_file_name "/tmp/tmpxft_00002eb4_00000000-6_tbb.cudafe2.c" --stub_file_name "/tmp/tmpxft_00002eb4_00000000-6_tbb.cudafe2.stub.c" --gen_device_file_name "/tmp/tmpxft_00002eb4_00000000-6_tbb.cudafe2.gpu" --include_file_name "/tmp/tmpxft_00002eb4_00000000-3_tbb.fatbin.c" "/tmp/tmpxft_00002eb4_00000000-5_tbb.cpp2.i"
#$ gcc -D__CUDA_ARCH__=100 -E -x c -DCUDA_NO_SM_13_DOUBLE_INTRINSICS -DCUDA_FLOAT_MATH_FUNCTIONS -DCUDA_NO_SM_11_ATOMIC_INTRINSICS -DCUDA_NO_SM_12_ATOMIC_INTRINSICS -D__CUDABE__ -D__CUDA_FTZ -I"/usr/local/tbb30_20110427oss/include/" "-I/usr/local/cuda/bin/../include" "-I/usr/local/cuda/bin/../include/cudart" -m64 -o "/tmp/tmpxft_00002eb4_00000000-7_tbb.cpp3.i" "/tmp/tmpxft_00002eb4_00000000-6_tbb.cudafe2.gpu"
#$ filehash -s " " "/tmp/tmpxft_00002eb4_00000000-7_tbb.cpp3.i" > "/tmp/tmpxft_00002eb4_00000000-8_tbb.hash"
#$ gcc -E -x c++ -D__CUDACC__ -C -I"/usr/local/tbb30_20110427oss/include/" "-I/usr/local/cuda/bin/../include" "-I/usr/local/cuda/bin/../include/cudart" -include "cuda_runtime.h" -m64 -o "/tmp/tmpxft_00002eb4_00000000-9_tbb.cpp4.ii" "tbb.cu"
#$ cudafe++ --m64 --gnu_version=40403 --parse_templates --gen_c_file_name "/tmp/tmpxft_00002eb4_00000000-1_tbb.cudafe1.cpp" --stub_file_name "/tmp/tmpxft_00002eb4_00000000-1_tbb.cudafe1.stub.c" "/tmp/tmpxft_00002eb4_00000000-9_tbb.cpp4.ii"
#$ nvopencc -TARG:compute_10 -m64 -OPT:ftz=1 -CG:ftz=1 -CG:prec_div=0 -CG:prec_sqrt=0 "/tmp/tmpxft_00002eb4_00000000-10_tbb" "/tmp/tmpxft_00002eb4_00000000-7_tbb.cpp3.i" -o "/tmp/tmpxft_00002eb4_00000000-2_tbb.ptx"
#$ ptxas -arch=sm_10 -m64 "/tmp/tmpxft_00002eb4_00000000-2_tbb.ptx" -o "/tmp/tmpxft_00002eb4_00000000-11_tbb.sm_10.cubin"
#$ fatbinary --create="/tmp/tmpxft_00002eb4_00000000-3_tbb.fatbin" --key="1abd7ece1f0930d3" --ident="tbb.cu" -cuda "--image=profile=compute_10,file=/tmp/tmpxft_00002eb4_00000000-2_tbb.ptx" "--image=profile=sm_10,file=/tmp/tmpxft_00002eb4_00000000-11_tbb.sm_10.cubin" --embedded-fatbin="/tmp/tmpxft_00002eb4_00000000-3_tbb.fatbin.c"
#$ rm /tmp/tmpxft_00002eb4_00000000-3_tbb.fatbin
#$ gcc -D__CUDA_ARCH__=100 -E -x c++ -DCUDA_NO_SM_13_DOUBLE_INTRINSICS -DCUDA_FLOAT_MATH_FUNCTIONS -DCUDA_NO_SM_11_ATOMIC_INTRINSICS -DCUDA_NO_SM_12_ATOMIC_INTRINSICS -D__CUDA_FTZ -I"/usr/local/tbb30_20110427oss/include/" "-I/usr/local/cuda/bin/../include" "-I/usr/local/cuda/bin/../include/cudart" -m64 -o "/tmp/tmpxft_00002eb4_00000000-12_tbb.ii" "/tmp/tmpxft_00002eb4_00000000-1_tbb.cudafe1.cpp"
#$ gcc -c -x c++ -I"/usr/local/tbb30_20110427oss/include/" "-I/usr/local/cuda/bin/../include" "-I/usr/local/cuda/bin/../include/cudart" -fpreprocessed -m64 -o "/tmp/tmpxft_00002eb4_00000000-13_tbb.o" "/tmp/tmpxft_00002eb4_00000000-12_tbb.ii"
/usr/local/tbb30_20110427oss/include/tbb/atomic.h: In member function ‘I tbb::internal::atomic_impl_with_arithmetic<I, D, StepType>::fetch_and_add(D)’:
/usr/local/tbb30_20110427oss/include/tbb/atomic.h:231: error: ‘__T3’ has not been declared
# --error 0x1 --
I would like to switch from CUDA 3.2 to 4.0. This error, however, is a real show-stopper for me because my applications use both CUDA and Intel Threading Building Blocks. Any hints how to fix this?
This solved my problem. Great!
Heiko