When upgrade from CUDA12.4 to 12.5 the compilation became broken

When upgrade from CUDA12.4 to 12.5 the compilation became broken
The errors described below.



In file included from /usr/local/cuda/include/cub/util_device.cuh:52,
                 from /usr/local/cuda/include/thrust/system/cuda/detail/util.h:48,
                 from /usr/local/cuda/include/thrust/system/cuda/detail/par.h:39,
                 from /home/Yehonatans/work/utils/cuda/SingletonCudaMemoryPool.h:7,
                 from /home/Yehonatans/work/utils/cuda/unit_testing/TestCubMemoryPool.cpp:8:
/usr/local/cuda/include/cub/util_ptx.cuh: In function ‘void cub::CUB_200400___CUDA_ARCH_LIST___NS::CTA_SYNC()’:
/usr/local/cuda/include/cub/util_ptx.cuh:271:5: error: ‘__syncthreads’ was not declared in this scope
  271 |     __syncthreads();
      |     ^~~~~~~~~~~~~
/usr/local/cuda/include/cub/util_ptx.cuh: In function ‘int cub::CUB_200400___CUDA_ARCH_LIST___NS::CTA_SYNC_AND(int)’:
/usr/local/cuda/include/cub/util_ptx.cuh:280:12: error: ‘__syncthreads_and’ was not declared in this scope
  280 |     return __syncthreads_and(p);
      |            ^~~~~~~~~~~~~~~~~
/usr/local/cuda/include/cub/util_ptx.cuh: In function ‘int cub::CUB_200400___CUDA_ARCH_LIST___NS::CTA_SYNC_OR(int)’:
/usr/local/cuda/include/cub/util_ptx.cuh:289:12: error: ‘__syncthreads_or’ was not declared in this scope
  289 |     return __syncthreads_or(p);
      |            ^~~~~~~~~~~~~~~~
/usr/local/cuda/include/cub/util_ptx.cuh: In function ‘void cub::CUB_200400___CUDA_ARCH_LIST___NS::WARP_SYNC(unsigned int)’:
/usr/local/cuda/include/cub/util_ptx.cuh:298:5: error: ‘__syncwarp’ was not declared in this scope
  298 |     __syncwarp(member_mask);
      |     ^~~~~~~~~~
/usr/local/cuda/include/cub/util_ptx.cuh: In function ‘int cub::CUB_200400___CUDA_ARCH_LIST___NS::WARP_ANY(int, unsigned int)’:
/usr/local/cuda/include/cub/util_ptx.cuh:307:12: error: ‘__any_sync’ was not declared in this scope
  307 |     return __any_sync(member_mask, predicate);
      |            ^~~~~~~~~~
/usr/local/cuda/include/cub/util_ptx.cuh: In function ‘int cub::CUB_200400___CUDA_ARCH_LIST___NS::WARP_ALL(int, unsigned int)’:
/usr/local/cuda/include/cub/util_ptx.cuh:316:12: error: ‘__all_sync’ was not declared in this scope
  316 |     return __all_sync(member_mask, predicate);
      |            ^~~~~~~~~~
/usr/local/cuda/include/cub/util_ptx.cuh: In function ‘int cub::CUB_200400___CUDA_ARCH_LIST___NS::WARP_BALLOT(int, unsigned int)’:
/usr/local/cuda/include/cub/util_ptx.cuh:325:12: error: ‘__ballot_sync’ was not declared in this scope
  325 |     return __ballot_sync(member_mask, predicate);
      |            ^~~~~~~~~~~~~
/usr/local/cuda/include/cub/util_ptx.cuh: In function ‘unsigned int cub::CUB_200400___CUDA_ARCH_LIST___NS::SHFL_IDX_SYNC(unsigned int, int, unsigned int)’:
/usr/local/cuda/include/cub/util_ptx.cuh:368:12: error: ‘__shfl_sync’ was not declared in this scope
  368 |     return __shfl_sync(member_mask, word, src_lane);
      |            ^~~~~~~~~~~
/usr/local/cuda/include/cub/util_ptx.cuh: In function ‘int cub::CUB_200400___CUDA_ARCH_LIST___NS::RowMajorTid(int, int, int)’:
/usr/local/cuda/include/cub/util_ptx.cuh:415:39: error: ‘threadIdx’ was not declared in this scope
  415 |     return ((block_dim_z == 1) ? 0 : (threadIdx.z * block_dim_x * block_dim_y)) +
      |                                       ^~~~~~~~~
In file included from /usr/local/cuda/include/cuda/std/detail/libcxx/include/__cuda/ptx/instructions/barrier_cluster.h:30,
                 from /usr/local/cuda/include/cuda/std/detail/libcxx/include/__cuda/ptx.h:74,
                 from /usr/local/cuda/include/cuda/ptx:19,
                 from /usr/local/cuda/include/cuda/discard_memory:25,
                 from /usr/local/cuda/include/cub/util_device.cuh:57,
                 from /usr/local/cuda/include/thrust/system/cuda/detail/util.h:48,
                 from /usr/local/cuda/include/thrust/system/cuda/detail/par.h:39,
                 from /home/Yehonatans/work/utils/cuda/SingletonCudaMemoryPool.h:7,
                 from /home/Yehonatans/work/utils/cuda/unit_testing/TestCubMemoryPool.cpp:8:
/usr/local/cuda/include/cuda/std/detail/libcxx/include/__cuda/ptx/instructions/../ptx_helper_functions.h: In function ‘uint32_t cuda::ptx::__4::__as_ptr_smem(const void*)’:
/usr/local/cuda/include/cuda/std/detail/libcxx/include/__cuda/ptx/instructions/../ptx_helper_functions.h:40:44: error: ‘__cvta_generic_to_shared’ was not declared in this scope
   40 |   return static_cast<_CUDA_VSTD::uint32_t>(__cvta_generic_to_shared(__ptr));
      |                                            ^~~~~~~~~~~~~~~~~~~~~~~~
/usr/local/cuda/include/cuda/std/detail/libcxx/include/__cuda/ptx/instructions/../ptx_helper_functions.h: In function ‘uint64_t cuda::ptx::__4::__as_ptr_gmem(const void*)’:
/usr/local/cuda/include/cuda/std/detail/libcxx/include/__cuda/ptx/instructions/../ptx_helper_functions.h:60:44: error: ‘__cvta_generic_to_global’ was not declared in this scope
   60 |   return static_cast<_CUDA_VSTD::uint64_t>(__cvta_generic_to_global(__ptr));
      |                                            ^~~~~~~~~~~~~~~~~~~~~~~~
/usr/local/cuda/include/cuda/std/detail/libcxx/include/__cuda/ptx/instructions/../ptx_helper_functions.h: In function ‘_Tp* cuda::ptx::__4::__from_ptr_smem(size_t)’:
/usr/local/cuda/include/cuda/std/detail/libcxx/include/__cuda/ptx/instructions/../ptx_helper_functions.h:73:33: error: there are no arguments to ‘__cvta_shared_to_generic’ that depend on a template parameter, so a declaration of ‘__cvta_shared_to_generic’ must be available [-fpermissive]
   73 |   return reinterpret_cast<_Tp*>(__cvta_shared_to_generic(__ptr));
      |                                 ^~~~~~~~~~~~~~~~~~~~~~~~
/usr/local/cuda/include/cuda/std/detail/libcxx/include/__cuda/ptx/instructions/../ptx_helper_functions.h:73:33: note: (if you use ‘-fpermissive’, G++ will accept your code, but allowing the use of an undeclared name is deprecated)
/usr/local/cuda/include/cuda/std/detail/libcxx/include/__cuda/ptx/instructions/../ptx_helper_functions.h: In function ‘_Tp* cuda::ptx::__4::__from_ptr_gmem(size_t)’:
/usr/local/cuda/include/cuda/std/detail/libcxx/include/__cuda/ptx/instructions/../ptx_helper_functions.h:94:33: error: there are no arguments to ‘__cvta_global_to_generic’ that depend on a template parameter, so a declaration of ‘__cvta_global_to_generic’ must be available [-fpermissive]
   94 |   return reinterpret_cast<_Tp*>(__cvta_global_to_generic(__ptr));
      |                                 ^~~~~~~~~~~~~~~~~~~~~~~~
In file included from /usr/local/cuda/include/thrust/system/cuda/detail/util.h:48,
                 from /usr/local/cuda/include/thrust/system/cuda/detail/par.h:39,
                 from /home/Yehonatans/work/utils/cuda/SingletonCudaMemoryPool.h:7,
                 from /home/Yehonatans/work/utils/cuda/unit_testing/TestCubMemoryPool.cpp:8:
/usr/local/cuda/include/cub/util_device.cuh: In static member function ‘static typename AgentT::TempStorage& cub::CUB_200400___CUDA_ARCH_LIST___NS::detail::vsmem_helper_impl<AgentT>::get_temp_storage(cub::CUB_200400___CUDA_ARCH_LIST___NS::NullType&, cub::CUB_200400___CUDA_ARCH_LIST___NS::detail::vsmem_t&)’:
/usr/local/cuda/include/cub/util_device.cuh:160:63: error: ‘blockIdx’ was not declared in this scope
  160 |       static_cast<char*>(vsmem.gmem_ptr) + (vsmem_per_block * blockIdx.x));
      |                                                               ^~~~~~~~
/usr/local/cuda/include/cub/util_device.cuh: In static member function ‘static bool cub::CUB_200400___CUDA_ARCH_LIST___NS::detail::vsmem_helper_impl<AgentT>::discard_temp_storage(typename AgentT::TempStorage&)’:
/usr/local/cuda/include/cub/util_device.cuh:201:38: error: ‘threadIdx’ was not declared in this scope
  201 |     const std::size_t linear_tid   = threadIdx.x;
      |                                      ^~~~~~~~~
/usr/local/cuda/include/cub/util_device.cuh:202:50: error: ‘blockDim’ was not declared in this scope
  202 |     const std::size_t block_stride = line_size * blockDim.x;

1 Like

Hi there are any update?

Any help?