NVCC silently compiles std::swap to incorrect code (with no error or warning) in certain scenarios

Steps to reproduce:

  • Launch an A100 instance on Lambda
  • Install the latest CUDA toolkit, then compile and run the reproduction below:
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
sudo dpkg -i cuda-keyring_1.1-1_all.deb
sudo apt-get update
sudo apt-get -y install cuda-toolkit-12-8
/usr/local/cuda-12.8/bin/nvcc -std=c++20 --expt-relaxed-constexpr -arch=sm_80 -o repro repro.cu && ./repro
// Compile with:
// nvcc -std=c++20 --expt-relaxed-constexpr -arch=sm_80 -o repro repro.cu && ./repro 

// If you remove `-std=c++20`, you get the correct error message:
// error: identifier "std::swap<    ::MyStruct> " is undefined in device code

// If you remove `--expt-relaxed-constexpr`, you get a misleading error message:
//  error: calling a constexpr __host__ function("swap") from a __global__ function("reproKernel") is not allowed. The experimental flag '--expt-relaxed-constexpr' can be used to allow this.

#include <stdio.h>
#include <cuda/std/type_traits>

// Single-field type embedded in MyStruct. It combines a user-provided,
// non-constexpr copy constructor with a constexpr default constructor; per the
// notes below, changing either one changes how nvcc treats the std::swap call.
struct Foo {
	int32_t foo;

	// User-provided copy constructor (host and device); copies the one field.
	__host__ __device__ Foo(const Foo& other) : foo(other.foo) {}
	// ^ comment out this line, and the swap succeeds (?!?!?!?!)

	// Default constructor (host and device); initializes foo to 1337.
	__host__ __device__ constexpr Foo() : foo(1337){}
	// remove this constexpr ^ to get the correct error message:
	// error: identifier "std::swap<    ::MyStruct> " is undefined in device code
};

// Aggregate that reproKernel swaps. `bar` is the value printed before and
// after the swap; `foo` is default-constructed when only `.bar` is designated.
struct MyStruct {
	Foo foo; // <-- comment out this line, and the swap succeeds (?!?!?!?!)
	int32_t bar;
};

// Reproduction kernel: builds two MyStruct values, prints their `bar` fields,
// calls std::swap on them in device code, and prints the fields again.
// It takes no arguments and does no thread indexing; main() launches it as <<<1, 1>>>.
__global__ void reproKernel() {
	// Designated initializers set only `bar`; `foo` uses Foo's default constructor.
	MyStruct A{.bar = 123};
	MyStruct B{.bar = 456};

	printf("Before swap %d %d (expect: 123 456)\n", A.bar, B.bar);
	// The call under test: the host std::swap, invoked from a __global__ function.
	std::swap(A, B);
	printf("After swap %d %d (expect: 456 123)\n", A.bar, B.bar);

// Disabled on purpose: per the message below, enabling this block makes the
// first swap succeed as well, which hides the problem being reproduced.
#if 0
	::cuda::std::swap(A, B);
	printf("After second swap %d %d (expect 123 456) (note that enabling this #if **made the first swap also succeed** (?!?!?!))\n", A.bar, B.bar);
#endif
}

// Host entry point: launches reproKernel with one block of one thread and
// waits for it to finish, so the device-side printf output is flushed before
// the process exits.
// Returns 0 on success, 1 if the launch or the kernel execution reported a
// CUDA error. Without these checks a failed launch (e.g. no compatible GPU)
// would print nothing and still exit 0, which looks like a different failure.
int main() {
	reproKernel<<<1, 1>>>();

	// A kernel launch has no return value; launch and configuration errors
	// are reported through cudaGetLastError().
	cudaError_t err = cudaGetLastError();
	if (err != cudaSuccess) {
		fprintf(stderr, "kernel launch failed: %s\n", cudaGetErrorString(err));
		return 1;
	}

	// Errors raised while the kernel runs surface at the next synchronizing call.
	err = cudaDeviceSynchronize();
	if (err != cudaSuccess) {
		fprintf(stderr, "kernel execution failed: %s\n", cudaGetErrorString(err));
		return 1;
	}

	return 0;
}

You may wish to file a bug.

I have filed a bug report on the NVIDIA Developer site (the link requires logging in).