Cudafe++ runs out of memory and crashes during compilation

When I compile my project, which includes OpenEXR v3.1.8, cudafe++ allocates an excessive amount of memory and crashes due to OOM. The line #include <ImfHeader.h> from OpenEXR seems to be the culprit.

nvcc --version reports:

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2023 NVIDIA Corporation
Built on Tue_Jun_13_19:16:58_PDT_2023
Cuda compilation tools, release 12.2, V12.2.91
Build cuda_12.2.r12.2/compiler.32965470_0

I am running Arch Linux with the newest versions of the proprietary NVIDIA drivers and cuda-tools.

If I kill cudafe++ before the OOM kill happens, I can see some compiler errors:

/usr/lib/gcc/x86_64-pc-linux-gnu/12.3.0/include/avx512fp16intrin.h(38): error: vector_size attribute requires an arithmetic or enum type
                  __v8hf __attribute__ ((__vector_size__ (16)));
                                         ^

/usr/lib/gcc/x86_64-pc-linux-gnu/12.3.0/include/avx512fp16intrin.h(39): error: vector_size attribute requires an arithmetic or enum type
                  __v16hf __attribute__ ((__vector_size__ (32)));
                                          ^

/usr/lib/gcc/x86_64-pc-linux-gnu/12.3.0/include/avx512fp16intrin.h(40): error: vector_size attribute requires an arithmetic or enum type
                  __v32hf __attribute__ ((__vector_size__ (64)));
                                          ^

/usr/lib/gcc/x86_64-pc-linux-gnu/12.3.0/include/avx512fp16intrin.h(44): error: vector_size attribute requires an arithmetic or enum type
                  __m128h __attribute__ ((__vector_size__ (16), __may_alias__));
                                          ^

/usr/lib/gcc/x86_64-pc-linux-gnu/12.3.0/include/avx512fp16intrin.h(45): error: vector_size attribute requires an arithmetic or enum type
                  __m256h __attribute__ ((__vector_size__ (32), __may_alias__));
                                          ^

/usr/lib/gcc/x86_64-pc-linux-gnu/12.3.0/include/avx512fp16intrin.h(46): error: vector_size attribute requires an arithmetic or enum type
                  __m512h __attribute__ ((__vector_size__ (64), __may_alias__));
                                          ^

/usr/lib/gcc/x86_64-pc-linux-gnu/12.3.0/include/avx512fp16intrin.h(49): error: vector_size attribute requires an arithmetic or enum type
                  __m128h_u __attribute__ ((__vector_size__ (16),
                                            ^

/usr/lib/gcc/x86_64-pc-linux-gnu/12.3.0/include/avx512fp16intrin.h(51): error: vector_size attribute requires an arithmetic or enum type
                  __m256h_u __attribute__ ((__vector_size__ (32),
                                            ^

/usr/lib/gcc/x86_64-pc-linux-gnu/12.3.0/include/avx512fp16intrin.h(53): error: vector_size attribute requires an arithmetic or enum type
                  __m512h_u __attribute__ ((__vector_size__ (64),
                                            ^

/usr/lib/gcc/x86_64-pc-linux-gnu/12.3.0/include/avx512fp16intrin.h(62): error: more than one conversion function from "__half" to "<error-type>" applies:
            function "__half::operator __half_raw() const" (declared at line 309 of /opt/cuda/bin/../targets/x86_64-linux/include/cuda_fp16.hpp)
            function "__half::operator float() const" (declared at line 337 of /opt/cuda/bin/../targets/x86_64-linux/include/cuda_fp16.hpp)
            function "__half::operator signed char() const" (declared at line 436 of /opt/cuda/bin/../targets/x86_64-linux/include/cuda_fp16.hpp)
            function "__half::operator unsigned char() const" (declared at line 443 of /opt/cuda/bin/../targets/x86_64-linux/include/cuda_fp16.hpp)
            function "__half::operator char() const" (declared at line 451 of /opt/cuda/bin/../targets/x86_64-linux/include/cuda_fp16.hpp)
            function "__half::operator short() const" (declared at line 476 of /opt/cuda/bin/../targets/x86_64-linux/include/cuda_fp16.hpp)
            function "__half::operator unsigned short() const" (declared at line 483 of /opt/cuda/bin/../targets/x86_64-linux/include/cuda_fp16.hpp)
            function "__half::operator int() const" (declared at line 490 of /opt/cuda/bin/../targets/x86_64-linux/include/cuda_fp16.hpp)
            function "__half::operator unsigned int() const" (declared at line 497 of /opt/cuda/bin/../targets/x86_64-linux/include/cuda_fp16.hpp)
            function "__half::operator long() const" (declared at line 504 of /opt/cuda/bin/../targets/x86_64-linux/include/cuda_fp16.hpp)
            function "__half::operator unsigned long() const" (declared at line 528 of /opt/cuda/bin/../targets/x86_64-linux/include/cuda_fp16.hpp)
            function "__half::operator long long() const" (declared at line 554 of /opt/cuda/bin/../targets/x86_64-linux/include/cuda_fp16.hpp)
            function "__half::operator unsigned long long() const" (declared at line 561 of /opt/cuda/bin/../targets/x86_64-linux/include/cuda_fp16.hpp)
            function "__half::operator bool() const" (declared at line 593 of /opt/cuda/bin/../targets/x86_64-linux/include/cuda_fp16.hpp)
    return __extension__ (__m128h)(__v8hf){ __A0, __A1, __A2, __A3,