I am using the cuBLASDx library for the first time, and it seems that merely including its header is enough to trigger a compilation error. Here is a minimal script that reproduces the problem:
import cupy as cp

# CUDA source for the reproduction: an empty kernel whose only notable
# content is the cuBLASDx header include.
kernel = r"""
#include <cublasdx.hpp>
extern "C" __global__ void entry0() {
return ;
}
"""

# Extra flags handed to nvcc: the C++ standard plus the MathDx, cuBLASDx and
# bundled CUTLASS header directories.
# NOTE(review): in the logged nvcc command these flags land before CuPy's own
# `cupy/_core/include/cupy/_cuda/cuda-12` directory, which in turn precedes
# `/usr/local/cuda/include`. The error may therefore come from CuPy's bundled
# CUDA headers being mixed with the toolkit's `cuda_bf16.hpp` - TODO confirm.
nvcc_options = (
    "-std=c++20",
    "-I /home/mrakgr/nvidia-mathdx-24.01.0/nvidia/mathdx/24.01/include",
    "-I /home/mrakgr/nvidia-mathdx-24.01.0/nvidia/mathdx/24.01/include/cublasdx/include",
    "-I /home/mrakgr/nvidia-mathdx-24.01.0/nvidia/mathdx/24.01/external/cutlass/include",
)

raw_module = cp.RawModule(code=kernel, backend='nvcc', options=nvcc_options)

# The compile error surfaces at get_function (see the traceback), not at
# RawModule construction.
entry0 = raw_module.get_function("entry0")
# Launch with grid (1, 1, 1), block (1, 1, 1) and no kernel arguments.
entry0((1, 1, 1), (1, 1, 1), ())
I usually develop on Windows, but to try cuBLASDx out I installed the latest WSL Ubuntu, the CUDA Toolkit for WSL, and the CuPy library, along with a few other things. Running the Python script above fails while compiling the kernel, with the following error:
mrakgr@Lain:~/The-Spiral-Language$ cd /home/mrakgr/The-Spiral-Language ; /usr/bin/env /bin/python3 /home/mrakgr/.vscode-server/extensions/ms-python.python-2023.22.1/pythonFiles/lib/python/debugpy/adapter/../../debugpy/launcher 53897 -- /home/mrakgr/The-Spiral-Language/Spiral\ Compilation\ Tests/cuda_experiments/wsl1/matmul.py
Traceback (most recent call last):
File "/home/mrakgr/.local/lib/python3.10/site-packages/cupy/cuda/compiler.py", line 62, in _run_cc
log = subprocess.check_output(cmd, cwd=cwd, env=env,
File "/usr/lib/python3.10/subprocess.py", line 421, in check_output
return run(*popenargs, stdout=PIPE, timeout=timeout, check=True,
File "/usr/lib/python3.10/subprocess.py", line 526, in run
raise CalledProcessError(retcode, process.args,
subprocess.CalledProcessError: Command '['/usr/local/cuda/bin/nvcc', '-gencode=arch=compute_89,code=sm_89', '--cubin', '-I/home/mrakgr/.local/lib/python3.10/site-packages/cupy/_core/include/cupy/_cccl/cub', '-I/home/mrakgr/.local/lib/python3.10/site-packages/cupy/_core/include/cupy/_cccl/thrust', '-I/home/mrakgr/.local/lib/python3.10/site-packages/cupy/_core/include/cupy/_cccl/libcudacxx', '-std=c++20', '-I /home/mrakgr/nvidia-mathdx-24.01.0/nvidia/mathdx/24.01/include', '-I /home/mrakgr/nvidia-mathdx-24.01.0/nvidia/mathdx/24.01/include/cublasdx/include', '-I /home/mrakgr/nvidia-mathdx-24.01.0/nvidia/mathdx/24.01/external/cutlass/include', '-I/home/mrakgr/.local/lib/python3.10/site-packages/cupy/_core/include', '-I/home/mrakgr/.local/lib/python3.10/site-packages/cupy/_core/include/cupy/_cuda/cuda-12', '-I/usr/local/cuda/include', '-ftz=true', '/tmp/tmpas35tqdi/4e7fbfa90d9fb26f409322085d5a6cfe9539b0b2.cu']' returned non-zero exit status 1.
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/mrakgr/.local/lib/python3.10/site-packages/cupy/cuda/compiler.py", line 363, in compile_using_nvcc
_run_cc(cmd, root_dir, 'nvcc', log_stream)
File "/home/mrakgr/.local/lib/python3.10/site-packages/cupy/cuda/compiler.py", line 78, in _run_cc
raise NVCCException(msg)
cupy.cuda.compiler.NVCCException: `nvcc` command returns non-zero exit status.
command: ['/usr/local/cuda/bin/nvcc', '-gencode=arch=compute_89,code=sm_89', '--cubin', '-I/home/mrakgr/.local/lib/python3.10/site-packages/cupy/_core/include/cupy/_cccl/cub', '-I/home/mrakgr/.local/lib/python3.10/site-packages/cupy/_core/include/cupy/_cccl/thrust', '-I/home/mrakgr/.local/lib/python3.10/site-packages/cupy/_core/include/cupy/_cccl/libcudacxx', '-std=c++20', '-I /home/mrakgr/nvidia-mathdx-24.01.0/nvidia/mathdx/24.01/include', '-I /home/mrakgr/nvidia-mathdx-24.01.0/nvidia/mathdx/24.01/include/cublasdx/include', '-I /home/mrakgr/nvidia-mathdx-24.01.0/nvidia/mathdx/24.01/external/cutlass/include', '-I/home/mrakgr/.local/lib/python3.10/site-packages/cupy/_core/include', '-I/home/mrakgr/.local/lib/python3.10/site-packages/cupy/_core/include/cupy/_cuda/cuda-12', '-I/usr/local/cuda/include', '-ftz=true', '/tmp/tmpas35tqdi/4e7fbfa90d9fb26f409322085d5a6cfe9539b0b2.cu']
return-code: 1
stdout/stderr:
/usr/local/cuda/include/cuda_bf16.hpp(260): error: no instance of overloaded function "__half::__half" matches the specified type
__attribute__((host)) __attribute__((device)) __inline__ __attribute__((always_inline)) __half::__half(const __nv_bfloat16 f)
^
1 error detected in the compilation of "/tmp/tmpas35tqdi/4e7fbfa90d9fb26f409322085d5a6cfe9539b0b2.cu".
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/mrakgr/The-Spiral-Language/Spiral Compilation Tests/cuda_experiments/wsl1/matmul.py", line 19, in <module>
if __name__ == '__main__': print(main())
File "/home/mrakgr/The-Spiral-Language/Spiral Compilation Tests/cuda_experiments/wsl1/matmul.py", line 15, in main
raw_module.get_function(f"entry{v0}")((1, 1, 1),(1, 1, 1),())
File "cupy/_core/raw.pyx", line 487, in cupy._core.raw.RawModule.get_function
File "cupy/_core/raw.pyx", line 100, in cupy._core.raw.RawKernel.kernel.__get__
File "cupy/_core/raw.pyx", line 117, in cupy._core.raw.RawKernel._kernel
File "cupy/_util.pyx", line 64, in cupy._util.memoize.decorator.ret
File "cupy/_core/raw.pyx", line 538, in cupy._core.raw._get_raw_module
File "cupy/_core/core.pyx", line 2236, in cupy._core.core.compile_with_cache
File "cupy/_core/core.pyx", line 2254, in cupy._core.core.compile_with_cache
File "/home/mrakgr/.local/lib/python3.10/site-packages/cupy/cuda/compiler.py", line 484, in _compile_module_with_cache
return _compile_with_cache_cuda(
File "/home/mrakgr/.local/lib/python3.10/site-packages/cupy/cuda/compiler.py", line 577, in _compile_with_cache_cuda
cubin = compile_using_nvcc(source, options, arch,
File "/home/mrakgr/.local/lib/python3.10/site-packages/cupy/cuda/compiler.py", line 373, in compile_using_nvcc
raise cex
cupy.cuda.compiler.CompileException: `nvcc` command returns non-zero exit status.
command: ['/usr/local/cuda/bin/nvcc', '-gencode=arch=compute_89,code=sm_89', '--cubin', '-I/home/mrakgr/.local/lib/python3.10/site-packages/cupy/_core/include/cupy/_cccl/cub', '-I/home/mrakgr/.local/lib/python3.10/site-packages/cupy/_core/include/cupy/_cccl/thrust', '-I/home/mrakgr/.local/lib/python3.10/site-packages/cupy/_core/include/cupy/_cccl/libcudacxx', '-std=c++20', '-I /home/mrakgr/nvidia-mathdx-24.01.0/nvidia/mathdx/24.01/include', '-I /home/mrakgr/nvidia-mathdx-24.01.0/nvidia/mathdx/24.01/include/cublasdx/include', '-I /home/mrakgr/nvidia-mathdx-24.01.0/nvidia/mathdx/24.01/external/cutlass/include', '-I/home/mrakgr/.local/lib/python3.10/site-packages/cupy/_core/include', '-I/home/mrakgr/.local/lib/python3.10/site-packages/cupy/_core/include/cupy/_cuda/cuda-12', '-I/usr/local/cuda/include', '-ftz=true', '/tmp/tmpas35tqdi/4e7fbfa90d9fb26f409322085d5a6cfe9539b0b2.cu']
return-code: 1
stdout/stderr:
/usr/local/cuda/include/cuda_bf16.hpp(260): error: no instance of overloaded function "__half::__half" matches the specified type
__attribute__((host)) __attribute__((device)) __inline__ __attribute__((always_inline)) __half::__half(const __nv_bfloat16 f)
^
1 error detected in the compilation of "/tmp/tmpas35tqdi/4e7fbfa90d9fb26f409322085d5a6cfe9539b0b2.cu".