Test Programm
__global__ void test(){
printf("%.28f %.28f %.28f %.28f %.28f %.28f %.28f %.28f %.28f %.28f %.28f %.28f %.28f %.28f %.28f %.28f %.28f %.28f %.28f %.28f %.28f %.28f %.28f %.28f %.28f %.28f %.28f %.28f %.28f %.28f %.28f %.28f %.28f %.28f %.28f %.28f %.28f %.28f %.28f %.28f %.28f %.28f"
, 0.0f
, 0.1f
, 0.2f
, 0.3f
, 0.4f
, 0.5f
, 0.6f
, 0.7f
, 0.8f
, 0.9f
, 1.0f
, 1.1f
, 1.2f
, 1.3f
, 1.4f
, 1.5f
, 1.6f
, 1.7f
, 1.8f
, 1.9f
, 2.0f
, 2.1f
, 2.2f
, 2.3f
, 2.4f
, 2.5f
, 2.6f
, 2.7f
, 2.8f
, 2.9f
, 3.0f
, 3.1f
, 3.2f
, 3.3f
, 3.4f
, 3.5f
, 3.6f
, 3.7f
, 3.8f
, 3.9f
, 4.0f
, 4.1f
);
}
int main(){
test<<<1, 1>>>();
}
CMakerLists.txt:
cmake_minimum_required(VERSION 3.15) # support relative path
project(Test
LANGUAGES CXX CUDA)
include(CheckLanguage)
check_language(CUDA)
if(CMAKE_CUDA_COMPILER)
enable_language(CUDA)
message("-- cuda-compiler " ${CMAKE_CUDA_COMPILER})
else()
message(STATUS "No CUDA support")
endif()
set(CUDA_FOUND ${CMAKE_CUDA_COMPILER})
set(CMAKE_CUDA_ARCHITECTURES native)
if (CUDA_FOUND)
add_executable(test)
target_compile_options(test
PRIVATE $<$<AND:$<CONFIG:Debug>,$<COMPILE_LANGUAGE:CUDA>>:-g> --expt-extended-lambda --expt-relaxed-constexpr --default-stream=per-thread --use_fast_math -lineinfo --ptxas-options=-allow-expensive-optimizations=true>
)
target_compile_features(test PRIVATE cuda_std_17)
set_target_properties(test
PROPERTIES CUDA_EXTENSIONS ON
CUDA_SEPARABLE_COMPILATION OFF
RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
)
install(TARGETS
test
)
target_sources(test
PRIVATE test.cu
)
endif()
Compile Script:
@echo off
setlocal
cd /d %~dp0
reg Query "HKLM\Hardware\Description\System\CentralProcessor\0" | find /i "x86" > NUL && set arch_command_length=32 || set arch_command_length=64
echo "%PROCESSOR_ARCHITECTURE%" | find /i "arm" > NUL && goto ARM || goto AMD
:ARM
IF %arch_command_length% == 32 (
set arch_string=x86_arm
set arch_command_length_string=x86
) ELSE (
set arch_string=amd64_arm64
set arch_command_length_string=x64
)
goto VC_VARC_INIT
:AMD
IF %arch_command_length% == 32 (
set arch_string=x86
set arch_command_length_string=x86
) ELSE (
set arch_string=amd64
set arch_command_length_string=x64
)
goto VC_VARC_INIT
:VC_VARC_INIT
call vcvarsall.bat %arch_string% -vcvars_ver=
set compiler_path=%VCToolsInstallDir%\bin\Host%arch_command_length_string%\%arch_command_length_string%\cl.exe
set linker_path=%VCToolsInstallDir%\bin\Host%arch_command_length_string%\%arch_command_length_string%\link.exe
set archiver_path=%VCToolsInstallDir%\bin\Host%arch_command_length_string%\%arch_command_length_string%\lib.exe
call "%DevEnvDir%\COMMONEXTENSIONS\MICROSOFT\CMAKE\CMake\bin\cmake.exe" -G "Ninja Multi-Config" -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DCMAKE_AR:FILEPATH="%archiver_path%" -DCMAKE_LINKER:FILEPATH="%linker_path%" -DCMAKE_INSTALL_PREFIX:PATH="%CD%\out\install" -DCMAKE_CXX_COMPILER:FILEPATH="%compiler_path%" -DCMAKE_C_COMPILER:FILEPATH="%compiler_path%" -DCMAKE_MAKE_PROGRAM="%DevEnvDir%\COMMONEXTENSIONS\MICROSOFT\CMAKE\Ninja\ninja.exe" "%CD%\.."
call "%DevEnvDir%\COMMONEXTENSIONS\MICROSOFT\CMAKE\CMake\bin\cmake.exe" --build . --target install --config Release
Produces following output on my machine:
0.0000000000000000000000000000 0.1000000014901161193847656250 0.2000000029802322387695312500 0.3000000119209289550781250000 0.4000000059604644775390625000 0.5000000000000000000000000000 0.6000000238418579101562500000 0.6999999880790710449218750000 0.8000000119209289550781250000 0.8999999761581420898437500000 1.0000000000000000000000000000 1.1000000238418579101562500000 1.2000000476837158203125000000 1.2999999523162841796875000000 1.3999999761581420898437500000 1.5000000000000000000000000000 1.6000000238418579101562500000 1.7000000476837158203125000000 1.7999999523162841796875000000 1.8999999761581420898437500000 2.0000000000000000000000000000 2.0999999046325683593750000000 2.2000000476837158203125000000 2.2999999523162841796875000000 2.4000000953674316406250000000 2.5000000000000000000000000000 2.5999999046325683593750000000 2.7000000476837158203125000000 2.7999999523162841796875000000 2.9000000953674316406250000000 3.0000000000000000000000000000 3.0999999046325683593750000000 0.0000000000000000000000000000 0.0000000000000000000000000000 0.0000000000000000000000000000 0.0000000000000000000000000000 0.0000000000000000000000000000 0.0000000000000000000000000000 0.0000000000000000000000000000 0.0000000000000000000000000000 0.0000000000000000000000000000 0.0000000000000000000000000000
The last values are 0 though they should not be 0. On some runs (in another programm) instead of 0 some other values (uninitialized memory) might be printed.
System parameters:
Windows 10.
CMake output:
**********************************************************************
** Visual Studio 2022 Developer Command Prompt v17.4.4
** Copyright (c) 2022 Microsoft Corporation
**********************************************************************
[vcvarsall.bat] Environment initialized for: 'x64'
-- The CXX compiler identification is MSVC 19.34.31937.0
-- The CUDA compiler identification is NVIDIA 12.1.66
-- Detecting CXX compiler ABI info
-- Detecting CXX compiler ABI info - done
-- Check for working CXX compiler: C:/Program Files (x86)/Microsoft Visual Studio/2022/BuildTools/VC/Tools/MSVC/14.34.31933//bin/Hostx64/x64/cl.exe - skipped
-- Detecting CXX compile features
-- Detecting CXX compile features - done
-- Detecting CUDA compiler ABI info
-- Detecting CUDA compiler ABI info - done
-- Check for working CUDA compiler: C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.1/bin/nvcc.exe - skipped
-- Detecting CUDA compile features
-- Detecting CUDA compile features - done
-- cuda-compiler C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.1/bin/nvcc.exe
-- Configuring done
-- Generating done