Hi Mat, sorry to bother you again. This time I wrote a test case that works well on Godbolt, but when I compiled it with NVHPC 23.11, I got the error message shown below. Could you please help me look into it?
nvc++ -std=c++20 -cuda --gcc-toolchain=/work/opt/local/x86_64/cores/gcc/12.2.0 --experimental-stdpar -stdpar=gpu -o run sample.cc
"/work/opt/local/x86_64/cores/nvidia/23.11/Linux_x86_64/23.11/compilers/include-stdexec/experimental/nvexec/stream/bulk.cuh", line 28: error: static assertion failed
static_assert(trivially_copyable<Shape, Fun, As...>);
^
detected during:
instantiation of "void nvexec::_strm::_bulk::kernel<BlockThreads,As...,Shape,Fun>(Shape, Fun, As...) [with BlockThreads=256, As=<>, Shape=unsigned long, Fun=grid_initializer_t]" at line 62
instantiation of "void nvexec::_strm::_bulk::tag_invoke(_Tag, nvexec::_strm::_bulk::receiver_t<stdexec::__minvoke_<stdexec::__id_<true>, nvexec::_strm::_transfer::operation_state_t<nvexec::_strm::bulk_sender_t<stdexec::__minvoke_<stdexec::__id_<true>, std::decay<nvexec::_strm::schedule_from_sender_t<nvexec::_strm::stream_scheduler, stdexec::__minvoke_<stdexec::__id_<true>, exec::__stl::__sender<exec::__on::__with_sched<stdexec::__id<stdexec::__decay_t<std::decay<stdexec::__basic_sender<lambda [](_Cvref, _Fun &&) mutable->decltype((<expression>))>>::type>>, stdexec::__mdefer_<stdexec::__q<stdexec::__call_result_>, stdexec::__queries::get_scheduler_t, stdexec::__mdefer_<stdexec::__q<stdexec::__call_result_>, stdexec::__env::get_env_t, exec::__stl::__receiver_placeholder<stdexec::__sync_wait::__env>>::__t>::__t>, stdexec::__t<stdexec::__minvoke_<stdexec::__id_<true>, std::decay<stdexec::__basic_sender<lambda [](_Cvref, _Fun &&) mutable->decltype((<expression>))>>::type>::__t>, exec::__on::__with_sched_kernel<stdexec::__mdefer_<stdexec::__q<stdexec::__call_result_>, stdexec::__queries::get_scheduler_t, stdexec::__mdefer_<stdexec::__q<stdexec::__call_result_>, stdexec::__env::get_env_t, exec::__stl::__receiver_placeholder<stdexec::__sync_wait::__env>>::__t>::__t>>::__t>::__t>::__t>::type>::__t, unsigned long, grid_initializer_t>, stdexec::__minvoke_<stdexec::__id_<false>, stdexec::__schedule_from::__receiver1<stdexec::__minvoke_<stdexec::__id_<true>, stdexec::__mdefer_<stdexec::__q<stdexec::__call_result_>, stdexec::__queries::get_scheduler_t, stdexec::__mdefer_<stdexec::__q<stdexec::__call_result_>, stdexec::__env::get_env_t, exec::__stl::__receiver_placeholder<stdexec::__sync_wait::__env>>::__t>::__t>::__t, stdexec::__minvoke_<stdexec::__id_<false>, stdexec::__minvoke_<stdexec::__minvoke_<stdexec::__mfold_right<stdexec::__munique<stdexec::__mbind_front_q<std::variant, std::monostate>>, stdexec::__mbind_front_q<stdexec::__schedule_from::__bind_completions_t, 
stdexec::__mtype<stdexec::__minvoke_<stdexec::__q<stdexec::__mfront>, stdexec::__minvoke_<stdexec::__detail::__mbc<nvexec::_strm::transfer_sender_th<std::decay<nvexec::_strm::bulk_sender_t<stdexec::__minvoke_<stdexec::__id_<true>, std::decay<nvexec::_strm::schedule_from_sender_t<nvexec::_strm::stream_scheduler, stdexec::__minvoke_<stdexec::__id_<true>, exec::__stl::__sender<exec::__on::__with_sched<stdexec::__id<stdexec::__decay_t<std::decay<stdexec::__basic_sender<lambda [](_Cvref, _Fun &&) mutable->decltype((<expression>))>>::type>>, stdexec::__mdefer_<stdexec::__q<stdexec::__call_result_>, stdexec::__queries::get_scheduler_t, stdexec::__mdefer_<stdexec::__q<stdexec::__call_result_>, stdexec::__env::get_env_t, exec::__stl::__receiver_placeholder<stdexec::__sync_wait::__env>>::__t>::__t>, stdexec::__t<stdexec::__minvoke_<stdexec::__id_<true>, std::decay<stdexec::__basic_sender<lambda [](_Cvref, _Fun &&) mutable->decltype((<expression>))>>::type>::__t>, exec::__on::__with_sched_kernel<stdexec::__mdefer_<stdexec::__q<stdexec::__call_result_>, stdexec::__queries::get_scheduler_t, stdexec::__mdefer_<stdexec::__q<stdexec::__call_result_>, stdexec::__env::get_env_t, exec::__stl::__receiver_placeholder<stdexec::__sync_wait::__env>>::__t>::__t>>::__t>::__t>::__t>::type>::__t, unsigned long, grid_initializer_t>::__t>::type>>, stdexec::__cp>::__t>::__t>::__t, stdexec::__mdefer_<stdexec::__q<stdexec::__call_result_>, stdexec::__env::get_env_t, exec::__stl::__operation<stdexec::__basic_sender<lambda [](_Cvref, _Fun &&) mutable->decltype((<expression>))> &, exec::__on::__continue_on_kernel<nvexec::_strm::stream_scheduler, stdexec::__closure::__binder_back<stdexec::__bulk::bulk_t, std::size_t, grid_initializer_t>>, stdexec::__minvoke_<stdexec::__id_<true>, stdexec::__sync_wait::__receiver<>::__t>::__t>::__receiver_t>::__t>>, stdexec::__receivers::set_value_t, stdexec::__receivers::set_error_t, stdexec::__receivers::set_stopped_t>::__t>::__t>::__t, 
stdexec::__minvoke_<stdexec::__id_<true>, stdexec::__debug::__debug_receiver<stdexec::__cvref_id<stdexec::__basic_sender<lambda [](_Cvref, _Fun &&) mutable->decltype((<expression>))>, stdexec::__decay_t<stdexec::__basic_sender<lambda [](_Cvref, _Fun &&) mutable->decltype((<expression>))>>>, exec::__on::__with_sched_env<stdexec::__sync_wait::__env, nvexec::_strm::stream_scheduler>, stdexec::__tag_invoke::tag_invoke_result_t<stdexec::__get_completion_signatures::get_completion_signatures_t, stdexec::__basic_sender<lambda [](_Cvref, _Fun &&) mutable->decltype((<expression>))>, exec::__on::__with_sched_env<stdexec::__sync_wait::__env, nvexec::_strm::stream_scheduler>>>>::__t>::__t>::__t>::__t::enqueue_receiver>::__t, unsigned long, grid_initializer_t>::__t &&, As &&...) noexcept [with _Tag=stdexec::__receivers::set_value_t, As=<>]" at line 169 of "/work/opt/local/x86_64/cores/nvidia/23.11/Linux_x86_64/23.11/compilers/include-stdexec/experimental/stdexec/functional.hpp"
instantiation of "auto stdexec::__tag_invoke::tag_invoke_t::operator()(_Tag, _Args &&...) const->stdexec::__tag_invoke::tag_invoke_result_t<_Tag, _Args...> [with _Tag=stdexec::__receivers::set_value_t, _Args=<nvexec::_strm::_bulk::receiver_t<nvexec::_strm::stream_enqueue_receiver<stdexec::__env::__joined_env<stdexec::__env::__env_fn<lambda [](nvexec::_strm::get_stream_provider_t)->nvexec::_strm::stream_provider_t * noexcept>, stdexec::__env::__joined_env<stdexec::__env::__env_fn<lambda [](stdexec::__debug::__is_debug_env_t)->bool noexcept(true)>, exec::__on::__with_sched_env<stdexec::__sync_wait::__env, nvexec::_strm::stream_scheduler>>>, nvexec::variant_t<cuda::std::__4::tuple<nvexec::_strm::set_noop>, cuda::std::__4::tuple<stdexec::__receivers::set_error_t, cudaError_t>, cuda::std::__4::tuple<stdexec::__receivers::set_value_t>>>, unsigned long, grid_initializer_t>::__t>]" at line 656 of "/work/opt/local/x86_64/cores/nvidia/23.11/Linux_x86_64/23.11/compilers/include-stdexec/experimental/stdexec/execution.hpp"
instantiation of "void stdexec::__receivers::set_value_t::operator()(_Receiver &&, _As &&...) const noexcept [with _Receiver=nvexec::_strm::_bulk::receiver_t<nvexec::_strm::stream_enqueue_receiver<stdexec::__env::__joined_env<stdexec::__env::__env_fn<lambda [](nvexec::_strm::get_stream_provider_t)->nvexec::_strm::stream_provider_t * noexcept>, stdexec::__env::__joined_env<stdexec::__env::__env_fn<lambda [](stdexec::__debug::__is_debug_env_t)->bool noexcept(true)>, exec::__on::__with_sched_env<stdexec::__sync_wait::__env, nvexec::_strm::stream_scheduler>>>, nvexec::variant_t<cuda::std::__4::tuple<nvexec::_strm::set_noop>, cuda::std::__4::tuple<stdexec::__receivers::set_error_t, cudaError_t>, cuda::std::__4::tuple<stdexec::__receivers::set_value_t>>>, unsigned long, grid_initializer_t>::__t, _As=<>]" at line 584 of "/work/opt/local/x86_64/cores/nvidia/23.11/Linux_x86_64/23.11/compilers/include-stdexec/experimental/nvexec/stream/common.cuh"
instantiation of "void nvexec::_strm::operation_state_base_<OuterReceiverId>::__t::propagate_completion_signal(Tag, As &&...) noexcept [with OuterReceiverId=nvexec::_strm::_bulk::receiver_t<nvexec::_strm::stream_enqueue_receiver<stdexec::__env::__joined_env<stdexec::__env::__env_fn<lambda [](nvexec::_strm::get_stream_provider_t)->nvexec::_strm::stream_provider_t * noexcept>, stdexec::__env::__joined_env<stdexec::__env::__env_fn<lambda [](stdexec::__debug::__is_debug_env_t)->bool noexcept(true)>, exec::__on::__with_sched_env<stdexec::__sync_wait::__env, nvexec::_strm::stream_scheduler>>>, nvexec::variant_t<cuda::std::__4::tuple<nvexec::_strm::set_noop>, cuda::std::__4::tuple<stdexec::__receivers::set_error_t, cudaError_t>, cuda::std::__4::tuple<stdexec::__receivers::set_value_t>>>, unsigned long, grid_initializer_t>, Tag=stdexec::__receivers::set_value_t, As=<>]" at line 55 of "/work/opt/local/x86_64/cores/nvidia/23.11/Linux_x86_64/23.11/compilers/include-stdexec/experimental/nvexec/stream/schedule_from.cuh"
[ 24 instantiation contexts not shown ]
instantiation of "void stdexec::__start::start_t::operator()(_Op &) const noexcept [with _Op=nvexec::_strm::_transfer::operation_state_t<nvexec::_strm::bulk_sender_t<nvexec::_strm::schedule_from_sender_t<nvexec::_strm::stream_scheduler, exec::__on::__with_sched<stdexec::__basic_sender<lambda [](_Cvref, _Fun &&) mutable->decltype((<expression>))>, stdexec::__loop::run_loop::__scheduler>>, unsigned long, grid_initializer_t>, stdexec::_Yp<stdexec::__schedule_from::__receiver1<stdexec::__loop::run_loop::__scheduler::__id, stdexec::_Yp<std::variant<std::monostate, std::tuple<stdexec::__receivers::set_stopped_t>, std::tuple<stdexec::__receivers::set_error_t, cudaError_t>, std::tuple<stdexec::__receivers::set_value_t>>>, stdexec::__debug::__debug_receiver<stdexec::__basic_sender<lambda [](_Cvref, _Fun &&) mutable->decltype((<expression>))>, exec::__on::__with_sched_env<stdexec::__sync_wait::__env, nvexec::_strm::stream_scheduler>, stdexec::completion_signatures<stdexec::__receivers::set_error_t (std::__exception_ptr::exception_ptr), stdexec::__receivers::set_stopped_t (), stdexec::__receivers::set_value_t (), stdexec::__receivers::set_error_t (cudaError &&)>>>::__t>>::__t]" at line 5144 of "/work/opt/local/x86_64/cores/nvidia/23.11/Linux_x86_64/23.11/compilers/include-stdexec/experimental/stdexec/execution.hpp"
instantiation of class "stdexec::__schedule_from::__operation1<_SchedulerId, _CvrefSenderId, _ReceiverId>::__t [with _SchedulerId=stdexec::__loop::run_loop::__scheduler::__id, _CvrefSenderId=nvexec::_strm::transfer_sender_t<nvexec::_strm::bulk_sender_t<nvexec::_strm::schedule_from_sender_t<nvexec::_strm::stream_scheduler, exec::__on::__with_sched<stdexec::__basic_sender<lambda [](_Cvref, _Fun &&) mutable->decltype((<expression>))>, stdexec::__loop::run_loop::__scheduler>>, unsigned long, grid_initializer_t>>, _ReceiverId=stdexec::__debug::__debug_receiver<stdexec::__basic_sender<lambda [](_Cvref, _Fun &&) mutable->decltype((<expression>))>, exec::__on::__with_sched_env<stdexec::__sync_wait::__env, nvexec::_strm::stream_scheduler>, stdexec::completion_signatures<stdexec::__receivers::set_error_t (std::__exception_ptr::exception_ptr), stdexec::__receivers::set_stopped_t (), stdexec::__receivers::set_value_t (), stdexec::__receivers::set_error_t (cudaError &&)>>]" at line 169 of "/work/opt/local/x86_64/cores/nvidia/23.11/Linux_x86_64/23.11/compilers/include-stdexec/experimental/stdexec/functional.hpp"
instantiation of "auto stdexec::__tag_invoke::tag_invoke_t::operator()(_Tag, _Args &&...) const->stdexec::__tag_invoke::tag_invoke_result_t<_Tag, _Args...> [with _Tag=stdexec::__start::start_t, _Args=<stdexec::__schedule_from::__operation1<stdexec::__loop::run_loop::__scheduler::__id, nvexec::_strm::transfer_sender_t<nvexec::_strm::bulk_sender_t<nvexec::_strm::schedule_from_sender_t<nvexec::_strm::stream_scheduler, exec::__on::__with_sched<stdexec::__basic_sender<lambda [](_Cvref, _Fun &&) mutable->decltype((<expression>))>, stdexec::__loop::run_loop::__scheduler>>, unsigned long, grid_initializer_t>>, stdexec::__debug::__debug_receiver<stdexec::__basic_sender<lambda [](_Cvref, _Fun &&) mutable->decltype((<expression>))>, exec::__on::__with_sched_env<stdexec::__sync_wait::__env, nvexec::_strm::stream_scheduler>, stdexec::completion_signatures<stdexec::__receivers::set_error_t (std::__exception_ptr::exception_ptr), stdexec::__receivers::set_stopped_t (), stdexec::__receivers::set_value_t (), stdexec::__receivers::set_error_t (cudaError &&)>>>::__t &>]" at line 1675 of "/work/opt/local/x86_64/cores/nvidia/23.11/Linux_x86_64/23.11/compilers/include-stdexec/experimental/stdexec/execution.hpp"
instantiation of "void stdexec::__start::start_t::operator()(_Op &) const noexcept [with _Op=stdexec::__schedule_from::__operation1<stdexec::__loop::run_loop::__scheduler::__id, nvexec::_strm::transfer_sender_t<nvexec::_strm::bulk_sender_t<nvexec::_strm::schedule_from_sender_t<nvexec::_strm::stream_scheduler, exec::__on::__with_sched<stdexec::__basic_sender<lambda [](_Cvref, _Fun &&) mutable->decltype((<expression>))>, stdexec::__loop::run_loop::__scheduler>>, unsigned long, grid_initializer_t>>, stdexec::__debug::__debug_receiver<stdexec::__basic_sender<lambda [](_Cvref, _Fun &&) mutable->decltype((<expression>))>, exec::__on::__with_sched_env<stdexec::__sync_wait::__env, nvexec::_strm::stream_scheduler>, stdexec::completion_signatures<stdexec::__receivers::set_error_t (std::__exception_ptr::exception_ptr), stdexec::__receivers::set_stopped_t (), stdexec::__receivers::set_value_t (), stdexec::__receivers::set_error_t (cudaError &&)>>>::__t]" at line 1080 of "/work/opt/local/x86_64/cores/nvidia/23.11/Linux_x86_64/23.11/compilers/include-stdexec/experimental/stdexec/execution.hpp"
instantiation of "void stdexec::__debug::__debug_sender<_Sigs,_Env,_Sender>(_Sender &&, const _Env &) [with _Sigs=stdexec::completion_signatures<stdexec::__receivers::set_error_t (std::__exception_ptr::exception_ptr), stdexec::__receivers::set_stopped_t (), stdexec::__receivers::set_value_t (), stdexec::__receivers::set_error_t (cudaError &&)>, _Env=exec::__on::__with_sched_env<stdexec::__sync_wait::__env, nvexec::_strm::stream_scheduler>, _Sender=stdexec::__basic_sender<lambda [](_Cvref, _Fun &&) mutable->decltype((<expression>))>]" at line 1306 of "/work/opt/local/x86_64/cores/nvidia/23.11/Linux_x86_64/23.11/compilers/include-stdexec/experimental/stdexec/execution.hpp"
1 error detected in the compilation of "sample.cc".