Using cooperative groups with NVRTC

I'm trying to use cooperative groups with NVRTC; however, I cannot get it to compile and load.
I’m using the following device code:

// Header that supplies the cooperative_groups namespace used below.
#include "cooperative_groups.h"
.....
using namespace cooperative_groups;
// Take the group handle returned by this_grid() and call sync() on it.
// NOTE(review): presumably these statements sit inside the kernel body that is elided above — confirm.
grid_group grid = this_grid(); grid.sync();

and the following host code:

// Host side: hand the cooperative-groups headers to NVRTC as in-memory headers,
// compile the kernel source to PTX, then load that PTX through the driver API.
//
// Backslashes in the Windows paths are escaped: written unescaped, "\v" becomes a
// vertical-tab character and "\P", "\N", "\C", "\i" are not valid escape sequences,
// so the files would not be found.
std::string coop_groups = read_string_from_file("C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.0\\include\\cooperative_groups.h");
std::string coop_groups_helpers = read_string_from_file("C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.0\\include\\cooperative_groups_helpers.h");

// headers[i] holds the contents of the header that the device code includes as include_names[i].
const char* headers[] = { &coop_groups[0],  &coop_groups_helpers[0]};
const char* include_names[] = { "cooperative_groups.h", "cooperative_groups_helpers.h" };
nvrtcProgram prog;
NVRTC_SAFE_CALL(nvrtcCreateProgram(&prog, total_source.c_str(), "kernel", 2, headers, include_names));
const char *opts[] = { "--gpu-architecture=compute_70", "--fmad=true", "--restrict", "--relocatable-device-code=true" };
// The compile result is kept (not wrapped in NVRTC_SAFE_CALL) so the log can be printed even on failure.
nvrtcResult compileResult = nvrtcCompileProgram(prog, 4, opts);
size_t logSize;
NVRTC_SAFE_CALL(nvrtcGetProgramLogSize(prog, &logSize));
std::string Log;
Log.resize(logSize);
NVRTC_SAFE_CALL(nvrtcGetProgramLog(prog, &Log[0]));
std::cout << Log << '\n';
if (compileResult != NVRTC_SUCCESS) 
	system("pause");
size_t ptxSize;
NVRTC_SAFE_CALL(nvrtcGetPTXSize(prog, &ptxSize));
std::string ptx;
ptx.resize(ptxSize);
NVRTC_SAFE_CALL(nvrtcGetPTX(prog, &ptx[0]));
NVRTC_SAFE_CALL(nvrtcDestroyProgram(&prog));
// Reuse Log as the JIT error-log buffer for the module load.
int max_log_size = 64 * 1024;
Log.resize(max_log_size);
// The buffer size is passed by value inside a void* slot; widen it to size_t first so the
// int -> pointer conversion is well-defined on 64-bit targets.
void* option_values[] = { reinterpret_cast<void*>(static_cast<size_t>(max_log_size)), &(Log[0])};
CUjit_option options[] = { CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES, CU_JIT_ERROR_LOG_BUFFER };
// NOTE(review): the PTX was generated with --relocatable-device-code=true but is loaded here
// without a link step. The unresolved extern 'cudaCGGetIntrinsicHandle' reported by ptxas
// presumably has to be resolved by linking the device runtime library (cudadevrt) through the
// driver linker API (cuLinkCreate / cuLinkAddData / cuLinkAddFile / cuLinkComplete) before
// loading the resulting image — confirm against the CUDA documentation.
CUresult ptx_result = cuModuleLoadDataEx(&_cumodule, &ptx[0], 2, options, option_values);
std::cout << &Log[0];	
if(ptx_result != CUDA_SUCCESS)
	system("pause");

However, cuModuleLoadDataEx still outputs the following error:
ptxas fatal : Unresolved extern function 'cudaCGGetIntrinsicHandle'

What am I still missing?

Hello @Fiepchen, did you manage to solve the problem? I’m trying to get cooperative groups running with NVRTC, but I’m running into some problems as well.

Device code:

#include <cooperative_groups.h>

using namespace cooperative_groups;

Host code:

// Host side: register cooperative_groups.h as an in-memory header, create the NVRTC
// program from `source`, and compile it.
// String literals use plain ASCII double quotes and options use the ASCII "--" prefix;
// typographic quotes and en-dashes are not valid in C++ source or as option prefixes.
nvrtcProgram prog;
std::string coop_groups = read_string_from_file("/usr/include/cooperative_groups.h");
// Header contents and include names are passed as parallel arrays (one entry each here),
// so both must be declared as arrays rather than as single pointers.
const char* headers[] = { &coop_groups[0] };
const char* include_names[] = { "cooperative_groups.h" };
nvrtcResult result = nvrtcCreateProgram(
    &prog,           // program holder
    source.c_str(),  // buffer with source
    "kernel",        // name
    1,               // numHeaders
    headers,         // headers
    include_names);  // includeNames

std::vector<const char*> opts =
{
    "--fmad=true",
    "-D UWORD=size_t",
    "--relocatable-device-code=true",
    "--restrict",
    "--gpu-architecture=compute_75"
};

// The option count is passed as int; convert explicitly from the vector's size_t.
result = nvrtcCompileProgram(prog,                           // prog
                             static_cast<int>(opts.size()),  // numOptions
                             opts.data());                   // options

Error message:

 error: cuda::runtime_t::init_kernels(): compilation failed: /usr/include/cooperative_groups/details/driver_abi.h(57): error: this declaration may not have extern "C" linkage

/usr/include/cooperative_groups/details/driver_abi.h(64): error: this declaration may not have extern "C" linkage

/usr/include/cooperative_groups/details/driver_abi.h(81): error: this declaration may not have extern "C" linkage

/usr/include/cooperative_groups/details/driver_abi.h(82): error: this declaration may not have extern "C" linkage

/usr/include/cooperative_groups/details/driver_abi.h(83): error: this declaration may not have extern "C" linkage

The CUDA sample code /usr/local/cuda/samples/0_Simple/matrixMul_nvrtc demonstrates using NVRTC with cooperative groups.