I'm trying to use cooperative groups with NVRTC, but I cannot get it to compile and load.
I’m using the following device code:
#include "cooperative_groups.h"
.....
// Make the names from the cooperative_groups namespace usable unqualified below.
using namespace cooperative_groups;
// Obtain a grid_group via this_grid() and call sync() on it.
// NOTE(review): presumably these statements live inside a kernel body that the
// "....." above elides, and the kernel is launched cooperatively — confirm.
grid_group grid = this_grid(); grid.sync();
and the following host code:
// Compile `total_source` to PTX with NVRTC, supplying the cooperative-groups
// headers as in-memory include files.
//
// The header contents are read from the CUDA 10.0 toolkit directory. The
// backslashes in the Windows paths must be escaped: written unescaped, "\v" is
// a vertical tab and "\P", "\N", "\C", "\i", "\c" are not valid escape
// sequences, so the files would never be found.
std::string coop_groups = read_string_from_file("C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.0\\include\\cooperative_groups.h");
std::string coop_groups_helpers = read_string_from_file("C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.0\\include\\cooperative_groups_helpers.h");

// headers[i] is the text NVRTC substitutes for `#include "<include_names[i]>"`.
const char* headers[] = { coop_groups.c_str(), coop_groups_helpers.c_str() };
const char* include_names[] = { "cooperative_groups.h", "cooperative_groups_helpers.h" };

nvrtcProgram prog;
NVRTC_SAFE_CALL(nvrtcCreateProgram(&prog, total_source.c_str(), "kernel", 2, headers, include_names));

// --relocatable-device-code=true leaves device-runtime symbols unresolved in
// the generated PTX; the PTX therefore has to be linked against the device
// runtime library before it can be loaded as a module.
const char *opts[] = { "--gpu-architecture=compute_70", "--fmad=true", "--restrict", "--relocatable-device-code=true" };
nvrtcResult compileResult = nvrtcCompileProgram(prog, 4, opts);

// Always fetch and print the compile log, even on success (it may hold warnings).
size_t logSize;
NVRTC_SAFE_CALL(nvrtcGetProgramLogSize(prog, &logSize));
std::string Log;
Log.resize(logSize);
NVRTC_SAFE_CALL(nvrtcGetProgramLog(prog, &Log[0]));
// The reported log size includes the trailing NUL; print as a C string so the
// terminator itself is not written to the stream.
std::cout << Log.c_str() << '\n';
// NOTE(review): on failure this only pauses and then falls through to the PTX
// retrieval below — confirm that NVRTC_SAFE_CALL aborts in that case.
if (compileResult != NVRTC_SUCCESS)
    system("pause");

// Retrieve the generated PTX (size includes the trailing NUL) and release the program.
size_t ptxSize;
NVRTC_SAFE_CALL(nvrtcGetPTXSize(prog, &ptxSize));
std::string ptx;
ptx.resize(ptxSize);
NVRTC_SAFE_CALL(nvrtcGetPTX(prog, &ptx[0]));
NVRTC_SAFE_CALL(nvrtcDestroyProgram(&prog));
int max_log_size = 64 * 1024;
Log.resize(max_log_size);
void* option_values[] = { (void*)max_log_size, &(Log[0])};
CUjit_option options[] = { CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES, CU_JIT_ERROR_LOG_BUFFER };
CUresult ptx_result = cuModuleLoadDataEx(&_cumodule, &ptx[0], 2, options, option_values);
std::cout << &Log[0];
if(ptx_result != CUDA_SUCCESS)
system("pause");
However, cuModuleLoadDataEx still outputs the following error:
ptxas fatal : Unresolved extern function 'cudaCGGetIntrinsicHandle'
What am I still missing?