The programming guide shows how to use this API (see "Programming Guide :: CUDA Toolkit Documentation").
It seems that it cannot be used for arbitrary semaphores, but only for ones exported by Vulkan, OpenGL, Direct3D, and NvSci.
I tried to import an eventfd-based semaphore, but the import just fails with an "unknown error":
#include <cassert>
#include <chrono>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <future>
#include <iostream>
#include <thread>

#include <sys/eventfd.h>
#include <unistd.h>
// The eventfd reads and writes in this file pass a size_t buffer with a
// hard-coded length of 8 bytes, so refuse to build where size_t is not 8 bytes.
static_assert(sizeof(size_t) == 8);
// Checks the result of a CUDA runtime call and reports the call site on failure.
// Usage: gpuErrchk(cudaSomething(...));
// The do { } while (0) wrapper makes the macro behave as a single statement,
// so it is safe inside un-braced if/else bodies.
#define gpuErrchk(ans) do { gpuAssert((ans), __FILE__, __LINE__); } while (0)

// Prints a diagnostic to stderr when `code` is not cudaSuccess.
//   code  - status returned by a CUDA runtime call
//   file  - source file of the call site (supplied by gpuErrchk)
//   line  - source line of the call site (supplied by gpuErrchk)
//   abort - when true (the default), terminate the process via exit(code)
//           after printing; when false, only print and return to the caller
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
    if (code == cudaSuccess)
    {
        return;
    }

    fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
    if (abort)
    {
        exit(code);
    }
}
// Device kernel that prints a fixed start-up message.
// Takes no arguments; every thread that runs it executes the printf.
__global__ void printkernel()
{
    printf("printkernel started\n");
}
// Signals the eventfd `semaphorefd` once, two seconds after being started.
//   semaphorefd - an eventfd descriptor; the value 1 is written to it
// A failed or short write is reported with perror and aborts the process.
// Unlike an assert-only check, this stays active when NDEBUG is defined.
void workerthread1(int semaphorefd){
    std::cerr << "workerthread1 started\n";

    // Delay so that the waiter is demonstrably blocked before the signal arrives.
    std::this_thread::sleep_for(std::chrono::seconds{2});

    // The value written is added to the eventfd counter; 1 corresponds to one post.
    size_t val = 1;
    // write() returns ssize_t; keep the full width instead of narrowing to int.
    ssize_t numbyteswritten = write(semaphorefd, &val, sizeof(val));
    if (numbyteswritten != static_cast<ssize_t>(sizeof(val))) {
        perror("write(eventfd)");
        std::abort();
    }
}
// Experiment: can an eventfd-based semaphore be imported into CUDA as an
// external semaphore?
//   1. Create an eventfd in semaphore mode and verify that it works between
//      two host threads.
//   2. Try to import it with cudaImportExternalSemaphore and make a stream
//      wait on it before running printkernel.
// Returns 0 on success and 1 if a host-side system call fails. CUDA failures
// terminate the process through gpuErrchk.
int main(){
    int semfd = eventfd(0, EFD_SEMAPHORE);
    if(semfd == -1){
        perror("eventfd");
        // Return instead of assert(false): the assert disappears under NDEBUG
        // and execution would continue with an invalid descriptor.
        return 1;
    }

    // Part 1: host-only check. The worker posts after its delay; read() blocks
    // until then.
    auto future = std::async(std::launch::async, workerthread1, semfd);
    std::cerr << "waiting for semaphore\n";
    size_t semaphorevalue = 0;
    ssize_t numbytesreads = read(semfd, &semaphorevalue, sizeof(semaphorevalue));
    if(numbytesreads != static_cast<ssize_t>(sizeof(semaphorevalue))){
        perror("read(eventfd)");
        return 1;
    }
    std::cerr << "waited for semaphore\n";
    future.wait();

    // Part 2: try to let a CUDA stream wait on the same eventfd.
    cudaStream_t stream;
    gpuErrchk(cudaStreamCreate(&stream));

    // The CUDA documentation states that ownership of an opaque fd passes to the
    // driver on successful import and that the fd must not be used afterwards.
    // Import a duplicate so that semfd stays valid for the worker thread below.
    int importfd = dup(semfd);
    if(importfd == -1){
        perror("dup");
        return 1;
    }

    cudaExternalSemaphore_t externalSem;
    cudaExternalSemaphoreHandleDesc externSemDesc;
    memset(&externSemDesc, 0, sizeof(externSemDesc));
    externSemDesc.type = cudaExternalSemaphoreHandleTypeOpaqueFd;
    externSemDesc.handle.fd = importfd;
    // Does not work: as reported alongside this code, importing an eventfd
    // descriptor fails here with an unknown error, and gpuErrchk exits.
    gpuErrchk(cudaImportExternalSemaphore(&externalSem, &externSemDesc));

    cudaExternalSemaphoreWaitParams waitParams;
    memset(&waitParams, 0, sizeof(waitParams));
    gpuErrchk(cudaWaitExternalSemaphoresAsync(&externalSem, &waitParams, 1, stream));

    // Enqueued behind the semaphore wait on the same stream.
    printkernel<<<1,1,0,stream>>>();
    // Kernel launches do not return a status; fetch launch errors explicitly.
    gpuErrchk(cudaGetLastError());

    // Post the semaphore from the host after the worker's delay.
    auto future2 = std::async(std::launch::async, workerthread1, semfd);
    gpuErrchk(cudaDeviceSynchronize());
    future2.wait();

    // Release everything acquired above. importfd is not closed here because
    // it belongs to the driver after a successful import.
    gpuErrchk(cudaDestroyExternalSemaphore(externalSem));
    gpuErrchk(cudaStreamDestroy(stream));
    close(semfd);
    return 0;
}
Do you really need a semaphore? You can use cudaLaunchHostFunc
to enqueue stream-ordered CPU work, and CUDA events can then be used to synchronize two streams around it.