Array of non-assignable objects

Hello, I am trying to create an array of non-assignable objects, specifically CUDA counting_semaphore objects.

I solved the problem using an array of pointers, with this code:

#include <cuda/std/semaphore>


// Kernel: each thread constructs a semaphore with an initial count of 1 and
// stores its address in sems[threadIdx.x].
// NOTE(review): `sem` is a local object of this kernel, so the stored address
// no longer refers to a live object once the kernel returns.
__global__ 
void init_sems(semaphore* *sems){
    semaphore sem(1);  // local to this thread's invocation of the kernel
    sems[threadIdx.x] = &sem;  // the address of a local escapes the kernel here
}

// Kernel: each thread calls acquire() through the pointer stored in
// sems[threadIdx.x].
// NOTE(review): the pointers must refer to semaphores that are still alive
// when this kernel runs; verify how the caller populated the array.
__global__
void test(semaphore* *sems){
    sems[threadIdx.x]->acquire();
}

// Allocates an array of 10 semaphore pointers with cudaMalloc, fills it with
// init_sems, then launches test on the same array (one block of 10 threads
// for each kernel).
int main(){
    semaphore* *sems;
    cudaMalloc((void**)&sems, 10 * sizeof(semaphore*));  // room for pointers only, not for semaphore objects
    init_sems<<<1, 10>>>(sems);
    cudaDeviceSynchronize();  // wait for init_sems to finish before launching test
    test<<<1, 10>>>(sems);
}

but the acquire in the test kernel remains blocked. If I try to acquire inside init_sems, it works.
I suspect sem is created locally and then discarded; is this the problem? How can I solve it?

Thanks!

Your code does not compile.

sem is a local variable and will be destroyed when it goes out of scope. This is not specific to CUDA.

To resolve this issue, simply construct the semaphores in the allocated memory. There is no need for a pointer to pointers.

#include <cuda/semaphore>
#include <cuda/std/semaphore>

// Device-scoped counting semaphore used by the kernels below.
// The thread scope is a template parameter of cuda::counting_semaphore
// (declared in <cuda/semaphore>). cuda::std::counting_semaphore only takes the
// maximum count, so a thread_scope enumerator passed there is converted to an
// integer maximum instead of selecting a scope.
using semaphore = cuda::counting_semaphore<cuda::thread_scope_device>;

// Kernel: thread t constructs a semaphore with an initial count of 1 in place
// at sems[t]. The storage behind sems must already be allocated; launch with
// one thread per element, since no bounds check is performed.
__global__
void init_sems(semaphore* sems){
    semaphore* slot = sems + threadIdx.x;
    new (slot) semaphore(1);  // placement new: construct in the existing storage
}

// Kernel: thread t acquires the semaphore stored at sems[t].
__global__
void test(semaphore* sems){
    semaphore& mine = *(sems + threadIdx.x);
    mine.acquire();
}

// Allocates 10 semaphores with cudaMalloc, constructs them with init_sems,
// then has each thread of test acquire its own semaphore.
// Returns 0 on success and 1 if any CUDA call or kernel launch fails.
int main(){
    constexpr int kNumSems = 10;  // one semaphore per thread of the single block

    semaphore* sems = nullptr;
    if (cudaMalloc((void**)&sems, kNumSems * sizeof(semaphore)) != cudaSuccess)
        return 1;

    // A kernel launch does not return a status: launch errors are read with
    // cudaGetLastError(), execution errors surface at the next synchronize.
    init_sems<<<1, kNumSems>>>(sems);
    bool ok = cudaGetLastError() == cudaSuccess
           && cudaDeviceSynchronize() == cudaSuccess;

    // Only touch the semaphores if they were constructed successfully.
    if (ok) {
        test<<<1, kNumSems>>>(sems);
        ok = cudaGetLastError() == cudaSuccess
          && cudaDeviceSynchronize() == cudaSuccess;
    }

    cudaFree(sems);  // release the allocation on both the success and failure paths
    return ok ? 0 : 1;
}
1 Like

Yes, this works; thank you!

This topic was automatically closed 14 days after the last reply. New replies are no longer allowed.