Hi everyone,
Interestingly, it seems that deallocation from another thread does work, but that CUDA is still broken in one case: if the thread that allocated the memory is dead by the time you deallocate the buffer, you have a problem; otherwise it looks OK.
Apart from that issue (which we can quite easily avoid if we can make sure that all threads are still alive), I’m also interested in knowing whether it is “officially” legal to deallocate a buffer from any context. It’s quite important, because otherwise we’ll have to keep track of who allocated every piece of data. In the case of the producer-consumer paradigm (with multiple producers), for instance, that would make a big difference.
Just my 2 cents,
Cédric
PS: I have enclosed a little repro case (comment out the #define TRIGGER_CUDA_BUG to make the problem disappear).
[codebox]
#include <cuda.h>
#include <cuda_runtime_api.h>
#include <stdio.h>
#include <unistd.h>
#include <pthread.h>
/*
 * When defined, the allocating thread returns right after signalling and
 * main() joins it before calling cudaFreeHost(). When not defined, the
 * allocating thread sleeps for 10 seconds instead, and main() does not join it.
 */
#define TRIGGER_CUDA_BUG 1
/* Host buffer allocated by alloc_thread() and freed by main(). */
float *buffer;
/* Size passed to cudaHostAlloc() (4096*4096 = 16777216). */
size_t len = 4096*4096;
/* Set to 1 by alloc_thread() after cudaHostAlloc() has returned; guarded by `mutex`. */
int reached = 0;
/* Signalled by alloc_thread() once `reached` has been set. */
pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
/* Protects `reached` and is the mutex associated with `cond`. */
pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
void *alloc_thread(void *arg)
{
cudaSetDevice(1);
cudaError_t res;
res = cudaHostAlloc((void **)&buffer, len, cudaHostAllocPortable);
fprintf(stderr, "cudaHostAlloc returns %d\n", res);
pthread_mutex_lock(&mutex);
reached = 1;
pthread_cond_signal(&cond);
pthread_mutex_unlock(&mutex);
#ifndef TRIGGER_CUDA_BUG
sleep(10);
#endif
return NULL;
}
int main(int argc, char **argv)
{
pthread_t th;
pthread_create(&th, NULL, alloc_thread, NULL);
sleep(1);
cudaSetDevice(0);
pthread_mutex_lock(&mutex);
if (!reached)
pthread_cond_wait(&cond, &mutex);
pthread_mutex_unlock(&mutex);
#ifdef TRIGGER_CUDA_BUG
void *ret;
pthread_join(th, &ret);
#endif
int res = cudaFreeHost(buffer);
fprintf(stderr, "cudaFreeHost returns %d\n", res);
return 0;
}
[/codebox]