No, you can. This is no different from any other host-side work: if a kernel is running and you issue an allocation from the host, the two will overlap.
You can see in the picture that both cudaMalloc and cudaMallocAsync execute concurrently with the kernel.
```
__global__ void long_running_kernel() {
    // Keep the GPU busy for a while; __nanosleep requires compute capability 7.0+.
    for (int i = 0; i < 10; i++) {
        __nanosleep(10000000);
    }
}

int main() {
    cudaSetDevice(0);
    long_running_kernel<<<1, 1>>>();                    // launched into the default stream

    // Host-side allocations issued while the kernel is still running.
    void* ptr1;
    void* ptr2;
    cudaMalloc(&ptr1, 1024 * 1024);
    cudaMallocAsync(&ptr2, 1024 * 1024, (cudaStream_t)0);

    cudaFreeAsync(ptr2, (cudaStream_t)0);
    cudaFree(ptr1);
    cudaDeviceSynchronize();
}
```
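
If you don't have a profiler handy, you can also check this from the host. The sketch below is my own addition, not part of the original snippet: it times the cudaMalloc call and queries the default stream right after it returns. If cudaStreamQuery reports cudaErrorNotReady, the kernel was still in flight when the allocation returned, i.e. the two overlapped.

```
#include <cstdio>
#include <chrono>

__global__ void long_running_kernel() {
    for (int i = 0; i < 10; i++) {
        __nanosleep(10000000);   // requires compute capability 7.0+
    }
}

int main() {
    cudaSetDevice(0);
    long_running_kernel<<<1, 1>>>();                 // default stream

    auto t0 = std::chrono::steady_clock::now();
    void* ptr = nullptr;
    cudaMalloc(&ptr, 1024 * 1024);                   // host allocation while the kernel runs
    auto t1 = std::chrono::steady_clock::now();

    // cudaErrorNotReady here means the kernel was still running
    // when cudaMalloc returned, i.e. the allocation did not wait for it.
    cudaError_t state = cudaStreamQuery(0);
    double ms = std::chrono::duration<double, std::milli>(t1 - t0).count();
    printf("cudaMalloc returned after %.3f ms, kernel %s\n",
           ms, state == cudaErrorNotReady ? "still running" : "already finished");

    cudaFree(ptr);
    cudaDeviceSynchronize();
    return 0;
}
```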
