Hello, I’m trying to start DPA threads using a completion context, but the threads never wake up. Below is a simplified version of my code. I set up a thread and a completion context, attach the completion context to the thread, and attach the async ops object to the completion context. I also attach a notification completion context to the thread and send a notification to it, but nothing happens with the thread. Am I doing this wrong? Would someone please take a quick look at my steps and tell me whether I am missing something? Thank you.
Host side (error checking omitted here but passes successfully in my code):
/*
 * Argument bundle filled in on the host and handed to the device code.
 * The host copies it into DPA memory with doca_dpa_h2d_memcpy() and also
 * passes it to trigger_async_memcpy_rpc.
 * NOTE(review): field order and types are presumably a host/device layout
 * contract; keep both sides in sync when changing them.
 */
struct thread_arg {
/* Async-ops handle; first argument of doca_dpa_dev_post_memcpy() on the device. */
doca_dpa_dev_async_ops_t async_ops_handle;
/* Device handle of the DPA completion context the async ops are attached to. */
doca_dpa_dev_completion_t dpa_comp_handle;
/* Mmap handle paired with src_addr in the posted memcpy operations. */
doca_dpa_dev_mmap_t src_mmap_handle;
/* Mmap handle paired with dst_addr in the posted memcpy operations. */
doca_dpa_dev_mmap_t dst_mmap_handle;
/* Base address of the source region; the device adds the running byte offset. */
doca_dpa_dev_uintptr_t src_addr;
/* Base address of the destination region; the device adds the running byte offset. */
doca_dpa_dev_uintptr_t dst_addr;
/* Total number of bytes to copy. */
size_t total_size;
/* Upper bound on the size of a single posted memcpy operation. */
size_t chunk_size;
/* Set by the host from DPA_runtime; not read by the device code shown here. */
doca_dpa_dev_uintptr_t runtime_ptr;
/* Device handle of the completion sync event; not read by the device code shown here. */
doca_dpa_dev_sync_event_t comp_sync_event;
/* Value associated with comp_sync_event (the host sets it to 10). */
uint64_t comp_sync_event_val;
};
struct doca_sync_event *comp_event = NULL;
doca_dpa_dev_sync_event_t comp_event_handle;
uint64_t comp_event_val = 10;
struct doca_dpa_thread *dpa_thread = NULL;
struct doca_dpa_async_ops *async_ops = NULL;
struct doca_dpa_completion *dpa_comp = NULL;
struct doca_dpa_notification_completion *notify_comp = NULL;
doca_dpa_dev_async_ops_t async_ops_handle;
doca_dpa_dev_completion_t dpa_comp_handle;
doca_dpa_dev_notification_completion_t notify_comp_handle;
doca_dpa_dev_mmap_t src_mmap_handle, dst_mmap_handle;
doca_dpa_dev_uintptr_t thread_arg_dev_ptr = 0;
struct thread_arg targ = {0};
doca_dpa_dev_uintptr_t thread_arg_dev_ptr = 0;
doca_dpa_thread_create(resources->pf_dpa_ctx, &dpa_thread);
doca_dpa_mem_alloc(resources->pf_dpa_ctx, sizeof(struct thread_arg), &thread_arg_dev_ptr);
doca_dpa_thread_set_func_arg(dpa_thread, &async_memcpy_thread, thread_arg_dev_ptr);
doca_dpa_thread_start(dpa_thread);
doca_dpa_notification_completion_create(resources->pf_dpa_ctx, dpa_thread, ¬ify_comp);
doca_dpa_notification_completion_start(notify_comp);
doca_dpa_notification_completion_get_dpa_handle(notify_comp, ¬ify_comp_handle);
doca_dpa_notification_completion_get_thread(notify_comp, ¬ify_thread);
doca_dpa_completion_create(resources->pf_dpa_ctx, 8, &dpa_comp);
doca_dpa_completion_set_thread(dpa_comp, dpa_thread);
doca_dpa_async_ops_create(resources->pf_dpa_ctx, 16, 0, &async_ops);
doca_dpa_async_ops_attach(async_ops, dpa_comp);
doca_dpa_completion_start(dpa_comp);
doca_dpa_async_ops_get_dpa_handle(async_ops, &async_ops_handle);
doca_dpa_completion_get_dpa_handle(dpa_comp, &dpa_comp_handle);
create_doca_dpa_completion_sync_event(resources->pf_dpa_ctx,
resources->pf_doca_device,
&comp_event,
&comp_event_handle);
doca_dpa_thread_run(dpa_thread);
targ.async_ops_handle = async_ops_handle;
targ.dpa_comp_handle = dpa_comp_handle;
targ.src_mmap_handle = src_mmap_handle;
targ.dst_mmap_handle = dst_mmap_handle;
targ.src_addr = DPA_arr;
targ.dst_addr = DPA_arr2;
targ.total_size = arr_volume;
targ.chunk_size = CHUNK_SIZE;
targ.runtime_ptr = DPA_runtime;
targ.comp_sync_event = comp_event_handle;
targ.comp_sync_event_val = comp_event_val;
doca_dpa_h2d_memcpy(resources->pf_dpa_ctx, thread_arg_dev_ptr, &targ, sizeof(targ));
doca_dpa_rpc(resources->pf_dpa_ctx, &wake_thread_rpc, &rpc_result, notify_comp_handle);
doca_dpa_rpc(resources->pf_dpa_ctx, &trigger_async_memcpy_rpc, &rpc_result, targ);
Device side:
/**
 * RPC handler that notifies the DPA thread behind a notification completion.
 *
 * Logs a message, forwards the handle to doca_dpa_dev_thread_notify(), then
 * logs a second message once that call has returned.
 *
 * @param notify_handle  Notification completion handle passed to
 *                       doca_dpa_dev_thread_notify().
 * @return Always 0.
 */
__dpa_rpc__ uint64_t wake_thread_rpc(doca_dpa_dev_notification_completion_t notify_handle)
{
    const uint64_t rpc_status = 0;

    DOCA_DPA_DEV_LOG_INFO("[RPC-WAKE]: Waking thread via notification...\n");
    doca_dpa_dev_thread_notify(notify_handle);
    DOCA_DPA_DEV_LOG_INFO("[RPC-WAKE]: Got here\n");

    return rpc_status;
}
/**
 * RPC handler that copies targ.total_size bytes from the source region to the
 * destination region by posting a series of asynchronous memcpy operations,
 * each at most targ.chunk_size bytes long.
 *
 * Every operation except the last is posted with
 * DOCA_DPA_DEV_SUBMIT_FLAG_OPTIMIZE_REPORTS; the operation that reaches
 * total_size is posted with DOCA_DPA_DEV_SUBMIT_FLAG_FLUSH.
 *
 * NOTE(review): nothing here bounds the number of posted operations by the
 * depth of the async-ops queue; confirm that total_size / chunk_size fits.
 *
 * @param targ  Handles, base addresses and sizes describing the copy.
 * @return 0 once all operations have been posted (including the trivial case
 *         total_size == 0); 1 if chunk_size is 0 while total_size is not,
 *         since that request can never make progress.
 */
__dpa_rpc__ uint64_t trigger_async_memcpy_rpc(thread_arg_t targ)
{
    /* A zero chunk size would post zero-length copies forever. */
    if (targ.chunk_size == 0 && targ.total_size != 0) {
        return 1;
    }

    size_t offset = 0;
    while (offset < targ.total_size) {
        const size_t remaining = targ.total_size - offset;
        const size_t copy_size = (remaining >= targ.chunk_size) ? targ.chunk_size : remaining;

        /*
         * copy_size never exceeds remaining, so equality marks the final
         * chunk. Comparing this way avoids the overflow-prone
         * "offset + copy_size" sum.
         */
        const uint32_t flags = (copy_size == remaining)
                                   ? DOCA_DPA_DEV_SUBMIT_FLAG_FLUSH
                                   : DOCA_DPA_DEV_SUBMIT_FLAG_OPTIMIZE_REPORTS;

        doca_dpa_dev_post_memcpy(targ.async_ops_handle,
                                 targ.dst_mmap_handle,
                                 targ.dst_addr + offset,
                                 targ.src_mmap_handle,
                                 targ.src_addr + offset,
                                 copy_size,
                                 flags);

        offset += copy_size;
    }

    return 0;
}
/**
 * DPA thread entry point registered by the host with
 * doca_dpa_thread_set_func_arg().
 *
 * Logs one message each time the thread is activated and then hands control
 * back to the DPA runtime so the thread can be activated again.
 *
 * @param arg  Value the host bound to the thread (the device address of the
 *             uploaded argument struct in the host excerpt); unused here.
 */
__dpa_global__ void async_memcpy_thread(uint64_t arg) {
    (void)arg;

    DOCA_DPA_DEV_LOG_INFO("This never prints\n");

    /*
     * A DPA thread function should not simply return: it is expected to end
     * with doca_dpa_dev_thread_reschedule() (stay active for further
     * completions/notifications) or doca_dpa_dev_thread_finish().
     * Reschedule is used because the thread is attached to a completion
     * context and is meant to be woken repeatedly.
     */
    doca_dpa_dev_thread_reschedule();
}