I’m trying to create a completion queue using the DEVX API.
I get errno 121 (Remote I/O error). The man page says that in this case the status and syndrome of the command output should be checked; the values I get are:
- status (0x5)
- syndrome (0xd544f)
The status seems to correspond to “BAD_RESOURCE Attempt to access reserved or unallocated resource, or resource in inappropriate status. for example, not existing CQ when creating SQ/RQ”
However, I have been unable to find any documentation for that syndrome value.
I only get these errors if I use MPI elsewhere in my application. Does anyone have any suggestion on the syndrome or how to interpret the status?
int my_create_dv_cq(const struct ibv_context *ibctx, struct my_dv_cq *dvcq)
{
uint32_t in[DEVX_ST_SZ_DW(create_cq_in)] = {};
uint32_t out[DEVX_ST_SZ_DW(create_cq_out)] = {};
void *cqc = DEVX_ADDR_OF(create_cq_in, in, cq_context);
struct mlx5_cqe64 *cqe;
uint32_t eqn;
int i, err;
int size;
dvcq->cqe_sz = 64;
dvcq->ncqe = 1 << PP_MAX_LOG_CQ_SIZE;
err = mlx5dv_devx_query_eqn(ibctx, 0, &eqn);
if (err) {
fprintf(stderr, "devx_query_eqn failed: %d, errno %d\n", ret, errno);
return err;
}
err = posix_memalign((void **)&dvcq->db, 8, 8);
if (err) {
fprintf(stderr,"cq.db posix_memalign(8) failed\n");
return err;
}
dvcq->db[0] = 0;
dvcq->db[1] = 0;
dvcq->db_umem = mlx5dv_devx_umem_reg(ibctx, dvcq->db, 8, MY_ACCESS_FALGS);
if (!dvcq->db_umem) {
fprintf(stderr,"cq.db umem_reg() failed\n");
return -1;
}
size = roundup_pow_of_two(dvcq->cqe_sz * dvcq->ncqe);
dvcq->buflen = align(size, sysconf(_SC_PAGESIZE));
err = posix_memalign(&dvcq->buf, sysconf(_SC_PAGESIZE), dvcq->buflen);
if (err) {
fprintf(stderr,"cq.buf posix_memalign(0x%lx) failed\n", dvcq->buflen);
return err;
}
memset(dvcq->buf, 0, dvcq->buflen);
dvcq->buff_umem = mlx5dv_devx_umem_reg(ibctx, dvcq->buf,
dvcq->buflen, PP_ACCESS_FALGS);
if (!dvcq->buff_umem) {
fprintf(stderr,"cq.buf umem_reg(0x%lx) failed\n", dvcq->buflen);
return -1;
}
dvcq->uar = mlx5dv_devx_alloc_uar(ibctx, MLX5_IB_UAPI_UAR_ALLOC_TYPE_NC);
if (!dvcq->uar) {
fprintf(stderr,"mlx5dv_devc_alloc_uar failed\n");
return -1;
}
DEVX_SET(create_cq_in, in, opcode, MLX5_CMD_OP_CREATE_CQ);
DEVX_SET(create_cq_in, in, cq_umem_id, dvcq->buff_umem->umem_id);
DEVX_SET(create_cq_in, in, cq_umem_valid, 1);
DEVX_SET(cqc, cqc, log_cq_size, PP_MAX_LOG_CQ_SIZE);
DEVX_SET(cqc, cqc, cqe_sz, 0);
DEVX_SET(cqc, cqc, uar_page, dvcq->uar->page_id);
DEVX_SET(cqc, cqc, c_eqn, eqn);
DEVX_SET64(cqc, cqc, dbr_umem_id, dvcq->db_umem->umem_id);
DEVX_SET64(cqc, cqc, dbr_umem_valid, 1);
DEVX_SET64(cqc, cqc, log_page_size, 0);
DEVX_SET64(cqc, cqc, page_offset, 0);
dvcq->obj = mlx5dv_devx_obj_create(ibctx, in, sizeof(in), out, sizeof(out));
if (!dvcq->obj) {
uint32_t syndrome;
uint8_t status;
uint16_t opcode;
uint16_t op_mod;
status = DEVX_GET(mbox_out, out, status);
syndrome = DEVX_GET(mbox_out, out, syndrome);
opcode = DEVX_GET(mbox_in, in, opcode);
op_mod = DEVX_GET(mbox_in, in, op_mod);
fprintf(stderr,"devx_obj_create(cq) failed: eqn %d\n mlx5_code(0x%x), op_mod(0x%x) failed, status (0x%x), syndrome (0x%x)\n",
eqn, opcode, op_mod, status, syndrome);
return -1;
}
dvcq->cqn = DEVX_GET(create_cq_out, out, cqn);
printf("dv: CQ %d created, eqn %d, db@%p, buf@%p\n",
dvcq->cqn, eqn, dvcq->db, dvcq->buf);
dvcq->cons_index = 0;
for (i = 0; i < dvcq->ncqe; i++) {
cqe = pp_dv_get_cqe(dvcq, i);
cqe->op_own = MLX5_CQE_INVALID << 4;
}
return 0;
}