eal: remove deprecated coherent IO memory barriers
The 20.08 release deprecated the rte_cio_*mb APIs because they provide the same functionality as the rte_io_*mb APIs on all platforms. Remove them and use rte_io_*mb instead.

Signed-off-by: Phil Yang <phil.yang@arm.com>
Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
Acked-by: David Marchand <david.marchand@redhat.com>
parent 46697431ad
commit f0f5d844d1
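For drivers that followed the old pattern, the migration is a direct rename; rte_io_wmb()/rte_io_rmb() now order coherent-memory accesses against device accesses on all platforms. A minimal sketch, assuming a hypothetical doorbell helper (the names below are illustrative, not taken from this diff):

#include <stdint.h>

#include <rte_atomic.h> /* rte_io_wmb() */
#include <rte_io.h>     /* rte_write32_relaxed() */

/* Hypothetical helper: make descriptor stores in coherent memory visible
 * to the device before ringing its doorbell register. Before 20.11 the
 * barrier here would have been rte_cio_wmb(). */
static inline void
example_ring_doorbell(volatile void *db_reg, uint32_t tail)
{
	rte_io_wmb();
	rte_write32_relaxed(tail, db_reg);
}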
@@ -83,12 +83,6 @@ Deprecation Notices
 These wrappers must be used for patches that need to be merged in 20.08
 onwards. This change will not introduce any performance degradation.
 
-* rte_cio_*mb: Since the IO barriers for ARMv8 platforms are relaxed from DSB
-to DMB, rte_cio_*mb APIs provide the same functionality as rte_io_*mb
-APIs (taking all platforms into consideration). rte_io_*mb APIs should be
-used in the place of rte_cio_*mb APIs. The rte_cio_*mb APIs will be
-deprecated in 20.11 release.
-
 * igb_uio: In the view of reducing the kernel dependency from the main tree,
 as a first step, the Technical Board decided to move ``igb_uio``
 kernel module to the dpdk-kmods repository in the /linux/igb_uio/ directory
@@ -113,6 +113,9 @@ API Changes
 * eal: Made the ``rte_dev_event`` structure private to the EAL as no public API
 used it.
 
+* eal: ``rte_cio_rmb()`` and ``rte_cio_wmb()`` were deprecated since 20.08
+and are removed in this release.
+
 * mem: Removed the unioned field ``phys_addr`` from
 the structures ``rte_memseg`` and ``rte_memzone``.
 The field ``iova`` is remaining from the old unions.
@@ -193,7 +193,7 @@ check_cqe(volatile struct mlx5_cqe *cqe, const uint16_t cqes_n,
 
 if (unlikely((op_owner != (!!(idx))) || (op_code == MLX5_CQE_INVALID)))
 return MLX5_CQE_STATUS_HW_OWN;
-rte_cio_rmb();
+rte_io_rmb();
 if (unlikely(op_code == MLX5_CQE_RESP_ERR ||
 op_code == MLX5_CQE_REQ_ERR))
 return MLX5_CQE_STATUS_ERR;
@@ -469,7 +469,7 @@ otx2_cpt_enqueue_req(const struct otx2_cpt_qp *qp,
 * buffer immediately, a DMB is not required to push out
 * LMTSTs.
 */
-rte_cio_wmb();
+rte_io_wmb();
 lmt_status = otx2_lmt_submit(qp->lf_nq_reg);
 } while (lmt_status == 0);
 
@@ -107,7 +107,7 @@ otx2_cpt_enq_sa_write(struct otx2_sec_session_ipsec_lp *lp,
 inst.u64[3] = 0;
 inst.res_addr = rte_mempool_virt2iova(res);
 
-rte_cio_wmb();
+rte_io_wmb();
 
 do {
 /* Copy CPT command to LMTLINE */
@@ -124,7 +124,7 @@ otx2_cpt_enq_sa_write(struct otx2_sec_session_ipsec_lp *lp,
 otx2_err("Request timed out");
 return -ETIMEDOUT;
 }
-rte_cio_rmb();
+rte_io_rmb();
 }
 
 if (unlikely(res->compcode != CPT_9X_COMP_E_GOOD)) {
@@ -286,17 +286,17 @@ __sso_event_tx_adapter_enqueue(void *port, struct rte_event ev[],
 switch (ev->sched_type) {
 case SSO_SYNC_ORDERED:
 ssows_swtag_norm(ws, ev->event, SSO_SYNC_ATOMIC);
-rte_cio_wmb();
+rte_io_wmb();
 ssows_swtag_wait(ws);
 break;
 case SSO_SYNC_UNTAGGED:
 ssows_swtag_full(ws, ev->u64, ev->event, SSO_SYNC_ATOMIC,
 ev->queue_id);
-rte_cio_wmb();
+rte_io_wmb();
 ssows_swtag_wait(ws);
 break;
 case SSO_SYNC_ATOMIC:
-rte_cio_wmb();
+rte_io_wmb();
 break;
 }
 
@@ -256,7 +256,7 @@ otx2_ssogws_order(struct otx2_ssogws *ws, const uint8_t wait_flag)
 if (wait_flag)
 otx2_ssogws_head_wait(ws);
 
-rte_cio_wmb();
+rte_io_wmb();
 }
 
 static __rte_always_inline const struct otx2_eth_txq *
@@ -148,7 +148,7 @@ static int bnxt_hwrm_send_message(struct bnxt *bp, void *msg,
 /* Poll for the valid bit */
 for (i = 0; i < timeout; i++) {
 /* Sanity check on the resp->resp_len */
-rte_cio_rmb();
+rte_io_rmb();
 if (resp->resp_len && resp->resp_len <= bp->max_resp_len) {
 /* Last byte of resp contains the valid key */
 valid = (uint8_t *)resp + resp->resp_len - 1;
@@ -82,7 +82,7 @@ void bnxt_free_rxtx_nq_ring(struct bnxt *bp);
 
 static inline void bnxt_db_write(struct bnxt_db_info *db, uint32_t idx)
 {
-rte_cio_wmb();
+rte_io_wmb();
 
 if (db->db_64)
 rte_write64_relaxed(db->db_key64 | idx, db->doorbell);
@@ -96,7 +96,7 @@ static inline void bnxt_db_nq(struct bnxt_cp_ring_info *cpr)
 if (unlikely(!cpr->cp_db.db_64))
 return;
 
-rte_cio_wmb();
+rte_io_wmb();
 rte_write64_relaxed(cpr->cp_db.db_key64 | DBR_TYPE_NQ |
 RING_CMP(cpr->cp_ring_struct, cpr->cp_raw_cons),
 cpr->cp_db.doorbell);
@@ -108,7 +108,7 @@ static inline void bnxt_db_nq_arm(struct bnxt_cp_ring_info *cpr)
 if (unlikely(!cpr->cp_db.db_64))
 return;
 
-rte_cio_wmb();
+rte_io_wmb();
 rte_write64_relaxed(cpr->cp_db.db_key64 | DBR_TYPE_NQ_ARM |
 RING_CMP(cpr->cp_ring_struct, cpr->cp_raw_cons),
 cpr->cp_db.doorbell);
@@ -258,21 +258,21 @@ bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 * reverse order to ensure consistent state.
 */
 rxcmp1[3] = vld1q_u32((void *)&cpr->cp_desc_ring[cons + 7]);
-rte_cio_rmb();
+rte_io_rmb();
 rxcmp[3] = vld1q_u32((void *)&cpr->cp_desc_ring[cons + 6]);
 
 rxcmp1[2] = vld1q_u32((void *)&cpr->cp_desc_ring[cons + 5]);
-rte_cio_rmb();
+rte_io_rmb();
 rxcmp[2] = vld1q_u32((void *)&cpr->cp_desc_ring[cons + 4]);
 
 t1 = vreinterpretq_u64_u32(vzip2q_u32(rxcmp1[2], rxcmp1[3]));
 
 rxcmp1[1] = vld1q_u32((void *)&cpr->cp_desc_ring[cons + 3]);
-rte_cio_rmb();
+rte_io_rmb();
 rxcmp[1] = vld1q_u32((void *)&cpr->cp_desc_ring[cons + 2]);
 
 rxcmp1[0] = vld1q_u32((void *)&cpr->cp_desc_ring[cons + 1]);
-rte_cio_rmb();
+rte_io_rmb();
 rxcmp[0] = vld1q_u32((void *)&cpr->cp_desc_ring[cons + 0]);
 
 t0 = vreinterpretq_u64_u32(vzip2q_u32(rxcmp1[0], rxcmp1[1]));
@@ -2051,7 +2051,7 @@ e1000_flush_tx_ring(struct rte_eth_dev *dev)
 tx_desc->lower.data = rte_cpu_to_le_32(txd_lower | size);
 tx_desc->upper.data = 0;
 
-rte_cio_wmb();
+rte_io_wmb();
 txq->tx_tail++;
 if (txq->tx_tail == txq->nb_tx_desc)
 txq->tx_tail = 0;
@@ -1248,7 +1248,7 @@ end_of_tx:
 (unsigned) txq->port_id, (unsigned) txq->queue_id,
 (unsigned) tx_id, (unsigned) nb_tx);
 
-rte_cio_wmb();
+rte_io_wmb();
 I40E_PCI_REG_WRITE_RELAXED(txq->qtx_tail, tx_id);
 txq->tx_tail = tx_id;
 
@@ -72,7 +72,7 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
 rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
 (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
 
-rte_cio_wmb();
+rte_io_wmb();
 /* Update the tail pointer on the NIC */
 I40E_PCI_REG_WRITE_RELAXED(rxq->qrx_tail, rx_id);
 }
@@ -566,7 +566,7 @@ i40e_xmit_fixed_burst_vec(void *__rte_restrict tx_queue,
 
 txq->tx_tail = tx_id;
 
-rte_cio_wmb();
+rte_io_wmb();
 I40E_PCI_REG_WRITE_RELAXED(txq->qtx_tail, tx_id);
 
 return nb_pkts;
@@ -6118,7 +6118,7 @@ mlx5_flow_async_pool_query_handle(struct mlx5_dev_ctx_shared *sh,
 pool->raw = pool->raw_hw;
 rte_spinlock_unlock(&pool->sl);
 /* Be sure the new raw counters data is updated in memory. */
-rte_cio_wmb();
+rte_io_wmb();
 if (!TAILQ_EMPTY(&pool->counters[query_gen])) {
 rte_spinlock_lock(&cont->csl);
 TAILQ_CONCAT(&cont->counters,
@@ -4452,7 +4452,7 @@ flow_dv_pool_create(struct rte_eth_dev *dev, struct mlx5_devx_obj *dcs,
 cont->last_pool_idx = pool->index;
 }
 /* Pool initialization must be updated before host thread access. */
-rte_cio_wmb();
+rte_io_wmb();
 rte_atomic16_add(&cont->n_valid, 1);
 return pool;
 }
@@ -484,11 +484,11 @@ rxq_sync_cq(struct mlx5_rxq_data *rxq)
 cqe->op_own = MLX5_CQE_INVALIDATE;
 }
 /* Resync CQE and WQE (WQ in RESET state). */
-rte_cio_wmb();
+rte_io_wmb();
 *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
-rte_cio_wmb();
+rte_io_wmb();
 *rxq->rq_db = rte_cpu_to_be_32(0);
-rte_cio_wmb();
+rte_io_wmb();
 }
 
 /**
@@ -606,12 +606,12 @@ mlx5_rx_queue_start_primary(struct rte_eth_dev *dev, uint16_t idx)
 rte_errno = errno;
 return ret;
 }
-rte_cio_wmb();
+rte_io_wmb();
 *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
-rte_cio_wmb();
-/* Reset RQ consumer before moving queue to READY state. */
+rte_io_wmb();
+/* Reset RQ consumer before moving queue ro READY state. */
 *rxq->rq_db = rte_cpu_to_be_32(0);
-rte_cio_wmb();
+rte_io_wmb();
 ret = priv->obj_ops.rxq_obj_modify(rxq_ctrl->obj, true);
 if (ret) {
 DRV_LOG(ERR, "Cannot change Rx WQ state to READY: %s",
@@ -873,7 +873,7 @@ mlx5_rxq_initialize(struct mlx5_rxq_data *rxq)
 };
 /* Update doorbell counter. */
 rxq->rq_ci = wqe_n >> rxq->sges_n;
-rte_cio_wmb();
+rte_io_wmb();
 *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
 }
 
@@ -1113,15 +1113,15 @@ mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec)
 case MLX5_RXQ_ERR_STATE_NEED_READY:
 ret = check_cqe(u.cqe, cqe_n, rxq->cq_ci);
 if (ret == MLX5_CQE_STATUS_HW_OWN) {
-rte_cio_wmb();
+rte_io_wmb();
 *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
-rte_cio_wmb();
+rte_io_wmb();
 /*
 * The RQ consumer index must be zeroed while moving
 * from RESET state to RDY state.
 */
 *rxq->rq_db = rte_cpu_to_be_32(0);
-rte_cio_wmb();
+rte_io_wmb();
 sm.is_wq = 1;
 sm.queue_id = rxq->idx;
 sm.state = IBV_WQS_RDY;
@@ -1515,9 +1515,9 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 return 0;
 /* Update the consumer index. */
 rxq->rq_ci = rq_ci >> sges_n;
-rte_cio_wmb();
+rte_io_wmb();
 *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
-rte_cio_wmb();
+rte_io_wmb();
 *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
 #ifdef MLX5_PMD_SOFT_COUNTERS
 /* Increment packets counter. */
@@ -1893,11 +1893,11 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 out:
 /* Update the consumer indexes. */
 rxq->consumed_strd = consumed_strd;
-rte_cio_wmb();
+rte_io_wmb();
 *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
 if (rq_ci != rxq->rq_ci) {
 rxq->rq_ci = rq_ci;
-rte_cio_wmb();
+rte_io_wmb();
 *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
 }
 #ifdef MLX5_PMD_SOFT_COUNTERS
@@ -627,7 +627,7 @@ mlx5_tx_dbrec_cond_wmb(struct mlx5_txq_data *txq, volatile struct mlx5_wqe *wqe,
 uint64_t *dst = MLX5_TX_BFREG(txq);
 volatile uint64_t *src = ((volatile uint64_t *)wqe);
 
-rte_cio_wmb();
+rte_io_wmb();
 *txq->qp_db = rte_cpu_to_be_32(txq->wqe_ci);
 /* Ensure ordering between DB record and BF copy. */
 rte_wmb();
@@ -118,7 +118,7 @@ mlx5_rx_replenish_bulk_mbuf(struct mlx5_rxq_data *rxq, uint16_t n)
 elts_idx = rxq->rq_ci & q_mask;
 for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i)
 (*rxq->elts)[elts_idx + i] = &rxq->fake_mbuf;
-rte_cio_wmb();
+rte_io_wmb();
 *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
 }
 
@@ -788,7 +788,7 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
 /* B.2 copy mbuf pointers. */
 *(vector unsigned char *)&pkts[pos] = mbp1;
 *(vector unsigned char *)&pkts[pos + 2] = mbp2;
-rte_cio_rmb();
+rte_io_rmb();
 
 /* C.1 load remaining CQE data and extract necessary fields. */
 cqe_tmp2 = *(vector unsigned char *)
@@ -554,7 +554,7 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
 /* B.0 (CQE 0) load a block having op_own. */
 c0 = vld1q_u64((uint64_t *)(p0 + 48));
 /* Synchronize for loading the rest of blocks. */
-rte_cio_rmb();
+rte_io_rmb();
 /* Prefetch next 4 CQEs. */
 if (pkts_n - pos >= 2 * MLX5_VPMD_DESCS_PER_LOOP) {
 unsigned int next = pos + MLX5_VPMD_DESCS_PER_LOOP;
@@ -803,7 +803,7 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
 rxq->decompressed -= n;
 }
 }
-rte_cio_wmb();
+rte_io_wmb();
 *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
 *no_cq = !rcvd_pkt;
 return rcvd_pkt;
@@ -552,7 +552,7 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
 /* B.2 copy mbuf pointers. */
 _mm_storeu_si128((__m128i *)&pkts[pos], mbp1);
 _mm_storeu_si128((__m128i *)&pkts[pos + 2], mbp2);
-rte_cio_rmb();
+rte_io_rmb();
 /* C.1 load remained CQE data and extract necessary fields. */
 cqe_tmp2 = _mm_load_si128((__m128i *)&cq[pos + p3]);
 cqe_tmp1 = _mm_load_si128((__m128i *)&cq[pos + p2]);
@@ -155,9 +155,9 @@ txq_sync_cq(struct mlx5_txq_data *txq)
 cqe->op_own = MLX5_CQE_INVALIDATE;
 }
 /* Resync CQE and WQE (WQ in reset state). */
-rte_cio_wmb();
+rte_io_wmb();
 *txq->cq_db = rte_cpu_to_be_32(txq->cq_ci);
-rte_cio_wmb();
+rte_io_wmb();
 }
 
 /**
@@ -418,7 +418,7 @@ __octeontx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 struct octeontx_txq *txq = tx_queue;
 octeontx_dq_t *dq = &txq->dq;
 uint16_t count = 0, nb_desc;
-rte_cio_wmb();
+rte_io_wmb();
 
 while (count < nb_pkts) {
 if (unlikely(*((volatile int64_t *)dq->fc_status_va) < 0))
@@ -312,7 +312,7 @@ hmac_init(struct otx2_ipsec_fp_sa_ctl *ctl, struct otx2_cpt_qp *qp,
 
 timeout = rte_get_timer_cycles() + 5 * rte_get_timer_hz();
 
-rte_cio_wmb();
+rte_io_wmb();
 
 do {
 otx2_lmt_mov(qp->lmtline, &inst, 2);
@@ -160,7 +160,7 @@ otx2_sec_event_tx(struct otx2_ssogws *ws, struct rte_event *ev,
 sess->ip_id++;
 sess->esn++;
 
-rte_cio_wmb();
+rte_io_wmb();
 
 do {
 otx2_lmt_mov(sess->cpt_lmtline, &inst, 2);
@@ -303,7 +303,7 @@ nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
 rxq->head = head;
 rxq->available -= packets;
 
-rte_cio_wmb();
+rte_io_wmb();
 /* Free all the CQs that we've processed */
 otx2_write64((rxq->wdata | packets), rxq->cq_door);
 
@@ -39,7 +39,7 @@ nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 }
 
 /* Lets commit any changes in the packet */
-rte_cio_wmb();
+rte_io_wmb();
 
 for (i = 0; i < pkts; i++) {
 otx2_nix_xmit_prepare(tx_pkts[i], cmd, flags);
@@ -75,7 +75,7 @@ nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
 }
 
 /* Lets commit any changes in the packet */
-rte_cio_wmb();
+rte_io_wmb();
 
 for (i = 0; i < pkts; i++) {
 otx2_nix_xmit_prepare(tx_pkts[i], cmd, flags);
@@ -128,7 +128,7 @@ nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
 txq->fc_cache_pkts -= pkts;
 
 /* Lets commit any changes in the packet */
-rte_cio_wmb();
+rte_io_wmb();
 
 senddesc01_w0 = vld1q_dup_u64(&txq->cmd[0]);
 senddesc23_w0 = senddesc01_w0;
@@ -147,7 +147,7 @@ virtqueue_dequeue_burst_rx_packed(struct virtqueue *vq,
 
 for (i = 0; i < num; i++) {
 used_idx = vq->vq_used_cons_idx;
-/* desc_is_used has a load-acquire or rte_cio_rmb inside
+/* desc_is_used has a load-acquire or rte_io_rmb inside
 * and wait for used desc in virtqueue.
 */
 if (!desc_is_used(&desc[used_idx], vq))
@@ -84,7 +84,7 @@ virtio_recv_pkts_vec(void *rx_queue,
 if (unlikely(nb_pkts < RTE_VIRTIO_DESC_PER_LOOP))
 return 0;
 
-/* virtqueue_nused has a load-acquire or rte_cio_rmb inside */
+/* virtqueue_nused has a load-acquire or rte_io_rmb inside */
 nb_used = virtqueue_nused(vq);
 
 if (unlikely(nb_used == 0))
@@ -47,7 +47,7 @@ virtio_rmb(uint8_t weak_barriers)
 if (weak_barriers)
 rte_smp_rmb();
 else
-rte_cio_rmb();
+rte_io_rmb();
 }
 
 static inline void
@@ -56,7 +56,7 @@ virtio_wmb(uint8_t weak_barriers)
 if (weak_barriers)
 rte_smp_wmb();
 else
-rte_cio_wmb();
+rte_io_wmb();
 }
 
 static inline uint16_t
@@ -68,7 +68,7 @@ virtqueue_fetch_flags_packed(struct vring_packed_desc *dp,
 if (weak_barriers) {
 /* x86 prefers to using rte_smp_rmb over __atomic_load_n as it reports
 * a better perf(~1.5%), which comes from the saved branch by the compiler.
-* The if and else branch are identical with the smp and cio barriers both
+* The if and else branch are identical with the smp and io barriers both
 * defined as compiler barriers on x86.
 */
 #ifdef RTE_ARCH_X86_64
@@ -79,7 +79,7 @@ virtqueue_fetch_flags_packed(struct vring_packed_desc *dp,
 #endif
 } else {
 flags = dp->flags;
-rte_cio_rmb();
+rte_io_rmb();
 }
 
 return flags;
@@ -92,7 +92,7 @@ virtqueue_store_flags_packed(struct vring_packed_desc *dp,
 if (weak_barriers) {
 /* x86 prefers to using rte_smp_wmb over __atomic_store_n as it reports
 * a better perf(~1.5%), which comes from the saved branch by the compiler.
-* The if and else branch are identical with the smp and cio barriers both
+* The if and else branch are identical with the smp and io barriers both
 * defined as compiler barriers on x86.
 */
 #ifdef RTE_ARCH_X86_64
@@ -102,7 +102,7 @@ virtqueue_store_flags_packed(struct vring_packed_desc *dp,
 __atomic_store_n(&dp->flags, flags, __ATOMIC_RELEASE);
 #endif
 } else {
-rte_cio_wmb();
+rte_io_wmb();
 dp->flags = flags;
 }
 }
@@ -469,7 +469,7 @@ virtio_get_queue_type(struct virtio_hw *hw, uint16_t vtpci_queue_idx)
 return VTNET_TQ;
 }
 
-/* virtqueue_nused has load-acquire or rte_cio_rmb insed */
+/* virtqueue_nused has load-acquire or rte_io_rmb insed */
 static inline uint16_t
 virtqueue_nused(const struct virtqueue *vq)
 {
@@ -480,7 +480,7 @@ virtqueue_nused(const struct virtqueue *vq)
 * x86 prefers to using rte_smp_rmb over __atomic_load_n as it
 * reports a slightly better perf, which comes from the saved
 * branch by the compiler.
-* The if and else branches are identical with the smp and cio
+* The if and else branches are identical with the smp and io
 * barriers both defined as compiler barriers on x86.
 */
 #ifdef RTE_ARCH_X86_64
@@ -492,7 +492,7 @@ virtqueue_nused(const struct virtqueue *vq)
 #endif
 } else {
 idx = vq->vq_split.ring.used->idx;
-rte_cio_rmb();
+rte_io_rmb();
 }
 return idx - vq->vq_used_cons_idx;
 }
@@ -510,7 +510,7 @@ vq_update_avail_idx(struct virtqueue *vq)
 * it reports a slightly better perf, which comes from the
 * saved branch by the compiler.
 * The if and else branches are identical with the smp and
-* cio barriers both defined as compiler barriers on x86.
+* io barriers both defined as compiler barriers on x86.
 */
 #ifdef RTE_ARCH_X86_64
 rte_smp_wmb();
@@ -520,7 +520,7 @@ vq_update_avail_idx(struct virtqueue *vq)
 vq->vq_avail_idx, __ATOMIC_RELEASE);
 #endif
 } else {
-rte_cio_wmb();
+rte_io_wmb();
 vq->vq_split.ring.avail->idx = vq->vq_avail_idx;
 }
 }
@@ -793,7 +793,7 @@ virtio_xmit_cleanup_inorder_packed(struct virtqueue *vq, int num)
 struct vq_desc_extra *dxp;
 
 used_idx = vq->vq_used_cons_idx;
-/* desc_is_used has a load-acquire or rte_cio_rmb inside
+/* desc_is_used has a load-acquire or rte_io_rmb inside
 * and wait for used desc in virtqueue.
 */
 while (num > 0 && desc_is_used(&desc[used_idx], vq)) {
@@ -827,7 +827,7 @@ virtio_xmit_cleanup_normal_packed(struct virtqueue *vq, int num)
 struct vq_desc_extra *dxp;
 
 used_idx = vq->vq_used_cons_idx;
-/* desc_is_used has a load-acquire or rte_cio_rmb inside
+/* desc_is_used has a load-acquire or rte_io_rmb inside
 * and wait for used desc in virtqueue.
 */
 while (num-- && desc_is_used(&desc[used_idx], vq)) {
@@ -475,7 +475,7 @@ sdp_ring_doorbell(struct sdp_device *sdpvf __rte_unused,
 otx2_write64(iq->fill_cnt, iq->doorbell_reg);
 
 /* Make sure doorbell writes observed by HW */
-rte_cio_wmb();
+rte_io_wmb();
 iq->fill_cnt = 0;
 
 }
@@ -812,7 +812,7 @@ sdp_rawdev_dequeue(struct rte_rawdev *rawdev,
 
 /* Ack the h/w with no# of pkts read by Host */
 rte_write32(pkts, droq->pkts_sent_reg);
-rte_cio_wmb();
+rte_io_wmb();
 
 droq->last_pkt_count -= pkts;
 
@@ -135,7 +135,7 @@ send_doorbell(struct mlx5dv_devx_uar *uar, struct mlx5_regex_sq *sq)
 ((struct mlx5_wqe_ctrl_seg *)wqe)->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
 uint64_t *doorbell_addr =
 (uint64_t *)((uint8_t *)uar->base_addr + 0x800);
-rte_cio_wmb();
+rte_io_wmb();
 sq->dbr[MLX5_SND_DBR] = rte_cpu_to_be_32((sq->db_pi + 1) &
 MLX5_REGEX_MAX_WQE_INDEX);
 rte_wmb();
@@ -219,7 +219,7 @@ poll_one(struct mlx5_regex_cq *cq)
 
 next_cqe_offset = (cq->ci & (cq_size_get(cq) - 1));
 cqe = (volatile struct mlx5_cqe *)(cq->cqe + next_cqe_offset);
-rte_cio_wmb();
+rte_io_wmb();
 
 int ret = check_cqe(cqe, cq_size_get(cq), cq->ci);
 
@@ -33,10 +33,6 @@ extern "C" {
 
 #define rte_io_rmb() rte_rmb()
 
-#define rte_cio_wmb() rte_wmb()
-
-#define rte_cio_rmb() rte_rmb()
-
 static __rte_always_inline void
 rte_atomic_thread_fence(int memory_order)
 {
@@ -37,10 +37,6 @@ extern "C" {
 
 #define rte_io_rmb() rte_rmb()
 
-#define rte_cio_wmb() rte_wmb()
-
-#define rte_cio_rmb() rte_rmb()
-
 static __rte_always_inline void
 rte_atomic_thread_fence(int memory_order)
 {
@@ -107,45 +107,6 @@ static inline void rte_io_wmb(void);
 static inline void rte_io_rmb(void);
 ///@}
 
-/** @name Coherent I/O Memory Barrier
-*
-* Coherent I/O memory barrier is a lightweight version of I/O memory
-* barriers which are system-wide data synchronization barriers. This
-* is for only coherent memory domain between lcore and I/O device but
-* it is same as the I/O memory barriers in most of architectures.
-* However, some architecture provides even lighter barriers which are
-* somewhere in between I/O memory barriers and SMP memory barriers.
-* For example, in case of ARMv8, DMB(data memory barrier) instruction
-* can have different shareability domains - inner-shareable and
-* outer-shareable. And inner-shareable DMB fits for SMP memory
-* barriers and outer-shareable DMB for coherent I/O memory barriers,
-* which acts on coherent memory.
-*
-* In most cases, I/O memory barriers are safer but if operations are
-* on coherent memory instead of incoherent MMIO region of a device,
-* then coherent I/O memory barriers can be used and this could bring
-* performance gain depending on architectures.
-*/
-///@{
-/**
-* Write memory barrier for coherent memory between lcore and I/O device
-*
-* Guarantees that the STORE operations on coherent memory that
-* precede the rte_cio_wmb() call are visible to I/O device before the
-* STORE operations that follow it.
-*/
-static inline void rte_cio_wmb(void);
-
-/**
-* Read memory barrier for coherent memory between lcore and I/O device
-*
-* Guarantees that the LOAD operations on coherent memory updated by
-* I/O device that precede the rte_cio_rmb() call are visible to CPU
-* before the LOAD operations that follow it.
-*/
-static inline void rte_cio_rmb(void);
-///@}
-
 #endif /* __DOXYGEN__ */
 
 /**
@@ -36,10 +36,6 @@ extern "C" {
 
 #define rte_io_rmb() rte_rmb()
 
-#define rte_cio_wmb() rte_wmb()
-
-#define rte_cio_rmb() rte_rmb()
-
 static __rte_always_inline void
 rte_atomic_thread_fence(int memory_order)
 {
@@ -79,10 +79,6 @@ rte_smp_mb(void)
 
 #define rte_io_rmb() rte_compiler_barrier()
 
-#define rte_cio_wmb() rte_compiler_barrier()
-
-#define rte_cio_rmb() rte_compiler_barrier()
-
 /**
 * Synchronization fence between threads based on the specified memory order.
 *
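Out-of-tree code that still references the removed names will no longer build against 20.11. A possible local compatibility shim during migration (a hypothetical sketch, not part of this commit; the rte_io_* barriers are the documented replacements):

/* compat_cio.h: hypothetical out-of-tree shim, not provided by DPDK. */
#ifndef COMPAT_CIO_H
#define COMPAT_CIO_H

#include <rte_atomic.h>

/* Map the removed coherent I/O barriers onto their rte_io_* replacements. */
#ifndef rte_cio_wmb
#define rte_cio_wmb() rte_io_wmb()
#endif
#ifndef rte_cio_rmb
#define rte_cio_rmb() rte_io_rmb()
#endif

#endif /* COMPAT_CIO_H */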