ring: introduce C11 memory model barrier option
This patch adds support for C11 memory model barriers in librte_ring. There are 2 barrier implementation options in librte_ring (suggested by Jerin): 1. use rte_smp_rmb 2. use load_acquire/store_release (refer to [1]). Two options are provided because the performance benchmark results differ between arm machines, refer to [2]. CONFIG_RTE_RING_USE_C11_MEM_MODEL is provided; by default it is "n" on all architectures and is "y" only on arm64 so far. [1] https://github.com/freebsd/freebsd/blob/master/sys/sys/buf_ring.h#L170 [2] http://dpdk.org/ml/archives/dev/2017-October/080861.html Suggested-by: Jerin Jacob <jerin.jacob@caviumnetworks.com> Signed-off-by: Jia He <jia.he@hxt-semitech.com> Acked-by: Jerin Jacob <jerin.jacob@caviumnetworks.com> Acked-by: Olivier Matz <olivier.matz@6wind.com> Acked-by: Jianbo Liu <jianbo.liu@arm.com> Acked-by: Hemant Agrawal <hemant.agrawal@nxp.com>
This commit is contained in:
parent
c9fb3c6289
commit
39368ebfc6
@ -29,6 +29,8 @@ CONFIG_RTE_ARCH_ARM64_MEMCPY=n
|
||||
#CONFIG_RTE_ARM64_MEMCPY_ALIGN_MASK=0xF
|
||||
#CONFIG_RTE_ARM64_MEMCPY_STRICT_ALIGN=n
|
||||
|
||||
CONFIG_RTE_RING_USE_C11_MEM_MODEL=y
|
||||
|
||||
CONFIG_RTE_LIBRTE_FM10K_PMD=n
|
||||
CONFIG_RTE_LIBRTE_SFC_EFX_PMD=n
|
||||
CONFIG_RTE_LIBRTE_AVP_PMD=n
|
||||
|
@ -611,6 +611,7 @@ CONFIG_RTE_LIBRTE_PMD_DPAA2_EVENTDEV=n
|
||||
# Compile librte_ring
|
||||
#
|
||||
CONFIG_RTE_LIBRTE_RING=y
|
||||
CONFIG_RTE_RING_USE_C11_MEM_MODEL=n
|
||||
|
||||
#
|
||||
# Compile librte_mempool
|
||||
|
@ -18,6 +18,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_RING) := rte_ring.c
|
||||
|
||||
# install includes
|
||||
SYMLINK-$(CONFIG_RTE_LIBRTE_RING)-include := rte_ring.h \
|
||||
rte_ring_generic.h
|
||||
rte_ring_generic.h \
|
||||
rte_ring_c11_mem.h
|
||||
|
||||
include $(RTE_SDK)/mk/rte.lib.mk
|
||||
|
@ -357,8 +357,20 @@ void rte_ring_dump(FILE *f, const struct rte_ring *r);
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
/* Move common functions to generic file */
|
||||
/* Between load and load, there might be CPU reordering in weak memory models
|
||||
* (powerpc/arm).
|
||||
* There are 2 choices for the users
|
||||
* 1.use rmb() memory barrier
|
||||
* 2.use one-direction load_acquire/store_release barrier, defined by
|
||||
* CONFIG_RTE_RING_USE_C11_MEM_MODEL=y
|
||||
* It depends on performance test results.
|
||||
* By default, move common functions to rte_ring_generic.h
|
||||
*/
|
||||
#ifdef RTE_RING_USE_C11_MEM_MODEL
|
||||
#include "rte_ring_c11_mem.h"
|
||||
#else
|
||||
#include "rte_ring_generic.h"
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @internal Enqueue several objects on the ring
|
||||
|
218
lib/librte_ring/rte_ring_c11_mem.h
Normal file
218
lib/librte_ring/rte_ring_c11_mem.h
Normal file
@ -0,0 +1,218 @@
|
||||
/*-
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Copyright(c) 2017 HXT-semitech Corporation.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of HXT-semitech Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Derived from FreeBSD's bufring.h
|
||||
*
|
||||
**************************************************************************
|
||||
*
|
||||
* Copyright (c) 2007-2009 Kip Macy kmacy@freebsd.org
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. The name of Kip Macy nor the names of other
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
#ifndef _RTE_RING_C11_MEM_H_
|
||||
#define _RTE_RING_C11_MEM_H_
|
||||
|
||||
static __rte_always_inline void
|
||||
update_tail(struct rte_ring_headtail *ht, uint32_t old_val, uint32_t new_val,
|
||||
uint32_t single, uint32_t enqueue)
|
||||
{
|
||||
RTE_SET_USED(enqueue);
|
||||
|
||||
/*
|
||||
* If there are other enqueues/dequeues in progress that preceded us,
|
||||
* we need to wait for them to complete
|
||||
*/
|
||||
if (!single)
|
||||
while (unlikely(ht->tail != old_val))
|
||||
rte_pause();
|
||||
|
||||
__atomic_store_n(&ht->tail, new_val, __ATOMIC_RELEASE);
|
||||
}
|
||||
|
||||
/**
|
||||
* @internal This function updates the producer head for enqueue
|
||||
*
|
||||
* @param r
|
||||
* A pointer to the ring structure
|
||||
* @param is_sp
|
||||
* Indicates whether multi-producer path is needed or not
|
||||
* @param n
|
||||
* The number of elements we will want to enqueue, i.e. how far should the
|
||||
* head be moved
|
||||
* @param behavior
|
||||
* RTE_RING_QUEUE_FIXED: Enqueue a fixed number of items to a ring
|
||||
* RTE_RING_QUEUE_VARIABLE: Enqueue as many items as possible to ring
|
||||
* @param old_head
|
||||
* Returns head value as it was before the move, i.e. where enqueue starts
|
||||
* @param new_head
|
||||
* Returns the current/new head value i.e. where enqueue finishes
|
||||
* @param free_entries
|
||||
* Returns the amount of free space in the ring BEFORE head was moved
|
||||
* @return
|
||||
* Actual number of objects enqueued.
|
||||
* If behavior == RTE_RING_QUEUE_FIXED, this will be 0 or n only.
|
||||
*/
|
||||
static __rte_always_inline unsigned int
|
||||
__rte_ring_move_prod_head(struct rte_ring *r, int is_sp,
|
||||
unsigned int n, enum rte_ring_queue_behavior behavior,
|
||||
uint32_t *old_head, uint32_t *new_head,
|
||||
uint32_t *free_entries)
|
||||
{
|
||||
const uint32_t capacity = r->capacity;
|
||||
unsigned int max = n;
|
||||
int success;
|
||||
|
||||
do {
|
||||
/* Reset n to the initial burst count */
|
||||
n = max;
|
||||
|
||||
*old_head = __atomic_load_n(&r->prod.head,
|
||||
__ATOMIC_ACQUIRE);
|
||||
const uint32_t cons_tail = r->cons.tail;
|
||||
/*
|
||||
* The subtraction is done between two unsigned 32bits value
|
||||
* (the result is always modulo 32 bits even if we have
|
||||
* *old_head > cons_tail). So 'free_entries' is always between 0
|
||||
* and capacity (which is < size).
|
||||
*/
|
||||
*free_entries = (capacity + cons_tail - *old_head);
|
||||
|
||||
/* check that we have enough room in ring */
|
||||
if (unlikely(n > *free_entries))
|
||||
n = (behavior == RTE_RING_QUEUE_FIXED) ?
|
||||
0 : *free_entries;
|
||||
|
||||
if (n == 0)
|
||||
return 0;
|
||||
|
||||
*new_head = *old_head + n;
|
||||
if (is_sp)
|
||||
r->prod.head = *new_head, success = 1;
|
||||
else
|
||||
success = __atomic_compare_exchange_n(&r->prod.head,
|
||||
old_head, *new_head,
|
||||
0, __ATOMIC_ACQUIRE,
|
||||
__ATOMIC_RELAXED);
|
||||
} while (unlikely(success == 0));
|
||||
return n;
|
||||
}
|
||||
|
||||
/**
|
||||
* @internal This function updates the consumer head for dequeue
|
||||
*
|
||||
* @param r
|
||||
* A pointer to the ring structure
|
||||
* @param is_sc
|
||||
* Indicates whether multi-consumer path is needed or not
|
||||
* @param n
|
||||
* The number of elements we will want to dequeue, i.e. how far should the
|
||||
* head be moved
|
||||
* @param behavior
|
||||
* RTE_RING_QUEUE_FIXED: Dequeue a fixed number of items from a ring
|
||||
* RTE_RING_QUEUE_VARIABLE: Dequeue as many items as possible from ring
|
||||
* @param old_head
|
||||
* Returns head value as it was before the move, i.e. where dequeue starts
|
||||
* @param new_head
|
||||
* Returns the current/new head value i.e. where dequeue finishes
|
||||
* @param entries
|
||||
* Returns the number of entries in the ring BEFORE head was moved
|
||||
* @return
|
||||
* - Actual number of objects dequeued.
|
||||
* If behavior == RTE_RING_QUEUE_FIXED, this will be 0 or n only.
|
||||
*/
|
||||
static __rte_always_inline unsigned int
|
||||
__rte_ring_move_cons_head(struct rte_ring *r, int is_sc,
|
||||
unsigned int n, enum rte_ring_queue_behavior behavior,
|
||||
uint32_t *old_head, uint32_t *new_head,
|
||||
uint32_t *entries)
|
||||
{
|
||||
unsigned int max = n;
|
||||
int success;
|
||||
|
||||
/* move cons.head atomically */
|
||||
do {
|
||||
/* Restore n as it may change every loop */
|
||||
n = max;
|
||||
*old_head = __atomic_load_n(&r->cons.head,
|
||||
__ATOMIC_ACQUIRE);
|
||||
const uint32_t prod_tail = r->prod.tail;
|
||||
/* The subtraction is done between two unsigned 32bits value
|
||||
* (the result is always modulo 32 bits even if we have
|
||||
* cons_head > prod_tail). So 'entries' is always between 0
|
||||
* and size(ring)-1.
|
||||
*/
|
||||
*entries = (prod_tail - *old_head);
|
||||
|
||||
/* Set the actual entries for dequeue */
|
||||
if (n > *entries)
|
||||
n = (behavior == RTE_RING_QUEUE_FIXED) ? 0 : *entries;
|
||||
|
||||
if (unlikely(n == 0))
|
||||
return 0;
|
||||
|
||||
*new_head = *old_head + n;
|
||||
if (is_sc)
|
||||
r->cons.head = *new_head, success = 1;
|
||||
else
|
||||
success = __atomic_compare_exchange_n(&r->cons.head,
|
||||
old_head, *new_head,
|
||||
0, __ATOMIC_ACQUIRE,
|
||||
__ATOMIC_RELAXED);
|
||||
} while (unlikely(success == 0));
|
||||
return n;
|
||||
}
|
||||
|
||||
#endif /* _RTE_RING_C11_MEM_H_ */
|
Loading…
Reference in New Issue
Block a user