ring: introduce C11 memory model barrier option

This patch is to support C11 memory model barrier in librte_ring.

There are 2 barrier implementation options in librte_ring (suggested
by Jerin):
1. use rte_smp_rmb
2. use load_acquire/store_release (refer to [1]).
Two options are provided because the performance benchmark results
differ between arm machines, refer to [2].

CONFIG_RTE_RING_USE_C11_MEM_MODEL is provided; by default it is "n"
on all architectures, and it is set to "y" only on arm64 so far.

[1] https://github.com/freebsd/freebsd/blob/master/sys/sys/buf_ring.h#L170
[2] http://dpdk.org/ml/archives/dev/2017-October/080861.html

Suggested-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
Signed-off-by: Jia He <jia.he@hxt-semitech.com>
Acked-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
Acked-by: Olivier Matz <olivier.matz@6wind.com>
Acked-by: Jianbo Liu <jianbo.liu@arm.com>
Acked-by: Hemant Agrawal <hemant.agrawal@nxp.com>
This commit is contained in:
Jia He 2018-01-21 20:41:28 -08:00 committed by Thomas Monjalon
parent c9fb3c6289
commit 39368ebfc6
5 changed files with 236 additions and 2 deletions

View File

@ -29,6 +29,8 @@ CONFIG_RTE_ARCH_ARM64_MEMCPY=n
#CONFIG_RTE_ARM64_MEMCPY_ALIGN_MASK=0xF
#CONFIG_RTE_ARM64_MEMCPY_STRICT_ALIGN=n
CONFIG_RTE_RING_USE_C11_MEM_MODEL=y
CONFIG_RTE_LIBRTE_FM10K_PMD=n
CONFIG_RTE_LIBRTE_SFC_EFX_PMD=n
CONFIG_RTE_LIBRTE_AVP_PMD=n

View File

@ -611,6 +611,7 @@ CONFIG_RTE_LIBRTE_PMD_DPAA2_EVENTDEV=n
# Compile librte_ring
#
CONFIG_RTE_LIBRTE_RING=y
CONFIG_RTE_RING_USE_C11_MEM_MODEL=n
#
# Compile librte_mempool

View File

@ -18,6 +18,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_RING) := rte_ring.c
# install includes
SYMLINK-$(CONFIG_RTE_LIBRTE_RING)-include := rte_ring.h \
rte_ring_generic.h
rte_ring_generic.h \
rte_ring_c11_mem.h
include $(RTE_SDK)/mk/rte.lib.mk

View File

@ -357,8 +357,20 @@ void rte_ring_dump(FILE *f, const struct rte_ring *r);
} \
} while (0)
/* Move common functions to generic file */
/* On weakly ordered CPUs (powerpc/arm), two loads may be reordered
* with respect to each other.
* There are 2 choices for the users:
* 1. use the rmb() memory barrier
* 2. use one-direction load_acquire/store_release barriers, selected by
* CONFIG_RTE_RING_USE_C11_MEM_MODEL=y
* Which one to use depends on performance test results.
* By default, the common functions come from rte_ring_generic.h
*/
#ifdef RTE_RING_USE_C11_MEM_MODEL
#include "rte_ring_c11_mem.h"
#else
#include "rte_ring_generic.h"
#endif
/**
* @internal Enqueue several objects on the ring

View File

@ -0,0 +1,218 @@
/*-
* BSD LICENSE
*
* Copyright(c) 2017 HXT-semitech Corporation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of HXT-semitech Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* Derived from FreeBSD's bufring.h
*
**************************************************************************
*
* Copyright (c) 2007-2009 Kip Macy kmacy@freebsd.org
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. The name of Kip Macy nor the names of other
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
#ifndef _RTE_RING_C11_MEM_H_
#define _RTE_RING_C11_MEM_H_
/**
 * @internal Publish a new tail value for one side of the ring
 *
 * @param ht
 *   The head/tail pair whose tail is advanced
 * @param old_val
 *   The tail value that must be observed before this caller publishes
 *   (only checked when 'single' is zero)
 * @param new_val
 *   The tail value to publish
 * @param single
 *   Non-zero skips the wait for the tail to reach old_val
 * @param enqueue
 *   Unused in this implementation
 */
static __rte_always_inline void
update_tail(struct rte_ring_headtail *ht, uint32_t old_val, uint32_t new_val,
		uint32_t single, uint32_t enqueue)
{
	RTE_SET_USED(enqueue);

	/*
	 * If there are other enqueues/dequeues in progress that preceded us,
	 * we need to wait for them to complete.
	 *
	 * The tail is written concurrently with __atomic_store_n(), so it is
	 * read atomically here as well. Relaxed ordering is enough for the
	 * poll: the store-release below is what publishes our ring accesses.
	 */
	if (!single) {
		while (unlikely(__atomic_load_n(&ht->tail,
				__ATOMIC_RELAXED) != old_val))
			rte_pause();
	}

	__atomic_store_n(&ht->tail, new_val, __ATOMIC_RELEASE);
}
/**
 * @internal This function updates the producer head for enqueue
 *
 * @param r
 *   A pointer to the ring structure
 * @param is_sp
 *   Non-zero selects the single-producer path (the new head is written with
 *   a plain store); zero selects the multi-producer path (the head is moved
 *   with an atomic compare-and-swap and retried on failure)
 * @param n
 *   The number of elements we will want to enqueue, i.e. how far should the
 *   head be moved
 * @param behavior
 *   RTE_RING_QUEUE_FIXED:    Enqueue a fixed number of items to the ring
 *   RTE_RING_QUEUE_VARIABLE: Enqueue as many items as possible to the ring
 * @param old_head
 *   Returns head value as it was before the move, i.e. where enqueue starts
 * @param new_head
 *   Returns the current/new head value i.e. where enqueue finishes
 * @param free_entries
 *   Returns the amount of free space in the ring BEFORE head was moved
 * @return
 *   Actual number of objects enqueued.
 *   If behavior == RTE_RING_QUEUE_FIXED, this will be 0 or n only.
 */
static __rte_always_inline unsigned int
__rte_ring_move_prod_head(struct rte_ring *r, int is_sp,
		unsigned int n, enum rte_ring_queue_behavior behavior,
		uint32_t *old_head, uint32_t *new_head,
		uint32_t *free_entries)
{
	const uint32_t capacity = r->capacity;
	unsigned int max = n;
	uint32_t cons_tail;
	int success;

	do {
		/* Reset n to the initial burst count */
		n = max;

		/* Acquire: the head must be read before the consumer tail */
		*old_head = __atomic_load_n(&r->prod.head,
					__ATOMIC_ACQUIRE);

		/*
		 * Load-acquire pairs with the store-release of the tail in
		 * update_tail(), so the slot writes done by the caller after
		 * this function cannot be ordered before the tail read.
		 */
		cons_tail = __atomic_load_n(&r->cons.tail,
					__ATOMIC_ACQUIRE);

		/*
		 * The subtraction is done between two unsigned 32bits value
		 * (the result is always modulo 32 bits even if we have
		 * *old_head > cons_tail). So 'free_entries' is always between 0
		 * and capacity (which is < size).
		 */
		*free_entries = (capacity + cons_tail - *old_head);

		/* check that we have enough room in ring */
		if (unlikely(n > *free_entries))
			n = (behavior == RTE_RING_QUEUE_FIXED) ?
					0 : *free_entries;

		if (n == 0)
			return 0;

		*new_head = *old_head + n;
		if (is_sp) {
			r->prod.head = *new_head;
			success = 1;
		} else {
			/* on failure the loop reloads the head and retries */
			success = __atomic_compare_exchange_n(&r->prod.head,
					old_head, *new_head,
					0, __ATOMIC_ACQUIRE,
					__ATOMIC_RELAXED);
		}
	} while (unlikely(success == 0));
	return n;
}
/**
 * @internal This function updates the consumer head for dequeue
 *
 * @param r
 *   A pointer to the ring structure
 * @param is_sc
 *   Non-zero selects the single-consumer path (the new head is written with
 *   a plain store); zero selects the multi-consumer path (the head is moved
 *   with an atomic compare-and-swap and retried on failure)
 * @param n
 *   The number of elements we will want to dequeue, i.e. how far should the
 *   head be moved
 * @param behavior
 *   RTE_RING_QUEUE_FIXED:    Dequeue a fixed number of items from a ring
 *   RTE_RING_QUEUE_VARIABLE: Dequeue as many items as possible from ring
 * @param old_head
 *   Returns head value as it was before the move, i.e. where dequeue starts
 * @param new_head
 *   Returns the current/new head value i.e. where dequeue finishes
 * @param entries
 *   Returns the number of entries in the ring BEFORE head was moved
 * @return
 *   - Actual number of objects dequeued.
 *     If behavior == RTE_RING_QUEUE_FIXED, this will be 0 or n only.
 */
static __rte_always_inline unsigned int
__rte_ring_move_cons_head(struct rte_ring *r, int is_sc,
		unsigned int n, enum rte_ring_queue_behavior behavior,
		uint32_t *old_head, uint32_t *new_head,
		uint32_t *entries)
{
	unsigned int max = n;
	uint32_t prod_tail;
	int success;

	/* move cons.head atomically */
	do {
		/* Restore n as it may change every loop */
		n = max;

		/* Acquire: the head must be read before the producer tail */
		*old_head = __atomic_load_n(&r->cons.head,
					__ATOMIC_ACQUIRE);

		/*
		 * Load-acquire pairs with the store-release of the tail in
		 * update_tail(), so the slot reads done by the caller after
		 * this function cannot be ordered before the tail read.
		 */
		prod_tail = __atomic_load_n(&r->prod.tail,
					__ATOMIC_ACQUIRE);

		/* The subtraction is done between two unsigned 32bits value
		 * (the result is always modulo 32 bits even if we have
		 * cons_head > prod_tail). So 'entries' is always between 0
		 * and size(ring)-1.
		 */
		*entries = (prod_tail - *old_head);

		/* Set the actual entries for dequeue */
		if (n > *entries)
			n = (behavior == RTE_RING_QUEUE_FIXED) ? 0 : *entries;

		if (unlikely(n == 0))
			return 0;

		*new_head = *old_head + n;
		if (is_sc) {
			r->cons.head = *new_head;
			success = 1;
		} else {
			/* on failure the loop reloads the head and retries */
			success = __atomic_compare_exchange_n(&r->cons.head,
					old_head, *new_head,
					0, __ATOMIC_ACQUIRE,
					__ATOMIC_RELAXED);
		}
	} while (unlikely(success == 0));
	return n;
}
#endif /* _RTE_RING_C11_MEM_H_ */