sched: initial import

Signed-off-by: Intel
2013-06-03 00:00:00 +00:00 · 2013-06-03 00:00:00 +00:00 · de3cfa2c98
commit de3cfa2c98
parent e6541fdec8
32 changed files with 8847 additions and 3 deletions
--- a/app/test/Makefile
+++ b/app/test/Makefile
@ -85,6 +85,8 @@ SRCS-$(CONFIG_RTE_APP_TEST) += test_cmdline_ipaddr.c
 SRCS-$(CONFIG_RTE_APP_TEST) += test_cmdline_cirbuf.c
 SRCS-$(CONFIG_RTE_APP_TEST) += test_cmdline_string.c
 SRCS-$(CONFIG_RTE_APP_TEST) += test_cmdline_lib.c
 SRCS-$(CONFIG_RTE_APP_TEST) += test_red.c
 SRCS-$(CONFIG_RTE_APP_TEST) += test_sched.c
 SRCS-$(CONFIG_RTE_APP_TEST) += test_meter.c
 SRCS-$(CONFIG_RTE_APP_TEST) += test_pmac_pm.c
 SRCS-$(CONFIG_RTE_APP_TEST) += test_pmac_acl.c
--- a/app/test/autotest_data.py
+++ b/app/test/autotest_data.py
@ -271,7 +271,7 @@ def all_sockets(num):
 },
 {
 	"Prefix":	"group_6",
-	"Memory" :	all_sockets(588),
+	"Memory" :	all_sockets(600),
 	"Tests" :	
 	[
 		{
@ -297,7 +297,13 @@ def all_sockets(num):
 		 "Command" : 	"prefetch_autotest",
 		 "Func" :	default_autotest,
 		 "Report" :	None,
-		},
+		 },
 		{
 		 "Name" :"Red autotest",
 		 "Command" : "red_autotest",
 		 "Func" :default_autotest,
 		 "Report" :None,
 		 },
 	]
 },
 {
@ -317,6 +323,12 @@ def all_sockets(num):
 		 "Func" :	default_autotest,
 		 "Report" :	None,
 		},
 		 {
 		 "Name" :"Sched autotest",
 		 "Command" : "sched_autotest",
 		 "Func" :default_autotest,
 		 "Report" :None,
 		 },
 	]
 },
 ]
--- a/app/test/commands.c
+++ b/app/test/commands.c
@ -167,6 +167,10 @@ static void cmd_autotest_parsed(void *parsed_result,
 		ret |= test_memcpy_perf();
 	if (all || !strcmp(res->autotest, "func_reentrancy_autotest"))
 		ret |= test_func_reentrancy();
 	if (all || !strcmp(res->autotest, "red_autotest"))
 		ret |= test_red();
 	if (all || !strcmp(res->autotest, "sched_autotest"))
 		ret |= test_sched();
 	if (all || !strcmp(res->autotest, "meter_autotest"))
 		ret |= test_meter();
 	if (all || !strcmp(res->autotest, "pm_autotest"))
@ -203,7 +207,7 @@ cmdline_parse_token_string_t cmd_autotest_autotest =
 			"version_autotest#eal_fs_autotest#"
 			"cmdline_autotest#func_reentrancy_autotest#"
 			"mempool_perf_autotest#hash_perf_autotest#"
-			"meter_autotest#"
+			"red_autotest#meter_autotest#sched_autotest#"
 			"memcpy_perf_autotest#pm_autotest#"
 			"acl_autotest#power_autotest#"
 			"all_autotests");
--- a/app/test/test.h
+++ b/app/test/test.h
@ -84,6 +84,8 @@ int test_version(void);
 int test_eal_fs(void);
 int test_cmdline(void);
 int test_func_reentrancy(void);
 int test_red(void);
 int test_sched(void);
 int test_meter(void);
 int test_pmac_pm(void);
 int test_pmac_acl(void);
--- a/app/test/test_red.c
+++ b/app/test/test_red.c
--- a/app/test/test_sched.c
+++ b/app/test/test_sched.c
@ -0,0 +1,244 @@
 /*-
 *   BSD LICENSE
 * 
 *   Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
 *   All rights reserved.
 * 
 *   Redistribution and use in source and binary forms, with or without 
 *   modification, are permitted provided that the following conditions 
 *   are met:
 * 
 *     * Redistributions of source code must retain the above copyright 
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright 
 *       notice, this list of conditions and the following disclaimer in 
 *       the documentation and/or other materials provided with the 
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its 
 *       contributors may be used to endorse or promote products derived 
 *       from this software without specific prior written permission.
 * 
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 * 
 */
 #include <stdlib.h>
 #include <stdio.h>
 #include <string.h>
 #include <stdint.h>
 #include <unistd.h>
 #include <cmdline_parse.h>
 #include "test.h"
 #if defined(RTE_LIBRTE_SCHED) && defined(RTE_ARCH_X86_64)
 #include <rte_cycles.h>
 #include <rte_ether.h>
 #include <rte_ip.h>
 #include <rte_byteorder.h>
 #include <rte_sched.h>
 /*
 * Check a test condition. When exp is false, print the printf-style message
 * and make the ENCLOSING function return -1, so this may only be used inside
 * functions returning int. The body is wrapped in do { } while (0) so that
 * "VERIFY(...);" is a single statement and stays safe in an unbraced if/else.
 */
 #define VERIFY(exp,fmt,args...)                                         \
 	do {                                                                \
 		if (!(exp)) {                                                   \
 			printf(fmt, ##args);                                        \
 			return -1;                                                  \
 		}                                                               \
 	} while (0)
 #define SUBPORT 	0
 #define PIPE 		1
 #define TC 			2
 #define QUEUE 		3
 /*
 * Parameters of the single subport configured by test_sched() through
 * rte_sched_subport_config(). The token bucket rate and every traffic class
 * rate are 1250000000, the same value test_sched() stores in port_param.rate
 * (10000 * 1000 * 1000 / 8).
 * NOTE(review): rates are presumably bytes per second and tc_period
 * milliseconds -- confirm against rte_sched.h.
 */
 static struct rte_sched_subport_params subport_param[] = {
 	{
 		.tb_rate = 1250000000,
 		.tb_size = 1000000,
 		.tc_rate = {1250000000, 1250000000, 1250000000, 1250000000},
 		.tc_period = 10,
 	},
 };
 /*
 * The only pipe profile used by the test; test_sched() applies profile index
 * 0 to every pipe of the subport. 305175 equals 1250000000 / 4096 (integer
 * division), i.e. the subport rate split across port_param.n_pipes_per_subport
 * pipes. All sixteen WRR weights are equal.
 */
 static struct rte_sched_pipe_params pipe_profile[] = {
 	{ /* Profile #0 */
 		.tb_rate = 305175,
 		.tb_size = 1000000,
 		.tc_rate = {305175, 305175, 305175, 305175},
 		.tc_period = 40,
 		.wrr_weights = {1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1},
 	},
 };
 /*
 * Scheduler port configuration passed to rte_sched_port_config().
 * The name, socket and rate fields are (re)assigned by test_sched() right
 * before the port is created, so the values given here for socket and rate
 * are placeholders. The port has one subport with 4096 pipes, all of which
 * use the single profile in pipe_profile[].
 */
 static struct rte_sched_port_params port_param = {
 	.name = "port_0",
 	.socket = 0, /* computed */
 	.rate = 0, /* computed */
 	.frame_overhead = RTE_SCHED_FRAME_OVERHEAD_DEFAULT,
 	.n_subports_per_port = 1,
 	.n_pipes_per_subport = 4096,
 	.qsize = {64, 64, 64, 64},
 	.pipe_profiles = pipe_profile,
 	.n_pipe_profiles = 1,
 };
 #define NB_MBUF          32
 #define MAX_PACKET_SZ    2048
 #define MBUF_SZ (MAX_PACKET_SZ + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM)
 #define PKT_BURST_SZ     32
 #define MEMPOOL_CACHE_SZ PKT_BURST_SZ
 #define SOCKET           0
 /*
 * Return the mbuf pool named "test_sched". An existing pool with that name is
 * reused; otherwise a new one is created with NB_MBUF elements of MBUF_SZ
 * bytes on socket SOCKET. The result of rte_mempool_create() is returned
 * unchanged, so the caller must be prepared for NULL.
 */
 static struct rte_mempool *
 create_mempool(void)
 {
 	struct rte_mempool *pool = rte_mempool_lookup("test_sched");
 	/* a previous run of the test already created the pool */
 	if (pool != NULL)
 		return pool;
 	return rte_mempool_create("test_sched", NB_MBUF, MBUF_SZ,
 			MEMPOOL_CACHE_SZ,
 			sizeof(struct rte_pktmbuf_pool_private),
 			rte_pktmbuf_pool_init, NULL,
 			rte_pktmbuf_init, NULL,
 			SOCKET, 0);
 }
 /*
 * Fill in a test packet: write the header fields a classifier would look at
 * (two VLAN TCIs, an IPv4 ether type and a destination address) into the mbuf
 * data area, store the matching scheduler metadata in the mbuf with
 * rte_sched_port_pkt_write(), and set the packet and data lengths to 60.
 *
 * The mbuf must be non-NULL; nothing here checks it.
 */
 static void
 prepare_pkt(struct rte_mbuf *mbuf)
 {
 	struct ether_hdr *eth_hdr;
 	struct vlan_hdr *vlan1, *vlan2;
 	struct ipv4_hdr *ip_hdr;
 	/* Simulate a classifier */
 	eth_hdr = rte_pktmbuf_mtod(mbuf, struct ether_hdr *);
 	/* first VLAN header is overlaid on the Ethernet ether_type field */
 	vlan1 = (struct vlan_hdr *)(&eth_hdr->ether_type );
 	/* second VLAN header follows one struct vlan_hdr later */
 	vlan2 = (struct vlan_hdr *)((uintptr_t)&eth_hdr->ether_type + sizeof(struct vlan_hdr));
 	/*
 	 * NOTE(review): eth_hdr is re-pointed just past the two VLAN headers but
 	 * keeps the type struct ether_hdr *, so the eth_hdr->ether_type store
 	 * below lands at that member's offset from the NEW base, not at the base
 	 * itself, while ip_hdr is placed sizeof(ether_type) bytes after the base.
 	 * Confirm this layout is intended; the test only checks the metadata
 	 * written by rte_sched_port_pkt_write().
 	 */
 	eth_hdr = (struct ether_hdr *)((uintptr_t)&eth_hdr->ether_type + 2 *sizeof(struct vlan_hdr));
 	ip_hdr = (struct ipv4_hdr *)((uintptr_t)eth_hdr +  sizeof(eth_hdr->ether_type));
 	vlan1->vlan_tci = rte_cpu_to_be_16(SUBPORT);
 	vlan2->vlan_tci = rte_cpu_to_be_16(PIPE);
 	eth_hdr->ether_type =  rte_cpu_to_be_16(ETHER_TYPE_IPv4);
 	ip_hdr->dst_addr = IPv4(0,0,TC,QUEUE);
 	/* metadata the scheduler actually uses: tree path plus packet color */
 	rte_sched_port_pkt_write(mbuf, SUBPORT, PIPE, TC, QUEUE, e_RTE_METER_YELLOW);
 	/* 64 byte packet (presumably 60 bytes of data plus a 4-byte CRC -- confirm) */
 	mbuf->pkt.pkt_len  = 60;
 	mbuf->pkt.data_len = 60;
 }
 /**
 * test main entrance for library sched
 */
 int 
 test_sched(void)
 {
 	struct rte_mempool *mp = NULL;
 	struct rte_sched_port *port = NULL;
 	uint32_t pipe;
 	struct rte_mbuf *in_mbufs[10];
 	struct rte_mbuf *out_mbufs[10];
 	int i;
 	int err;
 	mp = create_mempool();
 	port_param.socket = 0;
 	port_param.rate = (uint64_t) 10000 * 1000 * 1000 / 8;
 	port_param.name = "port_0";
 	port = rte_sched_port_config(&port_param);
 	VERIFY(port != NULL, "Error config sched port\n");
 	err = rte_sched_subport_config(port, SUBPORT, subport_param);
 	VERIFY(err == 0, "Error config sched, err=%d\n", err);
 	for (pipe = 0; pipe < port_param.n_pipes_per_subport; pipe ++) {
 		err = rte_sched_pipe_config(port, SUBPORT, pipe, 0);
 		VERIFY(err == 0, "Error config sched pipe %u, err=%d\n", pipe, err);
 	}
 	for (i = 0; i < 10; i++) {
 		in_mbufs[i] = rte_pktmbuf_alloc(mp);
 		prepare_pkt(in_mbufs[i]);
 	}
 	err = rte_sched_port_enqueue(port, in_mbufs, 10);
 	VERIFY(err == 10, "Wrong enqueue, err=%d\n", err);
 	err = rte_sched_port_dequeue(port, out_mbufs, 10);
 	VERIFY(err == 10, "Wrong dequeue, err=%d\n", err);
 	for (i = 0; i < 10; i++) {
 		enum rte_meter_color color;
 		uint32_t subport, traffic_class, queue;
 		color = rte_sched_port_pkt_read_color(out_mbufs[i]);
 		VERIFY(color == e_RTE_METER_YELLOW, "Wrong color\n");
 		rte_sched_port_pkt_read_tree_path(out_mbufs[i],
 				&subport, &pipe, &traffic_class, &queue);
 		VERIFY(subport == SUBPORT, "Wrong subport\n");
 		VERIFY(pipe == PIPE, "Wrong pipe\n");
 		VERIFY(traffic_class == TC, "Wrong traffic_class\n");
 		VERIFY(queue == QUEUE, "Wrong queue\n");
 	}
 	struct rte_sched_subport_stats subport_stats;
 	uint32_t tc_ov;
 	rte_sched_subport_read_stats(port, SUBPORT, &subport_stats, &tc_ov);
 	//VERIFY(subport_stats.n_pkts_tc[TC-1] == 10, "Wrong subport stats\n");
 	struct rte_sched_queue_stats queue_stats;
 	uint16_t qlen;
 	rte_sched_queue_read_stats(port, QUEUE, &queue_stats, &qlen);
 	//VERIFY(queue_stats.n_pkts == 10, "Wrong queue stats\n");
 	rte_sched_port_free(port);
 	return 0;
 }
 #else /* RTE_LIBRTE_SCHED */
 /*
 * Stub used when the real test is compiled out, i.e. when RTE_LIBRTE_SCHED
 * or RTE_ARCH_X86_64 is not defined. It only prints a notice and reports
 * success so that the autotest run is not failed by a missing library.
 */
 int
 test_sched(void)
 {
 	printf("The Scheduler library is not included in this build\n");
 	return 0;
 }
 #endif /* RTE_LIBRTE_SCHED */
--- a/config/defconfig_i686-default-linuxapp-gcc
+++ b/config/defconfig_i686-default-linuxapp-gcc
@ -233,6 +233,16 @@ CONFIG_RTE_LIBRTE_NET=y
 #
 CONFIG_RTE_LIBRTE_METER=y
 #
 # Compile librte_sched
 #
 CONFIG_RTE_LIBRTE_SCHED=y
 CONFIG_RTE_SCHED_RED=n
 CONFIG_RTE_SCHED_COLLECT_STATS=n
 CONFIG_RTE_SCHED_SUBPORT_TC_OV=n
 CONFIG_RTE_SCHED_PORT_N_GRINDERS=8
 CONFIG_RTE_BITMAP_ARRAY1_SIZE=16
 #
 # Compile librte_kni
 #
--- a/config/defconfig_i686-default-linuxapp-icc
+++ b/config/defconfig_i686-default-linuxapp-icc
@ -234,6 +234,16 @@ CONFIG_RTE_LIBRTE_NET=y
 #
 CONFIG_RTE_LIBRTE_METER=y
 #
 # Compile librte_sched
 #
 CONFIG_RTE_LIBRTE_SCHED=y
 CONFIG_RTE_SCHED_RED=n
 CONFIG_RTE_SCHED_COLLECT_STATS=n
 CONFIG_RTE_SCHED_SUBPORT_TC_OV=n
 CONFIG_RTE_SCHED_PORT_N_GRINDERS=8
 CONFIG_RTE_BITMAP_ARRAY1_SIZE=16
 #
 # Compile librte_kni
 #
--- a/config/defconfig_x86_64-default-linuxapp-gcc
+++ b/config/defconfig_x86_64-default-linuxapp-gcc
@ -235,6 +235,17 @@ CONFIG_RTE_LIBRTE_NET=y
 CONFIG_RTE_LIBRTE_METER=y
 #
 # Compile librte_sched
 #
 CONFIG_RTE_LIBRTE_SCHED=y
 CONFIG_RTE_SCHED_RED=n
 CONFIG_RTE_SCHED_COLLECT_STATS=n
 CONFIG_RTE_SCHED_SUBPORT_TC_OV=n
 CONFIG_RTE_SCHED_PORT_N_GRINDERS=8
 CONFIG_RTE_BITMAP_ARRAY1_SIZE=16
 #
 # Compile the test application
 # Compile librte_kni
 #
 CONFIG_RTE_LIBRTE_KNI=y
--- a/config/defconfig_x86_64-default-linuxapp-icc
+++ b/config/defconfig_x86_64-default-linuxapp-icc
@ -234,6 +234,16 @@ CONFIG_RTE_LIBRTE_NET=y
 #
 CONFIG_RTE_LIBRTE_METER=y
 #
 # Compile librte_sched
 #
 CONFIG_RTE_LIBRTE_SCHED=y
 CONFIG_RTE_SCHED_RED=n
 CONFIG_RTE_SCHED_COLLECT_STATS=n
 CONFIG_RTE_SCHED_SUBPORT_TC_OV=n
 CONFIG_RTE_SCHED_PORT_N_GRINDERS=8
 CONFIG_RTE_BITMAP_ARRAY1_SIZE=16
 #
 # Compile librte_kni
 #
--- a/examples/qos_sched/Makefile
+++ b/examples/qos_sched/Makefile
@ -0,0 +1,58 @@
 #   BSD LICENSE
 # 
 #   Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
 #   All rights reserved.
 # 
 #   Redistribution and use in source and binary forms, with or without 
 #   modification, are permitted provided that the following conditions 
 #   are met:
 # 
 #     * Redistributions of source code must retain the above copyright 
 #       notice, this list of conditions and the following disclaimer.
 #     * Redistributions in binary form must reproduce the above copyright 
 #       notice, this list of conditions and the following disclaimer in 
 #       the documentation and/or other materials provided with the 
 #       distribution.
 #     * Neither the name of Intel Corporation nor the names of its 
 #       contributors may be used to endorse or promote products derived 
 #       from this software without specific prior written permission.
 # 
 #   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 #   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 #   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
 #   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
 #   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
 #   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
 #   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
 #   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
 #   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
 #   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
 #   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 # 
 ifeq ($(RTE_SDK),)
 $(error "Please define RTE_SDK environment variable")
 endif
 # Default target, can be overridden by command line or environment
 RTE_TARGET ?= x86_64-default-linuxapp-gcc
 include $(RTE_SDK)/mk/rte.vars.mk
 ifneq ($(CONFIG_RTE_EXEC_ENV),"linuxapp")
 $(error This application can only operate in a linuxapp environment, \
 please change the definition of the RTE_TARGET environment variable)
 endif
 # binary name
 APP = qos_sched
 # all sources are stored in SRCS-y
 SRCS-y := main.c args.c init.c app_thread.c cfg_file.c
 CFLAGS += -O3
 CFLAGS += $(WERROR_FLAGS)
 LDLIBS += -lrte_sched
 include $(RTE_SDK)/mk/rte.extapp.mk
--- a/examples/qos_sched/app_thread.c
+++ b/examples/qos_sched/app_thread.c
@ -0,0 +1,302 @@
 /*-
 *   BSD LICENSE
 * 
 *   Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
 *   All rights reserved.
 * 
 *   Redistribution and use in source and binary forms, with or without 
 *   modification, are permitted provided that the following conditions 
 *   are met:
 * 
 *     * Redistributions of source code must retain the above copyright 
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright 
 *       notice, this list of conditions and the following disclaimer in 
 *       the documentation and/or other materials provided with the 
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its 
 *       contributors may be used to endorse or promote products derived 
 *       from this software without specific prior written permission.
 * 
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 * 
 */
 #include <stdint.h>
 #include <rte_log.h>
 #include <rte_mbuf.h>
 #include <rte_malloc.h>
 #include <rte_cycles.h>
 #include <rte_ethdev.h>
 #include <rte_memcpy.h>
 #include <rte_byteorder.h>
 #include <rte_branch_prediction.h>
 #include <rte_sched.h>
 #include "main.h"
 /*
 * QoS parameters are encoded as follows:
 *		Outer VLAN ID defines subport
 *		Inner VLAN ID defines pipe
 *		Destination IP 0.0.XXX.0 defines traffic class
 *		Destination IP host (0.0.0.XXX) defines queue
 * Values below define the offset of each field, in 16-bit words, from the start of the frame
 */
 #define SUBPORT_OFFSET	7
 #define PIPE_OFFSET		9
 #define TC_OFFSET		20
 #define QUEUE_OFFSET	20
 #define COLOR_OFFSET	19
 static inline int
 get_pkt_sched(struct rte_mbuf *m, uint32_t *subport, uint32_t *pipe,
 			uint32_t *traffic_class, uint32_t *queue, uint32_t *color)
 {
 	uint16_t *pdata = rte_pktmbuf_mtod(m, uint16_t *);
 	*subport = (rte_be_to_cpu_16(pdata[SUBPORT_OFFSET]) & 0x0FFF) &
 			(port_params.n_subports_per_port - 1); /* Outer VLAN ID*/
 	*pipe = (rte_be_to_cpu_16(pdata[PIPE_OFFSET]) & 0x0FFF) &
 			(port_params.n_pipes_per_subport - 1); /* Inner VLAN ID */
 	*traffic_class = (pdata[QUEUE_OFFSET] & 0x0F) &
 			(RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE - 1); /* Destination IP */
 	*queue = ((pdata[QUEUE_OFFSET] >> 8) & 0x0F) &
 			(RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS - 1) ; /* Destination IP */
 	*color = pdata[COLOR_OFFSET] & 0x03; 	/* Destination IP */
 	return 0;
 }
 /*
 * RX lcore main loop. Walks the NULL-terminated confs array round-robin; for
 * each entry it reads one burst from the NIC queue, classifies every packet
 * with get_pkt_sched(), stores the result in the mbuf and hands the burst to
 * the entry's rx_ring. The loop only ends if the current entry is NULL, which
 * in practice means an empty array.
 */
 void
 app_rx_thread(struct thread_conf **confs)
 {
 	struct rte_mbuf *rx_mbufs[burst_conf.rx_burst] __rte_cache_aligned;
 	struct thread_conf *conf;
 	uint32_t subport, pipe, traffic_class, queue, color;
 	uint32_t nb_rx, pkt;
 	int idx = 0;
 	for (conf = confs[idx]; conf != NULL; conf = confs[idx]) {
 		nb_rx = rte_eth_rx_burst(conf->rx_port, conf->rx_queue, rx_mbufs,
 				burst_conf.rx_burst);
 		if (likely(nb_rx != 0)) {
 			APP_STATS_ADD(conf->stat.nb_rx, nb_rx);
 			/* classify each packet and stamp the result into its mbuf */
 			for (pkt = 0; pkt < nb_rx; pkt++) {
 				get_pkt_sched(rx_mbufs[pkt],
 						&subport, &pipe, &traffic_class, &queue, &color);
 				rte_sched_port_pkt_write(rx_mbufs[pkt], subport, pipe,
 						traffic_class, queue, (enum rte_meter_color) color);
 			}
 			/* on a non-zero result the whole burst is freed and counted as dropped */
 			if (unlikely(rte_ring_sp_enqueue_bulk(conf->rx_ring,
 								(void **)rx_mbufs, nb_rx) != 0)) {
 				for (pkt = 0; pkt < nb_rx; pkt++) {
 					rte_pktmbuf_free(rx_mbufs[pkt]);
 					APP_STATS_ADD(conf->stat.nb_drop, 1);
 				}
 			}
 		}
 		/* move to the next entry, wrapping at the NULL terminator */
 		idx++;
 		if (confs[idx] == NULL)
 			idx = 0;
 	}
 }
 /*
 * Transmit every packet staged in qconf->m_table (qconf->n_mbufs of them) on
 * the configured TX port/queue.
 *
 * Packets are never dropped here: whatever rte_eth_tx_burst() did not accept
 * is offered again until nothing is left, so this call does not return while
 * the NIC keeps refusing packets. The function returns nothing and does not
 * reset qconf->n_mbufs; callers do that.
 *
 * The loop is driven by the number of packets still pending. The previous
 * form compared the last sent count with the already reduced remainder and
 * therefore stopped early (leaking mbufs) whenever a partial send moved
 * exactly half of what was pending.
 */
 static inline void
 app_send_burst(struct thread_conf *qconf)
 {
 	struct rte_mbuf **mbufs;
 	uint32_t n, ret;
 	mbufs = (struct rte_mbuf **)qconf->m_table;
 	n = qconf->n_mbufs;
 	while (n != 0) {
 		ret = rte_eth_tx_burst(qconf->tx_port, qconf->tx_queue, mbufs, (uint16_t)n);
 		/* skip what was sent; the rest is re-sent on the next iteration */
 		n -= ret;
 		mbufs = (struct rte_mbuf **)&mbufs[ret];
 	}
 }
 /*
 * Stage nb_pkt packets in the per-thread TX table and flush the table with
 * app_send_burst() each time it reaches burst_conf.tx_burst entries. Packets
 * left over after the last flush stay staged; their count is kept in
 * qconf->n_mbufs.
 */
 static void
 app_send_packets(struct thread_conf *qconf, struct rte_mbuf **mbufs, uint32_t nb_pkt)
 {
 	uint32_t pkt;
 	uint32_t fill = qconf->n_mbufs;
 	for (pkt = 0; pkt < nb_pkt; pkt++) {
 		qconf->m_table[fill++] = mbufs[pkt];
 		if (likely(fill != burst_conf.tx_burst))
 			continue;
 		/* table is full: push it out and start refilling from slot 0 */
 		qconf->n_mbufs = fill;
 		app_send_burst(qconf);
 		fill = 0;
 	}
 	qconf->n_mbufs = fill;
 }
 /*
 * TX lcore main loop. Walks the NULL-terminated confs array round-robin; for
 * each entry it tries to take burst_conf.qos_dequeue packets from tx_ring and
 * stages them for transmission. When an entry has gone more than the drain
 * threshold of consecutive iterations without a successful ring read, any
 * staged packets are flushed.
 * NOTE(review): the threshold is computed from the TSC frequency but is
 * compared against an iteration counter, not elapsed cycles -- confirm this
 * is intended.
 */
 void
 app_tx_thread(struct thread_conf **confs)
 {
 	struct rte_mbuf *mbufs[burst_conf.qos_dequeue];
 	const uint64_t tsc_per_us = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S;
 	const uint64_t drain_limit = tsc_per_us * BURST_TX_DRAIN_US;
 	struct thread_conf *conf;
 	int idx = 0;
 	int rc;
 	for (conf = confs[idx]; conf != NULL; conf = confs[idx]) {
 		rc = rte_ring_sc_dequeue_bulk(conf->tx_ring, (void **)mbufs,
 					burst_conf.qos_dequeue);
 		if (likely(rc == 0)) {
 			app_send_packets(conf, mbufs, burst_conf.qos_dequeue);
 			conf->counter = 0; /* reset empty read loop counter */
 		}
 		conf->counter++;
 		/* drain ring and TX queues */
 		if (unlikely(conf->counter > drain_limit)) {
 			/* flush whatever is still staged for transmission */
 			if (conf->n_mbufs != 0) {
 				app_send_burst(conf);
 				conf->n_mbufs = 0;
 			}
 			conf->counter = 0;
 		}
 		/* move to the next entry, wrapping at the NULL terminator */
 		idx++;
 		if (confs[idx] == NULL)
 			idx = 0;
 	}
 }
 /*
 * Worker lcore main loop. Walks the NULL-terminated confs array round-robin;
 * for each entry it moves one burst from rx_ring into the scheduler, then
 * dequeues up to burst_conf.qos_dequeue packets from the scheduler and pushes
 * them to tx_ring, retrying until the ring accepts them.
 *
 * The scratch array is used both for the ring read (ring_burst entries) and
 * for the scheduler dequeue (qos_dequeue entries), so it is sized for the
 * larger of the two; sizing it by ring_burst alone overflowed the stack when
 * qos_dequeue was configured larger.
 */
 void
 app_worker_thread(struct thread_conf **confs)
 {
 	const uint32_t mbufs_len = (burst_conf.ring_burst > burst_conf.qos_dequeue) ?
 			burst_conf.ring_burst : burst_conf.qos_dequeue;
 	struct rte_mbuf *mbufs[mbufs_len];
 	struct thread_conf *conf;
 	int conf_idx = 0;
 	while ((conf = confs[conf_idx])) {
 		uint32_t nb_pkt;
 		int retval;
 		/* Read packet from the ring */
 		retval = rte_ring_sc_dequeue_bulk(conf->rx_ring, (void **)mbufs,
 					burst_conf.ring_burst);
 		if (likely(retval == 0)) {
 			int nb_sent = rte_sched_port_enqueue(conf->sched_port, mbufs,
 					burst_conf.ring_burst);
 			/* packets the scheduler did not accept are counted as drops */
 			APP_STATS_ADD(conf->stat.nb_drop, burst_conf.ring_burst - nb_sent);
 			APP_STATS_ADD(conf->stat.nb_rx, burst_conf.ring_burst);
 		}
 		nb_pkt = rte_sched_port_dequeue(conf->sched_port, mbufs,
 					burst_conf.qos_dequeue);
 		/* spin until the TX ring has room for the whole batch */
 		if (likely(nb_pkt > 0))
 			while (rte_ring_sp_enqueue_bulk(conf->tx_ring, (void **)mbufs, nb_pkt) != 0);
 		/* move to the next entry, wrapping at the NULL terminator */
 		conf_idx++;
 		if (confs[conf_idx] == NULL)
 			conf_idx = 0;
 	}
 }
 /*
 * Combined worker + TX lcore main loop. Walks the NULL-terminated confs array
 * round-robin; for each entry it moves one burst from rx_ring into the
 * scheduler, dequeues up to burst_conf.qos_dequeue packets from the scheduler
 * and stages them for transmission directly. When an entry has gone more than
 * the drain threshold of consecutive iterations without dequeuing anything,
 * any staged packets are flushed.
 *
 * The scratch array is used both for the ring read (ring_burst entries) and
 * for the scheduler dequeue (qos_dequeue entries), so it is sized for the
 * larger of the two; sizing it by ring_burst alone overflowed the stack when
 * qos_dequeue was configured larger.
 */
 void
 app_mixed_thread(struct thread_conf **confs)
 {
 	const uint32_t mbufs_len = (burst_conf.ring_burst > burst_conf.qos_dequeue) ?
 			burst_conf.ring_burst : burst_conf.qos_dequeue;
 	struct rte_mbuf *mbufs[mbufs_len];
 	struct thread_conf *conf;
 	int conf_idx = 0;
 	const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US;
 	while ((conf = confs[conf_idx])) {
 		uint32_t nb_pkt;
 		int retval;
 		/* Read packet from the ring */
 		retval = rte_ring_sc_dequeue_bulk(conf->rx_ring, (void **)mbufs,
 					burst_conf.ring_burst);
 		if (likely(retval == 0)) {
 			int nb_sent = rte_sched_port_enqueue(conf->sched_port, mbufs,
 					burst_conf.ring_burst);
 			/* packets the scheduler did not accept are counted as drops */
 			APP_STATS_ADD(conf->stat.nb_drop, burst_conf.ring_burst - nb_sent);
 			APP_STATS_ADD(conf->stat.nb_rx, burst_conf.ring_burst);
 		}
 		nb_pkt = rte_sched_port_dequeue(conf->sched_port, mbufs,
 					burst_conf.qos_dequeue);
 		if (likely(nb_pkt > 0)) {
 			app_send_packets(conf, mbufs, nb_pkt);
 			conf->counter = 0; /* reset empty read loop counter */
 		}
 		conf->counter++;
 		/* drain ring and TX queues */
 		if (unlikely(conf->counter > drain_tsc)) {
 			/* flush whatever is still staged for transmission */
 			if (conf->n_mbufs != 0) {
 				app_send_burst(conf);
 				conf->n_mbufs = 0;
 			}
 			conf->counter = 0;
 		}
 		/* move to the next entry, wrapping at the NULL terminator */
 		conf_idx++;
 		if (confs[conf_idx] == NULL)
 			conf_idx = 0;
 	}
 }
--- a/examples/qos_sched/args.c
+++ b/examples/qos_sched/args.c
@ -0,0 +1,467 @@
 /*-
 *   BSD LICENSE
 * 
 *   Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
 *   All rights reserved.
 * 
 *   Redistribution and use in source and binary forms, with or without 
 *   modification, are permitted provided that the following conditions 
 *   are met:
 * 
 *     * Redistributions of source code must retain the above copyright 
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright 
 *       notice, this list of conditions and the following disclaimer in 
 *       the documentation and/or other materials provided with the 
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its 
 *       contributors may be used to endorse or promote products derived 
 *       from this software without specific prior written permission.
 * 
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 * 
 */
 #include <stdio.h>
 #include <string.h>
 #include <stdlib.h>
 #include <stdint.h>
 #include <locale.h>
 #include <unistd.h>
 #include <limits.h>
 #include <getopt.h>
 #include <rte_log.h>
 #include <rte_eal.h>
 #include <rte_lcore.h>
 #include <rte_string_fns.h>
 #include "main.h"
 #define APP_NAME "qos_sched"
 #define MAX_OPT_VALUES 8
 #define SYS_CPU_DIR "/sys/devices/system/cpu/cpu%u/topology/"
 static uint32_t app_master_core = 1;
 static uint32_t app_numa_mask;
 static uint64_t app_used_core_mask = 0;
 static uint64_t app_used_port_mask = 0;
 static uint64_t app_used_rx_port_mask = 0;
 static uint64_t app_used_tx_port_mask = 0;
 /*
 * printf() template for the help text printed by app_usage(). It contains one
 * %s (program name) followed by fourteen %u conversions: the master core, the
 * three ring sizes, the four burst sizes, the three RX thresholds and the
 * three TX thresholds. The argument list in app_usage() must stay in exactly
 * this order.
 */
 static const char usage[] =
 	"                                                                               \n"
 	"    %s <APP PARAMS>                                                            \n"
 	"                                                                               \n"
 	"Application mandatory parameters:                                              \n"
 	"    --pfc \"RX PORT, TX PORT, RX LCORE, WT LCORE\" : Packet flow configuration \n"
 	"           multiple pfc can be configured in command line                      \n"
 	"                                                                               \n"
 	"Application optional parameters:                                               \n"
 	"    --mst I : master core index (default value is %u)                          \n" 
 	"    --rsz \"A, B, C\" :   Ring sizes                                           \n"
 	"           A = Size (in number of buffer descriptors) of each of the NIC RX    \n"
 	"               rings read by the I/O RX lcores (default value is %u)           \n"
 	"           B = Size (in number of elements) of each of the SW rings used by the\n"
 	"               I/O RX lcores to send packets to worker lcores (default value is\n"
 	"               %u)                                                             \n"
 	"           C = Size (in number of buffer descriptors) of each of the NIC TX    \n"
 	"               rings written by worker lcores (default value is %u)            \n"
 	"    --bsz \"A, B, C, D\": Burst sizes                                          \n"
 	"           A = I/O RX lcore read burst size from NIC RX (default value is %u)  \n"
 	"           B = I/O RX lcore write burst size to output SW rings,               \n"
 	"               Worker lcore read burst size from input SW rings,               \n"
 	"               QoS enqueue size (default value is %u)                          \n"
 	"           C = QoS dequeue size (default value is %u)                          \n"
 	"           D = Worker lcore write burst size to NIC TX (default value is %u)   \n"
 	"    --rth \"A, B, C\" :   RX queue threshold parameters                        \n"
 	"           A = RX prefetch threshold (default value is %u)                     \n"
 	"           B = RX host threshold (default value is %u)                         \n"
 	"           C = RX write-back threshold (default value is %u)                   \n"
 	"    --tth \"A, B, C\" :   TX queue threshold parameters                        \n"
 	"           A = TX prefetch threshold (default value is %u)                     \n"
 	"           B = TX host threshold (default value is %u)                         \n"
 	"           C = TX write-back threshold (default value is %u)                   \n"
 	"    --cfg FILE : profile configuration to load                                 \n"
 ;
 /*
 * Print the help text to stdout.
 *
 * prgname is substituted for the %s in the usage template; the remaining
 * arguments supply the default values shown next to each option and must be
 * listed in the same order as the %u conversions appear in the template.
 */
 static void
 app_usage(const char *prgname)
 {
 	printf(usage, prgname, app_master_core,
 		APP_RX_DESC_DEFAULT, APP_RING_SIZE, APP_TX_DESC_DEFAULT,
 		MAX_PKT_RX_BURST, PKT_ENQUEUE, PKT_DEQUEUE, MAX_PKT_TX_BURST,
 		RX_PTHRESH, RX_HTHRESH, RX_WTHRESH,
 		TX_PTHRESH, TX_HTHRESH, TX_WTHRESH
 		);
 }
 /* Return 1 when the two NUL-terminated strings are identical, 0 otherwise. */
 static inline int str_is(const char *str, const char *is)
 {
 	if (strcmp(str, is) != 0)
 		return 0;
 	return 1;
 }
 /*
 * Build a bit mask of the lcores DPDK is using: one bit per lcore whose role
 * in the EAL configuration is ROLE_RTE, plus the bit of the master lcore.
 */
 static uint64_t
 app_eal_core_mask(void)
 {
 	struct rte_config *eal_cfg = rte_eal_get_configuration();
 	/* the master lcore is always part of the mask */
 	uint64_t mask = 1ULL << eal_cfg->master_lcore;
 	uint32_t lcore;
 	for (lcore = 0; lcore < RTE_MAX_LCORE; lcore++) {
 		if (eal_cfg->lcore_role[lcore] != ROLE_RTE)
 			continue;
 		mask |= 1ULL << lcore;
 	}
 	return mask;
 }
 /*
 * Count the CPUs present in the system by probing, for every index below
 * RTE_MAX_LCORE, whether the sysfs topology directory built from SYS_CPU_DIR
 * exists. Indexes whose path cannot be formatted are skipped.
 *
 * The index is unsigned so that it matches the %u conversion in SYS_CPU_DIR.
 */
 static uint32_t
 app_cpu_core_count(void)
 {
 	unsigned i;
 	int len;
 	char path[PATH_MAX];
 	uint32_t ncores = 0;
 	for (i = 0; i < RTE_MAX_LCORE; i++) {
 		len = rte_snprintf(path, sizeof(path), SYS_CPU_DIR, i);
 		/* formatting failed or the path was truncated */
 		if (len <= 0 || (unsigned)len >= sizeof(path))
 			continue;
 		if (access(path, F_OK) == 0)
 			ncores++;
 	}
 	return ncores;
 }
 /*
 * Split conf_str at every separator and convert up to n_vals tokens to
 * uint32_t with atol() (a token that is not a number therefore becomes 0).
 * The converted values are stored in opt_vals, which must have room for
 * n_vals entries.
 *
 * The whole string is parsed; it is no longer cut off after 32 characters.
 * The rte_strsplit() result is kept signed and validated before it is used as
 * a loop bound, so a failed split cannot index past tokens[].
 *
 * returns:
 *	 number of values parsed
 *	-1 in case of error (bad arguments, out of memory, split failure)
 */
 static int
 app_parse_opt_vals(const char *conf_str, char separator, uint32_t n_vals, uint32_t *opt_vals)
 {
 	char *string;
 	int i, n_tokens;
 	char *tokens[MAX_OPT_VALUES];
 	if (conf_str == NULL || opt_vals == NULL || n_vals == 0 || n_vals > MAX_OPT_VALUES)
 		return -1;
 	/* duplicate configuration string before splitting it to tokens */
 	string = strdup(conf_str);
 	if (string == NULL)
 		return -1;
 	n_tokens = rte_strsplit(string, (int)strlen(string), tokens, n_vals, separator);
 	if (n_tokens < 0 || (uint32_t)n_tokens > n_vals) {
 		free(string);
 		return -1;
 	}
 	for (i = 0; i < n_tokens; i++)
 		opt_vals[i] = (uint32_t)atol(tokens[i]);
 	free(string);
 	return n_tokens;
 }
 /*
 * Parse the "--rsz A,B,C" argument into ring_conf (NIC RX ring size, software
 * ring size, NIC TX ring size).
 *
 * Returns 0 on success and -1 unless exactly three values were given. The
 * token count is not returned directly, because a count of 0 (empty argument)
 * would be taken for success by the caller.
 */
 static int
 app_parse_ring_conf(const char *conf_str)
 {
 	int ret;
 	uint32_t vals[3];
 	ret = app_parse_opt_vals(conf_str, ',', 3, vals);
 	if (ret != 3)
 		return -1;
 	ring_conf.rx_size = vals[0];
 	ring_conf.ring_size = vals[1];
 	ring_conf.tx_size = vals[2];
 	return 0;
 }
 /*
 * Parse the "--rth A,B,C" argument into rx_thresh (prefetch, host and
 * write-back thresholds). Each value is truncated to 8 bits.
 *
 * Returns 0 on success and -1 unless exactly three values were given. The
 * token count is not returned directly, because a count of 0 (empty argument)
 * would be taken for success by the caller.
 */
 static int
 app_parse_rth_conf(const char *conf_str)
 {
 	int ret;
 	uint32_t vals[3];
 	ret = app_parse_opt_vals(conf_str, ',', 3, vals);
 	if (ret != 3)
 		return -1;
 	rx_thresh.pthresh = (uint8_t)vals[0];
 	rx_thresh.hthresh = (uint8_t)vals[1];
 	rx_thresh.wthresh = (uint8_t)vals[2];
 	return 0;
 }
 /*
 * Parse the "--tth A,B,C" argument into tx_thresh (prefetch, host and
 * write-back thresholds). Each value is truncated to 8 bits.
 *
 * Returns 0 on success and -1 unless exactly three values were given. The
 * token count is not returned directly, because a count of 0 (empty argument)
 * would be taken for success by the caller.
 */
 static int
 app_parse_tth_conf(const char *conf_str)
 {
 	int ret;
 	uint32_t vals[3];
 	ret = app_parse_opt_vals(conf_str, ',', 3, vals);
 	if (ret != 3)
 		return -1;
 	tx_thresh.pthresh = (uint8_t)vals[0];
 	tx_thresh.hthresh = (uint8_t)vals[1];
 	tx_thresh.wthresh = (uint8_t)vals[2];
 	return 0;
 }
 /*
 * Parse one "--pfc RXPORT,TXPORT,RXCORE,WTCORE[,TXCORE]" argument into the
 * next free entry of qos_conf[] and record the ports and cores it uses in the
 * app_used_* masks. When TXCORE is omitted the worker core also transmits.
 *
 * Fixes relative to the first version:
 *  - vals[] has room for the six values requested from app_parse_opt_vals();
 *    a sixth token used to be written past the end of a five-element array.
 *  - a wrong number of values always yields -1 (a count of 0 used to be
 *    returned as-is and read as success by the caller).
 *  - the "invalid tx port" message prints the TX port, not the RX port.
 *  - masks are built with a 64-bit constant, and core indexes that do not fit
 *    in the 64-bit core mask are rejected instead of being shifted.
 *
 * NOTE(review): nb_pfc is not checked against the capacity of qos_conf[]
 * here because that capacity is not visible in this file -- confirm it is
 * enforced elsewhere.
 *
 * Returns 0 on success, -1 on error.
 */
 static int
 app_parse_flow_conf(const char *conf_str)
 {
 	int ret;
 	uint32_t vals[6];
 	struct flow_conf *pconf;
 	uint64_t mask;
 	const uint32_t mask_bits = (uint32_t)(sizeof(app_used_core_mask) * CHAR_BIT);
 	ret = app_parse_opt_vals(conf_str, ',', 6, vals);
 	if (ret < 4 || ret > 5)
 		return -1;
 	pconf = &qos_conf[nb_pfc];
 	pconf->rx_port = (uint8_t)vals[0];
 	pconf->tx_port = (uint8_t)vals[1];
 	pconf->rx_core = (uint8_t)vals[2];
 	pconf->wt_core = (uint8_t)vals[3];
 	if (ret == 5)
 		pconf->tx_core = (uint8_t)vals[4];
 	else
 		pconf->tx_core = pconf->wt_core;
 	if (pconf->rx_core == pconf->wt_core) {
 		RTE_LOG(ERR, APP, "pfc %u: rx thread and worker thread cannot share same core\n", nb_pfc);
 		return -1;
 	}
 	/* a core index must be a valid bit position in the 64-bit core mask */
 	if (pconf->rx_core >= mask_bits || pconf->wt_core >= mask_bits ||
 			pconf->tx_core >= mask_bits) {
 		RTE_LOG(ERR, APP, "pfc %u: core index out of range\n", nb_pfc);
 		return -1;
 	}
 	if (pconf->rx_port >= RTE_MAX_ETHPORTS) {
 		RTE_LOG(ERR, APP, "pfc %u: invalid rx port %hu index\n", nb_pfc, pconf->rx_port);
 		return -1;
 	}
 	if (pconf->tx_port >= RTE_MAX_ETHPORTS) {
 		RTE_LOG(ERR, APP, "pfc %u: invalid tx port %hu index\n", nb_pfc, pconf->tx_port);
 		return -1;
 	}
 	mask = 1ULL << pconf->rx_port;
 	if (app_used_rx_port_mask & mask) {
 		RTE_LOG(ERR, APP, "pfc %u: rx port %hu is used already\n", nb_pfc, pconf->rx_port);
 		return -1;
 	}
 	app_used_rx_port_mask |= mask;
 	app_used_port_mask |= mask;
 	mask = 1ULL << pconf->tx_port;
 	if (app_used_tx_port_mask & mask) {
 		RTE_LOG(ERR, APP, "pfc %u: port %hu is used already\n", nb_pfc, pconf->tx_port);
 		return -1;
 	}
 	app_used_tx_port_mask |= mask;
 	app_used_port_mask |= mask;
 	/* remember every core this flow runs on */
 	mask = 1ULL << pconf->rx_core;
 	app_used_core_mask |= mask;
 	mask = 1ULL << pconf->wt_core;
 	app_used_core_mask |= mask;
 	mask = 1ULL << pconf->tx_core;
 	app_used_core_mask |= mask;
 	nb_pfc++;
 	return 0;
 }
 /*
  * Parse the burst size option value: exactly four comma-separated numbers
  * stored, in order, into burst_conf.rx_burst, .ring_burst, .qos_dequeue
  * and .tx_burst.
  *
  * Returns 0 on success and -1 on any error. A wrong number of values is
  * always reported as -1, so that a short count (possibly 0) coming back from
  * app_parse_opt_vals() can never be mistaken for success. Values that do
  * not fit the 16-bit burst fields are rejected instead of being truncated.
  */
 static int
 app_parse_burst_conf(const char *conf_str)
 {
 	int ret;
 	uint32_t i;
 	uint32_t vals[4];
 	ret = app_parse_opt_vals(conf_str, ',', 4, vals);
 	if (ret != 4)
 		return -1;
 	for (i = 0; i < 4; i++) {
 		if (vals[i] > UINT16_MAX)
 			return -1;
 	}
 	burst_conf.rx_burst    = (uint16_t)vals[0];
 	burst_conf.ring_burst  = (uint16_t)vals[1];
 	burst_conf.qos_dequeue = (uint16_t)vals[2];
 	burst_conf.tx_burst    = (uint16_t)vals[3];
 	return 0;
 }
 /*
  * Initializes the EAL with the given command line, then parses the
  * application's own long options (pfc, mst, rsz, bsz, rth, tth, cfg) from
  * the arguments the EAL left over.
  *
  * After parsing it verifies that:
  *  - the master core is not one of the cores assigned to a packet flow,
  *  - the cores used by the flows plus the master core match the EAL core
  *    mask exactly, and the master core is the EAL master lcore,
  *  - at least one packet flow is configured,
  *  - every flow's RX, worker and TX cores exist, and RX and worker cores
  *    share a socket (whose bit is recorded in app_numa_mask).
  *
  * Returns 0 on success, -1 on any error.
  */
 int
 app_parse_args(int argc, char **argv)
 {
 	int opt, ret;
 	int option_index;
 	const char *optname;
 	char *prgname = argv[0];
 	uint32_t i, nb_lcores;
 	uint64_t master_mask;
 	static struct option lgopts[] = {
 		{ "pfc", 1, 0, 0 },
 		{ "mst", 1, 0, 0 },
 		{ "rsz", 1, 0, 0 },
 		{ "bsz", 1, 0, 0 },
 		{ "rth", 1, 0, 0 },
 		{ "tth", 1, 0, 0 },
 		{ "cfg", 1, 0, 0 },
 		{ NULL,  0, 0, 0 }
 	};
 	/* initialize EAL first */
 	ret = rte_eal_init(argc, argv);
 	if (ret < 0)
 		return -1;
 	/* skip the arguments consumed by the EAL */
 	argc -= ret;
 	argv += ret;
 	/* set en_US locale to print big numbers with ',' */
 	setlocale(LC_NUMERIC, "en_US.utf-8");
 	while ((opt = getopt_long(argc, argv, "",
 		lgopts, &option_index)) != EOF) {
 			switch (opt) {
 			/* long options */
 			case 0:
 				optname = lgopts[option_index].name;
 				if (str_is(optname, "pfc")) {
 					ret = app_parse_flow_conf(optarg);
 					if (ret) {
 						RTE_LOG(ERR, APP, "Invalid pipe configuration %s\n", optarg);
 						return -1;
 					}
 					break;
 				}
 				if (str_is(optname, "mst")) {
 					app_master_core = (uint32_t)atoi(optarg);
 					break;
 				}
 				if (str_is(optname, "rsz")) {
 					ret = app_parse_ring_conf(optarg);
 					if (ret) {
 						RTE_LOG(ERR, APP, "Invalid ring configuration %s\n", optarg);
 						return -1;
 					}
 					break;
 				}
 				if (str_is(optname, "bsz")) {
 					ret = app_parse_burst_conf(optarg);
 					if (ret) {
 						RTE_LOG(ERR, APP, "Invalid burst configuration %s\n", optarg);
 						return -1;
 					}
 					break;
 				}
 				if (str_is(optname, "rth")) {
 					ret = app_parse_rth_conf(optarg);
 					if (ret) {
 						RTE_LOG(ERR, APP, "Invalid RX threshold configuration %s\n", optarg);
 						return -1;
 					}
 					break;
 				}
 				if (str_is(optname, "tth")) {
 					ret = app_parse_tth_conf(optarg);
 					if (ret) {
 						RTE_LOG(ERR, APP, "Invalid TX threshold configuration %s\n", optarg);
 						return -1;
 					}
 					break;
 				}
 				if (str_is(optname, "cfg")) {
 					cfg_profile = optarg;
 					break;
 				}
 				break;
 			default:
 				app_usage(prgname);
 				return -1;
 			}
 	}
 	/*
 	 * check master core index validity: it must be representable in the
 	 * 64-bit core mask (this also catches a negative --mst value, which
 	 * wraps to a huge unsigned number) and must not be used by any flow
 	 */
 	if (app_master_core >= 64) {
 		RTE_LOG(ERR, APP, "Master core index is not configured properly\n");
 		app_usage(prgname);
 		return -1;
 	}
 	master_mask = (uint64_t)1 << app_master_core;
 	if (app_used_core_mask & master_mask) {
 		RTE_LOG(ERR, APP, "Master core index is not configured properly\n");
 		app_usage(prgname);
 		return -1;
 	}
 	app_used_core_mask |= master_mask;
 	if ((app_used_core_mask != app_eal_core_mask()) ||
 			(app_master_core != rte_get_master_lcore())) {
 		RTE_LOG(ERR, APP, "EAL core mask not configured properly, must be %" PRIx64
 				" instead of %" PRIx64 "\n" , app_used_core_mask, app_eal_core_mask());
 		return -1;
 	}
 	if (nb_pfc == 0) {
 		RTE_LOG(ERR, APP, "Packet flow not configured!\n");
 		app_usage(prgname);
 		return -1;
 	}
 	/* sanity check for cores assignment */
 	nb_lcores = app_cpu_core_count();
 	for(i = 0; i < nb_pfc; i++) {
 		uint32_t rx_sock, wt_sock;
 		if (qos_conf[i].rx_core >= nb_lcores) {
 			RTE_LOG(ERR, APP, "pfc %u: invalid RX lcore index %u\n", i + 1,
 					qos_conf[i].rx_core);
 			return -1;
 		}
 		if (qos_conf[i].wt_core >= nb_lcores) {
 			RTE_LOG(ERR, APP, "pfc %u: invalid WT lcore index %u\n", i + 1,
 					qos_conf[i].wt_core);
 			return -1;
 		}
 		/* the TX core may differ from the worker core, so check it too */
 		if (qos_conf[i].tx_core >= nb_lcores) {
 			RTE_LOG(ERR, APP, "pfc %u: invalid TX lcore index %u\n", i + 1,
 					qos_conf[i].tx_core);
 			return -1;
 		}
 		rx_sock = rte_lcore_to_socket_id(qos_conf[i].rx_core);
 		wt_sock = rte_lcore_to_socket_id(qos_conf[i].wt_core);
 		if (rx_sock != wt_sock) {
 			RTE_LOG(ERR, APP, "pfc %u: RX and WT must be on the same socket\n", i + 1);
 			return -1;
 		}
 		/* unsigned shift: app_numa_mask is an unsigned bit mask */
 		app_numa_mask |= 1u << rx_sock;
 	}
 	return 0;
 }
--- a/examples/qos_sched/cfg_file.c
+++ b/examples/qos_sched/cfg_file.c
@ -0,0 +1,631 @@
 /*-
 *   BSD LICENSE
 * 
 *   Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
 *   All rights reserved.
 * 
 *   Redistribution and use in source and binary forms, with or without 
 *   modification, are permitted provided that the following conditions 
 *   are met:
 * 
 *     * Redistributions of source code must retain the above copyright 
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright 
 *       notice, this list of conditions and the following disclaimer in 
 *       the documentation and/or other materials provided with the 
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its 
 *       contributors may be used to endorse or promote products derived 
 *       from this software without specific prior written permission.
 * 
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 * 
 */
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <ctype.h>
 #include <rte_string_fns.h>
 #include <rte_sched.h>
 #include "cfg_file.h"
 #include "main.h"
 /** Number of section slots added each time the section pointer array of a
  * struct cfg_file has to grow (also its initial capacity in cfg_load()). */
 #define CFG_ALLOC_SECTION_BATCH 8
 /** Number of entry slots added each time the entry pointer array of a
  * struct cfg_section has to grow (also its initial capacity in cfg_load()). */
 #define CFG_ALLOC_ENTRY_BATCH 16
 /*
  * Remove leading and trailing whitespace from the first 'len' characters of
  * 'str', in place, and return the length of what remains.
  *
  * Stripped trailing characters are overwritten with '\0'. When leading
  * whitespace is removed the remaining text is moved to the start of the
  * buffer and NUL-terminated. A string with nothing to strip is not written
  * to at all.
  */
 static unsigned
 _strip(char *str, unsigned len)
 {
 	unsigned newlen = len;
 	unsigned start = 0;
 	if (len == 0)
 		return 0;
 	/* strip trailing whitespace; the cast keeps isspace() defined for
 	 * characters with the high bit set when plain char is signed */
 	while (newlen > 0 && isspace((unsigned char)str[newlen - 1]))
 		str[--newlen] = '\0';
 	/* count leading whitespace, testing the bound before reading */
 	while (start < newlen && isspace((unsigned char)str[start]))
 		start++;
 	if (start > 0) {
 		newlen -= start;
 		/* source and destination overlap, hence memmove */
 		memmove(str, str + start, newlen);
 		str[newlen] = '\0';
 	}
 	return newlen;
 }
 struct cfg_file *
 cfg_load(const char *filename, int flags)
 {
 	int allocated_sections = CFG_ALLOC_SECTION_BATCH;
 	int allocated_entries = 0;
 	int curr_section = -1;
 	int curr_entry = -1;
 	char buffer[256];
 	int lineno = 0;
 	struct cfg_file *cfg = NULL;
 	FILE *f = fopen(filename, "r");
 	if (f == NULL)
 		return NULL;
 	cfg = malloc(sizeof(*cfg) +	sizeof(cfg->sections[0]) * allocated_sections);
 	if (cfg == NULL)
 		goto error2;
 	memset(cfg->sections, 0, sizeof(cfg->sections[0]) * allocated_sections);
 	while (fgets(buffer, sizeof(buffer), f) != NULL) {
 		char *pos = NULL;
 		size_t len = strnlen(buffer, sizeof(buffer));
 		lineno++;
 		if (len >=sizeof(buffer) - 1 && buffer[len-1] != '\n'){
 			printf("Error line %d - no \\n found on string. "
 					"Check if line too long\n", lineno);
 			goto error1;
 		}
 		if ((pos = memchr(buffer, ';', sizeof(buffer))) != NULL) {
 			*pos = '\0';
 			len = pos -  buffer;
 		}
 		len = _strip(buffer, len);
 		if (buffer[0] != '[' && memchr(buffer, '=', len) == NULL)
 			continue;
 		if (buffer[0] == '[') {
 			/* section heading line */
 			char *end = memchr(buffer, ']', len);
 			if (end == NULL) {
 				printf("Error line %d - no terminating '[' found\n", lineno);
 				goto error1;
 			}
 			*end = '\0';
 			_strip(&buffer[1], end - &buffer[1]);
 			/* close off old section and add start new one */
 			if (curr_section >= 0)
 				cfg->sections[curr_section]->num_entries = curr_entry + 1;
 			curr_section++;
 			/* resize overall struct if we don't have room for more sections */
 			if (curr_section == allocated_sections) {
 				allocated_sections += CFG_ALLOC_SECTION_BATCH;
 				struct cfg_file *n_cfg = realloc(cfg, sizeof(*cfg) +
 						sizeof(cfg->sections[0]) * allocated_sections);
 				if (n_cfg == NULL) {
 					printf("Error - no more memory\n");
 					goto error1;
 				}
 				cfg = n_cfg;
 			}
 			/* allocate space for new section */
 			allocated_entries = CFG_ALLOC_ENTRY_BATCH;
 			curr_entry = -1;
 			cfg->sections[curr_section] = malloc(sizeof(*cfg->sections[0]) +
 					sizeof(cfg->sections[0]->entries[0]) * allocated_entries);
 			if (cfg->sections[curr_section] == NULL) {
 				printf("Error - no more memory\n");
 				goto error1;
 			}
 			rte_snprintf(cfg->sections[curr_section]->name,
 					sizeof(cfg->sections[0]->name),
 					"%s", &buffer[1]);
 		}
 		else {
 			/* value line */
 			if (curr_section < 0) {
 				printf("Error line %d - value outside of section\n", lineno);
 				goto error1;
 			}
 			struct cfg_section *sect = cfg->sections[curr_section];
 			char *split[2];
 			if (rte_strsplit(buffer, sizeof(buffer), split, 2, '=') != 2) {
 				printf("Error at line %d - cannot split string\n", lineno);
 				goto error1;
 			}
 			curr_entry++;
 			if (curr_entry == allocated_entries) {
 				allocated_entries += CFG_ALLOC_ENTRY_BATCH;
 				struct cfg_section *n_sect = realloc(sect, sizeof(*sect) +
 						sizeof(sect->entries[0]) * allocated_entries);
 				if (n_sect == NULL) {
 					printf("Error - no more memory\n");
 					goto error1;
 				}
 				sect = cfg->sections[curr_section] = n_sect;
 			}
 			sect->entries[curr_entry] = malloc(sizeof(*sect->entries[0]));
 			if (sect->entries[curr_entry] == NULL) {
 				printf("Error - no more memory\n");
 				goto error1;
 			}
 			struct cfg_entry *entry = sect->entries[curr_entry];
 			rte_snprintf(entry->name, sizeof(entry->name), "%s", split[0]);
 			rte_snprintf(entry->value, sizeof(entry->value), "%s", split[1]);
 			_strip(entry->name, strnlen(entry->name, sizeof(entry->name)));
 			_strip(entry->value, strnlen(entry->value, sizeof(entry->value)));
 		}
 	}
 	fclose(f);
 	cfg->flags = flags;
 	cfg->sections[curr_section]->num_entries = curr_entry + 1;
 	cfg->num_sections = curr_section + 1;
 	return cfg;
 error1:
 	cfg_close(cfg);
 error2:
 	fclose(f);
 	return NULL;
 }
 int cfg_close(struct cfg_file *cfg)
 {
 	int i, j;
 	if (cfg == NULL)
 		return -1;
 	for(i = 0; i < cfg->num_sections; i++) {
 		if (cfg->sections[i] != NULL) {
 			if (cfg->sections[i]->num_entries) {
 				for(j = 0; j < cfg->sections[i]->num_entries; j++) {
 					if (cfg->sections[i]->entries[j] != NULL)
 						free(cfg->sections[i]->entries[j]);
 				}
 			}
 			free(cfg->sections[i]);
 		}
 	}
 	free(cfg);
 	return 0;
 }
 /*
  * Count the sections whose name matches 'sectionname' over the first
  * 'length' characters (strncmp comparison).
  */
 int
 cfg_num_sections(struct cfg_file *cfg, const char *sectionname, size_t length)
 {
 	int matches = 0;
 	int idx = 0;
 	while (idx < cfg->num_sections) {
 		const char *name = cfg->sections[idx]->name;
 		if (strncmp(name, sectionname, length) == 0)
 			matches++;
 		idx++;
 	}
 	return matches;
 }
 /*
  * Copy the section names, in file order, into the caller's buffers; at most
  * 'max_sections' names are written, each formatted into a buffer assumed to
  * hold CFG_NAME_LEN bytes. Returns the number of names written.
  */
 int
 cfg_sections(struct cfg_file *cfg, char *sections[], int max_sections)
 {
 	int n = 0;
 	while (n < cfg->num_sections && n < max_sections) {
 		rte_snprintf(sections[n], CFG_NAME_LEN, "%s",  cfg->sections[n]->name);
 		n++;
 	}
 	return n;
 }
 static const struct cfg_section *
 _get_section(struct cfg_file *cfg, const char *sectionname)
 {
 	int i;
 	for (i = 0; i < cfg->num_sections; i++) {
 		if (strncmp(cfg->sections[i]->name, sectionname,
 				sizeof(cfg->sections[0]->name)) == 0)
 			return cfg->sections[i];
 	}
 	return NULL;
 }
 /* Returns 1 when a section named 'sectionname' exists, 0 otherwise. */
 int
 cfg_has_section(struct cfg_file *cfg, const char *sectionname)
 {
 	const struct cfg_section *found = _get_section(cfg, sectionname);
 	return found != NULL;
 }
 /* Returns the number of entries in the named section, -1 if it is absent. */
 int
 cfg_section_num_entries(struct cfg_file *cfg, const char *sectionname)
 {
 	const struct cfg_section *sect = _get_section(cfg, sectionname);
 	return (sect != NULL) ? sect->num_entries : -1;
 }
 /*
  * Copy up to 'max_entries' entries of the named section, by value, into
  * 'entries'. Returns the number copied, or -1 when the section is absent.
  */
 int
 cfg_section_entries(struct cfg_file *cfg, const char *sectionname,
 		struct cfg_entry *entries, int max_entries)
 {
 	const struct cfg_section *sect = _get_section(cfg, sectionname);
 	int limit;
 	int n;
 	if (sect == NULL)
 		return -1;
 	/* copy whichever is smaller: what exists or what fits */
 	limit = (sect->num_entries < max_entries) ? sect->num_entries : max_entries;
 	for (n = 0; n < limit; n++)
 		entries[n] = *sect->entries[n];
 	return n;
 }
 const char *
 cfg_get_entry(struct cfg_file *cfg, const char *sectionname,
 		const char *entryname)
 {
 	int i;
 	const struct cfg_section *sect = _get_section(cfg, sectionname);
 	if (sect == NULL)
 		return NULL;
 	for (i = 0; i < sect->num_entries; i++)
 		if (strncmp(sect->entries[i]->name, entryname, CFG_NAME_LEN) == 0)
 			return sect->entries[i]->value;
 	return NULL;
 }
 /* Returns 1 when the named entry exists in the named section, 0 otherwise. */
 int
 cfg_has_entry(struct cfg_file *cfg, const char *sectionname,
 		const char *entryname)
 {
 	const char *value = cfg_get_entry(cfg, sectionname, entryname);
 	return value != NULL;
 }
 /*
  * Override fields of 'port_params' with values from the "port" section of
  * the configuration (and, when RTE_SCHED_RED is defined, from the "red"
  * section). Fields without a matching entry keep their current value.
  *
  * List-valued entries ("queue sizes" and the WRED entries) are parsed with
  * strtol() one number at a time. strtol() reports "no number found" by
  * leaving the end pointer equal to the start pointer - it never sets it to
  * NULL - so parsing stops there and the remaining elements keep their
  * previous values instead of being overwritten with 0.
  *
  * Returns 0 on success, -1 when either argument is NULL.
  */
 int
 cfg_load_port(struct cfg_file *cfg, struct rte_sched_port_params *port_params)
 {
 	const char *entry;
 	char *next;
 	long val;
 	int j;
 	if (!cfg || !port_params)
 		return -1;
 	entry = cfg_get_entry(cfg, "port", "frame overhead");
 	if (entry)
 		port_params->frame_overhead = (uint32_t)atoi(entry);
 	entry = cfg_get_entry(cfg, "port", "number of subports per port");
 	if (entry)
 		port_params->n_subports_per_port = (uint32_t)atoi(entry);
 	entry = cfg_get_entry(cfg, "port", "number of pipes per subport");
 	if (entry)
 		port_params->n_pipes_per_subport = (uint32_t)atoi(entry);
 	/* one queue size per traffic class */
 	entry = cfg_get_entry(cfg, "port", "queue sizes");
 	if (entry) {
 		for(j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) {
 			val = strtol(entry, &next, 10);
 			if (next == entry)
 				break;
 			port_params->qsize[j] = (uint16_t)val;
 			entry = next;
 		}
 	}
 #ifdef RTE_SCHED_RED
 	for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) {
 		char str[32];
 		int k;
 		/* Parse WRED min thresholds */
 		rte_snprintf(str, sizeof(str), "tc %d wred min", j);
 		entry = cfg_get_entry(cfg, "red", str);
 		if (entry) {
 			/* for each packet colour (green, yellow, red) */
 			for (k = 0; k < e_RTE_METER_COLORS; k++) {
 				val = strtol(entry, &next, 10);
 				if (next == entry)
 					break;
 				port_params->red_params[j][k].min_th = (uint16_t)val;
 				entry = next;
 			}
 		}
 		/* Parse WRED max thresholds */
 		rte_snprintf(str, sizeof(str), "tc %d wred max", j);
 		entry = cfg_get_entry(cfg, "red", str);
 		if (entry) {
 			/* for each packet colour (green, yellow, red) */
 			for (k = 0; k < e_RTE_METER_COLORS; k++) {
 				val = strtol(entry, &next, 10);
 				if (next == entry)
 					break;
 				port_params->red_params[j][k].max_th = (uint16_t)val;
 				entry = next;
 			}
 		}
 		/* Parse WRED inverse mark probabilities */
 		rte_snprintf(str, sizeof(str), "tc %d wred inv prob", j);
 		entry = cfg_get_entry(cfg, "red", str);
 		if (entry) {
 			/* for each packet colour (green, yellow, red) */
 			for (k = 0; k < e_RTE_METER_COLORS; k++) {
 				val = strtol(entry, &next, 10);
 				if (next == entry)
 					break;
 				port_params->red_params[j][k].maxp_inv = (uint8_t)val;
 				entry = next;
 			}
 		}
 		/* Parse WRED EWMA filter weights */
 		rte_snprintf(str, sizeof(str), "tc %d wred weight", j);
 		entry = cfg_get_entry(cfg, "red", str);
 		if (entry) {
 			/* for each packet colour (green, yellow, red) */
 			for (k = 0; k < e_RTE_METER_COLORS; k++) {
 				val = strtol(entry, &next, 10);
 				if (next == entry)
 					break;
 				port_params->red_params[j][k].wq_log2 = (uint8_t)val;
 				entry = next;
 			}
 		}
 	}
 #endif /* RTE_SCHED_RED */
 	return 0;
 }
 /*
  * Load the pipe profiles described by the "pipe profile N" sections of the
  * configuration into pipe_params[N], N counting up from 0. The number of
  * profiles is the number of sections whose name starts with "pipe profile";
  * it is also stored into the global port_params.n_pipe_profiles. Fields
  * without a matching entry keep their current value.
  *
  * The per-traffic-class entries ("tc N rate", "tc N oversubscription
  * weight", "tc N wrr weights") are handled in a loop over the traffic
  * classes. WRR weight lists are parsed with strtol(); parsing stops at the
  * first position where no number is found (end pointer equal to the start
  * pointer), leaving the remaining weights untouched.
  *
  * NOTE(review): the profile count is not checked against the capacity of
  * the caller's pipe_params array - confirm the caller guarantees it.
  *
  * Returns 0 on success, -1 when either argument is NULL.
  */
 int
 cfg_load_pipe(struct cfg_file *cfg, struct rte_sched_pipe_params *pipe_params)
 {
 	int i, j, tc;
 	char *next;
 	const char *entry;
 	int profiles;
 	if (!cfg || !pipe_params)
 		return -1;
 	profiles = cfg_num_sections(cfg, "pipe profile", sizeof("pipe profile") - 1);
 	port_params.n_pipe_profiles = profiles;
 	for (j = 0; j < profiles; j++) {
 		char pipe_name[32];
 		char key[32];
 		rte_snprintf(pipe_name, sizeof(pipe_name), "pipe profile %d", j);
 		entry = cfg_get_entry(cfg, pipe_name, "tb rate");
 		if (entry)
 			pipe_params[j].tb_rate = (uint32_t)atoi(entry);
 		entry = cfg_get_entry(cfg, pipe_name, "tb size");
 		if (entry)
 			pipe_params[j].tb_size = (uint32_t)atoi(entry);
 		entry = cfg_get_entry(cfg, pipe_name, "tc period");
 		if (entry)
 			pipe_params[j].tc_period = (uint32_t)atoi(entry);
 		for (tc = 0; tc < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; tc++) {
 			rte_snprintf(key, sizeof(key), "tc %d rate", tc);
 			entry = cfg_get_entry(cfg, pipe_name, key);
 			if (entry)
 				pipe_params[j].tc_rate[tc] = (uint32_t)atoi(entry);
 		}
 #ifdef RTE_SCHED_SUBPORT_TC_OV
 		for (tc = 0; tc < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; tc++) {
 			rte_snprintf(key, sizeof(key), "tc %d oversubscription weight", tc);
 			entry = cfg_get_entry(cfg, pipe_name, key);
 			if (entry)
 				pipe_params[j].tc_ov_weight[tc] = (uint8_t)atoi(entry);
 		}
 #endif
 		for (tc = 0; tc < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; tc++) {
 			rte_snprintf(key, sizeof(key), "tc %d wrr weights", tc);
 			entry = cfg_get_entry(cfg, pipe_name, key);
 			if (entry == NULL)
 				continue;
 			for(i = 0; i < RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; i++) {
 				long val = strtol(entry, &next, 10);
 				if (next == entry)
 					break;
 				/* each class owns as many consecutive slots as it
 				 * has queues, so that is the stride between classes */
 				pipe_params[j].wrr_weights[RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS * tc + i] =
 					(uint8_t)val;
 				entry = next;
 			}
 		}
 	}
 	return 0;
 }
 int
 cfg_load_subport(struct cfg_file *cfg, struct rte_sched_subport_params *subport_params)
 {
 	const char *entry;
 	int i, j, k;
 	if (!cfg || !subport_params)
 		return -1;
 	memset(app_pipe_to_profile, -1, sizeof(app_pipe_to_profile));
 	for (i = 0; i < MAX_SCHED_SUBPORTS; i++) {
 		char sec_name[CFG_NAME_LEN];
 		rte_snprintf(sec_name, sizeof(sec_name), "subport %d", i);
 		if (cfg_has_section(cfg, sec_name)) {
 			entry = cfg_get_entry(cfg, sec_name, "tb rate");
 			if (entry)
 				subport_params[i].tb_rate = (uint32_t)atoi(entry);
 			entry = cfg_get_entry(cfg, sec_name, "tb size");
 			if (entry)
 				subport_params[i].tb_size = (uint32_t)atoi(entry);
 			entry = cfg_get_entry(cfg, sec_name, "tc period");
 			if (entry)
 				subport_params[i].tc_period = (uint32_t)atoi(entry);
 #ifdef RTE_SCHED_SUBPORT_TC_OV
 			entry = cfg_get_entry(cfg, sec_name, "tc oversubscription period");
 			if (entry)
 				subport_params[i].tc_ov_period = (uint32_t)atoi(entry);
 #endif
 			entry = cfg_get_entry(cfg, sec_name, "tc 0 rate");
 			if (entry)
 				subport_params[i].tc_rate[0] = (uint32_t)atoi(entry);
 			entry = cfg_get_entry(cfg, sec_name, "tc 1 rate");
 			if (entry)
 				subport_params[i].tc_rate[1] = (uint32_t)atoi(entry);
 			entry = cfg_get_entry(cfg, sec_name, "tc 2 rate");
 			if (entry)
 				subport_params[i].tc_rate[2] = (uint32_t)atoi(entry);
 			entry = cfg_get_entry(cfg, sec_name, "tc 3 rate");
 			if (entry)
 				subport_params[i].tc_rate[3] = (uint32_t)atoi(entry);
 			int n_entries = cfg_section_num_entries(cfg, sec_name);
 			struct cfg_entry entries[n_entries];
 			cfg_section_entries(cfg, sec_name, entries, n_entries);
 			for (j = 0; j < n_entries; j++) {
 				if (strncmp("pipe", entries[j].name, sizeof("pipe") - 1) == 0) {
 					int profile;
 					char *tokens[2] = {NULL, NULL};
 					int n_tokens;
 					int begin, end;
 					profile = atoi(entries[j].value);
 					n_tokens = rte_strsplit(&entries[j].name[sizeof("pipe")],
 							strnlen(entries[j].name, CFG_NAME_LEN), tokens, 2, '-');
 					begin =  atoi(tokens[0]);
 					if (n_tokens == 2)
 						end = atoi(tokens[1]);
 					else
 						end = begin;
 					if (end >= MAX_SCHED_PIPES || begin > end)
 						return -1;
 					for (k = begin; k <= end; k++) {
 						char profile_name[CFG_NAME_LEN];
 						rte_snprintf(profile_name, sizeof(profile_name),
 								"pipe profile %d", profile);
 						if (cfg_has_section(cfg, profile_name))
 							app_pipe_to_profile[i][k] = profile;
 						else
 							rte_exit(EXIT_FAILURE, "Wrong pipe profile %s\n",
 									entries[j].value);
 					}
 				}
 			}
 		}
 	}
 	return 0;
 }
--- a/examples/qos_sched/cfg_file.h
+++ b/examples/qos_sched/cfg_file.h
@ -0,0 +1,103 @@
 /*-
 *   BSD LICENSE
 * 
 *   Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
 *   All rights reserved.
 * 
 *   Redistribution and use in source and binary forms, with or without 
 *   modification, are permitted provided that the following conditions 
 *   are met:
 * 
 *     * Redistributions of source code must retain the above copyright 
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright 
 *       notice, this list of conditions and the following disclaimer in 
 *       the documentation and/or other materials provided with the 
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its 
 *       contributors may be used to endorse or promote products derived 
 *       from this software without specific prior written permission.
 * 
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 * 
 */
 #ifndef __CFG_FILE_H__
 #define __CFG_FILE_H__
 #include <rte_sched.h>
 /* size in bytes, terminator included, of section and entry names */
 #define CFG_NAME_LEN 32
 /* size in bytes, terminator included, of entry values */
 #define CFG_VALUE_LEN 64
 /* one "name = value" line of a section */
 struct cfg_entry {
 	char name[CFG_NAME_LEN];
 	char value[CFG_VALUE_LEN];
 };
 /* one "[name]" section; 'entries' is a trailing array of pointers to
  * separately allocated entries, 'num_entries' of which are in use */
 struct cfg_section {
 	char name[CFG_NAME_LEN];
 	int num_entries;
 	struct cfg_entry *entries[0];
 };
 /* a loaded configuration file; 'sections' is a trailing array of pointers
  * to separately allocated sections, 'num_sections' of which are in use */
 struct cfg_file {
 	int flags;
 	int num_sections;
 	struct cfg_section *sections[0];
 };
 /* the three loaders below copy scheduler settings out of a loaded
  * configuration; each returns 0 on success and -1 on a NULL argument
  * (cfg_load_subport also returns -1 on an invalid pipe range) */
 int cfg_load_port(struct cfg_file *cfg, struct rte_sched_port_params *port);
 int cfg_load_pipe(struct cfg_file *cfg, struct rte_sched_pipe_params *pipe);
 int cfg_load_subport(struct cfg_file *cfg, struct rte_sched_subport_params *subport);
 /* reads a config file from disk and returns a handle to the config, or
  * NULL on failure.
  * 'flags' is reserved for future use and must be 0
  */
 struct cfg_file *cfg_load(const char *filename, int flags);
 /* returns the number of sections whose name matches 'sec_name' over its
  * first 'length' characters (i.e. a prefix count, not the total) */
 int cfg_num_sections(struct cfg_file *cfg, const char *sec_name, size_t length);
 /* fills the array "sections" with the names of all the sections in the file
  * (up to a max of max_sections) and returns how many names were written.
  * NOTE: buffers in the sections array must be at least CFG_NAME_LEN big
  */
 int cfg_sections(struct cfg_file *cfg, char *sections[], int max_sections);
 /* true if the named section exists, false otherwise */
 int cfg_has_section(struct cfg_file *cfg, const char *sectionname);
 /* returns the number of entries in a section, or -1 if there is no such
  * section */
 int cfg_section_num_entries(struct cfg_file *cfg, const char *sectionname);
 /* copies the entries of a section, as key-value pairs, into the "entries"
  * array (up to max_entries); returns the number copied, or -1 if there is
  * no such section */
 int cfg_section_entries(struct cfg_file *cfg, const char *sectionname,
 		struct cfg_entry *entries, int max_entries);
 /* returns a pointer to the value of the named entry in the named section,
  * or NULL if the section or the entry does not exist; the string belongs to
  * the config */
 const char *cfg_get_entry(struct cfg_file *cfg, const char *sectionname,
 		const char *entryname);
 /* true if the given entry exists in the given section, false otherwise */
 int cfg_has_entry(struct cfg_file *cfg, const char *sectionname,
 		const char *entryname);
 /* cleans up memory allocated by cfg_load(); returns -1 if cfg is NULL,
  * 0 otherwise */
 int cfg_close(struct cfg_file *cfg);
 #endif
--- a/examples/qos_sched/init.c
+++ b/examples/qos_sched/init.c
@ -0,0 +1,385 @@
 /*-
 *   BSD LICENSE
 * 
 *   Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
 *   All rights reserved.
 * 
 *   Redistribution and use in source and binary forms, with or without 
 *   modification, are permitted provided that the following conditions 
 *   are met:
 * 
 *     * Redistributions of source code must retain the above copyright 
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright 
 *       notice, this list of conditions and the following disclaimer in 
 *       the documentation and/or other materials provided with the 
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its 
 *       contributors may be used to endorse or promote products derived 
 *       from this software without specific prior written permission.
 * 
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 * 
 */
 #include <stdint.h>
 #include <memory.h>
 #include <rte_log.h>
 #include <rte_mbuf.h>
 #include <rte_debug.h>
 #include <rte_ethdev.h>
 #include <rte_mempool.h>
 #include <rte_sched.h>
 #include <rte_cycles.h>
 #include <rte_string_fns.h>
 #include "main.h"
 #include "cfg_file.h"
 /* bit mask of the sockets hosting the RX cores of the configured flows
  * (filled in by app_parse_args()) */
 uint32_t app_numa_mask = 0;
 /* bit mask of the ports already set up by app_init_port() */
 static uint32_t app_inited_port_mask = 0;
 /* pipe profile index for each [subport][pipe]; reset to -1 and then filled
  * by cfg_load_subport() */
 int app_pipe_to_profile[MAX_SCHED_SUBPORTS][MAX_SCHED_PIPES];
 #define MAX_NAME_LEN 32
 /* ring/descriptor sizes; rx_size and tx_size are the descriptor counts
  * passed to the RX and TX queue setup in app_init_port() */
 struct ring_conf ring_conf = {
 	.rx_size   = APP_RX_DESC_DEFAULT,
 	.ring_size = APP_RING_SIZE,
 	.tx_size   = APP_TX_DESC_DEFAULT,
 };
 /* burst sizes; defaults that app_parse_burst_conf() may override */
 struct burst_conf burst_conf = {
 	.rx_burst    = MAX_PKT_RX_BURST,
 	.ring_burst  = PKT_ENQUEUE,
 	.qos_dequeue = PKT_DEQUEUE,
 	.tx_burst    = MAX_PKT_TX_BURST,
 };
 /* RX queue thresholds; defaults that app_parse_rth_conf() may override */
 struct ring_thresh rx_thresh = {
 	.pthresh = RX_PTHRESH,
 	.hthresh = RX_HTHRESH,
 	.wthresh = RX_WTHRESH,
 };
 /* TX queue thresholds; defaults that app_parse_tth_conf() may override */
 struct ring_thresh tx_thresh = {
 	.pthresh = TX_PTHRESH,
 	.hthresh = TX_HTHRESH,
 	.wthresh = TX_WTHRESH,
 };
 /* number of packet flow configurations stored in qos_conf[] */
 uint32_t nb_pfc;
 /* path given with the "cfg" command line option, NULL when not given */
 const char *cfg_profile = NULL;
 /* one entry per configured packet flow, appended by app_parse_flow_conf() */
 struct flow_conf qos_conf[MAX_DATA_STREAMS];
 /* Ethernet device configuration handed to rte_eth_dev_configure() for every
  * port: all optional RX features are switched off */
 static const struct rte_eth_conf port_conf = {
 	.rxmode = {
 		.max_rx_pkt_len = ETHER_MAX_LEN,
 		.split_hdr_size = 0,
 		.header_split   = 0, /**< Header Split disabled */
 		.hw_ip_checksum = 0, /**< IP checksum offload disabled */
 		.hw_vlan_filter = 0, /**< VLAN filtering disabled */
 		.jumbo_frame    = 0, /**< Jumbo Frame Support disabled */
 		.hw_strip_crc   = 0, /**< CRC stripping by hardware disabled */
 	},
 	.txmode = {
 		.mq_mode = ETH_DCB_NONE,
 	},
 };
 static int
 app_init_port(uint8_t portid, struct rte_mempool *mp)
 {
 	int ret;
 	struct rte_eth_link link;
 	struct rte_eth_rxconf rx_conf;
 	struct rte_eth_txconf tx_conf;
 	/* check if port already initialized (multistream configuration) */
 	if (app_inited_port_mask & (1u << portid))
 		return 0;
 	rx_conf.rx_thresh.pthresh = rx_thresh.pthresh;
 	rx_conf.rx_thresh.hthresh = rx_thresh.hthresh;
 	rx_conf.rx_thresh.wthresh = rx_thresh.wthresh;
 	rx_conf.rx_free_thresh = 32;
 	rx_conf.rx_drop_en = 0;
 	tx_conf.tx_thresh.pthresh = tx_thresh.pthresh;
 	tx_conf.tx_thresh.hthresh = tx_thresh.hthresh;
 	tx_conf.tx_thresh.wthresh = tx_thresh.wthresh;
 	tx_conf.tx_free_thresh = 0;
 	tx_conf.tx_rs_thresh = 0;
 	tx_conf.txq_flags = ETH_TXQ_FLAGS_NOMULTSEGS | ETH_TXQ_FLAGS_NOOFFLOADS;
 	/* init port */
 	RTE_LOG(INFO, APP, "Initializing port %hu... ", portid);
 	fflush(stdout);
 	ret = rte_eth_dev_configure(portid, 1, 1, &port_conf);
 	if (ret < 0)
 		rte_exit(EXIT_FAILURE, "Cannot configure device: err=%d, port=%hu\n",
 		ret, portid);
 	/* init one RX queue */
 	fflush(stdout);
 	ret = rte_eth_rx_queue_setup(portid, 0, (uint16_t)ring_conf.rx_size,
 		rte_eth_dev_socket_id(portid), &rx_conf, mp);
 	if (ret < 0)
 		rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup: err=%d, port=%hu\n",
 		ret, portid);
 	/* init one TX queue */
 	fflush(stdout);
 	ret = rte_eth_tx_queue_setup(portid, 0,
 		(uint16_t)ring_conf.tx_size, rte_eth_dev_socket_id(portid), &tx_conf);
 	if (ret < 0)
 		rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup: err=%d, "
 		"port=%hu queue=%d\n",
 		ret, portid, 0);
 	/* Start device */
 	ret = rte_eth_dev_start(portid);
 	if (ret < 0)
 		rte_exit(EXIT_FAILURE, "rte_pmd_port_start: err=%d, port=%hu\n",
 		ret, portid);
 	printf("done: ");
 	/* get link status */
 	rte_eth_link_get(portid, &link);
 	if (link.link_status) {
 		printf(" Link Up - speed %u Mbps - %s\n",
 			(uint32_t) link.link_speed,
 			(link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
 			("full-duplex") : ("half-duplex\n"));
 	} else {
 		printf(" Link Down\n");
 	}
 	rte_eth_promiscuous_enable(portid);
 	/* mark port as initialized */
 	app_inited_port_mask |= 1u << portid;
 	return 0;
 }
 /* Default subport parameters. Only the first subport is given explicit
  * values; the remaining elements are zero-initialized. cfg_load_subport()
  * can override these fields from a configuration file. */
 static struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
 	{
 		.tb_rate = 1250000000,
 		.tb_size = 1000000,
 		.tc_rate = {1250000000, 1250000000, 1250000000, 1250000000},
 		.tc_period = 10,
 #ifdef RTE_SCHED_SUBPORT_TC_OV
 		.tc_ov_period = 10,
 #endif
 	},
 };
 /* Default pipe profiles. Only profile 0 is given explicit values; the
  * remaining elements are zero-initialized. cfg_load_pipe() can override
  * these fields from a configuration file. */
 static struct rte_sched_pipe_params pipe_profiles[RTE_SCHED_PIPE_PROFILES_PER_PORT] = {
 	{ /* Profile #0 */
 		.tb_rate = 305175,
 		.tb_size = 1000000,
 		.tc_rate = {305175, 305175, 305175, 305175}, 
 		.tc_period = 40,
 #ifdef RTE_SCHED_SUBPORT_TC_OV
 		.tc_ov_weight = {1, 1, 1, 1},
 #endif
 		/* one weight per queue, listed in groups of four */
 		.wrr_weights = {1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1},
 	},
 };
 /*
  * Built-in scheduler port configuration, shared by every flow.
  * .name, .socket and .rate are placeholders: app_init_sched_port() overwrites
  * them for each port before calling rte_sched_port_config().
  */
 struct rte_sched_port_params port_params = {
 	.name = "port_0",
 	.socket = 0, /* computed */
 	.rate = 0, /* computed */
 	.frame_overhead = RTE_SCHED_FRAME_OVERHEAD_DEFAULT,
 	.n_subports_per_port = 1,
 	.n_pipes_per_subport = 4096,
 	.qsize = {
 		[0] = 64,
 		[1] = 64,
 		[2] = 64,
 		[3] = 64,
 	},
 	.pipe_profiles = pipe_profiles,
 	.n_pipe_profiles = 1,
 #ifdef RTE_SCHED_RED
 	/* RED parameters indexed by [traffic class][color: Green / Yellow / Red] */
 	.red_params = {
 		[0] = { /* Traffic Class 0 */
 			[0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 			[1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 			[2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 		},
 		[1] = { /* Traffic Class 1 */
 			[0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 			[1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 			[2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 		},
 		[2] = { /* Traffic Class 2 */
 			[0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 			[1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 			[2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 		},
 		[3] = { /* Traffic Class 3 */
 			[0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 			[1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 			[2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 		},
 	},
 #endif /* RTE_SCHED_RED */
 };
 /*
  * Create and fully configure the hierarchical scheduler for one Ethernet port.
  *
  * The global port_params is completed with the socket, the port rate (derived
  * from the current link speed, converted from Mbps to bytes per second) and a
  * per-port name, then every subport and every pipe that has a profile
  * assigned in app_pipe_to_profile[][] (entries equal to -1 are skipped) is
  * configured.
  *
  * @param portid   Ethernet port whose link speed defines the scheduler rate.
  * @param socketid Value stored in port_params.socket for the scheduler.
  * @return The configured scheduler port. Any failure terminates the
  *         application through rte_exit(), so NULL is never returned.
  */
 static struct rte_sched_port *
 app_init_sched_port(uint32_t portid, uint32_t socketid)
 {
 	static char port_name[32]; /* static as referenced from global port_params*/
 	struct rte_eth_link link;
 	struct rte_sched_port *port = NULL;
 	uint32_t pipe, subport;
 	int err;
 	/*
 	 * port_params may have been modified by a configuration profile, while
 	 * subport_params[] and app_pipe_to_profile[][] have fixed dimensions:
 	 * refuse values that would index past the end of those tables.
 	 */
 	if (port_params.n_subports_per_port > MAX_SCHED_SUBPORTS) {
 		rte_exit(EXIT_FAILURE, "Too many sched subports: %u (max %u)\n",
 				(unsigned) port_params.n_subports_per_port,
 				(unsigned) MAX_SCHED_SUBPORTS);
 	}
 	if (port_params.n_pipes_per_subport > MAX_SCHED_PIPES) {
 		rte_exit(EXIT_FAILURE, "Too many sched pipes per subport: %u (max %u)\n",
 				(unsigned) port_params.n_pipes_per_subport,
 				(unsigned) MAX_SCHED_PIPES);
 	}
 	rte_eth_link_get((uint8_t)portid, &link);
 	port_params.socket = socketid;
 	/* link_speed is reported in Mbps; the scheduler rate is in bytes/second */
 	port_params.rate = (uint64_t) link.link_speed * 1000 * 1000 / 8;
 	rte_snprintf(port_name, sizeof(port_name), "port_%u", portid);
 	port_params.name = port_name;
 	port = rte_sched_port_config(&port_params);
 	if (port == NULL){
 		rte_exit(EXIT_FAILURE, "Unable to config sched port\n");
 	}
 	for (subport = 0; subport < port_params.n_subports_per_port; subport ++) {
 		err = rte_sched_subport_config(port, subport, &subport_params[subport]);
 		if (err) {
 			rte_exit(EXIT_FAILURE, "Unable to config sched subport %u, err=%d\n",
 					subport, err);
 		}
 		for (pipe = 0; pipe < port_params.n_pipes_per_subport; pipe ++) {
 			/* -1 marks a pipe with no profile assigned: leave it unconfigured */
 			if (app_pipe_to_profile[subport][pipe] != -1) {
 				err = rte_sched_pipe_config(port, subport, pipe,
 						app_pipe_to_profile[subport][pipe]);
 				if (err) {
 					rte_exit(EXIT_FAILURE, "Unable to config sched pipe %u "
 							"for profile %d, err=%d\n", pipe,
 							app_pipe_to_profile[subport][pipe], err);
 				}
 			}
 		}
 	}
 	return port;
 }
 /*
  * Apply an optional configuration profile on top of the built-in defaults.
  *
  * When a profile path is given, the file is opened with cfg_load() and passed
  * to the port, subport and pipe loaders, which receive the global
  * port_params, subport_params and pipe_profiles tables; the file is then
  * closed. A profile that cannot be loaded terminates the application.
  *
  * @param profile Path of the profile, or NULL to keep the defaults.
  * @return Always 0.
  */
 static int
 app_load_cfg_profile(const char *profile)
 {
 	struct cfg_file *cfg;
 	if (profile != NULL) {
 		cfg = cfg_load(profile, 0);
 		if (cfg == NULL)
 			rte_exit(EXIT_FAILURE, "Cannot load configuration profile %s\n", profile);
 		cfg_load_port(cfg, &port_params);
 		cfg_load_subport(cfg, subport_params);
 		cfg_load_pipe(cfg, pipe_profiles);
 		cfg_close(cfg);
 	}
 	return 0;
 }
 int app_init(void)
 {
 	uint32_t i;
 	char ring_name[MAX_NAME_LEN];
 	char pool_name[MAX_NAME_LEN];
 	/* init driver(s) */
 	if (rte_pmd_init_all() < 0)
 		rte_exit(EXIT_FAILURE, "Cannot init PMD\n");
 	if (rte_eal_pci_probe() < 0)
 		rte_exit(EXIT_FAILURE, "Cannot probe PCI\n");
 	if (rte_eth_dev_count() == 0)
 		rte_exit(EXIT_FAILURE, "No Ethernet port - bye\n");
 	/* load configuration profile */
 	if (app_load_cfg_profile(cfg_profile) != 0)
 		rte_exit(EXIT_FAILURE, "Invalid configuration profile\n");
 	/* Initialize each active flow */
 	for(i = 0; i < nb_pfc; i++) {
 		uint32_t socket = rte_lcore_to_socket_id(qos_conf[i].rx_core);
 		struct rte_ring *ring;
 		rte_snprintf(ring_name, MAX_NAME_LEN, "ring-%u-%u", i, qos_conf[i].rx_core);
 		ring = rte_ring_lookup(ring_name);
 		if (ring == NULL)
 			qos_conf[i].rx_ring = rte_ring_create(ring_name, ring_conf.ring_size,
 			 	socket, RING_F_SP_ENQ | RING_F_SC_DEQ);
 		else
 			qos_conf[i].rx_ring = ring;
 		rte_snprintf(ring_name, MAX_NAME_LEN, "ring-%u-%u", i, qos_conf[i].tx_core);
 		ring = rte_ring_lookup(ring_name);
 		if (ring == NULL)
 			qos_conf[i].tx_ring = rte_ring_create(ring_name, ring_conf.ring_size,
 				socket, RING_F_SP_ENQ | RING_F_SC_DEQ);
 		else
 			qos_conf[i].tx_ring = ring;
 		/* create the mbuf pools for each RX Port */
 		rte_snprintf(pool_name, MAX_NAME_LEN, "mbuf_pool%u", i);
 		qos_conf[i].mbuf_pool = rte_mempool_create(pool_name, NB_MBUF, MBUF_SIZE,
 						burst_conf.rx_burst * 4,
 						sizeof(struct rte_pktmbuf_pool_private),
 						rte_pktmbuf_pool_init, NULL,
 						rte_pktmbuf_init, NULL,
 						rte_eth_dev_socket_id(qos_conf[i].rx_port),
 						0);
 		if (qos_conf[i].mbuf_pool == NULL)
 			rte_exit(EXIT_FAILURE, "Cannot init mbuf pool for socket %u\n", i);
 		//printf("MP = %d\n", rte_mempool_count(qos_conf[i].app_pktmbuf_pool));
 		app_init_port(qos_conf[i].rx_port, qos_conf[i].mbuf_pool);
 		app_init_port(qos_conf[i].tx_port, qos_conf[i].mbuf_pool);
 		qos_conf[i].sched_port = app_init_sched_port(qos_conf[i].rx_port, socket);
 	}
 	RTE_LOG(INFO, APP, "time stamp clock running at %" PRIu64 " Hz\n",
 			 rte_get_timer_hz());
 	RTE_LOG(INFO, APP, "Ring sizes: NIC RX = %u, Mempool = %d SW queue = %u,"
 			 "NIC TX = %u\n", ring_conf.rx_size, NB_MBUF, ring_conf.ring_size,
 			 ring_conf.tx_size);
 	RTE_LOG(INFO, APP, "Burst sizes: RX read = %hu, RX write = %hu,\n"
 						  "             Worker read/QoS enqueue = %hu,\n"
 						  "             QoS dequeue = %hu, Worker write = %hu\n",
 		burst_conf.rx_burst, burst_conf.ring_burst, burst_conf.ring_burst, 
 		burst_conf.qos_dequeue, burst_conf.tx_burst);
 	RTE_LOG(INFO, APP, "NIC thresholds RX (p = %hhu, h = %hhu, w = %hhu),"
 				 "TX (p = %hhu, h = %hhu, w = %hhu)\n",
 		rx_thresh.pthresh, rx_thresh.hthresh, rx_thresh.wthresh,
 		tx_thresh.pthresh, tx_thresh.hthresh, tx_thresh.wthresh);
 	return 0;
 }
--- a/examples/qos_sched/main.c
+++ b/examples/qos_sched/main.c
@ -0,0 +1,246 @@
 /*-
 *   BSD LICENSE
 * 
 *   Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
 *   All rights reserved.
 * 
 *   Redistribution and use in source and binary forms, with or without 
 *   modification, are permitted provided that the following conditions 
 *   are met:
 * 
 *     * Redistributions of source code must retain the above copyright 
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright 
 *       notice, this list of conditions and the following disclaimer in 
 *       the documentation and/or other materials provided with the 
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its 
 *       contributors may be used to endorse or promote products derived 
 *       from this software without specific prior written permission.
 * 
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 * 
 */
 #include <unistd.h>
 #include <stdint.h>
 #include <rte_log.h>
 #include <rte_mbuf.h>
 #include <rte_malloc.h>
 #include <rte_cycles.h>
 #include <rte_ethdev.h>
 #include <rte_memcpy.h>
 #include <rte_byteorder.h>
 #include <rte_branch_prediction.h>
 #include <rte_sched.h>
 #include "main.h"
 #define APP_MODE_NONE 0
 #define APP_RX_MODE   1
 #define APP_WT_MODE   2
 #define APP_TX_MODE   4
 /*
  * Per-lcore entry point (launched on every slave lcore by MAIN).
  *
  * Scans the configured flows, collects the RX, worker (WT) and TX thread
  * configurations assigned to the calling lcore and dispatches to the matching
  * thread function. Supported role sets are: RX only, WT only, TX only and
  * WT + TX on the same lcore. The configuration arrays are zero-filled before
  * use, so unused slots are NULL.
  *
  * @param dummy Unused.
  * @return 0 if the selected thread function returns, -1 when the lcore has no
  *         role or an unsupported combination of roles.
  */
 static int
 app_main_loop(__attribute__((unused))void *dummy)
 {
 	uint32_t lcore_id;
 	uint32_t i, mode;
 	uint32_t rx_idx = 0;
 	uint32_t wt_idx = 0;
 	uint32_t tx_idx = 0;
 	struct thread_conf *rx_confs[MAX_DATA_STREAMS];
 	struct thread_conf *wt_confs[MAX_DATA_STREAMS];
 	struct thread_conf *tx_confs[MAX_DATA_STREAMS];
 	memset(rx_confs, 0, sizeof(rx_confs));
 	memset(wt_confs, 0, sizeof(wt_confs));
 	memset(tx_confs, 0, sizeof(tx_confs));
 	mode = APP_MODE_NONE;
 	lcore_id = rte_lcore_id();
 	/* gather every role this lcore plays, across all flows */
 	for (i = 0; i < nb_pfc; i++) {
 		struct flow_conf *flow = &qos_conf[i];
 		if (flow->rx_core == lcore_id) {
 			flow->rx_thread.rx_port = flow->rx_port;
 			flow->rx_thread.rx_ring =  flow->rx_ring;
 			flow->rx_thread.rx_queue = flow->rx_queue;
 			rx_confs[rx_idx++] = &flow->rx_thread;
 			mode |= APP_RX_MODE;
 		}
 		if (flow->tx_core == lcore_id) {
 			flow->tx_thread.tx_port = flow->tx_port;
 			flow->tx_thread.tx_ring =  flow->tx_ring;
 			flow->tx_thread.tx_queue = flow->tx_queue;
 			tx_confs[tx_idx++] = &flow->tx_thread;
 			mode |= APP_TX_MODE;
 		}
 		if (flow->wt_core == lcore_id) {
 			flow->wt_thread.rx_ring =  flow->rx_ring;
 			flow->wt_thread.tx_ring =  flow->tx_ring;
 			flow->wt_thread.tx_port =  flow->tx_port;
 			flow->wt_thread.sched_port =  flow->sched_port;
 			wt_confs[wt_idx++] = &flow->wt_thread;
 			mode |= APP_WT_MODE;
 		}
 	}
 	if (mode == APP_MODE_NONE) {
 		RTE_LOG(INFO, APP, "lcore %u has nothing to do\n", lcore_id);
 		return -1;
 	}
 	/* RX and WT may not share an lcore, whether or not TX is also assigned */
 	if ((mode & APP_RX_MODE) && (mode & APP_WT_MODE)) {
 		RTE_LOG(INFO, APP, "lcore %u was configured for both RX and WT !!!\n",
 				 lcore_id);
 		return -1;
 	}
 	/* RX + TX matches none of the thread functions below: report it */
 	if (mode == (APP_RX_MODE | APP_TX_MODE)) {
 		RTE_LOG(ERR, APP, "lcore %u was configured for both RX and TX, "
 				 "which is not supported\n", lcore_id);
 		return -1;
 	}
 	RTE_LOG(INFO, APP, "entering main loop on lcore %u\n", lcore_id);
 	if (mode == APP_RX_MODE) {
 		for (i = 0; i < rx_idx; i++) {
 			RTE_LOG(INFO, APP, "flow %u lcoreid %u reading port %hu\n",
 				i, lcore_id, rx_confs[i]->rx_port);
 		}
 		app_rx_thread(rx_confs);
 	}
 	else if (mode == (APP_TX_MODE | APP_WT_MODE)) {
 		/* combined scheduler + writer: one TX burst table per flow */
 		for (i = 0; i < wt_idx; i++) {
 			wt_confs[i]->m_table = rte_malloc("table_wt", sizeof(struct rte_mbuf *)
 					* burst_conf.tx_burst, CACHE_LINE_SIZE);
 			if (wt_confs[i]->m_table == NULL)
 				rte_panic("flow %u unable to allocate memory buffer\n", i);
 			RTE_LOG(INFO, APP, "flow %u lcoreid %u sched+write port %hu\n",
 				i, lcore_id, wt_confs[i]->tx_port);
 		}
 		app_mixed_thread(wt_confs);
 	}
 	else if (mode == APP_TX_MODE) {
 		/* writer only: one TX burst table per flow */
 		for (i = 0; i < tx_idx; i++) {
 			tx_confs[i]->m_table = rte_malloc("table_tx", sizeof(struct rte_mbuf *)
 					* burst_conf.tx_burst, CACHE_LINE_SIZE);
 			if (tx_confs[i]->m_table == NULL)
 				rte_panic("flow %u unable to allocate memory buffer\n", i);
 			RTE_LOG(INFO, APP, "flow %u lcoreid %u writing port %hu\n",
 				i, lcore_id, tx_confs[i]->tx_port);
 		}
 		app_tx_thread(tx_confs);
 	}
 	else if (mode == APP_WT_MODE){
 		for (i = 0; i < wt_idx; i++) {
 			RTE_LOG(INFO, APP, "flow %u lcoreid %u scheduling \n", i, lcore_id);
 		}
 		app_worker_thread(wt_confs);
 	}
 	return 0;
 }
 static void
 app_stat(void)
 {
 	uint32_t i;
 	struct rte_eth_stats stats;
 	static struct rte_eth_stats rx_stats[MAX_DATA_STREAMS];
 	static struct rte_eth_stats tx_stats[MAX_DATA_STREAMS];
 	/* print statistics */
 	for(i = 0; i < nb_pfc; i++) {
 		struct flow_conf *flow = &qos_conf[i];
 		rte_eth_stats_get(flow->rx_port, &stats);
 		printf("\nRX port %hu: rx: %"PRIu64 " err: %"PRIu64 " no_mbuf: %"PRIu64 "\n",
 			flow->rx_port,
 			stats.ipackets - rx_stats[i].ipackets,
 			stats.ierrors - rx_stats[i].ierrors,
 			stats.rx_nombuf - rx_stats[i].rx_nombuf);
 		memcpy(&rx_stats[i], &stats, sizeof(stats));
 		rte_eth_stats_get(flow->tx_port, &stats);
 		printf("TX port %hu: tx: %" PRIu64 " err: %" PRIu64 "\n",
 			flow->tx_port,
 			stats.opackets - tx_stats[i].opackets,
 			stats.oerrors - tx_stats[i].oerrors);
 		memcpy(&tx_stats[i], &stats, sizeof(stats));
 		//printf("MP = %d\n", rte_mempool_count(conf->app_pktmbuf_pool));
 #if APP_COLLECT_STAT
 		printf("-------+------------+------------+\n");
 		printf("       |  received  |   dropped  |\n");
 		printf("-------+------------+------------+\n");
 		printf("  RX   | %10" PRIu64 " | %10" PRIu64 " |\n",
 			flow->rx_thread.stat.nb_rx,
 			flow->rx_thread.stat.nb_drop);
 		printf("QOS+TX | %10" PRIu64 " | %10" PRIu64 " |   pps: %"PRIu64 " \n",
 			flow->wt_thread.stat.nb_rx,
 			flow->wt_thread.stat.nb_drop,
 			flow->wt_thread.stat.nb_rx - flow->wt_thread.stat.nb_drop);
 		printf("-------+------------+------------+\n");
 		memset(&flow->rx_thread.stat, 0, sizeof(struct thread_stat));
 		memset(&flow->wt_thread.stat, 0, sizeof(struct thread_stat));
 #endif
 	}
 }
 /*
  * Application entry point: parse the command line, initialise the
  * application, start app_main_loop() on the slave lcores and then report
  * statistics once per second from the master lcore. Returns -1 if argument
  * parsing or initialisation fails; otherwise it never returns.
  */
 int
 MAIN(int argc, char **argv)
 {
 	if (app_parse_args(argc, argv) < 0)
 		return -1;
 	if (app_init() < 0)
 		return -1;
 	/* launch per-lcore init on every lcore except the master */
 	rte_eal_mp_remote_launch(app_main_loop, NULL, SKIP_MASTER);
 	/* print statistics every second */
 	for (;;) {
 		sleep(1);
 		app_stat();
 	}
 }
--- a/examples/qos_sched/main.h
+++ b/examples/qos_sched/main.h
@ -0,0 +1,186 @@
 /*-
 *   BSD LICENSE
 * 
 *   Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
 *   All rights reserved.
 * 
 *   Redistribution and use in source and binary forms, with or without 
 *   modification, are permitted provided that the following conditions 
 *   are met:
 * 
 *     * Redistributions of source code must retain the above copyright 
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright 
 *       notice, this list of conditions and the following disclaimer in 
 *       the documentation and/or other materials provided with the 
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its 
 *       contributors may be used to endorse or promote products derived 
 *       from this software without specific prior written permission.
 * 
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 * 
 */
 #ifndef _MAIN_H_
 #define _MAIN_H_
 #ifdef __cplusplus
 extern "C" {
 #endif
 #include <rte_sched.h>
 #ifdef RTE_EXEC_ENV_BAREMETAL
 #error "Baremetal is not supported"
 #else
 #define MAIN main
 #endif
 /* Log type used by the RTE_LOG(level, APP, ...) calls of this application */
 #define RTE_LOGTYPE_APP RTE_LOGTYPE_USER1
 /*
  * Configurable number of RX/TX ring descriptors
  */
 #define APP_RX_DESC_DEFAULT 128
 #define APP_TX_DESC_DEFAULT 256
 /* Size of one mempool element: packet data room plus mbuf header and headroom */
 #define MBUF_SIZE (1528 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM)
 #define APP_RING_SIZE (8*1024)
 /* Number of mbufs in each per-flow mempool (see rte_mempool_create in app_init) */
 #define NB_MBUF   (64*1024*32)
 /* Burst size constants (packets); presumably the defaults for burst_conf - TODO confirm */
 #define MAX_PKT_RX_BURST 64
 #define PKT_ENQUEUE 64
 #define PKT_DEQUEUE 32
 #define MAX_PKT_TX_BURST 64
 #define RX_PTHRESH 8 /**< Default values of RX prefetch threshold reg. */
 #define RX_HTHRESH 8 /**< Default values of RX host threshold reg. */
 #define RX_WTHRESH 4 /**< Default values of RX write-back threshold reg. */
 #define TX_PTHRESH 36 /**< Default values of TX prefetch threshold reg. */
 #define TX_HTHRESH 0  /**< Default values of TX host threshold reg. */
 #define TX_WTHRESH 0  /**< Default values of TX write-back threshold reg. */
 #define BURST_TX_DRAIN_US 100
 /* Upper bound on the number of flows; sizes the per-lcore thread_conf arrays */
 #define MAX_DATA_STREAMS (RTE_MAX_LCORE/2)
 /* Dimensions of app_pipe_to_profile[][]; the first also sizes subport_params[] */
 #define MAX_SCHED_SUBPORTS		8
 #define MAX_SCHED_PIPES  		4096
 /* Software statistics are collected unless the build overrides this with 0 */
 #ifndef APP_COLLECT_STAT
 #define APP_COLLECT_STAT		1
 #endif
 /*
  * APP_STATS_ADD(stat, val): add val to the counter stat when statistics are
  * enabled; otherwise only evaluate val and discard it.
  */
 #if APP_COLLECT_STAT
 #define APP_STATS_ADD(stat,val) (stat) += (val)
 #else
 #define APP_STATS_ADD(stat,val) do {(void) (val);} while (0)
 #endif
 /*
  * Software counters kept per thread; app_stat() prints them as the
  * "received" and "dropped" columns and then clears them.
  */
 struct thread_stat
 {
 	uint64_t nb_rx;   /* shown in the "received" column */
 	uint64_t nb_drop; /* shown in the "dropped" column */
 };
 /*
  * Working state of one RX, worker or TX thread for one flow. The port, queue,
  * ring and scheduler fields are copied from the owning flow_conf by
  * app_main_loop(); each role fills in only the fields it uses.
  */
 struct thread_conf
 {
 	uint32_t counter;
 	uint32_t n_mbufs;
 	/* burst_conf.tx_burst mbuf pointers, allocated in app_main_loop() for
 	 * threads that transmit */
 	struct rte_mbuf **m_table;
 	uint8_t rx_port;
 	uint8_t tx_port;
 	uint16_t rx_queue;
 	uint16_t tx_queue;
 	struct rte_ring *rx_ring;
 	struct rte_ring *tx_ring;
 	struct rte_sched_port *sched_port;
 #if APP_COLLECT_STAT
 	/* present only when statistics collection is compiled in */
 	struct thread_stat stat;
 #endif
 } __rte_cache_aligned;
 /*
  * Configuration of one packet flow: the lcores that run its RX, worker and TX
  * roles, the ports and queues it uses, and the resources created for it by
  * app_init() (rings, mempool, scheduler port).
  */
 struct flow_conf
 {
 	/* lcore ids compared against rte_lcore_id() in app_main_loop() */
 	uint32_t rx_core;
 	uint32_t wt_core;
 	uint32_t tx_core;
 	uint8_t rx_port;
 	uint8_t tx_port;
 	uint16_t rx_queue;
 	uint16_t tx_queue;
 	/* created (or looked up by name) in app_init() */
 	struct rte_ring *rx_ring;
 	struct rte_ring *tx_ring;
 	struct rte_sched_port *sched_port;
 	struct rte_mempool *mbuf_pool;
 	/* per-role thread state, filled in by app_main_loop() */
 	struct thread_conf rx_thread;
 	struct thread_conf wt_thread;
 	struct thread_conf tx_thread;
 };
 /* Sizes of the NIC descriptor rings and of the software rings */
 struct ring_conf
 {
 	uint32_t rx_size;   /* NIC RX ring size, passed to rte_eth_rx_queue_setup() */
 	uint32_t ring_size; /* software ring size, passed to rte_ring_create() */
 	uint32_t tx_size;   /* NIC TX ring size, passed to rte_eth_tx_queue_setup() */
 };
 /* Burst sizes of the pipeline stages, as labelled in the app_init() log output */
 struct burst_conf
 {
 	uint16_t rx_burst;    /* "RX read"; also scales the mempool cache (x4) */
 	uint16_t ring_burst;  /* "RX write" and "Worker read/QoS enqueue" */
 	uint16_t qos_dequeue; /* "QoS dequeue" */
 	uint16_t tx_burst;    /* "Worker write"; also the length of m_table */
 };
 /* NIC ring threshold settings; one instance each for RX and TX */
 struct ring_thresh
 {
 	uint8_t pthresh; /**< Ring prefetch threshold. */
 	uint8_t hthresh; /**< Ring host threshold. */
 	uint8_t wthresh; /**< Ring writeback threshold. */
 };
 /* Number of configured flows, i.e. valid entries in qos_conf[] */
 extern uint32_t nb_pfc;
 /* Path of the configuration profile to load, or NULL for the built-in defaults */
 extern const char *cfg_profile;
 /* Per-flow configuration table */
 extern struct flow_conf qos_conf[];
 /* Pipe profile index per [subport][pipe]; -1 means the pipe is not configured */
 extern int app_pipe_to_profile[MAX_SCHED_SUBPORTS][MAX_SCHED_PIPES];
 extern struct ring_conf ring_conf;
 extern struct burst_conf burst_conf;
 extern struct ring_thresh rx_thresh;
 extern struct ring_thresh tx_thresh;
 /* Scheduler port parameters shared by all flows (completed per port at init) */
 extern struct rte_sched_port_params port_params;
 int MAIN(int argc, char **argv);
 int app_parse_args(int argc, char **argv);
 int app_init(void);
 /*
  * Thread bodies, each given an array of thread_conf pointers; app_main_loop()
  * zero-fills that array before populating it.
  */
 void app_rx_thread(struct thread_conf **qconf);
 void app_tx_thread(struct thread_conf **qconf);
 void app_worker_thread(struct thread_conf **qconf);
 void app_mixed_thread(struct thread_conf **qconf);
 #ifdef __cplusplus
 }
 #endif
 #endif /* _MAIN_H_ */
--- a/examples/qos_sched/profile.cfg
+++ b/examples/qos_sched/profile.cfg
@ -0,0 +1,109 @@
 ;   BSD LICENSE
 ; 
 ;   Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
 ;   All rights reserved.
 ; 
 ;   Redistribution and use in source and binary forms, with or without 
 ;   modification, are permitted provided that the following conditions 
 ;   are met:
 ; 
 ;     * Redistributions of source code must retain the above copyright 
 ;       notice, this list of conditions and the following disclaimer.
 ;     * Redistributions in binary form must reproduce the above copyright 
 ;       notice, this list of conditions and the following disclaimer in 
 ;       the documentation and/or other materials provided with the 
 ;       distribution.
 ;     * Neither the name of Intel Corporation nor the names of its 
 ;       contributors may be used to endorse or promote products derived 
 ;       from this software without specific prior written permission.
 ; 
 ;   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 ;   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 ;   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
 ;   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
 ;   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
 ;   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
 ;   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
 ;   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
 ;   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
 ;   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
 ;   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 ; 
 ; This file enables the following hierarchical scheduler configuration for each
 ; 10GbE output port:
 ;	* Single subport (subport 0): 
 ;		- Subport rate set to 100% of port rate
 ;		- Each of the 4 traffic classes has rate set to 100% of port rate
 ;	* 4K pipes per subport 0 (pipes 0 .. 4095) with identical configuration:
 ;		- Pipe rate set to 1/4K of port rate
 ;		- Each of the 4 traffic classes has rate set to 100% of pipe rate
 ;		- Within each traffic class, the byte-level WRR weights for the 4 queues
 ;         are set to 1:1:1:1
 ;
 ; For more details, please refer to chapter "Quality of Service (QoS) Framework"
 ; of Intel Data Plane Development Kit (Intel DPDK) Programmer's Guide.
 ; Port configuration
 [port]
 frame overhead = 24
 number of subports per port = 1
 number of pipes per subport = 4096
 queue sizes = 64 64 64 64
 ; Subport configuration
 [subport 0]
 tb rate = 1250000000           ; Bytes per second
 tb size = 1000000              ; Bytes
 tc 0 rate = 1250000000         ; Bytes per second
 tc 1 rate = 1250000000         ; Bytes per second
 tc 2 rate = 1250000000         ; Bytes per second
 tc 3 rate = 1250000000         ; Bytes per second
 tc period = 10                 ; Milliseconds
 tc oversubscription period = 10; Milliseconds
 pipe 0-4095 = 0                ; These pipes are configured with pipe profile 0
 ; Pipe configuration
 [pipe profile 0]
 tb rate = 305175               ; Bytes per second
 tb size = 1000000              ; Bytes
 tc 0 rate = 305175             ; Bytes per second
 tc 1 rate = 305175             ; Bytes per second
 tc 2 rate = 305175             ; Bytes per second
 tc 3 rate = 305175             ; Bytes per second
 tc period = 40                 ; Milliseconds
 tc 0 oversubscription weight = 1
 tc 1 oversubscription weight = 1
 tc 2 oversubscription weight = 1
 tc 3 oversubscription weight = 1
 tc 0 wrr weights = 1 1 1 1
 tc 1 wrr weights = 1 1 1 1
 tc 2 wrr weights = 1 1 1 1
 tc 3 wrr weights = 1 1 1 1
 ; RED params per traffic class and color (Green / Yellow / Red)
 [red]
 tc 0 wred min = 48 40 32
 tc 0 wred max = 64 64 64
 tc 0 wred inv prob = 10 10 10
 tc 0 wred weight = 9 9 9
 tc 1 wred min = 48 40 32
 tc 1 wred max = 64 64 64
 tc 1 wred inv prob = 10 10 10
 tc 1 wred weight = 9 9 9
 tc 2 wred min = 48 40 32
 tc 2 wred max = 64 64 64
 tc 2 wred inv prob = 10 10 10
 tc 2 wred weight = 9 9 9
 tc 3 wred min = 48 40 32
 tc 3 wred max = 64 64 64
 tc 3 wred inv prob = 10 10 10
 tc 3 wred weight = 9 9 9
--- a/lib/Makefile
+++ b/lib/Makefile
@ -48,6 +48,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_LPM) += librte_lpm
 DIRS-$(CONFIG_RTE_LIBRTE_NET) += librte_net
 DIRS-$(CONFIG_RTE_LIBRTE_POWER) += librte_power
 DIRS-$(CONFIG_RTE_LIBRTE_METER) += librte_meter
 DIRS-$(CONFIG_RTE_LIBRTE_SCHED) += librte_sched
 DIRS-$(CONFIG_RTE_LIBRTE_PMAC) += librte_pmac
 ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y)
--- a/lib/librte_eal/common/include/rte_log.h
+++ b/lib/librte_eal/common/include/rte_log.h
@ -74,6 +74,7 @@ extern struct rte_logs rte_logs;
 #define RTE_LOGTYPE_PMAC    0x00000200 /**< Log related to PMAC. */
 #define RTE_LOGTYPE_POWER   0x00000400 /**< Log related to power. */
 #define RTE_LOGTYPE_METER   0x00000800 /**< Log related to QoS meter. */
 #define RTE_LOGTYPE_SCHED   0x00001000 /**< Log related to QoS port scheduler. */
 /* these log types can be used in an application */
 #define RTE_LOGTYPE_USER1   0x01000000 /**< User-defined log type 1. */
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@ -158,6 +158,7 @@ struct rte_pktmbuf {
 			uint16_t hash;
 			uint16_t id;
 		} fdir;             /**< Filter identifier if FDIR enabled */
 		uint32_t sched;     /**< Hierarchical scheduler */
 	} hash;                 /**< hash information */
 };
--- a/lib/librte_sched/Makefile
+++ b/lib/librte_sched/Makefile
@ -0,0 +1,56 @@
 #   BSD LICENSE
 # 
 #   Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
 #   All rights reserved.
 # 
 #   Redistribution and use in source and binary forms, with or without 
 #   modification, are permitted provided that the following conditions 
 #   are met:
 # 
 #     * Redistributions of source code must retain the above copyright 
 #       notice, this list of conditions and the following disclaimer.
 #     * Redistributions in binary form must reproduce the above copyright 
 #       notice, this list of conditions and the following disclaimer in 
 #       the documentation and/or other materials provided with the 
 #       distribution.
 #     * Neither the name of Intel Corporation nor the names of its 
 #       contributors may be used to endorse or promote products derived 
 #       from this software without specific prior written permission.
 # 
 #   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 #   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 #   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
 #   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
 #   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
 #   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
 #   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
 #   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
 #   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
 #   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
 #   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 # 
 include $(RTE_SDK)/mk/rte.vars.mk
 #
 # library name
 #
 LIB = librte_sched.a
 CFLAGS += -O3
 CFLAGS += -g
 CFLAGS += $(WERROR_FLAGS)
 #
 # all sources are stored in SRCS-y
 #
 SRCS-$(CONFIG_RTE_LIBRTE_SCHED) += rte_sched.c rte_red.c rte_approx.c
 # install includes
 SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include := rte_sched.h rte_bitmap.h rte_sched_common.h rte_red.h rte_approx.h
 # this lib depends upon:
 DEPDIRS-$(CONFIG_RTE_LIBRTE_SCHED) += lib/librte_mempool lib/librte_mbuf
 DEPDIRS-$(CONFIG_RTE_LIBRTE_SCHED) += lib/librte_net lib/librte_timer
 include $(RTE_SDK)/mk/rte.lib.mk
--- a/lib/librte_sched/rte_approx.c
+++ b/lib/librte_sched/rte_approx.c
@ -0,0 +1,197 @@
 /*-
 *   BSD LICENSE
 * 
 *   Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
 *   All rights reserved.
 * 
 *   Redistribution and use in source and binary forms, with or without 
 *   modification, are permitted provided that the following conditions 
 *   are met:
 * 
 *     * Redistributions of source code must retain the above copyright 
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright 
 *       notice, this list of conditions and the following disclaimer in 
 *       the documentation and/or other materials provided with the 
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its 
 *       contributors may be used to endorse or promote products derived 
 *       from this software without specific prior written permission.
 * 
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 * 
 */
 #include <stdlib.h>
 #include "rte_approx.h"
 /* 
 * Based on paper "Approximating Rational Numbers by Fractions" by Michal 
 * Forisek forisek@dcs.fmph.uniba.sk
 *
 * Given a rational number alpha with 0 < alpha < 1 and a precision d, the goal
 * is to find positive integers p, q such that alpha - d < p/q < alpha + d, and
 * q is minimal.
 *
 * http://people.ksp.sk/~misof/publications/2007approx.pdf
 */
 /*
  * Fraction comparison: returns 1 when (a/b) < (c/d), 0 otherwise.
  * The comparison is done by cross-multiplication; the products are computed
  * in 64 bits because the product of two 32-bit values can exceed 32 bits and
  * would otherwise wrap and give the wrong ordering.
  */
 static inline uint32_t 
 less(uint32_t a, uint32_t b, uint32_t c, uint32_t d)
 {
 	return ((uint64_t) a * d < (uint64_t) b * c);
 }
 /*
  * Fraction comparison: returns 1 when (a/b) <= (c/d), 0 otherwise.
  * The cross products are computed in 64 bits so that they cannot wrap.
  */
 static inline uint32_t
 less_or_equal(uint32_t a, uint32_t b, uint32_t c, uint32_t d)
 {
 	return ((uint64_t) a * d <= (uint64_t) b * c);
 }
 /*
  * Check whether a/b is a valid approximation, i.e. whether it lies strictly
  * between (alpha_num - d_num)/denum and (alpha_num + d_num)/denum.
  * Returns 1 if it does, 0 otherwise.
  */
 static inline uint32_t 
 matches(uint32_t a, uint32_t b, 
 	uint32_t alpha_num, uint32_t d_num, uint32_t denum)
 {
 	const uint32_t lower_num = alpha_num - d_num;
 	const uint32_t upper_num = alpha_num + d_num;
 	/* the upper bound is only tested once the lower bound is cleared */
 	return (!less_or_equal(a, b, lower_num, denum) &&
 		less(a, b, upper_num, denum)) ? 1 : 0;
 }
 /*
  * Compute the result fraction after a "left" search step.
  * With U = alpha_num + d_num, the step count is
  *   k = (denum * p_b - U * q_b) / (U * q_a - denum * p_a) + 1
  * (integer division) and the result is
  *   *p = p_b + k * p_a,  *q = q_b + k * q_a.
  * All arithmetic is unsigned 32-bit.
  */
 static inline void 
 find_exact_solution_left(uint32_t p_a, uint32_t q_a, uint32_t p_b, uint32_t q_b, 
 	uint32_t alpha_num, uint32_t d_num, uint32_t denum, uint32_t *p, uint32_t *q)
 {
 	const uint32_t upper_num = alpha_num + d_num;
 	uint32_t steps;
 	steps = (denum * p_b - upper_num * q_b) / (upper_num * q_a - denum * p_a);
 	steps += 1;
 	*p = p_b + steps * p_a;
 	*q = q_b + steps * q_a;
 }
 /*
  * Compute the result fraction after a "right" search step.
  * With L = alpha_num - d_num, the step count is
  *   k = (L * q_b - denum * p_b) / (denum * p_a - L * q_a) + 1
  * (integer division) and the result is
  *   *p = p_b + k * p_a,  *q = q_b + k * q_a.
  * All arithmetic is unsigned 32-bit.
  */
 static inline void
 find_exact_solution_right(uint32_t p_a, uint32_t q_a, uint32_t p_b, uint32_t q_b,
 	uint32_t alpha_num, uint32_t d_num, uint32_t denum, uint32_t *p, uint32_t *q) 
 {
 	const uint32_t lower_num = alpha_num - d_num;
 	uint32_t steps;
 	steps = (lower_num * q_b - denum * p_b) / (denum * p_a - lower_num * q_a);
 	steps += 1;
 	*p = p_b + steps * p_a;
 	*q = q_b + steps * q_a;
 }
 /*
  * Search for a fraction p/q approximating alpha_num/denum to within
  * d_num/denum (the acceptance test is matches()).
  *
  * The search keeps two fractions, p_a/q_a and p_b/q_b, starting from 0/1 and
  * 1/1, and alternates "left" and "right" passes. Each pass computes a step
  * count x, tests the candidates reached with x and x - 1 steps and, if either
  * is accepted, lets find_exact_solution_left/right() produce the result;
  * otherwise both fractions are updated and the search continues.
  *
  * Returns 0 with *p and *q set on success, or -1 when the inputs do not
  * satisfy 0 < d_num < alpha_num < denum and d_num + alpha_num < denum.
  * NOTE(review): the intermediate products are plain uint32_t arithmetic and
  * x_denum is not checked for zero - confirm the valid input range.
  */
 static int 
 find_best_rational_approximation(uint32_t alpha_num, uint32_t d_num, uint32_t denum, uint32_t *p, uint32_t *q)
 {
 	uint32_t p_a, q_a, p_b, q_b;
 	/* check assumptions on the inputs */
 	if (!((0 < d_num) && (d_num < alpha_num) && (alpha_num < denum) && (d_num + alpha_num < denum))) {
 		return -1;
 	}
 	/* set initial bounds for the search */
 	p_a = 0;
 	q_a = 1;
 	p_b = 1;
 	q_b = 1;
 	/* the loop only exits through the two "return 0" statements below */
 	while (1) {
 		uint32_t new_p_a, new_q_a, new_p_b, new_q_b;
 		uint32_t x_num, x_denum, x;
 		int aa, bb;
 		/* compute the number of steps to the left */
 		x_num = denum * p_b - alpha_num * q_b;
 		x_denum = - denum * p_a + alpha_num * q_a;
 		x = (x_num + x_denum - 1) / x_denum; /* x = ceil(x_num / x_denum) */
 		/* check whether we have a valid approximation */
 		aa = matches(p_b + x * p_a, q_b + x * q_a, alpha_num, d_num, denum);
 		bb = matches(p_b + (x-1) * p_a, q_b + (x - 1) * q_a, alpha_num, d_num, denum);
 		if (aa || bb) {
 			find_exact_solution_left(p_a, q_a, p_b, q_b, alpha_num, d_num, denum, p, q);
 			return 0;
 		}
 		/* update the interval */
 		new_p_a = p_b + (x - 1) * p_a ;
 		new_q_a = q_b + (x - 1) * q_a;
 		new_p_b = p_b + x * p_a ;
 		new_q_b = q_b + x * q_a;
 		p_a = new_p_a ;
 		q_a = new_q_a;
 		p_b = new_p_b ;
 		q_b = new_q_b;
 		/* compute the number of steps to the right */
 		x_num = alpha_num * q_b - denum * p_b;
 		x_denum = - alpha_num * q_a + denum * p_a;
 		x = (x_num + x_denum - 1) / x_denum; /* x = ceil(x_num / x_denum) */
 		/* check whether we have a valid approximation */
 		aa = matches(p_b + x * p_a, q_b + x * q_a, alpha_num, d_num, denum);
 		bb = matches(p_b + (x - 1) * p_a, q_b + (x - 1) * q_a, alpha_num, d_num, denum);
 		if (aa || bb) {
 			find_exact_solution_right(p_a, q_a, p_b, q_b, alpha_num, d_num, denum, p, q);
 			return 0;
 		 }
 		/* update the interval */
 		new_p_a = p_b + (x - 1) * p_a;
 		new_q_a = q_b + (x - 1) * q_a;
 		new_p_b = p_b + x * p_a;
 		new_q_b = q_b + x * q_a;
 		p_a = new_p_a;
 		q_a = new_q_a;
 		p_b = new_p_b;
 		q_b = new_q_b;
 	}
 }
 /*
  * Find a fraction p/q that approximates alpha to within the precision d.
  *
  * alpha and d are scaled by powers of ten until d >= 1, truncated to
  * integers, and passed with the common denominator to
  * find_best_rational_approximation().
  *
  * @param alpha Value to approximate; must satisfy d < alpha < 1.
  * @param d     Precision; must satisfy 0 < d < alpha.
  * @param p     Output: numerator of the approximation.
  * @param q     Output: denominator of the approximation.
  * @return 0 on success; -1 when alpha or d are out of range, including a
  *         precision too small to be scaled into 32 bits; -2 when p or q is
  *         NULL.
  */
 int rte_approx(double alpha, double d, uint32_t *p, uint32_t *q)
 {
 	/* largest power of ten that fits in a uint32_t */
 	const uint32_t denum_max = 1000000000;
 	uint32_t alpha_num, d_num, denum;
 	/* Check input arguments */
 	if (!((0.0 < d) && (d < alpha) && (alpha < 1.0))) {
 		return -1;
 	}
 	if ((p == NULL) || (q == NULL)) {
 		return -2;
 	}
 	/* Compute alpha_num, d_num and denum */
 	denum = 1;
 	while (d < 1) {
 		/*
 		 * One more scaling step would overflow denum and could push the
 		 * scaled alpha outside the uint32_t range, making the casts below
 		 * undefined: reject such a small precision instead.
 		 */
 		if (denum >= denum_max) {
 			return -1;
 		}
 		alpha *= 10;
 		d *= 10;
 		denum *= 10;
 	}
 	/* alpha < 1 and d < alpha, so both scaled values are below denum */
 	alpha_num = (uint32_t) alpha;
 	d_num = (uint32_t) d;
 	/* Perform approximation */
 	return find_best_rational_approximation(alpha_num, d_num, denum, p, q);	
 }
--- a/lib/librte_sched/rte_approx.h
+++ b/lib/librte_sched/rte_approx.h
@ -0,0 +1,76 @@
 /*-
 *   BSD LICENSE
 * 
 *   Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
 *   All rights reserved.
 * 
 *   Redistribution and use in source and binary forms, with or without 
 *   modification, are permitted provided that the following conditions 
 *   are met:
 * 
 *     * Redistributions of source code must retain the above copyright 
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright 
 *       notice, this list of conditions and the following disclaimer in 
 *       the documentation and/or other materials provided with the 
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its 
 *       contributors may be used to endorse or promote products derived 
 *       from this software without specific prior written permission.
 * 
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 * 
 */
 #ifndef __INCLUDE_RTE_APPROX_H__
 #define __INCLUDE_RTE_APPROX_H__
 #ifdef __cplusplus
 extern "C" {
 #endif
 /**
 * @file
 * RTE Rational Approximation
 *
 * Given a rational number alpha with 0 < alpha < 1 and a precision d, the goal
 * is to find positive integers p, q such that alpha - d < p/q < alpha + d, and
 * q is minimal.
 * 
 ***/
 #include <stdint.h>
 /**
 * Find best rational approximation
 *
 * Looks for positive integers p and q such that alpha - d < p/q < alpha + d
 * with q minimal.
 *
 * @param alpha
 *   Rational number to approximate. Must satisfy d < alpha < 1.
 * @param d
 *   Precision for the rational approximation. Must satisfy 0 < d < alpha.
 * @param p
 *   Pointer to pre-allocated space where the numerator of the rational 
 *   approximation will be stored when operation is successful
 * @param q
 *   Pointer to pre-allocated space where the denominator of the rational
 *   approximation will be stored when operation is successful
 * @return
 *   0 upon success, negative error code otherwise (for example when the
 *   alpha / d range constraint is violated or when p or q is NULL)
 */
 int rte_approx(double alpha, double d, uint32_t *p, uint32_t *q);
 #ifdef __cplusplus
 }
 #endif
 #endif /* __INCLUDE_RTE_APPROX_H__ */
--- a/lib/librte_sched/rte_bitmap.h
+++ b/lib/librte_sched/rte_bitmap.h
@ -0,0 +1,505 @@
 /*-
 *   BSD LICENSE
 * 
 *   Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
 *   All rights reserved.
 * 
 *   Redistribution and use in source and binary forms, with or without 
 *   modification, are permitted provided that the following conditions 
 *   are met:
 * 
 *     * Redistributions of source code must retain the above copyright 
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright 
 *       notice, this list of conditions and the following disclaimer in 
 *       the documentation and/or other materials provided with the 
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its 
 *       contributors may be used to endorse or promote products derived 
 *       from this software without specific prior written permission.
 * 
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 * 
 */
 #ifndef __INCLUDE_RTE_BITMAP_H__
 #define __INCLUDE_RTE_BITMAP_H__
 #ifdef __cplusplus
 extern "C" {
 #endif
 /**
 * @file
 * RTE Bitmap
 *
 * The bitmap component provides a mechanism to manage large arrays of bits
 * through bit get/set/clear and bit array scan operations.
 *
 * The bitmap scan operation is optimized for 64-bit CPUs using 64-byte cache
 * lines. The bitmap is hierarchically organized using two arrays (array1 and
 * array2), with each bit in array1 being associated with a full cache line
 * (512 bits) of bitmap bits, which are stored in array2: the bit in array1 is
 * set only when there is at least one bit set within its associated array2
 * bits, otherwise the bit in array1 is cleared. The read and write operations
 * for array1 and array2 are always done in slabs of 64 bits.
 *
 * This bitmap is not thread safe. For lock free operation on a specific bitmap
 * instance, a single writer thread performing bit set/clear operations is
 * allowed, only the writer thread can do bitmap scan operations, while there 
 * can be several reader threads performing bit get operations in parallel with
 * the writer thread. When the use of locking primitives is acceptable, the 
 * serialization of the bit set/clear and bitmap scan operations needs to be
 * enforced by the caller, while the bit get operation does not require locking
 * the bitmap.
 *
 ***/
 #include <rte_debug.h>
 #include <rte_memory.h>
 #include <rte_branch_prediction.h>
 #include <rte_prefetch.h>
 #ifndef RTE_BITMAP_OPTIMIZATIONS
 #define RTE_BITMAP_OPTIMIZATIONS		         1
 #endif
 #if RTE_BITMAP_OPTIMIZATIONS
 #include <tmmintrin.h>
 #endif
 /**
 * Number of elements in array1. Each element in array1 is a 64-bit slab.
 * The scan code wraps its array1 index with (RTE_BITMAP_ARRAY1_SIZE - 1) as a
 * mask, so an overriding value has to be a power of two.
 */
 #ifndef RTE_BITMAP_ARRAY1_SIZE
 #define RTE_BITMAP_ARRAY1_SIZE                   16
 #endif
 /* Slab: the 64-bit unit in which array1 and array2 are read and written */
 #define RTE_BITMAP_SLAB_BIT_SIZE                 64
 #define RTE_BITMAP_SLAB_BIT_SIZE_LOG2            6
 #define RTE_BITMAP_SLAB_BIT_MASK                 (RTE_BITMAP_SLAB_BIT_SIZE - 1)
 /*
 * Cache line (CL): the group of array2 slabs summarized by one array1 bit.
 * The *_LOG2 values below are literals (9 and 3); they agree with the
 * computed sizes only when CACHE_LINE_SIZE is 64 bytes -- NOTE(review):
 * CACHE_LINE_SIZE is defined outside this header, confirm for other targets.
 */
 #define RTE_BITMAP_CL_BIT_SIZE                   (CACHE_LINE_SIZE * 8)
 #define RTE_BITMAP_CL_BIT_SIZE_LOG2              9
 #define RTE_BITMAP_CL_BIT_MASK                   (RTE_BITMAP_CL_BIT_SIZE - 1)
 #define RTE_BITMAP_CL_SLAB_SIZE                  (RTE_BITMAP_CL_BIT_SIZE / RTE_BITMAP_SLAB_BIT_SIZE)
 #define RTE_BITMAP_CL_SLAB_SIZE_LOG2             3
 #define RTE_BITMAP_CL_SLAB_MASK                  (RTE_BITMAP_CL_SLAB_SIZE - 1)
 /**
 * Bitmap data structure
 *
 * array1 is embedded in the handle and holds one summary bit per array2
 * cache line; array2 is caller-provided memory that holds the actual bits.
 * The index1/offset1/index2/go2 fields are the cursor of the scan operation.
 */
 struct rte_bitmap {
 	uint64_t array1[RTE_BITMAP_ARRAY1_SIZE]; /**< Bitmap array1 */
 	uint64_t *array2;                        /**< Bitmap array2 */
 	uint32_t array1_size;                    /**< Number of 64-bit slabs in array1 that are actually used */
 	uint32_t array2_size;                    /**< Number of 64-bit slabs in array2 */
 	/* Context for the "scan next" operation */
 	uint32_t index1;  /**< Bitmap scan: Index of current array1 slab */
 	uint32_t offset1; /**< Bitmap scan: Offset of current bit within current array1 slab */
 	uint32_t index2;  /**< Bitmap scan: Index of current array2 slab */
 	uint32_t go2;     /**< Bitmap scan: Go/stop condition for current array2 cache line (zero means stop) */
 } __rte_cache_aligned;
 /*
 * Move the scan cursor to the next array1 slab, wrapping back to slab 0 after
 * the last one (the wrap is a bit mask with RTE_BITMAP_ARRAY1_SIZE - 1).
 */
 static inline void
 __rte_bitmap_index1_inc(struct rte_bitmap *bmp)
 {
 	uint32_t next = bmp->index1 + 1;
 	next &= (RTE_BITMAP_ARRAY1_SIZE - 1);
 	bmp->index1 = next;
 }
 /*
 * Build the mask that keeps only the array1 bits located strictly above the
 * current scan offset (bits offset1 + 1 .. 63 of the current array1 slab).
 */
 static inline uint64_t
 __rte_bitmap_mask1_get(struct rte_bitmap *bmp)
 {
 	/*
 	 * The constant must be 64 bits wide: with "lu" it is only 32 bits where
 	 * unsigned long is 32 bits, which loses the upper half of the mask and
 	 * makes shifts by 32..63 undefined.
 	 */
 	return ((~1llu) << bmp->offset1);
 }
 /*
 * Point the array2 cursor at the first slab of the cache line designated by
 * the current array1 position (index1, offset1).
 */
 static inline void
 __rte_bitmap_index2_set(struct rte_bitmap *bmp)
 {
 	/* Number of the array1 bit, counted from bit 0 of array1[0] */
 	uint32_t line = (bmp->index1 << RTE_BITMAP_SLAB_BIT_SIZE_LOG2) + bmp->offset1;
 	/* Each array1 bit covers 2^RTE_BITMAP_CL_SLAB_SIZE_LOG2 array2 slabs */
 	bmp->index2 = line << RTE_BITMAP_CL_SLAB_SIZE_LOG2;
 }
 /*
 * rte_bsf64(): locate the least significant set bit of a 64-bit slab.
 *
 * Returns 0 and leaves *pos untouched when slab is zero; otherwise stores the
 * index (0..63) of the lowest set bit in *pos and returns 1. Two variants
 * exist: one based on the compiler builtin and a portable bit-by-bit loop.
 */
 #if RTE_BITMAP_OPTIMIZATIONS
 static inline int 
 rte_bsf64(uint64_t slab, uint32_t *pos)
 {
 	/* An all-zero slab is hinted as the expected case */
 	if (likely(slab == 0)) {
 		return 0;
 	}
 	/* Count of trailing zero bits == index of the lowest set bit */
 	*pos = __builtin_ctzll(slab);
 	return 1;
 }
 #else
 static inline int 
 rte_bsf64(uint64_t slab, uint32_t *pos)
 {
 	uint32_t bit = 0;
 	/* An all-zero slab is hinted as the expected case */
 	if (likely(slab == 0)) {
 		return 0;
 	}
 	/* Probe the bits from the least significant one upwards */
 	while (bit < RTE_BITMAP_SLAB_BIT_SIZE) {
 		if (unlikely((slab >> bit) & 1)) {
 			*pos = bit;
 			return 1;
 		}
 		bit++;
 	}
 	return 0;
 }
 #endif
 /*
 * Reset the scan cursor: park it on the last bit of the last array1 slab with
 * the array2 read marked as stopped, and index2 one cache line past the line
 * that position designates.
 */
 static inline void
 __rte_bitmap_scan_init(struct rte_bitmap *bmp)
 {
 	/* Nothing to read from array2 until a search has found a line */
 	bmp->go2 = 0;
 	bmp->offset1 = RTE_BITMAP_SLAB_BIT_SIZE - 1;
 	bmp->index1 = RTE_BITMAP_ARRAY1_SIZE - 1;
 	/* Derive index2 from (index1, offset1), then step over that line */
 	__rte_bitmap_index2_set(bmp);
 	bmp->index2 = bmp->index2 + RTE_BITMAP_CL_SLAB_SIZE;
 }
 /**
 * Bitmap initialization
 *
 * Zeroes the bitmap handle, attaches the caller-provided array2 memory to it
 * and resets the scan cursor. The array2 memory itself is not written here.
 *
 * @param bmp
 *   Handle to bitmap instance
 * @param array2
 *   Base address of pre-allocated array2, must be cache line aligned
 * @param n_bits
 *   Number of pre-allocated bits in array2. Must be non-zero, a multiple of
 *   the cache line size in bits, and small enough for array1 to need no more
 *   than RTE_BITMAP_ARRAY1_SIZE slabs.
 * @return
 *   0 upon success, -1 when any argument is rejected
 */
 static inline int 
 rte_bitmap_init(struct rte_bitmap *bmp, uint8_t *array2, uint32_t n_bits)
 {
 	uint32_t n_slabs1, n_slabs2;
 	/* Validate the handle */
 	if (bmp == NULL) {
 		return -1;
 	}
 	/* Validate the array2 base address: present and cache line aligned */
 	if ((array2 == NULL) || (((uintptr_t) array2) & CACHE_LINE_MASK)) {
 		return -1;
 	}
 	/* Validate the size: non-zero and a whole number of cache lines */
 	if ((n_bits == 0) || (n_bits & RTE_BITMAP_CL_BIT_MASK)) {
 		return -1;
 	}
 	/* One array1 bit per array2 cache line, rounded up to whole slabs */
 	n_slabs1 = ((n_bits / RTE_BITMAP_CL_BIT_SIZE) + (RTE_BITMAP_SLAB_BIT_SIZE - 1)) / RTE_BITMAP_SLAB_BIT_SIZE;
 	n_slabs2 = n_bits / RTE_BITMAP_SLAB_BIT_SIZE;
 	if (n_slabs1 > RTE_BITMAP_ARRAY1_SIZE) {
 		return -1;
 	}
 	/* Setup bitmap */
 	memset(bmp, 0, sizeof(struct rte_bitmap));
 	bmp->array1_size = n_slabs1;
 	bmp->array2_size = n_slabs2;
 	bmp->array2 = (uint64_t *) array2;
 	__rte_bitmap_scan_init(bmp);
 	return 0;
 }
 /**
 * Bitmap free
 *
 * Only validates the handle; no memory is released by this function.
 *
 * @param bmp
 *   Handle to bitmap instance
 * @return
 *   0 upon success, -1 when bmp is NULL
 */
 static inline int
 rte_bitmap_free(struct rte_bitmap *bmp)
 {
 	/* Check input arguments */
 	return (bmp == NULL) ? -1 : 0;
 }
 /**
 * Bitmap reset
 *
 * Clears every bit of array1 and array2 and rewinds the scan cursor.
 *
 * @param bmp
 *   Handle to bitmap instance
 */
 static inline void
 rte_bitmap_reset(struct rte_bitmap *bmp)
 {
 	/* array2: array2_size slabs of 64 bits each */
 	memset(bmp->array2, 0, sizeof(uint64_t) * bmp->array2_size);
 	/* array1: the whole embedded array, used slabs or not */
 	memset(bmp->array1, 0, sizeof(bmp->array1));
 	__rte_bitmap_scan_init(bmp);
 }
 /**
 * Bitmap location prefetch
 *
 * Issues rte_prefetch0() on the array2 slab that holds the given bit. Nothing
 * is returned.
 *
 * @param bmp
 *   Handle to bitmap instance
 * @param pos
 *   Bit position
 */
 static inline void
 rte_bitmap_prefetch0(struct rte_bitmap *bmp, uint32_t pos)
 {
 	/* 64 bits per slab: the slab index is pos / 64 */
 	rte_prefetch0((void *) &bmp->array2[pos >> RTE_BITMAP_SLAB_BIT_SIZE_LOG2]);
 }
 /**
 * Bitmap bit get
 *
 * Reads array2 only; array1 and the scan cursor are not touched. No range
 * check is done on pos.
 *
 * @param bmp
 *   Handle to bitmap instance
 * @param pos
 *   Bit position
 * @return
 *   0 when bit is cleared, non-zero when bit is set (the value is the slab
 *   masked with the bit, not necessarily 1)
 */
 static inline uint64_t
 rte_bitmap_get(struct rte_bitmap *bmp, uint32_t pos)
 {
 	uint64_t *slab2;
 	uint32_t index2, offset2;
 	index2 = pos >> RTE_BITMAP_SLAB_BIT_SIZE_LOG2;
 	offset2 = pos & RTE_BITMAP_SLAB_BIT_MASK;
 	slab2 = bmp->array2 + index2;
 	/*
 	 * offset2 goes up to 63, so the shifted constant must be 64 bits wide;
 	 * "1lu" is only 32 bits where unsigned long is 32 bits.
 	 */
 	return ((*slab2) & (1llu << offset2));
 }
 /**
 * Bitmap bit set
 *
 * Sets the bit in its array2 slab and sets the array1 bit of the cache line
 * that contains it. No range check is done on pos.
 *
 * @param bmp
 *   Handle to bitmap instance
 * @param pos
 *   Bit position
 */
 static inline void
 rte_bitmap_set(struct rte_bitmap *bmp, uint32_t pos)
 {
 	uint64_t *slab1, *slab2;
 	uint32_t index1, index2, offset1, offset2;
 	/* Set bit in array2 slab and set bit in array1 slab */
 	index2 = pos >> RTE_BITMAP_SLAB_BIT_SIZE_LOG2;
 	offset2 = pos & RTE_BITMAP_SLAB_BIT_MASK;
 	index1 = pos >> (RTE_BITMAP_SLAB_BIT_SIZE_LOG2 + RTE_BITMAP_CL_BIT_SIZE_LOG2);
 	offset1 = (pos >> RTE_BITMAP_CL_BIT_SIZE_LOG2) & RTE_BITMAP_SLAB_BIT_MASK;
 	slab2 = bmp->array2 + index2;
 	slab1 = bmp->array1 + index1;
 	/*
 	 * Both offsets go up to 63, so the shifted constant must be 64 bits
 	 * wide; "1lu" is only 32 bits where unsigned long is 32 bits.
 	 */
 	*slab2 |= 1llu << offset2;
 	*slab1 |= 1llu << offset1;
 }
 /**
 * Bitmap slab set
 *
 * ORs a 64-bit value into one array2 slab and sets the array1 bit of the
 * cache line that contains it. Bits already set in the slab stay set. The
 * array1 bit is set even when slab is 0, so callers should pass a non-zero
 * value to keep array1 consistent with array2.
 *
 * @param bmp
 *   Handle to bitmap instance
 * @param pos
 *   Bit position identifying the array2 slab
 * @param slab
 *   Value to be OR-ed into the 64-bit slab in array2
 */
 static inline void
 rte_bitmap_set_slab(struct rte_bitmap *bmp, uint32_t pos, uint64_t slab)
 {
 	uint64_t *slab1, *slab2;
 	uint32_t index1, index2, offset1;
 	/* Set bits in array2 slab and set bit in array1 slab */
 	index2 = pos >> RTE_BITMAP_SLAB_BIT_SIZE_LOG2;
 	index1 = pos >> (RTE_BITMAP_SLAB_BIT_SIZE_LOG2 + RTE_BITMAP_CL_BIT_SIZE_LOG2);
 	offset1 = (pos >> RTE_BITMAP_CL_BIT_SIZE_LOG2) & RTE_BITMAP_SLAB_BIT_MASK;
 	slab2 = bmp->array2 + index2;
 	slab1 = bmp->array1 + index1;
 	*slab2 |= slab;
 	/*
 	 * offset1 goes up to 63, so the shifted constant must be 64 bits wide;
 	 * "1lu" is only 32 bits where unsigned long is 32 bits.
 	 */
 	*slab1 |= 1llu << offset1;
 }
 /*
 * OR together the eight consecutive 64-bit slabs starting at slab2. The
 * result is non-zero exactly when at least one of them has a bit set.
 */
 static inline uint64_t
 __rte_bitmap_line_not_empty(uint64_t *slab2)
 {
 	uint64_t any = 0;
 	uint32_t k;
 	for (k = 0; k < 8; k++) {
 		any |= slab2[k];
 	}
 	return any;
 }
 /**
 * Bitmap bit clear
 *
 * Clears the bit in its array2 slab. When that leaves the whole array2 cache
 * line containing the bit at zero, the matching array1 bit is cleared too.
 * No range check is done on pos.
 *
 * @param bmp
 *   Handle to bitmap instance
 * @param pos
 *   Bit position
 */
 static inline void
 rte_bitmap_clear(struct rte_bitmap *bmp, uint32_t pos)
 {
 	uint64_t *slab1, *slab2;
 	uint32_t index1, index2, offset1, offset2;
 	/* Clear bit in array2 slab */
 	index2 = pos >> RTE_BITMAP_SLAB_BIT_SIZE_LOG2;
 	offset2 = pos & RTE_BITMAP_SLAB_BIT_MASK;
 	slab2 = bmp->array2 + index2;
 	/*
 	 * offset2 goes up to 63, so the shifted constant must be 64 bits wide;
 	 * "1lu" is only 32 bits where unsigned long is 32 bits.
 	 */
 	*slab2 &= ~(1llu << offset2);
 	/* Return if array2 slab is not all-zeros */
 	if (*slab2){
 		return;
 	}
 	/* Check the entire cache line of array2 for all-zeros */
 	index2 &= ~ RTE_BITMAP_CL_SLAB_MASK;
 	slab2 = bmp->array2 + index2;
 	if (__rte_bitmap_line_not_empty(slab2)) {
 		return;
 	}
 	/* The array2 cache line is all-zeros, so clear bit in array1 slab */
 	index1 = pos >> (RTE_BITMAP_SLAB_BIT_SIZE_LOG2 + RTE_BITMAP_CL_BIT_SIZE_LOG2);
 	offset1 = (pos >> RTE_BITMAP_CL_BIT_SIZE_LOG2) & RTE_BITMAP_SLAB_BIT_MASK;
 	slab1 = bmp->array1 + index1;
 	/* Same width requirement as above: offset1 goes up to 63 */
 	*slab1 &= ~(1llu << offset1);
 	return;
 }
 /*
 * Advance the array1 cursor (index1, offset1) to the next set array1 bit.
 *
 * The current slab is examined first, restricted to the bits above the
 * current offset. If nothing is found there, RTE_BITMAP_ARRAY1_SIZE slabs are
 * examined in wrap-around order, starting with the slab after the current
 * one. Returns 1 when a set bit was found, 0 when array1 is empty.
 */
 static inline int
 __rte_bitmap_scan_search(struct rte_bitmap *bmp)
 {
 	uint64_t candidates;
 	uint32_t remaining;
 	/* Check current array1 slab */
 	candidates = bmp->array1[bmp->index1] & __rte_bitmap_mask1_get(bmp);
 	if (rte_bsf64(candidates, &bmp->offset1)) {
 		return 1;
 	}
 	__rte_bitmap_index1_inc(bmp);
 	bmp->offset1 = 0;
 	/* Look for another array1 slab */
 	remaining = RTE_BITMAP_ARRAY1_SIZE;
 	while (remaining != 0) {
 		if (rte_bsf64(bmp->array1[bmp->index1], &bmp->offset1)) {
 			return 1;
 		}
 		__rte_bitmap_index1_inc(bmp);
 		remaining--;
 	}
 	return 0;
 }
 /*
 * Arm the array2 read for the cache line selected by the array1 cursor, and
 * prefetch the address eight slabs further on in array2.
 */
 static inline void
 __rte_bitmap_scan_read_init(struct rte_bitmap *bmp)
 {
 	uint64_t *ahead;
 	bmp->go2 = 1;
 	__rte_bitmap_index2_set(bmp);
 	ahead = bmp->array2 + bmp->index2 + 8;
 	rte_prefetch1((void *) ahead);
 }
 /*
 * Walk the rest of the current array2 cache line looking for a non-zero slab.
 *
 * While go2 is non-zero, the slab at index2 is examined and the cursor is
 * stepped past it; go2 drops to zero once index2 reaches a cache line
 * boundary. On a hit, *pos receives the bit position of the first bit of the
 * slab, *slab receives the slab value, and 1 is returned. Returns 0 (outputs
 * untouched) when the line is exhausted.
 */
 static inline int
 __rte_bitmap_scan_read(struct rte_bitmap *bmp, uint32_t *pos, uint64_t *slab)
 {
 	uint64_t *cursor = bmp->array2 + bmp->index2;
 	while (bmp->go2) {
 		const int hit = (*cursor != 0);
 		if (hit) {
 			*pos = bmp->index2 << RTE_BITMAP_SLAB_BIT_SIZE_LOG2;
 			*slab = *cursor;
 		}
 		/* Step past this slab whether or not it was returned */
 		bmp->index2 ++;
 		cursor ++;
 		bmp->go2 = bmp->index2 & RTE_BITMAP_CL_SLAB_MASK;
 		if (hit) {
 			return 1;
 		}
 	}
 	return 0;
 }
 /**
 * Bitmap scan (with automatic wrap-around)
 *
 * Returns the next non-zero array2 slab after the internal scan cursor. The
 * rest of the current array2 cache line is tried first; then array1 is
 * searched for the next non-empty cache line.
 *
 * @param bmp
 *   Handle to bitmap instance
 * @param pos
 *   When function call returns 1, pos contains the bit position of the first
 *   bit of the returned slab (a multiple of 64; this bit is not necessarily
 *   set), otherwise not modified
 * @param slab
 *   When function call returns 1, slab contains the value of the entire 64-bit
 *   slab located at pos. Once a slab has been returned by the bitmap scan
 *   operation, the internal pointers of the bitmap are updated to point after
 *   this slab, so the same slab will not be returned again if it contains
 *   more than one bit which is set. When function call returns 0, slab is not
 *   modified.
 * @return
 *   0 if there is no bit set in the bitmap, 1 otherwise
 */
 static inline int
 rte_bitmap_scan(struct rte_bitmap *bmp, uint32_t *pos, uint64_t *slab)
 {
 	/* Return data from current array2 line if available */
 	if (__rte_bitmap_scan_read(bmp, pos, slab)) {
 		return 1;
 	}
 	/* Empty bitmap: no array1 bit is set */
 	if (!__rte_bitmap_scan_search(bmp)) {
 		return 0;
 	}
 	/*
 	 * A non-empty array2 line was found: read its first non-zero slab. The
 	 * result of this read is not checked; a set array1 bit is taken to mean
 	 * that the line holds at least one non-zero slab.
 	 */
 	__rte_bitmap_scan_read_init(bmp);
 	__rte_bitmap_scan_read(bmp, pos, slab);
 	return 1;
 }
 #ifdef __cplusplus
 }
 #endif
 #endif /* __INCLUDE_RTE_BITMAP_H__ */
--- a/lib/librte_sched/rte_red.c
+++ b/lib/librte_sched/rte_red.c
@ -0,0 +1,160 @@
 /*-
 *   BSD LICENSE
 * 
 *   Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
 *   All rights reserved.
 * 
 *   Redistribution and use in source and binary forms, with or without 
 *   modification, are permitted provided that the following conditions 
 *   are met:
 * 
 *     * Redistributions of source code must retain the above copyright 
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright 
 *       notice, this list of conditions and the following disclaimer in 
 *       the documentation and/or other materials provided with the 
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its 
 *       contributors may be used to endorse or promote products derived 
 *       from this software without specific prior written permission.
 * 
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 * 
 */
 #include <math.h>
 #include "rte_red.h"
 #include <rte_random.h>
 #ifdef __INTEL_COMPILER
 #pragma warning(disable:2259) /* conversion may lose significant bits */
 #endif
 /* Number of elements of an actual array (not valid on a pointer) */
 #define DIM(x) (sizeof(x)/sizeof(x[0]))
 static int rte_red_init_done = 0;     /**< Flag to indicate that global initialisation is done */
 uint32_t rte_red_rand_val = 0;        /**< Random value cache */
 uint32_t rte_red_rand_seed = 0;       /**< Seed for random number generation */
 /**
 * table[i - RTE_RED_WQ_LOG2_MIN] = round(-log2(1 - Wq) * 1024), at least 1
 *       Wq = 1/(2^i), for i in RTE_RED_WQ_LOG2_MIN .. RTE_RED_WQ_LOG2_MAX
 *
 * Filled in by __rte_red_init_tables().
 */
 uint16_t rte_red_log2_1_minus_Wq[RTE_RED_WQ_LOG2_NUM];
 /**
 * table[i] = round(Scale / 2^(i/16)), with Scale = 2^RTE_RED_SCALING
 *
 * These are the inverse fractional powers of two, 2^(-i/16), in fixed point.
 * Filled in by __rte_red_init_tables().
 */
 uint16_t rte_red_pow2_frac_inv[16];
 /**
 * @brief Fill the two lookup tables used to decay the average queue size
 *        after the queue has been empty.
 *
 * rte_red_pow2_frac_inv[k] receives round(2^RTE_RED_SCALING / 2^(k/16)) and
 * rte_red_log2_1_minus_Wq[] receives round(-1024 * log2(1 - 2^-i)) for every
 * supported wq_log2 value i, with a floor of 1.
 */
 static void
 __rte_red_init_tables(void)
 {
 	const double frac_entries = (double)(DIM(rte_red_pow2_frac_inv));
 	double scale = (double)(1 << RTE_RED_SCALING);
 	uint32_t k;
 	/* Inverse fractional powers of two, scaled by 2^RTE_RED_SCALING */
 	for (k = 0; k < DIM(rte_red_pow2_frac_inv); k++) {
 		const double exponent = (double)k / frac_entries;
 		rte_red_pow2_frac_inv[k] = (uint16_t) round(scale / pow(2, exponent));
 	}
 	/* The log2 table uses a fixed scale of 1024 */
 	scale = 1024.0;
 	RTE_RED_ASSERT(RTE_RED_WQ_LOG2_NUM == DIM(rte_red_log2_1_minus_Wq));
 	for (k = RTE_RED_WQ_LOG2_MIN; k <= RTE_RED_WQ_LOG2_MAX; k++) {
 		const double Wq = pow(2, -((double)k));
 		uint16_t *entry = &rte_red_log2_1_minus_Wq[k - RTE_RED_WQ_LOG2_MIN];
 		*entry = (uint16_t) round(-1.0 * scale * log2(1.0 - Wq));
 		/**
 		* An entry of zero would correspond to a Wq of zero, which is
 		* not valid: avg would stay constant however long the queue
 		* is empty. Such an entry is therefore raised to one.
 		*/
 		if (*entry == 0) {
 			*entry = 1;
 		}
 	}
 }
 /**
 * @brief Reset RED run-time data: average, packet count and idle time stamp
 *        are all set to zero.
 *
 * @return 0 on success, -1 when red is NULL
 */
 int
 rte_red_rt_data_init(struct rte_red *red)
 {
 	if (red != NULL) {
 		red->q_time = 0;
 		red->count = 0;
 		red->avg = 0;
 		return 0;
 	}
 	return -1;
 }
 /**
 * @brief Validate RED parameters and store them, pre-scaled, in red_cfg.
 *
 * The first call that passes validation also seeds the random number state
 * and builds the lookup tables shared by all RED instances.
 *
 * @return 0 on success, otherwise the code of the first failed check:
 *         -1 red_cfg is NULL, -2 max_th above RTE_RED_MAX_TH_MAX,
 *         -3 min_th not below max_th, -4 / -5 wq_log2 above / below its
 *         range, -6 / -7 maxp_inv below / above its range
 */
 int
 rte_red_config_init(struct rte_red_config *red_cfg,
 	const uint16_t wq_log2,
 	const uint16_t min_th,
 	const uint16_t max_th,
 	const uint16_t maxp_inv)
 {
 	int status = 0;
 	/* Checks run in a fixed order; the first one that fails decides */
 	if (red_cfg == NULL)
 		status = -1;
 	else if (max_th > RTE_RED_MAX_TH_MAX)
 		status = -2;
 	else if (min_th >= max_th)
 		status = -3;
 	else if (wq_log2 > RTE_RED_WQ_LOG2_MAX)
 		status = -4;
 	else if (wq_log2 < RTE_RED_WQ_LOG2_MIN)
 		status = -5;
 	else if (maxp_inv < RTE_RED_MAXP_INV_MIN)
 		status = -6;
 	else if (maxp_inv > RTE_RED_MAXP_INV_MAX)
 		status = -7;
 	if (status != 0) {
 		return status;
 	}
 	/**
 	 *  One-time global initialisation of the RED module
 	 */
 	if (!rte_red_init_done) {
 		rte_red_rand_seed = rte_rand();
 		rte_red_rand_val = rte_fast_rand();
 		__rte_red_init_tables();
 		rte_red_init_done = 1;
 	}
 	/* Thresholds are kept in the same fixed-point scale as the average */
 	red_cfg->max_th = ((uint32_t) max_th) << (wq_log2 + RTE_RED_SCALING);
 	red_cfg->min_th = ((uint32_t) min_th) << (wq_log2 + RTE_RED_SCALING);
 	/* pa_const = 2 * (max_th - min_th) * maxp_inv, scaled */
 	red_cfg->pa_const = (2 * (max_th - min_th) * maxp_inv) << RTE_RED_SCALING;
 	red_cfg->wq_log2 = wq_log2;
 	red_cfg->maxp_inv = maxp_inv;
 	return 0;
 }
--- a/lib/librte_sched/rte_red.h
+++ b/lib/librte_sched/rte_red.h
@ -0,0 +1,454 @@
 /*-
 *   BSD LICENSE
 * 
 *   Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
 *   All rights reserved.
 * 
 *   Redistribution and use in source and binary forms, with or without 
 *   modification, are permitted provided that the following conditions 
 *   are met:
 * 
 *     * Redistributions of source code must retain the above copyright 
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright 
 *       notice, this list of conditions and the following disclaimer in 
 *       the documentation and/or other materials provided with the 
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its 
 *       contributors may be used to endorse or promote products derived 
 *       from this software without specific prior written permission.
 * 
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 * 
 */
 #ifndef __RTE_RED_H_INCLUDED__
 #define __RTE_RED_H_INCLUDED__
 #ifdef __cplusplus
 extern "C" {
 #endif
 /**
 * @file
 * RTE Random Early Detection (RED)
 *
 *
 ***/
 #include <stdint.h>
 #include <limits.h>
 #include <rte_common.h>
 #include <rte_debug.h>
 #include <rte_cycles.h>
 #include <rte_branch_prediction.h>
 #define RTE_RED_SCALING                     10         /**< Fraction size for fixed-point */
 #define RTE_RED_S                           (1 << 22)  /**< Packet size multiplied by number of leaf queues */
 #define RTE_RED_MAX_TH_MAX                  1023       /**< Max threshold limit in fixed point format */
 #define RTE_RED_WQ_LOG2_MIN                 1          /**< Min inverse filter weight value */
 #define RTE_RED_WQ_LOG2_MAX                 12         /**< Max inverse filter weight value */
 #define RTE_RED_MAXP_INV_MIN                1          /**< Min inverse mark probability value */
 #define RTE_RED_MAXP_INV_MAX                255        /**< Max inverse mark probability value */
 #define RTE_RED_2POW16                      (1<<16)    /**< 2 power 16 */
 #define RTE_RED_INT16_NBITS                 (sizeof(uint16_t) * CHAR_BIT) /**< Number of bits in a uint16_t */
 #define RTE_RED_WQ_LOG2_NUM                 (RTE_RED_WQ_LOG2_MAX - RTE_RED_WQ_LOG2_MIN + 1) /**< Number of supported wq_log2 values */
 /*
 * RTE_RED_ASSERT(exp): calls rte_panic() when exp is false in builds with
 * RTE_RED_DEBUG defined, and expands to an empty statement otherwise. Both
 * variants are a single do { } while (0) statement, so the macro can be used
 * as the body of an unbraced if / else.
 */
 #ifdef RTE_RED_DEBUG
 #define RTE_RED_ASSERT(exp)                                          \
 do {                                                                 \
 	if (!(exp)) {                                                    \
 		rte_panic("line%d\tassert \"" #exp "\" failed\n", __LINE__); \
 	}                                                                \
 } while (0)
 #else
 #define RTE_RED_ASSERT(exp)                 do { } while(0)
 #endif /* RTE_RED_DEBUG */
 /**
 * Externs
 * 
 */
 extern uint32_t rte_red_rand_val;
 extern uint32_t rte_red_rand_seed;
 extern uint16_t rte_red_log2_1_minus_Wq[RTE_RED_WQ_LOG2_NUM];
 extern uint16_t rte_red_pow2_frac_inv[16];
 /**
 * RED configuration parameters passed by user
 *
 * These are the unscaled values.
 */
 struct rte_red_params {
 	uint16_t min_th;   /**< Minimum threshold for queue (min_th) */
 	uint16_t max_th;   /**< Maximum threshold for queue (max_th) */
 	uint16_t maxp_inv; /**< Inverse of packet marking probability maximum value (maxp = 1 / maxp_inv) */
 	uint16_t wq_log2;  /**< Negated log2 of queue weight (wq = 1 / (2 ^ wq_log2)) */
 };
 /**
 * RED configuration parameters
 *
 * Pre-scaled form of the user parameters, filled in by rte_red_config_init().
 */
 struct rte_red_config {
 	uint32_t min_th;   /**< min_th scaled in fixed-point format */
 	uint32_t max_th;   /**< max_th scaled in fixed-point format */
 	uint32_t pa_const; /**< Precomputed constant value used for pa calculation (scaled in fixed-point format) */
 	uint8_t maxp_inv;  /**< maxp_inv (unscaled) */
 	uint8_t wq_log2;   /**< wq_log2 (unscaled) */
 };
 /**
 * RED run-time data
 *
 * Per-queue state, zeroed by rte_red_rt_data_init().
 */
 struct rte_red {
 	uint32_t avg;      /**< Average queue size (avg), scaled in fixed-point format */
 	uint32_t count;    /**< Number of packets since last marked packet (count) */
 	uint64_t q_time;   /**< Start of the queue idle time (q_time) */
 };
 /** 
 * @brief Initialises run-time data
 *  
 * @param [in,out] red pointer to RED runtime data
 *
 * @return Operation status
 * @retval 0 success
 * @retval !0 error (red is NULL)
 */
 int
 rte_red_rt_data_init(struct rte_red *red);
 /** 
 * @brief Configures a single RED configuration parameter structure.
 * 
 * @param [in,out] red_cfg pointer to a RED configuration parameter structure
 * @param [in] wq_log2 log2 of the filter weight, valid range is:
 *             RTE_RED_WQ_LOG2_MIN <= wq_log2 <= RTE_RED_WQ_LOG2_MAX
 * @param [in] min_th queue minimum threshold in number of packets,
 *             must be lower than max_th
 * @param [in] max_th queue maximum threshold in number of packets,
 *             at most RTE_RED_MAX_TH_MAX
 * @param [in] maxp_inv inverse maximum mark probability, valid range is:
 *             RTE_RED_MAXP_INV_MIN <= maxp_inv <= RTE_RED_MAXP_INV_MAX
 * 
 * @return Operation status
 * @retval 0 success
 * @retval !0 error (negative value identifying the first failed check)
 */
 int
 rte_red_config_init(struct rte_red_config *red_cfg,
 	const uint16_t wq_log2,
 	const uint16_t min_th,
 	const uint16_t max_th,
 	const uint16_t maxp_inv);
 /**
 * @brief Generate random number for RED
 *
 * Advances the global seed with one step of a linear congruential generator
 * and returns the updated seed with its 10 low-order bits dropped.
 *
 * Implementation based on:
 * http://software.intel.com/en-us/articles/fast-random-number-generator-on-the-intel-pentiumr-4-processor/
 *
 * 10 bit shift has been found through empirical tests (was 16).
 *
 * @return Random number between 0 and (2^22 - 1)
 */
 static inline uint32_t
 rte_fast_rand(void)
 {
 	const uint32_t next = (214013 * rte_red_rand_seed) + 2531011;
 	rte_red_rand_seed = next;
 	return (next >> 10);
 }
 /**
 * @brief calculate factor to scale average queue size when queue
 *        becomes empty
 *
 * @param [in] wq_log2, where EWMA filter weight wq = 1/(2 ^ wq_log2)
 * @param [in] m exponent in the computed value (1 - wq) ^ m
 *
 * @return computed value
 * @retval ((1 - wq) ^ m) scaled in fixed-point format
 */
 static inline uint16_t
 __rte_red_calc_qempty_factor(uint8_t wq_log2, uint16_t m)
 {
 	uint32_t n = 0;
 	uint32_t f = 0;
 	uint32_t half = 0;
 	/**
 	 * Basic math tells us that:
 	 *   a^b = 2^(b * log2(a) )
 	 *
 	 * in our case:
 	 *   a = (1-Wq)
 	 *   b = m
 	 *  Wq = 1/ (2^log2n)
 	 *
 	 * So we are computing this equation:
 	 *   factor = 2 ^ ( m * log2(1-Wq))
 	 *
 	 * First we are computing:
 	 *    n = m * log2(1-Wq)
 	 *
 	 * To avoid dealing with signed numbers log2 values are positive
 	 * but they should be negative because (1-Wq) is always < 1.
 	 * Contents of log2 table values are also scaled for precision.
 	 */
 	n = (uint32_t) m * rte_red_log2_1_minus_Wq[wq_log2 - RTE_RED_WQ_LOG2_MIN];
 	/**
 	 * The tricky part is computing 2^n, for this n is split into
 	 * integer part and fraction part.
 	 *   f - is fraction part of n
 	 *   n - is integer part of original n
 	 *
 	 * Now using basic math we compute 2^n:
 	 *   2^(f+n) = 2^f * 2^n
 	 *   2^f - we use lookup table
 	 *   2^n - can be replaced with bit shift right operations
 	 */
 	f = (n >> 6) & 0xf;
 	n >>= 10;
 	/* A shift by RTE_RED_SCALING bits or more is reported as factor 0 */
 	if (n >= RTE_RED_SCALING)
 		return 0;
 	/*
 	 * Rounding term for the right shift: half of the discarded range. With
 	 * n == 0 nothing is discarded, and computing 1 << (n - 1) would shift
 	 * by 0xFFFFFFFF, which is undefined behaviour.
 	 */
 	if (n != 0)
 		half = (uint32_t) 1 << (n - 1);
 	return (uint16_t) ((rte_red_pow2_frac_inv[f] + half) >> n);
 }
 /** 
 * @brief Updates queue average in condition when queue is empty
 *
 * The average is decayed according to how long the queue has been idle; it
 * is not compared against the thresholds and no drop probability is
 * computed, so the packet is never dropped in this particular case.
 *
 * @param [in] red_cfg pointer to a RED configuration parameter structure
 * @param [in,out] red pointer to RED runtime data
 * @param [in] time current time stamp
 * 
 * @return Operation status
 * @retval 0 enqueue the packet (the only value returned by this function)
 */
 static inline int
 rte_red_enqueue_empty(const struct rte_red_config *red_cfg,
 	struct rte_red *red,
 	const uint64_t time)
 {
 	uint64_t idle_time, m;
 	RTE_RED_ASSERT(red_cfg != NULL);
 	RTE_RED_ASSERT(red != NULL);
 	red->count ++;
 	/* Time elapsed since the queue was marked empty */
 	idle_time = time - red->q_time;
 	/**
 	 * m is the number of packets that might have arrived while the queue was empty.
 	 * In this case we have time stamps provided by scheduler in byte units (bytes 
 	 * transmitted on network port). Such time stamp translates into time units as
 	 * port speed is fixed but such approach simplifies the code.
 	 */
 	m = idle_time / RTE_RED_S;
 	/**
 	 * The decay factor takes m as a 16-bit value; a larger m resets avg
 	 */
 	if (m < RTE_RED_2POW16) {
 		red->avg = (red->avg >> RTE_RED_SCALING) * __rte_red_calc_qempty_factor(red_cfg->wq_log2, (uint16_t) m);
 	} else {
 		red->avg = 0;
 	}
 	return 0;
 }
 /**
 *  Drop probability (Sally Floyd and Van Jacobson):
 *
 *     pb = (1 / maxp_inv) * (avg - min_th) / (max_th - min_th)
 *     pa = pb / (2 - count * pb)
 *
 *
 *                 (1 / maxp_inv) * (avg - min_th)
 *                ---------------------------------
 *                         max_th - min_th
 *     pa = -----------------------------------------------
 *                count * (1 / maxp_inv) * (avg - min_th)
 *           2 - -----------------------------------------
 *                          max_th - min_th
 *
 *
 *                                  avg - min_th
 *     pa = -----------------------------------------------------------
 *           2 * (max_th - min_th) * maxp_inv - count * (avg - min_th)
 *
 *
 *  We define pa_const as: pa_const =  2 * (max_th - min_th) * maxp_inv. Then:
 *
 *
 *                     avg - min_th
 *     pa = -----------------------------------
 *           pa_const - count * (avg - min_th)
 */
 /**
 * @brief make a decision to drop or enqueue a packet based on mark probability
 *        criteria
 *
 * Evaluates pa = pa_num / (pa_const - count * pa_num) against the cached
 * random value. A fresh random value is generated only when the random test
 * decides to drop.
 *
 * @param [in] red_cfg pointer to structure defining RED parameters
 * @param [in,out] red pointer to RED runtime data
 *
 * @return operation status
 * @retval 0 enqueue the packet
 * @retval 1 drop the packet
 */
 static inline int
 __rte_red_drop(const struct rte_red_config *red_cfg, struct rte_red *red)
 {
 	/* numerator of drop-probability */
 	const uint32_t pa_num = (red->avg - red_cfg->min_th) >> (red_cfg->wq_log2);
 	const uint32_t pa_num_count = red->count * pa_num;
 	/* denominator of drop-probability */
 	uint32_t pa_den;
 	/* Denominator would be zero or negative: always drop */
 	if (pa_num_count >= red_cfg->pa_const) {
 		return 1;
 	}
 	pa_den = red_cfg->pa_const - pa_num_count;
 	/* If drop, generate and save random number to be used next time */
 	if (unlikely((rte_red_rand_val % pa_den) < pa_num)) {
 		rte_red_rand_val = rte_fast_rand();
 		return 1;
 	}
 	/* No drop */
 	return 0;
 }
 /** 
 * @brief Decides if new packet should be enqueued or dropped in queue non-empty case
 *
 * @param [in] red_cfg pointer to a RED configuration parameter structure
 * @param [in,out] red pointer to RED runtime data
 * @param [in] q current queue size (measured in packets)
 * 
 * @return Operation status
 * @retval 0 enqueue the packet
 * @retval 1 drop the packet based on max threshold criterion
 * @retval 2 drop the packet based on mark probability criterion
 */
 static inline int
 rte_red_enqueue_nonempty(const struct rte_red_config *red_cfg,
 	struct rte_red *red,
 	const unsigned q)
 {
 	RTE_RED_ASSERT(red_cfg != NULL);
 	RTE_RED_ASSERT(red != NULL);
 	/**
 	* EWMA filter (Sally Floyd and Van Jacobson):
 	*    avg = (1 - wq) * avg + wq * q
 	*    avg = avg + q * wq - avg * wq
 	*
 	* With wq = 2^(-n) and the scaled average avg_s = avg * 2^(N+n):
 	*    avg_s = avg_s + q * 2^N - avg_s * 2^(-n)
 	*
 	* Expressed with shifts:
 	*    avg_s += (q << N) - (avg_s >> n)
 	*/
 	red->avg += (q << RTE_RED_SCALING) - (red->avg >> red_cfg->wq_log2);
 	if (red->avg >= red_cfg->min_th) {
 		/* max_th <= avg: always mark the packet */
 		if (red->avg >= red_cfg->max_th) {
 			red->count = 0;
 			return 1;
 		}
 		/* min_th <= avg < max_th: mark the packet with pa probability */
 		if (__rte_red_drop(red_cfg, red)) {
 			red->count = 0;
 			return 2;
 		}
 	}
 	/* avg < min_th, or the probability test let the packet through */
 	red->count ++;
 	return 0;
 }
 /** 
 * @brief Decides if new packet should be enqueued or dropped
 *
 * Dispatches on the queue size: an empty queue (q == 0) takes the idle-time
 * path, a non-empty queue takes the EWMA and threshold path.
 *
 * @param [in] red_cfg pointer to a RED configuration parameter structure
 * @param [in,out] red pointer to RED runtime data
 * @param [in] q updated queue size in packets
 * @param [in] time current time stamp
 * 
 * @return Operation status
 * @retval 0 enqueue the packet
 * @retval 1 drop the packet based on max threshold criteria
 * @retval 2 drop the packet based on mark probability criteria
 */
 static inline int
 rte_red_enqueue(const struct rte_red_config *red_cfg,
 	struct rte_red *red,
 	const unsigned q,
 	const uint64_t time)
 {
 	RTE_RED_ASSERT(red_cfg != NULL);
 	RTE_RED_ASSERT(red != NULL);
 	if (q == 0) {
 		return rte_red_enqueue_empty(red_cfg, red, time);
 	}
 	return rte_red_enqueue_nonempty(red_cfg, red, q);
 }
 /** 
 * @brief Callback to record the time at which the queue became empty
 *
 * Stores the time stamp in the run-time data as the start of the idle period.
 *
 * @param [in,out] red pointer to RED runtime data
 * @param [in] time current time stamp
 */
 static inline void
 rte_red_mark_queue_empty(struct rte_red *red, const uint64_t time)
 {
 	/* Start of the idle period */
 	(*red).q_time = time;
 }
 #ifdef __cplusplus
 }
 #endif
 #endif /* __RTE_RED_H_INCLUDED__ */
--- a/lib/librte_sched/rte_sched.c
+++ b/lib/librte_sched/rte_sched.c
--- a/lib/librte_sched/rte_sched.h
+++ b/lib/librte_sched/rte_sched.h
@ -0,0 +1,446 @@
 /*-
 *   BSD LICENSE
 * 
 *   Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
 *   All rights reserved.
 * 
 *   Redistribution and use in source and binary forms, with or without 
 *   modification, are permitted provided that the following conditions 
 *   are met:
 * 
 *     * Redistributions of source code must retain the above copyright 
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright 
 *       notice, this list of conditions and the following disclaimer in 
 *       the documentation and/or other materials provided with the 
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its 
 *       contributors may be used to endorse or promote products derived 
 *       from this software without specific prior written permission.
 * 
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 * 
 */
 #ifndef __INCLUDE_RTE_SCHED_H__
 #define __INCLUDE_RTE_SCHED_H__
 #ifdef __cplusplus
 extern "C" {
 #endif
 /**
 * @file
 * RTE Hierarchical Scheduler
 *
 * The hierarchical scheduler prioritizes the transmission of packets from different
 * users and traffic classes according to the Service Level Agreements (SLAs) defined
 * for the current network node.
 *
 * The scheduler supports thousands of packet queues grouped under a 5-level hierarchy:
 *     1. Port: 
 *           - Typical usage: output Ethernet port;
 *           - Multiple ports are scheduled in round robin order with equal priority;
 *     2. Subport:
 *           - Typical usage: group of users;
 *           - Traffic shaping using the token bucket algorithm (one bucket per subport);
 *           - Upper limit enforced per traffic class at subport level;
 *           - Lower priority traffic classes able to reuse subport bandwidth currently
 *             unused by higher priority traffic classes of the same subport;
 *           - When any subport traffic class is oversubscribed (configuration time
 *             event), the usage of that traffic class by subport member pipes with
 *             high demand is truncated to a dynamically adjusted value, with no
 *             impact on low demand pipes;
 *     3. Pipe: 
 *           - Typical usage: individual user/subscriber;
 *           - Traffic shaping using the token bucket algorithm (one bucket per pipe);
 *     4. Traffic class:
 *           - Traffic classes of the same pipe handled in strict priority order;
 *           - Upper limit enforced per traffic class at the pipe level;
 *           - Lower priority traffic classes able to reuse pipe bandwidth currently
 *             unused by higher priority traffic classes of the same pipe;
 *     5. Queue:
 *           - Typical usage: queue hosting packets from one or multiple connections 
 *             of same traffic class belonging to the same user;
 *           - Weighted Round Robin (WRR) is used to service the queues within same 
 *             pipe traffic class.
 *
 ***/
 #include <sys/types.h>
 #include <rte_mbuf.h>
 #include <rte_meter.h>
 /** Random Early Detection (RED) */
 #ifdef RTE_SCHED_RED
 #include "rte_red.h"
 #endif
 /** Number of traffic classes per pipe (as well as subport). Cannot be changed. */
 #define RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE    4
 /** Number of queues per pipe traffic class. Cannot be changed. */
 #define RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS    4
 /** Number of queues per pipe: traffic classes per pipe multiplied by queues per
 traffic class (16 with the two values above). */
 #define RTE_SCHED_QUEUES_PER_PIPE             \
 	(RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE *     \
 	RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS)
 /** Maximum number of pipe profiles that can be defined per port. Compile-time configurable:
 the value below is only used when the macro has not already been defined. */
 #ifndef RTE_SCHED_PIPE_PROFILES_PER_PORT
 #define RTE_SCHED_PIPE_PROFILES_PER_PORT      256
 #endif
 /** Default Ethernet framing overhead, in bytes. Overhead fields per Ethernet frame:
   1. Preamble:                             7 bytes;
   2. Start of Frame Delimiter (SFD):       1 byte;
   3. Frame Check Sequence (FCS):           4 bytes;
   4. Inter Frame Gap (IFG):               12 bytes.
 The default of 24 is the sum of all four fields (7 + 1 + 4 + 12).
 The FCS is considered overhead only if not included in the packet length (field pkt.pkt_len
 of struct rte_mbuf). The value below is only used when the macro has not already been defined. */
 #ifndef RTE_SCHED_FRAME_OVERHEAD_DEFAULT
 #define RTE_SCHED_FRAME_OVERHEAD_DEFAULT      24
 #endif
 /** Subport configuration parameters. Rates are expressed in bytes per second, the token
 bucket size in credits and the enforcement periods in milliseconds, as noted on each
 field below. One credit represents one byte.
 NOTE(review): earlier wording of this comment described "period" and "credits_per_period"
 parameters measured in byte transmission times; this structure has no members with those
 names, so that description was dropped -- confirm against the implementation. */
 struct rte_sched_subport_params {
 	/* Subport token bucket */
 	uint32_t tb_rate;                /**< Subport token bucket rate (measured in bytes per second) */
 	uint32_t tb_size;                /**< Subport token bucket size (measured in credits) */
 	/* Subport traffic classes */
 	uint32_t tc_rate[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; /**< Subport traffic class rates (measured in bytes per second) */
 	uint32_t tc_period;              /**< Enforcement period for traffic class rates (measured in milliseconds) */
 	/* The member below exists only when RTE_SCHED_SUBPORT_TC_OV is defined, so the structure layout depends on that build option */
 #ifdef RTE_SCHED_SUBPORT_TC_OV
 	uint32_t tc_ov_period;           /**< Enforcement period for traffic class oversubscription (measured in milliseconds) */
 #endif
 };
 /** Subport statistics. Every counter is an array with one entry per traffic class.
 The counters are uint32_t, so each of them can represent at most 2^32 - 1 packets or bytes. */
 struct rte_sched_subport_stats {
 	/* Packets */
 	uint32_t n_pkts_tc[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; /**< Number of packets successfully written to current
 	                                      subport for each traffic class */
 	uint32_t n_pkts_tc_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; /**< Number of packets dropped by the current
 	                                      subport for each traffic class due to subport queues being full or congested */
 	/* Bytes */
 	uint32_t n_bytes_tc[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; /**< Number of bytes successfully written to current
 	                                      subport for each traffic class */
 	uint32_t n_bytes_tc_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; /**< Number of bytes dropped by the current
 	                                      subport for each traffic class due to subport queues being full or congested */
 };
 /** Pipe configuration parameters (one entry of the pipe profile table). Rates are
 expressed in bytes per second, the token bucket size in credits and the enforcement
 period in milliseconds, as noted on each field below. One credit represents one byte.
 NOTE(review): earlier wording of this comment described "period" and "credits_per_period"
 parameters measured in byte transmission times; this structure has no members with those
 names, so that description was dropped -- confirm against the implementation. */
 struct rte_sched_pipe_params {
 	/* Pipe token bucket */
 	uint32_t tb_rate;                /**< Pipe token bucket rate (measured in bytes per second) */
 	uint32_t tb_size;                /**< Pipe token bucket size (measured in credits) */
 	/* Pipe traffic classes */
 	uint32_t tc_rate[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; /**< Pipe traffic class rates (measured in bytes per second) */
 	uint32_t tc_period;              /**< Enforcement period for pipe traffic class rates (measured in milliseconds) */
 	/* The member below exists only when RTE_SCHED_SUBPORT_TC_OV is defined, so the structure layout depends on that build option */
 #ifdef RTE_SCHED_SUBPORT_TC_OV
 	uint8_t tc_ov_weight[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; /**< Traffic class weights to be used for the
 	                                      current pipe in the event of subport traffic class oversubscription */
 #endif
 	/* Pipe queues */
 	uint8_t  wrr_weights[RTE_SCHED_QUEUES_PER_PIPE]; /**< WRR weights for the queues of the current pipe (one entry per pipe queue) */
 };
 /** Queue statistics. The counters are uint32_t, so each of them can represent at
 most 2^32 - 1 packets or bytes. */
 struct rte_sched_queue_stats {
 	/* Packets */
 	uint32_t n_pkts;                 /**< Number of packets successfully written to current queue */
 	uint32_t n_pkts_dropped;         /**< Number of packets dropped due to current queue being full or congested */
 	/* Bytes */
 	uint32_t n_bytes;                /**< Number of bytes successfully written to current queue */
 	uint32_t n_bytes_dropped;        /**< Number of bytes dropped due to current queue being full or congested */
 };
 /** Port configuration parameters. Passed to rte_sched_port_config() and
 rte_sched_port_get_memory_footprint(). */
 struct rte_sched_port_params {
 	const char *name;                /**< Literal string to be associated to the current port scheduler instance */
 	int socket;                      /**< CPU socket ID where the memory for port scheduler should be allocated */
 	uint32_t rate;                   /**< Output port rate (measured in bytes per second) */
 	uint32_t frame_overhead;         /**< Framing overhead per packet (measured in bytes) */
 	uint32_t n_subports_per_port;    /**< Number of subports for the current port scheduler instance */
 	uint32_t n_pipes_per_subport;    /**< Number of pipes for each port scheduler subport */
 	uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; /**< Packet queue size for each traffic class. All queues
 	                                      within the same pipe traffic class have the same size. Queues from
 	                                      different pipes serving the same traffic class have the same size. */
 	struct rte_sched_pipe_params *pipe_profiles; /**< Pipe profile table defined for current port scheduler instance.
 	                                      Every pipe of the current port scheduler is configured using one of the
 	                                      profiles from this table. */
 	uint32_t n_pipe_profiles;        /**< Number of profiles in the pipe profile table */
 	/* The member below exists only when RTE_SCHED_RED is defined, so the structure layout depends on that build option */
 #ifdef RTE_SCHED_RED
 	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][e_RTE_METER_COLORS]; /**< RED parameters, indexed by
 	                                      traffic class, then by e_RTE_METER_COLORS entries (presumably one per packet color -- confirm in rte_meter.h) */
 #endif
 };
 /** Path through the scheduler hierarchy used by the scheduler enqueue operation to
 identify the destination queue for the current packet. Stored in the field pkt.hash.sched
 of struct rte_mbuf of each packet, typically written by the classification stage and read by
 scheduler enqueue.
 The bit-field widths add up to 32 bits (2 + 2 + 20 + 6 + 2). A value assigned to a field is
 reduced to the width of that field, so callers must stay within the ranges listed below. */
 struct rte_sched_port_hierarchy {
 	uint32_t queue:2;                /**< Queue ID (0 .. 3) */
 	uint32_t traffic_class:2;        /**< Traffic class ID (0 .. 3)*/
 	uint32_t pipe:20;                /**< Pipe ID (20 bits wide) */
 	uint32_t subport:6;              /**< Subport ID (6 bits wide, 0 .. 63) */
 	uint32_t color:2;                /**< Color (2 bits wide; written from / read back as enum rte_meter_color by the accessors in this header) */
 };
 /*
 * Configuration
 *
 ***/
 /**
 * Hierarchical scheduler port configuration
 *
 * @param params
 *   Port scheduler configuration parameter structure
 * @return
 *   Handle to port scheduler instance upon success or NULL otherwise.
 */
 struct rte_sched_port * 
 rte_sched_port_config(struct rte_sched_port_params *params);
 /**
 * Hierarchical scheduler port free
 *
 * @param port
 *   Handle to port scheduler instance
 */
 void
 rte_sched_port_free(struct rte_sched_port *port);
 /**
 * Hierarchical scheduler subport configuration
 *
 * @param port
 *   Handle to port scheduler instance
 * @param subport_id
 *   Subport ID
 * @param params
 *   Subport configuration parameters
 * @return
 *   0 upon success, error code otherwise
 */
 int
 rte_sched_subport_config(struct rte_sched_port *port, 
 	uint32_t subport_id,
 	struct rte_sched_subport_params *params);
 /**
 * Hierarchical scheduler pipe configuration
 *
 * @param port
 *   Handle to port scheduler instance
 * @param subport_id
 *   Subport ID
 * @param pipe_id
 *   Pipe ID within subport
 * @param pipe_profile
 *   ID of port-level pre-configured pipe profile
 * @return
 *   0 upon success, error code otherwise
 */
 int
 rte_sched_pipe_config(struct rte_sched_port *port,
 	uint32_t subport_id, 
 	uint32_t pipe_id,
 	int32_t pipe_profile);
 /**
 * Hierarchical scheduler memory footprint size per port
 *
 * @param params
 *   Port scheduler configuration parameter structure
 * @return
 *   Memory footprint size in bytes upon success, 0 otherwise
 */
 uint32_t
 rte_sched_port_get_memory_footprint(struct rte_sched_port_params *params);
 /*
 * Statistics 
 *
 ***/
 /**
 * Hierarchical scheduler subport statistics read
 *
 * @param port
 *   Handle to port scheduler instance
 * @param subport_id
 *   Subport ID
 * @param stats
 *   Pointer to pre-allocated subport statistics structure where the statistics 
 *   counters should be stored
 * @param tc_ov
 *   Pointer to pre-allocated 4-entry array where the oversubscription status for
 *   each of the 4 subport traffic classes should be stored.
 * @return
 *   0 upon success, error code otherwise
 */
 int
 rte_sched_subport_read_stats(struct rte_sched_port *port,
 	uint32_t subport_id,
 	struct rte_sched_subport_stats *stats,
 	uint32_t *tc_ov);
 /**
 * Hierarchical scheduler queue statistics read
 *
 * @param port
 *   Handle to port scheduler instance
 * @param queue_id
 *   Queue ID within port scheduler
 * @param stats
 *   Pointer to pre-allocated queue statistics structure where the statistics
 *   counters should be stored
 * @param qlen
 *   Pointer to pre-allocated variable where the current queue length should be stored.
 * @return
 *   0 upon success, error code otherwise
 */
 int
 rte_sched_queue_read_stats(struct rte_sched_port *port,
 	uint32_t queue_id,
 	struct rte_sched_queue_stats *stats,
 	uint16_t *qlen);
 /* 
 * Run-time 
 *
 ***/
 /**
 * Scheduler hierarchy path write to packet descriptor. Typically called by the
 * packet classification stage. The path and the color are stored in the
 * pkt.hash.sched field of the packet descriptor, laid out as
 * struct rte_sched_port_hierarchy; each value is reduced to the width of the
 * bit-field that receives it.
 *
 * @param pkt
 *   Packet descriptor handle
 * @param subport
 *   Subport ID
 * @param pipe
 *   Pipe ID within subport
 * @param traffic_class
 *   Traffic class ID within pipe (0 .. 3)
 * @param queue
 *   Queue ID within pipe traffic class (0 .. 3)
 * @param color
 *   Packet color to record alongside the hierarchy path
 */
 static inline void
 rte_sched_port_pkt_write(struct rte_mbuf *pkt,
 	uint32_t subport, uint32_t pipe, uint32_t traffic_class, uint32_t queue,
 	enum rte_meter_color color)
 {
 	struct rte_sched_port_hierarchy *path;
 	/* View the scheduler field of the descriptor as a hierarchy path */
 	path = (struct rte_sched_port_hierarchy *) &pkt->pkt.hash.sched;
 	/* Fill the path from the top of the hierarchy down to the queue */
 	path->subport = subport;
 	path->pipe = pipe;
 	path->traffic_class = traffic_class;
 	path->queue = queue;
 	path->color = (uint32_t) color;
 }
 /**
 * Scheduler hierarchy path read from packet descriptor (struct rte_mbuf). Typically
 * called as part of the hierarchical scheduler enqueue operation. The subport, 
 * pipe, traffic class and queue parameters need to be pre-allocated by the caller.
 *
 * @param pkt
 *   Packet descriptor handle
 * @param subport
 *   Subport ID
 * @param pipe
 *   Pipe ID within subport
 * @param traffic_class
 *   Traffic class ID within pipe (0 .. 3)
 * @param queue
 *   Queue ID within pipe traffic class (0 .. 3)
 *   
 */
 static inline void
 rte_sched_port_pkt_read_tree_path(struct rte_mbuf *pkt, uint32_t *subport, uint32_t *pipe, uint32_t *traffic_class, uint32_t *queue)
 {
 	struct rte_sched_port_hierarchy *sched = (struct rte_sched_port_hierarchy *) &pkt->pkt.hash.sched;
 	*subport = sched->subport;
 	*pipe = sched->pipe;
 	*traffic_class = sched->traffic_class;
 	*queue = sched->queue;
 }
 static inline enum rte_meter_color
 rte_sched_port_pkt_read_color(struct rte_mbuf *pkt)
 {
 	struct rte_sched_port_hierarchy *sched = (struct rte_sched_port_hierarchy *) &pkt->pkt.hash.sched;
 	return (enum rte_meter_color) sched->color;
 }
 /**
 * Hierarchical scheduler port enqueue. Writes up to n_pkts to port scheduler and 
 * returns the number of packets actually written. For each packet, the port scheduler
 * queue to write the packet to is identified by reading the hierarchy path from the 
 * packet descriptor; if the queue is full or congested and the packet is not written 
 * to the queue, then the packet is automatically dropped without any action required 
 * from the caller.
 *
 * @param port
 *   Handle to port scheduler instance
 * @param pkts
 *   Array storing the packet descriptor handles
 * @param n_pkts
 *   Number of packets to enqueue from the pkts array into the port scheduler
 * @return
 *   Number of packets successfully enqueued
 */
 int
 rte_sched_port_enqueue(struct rte_sched_port *port, struct rte_mbuf **pkts, uint32_t n_pkts);
 /**
 * Hierarchical scheduler port dequeue. Reads up to n_pkts from the port scheduler 
 * and stores them in the pkts array and returns the number of packets actually read. 
 * The pkts array needs to be pre-allocated by the caller with at least n_pkts entries.
 *
 * @param port
 *   Handle to port scheduler instance
 * @param pkts
 *   Pre-allocated packet descriptor array where the packets dequeued from the port 
 *   scheduler should be stored
 * @param n_pkts
 *   Number of packets to dequeue from the port scheduler
 * @return
 *   Number of packets successfully dequeued and placed in the pkts array
 */
 int
 rte_sched_port_dequeue(struct rte_sched_port *port, struct rte_mbuf **pkts, uint32_t n_pkts);
 #ifdef __cplusplus
 }
 #endif
 #endif /* __INCLUDE_RTE_SCHED_H__ */
--- a/lib/librte_sched/rte_sched_common.h
+++ b/lib/librte_sched/rte_sched_common.h
@ -0,0 +1,130 @@
 /*-
 *   BSD LICENSE
 * 
 *   Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
 *   All rights reserved.
 * 
 *   Redistribution and use in source and binary forms, with or without 
 *   modification, are permitted provided that the following conditions 
 *   are met:
 * 
 *     * Redistributions of source code must retain the above copyright 
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright 
 *       notice, this list of conditions and the following disclaimer in 
 *       the documentation and/or other materials provided with the 
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its 
 *       contributors may be used to endorse or promote products derived 
 *       from this software without specific prior written permission.
 * 
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 * 
 */
 #ifndef __INCLUDE_RTE_SCHED_COMMON_H__
 #define __INCLUDE_RTE_SCHED_COMMON_H__
 #ifdef __cplusplus
 extern "C" {
 #endif
 #include <sys/types.h>
 #define __rte_aligned_16 __attribute__((__aligned__(16)))
 /* Return the smaller of two 32-bit unsigned values. */
 static inline uint32_t
 rte_sched_min_val_2_u32(uint32_t x, uint32_t y)
 {
 	if (x < y)
 		return x;
 	return y;
 }
 /*
 * Return the position (0 .. 3) of the smallest of the four 16-bit values
 * x[0] .. x[3]. Two variants are kept: the first one is compiled out, the
 * second one is the version actually built.
 * The variants differ when the minimum occurs more than once: the disabled
 * variant returns the lowest position holding the minimum, the enabled one
 * returns the highest.
 */
 #if 0
 static inline uint32_t
 rte_min_pos_4_u16(uint16_t *x)
 {
 	uint32_t pos0, pos1;
 	/* Winner of each pair; "<=" keeps the lower position on a tie */
 	pos0 = (x[0] <= x[1])? 0 : 1;
 	pos1 = (x[2] <= x[3])? 2 : 3;
 	/* Winner of the two pair winners */
 	return (x[pos0] <= x[pos1])? pos0 : pos1;
 }
 #else
 /* simplified version to remove branches with CMOV instruction */
 static inline uint32_t
 rte_min_pos_4_u16(uint16_t *x)
 {
 	uint32_t pos0 = 0;
 	uint32_t pos1 = 2;
 	/* Winner of each pair; "<=" moves to the higher position on a tie */
 	if (x[1] <= x[0]) pos0 = 1;
 	if (x[3] <= x[2]) pos1 = 3;
 	/* Winner of the two pair winners, again preferring the higher position on a tie */
 	if (x[pos1] <= x[pos0]) pos0 = pos1;
 	return pos0;
 }
 #endif
 /*
 * Compute the Greatest Common Divisor (GCD) of two numbers.
 * This implementation uses Euclid's algorithm:
 *    gcd(a, 0) = a
 *    gcd(a, b) = gcd(b, a mod b)
 * As a consequence gcd(0, b) = b and gcd(0, 0) = 0.
 *
 */
 static inline uint32_t
 rte_get_gcd(uint32_t a, uint32_t b)
 {
 	/*
 	 * No zero special-casing or operand ordering is needed: when b is 0 the
 	 * loop is skipped and a is returned, and when a < b the first iteration
 	 * (a mod b == a) simply swaps the two operands.
 	 */
 	while (b != 0) {
 		uint32_t r = a % b;
 		a = b;
 		b = r;
 	}
 	return a;
 }
 /*
 * Compute the Least Common Multiple of two numbers (called "Lowest Common
 * Denominator (LCD)" by this API, hence the function name).
 * This implementation computes GCD first:
 *    LCD(a, b) = (a / GCD(a, b)) * b
 * Dividing before multiplying keeps the intermediate value no larger than
 * the result, so the computation only wraps when the result itself does not
 * fit in 32 bits. Returns 0 when either input is 0, including a == b == 0,
 * where the GCD is 0 and the division must be skipped.
 *
 */
 static inline uint32_t
 rte_get_lcd(uint32_t a, uint32_t b)
 {
 	uint32_t gcd = rte_get_gcd(a, b);
 	/* gcd is 0 only when both inputs are 0: avoid dividing by zero */
 	if (gcd == 0)
 		return 0;
 	/* a is an exact multiple of gcd, so the division loses nothing */
 	return (a / gcd) * b;
 }
 #ifdef __cplusplus
 }
 #endif
 #endif /* __INCLUDE_RTE_SCHED_COMMON_H__ */
--- a/mk/rte.app.mk
+++ b/mk/rte.app.mk
@ -105,6 +105,12 @@ ifeq ($(CONFIG_RTE_LIBRTE_METER),y)
 LDLIBS += -lrte_meter
 endif
 ifeq ($(CONFIG_RTE_LIBRTE_SCHED),y)
 LDLIBS += -lrte_sched
 LDLIBS += -lm
 LDLIBS += -lrt
 endif
 LDLIBS += --start-group
 ifeq ($(CONFIG_RTE_LIBRTE_ETHER),y)