kni: add vhost backend
Attach to vhost-net as raw socket backend. Signed-off-by: Intel
This commit is contained in:
parent
904d29a135
commit
b23ffbaa82
@ -277,6 +277,11 @@ CONFIG_RTE_SCHED_PORT_N_GRINDERS=8
|
||||
CONFIG_RTE_LIBRTE_KNI=y
|
||||
CONFIG_RTE_LIBRTE_KNI_DEBUG=n
|
||||
CONFIG_RTE_KNI_KO_DEBUG=n
|
||||
CONFIG_RTE_KNI_VHOST=n
|
||||
CONFIG_RTE_KNI_VHOST_MAX_CACHE_SIZE=1024
|
||||
CONFIG_RTE_KNI_VHOST_VNET_HDR_EN=n
|
||||
CONFIG_RTE_KNI_VHOST_DEBUG_RX=n
|
||||
CONFIG_RTE_KNI_VHOST_DEBUG_TX=n
|
||||
|
||||
#
|
||||
# Enable warning directives
|
||||
|
@ -277,6 +277,11 @@ CONFIG_RTE_SCHED_PORT_N_GRINDERS=8
|
||||
CONFIG_RTE_LIBRTE_KNI=y
|
||||
CONFIG_RTE_LIBRTE_KNI_DEBUG=n
|
||||
CONFIG_RTE_KNI_KO_DEBUG=n
|
||||
CONFIG_RTE_KNI_VHOST=n
|
||||
CONFIG_RTE_KNI_VHOST_MAX_CACHE_SIZE=1024
|
||||
CONFIG_RTE_KNI_VHOST_VNET_HDR_EN=n
|
||||
CONFIG_RTE_KNI_VHOST_DEBUG_RX=n
|
||||
CONFIG_RTE_KNI_VHOST_DEBUG_TX=n
|
||||
|
||||
#
|
||||
# Enable warning directives
|
||||
|
@ -289,6 +289,11 @@ CONFIG_RTE_SCHED_PORT_N_GRINDERS=8
|
||||
CONFIG_RTE_LIBRTE_KNI=y
|
||||
CONFIG_RTE_LIBRTE_KNI_DEBUG=n
|
||||
CONFIG_RTE_KNI_KO_DEBUG=n
|
||||
CONFIG_RTE_KNI_VHOST=n
|
||||
CONFIG_RTE_KNI_VHOST_MAX_CACHE_SIZE=1024
|
||||
CONFIG_RTE_KNI_VHOST_VNET_HDR_EN=n
|
||||
CONFIG_RTE_KNI_VHOST_DEBUG_RX=n
|
||||
CONFIG_RTE_KNI_VHOST_DEBUG_TX=n
|
||||
|
||||
#
|
||||
# Enable warning directives
|
||||
|
@ -277,6 +277,11 @@ CONFIG_RTE_SCHED_PORT_N_GRINDERS=8
|
||||
CONFIG_RTE_LIBRTE_KNI=y
|
||||
CONFIG_RTE_LIBRTE_KNI_DEBUG=n
|
||||
CONFIG_RTE_KNI_KO_DEBUG=n
|
||||
CONFIG_RTE_KNI_VHOST=n
|
||||
CONFIG_RTE_KNI_VHOST_MAX_CACHE_SIZE=1024
|
||||
CONFIG_RTE_KNI_VHOST_VNET_HDR_EN=n
|
||||
CONFIG_RTE_KNI_VHOST_DEBUG_RX=n
|
||||
CONFIG_RTE_KNI_VHOST_DEBUG_TX=n
|
||||
|
||||
#
|
||||
# Enable warning directives
|
||||
|
@ -79,5 +79,6 @@ SRCS-y += ethtool/igb/igb_vmdq.c
|
||||
SRCS-y += kni_misc.c
|
||||
SRCS-y += kni_net.c
|
||||
SRCS-y += kni_ethtool.c
|
||||
SRCS-$(CONFIG_RTE_KNI_VHOST) += kni_vhost.c
|
||||
|
||||
include $(RTE_SDK)/mk/rte.module.mk
|
||||
|
@ -32,6 +32,10 @@
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/list.h>
|
||||
|
||||
#ifdef RTE_KNI_VHOST
|
||||
#include <net/sock.h>
|
||||
#endif
|
||||
|
||||
#include <exec-env/rte_kni_common.h>
|
||||
#define KNI_KTHREAD_RESCHEDULE_INTERVAL 5 /* us */
|
||||
|
||||
@ -91,8 +95,16 @@ struct kni_dev {
|
||||
|
||||
/* synchro for request processing */
|
||||
unsigned long synchro;
|
||||
};
|
||||
|
||||
#ifdef RTE_KNI_VHOST
|
||||
struct kni_vhost_queue* vhost_queue;
|
||||
volatile enum {
|
||||
BE_STOP = 0x1,
|
||||
BE_START = 0x2,
|
||||
BE_FINISH = 0x4,
|
||||
}vq_status;
|
||||
#endif
|
||||
};
|
||||
|
||||
#define KNI_ERR(args...) printk(KERN_DEBUG "KNI: Error: " args)
|
||||
#define KNI_PRINT(args...) printk(KERN_DEBUG "KNI: " args)
|
||||
@ -102,4 +114,37 @@ struct kni_dev {
|
||||
#define KNI_DBG(args...)
|
||||
#endif
|
||||
|
||||
#ifdef RTE_KNI_VHOST
|
||||
unsigned int
|
||||
kni_poll(struct file *file, struct socket *sock, poll_table * wait);
|
||||
int kni_chk_vhost_rx(struct kni_dev *kni);
|
||||
int kni_vhost_init(struct kni_dev *kni);
|
||||
int kni_vhost_backend_release(struct kni_dev *kni);
|
||||
|
||||
struct kni_vhost_queue {
|
||||
struct sock sk;
|
||||
struct socket *sock;
|
||||
int vnet_hdr_sz;
|
||||
struct kni_dev *kni;
|
||||
int sockfd;
|
||||
unsigned int flags;
|
||||
struct sk_buff* cache;
|
||||
struct rte_kni_fifo* fifo;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef RTE_KNI_VHOST_DEBUG_RX
|
||||
#define KNI_DBG_RX(args...) printk(KERN_DEBUG "KNI RX: " args)
|
||||
#else
|
||||
#define KNI_DBG_RX(args...)
|
||||
#endif
|
||||
|
||||
#ifdef RTE_KNI_VHOST_DEBUG_TX
|
||||
#define KNI_DBG_TX(args...) printk(KERN_DEBUG "KNI TX: " args)
|
||||
#else
|
||||
#define KNI_DBG_TX(args...)
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
|
@ -91,4 +91,18 @@ kni_fifo_free_count(struct rte_kni_fifo *fifo)
|
||||
return (fifo->read - fifo->write - 1) & (fifo->len - 1);
|
||||
}
|
||||
|
||||
#ifdef RTE_KNI_VHOST
|
||||
/**
|
||||
* Initializes the kni fifo structure
|
||||
*/
|
||||
static inline void
|
||||
kni_fifo_init(struct rte_kni_fifo *fifo, unsigned size)
|
||||
{
|
||||
fifo->write = 0;
|
||||
fifo->read = 0;
|
||||
fifo->len = size;
|
||||
fifo->elem_size = sizeof(void *);
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _KNI_FIFO_H_ */
|
||||
|
@ -193,6 +193,9 @@ kni_release(struct inode *inode, struct file *file)
|
||||
dev->pthread = NULL;
|
||||
}
|
||||
|
||||
#ifdef RTE_KNI_VHOST
|
||||
kni_vhost_backend_release(dev);
|
||||
#endif
|
||||
kni_dev_remove(dev);
|
||||
list_del(&dev->list);
|
||||
}
|
||||
@ -217,7 +220,11 @@ kni_thread_single(void *unused)
|
||||
for (j = 0; j < KNI_RX_LOOP_NUM; j++) {
|
||||
list_for_each_entry_safe(dev, n,
|
||||
&kni_list_head, list) {
|
||||
#ifdef RTE_KNI_VHOST
|
||||
kni_chk_vhost_rx(dev);
|
||||
#else
|
||||
kni_net_rx(dev);
|
||||
#endif
|
||||
kni_net_poll_resp(dev);
|
||||
}
|
||||
}
|
||||
@ -238,7 +245,11 @@ kni_thread_multiple(void *param)
|
||||
|
||||
while (!kthread_should_stop()) {
|
||||
for (j = 0; j < KNI_RX_LOOP_NUM; j++) {
|
||||
#ifdef RTE_KNI_VHOST
|
||||
kni_chk_vhost_rx(dev);
|
||||
#else
|
||||
kni_net_rx(dev);
|
||||
#endif
|
||||
kni_net_poll_resp(dev);
|
||||
}
|
||||
schedule_timeout_interruptible(usecs_to_jiffies( \
|
||||
@ -361,6 +372,10 @@ kni_ioctl_create(unsigned int ioctl_num, unsigned long ioctl_param)
|
||||
kni->mbuf_kva = phys_to_virt(dev_info.mbuf_phys);
|
||||
kni->mbuf_va = dev_info.mbuf_va;
|
||||
|
||||
#ifdef RTE_KNI_VHOST
|
||||
kni->vhost_queue = NULL;
|
||||
kni->vq_status = BE_STOP;
|
||||
#endif
|
||||
kni->mbuf_size = dev_info.mbuf_size;
|
||||
|
||||
KNI_PRINT("tx_phys: 0x%016llx, tx_q addr: 0x%p\n",
|
||||
@ -443,6 +458,10 @@ kni_ioctl_create(unsigned int ioctl_num, unsigned long ioctl_param)
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
#ifdef RTE_KNI_VHOST
|
||||
kni_vhost_init(kni);
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Create a new kernel thread for multiple mode, set its core affinity,
|
||||
* and finally wake it up.
|
||||
@ -497,6 +516,9 @@ kni_ioctl_release(unsigned int ioctl_num, unsigned long ioctl_param)
|
||||
dev->pthread = NULL;
|
||||
}
|
||||
|
||||
#ifdef RTE_KNI_VHOST
|
||||
kni_vhost_backend_release(dev);
|
||||
#endif
|
||||
kni_dev_remove(dev);
|
||||
list_del(&dev->list);
|
||||
ret = 0;
|
||||
|
@ -379,6 +379,18 @@ kni_net_rx(struct kni_dev *kni)
|
||||
/*
|
||||
* Transmit a packet (called by the kernel)
|
||||
*/
|
||||
#ifdef RTE_KNI_VHOST
|
||||
static int
|
||||
kni_net_tx(struct sk_buff *skb, struct net_device *dev)
|
||||
{
|
||||
struct kni_dev *kni = netdev_priv(dev);
|
||||
|
||||
dev_kfree_skb(skb);
|
||||
kni->stats.tx_dropped++;
|
||||
|
||||
return NETDEV_TX_OK;
|
||||
}
|
||||
#else
|
||||
static int
|
||||
kni_net_tx(struct sk_buff *skb, struct net_device *dev)
|
||||
{
|
||||
@ -451,6 +463,7 @@ kni_net_tx(struct sk_buff *skb, struct net_device *dev)
|
||||
|
||||
return NETDEV_TX_OK;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Deal with a transmit timeout.
|
||||
|
764
lib/librte_eal/linuxapp/kni/kni_vhost.c
Normal file
764
lib/librte_eal/linuxapp/kni/kni_vhost.c
Normal file
@ -0,0 +1,764 @@
|
||||
/*-
|
||||
* GPL LICENSE SUMMARY
|
||||
*
|
||||
* Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
* The full GNU General Public License is included in this distribution
|
||||
* in the file called LICENSE.GPL.
|
||||
*
|
||||
* Contact Information:
|
||||
* Intel Corporation
|
||||
*/
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/net.h>
|
||||
#include <net/sock.h>
|
||||
#include <linux/virtio_net.h>
|
||||
#include <linux/wait.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/nsproxy.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/if_tun.h>
|
||||
|
||||
#include "kni_dev.h"
|
||||
#include "kni_fifo.h"
|
||||
|
||||
#define RX_BURST_SZ 4
|
||||
|
||||
extern void put_unused_fd(unsigned int fd);
|
||||
|
||||
static struct proto kni_raw_proto = {
|
||||
.name = "kni_vhost",
|
||||
.owner = THIS_MODULE,
|
||||
.obj_size = sizeof(struct kni_vhost_queue),
|
||||
};
|
||||
|
||||
static inline int
|
||||
kni_vhost_net_tx(struct kni_dev *kni, struct iovec *iov,
|
||||
unsigned offset, unsigned len)
|
||||
{
|
||||
struct rte_kni_mbuf *pkt_kva = NULL;
|
||||
struct rte_kni_mbuf *pkt_va = NULL;
|
||||
int ret;
|
||||
|
||||
KNI_DBG_TX("tx offset=%d, len=%d, iovlen=%d\n",
|
||||
offset, len, (int)iov->iov_len);
|
||||
|
||||
/**
|
||||
* Check if it has at least one free entry in tx_q and
|
||||
* one entry in alloc_q.
|
||||
*/
|
||||
if (kni_fifo_free_count(kni->tx_q) == 0 ||
|
||||
kni_fifo_count(kni->alloc_q) == 0) {
|
||||
/**
|
||||
* If no free entry in tx_q or no entry in alloc_q,
|
||||
* drops skb and goes out.
|
||||
*/
|
||||
goto drop;
|
||||
}
|
||||
|
||||
/* dequeue a mbuf from alloc_q */
|
||||
ret = kni_fifo_get(kni->alloc_q, (void **)&pkt_va, 1);
|
||||
if (likely(ret == 1)) {
|
||||
void *data_kva;
|
||||
|
||||
pkt_kva = (void *)pkt_va - kni->mbuf_va + kni->mbuf_kva;
|
||||
data_kva = pkt_kva->data - kni->mbuf_va + kni->mbuf_kva;
|
||||
|
||||
memcpy_fromiovecend(data_kva, iov, offset, len);
|
||||
if (unlikely(len < ETH_ZLEN)) {
|
||||
memset(data_kva + len, 0, ETH_ZLEN - len);
|
||||
len = ETH_ZLEN;
|
||||
}
|
||||
pkt_kva->pkt_len = len;
|
||||
pkt_kva->data_len = len;
|
||||
|
||||
/* enqueue mbuf into tx_q */
|
||||
ret = kni_fifo_put(kni->tx_q, (void **)&pkt_va, 1);
|
||||
if (unlikely(ret != 1)) {
|
||||
/* Failing should not happen */
|
||||
KNI_ERR("Fail to enqueue mbuf into tx_q\n");
|
||||
goto drop;
|
||||
}
|
||||
} else {
|
||||
/* Failing should not happen */
|
||||
KNI_ERR("Fail to dequeue mbuf from alloc_q\n");
|
||||
goto drop;
|
||||
}
|
||||
|
||||
/* update statistics */
|
||||
kni->stats.tx_bytes += len;
|
||||
kni->stats.tx_packets++;
|
||||
|
||||
return 0;
|
||||
|
||||
drop:
|
||||
/* update statistics */
|
||||
kni->stats.tx_dropped++;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int
|
||||
kni_vhost_net_rx(struct kni_dev *kni, struct iovec *iov,
|
||||
unsigned offset, unsigned len)
|
||||
{
|
||||
uint32_t pkt_len;
|
||||
struct rte_kni_mbuf *kva;
|
||||
struct rte_kni_mbuf *va;
|
||||
void * data_kva;
|
||||
struct sk_buff *skb;
|
||||
struct kni_vhost_queue *q = kni->vhost_queue;
|
||||
|
||||
if (unlikely(q == NULL))
|
||||
return 0;
|
||||
|
||||
/* ensure at least one entry in free_q */
|
||||
if (unlikely(kni_fifo_free_count(kni->free_q) == 0))
|
||||
return 0;
|
||||
|
||||
skb = skb_dequeue(&q->sk.sk_receive_queue);
|
||||
if (unlikely(skb == NULL))
|
||||
return 0;
|
||||
|
||||
kva = (struct rte_kni_mbuf*)skb->data;
|
||||
|
||||
/* free skb to cache */
|
||||
skb->data = NULL;
|
||||
if (unlikely(1 != kni_fifo_put(q->fifo, (void **)&skb, 1)))
|
||||
/* Failing should not happen */
|
||||
KNI_ERR("Fail to enqueue entries into rx cache fifo\n");
|
||||
|
||||
pkt_len = kva->data_len;
|
||||
if (unlikely(pkt_len > len))
|
||||
goto drop;
|
||||
|
||||
KNI_DBG_RX("rx offset=%d, len=%d, pkt_len=%d, iovlen=%d\n",
|
||||
offset, len, pkt_len, (int)iov->iov_len);
|
||||
|
||||
data_kva = kva->data - kni->mbuf_va + kni->mbuf_kva;
|
||||
if (unlikely(memcpy_toiovecend(iov, data_kva, offset, pkt_len)))
|
||||
goto drop;
|
||||
|
||||
/* Update statistics */
|
||||
kni->stats.rx_bytes += pkt_len;
|
||||
kni->stats.rx_packets++;
|
||||
|
||||
/* enqueue mbufs into free_q */
|
||||
va = (void*)kva - kni->mbuf_kva + kni->mbuf_va;
|
||||
if (unlikely(1 != kni_fifo_put(kni->free_q, (void **)&va, 1)))
|
||||
/* Failing should not happen */
|
||||
KNI_ERR("Fail to enqueue entries into free_q\n");
|
||||
|
||||
KNI_DBG_RX("receive done %d\n", pkt_len);
|
||||
|
||||
return pkt_len;
|
||||
|
||||
drop:
|
||||
/* Update drop statistics */
|
||||
kni->stats.rx_dropped++;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static unsigned int
|
||||
kni_sock_poll(struct file *file, struct socket *sock, poll_table * wait)
|
||||
{
|
||||
struct kni_vhost_queue *q =
|
||||
container_of(sock->sk, struct kni_vhost_queue, sk);
|
||||
struct kni_dev *kni;
|
||||
unsigned int mask = 0;
|
||||
|
||||
if (unlikely(q == NULL || q->kni == NULL))
|
||||
return POLLERR;
|
||||
|
||||
kni = q->kni;
|
||||
KNI_DBG("start kni_poll on group %d, wq 0x%16llx\n",
|
||||
kni->group_id, (uint64_t)sock->wq);
|
||||
|
||||
poll_wait(file, &sock->wq->wait, wait);
|
||||
|
||||
if (kni_fifo_count(kni->rx_q) > 0)
|
||||
mask |= POLLIN | POLLRDNORM;
|
||||
|
||||
if (sock_writeable(&q->sk) ||
|
||||
(!test_and_set_bit(SOCK_ASYNC_NOSPACE, &q->sock->flags) &&
|
||||
sock_writeable(&q->sk)))
|
||||
mask |= POLLOUT | POLLWRNORM;
|
||||
|
||||
return mask;
|
||||
}
|
||||
|
||||
static inline void
|
||||
kni_vhost_enqueue(struct kni_dev *kni, struct kni_vhost_queue *q,
|
||||
struct sk_buff *skb, struct rte_kni_mbuf *va)
|
||||
{
|
||||
struct rte_kni_mbuf *kva;
|
||||
|
||||
kva = (void *)(va) - kni->mbuf_va + kni->mbuf_kva;
|
||||
(skb)->data = (unsigned char*)kva;
|
||||
(skb)->len = kva->data_len;
|
||||
skb_queue_tail(&q->sk.sk_receive_queue, skb);
|
||||
}
|
||||
|
||||
static inline void
|
||||
kni_vhost_enqueue_burst(struct kni_dev *kni, struct kni_vhost_queue *q,
|
||||
struct sk_buff **skb, struct rte_kni_mbuf **va)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < RX_BURST_SZ; skb++, va++, i++)
|
||||
kni_vhost_enqueue(kni, q, *skb, *va);
|
||||
}
|
||||
|
||||
int
|
||||
kni_chk_vhost_rx(struct kni_dev *kni)
|
||||
{
|
||||
struct kni_vhost_queue *q = kni->vhost_queue;
|
||||
unsigned nb_in, nb_mbuf, nb_skb;
|
||||
const unsigned BURST_MASK = RX_BURST_SZ - 1;
|
||||
unsigned nb_burst, nb_backlog, i;
|
||||
struct sk_buff *skb[RX_BURST_SZ];
|
||||
struct rte_kni_mbuf *va[RX_BURST_SZ];
|
||||
|
||||
if (unlikely(BE_STOP & kni->vq_status)) {
|
||||
kni->vq_status |= BE_FINISH;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (unlikely(q == NULL))
|
||||
return 0;
|
||||
|
||||
nb_skb = kni_fifo_count(q->fifo);
|
||||
nb_mbuf = kni_fifo_count(kni->rx_q);
|
||||
|
||||
nb_in = min(nb_mbuf, nb_skb);
|
||||
nb_in = min(nb_in, (unsigned)RX_BURST_SZ);
|
||||
nb_burst = (nb_in & ~BURST_MASK);
|
||||
nb_backlog = (nb_in & BURST_MASK);
|
||||
|
||||
/* enqueue skb_queue per BURST_SIZE bulk */
|
||||
if (0 != nb_burst) {
|
||||
if (unlikely(RX_BURST_SZ != kni_fifo_get(
|
||||
kni->rx_q, (void **)&va,
|
||||
RX_BURST_SZ)))
|
||||
goto except;
|
||||
|
||||
if (unlikely(RX_BURST_SZ != kni_fifo_get(
|
||||
q->fifo, (void **)&skb,
|
||||
RX_BURST_SZ)))
|
||||
goto except;
|
||||
|
||||
kni_vhost_enqueue_burst(kni, q, skb, va);
|
||||
}
|
||||
|
||||
/* all leftover, do one by one */
|
||||
for (i = 0; i < nb_backlog; ++i) {
|
||||
if (unlikely(1 != kni_fifo_get(
|
||||
kni->rx_q,(void **)&va, 1)))
|
||||
goto except;
|
||||
|
||||
if (unlikely(1 != kni_fifo_get(
|
||||
q->fifo, (void **)&skb, 1)))
|
||||
goto except;
|
||||
|
||||
kni_vhost_enqueue(kni, q, *skb, *va);
|
||||
}
|
||||
|
||||
/* Ondemand wake up */
|
||||
if ((nb_in == RX_BURST_SZ) || (nb_skb == 0) ||
|
||||
((nb_mbuf < RX_BURST_SZ) && (nb_mbuf != 0))) {
|
||||
wake_up_interruptible_poll(sk_sleep(&q->sk),
|
||||
POLLIN | POLLRDNORM | POLLRDBAND);
|
||||
KNI_DBG_RX("RX CHK KICK nb_mbuf %d, nb_skb %d, nb_in %d\n",
|
||||
nb_mbuf, nb_skb, nb_in);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
except:
|
||||
/* Failing should not happen */
|
||||
KNI_ERR("Fail to enqueue fifo, it shouldn't happen \n");
|
||||
BUG_ON(1);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
kni_sock_sndmsg(struct kiocb *iocb, struct socket *sock,
|
||||
struct msghdr *m, size_t total_len)
|
||||
{
|
||||
struct kni_vhost_queue *q =
|
||||
container_of(sock->sk, struct kni_vhost_queue, sk);
|
||||
int vnet_hdr_len = 0;
|
||||
unsigned long len = total_len;
|
||||
|
||||
if (unlikely(q == NULL || q->kni == NULL))
|
||||
return 0;
|
||||
|
||||
KNI_DBG_TX("kni_sndmsg len %ld, flags 0x%08x, nb_iov %d\n",
|
||||
len, q->flags, (int)m->msg_iovlen);
|
||||
|
||||
#ifdef RTE_KNI_VHOST_VNET_HDR_EN
|
||||
if (likely(q->flags & IFF_VNET_HDR)) {
|
||||
vnet_hdr_len = q->vnet_hdr_sz;
|
||||
if (unlikely(len < vnet_hdr_len))
|
||||
return -EINVAL;
|
||||
len -= vnet_hdr_len;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (unlikely(len < ETH_HLEN + q->vnet_hdr_sz))
|
||||
return -EINVAL;
|
||||
|
||||
return kni_vhost_net_tx(q->kni, m->msg_iov, vnet_hdr_len, len);
|
||||
}
|
||||
|
||||
static int
|
||||
kni_sock_rcvmsg(struct kiocb *iocb, struct socket *sock,
|
||||
struct msghdr *m, size_t len, int flags)
|
||||
{
|
||||
int vnet_hdr_len = 0;
|
||||
int pkt_len = 0;
|
||||
struct kni_vhost_queue *q =
|
||||
container_of(sock->sk, struct kni_vhost_queue, sk);
|
||||
static struct virtio_net_hdr
|
||||
__attribute__ ((unused)) vnet_hdr = {
|
||||
.flags = 0,
|
||||
.gso_type = VIRTIO_NET_HDR_GSO_NONE
|
||||
};
|
||||
|
||||
if (unlikely(q == NULL || q->kni == NULL))
|
||||
return 0;
|
||||
|
||||
#ifdef RTE_KNI_VHOST_VNET_HDR_EN
|
||||
if (likely(q->flags & IFF_VNET_HDR)) {
|
||||
vnet_hdr_len = q->vnet_hdr_sz;
|
||||
if ((len -= vnet_hdr_len) < 0)
|
||||
return -EINVAL;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (unlikely(0 == (pkt_len = kni_vhost_net_rx(q->kni,
|
||||
m->msg_iov, vnet_hdr_len, len))))
|
||||
return 0;
|
||||
|
||||
#ifdef RTE_KNI_VHOST_VNET_HDR_EN
|
||||
/* no need to copy hdr when no pkt received */
|
||||
if (unlikely(memcpy_toiovecend(m->msg_iov,
|
||||
(void *)&vnet_hdr, 0, vnet_hdr_len)))
|
||||
return -EFAULT;
|
||||
#endif
|
||||
KNI_DBG_RX("kni_rcvmsg expect_len %ld, flags 0x%08x, pkt_len %d\n",
|
||||
(unsigned long)len, q->flags, pkt_len);
|
||||
|
||||
return (pkt_len + vnet_hdr_len);
|
||||
}
|
||||
|
||||
/* dummy tap like ioctl */
|
||||
static int
|
||||
kni_sock_ioctl(struct socket *sock, unsigned int cmd,
|
||||
unsigned long arg)
|
||||
{
|
||||
void __user *argp = (void __user *)arg;
|
||||
struct ifreq __user *ifr = argp;
|
||||
unsigned int __user *up = argp;
|
||||
struct kni_vhost_queue *q =
|
||||
container_of(sock->sk, struct kni_vhost_queue, sk);
|
||||
struct kni_dev *kni;
|
||||
unsigned int u;
|
||||
int __user *sp = argp;
|
||||
int s;
|
||||
int ret;
|
||||
|
||||
KNI_DBG("tap ioctl cmd 0x%08x\n", cmd);
|
||||
|
||||
switch (cmd) {
|
||||
case TUNSETIFF:
|
||||
KNI_DBG("TUNSETIFF\n");
|
||||
/* ignore the name, just look at flags */
|
||||
if (get_user(u, &ifr->ifr_flags))
|
||||
return -EFAULT;
|
||||
|
||||
ret = 0;
|
||||
if ((u & ~IFF_VNET_HDR) != (IFF_NO_PI | IFF_TAP))
|
||||
ret = -EINVAL;
|
||||
else
|
||||
q->flags = u;
|
||||
|
||||
return ret;
|
||||
|
||||
case TUNGETIFF:
|
||||
KNI_DBG("TUNGETIFF\n");
|
||||
rcu_read_lock_bh();
|
||||
kni = rcu_dereference_bh(q->kni);
|
||||
if (kni)
|
||||
dev_hold(kni->net_dev);
|
||||
rcu_read_unlock_bh();
|
||||
|
||||
if (!kni)
|
||||
return -ENOLINK;
|
||||
|
||||
ret = 0;
|
||||
if (copy_to_user(&ifr->ifr_name, kni->net_dev->name, IFNAMSIZ) ||
|
||||
put_user(q->flags, &ifr->ifr_flags))
|
||||
ret = -EFAULT;
|
||||
dev_put(kni->net_dev);
|
||||
return ret;
|
||||
|
||||
case TUNGETFEATURES:
|
||||
KNI_DBG("TUNGETFEATURES\n");
|
||||
u = IFF_TAP | IFF_NO_PI;
|
||||
#ifdef RTE_KNI_VHOST_VNET_HDR_EN
|
||||
u |= IFF_VNET_HDR;
|
||||
#endif
|
||||
if (put_user(u, up))
|
||||
return -EFAULT;
|
||||
return 0;
|
||||
|
||||
case TUNSETSNDBUF:
|
||||
KNI_DBG("TUNSETSNDBUF\n");
|
||||
if (get_user(u, up))
|
||||
return -EFAULT;
|
||||
|
||||
q->sk.sk_sndbuf = u;
|
||||
return 0;
|
||||
|
||||
case TUNGETVNETHDRSZ:
|
||||
s = q->vnet_hdr_sz;
|
||||
if (put_user(s, sp))
|
||||
return -EFAULT;
|
||||
KNI_DBG("TUNGETVNETHDRSZ %d\n", s);
|
||||
return 0;
|
||||
|
||||
case TUNSETVNETHDRSZ:
|
||||
if (get_user(s, sp))
|
||||
return -EFAULT;
|
||||
if (s < (int)sizeof(struct virtio_net_hdr))
|
||||
return -EINVAL;
|
||||
|
||||
KNI_DBG("TUNSETVNETHDRSZ %d\n", s);
|
||||
q->vnet_hdr_sz = s;
|
||||
return 0;
|
||||
|
||||
case TUNSETOFFLOAD:
|
||||
KNI_DBG("TUNSETOFFLOAD %lx\n", arg);
|
||||
#ifdef RTE_KNI_VHOST_VNET_HDR_EN
|
||||
/* not support any offload yet */
|
||||
if (!(q->flags & IFF_VNET_HDR))
|
||||
return -EINVAL;
|
||||
|
||||
return 0;
|
||||
#else
|
||||
return -EINVAL;
|
||||
#endif
|
||||
|
||||
default:
|
||||
KNI_DBG("NOT SUPPORT\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
kni_sock_compat_ioctl(struct socket *sock, unsigned int cmd,
|
||||
unsigned long arg)
|
||||
{
|
||||
/* 32 bits app on 64 bits OS to be supported later */
|
||||
KNI_PRINT("Not implemented.\n");
|
||||
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/*
 * Sleep in 1 ms steps for as long as kni->vq_status equals
 * (BE_FINISH | BE_STOP). Expects a local variable named `kni` in scope.
 *
 * NOTE(review): kni_sock_release() sets BE_STOP right before using this and
 * kni_chk_vhost_rx() adds BE_FINISH afterwards, so the loop presumably was
 * meant to wait *until* both bits are set (condition `!=`). Confirm the
 * intent before changing it: waiting on `!=` would never end if the rx
 * thread is no longer running.
 */
#define KNI_VHOST_WAIT_WQ_SAFE()					\
	do {								\
		while (kni->vq_status == (BE_FINISH | BE_STOP))	\
			msleep(1);					\
	} while (0)
|
||||
|
||||
|
||||
static int
|
||||
kni_sock_release(struct socket *sock)
|
||||
{
|
||||
struct kni_vhost_queue *q =
|
||||
container_of(sock->sk, struct kni_vhost_queue, sk);
|
||||
struct kni_dev *kni;
|
||||
|
||||
if (q == NULL)
|
||||
return 0;
|
||||
|
||||
if (NULL != (kni = q->kni)) {
|
||||
kni->vq_status = BE_STOP;
|
||||
KNI_VHOST_WAIT_WQ_SAFE();
|
||||
kni->vhost_queue = NULL;
|
||||
q->kni = NULL;
|
||||
}
|
||||
|
||||
if (q->sockfd != -1)
|
||||
q->sockfd = -1;
|
||||
|
||||
sk_set_socket(&q->sk, NULL);
|
||||
sock->sk = NULL;
|
||||
|
||||
sock_put(&q->sk);
|
||||
|
||||
KNI_DBG("dummy sock release done\n");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
kni_sock_getname (struct socket *sock,
|
||||
struct sockaddr *addr,
|
||||
int *sockaddr_len, int peer)
|
||||
{
|
||||
KNI_DBG("dummy sock getname\n");
|
||||
((struct sockaddr_ll*)addr)->sll_family = AF_PACKET;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct proto_ops kni_socket_ops = {
|
||||
.getname = kni_sock_getname,
|
||||
.sendmsg = kni_sock_sndmsg,
|
||||
.recvmsg = kni_sock_rcvmsg,
|
||||
.release = kni_sock_release,
|
||||
.poll = kni_sock_poll,
|
||||
.ioctl = kni_sock_ioctl,
|
||||
.compat_ioctl = kni_sock_compat_ioctl,
|
||||
};
|
||||
|
||||
static void
|
||||
kni_sk_write_space(struct sock *sk)
|
||||
{
|
||||
wait_queue_head_t *wqueue;
|
||||
|
||||
if (!sock_writeable(sk) ||
|
||||
!test_and_clear_bit(SOCK_ASYNC_NOSPACE,
|
||||
&sk->sk_socket->flags))
|
||||
return;
|
||||
wqueue = sk_sleep(sk);
|
||||
if (wqueue && waitqueue_active(wqueue))
|
||||
wake_up_interruptible_poll(
|
||||
wqueue, POLLOUT | POLLWRNORM | POLLWRBAND);
|
||||
}
|
||||
|
||||
static void
|
||||
kni_sk_destruct(struct sock *sk)
|
||||
{
|
||||
struct kni_vhost_queue *q =
|
||||
container_of(sk, struct kni_vhost_queue, sk);
|
||||
|
||||
if (!q)
|
||||
return;
|
||||
|
||||
/* make sure there's no packet in buffer */
|
||||
while (skb_dequeue(&sk->sk_receive_queue) != NULL)
|
||||
;
|
||||
|
||||
mb();
|
||||
|
||||
if (q->fifo != NULL) {
|
||||
kfree(q->fifo);
|
||||
q->fifo = NULL;
|
||||
}
|
||||
|
||||
if (q->cache != NULL) {
|
||||
kfree(q->cache);
|
||||
q->cache = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
kni_vhost_backend_init(struct kni_dev *kni)
|
||||
{
|
||||
struct kni_vhost_queue *q;
|
||||
struct net *net = current->nsproxy->net_ns;
|
||||
int err, i, sockfd;
|
||||
struct rte_kni_fifo *fifo;
|
||||
struct sk_buff *elem;
|
||||
|
||||
if (kni->vhost_queue != NULL)
|
||||
return -1;
|
||||
|
||||
if (!(q = (struct kni_vhost_queue *)sk_alloc(
|
||||
net, AF_UNSPEC, GFP_KERNEL, &kni_raw_proto)))
|
||||
return -ENOMEM;
|
||||
|
||||
err = sock_create_lite(AF_UNSPEC, SOCK_RAW, IPPROTO_RAW, &q->sock);
|
||||
if (err)
|
||||
goto free_sk;
|
||||
|
||||
sockfd = sock_map_fd(q->sock, 0);
|
||||
if (sockfd < 0) {
|
||||
err = sockfd;
|
||||
goto free_sock;
|
||||
}
|
||||
|
||||
/* cache init */
|
||||
q->cache = (struct sk_buff*)
|
||||
kzalloc(RTE_KNI_VHOST_MAX_CACHE_SIZE * sizeof(struct sk_buff),
|
||||
GFP_KERNEL);
|
||||
if (!q->cache)
|
||||
goto free_fd;
|
||||
|
||||
fifo = (struct rte_kni_fifo*)
|
||||
kzalloc(RTE_KNI_VHOST_MAX_CACHE_SIZE * sizeof(void *)
|
||||
+ sizeof(struct rte_kni_fifo), GFP_KERNEL);
|
||||
if (!fifo)
|
||||
goto free_cache;
|
||||
|
||||
kni_fifo_init(fifo, RTE_KNI_VHOST_MAX_CACHE_SIZE);
|
||||
|
||||
for (i = 0; i < RTE_KNI_VHOST_MAX_CACHE_SIZE; i++) {
|
||||
elem = &q->cache[i];
|
||||
kni_fifo_put(fifo, (void**)&elem, 1);
|
||||
}
|
||||
q->fifo = fifo;
|
||||
|
||||
/* store sockfd in vhost_queue */
|
||||
q->sockfd = sockfd;
|
||||
|
||||
/* init socket */
|
||||
q->sock->type = SOCK_RAW;
|
||||
q->sock->state = SS_CONNECTED;
|
||||
q->sock->ops = &kni_socket_ops;
|
||||
sock_init_data(q->sock, &q->sk);
|
||||
|
||||
/* init sock data */
|
||||
q->sk.sk_write_space = kni_sk_write_space;
|
||||
q->sk.sk_destruct = kni_sk_destruct;
|
||||
q->flags = IFF_NO_PI | IFF_TAP;
|
||||
q->vnet_hdr_sz = sizeof(struct virtio_net_hdr);
|
||||
#ifdef RTE_KNI_VHOST_VNET_HDR_EN
|
||||
q->flags |= IFF_VNET_HDR;
|
||||
#endif
|
||||
|
||||
/* bind kni_dev with vhost_queue */
|
||||
q->kni = kni;
|
||||
kni->vhost_queue = q;
|
||||
|
||||
wmb();
|
||||
|
||||
kni->vq_status = BE_START;
|
||||
|
||||
KNI_DBG("backend init sockfd=%d, sock->wq=0x%16llx,"
|
||||
"sk->sk_wq=0x%16llx",
|
||||
q->sockfd, (uint64_t)q->sock->wq,
|
||||
(uint64_t)q->sk.sk_wq);
|
||||
|
||||
return 0;
|
||||
|
||||
free_cache:
|
||||
kfree(q->cache);
|
||||
q->cache = NULL;
|
||||
|
||||
free_fd:
|
||||
put_unused_fd(sockfd);
|
||||
|
||||
free_sock:
|
||||
q->kni = NULL;
|
||||
kni->vhost_queue = NULL;
|
||||
kni->vq_status |= BE_FINISH;
|
||||
sock_release(q->sock);
|
||||
q->sock->ops = NULL;
|
||||
q->sock = NULL;
|
||||
|
||||
free_sk:
|
||||
sk_free((struct sock*)q);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
/* kni vhost sock sysfs */
|
||||
static ssize_t
|
||||
show_sock_fd(struct device *dev, struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct net_device *net_dev = container_of(dev, struct net_device, dev);
|
||||
struct kni_dev *kni = netdev_priv(net_dev);
|
||||
int sockfd = -1;
|
||||
if (kni->vhost_queue != NULL)
|
||||
sockfd = kni->vhost_queue->sockfd;
|
||||
return snprintf(buf, 10, "%d\n", sockfd);
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
show_sock_en(struct device *dev, struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct net_device *net_dev = container_of(dev, struct net_device, dev);
|
||||
struct kni_dev *kni = netdev_priv(net_dev);
|
||||
return snprintf(buf, 10, "%u\n", (kni->vhost_queue == NULL ? 0 : 1));
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
set_sock_en(struct device *dev, struct device_attribute *attr,
|
||||
const char *buf, size_t count)
|
||||
{
|
||||
struct net_device *net_dev = container_of(dev, struct net_device, dev);
|
||||
struct kni_dev *kni = netdev_priv(net_dev);
|
||||
unsigned long en;
|
||||
int err = 0;
|
||||
|
||||
if (0 != strict_strtoul(buf, 0, &en))
|
||||
return -EINVAL;
|
||||
|
||||
if (en)
|
||||
err = kni_vhost_backend_init(kni);
|
||||
|
||||
return err ? err : count;
|
||||
}
|
||||
|
||||
static DEVICE_ATTR(sock_fd, S_IRUGO | S_IRUSR, show_sock_fd, NULL);
|
||||
static DEVICE_ATTR(sock_en, S_IRUGO | S_IWUSR, show_sock_en, set_sock_en);
|
||||
static struct attribute *dev_attrs[] = {
|
||||
&dev_attr_sock_fd.attr,
|
||||
&dev_attr_sock_en.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static const struct attribute_group dev_attr_grp = {
|
||||
.attrs = dev_attrs,
|
||||
};
|
||||
|
||||
int
|
||||
kni_vhost_backend_release(struct kni_dev *kni)
|
||||
{
|
||||
struct kni_vhost_queue *q = kni->vhost_queue;
|
||||
|
||||
if (q == NULL)
|
||||
return 0;
|
||||
|
||||
/* dettach from kni */
|
||||
q->kni = NULL;
|
||||
|
||||
KNI_DBG("release backend done\n");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
kni_vhost_init(struct kni_dev *kni)
|
||||
{
|
||||
struct net_device *dev = kni->net_dev;
|
||||
|
||||
if (sysfs_create_group(&dev->dev.kobj, &dev_attr_grp))
|
||||
sysfs_remove_group(&dev->dev.kobj, &dev_attr_grp);
|
||||
|
||||
kni->vq_status = BE_STOP;
|
||||
|
||||
KNI_DBG("kni_vhost_init done\n");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user