diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 648b688fcd..6c8e43a70b 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -45,11 +45,14 @@ /* We've given up on this device. */ #define VIRTIO_CONFIG_S_FAILED 0x80 -/* Some virtio feature bits (currently bits 28 through 32) are reserved for the - * transport being used (eg. virtio_ring), the rest are per-device feature - * bits. */ +/* + * Virtio feature bits VIRTIO_TRANSPORT_F_START through + * VIRTIO_TRANSPORT_F_END are reserved for the transport + * being used (e.g. virtio_ring, virtio_pci etc.), the + * rest are per-device feature bits. + */ #define VIRTIO_TRANSPORT_F_START 28 -#define VIRTIO_TRANSPORT_F_END 34 +#define VIRTIO_TRANSPORT_F_END 38 #ifndef VIRTIO_CONFIG_NO_LEGACY /* Do we get callbacks when the ring is completely used, even if we've @@ -71,4 +74,18 @@ * this is for compatibility with legacy systems. */ #define VIRTIO_F_IOMMU_PLATFORM 33 + +/* This feature indicates support for the packed virtqueue layout. */ +#define VIRTIO_F_RING_PACKED 34 + +/* + * This feature indicates that memory accesses by the driver and the + * device are ordered in a way described by the platform. + */ +#define VIRTIO_F_ORDER_PLATFORM 36 + +/* + * Does the device support Single Root I/O Virtualization? + */ +#define VIRTIO_F_SR_IOV 37 #endif /* _LINUX_VIRTIO_CONFIG_H */ diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h new file mode 100644 index 0000000000..660138ffbe --- /dev/null +++ b/include/linux/virtio_ring.h @@ -0,0 +1,218 @@ +#ifndef _LINUX_VIRTIO_RING_H +#define _LINUX_VIRTIO_RING_H +/* An interface for efficient virtio implementation, currently for use by KVM, + * but hopefully others soon. Do NOT change this since it will + * break existing servers and clients. + * + * This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS +'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Copyright Rusty Russell IBM Corporation 2007. */ +#ifndef __KERNEL__ +#include +#endif +#include +#include + +/* This marks a buffer as continuing via the next field. */ +#define VRING_DESC_F_NEXT 1 +/* This marks a buffer as write-only (otherwise read-only). */ +#define VRING_DESC_F_WRITE 2 +/* This means the buffer contains a list of buffer descriptors. */ +#define VRING_DESC_F_INDIRECT 4 + +/* + * Mark a descriptor as available or used in packed ring. + * Notice: they are defined as shifts instead of shifted values. + */ +#define VRING_PACKED_DESC_F_AVAIL 7 +#define VRING_PACKED_DESC_F_USED 15 + +/* The Host uses this in used->flags to advise the Guest: don't kick me when + * you add a buffer. It's unreliable, so it's simply an optimization. Guest + * will still kick if it's out of buffers. */ +#define VRING_USED_F_NO_NOTIFY 1 +/* The Guest uses this in avail->flags to advise the Host: don't interrupt me + * when you consume a buffer. It's unreliable, so it's simply an + * optimization. */ +#define VRING_AVAIL_F_NO_INTERRUPT 1 + +/* Enable events in packed ring. */ +#define VRING_PACKED_EVENT_FLAG_ENABLE 0x0 +/* Disable events in packed ring. */ +#define VRING_PACKED_EVENT_FLAG_DISABLE 0x1 +/* + * Enable events for a specific descriptor in packed ring. + * (as specified by Descriptor Ring Change Event Offset/Wrap Counter). + * Only valid if VIRTIO_RING_F_EVENT_IDX has been negotiated. + */ +#define VRING_PACKED_EVENT_FLAG_DESC 0x2 + +/* + * Wrap counter bit shift in event suppression structure + * of packed ring. + */ +#define VRING_PACKED_EVENT_F_WRAP_CTR 15 + +/* We support indirect buffer descriptors */ +#define VIRTIO_RING_F_INDIRECT_DESC 28 + +/* The Guest publishes the used index for which it expects an interrupt + * at the end of the avail ring. Host should ignore the avail->flags field. */ +/* The Host publishes the avail index for which it expects a kick + * at the end of the used ring. Guest should ignore the used->flags field. */ +#define VIRTIO_RING_F_EVENT_IDX 29 + +/* Virtio ring descriptors: 16 bytes. These can chain together via "next". */ +struct vring_desc { + /* Address (guest-physical). */ + __virtio64 addr; + /* Length. */ + __virtio32 len; + /* The flags as indicated above. */ + __virtio16 flags; + /* We chain unused descriptors via this, too */ + __virtio16 next; +}; + +struct vring_avail { + __virtio16 flags; + __virtio16 idx; + __virtio16 ring[]; +}; + +/* u32 is used here for ids for padding reasons. */ +struct vring_used_elem { + /* Index of start of used descriptor chain. */ + __virtio32 id; + /* Total length of the descriptor chain which was used (written to) */ + __virtio32 len; +}; + +struct vring_used { + __virtio16 flags; + __virtio16 idx; + struct vring_used_elem ring[]; +}; + +struct vring { + unsigned int num; + + struct vring_desc *desc; + + struct vring_avail *avail; + + struct vring_used *used; +}; + +/* Alignment requirements for vring elements. + * When using pre-virtio 1.0 layout, these fall out naturally. + */ +#define VRING_AVAIL_ALIGN_SIZE 2 +#define VRING_USED_ALIGN_SIZE 4 +#define VRING_DESC_ALIGN_SIZE 16 + +/* The standard layout for the ring is a continuous chunk of memory which looks + * like this. We assume num is a power of 2. + * + * struct vring + * { + * The actual descriptors (16 bytes each) + * struct vring_desc desc[num]; + * + * A ring of available descriptor heads with free-running index. + * __virtio16 avail_flags; + * __virtio16 avail_idx; + * __virtio16 available[num]; + * __virtio16 used_event_idx; + * + * Padding to the next align boundary. + * char pad[]; + * + * A ring of used descriptor heads with free-running index. + * __virtio16 used_flags; + * __virtio16 used_idx; + * struct vring_used_elem used[num]; + * __virtio16 avail_event_idx; + * }; + */ +/* We publish the used event index at the end of the available ring, and vice + * versa. They are at the end for backwards compatibility. */ +#define vring_used_event(vr) ((vr)->avail->ring[(vr)->num]) +#define vring_avail_event(vr) (*(__virtio16 *)&(vr)->used->ring[(vr)->num]) + +static inline void vring_init(struct vring *vr, unsigned int num, void *p, + unsigned long align) +{ + vr->num = num; + vr->desc = p; + vr->avail = (struct vring_avail *)((char *)p + num * sizeof(struct vring_desc + )); + vr->used = (void *)(((uintptr_t)&vr->avail->ring[num] + sizeof(__virtio16) + + align - 1) & ~(align - 1)); +} + +static inline unsigned vring_size(unsigned int num, unsigned long align) +{ + return ((sizeof(struct vring_desc) * num + sizeof(__virtio16) * (3 + num) + + align - 1) & ~(align - 1)) + + sizeof(__virtio16) * 3 + sizeof(struct vring_used_elem) * num; +} + +/* The following is used with USED_EVENT_IDX and AVAIL_EVENT_IDX */ +/* Assuming a given event_idx value from the other side, if + * we have just incremented index from old to new_idx, + * should we trigger an event? */ +static inline int vring_need_event(__u16 event_idx, __u16 new_idx, __u16 old) +{ + /* Note: Xen has similar logic for notification hold-off + * in include/xen/interface/io/ring.h with req_event and req_prod + * corresponding to event_idx + 1 and new_idx respectively. + * Note also that req_event and req_prod in Xen start at 1, + * event indexes in virtio start at 0. */ + return (__u16)(new_idx - event_idx - 1) < (__u16)(new_idx - old); +} + +struct vring_packed_desc_event { + /* Descriptor Ring Change Event Offset/Wrap Counter. */ + __le16 off_wrap; + /* Descriptor Ring Change Event Flags. */ + __le16 flags; +}; + +struct vring_packed_desc { + /* Buffer Address. */ + __le64 addr; + /* Buffer Length. */ + __le32 len; + /* Buffer ID. */ + __le16 id; + /* The flags depending on descriptor type. */ + __le16 flags; +}; + +#endif /* _LINUX_VIRTIO_RING_H */