eal: remove Xen dom0 support
We remove xen-specific code in EAL, including the option --xen-dom0, memory initialization code, compiling dependency, etc. Related documents are removed or updated, and bump the eal library version. Signed-off-by: Jianfeng Tan <jianfeng.tan@intel.com>
This commit is contained in:
parent
a7cb2e20d2
commit
f26ab687a7
@ -192,13 +192,6 @@ Linux VFIO
|
||||
M: Anatoly Burakov <anatoly.burakov@intel.com>
|
||||
F: lib/librte_eal/linuxapp/eal/*vfio*
|
||||
|
||||
Linux Xen
|
||||
M: Jianfeng Tan <jianfeng.tan@intel.com>
|
||||
F: lib/librte_eal/linuxapp/xen_dom0/
|
||||
F: lib/librte_eal/linuxapp/eal/*xen*
|
||||
F: lib/librte_eal/linuxapp/eal/include/exec-env/rte_dom0_common.h
|
||||
F: doc/guides/xen/
|
||||
|
||||
FreeBSD EAL (with overlaps)
|
||||
M: Bruce Richardson <bruce.richardson@intel.com>
|
||||
M: Sergio Gonzalez Monroy <sergio.gonzalez.monroy@intel.com>
|
||||
|
@ -732,11 +732,6 @@ CONFIG_RTE_LIBRTE_VHOST_DEBUG=n
|
||||
#
|
||||
CONFIG_RTE_LIBRTE_PMD_VHOST=n
|
||||
|
||||
#
|
||||
#Compile Xen domain0 support
|
||||
#
|
||||
CONFIG_RTE_LIBRTE_XEN_DOM0=n
|
||||
|
||||
#
|
||||
# Compile the test application
|
||||
#
|
||||
|
@ -34,7 +34,6 @@ The main directories that contain files related to documentation are shown below
|
||||
|-- testpmd_app_ug
|
||||
|-- rel_notes
|
||||
|-- nics
|
||||
|-- xen
|
||||
|-- ...
|
||||
|
||||
|
||||
|
@ -44,7 +44,6 @@ DPDK documentation
|
||||
nics/index
|
||||
cryptodevs/index
|
||||
eventdevs/index
|
||||
xen/index
|
||||
contributing/index
|
||||
rel_notes/index
|
||||
faq/index
|
||||
|
@ -116,7 +116,7 @@ The following is the list of options that can be given to the EAL:
|
||||
|
||||
./rte-app [-c COREMASK | -l CORELIST] [-n NUM] [-b <domain:bus:devid.func>] \
|
||||
[--socket-mem=MB,...] [-d LIB.so|DIR] [-m MB] [-r NUM] [-v] [--file-prefix] \
|
||||
[--proc-type <primary|secondary|auto>] [-- xen-dom0]
|
||||
[--proc-type <primary|secondary|auto>]
|
||||
|
||||
The EAL options are as follows:
|
||||
|
||||
@ -166,9 +166,6 @@ The EAL options are as follows:
|
||||
* ``--proc-type``:
|
||||
The type of process instance.
|
||||
|
||||
* ``--xen-dom0``:
|
||||
Support application running on Xen Domain0 without hugetlbfs.
|
||||
|
||||
* ``--vmware-tsc-map``:
|
||||
Use VMware TSC map instead of native RDTSC.
|
||||
|
||||
|
@ -228,56 +228,3 @@ The mount point can be made permanent across reboots, by adding the following li
|
||||
For 1GB pages, the page size must be specified as a mount option::
|
||||
|
||||
nodev /mnt/huge_1GB hugetlbfs pagesize=1GB 0 0
|
||||
|
||||
Xen Domain0 Support in the Linux Environment
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
The existing memory management implementation is based on the Linux kernel hugepage mechanism.
|
||||
On the Xen hypervisor, hugepage support for DomainU (DomU) Guests means that DPDK applications work as normal for guests.
|
||||
|
||||
However, Domain0 (Dom0) does not support hugepages.
|
||||
To work around this limitation, a new kernel module rte_dom0_mm is added to facilitate the allocation and mapping of memory via
|
||||
**IOCTL** (allocation) and **MMAP** (mapping).
|
||||
|
||||
Enabling Xen Dom0 Mode in the DPDK
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
By default, Xen Dom0 mode is disabled in the DPDK build configuration files.
|
||||
To support Xen Dom0, the CONFIG_RTE_LIBRTE_XEN_DOM0 setting should be changed to “y”, which enables the Xen Dom0 mode at compile time.
|
||||
|
||||
Furthermore, the CONFIG_RTE_EAL_ALLOW_INV_SOCKET_ID setting should also be changed to “y” in the case of the wrong socket ID being received.
|
||||
|
||||
Loading the DPDK rte_dom0_mm Module
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
To run any DPDK application on Xen Dom0, the ``rte_dom0_mm`` module must be loaded into the running kernel with rsv_memsize option.
|
||||
The module is found in the kmod sub-directory of the DPDK target directory.
|
||||
This module should be loaded using the insmod command as shown below (assuming that the current directory is the DPDK target directory)::
|
||||
|
||||
sudo insmod kmod/rte_dom0_mm.ko rsv_memsize=X
|
||||
|
||||
The value X cannot be greater than 4096(MB).
|
||||
|
||||
Configuring Memory for DPDK Use
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
After the rte_dom0_mm.ko kernel module has been loaded, the user must configure the memory size for DPDK usage.
|
||||
This is done by echoing the memory size to the memsize file in the /sys/kernel/mm/dom0-mm/memsize-mB/ directory.
|
||||
Use the following command (assuming that 2048 MB is required)::
|
||||
|
||||
echo 2048 > /sys/kernel/mm/dom0-mm/memsize-mB/memsize
|
||||
|
||||
The user can also check how much memory has already been used::
|
||||
|
||||
cat /sys/kernel/mm/dom0-mm/memsize-mB/memsize_rsvd
|
||||
|
||||
Xen Domain0 does not support NUMA configuration, as a result the ``--socket-mem`` command line option is invalid for Xen Domain0.
|
||||
|
||||
.. note::
|
||||
|
||||
The memsize value cannot be greater than the rsv_memsize value.
|
||||
|
||||
Running the DPDK Application on Xen Domain0
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
To run the DPDK application on Xen Domain0, an extra command line option ``--xen-dom0`` is required.
|
||||
|
@ -117,17 +117,6 @@ The physical address of the reserved memory for that memory zone is also returne
|
||||
|
||||
Memory reservations done using the APIs provided by rte_malloc are also backed by pages from the hugetlbfs filesystem.
|
||||
|
||||
Xen Dom0 support without hugetlbfs
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
The existing memory management implementation is based on the Linux kernel hugepage mechanism.
|
||||
However, Xen Dom0 does not support hugepages, so a new Linux kernel module rte_dom0_mm is added to workaround this limitation.
|
||||
|
||||
The EAL uses IOCTL interface to notify the Linux kernel module rte_dom0_mm to allocate memory of specified size,
|
||||
and get all memory segments information from the module,
|
||||
and the EAL uses MMAP interface to map the allocated memory.
|
||||
For each memory segment, the physical addresses are contiguous within it but actual hardware addresses are contiguous within 2MB.
|
||||
|
||||
PCI Access
|
||||
~~~~~~~~~~
|
||||
|
||||
|
@ -108,7 +108,6 @@ The drivers directory has a *net* subdirectory which contains::
|
||||
+-- szedata2 # SZEDATA2 poll mode driver
|
||||
+-- virtio # Virtio poll mode driver
|
||||
+-- vmxnet3 # VMXNET3 poll mode driver
|
||||
+-- xenvirt # Xen virtio poll mode driver
|
||||
|
||||
.. note::
|
||||
|
||||
|
@ -29,9 +29,6 @@ Deprecation Notices
|
||||
- ``rte_eal_devargs_type_count``
|
||||
- ``rte_eal_parse_devargs_str``, replaced by ``rte_eal_devargs_parse``
|
||||
|
||||
* eal: the support of Xen dom0 will be removed from EAL in 17.11; and with
|
||||
that, drivers/net/xenvirt and examples/vhost_xen will also be removed.
|
||||
|
||||
* eal: An ABI change is planned for 17.11 to make DPDK aware of IOVA address
|
||||
translation scheme.
|
||||
Reference to phys address in EAL data-structure or functions may change to
|
||||
|
@ -178,6 +178,8 @@ API Changes
|
||||
* Added ``flags`` param in ``rte_mempool_xmem_size`` and
|
||||
``rte_mempool_xmem_usage``.
|
||||
|
||||
* Xen dom0 in EAL was removed, as well as xenvirt PMD and vhost_xen.
|
||||
|
||||
* ``rte_mem_phy2mch`` was used in Xen dom0 to obtain the physical address;
|
||||
remove this API as Xen dom0 support was removed.
|
||||
|
||||
|
@ -94,10 +94,6 @@ See the DPDK Getting Started Guides for more information on these options.
|
||||
|
||||
Display the version information on startup.
|
||||
|
||||
* ``--xen-dom0``
|
||||
|
||||
Support application running on Xen Domain0 without hugetlbfs.
|
||||
|
||||
* ``--syslog``
|
||||
|
||||
Set the syslog facility.
|
||||
|
Binary file not shown.
Before Width: | Height: | Size: 160 KiB |
Binary file not shown.
Before Width: | Height: | Size: 6.3 KiB |
Binary file not shown.
Before Width: | Height: | Size: 94 KiB |
@ -1,38 +0,0 @@
|
||||
.. BSD LICENSE
|
||||
Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
Xen Guide
|
||||
=========
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
:numbered:
|
||||
|
||||
pkt_switch
|
@ -1,470 +0,0 @@
|
||||
.. BSD LICENSE
|
||||
Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
DPDK Xen Based Packet-Switching Solution
|
||||
========================================
|
||||
|
||||
Introduction
|
||||
------------
|
||||
|
||||
DPDK provides a para-virtualization packet switching solution, based on the Xen hypervisor's Grant Table, Note 1,
|
||||
which provides simple and fast packet switching capability between guest domains and host domain based on MAC address or VLAN tag.
|
||||
|
||||
This solution is comprised of two components;
|
||||
a Poll Mode Driver (PMD) as the front end in the guest domain and a switching back end in the host domain.
|
||||
XenStore is used to exchange configure information between the PMD front end and switching back end,
|
||||
including grant reference IDs for shared Virtio RX/TX rings,
|
||||
MAC address, device state, and so on. XenStore is an information storage space shared between domains,
|
||||
see further information on XenStore below.
|
||||
|
||||
The front end PMD can be found in the DPDK directory lib/librte_pmd_xenvirt and the back end example in examples/vhost_xen.
|
||||
|
||||
The PMD front end and switching back end use shared Virtio RX/TX rings as a para-virtualized interface.
|
||||
The Virtio ring is created by the front end, and Grant table references for the ring are passed to host.
|
||||
The switching back end maps those grant table references and creates shared rings in a mapped address space.
|
||||
|
||||
The following diagram describes the functionality of the DPDK Xen Packet-Switching Solution.
|
||||
|
||||
|
||||
.. _figure_dpdk_xen_pkt_switch:
|
||||
|
||||
.. figure:: img/dpdk_xen_pkt_switch.*
|
||||
|
||||
Functionality of the DPDK Xen Packet Switching Solution.
|
||||
|
||||
|
||||
Note 1 The Xen hypervisor uses a mechanism called a Grant Table to share memory between domains
|
||||
(`http://wiki.xen.org/wiki/Grant Table <http://wiki.xen.org/wiki/Grant%20Table>`_).
|
||||
|
||||
A diagram of the design is shown below, where "gva" is the Guest Virtual Address,
|
||||
which is the data pointer of the mbuf, and "hva" is the Host Virtual Address:
|
||||
|
||||
|
||||
.. _figure_grant_table:
|
||||
|
||||
.. figure:: img/grant_table.*
|
||||
|
||||
DPDK Xen Layout
|
||||
|
||||
|
||||
In this design, a Virtio ring is used as a para-virtualized interface for better performance over a Xen private ring
|
||||
when packet switching to and from a VM.
|
||||
The additional performance is gained by avoiding a system call and memory map in each memory copy with a XEN private ring.
|
||||
|
||||
Device Creation
|
||||
---------------
|
||||
|
||||
Poll Mode Driver Front End
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
* Mbuf pool allocation:
|
||||
|
||||
To use a Xen switching solution, the DPDK application should use rte_mempool_gntalloc_create()
|
||||
to reserve mbuf pools during initialization.
|
||||
rte_mempool_gntalloc_create() creates a mempool with objects from memory allocated and managed via gntalloc/gntdev.
|
||||
|
||||
The DPDK now supports construction of mempools from allocated virtual memory through the rte_mempool_xmem_create() API.
|
||||
|
||||
This front end constructs mempools based on memory allocated through the xen_gntalloc driver.
|
||||
rte_mempool_gntalloc_create() allocates Grant pages, maps them to continuous virtual address space,
|
||||
and calls rte_mempool_xmem_create() to build mempools.
|
||||
The Grant IDs for all Grant pages are passed to the host through XenStore.
|
||||
|
||||
* Virtio Ring Creation:
|
||||
|
||||
The Virtio queue size is defined as 256 by default in the VQ_DESC_NUM macro.
|
||||
Using the queue setup function,
|
||||
Grant pages are allocated based on ring size and are mapped to continuous virtual address space to form the Virtio ring.
|
||||
Normally, one ring is comprised of several pages.
|
||||
Their Grant IDs are passed to the host through XenStore.
|
||||
|
||||
There is no requirement that this memory be physically continuous.
|
||||
|
||||
* Interrupt and Kick:
|
||||
|
||||
There are no interrupts in DPDK Xen Switching as both front and back ends work in polling mode.
|
||||
There is no requirement for notification.
|
||||
|
||||
* Feature Negotiation:
|
||||
|
||||
Currently, feature negotiation through XenStore is not supported.
|
||||
|
||||
* Packet Reception & Transmission:
|
||||
|
||||
With mempools and Virtio rings created, the front end can operate Virtio devices,
|
||||
as it does in Virtio PMD for KVM Virtio devices with the exception that the host
|
||||
does not require notifications or deal with interrupts.
|
||||
|
||||
XenStore is a database that stores guest and host information in the form of (key, value) pairs.
|
||||
The following is an example of the information generated during the startup of the front end PMD in a guest VM (domain ID 1):
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
xenstore -ls /local/domain/1/control/dpdk
|
||||
0_mempool_gref="3042,3043,3044,3045"
|
||||
0_mempool_va="0x7fcbc6881000"
|
||||
0_tx_vring_gref="3049"
|
||||
0_rx_vring_gref="3053"
|
||||
0_ether_addr="4e:0b:d0:4e:aa:f1"
|
||||
0_vring_flag="3054"
|
||||
...
|
||||
|
||||
Multiple mempools and multiple Virtios may exist in the guest domain, the first number is the index, starting from zero.
|
||||
|
||||
The idx#_mempool_va stores the guest virtual address for mempool idx#.
|
||||
|
||||
The idx#_ether_addr stores the MAC address of the guest Virtio device.
|
||||
|
||||
For idx#_rx_vring_gref, idx#_tx_vring_gref, and idx#_mempool_gref, the value is a list of Grant references.
|
||||
Take idx#_mempool_gref node for example, the host maps those Grant references to a continuous virtual address space.
|
||||
The real Grant reference information is stored in this virtual address space,
|
||||
where (gref, pfn) pairs follow each other with -1 as the terminator.
|
||||
|
||||
|
||||
.. _figure_grant_refs:
|
||||
|
||||
.. figure:: img/grant_refs.*
|
||||
|
||||
Mapping Grant references to a continuous virtual address space
|
||||
|
||||
|
||||
After all gref# IDs are retrieved, the host maps them to a continuous virtual address space.
|
||||
With the guest mempool virtual address, the host establishes 1:1 address mapping.
|
||||
With multiple guest mempools, the host establishes multiple address translation regions.
|
||||
|
||||
Switching Back End
|
||||
~~~~~~~~~~~~~~~~~~
|
||||
|
||||
The switching back end monitors changes in XenStore.
|
||||
When the back end detects that a new Virtio device has been created in a guest domain, it will:
|
||||
|
||||
#. Retrieve Grant and configuration information from XenStore.
|
||||
|
||||
#. Map and create a Virtio ring.
|
||||
|
||||
#. Map mempools in the host and establish address translation between the guest address and host address.
|
||||
|
||||
#. Select a free VMDQ pool, set its affinity with the Virtio device, and set the MAC/VLAN filter.
|
||||
|
||||
Packet Reception
|
||||
~~~~~~~~~~~~~~~~
|
||||
|
||||
When packets arrive from an external network, the MAC/VLAN filter classifies packets into queues in one VMDQ pool.
|
||||
As each pool is bonded to a Virtio device in some guest domain, the switching back end will:
|
||||
|
||||
#. Fetch an available entry from the Virtio RX ring.
|
||||
|
||||
#. Get gva, and translate it to hva.
|
||||
|
||||
#. Copy the contents of the packet to the memory buffer pointed to by gva.
|
||||
|
||||
The DPDK application in the guest domain, based on the PMD front end,
|
||||
is polling the shared Virtio RX ring for available packets and receives them on arrival.
|
||||
|
||||
Packet Transmission
|
||||
~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
When a Virtio device in one guest domain is to transmit a packet,
|
||||
it puts the virtual address of the packet's data area into the shared Virtio TX ring.
|
||||
|
||||
The packet switching back end is continuously polling the Virtio TX ring.
|
||||
When new packets are available for transmission from a guest, it will:
|
||||
|
||||
#. Fetch an available entry from the Virtio TX ring.
|
||||
|
||||
#. Get gva, and translate it to hva.
|
||||
|
||||
#. Copy the packet from hva to the host mbuf's data area.
|
||||
|
||||
#. Compare the destination MAC address with all the MAC addresses of the Virtio devices it manages.
|
||||
If a match exists, it directly copies the packet to the matched Virtio RX ring.
|
||||
Otherwise, it sends the packet out through hardware.
|
||||
|
||||
.. note::
|
||||
|
||||
The packet switching back end is for demonstration purposes only.
|
||||
The user could implement their switching logic based on this example.
|
||||
In this example, only one physical port on the host is supported.
|
||||
Multiple segments are not supported. The biggest mbuf supported is 4KB.
|
||||
When the back end is restarted, all front ends must also be restarted.
|
||||
|
||||
Running the Application
|
||||
-----------------------
|
||||
|
||||
The following describes the steps required to run the application.
|
||||
|
||||
Validated Environment
|
||||
~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Host:
|
||||
|
||||
Xen-hypervisor: 4.2.2
|
||||
|
||||
Distribution: Fedora release 18
|
||||
|
||||
Kernel: 3.10.0
|
||||
|
||||
Xen development package (including Xen, Xen-libs, xen-devel): 4.2.3
|
||||
|
||||
Guest:
|
||||
|
||||
Distribution: Fedora 16 and 18
|
||||
|
||||
Kernel: 3.6.11
|
||||
|
||||
Xen Host Prerequisites
|
||||
~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Note that the following commands might not be the same on different Linux* distributions.
|
||||
|
||||
* Install xen-devel package:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
yum install xen-devel.x86_64
|
||||
|
||||
* Start xend if not already started:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
/etc/init.d/xend start
|
||||
|
||||
* Mount xenfs if not already mounted:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
mount -t xenfs none /proc/xen
|
||||
|
||||
* Enlarge the limit for xen_gntdev driver:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
modprobe -r xen_gntdev
|
||||
modprobe xen_gntdev limit=1000000
|
||||
|
||||
.. note::
|
||||
|
||||
The default limit for earlier versions of the xen_gntdev driver is 1024.
|
||||
That is insufficient to support the mapping of multiple Virtio devices into multiple VMs,
|
||||
so it is necessary to enlarge the limit by reloading this module.
|
||||
The default limit of recent versions of xen_gntdev is 1048576.
|
||||
The rough calculation of this limit is:
|
||||
|
||||
limit=nb_mbuf# * VM#.
|
||||
|
||||
In DPDK examples, nb_mbuf# is normally 8192.
|
||||
|
||||
Building and Running the Switching Backend
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
#. Edit config/common_linuxapp, and change the default configuration value for the following two items:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
CONFIG_RTE_LIBRTE_XEN_DOM0=y
|
||||
CONFIG_RTE_LIBRTE_PMD_XENVIRT=n
|
||||
|
||||
#. Build the target:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
make install T=x86_64-native-linuxapp-gcc
|
||||
|
||||
#. Ensure that RTE_SDK and RTE_TARGET are correctly set. Build the switching example:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
make -C examples/vhost_xen/
|
||||
|
||||
#. Load the Xen DPDK memory management module and preallocate memory:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
insmod ./x86_64-native-linuxapp-gcc/build/lib/librte_eal/linuxapp/xen_dom0/rte_dom0_mm.ko
|
||||
echo 2048 > /sys/kernel/mm/dom0-mm/memsize-mB/memsize
|
||||
|
||||
.. note::
|
||||
|
||||
On Xen Dom0, there is no hugepage support.
|
||||
Under Xen Dom0, the DPDK uses a special memory management kernel module
|
||||
to allocate chunks of physically continuous memory.
|
||||
Refer to the *DPDK Getting Started Guide* for more information on memory management in the DPDK.
|
||||
In the above command, 2 GB of memory (2048 MB) is reserved for DPDK.
|
||||
|
||||
#. Load uio_pci_generic and bind one Intel NIC controller to it:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
modprobe uio_pci_generic
|
||||
python usertools/dpdk-devbind.py -b uio_pci_generic 0000:09:00.0
|
||||
|
||||
In this case, 0000:09:00.0 is the PCI address for the NIC controller.
|
||||
|
||||
#. Run the switching back end example:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
examples/vhost_xen/build/vhost-switch -l 0-3 -n 3 --xen-dom0 -- -p1
|
||||
|
||||
.. note::
|
||||
|
||||
The --xen-dom0 option instructs the DPDK to use the Xen kernel module to allocate memory.
|
||||
|
||||
Other Parameters:
|
||||
|
||||
* -vm2vm
|
||||
|
||||
The vm2vm parameter enables/disables packet switching in software.
|
||||
Disabling vm2vm implies that on a VM packet transmission will always go to the Ethernet port
|
||||
and will not be switched to another VM
|
||||
|
||||
* -Stats
|
||||
|
||||
The Stats parameter controls the printing of Virtio-net device statistics.
|
||||
The parameter specifies the interval (in seconds) at which to print statistics,
|
||||
an interval of 0 seconds will disable printing statistics.
|
||||
|
||||
Xen PMD Frontend Prerequisites
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
#. Install xen-devel package for accessing XenStore:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
yum install xen-devel.x86_64
|
||||
|
||||
#. Mount xenfs, if it is not already mounted:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
mount -t xenfs none /proc/xen
|
||||
|
||||
#. Enlarge the default limit for xen_gntalloc driver:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
modprobe -r xen_gntalloc
|
||||
modprobe xen_gntalloc limit=6000
|
||||
|
||||
.. note::
|
||||
|
||||
Before the Linux kernel version 3.8-rc5, Jan 15th 2013,
|
||||
a critical defect occurs when a guest is heavily allocating Grant pages.
|
||||
The Grant driver allocates fewer pages than expected which causes kernel memory corruption.
|
||||
This happens, for example, when a guest uses the v1 format of a Grant table entry and allocates
|
||||
more than 8192 Grant pages (this number might be different on different hypervisor versions).
|
||||
To work around this issue, set the limit for gntalloc driver to 6000.
|
||||
(The kernel normally allocates hundreds of Grant pages with one Xen front end per virtualized device).
|
||||
If the kernel allocates a lot of Grant pages, for example, if the user uses multiple net front devices,
|
||||
it is best to upgrade the Grant alloc driver.
|
||||
This defect has been fixed in kernel version 3.8-rc5 and later.
|
||||
|
||||
Building and Running the Front End
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
#. Edit config/common_linuxapp, and change the default configuration value:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
CONFIG_RTE_LIBRTE_XEN_DOM0=n
|
||||
CONFIG_RTE_LIBRTE_PMD_XENVIRT=y
|
||||
|
||||
#. Build the package:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
make install T=x86_64-native-linuxapp-gcc
|
||||
|
||||
#. Enable hugepages. Refer to the *DPDK Getting Started Guide* for instructions on
|
||||
how to use hugepages in the DPDK.
|
||||
|
||||
#. Run TestPMD. Refer to *DPDK TestPMD Application User Guide* for detailed parameter usage.
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
./x86_64-native-linuxapp-gcc/app/testpmd -l 0-3 -n 4 --vdev="net_xenvirt0,mac=00:00:00:00:00:11"
|
||||
testpmd>set fwd mac
|
||||
testpmd>start
|
||||
|
||||
As an example to run two TestPMD instances over 2 Xen Virtio devices:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
--vdev="net_xenvirt0,mac=00:00:00:00:00:11" --vdev="net_xenvirt1,mac=00:00:00:00:00:22"
|
||||
|
||||
|
||||
Usage Examples: Injecting a Packet Stream Using a Packet Generator
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Loopback Mode
|
||||
^^^^^^^^^^^^^
|
||||
|
||||
Run TestPMD in a guest VM:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
./x86_64-native-linuxapp-gcc/app/testpmd -l 0-3 -n 4 --vdev="net_xenvirt0,mac=00:00:00:00:00:11" -- -i --eth-peer=0,00:00:00:00:00:22
|
||||
testpmd> set fwd mac
|
||||
testpmd> start
|
||||
|
||||
Example output of the vhost_switch would be:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
DATA:(0) MAC_ADDRESS 00:00:00:00:00:11 and VLAN_TAG 1000 registered.
|
||||
|
||||
The above message indicates that device 0 has been registered with MAC address 00:00:00:00:00:11 and VLAN tag 1000.
|
||||
Any packets received on the NIC with these values are placed on the device's receive queue.
|
||||
|
||||
Configure a packet stream in the packet generator, set the destination MAC address to 00:00:00:00:00:11, and VLAN to 1000,
|
||||
the guest Virtio receives these packets and sends them out with destination MAC address 00:00:00:00:00:22.
|
||||
|
||||
Inter-VM Mode
|
||||
^^^^^^^^^^^^^
|
||||
|
||||
Run TestPMD in guest VM1:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
./x86_64-native-linuxapp-gcc/app/testpmd -l 0-3 -n 4 --vdev="net_xenvirt0,mac=00:00:00:00:00:11" -- -i --eth-peer=0,00:00:00:00:00:22
|
||||
|
||||
Run TestPMD in guest VM2:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
./x86_64-native-linuxapp-gcc/app/testpmd -l 0-3 -n 4 --vdev="net_xenvirt0,mac=00:00:00:00:00:22" -- -i --eth-peer=0,00:00:00:00:00:33
|
||||
|
||||
Configure a packet stream in the packet generator, and set the destination MAC address to 00:00:00:00:00:11 and VLAN to 1000.
|
||||
The packets received in Virtio in guest VM1 will be forwarded to Virtio in guest VM2 and
|
||||
then sent out through hardware with destination MAC address 00:00:00:00:00:33.
|
||||
|
||||
The packet flow is:
|
||||
|
||||
packet generator->Virtio in guest VM1->switching backend->Virtio in guest VM2->switching backend->wire
|
@ -48,7 +48,7 @@ LDLIBS += -lgcc_s
|
||||
|
||||
EXPORT_MAP := rte_eal_version.map
|
||||
|
||||
LIBABIVER := 5
|
||||
LIBABIVER := 6
|
||||
|
||||
# specific to bsdapp exec-env
|
||||
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) := eal.c
|
||||
|
@ -1,107 +0,0 @@
|
||||
/*-
|
||||
* This file is provided under a dual BSD/LGPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
*
|
||||
* GNU LESSER GENERAL PUBLIC LICENSE
|
||||
*
|
||||
* Copyright(c) 2007-2014 Intel Corporation. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2.1 of the GNU Lesser General Public License
|
||||
* as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Contact Information:
|
||||
* Intel Corporation
|
||||
*
|
||||
*
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _RTE_DOM0_COMMON_H_
|
||||
#define _RTE_DOM0_COMMON_H_
|
||||
|
||||
#ifdef __KERNEL__
|
||||
#include <linux/if.h>
|
||||
#endif
|
||||
|
||||
#define DOM0_NAME_MAX 256
|
||||
#define DOM0_MM_DEV "/dev/dom0_mm"
|
||||
|
||||
#define DOM0_CONTIG_NUM_ORDER 9 /**< 2M order */
|
||||
#define DOM0_NUM_MEMSEG 512 /**< Maximum nb. of memory segment. */
|
||||
#define DOM0_MEMBLOCK_SIZE 0x200000 /**< Size of a memory block (2M). */
|
||||
#define DOM0_CONFIG_MEMSIZE 4096 /**< Maximum config memory size(4G). */
|
||||
#define DOM0_NUM_MEMBLOCK (DOM0_CONFIG_MEMSIZE / 2) /**< Maximum nb. of 2M memory block. */
|
||||
|
||||
#define RTE_DOM0_IOCTL_PREPARE_MEMSEG _IOWR(0, 1 , struct memory_info)
|
||||
#define RTE_DOM0_IOCTL_ATTACH_TO_MEMSEG _IOWR(0, 2 , char *)
|
||||
#define RTE_DOM0_IOCTL_GET_NUM_MEMSEG _IOWR(0, 3, int)
|
||||
#define RTE_DOM0_IOCTL_GET_MEMSEG_INFO _IOWR(0, 4, void *)
|
||||
|
||||
/**
|
||||
* Memory request descriptor; it is the argument type of RTE_DOM0_IOCTL_PREPARE_MEMSEG.
|
||||
*/
|
||||
struct memory_info {
|
||||
char name[DOM0_NAME_MAX]; /**< Name identifying the memory reservation (NUL-terminated string). */
|
||||
uint64_t size; /**< Requested amount of memory; NOTE(review): the EAL caller appears to pass a value in MB - confirm against the driver. */
|
||||
};
|
||||
|
||||
/**
|
||||
* Description of one memory segment, holding up to DOM0_NUM_MEMBLOCK machine frame numbers.
|
||||
*/
|
||||
struct memseg_info {
|
||||
uint32_t idx; /**< Presumably the segment index - TODO confirm against the driver. */
|
||||
uint64_t pfn; /**< Page frame number of the segment; NOTE(review): looks like the guest-physical start frame - confirm. */
|
||||
uint64_t size; /**< Segment size; NOTE(review): presumably in bytes - confirm. */
|
||||
uint64_t mfn[DOM0_NUM_MEMBLOCK]; /**< Machine frame numbers; NOTE(review): presumably one entry per 2M block - confirm. */
|
||||
};
|
||||
|
||||
/**
|
||||
* A structure used to store memory block information.
|
||||
* NOTE(review): not referenced by the user-space code visible alongside this header; presumably driver-side bookkeeping - confirm.
|
||||
*/
|
||||
struct memblock_info {
|
||||
uint8_t exchange_flag; /**< Meaning not visible from this header - TODO document. */
|
||||
uint64_t vir_addr; /**< Presumably the virtual address of the block - confirm. */
|
||||
uint64_t pfn; /**< Presumably the page frame number of the block - confirm. */
|
||||
uint64_t mfn; /**< Presumably the machine frame number of the block - confirm. */
|
||||
};
|
||||
#endif /* _RTE_DOM0_COMMON_H_ */
|
@ -87,8 +87,6 @@ DPDK_2.0 {
|
||||
rte_thread_get_affinity;
|
||||
rte_thread_set_affinity;
|
||||
rte_vlog;
|
||||
rte_xen_dom0_memory_attach;
|
||||
rte_xen_dom0_memory_init;
|
||||
rte_zmalloc;
|
||||
rte_zmalloc_socket;
|
||||
|
||||
@ -115,7 +113,6 @@ DPDK_2.2 {
|
||||
rte_keepalive_dispatch_pings;
|
||||
rte_keepalive_mark_alive;
|
||||
rte_keepalive_register_core;
|
||||
rte_xen_dom0_supported;
|
||||
|
||||
} DPDK_2.1;
|
||||
|
||||
|
@ -98,7 +98,6 @@ eal_long_options[] = {
|
||||
{OPT_VDEV, 1, NULL, OPT_VDEV_NUM },
|
||||
{OPT_VFIO_INTR, 1, NULL, OPT_VFIO_INTR_NUM },
|
||||
{OPT_VMWARE_TSC_MAP, 0, NULL, OPT_VMWARE_TSC_MAP_NUM },
|
||||
{OPT_XEN_DOM0, 0, NULL, OPT_XEN_DOM0_NUM },
|
||||
{0, 0, NULL, 0 }
|
||||
};
|
||||
|
||||
@ -209,8 +208,6 @@ eal_reset_internal_config(struct internal_config *internal_cfg)
|
||||
|
||||
internal_cfg->syslog_facility = LOG_DAEMON;
|
||||
|
||||
internal_cfg->xen_dom0_support = 0;
|
||||
|
||||
/* if set to NONE, interrupt mode is determined automatically */
|
||||
internal_cfg->vfio_intr_mode = RTE_INTR_MODE_NONE;
|
||||
|
||||
|
@ -65,7 +65,6 @@ struct internal_config {
|
||||
volatile unsigned force_nrank; /**< force number of ranks */
|
||||
volatile unsigned no_hugetlbfs; /**< true to disable hugetlbfs */
|
||||
unsigned hugepage_unlink; /**< true to unlink backing files */
|
||||
volatile unsigned xen_dom0_support; /**< support app running on Xen Dom0*/
|
||||
volatile unsigned no_pci; /**< true to disable PCI */
|
||||
volatile unsigned no_hpet; /**< true to disable HPET */
|
||||
volatile unsigned vmware_tsc_map; /**< true to use VMware TSC mapping
|
||||
|
@ -83,8 +83,6 @@ enum {
|
||||
OPT_VFIO_INTR_NUM,
|
||||
#define OPT_VMWARE_TSC_MAP "vmware-tsc-map"
|
||||
OPT_VMWARE_TSC_MAP_NUM,
|
||||
#define OPT_XEN_DOM0 "xen-dom0"
|
||||
OPT_XEN_DOM0_NUM,
|
||||
OPT_LONG_MAX_NUM
|
||||
};
|
||||
|
||||
|
@ -46,10 +46,6 @@
|
||||
|
||||
#include <rte_config.h>
|
||||
|
||||
#ifdef RTE_EXEC_ENV_LINUXAPP
|
||||
#include <exec-env/rte_dom0_common.h>
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
@ -116,10 +112,6 @@ struct rte_memseg {
|
||||
int32_t socket_id; /**< NUMA socket ID. */
|
||||
uint32_t nchannel; /**< Number of channels. */
|
||||
uint32_t nrank; /**< Number of ranks. */
|
||||
#ifdef RTE_LIBRTE_XEN_DOM0
|
||||
/**< store segment MFNs */
|
||||
uint64_t mfn[DOM0_NUM_MEMBLOCK];
|
||||
#endif
|
||||
} __rte_packed;
|
||||
|
||||
/**
|
||||
@ -195,39 +187,6 @@ unsigned rte_memory_get_nchannel(void);
|
||||
*/
|
||||
unsigned rte_memory_get_nrank(void);
|
||||
|
||||
#ifdef RTE_LIBRTE_XEN_DOM0
|
||||
|
||||
/**< Internal use only - should DOM0 memory mapping be used */
|
||||
int rte_xen_dom0_supported(void);
|
||||
|
||||
/**
|
||||
* Memory init for supporting application running on Xen domain0.
|
||||
*
|
||||
* @param void
|
||||
*
|
||||
* @return
|
||||
* 0: successfully
|
||||
* negative: error
|
||||
*/
|
||||
int rte_xen_dom0_memory_init(void);
|
||||
|
||||
/**
|
||||
* Attach to memory segments of primary process on Xen domain0.
|
||||
*
|
||||
* @param void
|
||||
*
|
||||
* @return
|
||||
* 0: successfully
|
||||
* negative: error
|
||||
*/
|
||||
int rte_xen_dom0_memory_attach(void);
|
||||
#else
|
||||
/* Stub for builds without RTE_LIBRTE_XEN_DOM0: Dom0 memory mapping is never used, so always return 0. */
static inline int rte_xen_dom0_supported(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
@ -35,7 +35,5 @@ DIRS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal
|
||||
DIRS-$(CONFIG_RTE_EAL_IGB_UIO) += igb_uio
|
||||
DIRS-$(CONFIG_RTE_KNI_KMOD) += kni
|
||||
DEPDIRS-kni := eal
|
||||
DIRS-$(CONFIG_RTE_LIBRTE_XEN_DOM0) += xen_dom0
|
||||
DEPDIRS-xen_dom0 := eal
|
||||
|
||||
include $(RTE_SDK)/mk/rte.subdir.mk
|
||||
|
@ -37,7 +37,7 @@ ARCH_DIR ?= $(RTE_ARCH)
|
||||
EXPORT_MAP := rte_eal_version.map
|
||||
VPATH += $(RTE_SDK)/lib/librte_eal/common/arch/$(ARCH_DIR)
|
||||
|
||||
LIBABIVER := 5
|
||||
LIBABIVER := 6
|
||||
|
||||
VPATH += $(RTE_SDK)/lib/librte_eal/common
|
||||
|
||||
@ -58,9 +58,6 @@ endif
|
||||
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) := eal.c
|
||||
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_hugepage_info.c
|
||||
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_memory.c
|
||||
ifeq ($(CONFIG_RTE_LIBRTE_XEN_DOM0),y)
|
||||
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_xen_memory.c
|
||||
endif
|
||||
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_thread.c
|
||||
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_log.c
|
||||
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_vfio.c
|
||||
@ -130,7 +127,7 @@ ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
|
||||
CFLAGS_eal_thread.o += -Wno-return-type
|
||||
endif
|
||||
|
||||
INC := rte_interrupts.h rte_kni_common.h rte_dom0_common.h
|
||||
INC := rte_interrupts.h rte_kni_common.h
|
||||
|
||||
SYMLINK-$(CONFIG_RTE_EXEC_ENV_LINUXAPP)-include/exec-env := \
|
||||
$(addprefix include/exec-env/,$(INC))
|
||||
|
@ -367,7 +367,6 @@ eal_usage(const char *prgname)
|
||||
" --"OPT_BASE_VIRTADDR" Base virtual address\n"
|
||||
" --"OPT_CREATE_UIO_DEV" Create /dev/uioX (usually done by hotplug)\n"
|
||||
" --"OPT_VFIO_INTR" Interrupt mode for VFIO (legacy|msi|msix)\n"
|
||||
" --"OPT_XEN_DOM0" Support running on Xen dom0 without hugetlbfs\n"
|
||||
"\n");
|
||||
/* Allow the application to print its usage message too if hook is set */
|
||||
if ( rte_application_usage_hook ) {
|
||||
@ -568,19 +567,6 @@ eal_parse_args(int argc, char **argv)
|
||||
eal_usage(prgname);
|
||||
exit(EXIT_SUCCESS);
|
||||
|
||||
/* long options */
|
||||
case OPT_XEN_DOM0_NUM:
|
||||
#ifdef RTE_LIBRTE_XEN_DOM0
|
||||
internal_config.xen_dom0_support = 1;
|
||||
#else
|
||||
RTE_LOG(ERR, EAL, "Can't support DPDK app "
|
||||
"running on Dom0, please configure"
|
||||
" RTE_LIBRTE_XEN_DOM0=y\n");
|
||||
ret = -1;
|
||||
goto out;
|
||||
#endif
|
||||
break;
|
||||
|
||||
case OPT_HUGE_DIR_NUM:
|
||||
internal_config.hugepage_dir = optarg;
|
||||
break;
|
||||
@ -658,15 +644,6 @@ eal_parse_args(int argc, char **argv)
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* --xen-dom0 doesn't make sense with --socket-mem */
|
||||
if (internal_config.xen_dom0_support && internal_config.force_sockets == 1) {
|
||||
RTE_LOG(ERR, EAL, "Options --"OPT_SOCKET_MEM" cannot be specified "
|
||||
"together with --"OPT_XEN_DOM0"\n");
|
||||
eal_usage(prgname);
|
||||
ret = -1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (optind >= 0)
|
||||
argv[optind-1] = prgname;
|
||||
ret = optind-1;
|
||||
@ -827,7 +804,6 @@ rte_eal_init(int argc, char **argv)
|
||||
|
||||
if (internal_config.no_hugetlbfs == 0 &&
|
||||
internal_config.process_type != RTE_PROC_SECONDARY &&
|
||||
internal_config.xen_dom0_support == 0 &&
|
||||
eal_hugepage_info_init() < 0) {
|
||||
rte_eal_init_alert("Cannot get hugepage information.");
|
||||
rte_errno = EACCES;
|
||||
|
@ -75,13 +75,6 @@
|
||||
|
||||
#define PFN_MASK_SIZE 8
|
||||
|
||||
#ifdef RTE_LIBRTE_XEN_DOM0
|
||||
/* Return the xen_dom0_support flag stored in internal_config (non-zero when Dom0 memory mapping is enabled). */
int rte_xen_dom0_supported(void)
|
||||
{
|
||||
return internal_config.xen_dom0_support;
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @file
|
||||
* Huge page mapping under linux
|
||||
@ -106,10 +99,6 @@ test_phys_addrs_available(void)
|
||||
uint64_t tmp;
|
||||
phys_addr_t physaddr;
|
||||
|
||||
/* For dom0, phys addresses can always be available */
|
||||
if (rte_xen_dom0_supported())
|
||||
return;
|
||||
|
||||
if (!rte_eal_has_hugepages()) {
|
||||
RTE_LOG(ERR, EAL,
|
||||
"Started without hugepages support, physical addresses not available\n");
|
||||
@ -142,29 +131,6 @@ rte_mem_virt2phy(const void *virtaddr)
|
||||
if (rte_eal_iova_mode() == RTE_IOVA_VA)
|
||||
return (uintptr_t)virtaddr;
|
||||
|
||||
/* when using dom0, /proc/self/pagemap always returns 0, check in
|
||||
* dpdk memory by browsing the memsegs */
|
||||
if (rte_xen_dom0_supported()) {
|
||||
struct rte_mem_config *mcfg;
|
||||
struct rte_memseg *memseg;
|
||||
unsigned i;
|
||||
|
||||
mcfg = rte_eal_get_configuration()->mem_config;
|
||||
for (i = 0; i < RTE_MAX_MEMSEG; i++) {
|
||||
memseg = &mcfg->memseg[i];
|
||||
if (memseg->addr == NULL)
|
||||
break;
|
||||
if (virtaddr > memseg->addr &&
|
||||
virtaddr < RTE_PTR_ADD(memseg->addr,
|
||||
memseg->len)) {
|
||||
return memseg->phys_addr +
|
||||
RTE_PTR_DIFF(virtaddr, memseg->addr);
|
||||
}
|
||||
}
|
||||
|
||||
return RTE_BAD_PHYS_ADDR;
|
||||
}
|
||||
|
||||
/* Cannot parse /proc/self/pagemap, no need to log errors everywhere */
|
||||
if (!phys_addrs_available)
|
||||
return RTE_BAD_PHYS_ADDR;
|
||||
@ -1070,17 +1036,6 @@ rte_eal_hugepage_init(void)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* check if app runs on Xen Dom0 */
|
||||
if (internal_config.xen_dom0_support) {
|
||||
#ifdef RTE_LIBRTE_XEN_DOM0
|
||||
/* use dom0_mm kernel driver to init memory */
|
||||
if (rte_xen_dom0_memory_init() < 0)
|
||||
return -1;
|
||||
else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* calculate total number of hugepages available. at this point we haven't
|
||||
* yet started sorting them so they all are on socket 0 */
|
||||
for (i = 0; i < (int) internal_config.num_hugepage_sizes; i++) {
|
||||
@ -1403,17 +1358,6 @@ rte_eal_hugepage_attach(void)
|
||||
|
||||
test_phys_addrs_available();
|
||||
|
||||
if (internal_config.xen_dom0_support) {
|
||||
#ifdef RTE_LIBRTE_XEN_DOM0
|
||||
if (rte_xen_dom0_memory_attach() < 0) {
|
||||
RTE_LOG(ERR, EAL, "Failed to attach memory segments of primary "
|
||||
"process\n");
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
fd_zero = open("/dev/zero", O_RDONLY);
|
||||
if (fd_zero < 0) {
|
||||
RTE_LOG(ERR, EAL, "Could not open /dev/zero\n");
|
||||
|
@ -1,381 +0,0 @@
|
||||
/*-
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <errno.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include <inttypes.h>
|
||||
#include <string.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/queue.h>
|
||||
#include <sys/file.h>
|
||||
#include <unistd.h>
|
||||
#include <limits.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <sys/time.h>
|
||||
|
||||
#include <rte_log.h>
|
||||
#include <rte_memory.h>
|
||||
#include <rte_memzone.h>
|
||||
#include <rte_launch.h>
|
||||
#include <rte_eal.h>
|
||||
#include <rte_eal_memconfig.h>
|
||||
#include <rte_per_lcore.h>
|
||||
#include <rte_lcore.h>
|
||||
#include <rte_common.h>
|
||||
#include <rte_string_fns.h>
|
||||
|
||||
#include "eal_private.h"
|
||||
#include "eal_internal_cfg.h"
|
||||
#include "eal_filesystem.h"
|
||||
#include <exec-env/rte_dom0_common.h>
|
||||
|
||||
#define PAGE_SIZE RTE_PGSIZE_4K
|
||||
#define DEFAUL_DOM0_NAME "dom0-mem"
|
||||
|
||||
static int xen_fd = -1;
|
||||
static const char sys_dir_path[] = "/sys/kernel/mm/dom0-mm/memsize-mB";
|
||||
|
||||
/*
|
||||
* Try to mmap *size bytes in /dev/zero. If it is successful, return the
|
||||
* pointer to the mmap'd area and keep *size unmodified. Else, retry
|
||||
* with a smaller zone: decrease *size by mem_size until it reaches
|
||||
* 0. In this case, return NULL. Note: this function returns an address
|
||||
* which is a multiple of mem_size size.
|
||||
*/
|
||||
static void *
|
||||
xen_get_virtual_area(size_t *size, size_t mem_size)
|
||||
{
|
||||
void *addr;
|
||||
int fd;
|
||||
long aligned_addr;
|
||||
|
||||
RTE_LOG(DEBUG, EAL, "Ask a virtual area of 0x%zu bytes\n", *size);
|
||||
|
||||
fd = open("/dev/zero", O_RDONLY);
|
||||
if (fd < 0){
|
||||
RTE_LOG(ERR, EAL, "Cannot open /dev/zero\n");
|
||||
return NULL;
|
||||
}
|
||||
do {
|
||||
addr = mmap(NULL, (*size) + mem_size, PROT_READ,
|
||||
MAP_PRIVATE, fd, 0);
|
||||
if (addr == MAP_FAILED)
|
||||
*size -= mem_size;
|
||||
} while (addr == MAP_FAILED && *size > 0);
|
||||
|
||||
if (addr == MAP_FAILED) {
|
||||
close(fd);
|
||||
RTE_LOG(ERR, EAL, "Cannot get a virtual area\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
munmap(addr, (*size) + mem_size);
|
||||
close(fd);
|
||||
|
||||
/* align addr to a mem_size boundary */
|
||||
aligned_addr = (uintptr_t)addr;
|
||||
aligned_addr = RTE_ALIGN_CEIL(aligned_addr, mem_size);
|
||||
addr = (void *)(aligned_addr);
|
||||
|
||||
RTE_LOG(DEBUG, EAL, "Virtual area found at %p (size = 0x%zx)\n",
|
||||
addr, *size);
|
||||
|
||||
return addr;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get memory size configuration from /sys/devices/virtual/misc/dom0_mm
|
||||
* /memsize-mB/memsize file, and the size unit is mB.
|
||||
*/
|
||||
static int
|
||||
get_xen_memory_size(void)
|
||||
{
|
||||
char path[PATH_MAX];
|
||||
unsigned long mem_size = 0;
|
||||
static const char *file_name;
|
||||
|
||||
file_name = "memsize";
|
||||
snprintf(path, sizeof(path), "%s/%s",
|
||||
sys_dir_path, file_name);
|
||||
|
||||
if (eal_parse_sysfs_value(path, &mem_size) < 0)
|
||||
return -1;
|
||||
|
||||
if (mem_size == 0)
|
||||
rte_exit(EXIT_FAILURE,"XEN-DOM0:the %s/%s was not"
|
||||
" configured.\n",sys_dir_path, file_name);
|
||||
if (mem_size % 2)
|
||||
rte_exit(EXIT_FAILURE,"XEN-DOM0:the %s/%s must be"
|
||||
" even number.\n",sys_dir_path, file_name);
|
||||
|
||||
if (mem_size > DOM0_CONFIG_MEMSIZE)
|
||||
rte_exit(EXIT_FAILURE,"XEN-DOM0:the %s/%s should not be larger"
|
||||
" than %d mB\n",sys_dir_path, file_name, DOM0_CONFIG_MEMSIZE);
|
||||
|
||||
return mem_size;
|
||||
}
|
||||
|
||||
/**
|
||||
* Based on physical address to caculate MFN in Xen Dom0.
|
||||
*/
|
||||
phys_addr_t
|
||||
rte_xen_mem_phy2mch(int32_t memseg_id, const phys_addr_t phy_addr)
|
||||
{
|
||||
int mfn_id, i;
|
||||
uint64_t mfn, mfn_offset;
|
||||
struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
|
||||
struct rte_memseg *memseg = mcfg->memseg;
|
||||
|
||||
/* find the memory segment owning the physical address */
|
||||
if (memseg_id == -1) {
|
||||
for (i = 0; i < RTE_MAX_MEMSEG; i++) {
|
||||
if ((phy_addr >= memseg[i].phys_addr) &&
|
||||
(phy_addr < memseg[i].phys_addr +
|
||||
memseg[i].len)) {
|
||||
memseg_id = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (memseg_id == -1)
|
||||
return RTE_BAD_PHYS_ADDR;
|
||||
}
|
||||
|
||||
mfn_id = (phy_addr - memseg[memseg_id].phys_addr) / RTE_PGSIZE_2M;
|
||||
|
||||
/*the MFN is contiguous in 2M */
|
||||
mfn_offset = (phy_addr - memseg[memseg_id].phys_addr) %
|
||||
RTE_PGSIZE_2M / PAGE_SIZE;
|
||||
mfn = mfn_offset + memseg[memseg_id].mfn[mfn_id];
|
||||
|
||||
/** return mechine address */
|
||||
return mfn * PAGE_SIZE + phy_addr % PAGE_SIZE;
|
||||
}
|
||||
|
||||
int
|
||||
rte_xen_dom0_memory_init(void)
|
||||
{
|
||||
void *vir_addr, *vma_addr = NULL;
|
||||
int err, ret = 0;
|
||||
uint32_t i, requested, mem_size, memseg_idx, num_memseg = 0;
|
||||
size_t vma_len = 0;
|
||||
struct memory_info meminfo;
|
||||
struct memseg_info seginfo[RTE_MAX_MEMSEG];
|
||||
int flags, page_size = getpagesize();
|
||||
struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
|
||||
struct rte_memseg *memseg = mcfg->memseg;
|
||||
uint64_t total_mem = internal_config.memory;
|
||||
|
||||
memset(seginfo, 0, sizeof(seginfo));
|
||||
memset(&meminfo, 0, sizeof(struct memory_info));
|
||||
|
||||
mem_size = get_xen_memory_size();
|
||||
requested = (unsigned) (total_mem / 0x100000);
|
||||
if (requested > mem_size)
|
||||
/* if we didn't satisfy total memory requirements */
|
||||
rte_exit(EXIT_FAILURE,"Not enough memory available! Requested: %uMB,"
|
||||
" available: %uMB\n", requested, mem_size);
|
||||
else if (total_mem != 0)
|
||||
mem_size = requested;
|
||||
|
||||
/* Check FD and open once */
|
||||
if (xen_fd < 0) {
|
||||
xen_fd = open(DOM0_MM_DEV, O_RDWR);
|
||||
if (xen_fd < 0) {
|
||||
RTE_LOG(ERR, EAL, "Can not open %s\n",DOM0_MM_DEV);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
meminfo.size = mem_size;
|
||||
|
||||
/* construct memory mangement name for Dom0 */
|
||||
snprintf(meminfo.name, DOM0_NAME_MAX, "%s-%s",
|
||||
internal_config.hugefile_prefix, DEFAUL_DOM0_NAME);
|
||||
|
||||
/* Notify kernel driver to allocate memory */
|
||||
ret = ioctl(xen_fd, RTE_DOM0_IOCTL_PREPARE_MEMSEG, &meminfo);
|
||||
if (ret < 0) {
|
||||
RTE_LOG(ERR, EAL, "XEN DOM0:failed to get memory\n");
|
||||
err = -EIO;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
/* Get number of memory segment from driver */
|
||||
ret = ioctl(xen_fd, RTE_DOM0_IOCTL_GET_NUM_MEMSEG, &num_memseg);
|
||||
if (ret < 0) {
|
||||
RTE_LOG(ERR, EAL, "XEN DOM0:failed to get memseg count.\n");
|
||||
err = -EIO;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if(num_memseg > RTE_MAX_MEMSEG){
|
||||
RTE_LOG(ERR, EAL, "XEN DOM0: the memseg count %d is greater"
|
||||
" than max memseg %d.\n",num_memseg, RTE_MAX_MEMSEG);
|
||||
err = -EIO;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
/* get all memory segements information */
|
||||
ret = ioctl(xen_fd, RTE_DOM0_IOCTL_GET_MEMSEG_INFO, seginfo);
|
||||
if (ret < 0) {
|
||||
RTE_LOG(ERR, EAL, "XEN DOM0:failed to get memseg info.\n");
|
||||
err = -EIO;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
/* map all memory segments to contiguous user space */
|
||||
for (memseg_idx = 0; memseg_idx < num_memseg; memseg_idx++)
|
||||
{
|
||||
vma_len = seginfo[memseg_idx].size;
|
||||
|
||||
/**
|
||||
* get the biggest virtual memory area up to vma_len. If it fails,
|
||||
* vma_addr is NULL, so let the kernel provide the address.
|
||||
*/
|
||||
vma_addr = xen_get_virtual_area(&vma_len, RTE_PGSIZE_2M);
|
||||
if (vma_addr == NULL) {
|
||||
flags = MAP_SHARED;
|
||||
vma_len = RTE_PGSIZE_2M;
|
||||
} else
|
||||
flags = MAP_SHARED | MAP_FIXED;
|
||||
|
||||
seginfo[memseg_idx].size = vma_len;
|
||||
vir_addr = mmap(vma_addr, seginfo[memseg_idx].size,
|
||||
PROT_READ|PROT_WRITE, flags, xen_fd,
|
||||
memseg_idx * page_size);
|
||||
if (vir_addr == MAP_FAILED) {
|
||||
RTE_LOG(ERR, EAL, "XEN DOM0:Could not mmap %s\n",
|
||||
DOM0_MM_DEV);
|
||||
err = -EIO;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
memseg[memseg_idx].addr = vir_addr;
|
||||
memseg[memseg_idx].phys_addr = page_size *
|
||||
seginfo[memseg_idx].pfn ;
|
||||
memseg[memseg_idx].len = seginfo[memseg_idx].size;
|
||||
for ( i = 0; i < seginfo[memseg_idx].size / RTE_PGSIZE_2M; i++)
|
||||
memseg[memseg_idx].mfn[i] = seginfo[memseg_idx].mfn[i];
|
||||
|
||||
/* MFNs are continuous in 2M, so assume that page size is 2M */
|
||||
memseg[memseg_idx].hugepage_sz = RTE_PGSIZE_2M;
|
||||
|
||||
memseg[memseg_idx].nchannel = mcfg->nchannel;
|
||||
memseg[memseg_idx].nrank = mcfg->nrank;
|
||||
|
||||
/* NUMA is not suppoted in Xen Dom0, so only set socket 0*/
|
||||
memseg[memseg_idx].socket_id = 0;
|
||||
}
|
||||
|
||||
return 0;
|
||||
fail:
|
||||
if (xen_fd > 0) {
|
||||
close(xen_fd);
|
||||
xen_fd = -1;
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
|
||||
* This creates the memory mappings in the secondary process to match that of
|
||||
* the server process. It goes through each memory segment in the DPDK runtime
|
||||
* configuration, mapping them in order to form a contiguous block in the
|
||||
* virtual memory space
|
||||
*/
|
||||
int
|
||||
rte_xen_dom0_memory_attach(void)
|
||||
{
|
||||
const struct rte_mem_config *mcfg;
|
||||
unsigned s = 0; /* s used to track the segment number */
|
||||
int xen_fd = -1;
|
||||
int ret = -1;
|
||||
void *vir_addr;
|
||||
char name[DOM0_NAME_MAX] = {0};
|
||||
int page_size = getpagesize();
|
||||
|
||||
mcfg = rte_eal_get_configuration()->mem_config;
|
||||
|
||||
/* Check FD and open once */
|
||||
if (xen_fd < 0) {
|
||||
xen_fd = open(DOM0_MM_DEV, O_RDWR);
|
||||
if (xen_fd < 0) {
|
||||
RTE_LOG(ERR, EAL, "Can not open %s\n",DOM0_MM_DEV);
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
|
||||
/* construct memory mangement name for Dom0 */
|
||||
snprintf(name, DOM0_NAME_MAX, "%s-%s",
|
||||
internal_config.hugefile_prefix, DEFAUL_DOM0_NAME);
|
||||
/* attach to memory segments of primary process */
|
||||
ret = ioctl(xen_fd, RTE_DOM0_IOCTL_ATTACH_TO_MEMSEG, name);
|
||||
if (ret) {
|
||||
RTE_LOG(ERR, EAL,"attach memory segments fail.\n");
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* map all segments into memory to make sure we get the addrs */
|
||||
for (s = 0; s < RTE_MAX_MEMSEG; ++s) {
|
||||
|
||||
/*
|
||||
* the first memory segment with len==0 is the one that
|
||||
* follows the last valid segment.
|
||||
*/
|
||||
if (mcfg->memseg[s].len == 0)
|
||||
break;
|
||||
|
||||
vir_addr = mmap(mcfg->memseg[s].addr, mcfg->memseg[s].len,
|
||||
PROT_READ|PROT_WRITE, MAP_SHARED|MAP_FIXED, xen_fd,
|
||||
s * page_size);
|
||||
if (vir_addr == MAP_FAILED) {
|
||||
RTE_LOG(ERR, EAL, "Could not mmap %llu bytes "
|
||||
"in %s to requested address [%p]\n",
|
||||
(unsigned long long)mcfg->memseg[s].len, DOM0_MM_DEV,
|
||||
mcfg->memseg[s].addr);
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
|
||||
error:
|
||||
if (xen_fd >= 0) {
|
||||
close(xen_fd);
|
||||
xen_fd = -1;
|
||||
}
|
||||
return -1;
|
||||
}
|
@ -1,108 +0,0 @@
|
||||
/*-
|
||||
* This file is provided under a dual BSD/LGPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
*
|
||||
* GNU LESSER GENERAL PUBLIC LICENSE
|
||||
*
|
||||
* Copyright(c) 2007-2014 Intel Corporation. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2.1 of the GNU Lesser General Public License
|
||||
* as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Contact Information:
|
||||
* Intel Corporation
|
||||
*
|
||||
*
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _RTE_DOM0_COMMON_H_
|
||||
#define _RTE_DOM0_COMMON_H_
|
||||
|
||||
#ifdef __KERNEL__
|
||||
#include <linux/if.h>
|
||||
#endif
|
||||
|
||||
#define DOM0_NAME_MAX 256
|
||||
#define DOM0_MM_DEV "/dev/dom0_mm"
|
||||
|
||||
#define DOM0_CONTIG_NUM_ORDER 9 /**< order of 2M */
|
||||
#define DOM0_NUM_MEMSEG 512 /**< Maximum nb. of memory segment. */
|
||||
#define DOM0_MEMBLOCK_SIZE 0x200000 /**< size of memory block(2M). */
|
||||
#define DOM0_CONFIG_MEMSIZE 4096 /**< Maximum config memory size(4G). */
|
||||
#define DOM0_NUM_MEMBLOCK (DOM0_CONFIG_MEMSIZE / 2) /**< Maximum nb. of 2M memory block. */
|
||||
|
||||
#define RTE_DOM0_IOCTL_PREPARE_MEMSEG _IOWR(0, 1 , struct memory_info)
|
||||
#define RTE_DOM0_IOCTL_ATTACH_TO_MEMSEG _IOWR(0, 2 , char *)
|
||||
#define RTE_DOM0_IOCTL_GET_NUM_MEMSEG _IOWR(0, 3, int)
|
||||
#define RTE_DOM0_IOCTL_GET_MEMSEG_INFO _IOWR(0, 4, void *)
|
||||
|
||||
/**
|
||||
* Memory request descriptor; it is the argument type of RTE_DOM0_IOCTL_PREPARE_MEMSEG.
|
||||
*/
|
||||
struct memory_info {
|
||||
char name[DOM0_NAME_MAX]; /**< Name identifying the memory reservation (NUL-terminated string). */
|
||||
uint64_t size; /**< Requested amount of memory; NOTE(review): unit is presumably MB - confirm against the driver. */
|
||||
};
|
||||
|
||||
/**
|
||||
* Description of one memory segment, holding up to DOM0_NUM_MEMBLOCK machine frame numbers.
|
||||
*/
|
||||
struct memseg_info {
|
||||
uint32_t idx; /**< Presumably the segment index - TODO confirm against the driver. */
|
||||
uint64_t pfn; /**< Page frame number of the segment; NOTE(review): looks like the guest-physical start frame - confirm. */
|
||||
uint64_t size; /**< Segment size; NOTE(review): presumably in bytes - confirm. */
|
||||
uint64_t mfn[DOM0_NUM_MEMBLOCK]; /**< Machine frame numbers; NOTE(review): presumably one entry per 2M block - confirm. */
|
||||
};
|
||||
|
||||
/**
|
||||
* A structure used to store memory block information.
|
||||
* NOTE(review): no user of this structure is visible next to this header; presumably driver-side bookkeeping - confirm.
|
||||
*/
|
||||
struct memblock_info {
|
||||
uint8_t exchange_flag; /**< Meaning not visible from this header - TODO document. */
|
||||
uint8_t used; /**< Presumably marks the block as in use - confirm. */
|
||||
uint64_t vir_addr; /**< Presumably the virtual address of the block - confirm. */
|
||||
uint64_t pfn; /**< Presumably the page frame number of the block - confirm. */
|
||||
uint64_t mfn; /**< Presumably the machine frame number of the block - confirm. */
|
||||
};
|
||||
#endif /* _RTE_DOM0_COMMON_H_ */
|
@ -87,8 +87,6 @@ DPDK_2.0 {
|
||||
rte_thread_get_affinity;
|
||||
rte_thread_set_affinity;
|
||||
rte_vlog;
|
||||
rte_xen_dom0_memory_attach;
|
||||
rte_xen_dom0_memory_init;
|
||||
rte_zmalloc;
|
||||
rte_zmalloc_socket;
|
||||
|
||||
@ -118,8 +116,6 @@ DPDK_2.2 {
|
||||
rte_keepalive_dispatch_pings;
|
||||
rte_keepalive_mark_alive;
|
||||
rte_keepalive_register_core;
|
||||
rte_xen_dom0_supported;
|
||||
rte_xen_mem_phy2mch;
|
||||
|
||||
} DPDK_2.1;
|
||||
|
||||
|
@ -34,9 +34,6 @@
|
||||
#include <linux/version.h>
|
||||
#include <linux/slab.h>
|
||||
|
||||
#ifdef CONFIG_XEN_DOM0
|
||||
#include <xen/xen.h>
|
||||
#endif
|
||||
#include <rte_pci_dev_features.h>
|
||||
|
||||
#include "compat.h"
|
||||
@ -191,52 +188,6 @@ igbuio_pci_release(struct uio_info *info, struct inode *inode)
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_XEN_DOM0
|
||||
static int
|
||||
igbuio_dom0_mmap_phys(struct uio_info *info, struct vm_area_struct *vma)
|
||||
{
|
||||
int idx;
|
||||
|
||||
idx = (int)vma->vm_pgoff;
|
||||
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
|
||||
#ifdef HAVE_PTE_MASK_PAGE_IOMAP
|
||||
vma->vm_page_prot.pgprot |= _PAGE_IOMAP;
|
||||
#endif
|
||||
|
||||
return remap_pfn_range(vma,
|
||||
vma->vm_start,
|
||||
info->mem[idx].addr >> PAGE_SHIFT,
|
||||
vma->vm_end - vma->vm_start,
|
||||
vma->vm_page_prot);
|
||||
}
|
||||
|
||||
/**
|
||||
* This is uio device mmap method which will use igbuio mmap for Xen
|
||||
* Dom0 environment.
|
||||
*/
|
||||
static int
|
||||
igbuio_dom0_pci_mmap(struct uio_info *info, struct vm_area_struct *vma)
|
||||
{
|
||||
int idx;
|
||||
|
||||
if (vma->vm_pgoff >= MAX_UIO_MAPS)
|
||||
return -EINVAL;
|
||||
|
||||
if (info->mem[vma->vm_pgoff].size == 0)
|
||||
return -EINVAL;
|
||||
|
||||
idx = (int)vma->vm_pgoff;
|
||||
switch (info->mem[idx].memtype) {
|
||||
case UIO_MEM_PHYS:
|
||||
return igbuio_dom0_mmap_phys(info, vma);
|
||||
case UIO_MEM_LOGICAL:
|
||||
case UIO_MEM_VIRTUAL:
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Remap pci resources described by bar #pci_bar in uio resource n. */
|
||||
static int
|
||||
igbuio_pci_setup_iomem(struct pci_dev *dev, struct uio_info *info,
|
||||
@ -480,11 +431,6 @@ igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
|
||||
udev->info.irqcontrol = igbuio_pci_irqcontrol;
|
||||
udev->info.open = igbuio_pci_open;
|
||||
udev->info.release = igbuio_pci_release;
|
||||
#ifdef CONFIG_XEN_DOM0
|
||||
/* check if the driver run on Xen Dom0 */
|
||||
if (xen_initial_domain())
|
||||
udev->info.mmap = igbuio_dom0_pci_mmap;
|
||||
#endif
|
||||
udev->info.priv = udev;
|
||||
udev->pdev = dev;
|
||||
|
||||
|
@ -1,53 +0,0 @@
|
||||
# BSD LICENSE
|
||||
#
|
||||
# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in
|
||||
# the documentation and/or other materials provided with the
|
||||
# distribution.
|
||||
# * Neither the name of Intel Corporation nor the names of its
|
||||
# contributors may be used to endorse or promote products derived
|
||||
# from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
include $(RTE_SDK)/mk/rte.vars.mk
|
||||
|
||||
#
|
||||
# module name and path
|
||||
#
|
||||
MODULE = rte_dom0_mm
|
||||
|
||||
#
|
||||
# CFLAGS
|
||||
#
|
||||
MODULE_CFLAGS += -I$(SRCDIR) --param max-inline-insns-single=50
|
||||
MODULE_CFLAGS += -I$(RTE_OUTPUT)/include
|
||||
MODULE_CFLAGS += -include $(RTE_OUTPUT)/include/rte_config.h
|
||||
MODULE_CFLAGS += -Wall -Werror
|
||||
|
||||
#
|
||||
# all source are stored in SRCS-y
|
||||
#
|
||||
|
||||
SRCS-y += dom0_mm_misc.c
|
||||
|
||||
include $(RTE_SDK)/mk/rte.module.mk
|
@ -1,15 +0,0 @@
|
||||
/*
|
||||
* Minimal wrappers to allow compiling xen_dom0 on older kernels.
|
||||
*/
|
||||
|
||||
#ifndef RHEL_RELEASE_VERSION
|
||||
#define RHEL_RELEASE_VERSION(a, b) (((a) << 8) + (b))
|
||||
#endif
|
||||
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 39) && \
|
||||
(!(defined(RHEL_RELEASE_CODE) && \
|
||||
RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 4)))
|
||||
|
||||
#define kstrtoul strict_strtoul
|
||||
|
||||
#endif /* < 2.6.39 */
|
@ -1,107 +0,0 @@
|
||||
/*-
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
*
|
||||
* GPL LICENSE SUMMARY
|
||||
*
|
||||
* Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
* The full GNU General Public License is included in this distribution
|
||||
* in the file called LICENSE.GPL.
|
||||
*
|
||||
* Contact Information:
|
||||
* Intel Corporation
|
||||
*
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
*/
|
||||
#ifndef _DOM0_MM_DEV_H_
|
||||
#define _DOM0_MM_DEV_H_
|
||||
|
||||
#include <linux/wait.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <exec-env/rte_dom0_common.h>
|
||||
|
||||
#define NUM_MEM_CTX 256 /**< Maximum number of memory context*/
|
||||
#define MAX_EXCHANGE_FAIL_TIME 5 /**< Maximum times of allowing exchange fail .*/
|
||||
#define MAX_MEMBLOCK_SIZE (2 * DOM0_MEMBLOCK_SIZE)
|
||||
#define MAX_NUM_ORDER (DOM0_CONTIG_NUM_ORDER + 1)
|
||||
#define SIZE_PER_BLOCK 2 /**< Size of memory block (2MB).*/
|
||||
|
||||
/**
|
||||
* A structure describing the private information for a dom0 device.
|
||||
* A single static instance (dom0_dev) is used by dom0_mm_misc.c.
|
||||
*/
|
||||
struct dom0_mm_dev {
|
||||
struct miscdevice miscdev; /**< Misc char device registered under the name "dom0_mm". */
|
||||
uint8_t fail_times; /**< Incremented each time a contiguous-region exchange fails while reserving. */
|
||||
uint32_t used_memsize; /**< Sum of mem_size over all registered contexts. */
|
||||
uint32_t num_mem_ctx; /**< Number of non-NULL entries in mm_data[]. */
|
||||
uint32_t config_memsize; /**< Last value accepted through the sysfs "memsize" attribute. */
|
||||
uint32_t num_bigblock; /**< Number of double-size (MAX_NUM_ORDER) allocations made while reserving. */
|
||||
struct dom0_mm_data *mm_data[NUM_MEM_CTX]; /**< Registered contexts; a NULL entry is a free slot. */
|
||||
struct mutex data_lock; /**< Taken by the sysfs store, ioctl, mmap and release paths. */
|
||||
};
|
||||
|
||||
/* Per-context state: one instance is allocated by the PREPARE_MEMSEG ioctl. */
struct dom0_mm_data{
|
||||
uint32_t refcnt; /**< Raised by the prepare/attach ioctls, dropped on release; freed at zero. */
|
||||
uint32_t num_memseg; /**< Number of memory segment. */
|
||||
uint32_t mem_size; /**< Size of requesting memory. */
|
||||
|
||||
char name[DOM0_NAME_MAX]; /**< Lookup key compared by dom0_find_memdata(). */
|
||||
|
||||
/** Store global memory block IDs used by an instance */
|
||||
uint32_t block_num[DOM0_NUM_MEMBLOCK];
|
||||
|
||||
/** Store memory block information.*/
|
||||
struct memblock_info block_info[DOM0_NUM_MEMBLOCK];
|
||||
|
||||
/** Store memory segment information.*/
|
||||
struct memseg_info seg_info[DOM0_NUM_MEMSEG];
|
||||
};
|
||||
|
||||
#define XEN_ERR(args...) printk(KERN_DEBUG "XEN_DOM0: Error: " args)
|
||||
#define XEN_PRINT(args...) printk(KERN_DEBUG "XEN_DOM0: " args)
|
||||
#endif
|
@ -1,780 +0,0 @@
|
||||
/*-
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
*
|
||||
* GPL LICENSE SUMMARY
|
||||
*
|
||||
* Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
* The full GNU General Public License is included in this distribution
|
||||
* in the file called LICENSE.GPL.
|
||||
*
|
||||
* Contact Information:
|
||||
* Intel Corporation
|
||||
*
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/miscdevice.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/device.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/version.h>
|
||||
|
||||
#include <xen/xen.h>
|
||||
#include <xen/page.h>
|
||||
#include <xen/xen-ops.h>
|
||||
#include <xen/interface/memory.h>
|
||||
|
||||
#include <exec-env/rte_dom0_common.h>
|
||||
|
||||
#include "compat.h"
|
||||
#include "dom0_mm_dev.h"
|
||||
|
||||
MODULE_LICENSE("Dual BSD/GPL");
|
||||
MODULE_AUTHOR("Intel Corporation");
|
||||
MODULE_DESCRIPTION("Kernel Module for supporting DPDK running on Xen Dom0");
|
||||
|
||||
static struct dom0_mm_dev dom0_dev;
|
||||
static struct kobject *dom0_kobj = NULL;
|
||||
|
||||
static struct memblock_info *rsv_mm_info;
|
||||
|
||||
/* Default configuration for reserved memory size(2048 MB). */
|
||||
static uint32_t rsv_memsize = 2048;
|
||||
|
||||
static int dom0_open(struct inode *inode, struct file *file);
|
||||
static int dom0_release(struct inode *inode, struct file *file);
|
||||
static int dom0_ioctl(struct file *file, unsigned int ioctl_num,
|
||||
unsigned long ioctl_param);
|
||||
static int dom0_mmap(struct file *file, struct vm_area_struct *vma);
|
||||
static int dom0_memory_free(uint32_t size);
|
||||
static int dom0_memory_release(struct dom0_mm_data *mm_data);
|
||||
|
||||
static const struct file_operations data_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
.open = dom0_open,
|
||||
.release = dom0_release,
|
||||
.mmap = dom0_mmap,
|
||||
.unlocked_ioctl = (void *)dom0_ioctl,
|
||||
};
|
||||
|
||||
static ssize_t
|
||||
show_memsize_rsvd(struct device *dev, struct device_attribute *attr, char *buf)
|
||||
{
|
||||
return snprintf(buf, 10, "%u\n", dom0_dev.used_memsize);
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
show_memsize(struct device *dev, struct device_attribute *attr, char *buf)
|
||||
{
|
||||
return snprintf(buf, 10, "%u\n", dom0_dev.config_memsize);
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
store_memsize(struct device *dev, struct device_attribute *attr,
|
||||
const char *buf, size_t count)
|
||||
{
|
||||
int err = 0;
|
||||
unsigned long mem_size;
|
||||
|
||||
if (0 != kstrtoul(buf, 0, &mem_size))
|
||||
return -EINVAL;
|
||||
|
||||
mutex_lock(&dom0_dev.data_lock);
|
||||
if (0 == mem_size) {
|
||||
err = -EINVAL;
|
||||
goto fail;
|
||||
} else if (mem_size > (rsv_memsize - dom0_dev.used_memsize)) {
|
||||
XEN_ERR("configure memory size fail\n");
|
||||
err = -EINVAL;
|
||||
goto fail;
|
||||
} else
|
||||
dom0_dev.config_memsize = mem_size;
|
||||
|
||||
fail:
|
||||
mutex_unlock(&dom0_dev.data_lock);
|
||||
return err ? err : count;
|
||||
}
|
||||
|
||||
static DEVICE_ATTR(memsize, S_IRUGO | S_IWUSR, show_memsize, store_memsize);
|
||||
static DEVICE_ATTR(memsize_rsvd, S_IRUGO, show_memsize_rsvd, NULL);
|
||||
|
||||
static struct attribute *dev_attrs[] = {
|
||||
&dev_attr_memsize.attr,
|
||||
&dev_attr_memsize_rsvd.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
/* the memory size unit is MB */
|
||||
static const struct attribute_group dev_attr_grp = {
|
||||
.name = "memsize-mB",
|
||||
.attrs = dev_attrs,
|
||||
};
|
||||
|
||||
|
||||
static void
|
||||
sort_viraddr(struct memblock_info *mb, int cnt)
|
||||
{
|
||||
int i,j;
|
||||
uint64_t tmp_pfn;
|
||||
uint64_t tmp_viraddr;
|
||||
|
||||
/*sort virtual address and pfn */
|
||||
for(i = 0; i < cnt; i ++) {
|
||||
for(j = cnt - 1; j > i; j--) {
|
||||
if(mb[j].pfn < mb[j - 1].pfn) {
|
||||
tmp_pfn = mb[j - 1].pfn;
|
||||
mb[j - 1].pfn = mb[j].pfn;
|
||||
mb[j].pfn = tmp_pfn;
|
||||
|
||||
tmp_viraddr = mb[j - 1].vir_addr;
|
||||
mb[j - 1].vir_addr = mb[j].vir_addr;
|
||||
mb[j].vir_addr = tmp_viraddr;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
dom0_find_memdata(const char * mem_name)
|
||||
{
|
||||
unsigned i;
|
||||
int idx = -1;
|
||||
for(i = 0; i< NUM_MEM_CTX; i++) {
|
||||
if(dom0_dev.mm_data[i] == NULL)
|
||||
continue;
|
||||
if (!strncmp(dom0_dev.mm_data[i]->name, mem_name,
|
||||
sizeof(char) * DOM0_NAME_MAX)) {
|
||||
idx = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return idx;
|
||||
}
|
||||
|
||||
static int
|
||||
dom0_find_mempos(void)
|
||||
{
|
||||
unsigned i;
|
||||
int idx = -1;
|
||||
|
||||
for(i = 0; i< NUM_MEM_CTX; i++) {
|
||||
if(dom0_dev.mm_data[i] == NULL){
|
||||
idx = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return idx;
|
||||
}
|
||||
|
||||
static int
|
||||
dom0_memory_release(struct dom0_mm_data *mm_data)
|
||||
{
|
||||
int idx;
|
||||
uint32_t num_block, block_id;
|
||||
|
||||
/* each memory block is 2M */
|
||||
num_block = mm_data->mem_size / SIZE_PER_BLOCK;
|
||||
if (num_block == 0)
|
||||
return -EINVAL;
|
||||
|
||||
/* reset global memory data */
|
||||
idx = dom0_find_memdata(mm_data->name);
|
||||
if (idx >= 0) {
|
||||
dom0_dev.used_memsize -= mm_data->mem_size;
|
||||
dom0_dev.mm_data[idx] = NULL;
|
||||
dom0_dev.num_mem_ctx--;
|
||||
}
|
||||
|
||||
/* reset these memory blocks status as free */
|
||||
for (idx = 0; idx < num_block; idx++) {
|
||||
block_id = mm_data->block_num[idx];
|
||||
rsv_mm_info[block_id].used = 0;
|
||||
}
|
||||
|
||||
memset(mm_data, 0, sizeof(struct dom0_mm_data));
|
||||
vfree(mm_data);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
dom0_memory_free(uint32_t rsv_size)
|
||||
{
|
||||
uint64_t vstart, vaddr;
|
||||
uint32_t i, num_block, size;
|
||||
|
||||
if (!xen_pv_domain())
|
||||
return -1;
|
||||
|
||||
/* each memory block is 2M */
|
||||
num_block = rsv_size / SIZE_PER_BLOCK;
|
||||
if (num_block == 0)
|
||||
return -EINVAL;
|
||||
|
||||
/* free all memory blocks of size of 4M and destroy contiguous region */
|
||||
for (i = 0; i < dom0_dev.num_bigblock * 2; i += 2) {
|
||||
vstart = rsv_mm_info[i].vir_addr;
|
||||
if (vstart) {
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 13, 0)
|
||||
if (rsv_mm_info[i].exchange_flag)
|
||||
xen_destroy_contiguous_region(vstart,
|
||||
DOM0_CONTIG_NUM_ORDER);
|
||||
if (rsv_mm_info[i + 1].exchange_flag)
|
||||
xen_destroy_contiguous_region(vstart +
|
||||
DOM0_MEMBLOCK_SIZE,
|
||||
DOM0_CONTIG_NUM_ORDER);
|
||||
#else
|
||||
if (rsv_mm_info[i].exchange_flag)
|
||||
xen_destroy_contiguous_region(rsv_mm_info[i].pfn
|
||||
* PAGE_SIZE,
|
||||
DOM0_CONTIG_NUM_ORDER);
|
||||
if (rsv_mm_info[i + 1].exchange_flag)
|
||||
xen_destroy_contiguous_region(rsv_mm_info[i].pfn
|
||||
* PAGE_SIZE + DOM0_MEMBLOCK_SIZE,
|
||||
DOM0_CONTIG_NUM_ORDER);
|
||||
#endif
|
||||
|
||||
size = DOM0_MEMBLOCK_SIZE * 2;
|
||||
vaddr = vstart;
|
||||
while (size > 0) {
|
||||
ClearPageReserved(virt_to_page(vaddr));
|
||||
vaddr += PAGE_SIZE;
|
||||
size -= PAGE_SIZE;
|
||||
}
|
||||
free_pages(vstart, MAX_NUM_ORDER);
|
||||
}
|
||||
}
|
||||
|
||||
/* free all memory blocks size of 2M and destroy contiguous region */
|
||||
for (; i < num_block; i++) {
|
||||
vstart = rsv_mm_info[i].vir_addr;
|
||||
if (vstart) {
|
||||
if (rsv_mm_info[i].exchange_flag)
|
||||
xen_destroy_contiguous_region(vstart,
|
||||
DOM0_CONTIG_NUM_ORDER);
|
||||
|
||||
size = DOM0_MEMBLOCK_SIZE;
|
||||
vaddr = vstart;
|
||||
while (size > 0) {
|
||||
ClearPageReserved(virt_to_page(vaddr));
|
||||
vaddr += PAGE_SIZE;
|
||||
size -= PAGE_SIZE;
|
||||
}
|
||||
free_pages(vstart, DOM0_CONTIG_NUM_ORDER);
|
||||
}
|
||||
}
|
||||
|
||||
memset(rsv_mm_info, 0, sizeof(struct memblock_info) * num_block);
|
||||
vfree(rsv_mm_info);
|
||||
rsv_mm_info = NULL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
find_free_memory(uint32_t count, struct dom0_mm_data *mm_data)
|
||||
{
|
||||
uint32_t i = 0;
|
||||
uint32_t j = 0;
|
||||
|
||||
while ((i < count) && (j < rsv_memsize / SIZE_PER_BLOCK)) {
|
||||
if (rsv_mm_info[j].used == 0) {
|
||||
mm_data->block_info[i].pfn = rsv_mm_info[j].pfn;
|
||||
mm_data->block_info[i].vir_addr =
|
||||
rsv_mm_info[j].vir_addr;
|
||||
mm_data->block_info[i].mfn = rsv_mm_info[j].mfn;
|
||||
mm_data->block_info[i].exchange_flag =
|
||||
rsv_mm_info[j].exchange_flag;
|
||||
mm_data->block_num[i] = j;
|
||||
rsv_mm_info[j].used = 1;
|
||||
i++;
|
||||
}
|
||||
j++;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Find all memory segments in which physical addresses are contiguous.
|
||||
*/
|
||||
static void
|
||||
find_memseg(int count, struct dom0_mm_data * mm_data)
|
||||
{
|
||||
int i = 0;
|
||||
int j, k, idx = 0;
|
||||
uint64_t zone_len, pfn, num_block;
|
||||
|
||||
while(i < count) {
|
||||
if (mm_data->block_info[i].exchange_flag == 0) {
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
k = 0;
|
||||
pfn = mm_data->block_info[i].pfn;
|
||||
mm_data->seg_info[idx].pfn = pfn;
|
||||
mm_data->seg_info[idx].mfn[k] = mm_data->block_info[i].mfn;
|
||||
|
||||
for (j = i + 1; j < count; j++) {
|
||||
|
||||
/* ignore exchange fail memory block */
|
||||
if (mm_data->block_info[j].exchange_flag == 0)
|
||||
break;
|
||||
|
||||
if (mm_data->block_info[j].pfn !=
|
||||
(mm_data->block_info[j - 1].pfn +
|
||||
DOM0_MEMBLOCK_SIZE / PAGE_SIZE))
|
||||
break;
|
||||
++k;
|
||||
mm_data->seg_info[idx].mfn[k] = mm_data->block_info[j].mfn;
|
||||
}
|
||||
|
||||
num_block = j - i;
|
||||
zone_len = num_block * DOM0_MEMBLOCK_SIZE;
|
||||
mm_data->seg_info[idx].size = zone_len;
|
||||
|
||||
XEN_PRINT("memseg id=%d, size=0x%llx\n", idx, zone_len);
|
||||
i = i+ num_block;
|
||||
idx++;
|
||||
if (idx == DOM0_NUM_MEMSEG)
|
||||
break;
|
||||
}
|
||||
mm_data->num_memseg = idx;
|
||||
}
|
||||
|
||||
static int
|
||||
dom0_memory_reserve(uint32_t rsv_size)
|
||||
{
|
||||
uint64_t pfn, vstart, vaddr;
|
||||
uint32_t i, num_block, size, allocated_size = 0;
|
||||
|
||||
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)
|
||||
dma_addr_t dma_handle;
|
||||
#endif
|
||||
|
||||
/* 2M as memory block */
|
||||
num_block = rsv_size / SIZE_PER_BLOCK;
|
||||
|
||||
rsv_mm_info = vmalloc(sizeof(struct memblock_info) * num_block);
|
||||
if (!rsv_mm_info) {
|
||||
XEN_ERR("Unable to allocate device memory information\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
memset(rsv_mm_info, 0, sizeof(struct memblock_info) * num_block);
|
||||
|
||||
/* try alloc size of 4M once */
|
||||
for (i = 0; i < num_block; i += 2) {
|
||||
vstart = (unsigned long)
|
||||
__get_free_pages(GFP_ATOMIC, MAX_NUM_ORDER);
|
||||
if (vstart == 0)
|
||||
break;
|
||||
|
||||
dom0_dev.num_bigblock = i / 2 + 1;
|
||||
allocated_size = SIZE_PER_BLOCK * (i + 2);
|
||||
|
||||
/* size of 4M */
|
||||
size = DOM0_MEMBLOCK_SIZE * 2;
|
||||
|
||||
vaddr = vstart;
|
||||
while (size > 0) {
|
||||
SetPageReserved(virt_to_page(vaddr));
|
||||
vaddr += PAGE_SIZE;
|
||||
size -= PAGE_SIZE;
|
||||
}
|
||||
|
||||
pfn = virt_to_pfn(vstart);
|
||||
rsv_mm_info[i].pfn = pfn;
|
||||
rsv_mm_info[i].vir_addr = vstart;
|
||||
rsv_mm_info[i + 1].pfn =
|
||||
pfn + DOM0_MEMBLOCK_SIZE / PAGE_SIZE;
|
||||
rsv_mm_info[i + 1].vir_addr =
|
||||
vstart + DOM0_MEMBLOCK_SIZE;
|
||||
}
|
||||
|
||||
/*if it failed to alloc 4M, and continue to alloc 2M once */
|
||||
for (; i < num_block; i++) {
|
||||
vstart = (unsigned long)
|
||||
__get_free_pages(GFP_ATOMIC, DOM0_CONTIG_NUM_ORDER);
|
||||
if (vstart == 0) {
|
||||
XEN_ERR("allocate memory fail.\n");
|
||||
dom0_memory_free(allocated_size);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
allocated_size += SIZE_PER_BLOCK;
|
||||
|
||||
size = DOM0_MEMBLOCK_SIZE;
|
||||
vaddr = vstart;
|
||||
while (size > 0) {
|
||||
SetPageReserved(virt_to_page(vaddr));
|
||||
vaddr += PAGE_SIZE;
|
||||
size -= PAGE_SIZE;
|
||||
}
|
||||
pfn = virt_to_pfn(vstart);
|
||||
rsv_mm_info[i].pfn = pfn;
|
||||
rsv_mm_info[i].vir_addr = vstart;
|
||||
}
|
||||
|
||||
sort_viraddr(rsv_mm_info, num_block);
|
||||
|
||||
for (i = 0; i< num_block; i++) {
|
||||
|
||||
/*
|
||||
* This API is used to exchage MFN for getting a block of
|
||||
* contiguous physical addresses, its maximum size is 2M.
|
||||
*/
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 13, 0)
|
||||
if (xen_create_contiguous_region(rsv_mm_info[i].vir_addr,
|
||||
DOM0_CONTIG_NUM_ORDER, 0) == 0) {
|
||||
#else
|
||||
if (xen_create_contiguous_region(rsv_mm_info[i].pfn * PAGE_SIZE,
|
||||
DOM0_CONTIG_NUM_ORDER, 0, &dma_handle) == 0) {
|
||||
#endif
|
||||
rsv_mm_info[i].exchange_flag = 1;
|
||||
rsv_mm_info[i].mfn =
|
||||
pfn_to_mfn(rsv_mm_info[i].pfn);
|
||||
rsv_mm_info[i].used = 0;
|
||||
} else {
|
||||
XEN_ERR("exchange memeory fail\n");
|
||||
rsv_mm_info[i].exchange_flag = 0;
|
||||
dom0_dev.fail_times++;
|
||||
if (dom0_dev.fail_times > MAX_EXCHANGE_FAIL_TIME) {
|
||||
dom0_memory_free(rsv_size);
|
||||
return -EFAULT;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
dom0_prepare_memsegs(struct memory_info *meminfo, struct dom0_mm_data *mm_data)
|
||||
{
|
||||
uint32_t num_block;
|
||||
int idx;
|
||||
|
||||
/* check if there is a free name buffer */
|
||||
memcpy(mm_data->name, meminfo->name, DOM0_NAME_MAX);
|
||||
mm_data->name[DOM0_NAME_MAX - 1] = '\0';
|
||||
idx = dom0_find_mempos();
|
||||
if (idx < 0)
|
||||
return -1;
|
||||
|
||||
num_block = meminfo->size / SIZE_PER_BLOCK;
|
||||
/* find free memory and new memory segments*/
|
||||
find_free_memory(num_block, mm_data);
|
||||
find_memseg(num_block, mm_data);
|
||||
|
||||
/* update private memory data */
|
||||
mm_data->refcnt++;
|
||||
mm_data->mem_size = meminfo->size;
|
||||
|
||||
/* update global memory data */
|
||||
dom0_dev.mm_data[idx] = mm_data;
|
||||
dom0_dev.num_mem_ctx++;
|
||||
dom0_dev.used_memsize += mm_data->mem_size;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
dom0_check_memory (struct memory_info *meminfo)
|
||||
{
|
||||
int idx;
|
||||
uint64_t mem_size;
|
||||
|
||||
/* round memory size to the next even number. */
|
||||
if (meminfo->size % 2)
|
||||
++meminfo->size;
|
||||
|
||||
mem_size = meminfo->size;
|
||||
if (dom0_dev.num_mem_ctx > NUM_MEM_CTX) {
|
||||
XEN_ERR("Memory data space is full in Dom0 driver\n");
|
||||
return -1;
|
||||
}
|
||||
idx = dom0_find_memdata(meminfo->name);
|
||||
if (idx >= 0) {
|
||||
XEN_ERR("Memory data name %s has already exsited in Dom0 driver.\n",
|
||||
meminfo->name);
|
||||
return -1;
|
||||
}
|
||||
if ((dom0_dev.used_memsize + mem_size) > rsv_memsize) {
|
||||
XEN_ERR("Total size can't be larger than reserved size.\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __init
|
||||
dom0_init(void)
|
||||
{
|
||||
if (!xen_domain())
|
||||
return -ENODEV;
|
||||
|
||||
if (rsv_memsize > DOM0_CONFIG_MEMSIZE) {
|
||||
XEN_ERR("The reserved memory size cannot be greater than %d\n",
|
||||
DOM0_CONFIG_MEMSIZE);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Setup the misc device */
|
||||
dom0_dev.miscdev.minor = MISC_DYNAMIC_MINOR;
|
||||
dom0_dev.miscdev.name = "dom0_mm";
|
||||
dom0_dev.miscdev.fops = &data_fops;
|
||||
|
||||
/* register misc char device */
|
||||
if (misc_register(&dom0_dev.miscdev) != 0) {
|
||||
XEN_ERR("Misc device registration failed\n");
|
||||
return -EPERM;
|
||||
}
|
||||
|
||||
mutex_init(&dom0_dev.data_lock);
|
||||
dom0_kobj = kobject_create_and_add("dom0-mm", mm_kobj);
|
||||
|
||||
if (!dom0_kobj) {
|
||||
XEN_ERR("dom0-mm object creation failed\n");
|
||||
misc_deregister(&dom0_dev.miscdev);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
if (sysfs_create_group(dom0_kobj, &dev_attr_grp)) {
|
||||
kobject_put(dom0_kobj);
|
||||
misc_deregister(&dom0_dev.miscdev);
|
||||
return -EPERM;
|
||||
}
|
||||
|
||||
if (dom0_memory_reserve(rsv_memsize) < 0) {
|
||||
sysfs_remove_group(dom0_kobj, &dev_attr_grp);
|
||||
kobject_put(dom0_kobj);
|
||||
misc_deregister(&dom0_dev.miscdev);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
XEN_PRINT("####### DPDK Xen Dom0 module loaded #######\n");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void __exit
|
||||
dom0_exit(void)
|
||||
{
|
||||
if (rsv_mm_info != NULL)
|
||||
dom0_memory_free(rsv_memsize);
|
||||
|
||||
sysfs_remove_group(dom0_kobj, &dev_attr_grp);
|
||||
kobject_put(dom0_kobj);
|
||||
misc_deregister(&dom0_dev.miscdev);
|
||||
|
||||
XEN_PRINT("####### DPDK Xen Dom0 module unloaded #######\n");
|
||||
}
|
||||
|
||||
static int
|
||||
dom0_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
file->private_data = NULL;
|
||||
|
||||
XEN_PRINT(KERN_INFO "/dev/dom0_mm opened\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
dom0_release(struct inode *inode, struct file *file)
|
||||
{
|
||||
int ret = 0;
|
||||
struct dom0_mm_data *mm_data = file->private_data;
|
||||
|
||||
if (mm_data == NULL)
|
||||
return ret;
|
||||
|
||||
mutex_lock(&dom0_dev.data_lock);
|
||||
if (--mm_data->refcnt == 0)
|
||||
ret = dom0_memory_release(mm_data);
|
||||
mutex_unlock(&dom0_dev.data_lock);
|
||||
|
||||
file->private_data = NULL;
|
||||
XEN_PRINT(KERN_INFO "/dev/dom0_mm closed\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int
|
||||
dom0_mmap(struct file *file, struct vm_area_struct *vm)
|
||||
{
|
||||
int status = 0;
|
||||
uint32_t idx = vm->vm_pgoff;
|
||||
uint64_t pfn, size = vm->vm_end - vm->vm_start;
|
||||
struct dom0_mm_data *mm_data = file->private_data;
|
||||
|
||||
if(mm_data == NULL)
|
||||
return -EINVAL;
|
||||
|
||||
mutex_lock(&dom0_dev.data_lock);
|
||||
if (idx >= mm_data->num_memseg) {
|
||||
mutex_unlock(&dom0_dev.data_lock);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (size > mm_data->seg_info[idx].size){
|
||||
mutex_unlock(&dom0_dev.data_lock);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
XEN_PRINT("mmap memseg idx =%d,size = 0x%llx\n", idx, size);
|
||||
|
||||
pfn = mm_data->seg_info[idx].pfn;
|
||||
mutex_unlock(&dom0_dev.data_lock);
|
||||
|
||||
status = remap_pfn_range(vm, vm->vm_start, pfn, size, PAGE_SHARED);
|
||||
|
||||
return status;
|
||||
}
|
||||
static int
|
||||
dom0_ioctl(struct file *file,
|
||||
unsigned int ioctl_num,
|
||||
unsigned long ioctl_param)
|
||||
{
|
||||
int idx, ret;
|
||||
char name[DOM0_NAME_MAX] = {0};
|
||||
struct memory_info meminfo;
|
||||
struct dom0_mm_data *mm_data = file->private_data;
|
||||
|
||||
XEN_PRINT("IOCTL num=0x%0x param=0x%0lx \n", ioctl_num, ioctl_param);
|
||||
|
||||
/**
|
||||
* Switch according to the ioctl called
|
||||
*/
|
||||
switch _IOC_NR(ioctl_num) {
|
||||
case _IOC_NR(RTE_DOM0_IOCTL_PREPARE_MEMSEG):
|
||||
ret = copy_from_user(&meminfo, (void *)ioctl_param,
|
||||
sizeof(struct memory_info));
|
||||
if (ret)
|
||||
return -EFAULT;
|
||||
|
||||
if (mm_data != NULL) {
|
||||
XEN_ERR("Cannot create memory segment for the same"
|
||||
" file descriptor\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Allocate private data */
|
||||
mm_data = vmalloc(sizeof(struct dom0_mm_data));
|
||||
if (!mm_data) {
|
||||
XEN_ERR("Unable to allocate device private data\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
memset(mm_data, 0, sizeof(struct dom0_mm_data));
|
||||
|
||||
mutex_lock(&dom0_dev.data_lock);
|
||||
/* check if we can allocate memory*/
|
||||
if (dom0_check_memory(&meminfo) < 0) {
|
||||
mutex_unlock(&dom0_dev.data_lock);
|
||||
vfree(mm_data);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* allocate memory and created memory segments*/
|
||||
if (dom0_prepare_memsegs(&meminfo, mm_data) < 0) {
|
||||
XEN_ERR("create memory segment fail.\n");
|
||||
mutex_unlock(&dom0_dev.data_lock);
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
file->private_data = mm_data;
|
||||
mutex_unlock(&dom0_dev.data_lock);
|
||||
break;
|
||||
|
||||
/* support multiple process in term of memory mapping*/
|
||||
case _IOC_NR(RTE_DOM0_IOCTL_ATTACH_TO_MEMSEG):
|
||||
ret = copy_from_user(name, (void *)ioctl_param,
|
||||
sizeof(char) * DOM0_NAME_MAX);
|
||||
if (ret)
|
||||
return -EFAULT;
|
||||
|
||||
mutex_lock(&dom0_dev.data_lock);
|
||||
idx = dom0_find_memdata(name);
|
||||
if (idx < 0) {
|
||||
mutex_unlock(&dom0_dev.data_lock);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
mm_data = dom0_dev.mm_data[idx];
|
||||
mm_data->refcnt++;
|
||||
file->private_data = mm_data;
|
||||
mutex_unlock(&dom0_dev.data_lock);
|
||||
break;
|
||||
|
||||
case _IOC_NR(RTE_DOM0_IOCTL_GET_NUM_MEMSEG):
|
||||
ret = copy_to_user((void *)ioctl_param, &mm_data->num_memseg,
|
||||
sizeof(int));
|
||||
if (ret)
|
||||
return -EFAULT;
|
||||
break;
|
||||
|
||||
case _IOC_NR(RTE_DOM0_IOCTL_GET_MEMSEG_INFO):
|
||||
ret = copy_to_user((void *)ioctl_param,
|
||||
&mm_data->seg_info[0],
|
||||
sizeof(struct memseg_info) *
|
||||
mm_data->num_memseg);
|
||||
if (ret)
|
||||
return -EFAULT;
|
||||
break;
|
||||
default:
|
||||
XEN_PRINT("IOCTL default \n");
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
module_init(dom0_init);
|
||||
module_exit(dom0_exit);
|
||||
|
||||
module_param(rsv_memsize, uint, S_IRUGO | S_IWUSR);
|
||||
MODULE_PARM_DESC(rsv_memsize, "Xen-dom0 reserved memory size(MB).\n");
|
@ -52,9 +52,6 @@ ExclusiveArch: i686 x86_64 aarch64
|
||||
%endif
|
||||
|
||||
BuildRequires: kernel-devel, kernel-headers, libpcap-devel
|
||||
%ifarch i686 x86_64
|
||||
BuildRequires: xen-devel
|
||||
%endif
|
||||
BuildRequires: doxygen, python-sphinx, inkscape
|
||||
BuildRequires: texlive-collection-latexextra
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user