Pass SR-IOV configuration to kernel using an nvlist
Pass all SR-IOV configuration to the kernel using an nvlist. The main benefit that this offers is flexibility. It allows a driver to accept any number of parameters of any type supported by the SR-IOV configuration infrastructure without having to make any changes outside of the driver. It also offers the user very fine-grained control over the configuration of the VFs -- if they want, they can have different configuration applied to every VF. Differential Revision: https://reviews.freebsd.org/D82 Reviewed by: jhb MFC after: 1 month Sponsored by: Sandvine Inc.
This commit is contained in:
parent
f0a0142dd0
commit
c476927dd3
@ -217,6 +217,7 @@ METHOD int iov_detach {
|
||||
METHOD int init_iov {
|
||||
device_t dev;
|
||||
uint16_t num_vfs;
|
||||
const struct nvlist *config;
|
||||
};
|
||||
|
||||
METHOD void uninit_iov {
|
||||
@ -226,6 +227,7 @@ METHOD void uninit_iov {
|
||||
METHOD int add_vf {
|
||||
device_t dev;
|
||||
uint16_t vfnum;
|
||||
const struct nvlist *config;
|
||||
};
|
||||
|
||||
METHOD device_t create_iov_child {
|
||||
|
@ -70,6 +70,18 @@ static struct cdevsw iov_cdevsw = {
|
||||
.d_ioctl = pci_iov_ioctl
|
||||
};
|
||||
|
||||
SYSCTL_DECL(_hw_pci);
|
||||
|
||||
/*
|
||||
* The maximum amount of memory we will allocate for user configuration of an
|
||||
* SR-IOV device. 1MB ought to be enough for anyone, but leave this
|
||||
* configurable just in case.
|
||||
*/
|
||||
static u_long pci_iov_max_config = 1024 * 1024;
|
||||
SYSCTL_ULONG(_hw_pci, OID_AUTO, iov_max_config, CTLFLAG_RWTUN,
|
||||
&pci_iov_max_config, 0, "Maximum allowed size of SR-IOV configuration.");
|
||||
|
||||
|
||||
#define IOV_READ(d, r, w) \
|
||||
pci_read_config((d)->cfg.dev, (d)->cfg.iov->iov_pos + r, w)
|
||||
|
||||
@ -348,6 +360,51 @@ pci_iov_add_bars(struct pcicfg_iov *iov, struct pci_devinfo *dinfo)
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Fetch the packed SR-IOV configuration described by arg from the caller,
 * unpack it into an nvlist and validate it against iov->iov_schema.
 *
 * On success 0 is returned and the unpacked nvlist is stored in *ret; it is
 * not destroyed here, so the caller is responsible for destroying it.  On
 * failure *ret is left untouched and an errno value is returned:
 *   EMSGSIZE - arg->len is larger than pci_iov_max_config
 *   EINVAL   - nvlist_unpack() could not unpack the buffer
 *   other    - whatever copyin(), pci_iov_schema_validate_config() or
 *              nvlist_error() reported
 *
 * The temporary buffer holding the packed data is always freed before
 * returning.
 */
static int
|
||||
pci_iov_parse_config(struct pcicfg_iov *iov, struct pci_iov_arg *arg,
|
||||
nvlist_t **ret)
|
||||
{
|
||||
void *packed_config;
|
||||
nvlist_t *config;
|
||||
int error;
|
||||
|
||||
/* Start with both resources NULL so every path can share the cleanup at out:. */
config = NULL;
|
||||
packed_config = NULL;
|
||||
|
||||
/* Refuse oversized requests before allocating anything for them. */
if (arg->len > pci_iov_max_config) {
|
||||
error = EMSGSIZE;
|
||||
goto out;
|
||||
}
|
||||
|
||||
packed_config = malloc(arg->len, M_SRIOV, M_WAITOK);
|
||||
|
||||
/* Copy the packed nvlist in from the address supplied in arg->config. */
error = copyin(arg->config, packed_config, arg->len);
|
||||
if (error != 0)
|
||||
goto out;
|
||||
|
||||
config = nvlist_unpack(packed_config, arg->len);
|
||||
if (config == NULL) {
|
||||
error = EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
error = pci_iov_schema_validate_config(iov->iov_schema, config);
|
||||
if (error != 0)
|
||||
goto out;
|
||||
|
||||
/* Catch any error recorded on the nvlist itself after validation ran. */
error = nvlist_error(config);
|
||||
if (error != 0)
|
||||
goto out;
|
||||
|
||||
/*
 * Hand the nvlist to the caller and clear the local pointer so that the
 * cleanup below does not destroy the nvlist being returned.
 */
*ret = config;
|
||||
config = NULL;
|
||||
|
||||
out:
|
||||
nvlist_destroy(config);
|
||||
free(packed_config, M_SRIOV);
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* Set the ARI_EN bit in the lowest-numbered PCI function with the SR-IOV
|
||||
* capability. This bit is only writeable on the lowest-numbered PF but
|
||||
@ -421,6 +478,16 @@ pci_iov_config_page_size(struct pci_devinfo *dinfo)
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
 * Call PCI_INIT_IOV on dev for num_vfs VFs, passing only the driver-specific
 * part of the PF's configuration: the DRIVER_CONFIG_NAME nvlist nested inside
 * the PF_CONFIG_NAME nvlist of config.  Returns whatever PCI_INIT_IOV returns.
 *
 * NOTE(review): neither lookup is checked for a missing key; this presumably
 * relies on config having already passed schema validation -- confirm.
 */
static int
|
||||
pci_init_iov(device_t dev, uint16_t num_vfs, const nvlist_t *config)
|
||||
{
|
||||
const nvlist_t *device, *driver_config;
|
||||
|
||||
device = nvlist_get_nvlist(config, PF_CONFIG_NAME);
|
||||
driver_config = nvlist_get_nvlist(device, DRIVER_CONFIG_NAME);
|
||||
return (PCI_INIT_IOV(dev, num_vfs, driver_config));
|
||||
}
|
||||
|
||||
static int
|
||||
pci_iov_init_rman(device_t pf, struct pcicfg_iov *iov)
|
||||
{
|
||||
@ -479,9 +546,11 @@ pci_iov_setup_bars(struct pci_devinfo *dinfo)
|
||||
}
|
||||
|
||||
static void
|
||||
pci_iov_enumerate_vfs(struct pci_devinfo *dinfo, const char *driver,
|
||||
pci_iov_enumerate_vfs(struct pci_devinfo *dinfo, const nvlist_t *config,
|
||||
uint16_t first_rid, uint16_t rid_stride)
|
||||
{
|
||||
char device_name[VF_MAX_NAME];
|
||||
const nvlist_t *device, *driver_config, *iov_config;
|
||||
device_t bus, dev, vf;
|
||||
struct pcicfg_iov *iov;
|
||||
struct pci_devinfo *vfinfo;
|
||||
@ -498,12 +567,23 @@ pci_iov_enumerate_vfs(struct pci_devinfo *dinfo, const char *driver,
|
||||
did = IOV_READ(dinfo, PCIR_SRIOV_VF_DID, 2);
|
||||
|
||||
for (i = 0; i < iov->iov_num_vfs; i++, next_rid += rid_stride) {
|
||||
|
||||
snprintf(device_name, sizeof(device_name), VF_PREFIX"%d", i);
|
||||
device = nvlist_get_nvlist(config, device_name);
|
||||
iov_config = nvlist_get_nvlist(device, IOV_CONFIG_NAME);
|
||||
driver_config = nvlist_get_nvlist(device, DRIVER_CONFIG_NAME);
|
||||
|
||||
vf = PCI_CREATE_IOV_CHILD(bus, dev, next_rid, vid, did);
|
||||
if (vf == NULL)
|
||||
break;
|
||||
|
||||
/*
|
||||
* If we are creating passthrough devices then force the ppt
|
||||
* driver to attach to prevent a VF driver from claiming the
|
||||
* VFs.
|
||||
*/
|
||||
if (nvlist_get_bool(iov_config, "passthrough"))
|
||||
device_set_devclass(vf, "ppt");
|
||||
|
||||
vfinfo = device_get_ivars(vf);
|
||||
|
||||
vfinfo->cfg.iov = iov;
|
||||
@ -511,7 +591,7 @@ pci_iov_enumerate_vfs(struct pci_devinfo *dinfo, const char *driver,
|
||||
|
||||
pci_iov_add_bars(iov, vfinfo);
|
||||
|
||||
error = PCI_ADD_VF(dev, i);
|
||||
error = PCI_ADD_VF(dev, i, driver_config);
|
||||
if (error != 0) {
|
||||
device_printf(dev, "Failed to add VF %d\n", i);
|
||||
pci_delete_child(bus, vf);
|
||||
@ -525,14 +605,14 @@ static int
|
||||
pci_iov_config(struct cdev *cdev, struct pci_iov_arg *arg)
|
||||
{
|
||||
device_t bus, dev;
|
||||
const char *driver;
|
||||
struct pci_devinfo *dinfo;
|
||||
struct pcicfg_iov *iov;
|
||||
nvlist_t *config;
|
||||
int i, error;
|
||||
uint16_t rid_off, rid_stride;
|
||||
uint16_t first_rid, last_rid;
|
||||
uint16_t iov_ctl;
|
||||
uint16_t total_vfs;
|
||||
uint16_t num_vfs, total_vfs;
|
||||
int iov_inited;
|
||||
|
||||
mtx_lock(&Giant);
|
||||
@ -541,6 +621,7 @@ pci_iov_config(struct cdev *cdev, struct pci_iov_arg *arg)
|
||||
dev = dinfo->cfg.dev;
|
||||
bus = device_get_parent(dev);
|
||||
iov_inited = 0;
|
||||
config = NULL;
|
||||
|
||||
if ((iov->iov_flags & IOV_BUSY) || iov->iov_num_vfs != 0) {
|
||||
mtx_unlock(&Giant);
|
||||
@ -548,22 +629,17 @@ pci_iov_config(struct cdev *cdev, struct pci_iov_arg *arg)
|
||||
}
|
||||
iov->iov_flags |= IOV_BUSY;
|
||||
|
||||
total_vfs = IOV_READ(dinfo, PCIR_SRIOV_TOTAL_VFS, 2);
|
||||
error = pci_iov_parse_config(iov, arg, &config);
|
||||
if (error != 0)
|
||||
goto out;
|
||||
|
||||
if (arg->num_vfs > total_vfs) {
|
||||
num_vfs = pci_iov_config_get_num_vfs(config);
|
||||
total_vfs = IOV_READ(dinfo, PCIR_SRIOV_TOTAL_VFS, 2);
|
||||
if (num_vfs > total_vfs) {
|
||||
error = EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we are creating passthrough devices then force the ppt driver to
|
||||
* attach to prevent a VF driver from claming the VFs.
|
||||
*/
|
||||
if (arg->passthrough)
|
||||
driver = "ppt";
|
||||
else
|
||||
driver = NULL;
|
||||
|
||||
error = pci_iov_config_page_size(dinfo);
|
||||
if (error != 0)
|
||||
goto out;
|
||||
@ -572,19 +648,18 @@ pci_iov_config(struct cdev *cdev, struct pci_iov_arg *arg)
|
||||
if (error != 0)
|
||||
goto out;
|
||||
|
||||
error = PCI_INIT_IOV(dev, arg->num_vfs);
|
||||
|
||||
error = pci_init_iov(dev, num_vfs, config);
|
||||
if (error != 0)
|
||||
goto out;
|
||||
|
||||
iov_inited = 1;
|
||||
IOV_WRITE(dinfo, PCIR_SRIOV_NUM_VFS, arg->num_vfs, 2);
|
||||
|
||||
IOV_WRITE(dinfo, PCIR_SRIOV_NUM_VFS, num_vfs, 2);
|
||||
|
||||
rid_off = IOV_READ(dinfo, PCIR_SRIOV_VF_OFF, 2);
|
||||
rid_stride = IOV_READ(dinfo, PCIR_SRIOV_VF_STRIDE, 2);
|
||||
|
||||
first_rid = pci_get_rid(dev) + rid_off;
|
||||
last_rid = first_rid + (arg->num_vfs - 1) * rid_stride;
|
||||
last_rid = first_rid + (num_vfs - 1) * rid_stride;
|
||||
|
||||
/* We don't yet support allocating extra bus numbers for VFs. */
|
||||
if (pci_get_bus(dev) != PCI_RID2BUS(last_rid)) {
|
||||
@ -600,7 +675,7 @@ pci_iov_config(struct cdev *cdev, struct pci_iov_arg *arg)
|
||||
if (error != 0)
|
||||
goto out;
|
||||
|
||||
iov->iov_num_vfs = arg->num_vfs;
|
||||
iov->iov_num_vfs = num_vfs;
|
||||
|
||||
error = pci_iov_setup_bars(dinfo);
|
||||
if (error != 0)
|
||||
@ -612,7 +687,10 @@ pci_iov_config(struct cdev *cdev, struct pci_iov_arg *arg)
|
||||
|
||||
/* Per specification, we must wait 100ms before accessing VFs. */
|
||||
pause("iov", roundup(hz, 10));
|
||||
pci_iov_enumerate_vfs(dinfo, driver, first_rid, rid_stride);
|
||||
pci_iov_enumerate_vfs(dinfo, config, first_rid, rid_stride);
|
||||
|
||||
nvlist_destroy(config);
|
||||
iov->iov_flags &= ~IOV_BUSY;
|
||||
mtx_unlock(&Giant);
|
||||
|
||||
return (0);
|
||||
@ -635,6 +713,8 @@ pci_iov_config(struct cdev *cdev, struct pci_iov_arg *arg)
|
||||
rman_fini(&iov->rman);
|
||||
iov->iov_flags &= ~IOV_RMAN_INITED;
|
||||
}
|
||||
|
||||
nvlist_destroy(config);
|
||||
iov->iov_num_vfs = 0;
|
||||
iov->iov_flags &= ~IOV_BUSY;
|
||||
mtx_unlock(&Giant);
|
||||
|
@ -46,12 +46,6 @@
|
||||
#define DEFAULT_SCHEMA_NAME "DEFAULT"
|
||||
#define REQUIRED_SCHEMA_NAME "REQUIRED"
|
||||
|
||||
struct pci_iov_arg
|
||||
{
|
||||
int num_vfs;
|
||||
int passthrough;
|
||||
};
|
||||
|
||||
/*
|
||||
* Because each PF device is expected to expose a unique set of possible
|
||||
* configurations, the SR-IOV infrastructure dynamically queries the PF
|
||||
@ -168,7 +162,94 @@ struct pci_iov_schema
|
||||
int error;
|
||||
};
|
||||
|
||||
#define IOV_CONFIG _IOWR('p', 10, struct pci_iov_arg)
|
||||
/*
|
||||
* SR-IOV configuration is passed to the kernel as a packed nvlist. See nv(3)
|
||||
* for the details of the nvlist API. The expected format of the nvlist is:
|
||||
*
|
||||
* BASIC RULES
|
||||
* 1) All keys are case-insensitive.
|
||||
* 2) No keys that are not specified below may exist at any level of the
|
||||
* config nvlist.
|
||||
* 3) Unless otherwise specified, all keys are optional. It should go without
|
||||
* saying that a key being mandatory is transitive: that is, if a key is
|
||||
* specified to contain a sub-node that contains a mandatory key, then
|
||||
* the outer key is implicitly mandatory. If a key is mandatory then the
|
||||
* associated value is also mandatory.
|
||||
* 4) Order of keys is irrelevant.
|
||||
*
|
||||
* TOP LEVEL OF CONFIG NVLIST
|
||||
* 1) All keys specified in this section are mandatory.
|
||||
* 2) There must be a top-level key with the name PF_CONFIG_NAME. The value
|
||||
* associated is an nvlist that follows the "device node" format. The
|
||||
* parameters in this node specify parameters that apply to the PF.
|
||||
* 3) For every VF being configured (this is set via the "num_vfs" parameter
|
||||
* in the PF section), there must be a top-level key whose name is VF_PREFIX
|
||||
* immediately followed by the index of the VF as a decimal integer. For
|
||||
* example, this would be VF-0 for the first VF. VFs are numbered starting
|
||||
* from 0. The value associated with this key follows the "device node"
|
||||
* format. The parameters in this node specify configuration that applies
|
||||
* to the VF specified in the key. Leading zeros are not permitted in VF
|
||||
* index. Configuration for the second VF must be specified in a node with
|
||||
* the key VF-1. VF-01 is not a valid key.
|
||||
*
|
||||
* DEVICE NODES
|
||||
* 1) All keys specified in this section are mandatory.
|
||||
* 2) The device node must contain a key with the name DRIVER_CONFIG_NAME. The
|
||||
* value associated with this key is an nvlist following the subsystem node
|
||||
* format. The parameters in this key specify configuration that is specific
|
||||
* to a particular device driver.
|
||||
* 3) The device node must contain a key with the name IOV_CONFIG_NAME. The
|
||||
* value associated with this key is an nvlist following the subsystem node
|
||||
* format. The parameters in this key specify configuration that is consumed
|
||||
* by the SR-IOV infrastructure.
|
||||
*
|
||||
* SUBSYSTEM NODES
|
||||
* 1) A subsystem node specifies configuration parameters that apply to a
|
||||
* particular subsystem (driver or infrastructure) of a particular device
|
||||
* (PF or individual VF).
|
||||
* Note: We will refer to the section of the configuration schema that
|
||||
* specifies the parameters for this subsystem and device
|
||||
* configuration as the device/subsystem schema.
|
||||
* 2) The subsystem node must contain only keys that correspond to parameters
|
||||
* that are specified in the device/subsystem schema.
|
||||
* 3) Every parameter specified as required in the device/subsystem schema is
|
||||
* a mandatory key in the subsystem node.
|
||||
* Note: All parameters that are not required in device/subsystem schema are
|
||||
* optional keys. In particular, any parameter specified to have a
|
||||
* default value in the device/subsystem schema is optional. The
|
||||
* kernel is responsible for applying default values.
|
||||
* 4) The value of every parameter in the device node must conform to the
|
||||
* restrictions of the type specified for that parameter in the device/
|
||||
* subsystem schema.
|
||||
*
|
||||
* The following is an example of a valid configuration, when validated against
|
||||
* the schema example given above.
|
||||
*
|
||||
* PF (NVLIST):
|
||||
* driver (NVLIST):
|
||||
* iov (NVLIST):
|
||||
* num_vfs (NUMBER): 3 (3) (0x3)
|
||||
* device (STRING): [ix0]
|
||||
* VF-0 (NVLIST):
|
||||
* driver (NVLIST):
|
||||
* vlan (NUMBER): 1000 (1000) (0x3e8)
|
||||
* iov (NVLIST):
|
||||
* passthrough (BOOL): TRUE
|
||||
* VF-1 (NVLIST):
|
||||
* driver (NVLIST):
|
||||
* iov (NVLIST):
|
||||
* VF-2 (NVLIST):
|
||||
* driver (NVLIST):
|
||||
* mac-addr (BINARY): 6 020102030405
|
||||
* iov (NVLIST):
|
||||
*/
|
||||
/*
 * Argument structure for the IOV_CONFIG ioctl: locates the packed
 * configuration nvlist that the kernel copies in and unpacks.
 */
struct pci_iov_arg
|
||||
{
|
||||
/* Address of the packed nvlist; the kernel reads it with copyin(). */
void *config;
|
||||
/* Size of the packed nvlist in bytes; requests above pci_iov_max_config fail with EMSGSIZE. */
size_t len;
|
||||
};
|
||||
|
||||
#define IOV_CONFIG _IOW('p', 10, struct pci_iov_arg)
|
||||
#define IOV_DELETE _IO('p', 11)
|
||||
#define IOV_GET_SCHEMA _IOWR('p', 12, struct pci_iov_schema)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user