Kernel-side infrastructure to implement nvlist-based set/get ifcaps

Reviewed by:	hselasky, jhb, kp (previous version)
Sponsored by:	NVIDIA Networking
MFC after:	3 weeks
Differential revision:	https://reviews.freebsd.org/D32551
This commit is contained in:
Konstantin Belousov 2021-10-17 18:00:34 +03:00
parent b96549f057
commit 051e7d78b0
3 changed files with 236 additions and 4 deletions

View File

@ -58,6 +58,7 @@
#include <sys/lock.h>
#include <sys/refcount.h>
#include <sys/module.h>
#include <sys/nv.h>
#include <sys/rwlock.h>
#include <sys/sockio.h>
#include <sys/syslog.h>
@ -2391,6 +2392,88 @@ ifr_data_get_ptr(void *ifrp)
return (ifrup->ifr.ifr_ifru.ifru_data);
}
/*
 * One entry of a capability translation table: pairs an interface
 * capability bit mask with the string used as its key in an nvlist.
 * Tables of these entries are walked until an entry with a NULL
 * cap_name is reached.
 */
struct ifcap_nv_bit_name {
int cap_bit; /* capability bit mask (IFCAP_* or IFCAP2_* value) */
const char *cap_name; /* nvlist key for the bit; NULL ends a table */
};
/*
 * Build a table entry for capability x: the bit comes from IFCAP_<x> and
 * the nvlist key from the matching IFCAP_<x>_NAME string constant.
 */
#define CAPNV(x) {.cap_bit = IFCAP_##x, \
.cap_name = __CONCAT(IFCAP_, __CONCAT(x, _NAME)) }
/*
 * Translation table for the first capability word (if_capabilities /
 * if_capenable).  The final {0, NULL} entry is the terminator that
 * if_capnv_to_capint() and if_capint_to_capnv() stop on.
 */
const struct ifcap_nv_bit_name ifcap_nv_bit_names[] = {
CAPNV(RXCSUM),
CAPNV(TXCSUM),
CAPNV(NETCONS),
CAPNV(VLAN_MTU),
CAPNV(VLAN_HWTAGGING),
CAPNV(JUMBO_MTU),
CAPNV(POLLING),
CAPNV(VLAN_HWCSUM),
CAPNV(TSO4),
CAPNV(TSO6),
CAPNV(LRO),
CAPNV(WOL_UCAST),
CAPNV(WOL_MCAST),
CAPNV(WOL_MAGIC),
CAPNV(TOE4),
CAPNV(TOE6),
CAPNV(VLAN_HWFILTER),
CAPNV(VLAN_HWTSO),
CAPNV(LINKSTATE),
CAPNV(NETMAP),
CAPNV(RXCSUM_IPV6),
CAPNV(TXCSUM_IPV6),
CAPNV(HWSTATS),
CAPNV(TXRTLMT),
CAPNV(HWRXTSTMP),
CAPNV(MEXTPG),
CAPNV(TXTLS4),
CAPNV(TXTLS6),
CAPNV(VXLAN_HWCSUM),
CAPNV(VXLAN_HWTSO),
CAPNV(TXTLS_RTLMT),
{0, NULL}
};
/*
 * Same as CAPNV(), but for the second capability word: the bit comes from
 * IFCAP2_<x> and the nvlist key from IFCAP2_<x>_NAME.
 */
#define CAP2NV(x) {.cap_bit = IFCAP2_##x, \
.cap_name = __CONCAT(IFCAP2_, __CONCAT(x, _NAME)) }
/*
 * Translation table for the second capability word (if_capabilities2 /
 * if_capenable2), terminated by the {0, NULL} entry.
 */
const struct ifcap_nv_bit_name ifcap2_nv_bit_names[] = {
CAP2NV(RXTLS4),
CAP2NV(RXTLS6),
{0, NULL}
};
/* The entry-building macros are only needed for the two tables above. */
#undef CAPNV
#undef CAP2NV
int
if_capnv_to_capint(const nvlist_t *nv, int *old_cap,
const struct ifcap_nv_bit_name *nn, bool all)
{
int i, res;
res = 0;
for (i = 0; nn[i].cap_name != NULL; i++) {
if (nvlist_exists_bool(nv, nn[i].cap_name)) {
if (all || nvlist_get_bool(nv, nn[i].cap_name))
res |= nn[i].cap_bit;
} else {
res |= *old_cap & nn[i].cap_bit;
}
}
return (res);
}
/*
 * Export a capability bit mask pair into an nvlist.
 *
 * The table nn (terminated by an entry whose cap_name is NULL) is walked
 * once.  Every entry whose bit is set in ifr_cap gets a boolean added to
 * nv under the entry's name; the boolean's value tells whether the same
 * bit is also set in ifr_req.  Entries whose bit is clear in ifr_cap add
 * nothing.
 */
void
if_capint_to_capnv(nvlist_t *nv, const struct ifcap_nv_bit_name *nn,
    int ifr_cap, int ifr_req)
{
	const struct ifcap_nv_bit_name *ent;

	for (ent = nn; ent->cap_name != NULL; ent++) {
		if ((ent->cap_bit & ifr_cap) == 0)
			continue;
		nvlist_add_bool(nv, ent->cap_name,
		    (ent->cap_bit & ifr_req) != 0);
	}
}
/*
* Hardware specific interface ioctls.
*/
@ -2401,12 +2484,15 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
int error = 0, do_ifup = 0;
int new_flags, temp_flags;
size_t namelen, onamelen;
size_t descrlen;
size_t descrlen, nvbuflen;
char *descrbuf, *odescrbuf;
char new_name[IFNAMSIZ];
char old_name[IFNAMSIZ], strbuf[IFNAMSIZ + 8];
struct ifaddr *ifa;
struct sockaddr_dl *sdl;
void *buf;
nvlist_t *nvcap;
struct siocsifcapnv_driver_data drv_ioctl_data;
ifr = (struct ifreq *)data;
switch (cmd) {
@ -2425,6 +2511,47 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
ifr->ifr_curcap = ifp->if_capenable;
break;
case SIOCGIFCAPNV:
if ((ifp->if_capabilities & IFCAP_NV) == 0) {
error = EINVAL;
break;
}
buf = NULL;
nvcap = nvlist_create(0);
for (;;) {
if_capint_to_capnv(nvcap, ifcap_nv_bit_names,
ifp->if_capabilities, ifp->if_capenable);
if_capint_to_capnv(nvcap, ifcap2_nv_bit_names,
ifp->if_capabilities2, ifp->if_capenable2);
error = (*ifp->if_ioctl)(ifp, SIOCGIFCAPNV,
__DECONST(caddr_t, nvcap));
if (error != 0) {
if_printf(ifp,
"SIOCGIFCAPNV driver mistake: nvlist error %d\n",
error);
break;
}
buf = nvlist_pack(nvcap, &nvbuflen);
if (buf == NULL) {
error = nvlist_error(nvcap);
if (error == 0)
error = EDOOFUS;
break;
}
if (nvbuflen > ifr->ifr_cap_nv.buf_length) {
ifr->ifr_cap_nv.length = nvbuflen;
ifr->ifr_cap_nv.buffer = NULL;
error = EFBIG;
break;
}
ifr->ifr_cap_nv.length = nvbuflen;
error = copyout(buf, ifr->ifr_cap_nv.buffer, nvbuflen);
break;
}
free(buf, M_NVLIST);
nvlist_destroy(nvcap);
break;
case SIOCGIFDATA:
{
struct if_data ifd;
@ -2563,7 +2690,7 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
case SIOCSIFCAP:
error = priv_check(td, PRIV_NET_SETIFCAP);
if (error)
if (error != 0)
return (error);
if (ifp->if_ioctl == NULL)
return (EOPNOTSUPP);
@ -2574,6 +2701,53 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
getmicrotime(&ifp->if_lastchange);
break;
case SIOCSIFCAPNV:
error = priv_check(td, PRIV_NET_SETIFCAP);
if (error != 0)
return (error);
if (ifp->if_ioctl == NULL)
return (EOPNOTSUPP);
if ((ifp->if_capabilities & IFCAP_NV) == 0)
return (EINVAL);
if (ifr->ifr_cap_nv.length > IFR_CAP_NV_MAXBUFSIZE)
return (EINVAL);
nvcap = NULL;
buf = malloc(ifr->ifr_cap_nv.length, M_TEMP, M_WAITOK);
for (;;) {
error = copyin(ifr->ifr_cap_nv.buffer, buf,
ifr->ifr_cap_nv.length);
if (error != 0)
break;
nvcap = nvlist_unpack(buf, ifr->ifr_cap_nv.length, 0);
if (nvcap == NULL) {
error = EINVAL;
break;
}
drv_ioctl_data.reqcap = if_capnv_to_capint(nvcap,
&ifp->if_capenable, ifcap_nv_bit_names, false);
if ((drv_ioctl_data.reqcap &
~ifp->if_capabilities) != 0) {
error = EINVAL;
break;
}
drv_ioctl_data.reqcap2 = if_capnv_to_capint(nvcap,
&ifp->if_capenable2, ifcap2_nv_bit_names, false);
if ((drv_ioctl_data.reqcap2 &
~ifp->if_capabilities2) != 0) {
error = EINVAL;
break;
}
drv_ioctl_data.nvcap = nvcap;
error = (*ifp->if_ioctl)(ifp, SIOCSIFCAPNV,
(caddr_t)&drv_ioctl_data);
break;
}
nvlist_destroy(nvcap);
free(buf, M_TEMP);
if (error == 0)
getmicrotime(&ifp->if_lastchange);
break;
#ifdef MAC
case SIOCSIFMAC:
error = mac_ifnet_ioctl_set(td->td_ucred, ifr, ifp);

View File

@ -236,7 +236,7 @@ struct if_data {
#define IFCAP_TOE4 0x04000 /* interface can offload TCP */
#define IFCAP_TOE6 0x08000 /* interface can offload TCP6 */
#define IFCAP_VLAN_HWFILTER 0x10000 /* interface hw can filter vlan tag */
/* available 0x20000 */
#define IFCAP_NV 0x20000 /* can do SIOCGIFCAPNV/SIOCSIFCAPNV */
#define IFCAP_VLAN_HWTSO 0x40000 /* can do IFCAP_TSO on VLANs */
#define IFCAP_LINKSTATE 0x80000 /* the runtime link state is dynamic */
#define IFCAP_NETMAP 0x100000 /* netmap mode supported/enabled */
@ -260,7 +260,40 @@ struct if_data {
#define IFCAP_TOE (IFCAP_TOE4 | IFCAP_TOE6)
#define IFCAP_TXTLS (IFCAP_TXTLS4 | IFCAP_TXTLS6)
#define IFCAP_CANTCHANGE (IFCAP_NETMAP)
#define IFCAP_CANTCHANGE (IFCAP_NETMAP | IFCAP_NV)
#define IFCAP_ALLCAPS 0xffffffff
/*
 * String names of the IFCAP_* capability bits.  Each IFCAP_<x>_NAME is
 * the key under which capability IFCAP_<x> appears in the nvlist used
 * by the SIOCGIFCAPNV/SIOCSIFCAPNV interface.
 */
#define IFCAP_RXCSUM_NAME "RXCSUM"
#define IFCAP_TXCSUM_NAME "TXCSUM"
#define IFCAP_NETCONS_NAME "NETCONS"
#define IFCAP_VLAN_MTU_NAME "VLAN_MTU"
#define IFCAP_VLAN_HWTAGGING_NAME "VLAN_HWTAGGING"
#define IFCAP_JUMBO_MTU_NAME "JUMBO_MTU"
#define IFCAP_POLLING_NAME "POLLING"
#define IFCAP_VLAN_HWCSUM_NAME "VLAN_HWCSUM"
#define IFCAP_TSO4_NAME "TSO4"
#define IFCAP_TSO6_NAME "TSO6"
#define IFCAP_LRO_NAME "LRO"
#define IFCAP_WOL_UCAST_NAME "WOL_UCAST"
#define IFCAP_WOL_MCAST_NAME "WOL_MCAST"
#define IFCAP_WOL_MAGIC_NAME "WOL_MAGIC"
#define IFCAP_TOE4_NAME "TOE4"
#define IFCAP_TOE6_NAME "TOE6"
#define IFCAP_VLAN_HWFILTER_NAME "VLAN_HWFILTER"
#define IFCAP_VLAN_HWTSO_NAME "VLAN_HWTSO"
#define IFCAP_LINKSTATE_NAME "LINKSTATE"
#define IFCAP_NETMAP_NAME "NETMAP"
#define IFCAP_RXCSUM_IPV6_NAME "RXCSUM_IPV6"
#define IFCAP_TXCSUM_IPV6_NAME "TXCSUM_IPV6"
#define IFCAP_HWSTATS_NAME "HWSTATS"
#define IFCAP_TXRTLMT_NAME "TXRTLMT"
#define IFCAP_HWRXTSTMP_NAME "HWRXTSTMP"
#define IFCAP_MEXTPG_NAME "MEXTPG"
#define IFCAP_TXTLS4_NAME "TXTLS4"
#define IFCAP_TXTLS6_NAME "TXTLS6"
#define IFCAP_VXLAN_HWCSUM_NAME "VXLAN_HWCSUM"
#define IFCAP_VXLAN_HWTSO_NAME "VXLAN_HWTSO"
#define IFCAP_TXTLS_RTLMT_NAME "TXTLS_RTLMT"
#define IFQ_MAXLEN 50
#define IFNET_SLOWHZ 1 /* granularity is 1 second */
@ -387,6 +420,15 @@ struct ifreq_buffer {
void *buffer;
};
/*
 * Buffer descriptor carried in struct ifreq (member ifru_nv, accessed as
 * ifr_cap_nv) by the nvlist-based capability ioctls.
 *
 * SIOCGIFCAPNV: the kernel packs the capability nvlist and, if it fits in
 * buf_length bytes, copies it out to buffer and stores its size in length.
 * If it does not fit, length is set to the required size, buffer is set
 * to NULL and the ioctl fails with EFBIG.
 *
 * SIOCSIFCAPNV: the kernel copies in length bytes from buffer and unpacks
 * them as an nvlist.
 */
struct ifreq_nv_req {
u_int buf_length; /* Total size of buffer, in bytes;
u_int for ABI struct ifreq */
u_int length; /* Length of the filled part, in bytes */
void *buffer; /* Buffer itself, containing packed nv */
};
/* Largest length, in bytes, that SIOCSIFCAPNV accepts; more is EINVAL. */
#define IFR_CAP_NV_MAXBUFSIZE (2 * 1024 * 1024)
/*
* Interface request structure used for socket
* ioctl's. All interface ioctl's must have parameter
@ -411,6 +453,7 @@ struct ifreq {
int ifru_cap[2];
u_int ifru_fib;
u_char ifru_vlan_pcp;
struct ifreq_nv_req ifru_nv;
} ifr_ifru;
#define ifr_addr ifr_ifru.ifru_addr /* address */
#define ifr_dstaddr ifr_ifru.ifru_dstaddr /* other end of p-to-p link */
@ -434,6 +477,7 @@ struct ifreq {
#define ifr_fib ifr_ifru.ifru_fib /* interface fib */
#define ifr_vlan_pcp ifr_ifru.ifru_vlan_pcp /* VLAN priority */
#define ifr_lan_pcp ifr_ifru.ifru_vlan_pcp /* VLAN priority */
#define ifr_cap_nv ifr_ifru.ifru_nv /* nv-based cap interface */
};
#define _SIZEOF_ADDR_IFREQ(ifr) \
@ -605,6 +649,17 @@ MALLOC_DECLARE(M_IFMADDR);
extern struct sx ifnet_detach_sxlock;
/* Forward declarations; only pointers to these types are used below. */
struct nvlist;
struct ifcap_nv_bit_name;
/*
 * Build a capability bit mask from the boolean entries of nv, using the
 * NULL-name-terminated table nn.  A bit whose name is absent from nv keeps
 * its state from *old_cap; a bit whose name is present is set if "all" is
 * true or the stored boolean is true.  Returns the mask.
 */
int if_capnv_to_capint(const struct nvlist *nv, int *old_cap,
const struct ifcap_nv_bit_name *nn, bool all);
/*
 * For every entry of the NULL-name-terminated table nn whose bit is set in
 * ifr_cap, add a boolean to nv under the entry's name; the boolean is true
 * when the bit is also set in ifr_req.
 */
void if_capint_to_capnv(struct nvlist *nv,
const struct ifcap_nv_bit_name *nn, int ifr_cap, int ifr_req);
/*
 * Argument passed to a driver's if_ioctl method for SIOCSIFCAPNV.
 */
struct siocsifcapnv_driver_data {
int reqcap; /* requested if_capenable, from nvcap merged with current */
int reqcap2; /* requested if_capenable2, computed the same way */
struct nvlist *nvcap; /* the unpacked request nvlist itself */
};
#endif
#ifndef _KERNEL

View File

@ -147,4 +147,7 @@
#define SIOCGIFDOWNREASON _IOWR('i', 154, struct ifdownreason)
#define SIOCSIFCAPNV _IOW('i', 155, struct ifreq) /* set IF features */
#define SIOCGIFCAPNV _IOWR('i', 156, struct ifreq) /* get IF features */
#endif /* !_SYS_SOCKIO_H_ */