Allow specifying PCP on packets not belonging to any VLAN.

According to 802.1Q-2014, VLAN tagged packets with VLAN id 0 should be
considered as untagged, and only PCP and DEI values from the VLAN tag
are meaningful.  See for instance
https://www.cisco.com/c/en/us/td/docs/switches/connectedgrid/cg-switch-sw-master/software/configuration/guide/vlan0/b_vlan_0.html.

Make it possible to specify a PCP value for outgoing packets on an
ethernet interface.  When a PCP is supplied, a VLAN tag is added to the
frame, with the VLAN id set to 0 and the PCP field set to the supplied
value.  The VLAN tag encapsulation code is factored out of if_vlan.c
and moved into if_ethersubr.c.

Drivers might have issues with filtering VID 0 packets on
receive.  Such bugs should be fixed in each affected driver.

Reviewed by:	ae (previous version), hselasky, melifaro
Sponsored by:	Mellanox Technologies
MFC after:	2 weeks
Differential revision:	https://reviews.freebsd.org/D14702
This commit is contained in:
Konstantin Belousov 2018-03-27 15:29:32 +00:00
parent 34a77b9741
commit f137973487
11 changed files with 250 additions and 163 deletions

@ -60,78 +60,78 @@ static void
link_status(int s __unused, const struct ifaddrs *ifa)
{
/* XXX no const 'cuz LLADDR is defined wrong */
struct sockaddr_dl *sdl = (struct sockaddr_dl *) ifa->ifa_addr;
struct sockaddr_dl *sdl;
char *ether_format, *format_char;
struct ifreq ifr;
int n, rc, sock_hw;
static const u_char laggaddr[6] = {0};
if (sdl != NULL && sdl->sdl_alen > 0) {
if ((sdl->sdl_type == IFT_ETHER ||
sdl->sdl_type == IFT_L2VLAN ||
sdl->sdl_type == IFT_BRIDGE) &&
sdl->sdl_alen == ETHER_ADDR_LEN) {
ether_format = ether_ntoa((struct ether_addr *)LLADDR(sdl));
if (f_ether != NULL && strcmp(f_ether, "dash") == 0) {
for (format_char = strchr(ether_format, ':');
format_char != NULL;
format_char = strchr(ether_format, ':'))
*format_char = '-';
}
printf("\tether %s\n", ether_format);
} else {
int n = sdl->sdl_nlen > 0 ? sdl->sdl_nlen + 1 : 0;
sdl = (struct sockaddr_dl *) ifa->ifa_addr;
if (sdl == NULL || sdl->sdl_alen == 0)
return;
printf("\tlladdr %s\n", link_ntoa(sdl) + n);
}
/* Best-effort (i.e. failures are silent) to get original
* hardware address, as read by NIC driver at attach time. Only
* applies to Ethernet NICs (IFT_ETHER). However, laggX
* interfaces claim to be IFT_ETHER, and re-type their component
* Ethernet NICs as IFT_IEEE8023ADLAG. So, check for both. If
* the MAC is zeroed, then it's actually a lagg.
*/
if ((sdl->sdl_type == IFT_ETHER ||
sdl->sdl_type == IFT_IEEE8023ADLAG) &&
sdl->sdl_alen == ETHER_ADDR_LEN) {
struct ifreq ifr;
int sock_hw;
int rc;
static const u_char laggaddr[6] = {0};
strncpy(ifr.ifr_name, ifa->ifa_name,
sizeof(ifr.ifr_name));
memcpy(&ifr.ifr_addr, ifa->ifa_addr,
sizeof(ifa->ifa_addr->sa_len));
ifr.ifr_addr.sa_family = AF_LOCAL;
if ((sock_hw = socket(AF_LOCAL, SOCK_DGRAM, 0)) < 0) {
warn("socket(AF_LOCAL,SOCK_DGRAM)");
return;
}
rc = ioctl(sock_hw, SIOCGHWADDR, &ifr);
close(sock_hw);
if (rc != 0) {
return;
}
/*
* If this is definitely a lagg device or the hwaddr
* matches the link addr, don't bother.
*/
if (memcmp(ifr.ifr_addr.sa_data, laggaddr,
sdl->sdl_alen) == 0 ||
memcmp(ifr.ifr_addr.sa_data, LLADDR(sdl),
sdl->sdl_alen) == 0) {
return;
}
ether_format = ether_ntoa((const struct ether_addr *)
&ifr.ifr_addr.sa_data);
if (f_ether != NULL && strcmp(f_ether, "dash") == 0) {
for (format_char = strchr(ether_format, ':');
format_char != NULL;
format_char = strchr(ether_format, ':'))
*format_char = '-';
}
printf("\thwaddr %s\n", ether_format);
if ((sdl->sdl_type == IFT_ETHER || sdl->sdl_type == IFT_L2VLAN ||
sdl->sdl_type == IFT_BRIDGE) && sdl->sdl_alen == ETHER_ADDR_LEN) {
ether_format = ether_ntoa((struct ether_addr *)LLADDR(sdl));
if (f_ether != NULL && strcmp(f_ether, "dash") == 0) {
for (format_char = strchr(ether_format, ':');
format_char != NULL;
format_char = strchr(ether_format, ':'))
*format_char = '-';
}
printf("\tether %s\n", ether_format);
} else {
n = sdl->sdl_nlen > 0 ? sdl->sdl_nlen + 1 : 0;
printf("\tlladdr %s\n", link_ntoa(sdl) + n);
}
/*
* Best-effort (i.e. failures are silent) to get original
* hardware address, as read by NIC driver at attach time. Only
* applies to Ethernet NICs (IFT_ETHER). However, laggX
* interfaces claim to be IFT_ETHER, and re-type their component
* Ethernet NICs as IFT_IEEE8023ADLAG. So, check for both. If
* the MAC is zeroed, then it's actually a lagg.
*/
if ((sdl->sdl_type != IFT_ETHER &&
sdl->sdl_type != IFT_IEEE8023ADLAG) ||
sdl->sdl_alen != ETHER_ADDR_LEN)
return;
strncpy(ifr.ifr_name, ifa->ifa_name, sizeof(ifr.ifr_name));
memcpy(&ifr.ifr_addr, ifa->ifa_addr, sizeof(ifa->ifa_addr->sa_len));
ifr.ifr_addr.sa_family = AF_LOCAL;
if ((sock_hw = socket(AF_LOCAL, SOCK_DGRAM, 0)) < 0) {
warn("socket(AF_LOCAL,SOCK_DGRAM)");
return;
}
rc = ioctl(sock_hw, SIOCGHWADDR, &ifr);
close(sock_hw);
if (rc != 0)
return;
/*
* If this is definitely a lagg device or the hwaddr
* matches the link addr, don't bother.
*/
if (memcmp(ifr.ifr_addr.sa_data, laggaddr, sdl->sdl_alen) == 0 ||
memcmp(ifr.ifr_addr.sa_data, LLADDR(sdl), sdl->sdl_alen) == 0)
goto pcp;
ether_format = ether_ntoa((const struct ether_addr *)
&ifr.ifr_addr.sa_data);
if (f_ether != NULL && strcmp(f_ether, "dash") == 0) {
for (format_char = strchr(ether_format, ':');
format_char != NULL;
format_char = strchr(ether_format, ':'))
*format_char = '-';
}
printf("\thwaddr %s\n", ether_format);
pcp:
if (ioctl(s, SIOCGLANPCP, (caddr_t)&ifr) == 0 &&
ifr.ifr_lan_pcp != IFNET_PCP_NONE)
printf("\tpcp %d\n", ifr.ifr_lan_pcp);
}
static void

@ -1079,6 +1079,32 @@ setifmtu(const char *val, int dummy __unused, int s,
err(1, "ioctl SIOCSIFMTU (set mtu)");
}
/*
 * Command handler for "pcp <value>": parse val as a priority code point
 * and install it on the interface with the SIOCSLANPCP ioctl.
 *
 * val	textual PCP; any base strtoul() accepts with base 0, range 0..7
 * arg	unused
 * s	socket the ioctl is issued on
 * afp	not used by this handler
 *
 * Exits through errx()/err() on a malformed or out-of-range value, or
 * when the ioctl fails.  The request is built in `ifr', which is not
 * declared here -- presumably the file-scope request structure that
 * already carries the interface name (confirm against the rest of the
 * file).
 */
static void
setifpcp(const char *val, int arg __unused, int s, const struct afswtch *afp)
{
	u_long ul;
	char *endp;

	ul = strtoul(val, &endp, 0);
	/*
	 * endp == val means no digits were consumed at all.  Without this
	 * check an empty argument would be accepted as PCP 0, because
	 * strtoul() returns 0 and leaves endp on the terminating NUL.
	 */
	if (endp == val || *endp != '\0')
		errx(1, "invalid value for pcp");
	if (ul > 7)
		errx(1, "value for pcp out of range");
	ifr.ifr_lan_pcp = ul;
	if (ioctl(s, SIOCSLANPCP, (caddr_t)&ifr) == -1)
		err(1, "SIOCSLANPCP");
}
/*
 * Command handler for "-pcp": turn PCP off on the interface by storing
 * IFNET_PCP_NONE through the SIOCSLANPCP ioctl.
 *
 * val, arg and afp are not used; s is the socket the ioctl is issued on.
 * Exits through err() when the ioctl fails.
 */
static void
disableifpcp(const char *val, int arg __unused, int s,
    const struct afswtch *afp)
{
	int rc;

	ifr.ifr_lan_pcp = IFNET_PCP_NONE;
	rc = ioctl(s, SIOCSLANPCP, (caddr_t)&ifr);
	if (rc == -1)
		err(1, "SIOCSLANPCP");
}
static void
setifname(const char *val, int dummy __unused, int s,
const struct afswtch *afp)
@ -1436,6 +1462,8 @@ static struct cmd basic_cmds[] = {
DEF_CMD("-txcsum", -IFCAP_TXCSUM, setifcap),
DEF_CMD("netcons", IFCAP_NETCONS, setifcap),
DEF_CMD("-netcons", -IFCAP_NETCONS, setifcap),
DEF_CMD_ARG("pcp", setifpcp),
DEF_CMD("-pcp", 0, disableifpcp),
DEF_CMD("polling", IFCAP_POLLING, setifcap),
DEF_CMD("-polling", -IFCAP_POLLING, setifcap),
DEF_CMD("tso6", IFCAP_TSO6, setifcap),

@ -385,6 +385,20 @@ struct ether_vlan_header {
} \
} while (0)
/*
 * Names for 802.1q priorities ("802.1p"). Note that the numeric order is
 * not the priority order: value 1 (background) ranks below value 0 (best
 * effort), so default 0-tagged traffic takes priority over background
 * tagged traffic.
 */
#define IEEE8021Q_PCP_BK 1 /* Background (lowest) */
#define IEEE8021Q_PCP_BE 0 /* Best effort (default) */
#define IEEE8021Q_PCP_EE 2 /* Excellent effort */
#define IEEE8021Q_PCP_CA 3 /* Critical applications */
#define IEEE8021Q_PCP_VI 4 /* Video, < 100ms latency */
#define IEEE8021Q_PCP_VO 5 /* Voice, < 10ms latency */
#define IEEE8021Q_PCP_IC 6 /* Internetwork control */
#define IEEE8021Q_PCP_NC 7 /* Network control (highest) */
#ifdef _KERNEL
struct ifnet;
@ -406,6 +420,8 @@ extern char *ether_sprintf(const u_int8_t *);
void ether_vlan_mtap(struct bpf_if *, struct mbuf *,
void *, u_int);
struct mbuf *ether_vlanencap(struct mbuf *, uint16_t);
bool ether_8021q_frame(struct mbuf **mp, struct ifnet *ife, struct ifnet *p,
uint16_t vid, uint8_t pcp);
#ifdef _SYS_EVENTHANDLER_H_
/* new ethernet interface attached event */

@ -485,6 +485,7 @@ if_alloc(u_char type)
for (int i = 0; i < IFCOUNTERS; i++)
ifp->if_counters[i] = counter_u64_alloc(M_WAITOK);
ifp->if_get_counter = if_get_counter_default;
ifp->if_pcp = IFNET_PCP_NONE;
ifnet_setbyindex(ifp->if_index, ifp);
return (ifp);
}

@ -416,6 +416,7 @@ struct ifreq {
#define ifr_index ifr_ifru.ifru_index /* interface index */
#define ifr_fib ifr_ifru.ifru_fib /* interface fib */
#define ifr_vlan_pcp ifr_ifru.ifru_vlan_pcp /* VLAN priority */
#define ifr_lan_pcp ifr_ifru.ifru_vlan_pcp /* LAN priority (same member as ifr_vlan_pcp) */
};
#define _SIZEOF_ADDR_IFREQ(ifr) \
@ -565,6 +566,8 @@ struct ifrsshash {
uint32_t ifrh_types; /* RSS_TYPE_ */
};
#define IFNET_PCP_NONE 0xff /* PCP disabled */
#endif /* __BSD_VISIBLE */
#ifdef _KERNEL

@ -47,6 +47,7 @@
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mbuf.h>
#include <sys/priv.h>
#include <sys/random.h>
#include <sys/socket.h>
#include <sys/sockio.h>
@ -437,6 +438,19 @@ bad: if (m != NULL)
return ether_output_frame(ifp, m);
}
/*
 * Make sure the outgoing frame *mp carries an 802.1Q tag.
 *
 * A frame whose Ethernet type is already ETHERTYPE_VLAN is left as is.
 * Any other frame is handed to ether_8021q_frame() with VLAN id 0 and
 * the supplied pcp, using ifp both for diagnostics and as the
 * transmitting interface.
 *
 * Returns true when the frame can go out.  When tagging fails,
 * IFCOUNTER_OERRORS is bumped on ifp and false is returned;
 * ether_8021q_frame() leaves *mp NULL in that case.
 */
static bool
ether_set_pcp(struct mbuf **mp, struct ifnet *ifp, uint8_t pcp)
{
	struct ether_header *hdr;

	hdr = mtod(*mp, struct ether_header *);

	/* Already tagged: nothing to add. */
	if (ntohs(hdr->ether_type) == ETHERTYPE_VLAN)
		return (true);

	if (!ether_8021q_frame(mp, ifp, ifp, 0, pcp)) {
		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
		return (false);
	}
	return (true);
}
/*
* Ethernet link layer output routine to send a raw frame to the device.
*
@ -446,13 +460,17 @@ bad: if (m != NULL)
int
ether_output_frame(struct ifnet *ifp, struct mbuf *m)
{
int i;
int error;
uint8_t pcp;
pcp = ifp->if_pcp;
if (pcp != IFNET_PCP_NONE && !ether_set_pcp(&m, ifp, pcp))
return (0);
if (PFIL_HOOKED(&V_link_pfil_hook)) {
i = pfil_run_hooks(&V_link_pfil_hook, &m, ifp, PFIL_OUT, 0,
NULL);
if (i != 0)
error = pfil_run_hooks(&V_link_pfil_hook, &m, ifp,
PFIL_OUT, 0, NULL);
if (error != 0)
return (EACCES);
if (m == NULL)
@ -1109,6 +1127,22 @@ ether_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
ifp->if_mtu = ifr->ifr_mtu;
}
break;
case SIOCSLANPCP:
error = priv_check(curthread, PRIV_NET_SETLANPCP);
if (error != 0)
break;
if (ifr->ifr_lan_pcp > 7 &&
ifr->ifr_lan_pcp != IFNET_PCP_NONE)
error = EINVAL;
else
ifp->if_pcp = ifr->ifr_lan_pcp;
break;
case SIOCGLANPCP:
ifr->ifr_lan_pcp = ifp->if_pcp;
break;
default:
error = EINVAL; /* XXX netbsd has ENOTTY??? */
break;
@ -1257,5 +1291,86 @@ ether_vlanencap(struct mbuf *m, uint16_t tag)
return (m);
}
static SYSCTL_NODE(_net_link, IFT_L2VLAN, vlan, CTLFLAG_RW, 0,
"IEEE 802.1Q VLAN");
static SYSCTL_NODE(_net_link_vlan, PF_LINK, link, CTLFLAG_RW, 0,
"for consistency");
static VNET_DEFINE(int, soft_pad);
#define V_soft_pad VNET(soft_pad)
SYSCTL_INT(_net_link_vlan, OID_AUTO, soft_pad, CTLFLAG_RW | CTLFLAG_VNET,
&VNET_NAME(soft_pad), 0,
"pad short frames before tagging");
/*
* For now, make preserving PCP via an mbuf tag optional, as it increases
* per-packet memory allocations and frees. In the future, it would be
* preferable to reuse ether_vtag for this, or similar.
*/
int vlan_mtag_pcp = 0;
SYSCTL_INT(_net_link_vlan, OID_AUTO, mtag_pcp, CTLFLAG_RW,
&vlan_mtag_pcp, 0,
"Retain VLAN PCP information as packets are passed up the stack");
/*
 * Attach an 802.1Q tag to the outgoing frame *mp.
 *
 * mp	in/out frame pointer; may be replaced by ether_vlanencap() and is
 *	NULL on return whenever false is returned
 * ife	interface used only as the subject of if_printf() diagnostics
 * p	interface whose if_type and if_capenable decide whether the frame
 *	is padded and whether tag insertion is left to the hardware
 * vid	VLAN id placed in the tag
 * pcp	priority placed in the tag, unless vlan_mtag_pcp is set and the
 *	mbuf carries an MTAG_8021Q_PCP_OUT tag, whose value then wins
 *
 * Returns true on success and false when padding or encapsulation
 * failed.  No interface counters are updated here.
 */
bool
ether_8021q_frame(struct mbuf **mp, struct ifnet *ife, struct ifnet *p,
    uint16_t vid, uint8_t pcp)
{
	struct m_tag *mtag;
	int n;
	uint16_t tag;
	static const char pad[8];	/* just zeros */

	/*
	 * Pad the frame to the minimum size allowed if told to.
	 * This option is in accord with IEEE Std 802.1Q, 2003 Ed.,
	 * paragraph C.4.4.3.b.  It can help to work around buggy
	 * bridges that violate paragraph C.4.4.3.a from the same
	 * document, i.e., fail to pad short frames after untagging.
	 * E.g., a tagged frame 66 bytes long (incl. FCS) is OK, but
	 * untagging it will produce a 62-byte frame, which is a runt
	 * and requires padding.  There are VLAN-enabled network
	 * devices that just discard such runts instead or mishandle
	 * them somehow.
	 */
	if (V_soft_pad && p->if_type == IFT_ETHER) {
		for (n = ETHERMIN + ETHER_HDR_LEN - (*mp)->m_pkthdr.len;
		     n > 0; n -= sizeof(pad)) {
			if (!m_append(*mp, min(n, sizeof(pad)), pad))
				break;
		}
		/* n is still positive only if an m_append() call failed. */
		if (n > 0) {
			m_freem(*mp);
			*mp = NULL;
			if_printf(ife, "cannot pad short frame\n");
			return (false);
		}
	}

	/*
	 * Build the tag.  A PCP stored in an MTAG_8021Q_PCP_OUT mbuf tag
	 * takes precedence over the pcp argument, but only when the
	 * vlan_mtag_pcp knob is enabled.
	 */
	if (vlan_mtag_pcp && (mtag = m_tag_locate(*mp, MTAG_8021Q,
	    MTAG_8021Q_PCP_OUT, NULL)) != NULL)
		tag = EVL_MAKETAG(vid, *(uint8_t *)(mtag + 1), 0);
	else
		tag = EVL_MAKETAG(vid, pcp, 0);

	/*
	 * If the underlying interface can do VLAN tag insertion itself,
	 * just record the tag in the packet header and flag the mbuf;
	 * otherwise prepend the 802.1Q header in software.
	 */
	if (p->if_capenable & IFCAP_VLAN_HWTAGGING) {
		(*mp)->m_pkthdr.ether_vtag = tag;
		(*mp)->m_flags |= M_VLANTAG;
	} else {
		*mp = ether_vlanencap(*mp, tag);
		if (*mp == NULL) {
			if_printf(ife, "unable to prepend 802.1Q header\n");
			return (false);
		}
	}
	return (true);
}
DECLARE_MODULE(ether, ether_mod, SI_SUB_INIT_IF, SI_ORDER_ANY);
MODULE_VERSION(ether, 1);

@ -365,6 +365,9 @@ struct ifnet {
if_snd_tag_query_t *if_snd_tag_query;
if_snd_tag_free_t *if_snd_tag_free;
/* Ethernet PCP */
uint8_t if_pcp;
/*
* Spare fields to be added before branching a stable branch, so
* that structure can be enhanced without changing the kernel

@ -196,25 +196,7 @@ static struct {
{0, NULL}
};
SYSCTL_DECL(_net_link);
static SYSCTL_NODE(_net_link, IFT_L2VLAN, vlan, CTLFLAG_RW, 0,
"IEEE 802.1Q VLAN");
static SYSCTL_NODE(_net_link_vlan, PF_LINK, link, CTLFLAG_RW, 0,
"for consistency");
static VNET_DEFINE(int, soft_pad);
#define V_soft_pad VNET(soft_pad)
SYSCTL_INT(_net_link_vlan, OID_AUTO, soft_pad, CTLFLAG_RW | CTLFLAG_VNET,
&VNET_NAME(soft_pad), 0, "pad short frames before tagging");
/*
* For now, make preserving PCP via an mbuf tag optional, as it increases
* per-packet memory allocations and frees. In the future, it would be
* preferable to reuse ether_vtag for this, or similar.
*/
static int vlan_mtag_pcp = 0;
SYSCTL_INT(_net_link_vlan, OID_AUTO, mtag_pcp, CTLFLAG_RW, &vlan_mtag_pcp, 0,
"Retain VLAN PCP information as packets are passed up the stack");
extern int vlan_mtag_pcp;
static const char vlanname[] = "vlan";
static MALLOC_DEFINE(M_VLAN, vlanname, "802.1Q Virtual LAN Interface");
@ -1171,8 +1153,6 @@ vlan_transmit(struct ifnet *ifp, struct mbuf *m)
{
struct ifvlan *ifv;
struct ifnet *p;
struct m_tag *mtag;
uint16_t tag;
int error, len, mcast;
VLAN_LOCK_READER;
@ -1201,59 +1181,10 @@ vlan_transmit(struct ifnet *ifp, struct mbuf *m)
return (ENETDOWN);
}
/*
* Pad the frame to the minimum size allowed if told to.
* This option is in accord with IEEE Std 802.1Q, 2003 Ed.,
* paragraph C.4.4.3.b. It can help to work around buggy
* bridges that violate paragraph C.4.4.3.a from the same
* document, i.e., fail to pad short frames after untagging.
* E.g., a tagged frame 66 bytes long (incl. FCS) is OK, but
* untagging it will produce a 62-byte frame, which is a runt
* and requires padding. There are VLAN-enabled network
* devices that just discard such runts instead or mishandle
* them somehow.
*/
if (V_soft_pad && p->if_type == IFT_ETHER) {
static char pad[8]; /* just zeros */
int n;
for (n = ETHERMIN + ETHER_HDR_LEN - m->m_pkthdr.len;
n > 0; n -= sizeof(pad))
if (!m_append(m, min(n, sizeof(pad)), pad))
break;
if (n > 0) {
if_printf(ifp, "cannot pad short frame\n");
if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
VLAN_RUNLOCK();
m_freem(m);
return (0);
}
}
/*
* If underlying interface can do VLAN tag insertion itself,
* just pass the packet along. However, we need some way to
* tell the interface where the packet came from so that it
* knows how to find the VLAN tag to use, so we attach a
* packet tag that holds it.
*/
if (vlan_mtag_pcp && (mtag = m_tag_locate(m, MTAG_8021Q,
MTAG_8021Q_PCP_OUT, NULL)) != NULL)
tag = EVL_MAKETAG(ifv->ifv_vid, *(uint8_t *)(mtag + 1), 0);
else
tag = ifv->ifv_tag;
if (p->if_capenable & IFCAP_VLAN_HWTAGGING) {
m->m_pkthdr.ether_vtag = tag;
m->m_flags |= M_VLANTAG;
} else {
m = ether_vlanencap(m, tag);
if (m == NULL) {
if_printf(ifp, "unable to prepend VLAN header\n");
if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
VLAN_RUNLOCK();
return (0);
}
if (!ether_8021q_frame(&m, ifp, p, ifv->ifv_vid, ifv->ifv_pcp)) {
if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
VLAN_RUNLOCK();
return (0);
}
/*

@ -73,22 +73,8 @@ struct vlanreq {
#define SIOCSETVLAN SIOCSIFGENERIC
#define SIOCGETVLAN SIOCGIFGENERIC
#define SIOCGVLANPCP _IOWR('i', 152, struct ifreq) /* Get VLAN PCP */
#define SIOCSVLANPCP _IOW('i', 153, struct ifreq) /* Set VLAN PCP */
/*
* Names for 802.1q priorities ("802.1p"). Notice that in this scheme,
* (0 < 1), allowing default 0-tagged traffic to take priority over background
* tagged traffic.
*/
#define IEEE8021Q_PCP_BK 1 /* Background (lowest) */
#define IEEE8021Q_PCP_BE 0 /* Best effort (default) */
#define IEEE8021Q_PCP_EE 2 /* Excellent effort */
#define IEEE8021Q_PCP_CA 3 /* Critical applications */
#define IEEE8021Q_PCP_VI 4 /* Video, < 100ms latency */
#define IEEE8021Q_PCP_VO 5 /* Video, < 10ms latency */
#define IEEE8021Q_PCP_IC 6 /* Internetwork control */
#define IEEE8021Q_PCP_NC 7 /* Network control (highest) */
#define SIOCGVLANPCP SIOCGLANPCP /* Get VLAN PCP */
#define SIOCSVLANPCP SIOCSLANPCP /* Set VLAN PCP */
#ifdef _KERNEL
/*

@ -344,7 +344,8 @@
#define PRIV_NET_SETIFDESCR 418 /* Set interface description. */
#define PRIV_NET_SETIFFIB 419 /* Set interface fib. */
#define PRIV_NET_VXLAN 420 /* Administer vxlan. */
#define PRIV_NET_SETVLANPCP 421 /* Set VLAN priority. */
#define PRIV_NET_SETLANPCP 421 /* Set LAN priority. */
#define PRIV_NET_SETVLANPCP PRIV_NET_SETLANPCP /* Alias Set VLAN priority */
/*
* 802.11-related privileges.

@ -140,4 +140,7 @@
#define SIOCGIFRSSHASH _IOWR('i', 151, struct ifrsshash)/* get the current RSS
type/func settings */
#define SIOCGLANPCP _IOWR('i', 152, struct ifreq) /* Get (V)LAN PCP */
#define SIOCSLANPCP _IOW('i', 153, struct ifreq) /* Set (V)LAN PCP */
#endif /* !_SYS_SOCKIO_H_ */