From 75ee267c22fd5c7d65fd847a1ca3ff3592186ce1 Mon Sep 17 00:00:00 2001 From: Gleb Smirnoff Date: Mon, 30 Jan 2006 13:45:15 +0000 Subject: [PATCH] Merge the //depot/user/yar/vlan branch into CVS. It contains some collective work by yar, thompsa and myself. The checksum offloading part also involves work done by Mihail Balikov. The most important changes: o Instead of a global linked list of all vlan softcs, use a per-trunk hash. The size of the hash is dynamically adjusted, depending on the number of entries. This changes struct ifnet, replacing the counter of vlans with a pointer to the trunk structure. This change is an improvement for setups with a big number of VLANs, several interfaces and several CPUs. It is a small regression for a setup with a single VLAN interface. An alternative to the dynamic hash is a per-trunk static array with 4096 entries, which is a compile-time option - VLAN_ARRAY. In my experiments the array is not an improvement, probably because such a big trunk structure doesn't fit into the CPU cache. o Introduce an UMA zone for VLAN tags. Since drivers depend on it, the zone is declared in kern_mbuf.c, not in the optional vlan(4) driver. This change is a big improvement for any setup utilizing vlan(4). o Use rwlock(9) instead of mutex(9) for locking. We are the first ones to do this! :) o Some drivers can do hardware VLAN tagging + hardware checksum offloading. Add an infrastructure for this. Whenever vlan(4) is attached to a parent or the parent configuration is changed, the flags on the vlan(4) interface are updated. 
In collaboration with: yar, thompsa In collaboration with: Mihail Balikov --- sys/conf/options | 2 + sys/kern/kern_mbuf.c | 25 ++ sys/net/if.c | 3 +- sys/net/if.h | 1 + sys/net/if_ethersubr.c | 6 +- sys/net/if_var.h | 3 +- sys/net/if_vlan.c | 581 ++++++++++++++++++++++++++++++++--------- sys/net/if_vlan_var.h | 14 +- sys/sys/mbuf.h | 5 + 9 files changed, 511 insertions(+), 129 deletions(-) diff --git a/sys/conf/options b/sys/conf/options index 494256fb369f..d5c9df64f3d3 100644 --- a/sys/conf/options +++ b/sys/conf/options @@ -385,6 +385,8 @@ TCPDEBUG TCP_SIGNATURE opt_inet.h TCP_SACK_DEBUG opt_tcp_sack.h TCP_DROP_SYNFIN opt_tcp_input.h +DEV_VLAN opt_vlan.h +VLAN_ARRAY opt_vlan.h XBONEHACK # Netgraph(4). Use option NETGRAPH to enable the base netgraph code. diff --git a/sys/kern/kern_mbuf.c b/sys/kern/kern_mbuf.c index 514eca468feb..8f5b919cfaa1 100644 --- a/sys/kern/kern_mbuf.c +++ b/sys/kern/kern_mbuf.c @@ -133,6 +133,7 @@ uma_zone_t zone_jumbo4; uma_zone_t zone_jumbo9; uma_zone_t zone_jumbo16; uma_zone_t zone_ext_refcnt; +uma_zone_t zone_mtag_vlan; /* * Local prototypes. @@ -145,6 +146,7 @@ static void mb_dtor_clust(void *, int, void *); static void mb_dtor_pack(void *, int, void *); static int mb_zinit_pack(void *, int, int); static void mb_zfini_pack(void *, int); +static int mt_zinit_vlan(void *, int, int); static void mb_reclaim(void *); static void mbuf_init(void *); @@ -225,6 +227,12 @@ mbuf_init(void *dummy) NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT); + zone_mtag_vlan = uma_zcreate("mtag_vlan", + sizeof(struct m_tag) + sizeof(u_int), + NULL, NULL, + mt_zinit_vlan, NULL, + UMA_ALIGN_INT, 0); + /* uma_prealloc() goes here... 
*/ /* @@ -511,6 +519,23 @@ mb_ctor_pack(void *mem, int size, void *arg, int how) return (0); } +static void +mt_vlan_free(struct m_tag *mtag) +{ + uma_zfree(zone_mtag_vlan, mtag); +} + +static int +mt_zinit_vlan(void *mem, int size, int how) +{ + struct m_tag *mtag = (struct m_tag *)mem; + + m_tag_setup(mtag, MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int)); + mtag->m_tag_free = mt_vlan_free; + + return (0); +} + /* * This is the protocol drain routine. * diff --git a/sys/net/if.c b/sys/net/if.c index 733a77ea3fcd..814815b14b80 100644 --- a/sys/net/if.c +++ b/sys/net/if.c @@ -1040,6 +1040,7 @@ if_route(struct ifnet *ifp, int flag, int fam) } void (*vlan_link_state_p)(struct ifnet *, int); /* XXX: private from if_vlan */ +void (*vlan_trunk_cap_p)(struct ifnet *); /* XXX: private from if_vlan */ /* * Handle a change in the interface link state. To avoid LORs @@ -1075,7 +1076,7 @@ do_link_state_change(void *arg, int pending) else link = NOTE_LINKINV; KNOTE_UNLOCKED(&ifp->if_klist, link); - if (ifp->if_nvlans != 0) + if (ifp->if_vlantrunk != NULL) (*vlan_link_state_p)(ifp, link); if ((ifp->if_type == IFT_ETHER || ifp->if_type == IFT_L2VLAN) && diff --git a/sys/net/if.h b/sys/net/if.h index 460f37410a1a..8e483cc31616 100644 --- a/sys/net/if.h +++ b/sys/net/if.h @@ -191,6 +191,7 @@ struct if_data { #define IFCAP_VLAN_HWTAGGING 0x0010 /* hardware VLAN tag support */ #define IFCAP_JUMBO_MTU 0x0020 /* 9000 byte MTU supported */ #define IFCAP_POLLING 0x0040 /* driver supports polling */ +#define IFCAP_VLAN_HWCSUM 0x0080 /* can do IFCAP_HWCSUM on VLANs */ #define IFCAP_HWCSUM (IFCAP_RXCSUM | IFCAP_TXCSUM) diff --git a/sys/net/if_ethersubr.c b/sys/net/if_ethersubr.c index 9a5ffc3055c2..4f9d1192ca8d 100644 --- a/sys/net/if_ethersubr.c +++ b/sys/net/if_ethersubr.c @@ -623,7 +623,7 @@ ether_demux(struct ifnet *ifp, struct mbuf *m) if (!(ifp->if_bridge) && !((ether_type == ETHERTYPE_VLAN || m->m_flags & M_VLANTAG) && - ifp->if_nvlans > 0)) { + ifp->if_vlantrunk != NULL)) { #ifdef DEV_CARP 
/* * XXX: Okay, we need to call carp_forus() and - if it is for @@ -696,7 +696,7 @@ post_stats: /* * If no VLANs are configured, drop. */ - if (ifp->if_nvlans == 0) { + if (ifp->if_vlantrunk == NULL) { ifp->if_noproto++; m_freem(m); return; @@ -716,7 +716,7 @@ post_stats: */ switch (ether_type) { case ETHERTYPE_VLAN: - if (ifp->if_nvlans != 0) { + if (ifp->if_vlantrunk != NULL) { KASSERT(vlan_input_p,("ether_input: VLAN not loaded!")); (*vlan_input_p)(ifp, m); } else { diff --git a/sys/net/if_var.h b/sys/net/if_var.h index afd014a97e1f..b76e07535ee6 100644 --- a/sys/net/if_var.h +++ b/sys/net/if_var.h @@ -69,6 +69,7 @@ struct rt_addrinfo; struct socket; struct ether_header; struct carp_if; +struct ifvlantrunk; #endif #include /* get TAILQ macros */ @@ -133,7 +134,7 @@ struct ifnet { struct bpf_if *if_bpf; /* packet filter structure */ u_short if_index; /* numeric abbreviation for this if */ short if_timer; /* time 'til if_watchdog called */ - u_short if_nvlans; /* number of active vlans */ + struct ifvlantrunk *if_vlantrunk; /* pointer to 802.1q data */ int if_flags; /* up/down, broadcast, etc. 
*/ int if_capabilities; /* interface capabilities */ int if_capenable; /* enabled features */ diff --git a/sys/net/if_vlan.c b/sys/net/if_vlan.c index d53ad6e135c1..4ef86baabdc8 100644 --- a/sys/net/if_vlan.c +++ b/sys/net/if_vlan.c @@ -42,12 +42,15 @@ */ #include "opt_inet.h" +#include "opt_vlan.h" #include #include +#include #include #include #include +#include #include #include #include @@ -69,6 +72,24 @@ #endif #define VLANNAME "vlan" +#define VLAN_DEF_HWIDTH 4 + +LIST_HEAD(ifvlanhead, ifvlan); + +struct ifvlantrunk { + struct ifnet *parent; /* parent interface of this trunk */ + struct rwlock rw; +#ifdef VLAN_ARRAY + struct ifvlan *vlans[EVL_VLID_MASK+1]; /* static table */ +#else + struct ifvlanhead *hash; /* dynamic hash-list table */ + uint16_t hmask; + uint16_t hwidth; +#endif + int refcnt; + LIST_ENTRY(ifvlantrunk) trunk_entry; +}; +static LIST_HEAD(, ifvlantrunk) trunk_list; struct vlan_mc_entry { struct ether_addr mc_addr; @@ -76,18 +97,20 @@ struct vlan_mc_entry { }; struct ifvlan { + struct ifvlantrunk *ifv_trunk; struct ifnet *ifv_ifp; - struct ifnet *ifv_p; /* parent inteface of this vlan */ +#define TRUNK(ifv) ((ifv)->ifv_trunk) +#define PARENT(ifv) ((ifv)->ifv_trunk->parent) int ifv_pflags; /* special flags we have set on parent */ struct ifv_linkmib { int ifvm_parent; int ifvm_encaplen; /* encapsulation length */ int ifvm_mtufudge; /* MTU fudged by this much */ int ifvm_mintu; /* min transmission unit */ - u_int16_t ifvm_proto; /* encapsulation ethertype */ - u_int16_t ifvm_tag; /* tag to apply on packets leaving if */ + uint16_t ifvm_proto; /* encapsulation ethertype */ + uint16_t ifvm_tag; /* tag to apply on packets leaving if */ } ifv_mib; - SLIST_HEAD(__vlan_mchead, vlan_mc_entry) vlan_mc_listhead; + SLIST_HEAD(__vlan_mchead, vlan_mc_entry) vlan_mc_listhead; LIST_ENTRY(ifvlan) ifv_list; }; #define ifv_tag ifv_mib.ifvm_tag @@ -95,7 +118,7 @@ struct ifvlan { #define ifv_mtufudge ifv_mib.ifvm_mtufudge #define ifv_mintu ifv_mib.ifvm_mintu -/* 
Special flags we should propagate to parent */ +/* Special flags we should propagate to parent. */ static struct { int flag; int (*func)(struct ifnet *, int); @@ -110,19 +133,44 @@ SYSCTL_NODE(_net_link, IFT_L2VLAN, vlan, CTLFLAG_RW, 0, "IEEE 802.1Q VLAN"); SYSCTL_NODE(_net_link_vlan, PF_LINK, link, CTLFLAG_RW, 0, "for consistency"); static MALLOC_DEFINE(M_VLAN, VLANNAME, "802.1Q Virtual LAN Interface"); -static LIST_HEAD(, ifvlan) ifv_list; /* - * Locking: one lock is used to guard both the ifv_list and modification - * to vlan data structures. We are rather conservative here; probably - * more than necessary. + * We have a global mutex, that is used to serialize configuration + * changes and isn't used in normal packet delivery. + * + * We also have a per-trunk rwlock, that is locked shared on packet + * processing and exclusive when configuration is changed. + * + * The VLAN_ARRAY substitutes the dynamic hash with a static array + * with 4096 entries. In theory this can give a boost in processing, + * however in practice it does not. Probably this is because the array + * is too big to fit into the CPU cache. 
*/ static struct mtx ifv_mtx; -#define VLAN_LOCK_INIT() mtx_init(&ifv_mtx, VLANNAME, NULL, MTX_DEF) +#define VLAN_LOCK_INIT() mtx_init(&ifv_mtx, "vlan_global", NULL, MTX_DEF) #define VLAN_LOCK_DESTROY() mtx_destroy(&ifv_mtx) #define VLAN_LOCK_ASSERT() mtx_assert(&ifv_mtx, MA_OWNED) -#define VLAN_LOCK() mtx_lock(&ifv_mtx) -#define VLAN_UNLOCK() mtx_unlock(&ifv_mtx) +#define VLAN_LOCK() mtx_lock(&ifv_mtx) +#define VLAN_UNLOCK() mtx_unlock(&ifv_mtx) +#define TRUNK_LOCK_INIT(trunk) rw_init(&(trunk)->rw, VLANNAME) +#define TRUNK_LOCK_DESTROY(trunk) rw_destroy(&(trunk)->rw) +#define TRUNK_LOCK(trunk) rw_wlock(&(trunk)->rw) +#define TRUNK_UNLOCK(trunk) rw_wunlock(&(trunk)->rw) +#define TRUNK_LOCK_ASSERT(trunk) rw_assert(&(trunk)->rw, RA_WLOCKED) +#define TRUNK_RLOCK(trunk) rw_rlock(&(trunk)->rw) +#define TRUNK_RUNLOCK(trunk) rw_runlock(&(trunk)->rw) +#define TRUNK_LOCK_RASSERT(trunk) rw_assert(&(trunk)->rw, RA_RLOCKED) + +#ifndef VLAN_ARRAY +static void vlan_inithash(struct ifvlantrunk *trunk); +static void vlan_freehash(struct ifvlantrunk *trunk); +static int vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv); +static int vlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv); +static void vlan_growhash(struct ifvlantrunk *trunk, int howmuch); +static __inline struct ifvlan * vlan_gethash(struct ifvlantrunk *trunk, + uint16_t tag); +#endif +static void trunk_destroy(struct ifvlantrunk *trunk); static void vlan_start(struct ifnet *ifp); static void vlan_ifinit(void *foo); @@ -133,8 +181,10 @@ static int vlan_setflag(struct ifnet *ifp, int flag, int status, static int vlan_setflags(struct ifnet *ifp, int status); static int vlan_setmulti(struct ifnet *ifp); static int vlan_unconfig(struct ifnet *ifp); -static int vlan_config(struct ifvlan *ifv, struct ifnet *p); +static int vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t tag); static void vlan_link_state(struct ifnet *ifp, int link); +static void vlan_capabilities(struct ifvlan *ifv); +static void 
vlan_trunk_capabilities(struct ifnet *ifp); static struct ifnet *vlan_clone_match_ethertag(struct if_clone *, const char *, int *); @@ -145,6 +195,196 @@ static int vlan_clone_destroy(struct if_clone *, struct ifnet *); static struct if_clone vlan_cloner = IFC_CLONE_INITIALIZER(VLANNAME, NULL, IF_MAXUNIT, NULL, vlan_clone_match, vlan_clone_create, vlan_clone_destroy); +#ifndef VLAN_ARRAY +#define HASH(n, m) ((((n) >> 8) ^ ((n) >> 4) ^ (n)) & (m)) +static void +vlan_inithash(struct ifvlantrunk *trunk) +{ + int i, n; + + /* + * The trunk must not be locked here since we call malloc(M_WAITOK). + * It is OK in case this function is called before the trunk struct + * gets hooked up and becomes visible from other threads. + */ + + KASSERT(trunk->hwidth == 0 && trunk->hash == NULL, + ("%s: hash already initialized", __func__)); + + trunk->hwidth = VLAN_DEF_HWIDTH; + n = 1 << trunk->hwidth; + trunk->hmask = n - 1; + trunk->hash = malloc(sizeof(struct ifvlanhead) * n, M_VLAN, M_WAITOK); + for (i = 0; i < n; i++) + LIST_INIT(&trunk->hash[i]); +} + +static void +vlan_freehash(struct ifvlantrunk *trunk) +{ +#ifdef INVARIANTS + int i; + + TRUNK_LOCK_ASSERT(trunk); /* XXX just unhook trunk first? */ + KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__)); + for (i = 0; i < (1 << trunk->hwidth); i++) + KASSERT(LIST_EMPTY(&trunk->hash[i]), + ("%s: hash table not empty", __func__)); +#endif + free(trunk->hash, M_VLAN); + trunk->hash = NULL; + trunk->hwidth = trunk->hmask = 0; +} + +static int +vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv) +{ + int i, b; + struct ifvlan *ifv2; + + TRUNK_LOCK_ASSERT(trunk); + KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__)); + + b = 1 << trunk->hwidth; + i = HASH(ifv->ifv_tag, trunk->hmask); + LIST_FOREACH(ifv2, &trunk->hash[i], ifv_list) + if (ifv->ifv_tag == ifv2->ifv_tag) + return (EEXIST); + + /* + * Grow the hash when the number of vlans exceeds half of the number of + * hash buckets squared. 
This will make the average linked-list length + * buckets/2. + */ + if (trunk->refcnt > (b * b) / 2) { + vlan_growhash(trunk, 1); + i = HASH(ifv->ifv_tag, trunk->hmask); + } + LIST_INSERT_HEAD(&trunk->hash[i], ifv, ifv_list); + trunk->refcnt++; + + return (0); +} + +static int +vlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv) +{ + int i, b; + struct ifvlan *ifv2; + + TRUNK_LOCK_ASSERT(trunk); + KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__)); + + b = 1 << trunk->hwidth; + i = HASH(ifv->ifv_tag, trunk->hmask); + LIST_FOREACH(ifv2, &trunk->hash[i], ifv_list) + if (ifv2 == ifv) { + trunk->refcnt--; + LIST_REMOVE(ifv2, ifv_list); + if (trunk->refcnt < (b * b) / 2) + vlan_growhash(trunk, -1); + return (0); + } + + panic("%s: vlan not found\n", __func__); + return (ENOENT); /*NOTREACHED*/ +} + +/* + * Grow the hash larger or smaller if memory permits. + */ +static void +vlan_growhash(struct ifvlantrunk *trunk, int howmuch) +{ + + struct ifvlan *ifv; + struct ifvlanhead *hash2; + int hwidth2, i, j, n, n2; + + TRUNK_LOCK_ASSERT(trunk); + KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__)); + + if (howmuch == 0) { + /* Harmless yet obvious coding error */ + printf("%s: howmuch is 0\n", __func__); + return; + } + + hwidth2 = trunk->hwidth + howmuch; + n = 1 << trunk->hwidth; + n2 = 1 << hwidth2; + /* Do not shrink the table below the default */ + if (hwidth2 < VLAN_DEF_HWIDTH) + return; + + /* M_NOWAIT because we're called with trunk mutex held */ + hash2 = malloc(sizeof(struct ifvlanhead) * n2, M_VLAN, M_NOWAIT); + if (hash2 == NULL) { + printf("%s: out of memory -- hash size not changed\n", + __func__); + return; /* We can live with the old hash table */ + } + for (j = 0; j < n2; j++) + LIST_INIT(&hash2[j]); + for (i = 0; i < n; i++) + while (!LIST_EMPTY(&trunk->hash[i])) { + ifv = LIST_FIRST(&trunk->hash[i]); + LIST_REMOVE(ifv, ifv_list); + j = HASH(ifv->ifv_tag, n2 - 1); + LIST_INSERT_HEAD(&hash2[j], ifv, ifv_list); + } + 
free(trunk->hash, M_VLAN); + trunk->hash = hash2; + trunk->hwidth = hwidth2; + trunk->hmask = n2 - 1; +} + +static __inline struct ifvlan * +vlan_gethash(struct ifvlantrunk *trunk, uint16_t tag) +{ + struct ifvlan *ifv; + + TRUNK_LOCK_RASSERT(trunk); + + LIST_FOREACH(ifv, &trunk->hash[HASH(tag, trunk->hmask)], ifv_list) + if (ifv->ifv_tag == tag) + return (ifv); + return (NULL); +} + +#if 0 +/* Debugging code to view the hashtables. */ +static void +vlan_dumphash(struct ifvlantrunk *trunk) +{ + int i; + struct ifvlan *ifv; + + for (i = 0; i < (1 << trunk->hwidth); i++) { + printf("%d: ", i); + LIST_FOREACH(ifv, &trunk->hash[i], ifv_list) + printf("%s ", ifv->ifv_ifp->if_xname); + printf("\n"); + } +} +#endif /* 0 */ +#endif /* !VLAN_ARRAY */ + +static void +trunk_destroy(struct ifvlantrunk *trunk) +{ + VLAN_LOCK_ASSERT(); + + TRUNK_LOCK(trunk); +#ifndef VLAN_ARRAY + vlan_freehash(trunk); +#endif + TRUNK_LOCK_DESTROY(trunk); + LIST_REMOVE(trunk, trunk_entry); + trunk->parent->if_vlantrunk = NULL; + free(trunk, M_VLAN); +} + /* * Program our multicast filter. What we're actually doing is * programming the multicast filter of the parent. This has the @@ -170,14 +410,7 @@ vlan_setmulti(struct ifnet *ifp) /* Find the parent. */ sc = ifp->if_softc; - ifp_p = sc->ifv_p; - - /* - * If we don't have a parent, just remember the membership for - * when we do. - */ - if (ifp_p == NULL) - return (0); + ifp_p = PARENT(sc); bzero((char *)&sdl, sizeof(sdl)); sdl.sdl_len = sizeof(sdl); @@ -223,12 +456,6 @@ vlan_setmulti(struct ifnet *ifp) * into this code through vlan_input_p which is defined there and * set here. Noone else in the system should be aware of this so * we use an explicit reference here. - * - * NB: Noone should ever need to check if vlan_input_p is null or - * not. 
This is because interfaces have a count of the number - * of active vlans (if_nvlans) and this should never be bumped - * except by vlan_config--which is in this module so therefore - * the module must be loaded and vlan_input_p must be non-NULL. */ extern void (*vlan_input_p)(struct ifnet *, struct mbuf *); @@ -241,18 +468,28 @@ vlan_modevent(module_t mod, int type, void *data) switch (type) { case MOD_LOAD: - LIST_INIT(&ifv_list); + LIST_INIT(&trunk_list); VLAN_LOCK_INIT(); vlan_input_p = vlan_input; vlan_link_state_p = vlan_link_state; + vlan_trunk_cap_p = vlan_trunk_capabilities; if_clone_attach(&vlan_cloner); break; case MOD_UNLOAD: + { + struct ifvlantrunk *trunk, *trunk1; + if_clone_detach(&vlan_cloner); vlan_input_p = NULL; vlan_link_state_p = NULL; + vlan_trunk_cap_p = NULL; + VLAN_LOCK(); + LIST_FOREACH_SAFE(trunk, &trunk_list, trunk_entry, trunk1) + trunk_destroy(trunk); + VLAN_UNLOCK(); VLAN_LOCK_DESTROY(); break; + } default: return (EOPNOTSUPP); } @@ -397,31 +634,22 @@ vlan_clone_create(struct if_clone *ifc, char *name, size_t len) ifp->if_type = IFT_L2VLAN; ifp->if_hdrlen = ETHER_VLAN_ENCAP_LEN; - VLAN_LOCK(); - LIST_INSERT_HEAD(&ifv_list, ifv, ifv_list); - VLAN_UNLOCK(); - if (ethertag) { - VLAN_LOCK(); - error = vlan_config(ifv, p); + error = vlan_config(ifv, p, tag); if (error != 0) { /* * Since we've partialy failed, we need to back * out all the way, otherwise userland could get * confused. Thus, we destroy the interface. */ - LIST_REMOVE(ifv, ifv_list); vlan_unconfig(ifp); - VLAN_UNLOCK(); ether_ifdetach(ifp); if_free_type(ifp, IFT_ETHER); free(ifv, M_VLAN); return (error); } - ifv->ifv_tag = tag; ifp->if_drv_flags |= IFF_DRV_RUNNING; - VLAN_UNLOCK(); /* Update flags on the parent, if necessary. 
*/ vlan_setflags(ifp, 1); @@ -438,10 +666,7 @@ vlan_clone_destroy(struct if_clone *ifc, struct ifnet *ifp) unit = ifp->if_dunit; - VLAN_LOCK(); - LIST_REMOVE(ifv, ifv_list); vlan_unconfig(ifp); - VLAN_UNLOCK(); ether_ifdetach(ifp); if_free_type(ifp, IFT_ETHER); @@ -475,12 +700,11 @@ vlan_start(struct ifnet *ifp) { struct ifvlan *ifv; struct ifnet *p; - struct ether_vlan_header *evl; struct mbuf *m; int error; ifv = ifp->if_softc; - p = ifv->ifv_p; + p = PARENT(ifv); for (;;) { IF_DEQUEUE(&ifp->if_snd, m); @@ -507,10 +731,8 @@ vlan_start(struct ifnet *ifp) * packet tag that holds it. */ if (p->if_capenable & IFCAP_VLAN_HWTAGGING) { - struct m_tag *mtag = m_tag_alloc(MTAG_VLAN, - MTAG_VLAN_TAG, - sizeof(u_int), - M_NOWAIT); + struct m_tag *mtag = (struct m_tag *) + uma_zalloc(zone_mtag_vlan, M_NOWAIT); if (mtag == NULL) { ifp->if_oerrors++; m_freem(m); @@ -520,6 +742,8 @@ vlan_start(struct ifnet *ifp) m_tag_prepend(m, mtag); m->m_flags |= M_VLANTAG; } else { + struct ether_vlan_header *evl; + M_PREPEND(m, ifv->ifv_encaplen, M_DONTWAIT); if (m == NULL) { if_printf(ifp, @@ -570,10 +794,12 @@ vlan_start(struct ifnet *ifp) static void vlan_input(struct ifnet *ifp, struct mbuf *m) { - struct ether_vlan_header *evl; + struct ifvlantrunk *trunk = ifp->if_vlantrunk; struct ifvlan *ifv; struct m_tag *mtag; - u_int tag; + uint16_t tag; + + KASSERT(trunk != NULL, ("%s: no trunk", __func__)); if (m->m_flags & M_VLANTAG) { /* @@ -587,6 +813,8 @@ vlan_input(struct ifnet *ifp, struct mbuf *m) m_tag_delete(m, mtag); m->m_flags &= ~M_VLANTAG; } else { + struct ether_vlan_header *evl; + /* * Packet is tagged in-band as specified by 802.1q. 
*/ @@ -613,7 +841,7 @@ vlan_input(struct ifnet *ifp, struct mbuf *m) evl->evl_encap_proto = evl->evl_proto; break; default: - tag = (u_int) -1; + tag = (uint16_t) -1; #ifdef INVARIANTS panic("%s: unsupported if_type (%u)", __func__, ifp->if_type); @@ -622,23 +850,26 @@ vlan_input(struct ifnet *ifp, struct mbuf *m) } } - VLAN_LOCK(); - LIST_FOREACH(ifv, &ifv_list, ifv_list) - if (ifp == ifv->ifv_p && tag == ifv->ifv_tag) - break; - + /* + * In the VLAN_ARRAY case we proceed completely lockless. + */ +#ifdef VLAN_ARRAY + ifv = trunk->vlans[tag]; if (ifv == NULL || (ifv->ifv_ifp->if_flags & IFF_UP) == 0) { - VLAN_UNLOCK(); m_freem(m); ifp->if_noproto++; -#ifdef DEBUG - printf("%s: tag %d, no interface\n", __func__, tag); -#endif return; } - VLAN_UNLOCK(); /* XXX extend below? */ -#ifdef DEBUG - printf("%s: tag %d, parent %s\n", __func__, tag, ifv->ifv_p->if_xname); +#else + TRUNK_RLOCK(trunk); + ifv = vlan_gethash(trunk, tag); + if (ifv == NULL || (ifv->ifv_ifp->if_flags & IFF_UP) == 0) { + TRUNK_RUNLOCK(trunk); + m_freem(m); + ifp->if_noproto++; + return; + } + TRUNK_RUNLOCK(trunk); #endif if (mtag == NULL) { @@ -661,28 +892,58 @@ vlan_input(struct ifnet *ifp, struct mbuf *m) } static int -vlan_config(struct ifvlan *ifv, struct ifnet *p) +vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t tag) { + struct ifvlantrunk *trunk; struct ifnet *ifp; + int error = 0; - VLAN_LOCK_ASSERT(); - + /* VID numbers 0x0 and 0xFFF are reserved */ + if (tag == 0 || tag == 0xFFF) + return (EINVAL); if (p->if_type != IFT_ETHER) return (EPROTONOSUPPORT); - if (ifv->ifv_p) + if (ifv->ifv_trunk) return (EBUSY); + if (p->if_vlantrunk == NULL) { + trunk = malloc(sizeof(struct ifvlantrunk), + M_VLAN, M_WAITOK | M_ZERO); + VLAN_LOCK(); + if (p->if_vlantrunk != NULL) { + /* A race that is very unlikely to be hit. 
*/ + free(trunk, M_VLAN); + goto exists; + } +#ifndef VLAN_ARRAY + vlan_inithash(trunk); +#endif + TRUNK_LOCK_INIT(trunk); + LIST_INSERT_HEAD(&trunk_list, trunk, trunk_entry); + TRUNK_LOCK(trunk); + p->if_vlantrunk = trunk; + trunk->parent = p; + } else { + VLAN_LOCK(); +exists: + trunk = p->if_vlantrunk; + TRUNK_LOCK(trunk); + } + + ifv->ifv_tag = tag; +#ifdef VLAN_ARRAY + if (trunk->vlans[tag] != NULL) + error = EEXIST; +#else + error = vlan_inshash(trunk, ifv); +#endif + if (error) + goto done; + ifv->ifv_encaplen = ETHER_VLAN_ENCAP_LEN; ifv->ifv_mintu = ETHERMIN; ifv->ifv_pflags = 0; - /* - * The active VLAN counter on the parent is used - * at various places to see if there is a vlan(4) - * attached to this physical interface. - */ - p->if_nvlans++; - /* * If the parent supports the VLAN_MTU capability, * i.e. can Tx/Rx larger than ETHER_MAX_LEN frames, @@ -705,10 +966,10 @@ vlan_config(struct ifvlan *ifv, struct ifnet *p) ifv->ifv_mtufudge = ifv->ifv_encaplen; } - ifv->ifv_p = p; + ifv->ifv_trunk = trunk; ifp = ifv->ifv_ifp; ifp->if_mtu = p->if_mtu - ifv->ifv_mtufudge; - ifv->ifv_ifp->if_baudrate = p->if_baudrate; + ifp->if_baudrate = p->if_baudrate; /* * Copy only a selected subset of flags from the parent. * Other flags are none of our business. @@ -721,20 +982,7 @@ vlan_config(struct ifvlan *ifv, struct ifnet *p) ifp->if_link_state = p->if_link_state; -#if 0 - /* - * Not ready yet. We need notification from the parent - * when hw checksumming flags in its if_capenable change. - * Flags set in if_capabilities only are useless. - */ - /* - * If the parent interface can do hardware-assisted - * VLAN encapsulation, then propagate its hardware- - * assisted checksumming flags. 
- */ - if (p->if_capabilities & IFCAP_VLAN_HWTAGGING) - ifp->if_capabilities |= p->if_capabilities & IFCAP_HWCSUM; -#endif + vlan_capabilities(ifv); /* * Set up our ``Ethernet address'' to reflect the underlying @@ -748,24 +996,40 @@ vlan_config(struct ifvlan *ifv, struct ifnet *p) */ (void)vlan_setmulti(ifp); /* XXX: VLAN lock held */ - return (0); +#ifdef VLAN_ARRAY + atomic_store_rel_ptr((uintptr_t *)&trunk->vlans[tag], (uintptr_t)ifv); + trunk->refcnt++; +#endif +done: + TRUNK_UNLOCK(trunk); + VLAN_UNLOCK(); + + return (error); } static int vlan_unconfig(struct ifnet *ifp) { + struct ifvlantrunk *trunk; struct vlan_mc_entry *mc; struct ifvlan *ifv; - struct ifnet *p; int error; - VLAN_LOCK_ASSERT(); + VLAN_LOCK(); ifv = ifp->if_softc; - p = ifv->ifv_p; + trunk = ifv->ifv_trunk; - if (p) { + if (trunk) { struct sockaddr_dl sdl; + struct ifnet *p = trunk->parent; + + TRUNK_LOCK(trunk); +#ifdef VLAN_ARRAY + atomic_store_rel_ptr((uintptr_t *)&trunk->vlans[ifv->ifv_tag], + (uintptr_t)NULL); + trunk->refcnt--; +#endif /* * Since the interface is being unconfigured, we need to @@ -791,19 +1055,43 @@ vlan_unconfig(struct ifnet *ifp) } vlan_setflags(ifp, 0); /* clear special flags on parent */ - p->if_nvlans--; +#ifndef VLAN_ARRAY + vlan_remhash(trunk, ifv); +#endif + ifv->ifv_trunk = NULL; + + /* + * Check if we were the last. + */ + if (trunk->refcnt == 0) { + atomic_store_rel_ptr((uintptr_t *) + &trunk->parent->if_vlantrunk, + (uintptr_t)NULL); + /* + * XXXGL: If some ithread has already entered + * vlan_input() and is now blocked on the trunk + * lock, then it should preempt us right after + * unlock and finish its work. Then we will acquire + * lock again in trunk_destroy(). + * XXX: not true in case of VLAN_ARRAY + */ + TRUNK_UNLOCK(trunk); + trunk_destroy(trunk); + } else + TRUNK_UNLOCK(trunk); } /* Disconnect from parent. 
*/ if (ifv->ifv_pflags) if_printf(ifp, "%s: ifv_pflags unclean\n", __func__); - ifv->ifv_p = NULL; ifv->ifv_ifp->if_mtu = ETHERMTU; /* XXX why not 0? */ ifv->ifv_ifp->if_link_state = LINK_STATE_UNKNOWN; /* Clear our MAC address. */ bzero(IF_LLADDR(ifv->ifv_ifp), ETHER_ADDR_LEN); + VLAN_UNLOCK(); + return (0); } @@ -831,7 +1119,7 @@ vlan_setflag(struct ifnet *ifp, int flag, int status, * in accord with actual parent's flags. */ if (status != (ifv->ifv_pflags & flag)) { - error = (*func)(ifv->ifv_p, status); + error = (*func)(PARENT(ifv), status); if (error) return (error); ifv->ifv_pflags &= ~flag; @@ -863,15 +1151,71 @@ vlan_setflags(struct ifnet *ifp, int status) static void vlan_link_state(struct ifnet *ifp, int link) { + struct ifvlantrunk *trunk = ifp->if_vlantrunk; struct ifvlan *ifv; + int i; - VLAN_LOCK(); - LIST_FOREACH(ifv, &ifv_list, ifv_list) { - if (ifv->ifv_p == ifp) + TRUNK_LOCK(trunk); +#ifdef VLAN_ARRAY + for (i = 0; i < EVL_VLID_MASK+1; i++) + if (trunk->vlans[i] != NULL) { + ifv = trunk->vlans[i]; +#else + for (i = 0; i < (1 << trunk->hwidth); i++) { + LIST_FOREACH(ifv, &trunk->hash[i], ifv_list) +#endif if_link_state_change(ifv->ifv_ifp, - ifv->ifv_p->if_link_state); + trunk->parent->if_link_state); } - VLAN_UNLOCK(); + TRUNK_UNLOCK(trunk); +} + +static void +vlan_capabilities(struct ifvlan *ifv) +{ + struct ifnet *p = PARENT(ifv); + struct ifnet *ifp = ifv->ifv_ifp; + + TRUNK_LOCK_ASSERT(TRUNK(ifv)); + + /* + * If the parent interface can do checksum offloading + * on VLANs, then propagate its hardware-assisted + * checksumming flags. Also assert that checksum + * offloading requires hardware VLAN tagging. 
+ */ + if (p->if_capabilities & IFCAP_VLAN_HWCSUM) + ifp->if_capabilities = p->if_capabilities & IFCAP_HWCSUM; + + if (p->if_capenable & IFCAP_VLAN_HWCSUM && + p->if_capenable & IFCAP_VLAN_HWTAGGING) { + ifp->if_capenable = p->if_capenable & IFCAP_HWCSUM; + ifp->if_hwassist = p->if_hwassist; + } else { + ifp->if_capenable = 0; + ifp->if_hwassist = 0; + } +} + +static void +vlan_trunk_capabilities(struct ifnet *ifp) +{ + struct ifvlantrunk *trunk = ifp->if_vlantrunk; + struct ifvlan *ifv; + int i; + + TRUNK_LOCK(trunk); +#ifdef VLAN_ARRAY + for (i = 0; i < EVL_VLID_MASK+1; i++) + if (trunk->vlans[i] != NULL) { + ifv = trunk->vlans[i]; +#else + for (i = 0; i < (1 << trunk->hwidth); i++) { + LIST_FOREACH(ifv, &trunk->hash[i], ifv_list) +#endif + vlan_capabilities(ifv); + } + TRUNK_UNLOCK(trunk); } static int @@ -915,8 +1259,8 @@ vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) case SIOCGIFMEDIA: VLAN_LOCK(); - if (ifv->ifv_p != NULL) { - error = (*ifv->ifv_p->if_ioctl)(ifv->ifv_p, + if (TRUNK(ifv) != NULL) { + error = (*PARENT(ifv)->if_ioctl)(PARENT(ifv), SIOCGIFMEDIA, data); VLAN_UNLOCK(); /* Limit the result to the parent's current config. */ @@ -946,9 +1290,9 @@ vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) * Set the interface MTU. */ VLAN_LOCK(); - if (ifv->ifv_p != NULL) { + if (TRUNK(ifv) != NULL) { if (ifr->ifr_mtu > - (ifv->ifv_p->if_mtu - ifv->ifv_mtufudge) || + (PARENT(ifv)->if_mtu - ifv->ifv_mtufudge) || ifr->ifr_mtu < (ifv->ifv_mintu - ifv->ifv_mtufudge)) error = EINVAL; @@ -985,15 +1329,10 @@ vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) error = EINVAL; break; } - VLAN_LOCK(); - error = vlan_config(ifv, p); - if (error) { - VLAN_UNLOCK(); + error = vlan_config(ifv, p, vlr.vlr_tag); + if (error) break; - } - ifv->ifv_tag = vlr.vlr_tag; ifp->if_drv_flags |= IFF_DRV_RUNNING; - VLAN_UNLOCK(); /* Update flags on the parent, if necessary. 
*/ vlan_setflags(ifp, 1); @@ -1002,8 +1341,8 @@ vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) case SIOCGETVLAN: bzero(&vlr, sizeof(vlr)); VLAN_LOCK(); - if (ifv->ifv_p) { - strlcpy(vlr.vlr_parent, ifv->ifv_p->if_xname, + if (TRUNK(ifv) != NULL) { + strlcpy(vlr.vlr_parent, PARENT(ifv)->if_xname, sizeof(vlr.vlr_parent)); vlr.vlr_tag = ifv->ifv_tag; } @@ -1016,16 +1355,20 @@ vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) * We should propagate selected flags to the parent, * e.g., promiscuous mode. */ - if (ifv->ifv_p != NULL) + if (TRUNK(ifv) != NULL) error = vlan_setflags(ifp, 1); break; case SIOCADDMULTI: case SIOCDELMULTI: - /*VLAN_LOCK();*/ - error = vlan_setmulti(ifp); - /*VLAN_UNLOCK();*/ + /* + * If we don't have a parent, just remember the membership for + * when we do. + */ + if (TRUNK(ifv) != NULL) + error = vlan_setmulti(ifp); break; + default: error = EINVAL; } diff --git a/sys/net/if_vlan_var.h b/sys/net/if_vlan_var.h index b1fecf6cd883..d4a2448efec7 100644 --- a/sys/net/if_vlan_var.h +++ b/sys/net/if_vlan_var.h @@ -93,8 +93,6 @@ struct vlanreq { * Note that a driver must indicate it supports hardware VLAN * tagging by marking IFCAP_VLAN_HWTAGGING in if_capabilities. */ -#define MTAG_VLAN 1035328035 -#define MTAG_VLAN_TAG 0 /* tag of VLAN interface */ /* * This macro must expand to a lvalue so that it can be used @@ -103,9 +101,8 @@ struct vlanreq { #define VLAN_TAG_VALUE(_mt) (*(u_int *)((_mt) + 1)) #define VLAN_INPUT_TAG(_ifp, _m, _t) do { \ - struct m_tag *mtag; \ - mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, \ - sizeof (u_int), M_NOWAIT); \ + struct m_tag *mtag = (struct m_tag *) \ + uma_zalloc(zone_mtag_vlan, M_NOWAIT); \ if (mtag != NULL) { \ VLAN_TAG_VALUE(mtag) = (_t); \ m_tag_prepend((_m), mtag); \ @@ -120,6 +117,13 @@ struct vlanreq { #define VLAN_OUTPUT_TAG(_ifp, _m) \ ((_m)->m_flags & M_VLANTAG ? 
\ m_tag_locate((_m), MTAG_VLAN, MTAG_VLAN_TAG, NULL) : NULL) + +#define VLAN_CAPABILITIES(_ifp) do { \ + if ((_ifp)->if_vlantrunk != NULL) \ + (*vlan_trunk_cap_p)(_ifp); \ +} while (0) + +extern void (*vlan_trunk_cap_p)(struct ifnet *); #endif /* _KERNEL */ #endif /* _NET_IF_VLAN_VAR_H_ */ diff --git a/sys/sys/mbuf.h b/sys/sys/mbuf.h index 81c44a612407..d762a290b112 100644 --- a/sys/sys/mbuf.h +++ b/sys/sys/mbuf.h @@ -337,6 +337,7 @@ extern uma_zone_t zone_jumbo4; extern uma_zone_t zone_jumbo9; extern uma_zone_t zone_jumbo16; extern uma_zone_t zone_ext_refcnt; +extern uma_zone_t zone_mtag_vlan; static __inline struct mbuf *m_get(int how, short type); static __inline struct mbuf *m_gethdr(int how, short type); @@ -760,6 +761,10 @@ struct mbuf *m_uiotombuf(struct uio *, int, int, int); #define PACKET_TAG_IPOPTIONS 27 /* Saved IP options */ #define PACKET_TAG_CARP 28 /* CARP info */ +/* Specific cookies and tags. */ +#define MTAG_VLAN 1035328035 +#define MTAG_VLAN_TAG 0 /* tag of VLAN interface */ + /* Packet tag routines. */ struct m_tag *m_tag_alloc(u_int32_t, int, int, int); void m_tag_delete(struct mbuf *, struct m_tag *);