diff --git a/sbin/ifconfig/ifconfig.8 b/sbin/ifconfig/ifconfig.8 index d9d23f9c39ff..3af620bf9151 100644 --- a/sbin/ifconfig/ifconfig.8 +++ b/sbin/ifconfig/ifconfig.8 @@ -2309,6 +2309,21 @@ Set the aggregation protocol. The default is failover. The available options are failover, fec, lacp, loadbalance, roundrobin and none. +.It Cm lagghash Ar option Ns Oo , Ns Ar option Oc +Set the packet layers to hash for aggregation protocols which load balance. +The default is +.Dq l2,l3,l4 . +The options can be combined using commas. +.Pp +.Bl -tag -width ".Cm l2" -compact +.It Cm l2 +src/dst mac address and optional vlan number. +.It Cm l3 +src/dst address for IPv4 or IPv6. +.It Cm l4 +src/dst port for TCP/UCP/SCTP. +.El +.Pp .El .Pp The following parameters are specific to IP tunnel interfaces, diff --git a/sbin/ifconfig/iflagg.c b/sbin/ifconfig/iflagg.c index 82e6ccb53a49..ed39868e1bdc 100644 --- a/sbin/ifconfig/iflagg.c +++ b/sbin/ifconfig/iflagg.c @@ -81,6 +81,36 @@ setlaggproto(const char *val, int d, int s, const struct afswtch *afp) err(1, "SIOCSLAGG"); } +static void +setlagghash(const char *val, int d, int s, const struct afswtch *afp) +{ + struct lagg_reqflags rf; + char *str, *tmp, *tok; + + + rf.rf_flags = 0; + str = tmp = strdup(val); + while ((tok = strsep(&tmp, ",")) != NULL) { + if (strcmp(tok, "l2") == 0) + rf.rf_flags |= LAGG_F_HASHL2; + else if (strcmp(tok, "l3") == 0) + rf.rf_flags |= LAGG_F_HASHL3; + else if (strcmp(tok, "l4") == 0) + rf.rf_flags |= LAGG_F_HASHL4; + else { + free(str); + errx(1, "Invalid lagghash option: %s", tok); + } + } + free(str); + if (rf.rf_flags == 0) + errx(1, "No lagghash options supplied"); + + strlcpy(rf.rf_ifname, name, sizeof(rf.rf_ifname)); + if (ioctl(s, SIOCSLAGGHASH, &rf)) + err(1, "SIOCSLAGGHASH"); +} + static char * lacp_format_mac(const uint8_t *mac, char *buf, size_t buflen) { @@ -115,6 +145,7 @@ lagg_status(int s) struct lagg_protos lpr[] = LAGG_PROTOS; struct lagg_reqport rp, rpbuf[LAGG_MAX_PORTS]; struct lagg_reqall ra; + struct lagg_reqflags rf; struct lacp_opreq *lp; const char *proto = ""; int i, isport = 0; @@ -132,6 +163,10 @@ lagg_status(int s) ra.ra_size = sizeof(rpbuf); ra.ra_port = rpbuf; + strlcpy(rf.rf_ifname, name, sizeof(rf.rf_ifname)); + if (ioctl(s, SIOCGLAGGFLAGS, &rf) != 0) + rf.rf_flags = 0; + if (ioctl(s, SIOCGLAGG, &ra) == 0) { lp = (struct lacp_opreq *)&ra.ra_lacpreq; @@ -143,6 +178,23 @@ lagg_status(int s) } printf("\tlaggproto %s", proto); + if (rf.rf_flags & LAGG_F_HASHMASK) { + const char *sep = ""; + + printf(" lagghash "); + if (rf.rf_flags & LAGG_F_HASHL2) { + printf("%sl2", sep); + sep = ","; + } + if (rf.rf_flags & LAGG_F_HASHL3) { + printf("%sl3", sep); + sep = ","; + } + if (rf.rf_flags & LAGG_F_HASHL4) { + printf("%sl4", sep); + sep = ","; + } + } if (isport) printf(" laggdev %s", rp.rp_ifname); putchar('\n'); @@ -174,6 +226,7 @@ static struct cmd lagg_cmds[] = { DEF_CMD_ARG("laggport", setlaggport), DEF_CMD_ARG("-laggport", unsetlaggport), DEF_CMD_ARG("laggproto", setlaggproto), + DEF_CMD_ARG("lagghash", setlagghash), }; static struct afswtch af_lagg = { .af_name = "af_lagg", diff --git a/sys/net/ieee8023ad_lacp.c b/sys/net/ieee8023ad_lacp.c index ea838669e863..137e27313c36 100644 --- a/sys/net/ieee8023ad_lacp.c +++ b/sys/net/ieee8023ad_lacp.c @@ -815,7 +815,7 @@ lacp_select_tx_port(struct lagg_softc *sc, struct mbuf *m) if (sc->use_flowid && (m->m_flags & M_FLOWID)) hash = m->m_pkthdr.flowid; else - hash = lagg_hashmbuf(m, lsc->lsc_hashkey); + hash = lagg_hashmbuf(sc, m, lsc->lsc_hashkey); hash %= pm->pm_count; lp = pm->pm_map[hash]; diff --git a/sys/net/if_lagg.c b/sys/net/if_lagg.c index 5d12187c93c6..892a256aaea8 100644 --- a/sys/net/if_lagg.c +++ b/sys/net/if_lagg.c @@ -285,6 +285,8 @@ lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params) SYSCTL_ADD_INT(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO, "use_flowid", CTLTYPE_INT|CTLFLAG_RW, &sc->use_flowid, sc->use_flowid, "Use flow id for load sharing"); + /* Hash all layers by default */ + sc->sc_flags = LAGG_F_HASHL2|LAGG_F_HASHL3|LAGG_F_HASHL4; sc->sc_proto = LAGG_PROTO_NONE; for (i = 0; lagg_protos[i].ti_proto != LAGG_PROTO_NONE; i++) { @@ -895,6 +897,7 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc; struct lagg_reqall *ra = (struct lagg_reqall *)data; struct lagg_reqport *rp = (struct lagg_reqport *)data, rpbuf; + struct lagg_reqflags *rf = (struct lagg_reqflags *)data; struct ifreq *ifr = (struct ifreq *)data; struct lagg_port *lp; struct ifnet *tpif; @@ -984,6 +987,22 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) } error = EPROTONOSUPPORT; break; + case SIOCGLAGGFLAGS: + rf->rf_flags = sc->sc_flags; + break; + case SIOCSLAGGHASH: + error = priv_check(td, PRIV_NET_LAGG); + if (error) + break; + if ((rf->rf_flags & LAGG_F_HASHMASK) == 0) { + error = EINVAL; + break; + } + LAGG_WLOCK(sc); + sc->sc_flags &= ~LAGG_F_HASHMASK; + sc->sc_flags |= rf->rf_flags & LAGG_F_HASHMASK; + LAGG_WUNLOCK(sc); + break; case SIOCGLAGGPORT: if (rp->rp_portname[0] == '\0' || (tpif = ifunit(rp->rp_portname)) == NULL) { @@ -1413,34 +1432,46 @@ lagg_gethdr(struct mbuf *m, u_int off, u_int len, void *buf) } uint32_t -lagg_hashmbuf(struct mbuf *m, uint32_t key) +lagg_hashmbuf(struct lagg_softc *sc, struct mbuf *m, uint32_t key) { uint16_t etype; - uint32_t p = 0; + uint32_t p = key; int off; struct ether_header *eh; struct ether_vlan_header vlanbuf; const struct ether_vlan_header *vlan; #ifdef INET const struct ip *ip; - struct ip ipbuf; + const uint32_t *ports; + int iphlen; #endif #ifdef INET6 const struct ip6_hdr *ip6; - struct ip6_hdr ip6buf; uint32_t flow; #endif + union { +#ifdef INET + struct ip ip; +#endif +#ifdef INET6 + struct ip6_hdr ip6; +#endif + uint32_t port; + } buf; + off = sizeof(*eh); if (m->m_len < off) goto out; eh = mtod(m, struct ether_header *); etype = ntohs(eh->ether_type); - p = hash32_buf(&eh->ether_shost, ETHER_ADDR_LEN, key); - p = hash32_buf(&eh->ether_dhost, ETHER_ADDR_LEN, p); + if (sc->sc_flags & LAGG_F_HASHL2) { + p = hash32_buf(&eh->ether_shost, ETHER_ADDR_LEN, p); + p = hash32_buf(&eh->ether_dhost, ETHER_ADDR_LEN, p); + } /* Special handling for encapsulating VLAN frames */ - if (m->m_flags & M_VLANTAG) { + if ((m->m_flags & M_VLANTAG) && (sc->sc_flags & LAGG_F_HASHL2)) { p = hash32_buf(&m->m_pkthdr.ether_vtag, sizeof(m->m_pkthdr.ether_vtag), p); } else if (etype == ETHERTYPE_VLAN) { @@ -1448,7 +1479,8 @@ lagg_hashmbuf(struct mbuf *m, uint32_t key) if (vlan == NULL) goto out; - p = hash32_buf(&vlan->evl_tag, sizeof(vlan->evl_tag), p); + if (sc->sc_flags & LAGG_F_HASHL2) + p = hash32_buf(&vlan->evl_tag, sizeof(vlan->evl_tag), p); etype = ntohs(vlan->evl_proto); off += sizeof(*vlan) - sizeof(*eh); } @@ -1456,17 +1488,37 @@ lagg_hashmbuf(struct mbuf *m, uint32_t key) switch (etype) { #ifdef INET case ETHERTYPE_IP: - ip = lagg_gethdr(m, off, sizeof(*ip), &ipbuf); + ip = lagg_gethdr(m, off, sizeof(*ip), &buf); if (ip == NULL) goto out; - p = hash32_buf(&ip->ip_src, sizeof(struct in_addr), p); - p = hash32_buf(&ip->ip_dst, sizeof(struct in_addr), p); + if (sc->sc_flags & LAGG_F_HASHL3) { + p = hash32_buf(&ip->ip_src, sizeof(struct in_addr), p); + p = hash32_buf(&ip->ip_dst, sizeof(struct in_addr), p); + } + if (!(sc->sc_flags & LAGG_F_HASHL4)) + break; + switch (ip->ip_p) { + case IPPROTO_TCP: + case IPPROTO_UDP: + case IPPROTO_SCTP: + iphlen = ip->ip_hl << 2; + if (iphlen < sizeof(*ip)) + break; + off += iphlen; + ports = lagg_gethdr(m, off, sizeof(*ports), &buf); + if (ports == NULL) + break; + p = hash32_buf(ports, sizeof(*ports), p); + break; + } break; #endif #ifdef INET6 case ETHERTYPE_IPV6: - ip6 = lagg_gethdr(m, off, sizeof(*ip6), &ip6buf); + if (!(sc->sc_flags & LAGG_F_HASHL3)) + break; + ip6 = lagg_gethdr(m, off, sizeof(*ip6), &buf); if (ip6 == NULL) goto out; @@ -1696,7 +1748,7 @@ lagg_lb_start(struct lagg_softc *sc, struct mbuf *m) if (sc->use_flowid && (m->m_flags & M_FLOWID)) p = m->m_pkthdr.flowid; else - p = lagg_hashmbuf(m, lb->lb_key); + p = lagg_hashmbuf(sc, m, lb->lb_key); p %= sc->sc_count; lp = lb->lb_ports[p]; diff --git a/sys/net/if_lagg.h b/sys/net/if_lagg.h index 823176449645..27ab46f2ffa5 100644 --- a/sys/net/if_lagg.h +++ b/sys/net/if_lagg.h @@ -31,6 +31,12 @@ #define LAGG_MAX_NAMESIZE 32 /* name of a protocol */ #define LAGG_MAX_STACKING 4 /* maximum number of stacked laggs */ +/* Lagg flags */ +#define LAGG_F_HASHL2 0x00000001 /* hash layer 2 */ +#define LAGG_F_HASHL3 0x00000002 /* hash layer 3 */ +#define LAGG_F_HASHL4 0x00000004 /* hash layer 4 */ +#define LAGG_F_HASHMASK 0x00000007 + /* Port flags */ #define LAGG_PORT_SLAVE 0x00000000 /* normal enslaved port */ #define LAGG_PORT_MASTER 0x00000001 /* primary port */ @@ -122,6 +128,14 @@ struct lagg_reqall { #define SIOCGLAGG _IOWR('i', 143, struct lagg_reqall) #define SIOCSLAGG _IOW('i', 144, struct lagg_reqall) +struct lagg_reqflags { + char rf_ifname[IFNAMSIZ]; /* name of the lagg */ + uint32_t rf_flags; /* lagg protocol */ +}; + +#define SIOCGLAGGFLAGS _IOWR('i', 145, struct lagg_reqflags) +#define SIOCSLAGGHASH _IOW('i', 146, struct lagg_reqflags) + #ifdef _KERNEL /* * Internal kernel part @@ -179,6 +193,7 @@ struct lagg_softc { struct ifmedia sc_media; /* media config */ caddr_t sc_psc; /* protocol data */ uint32_t sc_seq; /* sequence counter */ + uint32_t sc_flags; SLIST_HEAD(__tplhd, lagg_port) sc_ports; /* list of interfaces */ SLIST_ENTRY(lagg_softc) sc_entries; @@ -244,7 +259,7 @@ extern struct mbuf *(*lagg_input_p)(struct ifnet *, struct mbuf *); extern void (*lagg_linkstate_p)(struct ifnet *, int ); int lagg_enqueue(struct ifnet *, struct mbuf *); -uint32_t lagg_hashmbuf(struct mbuf *, uint32_t); +uint32_t lagg_hashmbuf(struct lagg_softc *, struct mbuf *, uint32_t); #endif /* _KERNEL */