From 7caf4ab7acf8c73cbaf00c81077e095baa69908e Mon Sep 17 00:00:00 2001 From: Gleb Smirnoff Date: Tue, 15 Oct 2013 11:37:57 +0000 Subject: [PATCH] - Utilize counter(9) to accumulate statistics on interface addresses. Add four counters to struct ifaddr. This kills '+=' on variables shared between processors for every packet. - Nuke struct if_data from struct ifaddr. - In ip_input() do not put a reference on ifaddr; instead, update statistics right now in place and do IN_IFADDR_RUNLOCK(). This removes atomic(9) operations for every packet. [1] - To properly support NET_RT_IFLISTL sysctl used by getifaddrs(3), in rtsock.c fill if_data fields using counter_u64_fetch(). - Accidentally fix a bug in COMPAT_32 version of NET_RT_IFLISTL, which took if_data not from the ifaddr, but from ifaddr's ifnet. [2] Submitted by: melifaro [1], pluknet [2] Sponsored by: Netflix Sponsored by: Nginx, Inc. --- sys/net/if.c | 24 +++++++++++++++++++++++- sys/net/if_var.h | 7 ++++++- sys/net/rtsock.c | 20 ++++++++++++++++++-- sys/netinet/ip_input.c | 24 ++++++++++-------------- sys/netinet/ip_output.c | 14 ++++++++------ sys/netinet6/ip6_input.c | 10 ++++++---- sys/netinet6/ip6_output.c | 10 ++++++---- 7 files changed, 77 insertions(+), 32 deletions(-) diff --git a/sys/net/if.c b/sys/net/if.c index 28d1cddbed6f..17c36d12128a 100644 --- a/sys/net/if.c +++ b/sys/net/if.c @@ -1428,10 +1428,28 @@ ifa_alloc(size_t size, int flags) if (ifa == NULL) return (NULL); + if ((ifa->ifa_opackets = counter_u64_alloc(flags)) == NULL) + goto fail; + if ((ifa->ifa_ipackets = counter_u64_alloc(flags)) == NULL) + goto fail; + if ((ifa->ifa_obytes = counter_u64_alloc(flags)) == NULL) + goto fail; + if ((ifa->ifa_ibytes = counter_u64_alloc(flags)) == NULL) + goto fail; + refcount_init(&ifa->ifa_refcnt, 1); - ifa->if_data.ifi_datalen = sizeof(ifa->if_data); return (ifa); + +fail: + /* free(NULL) is okay */ + counter_u64_free(ifa->ifa_opackets); + counter_u64_free(ifa->ifa_ipackets); + counter_u64_free(ifa->ifa_obytes); + 
counter_u64_free(ifa->ifa_ibytes); + free(ifa, M_IFADDR); + + return (NULL); } void @@ -1446,6 +1464,10 @@ ifa_free(struct ifaddr *ifa) { if (refcount_release(&ifa->ifa_refcnt)) { + counter_u64_free(ifa->ifa_opackets); + counter_u64_free(ifa->ifa_ipackets); + counter_u64_free(ifa->ifa_obytes); + counter_u64_free(ifa->ifa_ibytes); free(ifa, M_IFADDR); } } diff --git a/sys/net/if_var.h b/sys/net/if_var.h index 59213e7a7dc0..73554a93f94f 100644 --- a/sys/net/if_var.h +++ b/sys/net/if_var.h @@ -83,6 +83,7 @@ struct vnet; #include <sys/buf_ring.h> #include <net/vnet.h> #endif /* _KERNEL */ +#include <sys/counter.h> #include <sys/lock.h> /* XXX */ #include <sys/mutex.h> /* XXX */ #include <sys/rwlock.h> /* XXX */ @@ -794,7 +795,6 @@ struct ifaddr { struct sockaddr *ifa_dstaddr; /* other end of p-to-p link */ #define ifa_broadaddr ifa_dstaddr /* broadcast address interface */ struct sockaddr *ifa_netmask; /* used to determine subnet */ - struct if_data if_data; /* not all members are meaningful */ struct ifnet *ifa_ifp; /* back-pointer to interface */ struct carp_softc *ifa_carp; /* pointer to CARP data */ TAILQ_ENTRY(ifaddr) ifa_link; /* queue macro glue */ @@ -805,6 +805,11 @@ struct ifaddr { int ifa_metric; /* cost of going out this interface */ int (*ifa_claim_addr) /* check if an addr goes to this if */ (struct ifaddr *, struct sockaddr *); + + counter_u64_t ifa_ipackets; + counter_u64_t ifa_opackets; + counter_u64_t ifa_ibytes; + counter_u64_t ifa_obytes; }; #endif diff --git a/sys/net/rtsock.c b/sys/net/rtsock.c index 0b5d1c5ebc50..e92eb8937027 100644 --- a/sys/net/rtsock.c +++ b/sys/net/rtsock.c @@ -1751,7 +1751,17 @@ sysctl_iflist_ifaml(struct ifaddr *ifa, struct rt_addrinfo *info, offsetof(struct ifa_msghdrl32, ifam_data); ifam32->ifam_metric = ifa->ifa_metric; - copy_ifdata32(&ifa->ifa_ifp->if_data, &ifam32->ifam_data); + bzero(&ifam32->ifam_data, sizeof(ifam32->ifam_data)); + ifam32->ifam_data.ifi_datalen = sizeof(struct if_data32); + ifam32->ifam_data.ifi_ipackets = + counter_u64_fetch(ifa->ifa_ipackets); + ifam32->ifam_data.ifi_opackets = + 
counter_u64_fetch(ifa->ifa_opackets); + ifam32->ifam_data.ifi_ibytes = + counter_u64_fetch(ifa->ifa_ibytes); + ifam32->ifam_data.ifi_obytes = + counter_u64_fetch(ifa->ifa_obytes); + /* Fixup if_data carp(4) vhid. */ if (carp_get_vhid_p != NULL) ifam32->ifam_data.ifi_vhid = (*carp_get_vhid_p)(ifa); @@ -1769,7 +1779,13 @@ sysctl_iflist_ifaml(struct ifaddr *ifa, struct rt_addrinfo *info, ifam->ifam_data_off = offsetof(struct ifa_msghdrl, ifam_data); ifam->ifam_metric = ifa->ifa_metric; - ifam->ifam_data = ifa->if_data; + bzero(&ifam->ifam_data, sizeof(ifam->ifam_data)); + ifam->ifam_data.ifi_datalen = sizeof(struct if_data); + ifam->ifam_data.ifi_ipackets = counter_u64_fetch(ifa->ifa_ipackets); + ifam->ifam_data.ifi_opackets = counter_u64_fetch(ifa->ifa_opackets); + ifam->ifam_data.ifi_ibytes = counter_u64_fetch(ifa->ifa_ibytes); + ifam->ifam_data.ifi_obytes = counter_u64_fetch(ifa->ifa_obytes); + /* Fixup if_data carp(4) vhid. */ if (carp_get_vhid_p != NULL) ifam->ifam_data.ifi_vhid = (*carp_get_vhid_p)(ifa); diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c index c265d02b4e28..4fe52d5721cc 100644 --- a/sys/netinet/ip_input.c +++ b/sys/netinet/ip_input.c @@ -603,7 +603,9 @@ passin: */ if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr && (!checkif || ia->ia_ifp == ifp)) { - ifa_ref(&ia->ia_ifa); + counter_u64_add(ia->ia_ifa.ifa_ipackets, 1); + counter_u64_add(ia->ia_ifa.ifa_ibytes, + m->m_pkthdr.len); /* IN_IFADDR_RUNLOCK(); */ goto ours; } @@ -626,13 +628,17 @@ passin: ia = ifatoia(ifa); if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr == ip->ip_dst.s_addr) { - ifa_ref(ifa); + counter_u64_add(ia->ia_ifa.ifa_ipackets, 1); + counter_u64_add(ia->ia_ifa.ifa_ibytes, + m->m_pkthdr.len); IF_ADDR_RUNLOCK(ifp); goto ours; } #ifdef BOOTP_COMPAT if (IA_SIN(ia)->sin_addr.s_addr == INADDR_ANY) { - ifa_ref(ifa); + counter_u64_add(ia->ia_ifa.ifa_ipackets, 1); + counter_u64_add(ia->ia_ifa.ifa_ibytes, + m->m_pkthdr.len); IF_ADDR_RUNLOCK(ifp); goto ours; } @@ -717,20 
+723,10 @@ ours: * IPSTEALTH: Process non-routing options only * if the packet is destined for us. */ - if (V_ipstealth && hlen > sizeof (struct ip) && ip_dooptions(m, 1)) { - if (ia != NULL) - ifa_free(&ia->ia_ifa); + if (V_ipstealth && hlen > sizeof (struct ip) && ip_dooptions(m, 1)) return; - } #endif /* IPSTEALTH */ - /* Count the packet in the ip address stats */ - if (ia != NULL) { - ia->ia_ifa.if_ipackets++; - ia->ia_ifa.if_ibytes += m->m_pkthdr.len; - ifa_free(&ia->ia_ifa); - } - /* * Attempt reassembly; if it succeeds, proceed. * ip_reass() will return a different mbuf. diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c index 464c85289e41..6880a5c14c35 100644 --- a/sys/netinet/ip_output.c +++ b/sys/netinet/ip_output.c @@ -610,11 +610,12 @@ passout: */ if (!(flags & IP_FORWARDING) && ia) { if (m->m_pkthdr.csum_flags & CSUM_TSO) - ia->ia_ifa.if_opackets += - m->m_pkthdr.len / m->m_pkthdr.tso_segsz; + counter_u64_add(ia->ia_ifa.ifa_opackets, + m->m_pkthdr.len / m->m_pkthdr.tso_segsz); else - ia->ia_ifa.if_opackets++; - ia->ia_ifa.if_obytes += m->m_pkthdr.len; + counter_u64_add(ia->ia_ifa.ifa_opackets, 1); + + counter_u64_add(ia->ia_ifa.ifa_obytes, m->m_pkthdr.len); } #ifdef MBUF_STRESS_TEST if (mbuf_frag_size && m->m_pkthdr.len > mbuf_frag_size) @@ -651,8 +652,9 @@ passout: if (error == 0) { /* Record statistics for this interface address. 
*/ if (ia != NULL) { - ia->ia_ifa.if_opackets++; - ia->ia_ifa.if_obytes += m->m_pkthdr.len; + counter_u64_add(ia->ia_ifa.ifa_opackets, 1); + counter_u64_add(ia->ia_ifa.ifa_obytes, + m->m_pkthdr.len); } /* * Reset layer specific mbuf flags diff --git a/sys/netinet6/ip6_input.c b/sys/netinet6/ip6_input.c index 54d5856ce094..5404c9549c0b 100644 --- a/sys/netinet6/ip6_input.c +++ b/sys/netinet6/ip6_input.c @@ -724,8 +724,9 @@ passin: ia6 = (struct in6_ifaddr *)ifa; if (!(ia6->ia6_flags & IN6_IFF_NOTREADY)) { /* Count the packet in the ip address stats */ - ia6->ia_ifa.if_ipackets++; - ia6->ia_ifa.if_ibytes += m->m_pkthdr.len; + counter_u64_add(ia6->ia_ifa.ifa_ipackets, 1); + counter_u64_add(ia6->ia_ifa.ifa_ibytes, + m->m_pkthdr.len); /* * record address information into m_tag. @@ -840,8 +841,9 @@ passin: ours = 1; deliverifp = ia6->ia_ifp; /* correct? */ /* Count the packet in the ip address stats */ - ia6->ia_ifa.if_ipackets++; - ia6->ia_ifa.if_ibytes += m->m_pkthdr.len; + counter_u64_add(ia6->ia_ifa.ifa_ipackets, 1); + counter_u64_add(ia6->ia_ifa.ifa_ibytes, + m->m_pkthdr.len); if (free_ia6) ifa_free(&ia6->ia_ifa); goto hbhcheck; diff --git a/sys/netinet6/ip6_output.c b/sys/netinet6/ip6_output.c index 81e3c9a8991d..143746420f59 100644 --- a/sys/netinet6/ip6_output.c +++ b/sys/netinet6/ip6_output.c @@ -1021,8 +1021,9 @@ passout: ia6 = in6_ifawithifp(ifp, &ip6->ip6_src); if (ia6) { /* Record statistics for this interface address. */ - ia6->ia_ifa.if_opackets++; - ia6->ia_ifa.if_obytes += m->m_pkthdr.len; + counter_u64_add(ia6->ia_ifa.ifa_opackets, 1); + counter_u64_add(ia6->ia_ifa.ifa_obytes, + m->m_pkthdr.len); ifa_free(&ia6->ia_ifa); } error = nd6_output(ifp, origifp, m, dst, ro->ro_rt); @@ -1177,8 +1178,9 @@ sendorfree: if (error == 0) { /* Record statistics for this interface address. 
*/ if (ia) { - ia->ia_ifa.if_opackets++; - ia->ia_ifa.if_obytes += m->m_pkthdr.len; + counter_u64_add(ia->ia_ifa.ifa_opackets, 1); + counter_u64_add(ia->ia_ifa.ifa_obytes, + m->m_pkthdr.len); } error = nd6_output(ifp, origifp, m, dst, ro->ro_rt); } else