- Utilize counter(9) to accumulate statistics on interface addresses. Add
  four counters to struct ifaddr. This kills '+=' on variables shared
  between processors for every packet.
- Nuke struct if_data from struct ifaddr.
- In ip_input() do not put a reference on ifaddr, instead update statistics
  right now in place and do IN_IFADDR_RUNLOCK(). This removes atomic(9)
  operations for every packet. [1]
- To properly support NET_RT_IFLISTL sysctl used by getifaddrs(3), in
  rtsock.c fill if_data fields using counter_u64_fetch().
- Accidentally fix a bug in the COMPAT_32 version of NET_RT_IFLISTL, which
  took if_data not from the ifaddr, but from the ifaddr's ifnet. [2]

Submitted by:	melifaro [1], pluknet [2]
Sponsored by:	Netflix
Sponsored by:	Nginx, Inc.
This commit is contained in:
glebius 2013-10-15 11:37:57 +00:00
parent bc71d67cbb
commit 790225cfbc
7 changed files with 77 additions and 32 deletions

View File

@ -1428,10 +1428,28 @@ ifa_alloc(size_t size, int flags)
if (ifa == NULL)
return (NULL);
if ((ifa->ifa_opackets = counter_u64_alloc(flags)) == NULL)
goto fail;
if ((ifa->ifa_ipackets = counter_u64_alloc(flags)) == NULL)
goto fail;
if ((ifa->ifa_obytes = counter_u64_alloc(flags)) == NULL)
goto fail;
if ((ifa->ifa_ibytes = counter_u64_alloc(flags)) == NULL)
goto fail;
refcount_init(&ifa->ifa_refcnt, 1);
ifa->if_data.ifi_datalen = sizeof(ifa->if_data);
return (ifa);
fail:
/* free(NULL) is okay */
counter_u64_free(ifa->ifa_opackets);
counter_u64_free(ifa->ifa_ipackets);
counter_u64_free(ifa->ifa_obytes);
counter_u64_free(ifa->ifa_ibytes);
free(ifa, M_IFADDR);
return (NULL);
}
void
@ -1446,6 +1464,10 @@ ifa_free(struct ifaddr *ifa)
{
if (refcount_release(&ifa->ifa_refcnt)) {
counter_u64_free(ifa->ifa_opackets);
counter_u64_free(ifa->ifa_ipackets);
counter_u64_free(ifa->ifa_obytes);
counter_u64_free(ifa->ifa_ibytes);
free(ifa, M_IFADDR);
}
}

View File

@ -83,6 +83,7 @@ struct vnet;
#include <sys/buf_ring.h>
#include <net/vnet.h>
#endif /* _KERNEL */
#include <sys/counter.h>
#include <sys/lock.h> /* XXX */
#include <sys/mutex.h> /* XXX */
#include <sys/rwlock.h> /* XXX */
@ -794,7 +795,6 @@ struct ifaddr {
struct sockaddr *ifa_dstaddr; /* other end of p-to-p link */
#define ifa_broadaddr ifa_dstaddr /* broadcast address interface */
struct sockaddr *ifa_netmask; /* used to determine subnet */
struct if_data if_data; /* not all members are meaningful */
struct ifnet *ifa_ifp; /* back-pointer to interface */
struct carp_softc *ifa_carp; /* pointer to CARP data */
TAILQ_ENTRY(ifaddr) ifa_link; /* queue macro glue */
@ -805,6 +805,11 @@ struct ifaddr {
int ifa_metric; /* cost of going out this interface */
int (*ifa_claim_addr) /* check if an addr goes to this if */
(struct ifaddr *, struct sockaddr *);
counter_u64_t ifa_ipackets;
counter_u64_t ifa_opackets;
counter_u64_t ifa_ibytes;
counter_u64_t ifa_obytes;
};
#endif

View File

@ -1751,7 +1751,17 @@ sysctl_iflist_ifaml(struct ifaddr *ifa, struct rt_addrinfo *info,
offsetof(struct ifa_msghdrl32, ifam_data);
ifam32->ifam_metric = ifa->ifa_metric;
copy_ifdata32(&ifa->ifa_ifp->if_data, &ifam32->ifam_data);
bzero(&ifam32->ifam_data, sizeof(ifam32->ifam_data));
ifam32->ifam_data.ifi_datalen = sizeof(struct if_data32);
ifam32->ifam_data.ifi_ipackets =
counter_u64_fetch(ifa->ifa_ipackets);
ifam32->ifam_data.ifi_opackets =
counter_u64_fetch(ifa->ifa_opackets);
ifam32->ifam_data.ifi_ibytes =
counter_u64_fetch(ifa->ifa_ibytes);
ifam32->ifam_data.ifi_obytes =
counter_u64_fetch(ifa->ifa_obytes);
/* Fixup if_data carp(4) vhid. */
if (carp_get_vhid_p != NULL)
ifam32->ifam_data.ifi_vhid = (*carp_get_vhid_p)(ifa);
@ -1769,7 +1779,13 @@ sysctl_iflist_ifaml(struct ifaddr *ifa, struct rt_addrinfo *info,
ifam->ifam_data_off = offsetof(struct ifa_msghdrl, ifam_data);
ifam->ifam_metric = ifa->ifa_metric;
ifam->ifam_data = ifa->if_data;
bzero(&ifam->ifam_data, sizeof(ifam->ifam_data));
ifam->ifam_data.ifi_datalen = sizeof(struct if_data);
ifam->ifam_data.ifi_ipackets = counter_u64_fetch(ifa->ifa_ipackets);
ifam->ifam_data.ifi_opackets = counter_u64_fetch(ifa->ifa_opackets);
ifam->ifam_data.ifi_ibytes = counter_u64_fetch(ifa->ifa_ibytes);
ifam->ifam_data.ifi_obytes = counter_u64_fetch(ifa->ifa_obytes);
/* Fixup if_data carp(4) vhid. */
if (carp_get_vhid_p != NULL)
ifam->ifam_data.ifi_vhid = (*carp_get_vhid_p)(ifa);

View File

@ -603,7 +603,9 @@ ip_input(struct mbuf *m)
*/
if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr &&
(!checkif || ia->ia_ifp == ifp)) {
ifa_ref(&ia->ia_ifa);
counter_u64_add(ia->ia_ifa.ifa_ipackets, 1);
counter_u64_add(ia->ia_ifa.ifa_ibytes,
m->m_pkthdr.len);
/* IN_IFADDR_RUNLOCK(); */
goto ours;
}
@ -626,13 +628,17 @@ ip_input(struct mbuf *m)
ia = ifatoia(ifa);
if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr ==
ip->ip_dst.s_addr) {
ifa_ref(ifa);
counter_u64_add(ia->ia_ifa.ifa_ipackets, 1);
counter_u64_add(ia->ia_ifa.ifa_ibytes,
m->m_pkthdr.len);
IF_ADDR_RUNLOCK(ifp);
goto ours;
}
#ifdef BOOTP_COMPAT
if (IA_SIN(ia)->sin_addr.s_addr == INADDR_ANY) {
ifa_ref(ifa);
counter_u64_add(ia->ia_ifa.ifa_ipackets, 1);
counter_u64_add(ia->ia_ifa.ifa_ibytes,
m->m_pkthdr.len);
IF_ADDR_RUNLOCK(ifp);
goto ours;
}
@ -717,20 +723,10 @@ ip_input(struct mbuf *m)
* IPSTEALTH: Process non-routing options only
* if the packet is destined for us.
*/
if (V_ipstealth && hlen > sizeof (struct ip) && ip_dooptions(m, 1)) {
if (ia != NULL)
ifa_free(&ia->ia_ifa);
if (V_ipstealth && hlen > sizeof (struct ip) && ip_dooptions(m, 1))
return;
}
#endif /* IPSTEALTH */
/* Count the packet in the ip address stats */
if (ia != NULL) {
ia->ia_ifa.if_ipackets++;
ia->ia_ifa.if_ibytes += m->m_pkthdr.len;
ifa_free(&ia->ia_ifa);
}
/*
* Attempt reassembly; if it succeeds, proceed.
* ip_reass() will return a different mbuf.

View File

@ -610,11 +610,12 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
*/
if (!(flags & IP_FORWARDING) && ia) {
if (m->m_pkthdr.csum_flags & CSUM_TSO)
ia->ia_ifa.if_opackets +=
m->m_pkthdr.len / m->m_pkthdr.tso_segsz;
counter_u64_add(ia->ia_ifa.ifa_opackets,
m->m_pkthdr.len / m->m_pkthdr.tso_segsz);
else
ia->ia_ifa.if_opackets++;
ia->ia_ifa.if_obytes += m->m_pkthdr.len;
counter_u64_add(ia->ia_ifa.ifa_opackets, 1);
counter_u64_add(ia->ia_ifa.ifa_obytes, m->m_pkthdr.len);
}
#ifdef MBUF_STRESS_TEST
if (mbuf_frag_size && m->m_pkthdr.len > mbuf_frag_size)
@ -651,8 +652,9 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
if (error == 0) {
/* Record statistics for this interface address. */
if (ia != NULL) {
ia->ia_ifa.if_opackets++;
ia->ia_ifa.if_obytes += m->m_pkthdr.len;
counter_u64_add(ia->ia_ifa.ifa_opackets, 1);
counter_u64_add(ia->ia_ifa.ifa_obytes,
m->m_pkthdr.len);
}
/*
* Reset layer specific mbuf flags

View File

@ -724,8 +724,9 @@ ip6_input(struct mbuf *m)
ia6 = (struct in6_ifaddr *)ifa;
if (!(ia6->ia6_flags & IN6_IFF_NOTREADY)) {
/* Count the packet in the ip address stats */
ia6->ia_ifa.if_ipackets++;
ia6->ia_ifa.if_ibytes += m->m_pkthdr.len;
counter_u64_add(ia6->ia_ifa.ifa_ipackets, 1);
counter_u64_add(ia6->ia_ifa.ifa_ibytes,
m->m_pkthdr.len);
/*
* record address information into m_tag.
@ -840,8 +841,9 @@ ip6_input(struct mbuf *m)
ours = 1;
deliverifp = ia6->ia_ifp; /* correct? */
/* Count the packet in the ip address stats */
ia6->ia_ifa.if_ipackets++;
ia6->ia_ifa.if_ibytes += m->m_pkthdr.len;
counter_u64_add(ia6->ia_ifa.ifa_ipackets, 1);
counter_u64_add(ia6->ia_ifa.ifa_ibytes,
m->m_pkthdr.len);
if (free_ia6)
ifa_free(&ia6->ia_ifa);
goto hbhcheck;

View File

@ -1021,8 +1021,9 @@ skip_ipsec2:;
ia6 = in6_ifawithifp(ifp, &ip6->ip6_src);
if (ia6) {
/* Record statistics for this interface address. */
ia6->ia_ifa.if_opackets++;
ia6->ia_ifa.if_obytes += m->m_pkthdr.len;
counter_u64_add(ia6->ia_ifa.ifa_opackets, 1);
counter_u64_add(ia6->ia_ifa.ifa_obytes,
m->m_pkthdr.len);
ifa_free(&ia6->ia_ifa);
}
error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
@ -1177,8 +1178,9 @@ skip_ipsec2:;
if (error == 0) {
/* Record statistics for this interface address. */
if (ia) {
ia->ia_ifa.if_opackets++;
ia->ia_ifa.if_obytes += m->m_pkthdr.len;
counter_u64_add(ia->ia_ifa.ifa_opackets, 1);
counter_u64_add(ia->ia_ifa.ifa_obytes,
m->m_pkthdr.len);
}
error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
} else