freebsd-skq/sys/netipx/ipx_input.c
glebius 8a3e4bbebb - Remove rt_metrics_lite and simply put its members into rtentry.
- Use counter(9) for rt_pksent (former rt_rmx.rmx_pksent). This
  removes another cache trashing ++ from packet forwarding path.
- Create zini/fini methods for the rtentry UMA zone. Via initialize
  mutex and counter in them.
- Fix reporting of rmx_pksent to routing socket.
- Fix netstat(1) to report "Use" both in kvm(3) and sysctl(3) mode.

The change is mostly targeted for stable/10 merge. For head,
rt_pksent is expected to just disappear.

Discussed with:		melifaro
Sponsored by:		Netflix
Sponsored by:		Nginx, Inc.
2014-03-05 01:17:47 +00:00

470 lines
12 KiB
C

/*-
* Copyright (c) 1984, 1985, 1986, 1987, 1993
* The Regents of the University of California.
* Copyright (c) 2004-2009 Robert N. M. Watson
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* Copyright (c) 1995, Mike Mitchell
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)ipx_input.c
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/kernel.h>
#include <sys/random.h>
#include <sys/lock.h>
#include <sys/rwlock.h>
#include <sys/sysctl.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/route.h>
#include <net/netisr.h>
#include <netipx/ipx.h>
#include <netipx/spx.h>
#include <netipx/ipx_if.h>
#include <netipx/ipx_pcb.h>
#include <netipx/ipx_var.h>
int ipxcksum = 0;
SYSCTL_INT(_net_ipx_ipx, OID_AUTO, checksum, CTLFLAG_RW,
&ipxcksum, 0, "Compute ipx checksum");
static int ipxprintfs = 0; /* printing forwarding information */
SYSCTL_INT(_net_ipx_ipx, OID_AUTO, ipxprintfs, CTLFLAG_RW,
&ipxprintfs, 0, "Printing forwarding information");
static int ipxforwarding = 0;
SYSCTL_INT(_net_ipx_ipx, OID_AUTO, ipxforwarding, CTLFLAG_RW,
&ipxforwarding, 0, "Enable ipx forwarding");
static int ipxnetbios = 0;
SYSCTL_INT(_net_ipx, OID_AUTO, ipxnetbios, CTLFLAG_RW,
&ipxnetbios, 0, "Propagate netbios over ipx");
static int ipx_do_route(struct ipx_addr *src, struct route *ro);
static void ipx_undo_route(struct route *ro);
static void ipx_forward(struct mbuf *m);
static void ipxintr(struct mbuf *m);
const union ipx_net ipx_zeronet;
const union ipx_net ipx_broadnet = { .s_net[0] = 0xffff,
.s_net[1] = 0xffff };
const union ipx_host ipx_broadhost = { .s_host[0] = 0xffff,
.s_host[1] = 0xffff,
.s_host[2] = 0xffff };
struct ipxstat ipxstat;
struct sockaddr_ipx ipx_netmask, ipx_hostmask;
/*
* IPX protocol control block (pcb) lists.
*/
struct mtx ipxpcb_list_mtx;
struct ipxpcbhead ipxpcb_list;
struct ipxpcbhead ipxrawpcb_list;
static struct netisr_handler ipx_nh = {
.nh_name = "ipx",
.nh_handler = ipxintr,
.nh_proto = NETISR_IPX,
.nh_policy = NETISR_POLICY_SOURCE,
};
long ipx_pexseq; /* Locked with ipxpcb_list_mtx. */
/*
* IPX initialization.
*/
void
ipx_init(void)
{
read_random(&ipx_pexseq, sizeof ipx_pexseq);
LIST_INIT(&ipxpcb_list);
LIST_INIT(&ipxrawpcb_list);
TAILQ_INIT(&ipx_ifaddrhead);
IPX_LIST_LOCK_INIT();
IPX_IFADDR_LOCK_INIT();
ipx_netmask.sipx_len = 6;
ipx_netmask.sipx_addr.x_net = ipx_broadnet;
ipx_hostmask.sipx_len = 12;
ipx_hostmask.sipx_addr.x_net = ipx_broadnet;
ipx_hostmask.sipx_addr.x_host = ipx_broadhost;
netisr_register(&ipx_nh);
}
/*
* IPX input routine. Pass to next level.
*/
static void
ipxintr(struct mbuf *m)
{
struct ipx *ipx;
struct ipxpcb *ipxp;
struct ipx_ifaddr *ia;
int len;
/*
* If no IPX addresses have been set yet but the interfaces
* are receiving, can't do anything with incoming packets yet.
*/
if (TAILQ_EMPTY(&ipx_ifaddrhead)) {
m_freem(m);
return;
}
ipxstat.ipxs_total++;
if ((m->m_flags & M_EXT || m->m_len < sizeof(struct ipx)) &&
(m = m_pullup(m, sizeof(struct ipx))) == NULL) {
ipxstat.ipxs_toosmall++;
return;
}
/*
* Give any raw listeners a crack at the packet
*/
IPX_LIST_LOCK();
LIST_FOREACH(ipxp, &ipxrawpcb_list, ipxp_list) {
struct mbuf *m1 = m_copy(m, 0, (int)M_COPYALL);
if (m1 != NULL) {
IPX_LOCK(ipxp);
ipx_input(m1, ipxp);
IPX_UNLOCK(ipxp);
}
}
IPX_LIST_UNLOCK();
ipx = mtod(m, struct ipx *);
len = ntohs(ipx->ipx_len);
/*
* Check that the amount of data in the buffers
* is as at least much as the IPX header would have us expect.
* Trim mbufs if longer than we expect.
* Drop packet if shorter than we expect.
*/
if (m->m_pkthdr.len < len) {
ipxstat.ipxs_tooshort++;
m_freem(m);
return;
}
if (m->m_pkthdr.len > len) {
if (m->m_len == m->m_pkthdr.len) {
m->m_len = len;
m->m_pkthdr.len = len;
} else
m_adj(m, len - m->m_pkthdr.len);
}
if (ipxcksum && ipx->ipx_sum != 0xffff) {
if (ipx->ipx_sum != ipx_cksum(m, len)) {
ipxstat.ipxs_badsum++;
m_freem(m);
return;
}
}
/*
* Propagated (Netbios) packets (type 20) has to be handled
* different. :-(
*/
if (ipx->ipx_pt == IPXPROTO_NETBIOS) {
if (ipxnetbios) {
ipx_output_type20(m);
return;
} else {
m_freem(m);
return;
}
}
/*
* Is this a directed broadcast?
*/
if (ipx_hosteqnh(ipx_broadhost,ipx->ipx_dna.x_host)) {
if ((!ipx_neteq(ipx->ipx_dna, ipx->ipx_sna)) &&
(!ipx_neteqnn(ipx->ipx_dna.x_net, ipx_broadnet)) &&
(!ipx_neteqnn(ipx->ipx_sna.x_net, ipx_zeronet)) &&
(!ipx_neteqnn(ipx->ipx_dna.x_net, ipx_zeronet)) ) {
/*
* If it is a broadcast to the net where it was
* received from, treat it as ours.
*/
IPX_IFADDR_RLOCK();
TAILQ_FOREACH(ia, &ipx_ifaddrhead, ia_link) {
if ((ia->ia_ifa.ifa_ifp == m->m_pkthdr.rcvif)
&& ipx_neteq(ia->ia_addr.sipx_addr,
ipx->ipx_dna)) {
IPX_IFADDR_RUNLOCK();
goto ours;
}
}
IPX_IFADDR_RUNLOCK();
/*
* Look to see if I need to eat this packet.
* Algorithm is to forward all young packets
* and prematurely age any packets which will
* by physically broadcasted.
* Any very old packets eaten without forwarding
* would die anyway.
*
* Suggestion of Bill Nesheim, Cornell U.
*/
if (ipx->ipx_tc < IPX_MAXHOPS) {
ipx_forward(m);
return;
}
}
/*
* Is this our packet? If not, forward.
*/
} else {
IPX_IFADDR_RLOCK();
TAILQ_FOREACH(ia, &ipx_ifaddrhead, ia_link) {
if (ipx_hosteq(ipx->ipx_dna, ia->ia_addr.sipx_addr) &&
(ipx_neteq(ipx->ipx_dna, ia->ia_addr.sipx_addr) ||
ipx_neteqnn(ipx->ipx_dna.x_net, ipx_zeronet)))
break;
}
IPX_IFADDR_RUNLOCK();
if (ia == NULL) {
ipx_forward(m);
return;
}
}
ours:
/*
* Locate pcb for datagram.
*/
IPX_LIST_LOCK();
ipxp = ipx_pcblookup(&ipx->ipx_sna, ipx->ipx_dna.x_port, IPX_WILDCARD);
/*
* Switch out to protocol's input routine.
*/
if (ipxp != NULL) {
ipxstat.ipxs_delivered++;
if ((ipxp->ipxp_flags & IPXP_ALL_PACKETS) == 0)
switch (ipx->ipx_pt) {
case IPXPROTO_SPX:
IPX_LOCK(ipxp);
/* Will release both locks. */
spx_input(m, ipxp);
return;
}
IPX_LOCK(ipxp);
ipx_input(m, ipxp);
IPX_UNLOCK(ipxp);
} else
m_freem(m);
IPX_LIST_UNLOCK();
}
void
ipx_ctlinput(cmd, arg_as_sa, dummy)
int cmd;
struct sockaddr *arg_as_sa; /* XXX should be swapped with dummy */
void *dummy;
{
/* Currently, nothing. */
}
/*
* Forward a packet. If some error occurs drop the packet. IPX don't
* have a way to return errors to the sender.
*/
static struct route ipx_droute;
static struct route ipx_sroute;
static void
ipx_forward(struct mbuf *m)
{
struct ipx *ipx = mtod(m, struct ipx *);
int error;
int agedelta = 1;
int flags = IPX_FORWARDING;
int ok_there = 0;
int ok_back = 0;
if (ipxforwarding == 0) {
/* can't tell difference between net and host */
ipxstat.ipxs_cantforward++;
m_freem(m);
goto cleanup;
}
ipx->ipx_tc++;
if (ipx->ipx_tc > IPX_MAXHOPS) {
ipxstat.ipxs_cantforward++;
m_freem(m);
goto cleanup;
}
if ((ok_there = ipx_do_route(&ipx->ipx_dna,&ipx_droute)) == 0) {
ipxstat.ipxs_noroute++;
m_freem(m);
goto cleanup;
}
/*
* Here we think about forwarding broadcast packets,
* so we try to insure that it doesn't go back out
* on the interface it came in on. Also, if we
* are going to physically broadcast this, let us
* age the packet so we can eat it safely the second time around.
*/
if (ipx->ipx_dna.x_host.c_host[0] & 0x1) {
struct ipx_ifaddr *ia;
struct ifnet *ifp;
IPX_IFADDR_RLOCK();
ia = ipx_iaonnetof(&ipx->ipx_dna);
if (ia != NULL) {
/* I'm gonna hafta eat this packet */
agedelta += IPX_MAXHOPS - ipx->ipx_tc;
ipx->ipx_tc = IPX_MAXHOPS;
}
IPX_IFADDR_RUNLOCK();
if ((ok_back = ipx_do_route(&ipx->ipx_sna,&ipx_sroute)) == 0) {
/* error = ENETUNREACH; He'll never get it! */
ipxstat.ipxs_noroute++;
m_freem(m);
goto cleanup;
}
if (ipx_droute.ro_rt &&
(ifp = ipx_droute.ro_rt->rt_ifp) &&
ipx_sroute.ro_rt &&
(ifp != ipx_sroute.ro_rt->rt_ifp)) {
flags |= IPX_ALLOWBROADCAST;
} else {
ipxstat.ipxs_noroute++;
m_freem(m);
goto cleanup;
}
}
/*
* We don't need to recompute checksum because ipx_tc field
* is ignored by checksum calculation routine, however
* it may be desirable to reset checksum if ipxcksum == 0
*/
#if 0
if (!ipxcksum)
ipx->ipx_sum = 0xffff;
#endif
error = ipx_outputfl(m, &ipx_droute, flags);
if (error == 0) {
ipxstat.ipxs_forward++;
if (ipxprintfs) {
printf("forward: ");
ipx_printhost(&ipx->ipx_sna);
printf(" to ");
ipx_printhost(&ipx->ipx_dna);
printf(" hops %d\n", ipx->ipx_tc);
}
}
cleanup:
if (ok_there)
ipx_undo_route(&ipx_droute);
if (ok_back)
ipx_undo_route(&ipx_sroute);
}
static int
ipx_do_route(struct ipx_addr *src, struct route *ro)
{
struct sockaddr_ipx *dst;
bzero((caddr_t)ro, sizeof(*ro));
dst = (struct sockaddr_ipx *)&ro->ro_dst;
dst->sipx_len = sizeof(*dst);
dst->sipx_family = AF_IPX;
dst->sipx_addr = *src;
dst->sipx_addr.x_port = 0;
rtalloc_ign(ro, 0);
if (ro->ro_rt == NULL || ro->ro_rt->rt_ifp == NULL) {
return (0);
}
counter_u64_add(ro->ro_rt->rt_pksent, 1);
return (1);
}
static void
ipx_undo_route(struct route *ro)
{
if (ro->ro_rt != NULL) {
RTFREE(ro->ro_rt);
}
}