From 7790c8c1996ad89a22b8bd194a230cf23ee67f4b Mon Sep 17 00:00:00 2001 From: Conrad Meyer Date: Thu, 17 Oct 2019 16:23:03 +0000 Subject: [PATCH] Split out a more generic debugnet(4) from netdump(4) Debugnet is a simplistic and specialized panic- or debug-time reliable datagram transport. It can drive a single connection at a time and is currently unidirectional (debug/panic machine transmit to remote server only). It is mostly a verbatim code lift from netdump(4). Netdump(4) remains the only consumer (until the rest of this patch series lands). The INET-specific logic has been extracted somewhat more thoroughly than previously in netdump(4), into debugnet_inet.c. UDP-layer logic and up, to the extent that it is protocol-independent, remains in debugnet.c. The separation is not perfect and future improvement is welcome. Supporting INET6 is a long-term goal. Much of the diff is "gratuitous" renaming from 'netdump_' or 'nd_' to 'debugnet_' or 'dn_' -- sorry. I thought keeping the netdump name on the generic module would be more confusing than the refactoring. The only functional change here is the mbuf allocation / tracking. Instead of initiating solely on netdump-configured interface(s) at dumpon(8) configuration time, we watch for any debugnet-enabled NIC for link activation and query it for mbuf parameters at that time. If they exceed the existing high-water mark allocation, we re-allocate and track the new high-water mark. Otherwise, we leave the pre-panic mbuf allocation alone. In a future patch in this series, this will allow initiating netdump from panic ddb(4) without pre-panic configuration. No other functional change intended. 
Reviewed by: markj (earlier version) Some discussion with: emaste, jhb Objection from: marius Differential Revision: https://reviews.freebsd.org/D21421 --- sys/amd64/conf/GENERIC | 1 + sys/arm64/conf/GENERIC | 1 + sys/conf/NOTES | 4 + sys/conf/files | 2 + sys/conf/options | 2 + sys/dev/alc/if_alc.c | 22 +- sys/dev/bge/if_bge.c | 22 +- sys/dev/bxe/bxe.c | 20 +- sys/dev/bxe/bxe.h | 2 +- sys/dev/cxgb/cxgb_adapter.h | 8 +- sys/dev/cxgb/cxgb_main.c | 30 +- sys/dev/cxgb/cxgb_sge.c | 12 +- sys/dev/mlx4/mlx4_en/en.h | 2 +- sys/dev/mlx4/mlx4_en/mlx4_en_netdev.c | 18 +- sys/dev/re/if_re.c | 21 +- sys/dev/virtio/network/if_vtnet.c | 24 +- sys/i386/conf/GENERIC | 1 + sys/kern/kern_mbuf.c | 163 +++-- sys/net/debugnet.c | 655 ++++++++++++++++++ sys/net/debugnet.h | 211 ++++++ sys/net/debugnet_inet.c | 485 ++++++++++++++ sys/net/debugnet_int.h | 91 +++ sys/net/if.c | 4 +- sys/net/if_var.h | 6 +- sys/net/iflib.c | 20 +- sys/netinet/netdump/netdump.h | 79 +-- sys/netinet/netdump/netdump_client.c | 912 ++------------------------ sys/powerpc/conf/GENERIC | 1 + sys/powerpc/conf/GENERIC64 | 1 + sys/sparc64/conf/GENERIC | 1 + sys/sys/mbuf.h | 11 +- sys/sys/param.h | 2 +- 32 files changed, 1730 insertions(+), 1104 deletions(-) create mode 100644 sys/net/debugnet.c create mode 100644 sys/net/debugnet.h create mode 100644 sys/net/debugnet_inet.c create mode 100644 sys/net/debugnet_int.h diff --git a/sys/amd64/conf/GENERIC b/sys/amd64/conf/GENERIC index 5688ec1f60a8..a3e75a7ac136 100644 --- a/sys/amd64/conf/GENERIC +++ b/sys/amd64/conf/GENERIC @@ -111,6 +111,7 @@ options VERBOSE_SYSINIT=0 # Support debug.verbose_sysinit, off by default options EKCD # Support for encrypted kernel dumps options GZIO # gzip-compressed kernel and user dumps options ZSTDIO # zstd-compressed kernel and user dumps +options DEBUGNET # debugnet networking options NETDUMP # netdump(4) client support # Make an SMP-capable kernel by default diff --git a/sys/arm64/conf/GENERIC b/sys/arm64/conf/GENERIC index 
dfb7dc63a234..33e2bbf1ecc1 100644 --- a/sys/arm64/conf/GENERIC +++ b/sys/arm64/conf/GENERIC @@ -103,6 +103,7 @@ options VERBOSE_SYSINIT=0 # Support debug.verbose_sysinit, off by default options EKCD # Support for encrypted kernel dumps options GZIO # gzip-compressed kernel and user dumps options ZSTDIO # zstd-compressed kernel and user dumps +options DEBUGNET # debugnet networking options NETDUMP # netdump(4) client support # SoC support diff --git a/sys/conf/NOTES b/sys/conf/NOTES index 96872cbe04e8..7892a2391bc8 100644 --- a/sys/conf/NOTES +++ b/sys/conf/NOTES @@ -1042,6 +1042,10 @@ options DUMMYNET # This allows a panicking kernel to transmit a kernel dump to a remote host. options NETDUMP +# The DEBUGNET option enables a basic debug/panic-time networking API. It +# is used by NETDUMP. +options DEBUGNET + ##################################################################### # FILESYSTEM OPTIONS diff --git a/sys/conf/files b/sys/conf/files index 48d9fde103ca..d910a1f0ce17 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -4077,6 +4077,8 @@ net/mp_ring.c optional ether iflib net/mppcc.c optional netgraph_mppc_compression net/mppcd.c optional netgraph_mppc_compression net/netisr.c standard +net/debugnet.c optional inet debugnet +net/debugnet_inet.c optional inet debugnet net/pfil.c optional ether | inet net/radix.c standard net/radix_mpath.c standard diff --git a/sys/conf/options b/sys/conf/options index 96af0bf1e24d..d9ea0a821cb3 100644 --- a/sys/conf/options +++ b/sys/conf/options @@ -321,6 +321,8 @@ NETSMB opt_netsmb.h # Enable netdump(4) client support. NETDUMP opt_global.h +# Enable debugnet(4) networking support. +DEBUGNET opt_global.h # Options used only in subr_param.c. 
HZ opt_param.h diff --git a/sys/dev/alc/if_alc.c b/sys/dev/alc/if_alc.c index 1bafdab235d0..b053bf2f921a 100644 --- a/sys/dev/alc/if_alc.c +++ b/sys/dev/alc/if_alc.c @@ -50,6 +50,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include #include @@ -64,7 +65,6 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include #include #include @@ -215,7 +215,7 @@ static int sysctl_int_range(SYSCTL_HANDLER_ARGS, int, int); static int sysctl_hw_alc_proc_limit(SYSCTL_HANDLER_ARGS); static int sysctl_hw_alc_int_mod(SYSCTL_HANDLER_ARGS); -NETDUMP_DEFINE(alc); +DEBUGNET_DEFINE(alc); static device_method_t alc_methods[] = { /* Device interface. */ @@ -1657,8 +1657,8 @@ alc_attach(device_t dev) goto fail; } - /* Attach driver netdump methods. */ - NETDUMP_SET(ifp, alc); + /* Attach driver debugnet methods. */ + DEBUGNET_SET(ifp, alc); fail: if (error != 0) @@ -4658,9 +4658,9 @@ sysctl_hw_alc_int_mod(SYSCTL_HANDLER_ARGS) ALC_IM_TIMER_MIN, ALC_IM_TIMER_MAX)); } -#ifdef NETDUMP +#ifdef DEBUGNET static void -alc_netdump_init(struct ifnet *ifp, int *nrxr, int *ncl, int *clsize) +alc_debugnet_init(struct ifnet *ifp, int *nrxr, int *ncl, int *clsize) { struct alc_softc *sc; @@ -4668,17 +4668,17 @@ alc_netdump_init(struct ifnet *ifp, int *nrxr, int *ncl, int *clsize) KASSERT(sc->alc_buf_size <= MCLBYTES, ("incorrect cluster size")); *nrxr = ALC_RX_RING_CNT; - *ncl = NETDUMP_MAX_IN_FLIGHT; + *ncl = DEBUGNET_MAX_IN_FLIGHT; *clsize = MCLBYTES; } static void -alc_netdump_event(struct ifnet *ifp __unused, enum netdump_ev event __unused) +alc_debugnet_event(struct ifnet *ifp __unused, enum debugnet_ev event __unused) { } static int -alc_netdump_transmit(struct ifnet *ifp, struct mbuf *m) +alc_debugnet_transmit(struct ifnet *ifp, struct mbuf *m) { struct alc_softc *sc; int error; @@ -4695,7 +4695,7 @@ alc_netdump_transmit(struct ifnet *ifp, struct mbuf *m) } static int -alc_netdump_poll(struct ifnet *ifp, int count) +alc_debugnet_poll(struct ifnet *ifp, int count) { struct 
alc_softc *sc; @@ -4707,4 +4707,4 @@ alc_netdump_poll(struct ifnet *ifp, int count) alc_txeof(sc); return (alc_rxintr(sc, count)); } -#endif /* NETDUMP */ +#endif /* DEBUGNET */ diff --git a/sys/dev/bge/if_bge.c b/sys/dev/bge/if_bge.c index bae18acf96ae..30b8c9ac76b3 100644 --- a/sys/dev/bge/if_bge.c +++ b/sys/dev/bge/if_bge.c @@ -84,6 +84,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include #include @@ -100,7 +101,6 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include #include #include @@ -519,7 +519,7 @@ static void bge_add_sysctl_stats(struct bge_softc *, struct sysctl_ctx_list *, struct sysctl_oid_list *); static int bge_sysctl_stats(SYSCTL_HANDLER_ARGS); -NETDUMP_DEFINE(bge); +DEBUGNET_DEFINE(bge); static device_method_t bge_methods[] = { /* Device interface */ @@ -3983,8 +3983,8 @@ bge_attach(device_t dev) goto fail; } - /* Attach driver netdump methods. */ - NETDUMP_SET(ifp, bge); + /* Attach driver debugnet methods. */ + DEBUGNET_SET(ifp, bge); fail: if (error) @@ -6844,16 +6844,16 @@ bge_get_counter(if_t ifp, ift_counter cnt) } } -#ifdef NETDUMP +#ifdef DEBUGNET static void -bge_netdump_init(if_t ifp, int *nrxr, int *ncl, int *clsize) +bge_debugnet_init(if_t ifp, int *nrxr, int *ncl, int *clsize) { struct bge_softc *sc; sc = if_getsoftc(ifp); BGE_LOCK(sc); *nrxr = sc->bge_return_ring_cnt; - *ncl = NETDUMP_MAX_IN_FLIGHT; + *ncl = DEBUGNET_MAX_IN_FLIGHT; if ((sc->bge_flags & BGE_FLAG_JUMBO_STD) != 0 && (if_getmtu(sc->bge_ifp) + ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN > (MCLBYTES - ETHER_ALIGN))) @@ -6864,12 +6864,12 @@ bge_netdump_init(if_t ifp, int *nrxr, int *ncl, int *clsize) } static void -bge_netdump_event(if_t ifp __unused, enum netdump_ev event __unused) +bge_debugnet_event(if_t ifp __unused, enum debugnet_ev event __unused) { } static int -bge_netdump_transmit(if_t ifp, struct mbuf *m) +bge_debugnet_transmit(if_t ifp, struct mbuf *m) { struct bge_softc *sc; uint32_t prodidx; @@ -6888,7 +6888,7 @@ 
bge_netdump_transmit(if_t ifp, struct mbuf *m) } static int -bge_netdump_poll(if_t ifp, int count) +bge_debugnet_poll(if_t ifp, int count) { struct bge_softc *sc; uint32_t rx_prod, tx_cons; @@ -6913,4 +6913,4 @@ bge_netdump_poll(if_t ifp, int count) bge_txeof(sc, tx_cons); return (0); } -#endif /* NETDUMP */ +#endif /* DEBUGNET */ diff --git a/sys/dev/bxe/bxe.c b/sys/dev/bxe/bxe.c index f66cf8239571..4e35867e8bf8 100644 --- a/sys/dev/bxe/bxe.c +++ b/sys/dev/bxe/bxe.c @@ -237,7 +237,7 @@ MODULE_DEPEND(bxe, pci, 1, 1, 1); MODULE_DEPEND(bxe, ether, 1, 1, 1); DRIVER_MODULE(bxe, pci, bxe_driver, bxe_devclass, 0, 0); -NETDUMP_DEFINE(bxe); +DEBUGNET_DEFINE(bxe); /* resources needed for unloading a previously loaded device */ @@ -13124,8 +13124,8 @@ bxe_init_ifnet(struct bxe_softc *sc) /* attach to the Ethernet interface list */ ether_ifattach(ifp, sc->link_params.mac_addr); - /* Attach driver netdump methods. */ - NETDUMP_SET(ifp, bxe); + /* Attach driver debugnet methods. */ + DEBUGNET_SET(ifp, bxe); return (0); } @@ -19533,27 +19533,27 @@ bxe_eioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, return (rval); } -#ifdef NETDUMP +#ifdef DEBUGNET static void -bxe_netdump_init(struct ifnet *ifp, int *nrxr, int *ncl, int *clsize) +bxe_debugnet_init(struct ifnet *ifp, int *nrxr, int *ncl, int *clsize) { struct bxe_softc *sc; sc = if_getsoftc(ifp); BXE_CORE_LOCK(sc); *nrxr = sc->num_queues; - *ncl = NETDUMP_MAX_IN_FLIGHT; + *ncl = DEBUGNET_MAX_IN_FLIGHT; *clsize = sc->fp[0].mbuf_alloc_size; BXE_CORE_UNLOCK(sc); } static void -bxe_netdump_event(struct ifnet *ifp __unused, enum netdump_ev event __unused) +bxe_debugnet_event(struct ifnet *ifp __unused, enum debugnet_ev event __unused) { } static int -bxe_netdump_transmit(struct ifnet *ifp, struct mbuf *m) +bxe_debugnet_transmit(struct ifnet *ifp, struct mbuf *m) { struct bxe_softc *sc; int error; @@ -19570,7 +19570,7 @@ bxe_netdump_transmit(struct ifnet *ifp, struct mbuf *m) } static int -bxe_netdump_poll(struct ifnet 
*ifp, int count) +bxe_debugnet_poll(struct ifnet *ifp, int count) { struct bxe_softc *sc; int i; @@ -19585,4 +19585,4 @@ bxe_netdump_poll(struct ifnet *ifp, int count) (void)bxe_txeof(sc, &sc->fp[0]); return (0); } -#endif /* NETDUMP */ +#endif /* DEBUGNET */ diff --git a/sys/dev/bxe/bxe.h b/sys/dev/bxe/bxe.h index 7998829723fa..bf06c92196d1 100644 --- a/sys/dev/bxe/bxe.h +++ b/sys/dev/bxe/bxe.h @@ -55,6 +55,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include #include @@ -70,7 +71,6 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include #include #include diff --git a/sys/dev/cxgb/cxgb_adapter.h b/sys/dev/cxgb/cxgb_adapter.h index 0806e036eb2d..6e5b8da2dc23 100644 --- a/sys/dev/cxgb/cxgb_adapter.h +++ b/sys/dev/cxgb/cxgb_adapter.h @@ -578,10 +578,10 @@ void cxgb_qflush(struct ifnet *ifp); void t3_iterate(void (*)(struct adapter *, void *), void *); void cxgb_refresh_stats(struct port_info *); -#ifdef NETDUMP -int cxgb_netdump_encap(struct sge_qset *qs, struct mbuf **m); -int cxgb_netdump_poll_rx(adapter_t *adap, struct sge_qset *qs); -int cxgb_netdump_poll_tx(struct sge_qset *qs); +#ifdef DEBUGNET +int cxgb_debugnet_encap(struct sge_qset *qs, struct mbuf **m); +int cxgb_debugnet_poll_rx(adapter_t *adap, struct sge_qset *qs); +int cxgb_debugnet_poll_tx(struct sge_qset *qs); #endif #endif diff --git a/sys/dev/cxgb/cxgb_main.c b/sys/dev/cxgb/cxgb_main.c index c23ba3d0037b..ffe2b680e12f 100644 --- a/sys/dev/cxgb/cxgb_main.c +++ b/sys/dev/cxgb/cxgb_main.c @@ -58,6 +58,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include #include @@ -74,7 +75,6 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include #include #include @@ -218,7 +218,7 @@ static devclass_t cxgb_port_devclass; DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0); MODULE_VERSION(cxgb, 1); -NETDUMP_DEFINE(cxgb); +DEBUGNET_DEFINE(cxgb); static struct mtx t3_list_lock; static SLIST_HEAD(, adapter) t3_list; @@ -1053,8 +1053,8 @@ 
cxgb_port_attach(device_t dev) ether_ifattach(ifp, p->hw_addr); - /* Attach driver netdump methods. */ - NETDUMP_SET(ifp, cxgb); + /* Attach driver debugnet methods. */ + DEBUGNET_SET(ifp, cxgb); #ifdef DEFAULT_JUMBO if (sc->params.nports <= 2) @@ -3590,9 +3590,9 @@ cxgbc_mod_event(module_t mod, int cmd, void *arg) return (rc); } -#ifdef NETDUMP +#ifdef DEBUGNET static void -cxgb_netdump_init(struct ifnet *ifp, int *nrxr, int *ncl, int *clsize) +cxgb_debugnet_init(struct ifnet *ifp, int *nrxr, int *ncl, int *clsize) { struct port_info *pi; adapter_t *adap; @@ -3607,18 +3607,18 @@ cxgb_netdump_init(struct ifnet *ifp, int *nrxr, int *ncl, int *clsize) } static void -cxgb_netdump_event(struct ifnet *ifp, enum netdump_ev event) +cxgb_debugnet_event(struct ifnet *ifp, enum debugnet_ev event) { struct port_info *pi; struct sge_qset *qs; int i; pi = if_getsoftc(ifp); - if (event == NETDUMP_START) + if (event == DEBUGNET_START) for (i = 0; i < pi->adapter->nqsets; i++) { qs = &pi->adapter->sge.qs[i]; - /* Need to reinit after netdump_mbuf_dump(). */ + /* Need to reinit after debugnet_mbuf_start(). 
*/ qs->fl[0].zone = zone_pack; qs->fl[1].zone = zone_clust; qs->lro.enabled = 0; @@ -3626,7 +3626,7 @@ cxgb_netdump_event(struct ifnet *ifp, enum netdump_ev event) } static int -cxgb_netdump_transmit(struct ifnet *ifp, struct mbuf *m) +cxgb_debugnet_transmit(struct ifnet *ifp, struct mbuf *m) { struct port_info *pi; struct sge_qset *qs; @@ -3637,11 +3637,11 @@ cxgb_netdump_transmit(struct ifnet *ifp, struct mbuf *m) return (ENOENT); qs = &pi->adapter->sge.qs[pi->first_qset]; - return (cxgb_netdump_encap(qs, &m)); + return (cxgb_debugnet_encap(qs, &m)); } static int -cxgb_netdump_poll(struct ifnet *ifp, int count) +cxgb_debugnet_poll(struct ifnet *ifp, int count) { struct port_info *pi; adapter_t *adap; @@ -3653,8 +3653,8 @@ cxgb_netdump_poll(struct ifnet *ifp, int count) adap = pi->adapter; for (i = 0; i < adap->nqsets; i++) - (void)cxgb_netdump_poll_rx(adap, &adap->sge.qs[i]); - (void)cxgb_netdump_poll_tx(&adap->sge.qs[pi->first_qset]); + (void)cxgb_debugnet_poll_rx(adap, &adap->sge.qs[i]); + (void)cxgb_debugnet_poll_tx(&adap->sge.qs[pi->first_qset]); return (0); } -#endif /* NETDUMP */ +#endif /* DEBUGNET */ diff --git a/sys/dev/cxgb/cxgb_sge.c b/sys/dev/cxgb/cxgb_sge.c index 74c3b873f593..60876543bf7b 100644 --- a/sys/dev/cxgb/cxgb_sge.c +++ b/sys/dev/cxgb/cxgb_sge.c @@ -390,9 +390,9 @@ reclaim_completed_tx(struct sge_qset *qs, int reclaim_min, int queue) return (reclaim); } -#ifdef NETDUMP +#ifdef DEBUGNET int -cxgb_netdump_poll_tx(struct sge_qset *qs) +cxgb_debugnet_poll_tx(struct sge_qset *qs) { return (reclaim_completed_tx(qs, TX_RECLAIM_MAX, TXQ_ETH)); @@ -1595,9 +1595,9 @@ t3_encap(struct sge_qset *qs, struct mbuf **m) return (0); } -#ifdef NETDUMP +#ifdef DEBUGNET int -cxgb_netdump_encap(struct sge_qset *qs, struct mbuf **m) +cxgb_debugnet_encap(struct sge_qset *qs, struct mbuf **m) { int error; @@ -3040,9 +3040,9 @@ process_responses_gts(adapter_t *adap, struct sge_rspq *rq) return (work); } -#ifdef NETDUMP +#ifdef DEBUGNET int 
-cxgb_netdump_poll_rx(adapter_t *adap, struct sge_qset *qs) +cxgb_debugnet_poll_rx(adapter_t *adap, struct sge_qset *qs) { return (process_responses_gts(adap, &qs->rspq)); diff --git a/sys/dev/mlx4/mlx4_en/en.h b/sys/dev/mlx4/mlx4_en/en.h index fb765a698c6c..e7a02f02c8b7 100644 --- a/sys/dev/mlx4/mlx4_en/en.h +++ b/sys/dev/mlx4/mlx4_en/en.h @@ -53,8 +53,8 @@ #include #include +#include #include -#include #include "en_port.h" #include diff --git a/sys/dev/mlx4/mlx4_en/mlx4_en_netdev.c b/sys/dev/mlx4/mlx4_en/mlx4_en_netdev.c index 4159cc6274bd..f7671d9f94e9 100644 --- a/sys/dev/mlx4/mlx4_en/mlx4_en_netdev.c +++ b/sys/dev/mlx4/mlx4_en/mlx4_en_netdev.c @@ -54,7 +54,7 @@ #include "en.h" #include "en_port.h" -NETDUMP_DEFINE(mlx4_en); +DEBUGNET_DEFINE(mlx4_en); static void mlx4_en_sysctl_stat(struct mlx4_en_priv *priv); static void mlx4_en_sysctl_conf(struct mlx4_en_priv *priv); @@ -2304,7 +2304,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&priv->media, IFM_ETHER | IFM_AUTO); - NETDUMP_SET(dev, mlx4_en); + DEBUGNET_SET(dev, mlx4_en); en_warn(priv, "Using %d TX rings\n", prof->tx_ring_num); en_warn(priv, "Using %d RX rings\n", prof->rx_ring_num); @@ -2888,27 +2888,27 @@ static void mlx4_en_sysctl_stat(struct mlx4_en_priv *priv) } } -#ifdef NETDUMP +#ifdef DEBUGNET static void -mlx4_en_netdump_init(struct ifnet *dev, int *nrxr, int *ncl, int *clsize) +mlx4_en_debugnet_init(struct ifnet *dev, int *nrxr, int *ncl, int *clsize) { struct mlx4_en_priv *priv; priv = if_getsoftc(dev); mutex_lock(&priv->mdev->state_lock); *nrxr = priv->rx_ring_num; - *ncl = NETDUMP_MAX_IN_FLIGHT; + *ncl = DEBUGNET_MAX_IN_FLIGHT; *clsize = priv->rx_mb_size; mutex_unlock(&priv->mdev->state_lock); } static void -mlx4_en_netdump_event(struct ifnet *dev, enum netdump_ev event) +mlx4_en_debugnet_event(struct ifnet *dev, enum debugnet_ev event) { } static int -mlx4_en_netdump_transmit(struct ifnet *dev, struct mbuf 
*m) +mlx4_en_debugnet_transmit(struct ifnet *dev, struct mbuf *m) { struct mlx4_en_priv *priv; int err; @@ -2925,7 +2925,7 @@ mlx4_en_netdump_transmit(struct ifnet *dev, struct mbuf *m) } static int -mlx4_en_netdump_poll(struct ifnet *dev, int count) +mlx4_en_debugnet_poll(struct ifnet *dev, int count) { struct mlx4_en_priv *priv; @@ -2937,4 +2937,4 @@ mlx4_en_netdump_poll(struct ifnet *dev, int count) return (0); } -#endif /* NETDUMP */ +#endif /* DEBUGNET */ diff --git a/sys/dev/re/if_re.c b/sys/dev/re/if_re.c index cccd0440162e..3b31bb286fe9 100644 --- a/sys/dev/re/if_re.c +++ b/sys/dev/re/if_re.c @@ -128,6 +128,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include #include @@ -139,8 +140,6 @@ __FBSDID("$FreeBSD$"); #include -#include - #include #include #include @@ -310,7 +309,7 @@ static void re_setwol (struct rl_softc *); static void re_clrwol (struct rl_softc *); static void re_set_linkspeed (struct rl_softc *); -NETDUMP_DEFINE(re); +DEBUGNET_DEFINE(re); #ifdef DEV_NETMAP /* see ixgbe.c for details */ #include @@ -1745,7 +1744,7 @@ re_attach(device_t dev) goto fail; } - NETDUMP_SET(ifp, re); + DEBUGNET_SET(ifp, re); fail: if (error) @@ -4093,28 +4092,28 @@ sysctl_hw_re_int_mod(SYSCTL_HANDLER_ARGS) RL_TIMER_MAX)); } -#ifdef NETDUMP +#ifdef DEBUGNET static void -re_netdump_init(struct ifnet *ifp, int *nrxr, int *ncl, int *clsize) +re_debugnet_init(struct ifnet *ifp, int *nrxr, int *ncl, int *clsize) { struct rl_softc *sc; sc = if_getsoftc(ifp); RL_LOCK(sc); *nrxr = sc->rl_ldata.rl_rx_desc_cnt; - *ncl = NETDUMP_MAX_IN_FLIGHT; + *ncl = DEBUGNET_MAX_IN_FLIGHT; *clsize = (ifp->if_mtu > RL_MTU && (sc->rl_flags & RL_FLAG_JUMBOV2) != 0) ? 
MJUM9BYTES : MCLBYTES; RL_UNLOCK(sc); } static void -re_netdump_event(struct ifnet *ifp __unused, enum netdump_ev event __unused) +re_debugnet_event(struct ifnet *ifp __unused, enum debugnet_ev event __unused) { } static int -re_netdump_transmit(struct ifnet *ifp, struct mbuf *m) +re_debugnet_transmit(struct ifnet *ifp, struct mbuf *m) { struct rl_softc *sc; int error; @@ -4131,7 +4130,7 @@ re_netdump_transmit(struct ifnet *ifp, struct mbuf *m) } static int -re_netdump_poll(struct ifnet *ifp, int count) +re_debugnet_poll(struct ifnet *ifp, int count) { struct rl_softc *sc; int error; @@ -4147,4 +4146,4 @@ re_netdump_poll(struct ifnet *ifp, int count) return (error); return (0); } -#endif /* NETDUMP */ +#endif /* DEBUGNET */ diff --git a/sys/dev/virtio/network/if_vtnet.c b/sys/dev/virtio/network/if_vtnet.c index 936b249dd90b..46be384aedc3 100644 --- a/sys/dev/virtio/network/if_vtnet.c +++ b/sys/dev/virtio/network/if_vtnet.c @@ -51,6 +51,7 @@ __FBSDID("$FreeBSD$"); #include +#include #include #include #include @@ -69,7 +70,6 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include #include #include @@ -230,7 +230,7 @@ static void vtnet_disable_interrupts(struct vtnet_softc *); static int vtnet_tunable_int(struct vtnet_softc *, const char *, int); -NETDUMP_DEFINE(vtnet); +DEBUGNET_DEFINE(vtnet); /* Tunables. 
*/ static SYSCTL_NODE(_hw, OID_AUTO, vtnet, CTLFLAG_RD, 0, "VNET driver parameters"); @@ -1025,7 +1025,7 @@ vtnet_setup_interface(struct vtnet_softc *sc) vtnet_set_rx_process_limit(sc); vtnet_set_tx_intr_threshold(sc); - NETDUMP_SET(ifp, vtnet); + DEBUGNET_SET(ifp, vtnet); return (0); } @@ -3972,9 +3972,9 @@ vtnet_tunable_int(struct vtnet_softc *sc, const char *knob, int def) return (def); } -#ifdef NETDUMP +#ifdef DEBUGNET static void -vtnet_netdump_init(struct ifnet *ifp, int *nrxr, int *ncl, int *clsize) +vtnet_debugnet_init(struct ifnet *ifp, int *nrxr, int *ncl, int *clsize) { struct vtnet_softc *sc; @@ -3982,7 +3982,7 @@ vtnet_netdump_init(struct ifnet *ifp, int *nrxr, int *ncl, int *clsize) VTNET_CORE_LOCK(sc); *nrxr = sc->vtnet_max_vq_pairs; - *ncl = NETDUMP_MAX_IN_FLIGHT; + *ncl = DEBUGNET_MAX_IN_FLIGHT; *clsize = sc->vtnet_rx_clsize; VTNET_CORE_UNLOCK(sc); @@ -3992,17 +3992,17 @@ vtnet_netdump_init(struct ifnet *ifp, int *nrxr, int *ncl, int *clsize) * XXX add a separate zone like we do for mbufs? 
otherwise we may alloc * buckets */ - uma_zone_reserve(vtnet_tx_header_zone, NETDUMP_MAX_IN_FLIGHT * 2); - uma_prealloc(vtnet_tx_header_zone, NETDUMP_MAX_IN_FLIGHT * 2); + uma_zone_reserve(vtnet_tx_header_zone, DEBUGNET_MAX_IN_FLIGHT * 2); + uma_prealloc(vtnet_tx_header_zone, DEBUGNET_MAX_IN_FLIGHT * 2); } static void -vtnet_netdump_event(struct ifnet *ifp __unused, enum netdump_ev event __unused) +vtnet_debugnet_event(struct ifnet *ifp __unused, enum debugnet_ev event __unused) { } static int -vtnet_netdump_transmit(struct ifnet *ifp, struct mbuf *m) +vtnet_debugnet_transmit(struct ifnet *ifp, struct mbuf *m) { struct vtnet_softc *sc; struct vtnet_txq *txq; @@ -4021,7 +4021,7 @@ vtnet_netdump_transmit(struct ifnet *ifp, struct mbuf *m) } static int -vtnet_netdump_poll(struct ifnet *ifp, int count) +vtnet_debugnet_poll(struct ifnet *ifp, int count) { struct vtnet_softc *sc; int i; @@ -4036,4 +4036,4 @@ vtnet_netdump_poll(struct ifnet *ifp, int count) (void)vtnet_rxq_eof(&sc->vtnet_rxqs[i]); return (0); } -#endif /* NETDUMP */ +#endif /* DEBUGNET */ diff --git a/sys/i386/conf/GENERIC b/sys/i386/conf/GENERIC index 806c4743afbc..4d29e889defd 100644 --- a/sys/i386/conf/GENERIC +++ b/sys/i386/conf/GENERIC @@ -99,6 +99,7 @@ options VERBOSE_SYSINIT=0 # Support debug.verbose_sysinit, off by default options EKCD # Support for encrypted kernel dumps options GZIO # gzip-compressed kernel and user dumps options ZSTDIO # zstd-compressed kernel and user dumps +options DEBUGNET # debugnet networking options NETDUMP # netdump(4) client support # To make an SMP kernel, the next two lines are needed diff --git a/sys/kern/kern_mbuf.c b/sys/kern/kern_mbuf.c index 370297ea4210..72c06fb69e68 100644 --- a/sys/kern/kern_mbuf.c +++ b/sys/kern/kern_mbuf.c @@ -413,10 +413,10 @@ mbuf_init(void *dummy) } SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL); -#ifdef NETDUMP +#ifdef DEBUGNET /* - * netdump makes use of a pre-allocated pool of mbufs and clusters. 
When - * netdump is configured, we initialize a set of UMA cache zones which return + * debugnet makes use of a pre-allocated pool of mbufs and clusters. When + * debugnet is configured, we initialize a set of UMA cache zones which return * items from this pool. At panic-time, the regular UMA zone pointers are * overwritten with those of the cache zones so that drivers may allocate and * free mbufs and clusters without attempting to allocate physical memory. @@ -424,18 +424,28 @@ SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL); * We keep mbufs and clusters in a pair of mbuf queues. In particular, for * the purpose of caching clusters, we treat them as mbufs. */ -static struct mbufq nd_mbufq = - { STAILQ_HEAD_INITIALIZER(nd_mbufq.mq_head), 0, INT_MAX }; -static struct mbufq nd_clustq = - { STAILQ_HEAD_INITIALIZER(nd_clustq.mq_head), 0, INT_MAX }; +static struct mbufq dn_mbufq = + { STAILQ_HEAD_INITIALIZER(dn_mbufq.mq_head), 0, INT_MAX }; +static struct mbufq dn_clustq = + { STAILQ_HEAD_INITIALIZER(dn_clustq.mq_head), 0, INT_MAX }; -static int nd_clsize; -static uma_zone_t nd_zone_mbuf; -static uma_zone_t nd_zone_clust; -static uma_zone_t nd_zone_pack; +static int dn_clsize; +static uma_zone_t dn_zone_mbuf; +static uma_zone_t dn_zone_clust; +static uma_zone_t dn_zone_pack; + +static struct debugnet_saved_zones { + uma_zone_t dsz_mbuf; + uma_zone_t dsz_clust; + uma_zone_t dsz_pack; + uma_zone_t dsz_jumbop; + uma_zone_t dsz_jumbo9; + uma_zone_t dsz_jumbo16; + bool dsz_debugnet_zones_enabled; +} dn_saved_zones; static int -nd_buf_import(void *arg, void **store, int count, int domain __unused, +dn_buf_import(void *arg, void **store, int count, int domain __unused, int flags) { struct mbufq *q; @@ -448,7 +458,7 @@ nd_buf_import(void *arg, void **store, int count, int domain __unused, m = mbufq_dequeue(q); if (m == NULL) break; - trash_init(m, q == &nd_mbufq ? MSIZE : nd_clsize, flags); + trash_init(m, q == &dn_mbufq ? 
MSIZE : dn_clsize, flags); store[i] = m; } KASSERT((flags & M_WAITOK) == 0 || i == count, @@ -457,7 +467,7 @@ nd_buf_import(void *arg, void **store, int count, int domain __unused, } static void -nd_buf_release(void *arg, void **store, int count) +dn_buf_release(void *arg, void **store, int count) { struct mbufq *q; struct mbuf *m; @@ -472,7 +482,7 @@ nd_buf_release(void *arg, void **store, int count) } static int -nd_pack_import(void *arg __unused, void **store, int count, int domain __unused, +dn_pack_import(void *arg __unused, void **store, int count, int domain __unused, int flags __unused) { struct mbuf *m; @@ -483,12 +493,12 @@ nd_pack_import(void *arg __unused, void **store, int count, int domain __unused, m = m_get(MT_DATA, M_NOWAIT); if (m == NULL) break; - clust = uma_zalloc(nd_zone_clust, M_NOWAIT); + clust = uma_zalloc(dn_zone_clust, M_NOWAIT); if (clust == NULL) { m_free(m); break; } - mb_ctor_clust(clust, nd_clsize, m, 0); + mb_ctor_clust(clust, dn_clsize, m, 0); store[i] = m; } KASSERT((flags & M_WAITOK) == 0 || i == count, @@ -497,7 +507,7 @@ nd_pack_import(void *arg __unused, void **store, int count, int domain __unused, } static void -nd_pack_release(void *arg __unused, void **store, int count) +dn_pack_release(void *arg __unused, void **store, int count) { struct mbuf *m; void *clust; @@ -506,109 +516,142 @@ nd_pack_release(void *arg __unused, void **store, int count) for (i = 0; i < count; i++) { m = store[i]; clust = m->m_ext.ext_buf; - uma_zfree(nd_zone_clust, clust); - uma_zfree(nd_zone_mbuf, m); + uma_zfree(dn_zone_clust, clust); + uma_zfree(dn_zone_mbuf, m); } } /* - * Free the pre-allocated mbufs and clusters reserved for netdump, and destroy + * Free the pre-allocated mbufs and clusters reserved for debugnet, and destroy * the corresponding UMA cache zones. 
*/ void -netdump_mbuf_drain(void) +debugnet_mbuf_drain(void) { struct mbuf *m; void *item; - if (nd_zone_mbuf != NULL) { - uma_zdestroy(nd_zone_mbuf); - nd_zone_mbuf = NULL; + if (dn_zone_mbuf != NULL) { + uma_zdestroy(dn_zone_mbuf); + dn_zone_mbuf = NULL; } - if (nd_zone_clust != NULL) { - uma_zdestroy(nd_zone_clust); - nd_zone_clust = NULL; + if (dn_zone_clust != NULL) { + uma_zdestroy(dn_zone_clust); + dn_zone_clust = NULL; } - if (nd_zone_pack != NULL) { - uma_zdestroy(nd_zone_pack); - nd_zone_pack = NULL; + if (dn_zone_pack != NULL) { + uma_zdestroy(dn_zone_pack); + dn_zone_pack = NULL; } - while ((m = mbufq_dequeue(&nd_mbufq)) != NULL) + while ((m = mbufq_dequeue(&dn_mbufq)) != NULL) m_free(m); - while ((item = mbufq_dequeue(&nd_clustq)) != NULL) - uma_zfree(m_getzone(nd_clsize), item); + while ((item = mbufq_dequeue(&dn_clustq)) != NULL) + uma_zfree(m_getzone(dn_clsize), item); } /* - * Callback invoked immediately prior to starting a netdump. + * Callback invoked immediately prior to starting a debugnet connection. */ void -netdump_mbuf_dump(void) +debugnet_mbuf_start(void) { + MPASS(!dn_saved_zones.dsz_debugnet_zones_enabled); + + /* Save the old zone pointers to restore when debugnet is closed. */ + dn_saved_zones = (struct debugnet_saved_zones) { + .dsz_debugnet_zones_enabled = true, + .dsz_mbuf = zone_mbuf, + .dsz_clust = zone_clust, + .dsz_pack = zone_pack, + .dsz_jumbop = zone_jumbop, + .dsz_jumbo9 = zone_jumbo9, + .dsz_jumbo16 = zone_jumbo16, + }; + /* * All cluster zones return buffers of the size requested by the * drivers. It's up to the driver to reinitialize the zones if the - * MTU of a netdump-enabled interface changes. + * MTU of a debugnet-enabled interface changes. 
*/ - printf("netdump: overwriting mbuf zone pointers\n"); - zone_mbuf = nd_zone_mbuf; - zone_clust = nd_zone_clust; - zone_pack = nd_zone_pack; - zone_jumbop = nd_zone_clust; - zone_jumbo9 = nd_zone_clust; - zone_jumbo16 = nd_zone_clust; + printf("debugnet: overwriting mbuf zone pointers\n"); + zone_mbuf = dn_zone_mbuf; + zone_clust = dn_zone_clust; + zone_pack = dn_zone_pack; + zone_jumbop = dn_zone_clust; + zone_jumbo9 = dn_zone_clust; + zone_jumbo16 = dn_zone_clust; } /* - * Reinitialize the netdump mbuf+cluster pool and cache zones. + * Callback invoked when a debugnet connection is closed/finished. */ void -netdump_mbuf_reinit(int nmbuf, int nclust, int clsize) +debugnet_mbuf_finish(void) +{ + + MPASS(dn_saved_zones.dsz_debugnet_zones_enabled); + + printf("debugnet: restoring mbuf zone pointers\n"); + zone_mbuf = dn_saved_zones.dsz_mbuf; + zone_clust = dn_saved_zones.dsz_clust; + zone_pack = dn_saved_zones.dsz_pack; + zone_jumbop = dn_saved_zones.dsz_jumbop; + zone_jumbo9 = dn_saved_zones.dsz_jumbo9; + zone_jumbo16 = dn_saved_zones.dsz_jumbo16; + + memset(&dn_saved_zones, 0, sizeof(dn_saved_zones)); +} + +/* + * Reinitialize the debugnet mbuf+cluster pool and cache zones. 
+ */ +void +debugnet_mbuf_reinit(int nmbuf, int nclust, int clsize) { struct mbuf *m; void *item; - netdump_mbuf_drain(); + debugnet_mbuf_drain(); - nd_clsize = clsize; + dn_clsize = clsize; - nd_zone_mbuf = uma_zcache_create("netdump_" MBUF_MEM_NAME, + dn_zone_mbuf = uma_zcache_create("debugnet_" MBUF_MEM_NAME, MSIZE, mb_ctor_mbuf, mb_dtor_mbuf, #ifdef INVARIANTS trash_init, trash_fini, #else NULL, NULL, #endif - nd_buf_import, nd_buf_release, - &nd_mbufq, UMA_ZONE_NOBUCKET); + dn_buf_import, dn_buf_release, + &dn_mbufq, UMA_ZONE_NOBUCKET); - nd_zone_clust = uma_zcache_create("netdump_" MBUF_CLUSTER_MEM_NAME, + dn_zone_clust = uma_zcache_create("debugnet_" MBUF_CLUSTER_MEM_NAME, clsize, mb_ctor_clust, #ifdef INVARIANTS trash_dtor, trash_init, trash_fini, #else NULL, NULL, NULL, #endif - nd_buf_import, nd_buf_release, - &nd_clustq, UMA_ZONE_NOBUCKET); + dn_buf_import, dn_buf_release, + &dn_clustq, UMA_ZONE_NOBUCKET); - nd_zone_pack = uma_zcache_create("netdump_" MBUF_PACKET_MEM_NAME, + dn_zone_pack = uma_zcache_create("debugnet_" MBUF_PACKET_MEM_NAME, MCLBYTES, mb_ctor_pack, mb_dtor_pack, NULL, NULL, - nd_pack_import, nd_pack_release, + dn_pack_import, dn_pack_release, NULL, UMA_ZONE_NOBUCKET); while (nmbuf-- > 0) { m = m_get(MT_DATA, M_WAITOK); - uma_zfree(nd_zone_mbuf, m); + uma_zfree(dn_zone_mbuf, m); } while (nclust-- > 0) { - item = uma_zalloc(m_getzone(nd_clsize), M_WAITOK); - uma_zfree(nd_zone_clust, item); + item = uma_zalloc(m_getzone(dn_clsize), M_WAITOK); + uma_zfree(dn_zone_clust, item); } } -#endif /* NETDUMP */ +#endif /* DEBUGNET */ /* * UMA backend page allocator for the jumbo frame zones. diff --git a/sys/net/debugnet.c b/sys/net/debugnet.c new file mode 100644 index 000000000000..741e3156372f --- /dev/null +++ b/sys/net/debugnet.c @@ -0,0 +1,655 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2019 Isilon Systems, LLC. + * Copyright (c) 2005-2014 Sandvine Incorporated. All rights reserved. 
+ * Copyright (c) 2000 Darrell Anderson + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#include "opt_inet.h" + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#define DEBUGNET_INTERNAL +#include + +FEATURE(debugnet, "Debugnet support"); + +SYSCTL_NODE(_net, OID_AUTO, debugnet, CTLFLAG_RD, NULL, + "debugnet parameters"); + +unsigned debugnet_debug; +SYSCTL_UINT(_net_debugnet, OID_AUTO, debug, CTLFLAG_RWTUN, + &debugnet_debug, 0, + "Debug message verbosity (0: off; 1: on; 2: verbose)"); + +int debugnet_npolls = 2000; +SYSCTL_INT(_net_debugnet, OID_AUTO, npolls, CTLFLAG_RWTUN, + &debugnet_npolls, 0, + "Number of times to poll before assuming packet loss (0.5ms per poll)"); +int debugnet_nretries = 10; +SYSCTL_INT(_net_debugnet, OID_AUTO, nretries, CTLFLAG_RWTUN, + &debugnet_nretries, 0, + "Number of retransmit attempts before giving up"); + +static bool g_debugnet_pcb_inuse; +static struct debugnet_pcb g_dnet_pcb; + +/* + * Simple accessors for opaque PCB. + */ +const unsigned char * +debugnet_get_gw_mac(const struct debugnet_pcb *pcb) +{ + MPASS(g_debugnet_pcb_inuse && pcb == &g_dnet_pcb && + pcb->dp_state >= DN_STATE_HAVE_GW_MAC); + return (pcb->dp_gw_mac.octet); +} + +/* + * Start of network primitives, beginning with output primitives. 
+ */ + +/* + * Handles creation of the ethernet header, then places outgoing packets into + * the tx buffer for the NIC + * + * Parameters: + * m The mbuf containing the packet to be sent (will be freed by + * this function or the NIC driver) + * ifp The interface to send on + * dst The destination ethernet address (source address will be looked + * up using ifp) + * etype The ETHERTYPE_* value for the protocol that is being sent + * + * Returns: + * int see errno.h, 0 for success + */ +int +debugnet_ether_output(struct mbuf *m, struct ifnet *ifp, struct ether_addr dst, + u_short etype) +{ + struct ether_header *eh; + + if (((ifp->if_flags & (IFF_MONITOR | IFF_UP)) != IFF_UP) || + (ifp->if_drv_flags & IFF_DRV_RUNNING) != IFF_DRV_RUNNING) { + if_printf(ifp, "%s: interface isn't up\n", __func__); + m_freem(m); + return (ENETDOWN); + } + + /* Fill in the ethernet header. */ + M_PREPEND(m, ETHER_HDR_LEN, M_NOWAIT); + if (m == NULL) { + printf("%s: out of mbufs\n", __func__); + return (ENOBUFS); + } + eh = mtod(m, struct ether_header *); + memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN); + memcpy(eh->ether_dhost, dst.octet, ETHER_ADDR_LEN); + eh->ether_type = htons(etype); + return (ifp->if_debugnet_methods->dn_transmit(ifp, m)); +} + +/* + * Unreliable transmission of an mbuf chain to the debugnet server + * Note: can't handle fragmentation; fails if the packet is larger than + * ifp->if_mtu after adding the UDP/IP headers + * + * Parameters: + * pcb The debugnet context block + * m mbuf chain + * + * Returns: + * int see errno.h, 0 for success + */ +static int +debugnet_udp_output(struct debugnet_pcb *pcb, struct mbuf *m) +{ + struct udphdr *udp; + + MPASS(pcb->dp_state >= DN_STATE_HAVE_GW_MAC); + + M_PREPEND(m, sizeof(*udp), M_NOWAIT); + if (m == NULL) { + printf("%s: out of mbufs\n", __func__); + return (ENOBUFS); + } + + udp = mtod(m, void *); + udp->uh_ulen = htons(m->m_pkthdr.len); + /* Use this src port so that the server can connect() the socket */ + 
udp->uh_sport = htons(pcb->dp_client_ack_port); + udp->uh_dport = htons(pcb->dp_server_port); + /* Computed later (protocol-dependent). */ + udp->uh_sum = 0; + + return (debugnet_ip_output(pcb, m)); +} + +/* + * Dummy free function for debugnet clusters. + */ +static void +debugnet_mbuf_free(struct mbuf *m __unused) +{ +} + +/* + * Construct and reliably send a debugnet packet. May fail from a resource + * shortage or extreme number of unacknowledged retransmissions. Wait for + * an acknowledgement before returning. Splits packets into chunks small + * enough to be sent without fragmentation (looks up the interface MTU) + * + * Parameters: + * type debugnet packet type (HERALD, FINISHED, ...) + * data data + * datalen data size (bytes) + * auxdata optional auxiliary information + * + * Returns: + * int see errno.h, 0 for success + */ +int +debugnet_send(struct debugnet_pcb *pcb, uint32_t type, const void *data, + uint32_t datalen, const struct debugnet_proto_aux *auxdata) +{ + struct debugnet_msg_hdr *dn_msg_hdr; + struct mbuf *m, *m2; + uint64_t want_acks; + uint32_t i, pktlen, sent_so_far; + int retries, polls, error; + + want_acks = 0; + pcb->dp_rcvd_acks = 0; + retries = 0; + +retransmit: + /* Chunks can be too big to fit in packets. */ + for (i = sent_so_far = 0; sent_so_far < datalen || + (i == 0 && datalen == 0); i++) { + pktlen = datalen - sent_so_far; + + /* Bound: the interface MTU (assume no IP options). */ + pktlen = min(pktlen, pcb->dp_ifp->if_mtu - + sizeof(struct udpiphdr) - sizeof(struct debugnet_msg_hdr)); + + /* + * Check if it is retransmitting and this has been ACKed + * already. + */ + if ((pcb->dp_rcvd_acks & (1 << i)) != 0) { + sent_so_far += pktlen; + continue; + } + + /* + * Get and fill a header mbuf, then chain data as an extended + * mbuf. 
+ */ + m = m_gethdr(M_NOWAIT, MT_DATA); + if (m == NULL) { + printf("%s: Out of mbufs\n", __func__); + return (ENOBUFS); + } + m->m_len = sizeof(struct debugnet_msg_hdr); + m->m_pkthdr.len = sizeof(struct debugnet_msg_hdr); + MH_ALIGN(m, sizeof(struct debugnet_msg_hdr)); + dn_msg_hdr = mtod(m, struct debugnet_msg_hdr *); + dn_msg_hdr->mh_seqno = htonl(pcb->dp_seqno + i); + dn_msg_hdr->mh_type = htonl(type); + dn_msg_hdr->mh_len = htonl(pktlen); + + if (auxdata != NULL) { + dn_msg_hdr->mh_offset = + htobe64(auxdata->dp_offset_start + sent_so_far); + dn_msg_hdr->mh_aux2 = htobe32(auxdata->dp_aux2); + } else { + dn_msg_hdr->mh_offset = htobe64(sent_so_far); + dn_msg_hdr->mh_aux2 = 0; + } + + if (pktlen != 0) { + m2 = m_get(M_NOWAIT, MT_DATA); + if (m2 == NULL) { + m_freem(m); + printf("%s: Out of mbufs\n", __func__); + return (ENOBUFS); + } + MEXTADD(m2, __DECONST(char *, data) + sent_so_far, + pktlen, debugnet_mbuf_free, NULL, NULL, 0, + EXT_DISPOSABLE); + m2->m_len = pktlen; + + m_cat(m, m2); + m->m_pkthdr.len += pktlen; + } + error = debugnet_udp_output(pcb, m); + if (error != 0) + return (error); + + /* Note that we're waiting for this packet in the bitfield. */ + want_acks |= (1 << i); + sent_so_far += pktlen; + } + if (i >= DEBUGNET_MAX_IN_FLIGHT) + printf("Warning: Sent more than %d packets (%d). " + "Acknowledgements will fail unless the size of " + "rcvd_acks/want_acks is increased.\n", + DEBUGNET_MAX_IN_FLIGHT, i); + + /* + * Wait for acks. A *real* window would speed things up considerably. + */ + polls = 0; + while (pcb->dp_rcvd_acks != want_acks) { + if (polls++ > debugnet_npolls) { + if (retries++ > debugnet_nretries) + return (ETIMEDOUT); + printf(". "); + goto retransmit; + } + debugnet_network_poll(pcb->dp_ifp); + DELAY(500); + } + pcb->dp_seqno += i; + return (0); +} + +/* + * Network input primitives. 
+ */ + +static void +debugnet_handle_ack(struct debugnet_pcb *pcb, struct mbuf **mb, uint16_t sport) +{ + const struct debugnet_ack *dn_ack; + struct mbuf *m; + uint32_t rcv_ackno; + + m = *mb; + + if (m->m_pkthdr.len < sizeof(*dn_ack)) { + DNETDEBUG("ignoring small ACK packet\n"); + return; + } + /* Get Ack. */ + if (m->m_len < sizeof(*dn_ack)) { + m = m_pullup(m, sizeof(*dn_ack)); + *mb = m; + if (m == NULL) { + DNETDEBUG("m_pullup failed\n"); + return; + } + } + dn_ack = mtod(m, const void *); + + /* Debugnet processing. */ + /* + * Packet is meant for us. Extract the ack sequence number and the + * port number if necessary. + */ + rcv_ackno = ntohl(dn_ack->da_seqno); + if (pcb->dp_state < DN_STATE_GOT_HERALD_PORT) { + pcb->dp_server_port = sport; + pcb->dp_state = DN_STATE_GOT_HERALD_PORT; + } + if (rcv_ackno >= pcb->dp_seqno + DEBUGNET_MAX_IN_FLIGHT) + printf("%s: ACK %u too far in future!\n", __func__, rcv_ackno); + else if (rcv_ackno >= pcb->dp_seqno) { + /* We're interested in this ack. Record it. */ + pcb->dp_rcvd_acks |= 1 << (rcv_ackno - pcb->dp_seqno); + } +} + +void +debugnet_handle_udp(struct debugnet_pcb *pcb, struct mbuf **mb) +{ + const struct udphdr *udp; + struct mbuf *m; + uint16_t sport; + + /* UDP processing. */ + + m = *mb; + if (m->m_pkthdr.len < sizeof(*udp)) { + DNETDEBUG("ignoring small UDP packet\n"); + return; + } + + /* Get UDP headers. */ + if (m->m_len < sizeof(*udp)) { + m = m_pullup(m, sizeof(*udp)); + *mb = m; + if (m == NULL) { + DNETDEBUG("m_pullup failed\n"); + return; + } + } + udp = mtod(m, const void *); + + /* For now, the only UDP packets we expect to receive are acks. 
*/ + if (ntohs(udp->uh_dport) != pcb->dp_client_ack_port) { + DNETDEBUG("not on the expected ACK port.\n"); + return; + } + sport = ntohs(udp->uh_sport); + + m_adj(m, sizeof(*udp)); + debugnet_handle_ack(pcb, mb, sport); +} + +/* + * Handler for incoming packets directly from the network adapter + * Identifies the packet type (IP or ARP) and passes it along to one of the + * helper functions debugnet_handle_ip or debugnet_handle_arp. + * + * It needs to partially replicate the behaviour of ether_input() and + * ether_demux(). + * + * Parameters: + * ifp the interface the packet came from + * m an mbuf containing the packet received + */ +static void +debugnet_pkt_in(struct ifnet *ifp, struct mbuf *m) +{ + struct ifreq ifr; + struct ether_header *eh; + u_short etype; + + /* Ethernet processing. */ + if ((m->m_flags & M_PKTHDR) == 0) { + DNETDEBUG_IF(ifp, "discard frame without packet header\n"); + goto done; + } + if (m->m_len < ETHER_HDR_LEN) { + DNETDEBUG_IF(ifp, + "discard frame without leading eth header (len %u pktlen %u)\n", + m->m_len, m->m_pkthdr.len); + goto done; + } + if ((m->m_flags & M_HASFCS) != 0) { + m_adj(m, -ETHER_CRC_LEN); + m->m_flags &= ~M_HASFCS; + } + eh = mtod(m, struct ether_header *); + etype = ntohs(eh->ether_type); + if ((m->m_flags & M_VLANTAG) != 0 || etype == ETHERTYPE_VLAN) { + DNETDEBUG_IF(ifp, "ignoring vlan packets\n"); + goto done; + } + if (if_gethwaddr(ifp, &ifr) != 0) { + DNETDEBUG_IF(ifp, "failed to get hw addr for interface\n"); + goto done; + } + if (memcmp(ifr.ifr_addr.sa_data, eh->ether_dhost, + ETHER_ADDR_LEN) != 0) { + DNETDEBUG_IF(ifp, + "discard frame with incorrect destination addr\n"); + goto done; + } + + MPASS(g_debugnet_pcb_inuse); + + /* Done ethernet processing. Strip off the ethernet header. 
*/ + m_adj(m, ETHER_HDR_LEN); + switch (etype) { + case ETHERTYPE_ARP: + debugnet_handle_arp(&g_dnet_pcb, &m); + break; + case ETHERTYPE_IP: + debugnet_handle_ip(&g_dnet_pcb, &m); + break; + default: + DNETDEBUG_IF(ifp, "dropping unknown ethertype %hu\n", etype); + break; + } +done: + if (m != NULL) + m_freem(m); +} + +/* + * Network polling primitive. + * + * Instead of assuming that most of the network stack is sane, we just poll the + * driver directly for packets. + */ +void +debugnet_network_poll(struct ifnet *ifp) +{ + ifp->if_debugnet_methods->dn_poll(ifp, 1000); +} + +/* + * Start of consumer API surface. + */ +void +debugnet_free(struct debugnet_pcb *pcb) +{ + struct ifnet *ifp; + + MPASS(g_debugnet_pcb_inuse); + MPASS(pcb == &g_dnet_pcb); + + ifp = pcb->dp_ifp; + ifp->if_input = pcb->dp_drv_input; + ifp->if_debugnet_methods->dn_event(ifp, DEBUGNET_END); + debugnet_mbuf_finish(); + + g_debugnet_pcb_inuse = false; + memset(&g_dnet_pcb, 0xfd, sizeof(g_dnet_pcb)); +} + +int +debugnet_connect(const struct debugnet_conn_params *dcp, + struct debugnet_pcb **pcb_out) +{ + struct debugnet_pcb *pcb; + struct ifnet *ifp; + int error; + + if (g_debugnet_pcb_inuse) { + printf("%s: Only one connection at a time.\n", __func__); + return (EBUSY); + } + + pcb = &g_dnet_pcb; + *pcb = (struct debugnet_pcb) { + .dp_state = DN_STATE_INIT, + .dp_client = dcp->dc_client, + .dp_server = dcp->dc_server, + .dp_gateway = dcp->dc_gateway, + .dp_server_port = dcp->dc_herald_port, /* Initially */ + .dp_client_ack_port = dcp->dc_client_ack_port, + .dp_seqno = 1, + .dp_ifp = dcp->dc_ifp, + }; + + /* Switch to the debugnet mbuf zones. */ + debugnet_mbuf_start(); + + ifp = pcb->dp_ifp; + ifp->if_debugnet_methods->dn_event(ifp, DEBUGNET_START); + + /* + * We maintain the invariant that g_debugnet_pcb_inuse is always true + * while the debugnet ifp's if_input is overridden with + * debugnet_pkt_in. + */ + g_debugnet_pcb_inuse = true; + + /* Make the card use *our* receive callback. 
*/ + pcb->dp_drv_input = ifp->if_input; + ifp->if_input = debugnet_pkt_in; + + printf("%s: searching for %s MAC...\n", __func__, + (dcp->dc_gateway == INADDR_ANY) ? "server" : "gateway"); + + error = debugnet_arp_gw(pcb); + if (error != 0) { + printf("%s: failed to locate MAC address\n", __func__); + goto cleanup; + } + MPASS(pcb->dp_state == DN_STATE_HAVE_GW_MAC); + + error = debugnet_send(pcb, DEBUGNET_HERALD, dcp->dc_herald_data, + dcp->dc_herald_datalen, NULL); + if (error != 0) { + printf("%s: failed to herald debugnet server\n", __func__); + goto cleanup; + } + + *pcb_out = pcb; + return (0); + +cleanup: + debugnet_free(pcb); + return (error); +} + +/* + * Pre-allocated dump-time mbuf tracking. + * + * We just track the high water mark we've ever seen and allocate appropriately + * for that iface/mtu combo. + */ +static struct { + int nmbuf; + int ncl; + int clsize; +} dn_hwm; +static struct mtx dn_hwm_lk; +MTX_SYSINIT(debugnet_hwm_lock, &dn_hwm_lk, "Debugnet HWM lock", MTX_DEF); + +static void +dn_maybe_reinit_mbufs(int nmbuf, int ncl, int clsize) +{ + bool any; + + any = false; + mtx_lock(&dn_hwm_lk); + + if (nmbuf > dn_hwm.nmbuf) { + any = true; + dn_hwm.nmbuf = nmbuf; + } else + nmbuf = dn_hwm.nmbuf; + + if (ncl > dn_hwm.ncl) { + any = true; + dn_hwm.ncl = ncl; + } else + ncl = dn_hwm.ncl; + + if (clsize > dn_hwm.clsize) { + any = true; + dn_hwm.clsize = clsize; + } else + clsize = dn_hwm.clsize; + + mtx_unlock(&dn_hwm_lk); + + if (any) + debugnet_mbuf_reinit(nmbuf, ncl, clsize); +} + +void +debugnet_any_ifnet_update(struct ifnet *ifp) +{ + int clsize, nmbuf, ncl, nrxr; + + if (!DEBUGNET_SUPPORTED_NIC(ifp)) + return; + + ifp->if_debugnet_methods->dn_init(ifp, &nrxr, &ncl, &clsize); + KASSERT(nrxr > 0, ("invalid receive ring count %d", nrxr)); + + /* + * We need two headers per message on the transmit side. Multiply by + * four to give us some breathing room. 
+ */ + nmbuf = ncl * (4 + nrxr); + ncl *= nrxr; + + dn_maybe_reinit_mbufs(nmbuf, ncl, clsize); +} + +/* + * Unfortunately, the ifnet_arrival_event eventhandler hook is mostly useless + * for us because drivers tend to if_attach before invoking DEBUGNET_SET(). + * + * On the other hand, hooking DEBUGNET_SET() itself may still be too early, + * because the driver is still in attach. Since we cannot use down interfaces, + * maybe hooking ifnet_event:IFNET_EVENT_UP is sufficient? ... Nope, at least + * with vtnet and dhcpclient that event just never occurs. + * + * So that's how I've landed on the lower level ifnet_link_event. + */ + +static void +dn_ifnet_event(void *arg __unused, struct ifnet *ifp, int link_state) +{ + if (link_state == LINK_STATE_UP) + debugnet_any_ifnet_update(ifp); +} + +static eventhandler_tag dn_attach_cookie; +static void +dn_evh_init(void *ctx __unused) +{ + dn_attach_cookie = EVENTHANDLER_REGISTER(ifnet_link_event, + dn_ifnet_event, NULL, EVENTHANDLER_PRI_ANY); +} +SYSINIT(dn_evh_init, SI_SUB_EVENTHANDLER + 1, SI_ORDER_ANY, dn_evh_init, NULL); diff --git a/sys/net/debugnet.h b/sys/net/debugnet.h new file mode 100644 index 000000000000..2e34d43cec88 --- /dev/null +++ b/sys/net/debugnet.h @@ -0,0 +1,211 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2019 Isilon Systems, LLC. + * Copyright (c) 2005-2014 Sandvine Incorporated + * Copyright (c) 2000 Darrell Anderson + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +/* + * Debugnet provides a reliable, unidirectional, UDP-encapsulated datagram + * transport while a machine is in a debug state. (N-1 CPUs stopped, + * interrupts disabled, may or may not be in a panic(9) state.) Only one + * stream may be active at a time. A dedicated server must be running to + * accept connections. + */ + +#pragma once + +#include +#include + +/* + * Debugnet protocol details. + */ +#define DEBUGNET_HERALD 1 /* Connection handshake. */ +#define DEBUGNET_FINISHED 2 /* Close the connection. */ +#define DEBUGNET_DATA 3 /* Contains data. */ + +struct debugnet_msg_hdr { + uint32_t mh_type; /* Debugnet message type. */ + uint32_t mh_seqno; /* Match acks with msgs. */ + uint64_t mh_offset; /* Offset in fragment. */ + uint32_t mh_len; /* Attached data (bytes). */ + uint32_t mh_aux2; /* Consumer-specific. */ +} __packed; + +struct debugnet_ack { + uint32_t da_seqno; /* Match acks with msgs. */ +} __packed; + +#define DEBUGNET_MAX_IN_FLIGHT 64 + +#ifdef _KERNEL +/* + * Hook API for network drivers.
+ */ +enum debugnet_ev { + DEBUGNET_START, + DEBUGNET_END, +}; + +struct ifnet; +struct mbuf; +typedef void debugnet_init_t(struct ifnet *, int *nrxr, int *ncl, int *clsize); +typedef void debugnet_event_t(struct ifnet *, enum debugnet_ev); +typedef int debugnet_transmit_t(struct ifnet *, struct mbuf *); +typedef int debugnet_poll_t(struct ifnet *, int); + +struct debugnet_methods { + debugnet_init_t *dn_init; + debugnet_event_t *dn_event; + debugnet_transmit_t *dn_transmit; + debugnet_poll_t *dn_poll; +}; + +#define DEBUGNET_SUPPORTED_NIC(ifp) \ + ((ifp)->if_debugnet_methods != NULL && (ifp)->if_type == IFT_ETHER) + +/* + * Debugnet consumer API. + */ +struct debugnet_conn_params { + struct ifnet *dc_ifp; + in_addr_t dc_client; + in_addr_t dc_server; + in_addr_t dc_gateway; + + uint16_t dc_herald_port; + uint16_t dc_client_ack_port; + + const void *dc_herald_data; + uint32_t dc_herald_datalen; +}; + +struct debugnet_pcb; /* opaque */ + +/* + * Open a unidirectional stream to the specified server's herald port. + * + * If all goes well, the server will send ACK from a different port to our ack + * port. This allows servers to somewhat gracefully handle multiple debugnet + * clients. (Clients are limited to single connections.) + * + * Returns zero on success, or errno. + */ +int debugnet_connect(const struct debugnet_conn_params *, + struct debugnet_pcb **pcb_out); + +/* + * Free a debugnet stream that was previously successfully opened. + * + * No attempt is made to cleanly terminate communication with the remote + * server. Consumers should first send an empty DEBUGNET_FINISHED message, or + * otherwise let the remote know they are signing off. + */ +void debugnet_free(struct debugnet_pcb *); + +/* + * Send a message, with common debugnet_msg_hdr header, to the connected remote + * server. + * + * - mhtype translates directly to mh_type (e.g., DEBUGNET_DATA, or some other + * protocol-specific type). 
+ * - Data and datalen describe the attached data; datalen may be zero. + * - If auxdata is NULL, mh_offset's initial value and mh_aux2 will be zero. + * Otherwise, mh_offset's initial value will be auxdata->dp_offset_start and + * mh_aux2 will have the value of auxdata->dp_aux2. + * + * Returns zero on success, or an errno on failure. + */ +struct debugnet_proto_aux { + uint64_t dp_offset_start; + uint32_t dp_aux2; +}; +int debugnet_send(struct debugnet_pcb *, uint32_t mhtype, const void *data, + uint32_t datalen, const struct debugnet_proto_aux *auxdata); + +/* + * A simple wrapper around the above when no data or auxdata is needed. + */ +static inline int +debugnet_sendempty(struct debugnet_pcb *pcb, uint32_t mhtype) +{ + return (debugnet_send(pcb, mhtype, NULL, 0, NULL)); +} + +/* + * PCB accessors. + */ + +/* + * Get the 48-bit MAC address of the discovered next hop (gateway, or + * destination server if it is on the same segment). + */ +const unsigned char *debugnet_get_gw_mac(const struct debugnet_pcb *); + +/* + * Callbacks from core mbuf code. + */ +void debugnet_any_ifnet_update(struct ifnet *); + +/* Expose sysctl variables for netdump(4) to alias. */ +extern int debugnet_npolls; +extern int debugnet_nretries; +extern int debugnet_arp_nretries; + +/* + * Conditionally-defined macros for device drivers so we can avoid ifdef + * wrappers in every single implementation.
+ */ +#ifdef DEBUGNET +#define DEBUGNET_DEFINE(driver) \ + static debugnet_init_t driver##_debugnet_init; \ + static debugnet_event_t driver##_debugnet_event; \ + static debugnet_transmit_t driver##_debugnet_transmit; \ + static debugnet_poll_t driver##_debugnet_poll; \ + \ + static struct debugnet_methods driver##_debugnet_methods = { \ + .dn_init = driver##_debugnet_init, \ + .dn_event = driver##_debugnet_event, \ + .dn_transmit = driver##_debugnet_transmit, \ + .dn_poll = driver##_debugnet_poll, \ + } + +#define DEBUGNET_NOTIFY_MTU(ifp) debugnet_any_ifnet_update(ifp) + +#define DEBUGNET_SET(ifp, driver) \ + (ifp)->if_debugnet_methods = &driver##_debugnet_methods + +#else /* !DEBUGNET */ + +#define DEBUGNET_DEFINE(driver) +#define DEBUGNET_NOTIFY_MTU(ifp) +#define DEBUGNET_SET(ifp, driver) + +#endif /* DEBUGNET */ +#endif /* _KERNEL */ diff --git a/sys/net/debugnet_inet.c b/sys/net/debugnet_inet.c new file mode 100644 index 000000000000..048192d664da --- /dev/null +++ b/sys/net/debugnet_inet.c @@ -0,0 +1,485 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2019 Isilon Systems, LLC. + * Copyright (c) 2005-2014 Sandvine Incorporated. All rights reserved. + * Copyright (c) 2000 Darrell Anderson + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution.
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include "opt_inet.h" + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#define DEBUGNET_INTERNAL +#include + +int debugnet_arp_nretries = 3; +SYSCTL_INT(_net_debugnet, OID_AUTO, arp_nretries, CTLFLAG_RWTUN, + &debugnet_arp_nretries, 0, + "Number of ARP attempts before giving up"); + +/* + * Handler for IP packets: checks their sanity and then processes any debugnet + * ACK packets it finds. + * + * It needs to partially replicate the behaviour of ip_input() and udp_input(). + * + * Parameters: + * pcb a pointer to the live debugnet PCB + * mb a pointer to an mbuf * containing the packet received + * Updates *mb if m_pullup et al change the pointer + * Assumes the calling function will take care of freeing the mbuf + */ +void +debugnet_handle_ip(struct debugnet_pcb *pcb, struct mbuf **mb) +{ + struct ip *ip; + struct mbuf *m; + unsigned short hlen; + + /* IP processing. 
*/ + m = *mb; + if (m->m_pkthdr.len < sizeof(struct ip)) { + DNETDEBUG("dropping packet too small for IP header\n"); + return; + } + if (m->m_len < sizeof(struct ip)) { + m = m_pullup(m, sizeof(struct ip)); + *mb = m; + if (m == NULL) { + DNETDEBUG("m_pullup failed\n"); + return; + } + } + ip = mtod(m, struct ip *); + + /* IP version. */ + if (ip->ip_v != IPVERSION) { + DNETDEBUG("bad IP version %d\n", ip->ip_v); + return; + } + + /* Header length. */ + hlen = ip->ip_hl << 2; + if (hlen < sizeof(struct ip)) { + DNETDEBUG("bad IP header length (%hu)\n", hlen); + return; + } + if (hlen > m->m_len) { + m = m_pullup(m, hlen); + *mb = m; + if (m == NULL) { + DNETDEBUG("m_pullup failed\n"); + return; + } + ip = mtod(m, struct ip *); + } + /* Ignore packets with IP options. */ + if (hlen > sizeof(struct ip)) { + DNETDEBUG("drop packet with IP options\n"); + return; + } + +#ifdef INVARIANTS + if ((IN_LOOPBACK(ntohl(ip->ip_dst.s_addr)) || + IN_LOOPBACK(ntohl(ip->ip_src.s_addr))) && + (m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) { + DNETDEBUG("Bad IP header (RFC1122)\n"); + return; + } +#endif + + /* Checksum. */ + if ((m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) != 0) { + if ((m->m_pkthdr.csum_flags & CSUM_IP_VALID) == 0) { + DNETDEBUG("bad IP checksum\n"); + return; + } + } else { + /* XXX */ ; + } + + /* Convert fields to host byte order. */ + ip->ip_len = ntohs(ip->ip_len); + if (ip->ip_len < hlen) { + DNETDEBUG("IP packet smaller (%hu) than header (%hu)\n", + ip->ip_len, hlen); + return; + } + if (m->m_pkthdr.len < ip->ip_len) { + DNETDEBUG("IP packet bigger (%hu) than ethernet packet (%d)\n", + ip->ip_len, m->m_pkthdr.len); + return; + } + if (m->m_pkthdr.len > ip->ip_len) { + + /* Truncate the packet to the IP length. */ + if (m->m_len == m->m_pkthdr.len) { + m->m_len = ip->ip_len; + m->m_pkthdr.len = ip->ip_len; + } else + m_adj(m, ip->ip_len - m->m_pkthdr.len); + } + + ip->ip_off = ntohs(ip->ip_off); + + /* Check that the source is the server's IP. 
*/ + if (ip->ip_src.s_addr != pcb->dp_server) { + DNETDEBUG("drop packet not from server (from 0x%x)\n", + ip->ip_src.s_addr); + return; + } + + /* Check if the destination IP is ours. */ + if (ip->ip_dst.s_addr != pcb->dp_client) { + DNETDEBUGV("drop packet not to our IP\n"); + return; + } + + if (ip->ip_p != IPPROTO_UDP) { + DNETDEBUG("drop non-UDP packet\n"); + return; + } + + /* Do not deal with fragments. */ + if ((ip->ip_off & (IP_MF | IP_OFFMASK)) != 0) { + DNETDEBUG("drop fragmented packet\n"); + return; + } + + /* UDP custom is to have packet length not include IP header. */ + ip->ip_len -= hlen; + + /* Checked above before decoding IP header. */ + MPASS(m->m_pkthdr.len >= sizeof(struct ipovly)); + + /* Put the UDP header at start of chain. */ + m_adj(m, sizeof(struct ipovly)); + debugnet_handle_udp(pcb, mb); +} + +/* + * Builds and sends a single ARP request to locate the L2 address for a given + * INET address. + * + * Return value: + * 0 on success + * errno on error + */ +static int +debugnet_send_arp(struct debugnet_pcb *pcb, in_addr_t dst) +{ + struct ether_addr bcast; + struct arphdr *ah; + struct ifnet *ifp; + struct mbuf *m; + int pktlen; + + ifp = pcb->dp_ifp; + + /* Fill-up a broadcast address. 
*/ + memset(&bcast, 0xFF, ETHER_ADDR_LEN); + m = m_gethdr(M_NOWAIT, MT_DATA); + if (m == NULL) { + printf("%s: Out of mbufs\n", __func__); + return (ENOBUFS); + } + pktlen = arphdr_len2(ETHER_ADDR_LEN, sizeof(struct in_addr)); + m->m_len = pktlen; + m->m_pkthdr.len = pktlen; + MH_ALIGN(m, pktlen); + ah = mtod(m, struct arphdr *); + ah->ar_hrd = htons(ARPHRD_ETHER); + ah->ar_pro = htons(ETHERTYPE_IP); + ah->ar_hln = ETHER_ADDR_LEN; + ah->ar_pln = sizeof(struct in_addr); + ah->ar_op = htons(ARPOP_REQUEST); + memcpy(ar_sha(ah), IF_LLADDR(ifp), ETHER_ADDR_LEN); + ((struct in_addr *)ar_spa(ah))->s_addr = pcb->dp_client; + bzero(ar_tha(ah), ETHER_ADDR_LEN); + ((struct in_addr *)ar_tpa(ah))->s_addr = dst; + return (debugnet_ether_output(m, ifp, bcast, ETHERTYPE_ARP)); +} + +/* + * Handler for ARP packets: checks their sanity and then + * 1. If the ARP is a request for our IP, respond with our MAC address + * 2. If the ARP is a response from our server, record its MAC address + * + * It needs to replicate partially the behaviour of arpintr() and + * in_arpinput(). 
+ *
+ * Parameters:
+ *	pcb	the live debugnet PCB; only the first accepted ARP reply
+ *		records dp_gw_mac and advances dp_state past DN_STATE_INIT
+ *	mb	in/out mbuf pointer: rewritten when m_pullup moves the data,
+ *		NULLed when the mbuf is sent as our reply; else caller frees
+ */
+void
+debugnet_handle_arp(struct debugnet_pcb *pcb, struct mbuf **mb)
+{
+	char buf[INET_ADDRSTRLEN];
+	struct in_addr isaddr, itaddr;
+	struct ether_addr dst;
+	struct mbuf *m;
+	struct arphdr *ah;
+	struct ifnet *ifp;
+	uint8_t *enaddr;
+	int req_len, op;
+
+	m = *mb;
+	ifp = m->m_pkthdr.rcvif;
+	if (m->m_len < sizeof(struct arphdr)) {
+		m = m_pullup(m, sizeof(struct arphdr));
+		*mb = m;	/* keep the caller's pointer in sync */
+		if (m == NULL) {
+			DNETDEBUG("runt packet: m_pullup failed\n");
+			return;
+		}
+	}
+
+	ah = mtod(m, struct arphdr *);
+	if (ntohs(ah->ar_hrd) != ARPHRD_ETHER) {
+		DNETDEBUG("unknown hardware address 0x%2D)\n",
+		    (unsigned char *)&ah->ar_hrd, "");
+		return;
+	}
+	if (ntohs(ah->ar_pro) != ETHERTYPE_IP) {
+		DNETDEBUG("drop ARP for unknown protocol %d\n",
+		    ntohs(ah->ar_pro));
+		return;
+	}
+	req_len = arphdr_len2(ifp->if_addrlen, sizeof(struct in_addr));
+	if (m->m_len < req_len) {
+		m = m_pullup(m, req_len);
+		*mb = m;	/* keep the caller's pointer in sync */
+		if (m == NULL) {
+			DNETDEBUG("runt packet: m_pullup failed\n");
+			return;
+		}
+	}
+	ah = mtod(m, struct arphdr *);	/* m_pullup may have moved the data */
+
+	op = ntohs(ah->ar_op);
+	memcpy(&isaddr, ar_spa(ah), sizeof(isaddr));
+	memcpy(&itaddr, ar_tpa(ah), sizeof(itaddr));
+	enaddr = (uint8_t *)IF_LLADDR(ifp);
+
+	if (memcmp(ar_sha(ah), enaddr, ifp->if_addrlen) == 0) {
+		DNETDEBUG("ignoring ARP from myself\n");
+		return;
+	}
+
+	if (isaddr.s_addr == pcb->dp_client) {
+		printf("%s: %*D is using my IP address %s!\n", __func__,
+		    ifp->if_addrlen, (u_char *)ar_sha(ah), ":",
+		    inet_ntoa_r(isaddr, buf));
+		return;
+	}
+
+	if (memcmp(ar_sha(ah), ifp->if_broadcastaddr, ifp->if_addrlen) == 0) {
+		DNETDEBUG("ignoring ARP from broadcast address\n");
+		return;
+	}
+
+	if (op == ARPOP_REPLY) {
+		if (isaddr.s_addr != pcb->dp_gateway &&
+		    isaddr.s_addr != pcb->dp_server) {
+			inet_ntoa_r(isaddr, buf);
+			DNETDEBUG("ignoring ARP reply from %s (not configured"
+			    " server or gateway)\n", buf);
+			return;
+		}
+		if (pcb->dp_state >= DN_STATE_HAVE_GW_MAC)
+			return;		/* Late/duplicate reply; keep state. */
+		MPASS(pcb->dp_state == DN_STATE_INIT);
+		memcpy(pcb->dp_gw_mac.octet, ar_sha(ah),
+		    min(ah->ar_hln, ETHER_ADDR_LEN));
+		DNETDEBUG("got server MAC address %6D\n",
+		    pcb->dp_gw_mac.octet, ":");
+		pcb->dp_state = DN_STATE_HAVE_GW_MAC;
+		return;
+	}
+
+	if (op != ARPOP_REQUEST) {
+		DNETDEBUG("ignoring ARP non-request/reply\n");
+		return;
+	}
+
+	if (itaddr.s_addr != pcb->dp_client) {
+		DNETDEBUG("ignoring ARP not to our IP\n");
+		return;
+	}
+
+	memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln);	/* requester is target */
+	memcpy(ar_sha(ah), enaddr, ah->ar_hln);
+	memcpy(ar_tpa(ah), ar_spa(ah), ah->ar_pln);
+	memcpy(ar_spa(ah), &itaddr, ah->ar_pln);
+	ah->ar_op = htons(ARPOP_REPLY);	/* reuse the request as our reply */
+	ah->ar_pro = htons(ETHERTYPE_IP);
+	m->m_flags &= ~(M_BCAST|M_MCAST);
+	m->m_len = arphdr_len(ah);
+	m->m_pkthdr.len = m->m_len;
+
+	memcpy(dst.octet, ar_tha(ah), ETHER_ADDR_LEN);
+	debugnet_ether_output(m, ifp, dst, ETHERTYPE_ARP);
+	*mb = NULL;	/* mbuf was handed off; caller must not free it */
+}
+
+/*
+ * Sends ARP requests to locate the server and waits for a response.
+ * We first try to ARP the server itself, and fall back to the provided
+ * gateway if the server appears to be off-link.
+ *
+ * Return value:
+ *	0 on success (dp_state reached DN_STATE_HAVE_GW_MAC)
+ *	errno on error: a debugnet_send_arp() failure, or ETIMEDOUT
+ */
+int
+debugnet_arp_gw(struct debugnet_pcb *pcb)
+{
+	in_addr_t dst;
+	int error, polls, retries;
+
+	dst = pcb->dp_server;		/* ARP the server itself first */
+restart:
+	for (retries = 0; retries < debugnet_arp_nretries; retries++) {
+		error = debugnet_send_arp(pcb, dst);
+		if (error != 0)
+			return (error);
+		for (polls = 0; polls < debugnet_npolls &&
+		    pcb->dp_state < DN_STATE_HAVE_GW_MAC; polls++) {
+			debugnet_network_poll(pcb->dp_ifp);
+			DELAY(500);	/* 0.5ms between polls */
+		}
+		if (pcb->dp_state >= DN_STATE_HAVE_GW_MAC)
+			break;
+		printf("(ARP retry)");
+	}
+	if (pcb->dp_state >= DN_STATE_HAVE_GW_MAC)
+		return (0);
+	if (dst == pcb->dp_server) {
+		printf("\nFailed to ARP server");
+		if (pcb->dp_gateway != INADDR_ANY) {
+			printf(", trying to reach gateway...\n");
+			dst = pcb->dp_gateway;	/* second pass: gateway */
+			goto restart;
+		} else
+			printf(".\n");
+	} else
+		printf("\nFailed to ARP gateway.\n");
+
+	return (ETIMEDOUT);
+}
+
+/*
+ * Unreliable IPv4 transmission of an mbuf chain to the debugnet server
+ * Note: can't handle fragmentation; fails if the packet is larger than
+ * ifp->if_mtu after adding the UDP/IP headers
+ *
+ * Parameters:
+ *	pcb	The debugnet context block (needs DN_STATE_HAVE_GW_MAC or later)
+ *	m	mbuf chain led by the UDP header; freed if it exceeds the MTU
+ *
+ * Returns:
+ *	int	see errno.h, 0 for success (ENOBUFS: no mbuf or over MTU)
+ */
+int
+debugnet_ip_output(struct debugnet_pcb *pcb, struct mbuf *m)
+{
+	struct udphdr *udp;
+	struct ifnet *ifp;
+	struct ip *ip;
+
+	MPASS(pcb->dp_state >= DN_STATE_HAVE_GW_MAC);
+
+	ifp = pcb->dp_ifp;
+
+	M_PREPEND(m, sizeof(*ip), M_NOWAIT);
+	if (m == NULL) {
+		printf("%s: out of mbufs\n", __func__);
+		return (ENOBUFS);
+	}
+
+	if (m->m_pkthdr.len > ifp->if_mtu) {
+		printf("%s: Packet is too big: %d > MTU %u\n", __func__,
+		    m->m_pkthdr.len, ifp->if_mtu);
+		m_freem(m);
+		return (ENOBUFS);
+	}
+
+	ip = mtod(m, void *);
+	udp = (void *)(ip + 1);	/* UDP header sits right behind the IP one */
+
+	memset(ip, 0, offsetof(struct ip, ip_p)); /* pseudo-header zeros */
+	ip->ip_p = IPPROTO_UDP;
+	ip->ip_sum = udp->uh_ulen;	/* pseudo-header UDP length slot */
+	ip->ip_src = (struct in_addr) { pcb->dp_client };
+	ip->ip_dst = (struct in_addr) { pcb->dp_server };
+
+	/* Compute UDP-IPv4 checksum. */
+	udp->uh_sum = in_cksum(m, m->m_pkthdr.len);
+	if (udp->uh_sum == 0)
+		udp->uh_sum = 0xffff;	/* 0 on the wire means "no checksum" */
+
+	ip->ip_v = IPVERSION;	/* from here on, fill in the real IP header */
+	ip->ip_hl = sizeof(*ip) >> 2;
+	ip->ip_tos = 0;
+	ip->ip_len = htons(m->m_pkthdr.len);
+	ip->ip_id = 0;
+	ip->ip_off = htons(IP_DF);
+	ip->ip_ttl = 255;
+	ip->ip_sum = 0;		/* field must be zero while it is summed */
+	ip->ip_sum = in_cksum(m, sizeof(struct ip));
+
+	return (debugnet_ether_output(m, ifp, pcb->dp_gw_mac, ETHERTYPE_IP));
+}
diff --git a/sys/net/debugnet_int.h b/sys/net/debugnet_int.h
new file mode 100644
index 000000000000..0dd206cf6761
--- /dev/null
+++ b/sys/net/debugnet_int.h
@@ -0,0 +1,91 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2019 Isilon Systems, LLC.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#pragma once
+
+#ifndef DEBUGNET_INTERNAL
+#error "Don't include this"
+#endif
+
+#define	DNETDEBUG(f, ...) do {						\
+	if (debugnet_debug > 0)						\
+		printf(("%s: " f), __func__, ## __VA_ARGS__);		\
+} while (0)
+#define	DNETDEBUG_IF(i, f, ...) do {					\
+	if (debugnet_debug > 0)						\
+		if_printf((i), ("%s: " f), __func__, ## __VA_ARGS__);	\
+} while (0)
+#define	DNETDEBUGV(f, ...) do {						\
+	if (debugnet_debug > 1)						\
+		printf(("%s: " f), __func__, ## __VA_ARGS__);		\
+} while (0)
+
+enum dnet_pcb_st {
+	DN_STATE_INIT = 1,		/* next-hop MAC not yet known */
+	DN_STATE_HAVE_GW_MAC,		/* ARP reply stored in dp_gw_mac */
+	DN_STATE_GOT_HERALD_PORT,	/* presumably herald acked -- confirm */
+};
+
+struct debugnet_pcb {
+	uint64_t		dp_rcvd_acks;	/* ack bookkeeping (see CTASSERT) */
+
+	in_addr_t		dp_client;	/* our own IPv4 address */
+	in_addr_t		dp_server;	/* debugnet server IPv4 address */
+	in_addr_t		dp_gateway;	/* gateway, INADDR_ANY if none */
+	uint32_t		dp_seqno;	/* presumably next seqno -- confirm */
+
+	struct ether_addr	dp_gw_mac;	/* next-hop MAC learned via ARP */
+	uint16_t		dp_server_port;	/* presumably UDP port -- confirm */
+
+	struct ifnet		*dp_ifp;	/* interface we send and poll on */
+	/* Saved driver if_input to restore on close. */
+	void			(*dp_drv_input)(struct ifnet *, struct mbuf *);
+
+	enum dnet_pcb_st	dp_state;	/* progress, see dnet_pcb_st */
+	uint16_t		dp_client_ack_port; /* presumably ours -- confirm */
+};
+
+/* TODO(CEM): Obviate this assertion by using a BITSET(9) for acks.
*/ +CTASSERT(sizeof(((struct debugnet_pcb *)0)->dp_rcvd_acks) * NBBY >= + DEBUGNET_MAX_IN_FLIGHT); + +extern unsigned debugnet_debug; +SYSCTL_DECL(_net_debugnet); + +int debugnet_ether_output(struct mbuf *, struct ifnet *, struct ether_addr, + u_short); +void debugnet_handle_udp(struct debugnet_pcb *, struct mbuf **); +void debugnet_network_poll(struct ifnet *); + +#ifdef INET +int debugnet_arp_gw(struct debugnet_pcb *); +void debugnet_handle_arp(struct debugnet_pcb *, struct mbuf **); +void debugnet_handle_ip(struct debugnet_pcb *, struct mbuf **); +int debugnet_ip_output(struct debugnet_pcb *, struct mbuf *); +#endif diff --git a/sys/net/if.c b/sys/net/if.c index a22eb7ef362b..87755c8a4360 100644 --- a/sys/net/if.c +++ b/sys/net/if.c @@ -88,8 +88,8 @@ #include #include #ifdef INET +#include #include -#include #endif /* INET */ #ifdef INET6 #include @@ -2787,7 +2787,7 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td) getmicrotime(&ifp->if_lastchange); rt_ifmsg(ifp); #ifdef INET - NETDUMP_REINIT(ifp); + DEBUGNET_NOTIFY_MTU(ifp); #endif } /* diff --git a/sys/net/if_var.h b/sys/net/if_var.h index f5c58bb56c7b..522b265b6055 100644 --- a/sys/net/if_var.h +++ b/sys/net/if_var.h @@ -70,7 +70,7 @@ struct route; /* if_output */ struct vnet; struct ifmedia; struct netmap_adapter; -struct netdump_methods; +struct debugnet_methods; #ifdef _KERNEL #include @@ -417,9 +417,9 @@ struct ifnet { uint8_t if_pcp; /* - * Netdump hooks to be called while dumping. + * Debugnet (Netdump) hooks to be called while in db/panic. 
*/ - struct netdump_methods *if_netdump_methods; + struct debugnet_methods *if_debugnet_methods; struct epoch_context if_epoch_ctx; /* diff --git a/sys/net/iflib.c b/sys/net/iflib.c index ed604ab384c8..73606981a492 100644 --- a/sys/net/iflib.c +++ b/sys/net/iflib.c @@ -59,6 +59,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -71,7 +72,6 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include #include #include @@ -726,7 +726,7 @@ static struct mtx cpu_offset_mtx; MTX_SYSINIT(iflib_cpu_offset, &cpu_offset_mtx, "iflib_cpu_offset lock", MTX_DEF); -NETDUMP_DEFINE(iflib); +DEBUGNET_DEFINE(iflib); #ifdef DEV_NETMAP #include @@ -4775,7 +4775,7 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct } *ctxp = ctx; - NETDUMP_SET(ctx->ifc_ifp, iflib); + DEBUGNET_SET(ctx->ifc_ifp, iflib); if_setgetcounterfn(ctx->ifc_ifp, iflib_if_get_counter); iflib_add_device_sysctl_post(ctx); @@ -6719,9 +6719,9 @@ iflib_fixup_rx(struct mbuf *m) } #endif -#ifdef NETDUMP +#ifdef DEBUGNET static void -iflib_netdump_init(if_t ifp, int *nrxr, int *ncl, int *clsize) +iflib_debugnet_init(if_t ifp, int *nrxr, int *ncl, int *clsize) { if_ctx_t ctx; @@ -6734,7 +6734,7 @@ iflib_netdump_init(if_t ifp, int *nrxr, int *ncl, int *clsize) } static void -iflib_netdump_event(if_t ifp, enum netdump_ev event) +iflib_debugnet_event(if_t ifp, enum debugnet_ev event) { if_ctx_t ctx; if_softc_ctx_t scctx; @@ -6746,7 +6746,7 @@ iflib_netdump_event(if_t ifp, enum netdump_ev event) scctx = &ctx->ifc_softc_ctx; switch (event) { - case NETDUMP_START: + case DEBUGNET_START: for (i = 0; i < scctx->isc_nrxqsets; i++) { rxq = &ctx->ifc_rxqs[i]; for (j = 0; j < rxq->ifr_nfl; j++) { @@ -6762,7 +6762,7 @@ iflib_netdump_event(if_t ifp, enum netdump_ev event) } static int -iflib_netdump_transmit(if_t ifp, struct mbuf *m) +iflib_debugnet_transmit(if_t ifp, struct mbuf *m) { if_ctx_t ctx; iflib_txq_t txq; @@ -6781,7 +6781,7 @@ iflib_netdump_transmit(if_t ifp, 
struct mbuf *m) } static int -iflib_netdump_poll(if_t ifp, int count) +iflib_debugnet_poll(if_t ifp, int count) { if_ctx_t ctx; if_softc_ctx_t scctx; @@ -6802,4 +6802,4 @@ iflib_netdump_poll(if_t ifp, int count) (void)iflib_rxeof(&ctx->ifc_rxqs[i], 16 /* XXX */); return (0); } -#endif /* NETDUMP */ +#endif /* DEBUGNET */ diff --git a/sys/netinet/netdump/netdump.h b/sys/netinet/netdump/netdump.h index cdf53b78c50e..aeb44c0b0d21 100644 --- a/sys/netinet/netdump/netdump.h +++ b/sys/netinet/netdump/netdump.h @@ -37,28 +37,25 @@ #include #include +/* Netdump wire protocol definitions are consumed by the ftp/netdumpd port. */ #define NETDUMP_PORT 20023 /* Server UDP port for heralds. */ #define NETDUMP_ACKPORT 20024 /* Client UDP port for acks. */ -#define NETDUMP_HERALD 1 /* Broadcast before starting a dump. */ -#define NETDUMP_FINISHED 2 /* Send after finishing a dump. */ -#define NETDUMP_VMCORE 3 /* Contains dump data. */ +#define NETDUMP_HERALD DEBUGNET_HERALD +#define NETDUMP_FINISHED DEBUGNET_FINISHED +#define NETDUMP_VMCORE DEBUGNET_DATA #define NETDUMP_KDH 4 /* Contains kernel dump header. */ #define NETDUMP_EKCD_KEY 5 /* Contains kernel dump key. */ #define NETDUMP_DATASIZE 4096 /* Arbitrary packet size limit. */ -struct netdump_msg_hdr { - uint32_t mh_type; /* Netdump message type. */ - uint32_t mh_seqno; /* Match acks with msgs. */ - uint64_t mh_offset; /* vmcore offset (bytes). */ - uint32_t mh_len; /* Attached data (bytes). */ - uint32_t mh__pad; -} __packed; - -struct netdump_ack { - uint32_t na_seqno; /* Match acks with msgs. */ -} __packed; +/* For netdumpd. 
*/ +#ifndef _KERNEL +#define netdump_msg_hdr debugnet_msg_hdr +#define mh__pad mh_aux2 +#define netdump_ack debugnet_ack +#define na_seqno da_seqno +#endif /* !_KERNEL */ struct netdump_conf_freebsd12 { struct diocskerneldump_arg_freebsd12 ndc12_kda; @@ -73,58 +70,4 @@ struct netdump_conf_freebsd12 { #define _PATH_NETDUMP "/dev/netdump" -#ifdef _KERNEL -#ifdef NETDUMP - -#define NETDUMP_MAX_IN_FLIGHT 64 - -enum netdump_ev { - NETDUMP_START, - NETDUMP_END, -}; - -struct ifnet; -struct mbuf; - -void netdump_reinit(struct ifnet *); - -typedef void netdump_init_t(struct ifnet *, int *nrxr, int *ncl, int *clsize); -typedef void netdump_event_t(struct ifnet *, enum netdump_ev); -typedef int netdump_transmit_t(struct ifnet *, struct mbuf *); -typedef int netdump_poll_t(struct ifnet *, int); - -struct netdump_methods { - netdump_init_t *nd_init; - netdump_event_t *nd_event; - netdump_transmit_t *nd_transmit; - netdump_poll_t *nd_poll; -}; - -#define NETDUMP_DEFINE(driver) \ - static netdump_init_t driver##_netdump_init; \ - static netdump_event_t driver##_netdump_event; \ - static netdump_transmit_t driver##_netdump_transmit; \ - static netdump_poll_t driver##_netdump_poll; \ - \ - static struct netdump_methods driver##_netdump_methods = { \ - .nd_init = driver##_netdump_init, \ - .nd_event = driver##_netdump_event, \ - .nd_transmit = driver##_netdump_transmit, \ - .nd_poll = driver##_netdump_poll, \ - } - -#define NETDUMP_REINIT(ifp) netdump_reinit(ifp) - -#define NETDUMP_SET(ifp, driver) \ - (ifp)->if_netdump_methods = &driver##_netdump_methods - -#else /* !NETDUMP */ - -#define NETDUMP_DEFINE(driver) -#define NETDUMP_REINIT(ifp) -#define NETDUMP_SET(ifp, driver) - -#endif /* NETDUMP */ -#endif /* _KERNEL */ - #endif /* _NETINET_NETDUMP_H_ */ diff --git a/sys/netinet/netdump/netdump_client.c b/sys/netinet/netdump/netdump_client.c index 5887a3796212..8c5b8c446bf0 100644 --- a/sys/netinet/netdump/netdump_client.c +++ b/sys/netinet/netdump/netdump_client.c @@ -49,6 +49,7 @@ 
__FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -57,6 +58,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -71,57 +73,26 @@ __FBSDID("$FreeBSD$"); #include #include -#define NETDDEBUG(f, ...) do { \ - if (nd_debug > 0) \ - printf(("%s: " f), __func__, ## __VA_ARGS__); \ -} while (0) -#define NETDDEBUG_IF(i, f, ...) do { \ - if (nd_debug > 0) \ - if_printf((i), ("%s: " f), __func__, ## __VA_ARGS__); \ -} while (0) #define NETDDEBUGV(f, ...) do { \ if (nd_debug > 1) \ printf(("%s: " f), __func__, ## __VA_ARGS__); \ } while (0) -#define NETDDEBUGV_IF(i, f, ...) do { \ - if (nd_debug > 1) \ - if_printf((i), ("%s: " f), __func__, ## __VA_ARGS__); \ -} while (0) -static int netdump_arp_gw(void); -static void netdump_cleanup(void); static int netdump_configure(struct diocskerneldump_arg *, struct thread *); static int netdump_dumper(void *priv __unused, void *virtual, vm_offset_t physical __unused, off_t offset, size_t length); static bool netdump_enabled(void); static int netdump_enabled_sysctl(SYSCTL_HANDLER_ARGS); -static int netdump_ether_output(struct mbuf *m, struct ifnet *ifp, - struct ether_addr dst, u_short etype); -static void netdump_handle_arp(struct mbuf **mb); -static void netdump_handle_ip(struct mbuf **mb); static int netdump_ioctl(struct cdev *dev __unused, u_long cmd, caddr_t addr, int flags __unused, struct thread *td); static int netdump_modevent(module_t mod, int type, void *priv); -static void netdump_network_poll(void); -static void netdump_pkt_in(struct ifnet *ifp, struct mbuf *m); -static void netdump_reinit_internal(struct ifnet *ifp); -static int netdump_send(uint32_t type, off_t offset, unsigned char *data, - uint32_t datalen); -static int netdump_send_arp(in_addr_t dst); static int netdump_start(struct dumperinfo *di); -static int netdump_udp_output(struct mbuf *m); static void netdump_unconfigure(void); /* Must be at least as big as the chunks dumpsys() gives us. 
*/ static unsigned char nd_buf[MAXDUMPPGS * PAGE_SIZE]; -static uint32_t nd_seqno; -static int dump_failed, have_gw_mac; -static void (*drv_if_input)(struct ifnet *, struct mbuf *); -static int restore_gw_addr; - -static uint64_t rcvd_acks; -CTASSERT(sizeof(rcvd_acks) * NBBY == NETDUMP_MAX_IN_FLIGHT); +static int dump_failed; /* Configuration parameters. */ static struct { @@ -131,6 +102,7 @@ static struct { union kd_ip ndc_gateway; uint8_t ndc_af; /* Runtime State */ + struct debugnet_pcb *nd_pcb; off_t nd_tx_off; size_t nd_buf_len; } nd_conf; @@ -147,10 +119,8 @@ SX_SYSINIT(nd_conf, &nd_conf_lk, "netdump configuration lock"); #define NETDUMP_RUNLOCK() sx_sunlock(&nd_conf_lk) #define NETDUMP_ASSERT_WLOCKED() sx_assert(&nd_conf_lk, SA_XLOCKED) #define NETDUMP_ASSERT_LOCKED() sx_assert(&nd_conf_lk, SA_LOCKED) -static struct ether_addr nd_gw_mac; static struct ifnet *nd_ifp; static eventhandler_tag nd_detach_cookie; -static uint16_t nd_server_port = NETDUMP_PORT; FEATURE(netdump, "Netdump client support"); @@ -167,17 +137,18 @@ static char nd_path[MAXPATHLEN]; SYSCTL_STRING(_net_netdump, OID_AUTO, path, CTLFLAG_RW, nd_path, sizeof(nd_path), "Server path for output files"); -static int nd_polls = 2000; +/* + * The following three variables were moved to debugnet(4), but these knobs + * were retained as aliases. 
+ */ SYSCTL_INT(_net_netdump, OID_AUTO, polls, CTLFLAG_RWTUN, - &nd_polls, 0, + &debugnet_npolls, 0, "Number of times to poll before assuming packet loss (0.5ms per poll)"); -static int nd_retries = 10; SYSCTL_INT(_net_netdump, OID_AUTO, retries, CTLFLAG_RWTUN, - &nd_retries, 0, + &debugnet_nretries, 0, "Number of retransmit attempts before giving up"); -static int nd_arp_retries = 3; SYSCTL_INT(_net_netdump, OID_AUTO, arp_retries, CTLFLAG_RWTUN, - &nd_arp_retries, 0, + &debugnet_arp_nretries, 0, "Number of ARP attempts before giving up"); static bool @@ -203,721 +174,6 @@ netdump_enabled_sysctl(SYSCTL_HANDLER_ARGS) return (EPERM); } -/* - * Checks for netdump support on a network interface - * - * Parameters: - * ifp The network interface that is being tested for support - * - * Returns: - * int 1 if the interface is supported, 0 if not - */ -static bool -netdump_supported_nic(struct ifnet *ifp) -{ - - return (ifp->if_netdump_methods != NULL); -} - -/*- - * Network specific primitives. 
- * Following down the code they are divided ordered as: - * - Packet buffer primitives - * - Output primitives - * - Input primitives - * - Polling primitives - */ - -/* - * Handles creation of the ethernet header, then places outgoing packets into - * the tx buffer for the NIC - * - * Parameters: - * m The mbuf containing the packet to be sent (will be freed by - * this function or the NIC driver) - * ifp The interface to send on - * dst The destination ethernet address (source address will be looked - * up using ifp) - * etype The ETHERTYPE_* value for the protocol that is being sent - * - * Returns: - * int see errno.h, 0 for success - */ -static int -netdump_ether_output(struct mbuf *m, struct ifnet *ifp, struct ether_addr dst, - u_short etype) -{ - struct ether_header *eh; - - if (((ifp->if_flags & (IFF_MONITOR | IFF_UP)) != IFF_UP) || - (ifp->if_drv_flags & IFF_DRV_RUNNING) != IFF_DRV_RUNNING) { - if_printf(ifp, "netdump_ether_output: interface isn't up\n"); - m_freem(m); - return (ENETDOWN); - } - - /* Fill in the ethernet header. 
*/ - M_PREPEND(m, ETHER_HDR_LEN, M_NOWAIT); - if (m == NULL) { - printf("%s: out of mbufs\n", __func__); - return (ENOBUFS); - } - eh = mtod(m, struct ether_header *); - memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN); - memcpy(eh->ether_dhost, dst.octet, ETHER_ADDR_LEN); - eh->ether_type = htons(etype); - return ((ifp->if_netdump_methods->nd_transmit)(ifp, m)); -} - -/* - * Unreliable transmission of an mbuf chain to the netdump server - * Note: can't handle fragmentation; fails if the packet is larger than - * nd_ifp->if_mtu after adding the UDP/IP headers - * - * Parameters: - * m mbuf chain - * - * Returns: - * int see errno.h, 0 for success - */ -static int -netdump_udp_output(struct mbuf *m) -{ - struct udpiphdr *ui; - struct ip *ip; - - MPASS(netdump_enabled()); - - M_PREPEND(m, sizeof(struct udpiphdr), M_NOWAIT); - if (m == NULL) { - printf("%s: out of mbufs\n", __func__); - return (ENOBUFS); - } - - if (m->m_pkthdr.len > nd_ifp->if_mtu) { - printf("netdump_udp_output: Packet is too big: %d > MTU %u\n", - m->m_pkthdr.len, nd_ifp->if_mtu); - m_freem(m); - return (ENOBUFS); - } - - ui = mtod(m, struct udpiphdr *); - bzero(ui->ui_x1, sizeof(ui->ui_x1)); - ui->ui_pr = IPPROTO_UDP; - ui->ui_len = htons(m->m_pkthdr.len - sizeof(struct ip)); - ui->ui_ulen = ui->ui_len; - ui->ui_src = nd_client; - ui->ui_dst = nd_server; - /* Use this src port so that the server can connect() the socket */ - ui->ui_sport = htons(NETDUMP_ACKPORT); - ui->ui_dport = htons(nd_server_port); - ui->ui_sum = 0; - if ((ui->ui_sum = in_cksum(m, m->m_pkthdr.len)) == 0) - ui->ui_sum = 0xffff; - - ip = mtod(m, struct ip *); - ip->ip_v = IPVERSION; - ip->ip_hl = sizeof(struct ip) >> 2; - ip->ip_tos = 0; - ip->ip_len = htons(m->m_pkthdr.len); - ip->ip_id = 0; - ip->ip_off = htons(IP_DF); - ip->ip_ttl = 255; - ip->ip_sum = 0; - ip->ip_sum = in_cksum(m, sizeof(struct ip)); - - return (netdump_ether_output(m, nd_ifp, nd_gw_mac, ETHERTYPE_IP)); -} - -/* - * Builds and sends a single ARP 
request to locate the server - * - * Return value: - * 0 on success - * errno on error - */ -static int -netdump_send_arp(in_addr_t dst) -{ - struct ether_addr bcast; - struct mbuf *m; - struct arphdr *ah; - int pktlen; - - MPASS(netdump_enabled()); - - /* Fill-up a broadcast address. */ - memset(&bcast, 0xFF, ETHER_ADDR_LEN); - m = m_gethdr(M_NOWAIT, MT_DATA); - if (m == NULL) { - printf("netdump_send_arp: Out of mbufs\n"); - return (ENOBUFS); - } - pktlen = arphdr_len2(ETHER_ADDR_LEN, sizeof(struct in_addr)); - m->m_len = pktlen; - m->m_pkthdr.len = pktlen; - MH_ALIGN(m, pktlen); - ah = mtod(m, struct arphdr *); - ah->ar_hrd = htons(ARPHRD_ETHER); - ah->ar_pro = htons(ETHERTYPE_IP); - ah->ar_hln = ETHER_ADDR_LEN; - ah->ar_pln = sizeof(struct in_addr); - ah->ar_op = htons(ARPOP_REQUEST); - memcpy(ar_sha(ah), IF_LLADDR(nd_ifp), ETHER_ADDR_LEN); - ((struct in_addr *)ar_spa(ah))->s_addr = nd_client.s_addr; - bzero(ar_tha(ah), ETHER_ADDR_LEN); - ((struct in_addr *)ar_tpa(ah))->s_addr = dst; - return (netdump_ether_output(m, nd_ifp, bcast, ETHERTYPE_ARP)); -} - -/* - * Sends ARP requests to locate the server and waits for a response. - * We first try to ARP the server itself, and fall back to the provided - * gateway if the server appears to be off-link. 
- * - * Return value: - * 0 on success - * errno on error - */ -static int -netdump_arp_gw(void) -{ - in_addr_t dst; - int error, polls, retries; - - dst = nd_server.s_addr; -restart: - for (retries = 0; retries < nd_arp_retries && have_gw_mac == 0; - retries++) { - error = netdump_send_arp(dst); - if (error != 0) - return (error); - for (polls = 0; polls < nd_polls && have_gw_mac == 0; polls++) { - netdump_network_poll(); - DELAY(500); - } - if (have_gw_mac == 0) - printf("(ARP retry)"); - } - if (have_gw_mac != 0) - return (0); - if (dst == nd_server.s_addr && nd_server.s_addr != nd_gateway.s_addr) { - printf("Failed to ARP server, trying to reach gateway...\n"); - dst = nd_gateway.s_addr; - goto restart; - } - - printf("\nARP timed out.\n"); - return (ETIMEDOUT); -} - -/* - * Dummy free function for netdump clusters. - */ -static void -netdump_mbuf_free(struct mbuf *m __unused) -{ -} - -/* - * Construct and reliably send a netdump packet. May fail from a resource - * shortage or extreme number of unacknowledged retransmissions. Wait for - * an acknowledgement before returning. Splits packets into chunks small - * enough to be sent without fragmentation (looks up the interface MTU) - * - * Parameters: - * type netdump packet type (HERALD, FINISHED, or VMCORE) - * offset vmcore data offset (bytes) - * data vmcore data - * datalen vmcore data size (bytes) - * - * Returns: - * int see errno.h, 0 for success - */ -static int -netdump_send(uint32_t type, off_t offset, unsigned char *data, uint32_t datalen) -{ - struct netdump_msg_hdr *nd_msg_hdr; - struct mbuf *m, *m2; - uint64_t want_acks; - uint32_t i, pktlen, sent_so_far; - int retries, polls, error; - - want_acks = 0; - rcvd_acks = 0; - retries = 0; - - MPASS(netdump_enabled()); - -retransmit: - /* Chunks can be too big to fit in packets. */ - for (i = sent_so_far = 0; sent_so_far < datalen || - (i == 0 && datalen == 0); i++) { - pktlen = datalen - sent_so_far; - - /* First bound: the packet structure. 
*/ - pktlen = min(pktlen, NETDUMP_DATASIZE); - - /* Second bound: the interface MTU (assume no IP options). */ - pktlen = min(pktlen, nd_ifp->if_mtu - sizeof(struct udpiphdr) - - sizeof(struct netdump_msg_hdr)); - - /* - * Check if it is retransmitting and this has been ACKed - * already. - */ - if ((rcvd_acks & (1 << i)) != 0) { - sent_so_far += pktlen; - continue; - } - - /* - * Get and fill a header mbuf, then chain data as an extended - * mbuf. - */ - m = m_gethdr(M_NOWAIT, MT_DATA); - if (m == NULL) { - printf("netdump_send: Out of mbufs\n"); - return (ENOBUFS); - } - m->m_len = sizeof(struct netdump_msg_hdr); - m->m_pkthdr.len = sizeof(struct netdump_msg_hdr); - MH_ALIGN(m, sizeof(struct netdump_msg_hdr)); - nd_msg_hdr = mtod(m, struct netdump_msg_hdr *); - nd_msg_hdr->mh_seqno = htonl(nd_seqno + i); - nd_msg_hdr->mh_type = htonl(type); - nd_msg_hdr->mh_offset = htobe64(offset + sent_so_far); - nd_msg_hdr->mh_len = htonl(pktlen); - nd_msg_hdr->mh__pad = 0; - - if (pktlen != 0) { - m2 = m_get(M_NOWAIT, MT_DATA); - if (m2 == NULL) { - m_freem(m); - printf("netdump_send: Out of mbufs\n"); - return (ENOBUFS); - } - MEXTADD(m2, data + sent_so_far, pktlen, - netdump_mbuf_free, NULL, NULL, 0, EXT_DISPOSABLE); - m2->m_len = pktlen; - - m_cat(m, m2); - m->m_pkthdr.len += pktlen; - } - error = netdump_udp_output(m); - if (error != 0) - return (error); - - /* Note that we're waiting for this packet in the bitfield. */ - want_acks |= (1 << i); - sent_so_far += pktlen; - } - if (i >= NETDUMP_MAX_IN_FLIGHT) - printf("Warning: Sent more than %d packets (%d). " - "Acknowledgements will fail unless the size of " - "rcvd_acks/want_acks is increased.\n", - NETDUMP_MAX_IN_FLIGHT, i); - - /* - * Wait for acks. A *real* window would speed things up considerably. - */ - polls = 0; - while (rcvd_acks != want_acks) { - if (polls++ > nd_polls) { - if (retries++ > nd_retries) - return (ETIMEDOUT); - printf(". 
"); - goto retransmit; - } - netdump_network_poll(); - DELAY(500); - } - nd_seqno += i; - return (0); -} - -/* - * Handler for IP packets: checks their sanity and then processes any netdump - * ACK packets it finds. - * - * It needs to replicate partially the behaviour of ip_input() and - * udp_input(). - * - * Parameters: - * mb a pointer to an mbuf * containing the packet received - * Updates *mb if m_pullup et al change the pointer - * Assumes the calling function will take care of freeing the mbuf - */ -static void -netdump_handle_ip(struct mbuf **mb) -{ - struct ip *ip; - struct udpiphdr *udp; - struct netdump_ack *nd_ack; - struct mbuf *m; - int rcv_ackno; - unsigned short hlen; - - /* IP processing. */ - m = *mb; - if (m->m_pkthdr.len < sizeof(struct ip)) { - NETDDEBUG("dropping packet too small for IP header\n"); - return; - } - if (m->m_len < sizeof(struct ip)) { - m = m_pullup(m, sizeof(struct ip)); - *mb = m; - if (m == NULL) { - NETDDEBUG("m_pullup failed\n"); - return; - } - } - ip = mtod(m, struct ip *); - - /* IP version. */ - if (ip->ip_v != IPVERSION) { - NETDDEBUG("bad IP version %d\n", ip->ip_v); - return; - } - - /* Header length. */ - hlen = ip->ip_hl << 2; - if (hlen < sizeof(struct ip)) { - NETDDEBUG("bad IP header length (%hu)\n", hlen); - return; - } - if (hlen > m->m_len) { - m = m_pullup(m, hlen); - *mb = m; - if (m == NULL) { - NETDDEBUG("m_pullup failed\n"); - return; - } - ip = mtod(m, struct ip *); - } - /* Ignore packets with IP options. */ - if (hlen > sizeof(struct ip)) { - NETDDEBUG("drop packet with IP options\n"); - return; - } - -#ifdef INVARIANTS - if ((IN_LOOPBACK(ntohl(ip->ip_dst.s_addr)) || - IN_LOOPBACK(ntohl(ip->ip_src.s_addr))) && - (m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) { - NETDDEBUG("Bad IP header (RFC1122)\n"); - return; - } -#endif - - /* Checksum. 
*/ - if ((m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) != 0) { - if ((m->m_pkthdr.csum_flags & CSUM_IP_VALID) == 0) { - NETDDEBUG("bad IP checksum\n"); - return; - } - } else { - /* XXX */ ; - } - - /* Convert fields to host byte order. */ - ip->ip_len = ntohs(ip->ip_len); - if (ip->ip_len < hlen) { - NETDDEBUG("IP packet smaller (%hu) than header (%hu)\n", - ip->ip_len, hlen); - return; - } - if (m->m_pkthdr.len < ip->ip_len) { - NETDDEBUG("IP packet bigger (%hu) than ethernet packet (%d)\n", - ip->ip_len, m->m_pkthdr.len); - return; - } - if (m->m_pkthdr.len > ip->ip_len) { - - /* Truncate the packet to the IP length. */ - if (m->m_len == m->m_pkthdr.len) { - m->m_len = ip->ip_len; - m->m_pkthdr.len = ip->ip_len; - } else - m_adj(m, ip->ip_len - m->m_pkthdr.len); - } - - ip->ip_off = ntohs(ip->ip_off); - - /* Check that the source is the server's IP. */ - if (ip->ip_src.s_addr != nd_server.s_addr) { - NETDDEBUG("drop packet not from server (from 0x%x)\n", - ip->ip_src.s_addr); - return; - } - - /* Check if the destination IP is ours. */ - if (ip->ip_dst.s_addr != nd_client.s_addr) { - NETDDEBUGV("drop packet not to our IP\n"); - return; - } - - if (ip->ip_p != IPPROTO_UDP) { - NETDDEBUG("drop non-UDP packet\n"); - return; - } - - /* Do not deal with fragments. */ - if ((ip->ip_off & (IP_MF | IP_OFFMASK)) != 0) { - NETDDEBUG("drop fragmented packet\n"); - return; - } - - /* UDP custom is to have packet length not include IP header. */ - ip->ip_len -= hlen; - - /* UDP processing. */ - - /* Get IP and UDP headers together, along with the netdump packet. 
*/ - if (m->m_pkthdr.len < - sizeof(struct udpiphdr) + sizeof(struct netdump_ack)) { - NETDDEBUG("ignoring small packet\n"); - return; - } - if (m->m_len < sizeof(struct udpiphdr) + sizeof(struct netdump_ack)) { - m = m_pullup(m, sizeof(struct udpiphdr) + - sizeof(struct netdump_ack)); - *mb = m; - if (m == NULL) { - NETDDEBUG("m_pullup failed\n"); - return; - } - } - udp = mtod(m, struct udpiphdr *); - - if (ntohs(udp->ui_u.uh_dport) != NETDUMP_ACKPORT) { - NETDDEBUG("not on the netdump port.\n"); - return; - } - - /* Netdump processing. */ - - /* - * Packet is meant for us. Extract the ack sequence number and the - * port number if necessary. - */ - nd_ack = (struct netdump_ack *)(mtod(m, caddr_t) + - sizeof(struct udpiphdr)); - rcv_ackno = ntohl(nd_ack->na_seqno); - if (nd_server_port == NETDUMP_PORT) - nd_server_port = ntohs(udp->ui_u.uh_sport); - if (rcv_ackno >= nd_seqno + NETDUMP_MAX_IN_FLIGHT) - printf("%s: ACK %d too far in future!\n", __func__, rcv_ackno); - else if (rcv_ackno >= nd_seqno) { - /* We're interested in this ack. Record it. */ - rcvd_acks |= 1 << (rcv_ackno - nd_seqno); - } -} - -/* - * Handler for ARP packets: checks their sanity and then - * 1. If the ARP is a request for our IP, respond with our MAC address - * 2. If the ARP is a response from our server, record its MAC address - * - * It needs to replicate partially the behaviour of arpintr() and - * in_arpinput(). 
- * - * Parameters: - * mb a pointer to an mbuf * containing the packet received - * Updates *mb if m_pullup et al change the pointer - * Assumes the calling function will take care of freeing the mbuf - */ -static void -netdump_handle_arp(struct mbuf **mb) -{ - char buf[INET_ADDRSTRLEN]; - struct in_addr isaddr, itaddr, myaddr; - struct ether_addr dst; - struct mbuf *m; - struct arphdr *ah; - struct ifnet *ifp; - uint8_t *enaddr; - int req_len, op; - - m = *mb; - ifp = m->m_pkthdr.rcvif; - if (m->m_len < sizeof(struct arphdr)) { - m = m_pullup(m, sizeof(struct arphdr)); - *mb = m; - if (m == NULL) { - NETDDEBUG("runt packet: m_pullup failed\n"); - return; - } - } - - ah = mtod(m, struct arphdr *); - if (ntohs(ah->ar_hrd) != ARPHRD_ETHER) { - NETDDEBUG("unknown hardware address 0x%2D)\n", - (unsigned char *)&ah->ar_hrd, ""); - return; - } - if (ntohs(ah->ar_pro) != ETHERTYPE_IP) { - NETDDEBUG("drop ARP for unknown protocol %d\n", - ntohs(ah->ar_pro)); - return; - } - req_len = arphdr_len2(ifp->if_addrlen, sizeof(struct in_addr)); - if (m->m_len < req_len) { - m = m_pullup(m, req_len); - *mb = m; - if (m == NULL) { - NETDDEBUG("runt packet: m_pullup failed\n"); - return; - } - } - ah = mtod(m, struct arphdr *); - - op = ntohs(ah->ar_op); - memcpy(&isaddr, ar_spa(ah), sizeof(isaddr)); - memcpy(&itaddr, ar_tpa(ah), sizeof(itaddr)); - enaddr = (uint8_t *)IF_LLADDR(ifp); - myaddr = nd_client; - - if (memcmp(ar_sha(ah), enaddr, ifp->if_addrlen) == 0) { - NETDDEBUG("ignoring ARP from myself\n"); - return; - } - - if (isaddr.s_addr == nd_client.s_addr) { - printf("%s: %*D is using my IP address %s!\n", __func__, - ifp->if_addrlen, (u_char *)ar_sha(ah), ":", - inet_ntoa_r(isaddr, buf)); - return; - } - - if (memcmp(ar_sha(ah), ifp->if_broadcastaddr, ifp->if_addrlen) == 0) { - NETDDEBUG("ignoring ARP from broadcast address\n"); - return; - } - - if (op == ARPOP_REPLY) { - if (isaddr.s_addr != nd_gateway.s_addr && - isaddr.s_addr != nd_server.s_addr) { - inet_ntoa_r(isaddr, 
buf); - NETDDEBUG( - "ignoring ARP reply from %s (not netdump server)\n", - buf); - return; - } - memcpy(nd_gw_mac.octet, ar_sha(ah), - min(ah->ar_hln, ETHER_ADDR_LEN)); - have_gw_mac = 1; - NETDDEBUG("got server MAC address %6D\n", nd_gw_mac.octet, ":"); - return; - } - - if (op != ARPOP_REQUEST) { - NETDDEBUG("ignoring ARP non-request/reply\n"); - return; - } - - if (itaddr.s_addr != nd_client.s_addr) { - NETDDEBUG("ignoring ARP not to our IP\n"); - return; - } - - memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln); - memcpy(ar_sha(ah), enaddr, ah->ar_hln); - memcpy(ar_tpa(ah), ar_spa(ah), ah->ar_pln); - memcpy(ar_spa(ah), &itaddr, ah->ar_pln); - ah->ar_op = htons(ARPOP_REPLY); - ah->ar_pro = htons(ETHERTYPE_IP); - m->m_flags &= ~(M_BCAST|M_MCAST); - m->m_len = arphdr_len(ah); - m->m_pkthdr.len = m->m_len; - - memcpy(dst.octet, ar_tha(ah), ETHER_ADDR_LEN); - netdump_ether_output(m, ifp, dst, ETHERTYPE_ARP); - *mb = NULL; -} - -/* - * Handler for incoming packets directly from the network adapter - * Identifies the packet type (IP or ARP) and passes it along to one of the - * helper functions netdump_handle_ip or netdump_handle_arp. - * - * It needs to replicate partially the behaviour of ether_input() and - * ether_demux(). - * - * Parameters: - * ifp the interface the packet came from (should be nd_ifp) - * m an mbuf containing the packet received - */ -static void -netdump_pkt_in(struct ifnet *ifp, struct mbuf *m) -{ - struct ifreq ifr; - struct ether_header *eh; - u_short etype; - - /* Ethernet processing. 
*/ - if ((m->m_flags & M_PKTHDR) == 0) { - NETDDEBUG_IF(ifp, "discard frame without packet header\n"); - goto done; - } - if (m->m_len < ETHER_HDR_LEN) { - NETDDEBUG_IF(ifp, - "discard frame without leading eth header (len %u pktlen %u)\n", - m->m_len, m->m_pkthdr.len); - goto done; - } - if ((m->m_flags & M_HASFCS) != 0) { - m_adj(m, -ETHER_CRC_LEN); - m->m_flags &= ~M_HASFCS; - } - eh = mtod(m, struct ether_header *); - etype = ntohs(eh->ether_type); - if ((m->m_flags & M_VLANTAG) != 0 || etype == ETHERTYPE_VLAN) { - NETDDEBUG_IF(ifp, "ignoring vlan packets\n"); - goto done; - } - if (if_gethwaddr(ifp, &ifr) != 0) { - NETDDEBUG_IF(ifp, "failed to get hw addr for interface\n"); - goto done; - } - if (memcmp(ifr.ifr_addr.sa_data, eh->ether_dhost, - ETHER_ADDR_LEN) != 0) { - NETDDEBUG_IF(ifp, - "discard frame with incorrect destination addr\n"); - goto done; - } - - /* Done ethernet processing. Strip off the ethernet header. */ - m_adj(m, ETHER_HDR_LEN); - switch (etype) { - case ETHERTYPE_ARP: - netdump_handle_arp(&m); - break; - case ETHERTYPE_IP: - netdump_handle_ip(&m); - break; - default: - NETDDEBUG_IF(ifp, "dropping unknown ethertype %hu\n", etype); - break; - } -done: - if (m != NULL) - m_freem(m); -} - -/* - * After trapping, instead of assuming that most of the network stack is sane, - * we just poll the driver directly for packets. - */ -static void -netdump_network_poll(void) -{ - - MPASS(netdump_enabled()); - - nd_ifp->if_netdump_methods->nd_poll(nd_ifp, 1000); -} - /*- * Dumping specific primitives. 
*/ @@ -932,8 +188,11 @@ netdump_flush_buf(void) error = 0; if (nd_conf.nd_buf_len != 0) { - error = netdump_send(NETDUMP_VMCORE, nd_conf.nd_tx_off, - nd_buf, nd_conf.nd_buf_len); + struct debugnet_proto_aux auxdata = { + .dp_offset_start = nd_conf.nd_tx_off, + }; + error = debugnet_send(nd_conf.nd_pcb, DEBUGNET_DATA, nd_buf, + nd_conf.nd_buf_len, &auxdata); if (error == 0) nd_conf.nd_buf_len = 0; } @@ -972,11 +231,13 @@ netdump_dumper(void *priv __unused, void *virtual, if (dump_failed != 0) printf("failed to dump the kernel core\n"); - else if (netdump_send(NETDUMP_FINISHED, 0, NULL, 0) != 0) + else if ( + debugnet_sendempty(nd_conf.nd_pcb, DEBUGNET_FINISHED) != 0) printf("failed to close the transaction\n"); else printf("\nnetdump finished.\n"); - netdump_cleanup(); + debugnet_free(nd_conf.nd_pcb); + nd_conf.nd_pcb = NULL; return (0); } if (length > sizeof(nd_buf)) @@ -1005,9 +266,9 @@ netdump_dumper(void *priv __unused, void *virtual, static int netdump_start(struct dumperinfo *di) { - char *path; + struct debugnet_conn_params dcp; + struct debugnet_pcb *pcb; char buf[INET_ADDRSTRLEN]; - uint32_t len; int error; error = 0; @@ -1022,6 +283,8 @@ netdump_start(struct dumperinfo *di) return (EINVAL); } + memset(&dcp, 0, sizeof(dcp)); + if (nd_server.s_addr == INADDR_ANY) { printf("netdump_start: can't netdump; no server IP given\n"); return (EINVAL); @@ -1034,54 +297,29 @@ netdump_start(struct dumperinfo *di) /* We start dumping at offset 0. */ di->dumpoff = 0; - nd_seqno = 1; + dcp.dc_ifp = nd_ifp; - /* - * nd_server_port could have switched after the first ack the - * first time it gets called. Adjust it accordingly. - */ - nd_server_port = NETDUMP_PORT; + dcp.dc_client = nd_client.s_addr; + dcp.dc_server = nd_server.s_addr; + dcp.dc_gateway = nd_gateway.s_addr; - /* Switch to the netdump mbuf zones. 
*/ - netdump_mbuf_dump(); + dcp.dc_herald_port = NETDUMP_PORT; + dcp.dc_client_ack_port = NETDUMP_ACKPORT; - nd_ifp->if_netdump_methods->nd_event(nd_ifp, NETDUMP_START); + dcp.dc_herald_data = nd_path; + dcp.dc_herald_datalen = (nd_path[0] == 0) ? 0 : strlen(nd_path) + 1; - /* Make the card use *our* receive callback. */ - drv_if_input = nd_ifp->if_input; - nd_ifp->if_input = netdump_pkt_in; - - if (nd_gateway.s_addr == INADDR_ANY) { - restore_gw_addr = 1; - nd_gateway.s_addr = nd_server.s_addr; - } - - printf("netdump in progress. searching for server...\n"); - if (netdump_arp_gw()) { - printf("failed to locate server MAC address\n"); - error = EINVAL; - goto trig_abort; - } - - if (nd_path[0] != '\0') { - path = nd_path; - len = strlen(path) + 1; - } else { - path = NULL; - len = 0; - } - if (netdump_send(NETDUMP_HERALD, 0, path, len) != 0) { + error = debugnet_connect(&dcp, &pcb); + if (error != 0) { printf("failed to contact netdump server\n"); - error = EINVAL; - goto trig_abort; + /* Squash debugnet to something the dumper code understands. 
*/ + return (EINVAL); } - printf("netdumping to %s (%6D)\n", inet_ntoa_r(nd_server, buf), - nd_gw_mac.octet, ":"); - return (0); -trig_abort: - netdump_cleanup(); - return (error); + printf("netdumping to %s (%6D)\n", inet_ntoa_r(nd_server, buf), + debugnet_get_gw_mac(pcb), ":"); + nd_conf.nd_pcb = pcb; + return (0); } static int @@ -1094,34 +332,18 @@ netdump_write_headers(struct dumperinfo *di, struct kerneldumpheader *kdh, if (error != 0) return (error); memcpy(nd_buf, kdh, sizeof(*kdh)); - error = netdump_send(NETDUMP_KDH, 0, nd_buf, sizeof(*kdh)); + error = debugnet_send(nd_conf.nd_pcb, NETDUMP_KDH, nd_buf, + sizeof(*kdh), NULL); if (error == 0 && keysize > 0) { if (keysize > sizeof(nd_buf)) return (EINVAL); memcpy(nd_buf, key, keysize); - error = netdump_send(NETDUMP_EKCD_KEY, 0, nd_buf, keysize); + error = debugnet_send(nd_conf.nd_pcb, NETDUMP_EKCD_KEY, nd_buf, + keysize, NULL); } return (error); } -/* - * Cleanup routine for a possibly failed netdump. - */ -static void -netdump_cleanup(void) -{ - - if (restore_gw_addr != 0) { - nd_gateway.s_addr = INADDR_ANY; - restore_gw_addr = 0; - } - if (drv_if_input != NULL) { - nd_ifp->if_input = drv_if_input; - drv_if_input = NULL; - } - nd_ifp->if_netdump_methods->nd_event(nd_ifp, NETDUMP_END); -} - /*- * KLD specific code. 
*/ @@ -1146,11 +368,12 @@ netdump_unconfigure(void) kda.kda_index = KDA_REMOVE_DEV; (void)dumper_remove(nd_conf.ndc_iface, &kda); - netdump_mbuf_drain(); - if_rele(nd_ifp); nd_ifp = NULL; + log(LOG_WARNING, "netdump: Lost configured interface %s\n", + nd_conf.ndc_iface); + bzero(&nd_conf, sizeof(nd_conf)); } @@ -1185,7 +408,7 @@ netdump_configure(struct diocskerneldump_arg *conf, struct thread *td) if_rele(ifp); return (ENXIO); } - if (!netdump_supported_nic(ifp) || ifp->if_type != IFT_ETHER) { + if (!DEBUGNET_SUPPORTED_NIC(ifp)) { if_rele(ifp); return (ENODEV); } @@ -1194,8 +417,6 @@ netdump_configure(struct diocskerneldump_arg *conf, struct thread *td) if_rele(nd_ifp); nd_ifp = ifp; - netdump_reinit_internal(ifp); - #define COPY_SIZED(elm) do { \ _Static_assert(sizeof(nd_conf.ndc_ ## elm) == \ sizeof(conf->kda_ ## elm), "elm " __XSTRING(elm) " mismatch"); \ @@ -1212,43 +433,6 @@ netdump_configure(struct diocskerneldump_arg *conf, struct thread *td) return (0); } -/* - * Reinitialize the mbuf pool used by drivers while dumping. This is called - * from the generic ioctl handler for SIOCSIFMTU after any NIC driver has - * reconfigured itself. (I.e., it may not be a configured netdump interface.) - */ -void -netdump_reinit(struct ifnet *ifp) -{ - - NETDUMP_WLOCK(); - if (ifp != nd_ifp) { - NETDUMP_WUNLOCK(); - return; - } - netdump_reinit_internal(ifp); - NETDUMP_WUNLOCK(); -} - -static void -netdump_reinit_internal(struct ifnet *ifp) -{ - int clsize, nmbuf, ncl, nrxr; - - NETDUMP_ASSERT_WLOCKED(); - - ifp->if_netdump_methods->nd_init(ifp, &nrxr, &ncl, &clsize); - KASSERT(nrxr > 0, ("invalid receive ring count %d", nrxr)); - - /* - * We need two headers per message on the transmit side. Multiply by - * four to give us some breathing room. - */ - nmbuf = ncl * (4 + nrxr); - ncl *= nrxr; - netdump_mbuf_reinit(nmbuf, ncl, clsize); -} - /* * ioctl(2) handler for the netdump device. This is currently only used to * register netdump as a dump device. 
diff --git a/sys/powerpc/conf/GENERIC b/sys/powerpc/conf/GENERIC index 78d83c9ba12d..c17abd5000b3 100644 --- a/sys/powerpc/conf/GENERIC +++ b/sys/powerpc/conf/GENERIC @@ -104,6 +104,7 @@ options VERBOSE_SYSINIT=0 # Support debug.verbose_sysinit, off by default options EKCD # Support for encrypted kernel dumps options GZIO # gzip-compressed kernel and user dumps options ZSTDIO # zstd-compressed kernel and user dumps +options DEBUGNET # debugnet networking options NETDUMP # netdump(4) client support # Make an SMP-capable kernel by default diff --git a/sys/powerpc/conf/GENERIC64 b/sys/powerpc/conf/GENERIC64 index 409062047b24..cf732f6fcd44 100644 --- a/sys/powerpc/conf/GENERIC64 +++ b/sys/powerpc/conf/GENERIC64 @@ -110,6 +110,7 @@ options VERBOSE_SYSINIT=0 # Support debug.verbose_sysinit, off by default options EKCD # Support for encrypted kernel dumps options GZIO # gzip-compressed kernel and user dumps options ZSTDIO # zstd-compressed kernel and user dumps +options DEBUGNET # debugnet networking options NETDUMP # netdump(4) client support # Make an SMP-capable kernel by default diff --git a/sys/sparc64/conf/GENERIC b/sys/sparc64/conf/GENERIC index 75fe3f842577..4509269c19ff 100644 --- a/sys/sparc64/conf/GENERIC +++ b/sys/sparc64/conf/GENERIC @@ -94,6 +94,7 @@ options VERBOSE_SYSINIT=0 # Support debug.verbose_sysinit, off by default options EKCD # Support for encrypted kernel dumps options GZIO # gzip-compressed kernel and user dumps options ZSTDIO # zstd-compressed kernel and user dumps +options DEBUGNET # debugnet networking options NETDUMP # netdump(4) client support # Make an SMP-capable kernel by default diff --git a/sys/sys/mbuf.h b/sys/sys/mbuf.h index f851376d6964..1df8f8285025 100644 --- a/sys/sys/mbuf.h +++ b/sys/sys/mbuf.h @@ -1501,11 +1501,12 @@ mbuf_tstmp2timespec(struct mbuf *m, struct timespec *ts) } #endif -#ifdef NETDUMP -/* Invoked from the netdump client code. 
*/ -void netdump_mbuf_drain(void); -void netdump_mbuf_dump(void); -void netdump_mbuf_reinit(int nmbuf, int nclust, int clsize); +#ifdef DEBUGNET +/* Invoked from the debugnet client code. */ +void debugnet_mbuf_drain(void); +void debugnet_mbuf_start(void); +void debugnet_mbuf_finish(void); +void debugnet_mbuf_reinit(int nmbuf, int nclust, int clsize); #endif static inline bool diff --git a/sys/sys/param.h b/sys/sys/param.h index a9a3cef7114d..c5ebcd98d7cb 100644 --- a/sys/sys/param.h +++ b/sys/sys/param.h @@ -60,7 +60,7 @@ * in the range 5 to 9. */ #undef __FreeBSD_version -#define __FreeBSD_version 1300050 /* Master, propagated to newvers */ +#define __FreeBSD_version 1300051 /* Master, propagated to newvers */ /* * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD,