diff --git a/sys/conf/NOTES b/sys/conf/NOTES
index c6478f9bb78a..c1210e240191 100644
--- a/sys/conf/NOTES
+++ b/sys/conf/NOTES
@@ -960,6 +960,9 @@ device		lagg
 # for sockets with the SO_DEBUG option set, which can then be examined
 # using the trpt(8) utility.
 #
+# TCPPCAP enables code which keeps the last n packets sent and received
+# on a TCP socket.
+#
 # RADIX_MPATH provides support for equal-cost multi-path routing.
 #
 options 	MROUTING		# Multicast routing
@@ -976,6 +979,7 @@ options 	IPFILTER_DEFAULT_BLOCK	#block all packets by default
 options 	IPSTEALTH		#support for stealth forwarding
 options 	PF_DEFAULT_TO_DROP	#drop everything by default
 options 	TCPDEBUG
+options 	TCPPCAP
 options 	RADIX_MPATH
 
 # The MBUF_STRESS_TEST option enables options which create
diff --git a/sys/conf/files b/sys/conf/files
index 3ddf41287a09..5e7bdf384bbe 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -3682,6 +3682,7 @@ netinet/tcp_input.c	optional inet | inet6
 netinet/tcp_lro.c	optional inet | inet6
 netinet/tcp_output.c	optional inet | inet6
 netinet/tcp_offload.c	optional tcp_offload inet | tcp_offload inet6
+netinet/tcp_pcap.c	optional tcppcap
 netinet/tcp_reass.c	optional inet | inet6
 netinet/tcp_sack.c	optional inet | inet6
 netinet/tcp_subr.c	optional inet | inet6
diff --git a/sys/conf/options b/sys/conf/options
index ccbbd800c3cf..dcea435a3c97 100644
--- a/sys/conf/options
+++ b/sys/conf/options
@@ -436,6 +436,7 @@ ROUTETABLES	opt_route.h
 RSS		opt_rss.h
 SLIP_IFF_OPTS	opt_slip.h
 TCPDEBUG
+TCPPCAP		opt_global.h
 SIFTR
 TCP_OFFLOAD	opt_inet.h	# Enable code to dispatch TCP offloading
 TCP_SIGNATURE	opt_inet.h
diff --git a/sys/kern/uipc_mbuf.c b/sys/kern/uipc_mbuf.c
index 7f994fef1ef5..de6b313f1b43 100644
--- a/sys/kern/uipc_mbuf.c
+++ b/sys/kern/uipc_mbuf.c
@@ -395,7 +395,7 @@ mb_free_ext(struct mbuf *m)
  * Attach the cluster from *m to *n, set up m_ext in *n
  * and bump the refcount of the cluster.
  */
-static void
+void
 mb_dupcl(struct mbuf *n, const struct mbuf *m)
 {
 
diff --git a/sys/netinet/tcp.h b/sys/netinet/tcp.h
index fb2f8108d4f6..37608605367f 100644
--- a/sys/netinet/tcp.h
+++ b/sys/netinet/tcp.h
@@ -165,6 +165,8 @@ struct tcphdr {
 #define	TCP_KEEPIDLE	256	/* L,N,X start keeplives after this period */
 #define	TCP_KEEPINTVL	512	/* L,N interval between keepalives */
 #define	TCP_KEEPCNT	1024	/* L,N number of keepalives before close */
+#define	TCP_PCAP_OUT	2048	/* number of output packets to keep */
+#define	TCP_PCAP_IN	4096	/* number of input packets to keep */
 
 /* Start of reserved space for third-party user-settable options. */
 #define	TCP_VENDOR	SO_VENDOR
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
index b6ea859de310..64971f3da293 100644
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -104,6 +104,9 @@ __FBSDID("$FreeBSD$");
 #include
 #include
 #include
+#ifdef TCPPCAP
+#include <netinet/tcp_pcap.h>
+#endif
 #include
 #ifdef TCPDEBUG
 #include <netinet/tcp_debug.h>
 #endif
@@ -1524,6 +1527,11 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
 	KASSERT(tp->t_state != TCPS_TIME_WAIT, ("%s: TCPS_TIME_WAIT",
 	    __func__));
 
+#ifdef TCPPCAP
+	/* Save segment, if requested. */
+	tcp_pcap_add(th, m, &(tp->t_inpkts));
+#endif
+
 	/*
 	 * Segment received on connection.
 	 * Reset idle time and keep-alive timer.
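The TCP_PCAP_OUT and TCP_PCAP_IN socket options defined in the tcp.h hunk above are ordinary IPPROTO_TCP-level options. A minimal userspace sketch (a hypothetical test program, not part of this patch) that asks the kernel to keep the last 10 packets in each direction on a connection; this only works on a kernel built with "options TCPPCAP", and otherwise fails with ENOPROTOOPT:

	#include <sys/socket.h>
	#include <netinet/in.h>
	#include <netinet/tcp.h>
	#include <err.h>

	int
	main(void)
	{
		int s, depth = 10;

		if ((s = socket(AF_INET, SOCK_STREAM, 0)) == -1)
			err(1, "socket");
		/* Keep the last 10 packets sent on this connection. */
		if (setsockopt(s, IPPROTO_TCP, TCP_PCAP_OUT, &depth,
		    sizeof(depth)) == -1)
			err(1, "setsockopt(TCP_PCAP_OUT)");
		/* Keep the last 10 packets received on this connection. */
		if (setsockopt(s, IPPROTO_TCP, TCP_PCAP_IN, &depth,
		    sizeof(depth)) == -1)
			err(1, "setsockopt(TCP_PCAP_IN)");
		return (0);
	}

The same options can be read back with getsockopt(2), which returns the current per-queue limit through tcp_pcap_get_sock_max() in the tcp_usrreq.c hunk later in this patch.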
diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c
index fc90b069b9d4..d295dcfbcd6b 100644
--- a/sys/netinet/tcp_output.c
+++ b/sys/netinet/tcp_output.c
@@ -74,6 +74,9 @@ __FBSDID("$FreeBSD$");
 #include
 #include
 #include
+#ifdef TCPPCAP
+#include <netinet/tcp_pcap.h>
+#endif
 #ifdef TCPDEBUG
 #include <netinet/tcp_debug.h>
 #endif
@@ -1305,6 +1308,11 @@ send:
 
 		TCP_PROBE5(send, NULL, tp, ip6, tp, th);
 
+#ifdef TCPPCAP
+		/* Save packet, if requested. */
+		tcp_pcap_add(th, m, &(tp->t_outpkts));
+#endif
+
 		/* TODO: IPv6 IP6TOS_ECT bit on */
 		error = ip6_output(m, tp->t_inpcb->in6p_outputopts, &ro,
 		    ((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0),
@@ -1348,6 +1356,11 @@ send:
 
 	TCP_PROBE5(send, NULL, tp, ip, tp, th);
 
+#ifdef TCPPCAP
+	/* Save packet, if requested. */
+	tcp_pcap_add(th, m, &(tp->t_outpkts));
+#endif
+
 	error = ip_output(m, tp->t_inpcb->inp_options, &ro,
 	    ((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0), 0,
 	    tp->t_inpcb);
diff --git a/sys/netinet/tcp_pcap.c b/sys/netinet/tcp_pcap.c
new file mode 100644
index 000000000000..7a3514f9646d
--- /dev/null
+++ b/sys/netinet/tcp_pcap.c
@@ -0,0 +1,437 @@
+/*-
+ * Copyright (c) 2015
+ *	Jonathan Looney. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/queue.h>
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/sysctl.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/eventhandler.h>
+#include <machine/atomic.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcp_pcap.h>
+
+#define M_LEADINGSPACE_NOWRITE(m)					\
+	((m)->m_data - M_START(m))
+
+static int tcp_pcap_clusters_referenced_cur = 0;
+static int tcp_pcap_clusters_referenced_max = 0;
+
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_clusters_referenced_cur,
+    CTLFLAG_RD, &tcp_pcap_clusters_referenced_cur, 0,
+    "Number of clusters currently referenced on TCP PCAP queues");
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_clusters_referenced_max,
+    CTLFLAG_RW, &tcp_pcap_clusters_referenced_max, 0,
+    "Maximum number of clusters allowed to be referenced on TCP PCAP "
+    "queues");
+
+static int tcp_pcap_alloc_reuse_ext = 0;
+static int tcp_pcap_alloc_reuse_mbuf = 0;
+static int tcp_pcap_alloc_new_mbuf = 0;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_alloc_reuse_ext,
+    CTLFLAG_RD, &tcp_pcap_alloc_reuse_ext, 0,
+    "Number of mbufs with external storage reused for the TCP PCAP "
+    "functionality");
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_alloc_reuse_mbuf,
+    CTLFLAG_RD, &tcp_pcap_alloc_reuse_mbuf, 0,
+    "Number of mbufs with internal storage reused for the TCP PCAP "
+    "functionality");
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_alloc_new_mbuf,
+    CTLFLAG_RD, &tcp_pcap_alloc_new_mbuf, 0,
+    "Number of new mbufs allocated for the TCP PCAP functionality");
+
+VNET_DEFINE(int, tcp_pcap_packets) = 0;
+#define V_tcp_pcap_packets VNET(tcp_pcap_packets)
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_packets, CTLFLAG_RW,
+    &V_tcp_pcap_packets, 0, "Default number of packets saved per direction "
+    "per TCPCB");
+
+/* Initialize the values. */
+static void
+tcp_pcap_max_set() {
+	tcp_pcap_clusters_referenced_max = nmbclusters / 4;
+}
+
+void
+tcp_pcap_init() {
+	tcp_pcap_max_set();
+	EVENTHANDLER_REGISTER(nmbclusters_change, tcp_pcap_max_set,
+	    NULL, EVENTHANDLER_PRI_ANY);
+}
+
+/*
+ * If we are below the maximum allowed cluster references,
+ * increment the reference count and return TRUE. Otherwise,
+ * leave the reference count alone and return FALSE.
+ */
+static __inline bool
+tcp_pcap_take_cluster_reference(void)
+{
+	if (atomic_fetchadd_int(&tcp_pcap_clusters_referenced_cur, 1) >=
+	    tcp_pcap_clusters_referenced_max) {
+		atomic_add_int(&tcp_pcap_clusters_referenced_cur, -1);
+		return FALSE;
+	}
+	return TRUE;
+}
+
+/*
+ * For all the external entries in m, apply the given adjustment.
+ * This can be used to adjust the counter when an mbuf chain is
+ * copied or freed.
+ */
+static __inline void
+tcp_pcap_adj_cluster_reference(struct mbuf *m, int adj)
+{
+	while (m) {
+		if (m->m_flags & M_EXT)
+			atomic_add_int(&tcp_pcap_clusters_referenced_cur, adj);
+
+		m = m->m_next;
+	}
+}
+
+/*
+ * Free all mbufs in a chain, decrementing the reference count as
+ * necessary.
+ *
+ * Functions in this file should use this instead of m_freem() when
+ * they are freeing mbuf chains that may contain clusters that were
+ * already included in tcp_pcap_clusters_referenced_cur.
+ */
+static void
+tcp_pcap_m_freem(struct mbuf *mb)
+{
+	while (mb != NULL) {
+		if (mb->m_flags & M_EXT)
+			atomic_subtract_int(&tcp_pcap_clusters_referenced_cur,
+			    1);
+		mb = m_free(mb);
+	}
+}
+
+/*
+ * Copy data from m to n, where n cannot fit all the data we might
+ * want from m.
+ *
+ * Prioritize data like this:
+ * 1. TCP header
+ * 2. IP header
+ * 3. Data
+ */
+static void
+tcp_pcap_copy_bestfit(struct tcphdr *th, struct mbuf *m, struct mbuf *n)
+{
+	struct mbuf *m_cur = m;
+	int bytes_to_copy=0, trailing_data, skip=0, tcp_off;
+
+	/* Below, we assume these will be non-NULL. */
+	KASSERT(th, ("%s: called with th == NULL", __func__));
+	KASSERT(m, ("%s: called with m == NULL", __func__));
+	KASSERT(n, ("%s: called with n == NULL", __func__));
+
+	/* We assume this initialization occurred elsewhere. */
+	KASSERT(n->m_len == 0, ("%s: called with n->m_len=%d (expected 0)",
+	    __func__, n->m_len));
+	KASSERT(n->m_data == M_START(n),
+	    ("%s: called with n->m_data != M_START(n)", __func__));
+
+	/*
+	 * Calculate the size of the TCP header. We use this often
+	 * enough that it is worth just calculating at the start.
+	 */
+	tcp_off = th->th_off << 2;
+
+	/* Trim off leading empty mbufs. */
+	while (m && m->m_len == 0)
+		m = m->m_next;
+
+	if (m) {
+		m_cur = m;
+	}
+	else {
+		/*
+		 * No data? Highly unusual. We would expect to at
+		 * least see a TCP header in the mbuf.
+		 * As we have a pointer to the TCP header, I guess
+		 * we should just copy that. (???)
+		 */
+fallback:
+		bytes_to_copy = tcp_off;
+		if (bytes_to_copy > M_SIZE(n))
+			bytes_to_copy = M_SIZE(n);
+		bcopy(th, n->m_data, bytes_to_copy);
+		n->m_len = bytes_to_copy;
+		return;
+	}
+
+	/*
+	 * Find TCP header. Record the total number of bytes up to,
+	 * and including, the TCP header.
+	 */
+	while (m_cur) {
+		if ((caddr_t) th >= (caddr_t) m_cur->m_data &&
+		    (caddr_t) th < (caddr_t) (m_cur->m_data + m_cur->m_len))
+			break;
+		bytes_to_copy += m_cur->m_len;
+		m_cur = m_cur->m_next;
+	}
+	if (m_cur)
+		bytes_to_copy += (caddr_t) th - (caddr_t) m_cur->m_data;
+	else
+		goto fallback;
+	bytes_to_copy += tcp_off;
+
+	/*
+	 * If we already want to copy more bytes than we can hold
+	 * in the destination mbuf, skip leading bytes and copy
+	 * what we can.
+	 *
+	 * Otherwise, consider trailing data.
+	 */
+	if (bytes_to_copy > M_SIZE(n)) {
+		skip = bytes_to_copy - M_SIZE(n);
+		bytes_to_copy = M_SIZE(n);
+	}
+	else {
+		/*
+		 * Determine how much trailing data is in the chain.
+		 * We start with the length of this mbuf (the one
+		 * containing th) and subtract the size of the TCP
+		 * header (tcp_off) and the size of the data prior
+		 * to th (th - m_cur->m_data).
+		 *
+		 * This *should not* be negative, as the TCP code
+		 * should put the whole TCP header in a single
+		 * mbuf. But, it isn't a problem if it is. We will
+		 * simply work off our negative balance as we look
+		 * at subsequent mbufs.
+		 */
+		trailing_data = m_cur->m_len - tcp_off;
+		trailing_data -= (caddr_t) th - (caddr_t) m_cur->m_data;
+		m_cur = m_cur->m_next;
+		while (m_cur) {
+			trailing_data += m_cur->m_len;
+			m_cur = m_cur->m_next;
+		}
+		if ((bytes_to_copy + trailing_data) > M_SIZE(n))
+			bytes_to_copy = M_SIZE(n);
+		else
+			bytes_to_copy += trailing_data;
+	}
+
+	m_copydata(m, skip, bytes_to_copy, n->m_data);
+	n->m_len = bytes_to_copy;
+}
+
+void
+tcp_pcap_add(struct tcphdr *th, struct mbuf *m, struct mbufq *queue)
+{
+	struct mbuf *n = NULL, *mhead;
+
+	KASSERT(th, ("%s: called with th == NULL", __func__));
+	KASSERT(m, ("%s: called with m == NULL", __func__));
+	KASSERT(queue, ("%s: called with queue == NULL", __func__));
+
+	/* We only care about data packets. */
+	while (m && m->m_type != MT_DATA)
+		m = m->m_next;
+
+	/* We only need to do something if we still have an mbuf. */
+	if (!m)
+		return;
+
+	/* If we are not saving mbufs, return now. */
+	if (queue->mq_maxlen == 0)
+		return;
+
+	/*
+	 * Check to see if we will need to recycle mbufs.
+	 *
+	 * If we need to get rid of mbufs to stay below
+	 * our packet count, try to reuse the mbuf. Once
+	 * we already have a new mbuf (n), then we can
+	 * simply free subsequent mbufs.
+	 *
+	 * Note that most of the logic in here is to deal
+	 * with the reuse. If we are fine with constant
+	 * mbuf allocs/deallocs, we could ditch this logic.
+	 * But, it only seems to make sense to reuse
+	 * mbufs we already have.
+	 */
+	while (mbufq_full(queue)) {
+		mhead = mbufq_dequeue(queue);
+
+		if (n) {
+			tcp_pcap_m_freem(mhead);
+		}
+		else {
+			/*
+			 * If this held an external cluster, try to
+			 * detach the cluster. But, if we held the
+			 * last reference, go through the normal
+			 * free-ing process.
+			 */
+			if (mhead->m_flags & M_EXT) {
+				switch (mhead->m_ext.ext_type) {
+				case EXT_SFBUF:
+					/* Don't mess around with these. */
+					tcp_pcap_m_freem(mhead);
+					continue;
+				default:
+					if (atomic_fetchadd_int(
+					    mhead->m_ext.ext_cnt, -1) == 1)
+					{
+						/*
+						 * We held the last reference
+						 * on this cluster. Restore
+						 * the reference count and put
+						 * it back in the pool.
+						 */
+						*(mhead->m_ext.ext_cnt) = 1;
+						tcp_pcap_m_freem(mhead);
+						continue;
+					}
+					/*
+					 * We were able to cleanly free the
+					 * reference.
+					 */
+					atomic_subtract_int(
+					    &tcp_pcap_clusters_referenced_cur,
+					    1);
+					tcp_pcap_alloc_reuse_ext++;
+					break;
+				}
+			}
+			else {
+				tcp_pcap_alloc_reuse_mbuf++;
+			}
+
+			n = mhead;
+			tcp_pcap_m_freem(n->m_next);
+			m_init(n, NULL, 0, M_NOWAIT, MT_DATA, 0);
+		}
+	}
+
+	/* Check to see if we need to get a new mbuf. */
+	if (!n) {
+		if (!(n = m_get(M_NOWAIT, MT_DATA)))
+			return;
+		tcp_pcap_alloc_new_mbuf++;
+	}
+
+	/*
+	 * What are we dealing with? If a cluster, attach it. Otherwise,
+	 * try to copy the data from the beginning of the mbuf to the
+	 * end of data. (There may be data between the start of the data
+	 * area and the current data pointer. We want to get this, because
+	 * it may contain header information that is useful.)
+	 * In cases where that isn't possible, settle for what we can
+	 * get.
+	 */
+	if ((m->m_flags & M_EXT) && tcp_pcap_take_cluster_reference()) {
+		n->m_data = m->m_data;
+		n->m_len = m->m_len;
+		mb_dupcl(n, m);
+	}
+	else if (((m->m_data + m->m_len) - M_START(m)) <= M_SIZE(n)) {
+		/*
+		 * At this point, n is guaranteed to be a normal mbuf
+		 * with no cluster and no packet header. Because the
+		 * logic in this code block requires this, the assert
+		 * is here to catch any instances where someone
+		 * changes the logic to invalidate that assumption.
+		 */
+		KASSERT((n->m_flags & (M_EXT | M_PKTHDR)) == 0,
+		    ("%s: Unexpected flags (%#x) for mbuf",
+		    __func__, n->m_flags));
+		n->m_data = n->m_dat + M_LEADINGSPACE_NOWRITE(m);
+		n->m_len = m->m_len;
+		bcopy(M_START(m), n->m_dat,
+		    m->m_len + M_LEADINGSPACE_NOWRITE(m));
+	}
+	else {
+		/*
+		 * This is the case where we need to "settle for what
+		 * we can get". The most probable way to this code
+		 * path is that we've already taken references to the
+		 * maximum number of mbuf clusters we can, and the data
+		 * is too long to fit in an mbuf's internal storage.
+		 * Try for a "best fit".
+		 */
+		tcp_pcap_copy_bestfit(th, m, n);
+
+		/* Don't try to get additional data. */
+		goto add_to_queue;
+	}
+
+	if (m->m_next) {
+		n->m_next = m_copym(m->m_next, 0, M_COPYALL, M_NOWAIT);
+		tcp_pcap_adj_cluster_reference(n->m_next, 1);
+	}
+
+add_to_queue:
+	/* Add the new mbuf to the list. */
+	if (mbufq_enqueue(queue, n)) {
+		/* This shouldn't happen. If INVARIANTS is defined, panic. */
+		KASSERT(0, ("%s: mbufq was unexpectedly full!", __func__));
+		tcp_pcap_m_freem(n);
+	}
+}
+
+void
+tcp_pcap_drain(struct mbufq *queue)
+{
+	struct mbuf *m;
+	while ((m = mbufq_dequeue(queue)))
+		tcp_pcap_m_freem(m);
+}
+
+void
+tcp_pcap_tcpcb_init(struct tcpcb *tp)
+{
+	mbufq_init(&(tp->t_inpkts), V_tcp_pcap_packets);
+	mbufq_init(&(tp->t_outpkts), V_tcp_pcap_packets);
+}
+
+void
+tcp_pcap_set_sock_max(struct mbufq *queue, int newval)
+{
+	queue->mq_maxlen = newval;
+	while (queue->mq_len > queue->mq_maxlen)
+		tcp_pcap_m_freem(mbufq_dequeue(queue));
+}
+
+int
+tcp_pcap_get_sock_max(struct mbufq *queue)
+{
+	return queue->mq_maxlen;
+}
diff --git a/sys/netinet/tcp_pcap.h b/sys/netinet/tcp_pcap.h
new file mode 100644
index 000000000000..ec8fdde03837
--- /dev/null
+++ b/sys/netinet/tcp_pcap.h
@@ -0,0 +1,39 @@
+/*-
+ * Copyright (c) 2015
+ *	Jonathan Looney. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET_TCP_PCAP_H_
+#define _NETINET_TCP_PCAP_H_
+
+void	tcp_pcap_init(void);
+void	tcp_pcap_add(struct tcphdr *th, struct mbuf *m, struct mbufq *queue);
+void	tcp_pcap_drain(struct mbufq *queue);
+void	tcp_pcap_tcpcb_init(struct tcpcb *tp);
+void	tcp_pcap_set_sock_max(struct mbufq *queue, int newval);
+int	tcp_pcap_get_sock_max(struct mbufq *queue);
+
+#endif /* _NETINET_TCP_PCAP_H_ */
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
index a20bd8109316..e3f5b1324ce8 100644
--- a/sys/netinet/tcp_subr.c
+++ b/sys/netinet/tcp_subr.c
@@ -92,6 +92,9 @@ __FBSDID("$FreeBSD$");
 #include
 #endif
 #include
+#ifdef TCPPCAP
+#include <netinet/tcp_pcap.h>
+#endif
 #ifdef TCPDEBUG
 #include <netinet/tcp_debug.h>
 #endif
@@ -427,6 +430,9 @@ tcp_init(void)
 	    SHUTDOWN_PRI_DEFAULT);
 	EVENTHANDLER_REGISTER(maxsockets_change, tcp_zone_change, NULL,
 	    EVENTHANDLER_PRI_ANY);
+#ifdef TCPPCAP
+	tcp_pcap_init();
+#endif
 }
 
 #ifdef VIMAGE
@@ -832,6 +838,12 @@ tcp_newtcpcb(struct inpcb *inp)
 	 */
 	inp->inp_ip_ttl = V_ip_defttl;
 	inp->inp_ppcb = tp;
+#ifdef TCPPCAP
+	/*
+	 * Init the TCP PCAP queues.
+	 */
+	tcp_pcap_tcpcb_init(tp);
+#endif
 	return (tp);		/* XXX */
 }
 
@@ -1016,6 +1028,12 @@ tcp_discardcb(struct tcpcb *tp)
 
 	tcp_free_sackholes(tp);
 
+#ifdef TCPPCAP
+	/* Free the TCP PCAP queues. */
+	tcp_pcap_drain(&(tp->t_inpkts));
+	tcp_pcap_drain(&(tp->t_outpkts));
+#endif
+
 	/* Allow the CC algorithm to clean up after itself. */
 	if (CC_ALGO(tp)->cb_destroy != NULL)
 		CC_ALGO(tp)->cb_destroy(tp->ccv);
diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c
index 7d1ec6a0f2a9..a1f8a0c07fa2 100644
--- a/sys/netinet/tcp_usrreq.c
+++ b/sys/netinet/tcp_usrreq.c
@@ -86,6 +86,9 @@ __FBSDID("$FreeBSD$");
 #include
 #include
 #include
+#ifdef TCPPCAP
+#include <netinet/tcp_pcap.h>
+#endif
 #ifdef TCPDEBUG
 #include <netinet/tcp_debug.h>
 #endif
@@ -1577,6 +1580,25 @@ unlock_and_done:
 				TP_MAXIDLE(tp));
 			goto unlock_and_done;
 
+#ifdef TCPPCAP
+		case TCP_PCAP_OUT:
+		case TCP_PCAP_IN:
+			INP_WUNLOCK(inp);
+			error = sooptcopyin(sopt, &optval, sizeof optval,
+			    sizeof optval);
+			if (error)
+				return (error);
+
+			INP_WLOCK_RECHECK(inp);
+			if (optval >= 0)
+				tcp_pcap_set_sock_max(
+				    (sopt->sopt_name == TCP_PCAP_OUT) ?
+				    &(tp->t_outpkts) : &(tp->t_inpkts),
+				    optval);
+			else
+				error = EINVAL;
+			goto unlock_and_done;
+#endif
+
 		default:
 			INP_WUNLOCK(inp);
 			error = ENOPROTOOPT;
@@ -1647,6 +1669,15 @@ unlock_and_done:
 			INP_WUNLOCK(inp);
 			error = sooptcopyout(sopt, &ui, sizeof(ui));
 			break;
+#ifdef TCPPCAP
+		case TCP_PCAP_OUT:
+		case TCP_PCAP_IN:
+			optval = tcp_pcap_get_sock_max(
+			    (sopt->sopt_name == TCP_PCAP_OUT) ?
+			    &(tp->t_outpkts) : &(tp->t_inpkts));
+			INP_WUNLOCK(inp);
+			error = sooptcopyout(sopt, &optval, sizeof optval);
+			break;
+#endif
 		default:
 			INP_WUNLOCK(inp);
 			error = ENOPROTOOPT;
diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h
index 61fc41903fd6..07a28feb4956 100644
--- a/sys/netinet/tcp_var.h
+++ b/sys/netinet/tcp_var.h
@@ -37,6 +37,7 @@
 
 #ifdef _KERNEL
 #include
+#include <sys/mbuf.h>
 
 /*
  * Kernel variables for tcp.
@@ -204,7 +205,17 @@ struct tcpcb {
 	uint32_t t_ispare[8];		/* 5 UTO, 3 TBD */
 	void	*t_pspare2[4];		/* 1 TCP_SIGNATURE, 3 TBD */
-	uint64_t _pad[6];		/* 6 TBD (1-2 CC/RTT?) */
+#if defined(_KERNEL) && defined(TCPPCAP)
+	struct mbufq t_inpkts;		/* List of saved input packets. */
+	struct mbufq t_outpkts;		/* List of saved output packets. */
+#ifdef _LP64
+	uint64_t _pad[0];		/* all used! */
+#else
+	uint64_t _pad[2];		/* 2 are available */
+#endif /* _LP64 */
+#else
+	uint64_t _pad[6];
+#endif /* defined(_KERNEL) && defined(TCPPCAP) */
 };
 
 /*
diff --git a/sys/sys/mbuf.h b/sys/sys/mbuf.h
index 8d335507456a..119bb6dd5ce1 100644
--- a/sys/sys/mbuf.h
+++ b/sys/sys/mbuf.h
@@ -524,6 +524,7 @@ extern uma_zone_t zone_jumbo9;
 extern uma_zone_t zone_jumbo16;
 extern uma_zone_t zone_ext_refcnt;
 
+void		 mb_dupcl(struct mbuf *, const struct mbuf *);
 void		 mb_free_ext(struct mbuf *);
 int		 m_pkthdr_init(struct mbuf *, int);
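Stripped of the mbuf-reuse and cluster-accounting optimizations, the queueing discipline that tcp_pcap_add() implements reduces to a fixed-depth ring: once the queue reaches its limit, the oldest saved packet is dropped to make room for the newest. A simplified, hypothetical sketch (pcap_queue_sketch is not a function in this patch) using only the mbufq API that the patch itself uses:

	#include <sys/param.h>
	#include <sys/mbuf.h>

	/*
	 * Hypothetical simplification of the tcp_pcap_add() queue
	 * discipline; the real function also recycles the dequeued
	 * mbuf and manages cluster reference counts.
	 */
	static void
	pcap_queue_sketch(struct mbufq *queue, struct mbuf *m)
	{

		/* Drop the oldest saved packets to make room. */
		while (mbufq_full(queue))
			m_freem(mbufq_dequeue(queue));

		/* Cannot fail: the queue is now below its limit. */
		(void)mbufq_enqueue(queue, m);
	}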
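New connections inherit their per-direction depth from the net.inet.tcp.tcp_pcap_packets sysctl (applied by tcp_pcap_tcpcb_init() from tcp_newtcpcb()); it defaults to 0, so nothing is captured until an administrator or application opts in. A sketch (hypothetical program, not part of this patch) that raises the default from userspace with sysctlbyname(3):

	#include <sys/types.h>
	#include <sys/sysctl.h>
	#include <err.h>

	int
	main(void)
	{
		int depth = 32;

		/*
		 * Make every new TCP connection keep its last 32
		 * packets in each direction.  The OID only exists on
		 * kernels built with "options TCPPCAP".
		 */
		if (sysctlbyname("net.inet.tcp.tcp_pcap_packets", NULL,
		    NULL, &depth, sizeof(depth)) == -1)
			err(1, "sysctlbyname");
		return (0);
	}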