Implement parts of the TCP_INFO socket option as found in Linux 2.6.
This socket option allows processes to query a TCP socket for some low-level transmission details, such as the current send, bandwidth, and congestion windows. Linux provides a 'struct tcp_info' structure containing various variables, rather than separate socket options; this makes the API somewhat fragile, as it makes it difficult to add new entries of interest as requirements and implementation evolve. As such, I've included a large pad at the end of the structure. Right now, relatively few of the Linux API fields are filled in, and some have no logical equivalent on FreeBSD. I've included __'d entries in the structure to make it easier to figure out what is and isn't omitted. This API/ABI should be considered unstable for the time being.
This commit is contained in:
parent
c76a6d58b3
commit
0aa3c6f817
@ -159,6 +159,72 @@ struct tcphdr {
|
|||||||
#define TCP_NOPUSH 0x04 /* don't push last block of write */
|
#define TCP_NOPUSH 0x04 /* don't push last block of write */
|
||||||
#define TCP_NOOPT 0x08 /* don't use TCP options */
|
#define TCP_NOOPT 0x08 /* don't use TCP options */
|
||||||
#define TCP_MD5SIG 0x10 /* use MD5 digests (RFC2385) */
|
#define TCP_MD5SIG 0x10 /* use MD5 digests (RFC2385) */
|
||||||
|
#define TCP_INFO 0x20 /* retrieve tcp_info structure */
|
||||||
|
|
||||||
|
#define TCPI_OPT_TIMESTAMPS 0x01
|
||||||
|
#define TCPI_OPT_SACK 0x02
|
||||||
|
#define TCPI_OPT_WSCALE 0x04
|
||||||
|
#define TCPI_OPT_ECN 0x08
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The TCP_INFO socket option comes from the Linux 2.6 TCP API, and permits
|
||||||
|
* the caller to query certain information about the state of a TCP
|
||||||
|
* connection. We provide an overlapping set of fields with the Linux
|
||||||
|
* implementation, but since this is a fixed size structure, room has been
|
||||||
|
* left for growth. In order to maximize potential future compatibility with
|
||||||
|
* the Linux API, the same variable names and order have been adopted, and
|
||||||
|
* padding left to make room for omitted fields in case they are added later.
|
||||||
|
*
|
||||||
|
* XXX: This is currently an unstable ABI/API, in that it is expected to
|
||||||
|
* change.
|
||||||
|
*/
|
||||||
|
struct tcp_info {
|
||||||
|
u_int8_t tcpi_state; /* TCP FSM state. */
|
||||||
|
u_int8_t __tcpi_ca_state;
|
||||||
|
u_int8_t __tcpi_retransmits;
|
||||||
|
u_int8_t __tcpi_probes;
|
||||||
|
u_int8_t __tcpi_backoff;
|
||||||
|
u_int8_t tcpi_options; /* Options enabled on conn. */
|
||||||
|
u_int8_t tcpi_snd_wscale:4, /* RFC1323 send shift value. */
|
||||||
|
tcpi_rcv_wscale:4; /* RFC1323 recv shift value. */
|
||||||
|
|
||||||
|
u_int32_t __tcpi_rto;
|
||||||
|
u_int32_t __tcpi_ato;
|
||||||
|
u_int32_t __tcpi_snd_mss;
|
||||||
|
u_int32_t __tcpi_rcv_mss;
|
||||||
|
|
||||||
|
u_int32_t __tcpi_unacked;
|
||||||
|
u_int32_t __tcpi_sacked;
|
||||||
|
u_int32_t __tcpi_lost;
|
||||||
|
u_int32_t __tcpi_retrans;
|
||||||
|
u_int32_t __tcpi_fackets;
|
||||||
|
|
||||||
|
/* Times; measurements in usecs. */
|
||||||
|
u_int32_t __tcpi_last_data_sent;
|
||||||
|
u_int32_t __tcpi_last_ack_sent; /* Also unimpl. on Linux? */
|
||||||
|
u_int32_t __tcpi_last_data_recv;
|
||||||
|
u_int32_t __tcpi_last_ack_recv;
|
||||||
|
|
||||||
|
/* Metrics; variable units. */
|
||||||
|
u_int32_t __tcpi_pmtu;
|
||||||
|
u_int32_t __tcpi_rcv_ssthresh;
|
||||||
|
u_int32_t __tcpi_rtt;
|
||||||
|
u_int32_t __tcpi_rttvar;
|
||||||
|
u_int32_t tcpi_snd_ssthresh; /* Slow start threshold. */
|
||||||
|
u_int32_t tcpi_snd_cwnd; /* Send congestion window. */
|
||||||
|
u_int32_t __tcpi_advmss;
|
||||||
|
u_int32_t __tcpi_reordering;
|
||||||
|
|
||||||
|
u_int32_t __tcpi_rcv_rtt;
|
||||||
|
u_int32_t __tcpi_rcv_space;
|
||||||
|
|
||||||
|
/* FreeBSD extensions to tcp_info. */
|
||||||
|
u_int32_t tcpi_snd_wnd; /* Advertised send window. */
|
||||||
|
u_int32_t tcpi_snd_bwnd; /* Bandwidth send window. */
|
||||||
|
|
||||||
|
/* Padding to grow without breaking ABI. */
|
||||||
|
u_int32_t __tcpi_pad[32]; /* Padding. */
|
||||||
|
};
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif /* !_NETINET_TCP_H_ */
|
#endif /* !_NETINET_TCP_H_ */
|
||||||
|
@ -97,6 +97,7 @@ static struct tcpcb *
|
|||||||
tcp_disconnect(struct tcpcb *);
|
tcp_disconnect(struct tcpcb *);
|
||||||
static struct tcpcb *
|
static struct tcpcb *
|
||||||
tcp_usrclosed(struct tcpcb *);
|
tcp_usrclosed(struct tcpcb *);
|
||||||
|
static void tcp_fill_info(struct tcpcb *, struct tcp_info *);
|
||||||
|
|
||||||
#ifdef TCPDEBUG
|
#ifdef TCPDEBUG
|
||||||
#define TCPDEBUG0 int ostate = 0
|
#define TCPDEBUG0 int ostate = 0
|
||||||
@ -941,12 +942,51 @@ tcp6_connect(tp, nam, td)
|
|||||||
}
|
}
|
||||||
#endif /* INET6 */
|
#endif /* INET6 */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Export TCP internal state information via a struct tcp_info, based on the
|
||||||
|
* Linux 2.6 API. Not ABI compatible as our constants are mapped differently
|
||||||
|
* (TCP state machine, etc). We export all information using FreeBSD-native
|
||||||
|
* constants -- for example, the numeric values for tcpi_state will differ
|
||||||
|
* from Linux.
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
tcp_fill_info(tp, ti)
|
||||||
|
struct tcpcb *tp;
|
||||||
|
struct tcp_info *ti;
|
||||||
|
{
|
||||||
|
|
||||||
|
INP_LOCK_ASSERT(tp->t_inpcb);
|
||||||
|
bzero(ti, sizeof(*ti));
|
||||||
|
|
||||||
|
ti->tcpi_state = tp->t_state;
|
||||||
|
if ((tp->t_flags & TF_REQ_TSTMP) && (tp->t_flags & TF_RCVD_TSTMP))
|
||||||
|
ti->tcpi_options |= TCPI_OPT_TIMESTAMPS;
|
||||||
|
if (tp->sack_enable)
|
||||||
|
ti->tcpi_options |= TCPI_OPT_SACK;
|
||||||
|
if ((tp->t_flags & TF_REQ_SCALE) && (tp->t_flags & TF_RCVD_SCALE)) {
|
||||||
|
ti->tcpi_options |= TCPI_OPT_WSCALE;
|
||||||
|
ti->tcpi_snd_wscale = tp->snd_scale;
|
||||||
|
ti->tcpi_rcv_wscale = tp->rcv_scale;
|
||||||
|
}
|
||||||
|
ti->tcpi_snd_ssthresh = tp->snd_ssthresh;
|
||||||
|
ti->tcpi_snd_cwnd = tp->snd_cwnd;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* FreeBSD-specific extension fields for tcp_info.
|
||||||
|
*/
|
||||||
|
ti->tcpi_snd_wnd = tp->snd_wnd;
|
||||||
|
ti->tcpi_snd_bwnd = tp->snd_bwnd;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The new sockopt interface makes it possible for us to block in the
|
* The new sockopt interface makes it possible for us to block in the
|
||||||
* copyin/out step (if we take a page fault). Taking a page fault at
|
* copyin/out step (if we take a page fault). Taking a page fault at
|
||||||
* splnet() is probably a Bad Thing. (Since sockets and pcbs both now
|
* splnet() is probably a Bad Thing. (Since sockets and pcbs both now
|
||||||
* use TSM, there probably isn't any need for this function to run at
|
* use TSM, there probably isn't any need for this function to run at
|
||||||
* splnet() any more. This needs more examination.)
|
* splnet() any more. This needs more examination.)
|
||||||
|
*
|
||||||
|
* XXXRW: The locking here is wrong; we may take a page fault while holding
|
||||||
|
* the inpcb lock.
|
||||||
*/
|
*/
|
||||||
int
|
int
|
||||||
tcp_ctloutput(so, sopt)
|
tcp_ctloutput(so, sopt)
|
||||||
@ -956,6 +996,7 @@ tcp_ctloutput(so, sopt)
|
|||||||
int error, opt, optval;
|
int error, opt, optval;
|
||||||
struct inpcb *inp;
|
struct inpcb *inp;
|
||||||
struct tcpcb *tp;
|
struct tcpcb *tp;
|
||||||
|
struct tcp_info ti;
|
||||||
|
|
||||||
error = 0;
|
error = 0;
|
||||||
INP_INFO_RLOCK(&tcbinfo);
|
INP_INFO_RLOCK(&tcbinfo);
|
||||||
@ -1046,6 +1087,10 @@ tcp_ctloutput(so, sopt)
|
|||||||
error = EINVAL;
|
error = EINVAL;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case TCP_INFO:
|
||||||
|
error = EINVAL;
|
||||||
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
error = ENOPROTOOPT;
|
error = ENOPROTOOPT;
|
||||||
break;
|
break;
|
||||||
@ -1057,26 +1102,33 @@ tcp_ctloutput(so, sopt)
|
|||||||
#ifdef TCP_SIGNATURE
|
#ifdef TCP_SIGNATURE
|
||||||
case TCP_MD5SIG:
|
case TCP_MD5SIG:
|
||||||
optval = (tp->t_flags & TF_SIGNATURE) ? 1 : 0;
|
optval = (tp->t_flags & TF_SIGNATURE) ? 1 : 0;
|
||||||
|
error = sooptcopyout(sopt, &optval, sizeof optval);
|
||||||
break;
|
break;
|
||||||
#endif
|
#endif
|
||||||
case TCP_NODELAY:
|
case TCP_NODELAY:
|
||||||
optval = tp->t_flags & TF_NODELAY;
|
optval = tp->t_flags & TF_NODELAY;
|
||||||
|
error = sooptcopyout(sopt, &optval, sizeof optval);
|
||||||
break;
|
break;
|
||||||
case TCP_MAXSEG:
|
case TCP_MAXSEG:
|
||||||
optval = tp->t_maxseg;
|
optval = tp->t_maxseg;
|
||||||
|
error = sooptcopyout(sopt, &optval, sizeof optval);
|
||||||
break;
|
break;
|
||||||
case TCP_NOOPT:
|
case TCP_NOOPT:
|
||||||
optval = tp->t_flags & TF_NOOPT;
|
optval = tp->t_flags & TF_NOOPT;
|
||||||
|
error = sooptcopyout(sopt, &optval, sizeof optval);
|
||||||
break;
|
break;
|
||||||
case TCP_NOPUSH:
|
case TCP_NOPUSH:
|
||||||
optval = tp->t_flags & TF_NOPUSH;
|
optval = tp->t_flags & TF_NOPUSH;
|
||||||
|
error = sooptcopyout(sopt, &optval, sizeof optval);
|
||||||
|
break;
|
||||||
|
case TCP_INFO:
|
||||||
|
tcp_fill_info(tp, &ti);
|
||||||
|
error = sooptcopyout(sopt, &ti, sizeof ti);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
error = ENOPROTOOPT;
|
error = ENOPROTOOPT;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (error == 0)
|
|
||||||
error = sooptcopyout(sopt, &optval, sizeof optval);
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
INP_UNLOCK(inp);
|
INP_UNLOCK(inp);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user