Implement parts of the TCP_INFO socket option as found in Linux 2.6.

This socket option allows processes to query a TCP socket for some low
level transmission details, such as the current send, bandwidth, and
congestion windows.  Linux provides a 'struct tcp_info' structure
containing various variables, rather than separate socket options;
this makes the API somewhat fragile as it makes it difficult to add
new entries of interest as requirements and implementation evolve.
As such, I've included a large pad at the end of the structure.
Right now, relatively few of the Linux API fields are filled in, and
some have no logical equivalent on FreeBSD.  I've included __'d
entries in the structure to make it easier to figure out what is and
isn't omitted.  This API/ABI should be considered unstable for the
time being.
This commit is contained in:
Robert Watson 2004-11-26 18:58:46 +00:00
parent 17ded67ee3
commit b8af5dfa81
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=138118
2 changed files with 120 additions and 2 deletions

View File

@ -159,6 +159,72 @@ struct tcphdr {
#define TCP_NOPUSH 0x04 /* don't push last block of write */
#define TCP_NOOPT 0x08 /* don't use TCP options */
#define TCP_MD5SIG 0x10 /* use MD5 digests (RFC2385) */
#define TCP_INFO 0x20 /* retrieve tcp_info structure */
/*
 * Flag bits reported in the tcpi_options member of struct tcp_info.
 */
#define TCPI_OPT_TIMESTAMPS 0x01 /* timestamps both requested and received */
#define TCPI_OPT_SACK 0x02 /* SACK enabled on the connection */
#define TCPI_OPT_WSCALE 0x04 /* window scaling both requested and received */
#define TCPI_OPT_ECN 0x08 /* not currently reported by tcp_fill_info() */
/*
* The TCP_INFO socket option comes from the Linux 2.6 TCP API, and permits
* the caller to query certain information about the state of a TCP
* connection. We provide an overlapping set of fields with the Linux
* implementation, but since this is a fixed size structure, room has been
* left for growth. In order to maximize potential future compatibility with
* the Linux API, the same variable names and order have been adopted, and
* padding left to make room for omitted fields in case they are added later.
*
* XXX: This is currently an unstable ABI/API, in that it is expected to
* change.
*/
struct tcp_info {
/*
 * Members whose names begin with "__" are placeholders: they keep the
 * name and position of the corresponding Linux field, but
 * tcp_fill_info() never assigns them, so they read back as zero.
 */
u_int8_t tcpi_state; /* TCP FSM state (FreeBSD-native value). */
u_int8_t __tcpi_ca_state;
u_int8_t __tcpi_retransmits;
u_int8_t __tcpi_probes;
u_int8_t __tcpi_backoff;
u_int8_t tcpi_options; /* Options enabled on conn (TCPI_OPT_* bits). */
/*
 * The two shift values share a single byte.  They are only filled in
 * when window scaling was both requested and received; otherwise both
 * are zero.
 */
u_int8_t tcpi_snd_wscale:4, /* RFC1323 send shift value. */
tcpi_rcv_wscale:4; /* RFC1323 recv shift value. */
u_int32_t __tcpi_rto;
u_int32_t __tcpi_ato;
u_int32_t __tcpi_snd_mss;
u_int32_t __tcpi_rcv_mss;
u_int32_t __tcpi_unacked;
u_int32_t __tcpi_sacked;
u_int32_t __tcpi_lost;
u_int32_t __tcpi_retrans;
u_int32_t __tcpi_fackets;
/* Times; measurements in usecs. */
u_int32_t __tcpi_last_data_sent;
u_int32_t __tcpi_last_ack_sent; /* Also unimpl. on Linux? */
u_int32_t __tcpi_last_data_recv;
u_int32_t __tcpi_last_ack_recv;
/* Metrics; variable units. */
u_int32_t __tcpi_pmtu;
u_int32_t __tcpi_rcv_ssthresh;
u_int32_t __tcpi_rtt;
u_int32_t __tcpi_rttvar;
u_int32_t tcpi_snd_ssthresh; /* Slow start threshold (snd_ssthresh). */
u_int32_t tcpi_snd_cwnd; /* Send congestion window (snd_cwnd). */
u_int32_t __tcpi_advmss;
u_int32_t __tcpi_reordering;
u_int32_t __tcpi_rcv_rtt;
u_int32_t __tcpi_rcv_space;
/* FreeBSD extensions to tcp_info. */
u_int32_t tcpi_snd_wnd; /* Advertised send window (snd_wnd). */
u_int32_t tcpi_snd_bwnd; /* Bandwidth send window (snd_bwnd). */
/* Padding to grow without breaking ABI. */
u_int32_t __tcpi_pad[32]; /* Padding: 32 spare 32-bit words. */
};
#endif
#endif /* !_NETINET_TCP_H_ */

View File

@ -97,6 +97,7 @@ static struct tcpcb *
tcp_disconnect(struct tcpcb *);
static struct tcpcb *
tcp_usrclosed(struct tcpcb *);
static void tcp_fill_info(struct tcpcb *, struct tcp_info *);
#ifdef TCPDEBUG
#define TCPDEBUG0 int ostate = 0
@ -941,12 +942,51 @@ tcp6_connect(tp, nam, td)
}
#endif /* INET6 */
/*
 * Fill in a struct tcp_info snapshot describing the connection behind tp.
 * The structure follows the Linux 2.6 TCP_INFO API, but the result is not
 * ABI compatible with Linux: every value is reported using FreeBSD-native
 * constants, so (for example) the number stored in tcpi_state is the
 * FreeBSD TCP state machine value, which differs from the Linux one.
 *
 * INP_LOCK_ASSERT() is applied to tp's inpcb, so the caller is expected to
 * hold that lock.  Any member not explicitly assigned below is returned as
 * zero.
 */
static void
tcp_fill_info(tp, ti)
	struct tcpcb *tp;
	struct tcp_info *ti;
{
	int tstmp_on, wscale_on;

	INP_LOCK_ASSERT(tp->t_inpcb);

	/* Start from an all-zero structure, padding included. */
	bzero(ti, sizeof(*ti));

	/*
	 * Timestamps and window scaling are only reported as enabled when
	 * the option was both requested and received.
	 */
	tstmp_on = (tp->t_flags & TF_REQ_TSTMP) != 0 &&
	    (tp->t_flags & TF_RCVD_TSTMP) != 0;
	wscale_on = (tp->t_flags & TF_REQ_SCALE) != 0 &&
	    (tp->t_flags & TF_RCVD_SCALE) != 0;

	if (tstmp_on)
		ti->tcpi_options |= TCPI_OPT_TIMESTAMPS;
	if (tp->sack_enable)
		ti->tcpi_options |= TCPI_OPT_SACK;
	if (wscale_on) {
		/* The shift values are meaningful only when scaling is on. */
		ti->tcpi_snd_wscale = tp->snd_scale;
		ti->tcpi_rcv_wscale = tp->rcv_scale;
		ti->tcpi_options |= TCPI_OPT_WSCALE;
	}

	/* Fields shared with the Linux structure. */
	ti->tcpi_state = tp->t_state;
	ti->tcpi_snd_cwnd = tp->snd_cwnd;
	ti->tcpi_snd_ssthresh = tp->snd_ssthresh;

	/* FreeBSD-specific extension fields. */
	ti->tcpi_snd_bwnd = tp->snd_bwnd;
	ti->tcpi_snd_wnd = tp->snd_wnd;
}
/*
* The new sockopt interface makes it possible for us to block in the
* copyin/out step (if we take a page fault). Taking a page fault at
* splnet() is probably a Bad Thing. (Since sockets and pcbs both now
* use TSM, there probably isn't any need for this function to run at
* splnet() any more. This needs more examination.)
*
* XXXRW: The locking here is wrong; we may take a page fault while holding
* the inpcb lock.
*/
int
tcp_ctloutput(so, sopt)
@ -956,6 +996,7 @@ tcp_ctloutput(so, sopt)
int error, opt, optval;
struct inpcb *inp;
struct tcpcb *tp;
struct tcp_info ti;
error = 0;
INP_INFO_RLOCK(&tcbinfo);
@ -1046,6 +1087,10 @@ tcp_ctloutput(so, sopt)
error = EINVAL;
break;
case TCP_INFO:
error = EINVAL;
break;
default:
error = ENOPROTOOPT;
break;
@ -1057,26 +1102,33 @@ tcp_ctloutput(so, sopt)
#ifdef TCP_SIGNATURE
case TCP_MD5SIG:
optval = (tp->t_flags & TF_SIGNATURE) ? 1 : 0;
error = sooptcopyout(sopt, &optval, sizeof optval);
break;
#endif
case TCP_NODELAY:
optval = tp->t_flags & TF_NODELAY;
error = sooptcopyout(sopt, &optval, sizeof optval);
break;
case TCP_MAXSEG:
optval = tp->t_maxseg;
error = sooptcopyout(sopt, &optval, sizeof optval);
break;
case TCP_NOOPT:
optval = tp->t_flags & TF_NOOPT;
error = sooptcopyout(sopt, &optval, sizeof optval);
break;
case TCP_NOPUSH:
optval = tp->t_flags & TF_NOPUSH;
error = sooptcopyout(sopt, &optval, sizeof optval);
break;
case TCP_INFO:
tcp_fill_info(tp, &ti);
error = sooptcopyout(sopt, &ti, sizeof ti);
break;
default:
error = ENOPROTOOPT;
break;
}
if (error == 0)
error = sooptcopyout(sopt, &optval, sizeof optval);
break;
}
INP_UNLOCK(inp);