Implement parts of the TCP_INFO socket option as found in Linux 2.6.
This socket option allows processes to query a TCP socket for some low-level transmission details, such as the current send, bandwidth, and congestion windows. Linux provides a 'struct tcp_info' structure containing various variables, rather than separate socket options; this makes the API somewhat fragile, as it makes it difficult to add new entries of interest as requirements and implementation evolve. As such, I've included a large pad at the end of the structure. Right now, relatively few of the Linux API fields are filled in, and some have no logical equivalent on FreeBSD. I've included __'d entries in the structure to make it easier to figure out what is and isn't omitted. This API/ABI should be considered unstable for the time being.
This commit is contained in:
parent
c76a6d58b3
commit
0aa3c6f817
@ -159,6 +159,72 @@ struct tcphdr {
|
||||
#define TCP_NOPUSH 0x04 /* don't push last block of write */
|
||||
#define TCP_NOOPT 0x08 /* don't use TCP options */
|
||||
#define TCP_MD5SIG 0x10 /* use MD5 digests (RFC2385) */
|
||||
#define TCP_INFO 0x20 /* retrieve tcp_info structure */
|
||||
|
||||
/* Bit flags that tcp_fill_info() ORs into tcp_info.tcpi_options. */
#define TCPI_OPT_TIMESTAMPS 0x01 /* TF_REQ_TSTMP and TF_RCVD_TSTMP both set */
|
||||
#define TCPI_OPT_SACK 0x02 /* tp->sack_enable is non-zero */
|
||||
#define TCPI_OPT_WSCALE 0x04 /* TF_REQ_SCALE and TF_RCVD_SCALE both set */
|
||||
#define TCPI_OPT_ECN 0x08 /* defined, but not set by tcp_fill_info() */
|
||||
|
||||
/*
 * Result structure for the TCP_INFO socket option, which lets a caller
 * query state of a TCP connection.  The option originates in the Linux 2.6
 * TCP API; field names and ordering follow the Linux structure so that the
 * two may stay close in the future.  Fields whose names begin with "__" are
 * placeholders that only reserve the position of a Linux field.  Because
 * the structure has a fixed size, spare room is left at the end for growth.
 *
 * XXX: This ABI/API is currently unstable and is expected to change.
 */
struct tcp_info {
	u_int8_t	tcpi_state;		/* TCP FSM state. */
	u_int8_t	__tcpi_ca_state;
	u_int8_t	__tcpi_retransmits;
	u_int8_t	__tcpi_probes;
	u_int8_t	__tcpi_backoff;
	u_int8_t	tcpi_options;		/* Options enabled on conn. */
	u_int8_t	tcpi_snd_wscale:4;	/* RFC1323 send shift value. */
	u_int8_t	tcpi_rcv_wscale:4;	/* RFC1323 recv shift value. */

	u_int32_t	__tcpi_rto;
	u_int32_t	__tcpi_ato;
	u_int32_t	__tcpi_snd_mss;
	u_int32_t	__tcpi_rcv_mss;

	u_int32_t	__tcpi_unacked;
	u_int32_t	__tcpi_sacked;
	u_int32_t	__tcpi_lost;
	u_int32_t	__tcpi_retrans;
	u_int32_t	__tcpi_fackets;

	/* Times; measurements in usecs. */
	u_int32_t	__tcpi_last_data_sent;
	u_int32_t	__tcpi_last_ack_sent;	/* Also unimpl. on Linux? */
	u_int32_t	__tcpi_last_data_recv;
	u_int32_t	__tcpi_last_ack_recv;

	/* Metrics; variable units. */
	u_int32_t	__tcpi_pmtu;
	u_int32_t	__tcpi_rcv_ssthresh;
	u_int32_t	__tcpi_rtt;
	u_int32_t	__tcpi_rttvar;
	u_int32_t	tcpi_snd_ssthresh;	/* Slow start threshold. */
	u_int32_t	tcpi_snd_cwnd;		/* Send congestion window. */
	u_int32_t	__tcpi_advmss;
	u_int32_t	__tcpi_reordering;

	u_int32_t	__tcpi_rcv_rtt;
	u_int32_t	__tcpi_rcv_space;

	/* FreeBSD extensions to tcp_info. */
	u_int32_t	tcpi_snd_wnd;		/* Advertised send window. */
	u_int32_t	tcpi_snd_bwnd;		/* Bandwidth send window. */

	/* Spare words so fields can be added without breaking the ABI. */
	u_int32_t	__tcpi_pad[32];		/* Padding. */
};
|
||||
#endif
|
||||
|
||||
#endif /* !_NETINET_TCP_H_ */
|
||||
|
@ -97,6 +97,7 @@ static struct tcpcb *
|
||||
tcp_disconnect(struct tcpcb *);
|
||||
static struct tcpcb *
|
||||
tcp_usrclosed(struct tcpcb *);
|
||||
static void tcp_fill_info(struct tcpcb *, struct tcp_info *);
|
||||
|
||||
#ifdef TCPDEBUG
|
||||
#define TCPDEBUG0 int ostate = 0
|
||||
@ -941,12 +942,51 @@ tcp6_connect(tp, nam, td)
|
||||
}
|
||||
#endif /* INET6 */
|
||||
|
||||
/*
|
||||
* Export TCP internal state information via a struct tcp_info, based on the
|
||||
* Linux 2.6 API. Not ABI compatible as our constants are mapped differently
|
||||
* (TCP state machine, etc). We export all information using FreeBSD-native
|
||||
* constants -- for example, the numeric values for tcpi_state will differ
|
||||
* from Linux.
|
||||
*/
|
||||
static void
|
||||
tcp_fill_info(tp, ti)
|
||||
struct tcpcb *tp;
|
||||
struct tcp_info *ti;
|
||||
{
|
||||
|
||||
INP_LOCK_ASSERT(tp->t_inpcb);
|
||||
bzero(ti, sizeof(*ti));
|
||||
|
||||
ti->tcpi_state = tp->t_state;
|
||||
if ((tp->t_flags & TF_REQ_TSTMP) && (tp->t_flags & TF_RCVD_TSTMP))
|
||||
ti->tcpi_options |= TCPI_OPT_TIMESTAMPS;
|
||||
if (tp->sack_enable)
|
||||
ti->tcpi_options |= TCPI_OPT_SACK;
|
||||
if ((tp->t_flags & TF_REQ_SCALE) && (tp->t_flags & TF_RCVD_SCALE)) {
|
||||
ti->tcpi_options |= TCPI_OPT_WSCALE;
|
||||
ti->tcpi_snd_wscale = tp->snd_scale;
|
||||
ti->tcpi_rcv_wscale = tp->rcv_scale;
|
||||
}
|
||||
ti->tcpi_snd_ssthresh = tp->snd_ssthresh;
|
||||
ti->tcpi_snd_cwnd = tp->snd_cwnd;
|
||||
|
||||
/*
|
||||
* FreeBSD-specific extension fields for tcp_info.
|
||||
*/
|
||||
ti->tcpi_snd_wnd = tp->snd_wnd;
|
||||
ti->tcpi_snd_bwnd = tp->snd_bwnd;
|
||||
}
|
||||
|
||||
/*
|
||||
* The new sockopt interface makes it possible for us to block in the
|
||||
* copyin/out step (if we take a page fault). Taking a page fault at
|
||||
* splnet() is probably a Bad Thing. (Since sockets and pcbs both now
|
||||
* use TSM, there probably isn't any need for this function to run at
|
||||
* splnet() any more. This needs more examination.)
|
||||
*
|
||||
* XXXRW: The locking here is wrong; we may take a page fault while holding
|
||||
* the inpcb lock.
|
||||
*/
|
||||
int
|
||||
tcp_ctloutput(so, sopt)
|
||||
@ -956,6 +996,7 @@ tcp_ctloutput(so, sopt)
|
||||
int error, opt, optval;
|
||||
struct inpcb *inp;
|
||||
struct tcpcb *tp;
|
||||
struct tcp_info ti;
|
||||
|
||||
error = 0;
|
||||
INP_INFO_RLOCK(&tcbinfo);
|
||||
@ -1046,6 +1087,10 @@ tcp_ctloutput(so, sopt)
|
||||
error = EINVAL;
|
||||
break;
|
||||
|
||||
case TCP_INFO:
|
||||
error = EINVAL;
|
||||
break;
|
||||
|
||||
default:
|
||||
error = ENOPROTOOPT;
|
||||
break;
|
||||
@ -1057,26 +1102,33 @@ tcp_ctloutput(so, sopt)
|
||||
#ifdef TCP_SIGNATURE
|
||||
case TCP_MD5SIG:
|
||||
optval = (tp->t_flags & TF_SIGNATURE) ? 1 : 0;
|
||||
error = sooptcopyout(sopt, &optval, sizeof optval);
|
||||
break;
|
||||
#endif
|
||||
case TCP_NODELAY:
|
||||
optval = tp->t_flags & TF_NODELAY;
|
||||
error = sooptcopyout(sopt, &optval, sizeof optval);
|
||||
break;
|
||||
case TCP_MAXSEG:
|
||||
optval = tp->t_maxseg;
|
||||
error = sooptcopyout(sopt, &optval, sizeof optval);
|
||||
break;
|
||||
case TCP_NOOPT:
|
||||
optval = tp->t_flags & TF_NOOPT;
|
||||
error = sooptcopyout(sopt, &optval, sizeof optval);
|
||||
break;
|
||||
case TCP_NOPUSH:
|
||||
optval = tp->t_flags & TF_NOPUSH;
|
||||
error = sooptcopyout(sopt, &optval, sizeof optval);
|
||||
break;
|
||||
case TCP_INFO:
|
||||
tcp_fill_info(tp, &ti);
|
||||
error = sooptcopyout(sopt, &ti, sizeof ti);
|
||||
break;
|
||||
default:
|
||||
error = ENOPROTOOPT;
|
||||
break;
|
||||
}
|
||||
if (error == 0)
|
||||
error = sooptcopyout(sopt, &optval, sizeof optval);
|
||||
break;
|
||||
}
|
||||
INP_UNLOCK(inp);
|
||||
|
Loading…
x
Reference in New Issue
Block a user