George V. Neville-Neil 15dbc1605b On FreeBSD there is a setsockopt option SO_USER_COOKIE which allows
setting a 32 bit value on each socket. This can be used by applications
and DTrace as a rendezvous point so that an application's data can
more easily be captured at run time. Expose the user cookie via
DTrace by updating the translator in tcp.d and add a quick test
program, a TCP server, that sets the cookie on each connection
accepted.

Reviewed by:	hiren
MFC after:	1 week
Sponsored by:	Limelight Networks
Differential Revision:	https://reviews.freebsd.org/D7152
2016-07-08 23:44:09 +00:00

393 lines
14 KiB
D

/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*
* $FreeBSD$
*/
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013 Mark Johnston <markj@freebsd.org>
*/
#pragma D depends_on library ip.d
#pragma D depends_on module kernel
#pragma D depends_on provider tcp
/*
* TCP connection state values. tcp_state_string[] converts one of these
* values to a string.
*/
#pragma D binding "1.6.3" TCPS_CLOSED
inline int TCPS_CLOSED = 0;
#pragma D binding "1.6.3" TCPS_LISTEN
inline int TCPS_LISTEN = 1;
#pragma D binding "1.6.3" TCPS_SYN_SENT
inline int TCPS_SYN_SENT = 2;
#pragma D binding "1.6.3" TCPS_SYN_RECEIVED
inline int TCPS_SYN_RECEIVED = 3;
#pragma D binding "1.6.3" TCPS_ESTABLISHED
inline int TCPS_ESTABLISHED = 4;
#pragma D binding "1.6.3" TCPS_CLOSE_WAIT
inline int TCPS_CLOSE_WAIT = 5;
#pragma D binding "1.6.3" TCPS_FIN_WAIT_1
inline int TCPS_FIN_WAIT_1 = 6;
#pragma D binding "1.6.3" TCPS_CLOSING
inline int TCPS_CLOSING = 7;
#pragma D binding "1.6.3" TCPS_LAST_ACK
inline int TCPS_LAST_ACK = 8;
#pragma D binding "1.6.3" TCPS_FIN_WAIT_2
inline int TCPS_FIN_WAIT_2 = 9;
#pragma D binding "1.6.3" TCPS_TIME_WAIT
inline int TCPS_TIME_WAIT = 10;
/* TCP segment flags. */
#pragma D binding "1.6.3" TH_FIN
inline uint8_t TH_FIN = 0x01;
#pragma D binding "1.6.3" TH_SYN
inline uint8_t TH_SYN = 0x02;
#pragma D binding "1.6.3" TH_RST
inline uint8_t TH_RST = 0x04;
#pragma D binding "1.6.3" TH_PUSH
inline uint8_t TH_PUSH = 0x08;
#pragma D binding "1.6.3" TH_ACK
inline uint8_t TH_ACK = 0x10;
#pragma D binding "1.6.3" TH_URG
inline uint8_t TH_URG = 0x20;
#pragma D binding "1.6.3" TH_ECE
inline uint8_t TH_ECE = 0x40;
#pragma D binding "1.6.3" TH_CWR
inline uint8_t TH_CWR = 0x80;
/* TCP connection state strings. */
#pragma D binding "1.6.3" tcp_state_string
inline string tcp_state_string[int32_t state] =
state == TCPS_CLOSED ? "state-closed" :
state == TCPS_LISTEN ? "state-listen" :
state == TCPS_SYN_SENT ? "state-syn-sent" :
state == TCPS_SYN_RECEIVED ? "state-syn-received" :
state == TCPS_ESTABLISHED ? "state-established" :
state == TCPS_CLOSE_WAIT ? "state-close-wait" :
state == TCPS_FIN_WAIT_1 ? "state-fin-wait-1" :
state == TCPS_CLOSING ? "state-closing" :
state == TCPS_LAST_ACK ? "state-last-ack" :
state == TCPS_FIN_WAIT_2 ? "state-fin-wait-2" :
state == TCPS_TIME_WAIT ? "state-time-wait" :
"<unknown>";
/*
* tcpsinfo contains stable TCP details from tcp_t.
*/
typedef struct tcpsinfo {
uintptr_t tcps_addr;
int tcps_local; /* is delivered locally, boolean */
int tcps_active; /* active open (from here), boolean */
uint16_t tcps_lport; /* local port */
uint16_t tcps_rport; /* remote port */
string tcps_laddr; /* local address, as a string */
string tcps_raddr; /* remote address, as a string */
int32_t tcps_state; /* TCP state */
uint32_t tcps_iss; /* Initial sequence # sent */
uint32_t tcps_irs; /* Initial sequence # received */
uint32_t tcps_suna; /* sequence # sent but unacked */
uint32_t tcps_smax; /* highest sequence number sent */
uint32_t tcps_snxt; /* next sequence # to send */
uint32_t tcps_rack; /* sequence # we have acked */
uint32_t tcps_rnxt; /* next sequence # expected */
u_long tcps_swnd; /* send window size */
int32_t tcps_snd_ws; /* send window scaling */
uint32_t tcps_swl1; /* window update seg seq number */
uint32_t tcps_swl2; /* window update seg ack number */
uint32_t tcps_rup; /* receive urgent pointer */
uint32_t tcps_radv; /* advertised window */
u_long tcps_rwnd; /* receive window size */
int32_t tcps_rcv_ws; /* receive window scaling */
u_long tcps_cwnd; /* congestion window */
u_long tcps_cwnd_ssthresh; /* threshold for congestion avoidance */
uint32_t tcps_srecover; /* for use in NewReno Fast Recovery */
uint32_t tcps_sack_fack; /* SACK sequence # we have acked */
uint32_t tcps_sack_snxt; /* next SACK seq # for retransmission */
uint32_t tcps_rto; /* round-trip timeout, msec */
uint32_t tcps_mss; /* max segment size */
int tcps_retransmit; /* retransmit send event, boolean */
int tcps_srtt; /* smoothed RTT in units of (TCP_RTT_SCALE*hz) */
int tcps_debug; /* socket has SO_DEBUG set */
int tcps_cookie; /* expose the socket's SO_USER_COOKIE */
int32_t tcps_dupacks; /* consecutive dup acks received */
uint32_t tcps_rtttime; /* RTT measurement start time */
uint32_t tcps_rtseq; /* sequence # being timed */
uint32_t tcps_ts_recent; /* timestamp echo data */
} tcpsinfo_t;
/*
* tcplsinfo provides the old tcp state for state changes.
*/
typedef struct tcplsinfo {
int32_t tcps_state; /* previous TCP state */
} tcplsinfo_t;
/*
* tcpinfo is the TCP header fields.
*/
typedef struct tcpinfo {
uint16_t tcp_sport; /* source port */
uint16_t tcp_dport; /* destination port */
uint32_t tcp_seq; /* sequence number */
uint32_t tcp_ack; /* acknowledgment number */
uint8_t tcp_offset; /* data offset, in bytes */
uint8_t tcp_flags; /* flags */
uint16_t tcp_window; /* window size */
uint16_t tcp_checksum; /* checksum */
uint16_t tcp_urgent; /* urgent data pointer */
struct tcphdr *tcp_hdr; /* raw TCP header */
} tcpinfo_t;
/*
* A clone of tcpinfo_t used to handle the fact that the TCP input path
* overwrites some fields of the TCP header with their host-order equivalents.
* Unfortunately, DTrace doesn't let us simply typedef a new name for struct
* tcpinfo and define a separate translator for it.
*/
typedef struct tcpinfoh {
uint16_t tcp_sport; /* source port */
uint16_t tcp_dport; /* destination port */
uint32_t tcp_seq; /* sequence number */
uint32_t tcp_ack; /* acknowledgment number */
uint8_t tcp_offset; /* data offset, in bytes */
uint8_t tcp_flags; /* flags */
uint16_t tcp_window; /* window size */
uint16_t tcp_checksum; /* checksum */
uint16_t tcp_urgent; /* urgent data pointer */
struct tcphdr *tcp_hdr; /* raw TCP header */
} tcpinfoh_t;
#pragma D binding "1.6.3" translator
translator csinfo_t < struct tcpcb *p > {
cs_addr = NULL;
cs_cid = (uint64_t)(p == NULL ? 0 : p->t_inpcb);
cs_pid = 0;
cs_zoneid = 0;
};
#pragma D binding "1.6.3" translator
translator tcpsinfo_t < struct tcpcb *p > {
tcps_addr = (uintptr_t)p;
tcps_local = -1; /* XXX */
tcps_active = -1; /* XXX */
tcps_lport = p == NULL ? 0 : ntohs(p->t_inpcb->inp_inc.inc_ie.ie_lport);
tcps_rport = p == NULL ? 0 : ntohs(p->t_inpcb->inp_inc.inc_ie.ie_fport);
tcps_laddr = p == NULL ? 0 :
p->t_inpcb->inp_vflag == INP_IPV4 ?
inet_ntoa(&p->t_inpcb->inp_inc.inc_ie.ie_dependladdr.ie46_local.ia46_addr4.s_addr) :
inet_ntoa6(&p->t_inpcb->inp_inc.inc_ie.ie_dependladdr.ie6_local);
tcps_raddr = p == NULL ? 0 :
p->t_inpcb->inp_vflag == INP_IPV4 ?
inet_ntoa(&p->t_inpcb->inp_inc.inc_ie.ie_dependfaddr.ie46_foreign.ia46_addr4.s_addr) :
inet_ntoa6(&p->t_inpcb->inp_inc.inc_ie.ie_dependfaddr.ie6_foreign);
tcps_state = p == NULL ? -1 : p->t_state;
tcps_iss = p == NULL ? 0 : p->iss;
tcps_irs = p == NULL ? 0 : p->irs;
tcps_suna = p == NULL ? 0 : p->snd_una;
tcps_smax = p == NULL ? 0 : p->snd_max;
tcps_snxt = p == NULL ? 0 : p->snd_nxt;
tcps_rack = p == NULL ? 0 : p->last_ack_sent;
tcps_rnxt = p == NULL ? 0 : p->rcv_nxt;
tcps_swnd = p == NULL ? -1 : p->snd_wnd;
tcps_snd_ws = p == NULL ? -1 : p->snd_scale;
tcps_swl1 = p == NULL ? -1 : p->snd_wl1;
tcps_swl2 = p == NULL ? -1 : p->snd_wl2;
tcps_radv = p == NULL ? -1 : p->rcv_adv;
tcps_rwnd = p == NULL ? -1 : p->rcv_wnd;
tcps_rup = p == NULL ? -1 : p->rcv_up;
tcps_rcv_ws = p == NULL ? -1 : p->rcv_scale;
tcps_cwnd = p == NULL ? -1 : p->snd_cwnd;
tcps_cwnd_ssthresh = p == NULL ? -1 : p->snd_ssthresh;
tcps_srecover = p == NULL ? -1 : p->snd_recover;
tcps_sack_fack = p == NULL ? 0 : p->snd_fack;
tcps_sack_snxt = p == NULL ? 0 : p->sack_newdata;
tcps_rto = p == NULL ? -1 : (p->t_rxtcur * 1000) / `hz;
tcps_mss = p == NULL ? -1 : p->t_maxseg;
tcps_retransmit = p == NULL ? -1 : p->t_rxtshift > 0 ? 1 : 0;
tcps_srtt = p == NULL ? -1 : p->t_srtt; /* smoothed RTT in units of (TCP_RTT_SCALE*hz) */
tcps_debug = p == NULL ? 0 :
p->t_inpcb->inp_socket->so_options & 1;
tcps_cookie = p == NULL ? -1 :
p->t_inpcb->inp_socket->so_user_cookie;
tcps_dupacks = p == NULL ? -1 : p->t_dupacks;
tcps_rtttime = p == NULL ? -1 : p->t_rtttime;
tcps_rtseq = p == NULL ? -1 : p->t_rtseq;
tcps_ts_recent = p == NULL ? -1 : p->ts_recent;
};
#pragma D binding "1.6.3" translator
translator tcpinfo_t < struct tcphdr *p > {
tcp_sport = p == NULL ? 0 : ntohs(p->th_sport);
tcp_dport = p == NULL ? 0 : ntohs(p->th_dport);
tcp_seq = p == NULL ? -1 : ntohl(p->th_seq);
tcp_ack = p == NULL ? -1 : ntohl(p->th_ack);
tcp_offset = p == NULL ? -1 : (p->th_off >> 2);
tcp_flags = p == NULL ? 0 : p->th_flags;
tcp_window = p == NULL ? 0 : ntohs(p->th_win);
tcp_checksum = p == NULL ? 0 : ntohs(p->th_sum);
tcp_urgent = p == NULL ? 0 : ntohs(p->th_urp);
tcp_hdr = (struct tcphdr *)p;
};
/*
* This translator differs from the one for tcpinfo_t in that the sequence
* number, acknowledgement number, window size and urgent pointer are already
* in host order and thus don't need to be converted.
*/
#pragma D binding "1.6.3" translator
translator tcpinfoh_t < struct tcphdr *p > {
tcp_sport = p == NULL ? 0 : ntohs(p->th_sport);
tcp_dport = p == NULL ? 0 : ntohs(p->th_dport);
tcp_seq = p == NULL ? -1 : p->th_seq;
tcp_ack = p == NULL ? -1 : p->th_ack;
tcp_offset = p == NULL ? -1 : (p->th_off >> 2);
tcp_flags = p == NULL ? 0 : p->th_flags;
tcp_window = p == NULL ? 0 : (p->th_win);
tcp_checksum = p == NULL ? 0 : ntohs(p->th_sum);
tcp_urgent = p == NULL ? 0 : p->th_urp;
tcp_hdr = (struct tcphdr *)p;
};
#pragma D binding "1.6.3" translator
translator tcplsinfo_t < int s > {
tcps_state = s;
};
/* Support for TCP debug */
#pragma D binding "1.12.1" TA_INPUT
inline int TA_INPUT = 0;
#pragma D binding "1.12.1" TA_OUTPUT
inline int TA_OUTPUT = 1;
#pragma D binding "1.12.1" TA_USER
inline int TA_USER = 2;
#pragma D binding "1.12.1" TA_RESPOND
inline int TA_RESPOND = 3;
#pragma D binding "1.12.1" TA_DROP
inline int TA_DROP = 4;
/* direction strings. */
#pragma D binding "1.12.1" tcpdebug_dir_string
inline string tcpdebug_dir_string[uint8_t direction] =
direction == TA_INPUT ? "input" :
direction == TA_OUTPUT ? "output" :
direction == TA_USER ? "user" :
direction == TA_RESPOND ? "respond" :
direction == TA_OUTPUT ? "drop" :
"unknown" ;
#pragma D binding "1.12.1" tcpflag_string
inline string tcpflag_string[uint8_t flags] =
flags & TH_FIN ? "FIN" :
flags & TH_SYN ? "SYN" :
flags & TH_RST ? "RST" :
flags & TH_PUSH ? "PUSH" :
flags & TH_ACK ? "ACK" :
flags & TH_URG ? "URG" :
flags & TH_ECE ? "ECE" :
flags & TH_CWR ? "CWR" :
"unknown" ;
#pragma D binding "1.12.1" PRU_ATTACH
inline int PRU_ATTACH = 0;
#pragma D binding "1.12.1" PRU_DETACH
inline int PRU_DETACH = 1;
#pragma D binding "1.12.1" PRU_BIND
inline int PRU_BIND = 2;
#pragma D binding "1.12.1" PRU_LISTEN
inline int PRU_LISTEN = 3;
#pragma D binding "1.12.1" PRU_CONNECT
inline int PRU_CONNECT = 4;
#pragma D binding "1.12.1" PRU_ACCEPT
inline int PRU_ACCEPT = 5 ;
#pragma D binding "1.12.1" PRU_DISCONNECT
inline int PRU_DISCONNECT= 6;
#pragma D binding "1.12.1" PRU_SHUTDOWN
inline int PRU_SHUTDOWN = 7;
#pragma D binding "1.12.1" PRU_RCVD
inline int PRU_RCVD = 8;
#pragma D binding "1.12.1" PRU_SEND
inline int PRU_SEND = 9;
#pragma D binding "1.12.1" PRU_ABORT
inline int PRU_ABORT = 10;
#pragma D binding "1.12.1" PRU_CONTROL
inline int PRU_CONTROL = 11;
#pragma D binding "1.12.1" PRU_SENSE
inline int PRU_SENSE = 12;
#pragma D binding "1.12.1" PRU_RCVOOB
inline int PRU_RCVOOB = 13;
#pragma D binding "1.12.1" PRU_SENDOOB
inline int PRU_SENDOOB = 14;
#pragma D binding "1.12.1" PRU_SOCKADDR
inline int PRU_SOCKADDR = 15;
#pragma D binding "1.12.1" PRU_PEERADDR
inline int PRU_PEERADDR = 16;
#pragma D binding "1.12.1" PRU_CONNECT2
inline int PRU_CONNECT2 = 17;
#pragma D binding "1.12.1" PRU_FASTTIMO
inline int PRU_FASTTIMO = 18;
#pragma D binding "1.12.1" PRU_SLOWTIMO
inline int PRU_SLOWTIMO = 19;
#pragma D binding "1.12.1" PRU_PROTORCV
inline int PRU_PROTORCV = 20;
#pragma D binding "1.12.1" PRU_PROTOSEND
inline int PRU_PROTOSEND = 21;
#pragma D binding "1.12.1" PRU_SEND_EOF
inline int PRU_SEND_EOF = 22;
#pragma D binding "1.12.1" PRU_SOSETLABEL
inline int PRU_SOSETLABEL = 23;
#pragma D binding "1.12.1" PRU_CLOSE
inline int PRU_CLOSE = 24;
#pragma D binding "1.12.1" PRU_FLUSH
inline int PRU_FLUSH = 25;
#pragma D binding "1.12.1" prureq_string
inline string prureq_string[uint8_t req] =
req == PRU_ATTACH ? "ATTACH" :
req == PRU_DETACH ? "DETACH" :
req == PRU_BIND ? "BIND" :
req == PRU_LISTEN ? "LISTEN" :
req == PRU_CONNECT ? "CONNECT" :
req == PRU_ACCEPT ? "ACCEPT" :
req == PRU_DISCONNECT ? "DISCONNECT" :
req == PRU_SHUTDOWN ? "SHUTDOWN" :
req == PRU_RCVD ? "RCVD" :
req == PRU_SEND ? "SEND" :
req == PRU_ABORT ? "ABORT" :
req == PRU_CONTROL ? "CONTROL" :
req == PRU_SENSE ? "SENSE" :
req == PRU_RCVOOB ? "RCVOOB" :
req == PRU_SENDOOB ? "SENDOOB" :
req == PRU_SOCKADDR ? "SOCKADDR" :
req == PRU_PEERADDR ? "PEERADDR" :
req == PRU_CONNECT2 ? "CONNECT2" :
req == PRU_FASTTIMO ? "FASTTIMO" :
req == PRU_SLOWTIMO ? "SLOWTIMO" :
req == PRU_PROTORCV ? "PROTORCV" :
req == PRU_PROTOSEND ? "PROTOSEND" :
req == PRU_SEND ? "SEND_EOF" :
req == PRU_SOSETLABEL ? "SOSETLABEL" :
req == PRU_CLOSE ? "CLOSE" :
req == PRU_FLUSH ? "FLUSE" :
"unknown" ;