Plug a mbuf leak in tcp_usr_send(). pru_send() routines are expected

to either enqueue or free their mbuf chains, but tcp_usr_send() was dropping them on the floor if the tcpcb/inpcb had been torn down in the middle of a send/write attempt. This has been responsible for a wide variety of mbuf leak patterns, ranging from slow gradual leakage to rather rapid exhaustion. This has been a problem since before 2.2 was branched and appears to have been fixed in rev 1.16 and lost in 1.23/1.28. Thanks to Jayanth Vijayaraghavan <jayanth@yahoo-inc.com> for checking (extensively) into this on a live production 2.2.x system and confirming that it was the actual cause of the leak and that this change looks like it fixes it. The machine in question was losing (from memory) about 150 mbufs per hour under load and a change similar to this stopped it. (Don't blame Jayanth for this patch though.) An alternative approach to this would be to recheck SS_CANTSENDMORE etc inside the splnet() right before calling pru_send() after all the potential sleeps, interrupts and delays have happened. However, this would mean exposing knowledge of the tcp stack's reset handling and removal of the pcb to the generic code. There are other things that call pru_send() directly though. Problem originally noted by: John Plevyak <jplevyak@inktomi.com>
svn path=/head/; revision=47720
1999-06-04 02:27:06 +00:00 · 1999-06-04 02:27:06 +00:00 · 9c9906e912 · 2020-12-20 02:59:44 +00:00
commit 9c9906e912
parent 5004cc2ecf
2 changed files with 38 additions and 8 deletions
--- a/sys/kern/uipc_socket.c
+++ b/sys/kern/uipc_socket.c
@ -31,7 +31,7 @@
 * SUCH DAMAGE.
 *
 *	@(#)uipc_socket.c	8.3 (Berkeley) 4/15/94
- *	$Id: uipc_socket.c,v 1.57 1999/05/03 23:57:23 billf Exp $
+ *	$Id: uipc_socket.c,v 1.58 1999/05/21 15:54:40 ache Exp $
 */

 #include <sys/param.h>
@ -538,6 +538,15 @@ sosend(so, addr, uio, top, control, flags, p)
 		    if (dontroute)
 			    so->so_options |= SO_DONTROUTE;
 		    s = splnet();				/* XXX */
+		    /*
+		     * XXX all the SS_CANTSENDMORE checks previously
+		     * done could be out of date.  We could have received
+		     * a reset packet in an interrupt or maybe we slept
+		     * while doing page faults in uiomove() etc. We could
+		     * probably recheck again inside the splnet() protection
+		     * here, but there are probably other places that this
+		     * also happens.  We must rethink this.
+		     */
 		    error = (*so->so_proto->pr_usrreqs->pru_send)(so,
 			(flags & MSG_OOB) ? PRUS_OOB :
 			/*
--- a/sys/netinet/tcp_usrreq.c
+++ b/sys/netinet/tcp_usrreq.c
@ -31,7 +31,7 @@
 * SUCH DAMAGE.
 *
 *	From: @(#)tcp_usrreq.c	8.2 (Berkeley) 1/3/94
- *	$Id: tcp_usrreq.c,v 1.42 1999/04/28 11:37:50 phk Exp $
+ *	$Id: tcp_usrreq.c,v 1.43 1999/05/03 23:57:32 billf Exp $
 */

 #include "opt_tcpdebug.h"
@ -326,7 +326,10 @@ tcp_usr_rcvd(struct socket *so, int flags)

 /*
 * Do a send by putting data in output queue and updating urgent
- * marker if URG set.  Possibly send more data.
+ * marker if URG set.  Possibly send more data.  Unlike the other
+ * pru_*() routines, the mbuf chains are our responsibility.  We
+ * must either enqueue them or free them.  The other pru_* routines
+ * generally are caller-frees.
 */
 static int
 tcp_usr_send(struct socket *so, int flags, struct mbuf *m, 
@ -336,16 +339,34 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
 	int error = 0;
 	struct inpcb *inp = sotoinpcb(so);
 	struct tcpcb *tp;
+	TCPDEBUG0;

-	COMMON_START();
-	if (control && control->m_len) {
-		m_freem(control); /* XXX shouldn't caller do this??? */
+	if (inp == NULL) {
+		/*
+		 * OOPS! we lost a race, the TCP session got reset after
+		 * we checked SS_CANTSENDMORE, eg: while doing uiomove or a
+		 * network interrupt in the non-splnet() section of sosend().
+		 */
 		if (m)
 			m_freem(m);
-		error = EINVAL;
+		if (control)
+			m_freem(control);
+		error = ECONNRESET;	/* XXX EPIPE? */
 		goto out;
 	}
-
+	tp = intotcpcb(inp);
+	TCPDEBUG1();
+	if (control) {
+		/* TCP doesn't do control messages (rights, creds, etc) */
+		if (control->m_len) {
+			m_freem(control);
+			if (m)
+				m_freem(m);
+			error = EINVAL;
+			goto out;
+		}
+		m_freem(control);	/* empty control, just free it */
+	}
 	if(!(flags & PRUS_OOB)) {
 		sbappend(&so->so_snd, m);
 		if (nam && tp->t_state < TCPS_SYN_SENT) {