From 41bb85146ba8c6da5006bcea0716d83e7c042534 Mon Sep 17 00:00:00 2001 From: Pawel Jakub Dawidek Date: Sat, 2 Apr 2011 09:31:02 +0000 Subject: [PATCH] Handle ENOBUFS on send(2) by retrying for a while and logging the problem. MFC after: 1 week --- sbin/hastd/proto_common.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/sbin/hastd/proto_common.c b/sbin/hastd/proto_common.c index 1d4c60654bcd..183b4e4fdcd2 100644 --- a/sbin/hastd/proto_common.c +++ b/sbin/hastd/proto_common.c @@ -94,6 +94,7 @@ proto_common_send(int sock, const unsigned char *data, size_t size, int fd) { ssize_t done; size_t sendsize; + int errcount = 0; PJDLOG_ASSERT(sock >= 0); @@ -118,6 +119,23 @@ proto_common_send(int sock, const unsigned char *data, size_t size, int fd) } else if (done < 0) { if (errno == EINTR) continue; + if (errno == ENOBUFS) { + /* + * If there are no buffers we retry. + * After each try we increase delay before the + * next one and we give up after fifteen times. + * This gives 11s of total wait time. + */ + if (errcount == 15) { + pjdlog_warning("Getting ENOBUFS errors for 11s on send(), giving up."); + } else { + if (errcount == 0) + pjdlog_warning("Got ENOBUFS error on send(), retrying for a bit."); + errcount++; + usleep(100000 * errcount); + continue; + } + } /* * If this is blocking socket and we got EAGAIN, this * means the request timed out. Translate errno to @@ -131,6 +149,10 @@ proto_common_send(int sock, const unsigned char *data, size_t size, int fd) data += done; size -= done; } while (size > 0); + if (errcount > 0) { + pjdlog_info("Data sent successfully after %d ENOBUFS error%s.", + errcount, errcount == 1 ? "" : "s"); + } if (fd == -1) return (0);