freebsd-dev/sbin/hastd/proto_common.c
Maxim Sobolev 98453c81af Read from the socket using the same max buffer size as we use while
sending. What happens otherwise is that the sender splits all the
traffic into 32k chunks, while the receiver is waiting for the whole
packet. Then for a certain packet sizes, particularly 66607 bytes in
my case, the communication stucks to secondary is expecting to
read one chunk of 66607 bytes, while primary is sending two chunks
of 32768 bytes and third chunk of 1071. Probably due to TCP windowing
and buffering the final chunk gets stuck somewhere, so neither server
not client can make any progress.

This patch also protect from short reads, as according to the manual
page there are some cases when MSG_WAITALL can give less data than
expected.

MFC after:	3 days
2011-06-04 16:01:30 +00:00

245 lines
6.0 KiB
C

/*-
* Copyright (c) 2009-2010 The FreeBSD Foundation
* Copyright (c) 2011 Pawel Jakub Dawidek <pawel@dawidek.net>
* All rights reserved.
*
* This software was developed by Pawel Jakub Dawidek under sponsorship from
* the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <sys/socket.h>
#include <errno.h>
#include <fcntl.h>
#include <stdbool.h>
#include <stdlib.h>
#include <strings.h>
#include <unistd.h>
#include "pjdlog.h"
#include "proto_impl.h"
/* Maximum size of packet we want to use when sending data. */
#ifndef MAX_SEND_SIZE
#define MAX_SEND_SIZE 32768
#endif
static bool
blocking_socket(int sock)
{
int flags;
flags = fcntl(sock, F_GETFL);
PJDLOG_ASSERT(flags >= 0);
return ((flags & O_NONBLOCK) == 0);
}
static int
proto_descriptor_send(int sock, int fd)
{
unsigned char ctrl[CMSG_SPACE(sizeof(fd))];
struct msghdr msg;
struct cmsghdr *cmsg;
PJDLOG_ASSERT(sock >= 0);
PJDLOG_ASSERT(fd >= 0);
bzero(&msg, sizeof(msg));
bzero(&ctrl, sizeof(ctrl));
msg.msg_iov = NULL;
msg.msg_iovlen = 0;
msg.msg_control = ctrl;
msg.msg_controllen = sizeof(ctrl);
cmsg = CMSG_FIRSTHDR(&msg);
cmsg->cmsg_level = SOL_SOCKET;
cmsg->cmsg_type = SCM_RIGHTS;
cmsg->cmsg_len = CMSG_LEN(sizeof(fd));
bcopy(&fd, CMSG_DATA(cmsg), sizeof(fd));
if (sendmsg(sock, &msg, 0) == -1)
return (errno);
return (0);
}
int
proto_common_send(int sock, const unsigned char *data, size_t size, int fd)
{
ssize_t done;
size_t sendsize;
int errcount = 0;
PJDLOG_ASSERT(sock >= 0);
if (data == NULL) {
/* The caller is just trying to decide about direction. */
PJDLOG_ASSERT(size == 0);
if (shutdown(sock, SHUT_RD) == -1)
return (errno);
return (0);
}
PJDLOG_ASSERT(data != NULL);
PJDLOG_ASSERT(size > 0);
do {
sendsize = size < MAX_SEND_SIZE ? size : MAX_SEND_SIZE;
done = send(sock, data, sendsize, MSG_NOSIGNAL);
if (done == 0) {
return (ENOTCONN);
} else if (done < 0) {
if (errno == EINTR)
continue;
if (errno == ENOBUFS) {
/*
* If there are no buffers we retry.
* After each try we increase delay before the
* next one and we give up after fifteen times.
* This gives 11s of total wait time.
*/
if (errcount == 15) {
pjdlog_warning("Getting ENOBUFS errors for 11s on send(), giving up.");
} else {
if (errcount == 0)
pjdlog_warning("Got ENOBUFS error on send(), retrying for a bit.");
errcount++;
usleep(100000 * errcount);
continue;
}
}
/*
* If this is blocking socket and we got EAGAIN, this
* means the request timed out. Translate errno to
* ETIMEDOUT, to give administrator a hint to
* eventually increase timeout.
*/
if (errno == EAGAIN && blocking_socket(sock))
errno = ETIMEDOUT;
return (errno);
}
data += done;
size -= done;
} while (size > 0);
if (errcount > 0) {
pjdlog_info("Data sent successfully after %d ENOBUFS error%s.",
errcount, errcount == 1 ? "" : "s");
}
if (fd == -1)
return (0);
return (proto_descriptor_send(sock, fd));
}
static int
proto_descriptor_recv(int sock, int *fdp)
{
unsigned char ctrl[CMSG_SPACE(sizeof(*fdp))];
struct msghdr msg;
struct cmsghdr *cmsg;
PJDLOG_ASSERT(sock >= 0);
PJDLOG_ASSERT(fdp != NULL);
bzero(&msg, sizeof(msg));
bzero(&ctrl, sizeof(ctrl));
msg.msg_iov = NULL;
msg.msg_iovlen = 0;
msg.msg_control = ctrl;
msg.msg_controllen = sizeof(ctrl);
if (recvmsg(sock, &msg, 0) == -1)
return (errno);
cmsg = CMSG_FIRSTHDR(&msg);
if (cmsg->cmsg_level != SOL_SOCKET ||
cmsg->cmsg_type != SCM_RIGHTS) {
return (EINVAL);
}
bcopy(CMSG_DATA(cmsg), fdp, sizeof(*fdp));
return (0);
}
int
proto_common_recv(int sock, unsigned char *data, size_t size, int *fdp)
{
ssize_t done;
size_t total_done, recvsize;
unsigned char *dp;
PJDLOG_ASSERT(sock >= 0);
if (data == NULL) {
/* The caller is just trying to decide about direction. */
PJDLOG_ASSERT(size == 0);
if (shutdown(sock, SHUT_WR) == -1)
return (errno);
return (0);
}
PJDLOG_ASSERT(data != NULL);
PJDLOG_ASSERT(size > 0);
total_done = 0;
dp = data;
do {
recvsize = size - total_done;
recvsize = recvsize < MAX_SEND_SIZE ? recvsize : MAX_SEND_SIZE;
done = recv(sock, dp, recvsize, MSG_WAITALL);
if (done == -1 && errno == EINTR)
continue;
if (done <= 0)
break;
total_done += done;
dp += done;
} while (total_done < size);
if (done == 0) {
return (ENOTCONN);
} else if (done < 0) {
/*
* If this is blocking socket and we got EAGAIN, this
* means the request timed out. Translate errno to
* ETIMEDOUT, to give administrator a hint to
* eventually increase timeout.
*/
if (errno == EAGAIN && blocking_socket(sock))
errno = ETIMEDOUT;
return (errno);
}
if (fdp == NULL)
return (0);
return (proto_descriptor_recv(sock, fdp));
}