Replace the code for reading and writing the kernel message buffer

with a new implementation that has a mostly reentrant "addchar"
routine, supports multiple message buffers in the kernel, and hides
the implementation details from callers.

The new code uses a kind of sequence number to represent the current
read and write positions in the buffer. This approach (suggested
mainly by bde) permits the read and write pointers to be maintained
separately, which reduces the number of atomic operations that are
required. The "mostly reentrant" above refers to the way that while
it is now always safe to have any number of concurrent writers,
readers could see the message buffer after a writer has advanced
the pointers but before it has written the new character.

Discussed on:	freebsd-arch
This commit is contained in:
Ian Dowse 2003-06-22 02:18:31 +00:00
parent dffca5a624
commit 4784a46912
6 changed files with 296 additions and 119 deletions

View File

@ -137,9 +137,7 @@ main(int argc, char *argv[])
errx(1, "kvm_read: %s", kvm_geterr(kd));
kvm_close(kd);
buflen = cur.msg_size;
bufpos = cur.msg_bufx;
if (bufpos >= buflen)
bufpos = 0;
bufpos = MSGBUF_SEQ_TO_POS(&cur, cur.msg_wseq);
}
/*

View File

@ -1091,6 +1091,7 @@ kern/subr_log.c standard
kern/subr_mbuf.c standard
kern/subr_mchain.c optional libmchain
kern/subr_module.c standard
kern/subr_msgbuf.c standard
kern/subr_param.c standard
kern/subr_pcpu.c standard
kern/subr_power.c standard

View File

@ -126,11 +126,12 @@ logclose(dev_t dev, int flag, int mode, struct thread *td)
static int
logread(dev_t dev, struct uio *uio, int flag)
{
char buf[128];
struct msgbuf *mbp = msgbufp;
int error = 0, l, s;
s = splhigh();
while (mbp->msg_bufr == mbp->msg_bufx) {
while (msgbuf_getcount(mbp) == 0) {
if (flag & IO_NDELAY) {
splx(s);
return (EWOULDBLOCK);
@ -145,19 +146,13 @@ logread(dev_t dev, struct uio *uio, int flag)
logsoftc.sc_state &= ~LOG_RDWAIT;
while (uio->uio_resid > 0) {
l = mbp->msg_bufx - mbp->msg_bufr;
if (l < 0)
l = mbp->msg_size - mbp->msg_bufr;
l = imin(l, uio->uio_resid);
l = imin(sizeof(buf), uio->uio_resid);
l = msgbuf_getbytes(mbp, buf, l);
if (l == 0)
break;
error = uiomove((char *)msgbufp->msg_ptr + mbp->msg_bufr,
l, uio);
error = uiomove(buf, l, uio);
if (error)
break;
mbp->msg_bufr += l;
if (mbp->msg_bufr >= mbp->msg_size)
mbp->msg_bufr = 0;
}
return (error);
}
@ -172,7 +167,7 @@ logpoll(dev_t dev, int events, struct thread *td)
s = splhigh();
if (events & (POLLIN | POLLRDNORM)) {
if (msgbufp->msg_bufr != msgbufp->msg_bufx)
if (msgbuf_getcount(msgbufp) > 0)
revents |= events & (POLLIN | POLLRDNORM);
else
selrecord(td, &logsoftc.sc_selp);
@ -212,18 +207,12 @@ logtimeout(void *arg)
static int
logioctl(dev_t dev, u_long com, caddr_t data, int flag, struct thread *td)
{
int l, s;
switch (com) {
/* return number of characters immediately available */
case FIONREAD:
s = splhigh();
l = msgbufp->msg_bufx - msgbufp->msg_bufr;
splx(s);
if (l < 0)
l += msgbufp->msg_size;
*(int *)data = l;
*(int *)data = msgbuf_getcount(msgbufp);
break;
case FIONBIO:

239
sys/kern/subr_msgbuf.c Normal file
View File

@ -0,0 +1,239 @@
/*
* Copyright (c) 2003 Ian Dowse. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
/*
* Generic message buffer support routines.
*/
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/msgbuf.h>
/* Read/write sequence numbers are modulo a multiple of the buffer size. */
#define SEQMOD(size) ((size) * 16)
static u_int msgbuf_cksum(struct msgbuf *mbp);
/*
 * Set up the message buffer descriptor `mbp' for a data area of `size'
 * bytes located at `ptr'.  The data area is zeroed and the read/write
 * state reset before the descriptor is stamped with MSG_MAGIC.
 */
void
msgbuf_init(struct msgbuf *mbp, void *ptr, int size)
{

	mbp->msg_size = size;
	mbp->msg_ptr = ptr;
	mbp->msg_seqmod = SEQMOD(size);
	/* msgbuf_clear() uses msg_ptr and msg_size, so they are set first. */
	msgbuf_clear(mbp);
	mbp->msg_magic = MSG_MAGIC;
}
/*
 * Re-attach to a message buffer that may still hold data from an
 * earlier use.  The old contents are kept only if the magic number,
 * the size and the checksum all match; a magic or size mismatch
 * causes a full msgbuf_init(), and a checksum mismatch is reported
 * and the buffer cleared.
 */
void
msgbuf_reinit(struct msgbuf *mbp, void *ptr, int size)
{
	u_int computed;

	/* Unrecognised or differently sized header: start from scratch. */
	if (mbp->msg_magic != MSG_MAGIC || mbp->msg_size != size) {
		msgbuf_init(mbp, ptr, size);
		return;
	}

	mbp->msg_ptr = ptr;
	mbp->msg_seqmod = SEQMOD(size);
	/* Bring the stored sequence numbers into the range of msg_seqmod. */
	mbp->msg_rseq = MSGBUF_SEQNORM(mbp, mbp->msg_rseq);
	mbp->msg_wseq = MSGBUF_SEQNORM(mbp, mbp->msg_wseq);

	computed = msgbuf_cksum(mbp);
	if (computed == mbp->msg_cksum)
		return;
	printf("msgbuf cksum mismatch (read %x, calc %x)\n",
	    mbp->msg_cksum, computed);
	msgbuf_clear(mbp);
}
/*
 * Discard everything held in the message buffer: zero the whole data
 * area and reset both sequence numbers and the checksum to zero.
 */
void
msgbuf_clear(struct msgbuf *mbp)
{

	bzero(mbp->msg_ptr, mbp->msg_size);
	mbp->msg_cksum = 0;
	mbp->msg_rseq = 0;
	mbp->msg_wseq = 0;
}
/*
 * Report how many characters are waiting to be read.  The distance
 * between the write and read sequence numbers is capped at the buffer
 * size, since no more than that can be stored.
 */
int
msgbuf_getcount(struct msgbuf *mbp)
{
	u_int unread;

	unread = MSGBUF_SEQSUB(mbp, mbp->msg_wseq, mbp->msg_rseq);
	return (unread > mbp->msg_size ? mbp->msg_size : unread);
}
/*
 * Append a character to a message buffer.  This function can be
 * considered fully reentrant so long as the number of concurrent
 * callers is less than the number of characters in the buffer.
 * However, the message buffer is only guaranteed to be consistent
 * for reading when there are no callers in this function.
 *
 * mbp: message buffer to append to.
 * c:   character to store; it is narrowed to a char when written.
 */
void
msgbuf_addchar(struct msgbuf *mbp, int c)
{
u_int new_seq, pos, seq;
/*
 * Claim a slot by advancing msg_wseq by one.  The loop retries while
 * atomic_cmpset_rel_int() returns 0 -- presumably meaning msg_wseq no
 * longer held `seq' because another writer got in first; confirm
 * against atomic(9).
 */
do {
seq = mbp->msg_wseq;
new_seq = MSGBUF_SEQNORM(mbp, seq + 1);
} while (atomic_cmpset_rel_int(&mbp->msg_wseq, seq, new_seq) == 0);
/* `seq' is the sequence number claimed above; map it to a buffer index. */
pos = MSGBUF_SEQ_TO_POS(mbp, seq);
/*
 * Adjust the checksum by (new byte - old byte), matching the per-byte
 * unsigned sum that msgbuf_cksum() computes over the buffer.
 */
atomic_add_int(&mbp->msg_cksum, (u_int)(u_char)c -
(u_int)(u_char)mbp->msg_ptr[pos]);
/*
 * The byte is stored only after msg_wseq has been advanced, so a
 * reader can briefly observe the new write position with the old byte.
 */
mbp->msg_ptr[pos] = c;
}
/*
 * Consume one character from a message buffer.
 * Returns the character as a non-negative value, or -1 when there is
 * nothing left to read.
 */
int
msgbuf_getchar(struct msgbuf *mbp)
{
	u_int avail, pos, wseq;
	int ch;

	wseq = mbp->msg_wseq;
	avail = MSGBUF_SEQSUB(mbp, wseq, mbp->msg_rseq);
	if (avail == 0)
		return (-1);
	if (avail > mbp->msg_size) {
		/*
		 * More was written than the buffer holds; move the read
		 * position up to one buffer's worth behind the writer.
		 */
		mbp->msg_rseq = MSGBUF_SEQNORM(mbp, wseq - mbp->msg_size);
	}
	pos = MSGBUF_SEQ_TO_POS(mbp, mbp->msg_rseq);
	ch = (u_char)mbp->msg_ptr[pos];
	mbp->msg_rseq = MSGBUF_SEQNORM(mbp, mbp->msg_rseq + 1);
	return (ch);
}
/*
 * Consume up to `buflen' characters from a message buffer into `buf'.
 * A single call never copies past the end of the data area, so it may
 * return fewer characters than are available; callers can simply call
 * again.  Returns the number of characters stored in `buf'.
 */
int
msgbuf_getbytes(struct msgbuf *mbp, char *buf, int buflen)
{
	u_int avail, start, wseq;

	wseq = mbp->msg_wseq;
	avail = MSGBUF_SEQSUB(mbp, wseq, mbp->msg_rseq);
	if (avail == 0)
		return (0);
	if (avail > mbp->msg_size) {
		/* Older data was overwritten; skip to what is still held. */
		mbp->msg_rseq = MSGBUF_SEQNORM(mbp, wseq - mbp->msg_size);
		avail = mbp->msg_size;
	}
	start = MSGBUF_SEQ_TO_POS(mbp, mbp->msg_rseq);
	/* Limit the copy to the end of the data area and to `buflen'. */
	avail = min(min(avail, mbp->msg_size - start), (u_int)buflen);
	bcopy(mbp->msg_ptr + start, buf, avail);
	mbp->msg_rseq = MSGBUF_SEQNORM(mbp, mbp->msg_rseq + avail);
	return (avail);
}
/*
 * Copy message buffer contents into `buf' without consuming them.
 * The caller owns a cursor, `*seqp', which this function advances so
 * that the whole buffer can be walked with a series of short reads.
 *
 * Calling with `buf' == NULL only positions `*seqp' one full buffer
 * size behind the current write position and returns 0; the reads
 * that follow therefore cover the entire data area.
 *
 * Returns the number of characters stored in `buf'; 0 means the
 * cursor has caught up with the writer.
 */
int
msgbuf_peekbytes(struct msgbuf *mbp, char *buf, int buflen, u_int *seqp)
{
	u_int avail, start, wseq;

	if (buf == NULL) {
		/* Cursor initialisation request; nothing is copied. */
		*seqp = MSGBUF_SEQNORM(mbp, mbp->msg_wseq - mbp->msg_size);
		return (0);
	}

	wseq = mbp->msg_wseq;
	avail = MSGBUF_SEQSUB(mbp, wseq, *seqp);
	if (avail == 0)
		return (0);
	if (avail > mbp->msg_size) {
		/* The cursor fell too far behind; pull it forward. */
		*seqp = MSGBUF_SEQNORM(mbp, wseq - mbp->msg_size);
		avail = mbp->msg_size;
	}
	start = MSGBUF_SEQ_TO_POS(mbp, *seqp);
	/* Limit the copy to the end of the data area and to `buflen'. */
	avail = min(avail, mbp->msg_size - start);
	avail = min(avail, (u_int)buflen);
	bcopy(mbp->msg_ptr + start, buf, avail);
	*seqp = MSGBUF_SEQNORM(mbp, *seqp + avail);
	return (avail);
}
/*
 * Return the checksum of the whole data area: the sum of every byte,
 * each taken as an unsigned value.
 */
static u_int
msgbuf_cksum(struct msgbuf *mbp)
{
	u_int idx, total;

	total = 0;
	idx = 0;
	while (idx < mbp->msg_size) {
		total += (u_char)mbp->msg_ptr[idx];
		idx++;
	}
	return (total);
}
/*
 * Move the unread contents of `src' into `dst'.  Characters are
 * consumed from `src' one at a time and appended to `dst' until
 * `src' has nothing left to read.
 */
void
msgbuf_copy(struct msgbuf *src, struct msgbuf *dst)
{
	int ch;

	for (;;) {
		ch = msgbuf_getchar(src);
		if (ch < 0)
			break;
		msgbuf_addchar(dst, ch);
	}
}

View File

@ -93,8 +93,6 @@ struct tty *constty; /* pointer to console "window" tty */
static void (*v_putc)(int) = cnputc; /* routine to putc on virtual console */
static void msglogchar(int c, int pri);
static void msgaddchar(int c, void *dummy);
static u_int msgbufcksum(char *cp, size_t size, u_int cksum);
static void putchar(int ch, void *arg);
static char *ksprintn(char *nbuf, uintmax_t num, int base, int *len);
static void snprintf_func(int ch, void *arg);
@ -788,16 +786,16 @@ msglogchar(int c, int pri)
return;
if (pri != -1 && pri != lastpri) {
if (dangling) {
msgaddchar('\n', NULL);
msgbuf_addchar(msgbufp, '\n');
dangling = 0;
}
msgaddchar('<', NULL);
msgbuf_addchar(msgbufp, '<');
for (p = ksprintn(nbuf, (uintmax_t)pri, 10, NULL); *p;)
msgaddchar(*p--, NULL);
msgaddchar('>', NULL);
msgbuf_addchar(msgbufp, *p--);
msgbuf_addchar(msgbufp, '>');
lastpri = pri;
}
msgaddchar(c, NULL);
msgbuf_addchar(msgbufp, c);
if (c == '\n') {
dangling = 0;
lastpri = -1;
@ -806,41 +804,6 @@ msglogchar(int c, int pri)
}
}
/*
* Put char in log buffer
*/
static void
msgaddchar(int c, void *dummy)
{
struct msgbuf *mbp;
if (!msgbufmapped)
return;
mbp = msgbufp;
mbp->msg_cksum += (u_char)c - (u_char)mbp->msg_ptr[mbp->msg_bufx];
mbp->msg_ptr[mbp->msg_bufx++] = c;
if (mbp->msg_bufx >= mbp->msg_size)
mbp->msg_bufx = 0;
/* If the buffer is full, keep the most recent data. */
if (mbp->msg_bufr == mbp->msg_bufx) {
if (++mbp->msg_bufr >= mbp->msg_size)
mbp->msg_bufr = 0;
}
}
static void
msgbufcopy(struct msgbuf *oldp)
{
int pos;
pos = oldp->msg_bufr;
while (pos != oldp->msg_bufx) {
msglogchar(oldp->msg_ptr[pos], -1);
if (++pos >= oldp->msg_size)
pos = 0;
}
}
void
msgbufinit(void *ptr, int size)
{
@ -850,38 +813,13 @@ msgbufinit(void *ptr, int size)
size -= sizeof(*msgbufp);
cp = (char *)ptr;
msgbufp = (struct msgbuf *)(cp + size);
if (msgbufp->msg_magic != MSG_MAGIC || msgbufp->msg_size != size ||
msgbufp->msg_bufx >= size || msgbufp->msg_bufx < 0 ||
msgbufp->msg_bufr >= size || msgbufp->msg_bufr < 0 ||
msgbufcksum(cp, size, msgbufp->msg_cksum) != msgbufp->msg_cksum) {
bzero(cp, size);
bzero(msgbufp, sizeof(*msgbufp));
msgbufp->msg_magic = MSG_MAGIC;
msgbufp->msg_size = size;
}
msgbufp->msg_ptr = cp;
msgbuf_reinit(msgbufp, cp, size);
if (msgbufmapped && oldp != msgbufp)
msgbufcopy(oldp);
msgbuf_copy(oldp, msgbufp);
msgbufmapped = 1;
oldp = msgbufp;
}
static u_int
msgbufcksum(char *cp, size_t size, u_int cksum)
{
u_int sum;
int i;
sum = 0;
for (i = 0; i < size; i++)
sum += (u_char)cp[i];
if (sum != cksum)
printf("msgbuf cksum mismatch (read %x, calc %x)\n", cksum,
sum);
return (sum);
}
SYSCTL_DECL(_security_bsd);
static int unprivileged_read_msgbuf = 1;
@ -893,7 +831,9 @@ SYSCTL_INT(_security_bsd, OID_AUTO, unprivileged_read_msgbuf,
static int
sysctl_kern_msgbuf(SYSCTL_HANDLER_ARGS)
{
int error;
char buf[128];
u_int seq;
int error, len;
if (!unprivileged_read_msgbuf) {
error = suser(req->td);
@ -901,25 +841,20 @@ sysctl_kern_msgbuf(SYSCTL_HANDLER_ARGS)
return (error);
}
/*
* Unwind the buffer, so that it's linear (possibly starting with
* some initial nulls).
*/
error = sysctl_handle_opaque(oidp, msgbufp->msg_ptr + msgbufp->msg_bufx,
msgbufp->msg_size - msgbufp->msg_bufx, req);
/* Read the whole buffer, one chunk at a time. */
msgbuf_peekbytes(msgbufp, NULL, 0, &seq);
while ((len = msgbuf_peekbytes(msgbufp, buf, sizeof(buf), &seq)) > 0) {
error = sysctl_handle_opaque(oidp, buf, len, req);
if (error)
return (error);
if (msgbufp->msg_bufx > 0) {
error = sysctl_handle_opaque(oidp, msgbufp->msg_ptr,
msgbufp->msg_bufx, req);
}
return (error);
return (0);
}
SYSCTL_PROC(_kern, OID_AUTO, msgbuf, CTLTYPE_STRING | CTLFLAG_RD,
0, 0, sysctl_kern_msgbuf, "A", "Contents of kernel message buffer");
static int msgbuf_clear;
static int msgbuf_clearflag;
static int
sysctl_kern_msgbuf_clear(SYSCTL_HANDLER_ARGS)
@ -927,17 +862,14 @@ sysctl_kern_msgbuf_clear(SYSCTL_HANDLER_ARGS)
int error;
error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req);
if (!error && req->newptr) {
/* Clear the buffer and reset write pointer */
bzero(msgbufp->msg_ptr, msgbufp->msg_size);
msgbufp->msg_bufr = msgbufp->msg_bufx = 0;
msgbufp->msg_cksum = 0;
msgbuf_clear = 0;
msgbuf_clear(msgbufp);
msgbuf_clearflag = 0;
}
return (error);
}
SYSCTL_PROC(_kern, OID_AUTO, msgbuf_clear,
CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE, &msgbuf_clear, 0,
CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE, &msgbuf_clearflag, 0,
sysctl_kern_msgbuf_clear, "I", "Clear kernel message buffer");
#ifdef DDB
@ -951,11 +883,11 @@ DB_SHOW_COMMAND(msgbuf, db_show_msgbuf)
return;
}
db_printf("msgbufp = %p\n", msgbufp);
db_printf("magic = %x, size = %d, r= %d, w = %d, ptr = %p, cksum= %d\n",
msgbufp->msg_magic, msgbufp->msg_size, msgbufp->msg_bufr,
msgbufp->msg_bufx, msgbufp->msg_ptr, msgbufp->msg_cksum);
db_printf("magic = %x, size = %d, r= %u, w = %u, ptr = %p, cksum= %u\n",
msgbufp->msg_magic, msgbufp->msg_size, msgbufp->msg_rseq,
msgbufp->msg_wseq, msgbufp->msg_ptr, msgbufp->msg_cksum);
for (i = 0; i < msgbufp->msg_size; i++) {
j = (i + msgbufp->msg_bufr) % msgbufp->msg_size;
j = MSGBUF_SEQ_TO_POS(msgbufp, i + msgbufp->msg_rseq);
db_printf("%c", msgbufp->msg_ptr[j]);
}
db_printf("\n");

View File

@ -38,19 +38,37 @@
#define _SYS_MSGBUF_H_
struct msgbuf {
char *msg_ptr; /* pointer to buffer */
#define MSG_MAGIC 0x063062
u_int msg_magic;
int msg_size; /* size of buffer area */
int msg_bufx; /* write pointer */
int msg_bufr; /* read pointer */
char *msg_ptr; /* pointer to buffer */
u_int msg_size; /* size of buffer area */
u_int msg_wseq; /* write sequence number */
u_int msg_rseq; /* read sequence number */
u_int msg_cksum; /* checksum of contents */
u_int msg_seqmod; /* range for sequence numbers */
};
/*
 * Normalise a sequence number, or a difference between sequence
 * numbers, into the range [0, msg_seqmod).  msg_seqmod is added before
 * the remainder is taken so that a value that has dipped below zero
 * (e.g. the result of a subtraction) still normalises correctly --
 * assumes unsigned operands and an underflow smaller than msg_seqmod.
 */
#define MSGBUF_SEQNORM(mbp, seq) (((seq) + (mbp)->msg_seqmod) % \
(mbp)->msg_seqmod)
/* Convert a sequence number into an index into the msg_ptr data area. */
#define MSGBUF_SEQ_TO_POS(mbp, seq) ((seq) % (mbp)->msg_size)
/*
 * Subtract sequence numbers (seq1 - seq2).  The result is normalised,
 * so it is never negative; equal sequence numbers give zero.
 */
#define MSGBUF_SEQSUB(mbp, seq1, seq2) (MSGBUF_SEQNORM((mbp), (seq1) - (seq2)))
#ifdef _KERNEL
extern int msgbuftrigger;
extern struct msgbuf *msgbufp;
void msgbufinit(void *ptr, int size);
void msgbuf_addchar(struct msgbuf *mbp, int c);
void msgbuf_clear(struct msgbuf *mbp);
void msgbuf_copy(struct msgbuf *src, struct msgbuf *dst);
int msgbuf_getbytes(struct msgbuf *mbp, char *buf, int buflen);
int msgbuf_getchar(struct msgbuf *mbp);
int msgbuf_getcount(struct msgbuf *mbp);
void msgbuf_init(struct msgbuf *mbp, void *ptr, int size);
void msgbuf_reinit(struct msgbuf *mbp, void *ptr, int size);
int msgbuf_peekbytes(struct msgbuf *mbp, char *buf, int buflen,
u_int *seqp);
#if !defined(MSGBUF_SIZE)
#define MSGBUF_SIZE 32768