Merge robustness improvements for the ALTERA JTAG UART driver from

CheriBSD, which attempt to work around an inherent race in the UART's
control-register design in detecting whether JTAG is currently,
present, which will otherwise lead to moderately frequent output
drops when running in polled rather than interrupt-driven operation.
Now, these drops are quite infrequent.

commit 9f33fddac9215e32781a4f016ba17eab804fb6d4
Author: Robert N. M. Watson <robert.watson@cl.cam.ac.uk>
Date:   Thu Jul 16 17:34:12 2015 +0000

    Add a new sysctl, hw.altera_jtag_uart.ac_poll_delay, which allows the
    (default 10ms) delay associated with a full JTAG UART buffer combined
    with a lack of a JTAG-present flag to be tuned.  Setting this higher
    may cause some JTAG configurations to be more reliable when printing
    out low-level console output at a speed greater than the JTAG UART is
    willing to carry data.  Or it may not.

commit 73992ef7607738b2973736e409ccd644b30eadba
Author: Robert N. M. Watson <robert.watson@cl.cam.ac.uk>
Date:   Sun Jan 1 15:13:07 2017 +0000

    Minor improvements to the Altera JTAG UART device driver:

    - Minor rework to the logic to detect JTAG presence in order to be a bit
      more resilient to inevitable races: increase the retry period from two
      seconds to four seconds for trying to find JTAG, and more agressively
      clear the miss counter if JTAG has been reconnected.  Once JTAG has
      vanished, stop prodding the miss counter.

    - Do a bit of reworking of the output code to frob the control register
      less by checking whether write interrupts are enabled/disabled before
      changing their state.  This should reduce the opportunity for races
      with JTAG discovery (which are inherent to the Altera
      hardware-software interface, but can at least be minimised).

    - Add statistics relating to interrupt enable/disable/JTAG
      discovery/etc.

    With these changes, polled-mode JTAG UART ttys appear substantially
    more robust.

MFC after:	1 week
Sponsored by:	DARPA, AFRL
This commit is contained in:
Robert Watson 2017-01-28 12:43:19 +00:00
parent 55e0d88afd
commit 95fadd99d5
2 changed files with 127 additions and 46 deletions

View File

@ -40,6 +40,7 @@ __FBSDID("$FreeBSD$");
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/reboot.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/tty.h>
@ -49,6 +50,9 @@ __FBSDID("$FreeBSD$");
devclass_t altera_jtag_uart_devclass;
static SYSCTL_NODE(_hw, OID_AUTO, altera_jtag_uart, CTLFLAG_RW, 0,
"Altera JTAG UART configuration knobs");
/*
* One-byte buffer as we can't check whether the UART is readable without
* actually reading from it, synchronised by a spinlock; this lock also
@ -82,6 +86,11 @@ static cn_ungrab_t aju_cnungrab;
* no AC bit set.
*/
#define ALTERA_JTAG_UART_AC_POLL_DELAY 10000
static u_int altera_jtag_uart_ac_poll_delay =
ALTERA_JTAG_UART_AC_POLL_DELAY;
SYSCTL_UINT(_hw_altera_jtag_uart, OID_AUTO, ac_poll_delay,
CTLFLAG_RW, &altera_jtag_uart_ac_poll_delay, 0,
"Maximum delay waiting for JTAG present flag when buffer is full");
/*
* I/O routines lifted from Deimos. This is not only MIPS-specific, but also
@ -220,10 +229,10 @@ aju_cons_write(char ch)
* layer clearing of the bit doesn't trigger a TTY-layer
* disconnection.
*
* XXXRW: The polling delay may require tuning.
*
* XXXRW: Notice the inherent race with hardware: in clearing the
* bit, we may race with hardware setting the same bit.
* bit, we may race with hardware setting the same bit. This can
* cause real-world reliability problems due to lost output on the
* console.
*/
v = aju_cons_control_read();
if (v & ALTERA_JTAG_UART_CONTROL_AC) {
@ -235,7 +244,7 @@ aju_cons_write(char ch)
while ((v & ALTERA_JTAG_UART_CONTROL_WSPACE) == 0) {
if (!aju_cons_jtag_present)
return;
DELAY(ALTERA_JTAG_UART_AC_POLL_DELAY);
DELAY(altera_jtag_uart_ac_poll_delay);
v = aju_cons_control_read();
if (v & ALTERA_JTAG_UART_CONTROL_AC) {
aju_cons_jtag_present = 1;

View File

@ -1,5 +1,5 @@
/*-
* Copyright (c) 2011-2012 Robert N. M. Watson
* Copyright (c) 2011-2012, 2016 Robert N. M. Watson
* All rights reserved.
*
* This software was developed by SRI International and the University of
@ -40,10 +40,12 @@ __FBSDID("$FreeBSD$");
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/reboot.h>
#include <sys/sysctl.h>
#include <sys/tty.h>
#include <ddb/ddb.h>
#include <machine/atomic.h>
#include <machine/bus.h>
#include <dev/altera/jtag_uart/altera_jtag_uart.h>
@ -65,9 +67,9 @@ static struct ttydevsw aju_ttydevsw = {
/*
* When polling for the AC bit, the number of times we have to not see it
* before assuming JTAG has disappeared on us. By default, two seconds.
* before assuming JTAG has disappeared on us. By default, four seconds.
*/
#define AJU_JTAG_MAXMISS 10
#define AJU_JTAG_MAXMISS 20
/*
* Polling intervals for input/output and JTAG connection events.
@ -75,6 +77,53 @@ static struct ttydevsw aju_ttydevsw = {
#define AJU_IO_POLLINTERVAL (hz/100)
#define AJU_AC_POLLINTERVAL (hz/5)
/*
* Statistics on JTAG removal events when sending, for debugging purposes
* only.
*/
static u_int aju_jtag_vanished;
SYSCTL_UINT(_debug, OID_AUTO, aju_jtag_vanished, CTLFLAG_RW,
&aju_jtag_vanished, 0, "Number of times JTAG has vanished");
static u_int aju_jtag_appeared;
SYSCTL_UINT(_debug, OID_AUTO, aju_jtag_appeared, CTLFLAG_RW,
&aju_jtag_appeared, 0, "Number of times JTAG has appeared");
SYSCTL_INT(_debug, OID_AUTO, aju_cons_jtag_present, CTLFLAG_RW,
&aju_cons_jtag_present, 0, "JTAG console present flag");
SYSCTL_UINT(_debug, OID_AUTO, aju_cons_jtag_missed, CTLFLAG_RW,
&aju_cons_jtag_missed, 0, "JTAG console missed counter");
/*
* Interrupt-related statistics.
*/
static u_int aju_intr_readable_enabled;
SYSCTL_UINT(_debug, OID_AUTO, aju_intr_readable_enabled, CTLFLAG_RW,
&aju_intr_readable_enabled, 0, "Number of times read interrupt enabled");
static u_int aju_intr_writable_disabled;
SYSCTL_UINT(_debug, OID_AUTO, aju_intr_writable_disabled, CTLFLAG_RW,
&aju_intr_writable_disabled, 0,
"Number of times write interrupt disabled");
static u_int aju_intr_writable_enabled;
SYSCTL_UINT(_debug, OID_AUTO, aju_intr_writable_enabled, CTLFLAG_RW,
&aju_intr_writable_enabled, 0,
"Number of times write interrupt enabled");
static u_int aju_intr_disabled;
SYSCTL_UINT(_debug, OID_AUTO, aju_intr_disabled, CTLFLAG_RW,
&aju_intr_disabled, 0, "Number of times write interrupt disabled");
static u_int aju_intr_read_count;
SYSCTL_UINT(_debug, OID_AUTO, aju_intr_read_count, CTLFLAG_RW,
&aju_intr_read_count, 0, "Number of times read interrupt fired");
static u_int aju_intr_write_count;
SYSCTL_UINT(_debug, OID_AUTO, aju_intr_write_count, CTLFLAG_RW,
&aju_intr_write_count, 0, "Number of times write interrupt fired");
/*
* Low-level read and write register routines; the Altera UART is little
* endian, so we byte swap 32-bit reads and writes.
@ -160,6 +209,7 @@ aju_intr_readable_enable(struct altera_jtag_uart_softc *sc)
AJU_LOCK_ASSERT(sc);
atomic_add_int(&aju_intr_readable_enabled, 1);
v = aju_control_read(sc);
v |= ALTERA_JTAG_UART_CONTROL_RE;
aju_control_write(sc, v);
@ -172,6 +222,7 @@ aju_intr_writable_enable(struct altera_jtag_uart_softc *sc)
AJU_LOCK_ASSERT(sc);
atomic_add_int(&aju_intr_writable_enabled, 1);
v = aju_control_read(sc);
v |= ALTERA_JTAG_UART_CONTROL_WE;
aju_control_write(sc, v);
@ -184,6 +235,7 @@ aju_intr_writable_disable(struct altera_jtag_uart_softc *sc)
AJU_LOCK_ASSERT(sc);
atomic_add_int(&aju_intr_writable_disabled, 1);
v = aju_control_read(sc);
v &= ~ALTERA_JTAG_UART_CONTROL_WE;
aju_control_write(sc, v);
@ -196,6 +248,7 @@ aju_intr_disable(struct altera_jtag_uart_softc *sc)
AJU_LOCK_ASSERT(sc);
atomic_add_int(&aju_intr_disabled, 1);
v = aju_control_read(sc);
v &= ~(ALTERA_JTAG_UART_CONTROL_RE | ALTERA_JTAG_UART_CONTROL_WE);
aju_control_write(sc, v);
@ -249,30 +302,7 @@ aju_handle_output(struct altera_jtag_uart_softc *sc, struct tty *tp)
AJU_UNLOCK(sc);
while (ttydisc_getc_poll(tp) != 0) {
AJU_LOCK(sc);
v = aju_control_read(sc);
if ((v & ALTERA_JTAG_UART_CONTROL_WSPACE) != 0) {
AJU_UNLOCK(sc);
if (ttydisc_getc(tp, &ch, sizeof(ch)) != sizeof(ch))
panic("%s: ttydisc_getc", __func__);
AJU_LOCK(sc);
/*
* XXXRW: There is a slight race here in which we test
* for writability, drop the lock, get the character
* from the tty layer, re-acquire the lock, and then
* write. It's possible for other code --
* specifically, the low-level console -- to have
* written in the mean time, which might mean that
* there is no longer space. The BERI memory bus will
* cause this write to block, wedging the processor
* until space is available -- which could be a while
* if JTAG is not attached!
*
* The 'easy' fix is to drop the character if WSPACE
* has become unset. Not sure what the 'hard' fix is.
*/
aju_data_write(sc, ch);
} else {
if (*sc->ajus_jtag_presentp == 0) {
/*
* If JTAG is not present, then we will drop this
* character instead of perhaps scheduling an
@ -281,21 +311,50 @@ aju_handle_output(struct altera_jtag_uart_softc *sc, struct tty *tp)
* later even though we aren't interested in sending
* anymore. Loop to drain TTY-layer buffer.
*/
if (*sc->ajus_jtag_presentp == 0) {
if (ttydisc_getc(tp, &ch, sizeof(ch)) !=
sizeof(ch))
panic("%s: ttydisc_getc 2", __func__);
AJU_UNLOCK(sc);
continue;
}
if (sc->ajus_irq_res != NULL)
AJU_UNLOCK(sc);
if (ttydisc_getc(tp, &ch, sizeof(ch)) !=
sizeof(ch))
panic("%s: ttydisc_getc", __func__);
continue;
}
v = aju_control_read(sc);
if ((v & ALTERA_JTAG_UART_CONTROL_WSPACE) == 0) {
if (sc->ajus_irq_res != NULL &&
(v & ALTERA_JTAG_UART_CONTROL_WE) == 0)
aju_intr_writable_enable(sc);
return;
}
AJU_UNLOCK(sc);
if (ttydisc_getc(tp, &ch, sizeof(ch)) != sizeof(ch))
panic("%s: ttydisc_getc 2", __func__);
AJU_LOCK(sc);
/*
* XXXRW: There is a slight race here in which we test for
* writability, drop the lock, get the character from the tty
* layer, re-acquire the lock, and then write. It's possible
* for other code -- specifically, the low-level console -- to
* have* written in the mean time, which might mean that there
* is no longer space. The BERI memory bus will cause this
* write to block, wedging the processor until space is
* available -- which could be a while if JTAG is not
* attached!
*
* The 'easy' fix is to drop the character if WSPACE has
* become unset. Not sure what the 'hard' fix is.
*/
aju_data_write(sc, ch);
AJU_UNLOCK(sc);
}
AJU_LOCK(sc);
aju_intr_writable_disable(sc);
/*
* If interrupts are configured, and there's no data to write, but we
* had previously enabled write interrupts, disable them now.
*/
v = aju_control_read(sc);
if (sc->ajus_irq_res != NULL && (v & ALTERA_JTAG_UART_CONTROL_WE) != 0)
aju_intr_writable_disable(sc);
}
static void
@ -355,16 +414,25 @@ aju_ac_callout(void *arg)
v &= ~ALTERA_JTAG_UART_CONTROL_AC;
aju_control_write(sc, v);
if (*sc->ajus_jtag_presentp == 0) {
*sc->ajus_jtag_missedp = 0;
*sc->ajus_jtag_presentp = 1;
atomic_add_int(&aju_jtag_appeared, 1);
aju_handle_output(sc, tp);
}
/* Any hit eliminates all recent misses. */
*sc->ajus_jtag_missedp = 0;
} else if (*sc->ajus_jtag_presentp != 0) {
(*sc->ajus_jtag_missedp)++;
if (*sc->ajus_jtag_missedp >= AJU_JTAG_MAXMISS) {
/*
* If we've exceeded our tolerance for misses, mark JTAG as
* disconnected and drain output. Otherwise, bump the miss
* counter.
*/
if (*sc->ajus_jtag_missedp > AJU_JTAG_MAXMISS) {
*sc->ajus_jtag_presentp = 0;
atomic_add_int(&aju_jtag_vanished, 1);
aju_handle_output(sc, tp);
}
} else
(*sc->ajus_jtag_missedp)++;
}
callout_reset(&sc->ajus_ac_callout, AJU_AC_POLLINTERVAL,
aju_ac_callout, sc);
@ -382,10 +450,14 @@ aju_intr(void *arg)
tty_lock(tp);
AJU_LOCK(sc);
v = aju_control_read(sc);
if (v & ALTERA_JTAG_UART_CONTROL_RI)
if (v & ALTERA_JTAG_UART_CONTROL_RI) {
atomic_add_int(&aju_intr_read_count, 1);
aju_handle_input(sc, tp);
if (v & ALTERA_JTAG_UART_CONTROL_WI)
}
if (v & ALTERA_JTAG_UART_CONTROL_WI) {
atomic_add_int(&aju_intr_write_count, 1);
aju_handle_output(sc, tp);
}
AJU_UNLOCK(sc);
tty_unlock(tp);
}