Optimise the DWC OTG host mode driver's transmit path:

1) Use the TX FIFO empty interrupts to poll the transmit FIFO usage,
instead of using our own software counters and waiting for SOF
interrupts. Assume that enough FIFO space is available to execute one
USB OUT transfer of any kind when the TX FIFO is empty.

2) Use the host channel halted event to asynchronously wait for host
channels to be disabled instead of waiting for SOF interrupts. This
results in less turnaround time for re-using host channels and at the
same time increases the performance.

The network transmit performance measured by "iperf" for the "RPi-B v1
2011/12" board increased from 45 Mbit/s to 65 Mbit/s after applying the
changes above.

No regressions seen using:
 - High Speed (BULK, CONTROL, INTERRUPT)
 - Full Speed (All transfer types)
 - Low Speed (Control and Interrupt)

MFC after:	1 month
Submitted by:	Daisuke Aoyama <aoyama@peach.ne.jp>
This commit is contained in:
Hans Petter Selasky 2015-07-16 16:08:40 +00:00
parent 50f960e60e
commit a529288d65
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=285638
3 changed files with 77 additions and 114 deletions

View File

@ -1,5 +1,6 @@
/* $FreeBSD$ */
/*-
* Copyright (c) 2015 Daisuke Aoyama. All rights reserved.
* Copyright (c) 2012 Hans Petter Selasky. All rights reserved.
* Copyright (c) 2010-2011 Aleksandr Rybalko. All rights reserved.
*
@ -151,7 +152,6 @@ static void dwc_otg_do_poll(struct usb_bus *);
static void dwc_otg_standard_done(struct usb_xfer *);
static void dwc_otg_root_intr(struct dwc_otg_softc *);
static void dwc_otg_interrupt_poll_locked(struct dwc_otg_softc *);
static void dwc_otg_host_channel_disable(struct dwc_otg_softc *, uint8_t);
/*
* Here is a configuration that the chip supports.
@ -224,7 +224,7 @@ dwc_otg_init_fifo(struct dwc_otg_softc *sc, uint8_t mode)
/* split equally for IN and OUT */
fifo_size /= 2;
/* align to 4 bytes boundary */
/* Align to 4 bytes boundary (refer to PGM) */
fifo_size &= ~3;
/* set global receive FIFO size */
@ -237,13 +237,6 @@ dwc_otg_init_fifo(struct dwc_otg_softc *sc, uint8_t mode)
return (EINVAL);
}
/* disable any leftover host channels */
for (x = 0; x != sc->sc_host_ch_max; x++) {
if (sc->sc_chan_state[x].wait_sof == 0)
continue;
dwc_otg_host_channel_disable(sc, x);
}
if (mode == DWC_MODE_HOST) {
/* reset active endpoints */
@ -252,6 +245,8 @@ dwc_otg_init_fifo(struct dwc_otg_softc *sc, uint8_t mode)
/* split equally for periodic and non-periodic */
fifo_size /= 2;
DPRINTF("PTX/NPTX FIFO=%u\n", fifo_size);
/* align to 4 bytes boundary */
fifo_size &= ~3;
@ -262,7 +257,7 @@ dwc_otg_init_fifo(struct dwc_otg_softc *sc, uint8_t mode)
tx_start += fifo_size;
for (x = 0; x != sc->sc_host_ch_max; x++) {
/* disable all host interrupts */
/* enable all host interrupts */
DWC_OTG_WRITE_4(sc, DOTG_HCINTMSK(x),
HCINT_DEFAULT_MASK);
}
@ -274,13 +269,6 @@ dwc_otg_init_fifo(struct dwc_otg_softc *sc, uint8_t mode)
/* reset host channel state */
memset(sc->sc_chan_state, 0, sizeof(sc->sc_chan_state));
/* reset FIFO TX levels */
sc->sc_tx_cur_p_level = 0;
sc->sc_tx_cur_np_level = 0;
/* store maximum periodic and non-periodic FIFO TX size */
sc->sc_tx_max_size = fifo_size;
/* enable all host channel interrupts */
DWC_OTG_WRITE_4(sc, DOTG_HAINTMSK,
(1U << sc->sc_host_ch_max) - 1U);
@ -358,15 +346,8 @@ dwc_otg_init_fifo(struct dwc_otg_softc *sc, uint8_t mode)
/* reset active endpoints */
sc->sc_active_rx_ep = 0;
/* reset periodic and non-periodic FIFO TX size */
sc->sc_tx_max_size = fifo_size;
/* reset host channel state */
memset(sc->sc_chan_state, 0, sizeof(sc->sc_chan_state));
/* reset FIFO TX levels */
sc->sc_tx_cur_p_level = 0;
sc->sc_tx_cur_np_level = 0;
}
return (0);
}
@ -612,10 +593,39 @@ dwc_otg_clear_hcint(struct dwc_otg_softc *sc, uint8_t x)
}
static uint8_t
dwc_otg_host_channel_alloc(struct dwc_otg_softc *sc, struct dwc_otg_td *td, uint8_t is_out)
/*
 * dwc_otg_host_check_fifo_empty - test whether the TX FIFO that serves
 * the given transfer descriptor is currently reported as empty.
 *
 * The FIFO is selected from td->ep_type: interrupt and isochronous
 * endpoints use the periodic TX FIFO status bit, all other endpoint
 * types use the non-periodic one.
 *
 * When the selected FIFO is not empty, the matching "FIFO empty"
 * interrupt is unmasked in GINTMSK (unless sc_irq_mask already has it
 * set) before returning.
 *
 * Return values:
 *    0: FIFO empty, ready for transmit
 * Else: FIFO busy
 */
dwc_otg_host_check_fifo_empty(struct dwc_otg_softc *sc, struct dwc_otg_td *td)
{
uint32_t temp;
/* snapshot of the global interrupt status register */
temp = DWC_OTG_READ_4(sc, DOTG_GINTSTS);
if (td->ep_type == UE_INTERRUPT ||
td->ep_type == UE_ISOCHRONOUS) {
/* periodic endpoint types: look at the periodic TX FIFO empty bit */
if (!(temp & GINTSTS_PTXFEMP)) {
DPRINTF("Periodic TX FIFO is not empty\n");
/* only touch GINTMSK when the bit is not already enabled */
if (!(sc->sc_irq_mask & GINTMSK_PTXFEMPMSK)) {
sc->sc_irq_mask |= GINTMSK_PTXFEMPMSK;
DWC_OTG_WRITE_4(sc, DOTG_GINTMSK, sc->sc_irq_mask);
}
return (1); /* busy */
}
} else {
/* all other endpoint types: look at the non-periodic TX FIFO empty bit */
if (!(temp & GINTSTS_NPTXFEMP)) {
DPRINTF("Non-periodic TX FIFO is not empty\n");
/* only touch GINTMSK when the bit is not already enabled */
if (!(sc->sc_irq_mask & GINTMSK_NPTXFEMPMSK)) {
sc->sc_irq_mask |= GINTMSK_NPTXFEMPMSK;
DWC_OTG_WRITE_4(sc, DOTG_GINTMSK, sc->sc_irq_mask);
}
return (1); /* busy */
}
}
return (0); /* ready for transmit */
}
static uint8_t
dwc_otg_host_channel_alloc(struct dwc_otg_softc *sc,
struct dwc_otg_td *td, uint8_t is_out)
{
uint32_t tx_p_size;
uint32_t tx_np_size;
uint8_t x;
if (td->channel < DWC_OTG_MAX_CHANNELS)
@ -627,45 +637,19 @@ dwc_otg_host_channel_alloc(struct dwc_otg_softc *sc, struct dwc_otg_td *td, uint
/* compute needed TX FIFO size */
if (is_out != 0) {
if (td->ep_type == UE_ISOCHRONOUS) {
tx_p_size = td->max_packet_size;
tx_np_size = 0;
if (td->hcsplt != 0 && tx_p_size > HCSPLT_XACTLEN_BURST)
tx_p_size = HCSPLT_XACTLEN_BURST;
if ((sc->sc_tx_cur_p_level + tx_p_size) > sc->sc_tx_max_size) {
DPRINTF("Too little FIFO space\n");
return (1); /* too little FIFO */
}
} else {
tx_p_size = 0;
tx_np_size = td->max_packet_size;
if (td->hcsplt != 0 && tx_np_size > HCSPLT_XACTLEN_BURST)
tx_np_size = HCSPLT_XACTLEN_BURST;
if ((sc->sc_tx_cur_np_level + tx_np_size) > sc->sc_tx_max_size) {
DPRINTF("Too little FIFO space\n");
return (1); /* too little FIFO */
}
}
} else {
/* not a TX transaction */
tx_p_size = 0;
tx_np_size = 0;
if (dwc_otg_host_check_fifo_empty(sc, td) != 0)
return (1); /* busy - cannot transfer data */
}
for (x = 0; x != sc->sc_host_ch_max; x++) {
/* check if channel is allocated */
if (sc->sc_chan_state[x].allocated != 0)
continue;
/* check if channel is still enabled */
if (sc->sc_chan_state[x].wait_sof != 0)
if (sc->sc_chan_state[x].wait_halted != 0)
continue;
sc->sc_chan_state[x].allocated = 1;
sc->sc_chan_state[x].tx_p_size = tx_p_size;
sc->sc_chan_state[x].tx_np_size = tx_np_size;
/* keep track of used TX FIFO, if any */
sc->sc_tx_cur_p_level += tx_p_size;
sc->sc_tx_cur_np_level += tx_np_size;
/* clear interrupts */
dwc_otg_clear_hcint(sc, x);
@ -689,6 +673,7 @@ dwc_otg_host_channel_alloc(struct dwc_otg_softc *sc, struct dwc_otg_td *td, uint
static void
dwc_otg_host_channel_free(struct dwc_otg_softc *sc, struct dwc_otg_td *td)
{
uint32_t hcchar;
uint8_t x;
if (td->channel >= DWC_OTG_MAX_CHANNELS)
@ -702,18 +687,8 @@ dwc_otg_host_channel_free(struct dwc_otg_softc *sc, struct dwc_otg_td *td)
/*
* We need to let programmed host channels run till complete
* else the host channel will stop functioning. Assume that
* after a fixed given amount of time the host channel is no
* longer doing any USB traffic:
* else the host channel will stop functioning.
*/
if (td->ep_type == UE_ISOCHRONOUS) {
/* double buffered */
sc->sc_chan_state[x].wait_sof = DWC_OTG_SLOT_IDLE_MAX;
} else {
/* single buffered */
sc->sc_chan_state[x].wait_sof = DWC_OTG_SLOT_IDLE_MIN;
}
sc->sc_chan_state[x].allocated = 0;
/* ack any pending messages */
@ -724,6 +699,16 @@ dwc_otg_host_channel_free(struct dwc_otg_softc *sc, struct dwc_otg_td *td)
/* clear active channel */
sc->sc_active_rx_ep &= ~(1 << x);
/* disable host channel */
hcchar = DWC_OTG_READ_4(sc, DOTG_HCCHAR(x));
if (hcchar & HCCHAR_CHENA) {
DPRINTF("Halting channel %d\n", x);
DWC_OTG_WRITE_4(sc, DOTG_HCCHAR(x),
hcchar | HCCHAR_CHDIS);
sc->sc_chan_state[x].wait_halted = 1;
/* don't write HCCHAR until the channel is halted */
}
}
static void
@ -1402,7 +1387,8 @@ dwc_otg_host_data_rx(struct dwc_otg_softc *sc, struct dwc_otg_td *td)
hcchar |= HCCHAR_EPDIR_IN;
/* receive complete split ASAP */
if ((sc->sc_last_frame_num & 1) != 0)
if ((sc->sc_last_frame_num & 1) != 0 &&
(td->ep_type == UE_INTERRUPT || td->ep_type == UE_ISOCHRONOUS))
hcchar |= HCCHAR_ODDFRM;
else
hcchar &= ~HCCHAR_ODDFRM;
@ -1450,7 +1436,8 @@ dwc_otg_host_data_rx(struct dwc_otg_softc *sc, struct dwc_otg_td *td)
DWC_OTG_WRITE_4(sc, DOTG_HCSPLT(channel), td->hcsplt);
/* send after next SOF event */
if ((sc->sc_last_frame_num & 1) == 0)
if ((sc->sc_last_frame_num & 1) == 0 &&
(td->ep_type == UE_INTERRUPT || td->ep_type == UE_ISOCHRONOUS))
td->hcchar |= HCCHAR_ODDFRM;
else
td->hcchar &= ~HCCHAR_ODDFRM;
@ -1890,7 +1877,8 @@ dwc_otg_host_data_tx(struct dwc_otg_softc *sc, struct dwc_otg_td *td)
hcchar &= ~HCCHAR_EPDIR_IN;
/* send after next SOF event */
if ((sc->sc_last_frame_num & 1) == 0)
if ((sc->sc_last_frame_num & 1) == 0 &&
(td->ep_type == UE_INTERRUPT || td->ep_type == UE_ISOCHRONOUS))
hcchar |= HCCHAR_ODDFRM;
else
hcchar &= ~HCCHAR_ODDFRM;
@ -1954,7 +1942,8 @@ dwc_otg_host_data_tx(struct dwc_otg_softc *sc, struct dwc_otg_td *td)
hcchar &= ~HCCHAR_EPDIR_IN;
/* receive complete split ASAP */
if ((sc->sc_last_frame_num & 1) != 0)
if ((sc->sc_last_frame_num & 1) != 0 &&
(td->ep_type == UE_INTERRUPT || td->ep_type == UE_ISOCHRONOUS))
hcchar |= HCCHAR_ODDFRM;
else
hcchar &= ~HCCHAR_ODDFRM;
@ -2350,31 +2339,6 @@ dwc_otg_timer_stop(struct dwc_otg_softc *sc)
usb_callout_stop(&sc->sc_timer);
}
/*
 * dwc_otg_host_channel_disable - request that host channel "x" be
 * disabled and release the TX FIFO space accounted to it.
 *
 * If HCCHAR for the channel has either the enable or the disable bit
 * set, a disable request is written and the channel's wait_sof counter
 * is set to 2. The TX FIFO accounting is released unconditionally and
 * the per-channel sizes are zeroed so that a repeated call does not
 * subtract them a second time.
 */
static void
dwc_otg_host_channel_disable(struct dwc_otg_softc *sc, uint8_t x)
{
uint32_t hcchar;
hcchar = DWC_OTG_READ_4(sc, DOTG_HCCHAR(x));
/* disable host channel, if any */
if (hcchar & (HCCHAR_CHENA | HCCHAR_CHDIS)) {
/*
 * disable channel; note that only the two control bits are
 * written, the remaining HCCHAR value read above is not
 * written back
 */
DWC_OTG_WRITE_4(sc, DOTG_HCCHAR(x),
HCCHAR_CHENA | HCCHAR_CHDIS);
/* wait for chip to get its brains in order */
sc->sc_chan_state[x].wait_sof = 2;
}
/* release TX FIFO usage, if any */
sc->sc_tx_cur_p_level -= sc->sc_chan_state[x].tx_p_size;
sc->sc_tx_cur_np_level -= sc->sc_chan_state[x].tx_np_size;
/* don't release TX FIFO usage twice */
sc->sc_chan_state[x].tx_p_size = 0;
sc->sc_chan_state[x].tx_np_size = 0;
}
static uint16_t
dwc_otg_compute_isoc_rx_tt_slot(struct dwc_otg_tt_info *pinfo)
{
@ -2392,7 +2356,6 @@ dwc_otg_update_host_transfer_schedule_locked(struct dwc_otg_softc *sc)
struct dwc_otg_td *td;
uint16_t temp;
uint16_t slot;
uint8_t x;
temp = DWC_OTG_READ_4(sc, DOTG_HFNUM) & DWC_OTG_FRAME_MASK;
@ -2403,15 +2366,6 @@ dwc_otg_update_host_transfer_schedule_locked(struct dwc_otg_softc *sc)
TAILQ_INIT(&head);
for (x = 0; x != sc->sc_host_ch_max; x++) {
if (sc->sc_chan_state[x].wait_sof == 0)
continue;
sc->sc_needsof = 1;
if (--(sc->sc_chan_state[x].wait_sof) == 0)
dwc_otg_host_channel_disable(sc, x);
}
if ((temp & 7) == 0) {
/* reset the schedule */
@ -2631,6 +2585,12 @@ dwc_otg_interrupt_poll_locked(struct dwc_otg_softc *sc)
if (temp != GRXSTSRD_STP_DATA &&
temp != GRXSTSRD_STP_COMPLETE &&
temp != GRXSTSRD_OUT_DATA) {
/* check for halted channel */
if (temp == GRXSTSRH_HALTED) {
ep_no = GRXSTSRD_CHNUM_GET(sc->sc_last_rx_status);
sc->sc_chan_state[ep_no].wait_halted = 0;
DPRINTFN(5, "channel halt complete ch=%u\n", ep_no);
}
dwc_otg_common_rx_ack(sc);
goto repeat;
}
@ -2764,6 +2724,12 @@ dwc_otg_filter_interrupt(void *arg)
if ((status & DWC_OTG_MSK_GINT_THREAD_IRQ) != 0)
retval = FILTER_SCHEDULE_THREAD;
/* clear FIFO empty interrupts */
if (status & sc->sc_irq_mask &
(GINTSTS_PTXFEMP | GINTSTS_NPTXFEMP)) {
sc->sc_irq_mask &= ~(GINTSTS_PTXFEMP | GINTSTS_NPTXFEMP);
DWC_OTG_WRITE_4(sc, DOTG_GINTMSK, sc->sc_irq_mask);
}
/* clear all IN endpoint interrupts */
if (status & GINTSTS_IEPINT) {
uint32_t temp;

View File

@ -37,7 +37,7 @@
#define DWC_OTG_TT_SLOT_MAX 8
#define DWC_OTG_SLOT_IDLE_MAX 3
#define DWC_OTG_SLOT_IDLE_MIN 2
#define DWC_OTG_NAK_MAX 8 /* 1 ms */
#define DWC_OTG_NAK_MAX 16 /* 16 NAKs = 2 ms */
#ifndef DWC_OTG_TX_MAX_FIFO_SIZE
#define DWC_OTG_TX_MAX_FIFO_SIZE DWC_OTG_MAX_TXN
#endif
@ -156,10 +156,8 @@ struct dwc_otg_profile {
/* Per host channel software state, indexed by host channel number. */
struct dwc_otg_chan_state {
/* non-zero while the channel is handed out by the channel allocator */
uint16_t allocated;
/* countdown used to delay disabling/re-using a channel */
uint16_t wait_sof;
/*
 * set to 1 when a channel halt has been requested and cleared again
 * when the halted status for this channel is received
 */
uint16_t wait_halted;
/* NOTE(review): presumably the accumulated HCINT status - confirm */
uint32_t hcint;
uint16_t tx_p_size; /* periodic */
uint16_t tx_np_size; /* non-periodic */
};
struct dwc_otg_softc {
@ -181,9 +179,6 @@ struct dwc_otg_softc {
uint32_t sc_tx_bounce_buffer[MAX(512 * DWC_OTG_MAX_TXP, 1024) / 4];
uint32_t sc_fifo_size;
uint32_t sc_tx_max_size;
uint32_t sc_tx_cur_p_level; /* periodic */
uint32_t sc_tx_cur_np_level; /* non-periodic */
uint32_t sc_irq_mask;
uint32_t sc_last_rx_status;
uint32_t sc_out_ctl[DWC_OTG_MAX_ENDPOINTS];

View File

@ -47,6 +47,8 @@
#define DOTG_GGPIO 0x0038
#define DOTG_GUID 0x003C
#define DOTG_GSNPSID 0x0040
#define DOTG_GSNPSID_REV_2_80a 0x4f54280a /* RPi model B/RPi2 */
#define DOTG_GSNPSID_REV_3_10a 0x4f54310a /* ODROID-C1 */
#define DOTG_GHWCFG1 0x0044
#define DOTG_GHWCFG2 0x0048
#define DOTG_GHWCFG3 0x004C