Overhaul bus_dma(9) usage and fix various things.

 o Separate TX/RX buffer DMA tag from TX/RX descriptor ring DMA tag.
 o Separate RX buffer DMA tag from common buffer DMA tag. The RX DMA
   tag has different restrictions compared to the TX DMA tag.
 o Add 48bit DMA address support for TX/RX buffers.
 o Adjust TX/RX descriptor ring alignment to 64 bytes from 256
   bytes, as documented in the datasheet.
 o Add a check to ensure the TX/RX rings reside within a 4GB
   boundary. Since the TX and RX rings share the same high address
   register they must have the same high 32bit address (see the
   first sketch after this list).
 o Add TX/RX side bus_dmamap_load_mbuf_sg(9) support.
 o Add lock assertion to vge_setmulti().
 o Add an RX spare DMA map to recover from DMA map load failures
   (see the vge_newbuf() sketch after this list).
 o Add an optimized RX buffer handler, vge_discard_rxbuf(), which
   is activated when vge(4) sees bad frames.
 o Don't blindly update the VGE_RXDESC_RESIDUECNT register. The
   datasheet says the register should be updated only when the
   number of available RX descriptors is a multiple of 4 (sketched
   after this list).
 o Use __NO_STRICT_ALIGNMENT instead of the driver-private
   VGE_FIXUP_RX, which was defined on every architecture except
   i386. Previously vge(4) performed the expensive copy operation
   to align the IP header even on amd64, which does not require
   strict alignment. This change should give an RX performance
   boost on amd64.
 o Don't reinitialize the controller if the driver is already
   running. This should reduce the number of link state flips.
 o Since vge(4) drops the driver lock before passing a received
   frame to the upper layer, make sure vge(4) is still running
   after re-acquiring the driver lock.
 o Add a second argument, count, to vge_rxeof(). The argument
   limits the number of packets that may be processed in the RX
   handler (see the vge_rxeof() sketch after this list).
 o Rearrange vge_rxeof() so it does not allocate a new RX buffer
   when the received frame was a bad packet.
 o Remove the if_printf() call that reported DMA map load failures.
   This type of message shouldn't be used in the fast path of a
   driver.
 o Reduce the number of allowed TX buffer fragments to 6 from 7. A
   TX descriptor allows 7 fragments of a frame. However, the CMZ
   field of the descriptor has just 3 bits and the controller wants
   to see "number of fragments + 1" in the field. So with 7
   fragments the field value would wrap to 0, which seems to cause
   unexpected results under certain conditions. This change should
   fix the occasional TX hang observed on vge(4) (demonstrated
   after this list).
 o Simplify vge_start_locked() and add a check for the number of
   available TX descriptors.
 o vge(4) controllers do not pad short frames in hardware, so make
   sure to zero-fill the padded bytes (sketched after this list).
   This closes an unintended information disclosure.
 o Don't set the VGE_TDCTL_JUMBO flag. The datasheet is not clear
   whether this bit should be set by the driver or is a write-back
   status bit set after transmission. At least the vendor's driver
   does not set this bit, so remove it. Without this bit vge(4) can
   still send jumbo frames.
 o Don't start the driver when vge(4) knows there are not enough
   RX buffers.
 o Remove the volatile keyword from the RX descriptor structure.
   Coherency should be handled by bus_dma(9) synchronization.
 o Collapse the two 16bit members of the TX/RX descriptors into a
   single 32bit member.
 o Reduce the number of RX descriptors to 252 from 256.
   VGE_RXDESCNUM is a 16bit register but only the lower 8 bits are
   valid, so the maximum number of RX descriptors would be 255.
   However, the count should be a multiple of 4 because the
   controller wants to update 4 RX descriptors at a time. This
   limits the maximum number of RX descriptors to 252.
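
The sketches referenced in the list above follow. They are
illustrative reconstructions, not the commit's literal code: helper
names, signatures, and any identifier not visible in the diffs below
are assumptions. First, the 4GB boundary / high-address check;
vge_dma_ring_addr_ok() is a hypothetical helper built on the
VGE_ADDR_HI() macro from the header diff:

static int
vge_dma_ring_addr_ok(bus_addr_t tx_ring, bus_size_t tx_sz,
    bus_addr_t rx_ring, bus_size_t rx_sz)
{

	/* One high address register serves both descriptor rings. */
	if (VGE_ADDR_HI(tx_ring) != VGE_ADDR_HI(rx_ring))
		return (0);
	/* Neither ring may cross a 4GB boundary. */
	if (VGE_ADDR_HI(tx_ring) != VGE_ADDR_HI(tx_ring + tx_sz - 1) ||
	    VGE_ADDR_HI(rx_ring) != VGE_ADDR_HI(rx_ring + rx_sz - 1))
		return (0);
	return (1);
}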
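
Next, the spare DMA map pattern. This is a minimal sketch, not the
commit's literal vge_newbuf(); it assumes the vge_rx_tag,
vge_rx_sparemap and vge_rxdesc fields from the new vge_chain_data
structure (see the header diff below) and omits descriptor
programming:

static int
vge_newbuf(struct vge_softc *sc, int idx)
{
	struct mbuf *m;
	bus_dma_segment_t segs[1];
	bus_dmamap_t map;
	struct vge_rxdesc *rxd;
	int nsegs;

	m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
	if (m == NULL)
		return (ENOBUFS);
	m->m_len = m->m_pkthdr.len = MCLBYTES;
#ifndef __NO_STRICT_ALIGNMENT
	/* Keep the IP header aligned on strict-alignment machines. */
	m_adj(m, VGE_RX_BUF_ALIGN);
#endif
	/* Load into the spare map; failure leaves the old mbuf intact. */
	if (bus_dmamap_load_mbuf_sg(sc->vge_cdata.vge_rx_tag,
	    sc->vge_cdata.vge_rx_sparemap, m, segs, &nsegs, 0) != 0) {
		m_freem(m);
		return (ENOBUFS);
	}
	KASSERT(nsegs == 1, ("%s: %d segments returned!", __func__, nsegs));

	rxd = &sc->vge_cdata.vge_rxdesc[idx];
	if (rxd->rx_m != NULL) {
		bus_dmamap_sync(sc->vge_cdata.vge_rx_tag, rxd->rx_dmamap,
		    BUS_DMASYNC_POSTREAD);
		bus_dmamap_unload(sc->vge_cdata.vge_rx_tag, rxd->rx_dmamap);
	}
	/* Swap the spare map with the descriptor's map. */
	map = rxd->rx_dmamap;
	rxd->rx_dmamap = sc->vge_cdata.vge_rx_sparemap;
	sc->vge_cdata.vge_rx_sparemap = map;
	rxd->rx_m = m;
	/* ... program rxd->rx_desc with segs[0].ds_addr/ds_len ... */
	return (0);
}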
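
The deferred VGE_RXDESC_RESIDUECNT update, as an assumed-shape
helper: descriptors handed back to the chip are counted in
vge_rx_commit and only reported in multiples of VGE_RXCHUNK (4), as
the datasheet requires.

static void
vge_rx_commit_update(struct vge_softc *sc, int prog)
{

	sc->vge_cdata.vge_rx_commit += prog;
	if (sc->vge_cdata.vge_rx_commit >= VGE_RXCHUNK) {
		/* Report only whole chunks of 4 descriptors. */
		prog = sc->vge_cdata.vge_rx_commit & ~(VGE_RXCHUNK - 1);
		CSR_WRITE_2(sc, VGE_RXDESC_RESIDUECNT, prog);
		sc->vge_cdata.vge_rx_commit -= prog;
	}
}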
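
The bounded RX loop, sketched; VGE_RDSTS_OWN, VGE_RDSTS_RXOK and
VGE_LOCK_ASSERT() are assumed to exist in the full headers (only part
of the diff is shown below), and the signature of vge_discard_rxbuf()
is an assumption:

static int
vge_rxeof(struct vge_softc *sc, int count)
{
	struct vge_rx_desc *cur_rx;
	uint32_t rxsts;
	int prod, prog;

	VGE_LOCK_ASSERT(sc);
	prod = sc->vge_cdata.vge_rx_prodidx;
	for (prog = 0; count > 0; count--, VGE_RX_DESC_INC(prod)) {
		cur_rx = &sc->vge_rdata.vge_rx_ring[prod];
		rxsts = le32toh(cur_rx->vge_sts);
		if ((rxsts & VGE_RDSTS_OWN) != 0)
			break;	/* descriptor still owned by the chip */
		prog++;
		if ((rxsts & VGE_RDSTS_RXOK) == 0) {
			/* Bad frame: recycle the loaded mbuf cheaply. */
			vge_discard_rxbuf(sc, prod);
			continue;
		}
		/* ... vge_newbuf(), fix up mbuf, pass to if_input ... */
	}
	sc->vge_cdata.vge_rx_prodidx = prod;
	return (prog);
}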
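
The CMZ wrap-around from the TX fragment item, as a standalone
demonstration (plain C, compilable with any C compiler):

#include <stdio.h>

int
main(void)
{
	unsigned int nsegs;

	/* CMZ is 3 bits wide and must hold "number of fragments + 1". */
	for (nsegs = 1; nsegs <= 7; nsegs++)
		printf("nsegs = %u -> CMZ = %u\n", nsegs, (nsegs + 1) & 0x7);
	/* The last line prints CMZ = 0: the bogus encoding for 7. */
	return (0);
}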
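
Finally, the short-frame zero padding, simplified to the
single-mbuf, writable case (the real TX path must also handle
read-only and fragmented mbufs):

static void
vge_pad_short_frame(struct mbuf *m)
{
	int padlen;

	padlen = VGE_MIN_FRAMELEN - m->m_pkthdr.len;
	if (padlen <= 0 || M_TRAILINGSPACE(m) < padlen)
		return;
	/* Zero the pad so stale kernel memory never reaches the wire. */
	bzero(mtod(m, char *) + m->m_pkthdr.len, padlen);
	m->m_pkthdr.len += padlen;
	m->m_len = m->m_pkthdr.len;
}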

Tested by:	Dewayne Geraghty (dewayne.geraghty <> heuristicsystems dot com dot au)
		Carey Jones (m.carey.jones <> gmail dot com)
		Yoshiaki Kasahara (kasahara <> nc dot kyushu-u dot ac dot jp)
Pyun YongHyeon	2009-12-14 18:44:23 +00:00
commit 410f4c60ad (parent 2d7340428b)
Notes (svn2git):	svn path=/head/; revision=200525
3 changed files with 867 additions and 719 deletions

File diff suppressed because it is too large.

@@ -587,8 +587,7 @@
 struct vge_tx_frag {
 	uint32_t	vge_addrlo;
-	uint16_t	vge_addrhi;
-	uint16_t	vge_buflen;
+	uint32_t	vge_addrhi;
 };
 
 /*
@@ -600,7 +599,7 @@ struct vge_tx_frag {
  * to obtain this behavior, the special 'queue' bit must be set.
  */
-#define VGE_TXDESC_Q	0x8000
+#define VGE_TXDESC_Q	0x80000000
 
 struct vge_tx_desc {
 	uint32_t	vge_sts;
@@ -645,11 +644,10 @@ struct vge_tx_desc {
 /* Receive DMA descriptors have a single fragment pointer. */
 
 struct vge_rx_desc {
-	volatile uint32_t	vge_sts;
-	volatile uint32_t	vge_ctl;
-	volatile uint32_t	vge_addrlo;
-	volatile uint16_t	vge_addrhi;
-	volatile uint16_t	vge_buflen;
+	uint32_t	vge_sts;
+	uint32_t	vge_ctl;
+	uint32_t	vge_addrlo;
+	uint32_t	vge_addrhi;
 };
 
 /*
@@ -658,7 +656,7 @@ struct vge_rx_desc {
  * not interrupts are generated for this descriptor.
  */
-#define VGE_RXDESC_I	0x8000
+#define VGE_RXDESC_I	0x80000000
 
 #define VGE_RDSTS_VIDM	0x00000001	/* VLAN tag filter miss */
 #define VGE_RDSTS_CRCERR	0x00000002	/* bad CRC error */


@@ -32,27 +32,39 @@
  * $FreeBSD$
  */
 
-#if !defined(__i386__)
-#define VGE_FIXUP_RX
-#endif
-
 #define VGE_JUMBO_MTU	9000
 
 #define VGE_IFQ_MAXLEN	64
 
 #define VGE_TX_DESC_CNT	256
-#define VGE_RX_DESC_CNT	256	/* Must be a multiple of 4!! */
-#define VGE_RING_ALIGN	256
+#define VGE_RX_DESC_CNT	252	/* Must be a multiple of 4!! */
+#define VGE_TX_RING_ALIGN	64
+#define VGE_RX_RING_ALIGN	64
+#define VGE_MAXTXSEGS	6
+#define VGE_RX_BUF_ALIGN	sizeof(uint32_t)
+
+/*
+ * VIA Velocity allows 64bit DMA addressing but high 16bits
+ * of the DMA address should be the same for Tx/Rx buffers.
+ * Because this condition can't be guaranteed vge(4) limit
+ * DMA address space to 48bits.
+ */
+#if (BUS_SPACE_MAXADDR < 0xFFFFFFFFFFFF)
+#define VGE_BUF_DMA_MAXADDR	BUS_SPACE_MAXADDR
+#else
+#define VGE_BUF_DMA_MAXADDR	0xFFFFFFFFFFFF
+#endif
+
 #define VGE_RX_LIST_SZ	(VGE_RX_DESC_CNT * sizeof(struct vge_rx_desc))
 #define VGE_TX_LIST_SZ	(VGE_TX_DESC_CNT * sizeof(struct vge_tx_desc))
-#define VGE_TX_DESC_INC(x)	(x = (x + 1) % VGE_TX_DESC_CNT)
-#define VGE_RX_DESC_INC(x)	(x = (x + 1) % VGE_RX_DESC_CNT)
-#define VGE_ADDR_LO(y)	((u_int64_t) (y) & 0xFFFFFFFF)
-#define VGE_ADDR_HI(y)	((u_int64_t) (y) >> 32)
-#define VGE_BUFLEN(y)	((y) & 0x7FFF)
-#define VGE_OWN(x)	(le32toh((x)->vge_sts) & VGE_RDSTS_OWN)
-#define VGE_RXBYTES(x)	((le32toh((x)->vge_sts) & \
-			VGE_RDSTS_BUFSIZ) >> 16)
+#define VGE_TX_DESC_INC(x)	((x) = ((x) + 1) % VGE_TX_DESC_CNT)
+#define VGE_TX_DESC_DEC(x)	\
+	((x) = (((x) + VGE_TX_DESC_CNT - 1) % VGE_TX_DESC_CNT))
+#define VGE_RX_DESC_INC(x)	((x) = ((x) + 1) % VGE_RX_DESC_CNT)
+#define VGE_ADDR_LO(y)	((uint64_t) (y) & 0xFFFFFFFF)
+#define VGE_ADDR_HI(y)	((uint64_t) (y) >> 32)
+#define VGE_BUFLEN(y)	((y) & 0x3FFF)
+#define VGE_RXBYTES(x)	(((x) & VGE_RDSTS_BUFSIZ) >> 16)
 
 #define VGE_MIN_FRAMELEN	60
 
-#ifdef VGE_FIXUP_RX
@@ -67,34 +79,57 @@ struct vge_type {
 	char		*vge_name;
 };
 
-struct vge_softc;
-
-struct vge_dmaload_arg {
-	struct vge_softc	*sc;
-	int			vge_idx;
-	int			vge_maxsegs;
-	struct mbuf		*vge_m0;
-	u_int32_t		vge_flags;
+struct vge_txdesc {
+	struct mbuf		*tx_m;
+	bus_dmamap_t		tx_dmamap;
+	struct vge_tx_desc	*tx_desc;
+	struct vge_txdesc	*txd_prev;
 };
 
-struct vge_list_data {
-	struct mbuf		*vge_tx_mbuf[VGE_TX_DESC_CNT];
-	struct mbuf		*vge_rx_mbuf[VGE_RX_DESC_CNT];
+struct vge_rxdesc {
+	struct mbuf		*rx_m;
+	bus_dmamap_t		rx_dmamap;
+	struct vge_rx_desc	*rx_desc;
+	struct vge_rxdesc	*rxd_prev;
+};
+
+struct vge_chain_data {
+	bus_dma_tag_t		vge_ring_tag;
+	bus_dma_tag_t		vge_buffer_tag;
+	bus_dma_tag_t		vge_tx_tag;
+	struct vge_txdesc	vge_txdesc[VGE_TX_DESC_CNT];
+	bus_dma_tag_t		vge_rx_tag;
+	struct vge_rxdesc	vge_rxdesc[VGE_RX_DESC_CNT];
+	bus_dma_tag_t		vge_tx_ring_tag;
+	bus_dmamap_t		vge_tx_ring_map;
+	bus_dma_tag_t		vge_rx_ring_tag;
+	bus_dmamap_t		vge_rx_ring_map;
+	bus_dmamap_t		vge_rx_sparemap;
 	int			vge_tx_prodidx;
-	int			vge_rx_prodidx;
 	int			vge_tx_considx;
-	int			vge_tx_free;
-	bus_dmamap_t		vge_tx_dmamap[VGE_TX_DESC_CNT];
-	bus_dmamap_t		vge_rx_dmamap[VGE_RX_DESC_CNT];
-	bus_dma_tag_t		vge_mtag;	/* mbuf mapping tag */
-	bus_dma_tag_t		vge_rx_list_tag;
-	bus_dmamap_t		vge_rx_list_map;
-	struct vge_rx_desc	*vge_rx_list;
-	bus_addr_t		vge_rx_list_addr;
-	bus_dma_tag_t		vge_tx_list_tag;
-	bus_dmamap_t		vge_tx_list_map;
-	struct vge_tx_desc	*vge_tx_list;
-	bus_addr_t		vge_tx_list_addr;
+	int			vge_tx_cnt;
+	int			vge_rx_prodidx;
+	int			vge_rx_commit;
+
+	struct mbuf		*vge_head;
+	struct mbuf		*vge_tail;
+};
+
+#define VGE_CHAIN_RESET(_sc)					\
+do {								\
+	if ((_sc)->vge_cdata.vge_head != NULL) {		\
+		m_freem((_sc)->vge_cdata.vge_head);		\
+		(_sc)->vge_cdata.vge_head = NULL;		\
+		(_sc)->vge_cdata.vge_tail = NULL;		\
+	}							\
+} while (0);
+
+struct vge_ring_data {
+	struct vge_tx_desc	*vge_tx_ring;
+	bus_addr_t		vge_tx_ring_paddr;
+	struct vge_rx_desc	*vge_rx_ring;
+	bus_addr_t		vge_rx_ring_paddr;
 };
 
 struct vge_softc {
@@ -104,25 +139,18 @@ struct vge_softc {
 	struct resource		*vge_irq;
 	void			*vge_intrhand;
 	device_t		vge_miibus;
-	bus_dma_tag_t		vge_parent_tag;
-	bus_dma_tag_t		vge_tag;
 	u_int8_t		vge_type;
 	int			vge_if_flags;
-	int			vge_rx_consumed;
 	int			vge_link;
 	int			vge_camidx;
 	struct mtx		vge_mtx;
 	struct callout		vge_watchdog;
 	int			vge_timer;
-	struct mbuf		*vge_head;
-	struct mbuf		*vge_tail;
-	struct vge_list_data	vge_ldata;
+	struct vge_chain_data	vge_cdata;
+	struct vge_ring_data	vge_rdata;
 	int			suspended;	/* 0 = normal  1 = suspended */
-#ifdef DEVICE_POLLING
-	int			rxcycles;
-#endif
 };
 
 #define VGE_LOCK(_sc)	mtx_lock(&(_sc)->vge_mtx)
@@ -162,5 +190,6 @@ struct vge_softc {
 #define CSR_CLRBIT_4(sc, reg, x)	\
 	CSR_WRITE_4(sc, reg, CSR_READ_4(sc, reg) & ~(x))
 
+#define VGE_RXCHUNK	4
 #define VGE_TIMEOUT	10000