9ca4041b6c
-It has new hardware support -It uses a new method of TX cleanup called Head Write Back -It includes the provisional generic TCP LRO feature contributed by Myricom and made general purpose by me. This should move into the stack upon approval but for this driver drop its in here. -Also bug fixes and etc... MFC in a week if no serious issues arise.
381 lines
10 KiB
C
381 lines
10 KiB
C
/******************************************************************************
|
|
|
|
Copyright (c) 2007, Myricom Inc.
|
|
Copyright (c) 2008, Intel Corporation.
|
|
All rights reserved.
|
|
|
|
Redistribution and use in source and binary forms, with or without
|
|
modification, are permitted provided that the following conditions are met:
|
|
|
|
1. Redistributions of source code must retain the above copyright notice,
|
|
this list of conditions and the following disclaimer.
|
|
|
|
2. Neither the name of the Myricom Inc, nor the names of its
|
|
contributors may be used to endorse or promote products derived from
|
|
this software without specific prior written permission.
|
|
|
|
3. Neither the name of the Intel Corporation, nor the names of its
|
|
contributors may be used to endorse or promote products derived from
|
|
this software without specific prior written permission.
|
|
|
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
$FreeBSD$
|
|
***************************************************************************/
|
|
|
|
#include <sys/param.h>
|
|
#include <sys/systm.h>
|
|
#include <sys/endian.h>
|
|
#include <sys/mbuf.h>
|
|
#include <sys/kernel.h>
|
|
#include <sys/socket.h>
|
|
|
|
#include <net/if.h>
|
|
#include <net/ethernet.h>
|
|
#include <net/if_media.h>
|
|
|
|
#include <netinet/in_systm.h>
|
|
#include <netinet/in.h>
|
|
#include <netinet/ip.h>
|
|
#include <netinet/tcp.h>
|
|
|
|
#include <machine/bus.h>
|
|
#include <machine/in_cksum.h>
|
|
|
|
#include "tcp_lro.h"
|
|
|
|
static uint16_t do_csum_data(uint16_t *raw, int len)
|
|
{
|
|
uint32_t csum;
|
|
csum = 0;
|
|
while (len > 0) {
|
|
csum += *raw;
|
|
raw++;
|
|
csum += *raw;
|
|
raw++;
|
|
len -= 4;
|
|
}
|
|
csum = (csum >> 16) + (csum & 0xffff);
|
|
csum = (csum >> 16) + (csum & 0xffff);
|
|
return (uint16_t)csum;
|
|
}
|
|
|
|
/*
|
|
* Allocate and init the LRO data structures
|
|
*/
|
|
int
|
|
tcp_lro_init(struct lro_ctrl *cntl)
|
|
{
|
|
struct lro_entry *lro;
|
|
int i, error = 0;
|
|
|
|
SLIST_INIT(&cntl->lro_free);
|
|
SLIST_INIT(&cntl->lro_active);
|
|
|
|
cntl->lro_bad_csum = 0;
|
|
cntl->lro_queued = 0;
|
|
cntl->lro_flushed = 0;
|
|
|
|
for (i = 0; i < LRO_ENTRIES; i++) {
|
|
lro = (struct lro_entry *) malloc(sizeof (struct lro_entry),
|
|
M_DEVBUF, M_NOWAIT | M_ZERO);
|
|
if (lro == NULL) {
|
|
if (i == 0)
|
|
error = ENOMEM;
|
|
break;
|
|
}
|
|
cntl->lro_cnt = i;
|
|
SLIST_INSERT_HEAD(&cntl->lro_free, lro, next);
|
|
}
|
|
|
|
return (error);
|
|
}
|
|
|
|
void
|
|
tcp_lro_free(struct lro_ctrl *cntl)
|
|
{
|
|
struct lro_entry *entry;
|
|
|
|
while (!SLIST_EMPTY(&cntl->lro_free)) {
|
|
entry = SLIST_FIRST(&cntl->lro_free);
|
|
SLIST_REMOVE_HEAD(&cntl->lro_free, next);
|
|
free(entry, M_DEVBUF);
|
|
}
|
|
}
|
|
|
|
void
|
|
tcp_lro_flush(struct lro_ctrl *cntl, struct lro_entry *lro)
|
|
{
|
|
struct ifnet *ifp;
|
|
struct ip *ip;
|
|
struct tcphdr *tcp;
|
|
uint32_t *ts_ptr;
|
|
uint32_t tcplen, tcp_csum;
|
|
|
|
//printf("tcp_lro_flush: entry\n");
|
|
|
|
if (lro->append_cnt) {
|
|
/* incorporate the new len into the ip header and
|
|
* re-calculate the checksum */
|
|
ip = lro->ip;
|
|
ip->ip_len = htons(lro->len - ETHER_HDR_LEN);
|
|
ip->ip_sum = 0;
|
|
ip->ip_sum = 0xffff ^
|
|
do_csum_data((uint16_t*)ip,
|
|
sizeof (*ip));
|
|
|
|
lro->m_head->m_pkthdr.csum_flags = CSUM_IP_CHECKED |
|
|
CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
|
|
lro->m_head->m_pkthdr.csum_data = 0xffff;
|
|
lro->m_head->m_pkthdr.len = lro->len;
|
|
|
|
/* incorporate the latest ack into the tcp header */
|
|
tcp = (struct tcphdr *) (ip + 1);
|
|
tcp->th_ack = lro->ack_seq;
|
|
tcp->th_win = lro->window;
|
|
/* incorporate latest timestamp into the tcp header */
|
|
if (lro->timestamp) {
|
|
ts_ptr = (uint32_t *)(tcp + 1);
|
|
ts_ptr[1] = htonl(lro->tsval);
|
|
ts_ptr[2] = lro->tsecr;
|
|
}
|
|
/*
|
|
* update checksum in tcp header by re-calculating the
|
|
* tcp pseudoheader checksum, and adding it to the checksum
|
|
* of the tcp payload data
|
|
*/
|
|
tcp->th_sum = 0;
|
|
tcplen = lro->len - sizeof(*ip) - ETHER_HDR_LEN;
|
|
tcp_csum = lro->data_csum;
|
|
tcp_csum += in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
|
|
htons(tcplen + IPPROTO_TCP));
|
|
tcp_csum += do_csum_data((uint16_t*)tcp,
|
|
tcp->th_off << 2);
|
|
tcp_csum = (tcp_csum & 0xffff) + (tcp_csum >> 16);
|
|
tcp_csum = (tcp_csum & 0xffff) + (tcp_csum >> 16);
|
|
tcp->th_sum = 0xffff ^ tcp_csum;
|
|
}
|
|
ifp = cntl->ifp;
|
|
(*ifp->if_input)(cntl->ifp, lro->m_head);
|
|
cntl->lro_queued += lro->append_cnt + 1;
|
|
cntl->lro_flushed++;
|
|
lro->m_head = NULL;
|
|
lro->timestamp = 0;
|
|
lro->append_cnt = 0;
|
|
SLIST_INSERT_HEAD(&cntl->lro_free, lro, next);
|
|
}
|
|
|
|
int
|
|
tcp_lro_rx(struct lro_ctrl *cntl, struct mbuf *m_head, uint32_t csum)
|
|
{
|
|
struct ether_header *eh;
|
|
struct ip *ip;
|
|
struct tcphdr *tcp;
|
|
uint32_t *ts_ptr;
|
|
struct mbuf *m_nxt, *m_tail;
|
|
struct lro_entry *lro;
|
|
int hlen, ip_len, tcp_hdr_len, tcp_data_len, tot_len;
|
|
int opt_bytes, trim;
|
|
uint32_t seq, tmp_csum, device_mtu;
|
|
|
|
|
|
eh = mtod(m_head, struct ether_header *);
|
|
if (eh->ether_type != htons(ETHERTYPE_IP))
|
|
return 1;
|
|
ip = (struct ip *) (eh + 1);
|
|
if (ip->ip_p != IPPROTO_TCP)
|
|
return 1;
|
|
|
|
/* ensure there are no options */
|
|
if ((ip->ip_hl << 2) != sizeof (*ip))
|
|
return -1;
|
|
|
|
/* .. and the packet is not fragmented */
|
|
if (ip->ip_off & htons(IP_MF|IP_OFFMASK))
|
|
return -1;
|
|
|
|
/* verify that the IP header checksum is correct */
|
|
tmp_csum = do_csum_data((uint16_t *)ip, sizeof (*ip));
|
|
if (__predict_false((tmp_csum ^ 0xffff) != 0)) {
|
|
cntl->lro_bad_csum++;
|
|
return -1;
|
|
}
|
|
|
|
/* find the TCP header */
|
|
tcp = (struct tcphdr *) (ip + 1);
|
|
|
|
/* Get the TCP checksum if we dont have it */
|
|
if (!csum)
|
|
csum = tcp->th_sum;
|
|
|
|
/* ensure no bits set besides ack or psh */
|
|
if ((tcp->th_flags & ~(TH_ACK | TH_PUSH)) != 0)
|
|
return -1;
|
|
|
|
/* check for timestamps. Since the only option we handle are
|
|
timestamps, we only have to handle the simple case of
|
|
aligned timestamps */
|
|
|
|
opt_bytes = (tcp->th_off << 2) - sizeof (*tcp);
|
|
tcp_hdr_len = sizeof (*tcp) + opt_bytes;
|
|
ts_ptr = (uint32_t *)(tcp + 1);
|
|
if (opt_bytes != 0) {
|
|
if (__predict_false(opt_bytes != TCPOLEN_TSTAMP_APPA) ||
|
|
(*ts_ptr != ntohl(TCPOPT_NOP<<24|TCPOPT_NOP<<16|
|
|
TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP)))
|
|
return -1;
|
|
}
|
|
|
|
ip_len = ntohs(ip->ip_len);
|
|
tcp_data_len = ip_len - (tcp->th_off << 2) - sizeof (*ip);
|
|
|
|
|
|
/*
|
|
* If frame is padded beyond the end of the IP packet,
|
|
* then we must trim the extra bytes off the end.
|
|
*/
|
|
tot_len = m_head->m_pkthdr.len;
|
|
trim = tot_len - (ip_len + ETHER_HDR_LEN);
|
|
if (trim != 0) {
|
|
if (trim < 0) {
|
|
/* truncated packet */
|
|
return -1;
|
|
}
|
|
m_adj(m_head, -trim);
|
|
tot_len = m_head->m_pkthdr.len;
|
|
}
|
|
|
|
m_nxt = m_head;
|
|
m_tail = NULL; /* -Wuninitialized */
|
|
while (m_nxt != NULL) {
|
|
m_tail = m_nxt;
|
|
m_nxt = m_tail->m_next;
|
|
}
|
|
|
|
hlen = ip_len + ETHER_HDR_LEN - tcp_data_len;
|
|
seq = ntohl(tcp->th_seq);
|
|
|
|
SLIST_FOREACH(lro, &cntl->lro_active, next) {
|
|
if (lro->source_port == tcp->th_sport &&
|
|
lro->dest_port == tcp->th_dport &&
|
|
lro->source_ip == ip->ip_src.s_addr &&
|
|
lro->dest_ip == ip->ip_dst.s_addr) {
|
|
/* Try to append it */
|
|
|
|
if (__predict_false(seq != lro->next_seq)) {
|
|
/* out of order packet */
|
|
SLIST_REMOVE(&cntl->lro_active, lro,
|
|
lro_entry, next);
|
|
tcp_lro_flush(cntl, lro);
|
|
return -1;
|
|
}
|
|
|
|
if (opt_bytes) {
|
|
uint32_t tsval = ntohl(*(ts_ptr + 1));
|
|
/* make sure timestamp values are increasing */
|
|
if (__predict_false(lro->tsval > tsval ||
|
|
*(ts_ptr + 2) == 0)) {
|
|
return -1;
|
|
}
|
|
lro->tsval = tsval;
|
|
lro->tsecr = *(ts_ptr + 2);
|
|
}
|
|
|
|
lro->next_seq += tcp_data_len;
|
|
lro->ack_seq = tcp->th_ack;
|
|
lro->window = tcp->th_win;
|
|
lro->append_cnt++;
|
|
if (tcp_data_len == 0) {
|
|
m_freem(m_head);
|
|
return 0;
|
|
}
|
|
/* subtract off the checksum of the tcp header
|
|
* from the hardware checksum, and add it to the
|
|
* stored tcp data checksum. Byteswap the checksum
|
|
* if the total length so far is odd
|
|
*/
|
|
tmp_csum = do_csum_data((uint16_t*)tcp,
|
|
tcp_hdr_len);
|
|
csum = csum + (tmp_csum ^ 0xffff);
|
|
csum = (csum & 0xffff) + (csum >> 16);
|
|
csum = (csum & 0xffff) + (csum >> 16);
|
|
if (lro->len & 0x1) {
|
|
/* Odd number of bytes so far, flip bytes */
|
|
csum = ((csum << 8) | (csum >> 8)) & 0xffff;
|
|
}
|
|
csum = csum + lro->data_csum;
|
|
csum = (csum & 0xffff) + (csum >> 16);
|
|
csum = (csum & 0xffff) + (csum >> 16);
|
|
lro->data_csum = csum;
|
|
|
|
lro->len += tcp_data_len;
|
|
|
|
/* adjust mbuf so that m->m_data points to
|
|
the first byte of the payload */
|
|
m_adj(m_head, hlen);
|
|
/* append mbuf chain */
|
|
lro->m_tail->m_next = m_head;
|
|
/* advance the last pointer */
|
|
lro->m_tail = m_tail;
|
|
/* flush packet if required */
|
|
device_mtu = cntl->ifp->if_mtu;
|
|
if (lro->len > (65535 - device_mtu)) {
|
|
SLIST_REMOVE(&cntl->lro_active, lro,
|
|
lro_entry, next);
|
|
tcp_lro_flush(cntl, lro);
|
|
}
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
if (SLIST_EMPTY(&cntl->lro_free))
|
|
return -1;
|
|
|
|
/* start a new chain */
|
|
lro = SLIST_FIRST(&cntl->lro_free);
|
|
SLIST_REMOVE_HEAD(&cntl->lro_free, next);
|
|
SLIST_INSERT_HEAD(&cntl->lro_active, lro, next);
|
|
lro->source_port = tcp->th_sport;
|
|
lro->dest_port = tcp->th_dport;
|
|
lro->source_ip = ip->ip_src.s_addr;
|
|
lro->dest_ip = ip->ip_dst.s_addr;
|
|
lro->next_seq = seq + tcp_data_len;
|
|
lro->mss = tcp_data_len;
|
|
lro->ack_seq = tcp->th_ack;
|
|
lro->window = tcp->th_win;
|
|
|
|
/* save the checksum of just the TCP payload by
|
|
* subtracting off the checksum of the TCP header from
|
|
* the entire hardware checksum
|
|
* Since IP header checksum is correct, checksum over
|
|
* the IP header is -0. Substracting -0 is unnecessary.
|
|
*/
|
|
tmp_csum = do_csum_data((uint16_t*)tcp, tcp_hdr_len);
|
|
csum = csum + (tmp_csum ^ 0xffff);
|
|
csum = (csum & 0xffff) + (csum >> 16);
|
|
csum = (csum & 0xffff) + (csum >> 16);
|
|
lro->data_csum = csum;
|
|
|
|
lro->ip = ip;
|
|
/* record timestamp if it is present */
|
|
if (opt_bytes) {
|
|
lro->timestamp = 1;
|
|
lro->tsval = ntohl(*(ts_ptr + 1));
|
|
lro->tsecr = *(ts_ptr + 2);
|
|
}
|
|
lro->len = tot_len;
|
|
lro->m_head = m_head;
|
|
lro->m_tail = m_tail;
|
|
return 0;
|
|
}
|