freebsd-dev/sys/i386/i386/in_cksum.c
Tom Rhodes a122cca953 These are changes to allow to use the Intel C/C++ compiler (lang/icc)
to build the kernel. It doesn't affect the operation if gcc.

Most of the changes are just adding __INTEL_COMPILER to #ifdef's, as
icc v8 may define __GNUC__ some parts may look strange but are
necessary.

Additional changes:
 - in_cksum.[ch]:
   * use a generic C version instead of the assembly version in the !gcc
     case (ASM code breaks with the optimizations icc does)
     -> no bad checksums with an icc compiled kernel
     Help from:		andre, grehan, das
     Stolen from: 	alpha version via ppc version
     The entire checksum code should IMHO be replaced with the DragonFly
     version (because it isn't guaranteed future revisions of gcc will
     include similar optimizations) as in:
        ---snip---
          Revision  Changes    Path
          1.12      +1 -0      src/sys/conf/files.i386
          1.4       +142 -558  src/sys/i386/i386/in_cksum.c
          1.5       +33 -69    src/sys/i386/include/in_cksum.h
          1.5       +2 -0      src/sys/netinet/igmp.c
          1.6       +0 -1      src/sys/netinet/in.h
          1.6       +2 -0      src/sys/netinet/ip_icmp.c

          1.4       +3 -4      src/contrib/ipfilter/ip_compat.h
          1.3       +1 -2      src/sbin/natd/icmp.c
          1.4       +0 -1      src/sbin/natd/natd.c
          1.48      +1 -0      src/sys/conf/files
          1.2       +0 -1      src/sys/conf/files.amd64
          1.13      +0 -1      src/sys/conf/files.i386
          1.5       +0 -1      src/sys/conf/files.pc98
          1.7       +1 -1      src/sys/contrib/ipfilter/netinet/fil.c
          1.10      +2 -3      src/sys/contrib/ipfilter/netinet/ip_compat.h
          1.10      +1 -1      src/sys/contrib/ipfilter/netinet/ip_fil.c
          1.7       +1 -1      src/sys/dev/netif/txp/if_txp.c
          1.7       +1 -1      src/sys/net/ip_mroute/ip_mroute.c
          1.7       +1 -2      src/sys/net/ipfw/ip_fw2.c
          1.6       +1 -2      src/sys/netinet/igmp.c
          1.4       +158 -116  src/sys/netinet/in_cksum.c
          1.6       +1 -1      src/sys/netinet/ip_gre.c
          1.7       +1 -2      src/sys/netinet/ip_icmp.c
          1.10      +1 -1      src/sys/netinet/ip_input.c
          1.10      +1 -2      src/sys/netinet/ip_output.c
          1.13      +1 -2      src/sys/netinet/tcp_input.c
          1.9       +1 -2      src/sys/netinet/tcp_output.c
          1.10      +1 -1      src/sys/netinet/tcp_subr.c
          1.10      +1 -1      src/sys/netinet/tcp_syncache.c
          1.9       +1 -2      src/sys/netinet/udp_usrreq.c

          1.5       +1 -2      src/sys/netinet6/ipsec.c
          1.5       +1 -2      src/sys/netproto/ipsec/ipsec.c
          1.5       +1 -1      src/sys/netproto/ipsec/ipsec_input.c
          1.4       +1 -2      src/sys/netproto/ipsec/ipsec_output.c

          and finally remove
            sys/i386/i386        in_cksum.c
            sys/i386/include     in_cksum.h
        ---snip---
 - endian.h:
   * DTRT in C++ mode
 - quad.h:
   * we don't use gcc v1 anymore, remove support for it
   Suggested by:	bde (long ago)
 - assym.h:
   * avoid zero-length arrays (remove dependency on a gcc specific
     feature)
     This change changes the contents of the object file, but as it's
     only used to generate some values for a header, and the generator
     knows how to handle this, there's no impact in the gcc case.
   Explained by:	bde
   Submitted by:	Marius Strobl <marius@alchemy.franken.de>
 - aicasm.c:
   * minor change to teach it about the way icc spells "-nostdinc"
   Not approved by:	gibbs (no reply to my mail)
 - bump __FreeBSD_version (lang/icc needs to know about the changes)

Incarnations of this patch survive gcc compiles since a loooong time,
I use it on my desktop. An icc compiled kernel works since Nov. 2003
(exceptions: snd_* if used as modules), it survives a build of the
entire ports collection with icc.

Parts of this commit contains suggestions or submissions from
Marius Strobl <marius@alchemy.franken.de>.

Reviewed by:	-arch
Submitted by:	netchild
2004-03-12 21:45:33 +00:00

461 lines
11 KiB
C

/*-
* Copyright (c) 1990 The Regents of the University of California.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* from tahoe: in_cksum.c 1.2 86/01/05
* from: @(#)in_cksum.c 1.3 (Berkeley) 1/19/91
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
/*
* MPsafe: alfred
*/
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <machine/in_cksum.h>
/*
* Checksum routine for Internet Protocol family headers.
*
* This routine is very heavily used in the network
* code and should be modified for each CPU to be as fast as possible.
*
* This implementation is 386 version.
*/
#undef ADDCARRY
#define ADDCARRY(x) if ((x) > 0xffff) (x) -= 0xffff
#if !defined(__GNUC__) || defined(__INTEL_COMPILER)
/* non gcc parts stolen from sys/alpha/alpha/in_cksum.c */
#define REDUCE32 \
{ \
q_util.q = sum; \
sum = q_util.s[0] + q_util.s[1] + q_util.s[2] + q_util.s[3]; \
}
#define REDUCE16 \
{ \
q_util.q = sum; \
l_util.l = q_util.s[0] + q_util.s[1] + q_util.s[2] + q_util.s[3]; \
sum = l_util.s[0] + l_util.s[1]; \
ADDCARRY(sum); \
}
#endif
#define REDUCE {sum = (sum & 0xffff) + (sum >> 16); ADDCARRY(sum);}
#if !defined(__GNUC__) || defined(__INTEL_COMPILER)
static const u_int32_t in_masks[] = {
/*0 bytes*/ /*1 byte*/ /*2 bytes*/ /*3 bytes*/
0x00000000, 0x000000FF, 0x0000FFFF, 0x00FFFFFF, /* offset 0 */
0x00000000, 0x0000FF00, 0x00FFFF00, 0xFFFFFF00, /* offset 1 */
0x00000000, 0x00FF0000, 0xFFFF0000, 0xFFFF0000, /* offset 2 */
0x00000000, 0xFF000000, 0xFF000000, 0xFF000000, /* offset 3 */
};
union l_util {
u_int16_t s[2];
u_int32_t l;
};
union q_util {
u_int16_t s[4];
u_int32_t l[2];
u_int64_t q;
};
static u_int64_t
in_cksumdata(const u_int32_t *lw, int len)
{
u_int64_t sum = 0;
u_int64_t prefilled;
int offset;
union q_util q_util;
if ((3 & (long) lw) == 0 && len == 20) {
sum = (u_int64_t) lw[0] + lw[1] + lw[2] + lw[3] + lw[4];
REDUCE32;
return sum;
}
if ((offset = 3 & (long) lw) != 0) {
const u_int32_t *masks = in_masks + (offset << 2);
lw = (u_int32_t *) (((long) lw) - offset);
sum = *lw++ & masks[len >= 3 ? 3 : len];
len -= 4 - offset;
if (len <= 0) {
REDUCE32;
return sum;
}
}
#if 0
/*
* Force to cache line boundary.
*/
offset = 32 - (0x1f & (long) lw);
if (offset < 32 && len > offset) {
len -= offset;
if (4 & offset) {
sum += (u_int64_t) lw[0];
lw += 1;
}
if (8 & offset) {
sum += (u_int64_t) lw[0] + lw[1];
lw += 2;
}
if (16 & offset) {
sum += (u_int64_t) lw[0] + lw[1] + lw[2] + lw[3];
lw += 4;
}
}
#endif
/*
* access prefilling to start load of next cache line.
* then add current cache line
* save result of prefilling for loop iteration.
*/
prefilled = lw[0];
while ((len -= 32) >= 4) {
u_int64_t prefilling = lw[8];
sum += prefilled + lw[1] + lw[2] + lw[3]
+ lw[4] + lw[5] + lw[6] + lw[7];
lw += 8;
prefilled = prefilling;
}
if (len >= 0) {
sum += prefilled + lw[1] + lw[2] + lw[3]
+ lw[4] + lw[5] + lw[6] + lw[7];
lw += 8;
} else {
len += 32;
}
while ((len -= 16) >= 0) {
sum += (u_int64_t) lw[0] + lw[1] + lw[2] + lw[3];
lw += 4;
}
len += 16;
while ((len -= 4) >= 0) {
sum += (u_int64_t) *lw++;
}
len += 4;
if (len > 0)
sum += (u_int64_t) (in_masks[len] & *lw);
REDUCE32;
return sum;
}
u_short
in_addword(u_short a, u_short b)
{
u_int64_t sum = a + b;
ADDCARRY(sum);
return (sum);
}
u_short
in_pseudo(u_int32_t a, u_int32_t b, u_int32_t c)
{
u_int64_t sum;
union q_util q_util;
union l_util l_util;
sum = (u_int64_t) a + b + c;
REDUCE16;
return (sum);
}
u_short
in_cksum_skip(struct mbuf *m, int len, int skip)
{
u_int64_t sum = 0;
int mlen = 0;
int clen = 0;
caddr_t addr;
union q_util q_util;
union l_util l_util;
len -= skip;
for (; skip && m; m = m->m_next) {
if (m->m_len > skip) {
mlen = m->m_len - skip;
addr = mtod(m, caddr_t) + skip;
goto skip_start;
} else {
skip -= m->m_len;
}
}
for (; m && len; m = m->m_next) {
if (m->m_len == 0)
continue;
mlen = m->m_len;
addr = mtod(m, caddr_t);
skip_start:
if (len < mlen)
mlen = len;
if ((clen ^ (long) addr) & 1)
sum += in_cksumdata((const u_int32_t *)addr, mlen) << 8;
else
sum += in_cksumdata((const u_int32_t *)addr, mlen);
clen += mlen;
len -= mlen;
}
REDUCE16;
return (~sum & 0xffff);
}
u_int in_cksum_hdr(const struct ip *ip)
{
u_int64_t sum = in_cksumdata((const u_int32_t *)ip, sizeof(struct ip));
union q_util q_util;
union l_util l_util;
REDUCE16;
return (~sum & 0xffff);
}
#else
/*
* These asm statements require __volatile because they pass information
* via the condition codes. GCC does not currently provide a way to specify
* the condition codes as an input or output operand.
*
* The LOAD macro below is effectively a prefetch into cache. GCC will
* load the value into a register but will not use it. Since modern CPUs
* reorder operations, this will generally take place in parallel with
* other calculations.
*/
#define ADD(n) __asm __volatile \
("addl %1, %0" : "+r" (sum) : \
"g" (((const u_int32_t *)w)[n / 4]))
#define ADDC(n) __asm __volatile \
("adcl %1, %0" : "+r" (sum) : \
"g" (((const u_int32_t *)w)[n / 4]))
#define LOAD(n) __asm __volatile \
("" : : "r" (((const u_int32_t *)w)[n / 4]))
#define MOP __asm __volatile \
("adcl $0, %0" : "+r" (sum))
u_short
in_cksum_skip(m, len, skip)
struct mbuf *m;
int len;
int skip;
{
register u_short *w;
register unsigned sum = 0;
register int mlen = 0;
int byte_swapped = 0;
union { char c[2]; u_short s; } su;
len -= skip;
for (; skip && m; m = m->m_next) {
if (m->m_len > skip) {
mlen = m->m_len - skip;
w = (u_short *)(mtod(m, u_char *) + skip);
goto skip_start;
} else {
skip -= m->m_len;
}
}
for (;m && len; m = m->m_next) {
if (m->m_len == 0)
continue;
w = mtod(m, u_short *);
if (mlen == -1) {
/*
* The first byte of this mbuf is the continuation
* of a word spanning between this mbuf and the
* last mbuf.
*/
/* su.c[0] is already saved when scanning previous
* mbuf. sum was REDUCEd when we found mlen == -1
*/
su.c[1] = *(u_char *)w;
sum += su.s;
w = (u_short *)((char *)w + 1);
mlen = m->m_len - 1;
len--;
} else
mlen = m->m_len;
skip_start:
if (len < mlen)
mlen = len;
len -= mlen;
/*
* Force to long boundary so we do longword aligned
* memory operations
*/
if (3 & (int) w) {
REDUCE;
if ((1 & (int) w) && (mlen > 0)) {
sum <<= 8;
su.c[0] = *(char *)w;
w = (u_short *)((char *)w + 1);
mlen--;
byte_swapped = 1;
}
if ((2 & (int) w) && (mlen >= 2)) {
sum += *w++;
mlen -= 2;
}
}
/*
* Advance to a 486 cache line boundary.
*/
if (4 & (int) w && mlen >= 4) {
ADD(0);
MOP;
w += 2;
mlen -= 4;
}
if (8 & (int) w && mlen >= 8) {
ADD(0);
ADDC(4);
MOP;
w += 4;
mlen -= 8;
}
/*
* Do as much of the checksum as possible 32 bits at at time.
* In fact, this loop is unrolled to make overhead from
* branches &c small.
*/
mlen -= 1;
while ((mlen -= 32) >= 0) {
/*
* Add with carry 16 words and fold in the last
* carry by adding a 0 with carry.
*
* The early ADD(16) and the LOAD(32) are to load
* the next 2 cache lines in advance on 486's. The
* 486 has a penalty of 2 clock cycles for loading
* a cache line, plus whatever time the external
* memory takes to load the first word(s) addressed.
* These penalties are unavoidable. Subsequent
* accesses to a cache line being loaded (and to
* other external memory?) are delayed until the
* whole load finishes. These penalties are mostly
* avoided by not accessing external memory for
* 8 cycles after the ADD(16) and 12 cycles after
* the LOAD(32). The loop terminates when mlen
* is initially 33 (not 32) to guaranteed that
* the LOAD(32) is within bounds.
*/
ADD(16);
ADDC(0);
ADDC(4);
ADDC(8);
ADDC(12);
LOAD(32);
ADDC(20);
ADDC(24);
ADDC(28);
MOP;
w += 16;
}
mlen += 32 + 1;
if (mlen >= 32) {
ADD(16);
ADDC(0);
ADDC(4);
ADDC(8);
ADDC(12);
ADDC(20);
ADDC(24);
ADDC(28);
MOP;
w += 16;
mlen -= 32;
}
if (mlen >= 16) {
ADD(0);
ADDC(4);
ADDC(8);
ADDC(12);
MOP;
w += 8;
mlen -= 16;
}
if (mlen >= 8) {
ADD(0);
ADDC(4);
MOP;
w += 4;
mlen -= 8;
}
if (mlen == 0 && byte_swapped == 0)
continue; /* worth 1% maybe ?? */
REDUCE;
while ((mlen -= 2) >= 0) {
sum += *w++;
}
if (byte_swapped) {
sum <<= 8;
byte_swapped = 0;
if (mlen == -1) {
su.c[1] = *(char *)w;
sum += su.s;
mlen = 0;
} else
mlen = -1;
} else if (mlen == -1)
/*
* This mbuf has odd number of bytes.
* There could be a word split betwen
* this mbuf and the next mbuf.
* Save the last byte (to prepend to next mbuf).
*/
su.c[0] = *(char *)w;
}
if (len)
printf("%s: out of data by %d\n", __func__, len);
if (mlen == -1) {
/* The last mbuf has odd # of bytes. Follow the
standard (the odd byte is shifted left by 8 bits) */
su.c[1] = 0;
sum += su.s;
}
REDUCE;
return (~sum & 0xffff);
}
#endif