Fix several bugs in the i386 asm statements used to speed up Internet
checksumming.  These bugs could cause bad code to be generated at
elevated optimization levels.

First, eliminate the use of preprocessor magic to form the address
fields of asm instructions.  It hid the actual addresses being
referenced from the compiler.  Without knowledge of all the data
dependencies, the compiler might apply optimizations that would
result in incorrect code.
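
For illustration, here is the ADD macro from the checksum loop before and
after the change (both forms appear in the diff below); only the way the
memory operand is described differs:

	/* Old: the offset #n is pasted into the instruction string, so the
	 * compiler sees only the pointer w, not the word actually read. */
	#define ADD(n)	__asm __volatile \
			("addl " #n "(%1), %0" : "+r" (sum) : "r" (w))

	/* New: the word at offset n is an explicit input operand, so the
	 * compiler knows exactly which data the instruction depends on. */
	#define ADD(n)	__asm __volatile \
			("addl %1, %0" : "+r" (sum) : \
			"g" (((const u_int32_t *)w)[n / 4]))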

Use "__asm __volatile" rather than "__asm" for instruction sequences
that pass information through the condition codes (the carry bit, in
this case).  Without __volatile, the compiler might add unrelated
code between consecutive __asm instructions, modifying the condition
codes.  I have seen GCC insert stack pointer adjustments in this
way, for example.  Unfortunately, GCC doesn't provide a way to
specify dependencies on the condition codes.  You can specify that
they are clobbered, but not that you are going to use them as input.
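
As a rough sketch of the carry chain these macros rely on (the function and
variable names below are illustrative, not part of this change): the addl
sets the carry flag and every following adcl both consumes and regenerates
it, so no carry-clobbering instruction may be scheduled in between.

	#include <sys/types.h>

	/*
	 * Illustrative only: sums three 32-bit words with end-around carry,
	 * the same pattern the ADD/ADDC/MOP macros expand to.  Each statement
	 * is __volatile so the compiler will not insert carry-clobbering code
	 * (such as stack pointer adjustments) between the addl/adcl sequence.
	 */
	static __inline u_int
	sum3(const u_int32_t *w)
	{
		u_int sum = 0;

		__asm __volatile ("addl %1, %0" : "+r" (sum) : "g" (w[0]));
		__asm __volatile ("adcl %1, %0" : "+r" (sum) : "g" (w[1]));
		__asm __volatile ("adcl %1, %0" : "+r" (sum) : "g" (w[2]));
		__asm __volatile ("adcl $0, %0" : "+r" (sum));	/* fold in last carry */
		return (sum);
	}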

Finally, simplify the LOAD macro.  This macro is used as a poor
man's prefetch.  The simpler version gives the compiler more leeway
about just how it performs the prefetch.
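
For reference, the old and new forms of LOAD from the diff below:

	/* Old: a dummy byte load into a junk register the caller had to declare. */
	#define LOAD(n)	__asm __volatile \
			("movb " #n "(%1), %0" : "=r" (junk) : "r" (w))

	/* New: an empty asm whose input operand forces the word (and with it its
	 * cache line) into a register, but leaves how and when to the compiler. */
	#define LOAD(n)	__asm __volatile \
			("" : : "r" (((const u_int32_t *)w)[n / 4]))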

MFC after:	1 week
John Polstra 2002-06-22 22:35:53 +00:00
parent 7e00e252b3
commit d5de6c2a5f
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=98648
2 changed files with 27 additions and 17 deletions


@@ -62,15 +62,23 @@
 #define REDUCE {sum = (sum & 0xffff) + (sum >> 16); ADDCARRY(sum);}
 
 /*
- * Thanks to gcc we don't have to guess
- * which registers contain sum & w.
+ * These asm statements require __volatile because they pass information
+ * via the condition codes. GCC does not currently provide a way to specify
+ * the condition codes as an input or output operand.
+ *
+ * The LOAD macro below is effectively a prefetch into cache. GCC will
+ * load the value into a register but will not use it. Since modern CPUs
+ * reorder operations, this will generally take place in parallel with
+ * other calculations.
  */
 #define ADD(n)	__asm __volatile \
-		("addl " #n "(%1), %0" : "+r" (sum) : "r" (w))
+		("addl %1, %0" : "+r" (sum) : \
+		"g" (((const u_int32_t *)w)[n / 4]))
 #define ADDC(n)	__asm __volatile \
-		("adcl " #n "(%1), %0" : "+r" (sum) : "r" (w))
+		("adcl %1, %0" : "+r" (sum) : \
+		"g" (((const u_int32_t *)w)[n / 4]))
 #define LOAD(n)	__asm __volatile \
-		("movb " #n "(%1), %0" : "=r" (junk) : "r" (w))
+		("" : : "r" (((const u_int32_t *)w)[n / 4]))
 #define MOP	__asm __volatile \
 		("adcl $0, %0" : "+r" (sum))
@@ -163,7 +171,6 @@ in_cksum_skip(m, len, skip)
 		 */
 		mlen -= 1;
 		while ((mlen -= 32) >= 0) {
-			u_char junk;
 			/*
 			 * Add with carry 16 words and fold in the last
 			 * carry by adding a 0 with carry.


@@ -58,10 +58,13 @@ static __inline u_int
 in_cksum_hdr(const struct ip *ip)
 {
 	register u_int sum = 0;
 
-#define ADD(n)	__asm("addl " #n "(%1), %0" : "+r" (sum) : "r" (ip))
-#define ADDC(n)	__asm("adcl " #n "(%1), %0" : "+r" (sum) : "r" (ip))
-#define MOP	__asm("adcl $0, %0" : "+r" (sum))
+/* __volatile is necessary here because the condition codes are used. */
+#define ADD(n)	__asm __volatile ("addl %1, %0" : "+r" (sum) : \
+	"g" (((const u_int32_t *)ip)[n / 4]))
+#define ADDC(n)	__asm __volatile ("adcl %1, %0" : "+r" (sum) : \
+	"g" (((const u_int32_t *)ip)[n / 4]))
+#define MOP	__asm __volatile ("adcl $0, %0" : "+r" (sum))
 
 	ADD(0);
 	ADDC(4);
@@ -90,9 +93,9 @@ in_cksum_update(struct ip *ip)
 static __inline u_short
 in_addword(u_short sum, u_short b)
 {
-	__asm("addw %1, %0" : "+r" (sum) : "r" (b));
-	__asm("adcw $0, %0" : "+r" (sum));
+	/* __volatile is necessary because the condition codes are used. */
+	__asm __volatile ("addw %1, %0" : "+r" (sum) : "r" (b));
+	__asm __volatile ("adcw $0, %0" : "+r" (sum));
 	return (sum);
 }
@@ -100,10 +103,10 @@ in_addword(u_short sum, u_short b)
 static __inline u_short
 in_pseudo(u_int sum, u_int b, u_int c)
 {
-	__asm("addl %1, %0" : "+r" (sum) : "r" (b));
-	__asm("adcl %1, %0" : "+r" (sum) : "r" (c));
-	__asm("adcl $0, %0" : "+r" (sum));
+	/* __volatile is necessary because the condition codes are used. */
+	__asm __volatile ("addl %1, %0" : "+r" (sum) : "g" (b));
+	__asm __volatile ("adcl %1, %0" : "+r" (sum) : "g" (c));
+	__asm __volatile ("adcl $0, %0" : "+r" (sum));
 	sum = (sum & 0xffff) + (sum >> 16);
 	if (sum > 0xffff)