diff --git a/sys/i386/i386/in_cksum.c b/sys/i386/i386/in_cksum.c
index f53f70997be7..d875ed35a912 100644
--- a/sys/i386/i386/in_cksum.c
+++ b/sys/i386/i386/in_cksum.c
@@ -62,15 +62,23 @@
 #define REDUCE		{sum = (sum & 0xffff) + (sum >> 16); ADDCARRY(sum);}
 
 /*
- * Thanks to gcc we don't have to guess
- * which registers contain sum & w.
+ * These asm statements require __volatile because they pass information
+ * via the condition codes.  GCC does not currently provide a way to specify
+ * the condition codes as an input or output operand.
+ *
+ * The LOAD macro below is effectively a prefetch into cache.  GCC will
+ * load the value into a register but will not use it.  Since modern CPUs
+ * reorder operations, this will generally take place in parallel with
+ * other calculations.
  */
 #define ADD(n)	__asm __volatile \
-		("addl " #n "(%1), %0" : "+r" (sum) : "r" (w))
+		("addl %1, %0" : "+r" (sum) : \
+		"g" (((const u_int32_t *)w)[n / 4]))
 #define ADDC(n)	__asm __volatile \
-		("adcl " #n "(%1), %0" : "+r" (sum) : "r" (w))
+		("adcl %1, %0" : "+r" (sum) : \
+		"g" (((const u_int32_t *)w)[n / 4]))
 #define LOAD(n)	__asm __volatile \
-		("movb " #n "(%1), %0" : "=r" (junk) : "r" (w))
+		("" : : "r" (((const u_int32_t *)w)[n / 4]))
 #define MOP	__asm __volatile \
 		("adcl $0, %0" : "+r" (sum))
 
@@ -163,7 +171,6 @@ in_cksum_skip(m, len, skip)
 		 */
 		mlen -= 1;
 		while ((mlen -= 32) >= 0) {
-			u_char junk;
 			/*
 			 * Add with carry 16 words and fold in the last
 			 * carry by adding a 0 with carry.
diff --git a/sys/i386/include/in_cksum.h b/sys/i386/include/in_cksum.h
index f30c45de79d8..b1cb3f8d98a4 100644
--- a/sys/i386/include/in_cksum.h
+++ b/sys/i386/include/in_cksum.h
@@ -58,10 +58,13 @@ static __inline u_int
 in_cksum_hdr(const struct ip *ip)
 {
 	register u_int sum = 0;
-
-#define ADD(n)	__asm("addl " #n "(%1), %0" : "+r" (sum) : "r" (ip))
-#define ADDC(n)	__asm("adcl " #n "(%1), %0" : "+r" (sum) : "r" (ip))
-#define MOP	__asm("adcl $0, %0" : "+r" (sum))
+
+/* __volatile is necessary here because the condition codes are used. */
+#define ADD(n)	__asm __volatile ("addl %1, %0" : "+r" (sum) : \
+	"g" (((const u_int32_t *)ip)[n / 4]))
+#define ADDC(n)	__asm __volatile ("adcl %1, %0" : "+r" (sum) : \
+	"g" (((const u_int32_t *)ip)[n / 4]))
+#define MOP	__asm __volatile ("adcl $0, %0" : "+r" (sum))
 
 	ADD(0);
 	ADDC(4);
@@ -90,9 +93,9 @@ in_cksum_update(struct ip *ip)
 static __inline u_short
 in_addword(u_short sum, u_short b)
 {
-
-	__asm("addw %1, %0" : "+r" (sum) : "r" (b));
-	__asm("adcw $0, %0" : "+r" (sum));
+	/* __volatile is necessary because the condition codes are used. */
+	__asm __volatile ("addw %1, %0" : "+r" (sum) : "r" (b));
+	__asm __volatile ("adcw $0, %0" : "+r" (sum));
 	return (sum);
 }
 
@@ -100,10 +103,10 @@ in_addword(u_short sum, u_short b)
 static __inline u_short
 in_pseudo(u_int sum, u_int b, u_int c)
 {
-
-	__asm("addl %1, %0" : "+r" (sum) : "r" (b));
-	__asm("adcl %1, %0" : "+r" (sum) : "r" (c));
-	__asm("adcl $0, %0" : "+r" (sum));
+	/* __volatile is necessary because the condition codes are used. */
+	__asm __volatile ("addl %1, %0" : "+r" (sum) : "g" (b));
+	__asm __volatile ("adcl %1, %0" : "+r" (sum) : "g" (c));
+	__asm __volatile ("adcl $0, %0" : "+r" (sum));
 	sum = (sum & 0xffff) + (sum >> 16);
 	if (sum > 0xffff)