Use asm versions of in_cksum() and friends.

This commit is contained in:
cognet 2005-05-24 21:44:34 +00:00
parent 85b2c4ecf8
commit 046b8b69f5
3 changed files with 18 additions and 245 deletions

View File

@ -70,22 +70,6 @@ __FBSDID("$FreeBSD$");
ADDCARRY(sum); \
}
/*
 * Byte-selection masks for a 32-bit word that is only partly covered by
 * the data being summed.
 *
 * The table is laid out as four rows of four entries: the row is the
 * byte offset (0-3) of the first wanted byte inside the word, the column
 * is the number of wanted bytes (0-3).  in_cksumdata() selects a row
 * with in_masks + (offset << 2) and then indexes the column with the
 * byte count; for the tail of a buffer it uses row 0 directly.
 *
 * Where a row cannot supply as many bytes as the column asks for, the
 * entry simply covers the bytes that are left in the word.
 *
 * Two layouts exist: with __ARMEB__ defined the first byte of a word is
 * its most significant byte, otherwise it is the least significant one.
 */
static const u_int32_t in_masks[] = {
#ifdef __ARMEB__
	/* bytes:   0           1           2           3 */
	0x00000000, 0xff000000, 0xffff0000, 0xffffff00,	/* offset 0 */
	0x00000000, 0x00ff0000, 0x00ffff00, 0x00ffffff,	/* offset 1 */
	0x00000000, 0x0000ff00, 0x0000ffff, 0x0000ffff,	/* offset 2 */
	0x00000000, 0x000000ff, 0x000000ff, 0x000000ff,	/* offset 3 */
#else
	/* bytes:   0           1           2           3 */
	0x00000000, 0x000000ff, 0x0000ffff, 0x00ffffff,	/* offset 0 */
	0x00000000, 0x0000ff00, 0x00ffff00, 0xffffff00,	/* offset 1 */
	0x00000000, 0x00ff0000, 0xffff0000, 0xffff0000,	/* offset 2 */
	0x00000000, 0xff000000, 0xff000000, 0xff000000,	/* offset 3 */
#endif
};
union l_util {
u_int16_t s[2];
u_int32_t l;
@ -96,87 +80,6 @@ union q_util {
u_int64_t q;
};
/*
 * Add up `len` bytes starting at `buf`, taken as 32-bit words, in a
 * 64-bit accumulator and return the accumulator after REDUCE32.
 *
 * buf may start on any byte boundary.  A misaligned head and a 1-3 byte
 * tail are handled by loading the aligned word that contains them and
 * masking it with an entry of in_masks[].
 *
 * REDUCE32 is a macro defined outside this function and invoked without
 * arguments; it presumably works on the locals `sum` and `q_util` by
 * name -- confirm before renaming either of them.
 */
static u_int64_t
in_cksumdata(const void *buf, int len)
{
const u_int32_t *lw = (const u_int32_t *) buf;
u_int64_t sum = 0;
u_int64_t prefilled;
int offset;
/* Not referenced directly below; presumably scratch space for REDUCE32. */
union q_util q_util;
/* Fast path: a word-aligned buffer of exactly 20 bytes is five words. */
if ((3 & (long) lw) == 0 && len == 20) {
sum = (u_int64_t) lw[0] + lw[1] + lw[2] + lw[3] + lw[4];
REDUCE32;
return sum;
}
/*
 * Misaligned start: step back to the containing aligned word, load it,
 * and keep only the wanted bytes.  The mask row is chosen by the offset
 * and the column by the byte count, capped at 3; the table rows already
 * limit column 3 to the bytes that remain in the word.
 */
if ((offset = 3 & (long) lw) != 0) {
const u_int32_t *masks = in_masks + (offset << 2);
lw = (u_int32_t *) (((long) lw) - offset);
sum = *lw++ & masks[len >= 3 ? 3 : len];
/* 4 - offset bytes of that first word belonged to the buffer. */
len -= 4 - offset;
if (len <= 0) {
REDUCE32;
return sum;
}
}
#if 0
/*
* Force to cache line boundary.
*/
offset = 32 - (0x1f & (long) lw);
if (offset < 32 && len > offset) {
len -= offset;
if (4 & offset) {
sum += (u_int64_t) lw[0];
lw += 1;
}
if (8 & offset) {
sum += (u_int64_t) lw[0] + lw[1];
lw += 2;
}
if (16 & offset) {
sum += (u_int64_t) lw[0] + lw[1] + lw[2] + lw[3];
lw += 4;
}
}
#endif
/*
* Main loop, 32 bytes (eight words) per iteration.  The first word of
* the following group is loaded ahead of time ("prefilling") so that its
* load is started before the current group is added; the value is then
* carried into the next iteration as `prefilled`.
*
* NOTE(review): lw[0] is loaded here unconditionally, including when
* len is 0 on entry to this point (aligned buf, len == 0); the value is
* unused in that case but the load still happens.
*/
prefilled = lw[0];
/*
 * The loop only runs while at least 4 bytes remain beyond the current
 * 32, so the read-ahead of lw[8] is a whole word of the data.
 */
while ((len -= 32) >= 4) {
u_int64_t prefilling = lw[8];
sum += prefilled + lw[1] + lw[2] + lw[3]
+ lw[4] + lw[5] + lw[6] + lw[7];
lw += 8;
prefilled = prefilling;
}
/*
 * len is now (remaining - 32).  Non-negative means one more full
 * 32-byte group is left; otherwise undo the subtraction.
 */
if (len >= 0) {
sum += prefilled + lw[1] + lw[2] + lw[3]
+ lw[4] + lw[5] + lw[6] + lw[7];
lw += 8;
} else {
len += 32;
}
/* Remaining 16-byte groups. */
while ((len -= 16) >= 0) {
sum += (u_int64_t) lw[0] + lw[1] + lw[2] + lw[3];
lw += 4;
}
len += 16;
/* Remaining whole words. */
while ((len -= 4) >= 0) {
sum += (u_int64_t) *lw++;
}
len += 4;
/*
 * Tail of 1-3 bytes: mask the word with the offset-0 row of in_masks.
 * NOTE(review): this loads a full 32-bit word even though only `len`
 * bytes of it belong to the buffer.
 */
if (len > 0)
sum += (u_int64_t) (in_masks[len] & *lw);
REDUCE32;
return sum;
}
u_short
in_addword(u_short a, u_short b)
{
@ -229,9 +132,9 @@ in_cksum_skip(struct mbuf *m, int len, int skip)
mlen = len;
if ((clen ^ (int) addr) & 1)
sum += in_cksumdata(addr, mlen) << 8;
sum += do_cksum(addr, mlen) << 8;
else
sum += in_cksumdata(addr, mlen);
sum += do_cksum(addr, mlen);
clen += mlen;
len -= mlen;
@ -239,12 +142,3 @@ in_cksum_skip(struct mbuf *m, int len, int skip)
REDUCE16;
return (~sum & 0xffff);
}
/*
 * Checksum a fixed-size IP header.
 *
 * Sums exactly sizeof(struct ip) bytes starting at `ip` with
 * in_cksumdata(), applies REDUCE16, and returns the bitwise complement
 * of the result masked to 16 bits.
 *
 * REDUCE16 is a macro defined outside this function and invoked without
 * arguments; it presumably operates on the locals `sum`, `q_util` and
 * `l_util` by name -- confirm before renaming any of them.
 */
u_int in_cksum_hdr(const struct ip *ip)
{
u_int64_t sum = in_cksumdata(ip, sizeof(struct ip));
/* Not referenced directly below; presumably scratch space for REDUCE16. */
union q_util q_util;
union l_util l_util;
REDUCE16;
return (~sum & 0xffff);
}

View File

@ -92,142 +92,11 @@ ENTRY(in_cksum)
ldmfd sp!, {r4-r11,pc}
#ifdef INET
/*
* int in4_cksum(struct mbuf *m, u_int8_t nxt, int off, int len)
*
* Entry:
* r0 m
* r1 nxt
* r2 off
* r3 len
*
* Outline of the code below:
*  1. If nxt is non-zero, seed the sum in r8 with nxt + len plus the two
*     32-bit words at IP_SRC and IP_DST in the first mbuf's data.
*  2. Walk the mbuf chain until `off` bytes have been skipped.
*  3. Branch to .Lin_cksum_entry4 (defined outside this view) with
*     r0 = first byte to sum, r1 = bytes left in that mbuf, ip = next
*     mbuf, r9 = len, r10 = 0 and the seed still in r8.
*/
/* LINTSTUB: Func: int in4_cksum(struct mbuf *, u_int8_t, int, int) */
ENTRY(in4_cksum)
stmfd sp!, {r4-r11,lr}
mov r8, #0x00 /* Accumulate sum in r8 */
/*
* First, deal with a pseudo header, if present
*/
ldr r6, [r0, #(M_DATA)]
cmp r1, #0x00
beq .Lin4_cksum_skip_entry
#ifdef __XSCALE__
pld [r6, #(IP_SRC)]
#endif
add r4, r6, #(IP_SRC)
ands r4, r4, #0x03
add r8, r1, r3 /* sum = nxt + len */
/*
* Dispatch on the low two bits of the address of the source field.
* When they are zero the addne is not executed and control falls through
* the nop into case 0x00.  Otherwise pc, which reads as the address of
* the first case-0x00 instruction, is advanced by r4 << 5, i.e. 32 bytes
* or 8 instructions per case.  Every case below must therefore stay
* exactly 8 instructions long; the trailing nops are padding for that.
*/
addne pc, pc, r4, lsl #5 /* Handle alignment of pseudo header */
nop
/* 0x00: Data 32-bit aligned */
ldr r5, [r6, #(IP_SRC)]
ldr r4, [r6, #(IP_DST)]
b .Lin4_cksum_add_ips
nop
nop
nop
nop
nop
/* 0x01: Data 8-bit aligned */
ldr r4, [r6, #(IP_SRC - 1)] /* BE:r4 = x012 LE:r4 = 210x */
ldr r5, [r6, #(IP_SRC + 3)] /* BE:r5 = 3456 LE:r5 = 6543 */
ldrb r7, [r6, #(IP_SRC + 7)] /* r7 = ...7 */
#ifdef __ARMEB__
mov r4, r4, lsl #8 /* r4 = 012. */
orr r4, r4, r5, lsr #24 /* r4 = 0123 */
orr r5, r7, r5, lsl #8 /* r5 = 4567 */
b .Lin4_cksum_add_ips
nop
#else
mov r4, r4, lsr #8 /* r4 = .210 */
orr r4, r4, r5, lsl #24 /* r4 = 3210 */
mov r5, r5, lsr #8 /* r5 = .654 */
orr r5, r5, r7, lsl #24 /* r5 = 7654 */
b .Lin4_cksum_add_ips
#endif
/* 0x02: Data 16-bit aligned */
#ifdef __XSCALE__
ldrh r5, [r6, #(IP_SRC)] /* BE:r5 = ..01 LE:r5 = ..10 */
ldrh r7, [r6, #(IP_DST + 2)] /* BE:r7 = ..67 LE:r7 = ..76 */
ldr r4, [r6, #(IP_SRC + 2)] /* BE:r4 = 2345 LE:r4 = 5432 */
orr r5, r7, r5, lsl #16 /* BE:r5 = 0167 LE:r5 = 1076 */
b .Lin4_cksum_add_ips
nop
nop
nop
#else
ldr r4, [r6, #(IP_SRC - 2)] /* r4 = 10xx */
/*
* NOTE(review): if IP_DST == IP_SRC + 4 (the two fields being adjacent
* 32-bit addresses -- confirm against the generated offsets), then
* IP_DST - 2 is the same address as IP_SRC + 2 used by the next load,
* so r7 would hold bytes 2-5 rather than the "xx76" the comment
* describes.  The __XSCALE__ variant above uses IP_DST + 2 for bytes
* 6-7.  Verify which offset is intended.
*/
ldr r7, [r6, #(IP_DST - 2)] /* r7 = xx76 */
ldr r5, [r6, #(IP_SRC + 2)] /* r5 = 5432 */
mov r4, r4, lsr #16 /* r4 = ..10 */
orr r4, r4, r7, lsl #16 /* r4 = 7610 */
b .Lin4_cksum_add_ips
nop
nop
#endif
/* 0x03: Data 8-bit aligned */
ldrb r4, [r6, #(IP_SRC)] /* r4 = ...0 */
ldr r5, [r6, #(IP_SRC + 1)] /* BE:r5 = 1234 LE:r5 = 4321 */
ldr r7, [r6, #(IP_SRC + 5)] /* BE:r7 = 567x LE:r7 = x765 */
#ifdef __ARMEB__
mov r4, r4, lsl #24 /* r4 = 0... */
orr r4, r4, r5, lsr #8 /* r4 = 0123 */
mov r5, r5, lsl #24 /* r5 = 4... */
orr r5, r5, r7, lsr #8 /* r5 = 4567 */
#else
orr r4, r4, r5, lsl #8 /* r4 = 3210 */
mov r5, r5, lsr #24 /* r5 = ...4 */
orr r5, r5, r7, lsl #8 /* r5 = 7654 */
#endif
/* FALLTHROUGH */
/*
* r4 and r5 now hold the two address words.  Add them, then add the
* nxt + len value already in r8 together with the carry; without
* __ARMEB__ that value is shifted left by 8 bits first.  The final adc
* folds the last carry back into r8.
*/
.Lin4_cksum_add_ips:
adds r5, r5, r4
#ifndef __ARMEB__
adcs r8, r5, r8, lsl #8
#else
adcs r8, r5, r8
#endif
adc r8, r8, #0x00
/* r1 = 0 so the first subtraction at the skip entry leaves off as is. */
mov r1, #0x00
b .Lin4_cksum_skip_entry
/*
* Skip `off` bytes of the chain.  Each pass loads the current mbuf's
* length and data pointer, advances r0 to the next mbuf, and subtracts
* the length from r2.  The walk stops when r2 goes negative, meaning the
* offset lies inside the mbuf just loaded; running out of mbufs first is
* a panic.
*/
.Lin4_cksum_skip_loop:
ldr r1, [r0, #(M_LEN)]
ldr r6, [r0, #(M_DATA)]
ldr r0, [r0, #(M_NEXT)]
.Lin4_cksum_skip_entry:
subs r2, r2, r1
blt .Lin4_cksum_skip_done
cmp r0, #0x00
bne .Lin4_cksum_skip_loop
b .Lin4_cksum_whoops
/*
* Here r2 = (offset within this mbuf) - (its length), which is negative.
*   ip  = next mbuf in the chain
*   r0  = data + length + r2 = address of the first byte to sum
*   r1  = -r2 = bytes remaining in this mbuf from that address
*   r9  = len
*   r10 = 0
*/
.Lin4_cksum_skip_done:
mov ip, r0
add r0, r2, r6
add r0, r0, r1
rsb r1, r2, #0x00
mov r9, r3
mov r10, #0x00
b .Lin_cksum_entry4
/* The chain ended before `off` bytes were skipped: panic with the text below. */
.Lin4_cksum_whoops:
adr r0, .Lin4_cksum_whoops_str
bl _C_LABEL(panic)
.Lin4_cksum_whoops_str:
.asciz "in4_cksum: out of mbufs\n"
.align 5
#endif /* INET */
/*
 * u_int do_cksum(const void *, int)
 *
 * C-callable wrapper around L_cksumdata.  r0 and r1 are not modified
 * before the call, so the two C arguments reach L_cksumdata in the
 * registers they arrived in.
 */
ENTRY(do_cksum)
/* Save r4-r11 and the return address across the call. */
stmfd sp!, {r4-r11, lr}
/*
 * L_cksumdata is defined outside this view.
 * NOTE(review): presumably it leaves the accumulated sum in r2 -- confirm.
 */
bl L_cksumdata
/* Whatever L_cksumdata left in r2 becomes the return value. */
mov r0, r2
/* Restore r4-r11 and return by loading the saved lr into pc. */
ldmfd sp!, {r4-r11, pc}
/*
* The main in*_cksum() workhorse...
*

View File

@ -43,9 +43,19 @@
#ifdef _KERNEL
/*
 * Kernel-only checksum entry points.
 *
 * NOTE(review): in_cksum_hdr is prototyped here with external linkage and
 * is also defined as `static __inline` further down in this header; only
 * one of the two can be in effect -- confirm which one is current.
 */
u_short in_cksum(struct mbuf *m, int len);
u_int in_cksum_hdr(const struct ip *ip);
u_short in_addword(u_short sum, u_short b);
u_short in_pseudo(u_int sum, u_int b, u_int c);
u_short in_cksum_skip(struct mbuf *m, int len, int skip);
/* Checksum primitive over a raw buffer and byte count; the result is folded to 16 bits by in_cksum_hdr(). */
u_int do_cksum(const void *, int);
/*
 * Checksum a fixed-size IP header.
 *
 * do_cksum() is run over exactly sizeof(struct ip) bytes starting at
 * `ip`.  Its result is folded to 16 bits by adding the upper half to the
 * lower half and, if that addition overflowed 16 bits, wrapping the
 * carry around.  The value returned is the bitwise complement of the
 * folded sum, masked to 16 bits.
 */
static __inline u_int
in_cksum_hdr(const struct ip *ip)
{
	u_int folded;

	folded = do_cksum(ip, sizeof(struct ip));

	/* Add the high 16 bits into the low 16 bits. */
	folded = (folded >> 16) + (folded & 0xffff);

	/* At most one carry is possible here; wrap it around. */
	if (folded > 0xffff)
		folded -= 0xffff;

	return (0xffff & ~folded);
}
#endif /* _KERNEL */
#endif /* _MACHINE_IN_CKSUM_H_ */