Use asm versions of in_cksum() and friends.
This commit is contained in:
parent
85b2c4ecf8
commit
046b8b69f5
@ -70,22 +70,6 @@ __FBSDID("$FreeBSD$");
|
||||
ADDCARRY(sum); \
|
||||
}
|
||||
|
||||
/*
 * Byte-selection masks applied by in_cksumdata() to a word-aligned 32-bit
 * load that is only partly covered by the buffer.
 *
 * Each row of four entries corresponds to the byte offset (0-3) of the data
 * inside the word; each column corresponds to the number of bytes wanted
 * (0-3).  in_cksumdata() indexes the table as in_masks[(offset << 2) + n],
 * clamping n to 3 for a misaligned start, and as in_masks[n] (offset 0 row)
 * for the trailing partial word.
 *
 * Two layouts are provided and selected by __ARMEB__; the second is the
 * byte-mirrored form of the first.
 */
static const u_int32_t in_masks[] = {
|
||||
#ifndef __ARMEB__
|
||||
/*0 bytes*/ /*1 byte*/ /*2 bytes*/ /*3 bytes*/
|
||||
0x00000000, 0x000000FF, 0x0000FFFF, 0x00FFFFFF, /* offset 0 */
|
||||
0x00000000, 0x0000FF00, 0x00FFFF00, 0xFFFFFF00, /* offset 1 */
|
||||
0x00000000, 0x00FF0000, 0xFFFF0000, 0xFFFF0000, /* offset 2 */
|
||||
0x00000000, 0xFF000000, 0xFF000000, 0xFF000000, /* offset 3 */
|
||||
#else
|
||||
/*0 bytes*/ /*1 byte*/ /*2 bytes*/ /*3 bytes*/
|
||||
0x00000000, 0xFF000000, 0xFFFF0000, 0xFFFFFF00, /* offset 0 */
|
||||
0x00000000, 0x00FF0000, 0x00FFFF00, 0x00FFFFFF, /* offset 1 */
|
||||
0x00000000, 0x0000FF00, 0x0000FFFF, 0x0000FFFF, /* offset 2 */
|
||||
0x00000000, 0x000000FF, 0x000000FF, 0x000000FF, /* offset 3 */
|
||||
#endif
|
||||
};
|
||||
|
||||
union l_util {
|
||||
u_int16_t s[2];
|
||||
u_int32_t l;
|
||||
@ -96,87 +80,6 @@ union q_util {
|
||||
u_int64_t q;
|
||||
};
|
||||
|
||||
static u_int64_t
|
||||
in_cksumdata(const void *buf, int len)
|
||||
{
|
||||
const u_int32_t *lw = (const u_int32_t *) buf;
|
||||
u_int64_t sum = 0;
|
||||
u_int64_t prefilled;
|
||||
int offset;
|
||||
union q_util q_util;
|
||||
|
||||
if ((3 & (long) lw) == 0 && len == 20) {
|
||||
sum = (u_int64_t) lw[0] + lw[1] + lw[2] + lw[3] + lw[4];
|
||||
REDUCE32;
|
||||
return sum;
|
||||
}
|
||||
|
||||
if ((offset = 3 & (long) lw) != 0) {
|
||||
const u_int32_t *masks = in_masks + (offset << 2);
|
||||
lw = (u_int32_t *) (((long) lw) - offset);
|
||||
sum = *lw++ & masks[len >= 3 ? 3 : len];
|
||||
len -= 4 - offset;
|
||||
if (len <= 0) {
|
||||
REDUCE32;
|
||||
return sum;
|
||||
}
|
||||
}
|
||||
#if 0
|
||||
/*
|
||||
* Force to cache line boundary.
|
||||
*/
|
||||
offset = 32 - (0x1f & (long) lw);
|
||||
if (offset < 32 && len > offset) {
|
||||
len -= offset;
|
||||
if (4 & offset) {
|
||||
sum += (u_int64_t) lw[0];
|
||||
lw += 1;
|
||||
}
|
||||
if (8 & offset) {
|
||||
sum += (u_int64_t) lw[0] + lw[1];
|
||||
lw += 2;
|
||||
}
|
||||
if (16 & offset) {
|
||||
sum += (u_int64_t) lw[0] + lw[1] + lw[2] + lw[3];
|
||||
lw += 4;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
/*
|
||||
* access prefilling to start load of next cache line.
|
||||
* then add current cache line
|
||||
* save result of prefilling for loop iteration.
|
||||
*/
|
||||
prefilled = lw[0];
|
||||
while ((len -= 32) >= 4) {
|
||||
u_int64_t prefilling = lw[8];
|
||||
sum += prefilled + lw[1] + lw[2] + lw[3]
|
||||
+ lw[4] + lw[5] + lw[6] + lw[7];
|
||||
lw += 8;
|
||||
prefilled = prefilling;
|
||||
}
|
||||
if (len >= 0) {
|
||||
sum += prefilled + lw[1] + lw[2] + lw[3]
|
||||
+ lw[4] + lw[5] + lw[6] + lw[7];
|
||||
lw += 8;
|
||||
} else {
|
||||
len += 32;
|
||||
}
|
||||
while ((len -= 16) >= 0) {
|
||||
sum += (u_int64_t) lw[0] + lw[1] + lw[2] + lw[3];
|
||||
lw += 4;
|
||||
}
|
||||
len += 16;
|
||||
while ((len -= 4) >= 0) {
|
||||
sum += (u_int64_t) *lw++;
|
||||
}
|
||||
len += 4;
|
||||
if (len > 0)
|
||||
sum += (u_int64_t) (in_masks[len] & *lw);
|
||||
REDUCE32;
|
||||
return sum;
|
||||
}
|
||||
|
||||
u_short
|
||||
in_addword(u_short a, u_short b)
|
||||
{
|
||||
@ -229,9 +132,9 @@ in_cksum_skip(struct mbuf *m, int len, int skip)
|
||||
mlen = len;
|
||||
|
||||
if ((clen ^ (int) addr) & 1)
|
||||
sum += in_cksumdata(addr, mlen) << 8;
|
||||
sum += do_cksum(addr, mlen) << 8;
|
||||
else
|
||||
sum += in_cksumdata(addr, mlen);
|
||||
sum += do_cksum(addr, mlen);
|
||||
|
||||
clen += mlen;
|
||||
len -= mlen;
|
||||
@ -239,12 +142,3 @@ in_cksum_skip(struct mbuf *m, int len, int skip)
|
||||
REDUCE16;
|
||||
return (~sum & 0xffff);
|
||||
}
|
||||
|
||||
/*
 * Checksum a fixed-size IP header: sum sizeof(struct ip) bytes starting at
 * `ip' with in_cksumdata(), apply REDUCE16 to the result, and return the
 * one's complement of its low 16 bits.
 *
 * q_util and l_util are never referenced directly in this body; presumably
 * REDUCE16 uses them by name as scratch space -- confirm against the macro
 * definition before renaming or removing them.
 */
u_int in_cksum_hdr(const struct ip *ip)
|
||||
{
|
||||
u_int64_t sum = in_cksumdata(ip, sizeof(struct ip));
|
||||
union q_util q_util;
|
||||
union l_util l_util;
|
||||
REDUCE16;
|
||||
/* Invert and keep only the 16-bit checksum field. */
return (~sum & 0xffff);
|
||||
}
|
||||
|
@ -92,142 +92,11 @@ ENTRY(in_cksum)
|
||||
ldmfd sp!, {r4-r11,pc}
|
||||
|
||||
|
||||
#ifdef INET
|
||||
/*
|
||||
* int in4_cksum(struct mbuf *m, u_int8_t nxt, int off, int len)
|
||||
*
|
||||
* Entry:
|
||||
* r0 m
|
||||
* r1 nxt
|
||||
* r2 off
|
||||
* r3 len
|
||||
*/
|
||||
/* LINTSTUB: Func: int in4_cksum(struct mbuf *, u_int8_t, int, int) */
|
||||
ENTRY(in4_cksum)
|
||||
/* Push r4-r11 and the return address. */
stmfd sp!, {r4-r11,lr}
|
||||
mov r8, #0x00 /* Accumulate sum in r8 */
|
||||
|
||||
/*
|
||||
* First, deal with a pseudo header, if present
|
||||
*/
|
||||
/* r6 = M_DATA field of m. */
ldr r6, [r0, #(M_DATA)]
|
||||
/* nxt == 0: no pseudo header; go straight to skipping `off' bytes (r1 is 0). */
cmp r1, #0x00
|
||||
beq .Lin4_cksum_skip_entry
|
||||
|
||||
#ifdef __XSCALE__
|
||||
pld [r6, #(IP_SRC)]
|
||||
#endif
|
||||
/* r4 = low two bits of the address of the IP_SRC field; Z set if word aligned. */
add r4, r6, #(IP_SRC)
|
||||
ands r4, r4, #0x03
|
||||
add r8, r1, r3 /* sum = nxt + len */
|
||||
/*
 * Computed jump: every alignment case below is exactly 8 instructions
 * (32 bytes, hence lsl #5) and is nop-padded to that size.  In ARM state pc
 * reads as this instruction + 8, i.e. the start of the 0x00 case; when
 * r4 == 0 the addne is not executed and control falls through the nop into
 * that case.  Do not add or remove instructions in a case without adjusting
 * its padding.
 */
addne pc, pc, r4, lsl #5 /* Handle alignment of pseudo header */
|
||||
nop
|
||||
|
||||
/* 0x00: Data 32-bit aligned */
|
||||
ldr r5, [r6, #(IP_SRC)]
|
||||
ldr r4, [r6, #(IP_DST)]
|
||||
b .Lin4_cksum_add_ips
|
||||
nop
|
||||
nop
|
||||
nop
|
||||
nop
|
||||
nop
|
||||
|
||||
/* 0x01: Data 8-bit aligned */
|
||||
ldr r4, [r6, #(IP_SRC - 1)] /* BE:r4 = x012 LE:r4 = 210x */
|
||||
ldr r5, [r6, #(IP_SRC + 3)] /* BE:r5 = 3456 LE:r5 = 6543 */
|
||||
ldrb r7, [r6, #(IP_SRC + 7)] /* r7 = ...7 */
|
||||
#ifdef __ARMEB__
|
||||
mov r4, r4, lsl #8 /* r4 = 012. */
|
||||
orr r4, r4, r5, lsr #24 /* r4 = 0123 */
|
||||
orr r5, r7, r5, lsl #8 /* r5 = 4567 */
|
||||
b .Lin4_cksum_add_ips
|
||||
nop
|
||||
#else
|
||||
mov r4, r4, lsr #8 /* r4 = .210 */
|
||||
orr r4, r4, r5, lsl #24 /* r4 = 3210 */
|
||||
mov r5, r5, lsr #8 /* r5 = .654 */
|
||||
orr r5, r5, r7, lsl #24 /* r5 = 7654 */
|
||||
b .Lin4_cksum_add_ips
|
||||
#endif
|
||||
|
||||
/* 0x02: Data 16-bit aligned */
|
||||
#ifdef __XSCALE__
|
||||
ldrh r5, [r6, #(IP_SRC)] /* BE:r5 = ..01 LE:r5 = ..10 */
|
||||
ldrh r7, [r6, #(IP_DST + 2)] /* BE:r7 = ..67 LE:r7 = ..76 */
|
||||
ldr r4, [r6, #(IP_SRC + 2)] /* BE:r4 = 2345 LE:r4 = 5432 */
|
||||
orr r5, r7, r5, lsl #16 /* BE:r5 = 0167 LE:r5 = 1076 */
|
||||
b .Lin4_cksum_add_ips
|
||||
nop
|
||||
nop
|
||||
nop
|
||||
#else
|
||||
ldr r4, [r6, #(IP_SRC - 2)] /* r4 = 10xx */
|
||||
/*
 * NOTE(review): the comment on the next load expects bytes 6-7 in the low
 * half of r7, and the XScale variant above fetches those bytes from
 * IP_DST + 2.  The offset used here is IP_DST - 2 -- confirm which address
 * is intended.
 */
ldr r7, [r6, #(IP_DST - 2)] /* r7 = xx76 */
|
||||
ldr r5, [r6, #(IP_SRC + 2)] /* r5 = 5432 */
|
||||
mov r4, r4, lsr #16 /* r4 = ..10 */
|
||||
orr r4, r4, r7, lsl #16 /* r4 = 7610 */
|
||||
b .Lin4_cksum_add_ips
|
||||
nop
|
||||
nop
|
||||
#endif
|
||||
|
||||
/* 0x03: Data 8-bit aligned */
|
||||
ldrb r4, [r6, #(IP_SRC)] /* r4 = ...0 */
|
||||
ldr r5, [r6, #(IP_SRC + 1)] /* BE:r5 = 1234 LE:r5 = 4321 */
|
||||
ldr r7, [r6, #(IP_SRC + 5)] /* BE:r7 = 567x LE:r7 = x765 */
|
||||
#ifdef __ARMEB__
|
||||
mov r4, r4, lsl #24 /* r4 = 0... */
|
||||
orr r4, r4, r5, lsr #8 /* r4 = 0123 */
|
||||
mov r5, r5, lsl #24 /* r5 = 4... */
|
||||
orr r5, r5, r7, lsr #8 /* r5 = 4567 */
|
||||
#else
|
||||
orr r4, r4, r5, lsl #8 /* r4 = 3210 */
|
||||
mov r5, r5, lsr #24 /* r5 = ...4 */
|
||||
orr r5, r5, r7, lsl #8 /* r5 = 7654 */
|
||||
#endif
|
||||
/* FALLTHROUGH */
|
||||
|
||||
/*
 * r4 and r5 hold the two address words.  Add them, then add r8 (nxt + len,
 * shifted left by 8 when not __ARMEB__), feeding each carry back into the sum.
 */
.Lin4_cksum_add_ips:
|
||||
adds r5, r5, r4
|
||||
#ifndef __ARMEB__
|
||||
adcs r8, r5, r8, lsl #8
|
||||
#else
|
||||
adcs r8, r5, r8
|
||||
#endif
|
||||
adc r8, r8, #0x00
|
||||
/* r1 = 0 so the first subtraction at skip_entry leaves `off' unchanged. */
mov r1, #0x00
|
||||
b .Lin4_cksum_skip_entry
|
||||
|
||||
/*
 * Walk the mbuf chain until `off' bytes have been skipped.  Each pass loads
 * the current mbuf's M_LEN into r1 and M_DATA into r6, and advances r0 to
 * M_NEXT.
 */
.Lin4_cksum_skip_loop:
|
||||
ldr r1, [r0, #(M_LEN)]
|
||||
ldr r6, [r0, #(M_DATA)]
|
||||
ldr r0, [r0, #(M_NEXT)]
|
||||
.Lin4_cksum_skip_entry:
|
||||
/* off -= r1; a negative result means the start lies in the mbuf just loaded. */
subs r2, r2, r1
|
||||
blt .Lin4_cksum_skip_done
|
||||
/* Otherwise continue with the next mbuf, or panic if the chain has ended. */
cmp r0, #0x00
|
||||
bne .Lin4_cksum_skip_loop
|
||||
b .Lin4_cksum_whoops
|
||||
|
||||
/*
 * r2 is negative here.  Set ip = next mbuf, r0 = r6 + r1 + r2 (address of the
 * first byte to checksum), r1 = -r2 (bytes from there to the end of this
 * mbuf), r9 = len and r10 = 0, then continue at .Lin_cksum_entry4, which is
 * defined outside this excerpt.
 */
.Lin4_cksum_skip_done:
|
||||
mov ip, r0
|
||||
add r0, r2, r6
|
||||
add r0, r0, r1
|
||||
rsb r1, r2, #0x00
|
||||
mov r9, r3
|
||||
mov r10, #0x00
|
||||
b .Lin_cksum_entry4
|
||||
|
||||
/* The chain ended before `off' bytes were skipped: call panic() with the message below. */
.Lin4_cksum_whoops:
|
||||
adr r0, .Lin4_cksum_whoops_str
|
||||
bl _C_LABEL(panic)
|
||||
.Lin4_cksum_whoops_str:
|
||||
.asciz "in4_cksum: out of mbufs\n"
|
||||
.align 5
|
||||
#endif /* INET */
|
||||
|
||||
|
||||
/*
 * u_int do_cksum(const void *, int)
 *
 * Wrapper that lets C code call the internal L_cksumdata routine: it pushes
 * r4-r11 and lr, calls L_cksumdata, copies r2 into r0 and returns by popping
 * the saved lr into pc.
 *
 * r0 and r1 are passed through to L_cksumdata untouched.  L_cksumdata is
 * defined outside this excerpt; presumably it takes the buffer pointer in r0
 * and the byte count in r1 and leaves the accumulated sum in r2 -- confirm
 * against its definition.
 */
ENTRY(do_cksum)
|
||||
stmfd sp!, {r4-r11, lr}
|
||||
bl L_cksumdata
|
||||
/* Move the result into the return register. */
mov r0, r2
|
||||
ldmfd sp!, {r4-r11, pc}
|
||||
/*
|
||||
* The main in*_cksum() workhorse...
|
||||
*
|
||||
|
@ -43,9 +43,19 @@
|
||||
|
||||
#ifdef _KERNEL
|
||||
u_short in_cksum(struct mbuf *m, int len);
|
||||
u_int in_cksum_hdr(const struct ip *ip);
|
||||
u_short in_addword(u_short sum, u_short b);
|
||||
u_short in_pseudo(u_int sum, u_int b, u_int c);
|
||||
u_short in_cksum_skip(struct mbuf *m, int len, int skip);
|
||||
u_int do_cksum(const void *, int);
|
||||
static __inline u_int
|
||||
in_cksum_hdr(const struct ip *ip)
|
||||
{
|
||||
u_int sum = do_cksum(ip, sizeof(struct ip));
|
||||
|
||||
sum = (sum & 0xffff) + (sum >> 16);
|
||||
if (sum > 0xffff)
|
||||
sum -= 0xffff;
|
||||
return (~sum & 0xffff);
|
||||
}
|
||||
#endif /* _KERNEL */
|
||||
#endif /* _MACHINE_IN_CKSUM_H_ */
|
||||
|
Loading…
Reference in New Issue
Block a user