diff --git a/sys/kern/kern_clock.c b/sys/kern/kern_clock.c
index 7591f1167abf..1b3f9c54f0e8 100644
--- a/sys/kern/kern_clock.c
+++ b/sys/kern/kern_clock.c
@@ -70,9 +70,7 @@
 #endif
 
 #ifdef DEVICE_POLLING
-#include <net/netisr.h>		/* for NETISR_POLL */
-
-extern void ether_poll1(void);
+extern void init_device_poll(void);
 extern void hardclock_device_poll(void);
 #endif /* DEVICE_POLLING */
@@ -147,7 +145,7 @@ initclocks(dummy)
 	cpu_initclocks();
 
 #ifdef DEVICE_POLLING
-	register_netisr(NETISR_POLL, ether_poll1);
+	init_device_poll();
 #endif
 	/*
 	 * Compute profhz/stathz, and fix profhz if needed.
@@ -222,7 +220,7 @@ hardclock(frame)
 		tc_windup();
 
 #ifdef DEVICE_POLLING
-	hardclock_device_poll();
+	hardclock_device_poll();	/* this is very short and quick */
#endif /* DEVICE_POLLING */
 
 	/*
diff --git a/sys/kern/kern_intr.c b/sys/kern/kern_intr.c
index 3a8bc8a4bb03..6bdf00fdb2ba 100644
--- a/sys/kern/kern_intr.c
+++ b/sys/kern/kern_intr.c
@@ -644,7 +644,7 @@ swi_net(void *dummy)
 	}
 #ifdef DEVICE_POLLING
 	if (pollmore)
-		ether_pollmore();
+		netisr_pollmore();
 	}
 #endif
 }
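The kern_clock.c and kern_intr.c hunks reduce the clock code's knowledge of the polling machinery to two opaque hooks, init_device_poll() and hardclock_device_poll(), which is why the <net/netisr.h> include can go away, and rename the post-netisr hook to netisr_pollmore(). For context, here is a minimal user-space model of the dispatch order the swi_net() hunk relies on: NETISR_POLL runs with the other netisrs, then netisr_pollmore() decides whether another pass is needed. This is only an illustration, not the kernel code; the *_model names and the two-pass count are invented for the example.

#include <stdio.h>

#define NETISR_POLL 0
#define NETISR_IP   2

static unsigned isrbits;                  /* pending netisr bits */
static void (*handlers[32])(void);        /* toy dispatch table */

static void schednetisr_model(int n) { isrbits |= 1u << n; }

static void poll_model(void) { printf("NETISR_POLL: poll the cards\n"); }
static void ip_model(void)   { printf("NETISR_IP: drain ipintrq\n"); }

static int passes = 2;                    /* pretend two passes are needed */
static void pollmore_model(void)
{
	if (--passes > 0) {
		printf("netisr_pollmore: more work, reschedule NETISR_POLL\n");
		schednetisr_model(NETISR_POLL);
	} else
		printf("netisr_pollmore: done for this tick\n");
}

int main(void)
{
	handlers[NETISR_POLL] = poll_model;
	handlers[NETISR_IP] = ip_model;
	schednetisr_model(NETISR_POLL);
	schednetisr_model(NETISR_IP);

	/* the swi_net()-style loop: run all pending netisrs, then pollmore */
	while (isrbits != 0) {
		int pollmore = isrbits & (1u << NETISR_POLL);
		for (int i = 0; i < 32; i++)
			if (isrbits & (1u << i)) {
				isrbits &= ~(1u << i);
				handlers[i]();
			}
		if (pollmore)
			pollmore_model();
	}
	return 0;
}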
diff --git a/sys/kern/kern_poll.c b/sys/kern/kern_poll.c
index 7a45f74bef32..87376722c912 100644
--- a/sys/kern/kern_poll.c
+++ b/sys/kern/kern_poll.c
@@ -1,5 +1,7 @@
 /*-
- * Copyright (c) 2001 Luigi Rizzo
+ * Copyright (c) 2001-2002 Luigi Rizzo
+ *
+ * Supported by: the Xorp Project (www.xorp.org)
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -42,10 +44,12 @@
 #error DEVICE_POLLING is not compatible with SMP
 #endif
 
-void ether_poll1(void);
+static void netisr_poll(void);		/* the two netisr handlers */
+void netisr_pollmore(void);
+
+void init_device_poll(void);		/* init routine */
+void hardclock_device_poll(void);	/* hook from hardclock */
 void ether_poll(int);			/* polling while in trap */
-void ether_pollmore(void);
-void hardclock_device_poll(void);
 
 /*
  * Polling support for [network] device drivers.
@@ -101,56 +105,73 @@ SYSCTL_NODE(_kern, OID_AUTO, polling, CTLFLAG_RW, 0,
 	"Device polling parameters");
 
 static u_int32_t poll_burst = 5;
-SYSCTL_ULONG(_kern_polling, OID_AUTO, burst, CTLFLAG_RW,
+SYSCTL_UINT(_kern_polling, OID_AUTO, burst, CTLFLAG_RW,
 	&poll_burst, 0, "Current polling burst size");
 
 static u_int32_t poll_each_burst = 5;
-SYSCTL_ULONG(_kern_polling, OID_AUTO, each_burst, CTLFLAG_RW,
+SYSCTL_UINT(_kern_polling, OID_AUTO, each_burst, CTLFLAG_RW,
 	&poll_each_burst, 0, "Max size of each burst");
 
 static u_int32_t poll_burst_max = 150;	/* good for 100Mbit net and HZ=1000 */
-SYSCTL_ULONG(_kern_polling, OID_AUTO, burst_max, CTLFLAG_RW,
+SYSCTL_UINT(_kern_polling, OID_AUTO, burst_max, CTLFLAG_RW,
 	&poll_burst_max, 0, "Max Polling burst size");
 
+static u_int32_t poll_in_idle_loop=1;	/* do we poll in idle loop ? */
+SYSCTL_UINT(_kern_polling, OID_AUTO, idle_poll, CTLFLAG_RW,
+	&poll_in_idle_loop, 0, "Enable device polling in idle loop");
+
 u_int32_t poll_in_trap;			/* used in trap.c */
-SYSCTL_ULONG(_kern_polling, OID_AUTO, poll_in_trap, CTLFLAG_RW,
+SYSCTL_UINT(_kern_polling, OID_AUTO, poll_in_trap, CTLFLAG_RW,
 	&poll_in_trap, 0, "Poll burst size during a trap");
 
 static u_int32_t user_frac = 50;
-SYSCTL_ULONG(_kern_polling, OID_AUTO, user_frac, CTLFLAG_RW,
+SYSCTL_UINT(_kern_polling, OID_AUTO, user_frac, CTLFLAG_RW,
 	&user_frac, 0, "Desired user fraction of cpu time");
 
 static u_int32_t reg_frac = 20 ;
-SYSCTL_ULONG(_kern_polling, OID_AUTO, reg_frac, CTLFLAG_RW,
+SYSCTL_UINT(_kern_polling, OID_AUTO, reg_frac, CTLFLAG_RW,
 	&reg_frac, 0, "Every this many cycles poll register");
 
 static u_int32_t short_ticks;
-SYSCTL_ULONG(_kern_polling, OID_AUTO, short_ticks, CTLFLAG_RW,
+SYSCTL_UINT(_kern_polling, OID_AUTO, short_ticks, CTLFLAG_RW,
 	&short_ticks, 0, "Hardclock ticks shorter than they should be");
 
 static u_int32_t lost_polls;
-SYSCTL_ULONG(_kern_polling, OID_AUTO, lost_polls, CTLFLAG_RW,
+SYSCTL_UINT(_kern_polling, OID_AUTO, lost_polls, CTLFLAG_RW,
 	&lost_polls, 0, "How many times we would have lost a poll tick");
 
+static u_int32_t pending_polls;
+SYSCTL_UINT(_kern_polling, OID_AUTO, pending_polls, CTLFLAG_RW,
+	&pending_polls, 0, "Do we need to poll again");
+
+static int residual_burst = 0;
+SYSCTL_INT(_kern_polling, OID_AUTO, residual_burst, CTLFLAG_RW,
+	&residual_burst, 0, "# of residual cycles in burst");
+
 static u_int32_t poll_handlers; /* next free entry in pr[]. */
-SYSCTL_ULONG(_kern_polling, OID_AUTO, handlers, CTLFLAG_RD,
+SYSCTL_UINT(_kern_polling, OID_AUTO, handlers, CTLFLAG_RD,
 	&poll_handlers, 0, "Number of registered poll handlers");
 
-static u_int32_t poll_in_idle=1;	/* boolean */
-SYSCTL_ULONG(_kern_polling, OID_AUTO, poll_in_idle, CTLFLAG_RW,
-	&poll_in_idle, 0, "Poll during idle loop");
-
-static u_int32_t idlepoll_sleeping; /* idlepoll is sleeping */
-SYSCTL_ULONG(_kern_polling, OID_AUTO, idlepoll_sleeping, CTLFLAG_RD,
-	&idlepoll_sleeping, 0, "idlepoll is sleeping");
-
 static int polling = 0;		/* global polling enable */
-SYSCTL_ULONG(_kern_polling, OID_AUTO, enable, CTLFLAG_RW,
+SYSCTL_UINT(_kern_polling, OID_AUTO, enable, CTLFLAG_RW,
 	&polling, 0, "Polling enabled");
 
+static volatile u_int32_t phase;
+SYSCTL_UINT(_kern_polling, OID_AUTO, phase, CTLFLAG_RW,
+	&phase, 0, "Polling phase");
+
+static u_int32_t suspect;
+SYSCTL_UINT(_kern_polling, OID_AUTO, suspect, CTLFLAG_RW,
+	&suspect, 0, "suspect event");
+
+static volatile u_int32_t stalled;
+SYSCTL_UINT(_kern_polling, OID_AUTO, stalled, CTLFLAG_RW,
+	&stalled, 0, "potential stalls");
+
+static u_int32_t idlepoll_sleeping; /* idlepoll is sleeping */
+SYSCTL_UINT(_kern_polling, OID_AUTO, idlepoll_sleeping, CTLFLAG_RD,
+	&idlepoll_sleeping, 0, "idlepoll is sleeping");
 
-static u_int32_t poll1_active;
-static u_int32_t need_poll_again;
 
 #define POLL_LIST_LEN 128
 struct pollrec {
@@ -160,6 +181,15 @@ struct pollrec {
 
 static struct pollrec pr[POLL_LIST_LEN];
 
+/*
+ * Register the relevant netisr. Called from kern_clock.c.
+ */
+void
+init_device_poll(void)
+{
+	register_netisr(NETISR_POLL, netisr_poll);
+}
+
 /*
  * Hook from hardclock. Tries to schedule a netisr, but keeps track
  * of lost ticks due to the previous handler taking too long.
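A note on the SYSCTL_ULONG to SYSCTL_UINT conversions above: the counters are declared u_int32_t, and SYSCTL_ULONG exports sizeof(long) bytes, so on an LP64 platform the old macros would read, and let userland write, past the 4-byte objects. A trivial user-space sketch of the size mismatch (an illustration only, not kernel code):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	printf("sizeof(uint32_t)      = %zu\n", sizeof(uint32_t));
	printf("sizeof(unsigned long) = %zu\n", sizeof(unsigned long));
	/* On LP64 these differ, so a u_int32_t must use the int-sized macro. */
	return 0;
}

At runtime all of these knobs appear under the kern.polling sysctl tree, e.g. kern.polling.enable and kern.polling.burst_max.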
@@ -173,6 +203,9 @@ hardclock_device_poll(void)
 	static struct timeval prev_t, t;
 	int delta;
 
+	if (poll_handlers == 0)
+		return;
+
 	microuptime(&t);
 	delta = (t.tv_usec - prev_t.tv_usec) +
 		(t.tv_sec - prev_t.tv_sec)*1000000;
@@ -181,15 +214,24 @@ hardclock_device_poll(void)
 	else
 		prev_t = t;
 
-	if (poll_handlers > 0) {
-		if (poll1_active) {
-			lost_polls++;
-			need_poll_again++;
-		} else {
-			poll1_active = 1;
-			schednetisr(NETISR_POLL);
-		}
+	if (pending_polls > 100) {
+		/* too much, assume it has stalled */
+		stalled++;
+		printf("poll stalled [%d] in phase %d\n",
+			stalled, phase);
+		pending_polls = 0;
+		phase = 0;
 	}
+
+	if (phase <= 2) {
+		if (phase != 0)
+			suspect++;
+		phase = 1;
+		schednetisr(NETISR_POLL);
+		phase = 2;
+	}
+	if (pending_polls++ > 0)
+		lost_polls++;
 }
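The new pending_polls/phase bookkeeping replaces the old poll1_active/need_poll_again pair: each hardclock tick schedules NETISR_POLL and increments pending_polls, each completed poll pass decrements it, a still-pending pass at tick time counts as a lost poll, and a backlog above 100 is treated as a stall and reset. A self-contained, single-threaded model of that accounting (a sketch under simplified assumptions, not the kernel code):

#include <stdio.h>
#include <stdint.h>

static uint32_t pending_polls, lost_polls, stalled, suspect, phase;

static void hardclock_tick(void)
{
	if (pending_polls > 100) {	/* too much, assume it has stalled */
		stalled++;
		pending_polls = 0;
		phase = 0;
	}
	if (phase <= 2) {		/* no poll pass running: schedule one */
		if (phase != 0)
			suspect++;	/* tick arrived mid-scheduling */
		phase = 1;
		/* schednetisr(NETISR_POLL) would go here */
		phase = 2;
	}
	if (pending_polls++ > 0)	/* previous pass still outstanding */
		lost_polls++;
}

static void pollmore_pass(void)		/* netisr side finishing a pass */
{
	phase = 5;
	pending_polls--;
	phase = (pending_polls == 0) ? 0 : 6;	/* 6: catch-up pass queued */
}

int main(void)
{
	hardclock_tick(); pollmore_pass();	/* normal tick, no losses */
	hardclock_tick(); hardclock_tick();	/* slow pass: a tick is lost */
	pollmore_pass(); pollmore_pass();
	printf("lost=%u suspect=%u stalled=%u pending=%u\n",
	    lost_polls, suspect, stalled, pending_polls);
	return 0;
}

Running the model prints lost=1 suspect=1 stalled=0 pending=0: the tick that arrives while a pass is still outstanding is counted as lost, and the backlog is drained by an extra catch-up pass.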
 
 /*
@@ -199,7 +241,6 @@ void
 ether_poll(int count)
 {
 	int i;
-	int s = splimp();
 
 	mtx_lock(&Giant);
 
@@ -210,18 +251,17 @@ ether_poll(int count)
 		    (pr[i].ifp->if_flags & (IFF_UP|IFF_RUNNING)) )
 			pr[i].handler(pr[i].ifp, 0, count); /* quick check */
 	mtx_unlock(&Giant);
-	splx(s);
 }
 
 /*
- * ether_pollmore is called after other netisr's, possibly scheduling
+ * netisr_pollmore is called after other netisr's, possibly scheduling
  * another NETISR_POLL call, or adapting the burst size for the next cycle.
  *
 * It is very bad to fetch large bursts of packets from a single card at once,
 * because the burst could take a long time to be completely processed, or
 * could saturate the intermediate queue (ipintrq or similar) leading to
 * losses or unfairness. To reduce the problem, and also to account better for
- * time spent in network-related processnig, we split the burst in smaller
+ * time spent in network-related processing, we split the burst in smaller
 * chunks of fixed size, giving control to the other netisr's between chunks.
 * This helps in improving the fairness, reducing livelock (because we
 * emulate more closely the "process to completion" that we have with
@@ -229,21 +269,19 @@ ether_poll(int count)
  * handling and forwarding.
  */
 
-static int residual_burst = 0;
-
 static struct timeval poll_start_t;
 
 void
-ether_pollmore()
+netisr_pollmore()
 {
 	struct timeval t;
 	int kern_load;
-	int s = splhigh();
+	/* XXX run at splhigh() or equivalent */
+	phase = 5;
 	if (residual_burst > 0) {
 		schednetisr(NETISR_POLL);
 		/* will run immediately on return, followed by netisrs */
-		splx(s);
 		return ;
 	}
 	/* here we can account time spent in netisr's in this tick */
@@ -259,36 +297,37 @@ ether_pollmore()
 		poll_burst++;
 	}
 
-	if (need_poll_again) {
+	pending_polls--;
+	if (pending_polls == 0)	/* we are done */
+		phase = 0;
+	else {
 		/*
 		 * Last cycle was long and caused us to miss one or more
-		 * hardclock ticks. Restart processnig again, but slightly
+		 * hardclock ticks. Restart processing again, but slightly
 		 * reduce the burst size to prevent that this happens again.
 		 */
-		need_poll_again--;
 		poll_burst -= (poll_burst / 8);
 		if (poll_burst < 1)
 			poll_burst = 1;
 		schednetisr(NETISR_POLL);
-	} else
-		poll1_active = 0;
-	splx(s);
+		phase = 6;
+	}
 }
 
 /*
- * ether_poll1 is called by schednetisr when appropriate, typically once
+ * netisr_poll is scheduled by schednetisr when appropriate, typically once
  * per tick. It is called at splnet() so first thing to do is to upgrade to
  * splimp(), and call all registered handlers.
  */
-void
-ether_poll1(void)
+static void
+netisr_poll(void)
 {
 	static int reg_frac_count;
 	int i, cycles;
 	enum poll_cmd arg = POLL_ONLY;
-	int s=splimp();
 	mtx_lock(&Giant);
 
+	phase = 3;
 	if (residual_burst == 0) { /* first call in this tick */
 		microuptime(&poll_start_t);
 		/*
@@ -344,8 +383,8 @@ ether_poll1(void)
 		poll_handlers = 0;
 	}
 	/* on -stable, schednetisr(NETISR_POLLMORE); */
+	phase = 4;
 	mtx_unlock(&Giant);
-	splx(s);
 }
 
 /*
@@ -353,8 +392,8 @@ ether_poll1(void)
  * (and polling should be enabled), 0 otherwise.
  * A device is not supposed to register itself multiple times.
  *
- * This is called from within the *_intr() function, so we should
- * probably not need further locking. XXX
+ * This is called from within the *_intr() functions, so we do not need
+ * further locking.
  */
 int
 ether_poll_register(poll_handler_t *h, struct ifnet *ifp)
@@ -400,12 +439,10 @@ ether_poll_register(poll_handler_t *h, struct ifnet *ifp)
 }
 
 /*
- * Remove the interface from the list of polling ones.
- * Normally run by *_stop().
- * We allow it being called with IFF_POLLING clear, the
- * call is sufficiently rare so it is preferable to save the
- * space for the extra test in each device in exchange of one
- * additional function call.
+ * Remove interface from the polling list. Normally called by *_stop().
+ * It is not an error to call it with IFF_POLLING clear; the call is
+ * sufficiently rare that it is preferable to save the space of an extra
+ * test in each driver in exchange for one additional function call.
 */
 int
 ether_poll_deregister(struct ifnet *ifp)
@@ -450,7 +487,7 @@ poll_idle(void)
 	mtx_unlock_spin(&sched_lock);
 
 	for (;;) {
-		if (poll_in_idle && poll_handlers > 0) {
+		if (poll_in_idle_loop && poll_handlers > 0) {
 			idlepoll_sleeping = 0;
 			mtx_lock(&Giant);
 			ether_poll(poll_each_burst);
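For reference, ether_poll_register()/ether_poll_deregister() are driven from a NIC driver roughly as sketched below. This shows only the shape of the hookup, with placeholder names (foo_*, FOO_INTR_*), not a real driver; the cmd values POLL_ONLY, POLL_AND_CHECK_STATUS and POLL_DEREGISTER come from the polling API, and the *_stop() routine would call ether_poll_deregister() and re-enable interrupts.

static void
foo_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct foo_softc *sc = ifp->if_softc;

	if (cmd == POLL_DEREGISTER) {	/* final call, re-enable interrupts */
		FOO_INTR_ENABLE(sc);
		return;
	}
	foo_rxeof(sc, count);		/* process at most 'count' packets */
	foo_txeof(sc);
	if (cmd == POLL_AND_CHECK_STATUS)	/* every reg_frac cycles */
		foo_check_link(sc);
}

static void
foo_intr(void *arg)
{
	struct foo_softc *sc = arg;
	struct ifnet *ifp = &sc->arpcom.ac_if;

	if (ether_poll_register(foo_poll, ifp)) {
		FOO_INTR_DISABLE(sc);	/* polling takes over from here */
		foo_poll(ifp, POLL_ONLY, 1);
		return;
	}
	/* ... normal interrupt processing otherwise ... */
}

ether_poll_register() returns nonzero only when polling is globally enabled and a slot in pr[] is free, and in this version it also marks the interface with IFF_POLLING, so the driver's only obligation is to mask its own interrupts while registered.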