diff --git a/UPDATING b/UPDATING index 985e04eb2696..7df65a3304a5 100644 --- a/UPDATING +++ b/UPDATING @@ -25,6 +25,8 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 9.x IS SLOW: 20110628: The packet filter (pf) code has been updated to OpenBSD 4.5. You need to update userland tools to be in sync with kernel. + This update breaks backward compatibility with earlier pfsync(4) + versions. Care must be taken when updating redundant firewall setups. 20110608: The following sysctls and tunables are retired on x86 platforms: diff --git a/contrib/ntp/ntpd/ntp_config.c b/contrib/ntp/ntpd/ntp_config.c index beb4c4847e71..99af999ad169 100644 --- a/contrib/ntp/ntpd/ntp_config.c +++ b/contrib/ntp/ntpd/ntp_config.c @@ -414,7 +414,7 @@ enum gnn_type { static int getnetnum P((const char *, struct sockaddr_storage *, int, enum gnn_type)); static void save_resolve P((char *, int, int, int, int, u_int, int, - keyid_t, u_char *)); + keyid_t, u_char *, u_char)); static void do_resolve_internal P((void)); static void abort_resolve P((void)); #if !defined(VMS) && !defined(SYS_WINNT) @@ -870,9 +870,9 @@ getconfig( stoa(&peeraddr)); } } else if (errflg == -1) { - save_resolve(tokens[1], hmode, peerversion, + save_resolve(tokens[istart - 1], hmode, peerversion, minpoll, maxpoll, peerflags, ttl, - peerkey, peerkeystr); + peerkey, peerkeystr, peeraddr.ss_family); } break; @@ -2325,7 +2325,8 @@ save_resolve( u_int flags, int ttl, keyid_t keyid, - u_char *keystr + u_char *keystr, + u_char peeraf ) { #ifndef SYS_VXWORKS @@ -2365,11 +2366,11 @@ save_resolve( } #endif - (void)fprintf(res_fp, "%s %d %d %d %d %d %d %u %s\n", name, + (void)fprintf(res_fp, "%s %u %d %d %d %d %d %d %u %s\n", name, peeraf, mode, version, minpoll, maxpoll, flags, ttl, keyid, keystr); #ifdef DEBUG if (debug > 1) - printf("config: %s %d %d %d %d %x %d %u %s\n", name, mode, + printf("config: %s %u %d %d %d %d %x %d %u %s\n", name, peeraf, mode, version, minpoll, maxpoll, flags, ttl, keyid, keystr); #endif diff --git 
a/contrib/ntp/ntpd/ntp_intres.c b/contrib/ntp/ntpd/ntp_intres.c index 5facc85693c4..e18888f4db12 100644 --- a/contrib/ntp/ntpd/ntp_intres.c +++ b/contrib/ntp/ntpd/ntp_intres.c @@ -29,6 +29,7 @@ #include #include +#include #include /**/ @@ -111,15 +112,16 @@ static int resolve_value; /* next value of resolve timer */ * is supposed to consist of entries in the following order */ #define TOK_HOSTNAME 0 -#define TOK_HMODE 1 -#define TOK_VERSION 2 -#define TOK_MINPOLL 3 -#define TOK_MAXPOLL 4 -#define TOK_FLAGS 5 -#define TOK_TTL 6 -#define TOK_KEYID 7 -#define TOK_KEYSTR 8 -#define NUMTOK 9 +#define TOK_PEERAF 1 +#define TOK_HMODE 2 +#define TOK_VERSION 3 +#define TOK_MINPOLL 4 +#define TOK_MAXPOLL 5 +#define TOK_FLAGS 6 +#define TOK_TTL 7 +#define TOK_KEYID 8 +#define TOK_KEYSTR 9 +#define NUMTOK 10 #define MAXLINESIZE 512 @@ -140,7 +142,7 @@ char *req_file; /* name of the file with configuration info */ static void checkparent P((void)); static void removeentry P((struct conf_entry *)); static void addentry P((char *, int, int, int, int, u_int, - int, keyid_t, char *)); + int, keyid_t, char *, u_char)); static int findhostaddr P((struct conf_entry *)); static void openntp P((void)); static int request P((struct conf_peer *)); @@ -397,7 +399,8 @@ addentry( u_int flags, int ttl, keyid_t keyid, - char *keystr + char *keystr, + u_char peeraf ) { register char *cp; @@ -407,7 +410,7 @@ addentry( #ifdef DEBUG if (debug > 1) msyslog(LOG_INFO, - "intres: <%s> %d %d %d %d %x %d %x %s\n", name, + "intres: <%s> %u %d %d %d %d %x %d %x %s\n", name, peeraf, mode, version, minpoll, maxpoll, flags, ttl, keyid, keystr); #endif @@ -422,6 +425,7 @@ addentry( ce->ce_peeraddr6 = in6addr_any; #endif ANYSOCK(&ce->peer_store); + ce->peer_store.ss_family = peeraf; /* Save AF for getaddrinfo hints. 
*/ ce->ce_hmode = (u_char)mode; ce->ce_version = (u_char)version; ce->ce_minpoll = (u_char)minpoll; @@ -482,7 +486,8 @@ findhostaddr( entry->ce_name)); memset(&hints, 0, sizeof(hints)); - hints.ai_family = AF_UNSPEC; + hints.ai_family = entry->peer_store.ss_family; + hints.ai_socktype = SOCK_DGRAM; /* * If the IPv6 stack is not available look only for IPv4 addresses */ @@ -1051,6 +1056,13 @@ readconf( } } + if (intval[TOK_PEERAF] != AF_UNSPEC && intval[TOK_PEERAF] != + AF_INET && intval[TOK_PEERAF] != AF_INET6) { + msyslog(LOG_ERR, "invalid peer address family (%u) in " + "file %s", intval[TOK_PEERAF], name); + exit(1); + } + if (intval[TOK_HMODE] != MODE_ACTIVE && intval[TOK_HMODE] != MODE_CLIENT && intval[TOK_HMODE] != MODE_BROADCAST) { @@ -1107,7 +1119,7 @@ readconf( addentry(token[TOK_HOSTNAME], (int)intval[TOK_HMODE], (int)intval[TOK_VERSION], (int)intval[TOK_MINPOLL], (int)intval[TOK_MAXPOLL], flags, (int)intval[TOK_TTL], - intval[TOK_KEYID], token[TOK_KEYSTR]); + intval[TOK_KEYID], token[TOK_KEYSTR], (u_char)intval[TOK_PEERAF]); } } @@ -1129,6 +1141,9 @@ doconfigure( dores ? "with" : "without" ); #endif + if (dores) /* Reload /etc/resolv.conf - bug 1226 */ + res_init(); + ce = confentries; while (ce != NULL) { #ifdef DEBUG diff --git a/sbin/fsck_ffs/suj.c b/sbin/fsck_ffs/suj.c index b4aa679d2299..ec8b5ab4729a 100644 --- a/sbin/fsck_ffs/suj.c +++ b/sbin/fsck_ffs/suj.c @@ -2261,6 +2261,7 @@ suj_build(void) blk_build((struct jblkrec *)rec); break; case JOP_TRUNC: + case JOP_SYNC: ino_build_trunc((struct jtrncrec *)rec); break; default: diff --git a/sbin/ipfw/ipfw.8 b/sbin/ipfw/ipfw.8 index 4a223203f7d2..6fc12c82c132 100644 --- a/sbin/ipfw/ipfw.8 +++ b/sbin/ipfw/ipfw.8 @@ -1,7 +1,7 @@ .\" .\" $FreeBSD$ .\" -.Dd June 14, 2011 +.Dd June 29, 2011 .Dt IPFW 8 .Os .Sh NAME @@ -822,6 +822,78 @@ skipto, but care should be used, as no destination caching is possible in this case so the rules are always walked to find it, starting from the .Cm skipto . 
+.It Cm call Ar number | tablearg +The current rule number is saved in the internal stack and +ruleset processing continues with the first rule numbered +.Ar number +or higher. +If later a rule with the +.Cm return +action is encountered, the processing returns to the first rule +with number of this +.Cm call +rule plus one or higher +(the same behaviour as with packets returning from +.Xr divert 4 +socket after a +.Cm divert +action). +This could be used to make somewhat like an assembly language +.Dq subroutine +calls to rules with common checks for different interfaces, etc. +.Pp +Rule with any number could be called, not just forward jumps as with +.Cm skipto . +So, to prevent endless loops in case of mistakes, both +.Cm call +and +.Cm return +actions don't do any jumps and simply go to the next rule if memory +can't be allocated or stack overflowed/underflowed. +.Pp +Internally stack for rule numbers is implemented using +.Xr mbuf_tags 9 +facility and currently has size of 16 entries. +As mbuf tags are lost when packet leaves the kernel, +.Cm divert +should not be used in subroutines to avoid endless loops +and other undesired effects. +.It Cm return +Takes rule number saved to internal stack by the last +.Cm call +action and returns ruleset processing to the first rule +with number greater than number of corresponding +.Cm call +rule. See description of the +.Cm call +action for more details. +.Pp +Note that +.Cm return +rules usually end a +.Dq subroutine +and thus are unconditional, but +.Nm +command-line utility currently requires every action except +.Cm check-state +to have body. +While it is sometimes useful to return only on some packets, +usually you want to print just +.Dq return +for readability.
+A workaround for this is to use new syntax and +.Fl c +switch: +.Pp +.Bd -literal -offset indent +# Add a rule without actual body +ipfw add 2999 return via any + +# List rules without "from any to any" part +ipfw -c list +.Ed +.Pp +This cosmetic annoyance may be fixed in future releases. .It Cm tee Ar port Send a copy of packets matching this rule to the .Xr divert 4 @@ -3253,3 +3325,18 @@ for the respective conversations. To avoid failures of network error detection and path MTU discovery, ICMP error messages may need to be allowed explicitly through static rules. +.Pp +Rules using +.Cm call +and +.Cm return +actions may lead to confusing behaviour if ruleset has mistakes, +and/or interaction with other subsystems (netgraph, dummynet, etc.) is used. +One possible case for this is packet leaving +.Nm +in subroutine on the input pass, while later on output encountering unpaired +.Cm return +first. +As the call stack is kept intact after input pass, packet will suddenly +return to the rule number used on input pass, not on output one. +Order of processing should be checked carefully to avoid such mistakes. 
diff --git a/sbin/ipfw/ipfw2.c b/sbin/ipfw/ipfw2.c index 64e5eb1e9f20..e259f5ec8091 100644 --- a/sbin/ipfw/ipfw2.c +++ b/sbin/ipfw/ipfw2.c @@ -214,6 +214,8 @@ static struct _s_x rule_actions[] = { { "nat", TOK_NAT }, { "reass", TOK_REASS }, { "setfib", TOK_SETFIB }, + { "call", TOK_CALL }, + { "return", TOK_RETURN }, { NULL, 0 } /* terminator */ }; @@ -1136,6 +1138,13 @@ show_ipfw(struct ip_fw *rule, int pcwidth, int bcwidth) printf("reass"); break; + case O_CALLRETURN: + if (cmd->len & F_NOT) + printf("return"); + else + PRINT_UINT_ARG("call ", cmd->arg1); + break; + default: printf("** unrecognized action %d len %d ", cmd->opcode, cmd->len); @@ -2771,6 +2780,9 @@ ipfw_add(char *av[]) goto chkarg; case TOK_TEE: action->opcode = O_TEE; + goto chkarg; + case TOK_CALL: + action->opcode = O_CALLRETURN; chkarg: if (!av[0]) errx(EX_USAGE, "missing argument for %s", *(av - 1)); @@ -2863,6 +2875,10 @@ chkarg: action->opcode = O_REASS; break; + case TOK_RETURN: + fill_cmd(action, O_CALLRETURN, F_NOT, 0); + break; + default: errx(EX_DATAERR, "invalid action %s\n", av[-1]); } diff --git a/sbin/ipfw/ipfw2.h b/sbin/ipfw/ipfw2.h index 9562f324432e..bade0dda6529 100644 --- a/sbin/ipfw/ipfw2.h +++ b/sbin/ipfw/ipfw2.h @@ -99,6 +99,8 @@ enum tokens { TOK_CHECKSTATE, TOK_NAT, TOK_REASS, + TOK_CALL, + TOK_RETURN, TOK_ALTQ, TOK_LOG, diff --git a/share/man/man4/jme.4 b/share/man/man4/jme.4 index 4220ab51766c..0bcf652acb60 100644 --- a/share/man/man4/jme.4 +++ b/share/man/man4/jme.4 @@ -24,7 +24,7 @@ .\" .\" $FreeBSD$ .\" -.Dd December 20, 2010 +.Dd June 29, 2011 .Dt JME 4 .Os .Sh NAME @@ -178,3 +178,17 @@ address once station address is reprogrammed via eFuse. It is highly recommended not to reprogram station address and it is responsibility of administrator to store original station address into a safe place when station address should be changed. +.Pp +There are two known 1000baseT link establishment issues with JMC25x. 
+If the full mask revision number of JMC25x controller is less than +or equal to 4 and link partner enabled IEEE 802.3az Energy Efficient +Ethernet feature, the controller would not be able to establish a +1000baseT link. +Also if the length of cable is longer than 120 meters, controller +can not establish a 1000baseT link. +The known workaround for the issue is to force manual link +configuration with 100baseTX instead of relying on auto-negotiation. +The full mask revision number of controller could be checked with +verbose kernel boot option. +Use lower nibble of chip revision number to get full mask revision of +the controller. diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index ff4236272a2b..52a099159665 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -2847,8 +2847,8 @@ pmap_remove_all(vm_page_t m) vm_offset_t va; vm_page_t free; - KASSERT((m->flags & PG_FICTITIOUS) == 0, - ("pmap_remove_all: page %p is fictitious", m)); + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_remove_all: page %p is not managed", m)); free = NULL; vm_page_lock_queues(); pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); diff --git a/sys/amd64/amd64/sys_machdep.c b/sys/amd64/amd64/sys_machdep.c index fa895f38d4de..beb248c3b00a 100644 --- a/sys/amd64/amd64/sys_machdep.c +++ b/sys/amd64/amd64/sys_machdep.c @@ -33,7 +33,7 @@ #include __FBSDID("$FreeBSD$"); -#include "opt_capabilities.h" +#include "opt_capsicum.h" #include #include @@ -180,7 +180,7 @@ sysarch(td, uap) uint64_t a64base; struct i386_ioperm_args iargs; -#ifdef CAPABILITIES +#ifdef CAPABILITY_MODE /* * Whitelist of operations which are safe enough for capability mode. 
*/ diff --git a/sys/arm/arm/pmap.c b/sys/arm/arm/pmap.c index cecf3638d95f..f68440200c52 100644 --- a/sys/arm/arm/pmap.c +++ b/sys/arm/arm/pmap.c @@ -3120,8 +3120,8 @@ pmap_remove_all(vm_page_t m) pmap_t curpm; int flags = 0; - KASSERT((m->flags & PG_FICTITIOUS) == 0, - ("pmap_remove_all: page %p is fictitious", m)); + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_remove_all: page %p is not managed", m)); if (TAILQ_EMPTY(&m->md.pv_list)) return; vm_page_lock_queues(); diff --git a/sys/arm/arm/sys_machdep.c b/sys/arm/arm/sys_machdep.c index 89e522712968..52545d8cc999 100644 --- a/sys/arm/arm/sys_machdep.c +++ b/sys/arm/arm/sys_machdep.c @@ -36,7 +36,7 @@ #include __FBSDID("$FreeBSD$"); -#include "opt_capabilities.h" +#include "opt_capsicum.h" #include #include @@ -107,7 +107,7 @@ sysarch(td, uap) { int error; -#ifdef CAPABILITIES +#ifdef CAPABILITY_MODE /* * Whitelist of operations which are safe enough for capability mode. */ diff --git a/sys/arm/mv/common.c b/sys/arm/mv/common.c index 16bd325e8b89..09da857ecfed 100644 --- a/sys/arm/mv/common.c +++ b/sys/arm/mv/common.c @@ -122,7 +122,7 @@ static struct soc_node_spec soc_nodes[] = { { "mrvl,sata", &decode_win_sata_setup, NULL }, { "mrvl,xor", &decode_win_xor_setup, &decode_win_xor_dump }, { "mrvl,idma", &decode_win_idma_setup, &decode_win_idma_dump }, - { "mvrl,pcie", &decode_win_pcie_setup, NULL }, + { "mrvl,pcie", &decode_win_pcie_setup, NULL }, { NULL, NULL, NULL }, }; diff --git a/sys/conf/NOTES b/sys/conf/NOTES index b84d0c535b14..835c30b1a543 100644 --- a/sys/conf/NOTES +++ b/sys/conf/NOTES @@ -1162,7 +1162,7 @@ options MAC_STUB options MAC_TEST # Support for Capsicum -options CAPABILITIES +options CAPABILITY_MODE ##################################################################### diff --git a/sys/conf/options b/sys/conf/options index 1ce30b172600..35c0dfc9b2a2 100644 --- a/sys/conf/options +++ b/sys/conf/options @@ -63,7 +63,7 @@ SYSCTL_DEBUG opt_sysctl.h ADAPTIVE_LOCKMGRS ALQ AUDIT 
opt_global.h -CAPABILITIES opt_capabilities.h +CAPABILITY_MODE opt_capsicum.h CODA_COMPAT_5 opt_coda.h COMPAT_43 opt_compat.h COMPAT_43TTY opt_compat.h diff --git a/sys/dev/ath/if_ath.c b/sys/dev/ath/if_ath.c index 9c534c8e2d46..35592d9b47ad 100644 --- a/sys/dev/ath/if_ath.c +++ b/sys/dev/ath/if_ath.c @@ -4693,6 +4693,7 @@ ath_newstate(struct ieee80211vap *vap, enum ieee80211_state nstate, int arg) struct ieee80211_node *ni = NULL; int i, error, stamode; u_int32_t rfilt; + int csa_run_transition = 0; static const HAL_LED_STATE leds[] = { HAL_LED_INIT, /* IEEE80211_S_INIT */ HAL_LED_SCAN, /* IEEE80211_S_SCAN */ @@ -4708,6 +4709,9 @@ ath_newstate(struct ieee80211vap *vap, enum ieee80211_state nstate, int arg) ieee80211_state_name[vap->iv_state], ieee80211_state_name[nstate]); + if (vap->iv_state == IEEE80211_S_CSA && nstate == IEEE80211_S_RUN) + csa_run_transition = 1; + callout_drain(&sc->sc_cal_ch); ath_hal_setledstate(ah, leds[nstate]); /* set LED */ @@ -4814,8 +4818,14 @@ ath_newstate(struct ieee80211vap *vap, enum ieee80211_state nstate, int arg) * Defer beacon timer configuration to the next * beacon frame so we have a current TSF to use * (any TSF collected when scanning is likely old). + * However if it's due to a CSA -> RUN transition, + * force a beacon update so we pick up a lack of + * beacons from an AP in CAC and thus force a + * scan. 
*/ sc->sc_syncbeacon = 1; + if (csa_run_transition) + ath_beacon_config(sc, vap); break; case IEEE80211_M_MONITOR: /* diff --git a/sys/dev/e1000/if_em.c b/sys/dev/e1000/if_em.c index 156f8b23003d..4d8ba652d2e5 100644 --- a/sys/dev/e1000/if_em.c +++ b/sys/dev/e1000/if_em.c @@ -330,42 +330,71 @@ MODULE_DEPEND(em, ether, 1, 1, 1); #define CSUM_TSO 0 #endif +SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters"); + static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV); static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR); TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt); TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt); +SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt, + 0, "Default transmit interrupt delay in usecs"); +SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt, + 0, "Default receive interrupt delay in usecs"); static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV); static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV); TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt); TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt); +SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN, + &em_tx_abs_int_delay_dflt, 0, + "Default transmit interrupt delay limit in usecs"); +SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN, + &em_rx_abs_int_delay_dflt, 0, + "Default receive interrupt delay limit in usecs"); static int em_rxd = EM_DEFAULT_RXD; static int em_txd = EM_DEFAULT_TXD; TUNABLE_INT("hw.em.rxd", &em_rxd); TUNABLE_INT("hw.em.txd", &em_txd); +SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0, + "Number of receive descriptors per queue"); +SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0, + "Number of transmit descriptors per queue"); static int em_smart_pwr_down = FALSE; TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down); +SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, 
CTLFLAG_RDTUN, &em_smart_pwr_down, + 0, "Set to true to leave smart power down enabled on newer adapters"); /* Controls whether promiscuous also shows bad packets */ static int em_debug_sbp = FALSE; TUNABLE_INT("hw.em.sbp", &em_debug_sbp); +SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0, + "Show bad packets in promiscuous mode"); static int em_enable_msix = TRUE; TUNABLE_INT("hw.em.enable_msix", &em_enable_msix); +SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0, + "Enable MSI-X interrupts"); /* How many packets rxeof tries to clean at a time */ static int em_rx_process_limit = 100; TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit); +SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN, + &em_rx_process_limit, 0, + "Maximum number of received packets to process at a time, -1 means unlimited"); /* Flow control setting - default to FULL */ static int em_fc_setting = e1000_fc_full; TUNABLE_INT("hw.em.fc_setting", &em_fc_setting); +SYSCTL_INT(_hw_em, OID_AUTO, fc_setting, CTLFLAG_RDTUN, &em_fc_setting, 0, + "Flow control"); /* Energy efficient ethernet - default to OFF */ static int eee_setting = 0; TUNABLE_INT("hw.em.eee_setting", &eee_setting); +SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0, + "Enable Energy Efficient Ethernet"); /* Global used in WOL setup with multiport cards */ static int global_quad_port_a = 0; diff --git a/sys/dev/e1000/if_igb.c b/sys/dev/e1000/if_igb.c index a1a14b6bc2e1..972b74583650 100644 --- a/sys/dev/e1000/if_igb.c +++ b/sys/dev/e1000/if_igb.c @@ -300,11 +300,17 @@ MODULE_DEPEND(igb, ether, 1, 1, 1); * Tunable default values. 
*********************************************************************/ +SYSCTL_NODE(_hw, OID_AUTO, igb, CTLFLAG_RD, 0, "IGB driver parameters"); + /* Descriptor defaults */ static int igb_rxd = IGB_DEFAULT_RXD; static int igb_txd = IGB_DEFAULT_TXD; TUNABLE_INT("hw.igb.rxd", &igb_rxd); TUNABLE_INT("hw.igb.txd", &igb_txd); +SYSCTL_INT(_hw_igb, OID_AUTO, rxd, CTLFLAG_RDTUN, &igb_rxd, 0, + "Number of receive descriptors per queue"); +SYSCTL_INT(_hw_igb, OID_AUTO, txd, CTLFLAG_RDTUN, &igb_txd, 0, + "Number of transmit descriptors per queue"); /* ** AIM: Adaptive Interrupt Moderation @@ -314,6 +320,8 @@ TUNABLE_INT("hw.igb.txd", &igb_txd); */ static int igb_enable_aim = TRUE; TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim); +SYSCTL_INT(_hw_igb, OID_AUTO, enable_aim, CTLFLAG_RW, &igb_enable_aim, 0, + "Enable adaptive interrupt moderation"); /* * MSIX should be the default for best performance, @@ -321,12 +329,16 @@ TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim); */ static int igb_enable_msix = 1; TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix); +SYSCTL_INT(_hw_igb, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &igb_enable_msix, 0, + "Enable MSI-X interrupts"); /* ** Tuneable Interrupt rate */ static int igb_max_interrupt_rate = 8000; TUNABLE_INT("hw.igb.max_interrupt_rate", &igb_max_interrupt_rate); +SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN, + &igb_max_interrupt_rate, 0, "Maximum interrupts per second"); /* ** Header split causes the packet header to @@ -338,6 +350,8 @@ TUNABLE_INT("hw.igb.max_interrupt_rate", &igb_max_interrupt_rate); */ static bool igb_header_split = FALSE; TUNABLE_INT("hw.igb.hdr_split", &igb_header_split); +SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0, + "Enable receive mbuf header split"); /* ** This will autoconfigure based on @@ -345,10 +359,15 @@ TUNABLE_INT("hw.igb.hdr_split", &igb_header_split); */ static int igb_num_queues = 0; TUNABLE_INT("hw.igb.num_queues", &igb_num_queues); 
+SYSCTL_INT(_hw_igb, OID_AUTO, num_queues, CTLFLAG_RDTUN, &igb_num_queues, 0, + "Number of queues to configure, 0 indicates autoconfigure"); /* How many packets rxeof tries to clean at a time */ static int igb_rx_process_limit = 100; TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit); +SYSCTL_INT(_hw_igb, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN, + &igb_rx_process_limit, 0, + "Maximum number of received packets to process at a time, -1 means unlimited"); /********************************************************************* * Device identification routine @@ -435,10 +454,9 @@ igb_attach(device_t dev) OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, igb_sysctl_nvm_info, "I", "NVM Information"); - SYSCTL_ADD_INT(device_get_sysctl_ctx(dev), - SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), - OID_AUTO, "enable_aim", CTLTYPE_INT|CTLFLAG_RW, - &igb_enable_aim, 1, "Interrupt Moderation"); + igb_set_sysctl_value(adapter, "enable_aim", + "Interrupt Moderation", &adapter->enable_aim, + igb_enable_aim); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), @@ -1498,7 +1516,7 @@ igb_msix_que(void *arg) more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL); - if (igb_enable_aim == FALSE) + if (adapter->enable_aim == FALSE) goto no_calc; /* ** Do Adaptive Interrupt Moderation: diff --git a/sys/dev/e1000/if_igb.h b/sys/dev/e1000/if_igb.h index f0ab6856250f..80abf6e74344 100644 --- a/sys/dev/e1000/if_igb.h +++ b/sys/dev/e1000/if_igb.h @@ -402,6 +402,7 @@ struct adapter { u16 link_duplex; u32 smartspeed; u32 dmac; + int enable_aim; /* Interface queues */ struct igb_queue *queues; diff --git a/sys/dev/mii/e1000phy.c b/sys/dev/mii/e1000phy.c index b0d7069a996f..e657592b6d15 100644 --- a/sys/dev/mii/e1000phy.c +++ b/sys/dev/mii/e1000phy.c @@ -107,6 +107,7 @@ static const struct mii_phydesc e1000phys[] = { MII_PHY_DESC(xxMARVELL, E1116), MII_PHY_DESC(xxMARVELL, E1116R), MII_PHY_DESC(xxMARVELL, E1118), + MII_PHY_DESC(xxMARVELL, 
E1149R), MII_PHY_DESC(xxMARVELL, E3016), MII_PHY_DESC(xxMARVELL, PHYG65G), MII_PHY_END @@ -147,6 +148,7 @@ e1000phy_attach(device_t dev) sc->mii_flags |= MIIF_HAVEFIBER; break; case MII_MODEL_xxMARVELL_E1149: + case MII_MODEL_xxMARVELL_E1149R: /* * Some 88E1149 PHY's page select is initialized to * point to other bank instead of copper/fiber bank @@ -208,6 +210,7 @@ e1000phy_reset(struct mii_softc *sc) case MII_MODEL_xxMARVELL_E1116: case MII_MODEL_xxMARVELL_E1118: case MII_MODEL_xxMARVELL_E1149: + case MII_MODEL_xxMARVELL_E1149R: case MII_MODEL_xxMARVELL_PHYG65G: /* Disable energy detect mode. */ reg &= ~E1000_SCR_EN_DETECT_MASK; @@ -240,7 +243,8 @@ e1000phy_reset(struct mii_softc *sc) PHY_WRITE(sc, E1000_SCR, reg); if (sc->mii_mpd_model == MII_MODEL_xxMARVELL_E1116 || - sc->mii_mpd_model == MII_MODEL_xxMARVELL_E1149) { + sc->mii_mpd_model == MII_MODEL_xxMARVELL_E1149 || + sc->mii_mpd_model == MII_MODEL_xxMARVELL_E1149R) { PHY_WRITE(sc, E1000_EADR, 2); reg = PHY_READ(sc, E1000_SCR); reg |= E1000_SCR_RGMII_POWER_UP; diff --git a/sys/dev/mii/miidevs b/sys/dev/mii/miidevs index 527494009401..1b98542fdcfe 100644 --- a/sys/dev/mii/miidevs +++ b/sys/dev/mii/miidevs @@ -248,6 +248,7 @@ model xxMARVELL E1111 0x000c Marvell 88E1111 Gigabit PHY model xxMARVELL E1116 0x0021 Marvell 88E1116 Gigabit PHY model xxMARVELL E1116R 0x0024 Marvell 88E1116R Gigabit PHY model xxMARVELL E1118 0x0022 Marvell 88E1118 Gigabit PHY +model xxMARVELL E1149R 0x0025 Marvell 88E1149R Quad Gigabit PHY model xxMARVELL E3016 0x0026 Marvell 88E3016 10/100 Fast Ethernet PHY model xxMARVELL PHYG65G 0x0027 Marvell PHYG65G Gigabit PHY model MARVELL E1000 0x0005 Marvell 88E1000 Gigabit PHY diff --git a/sys/dev/uart/uart_bus_pci.c b/sys/dev/uart/uart_bus_pci.c index 48a134bdbd17..c47902263710 100644 --- a/sys/dev/uart/uart_bus_pci.c +++ b/sys/dev/uart/uart_bus_pci.c @@ -113,6 +113,8 @@ static struct pci_id pci_ns8250_ids[] = { { 0x9710, 0x9820, 0x1000, 1, "NetMos NM9820 Serial Port", 0x10 }, { 0x9710, 
0x9835, 0x1000, 1, "NetMos NM9835 Serial Port", 0x10 }, { 0x9710, 0x9865, 0xa000, 0x1000, "NetMos NM9865 Serial Port", 0x10 }, +{ 0x9710, 0x9900, 0xa000, 0x1000, + "MosChip MCS9900 PCIe to Peripheral Controller", 0x10 }, { 0x9710, 0x9901, 0xa000, 0x1000, "MosChip MCS9901 PCIe to Peripheral Controller", 0x10 }, { 0xdeaf, 0x9051, 0xffff, 0, "Middle Digital PC Weasel Serial Port", 0x10 }, diff --git a/sys/fs/tmpfs/tmpfs_subr.c b/sys/fs/tmpfs/tmpfs_subr.c index 5a07a4534f67..e9324cfd87c3 100644 --- a/sys/fs/tmpfs/tmpfs_subr.c +++ b/sys/fs/tmpfs/tmpfs_subr.c @@ -926,7 +926,7 @@ tmpfs_reg_resize(struct vnode *vp, off_t newsize) if (newpages < oldpages) { swap_pager_freespace(uobj, newpages, oldpages - newpages); - vm_object_page_remove(uobj, newpages, 0, FALSE); + vm_object_page_remove(uobj, newpages, 0, 0); } /* diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c index 8f11b477bbf7..fe220d7c939d 100644 --- a/sys/i386/i386/pmap.c +++ b/sys/i386/i386/pmap.c @@ -2927,8 +2927,8 @@ pmap_remove_all(vm_page_t m) vm_offset_t va; vm_page_t free; - KASSERT((m->flags & PG_FICTITIOUS) == 0, - ("pmap_remove_all: page %p is fictitious", m)); + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_remove_all: page %p is not managed", m)); free = NULL; vm_page_lock_queues(); sched_pin(); diff --git a/sys/i386/i386/sys_machdep.c b/sys/i386/i386/sys_machdep.c index 6a761bf31ec6..e7b08282af4e 100644 --- a/sys/i386/i386/sys_machdep.c +++ b/sys/i386/i386/sys_machdep.c @@ -32,7 +32,7 @@ #include __FBSDID("$FreeBSD$"); -#include "opt_capabilities.h" +#include "opt_capsicum.h" #include "opt_kstack_pages.h" #include @@ -111,7 +111,7 @@ sysarch(td, uap) AUDIT_ARG_CMD(uap->op); -#ifdef CAPABILITIES +#ifdef CAPABILITY_MODE /* * Whitelist of operations which are safe enough for capability mode. 
*/ diff --git a/sys/i386/xen/pmap.c b/sys/i386/xen/pmap.c index 1018ec9f676b..5b94bee67812 100644 --- a/sys/i386/xen/pmap.c +++ b/sys/i386/xen/pmap.c @@ -2430,8 +2430,8 @@ pmap_remove_all(vm_page_t m) pt_entry_t *pte, tpte; vm_page_t free; - KASSERT((m->flags & PG_FICTITIOUS) == 0, - ("pmap_remove_all: page %p is fictitious", m)); + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_remove_all: page %p is not managed", m)); free = NULL; vm_page_lock_queues(); sched_pin(); diff --git a/sys/ia64/ia64/pmap.c b/sys/ia64/ia64/pmap.c index 625d0af700c2..b36f8139e037 100644 --- a/sys/ia64/ia64/pmap.c +++ b/sys/ia64/ia64/pmap.c @@ -1424,8 +1424,8 @@ pmap_remove_all(vm_page_t m) pmap_t oldpmap; pv_entry_t pv; - KASSERT((m->flags & PG_FICTITIOUS) == 0, - ("pmap_remove_all: page %p is fictitious", m)); + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_remove_all: page %p is not managed", m)); vm_page_lock_queues(); while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { struct ia64_lpte *pte; diff --git a/sys/kern/subr_trap.c b/sys/kern/subr_trap.c index 0113d7b121c0..3527ed1fbedf 100644 --- a/sys/kern/subr_trap.c +++ b/sys/kern/subr_trap.c @@ -44,7 +44,7 @@ #include __FBSDID("$FreeBSD$"); -#include "opt_capabilities.h" +#include "opt_capsicum.h" #include "opt_ktrace.h" #include "opt_kdtrace.h" #include "opt_sched.h" @@ -313,7 +313,7 @@ syscallenter(struct thread *td, struct syscall_args *sa) goto retval; } -#ifdef CAPABILITIES +#ifdef CAPABILITY_MODE /* * In capability mode, we only allow access to system calls * flagged with SYF_CAPENABLED. 
diff --git a/sys/kern/sys_capability.c b/sys/kern/sys_capability.c index c6d9826c804f..a19e8817a6f3 100644 --- a/sys/kern/sys_capability.c +++ b/sys/kern/sys_capability.c @@ -36,7 +36,7 @@ * */ -#include "opt_capabilities.h" +#include "opt_capsicum.h" #include __FBSDID("$FreeBSD$"); @@ -59,7 +59,7 @@ __FBSDID("$FreeBSD$"); #include #include -#ifdef CAPABILITIES +#ifdef CAPABILITY_MODE FEATURE(security_capabilities, "Capsicum Capability Mode"); @@ -99,7 +99,7 @@ cap_getmode(struct thread *td, struct cap_getmode_args *uap) return (copyout(&i, uap->modep, sizeof(i))); } -#else /* !CAPABILITIES */ +#else /* !CAPABILITY_MODE */ int cap_enter(struct thread *td, struct cap_enter_args *uap) @@ -115,4 +115,4 @@ cap_getmode(struct thread *td, struct cap_getmode_args *uap) return (ENOSYS); } -#endif /* CAPABILITIES */ +#endif /* CAPABILITY_MODE */ diff --git a/sys/kern/uipc_shm.c b/sys/kern/uipc_shm.c index 00496af21ca8..cea70c772c4e 100644 --- a/sys/kern/uipc_shm.c +++ b/sys/kern/uipc_shm.c @@ -264,7 +264,7 @@ shm_dotruncate(struct shmfd *shmfd, off_t length) /* Toss in memory pages. */ if (nobjsize < object->size) vm_object_page_remove(object, nobjsize, object->size, - FALSE); + 0); /* Toss pages from swap. */ if (object->type == OBJT_SWAP) diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c index c5ae83cbf47b..934745b433b6 100644 --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -1190,8 +1190,8 @@ bufobj_invalbuf(struct bufobj *bo, int flags, int slpflag, int slptimeo) */ if (bo->bo_object != NULL && (flags & (V_ALT | V_NORMAL)) == 0) { VM_OBJECT_LOCK(bo->bo_object); - vm_object_page_remove(bo->bo_object, 0, 0, - (flags & V_SAVE) ? TRUE : FALSE); + vm_object_page_remove(bo->bo_object, 0, 0, (flags & V_SAVE) ? 
+ OBJPR_CLEANONLY : 0); VM_OBJECT_UNLOCK(bo->bo_object); } diff --git a/sys/mips/mips/pmap.c b/sys/mips/mips/pmap.c index 32124fed2024..8b35e0dc5512 100644 --- a/sys/mips/mips/pmap.c +++ b/sys/mips/mips/pmap.c @@ -1708,8 +1708,8 @@ pmap_remove_all(vm_page_t m) pv_entry_t pv; pt_entry_t *pte, tpte; - KASSERT((m->flags & PG_FICTITIOUS) == 0, - ("pmap_remove_all: page %p is fictitious", m)); + KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, + ("pmap_remove_all: page %p is not managed", m)); vm_page_lock_queues(); if (m->md.pv_flags & PV_TABLE_REF) diff --git a/sys/netinet/ip_fw.h b/sys/netinet/ip_fw.h index 06e107c2de61..ff3a67fe61ce 100644 --- a/sys/netinet/ip_fw.h +++ b/sys/netinet/ip_fw.h @@ -56,6 +56,12 @@ #define IPFW_ARG_MAX 65534 #define IP_FW_TABLEARG 65535 /* XXX should use 0 */ +/* + * Number of entries in the call stack of the call/return commands. + * Call stack currently is an uint16_t array with rule numbers. + */ +#define IPFW_CALLSTACK_SIZE 16 + /* * The kernel representation of ipfw rules is made of a list of * 'instructions' (for all practical purposes equivalent to BPF @@ -195,6 +201,8 @@ enum ipfw_opcodes { /* arguments (4 byte each) */ O_SOCKARG, /* socket argument */ + O_CALLRETURN, /* arg1=called rule number */ + O_LAST_OPCODE /* not an opcode! 
*/ }; diff --git a/sys/netinet/ip_var.h b/sys/netinet/ip_var.h index e993279199da..cd30093e1700 100644 --- a/sys/netinet/ip_var.h +++ b/sys/netinet/ip_var.h @@ -286,6 +286,7 @@ enum { }; #define MTAG_IPFW 1148380143 /* IPFW-tagged cookie */ #define MTAG_IPFW_RULE 1262273568 /* rule reference */ +#define MTAG_IPFW_CALL 1308397630 /* call stack */ struct ip_fw_args; typedef int (*ip_fw_chk_ptr_t)(struct ip_fw_args *args); diff --git a/sys/netinet/ipfw/ip_fw2.c b/sys/netinet/ipfw/ip_fw2.c index 48837a7f8f17..4e25f9a086d8 100644 --- a/sys/netinet/ipfw/ip_fw2.c +++ b/sys/netinet/ipfw/ip_fw2.c @@ -2095,6 +2095,123 @@ do { \ continue; break; /* not reached */ + case O_CALLRETURN: { + /* + * Implementation of `subroutine' call/return, + * in the stack carried in an mbuf tag. This + * is different from `skipto' in that any call + * address is possible (`skipto' must prevent + * backward jumps to avoid endless loops). + * We have `return' action when F_NOT flag is + * present. The `m_tag_id' field is used as + * stack pointer. + */ + struct m_tag *mtag; + uint16_t jmpto, *stack; + +#define IS_CALL ((cmd->len & F_NOT) == 0) +#define IS_RETURN ((cmd->len & F_NOT) != 0) + /* + * Hand-rolled version of m_tag_locate() with + * wildcard `type'. + * If not already tagged, allocate new tag. + */ + mtag = m_tag_first(m); + while (mtag != NULL) { + if (mtag->m_tag_cookie == + MTAG_IPFW_CALL) + break; + mtag = m_tag_next(m, mtag); + } + if (mtag == NULL && IS_CALL) { + mtag = m_tag_alloc(MTAG_IPFW_CALL, 0, + IPFW_CALLSTACK_SIZE * + sizeof(uint16_t), M_NOWAIT); + if (mtag != NULL) + m_tag_prepend(m, mtag); + } + + /* + * On error both `call' and `return' just + * continue with next rule. 
+ */ + if (IS_RETURN && (mtag == NULL || + mtag->m_tag_id == 0)) { + l = 0; /* exit inner loop */ + break; + } + if (IS_CALL && (mtag == NULL || + mtag->m_tag_id >= IPFW_CALLSTACK_SIZE)) { + printf("ipfw: call stack error, " + "go to next rule\n"); + l = 0; /* exit inner loop */ + break; + } + + f->pcnt++; /* update stats */ + f->bcnt += pktlen; + f->timestamp = time_uptime; + stack = (uint16_t *)(mtag + 1); + + /* + * The `call' action may use cached f_pos + * (in f->next_rule), whose version is written + * in f->x_next. + * The `return' action, however, doesn't have + * fixed jump address in cmd->arg1 and can't use + * cache. + */ + if (IS_CALL) { + stack[mtag->m_tag_id] = f->rulenum; + mtag->m_tag_id++; + if (cmd->arg1 != IP_FW_TABLEARG && + (uintptr_t)f->x_next == chain->id) { + f_pos = (uintptr_t)f->next_rule; + } else { + jmpto = (cmd->arg1 == + IP_FW_TABLEARG) ? tablearg: + cmd->arg1; + f_pos = ipfw_find_rule(chain, + jmpto, 0); + /* update the cache */ + if (cmd->arg1 != + IP_FW_TABLEARG) { + f->next_rule = + (void *)(uintptr_t) + f_pos; + f->x_next = + (void *)(uintptr_t) + chain->id; + } + } + } else { /* `return' action */ + mtag->m_tag_id--; + jmpto = stack[mtag->m_tag_id] + 1; + f_pos = ipfw_find_rule(chain, jmpto, 0); + } + + /* + * Skip disabled rules, and re-enter + * the inner loop with the correct + * f_pos, f, l and cmd. + * Also clear cmdlen and skip_or + */ + for (; f_pos < chain->n_rules - 1 && + (V_set_disable & + (1 << chain->map[f_pos]->set)); f_pos++) + ; + /* Re-enter the inner loop at the dest rule.
*/ + f = chain->map[f_pos]; + l = f->cmd_len; + cmd = f->cmd; + cmdlen = 0; + skip_or = 0; + continue; + break; /* NOTREACHED */ + } +#undef IS_CALL +#undef IS_RETURN + case O_REJECT: /* * Drop the packet and send a reject notice diff --git a/sys/netinet/ipfw/ip_fw_log.c b/sys/netinet/ipfw/ip_fw_log.c index 3560e137f1e5..2b55a382d70d 100644 --- a/sys/netinet/ipfw/ip_fw_log.c +++ b/sys/netinet/ipfw/ip_fw_log.c @@ -304,6 +304,13 @@ ipfw_log(struct ip_fw *f, u_int hlen, struct ip_fw_args *args, case O_REASS: action = "Reass"; break; + case O_CALLRETURN: + if (cmd->len & F_NOT) + action = "Return"; + else + snprintf(SNPARGS(action2, 0), "Call %d", + cmd->arg1); + break; default: action = "UNKNOWN"; break; diff --git a/sys/netinet/ipfw/ip_fw_sockopt.c b/sys/netinet/ipfw/ip_fw_sockopt.c index 2347456a8c22..143285850113 100644 --- a/sys/netinet/ipfw/ip_fw_sockopt.c +++ b/sys/netinet/ipfw/ip_fw_sockopt.c @@ -752,6 +752,7 @@ check_ipfw_struct(struct ip_fw *rule, int size) #endif case O_SKIPTO: case O_REASS: + case O_CALLRETURN: check_size: if (cmdlen != F_INSN_SIZE(ipfw_insn)) goto bad_size; diff --git a/sys/nfs/bootp_subr.c b/sys/nfs/bootp_subr.c index ca90089d02f5..bd4a826f8317 100644 --- a/sys/nfs/bootp_subr.c +++ b/sys/nfs/bootp_subr.c @@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$"); #include "opt_bootp.h" +#include "opt_nfs.h" #include #include @@ -1699,6 +1700,9 @@ bootpc_init(void) } rootdevnames[0] = "nfs:"; +#ifdef NFSCLIENT + rootdevnames[1] = "oldnfs:"; +#endif mountopts(&nd->root_args, NULL); for (ifctx = gctx->interfaces; ifctx != NULL; ifctx = ifctx->next) diff --git a/sys/ufs/ffs/ffs_inode.c b/sys/ufs/ffs/ffs_inode.c index a7b43e2f67a7..62fa8e0f3a7c 100644 --- a/sys/ufs/ffs/ffs_inode.c +++ b/sys/ufs/ffs/ffs_inode.c @@ -128,7 +128,7 @@ ffs_pages_remove(struct vnode *vp, vm_pindex_t start, vm_pindex_t end) if ((object = vp->v_object) == NULL) return; VM_OBJECT_LOCK(object); - vm_object_page_remove(object, start, end, FALSE); + vm_object_page_remove(object, start, end, 
0); VM_OBJECT_UNLOCK(object); } diff --git a/sys/ufs/ffs/ffs_softdep.c b/sys/ufs/ffs/ffs_softdep.c index a10104d9b837..9af5f6759770 100644 --- a/sys/ufs/ffs/ffs_softdep.c +++ b/sys/ufs/ffs/ffs_softdep.c @@ -12081,6 +12081,7 @@ top: continue; case D_FREEWORK: + case D_FREEDEP: continue; default: diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c index 752354a4bc3d..31886afa69c5 100644 --- a/sys/vm/vm_map.c +++ b/sys/vm/vm_map.c @@ -2708,7 +2708,15 @@ vm_map_entry_delete(vm_map_t map, vm_map_entry_t entry) ((object->flags & (OBJ_NOSPLIT|OBJ_ONEMAPPING)) == OBJ_ONEMAPPING || object == kernel_object || object == kmem_object)) { vm_object_collapse(object); - vm_object_page_remove(object, offidxstart, offidxend, FALSE); + + /* + * The option OBJPR_NOTMAPPED can be passed here + * because vm_map_delete() already performed + * pmap_remove() on the only mapping to this range + * of pages. + */ + vm_object_page_remove(object, offidxstart, offidxend, + OBJPR_NOTMAPPED); if (object->type == OBJT_SWAP) swap_pager_freespace(object, offidxstart, count); if (offidxend >= object->size && diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c index 1a3d3988efb2..b5788f587579 100644 --- a/sys/vm/vm_object.c +++ b/sys/vm/vm_object.c @@ -923,6 +923,10 @@ vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int pagerflags, * We invalidate (remove) all pages from the address space * for semantic correctness. * + * If the backing object is a device object with unmanaged pages, then any + * mappings to the specified range of pages must be removed before this + * function is called. + * * Note: certain anonymous maps, such as MAP_NOSYNC maps, * may start out with a NULL object. 
*/ @@ -978,12 +982,19 @@ vm_object_sync(vm_object_t object, vm_ooffset_t offset, vm_size_t size, } if ((object->type == OBJT_VNODE || object->type == OBJT_DEVICE) && invalidate) { - boolean_t purge; - purge = old_msync || (object->type == OBJT_DEVICE); - vm_object_page_remove(object, - OFF_TO_IDX(offset), - OFF_TO_IDX(offset + size + PAGE_MASK), - purge ? FALSE : TRUE); + if (object->type == OBJT_DEVICE) + /* + * The option OBJPR_NOTMAPPED must be passed here + * because vm_object_page_remove() cannot remove + * unmanaged mappings. + */ + flags = OBJPR_NOTMAPPED; + else if (old_msync) + flags = 0; + else + flags = OBJPR_CLEANONLY; + vm_object_page_remove(object, OFF_TO_IDX(offset), + OFF_TO_IDX(offset + size + PAGE_MASK), flags); } VM_OBJECT_UNLOCK(object); } @@ -1754,76 +1765,70 @@ vm_object_collapse(vm_object_t object) * vm_object_page_remove: * * For the given object, either frees or invalidates each of the - * specified pages. In general, a page is freed. However, if a - * page is wired for any reason other than the existence of a - * managed, wired mapping, then it may be invalidated but not - * removed from the object. Pages are specified by the given - * range ["start", "end") and Boolean "clean_only". As a - * special case, if "end" is zero, then the range extends from - * "start" to the end of the object. If "clean_only" is TRUE, - * then only the non-dirty pages within the specified range are - * affected. + * specified pages. In general, a page is freed. However, if a page is + * wired for any reason other than the existence of a managed, wired + * mapping, then it may be invalidated but not removed from the object. + * Pages are specified by the given range ["start", "end") and the option + * OBJPR_CLEANONLY. As a special case, if "end" is zero, then the range + * extends from "start" to the end of the object. If the option + * OBJPR_CLEANONLY is specified, then only the non-dirty pages within the + * specified range are affected. 
If the option OBJPR_NOTMAPPED is + * specified, then the pages within the specified range must have no + * mappings. Otherwise, if this option is not specified, any mappings to + * the specified pages are removed before the pages are freed or + * invalidated. * - * In general, this operation should only be performed on objects - * that contain managed pages. There are two exceptions. First, - * it may be performed on the kernel and kmem objects. Second, - * it may be used by msync(..., MS_INVALIDATE) to invalidate - * device-backed pages. In both of these cases, "clean_only" - * must be FALSE. + * In general, this operation should only be performed on objects that + * contain managed pages. There are, however, two exceptions. First, it + * is performed on the kernel and kmem objects by vm_map_entry_delete(). + * Second, it is used by msync(..., MS_INVALIDATE) to invalidate device- + * backed pages. In both of these cases, the option OBJPR_CLEANONLY must + * not be specified and the option OBJPR_NOTMAPPED must be specified. * * The object must be locked. */ void vm_object_page_remove(vm_object_t object, vm_pindex_t start, vm_pindex_t end, - boolean_t clean_only) + int options) { vm_page_t p, next; int wirings; VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); + KASSERT((object->type != OBJT_DEVICE && object->type != OBJT_PHYS) || + (options & (OBJPR_CLEANONLY | OBJPR_NOTMAPPED)) == OBJPR_NOTMAPPED, + ("vm_object_page_remove: illegal options for object %p", object)); if (object->resident_page_count == 0) goto skipmemq; - - /* - * Since physically-backed objects do not use managed pages, we can't - * remove pages from the object (we must instead remove the page - * references, and then destroy the object). 
- */ - KASSERT(object->type != OBJT_PHYS || object == kernel_object || - object == kmem_object, - ("attempt to remove pages from a physical object")); - vm_object_pip_add(object, 1); again: p = vm_page_find_least(object, start); /* - * Assert: the variable p is either (1) the page with the - * least pindex greater than or equal to the parameter pindex - * or (2) NULL. + * Here, the variable "p" is either (1) the page with the least pindex + * greater than or equal to the parameter "start" or (2) NULL. */ - for (; - p != NULL && (p->pindex < end || end == 0); - p = next) { + for (; p != NULL && (p->pindex < end || end == 0); p = next) { next = TAILQ_NEXT(p, listq); /* - * If the page is wired for any reason besides the - * existence of managed, wired mappings, then it cannot - * be freed. For example, fictitious pages, which - * represent device memory, are inherently wired and - * cannot be freed. They can, however, be invalidated - * if "clean_only" is FALSE. + * If the page is wired for any reason besides the existence + * of managed, wired mappings, then it cannot be freed. For + * example, fictitious pages, which represent device memory, + * are inherently wired and cannot be freed. They can, + * however, be invalidated if the option OBJPR_CLEANONLY is + * not specified. */ vm_page_lock(p); if ((wirings = p->wire_count) != 0 && (wirings = pmap_page_wired_mappings(p)) != p->wire_count) { - /* Fictitious pages do not have managed mappings. */ - if ((p->flags & PG_FICTITIOUS) == 0) + if ((options & OBJPR_NOTMAPPED) == 0) { pmap_remove_all(p); - /* Account for removal of managed, wired mappings. */ - p->wire_count -= wirings; - if (!clean_only) { + /* Account for removal of wired mappings. 
*/ + if (wirings != 0) + p->wire_count -= wirings; + } + if ((options & OBJPR_CLEANONLY) == 0) { p->valid = 0; vm_page_undirty(p); } @@ -1834,17 +1839,20 @@ again: goto again; KASSERT((p->flags & PG_FICTITIOUS) == 0, ("vm_object_page_remove: page %p is fictitious", p)); - if (clean_only && p->valid) { - pmap_remove_write(p); + if ((options & OBJPR_CLEANONLY) != 0 && p->valid != 0) { + if ((options & OBJPR_NOTMAPPED) == 0) + pmap_remove_write(p); if (p->dirty) { vm_page_unlock(p); continue; } } - pmap_remove_all(p); - /* Account for removal of managed, wired mappings. */ - if (wirings != 0) - p->wire_count -= wirings; + if ((options & OBJPR_NOTMAPPED) == 0) { + pmap_remove_all(p); + /* Account for removal of wired mappings. */ + if (wirings != 0) + p->wire_count -= wirings; + } vm_page_free(p); vm_page_unlock(p); } @@ -1991,9 +1999,8 @@ vm_object_coalesce(vm_object_t prev_object, vm_ooffset_t prev_offset, * deallocation. */ if (next_pindex < prev_object->size) { - vm_object_page_remove(prev_object, - next_pindex, - next_pindex + next_size, FALSE); + vm_object_page_remove(prev_object, next_pindex, next_pindex + + next_size, 0); if (prev_object->type == OBJT_SWAP) swap_pager_freespace(prev_object, next_pindex, next_size); diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h index 0f4f47d204c8..a11f1445ce5d 100644 --- a/sys/vm/vm_object.h +++ b/sys/vm/vm_object.h @@ -168,6 +168,12 @@ struct vm_object { #define OBJPC_INVAL 0x2 /* invalidate */ #define OBJPC_NOSYNC 0x4 /* skip if PG_NOSYNC */ +/* + * The following options are supported by vm_object_page_remove(). + */ +#define OBJPR_CLEANONLY 0x1 /* Don't remove dirty pages. */ +#define OBJPR_NOTMAPPED 0x2 /* Don't unmap pages. 
*/ + TAILQ_HEAD(object_q, vm_object); extern struct object_q vm_object_list; /* list of allocated objects */ @@ -219,7 +225,8 @@ void vm_object_set_writeable_dirty (vm_object_t); void vm_object_init (void); void vm_object_page_clean(vm_object_t object, vm_ooffset_t start, vm_ooffset_t end, int flags); -void vm_object_page_remove (vm_object_t, vm_pindex_t, vm_pindex_t, boolean_t); +void vm_object_page_remove(vm_object_t object, vm_pindex_t start, + vm_pindex_t end, int options); boolean_t vm_object_populate(vm_object_t, vm_pindex_t, vm_pindex_t); void vm_object_print(long addr, boolean_t have_addr, long count, char *modif); void vm_object_reference (vm_object_t); diff --git a/sys/vm/vnode_pager.c b/sys/vm/vnode_pager.c index a8eca2028366..23ade6352acf 100644 --- a/sys/vm/vnode_pager.c +++ b/sys/vm/vnode_pager.c @@ -387,7 +387,7 @@ vnode_pager_setsize(vp, nsize) */ if (nobjsize < object->size) vm_object_page_remove(object, nobjsize, object->size, - FALSE); + 0); /* * this gets rid of garbage at the end of a page that is now * only partially backed by the vnode. diff --git a/usr.bin/quota/quota.c b/usr.bin/quota/quota.c index 6cc35173a082..67d9c5a6b032 100644 --- a/usr.bin/quota/quota.c +++ b/usr.bin/quota/quota.c @@ -264,8 +264,11 @@ prthumanval(int len, u_int64_t bytes) { char buf[len + 1]; - humanize_number(buf, sizeof(buf), bytes, "", HN_AUTOSCALE, - HN_B | HN_NOSPACE | HN_DECIMAL); + /* + * Limit the width to 5 bytes as that is what users expect. + */ + humanize_number(buf, sizeof(buf) < 5 ? sizeof(buf) : 5, bytes, "", + HN_AUTOSCALE, HN_B | HN_NOSPACE | HN_DECIMAL); (void)printf(" %*s", len, buf); } @@ -352,10 +355,13 @@ showquotas(int type, u_long id, const char *name) prthumanval(7, dbtob(qup->dqblk.dqb_bhardlimit)); } else { printf(" %7ju%c %7ju %7ju", - dbtob(1024) * (uintmax_t)qup->dqblk.dqb_curblocks, + (uintmax_t)dbtob(qup->dqblk.dqb_curblocks) + / 1024, (msgb == NULL) ? 
' ' : '*', - dbtob(1024) * (uintmax_t)qup->dqblk.dqb_bsoftlimit, - dbtob(1024) * (uintmax_t)qup->dqblk.dqb_bhardlimit); + (uintmax_t)dbtob(qup->dqblk.dqb_bsoftlimit) + / 1024, + (uintmax_t)dbtob(qup->dqblk.dqb_bhardlimit) + / 1024); } if (msgb != NULL) bgrace = timeprt(qup->dqblk.dqb_btime);