From 306abf0f35a0253765d7cf41e43fdd3bf423fd0b Mon Sep 17 00:00:00 2001 From: Gleb Smirnoff Date: Fri, 24 Aug 2018 18:47:50 +0000 Subject: [PATCH 01/51] Either "free" or "allocated" is misleading here, since an item in a bucket is free from perspective of UMA consumer, and it is allocated from perspective of keg. Discussed with: markj Approved by: re (kib) --- sys/vm/uma_int.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/vm/uma_int.h b/sys/vm/uma_int.h index 3daa50597182..286a1329b08e 100644 --- a/sys/vm/uma_int.h +++ b/sys/vm/uma_int.h @@ -188,7 +188,7 @@ struct uma_hash { struct uma_bucket { LIST_ENTRY(uma_bucket) ub_link; /* Link into the zone */ - int16_t ub_cnt; /* Count of allocated items. */ + int16_t ub_cnt; /* Count of items in bucket. */ int16_t ub_entries; /* Max items. */ void *ub_bucket[]; /* actual allocation storage */ }; From 8369ba427a9a4db4c4886fb2a6d7a7dae00de707 Mon Sep 17 00:00:00 2001 From: Kyle Evans Date: Fri, 24 Aug 2018 20:44:58 +0000 Subject: [PATCH 02/51] libbe(3)/bectl(8): Make consistent with beadm vermaden (maintainer of beadm) points out the following inconsistencies: - "missing command" is not printed prior to usage if the error is simply a missing command; this should be obvious from the context - "bectl rename" isn't using the "don't unmount" flag (zfs rename -u), so the active BE can't be renamed. It doesn't make sense in our context to *not* use -u, so use it. Documentation updates reflect the above and note an inconsistency with the 'destroy' command that is consistent with other parts of the base system. A fix for libbe(3) not properly being installed to /lib is included. SHLIBDIR should have been added when it was moved in r337995. 
Approved by: re (kib) --- ObsoleteFiles.inc | 2 ++ lib/libbe/Makefile | 1 + lib/libbe/be.c | 14 +++----------- lib/libbe/libbe.3 | 9 +++++++-- sbin/bectl/bectl.8 | 8 +++++--- sbin/bectl/bectl.c | 4 +--- tools/build/mk/OptionalObsoleteFiles.inc | 2 +- 7 files changed, 20 insertions(+), 20 deletions(-) diff --git a/ObsoleteFiles.inc b/ObsoleteFiles.inc index 414be14d1fe7..53e3c6104ce5 100644 --- a/ObsoleteFiles.inc +++ b/ObsoleteFiles.inc @@ -38,6 +38,8 @@ # xargs -n1 | sort | uniq -d; # done +# 20180824: libbe(3) SHLIBDIR fixed to reflect correct location +OLD_LIBS+=usr/lib/libbe.so.1 # 20180819: Remove deprecated arc4random(3) stir/addrandom interfaces OLD_FILES+=usr/share/man/man3/arc4random_addrandom.3.gz OLD_FILES+=usr/share/man/man3/arc4random_stir.3.gz diff --git a/lib/libbe/Makefile b/lib/libbe/Makefile index dad908d2945f..5fada3204fb9 100644 --- a/lib/libbe/Makefile +++ b/lib/libbe/Makefile @@ -2,6 +2,7 @@ PACKAGE= lib${LIB} LIB= be +SHLIBDIR?= /lib SHLIB_MAJOR= 1 SHLIB_MINOR= 0 diff --git a/lib/libbe/be.c b/lib/libbe/be.c index 5b5015612646..16a64f24ca3f 100644 --- a/lib/libbe/be.c +++ b/lib/libbe/be.c @@ -623,10 +623,6 @@ be_rename(libbe_handle_t *lbh, const char *old, const char *new) if ((err = be_root_concat(lbh, new, full_new)) != 0) return (set_error(lbh, err)); - /* Check if old is active BE */ - if (strcmp(full_old, be_active_path(lbh)) == 0) - return (set_error(lbh, BE_ERR_MOUNTED)); - if (!zfs_dataset_exists(lbh->lzh, full_old, ZFS_TYPE_DATASET)) return (set_error(lbh, BE_ERR_NOENT)); @@ -637,14 +633,10 @@ be_rename(libbe_handle_t *lbh, const char *old, const char *new) ZFS_TYPE_FILESYSTEM)) == NULL) return (set_error(lbh, BE_ERR_ZFSOPEN)); - /* XXX TODO: Allow a force flag */ - if (zfs_is_mounted(zfs_hdl, NULL)) { - zfs_close(zfs_hdl); - return (set_error(lbh, BE_ERR_MOUNTED)); - } - /* recurse, nounmount, forceunmount */ - struct renameflags flags = { 0, 0, 0 }; + struct renameflags flags = { + .nounmount = 1, + }; err = zfs_rename(zfs_hdl, 
NULL, full_new, flags); diff --git a/lib/libbe/libbe.3 b/lib/libbe/libbe.3 index 2e1b8cd699ef..02c14d809946 100644 --- a/lib/libbe/libbe.3 +++ b/lib/libbe/libbe.3 @@ -28,7 +28,7 @@ .\" .\" $FreeBSD$ .\" -.Dd August 16, 2018 +.Dd August 24, 2018 .Dt LIBBE 3 .Os .Sh NAME @@ -222,7 +222,12 @@ snapshot. .Pp The .Fn be_rename -function renames a boot environment. +function renames a boot environment without unmounting it, as if renamed with +the +.Fl u +argument were passed to +.Nm zfs +.Cm rename .Pp The .Fn be_activate diff --git a/sbin/bectl/bectl.8 b/sbin/bectl/bectl.8 index e853126b6614..56ff28c71e13 100644 --- a/sbin/bectl/bectl.8 +++ b/sbin/bectl/bectl.8 @@ -18,7 +18,7 @@ .\" .\" $FreeBSD$ .\" -.Dd August 22, 2018 +.Dd August 24, 2018 .Dt BECTL 8 .Os .Sh NAME @@ -132,7 +132,8 @@ Destroys the given .Ar beName boot environment or .Ar beName@snapshot -snapshot. +snapshot without confirmation, unlike in +.Nm beadm . Specifying .Fl F will automatically unmount without confirmation. @@ -239,10 +240,11 @@ Mount at the specified .Ar mountpoint if provided. .It Cm rename Ar origBeName newBeName -Renames the given nonactive +Renames the given .Ar origBeName to the given .Ar newBeName . +The boot environment will not be unmounted in order for this rename to occur. .It Cm unjail Brq Ar jailID | jailName | beName Destroys the jail created from the given boot environment. 
.It Xo diff --git a/sbin/bectl/bectl.c b/sbin/bectl/bectl.c index 5a121fda8089..89a90e4af02f 100644 --- a/sbin/bectl/bectl.c +++ b/sbin/bectl/bectl.c @@ -491,10 +491,8 @@ main(int argc, char *argv[]) const char *command; int command_index, rc; - if (argc < 2) { - fprintf(stderr, "missing command\n"); + if (argc < 2) return (usage(false)); - } command = argv[1]; diff --git a/tools/build/mk/OptionalObsoleteFiles.inc b/tools/build/mk/OptionalObsoleteFiles.inc index ac0f86f3d581..5d05ddfab6d1 100644 --- a/tools/build/mk/OptionalObsoleteFiles.inc +++ b/tools/build/mk/OptionalObsoleteFiles.inc @@ -1288,7 +1288,7 @@ OLD_FILES+=usr/bin/ztest OLD_FILES+=usr/lib/libbe.a OLD_FILES+=usr/lib/libbe_p.a OLD_FILES+=usr/lib/libbe.so -OLD_LIBS+=usr/lib/libbe.so.1 +OLD_LIBS+=lib/libbe.so.1 OLD_FILES+=usr/lib/libzfs.a OLD_LIBS+=usr/lib/libzfs.so OLD_FILES+=usr/lib/libzfs_core.a From 7f6921ae183890b6ba0e63d34c749baacc97d9db Mon Sep 17 00:00:00 2001 From: Marius Strobl Date: Fri, 24 Aug 2018 21:08:05 +0000 Subject: [PATCH 03/51] The read accessors generated by __BUS_ACCESSOR() have the problem that they don't check the result of BUS_READ_IVAR(9) and silently return stack garbage on failure in case a bus doesn't implement a particular instance variable for example. With MMC bridges not providing MMCBR_IVAR_RETUNE_REQ, yet, this in turn can cause mmc(4) to get into a state in which re-tuning seems to be necessary but is inappropriate, causing mmc_wait_for_request() to fail. Thus, don't use __BUS_ACCESSOR() for mmcbr_get_retune_req() and instead provide a version of the latter which returns retune_req_none if reading MMCBR_IVAR_RETUNE_REQ fails. One more straight-forward solution would have been to change mmc(4) to not call mmcbr_get_retune_req() if the current transfer mode doesn't require re-tuning to begin with. However, for modes such as SDR50, it depends on the controller whether periodic re-tuning is needed. 
Therefore, knowledge of whether a particular transfer mode does require re-tuning should be kept to the bridge drivers. This change is the generic version of r338271, as intended not requiring bridge drivers to be touched (unless transfer modes beyond high speed are to be supported that is). Approved by: re (gjb) --- sys/dev/mmc/mmcbrvar.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/sys/dev/mmc/mmcbrvar.h b/sys/dev/mmc/mmcbrvar.h index a6285eb7e9d3..7b0bc4739863 100644 --- a/sys/dev/mmc/mmcbrvar.h +++ b/sys/dev/mmc/mmcbrvar.h @@ -97,7 +97,6 @@ MMCBR_ACCESSOR(host_ocr, HOST_OCR, int) MMCBR_ACCESSOR(mode, MODE, int) MMCBR_ACCESSOR(ocr, OCR, int) MMCBR_ACCESSOR(power_mode, POWER_MODE, int) -MMCBR_ACCESSOR(retune_req, RETUNE_REQ, int) MMCBR_ACCESSOR(vdd, VDD, int) MMCBR_ACCESSOR(vccq, VCCQ, int) MMCBR_ACCESSOR(caps, CAPS, int) @@ -105,6 +104,20 @@ MMCBR_ACCESSOR(timing, TIMING, int) MMCBR_ACCESSOR(max_data, MAX_DATA, int) MMCBR_ACCESSOR(max_busy_timeout, MAX_BUSY_TIMEOUT, u_int) +static int __inline +mmcbr_get_retune_req(device_t dev) +{ + uintptr_t v; + + if (__predict_false(BUS_READ_IVAR(device_get_parent(dev), dev, + MMCBR_IVAR_RETUNE_REQ, &v) != 0)) + return (retune_req_none); + return ((int)v); +} + +/* + * Convenience wrappers for the mmcbr interface + */ static int __inline mmcbr_update_ios(device_t dev) { From 47c39432b11a628eb2fb65f75e1a35d4771ceb9b Mon Sep 17 00:00:00 2001 From: Navdeep Parhar Date: Fri, 24 Aug 2018 21:48:13 +0000 Subject: [PATCH 04/51] Unbreak VLANs after r337943. ether_set_pcp should not be called from ether_output_frame for VLAN interfaces -- the vid + pcp will be inserted during vlan_transmit in that case. r337943 sets the VLAN's ifnet's if_pcp to a proper PCP value and this led to double encapsulation (once with vid 0 and second time with vid+pcp). 
PR: 230794 Reviewed by: kib@ Approved by: re@ (gjb@) Sponsored by: Chelsio Communications Differential Revision: https://reviews.freebsd.org/D16887 --- sys/net/if_ethersubr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sys/net/if_ethersubr.c b/sys/net/if_ethersubr.c index 713062f18c6f..f00952f49323 100644 --- a/sys/net/if_ethersubr.c +++ b/sys/net/if_ethersubr.c @@ -461,7 +461,8 @@ ether_output_frame(struct ifnet *ifp, struct mbuf *m) uint8_t pcp; pcp = ifp->if_pcp; - if (pcp != IFNET_PCP_NONE && !ether_set_pcp(&m, ifp, pcp)) + if (pcp != IFNET_PCP_NONE && ifp->if_type != IFT_L2VLAN && + !ether_set_pcp(&m, ifp, pcp)) return (0); if (PFIL_HOOKED(&V_link_pfil_hook)) { From e67e4c6392dcece04cfdcdf517c4d65a2faf3c11 Mon Sep 17 00:00:00 2001 From: "Bjoern A. Zeeb" Date: Fri, 24 Aug 2018 21:49:21 +0000 Subject: [PATCH 05/51] Unbreak RSS builds after r338257. Folding both RSS blocks together I missed the closing } of the new combined block. Pointyhat to: bz Reported by: np Approved by: re (kib) --- sys/netinet6/udp6_usrreq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sys/netinet6/udp6_usrreq.c b/sys/netinet6/udp6_usrreq.c index 77cbc0b2ee2c..b32e88663172 100644 --- a/sys/netinet6/udp6_usrreq.c +++ b/sys/netinet6/udp6_usrreq.c @@ -960,6 +960,7 @@ udp6_output(struct socket *so, int flags_arg, struct mbuf *m, * be incorrect. */ flags |= IP_NODEFAULTFLOWID; + } #endif UDPSTAT_INC(udps_opackets); From 5dd1b8342c1134b9a621a7959d5060be7543a2f7 Mon Sep 17 00:00:00 2001 From: Kyle Evans Date: Sat, 25 Aug 2018 04:28:02 +0000 Subject: [PATCH 06/51] lualoader: Fix override of module_path on loader prompt Earlier changes setup a config.module_path variable that was populated upon reading of loader.conf(5) and used for restoring module_path to pristine condition if multiple kernels are attempted. This broke the ability to override module_path at the loader prompt in case of emergency. 
Approved by: re (rgrimes) --- stand/lua/config.lua | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/stand/lua/config.lua b/stand/lua/config.lua index 24ce50bf33b1..b3cce8b4acfc 100644 --- a/stand/lua/config.lua +++ b/stand/lua/config.lua @@ -479,6 +479,21 @@ function config.loadKernel(other_kernel) return nil end + local function getModulePath() + local module_path = loader.getenv("module_path") + local kernel_path = loader.getenv("kernel_path") + + if kernel_path == nil then + return module_path + end + + -- Strip the loaded kernel path from module_path. This currently assumes + -- that the kernel path will be prepended to the module_path when it's + -- found. + kernel_path = escapeName(kernel_path .. ';') + return module_path:gsub(kernel_path, '') + end + local function loadBootfile() local bootfile = loader.getenv("bootfile") @@ -507,7 +522,7 @@ function config.loadKernel(other_kernel) else -- Use our cached module_path, so we don't end up with multiple -- automatically added kernel paths to our final module_path - local module_path = config.module_path + local module_path = getModulePath() local res if other_kernel ~= nil then @@ -527,6 +542,7 @@ function config.loadKernel(other_kernel) if module_path ~= nil then loader.setenv("module_path", v .. ";" .. module_path) + loader.setenv("kernel_path", v) end return true end @@ -563,8 +579,6 @@ function config.load(file, reloading) checkNextboot() - -- Cache the provided module_path at load time for later use - config.module_path = loader.getenv("module_path") local verbose = loader.getenv("verbose_loading") or "no" config.verbose = verbose:lower() == "yes" if not reloading then From 60b742343453a29342442b67a43a9b743bf9343e Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Sat, 25 Aug 2018 15:21:28 +0000 Subject: [PATCH 07/51] Unify amd64 and i386 vmspace0 pmap activation. 
Add pmap_activate_boot() for i386, move the invocation on APs from MD init_secondary() to x86 init_secondary_tail(). Suggested by: alc Reviewed by: alc, markj Sponsored by: The FreeBSD Foundation Approved by: re (marius) MFC after: 1 week Differential revision: https://reviews.freebsd.org/D16893 --- sys/amd64/amd64/mp_machdep.c | 2 -- sys/i386/i386/pmap.c | 16 +++++++++++++++- sys/i386/include/pmap.h | 1 + sys/x86/x86/mp_x86.c | 3 +++ 4 files changed, 19 insertions(+), 3 deletions(-) diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c index 4ca2e07e578c..ef8b885a65d5 100644 --- a/sys/amd64/amd64/mp_machdep.c +++ b/sys/amd64/amd64/mp_machdep.c @@ -58,7 +58,6 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include #include #include @@ -343,7 +342,6 @@ init_secondary(void) while (atomic_load_acq_int(&aps_ready) == 0) ia32_pause(); - pmap_activate_boot(vmspace_pmap(proc0.p_vmspace)); init_secondary_tail(); } diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c index fecdb780fed9..5368c7cc2432 100644 --- a/sys/i386/i386/pmap.c +++ b/sys/i386/i386/pmap.c @@ -2000,9 +2000,9 @@ pmap_pinit0(pmap_t pmap) #endif pmap->pm_root.rt_root = 0; CPU_ZERO(&pmap->pm_active); - PCPU_SET(curpmap, pmap); TAILQ_INIT(&pmap->pm_pvchunk); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); + pmap_activate_boot(pmap); } /* @@ -5804,6 +5804,20 @@ pmap_activate(struct thread *td) critical_exit(); } +void +pmap_activate_boot(pmap_t pmap) +{ + u_int cpuid; + + cpuid = PCPU_GET(cpuid); +#if defined(SMP) + CPU_SET_ATOMIC(cpuid, &pmap->pm_active); +#else + CPU_SET(cpuid, &pmap->pm_active); +#endif + PCPU_SET(curpmap, pmap); +} + void pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz) { diff --git a/sys/i386/include/pmap.h b/sys/i386/include/pmap.h index eea683f5f858..da9ae6588189 100644 --- a/sys/i386/include/pmap.h +++ b/sys/i386/include/pmap.h @@ -373,6 +373,7 @@ extern vm_offset_t virtual_end; * is called: pmap_kenter(), pmap_kextract(), pmap_kremove(), 
vtophys(), and * vtopte(). */ +void pmap_activate_boot(pmap_t pmap); void pmap_bootstrap(vm_paddr_t); int pmap_cache_bits(pmap_t, int mode, boolean_t is_pde); int pmap_change_attr(vm_offset_t, vm_size_t, int); diff --git a/sys/x86/x86/mp_x86.c b/sys/x86/x86/mp_x86.c index e153a38530b8..83535988435c 100644 --- a/sys/x86/x86/mp_x86.c +++ b/sys/x86/x86/mp_x86.c @@ -62,6 +62,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -967,6 +968,8 @@ init_secondary_tail(void) { u_int cpuid; + pmap_activate_boot(vmspace_pmap(proc0.p_vmspace)); + /* * On real hardware, switch to x2apic mode if possible. Do it * after aps_ready was signalled, to avoid manipulating the From 23c97bcba1433e417ffdf2fd3b9c233170f0c89f Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Sat, 25 Aug 2018 15:31:23 +0000 Subject: [PATCH 08/51] Remove dead code in i386 cpu_throw(). Curpmap must be already valid when cpu_throw() is called, even for early AP startup. Suggested by: alc Reviewed by: alc, markj Sponsored by: The FreeBSD Foundation Approved by: re (marius) MFC after: 1 week Differential revision: https://reviews.freebsd.org/D16893 --- sys/i386/i386/swtch.s | 4 ---- 1 file changed, 4 deletions(-) diff --git a/sys/i386/i386/swtch.s b/sys/i386/i386/swtch.s index b978a9cfd9b8..d9f6a3934362 100644 --- a/sys/i386/i386/swtch.s +++ b/sys/i386/i386/swtch.s @@ -74,16 +74,12 @@ */ ENTRY(cpu_throw) movl PCPU(CPUID), %esi - movl 4(%esp),%ecx /* Old thread */ - testl %ecx,%ecx /* no thread? */ - jz 1f /* release bit from old pm_active */ movl PCPU(CURPMAP), %ebx #ifdef SMP lock #endif btrl %esi, PM_ACTIVE(%ebx) /* clear old */ -1: movl 8(%esp),%ecx /* New thread */ movl TD_PCB(%ecx),%edx /* set bit in new pm_active */ From 4a82f3685176706671af0b09a668acc86ee8de64 Mon Sep 17 00:00:00 2001 From: Warner Losh Date: Sat, 25 Aug 2018 15:47:52 +0000 Subject: [PATCH 09/51] Add in a missing newline In the conversion, the newline got stripped. 
It worked fine when there was only one module, but not when there are many. Add back the missing newline. Approved by: re@ (kib) PR: 230868 Differential Revision: https://reviews.freebsd.org/D16895 --- sbin/devmatch/devmatch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sbin/devmatch/devmatch.c b/sbin/devmatch/devmatch.c index bc3a4e1f03a5..938760bec32f 100644 --- a/sbin/devmatch/devmatch.c +++ b/sbin/devmatch/devmatch.c @@ -386,7 +386,7 @@ search_hints(const char *bus, const char *dev, const char *pnpinfo) if (all_flag) printf("%s: %s", *dev ? dev : "unattached", lastmod); else - printf("%s", lastmod); + printf("%s\n", lastmod); if (verbose_flag) printf("Matches --- %s ---\n", lastmod); } From fedded8d048045db541bcff2287ab894cb290bd4 Mon Sep 17 00:00:00 2001 From: Mark Johnston Date: Sat, 25 Aug 2018 15:59:51 +0000 Subject: [PATCH 10/51] Fix column alignment in per-thread mode. PR: 230872 Approved by: re (marius) MFC after: 1 week --- usr.bin/top/machine.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/usr.bin/top/machine.c b/usr.bin/top/machine.c index 5dafda0a787e..374c9da0edf4 100644 --- a/usr.bin/top/machine.c +++ b/usr.bin/top/machine.c @@ -1067,6 +1067,8 @@ format_next_process(struct handle * xhandle, char *(*get_userid)(int), int flags if (!ps.thread) { sbuf_printf(procbuf, "%4d ", pp->ki_numthreads); + } else { + sbuf_printf(procbuf, " "); } sbuf_printf(procbuf, "%3d ", pp->ki_pri.pri_level - PZERO); From ee97b2336aa47851a1dc5681ea2017cc2193aac4 Mon Sep 17 00:00:00 2001 From: Colin Percival Date: Sat, 25 Aug 2018 16:14:56 +0000 Subject: [PATCH 11/51] Speed up vt(4) by keeping a record of the most recently drawn character and the foreground and background colours. In bitblt_text functions, compare values to this cache and don't re-draw the characters if they haven't changed. 
When invalidating the display, clear this cache in order to force characters to be redrawn; also force full redraws between suspend/resume pairs since odd artifacts can otherwise result. When scrolling the display (which is where most time is spent within the vt driver) this yields a significant performance improvement if most lines are less than the width of the terminal, since this avoids re-drawing blanks on top of blanks. (Note that "re-drawing" here includes writing to the VGA text mode buffer; on virtualized systems this can be extremely slow since it triggers a glyph being rendered onto a 640x480 screen). On a c5.4xlarge EC2 instance (with emulated text mode VGA) this cuts the time spent in vt(4) during the kernel boot from 1200 ms to 700ms; on my laptop (with a 3200x1800 display) the corresponding time is reduced from 970 ms down to 155 ms. Reviewed by: imp, cem Approved by: re (gjb) Relnotes: Significant speedup in vt(4) and the system boot generally. Differential Revision: https://reviews.freebsd.org/D16723 --- sys/dev/vt/hw/efifb/efifb.c | 1 + sys/dev/vt/hw/fb/vt_early_fb.c | 1 + sys/dev/vt/hw/fb/vt_fb.c | 35 ++++++++++++++++++++++++++++++++ sys/dev/vt/hw/fb/vt_fb.h | 1 + sys/dev/vt/hw/vga/vt_vga.c | 37 ++++++++++++++++++++++++++++++++++ sys/dev/vt/vt.h | 7 +++++++ sys/dev/vt/vt_core.c | 21 +++++++++++++++++-- 7 files changed, 101 insertions(+), 2 deletions(-) diff --git a/sys/dev/vt/hw/efifb/efifb.c b/sys/dev/vt/hw/efifb/efifb.c index 7b8b88cad51a..c3ae3bff30fe 100644 --- a/sys/dev/vt/hw/efifb/efifb.c +++ b/sys/dev/vt/hw/efifb/efifb.c @@ -58,6 +58,7 @@ static struct vt_driver vt_efifb_driver = { .vd_init = vt_efifb_init, .vd_blank = vt_fb_blank, .vd_bitblt_text = vt_fb_bitblt_text, + .vd_invalidate_text = vt_fb_invalidate_text, .vd_bitblt_bmp = vt_fb_bitblt_bitmap, .vd_drawrect = vt_fb_drawrect, .vd_setpixel = vt_fb_setpixel, diff --git a/sys/dev/vt/hw/fb/vt_early_fb.c b/sys/dev/vt/hw/fb/vt_early_fb.c index 15af1aedbd25..e538ace2c052 100644 --- 
a/sys/dev/vt/hw/fb/vt_early_fb.c +++ b/sys/dev/vt/hw/fb/vt_early_fb.c @@ -60,6 +60,7 @@ static struct vt_driver vt_fb_early_driver = { .vd_init = vt_efb_init, .vd_blank = vt_fb_blank, .vd_bitblt_text = vt_fb_bitblt_text, + .vd_invalidate_text = vt_fb_invalidate_text, .vd_bitblt_bmp = vt_fb_bitblt_bitmap, .vd_drawrect = vt_fb_drawrect, .vd_setpixel = vt_fb_setpixel, diff --git a/sys/dev/vt/hw/fb/vt_fb.c b/sys/dev/vt/hw/fb/vt_fb.c index f437b8cf9e7d..091188c5e2da 100644 --- a/sys/dev/vt/hw/fb/vt_fb.c +++ b/sys/dev/vt/hw/fb/vt_fb.c @@ -50,6 +50,7 @@ static struct vt_driver vt_fb_driver = { .vd_fini = vt_fb_fini, .vd_blank = vt_fb_blank, .vd_bitblt_text = vt_fb_bitblt_text, + .vd_invalidate_text = vt_fb_invalidate_text, .vd_bitblt_bmp = vt_fb_bitblt_bitmap, .vd_drawrect = vt_fb_drawrect, .vd_setpixel = vt_fb_setpixel, @@ -335,6 +336,7 @@ vt_fb_bitblt_text(struct vt_device *vd, const struct vt_window *vw, term_char_t c; term_color_t fg, bg; const uint8_t *pattern; + size_t z; vf = vw->vw_font; @@ -351,9 +353,22 @@ vt_fb_bitblt_text(struct vt_device *vd, const struct vt_window *vw, vt_determine_colors(c, VTBUF_ISCURSOR(&vw->vw_buf, row, col), &fg, &bg); + z = row * PIXEL_WIDTH(VT_FB_MAX_WIDTH) + col; + if (vd->vd_drawn && (vd->vd_drawn[z] == c) && + vd->vd_drawnfg && (vd->vd_drawnfg[z] == fg) && + vd->vd_drawnbg && (vd->vd_drawnbg[z] == bg)) + continue; + vt_fb_bitblt_bitmap(vd, vw, pattern, NULL, vf->vf_width, vf->vf_height, x, y, fg, bg); + + if (vd->vd_drawn) + vd->vd_drawn[z] = c; + if (vd->vd_drawnfg) + vd->vd_drawnfg[z] = fg; + if (vd->vd_drawnbg) + vd->vd_drawnbg[z] = bg; } } @@ -379,6 +394,26 @@ vt_fb_bitblt_text(struct vt_device *vd, const struct vt_window *vw, #endif } +void +vt_fb_invalidate_text(struct vt_device *vd, const term_rect_t *area) +{ + unsigned int col, row; + size_t z; + + for (row = area->tr_begin.tp_row; row < area->tr_end.tp_row; ++row) { + for (col = area->tr_begin.tp_col; col < area->tr_end.tp_col; + ++col) { + z = row * 
PIXEL_WIDTH(VT_FB_MAX_WIDTH) + col; + if (vd->vd_drawn) + vd->vd_drawn[z] = 0; + if (vd->vd_drawnfg) + vd->vd_drawnfg[z] = 0; + if (vd->vd_drawnbg) + vd->vd_drawnbg[z] = 0; + } + } +} + void vt_fb_postswitch(struct vt_device *vd) { diff --git a/sys/dev/vt/hw/fb/vt_fb.h b/sys/dev/vt/hw/fb/vt_fb.h index 6a185bab0dcb..42b395e8ff39 100644 --- a/sys/dev/vt/hw/fb/vt_fb.h +++ b/sys/dev/vt/hw/fb/vt_fb.h @@ -43,6 +43,7 @@ vd_init_t vt_fb_init; vd_fini_t vt_fb_fini; vd_blank_t vt_fb_blank; vd_bitblt_text_t vt_fb_bitblt_text; +vd_invalidate_text_t vt_fb_invalidate_text; vd_bitblt_bmp_t vt_fb_bitblt_bitmap; vd_drawrect_t vt_fb_drawrect; vd_setpixel_t vt_fb_setpixel; diff --git a/sys/dev/vt/hw/vga/vt_vga.c b/sys/dev/vt/hw/vga/vt_vga.c index 6efb6d1eeae9..88a9b1c47338 100644 --- a/sys/dev/vt/hw/vga/vt_vga.c +++ b/sys/dev/vt/hw/vga/vt_vga.c @@ -97,6 +97,7 @@ static vd_probe_t vga_probe; static vd_init_t vga_init; static vd_blank_t vga_blank; static vd_bitblt_text_t vga_bitblt_text; +static vd_invalidate_text_t vga_invalidate_text; static vd_bitblt_bmp_t vga_bitblt_bitmap; static vd_drawrect_t vga_drawrect; static vd_setpixel_t vga_setpixel; @@ -108,6 +109,7 @@ static const struct vt_driver vt_vga_driver = { .vd_init = vga_init, .vd_blank = vga_blank, .vd_bitblt_text = vga_bitblt_text, + .vd_invalidate_text = vga_invalidate_text, .vd_bitblt_bmp = vga_bitblt_bitmap, .vd_drawrect = vga_drawrect, .vd_setpixel = vga_setpixel, @@ -868,6 +870,7 @@ vga_bitblt_text_txtmode(struct vt_device *vd, const struct vt_window *vw, term_char_t c; term_color_t fg, bg; uint8_t ch, attr; + size_t z; sc = vd->vd_softc; vb = &vw->vw_buf; @@ -884,6 +887,12 @@ vga_bitblt_text_txtmode(struct vt_device *vd, const struct vt_window *vw, vt_determine_colors(c, VTBUF_ISCURSOR(vb, row, col), &fg, &bg); + z = row * PIXEL_WIDTH(VT_FB_MAX_WIDTH) + col; + if (vd->vd_drawn && (vd->vd_drawn[z] == c) && + vd->vd_drawnfg && (vd->vd_drawnfg[z] == fg) && + vd->vd_drawnbg && (vd->vd_drawnbg[z] == bg)) + continue; + /* * 
Convert character to CP437, which is the * character set used by the VGA hardware by @@ -898,6 +907,13 @@ vga_bitblt_text_txtmode(struct vt_device *vd, const struct vt_window *vw, MEM_WRITE2(sc, (row * 80 + col) * 2 + 0, ch + ((uint16_t)(attr) << 8)); + + if (vd->vd_drawn) + vd->vd_drawn[z] = c; + if (vd->vd_drawnfg) + vd->vd_drawnfg[z] = fg; + if (vd->vd_drawnbg) + vd->vd_drawnbg[z] = bg; } } } @@ -914,6 +930,27 @@ vga_bitblt_text(struct vt_device *vd, const struct vt_window *vw, } } +void +vga_invalidate_text(struct vt_device *vd, const term_rect_t *area) +{ + unsigned int col, row; + size_t z; + + for (row = area->tr_begin.tp_row; row < area->tr_end.tp_row; ++row) { + for (col = area->tr_begin.tp_col; + col < area->tr_end.tp_col; + ++col) { + z = row * PIXEL_WIDTH(VT_FB_MAX_WIDTH) + col; + if (vd->vd_drawn) + vd->vd_drawn[z] = 0; + if (vd->vd_drawnfg) + vd->vd_drawnfg[z] = 0; + if (vd->vd_drawnbg) + vd->vd_drawnbg[z] = 0; + } + } +} + static void vga_bitblt_bitmap(struct vt_device *vd, const struct vt_window *vw, const uint8_t *pattern, const uint8_t *mask, diff --git a/sys/dev/vt/vt.h b/sys/dev/vt/vt.h index 745f9de836cb..712da20b7634 100644 --- a/sys/dev/vt/vt.h +++ b/sys/dev/vt/vt.h @@ -156,11 +156,15 @@ struct vt_device { #define VDF_INITIALIZED 0x20 /* vtterm_cnprobe already done. */ #define VDF_MOUSECURSOR 0x40 /* Mouse cursor visible. */ #define VDF_QUIET_BELL 0x80 /* Disable bell. */ +#define VDF_SUSPENDED 0x100 /* Device has been suspended. */ #define VDF_DOWNGRADE 0x8000 /* The driver is being downgraded. */ int vd_keyboard; /* (G) Keyboard index. */ unsigned int vd_kbstate; /* (?) Device unit. */ unsigned int vd_unit; /* (c) Device unit. */ int vd_altbrk; /* (?) Alt break seq. state */ + term_char_t *vd_drawn; /* (?) Most recent char drawn. */ + term_color_t *vd_drawnfg; /* (?) Most recent fg color drawn. */ + term_color_t *vd_drawnbg; /* (?) Most recent bg color drawn. 
*/ }; #define VD_PASTEBUF(vd) ((vd)->vd_pastebuf.vpb_buf) @@ -320,6 +324,8 @@ typedef void vd_postswitch_t(struct vt_device *vd); typedef void vd_blank_t(struct vt_device *vd, term_color_t color); typedef void vd_bitblt_text_t(struct vt_device *vd, const struct vt_window *vw, const term_rect_t *area); +typedef void vd_invalidate_text_t(struct vt_device *vd, + const term_rect_t *area); typedef void vd_bitblt_bmp_t(struct vt_device *vd, const struct vt_window *vw, const uint8_t *pattern, const uint8_t *mask, unsigned int width, unsigned int height, @@ -345,6 +351,7 @@ struct vt_driver { vd_drawrect_t *vd_drawrect; vd_setpixel_t *vd_setpixel; vd_bitblt_text_t *vd_bitblt_text; + vd_invalidate_text_t *vd_invalidate_text; vd_bitblt_bmp_t *vd_bitblt_bmp; /* Framebuffer ioctls, if present. */ diff --git a/sys/dev/vt/vt_core.c b/sys/dev/vt/vt_core.c index 81a803fcd00f..4edea2b2ed1f 100644 --- a/sys/dev/vt/vt_core.c +++ b/sys/dev/vt/vt_core.c @@ -190,6 +190,11 @@ SET_DECLARE(vt_drv_set, struct vt_driver); struct terminal vt_consterm; static struct vt_window vt_conswindow; +#ifndef SC_NO_CONSDRAWN +static term_char_t vt_consdrawn[PIXEL_HEIGHT(VT_FB_MAX_HEIGHT) * PIXEL_WIDTH(VT_FB_MAX_WIDTH)]; +static term_color_t vt_consdrawnfg[PIXEL_HEIGHT(VT_FB_MAX_HEIGHT) * PIXEL_WIDTH(VT_FB_MAX_WIDTH)]; +static term_color_t vt_consdrawnbg[PIXEL_HEIGHT(VT_FB_MAX_HEIGHT) * PIXEL_WIDTH(VT_FB_MAX_WIDTH)]; +#endif struct vt_device vt_consdev = { .vd_driver = NULL, .vd_softc = NULL, @@ -210,6 +215,12 @@ struct vt_device vt_consdev = { .vd_mcursor_fg = TC_WHITE, .vd_mcursor_bg = TC_BLACK, #endif + +#ifndef SC_NO_CONSDRAWN + .vd_drawn = vt_consdrawn, + .vd_drawnfg = vt_consdrawnfg, + .vd_drawnbg = vt_consdrawnbg, +#endif }; static term_char_t vt_constextbuf[(_VTDEFW) * (VBF_DEFAULT_HISTORY_SIZE)]; static term_char_t *vt_constextbufrows[VBF_DEFAULT_HISTORY_SIZE]; @@ -1181,6 +1192,8 @@ vt_mark_mouse_position_as_dirty(struct vt_device *vd, int locked) if (!locked) vtbuf_lock(&vw->vw_buf); + if 
(vd->vd_driver->vd_invalidate_text) vd->vd_driver->vd_invalidate_text(vd, &area); vtbuf_dirty(&vw->vw_buf, &area); if (!locked) vtbuf_unlock(&vw->vw_buf); @@ -1280,12 +1293,14 @@ vt_flush(struct vt_device *vd) vtbuf_undirty(&vw->vw_buf, &tarea); - /* Force a full redraw when the screen contents are invalid. */ - if (vd->vd_flags & VDF_INVALID) { + /* Force a full redraw when the screen contents might be invalid. */ + if (vd->vd_flags & (VDF_INVALID | VDF_SUSPENDED)) { vd->vd_flags &= ~VDF_INVALID; vt_set_border(vd, &vw->vw_draw_area, TC_BLACK); vt_termrect(vd, vf, &tarea); + if (vd->vd_driver->vd_invalidate_text) + vd->vd_driver->vd_invalidate_text(vd, &tarea); if (vt_draw_logo_cpus) vtterm_draw_cpu_logos(vd); } @@ -2824,6 +2839,7 @@ vt_suspend_handler(void *priv) struct vt_device *vd; vd = priv; + vd->vd_flags |= VDF_SUSPENDED; if (vd->vd_driver != NULL && vd->vd_driver->vd_suspend != NULL) vd->vd_driver->vd_suspend(vd); } @@ -2836,6 +2852,7 @@ vt_resume_handler(void *priv) vd = priv; if (vd->vd_driver != NULL && vd->vd_driver->vd_resume != NULL) vd->vd_driver->vd_resume(vd); + vd->vd_flags &= ~VDF_SUSPENDED; } void From ee6281c3d385306e6b6a7cc272f122154d409cba Mon Sep 17 00:00:00 2001 From: Michal Meloun Date: Sat, 25 Aug 2018 16:54:37 +0000 Subject: [PATCH 12/51] Fix wrong offset calculation for R_ARM_TLS_TPOFF32 relocations. TLS_TCB_SIZE is already accounted for in defobj->tlsoffset so all these symbols were incorrectly relocated by +8. Note: The only consumer (for all binaries on my ARM board) of R_ARM_TLS_TPOFF32 relocation is _ThreadRuneLocale variable. And the incorrectly relocated ThreadRuneLocale accidentally pointed to zeroed memory before memory layout change from D16510 had changed status quo. 
MFC after: 3 weeks Reviewed by: imp, jhb Approved by: re (marius) --- libexec/rtld-elf/arm/reloc.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/libexec/rtld-elf/arm/reloc.c b/libexec/rtld-elf/arm/reloc.c index d2b8d5b15077..794cfe43cb0e 100644 --- a/libexec/rtld-elf/arm/reloc.c +++ b/libexec/rtld-elf/arm/reloc.c @@ -324,9 +324,7 @@ reloc_nonplt_object(Obj_Entry *obj, const Elf_Rel *rel, SymCache *cache, if (!defobj->tls_done && allocate_tls_offset(obj)) return -1; - /* XXX: FIXME */ - tmp = (Elf_Addr)def->st_value + defobj->tlsoffset + - TLS_TCB_SIZE; + tmp = (Elf_Addr)def->st_value + defobj->tlsoffset; if (__predict_true(RELOC_ALIGNED_P(where))) *where = tmp; else From 49bfa624ac861d7fafc87f06dc2cdf4972aed854 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Sat, 25 Aug 2018 19:38:08 +0000 Subject: [PATCH 13/51] Eliminate the arena parameter to kmem_free(). Implicitly this corrects an error in the function hypercall_memfree(), where the wrong arena was being passed to kmem_free(). Introduce a per-page flag, VPO_KMEM_EXEC, to mark physical pages that are mapped in kmem with execute permissions. Use this flag to determine which arena the kmem virtual addresses are returned to. Eliminate UMA_SLAB_KRWX. The introduction of VPO_KMEM_EXEC makes it redundant. Update the nearby comment for UMA_SLAB_KERNEL. 
Reviewed by: kib, markj Discussed with: jeff Approved by: re (marius) Differential Revision: https://reviews.freebsd.org/D16845 --- sys/amd64/amd64/sys_machdep.c | 4 +- sys/amd64/amd64/vm_machdep.c | 3 +- sys/arm/allwinner/a10_fb.c | 2 +- sys/arm/arm/busdma_machdep-v4.c | 2 +- sys/arm/arm/busdma_machdep-v6.c | 2 +- sys/arm/arm/pmap-v6.c | 3 +- sys/arm/freescale/imx/imx6_sdma.c | 3 +- sys/arm/nvidia/tegra_xhci.c | 2 +- sys/arm64/arm64/busdma_bounce.c | 3 +- sys/arm64/arm64/mp_machdep.c | 3 +- .../common/include/linux/dma-mapping.h | 2 +- sys/compat/linuxkpi/common/src/linux_page.c | 2 +- sys/dev/agp/agp.c | 4 +- sys/dev/agp/agp_amd.c | 10 ++--- sys/dev/agp/agp_ati.c | 10 ++--- sys/dev/agp/agp_i810.c | 2 +- sys/dev/amd_ecc_inject/ecc_inject.c | 2 +- sys/dev/drm/drm_scatter.c | 2 +- sys/dev/drm2/drm_scatter.c | 2 +- sys/dev/hyperv/vmbus/hyperv.c | 3 +- sys/dev/liquidio/lio_network.h | 2 +- sys/kern/kern_malloc.c | 2 +- sys/kern/subr_busdma_bufalloc.c | 2 +- sys/mips/ingenic/jz4780_lcd.c | 2 +- sys/mips/mips/busdma_machdep.c | 2 +- sys/powerpc/powerpc/busdma_machdep.c | 2 +- sys/vm/uma.h | 5 +-- sys/vm/uma_core.c | 22 ++------- sys/vm/vm_extern.h | 2 +- sys/vm/vm_kern.c | 45 +++++++++---------- sys/vm/vm_page.h | 2 +- sys/x86/iommu/busdma_dmar.c | 2 +- sys/x86/iommu/intel_intrmap.c | 4 +- sys/x86/iommu/intel_qi.c | 2 +- sys/x86/x86/busdma_bounce.c | 3 +- 35 files changed, 69 insertions(+), 96 deletions(-) diff --git a/sys/amd64/amd64/sys_machdep.c b/sys/amd64/amd64/sys_machdep.c index 63f4d7c72346..b1dff88a584e 100644 --- a/sys/amd64/amd64/sys_machdep.c +++ b/sys/amd64/amd64/sys_machdep.c @@ -479,7 +479,7 @@ user_ldt_alloc(struct proc *p, int force) pldt = mdp->md_ldt; if (pldt != NULL && !force) { pmap_pti_remove_kva(sva, sva + sz); - kmem_free(kernel_arena, sva, sz); + kmem_free(sva, sz); free(new_ldt, M_SUBPROC); return (pldt); } @@ -533,7 +533,7 @@ user_ldt_derefl(struct proc_ldt *pldt) sva = (vm_offset_t)pldt->ldt_base; sz = max_ldt_segment * sizeof(struct 
user_segment_descriptor); pmap_pti_remove_kva(sva, sva + sz); - kmem_free(kernel_arena, sva, sz); + kmem_free(sva, sz); free(pldt, M_SUBPROC); } } diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c index c7a18ce06716..ba71d5127cd8 100644 --- a/sys/amd64/amd64/vm_machdep.c +++ b/sys/amd64/amd64/vm_machdep.c @@ -331,8 +331,7 @@ cpu_thread_clean(struct thread *td) if (pcb->pcb_tssp != NULL) { pmap_pti_remove_kva((vm_offset_t)pcb->pcb_tssp, (vm_offset_t)pcb->pcb_tssp + ctob(IOPAGES + 1)); - kmem_free(kernel_arena, (vm_offset_t)pcb->pcb_tssp, - ctob(IOPAGES + 1)); + kmem_free((vm_offset_t)pcb->pcb_tssp, ctob(IOPAGES + 1)); pcb->pcb_tssp = NULL; } } diff --git a/sys/arm/allwinner/a10_fb.c b/sys/arm/allwinner/a10_fb.c index ef3899b3c368..6342eb0acf46 100644 --- a/sys/arm/allwinner/a10_fb.c +++ b/sys/arm/allwinner/a10_fb.c @@ -192,7 +192,7 @@ a10fb_allocfb(struct a10fb_softc *sc) static void a10fb_freefb(struct a10fb_softc *sc) { - kmem_free(kernel_arena, sc->vaddr, sc->fbsize); + kmem_free(sc->vaddr, sc->fbsize); } static int diff --git a/sys/arm/arm/busdma_machdep-v4.c b/sys/arm/arm/busdma_machdep-v4.c index e6e863ad9ca6..d7ab11ff07a0 100644 --- a/sys/arm/arm/busdma_machdep-v4.c +++ b/sys/arm/arm/busdma_machdep-v4.c @@ -792,7 +792,7 @@ bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map) !_bus_dma_can_bounce(dmat->lowaddr, dmat->highaddr)) uma_zfree(bufzone->umazone, vaddr); else - kmem_free(kernel_arena, (vm_offset_t)vaddr, dmat->maxsize); + kmem_free((vm_offset_t)vaddr, dmat->maxsize); dmat->map_count--; if (map->flags & DMAMAP_COHERENT) diff --git a/sys/arm/arm/busdma_machdep-v6.c b/sys/arm/arm/busdma_machdep-v6.c index 3ab34fb3322a..2aff3f8a1502 100644 --- a/sys/arm/arm/busdma_machdep-v6.c +++ b/sys/arm/arm/busdma_machdep-v6.c @@ -858,7 +858,7 @@ bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map) !exclusion_bounce(dmat)) uma_zfree(bufzone->umazone, vaddr); else - kmem_free(kernel_arena, (vm_offset_t)vaddr, 
dmat->maxsize); + kmem_free((vm_offset_t)vaddr, dmat->maxsize); dmat->map_count--; if (map->flags & DMAMAP_COHERENT) diff --git a/sys/arm/arm/pmap-v6.c b/sys/arm/arm/pmap-v6.c index 90c383ee7bca..8dd74f15d925 100644 --- a/sys/arm/arm/pmap-v6.c +++ b/sys/arm/arm/pmap-v6.c @@ -2242,8 +2242,7 @@ pmap_pinit(pmap_t pmap) * UMA_ZONE_NOFREE flag, it's important to leave * no allocation in pmap if initialization failed. */ - kmem_free(kernel_arena, (vm_offset_t)pmap->pm_pt1, - NB_IN_PT1); + kmem_free((vm_offset_t)pmap->pm_pt1, NB_IN_PT1); pmap->pm_pt1 = NULL; return (0); } diff --git a/sys/arm/freescale/imx/imx6_sdma.c b/sys/arm/freescale/imx/imx6_sdma.c index 6566151e68a4..3bae225a75d5 100644 --- a/sys/arm/freescale/imx/imx6_sdma.c +++ b/sys/arm/freescale/imx/imx6_sdma.c @@ -196,8 +196,7 @@ sdma_free(int chn) channel = &sc->channel[chn]; channel->in_use = 0; - kmem_free(kernel_arena, (vm_offset_t)channel->bd, - PAGE_SIZE); + kmem_free((vm_offset_t)channel->bd, PAGE_SIZE); return (0); } diff --git a/sys/arm/nvidia/tegra_xhci.c b/sys/arm/nvidia/tegra_xhci.c index f3d96752cc87..7615e8809e61 100644 --- a/sys/arm/nvidia/tegra_xhci.c +++ b/sys/arm/nvidia/tegra_xhci.c @@ -984,7 +984,7 @@ tegra_xhci_detach(device_t dev) if (sc->irq_hdl_mbox != NULL) bus_teardown_intr(dev, sc->irq_res_mbox, sc->irq_hdl_mbox); if (sc->fw_vaddr != 0) - kmem_free(kernel_arena, sc->fw_vaddr, sc->fw_size); + kmem_free(sc->fw_vaddr, sc->fw_size); LOCK_DESTROY(sc); return (0); } diff --git a/sys/arm64/arm64/busdma_bounce.c b/sys/arm64/arm64/busdma_bounce.c index 0459fbc08a93..07894652ade8 100644 --- a/sys/arm64/arm64/busdma_bounce.c +++ b/sys/arm64/arm64/busdma_bounce.c @@ -532,8 +532,7 @@ bounce_bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map) if ((dmat->bounce_flags & BF_KMEM_ALLOC) == 0) free(vaddr, M_DEVBUF); else - kmem_free(kernel_arena, (vm_offset_t)vaddr, - dmat->common.maxsize); + kmem_free((vm_offset_t)vaddr, dmat->common.maxsize); free(map, M_DEVBUF); dmat->map_count--; 
CTR3(KTR_BUSDMA, "%s: tag %p flags 0x%x", __func__, dmat, diff --git a/sys/arm64/arm64/mp_machdep.c b/sys/arm64/arm64/mp_machdep.c index a3f096f60717..aa067d0194e4 100644 --- a/sys/arm64/arm64/mp_machdep.c +++ b/sys/arm64/arm64/mp_machdep.c @@ -502,8 +502,7 @@ start_cpu(u_int id, uint64_t target_cpu) ("Failed to start CPU %u (%lx)\n", id, target_cpu)); pcpu_destroy(pcpup); - kmem_free(kernel_arena, (vm_offset_t)dpcpu[cpuid - 1], - DPCPU_SIZE); + kmem_free((vm_offset_t)dpcpu[cpuid - 1], DPCPU_SIZE); dpcpu[cpuid - 1] = NULL; mp_ncpus--; diff --git a/sys/compat/linuxkpi/common/include/linux/dma-mapping.h b/sys/compat/linuxkpi/common/include/linux/dma-mapping.h index bae94a4fbc5a..f482ccb3b53e 100644 --- a/sys/compat/linuxkpi/common/include/linux/dma-mapping.h +++ b/sys/compat/linuxkpi/common/include/linux/dma-mapping.h @@ -156,7 +156,7 @@ dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_handle) { - kmem_free(kmem_arena, (vm_offset_t)cpu_addr, size); + kmem_free((vm_offset_t)cpu_addr, size); } /* XXX This only works with no iommu. 
*/ diff --git a/sys/compat/linuxkpi/common/src/linux_page.c b/sys/compat/linuxkpi/common/src/linux_page.c index b65b29e345d9..63564f42a16c 100644 --- a/sys/compat/linuxkpi/common/src/linux_page.c +++ b/sys/compat/linuxkpi/common/src/linux_page.c @@ -178,7 +178,7 @@ linux_free_kmem(vm_offset_t addr, unsigned int order) { size_t size = ((size_t)PAGE_SIZE) << order; - kmem_free(kmem_arena, addr, size); + kmem_free(addr, size); } static int diff --git a/sys/dev/agp/agp.c b/sys/dev/agp/agp.c index fca63f9666ae..011c89afeb37 100644 --- a/sys/dev/agp/agp.c +++ b/sys/dev/agp/agp.c @@ -171,8 +171,8 @@ agp_alloc_gatt(device_t dev) void agp_free_gatt(struct agp_gatt *gatt) { - kmem_free(kernel_arena, (vm_offset_t)gatt->ag_virtual, - gatt->ag_entries * sizeof(u_int32_t)); + kmem_free((vm_offset_t)gatt->ag_virtual, gatt->ag_entries * + sizeof(u_int32_t)); free(gatt, M_AGP); } diff --git a/sys/dev/agp/agp_amd.c b/sys/dev/agp/agp_amd.c index 1648f41ac4a0..8307d8d9d797 100644 --- a/sys/dev/agp/agp_amd.c +++ b/sys/dev/agp/agp_amd.c @@ -119,8 +119,8 @@ agp_amd_alloc_gatt(device_t dev) if (bootverbose) device_printf(dev, "failed to allocate page directory\n"); - kmem_free(kernel_arena, (vm_offset_t)gatt->ag_virtual, - entries * sizeof(u_int32_t)); + kmem_free((vm_offset_t)gatt->ag_virtual, entries * + sizeof(u_int32_t)); free(gatt, M_AGP); return 0; } @@ -168,9 +168,9 @@ agp_amd_alloc_gatt(device_t dev) static void agp_amd_free_gatt(struct agp_amd_gatt *gatt) { - kmem_free(kernel_arena, (vm_offset_t)gatt->ag_vdir, AGP_PAGE_SIZE); - kmem_free(kernel_arena, (vm_offset_t)gatt->ag_virtual, - gatt->ag_entries * sizeof(u_int32_t)); + kmem_free((vm_offset_t)gatt->ag_vdir, AGP_PAGE_SIZE); + kmem_free((vm_offset_t)gatt->ag_virtual, gatt->ag_entries * + sizeof(u_int32_t)); free(gatt, M_AGP); } diff --git a/sys/dev/agp/agp_ati.c b/sys/dev/agp/agp_ati.c index d6c10dc66349..6dc2746f144e 100644 --- a/sys/dev/agp/agp_ati.c +++ b/sys/dev/agp/agp_ati.c @@ -147,8 +147,8 @@ agp_ati_alloc_gatt(device_t 
dev) if (sc->ag_vdir == NULL) { if (bootverbose) device_printf(dev, "pagedir allocation failed\n"); - kmem_free(kernel_arena, (vm_offset_t)sc->ag_virtual, - entries * sizeof(u_int32_t)); + kmem_free((vm_offset_t)sc->ag_virtual, entries * + sizeof(u_int32_t)); return ENOMEM; } sc->ag_pdir = vtophys((vm_offset_t)sc->ag_vdir); @@ -265,9 +265,9 @@ agp_ati_detach(device_t dev) temp = pci_read_config(dev, apsize_reg, 4); pci_write_config(dev, apsize_reg, temp & ~1, 4); - kmem_free(kernel_arena, (vm_offset_t)sc->ag_vdir, AGP_PAGE_SIZE); - kmem_free(kernel_arena, (vm_offset_t)sc->ag_virtual, - sc->ag_entries * sizeof(u_int32_t)); + kmem_free((vm_offset_t)sc->ag_vdir, AGP_PAGE_SIZE); + kmem_free((vm_offset_t)sc->ag_virtual, sc->ag_entries * + sizeof(u_int32_t)); bus_release_resource(dev, SYS_RES_MEMORY, ATI_GART_MMADDR, sc->regs); agp_free_res(dev); diff --git a/sys/dev/agp/agp_i810.c b/sys/dev/agp/agp_i810.c index 59028f0533d8..27d7f1114a08 100644 --- a/sys/dev/agp/agp_i810.c +++ b/sys/dev/agp/agp_i810.c @@ -1329,7 +1329,7 @@ agp_i810_deinstall_gatt(device_t dev) sc = device_get_softc(dev); bus_write_4(sc->sc_res[0], AGP_I810_PGTBL_CTL, 0); - kmem_free(kernel_arena, (vm_offset_t)sc->gatt->ag_virtual, 64 * 1024); + kmem_free((vm_offset_t)sc->gatt->ag_virtual, 64 * 1024); } static void diff --git a/sys/dev/amd_ecc_inject/ecc_inject.c b/sys/dev/amd_ecc_inject/ecc_inject.c index a682115b08ae..7425320f8256 100644 --- a/sys/dev/amd_ecc_inject/ecc_inject.c +++ b/sys/dev/amd_ecc_inject/ecc_inject.c @@ -195,7 +195,7 @@ ecc_ei_inject(int count) pause_sbt("ecc_ei_inject", delay_ms * SBT_1MS, 0, 0); } - kmem_free(kernel_arena, memory, PAGE_SIZE); + kmem_free(memory, PAGE_SIZE); } static int diff --git a/sys/dev/drm/drm_scatter.c b/sys/dev/drm/drm_scatter.c index 163634878d93..9b02d1a69c55 100644 --- a/sys/dev/drm/drm_scatter.c +++ b/sys/dev/drm/drm_scatter.c @@ -99,7 +99,7 @@ drm_sg_cleanup(struct drm_sg_mem *entry) return; if (entry->vaddr != 0) - kmem_free(kernel_arena, 
entry->vaddr, IDX_TO_OFF(entry->pages)); + kmem_free(entry->vaddr, IDX_TO_OFF(entry->pages)); free(entry->busaddr, DRM_MEM_SGLISTS); free(entry, DRM_MEM_DRIVER); diff --git a/sys/dev/drm2/drm_scatter.c b/sys/dev/drm2/drm_scatter.c index 510fce4cd6a4..1ccc88a5f69c 100644 --- a/sys/dev/drm2/drm_scatter.c +++ b/sys/dev/drm2/drm_scatter.c @@ -47,7 +47,7 @@ void drm_sg_cleanup(struct drm_sg_mem * entry) return; if (entry->vaddr != 0) - kmem_free(kernel_arena, entry->vaddr, IDX_TO_OFF(entry->pages)); + kmem_free(entry->vaddr, IDX_TO_OFF(entry->pages)); free(entry->busaddr, DRM_MEM_SGLISTS); free(entry, DRM_MEM_DRIVER); diff --git a/sys/dev/hyperv/vmbus/hyperv.c b/sys/dev/hyperv/vmbus/hyperv.c index d2d252ded07c..5bb38fde8241 100644 --- a/sys/dev/hyperv/vmbus/hyperv.c +++ b/sys/dev/hyperv/vmbus/hyperv.c @@ -264,8 +264,7 @@ SYSINIT(hyperv_initialize, SI_SUB_HYPERVISOR, SI_ORDER_FIRST, hyperv_init, static void hypercall_memfree(void) { - kmem_free(kernel_arena, (vm_offset_t)hypercall_context.hc_addr, - PAGE_SIZE); + kmem_free((vm_offset_t)hypercall_context.hc_addr, PAGE_SIZE); hypercall_context.hc_addr = NULL; } diff --git a/sys/dev/liquidio/lio_network.h b/sys/dev/liquidio/lio_network.h index b8cdcde76626..b1f4e1448fe9 100644 --- a/sys/dev/liquidio/lio_network.h +++ b/sys/dev/liquidio/lio_network.h @@ -212,7 +212,7 @@ static inline void lio_dma_free(size_t size, void *cpu_addr) { - kmem_free(kmem_arena, (vm_offset_t)cpu_addr, size); + kmem_free((vm_offset_t)cpu_addr, size); } static inline uint64_t diff --git a/sys/kern/kern_malloc.c b/sys/kern/kern_malloc.c index 0269298b6a0e..c102eb52e5bd 100644 --- a/sys/kern/kern_malloc.c +++ b/sys/kern/kern_malloc.c @@ -475,7 +475,7 @@ void contigfree(void *addr, unsigned long size, struct malloc_type *type) { - kmem_free(kernel_arena, (vm_offset_t)addr, size); + kmem_free((vm_offset_t)addr, size); malloc_type_freed(type, round_page(size)); } diff --git a/sys/kern/subr_busdma_bufalloc.c b/sys/kern/subr_busdma_bufalloc.c index 
c449e8820e7b..52d1388ba7fc 100644 --- a/sys/kern/subr_busdma_bufalloc.c +++ b/sys/kern/subr_busdma_bufalloc.c @@ -171,6 +171,6 @@ void busdma_bufalloc_free_uncacheable(void *item, vm_size_t size, uint8_t pflag) { - kmem_free(kernel_arena, (vm_offset_t)item, size); + kmem_free((vm_offset_t)item, size); } diff --git a/sys/mips/ingenic/jz4780_lcd.c b/sys/mips/ingenic/jz4780_lcd.c index 506c23e60121..77327fc093ac 100644 --- a/sys/mips/ingenic/jz4780_lcd.c +++ b/sys/mips/ingenic/jz4780_lcd.c @@ -129,7 +129,7 @@ jzlcd_allocfb(struct jzlcd_softc *sc) static void jzlcd_freefb(struct jzlcd_softc *sc) { - kmem_free(kernel_arena, sc->vaddr, sc->fbsize); + kmem_free(sc->vaddr, sc->fbsize); } static void diff --git a/sys/mips/mips/busdma_machdep.c b/sys/mips/mips/busdma_machdep.c index 46ad2c248633..84967b12d53a 100644 --- a/sys/mips/mips/busdma_machdep.c +++ b/sys/mips/mips/busdma_machdep.c @@ -756,7 +756,7 @@ bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map) !_bus_dma_can_bounce(dmat->lowaddr, dmat->highaddr)) uma_zfree(bufzone->umazone, vaddr); else - kmem_free(kernel_arena, (vm_offset_t)vaddr, dmat->maxsize); + kmem_free((vm_offset_t)vaddr, dmat->maxsize); CTR3(KTR_BUSDMA, "%s: tag %p flags 0x%x", __func__, dmat, dmat->flags); } diff --git a/sys/powerpc/powerpc/busdma_machdep.c b/sys/powerpc/powerpc/busdma_machdep.c index 78c3568785c6..cc5212198da6 100644 --- a/sys/powerpc/powerpc/busdma_machdep.c +++ b/sys/powerpc/powerpc/busdma_machdep.c @@ -570,7 +570,7 @@ bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map) if (!map->contigalloc) free(vaddr, M_DEVBUF); else - kmem_free(kmem_arena, (vm_offset_t)vaddr, dmat->maxsize); + kmem_free((vm_offset_t)vaddr, dmat->maxsize); bus_dmamap_destroy(dmat, map); CTR3(KTR_BUSDMA, "%s: tag %p flags 0x%x", __func__, dmat, dmat->flags); } diff --git a/sys/vm/uma.h b/sys/vm/uma.h index 87ace146bad2..06a3f3545cfb 100644 --- a/sys/vm/uma.h +++ b/sys/vm/uma.h @@ -616,12 +616,11 @@ void uma_zone_set_freef(uma_zone_t 
zone, uma_free freef); * These flags are setable in the allocf and visible in the freef. */ #define UMA_SLAB_BOOT 0x01 /* Slab alloced from boot pages */ -#define UMA_SLAB_KRWX 0x02 /* Slab alloced from kernel_rwx_arena */ -#define UMA_SLAB_KERNEL 0x04 /* Slab alloced from kernel_map */ +#define UMA_SLAB_KERNEL 0x04 /* Slab alloced from kmem */ #define UMA_SLAB_PRIV 0x08 /* Slab alloced from priv allocator */ #define UMA_SLAB_OFFP 0x10 /* Slab is managed separately */ #define UMA_SLAB_MALLOC 0x20 /* Slab is a large malloc slab */ -/* 0x40 and 0x80 are available */ +/* 0x02, 0x40, and 0x80 are available */ /* * Used to pre-fill a zone with some number of items diff --git a/sys/vm/uma_core.c b/sys/vm/uma_core.c index 84710963dbc5..d971c3c76a61 100644 --- a/sys/vm/uma_core.c +++ b/sys/vm/uma_core.c @@ -1300,14 +1300,11 @@ noobj_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *flags, static void page_free(void *mem, vm_size_t size, uint8_t flags) { - struct vmem *vmem; - if (flags & UMA_SLAB_KERNEL) - vmem = kernel_arena; - else + if ((flags & UMA_SLAB_KERNEL) == 0) panic("UMA: page_free used with invalid flags %x", flags); - kmem_free(vmem, (vm_offset_t)mem, size); + kmem_free((vm_offset_t)mem, size); } /* @@ -3694,10 +3691,6 @@ uma_large_malloc_domain(vm_size_t size, int domain, int wait) vsetslab(addr, slab); slab->us_data = (void *)addr; slab->us_flags = UMA_SLAB_KERNEL | UMA_SLAB_MALLOC; -#if VM_NRESERVLEVEL > 0 - if (__predict_false((wait & M_EXEC) != 0)) - slab->us_flags |= UMA_SLAB_KRWX; -#endif slab->us_size = size; slab->us_domain = vm_phys_domain(PHYS_TO_VM_PAGE( pmap_kextract(addr))); @@ -3719,19 +3712,10 @@ uma_large_malloc(vm_size_t size, int wait) void uma_large_free(uma_slab_t slab) { - struct vmem *arena; KASSERT((slab->us_flags & UMA_SLAB_KERNEL) != 0, ("uma_large_free: Memory not allocated with uma_large_malloc.")); -#if VM_NRESERVLEVEL > 0 - if (__predict_true((slab->us_flags & UMA_SLAB_KRWX) == 0)) - arena = kernel_arena; - else - arena 
= kernel_rwx_arena; -#else - arena = kernel_arena; -#endif - kmem_free(arena, (vm_offset_t)slab->us_data, slab->us_size); + kmem_free((vm_offset_t)slab->us_data, slab->us_size); uma_total_dec(slab->us_size); zone_free_item(slabzone, slab, NULL, SKIP_NONE); } diff --git a/sys/vm/vm_extern.h b/sys/vm/vm_extern.h index b2747769496f..3174e645c8be 100644 --- a/sys/vm/vm_extern.h +++ b/sys/vm/vm_extern.h @@ -66,7 +66,7 @@ vm_offset_t kmem_alloc_contig_domain(int domain, vm_size_t size, int flags, vm_memattr_t memattr); vm_offset_t kmem_malloc(vm_size_t size, int flags); vm_offset_t kmem_malloc_domain(int domain, vm_size_t size, int flags); -void kmem_free(struct vmem *, vm_offset_t, vm_size_t); +void kmem_free(vm_offset_t addr, vm_size_t size); /* This provides memory for previously allocated address space. */ int kmem_back(vm_object_t, vm_offset_t, vm_size_t, int); diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c index 38800c6d2668..5dee4d758dd0 100644 --- a/sys/vm/vm_kern.c +++ b/sys/vm/vm_kern.c @@ -462,6 +462,10 @@ kmem_back_domain(int domain, vm_object_t object, vm_offset_t addr, m->valid = VM_PAGE_BITS_ALL; pmap_enter(kernel_pmap, addr + i, m, prot, prot | PMAP_ENTER_WIRED, 0); +#if VM_NRESERVLEVEL > 0 + if (__predict_false((prot & VM_PROT_EXECUTE) != 0)) + m->oflags |= VPO_KMEM_EXEC; +#endif } VM_OBJECT_WUNLOCK(object); @@ -497,9 +501,10 @@ kmem_back(vm_object_t object, vm_offset_t addr, vm_size_t size, int flags) * A physical page must exist within the specified object at each index * that is being unmapped. 
*/ -static int +static struct vmem * _kmem_unback(vm_object_t object, vm_offset_t addr, vm_size_t size) { + struct vmem *arena; vm_page_t m, next; vm_offset_t end, offset; int domain; @@ -508,13 +513,21 @@ _kmem_unback(vm_object_t object, vm_offset_t addr, vm_size_t size) ("kmem_unback: only supports kernel object.")); if (size == 0) - return (0); + return (NULL); pmap_remove(kernel_pmap, addr, addr + size); offset = addr - VM_MIN_KERNEL_ADDRESS; end = offset + size; VM_OBJECT_WLOCK(object); m = vm_page_lookup(object, atop(offset)); domain = vm_phys_domain(m); +#if VM_NRESERVLEVEL > 0 + if (__predict_true((m->oflags & VPO_KMEM_EXEC) == 0)) + arena = vm_dom[domain].vmd_kernel_arena; + else + arena = vm_dom[domain].vmd_kernel_rwx_arena; +#else + arena = vm_dom[domain].vmd_kernel_arena; +#endif for (; offset < end; offset += PAGE_SIZE, m = next) { next = vm_page_next(m); vm_page_unwire(m, PQ_NONE); @@ -522,14 +535,14 @@ _kmem_unback(vm_object_t object, vm_offset_t addr, vm_size_t size) } VM_OBJECT_WUNLOCK(object); - return (domain); + return (arena); } void kmem_unback(vm_object_t object, vm_offset_t addr, vm_size_t size) { - _kmem_unback(object, addr, size); + (void)_kmem_unback(object, addr, size); } /* @@ -539,30 +552,14 @@ kmem_unback(vm_object_t object, vm_offset_t addr, vm_size_t size) * original allocation. 
*/ void -kmem_free(struct vmem *vmem, vm_offset_t addr, vm_size_t size) +kmem_free(vm_offset_t addr, vm_size_t size) { struct vmem *arena; - int domain; - -#if VM_NRESERVLEVEL > 0 - KASSERT(vmem == kernel_arena || vmem == kernel_rwx_arena, - ("kmem_free: Only kernel_arena or kernel_rwx_arena are supported.")); -#else - KASSERT(vmem == kernel_arena, - ("kmem_free: Only kernel_arena is supported.")); -#endif size = round_page(size); - domain = _kmem_unback(kernel_object, addr, size); -#if VM_NRESERVLEVEL > 0 - if (__predict_true(vmem == kernel_arena)) - arena = vm_dom[domain].vmd_kernel_arena; - else - arena = vm_dom[domain].vmd_kernel_rwx_arena; -#else - arena = vm_dom[domain].vmd_kernel_arena; -#endif - vmem_free(arena, addr, size); + arena = _kmem_unback(kernel_object, addr, size); + if (arena != NULL) + vmem_free(arena, addr, size); } /* diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h index d6a4115b89be..73d289fd8b13 100644 --- a/sys/vm/vm_page.h +++ b/sys/vm/vm_page.h @@ -234,7 +234,7 @@ struct vm_page { * mappings, and such pages are also not on any PQ queue. 
* */ -#define VPO_UNUSED01 0x01 /* --available-- */ +#define VPO_KMEM_EXEC 0x01 /* kmem mapping allows execution */ #define VPO_SWAPSLEEP 0x02 /* waiting for swap to finish */ #define VPO_UNMANAGED 0x04 /* no PV management for page */ #define VPO_SWAPINPROG 0x08 /* swap I/O in progress on page */ diff --git a/sys/x86/iommu/busdma_dmar.c b/sys/x86/iommu/busdma_dmar.c index 9b2787828114..40786d46621c 100644 --- a/sys/x86/iommu/busdma_dmar.c +++ b/sys/x86/iommu/busdma_dmar.c @@ -479,7 +479,7 @@ dmar_bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map1) } else { KASSERT((map->flags & BUS_DMAMAP_DMAR_KMEM_ALLOC) != 0, ("dmar_bus_dmamem_free for non alloced map %p", map)); - kmem_free(kernel_arena, (vm_offset_t)vaddr, tag->common.maxsize); + kmem_free((vm_offset_t)vaddr, tag->common.maxsize); map->flags &= ~BUS_DMAMAP_DMAR_KMEM_ALLOC; } diff --git a/sys/x86/iommu/intel_intrmap.c b/sys/x86/iommu/intel_intrmap.c index 6bb2355fb205..32e11a8347e8 100644 --- a/sys/x86/iommu/intel_intrmap.c +++ b/sys/x86/iommu/intel_intrmap.c @@ -374,7 +374,7 @@ dmar_fini_irt(struct dmar_unit *unit) dmar_disable_ir(unit); dmar_qi_invalidate_iec_glob(unit); vmem_destroy(unit->irtids); - kmem_free(kernel_arena, (vm_offset_t)unit->irt, - unit->irte_cnt * sizeof(dmar_irte_t)); + kmem_free((vm_offset_t)unit->irt, unit->irte_cnt * + sizeof(dmar_irte_t)); } } diff --git a/sys/x86/iommu/intel_qi.c b/sys/x86/iommu/intel_qi.c index e2c923c4b3e9..ed72a0d8900a 100644 --- a/sys/x86/iommu/intel_qi.c +++ b/sys/x86/iommu/intel_qi.c @@ -444,7 +444,7 @@ dmar_fini_qi(struct dmar_unit *unit) ("dmar%d: waiters on disabled queue", unit->unit)); DMAR_UNLOCK(unit); - kmem_free(kernel_arena, unit->inv_queue, unit->inv_queue_size); + kmem_free(unit->inv_queue, unit->inv_queue_size); unit->inv_queue = 0; unit->inv_queue_size = 0; unit->qi_enabled = 0; diff --git a/sys/x86/x86/busdma_bounce.c b/sys/x86/x86/busdma_bounce.c index f124eff29bf2..1c76804a61d5 100644 --- a/sys/x86/x86/busdma_bounce.c +++ 
b/sys/x86/x86/busdma_bounce.c @@ -499,8 +499,7 @@ bounce_bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map) if ((dmat->bounce_flags & BUS_DMA_KMEM_ALLOC) == 0) free_domain(vaddr, M_DEVBUF); else - kmem_free(kernel_arena, (vm_offset_t)vaddr, - dmat->common.maxsize); + kmem_free((vm_offset_t)vaddr, dmat->common.maxsize); CTR3(KTR_BUSDMA, "%s: tag %p flags 0x%x", __func__, dmat, dmat->bounce_flags); } From f1722afc8df6606b3610b247b1fc7d4c86851ed2 Mon Sep 17 00:00:00 2001 From: Brad Davis Date: Sat, 25 Aug 2018 20:19:16 +0000 Subject: [PATCH 14/51] Remove trailing slash in pathname so that valid METALOG is created in the NO_ROOT case of make packages. Submitted by: Dan McGregor Approved by: re (rgrimes) --- usr.sbin/syslogd/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/usr.sbin/syslogd/Makefile b/usr.sbin/syslogd/Makefile index c54bc895ff4a..8e1e94c9dda8 100644 --- a/usr.sbin/syslogd/Makefile +++ b/usr.sbin/syslogd/Makefile @@ -21,7 +21,7 @@ CFLAGS+= -DINET6 .endif SYSLOGD_D= -SYSLOGD_DDIR= /etc/syslog.d/ +SYSLOGD_DDIR= /etc/syslog.d .if ${MK_FTP} != "no" SYSLOGD_D+= ftp.conf .endif From 00d5b07760115e5c238ba96d278cf024c5c26d6c Mon Sep 17 00:00:00 2001 From: Brad Davis Date: Sun, 26 Aug 2018 02:09:20 +0000 Subject: [PATCH 15/51] Fix the install location of hcsecd.conf Submitted by: vangyzen Approved by: re (marius) --- usr.sbin/bluetooth/hcsecd/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/usr.sbin/bluetooth/hcsecd/Makefile b/usr.sbin/bluetooth/hcsecd/Makefile index 5520effdb10d..35353a3bfe94 100644 --- a/usr.sbin/bluetooth/hcsecd/Makefile +++ b/usr.sbin/bluetooth/hcsecd/Makefile @@ -2,6 +2,7 @@ # $FreeBSD$ CONFS= hcsecd.conf +CONFSDIR= /etc/bluetooth CONFSMODE_hcsecd.conf= 600 PROG= hcsecd MAN= hcsecd.8 hcsecd.conf.5 From a1b042f79d71a3a444aef1281fa61ccd45064c6a Mon Sep 17 00:00:00 2001 From: Colin Percival Date: Sun, 26 Aug 2018 03:56:54 +0000 Subject: [PATCH 16/51] Disable atkbd0 and atkbdc0 in EC2 AMIs.
This has the effect of skipping the probing and attaching of the PS/2 mouse (not present on EC2) and keyboard (emulated, but not accessible via EC2). Note that we disable atkbd0 separately even though during device probing it shows up as a child of atkbdc0; this is necessary because the device is also initialized during the early console setup from hammer_time. This change cuts the kernel boot time on an EC2 c5.4xlarge instance from 7259ms down to 4727 ms. Approved by: re (marius) --- release/tools/ec2.conf | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/release/tools/ec2.conf b/release/tools/ec2.conf index 0ef5a211df1e..562647cd7024 100644 --- a/release/tools/ec2.conf +++ b/release/tools/ec2.conf @@ -68,6 +68,13 @@ vm_extra_pre_umount() { echo 'autoboot_delay="-1"' >> ${DESTDIR}/boot/loader.conf echo 'beastie_disable="YES"' >> ${DESTDIR}/boot/loader.conf + # The emulated keyboard attached to EC2 instances is inaccessible to + # users, and there is no mouse attached at all; disable the keyboard + # and the keyboard controller (to which the mouse would attach, if + # one existed) in order to save time in device probing. + echo 'hint.atkbd.0.disabled=1' >> ${DESTDIR}/boot/loader.conf + echo 'hint.atkbdc.0.disabled=1' >> ${DESTDIR}/boot/loader.conf + # EC2 has two consoles: An emulated serial port ("system log"), # which has been present since 2006; and a VGA console ("instance # screenshot") which was introduced in 2016. From 19fa89e93888ebfe79d06c7b831bcd39001dce63 Mon Sep 17 00:00:00 2001 From: Mark Murray Date: Sun, 26 Aug 2018 12:51:46 +0000 Subject: [PATCH 17/51] Remove the Yarrow PRNG algorithm option in accordance with due notice given in random(4). This includes updating of the relevant man pages, and no-longer-used harvesting parameters. Ensure that the pseudo-unit-test still does something useful, now also with the "other" algorithm instead of Yarrow.
PR: 230870 Reviewed by: cem Approved by: so(delphij,gtetlow) Approved by: re(marius) Differential Revision: https://reviews.freebsd.org/D16898 --- UPDATING | 5 + share/man/man4/random.4 | 29 +- share/man/man9/random_harvest.9 | 17 +- sys/arm/amlogic/aml8726/aml8726_rng.c | 3 +- sys/arm/broadcom/bcm2835/bcm2835_rng.c | 3 +- sys/conf/NOTES | 7 +- sys/conf/files | 9 +- sys/conf/options | 3 - sys/dev/glxsb/glxsb.c | 2 +- sys/dev/hifn/hifn7751.c | 2 +- sys/dev/random/build.sh | 16 +- sys/dev/random/fortuna.c | 5 + sys/dev/random/other_algorithm.c | 34 ++- sys/dev/random/other_algorithm.h | 7 +- sys/dev/random/random_harvestq.c | 21 +- sys/dev/random/random_harvestq.h | 3 +- sys/dev/random/randomdev.c | 3 +- sys/dev/random/unit_test.c | 6 +- sys/dev/random/unit_test.h | 5 +- sys/dev/random/yarrow.c | 395 ------------------------- sys/dev/random/yarrow.h | 47 --- sys/dev/rndtest/rndtest.c | 2 +- sys/dev/safe/safe.c | 2 +- sys/dev/syscons/scmouse.c | 2 +- sys/dev/syscons/syscons.c | 2 +- sys/dev/ubsec/ubsec.c | 2 +- sys/dev/virtio/random/virtio_random.c | 3 +- sys/dev/vt/vt_core.c | 2 +- sys/dev/vt/vt_sysmouse.c | 2 +- sys/fs/tmpfs/tmpfs_subr.c | 2 +- sys/kern/kern_intr.c | 4 +- sys/kern/subr_bus.c | 17 +- sys/mips/cavium/octeon_rnd.c | 3 +- sys/modules/Makefile | 2 - sys/modules/random_yarrow/Makefile | 11 - sys/net/if_ethersubr.c | 2 +- sys/net/if_tun.c | 2 +- sys/netgraph/ng_iface.c | 2 +- sys/sys/random.h | 41 +-- sys/ufs/ffs/ffs_inode.c | 4 +- sys/vm/uma_core.c | 8 +- tools/tools/sysdoc/tunables.mdoc | 15 - 42 files changed, 123 insertions(+), 629 deletions(-) delete mode 100644 sys/dev/random/yarrow.c delete mode 100644 sys/dev/random/yarrow.h delete mode 100644 sys/modules/random_yarrow/Makefile diff --git a/UPDATING b/UPDATING index 0995a2698fba..34a043871d06 100644 --- a/UPDATING +++ b/UPDATING @@ -31,6 +31,11 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 12.x IS SLOW: disable the most expensive debugging functionality run "ln -s 'abort:false,junk:false' 
/etc/malloc.conf".) +20180826: + The Yarrow CSPRNG has been removed from the kernel as it has not been + supported by its designers since at least 2003. Fortuna has been the + default since FreeBSD-11. + 20170822: devctl freeze/that have gone into the tree, the rc scripts have been updated to use them and devmatch has been changed. You should update diff --git a/share/man/man4/random.4 b/share/man/man4/random.4 index 23578d015bc0..bd55e048ff40 100644 --- a/share/man/man4/random.4 +++ b/share/man/man4/random.4 @@ -23,7 +23,7 @@ .\" .\" $FreeBSD$ .\" -.Dd August 17, 2015 +.Dd August 26, 2018 .Dt RANDOM 4 .Os .Sh NAME @@ -153,26 +153,15 @@ the device is not created until an "algorithm module" is loaded. -Two of these modules -are built by default, -.Em random_fortuna -and -.Em random_yarrow . +The only module built by default is +.Em random_fortuna . The .Em random_yarrow -module is deprecated, -and will be removed in -.Fx 12. -Use of the Yarrow algorithm -is not encouraged, -but while still present -in the kernel source, -it can be selected with the -.Cd "options RANDOM_YARROW" -kernel option. -Note that these loadable modules -are slightly less efficient -than their compiled-in equivalents. +module was removed in +.Fx 12 . +Note that this loadable module +is slightly less efficient +than its compiled-in equivalent. This is because some functions must be locked against load and unload events, @@ -351,4 +340,4 @@ introduced in The Yarrow algorithm is no longer supported by its authors, -and is therefore deprecated. +and is therefore no longer available. 
diff --git a/share/man/man9/random_harvest.9 b/share/man/man9/random_harvest.9 index 5c0693471c24..6ae643ba2fb1 100644 --- a/share/man/man9/random_harvest.9 +++ b/share/man/man9/random_harvest.9 @@ -25,7 +25,7 @@ .\" .\" $FreeBSD$ .\" -.Dd July 13, 2015 +.Dd August 26, 2018 .Dt RANDOM_HARVEST 9 .Os .Sh NAME @@ -38,21 +38,18 @@ .Fo random_harvest_direct .Fa "void *entropy" .Fa "u_int size" -.Fa "u_int bits" .Fa "enum esource source" .Fc .Ft void .Fo random_harvest_fast .Fa "void *entropy" .Fa "u_int size" -.Fa "u_int bits" .Fa "enum esource source" .Fc .Ft void .Fo random_harvest_queue .Fa "void *entropy" .Fa "u_int size" -.Fa "u_int bits" .Fa "enum esource source" .Fc .Sh DESCRIPTION @@ -108,18 +105,6 @@ choice for most entropy sources such as interrupts or console events. .Pp -The -.Fa bits -argument is only used -by the deprecated Yarrow algorithm. -For compatibility, -the caller should -.Em "very conservatively" -estimate the number of random bits -in the sample, -and pass this in -.Fa bits . -.Pp Interrupt harvesting has been in part simplified for the kernel programmer. 
diff --git a/sys/arm/amlogic/aml8726/aml8726_rng.c b/sys/arm/amlogic/aml8726/aml8726_rng.c index 9c38e3c8c06e..d0cc0cf8722e 100644 --- a/sys/arm/amlogic/aml8726/aml8726_rng.c +++ b/sys/arm/amlogic/aml8726/aml8726_rng.c @@ -75,8 +75,7 @@ aml8726_rng_harvest(void *arg) rn[0] = CSR_READ_4(sc, AML_RNG_0_REG); rn[1] = CSR_READ_4(sc, AML_RNG_1_REG); - random_harvest(rn, sizeof(rn), sizeof(rn) * NBBY / 2, - RANDOM_PURE_AML8726); + random_harvest(rn, sizeof(rn), RANDOM_PURE_AML8726); callout_reset(&sc->co, sc->ticks, aml8726_rng_harvest, sc); } diff --git a/sys/arm/broadcom/bcm2835/bcm2835_rng.c b/sys/arm/broadcom/bcm2835/bcm2835_rng.c index baf824e1f228..eb031e8c6ee8 100644 --- a/sys/arm/broadcom/bcm2835/bcm2835_rng.c +++ b/sys/arm/broadcom/bcm2835/bcm2835_rng.c @@ -289,8 +289,7 @@ bcm2835_rng_harvest(void *arg) cnt = nread * sizeof(uint32_t); if (cnt > 0) - random_harvest_queue(sc->sc_buf, cnt, cnt * NBBY / 2, - RANDOM_PURE_BROADCOM); + random_harvest_queue(sc->sc_buf, cnt, RANDOM_PURE_BROADCOM); callout_reset(&sc->sc_rngto, RNG_CALLOUT_TICKS, bcm2835_rng_harvest, sc); } diff --git a/sys/conf/NOTES b/sys/conf/NOTES index 08073b5d92d6..7b4143cf29f7 100644 --- a/sys/conf/NOTES +++ b/sys/conf/NOTES @@ -3008,11 +3008,8 @@ options BROOKTREE_ALLOC_PAGES=(217*4+1) options MAXFILES=999 # Random number generator -# Only ONE of the below two may be used; they are mutually exclusive. -# If neither is present, then the Fortuna algorithm is selected. -#options RANDOM_YARROW # Yarrow CSPRNG (old default) -#options RANDOM_LOADABLE # Allow the algorithm to be loaded as - # a module. +# Allow the CSPRNG algorithm to be loaded as a module. +#options RANDOM_LOADABLE # Select this to allow high-rate but potentially expensive # harvesting of Slab-Allocator entropy. In very high-rate # situations the value of doing this is dubious at best. 
diff --git a/sys/conf/files b/sys/conf/files index 247de01e96e2..b2fcc65773e0 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -2821,12 +2821,9 @@ rt2860.fw optional rt2860fw | ralfw \ clean "rt2860.fw" dev/random/random_infra.c optional random dev/random/random_harvestq.c optional random -dev/random/randomdev.c optional random random_yarrow | \ - random !random_yarrow !random_loadable -dev/random/yarrow.c optional random random_yarrow -dev/random/fortuna.c optional random !random_yarrow !random_loadable -dev/random/hash.c optional random random_yarrow | \ - random !random_yarrow !random_loadable +dev/random/randomdev.c optional random +dev/random/fortuna.c optional random !random_loadable +dev/random/hash.c optional random dev/rc/rc.c optional rc dev/rccgpio/rccgpio.c optional rccgpio gpio dev/re/if_re.c optional re diff --git a/sys/conf/options b/sys/conf/options index 974bbb1efa9e..a024f530fe42 100644 --- a/sys/conf/options +++ b/sys/conf/options @@ -983,9 +983,6 @@ RACCT_DEFAULT_TO_DISABLED opt_global.h RCTL opt_global.h # Random number generator(s) -# Which CSPRNG hash we get. -# If Yarrow is not chosen, Fortuna is selected. -RANDOM_YARROW opt_global.h # With this, no entropy processor is loaded, but the entropy # harvesting infrastructure is present. This means an entropy # processor may be loaded as a module. diff --git a/sys/dev/glxsb/glxsb.c b/sys/dev/glxsb/glxsb.c index 4916d95cc6b7..4d89e7d6756a 100644 --- a/sys/dev/glxsb/glxsb.c +++ b/sys/dev/glxsb/glxsb.c @@ -457,7 +457,7 @@ glxsb_rnd(void *v) value = bus_read_4(sc->sc_sr, SB_RANDOM_NUM); /* feed with one uint32 */ /* MarkM: FIX!! Check that this does not swamp the harvester! 
*/ - random_harvest_queue(&value, sizeof(value), 32/2, RANDOM_PURE_GLXSB); + random_harvest_queue(&value, sizeof(value), RANDOM_PURE_GLXSB); } callout_reset(&sc->sc_rngco, sc->sc_rnghz, glxsb_rnd, sc); diff --git a/sys/dev/hifn/hifn7751.c b/sys/dev/hifn/hifn7751.c index e43dd7b12a6b..f1a7ea52c257 100644 --- a/sys/dev/hifn/hifn7751.c +++ b/sys/dev/hifn/hifn7751.c @@ -259,7 +259,7 @@ static void default_harvest(struct rndtest_state *rsp, void *buf, u_int count) { /* MarkM: FIX!! Check that this does not swamp the harvester! */ - random_harvest_queue(buf, count, count*NBBY/2, RANDOM_PURE_HIFN); + random_harvest_queue(buf, count, RANDOM_PURE_HIFN); } static u_int diff --git a/sys/dev/random/build.sh b/sys/dev/random/build.sh index 326f8a6c12e4..08e033f844ba 100755 --- a/sys/dev/random/build.sh +++ b/sys/dev/random/build.sh @@ -28,23 +28,23 @@ # # Basic script to build crude unit tests. # -# Diff-reduction checking between Yarrow and fortuna is done like so: +# Diff-reduction checking between fortuna and the other algorithm is done like so: # -# $ diff -u -B <(sed -e 's/yarrow/wombat/g' \ -# -e 's/YARROW/WOMBAT/g' yarrow.c) \ -# <(sed -e 's/fortuna/wombat/g' \ -# -e 's/FORTUNA/WOMBAT/g' fortuna.c) | less +# $ diff -u -B <(sed -e 's/random_other/random_wombat/g' \ +# -e 's/RANDOM_OTHER/RANDOM_WOMBAT/g' other_algorithm.c) \ +# <(sed -e 's/random_fortuna/random_wombat/g' \ +# -e 's/RANDOM_FORTUNA/RANDOM_WOMBAT/g' fortuna.c) | less # cc -g -O0 -pthread \ -I../.. -lstdthreads -Wall \ unit_test.c \ - yarrow.c \ + other_algorithm.c \ hash.c \ ../../crypto/rijndael/rijndael-api-fst.c \ ../../crypto/rijndael/rijndael-alg-fst.c \ ../../crypto/sha2/sha256c.c \ -lz \ - -o yunit_test + -o other_unit_test cc -g -O0 -pthread \ -I../.. 
-lstdthreads -Wall \ unit_test.c \ @@ -54,4 +54,4 @@ cc -g -O0 -pthread \ ../../crypto/rijndael/rijndael-alg-fst.c \ ../../crypto/sha2/sha256c.c \ -lz \ - -o funit_test + -o fortuna_unit_test diff --git a/sys/dev/random/fortuna.c b/sys/dev/random/fortuna.c index 17ebbd833c9a..c20cea2f21ae 100644 --- a/sys/dev/random/fortuna.c +++ b/sys/dev/random/fortuna.c @@ -59,6 +59,7 @@ __FBSDID("$FreeBSD$"); #include #include #else /* !_KERNEL */ +#include #include #include #include @@ -97,9 +98,11 @@ CTASSERT(RANDOM_BLOCKSIZE == sizeof(uint128_t)); CTASSERT(RANDOM_KEYSIZE == 2*RANDOM_BLOCKSIZE); /* Probes for dtrace(1) */ +#ifdef _KERNEL SDT_PROVIDER_DECLARE(random); SDT_PROVIDER_DEFINE(random); SDT_PROBE_DEFINE2(random, fortuna, event_processor, debug, "u_int", "struct fs_pool *"); +#endif /* _KERNEL */ /* * This is the beastie that needs protecting. It contains all of the @@ -398,7 +401,9 @@ random_fortuna_pre_read(void) } else break; } +#ifdef _KERNEL SDT_PROBE2(random, fortuna, event_processor, debug, fortuna_state.fs_reseedcount, fortuna_state.fs_pool); +#endif /* FS&K */ random_fortuna_reseed_internal(s, i < RANDOM_FORTUNA_NPOOLS ? i + 1 : RANDOM_FORTUNA_NPOOLS); /* Clean up and secure */ diff --git a/sys/dev/random/other_algorithm.c b/sys/dev/random/other_algorithm.c index 0c73ef5421df..57679f53a7da 100644 --- a/sys/dev/random/other_algorithm.c +++ b/sys/dev/random/other_algorithm.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2015 Mark R V Murray + * Copyright (c) 2015-2018 Mark R V Murray * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -30,19 +30,21 @@ * containing an alternative entropy-processing algorithm for random(4). * * The functions below should be completed with the appropriate code, - * and the nearby yarrow.c and fortuna.c may be consulted for examples - * of working code. + * and the nearby fortuna.c may be consulted for examples of working code. 
* * The author is willing to provide reasonable help to those wishing to * write such a module for themselves. Please use the markm@ FreeBSD * email address, and ensure that you are developing this on a suitably - * supported branch (This is currently 11-CURRENT, and will be no - * older than 11-STABLE in the future). + * supported branch (This is currently 12-CURRENT, and may be no + * older than 12-STABLE in the future). */ #include __FBSDID("$FreeBSD$"); +#include + +#ifdef _KERNEL #include #include #include @@ -62,6 +64,24 @@ __FBSDID("$FreeBSD$"); #include #include #include +#else /* !_KERNEL */ +#include +#include +#include +#include +#include +#include + +#include "unit_test.h" + +#include +#include + +#include +#include +#include +#include +#endif /* _KERNEL */ static void random_other_pre_read(void); static void random_other_read(uint8_t *, u_int); @@ -73,9 +93,7 @@ static void random_other_deinit_alg(void *); /* * RANDOM_OTHER_NPOOLS is used when reading hardware random * number sources to ensure that each pool gets one read sample - * per loop iteration. Yarrow has 2 such pools (FAST and SLOW), - * and fortuna has 32 (0-31). The RNG used prior to Yarrow and - * ported from Linux had just 1 pool. + * per loop iteration. Fortuna has 32 (0-31). */ #define RANDOM_OTHER_NPOOLS 1 diff --git a/sys/dev/random/other_algorithm.h b/sys/dev/random/other_algorithm.h index 8ca2bb89b39e..8486401daca7 100644 --- a/sys/dev/random/other_algorithm.h +++ b/sys/dev/random/other_algorithm.h @@ -31,14 +31,13 @@ * containing an alternative entropy-processing algorithm for random(4). * * The functions below should be completed with the appropriate code, - * and the nearby yarrow.c and fortuna.c may be consulted for examples - * of working code. + * and the nearby fortuna.c may be consulted for examples of working code. * * The author is willing to provide reasonable help to those wishing to * write such a module for themselves. 
Please use the markm@ FreeBSD * email address, and ensure that you are developing this on a suitably - * supported branch (This is currently 11-CURRENT, and will be no - * older than 11-STABLE in the future). + * supported branch (This is currently 12-CURRENT, and may be no + * older than 12-STABLE in the future). */ #ifndef SYS_DEV_RANDOM_OTHER_H_INCLUDED diff --git a/sys/dev/random/random_harvestq.c b/sys/dev/random/random_harvestq.c index d8620c84f1d6..934464e185c1 100644 --- a/sys/dev/random/random_harvestq.c +++ b/sys/dev/random/random_harvestq.c @@ -140,7 +140,7 @@ static struct kproc_desc random_proc_kp = { &harvest_context.hc_kthread_proc, }; -/* Pass the given event straight through to Fortuna/Yarrow/Whatever. */ +/* Pass the given event straight through to Fortuna/Whatever. */ static __inline void random_harvestq_fast_process_event(struct harvest_event *event) { @@ -178,7 +178,7 @@ random_kthread(void) /* XXX: FIX!! Increase the high-performance data rate? Need some measurements first. */ for (i = 0; i < RANDOM_ACCUM_MAX; i++) { if (harvest_context.hc_entropy_fast_accumulator.buf[i]) { - random_harvest_direct(harvest_context.hc_entropy_fast_accumulator.buf + i, sizeof(harvest_context.hc_entropy_fast_accumulator.buf[0]), 4, RANDOM_UMA); + random_harvest_direct(harvest_context.hc_entropy_fast_accumulator.buf + i, sizeof(harvest_context.hc_entropy_fast_accumulator.buf[0]), RANDOM_UMA); harvest_context.hc_entropy_fast_accumulator.buf[i] = 0; } } @@ -197,8 +197,7 @@ SYSINIT(random_device_h_proc, SI_SUB_KICK_SCHEDULER, SI_ORDER_ANY, kproc_start, /* * Run through all fast sources reading entropy for the given * number of rounds, which should be a multiple of the number - * of entropy accumulation pools in use; 2 for Yarrow and 32 - * for Fortuna. + * of entropy accumulation pools in use; it is 32 for Fortuna. 
*/ static void random_sources_feed(void) @@ -234,7 +233,7 @@ random_sources_feed(void) printf("%s: rs_read for hardware device '%s' returned no entropy.\n", __func__, rrs->rrs_source->rs_ident); continue; } - random_harvest_direct(entropy, n, (n*8)/2, rrs->rrs_source->rs_source); + random_harvest_direct(entropy, n, rrs->rrs_source->rs_source); } } explicit_bzero(entropy, sizeof(entropy)); @@ -380,7 +379,7 @@ SYSINIT(random_device_h_init, SI_SUB_RANDOM, SI_ORDER_SECOND, random_harvestq_in /* * This is used to prime the RNG by grabbing any early random stuff * known to the kernel, and inserting it directly into the hashing - * module, e.g. Fortuna or Yarrow. + * module, currently Fortuna. */ /* ARGSUSED */ static void @@ -414,7 +413,6 @@ random_harvestq_prime(void *unused __unused) count = sizeof(event.he_entropy); event.he_somecounter = (uint32_t)get_cyclecount(); event.he_size = count; - event.he_bits = count/4; /* Underestimate the size for Yarrow */ event.he_source = RANDOM_CACHED; event.he_destination = harvest_context.hc_destination[0]++; memcpy(event.he_entropy, data + i, sizeof(event.he_entropy)); @@ -459,8 +457,7 @@ SYSUNINIT(random_device_h_init, SI_SUB_RANDOM, SI_ORDER_SECOND, random_harvestq_ * read which can be quite expensive. 
*/ void -random_harvest_queue_(const void *entropy, u_int size, u_int bits, - enum random_entropy_source origin) +random_harvest_queue_(const void *entropy, u_int size, enum random_entropy_source origin) { struct harvest_event *event; u_int ring_in; @@ -474,7 +471,6 @@ random_harvest_queue_(const void *entropy, u_int size, u_int bits, event->he_somecounter = (uint32_t)get_cyclecount(); event->he_source = origin; event->he_destination = harvest_context.hc_destination[origin]++; - event->he_bits = bits; if (size <= sizeof(event->he_entropy)) { event->he_size = size; memcpy(event->he_entropy, entropy, size); @@ -496,7 +492,7 @@ random_harvest_queue_(const void *entropy, u_int size, u_int bits, * This is the right place for high-rate harvested data. */ void -random_harvest_fast_(const void *entropy, u_int size, u_int bits) +random_harvest_fast_(const void *entropy, u_int size) { u_int pos; @@ -512,7 +508,7 @@ random_harvest_fast_(const void *entropy, u_int size, u_int bits) * (e.g.) booting when initial entropy is being gathered. 
*/ void -random_harvest_direct_(const void *entropy, u_int size, u_int bits, enum random_entropy_source origin) +random_harvest_direct_(const void *entropy, u_int size, enum random_entropy_source origin) { struct harvest_event event; @@ -520,7 +516,6 @@ random_harvest_direct_(const void *entropy, u_int size, u_int bits, enum random_ size = MIN(size, sizeof(event.he_entropy)); event.he_somecounter = (uint32_t)get_cyclecount(); event.he_size = size; - event.he_bits = bits; event.he_source = origin; event.he_destination = harvest_context.hc_destination[origin]++; memcpy(event.he_entropy, entropy, size); diff --git a/sys/dev/random/random_harvestq.h b/sys/dev/random/random_harvestq.h index d58a4940bb73..c7005cde4f22 100644 --- a/sys/dev/random/random_harvestq.h +++ b/sys/dev/random/random_harvestq.h @@ -38,10 +38,9 @@ struct harvest_event { uint32_t he_somecounter; /* fast counter for clock jitter */ uint32_t he_entropy[HARVESTSIZE];/* some harvested entropy */ uint8_t he_size; /* harvested entropy byte count */ - uint8_t he_bits; /* stats about the entropy */ uint8_t he_destination; /* destination pool of this entropy */ uint8_t he_source; /* origin of the entropy */ -} __packed; +}; void read_rate_increment(u_int); diff --git a/sys/dev/random/randomdev.c b/sys/dev/random/randomdev.c index 51c25ba408a3..c7bb97c3a0fe 100644 --- a/sys/dev/random/randomdev.c +++ b/sys/dev/random/randomdev.c @@ -166,7 +166,7 @@ READ_RANDOM_UIO(struct uio *uio, bool nonblock) * Belt-and-braces. * Round up the read length to a crypto block size multiple, * which is what the underlying generator is expecting. - * See the random_buf size requirements in the Yarrow/Fortuna code. + * See the random_buf size requirements in the Fortuna code. 
*/ read_len = roundup(read_len, RANDOM_BLOCKSIZE); /* Work in chunks page-sized or less */ @@ -250,7 +250,6 @@ randomdev_accumulate(uint8_t *buf, u_int count) for (i = 0; i < RANDOM_KEYSIZE_WORDS; i += sizeof(event.he_entropy)/sizeof(event.he_entropy[0])) { event.he_somecounter = (uint32_t)get_cyclecount(); event.he_size = sizeof(event.he_entropy); - event.he_bits = event.he_size/8; event.he_source = RANDOM_CACHED; event.he_destination = destination++; /* Harmless cheating */ memcpy(event.he_entropy, entropy_data + i, sizeof(event.he_entropy)); diff --git a/sys/dev/random/unit_test.c b/sys/dev/random/unit_test.c index aec2f1d2837e..b93eb092b92b 100644 --- a/sys/dev/random/unit_test.c +++ b/sys/dev/random/unit_test.c @@ -31,7 +31,6 @@ cc -g -O0 -pthread -DRANDOM_<alg> -I../.. -lstdthreads -Wall \ unit_test.c \ - yarrow.c \ fortuna.c \ hash.c \ ../../crypto/rijndael/rijndael-api-fst.c \ @@ -41,7 +40,9 @@ cc -g -O0 -pthread -DRANDOM_<alg> -I../.. -lstdthreads -Wall \ -o unit_test ./unit_test -Where <alg> is YARROW or FORTUNA. +Where <alg> is FORTUNA. The parameterisation is a leftover from +when Yarrow was an option, and remains to enable the testing of +possible future algorithms. 
*/ #include @@ -157,7 +158,6 @@ RunHarvester(void *arg __unused) e.he_somecounter = i; *((uint64_t *)e.he_entropy) = random(); e.he_size = 8; - e.he_bits = random()%4; e.he_destination = i; e.he_source = (i + 3)%7; e.he_next = NULL; diff --git a/sys/dev/random/unit_test.h b/sys/dev/random/unit_test.h index 3c05ad0bf0f6..adca19efeadd 100644 --- a/sys/dev/random/unit_test.h +++ b/sys/dev/random/unit_test.h @@ -74,9 +74,8 @@ enum random_entropy_source { struct harvest_event { uintmax_t he_somecounter; /* fast counter for clock jitter */ uint32_t he_entropy[HARVESTSIZE];/* some harvested entropy */ - u_int he_size; /* harvested entropy byte count */ - u_int he_bits; /* stats about the entropy */ - u_int he_destination; /* destination pool of this entropy */ + uint8_t he_size; /* harvested entropy byte count */ + uint8_t he_destination; /* destination pool of this entropy */ enum random_entropy_source he_source; /* origin of the entropy */ void * he_next; /* next item on the list */ }; diff --git a/sys/dev/random/yarrow.c b/sys/dev/random/yarrow.c deleted file mode 100644 index 40fc6cd9162e..000000000000 --- a/sys/dev/random/yarrow.c +++ /dev/null @@ -1,395 +0,0 @@ -/*- - * Copyright (c) 2000-2015 Mark R V Murray - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer - * in this position and unchanged. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - */ - -#include -__FBSDID("$FreeBSD$"); - -#ifdef _KERNEL -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include - -#include -#include -#include -#include -#include -#else /* !_KERNEL */ -#include -#include -#include -#include -#include -#include -#include - -#include "unit_test.h" - -#include -#include - -#include -#include -#include -#include -#endif /* _KERNEL */ - -#define RANDOM_YARROW_TIMEBIN 16 /* max value for Pt/t */ - -#define RANDOM_YARROW_FAST 0 -#define RANDOM_YARROW_SLOW 1 -#define RANDOM_YARROW_NPOOLS 2 - -/* This algorithm (and code) presumes that RANDOM_KEYSIZE is twice as large as RANDOM_BLOCKSIZE */ -CTASSERT(RANDOM_BLOCKSIZE == sizeof(uint128_t)); -CTASSERT(RANDOM_KEYSIZE == 2*RANDOM_BLOCKSIZE); - -/* Probes for dtrace(1) */ -SDT_PROVIDER_DECLARE(random); -SDT_PROVIDER_DEFINE(random); -SDT_PROBE_DEFINE3(random, yarrow, event_processor, debug, "boolean", "u_int", "struct ys_pool *"); - -/* - * This is the beastie that needs protecting. It contains all of the - * state that we are excited about. Exactly one is instantiated. 
- */ -static struct yarrow_state { - uint128_t ys_counter; /* C */ - struct randomdev_key ys_key; /* K */ - u_int ys_gengateinterval; /* Pg */ - u_int ys_bins; /* Pt/t */ - u_int ys_outputblocks; /* count output blocks for gates */ - u_int ys_slowoverthresh; /* slow pool overthreshhold reseed count */ - struct ys_pool { - u_int ysp_source_bits[ENTROPYSOURCE]; /* estimated bits of entropy per source */ - u_int ysp_thresh; /* pool reseed threshold */ - struct randomdev_hash ysp_hash; /* accumulated entropy */ - } ys_pool[RANDOM_YARROW_NPOOLS];/* pool[0] is fast, pool[1] is slow */ - bool ys_seeded; - /* Reseed lock */ - mtx_t ys_mtx; -} yarrow_state; - -#ifdef _KERNEL -static struct sysctl_ctx_list random_clist; -RANDOM_CHECK_UINT(gengateinterval, 4, 64); -RANDOM_CHECK_UINT(bins, RANDOM_YARROW_NPOOLS, 16); -RANDOM_CHECK_UINT(fastthresh, (RANDOM_BLOCKSIZE*8)/4, (RANDOM_BLOCKSIZE*8)); /* Bit counts */ -RANDOM_CHECK_UINT(slowthresh, (RANDOM_BLOCKSIZE*8)/4, (RANDOM_BLOCKSIZE*8)); /* Bit counts */ -RANDOM_CHECK_UINT(slowoverthresh, 1, 5); -#endif /* _KERNEL */ - -static void random_yarrow_pre_read(void); -static void random_yarrow_read(uint8_t *, u_int); -static bool random_yarrow_seeded(void); -static void random_yarrow_process_event(struct harvest_event *); -static void random_yarrow_init_alg(void *); -static void random_yarrow_deinit_alg(void *); - -static void random_yarrow_reseed_internal(u_int); - -struct random_algorithm random_alg_context = { - .ra_ident = "Yarrow", - .ra_init_alg = random_yarrow_init_alg, - .ra_deinit_alg = random_yarrow_deinit_alg, - .ra_pre_read = random_yarrow_pre_read, - .ra_read = random_yarrow_read, - .ra_seeded = random_yarrow_seeded, - .ra_event_processor = random_yarrow_process_event, - .ra_poolcount = RANDOM_YARROW_NPOOLS, -}; - -/* ARGSUSED */ -static void -random_yarrow_init_alg(void *unused __unused) -{ - int i, j; -#ifdef _KERNEL - struct sysctl_oid *random_yarrow_o; -#endif - - RANDOM_RESEED_INIT_LOCK(); - /* Start unseeded, 
therefore blocked. */ - yarrow_state.ys_seeded = false; -#ifdef _KERNEL - /* - * Yarrow parameters. Do not adjust these unless you have - * have a very good clue about what they do! - */ - random_yarrow_o = SYSCTL_ADD_NODE(&random_clist, - SYSCTL_STATIC_CHILDREN(_kern_random), - OID_AUTO, "yarrow", CTLFLAG_RW, 0, - "Yarrow Parameters"); - SYSCTL_ADD_PROC(&random_clist, - SYSCTL_CHILDREN(random_yarrow_o), OID_AUTO, - "gengateinterval", CTLTYPE_UINT | CTLFLAG_RWTUN, - &yarrow_state.ys_gengateinterval, 0, - random_check_uint_gengateinterval, "UI", - "Generation gate interval"); - SYSCTL_ADD_PROC(&random_clist, - SYSCTL_CHILDREN(random_yarrow_o), OID_AUTO, - "bins", CTLTYPE_UINT | CTLFLAG_RWTUN, - &yarrow_state.ys_bins, 0, - random_check_uint_bins, "UI", - "Execution time tuner"); - SYSCTL_ADD_PROC(&random_clist, - SYSCTL_CHILDREN(random_yarrow_o), OID_AUTO, - "fastthresh", CTLTYPE_UINT | CTLFLAG_RWTUN, - &yarrow_state.ys_pool[0].ysp_thresh, 0, - random_check_uint_fastthresh, "UI", - "Fast reseed threshold"); - SYSCTL_ADD_PROC(&random_clist, - SYSCTL_CHILDREN(random_yarrow_o), OID_AUTO, - "slowthresh", CTLTYPE_UINT | CTLFLAG_RWTUN, - &yarrow_state.ys_pool[1].ysp_thresh, 0, - random_check_uint_slowthresh, "UI", - "Slow reseed threshold"); - SYSCTL_ADD_PROC(&random_clist, - SYSCTL_CHILDREN(random_yarrow_o), OID_AUTO, - "slowoverthresh", CTLTYPE_UINT | CTLFLAG_RWTUN, - &yarrow_state.ys_slowoverthresh, 0, - random_check_uint_slowoverthresh, "UI", - "Slow over-threshold reseed"); -#endif /* _KERNEL */ - yarrow_state.ys_gengateinterval = 10; - yarrow_state.ys_bins = 10; - yarrow_state.ys_pool[RANDOM_YARROW_FAST].ysp_thresh = (3*(RANDOM_BLOCKSIZE*8))/4; - yarrow_state.ys_pool[RANDOM_YARROW_SLOW].ysp_thresh = (RANDOM_BLOCKSIZE*8); - yarrow_state.ys_slowoverthresh = 2; - /* Ensure that the first time we read, we are gated. 
*/ - yarrow_state.ys_outputblocks = yarrow_state.ys_gengateinterval; - /* Initialise the fast and slow entropy pools */ - for (i = RANDOM_YARROW_FAST; i <= RANDOM_YARROW_SLOW; i++) { - randomdev_hash_init(&yarrow_state.ys_pool[i].ysp_hash); - for (j = RANDOM_START; j < ENTROPYSOURCE; j++) - yarrow_state.ys_pool[i].ysp_source_bits[j] = 0; - } - /* Clear the counter */ - yarrow_state.ys_counter = UINT128_ZERO; -} - -/* ARGSUSED */ -static void -random_yarrow_deinit_alg(void *unused __unused) -{ - - RANDOM_RESEED_DEINIT_LOCK(); - explicit_bzero(&yarrow_state, sizeof(yarrow_state)); -#ifdef _KERNEL - sysctl_ctx_free(&random_clist); -#endif -} - -/* Process a single stochastic event off the harvest queue */ -static void -random_yarrow_process_event(struct harvest_event *event) -{ - u_int pl, overthreshhold[RANDOM_YARROW_NPOOLS]; - enum random_entropy_source src; - - RANDOM_RESEED_LOCK(); - /* - * Accumulate the event into the appropriate pool - * where each event carries the destination information. - * We lock against pool state modification which can happen - * during accumulation/reseeding and reading/regating - */ - pl = event->he_destination % RANDOM_YARROW_NPOOLS; - randomdev_hash_iterate(&yarrow_state.ys_pool[pl].ysp_hash, event, sizeof(*event)); - yarrow_state.ys_pool[pl].ysp_source_bits[event->he_source] += event->he_bits; - /* Count the over-threshold sources in each pool */ - for (pl = RANDOM_YARROW_FAST; pl <= RANDOM_YARROW_SLOW; pl++) { - overthreshhold[pl] = 0; - for (src = RANDOM_START; src < ENTROPYSOURCE; src++) { - if (yarrow_state.ys_pool[pl].ysp_source_bits[src] > yarrow_state.ys_pool[pl].ysp_thresh) - overthreshhold[pl]++; - } - } - /* - * If enough slow sources are over threshold, then slow reseed - * else if any fast source over threshold, then fast reseed. 
- */ - if (overthreshhold[RANDOM_YARROW_SLOW] >= yarrow_state.ys_slowoverthresh) - random_yarrow_reseed_internal(RANDOM_YARROW_SLOW); - else if (overthreshhold[RANDOM_YARROW_FAST] > 0 && yarrow_state.ys_seeded) - random_yarrow_reseed_internal(RANDOM_YARROW_FAST); - explicit_bzero(event, sizeof(*event)); - RANDOM_RESEED_UNLOCK(); -} - -static void -random_yarrow_reseed_internal(u_int fastslow) -{ - /* - * Interrupt-context stack is a limited resource; make large - * structures static. - */ - static uint8_t v[RANDOM_YARROW_TIMEBIN][RANDOM_KEYSIZE]; /* v[i] */ - static uint128_t temp; - static struct randomdev_hash context; - u_int i; - enum random_entropy_source j; - - KASSERT(yarrow_state.ys_pool[RANDOM_YARROW_FAST].ysp_thresh > 0, ("random: Yarrow fast threshold = 0")); - KASSERT(yarrow_state.ys_pool[RANDOM_YARROW_SLOW].ysp_thresh > 0, ("random: Yarrow slow threshold = 0")); - RANDOM_RESEED_ASSERT_LOCK_OWNED(); - SDT_PROBE3(random, yarrow, event_processor, debug, yarrow_state.ys_seeded, yarrow_state.ys_slowoverthresh, yarrow_state.ys_pool); - /* 1. Hash the accumulated entropy into v[0] */ - randomdev_hash_init(&context); - /* Feed the slow pool hash in if slow */ - if (fastslow == RANDOM_YARROW_SLOW) { - randomdev_hash_finish(&yarrow_state.ys_pool[RANDOM_YARROW_SLOW].ysp_hash, &temp); - randomdev_hash_iterate(&context, &temp, sizeof(temp)); - } - randomdev_hash_finish(&yarrow_state.ys_pool[RANDOM_YARROW_FAST].ysp_hash, &temp); - randomdev_hash_iterate(&context, &temp, sizeof(temp)); - randomdev_hash_finish(&context, v[0]); - /*- - * 2. Compute hash values for all v. _Supposed_ to be computationally - * intensive. 
- */ - if (yarrow_state.ys_bins > RANDOM_YARROW_TIMEBIN) - yarrow_state.ys_bins = RANDOM_YARROW_TIMEBIN; - for (i = 1; i < yarrow_state.ys_bins; i++) { - randomdev_hash_init(&context); - /* v[i] #= h(v[i - 1]) */ - randomdev_hash_iterate(&context, v[i - 1], RANDOM_KEYSIZE); - /* v[i] #= h(v[0]) */ - randomdev_hash_iterate(&context, v[0], RANDOM_KEYSIZE); - /* v[i] #= h(i) */ - randomdev_hash_iterate(&context, &i, sizeof(i)); - /* Return the hashval */ - randomdev_hash_finish(&context, v[i]); - } - /*- - * 3. Compute a new key; h' is the identity function here; - * it is not being ignored! - */ - randomdev_hash_init(&context); - randomdev_hash_iterate(&context, &yarrow_state.ys_key, RANDOM_KEYSIZE); - for (i = 1; i < yarrow_state.ys_bins; i++) - randomdev_hash_iterate(&context, v[i], RANDOM_KEYSIZE); - randomdev_hash_finish(&context, &temp); - randomdev_encrypt_init(&yarrow_state.ys_key, &temp); - /* 4. Recompute the counter */ - yarrow_state.ys_counter = UINT128_ZERO; - randomdev_encrypt(&yarrow_state.ys_key, &yarrow_state.ys_counter, &temp, RANDOM_BLOCKSIZE); - yarrow_state.ys_counter = temp; - /* 5. Reset entropy estimate accumulators to zero */ - for (i = 0; i <= fastslow; i++) - for (j = RANDOM_START; j < ENTROPYSOURCE; j++) - yarrow_state.ys_pool[i].ysp_source_bits[j] = 0; - /* 6. Wipe memory of intermediate values */ - explicit_bzero(v, sizeof(v)); - explicit_bzero(&temp, sizeof(temp)); - explicit_bzero(&context, sizeof(context)); -/* Not defined so writes ain't gonna happen. Kept for documenting. */ -#ifdef RANDOM_RWFILE_WRITE_IS_OK - /*- - * 7. Dump to seed file. - * This pseudo-code is documentation. Please leave it alone. 
- */ - seed_file = ""; - error = randomdev_write_file(seed_file, , PAGE_SIZE); - if (error == 0) - printf("random: entropy seed file '%s' successfully written\n", seed_file); -#endif - /* Unblock the device if it was blocked due to being unseeded */ - if (!yarrow_state.ys_seeded) { - yarrow_state.ys_seeded = true; - randomdev_unblock(); - } -} - -static __inline void -random_yarrow_generator_gate(void) -{ - u_int i; - uint8_t temp[RANDOM_KEYSIZE]; - - RANDOM_RESEED_ASSERT_LOCK_OWNED(); - uint128_increment(&yarrow_state.ys_counter); - for (i = 0; i < RANDOM_KEYSIZE; i += RANDOM_BLOCKSIZE) - randomdev_encrypt(&yarrow_state.ys_key, &yarrow_state.ys_counter, temp + i, RANDOM_BLOCKSIZE); - randomdev_encrypt_init(&yarrow_state.ys_key, temp); - explicit_bzero(temp, sizeof(temp)); -} - -/*- - * Used to return processed entropy from the PRNG. There is a pre_read - * required to be present (but it can be a stub) in order to allow - * specific actions at the begin of the read. - * Yarrow does its reseeding in its own thread; _pre_read() is not used - * by Yarrow but must be kept for completeness. - */ -void -random_yarrow_pre_read(void) -{ -} - -/*- - * Main read from Yarrow. - * The supplied buf MUST be a multiple (>=0) of RANDOM_BLOCKSIZE in size. - * Lots of code presumes this for efficiency, both here and in other - * routines. You are NOT allowed to break this! 
- */ -void -random_yarrow_read(uint8_t *buf, u_int bytecount) -{ - u_int blockcount, i; - - KASSERT((bytecount % RANDOM_BLOCKSIZE) == 0, ("%s(): bytecount (= %d) must be a multiple of %d", __func__, bytecount, RANDOM_BLOCKSIZE )); - RANDOM_RESEED_LOCK(); - blockcount = howmany(bytecount, RANDOM_BLOCKSIZE); - for (i = 0; i < blockcount; i++) { - if (yarrow_state.ys_outputblocks++ >= yarrow_state.ys_gengateinterval) { - random_yarrow_generator_gate(); - yarrow_state.ys_outputblocks = 0; - } - uint128_increment(&yarrow_state.ys_counter); - randomdev_encrypt(&yarrow_state.ys_key, &yarrow_state.ys_counter, buf, RANDOM_BLOCKSIZE); - buf += RANDOM_BLOCKSIZE; - } - RANDOM_RESEED_UNLOCK(); -} - -bool -random_yarrow_seeded(void) -{ - - return (yarrow_state.ys_seeded); -} diff --git a/sys/dev/random/yarrow.h b/sys/dev/random/yarrow.h deleted file mode 100644 index a08d1070e89b..000000000000 --- a/sys/dev/random/yarrow.h +++ /dev/null @@ -1,47 +0,0 @@ -/*- - * Copyright (c) 2000-2015 Mark R V Murray - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer - * in this position and unchanged. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * $FreeBSD$ - */ - -#ifndef SYS_DEV_RANDOM_YARROW_H_INCLUDED -#define SYS_DEV_RANDOM_YARROW_H_INCLUDED - -#ifdef _KERNEL -typedef struct mtx mtx_t; -#define RANDOM_RESEED_INIT_LOCK(x) mtx_init(&yarrow_state.ys_mtx, "reseed mutex", NULL, MTX_DEF) -#define RANDOM_RESEED_DEINIT_LOCK(x) mtx_destroy(&yarrow_state.ys_mtx) -#define RANDOM_RESEED_LOCK(x) mtx_lock(&yarrow_state.ys_mtx) -#define RANDOM_RESEED_UNLOCK(x) mtx_unlock(&yarrow_state.ys_mtx) -#define RANDOM_RESEED_ASSERT_LOCK_OWNED(x) mtx_assert(&yarrow_state.ys_mtx, MA_OWNED) -#else -#define RANDOM_RESEED_INIT_LOCK(x) mtx_init(&yarrow_state.ys_mtx, mtx_plain) -#define RANDOM_RESEED_DEINIT_LOCK(x) mtx_destroy(&yarrow_state.ys_mtx) -#define RANDOM_RESEED_LOCK(x) mtx_lock(&yarrow_state.ys_mtx) -#define RANDOM_RESEED_UNLOCK(x) mtx_unlock(&yarrow_state.ys_mtx) -#define RANDOM_RESEED_ASSERT_LOCK_OWNED(x) -#endif - -#endif /* SYS_DEV_RANDOM_YARROW_H_INCLUDED */ diff --git a/sys/dev/rndtest/rndtest.c b/sys/dev/rndtest/rndtest.c index 5d2490af11db..f33edbfa048d 100644 --- a/sys/dev/rndtest/rndtest.c +++ b/sys/dev/rndtest/rndtest.c @@ -149,7 +149,7 @@ rndtest_harvest(struct rndtest_state *rsp, void *buf, u_int len) rndstats.rst_discard += len; else /* MarkM: FIX!! Check that this does not swamp the harvester! 
*/ - random_harvest_queue(buf, len, len*NBBY/2, RANDOM_PURE_RNDTEST); + random_harvest_queue(buf, len, RANDOM_PURE_RNDTEST); } static void diff --git a/sys/dev/safe/safe.c b/sys/dev/safe/safe.c index a1673c9cd60f..0edaea85f5b6 100644 --- a/sys/dev/safe/safe.c +++ b/sys/dev/safe/safe.c @@ -212,7 +212,7 @@ static void default_harvest(struct rndtest_state *rsp, void *buf, u_int count) { /* MarkM: FIX!! Check that this does not swamp the harvester! */ - random_harvest_queue(buf, count, count*NBBY/2, RANDOM_PURE_SAFE); + random_harvest_queue(buf, count, RANDOM_PURE_SAFE); } #endif /* SAFE_NO_RNG */ diff --git a/sys/dev/syscons/scmouse.c b/sys/dev/syscons/scmouse.c index aeff5f9b3e90..03daba3ebfab 100644 --- a/sys/dev/syscons/scmouse.c +++ b/sys/dev/syscons/scmouse.c @@ -669,7 +669,7 @@ sc_mouse_ioctl(struct tty *tp, u_long cmd, caddr_t data, struct thread *td) mouse = (mouse_info_t*)data; - random_harvest_queue(mouse, sizeof(mouse_info_t), 2, RANDOM_MOUSE); + random_harvest_queue(mouse, sizeof(mouse_info_t), RANDOM_MOUSE); if (cmd == OLD_CONS_MOUSECTL) { static u_char swapb[] = { 0, 4, 2, 6, 1, 5, 3, 7 }; diff --git a/sys/dev/syscons/syscons.c b/sys/dev/syscons/syscons.c index dd2ecc4627ac..0785bd52c53b 100644 --- a/sys/dev/syscons/syscons.c +++ b/sys/dev/syscons/syscons.c @@ -3734,7 +3734,7 @@ scgetc(sc_softc_t *sc, u_int flags, struct sc_cnstate *sp) sc_touch_scrn_saver(); if (!(flags & SCGETC_CN)) - random_harvest_queue(&c, sizeof(c), 1, RANDOM_KEYBOARD); + random_harvest_queue(&c, sizeof(c), RANDOM_KEYBOARD); if (sc->kbd_open_level == 0 && scp->kbd_mode != K_XLATE) return KEYCHAR(c); diff --git a/sys/dev/ubsec/ubsec.c b/sys/dev/ubsec/ubsec.c index 1c8c7c60e657..ad89986d1a1e 100644 --- a/sys/dev/ubsec/ubsec.c +++ b/sys/dev/ubsec/ubsec.c @@ -260,7 +260,7 @@ static void default_harvest(struct rndtest_state *rsp, void *buf, u_int count) { /* MarkM: FIX!! Check that this does not swamp the harvester! 
*/ - random_harvest_queue(buf, count, count*NBBY/2, RANDOM_PURE_UBSEC); + random_harvest_queue(buf, count, RANDOM_PURE_UBSEC); } static int diff --git a/sys/dev/virtio/random/virtio_random.c b/sys/dev/virtio/random/virtio_random.c index 18326891f05e..15311b9e6fbc 100644 --- a/sys/dev/virtio/random/virtio_random.c +++ b/sys/dev/virtio/random/virtio_random.c @@ -217,8 +217,7 @@ vtrnd_harvest(struct vtrnd_softc *sc) virtqueue_notify(vq); virtqueue_poll(vq, NULL); - random_harvest_queue(&value, sizeof(value), sizeof(value) * NBBY / 2, - RANDOM_PURE_VIRTIO); + random_harvest_queue(&value, sizeof(value), RANDOM_PURE_VIRTIO); } static void diff --git a/sys/dev/vt/vt_core.c b/sys/dev/vt/vt_core.c index 4edea2b2ed1f..a4bbb9396952 100644 --- a/sys/dev/vt/vt_core.c +++ b/sys/dev/vt/vt_core.c @@ -815,7 +815,7 @@ vt_processkey(keyboard_t *kbd, struct vt_device *vd, int c) { struct vt_window *vw = vd->vd_curwindow; - random_harvest_queue(&c, sizeof(c), 1, RANDOM_KEYBOARD); + random_harvest_queue(&c, sizeof(c), RANDOM_KEYBOARD); #if VT_ALT_TO_ESC_HACK if (c & RELKEY) { switch (c & ~RELKEY) { diff --git a/sys/dev/vt/vt_sysmouse.c b/sys/dev/vt/vt_sysmouse.c index 4afb9e1e126b..98915c2a78df 100644 --- a/sys/dev/vt/vt_sysmouse.c +++ b/sys/dev/vt/vt_sysmouse.c @@ -214,7 +214,7 @@ sysmouse_process_event(mouse_info_t *mi) unsigned char buf[MOUSE_SYS_PACKETSIZE]; int x, y, iy, z; - random_harvest_queue(mi, sizeof *mi, 2, RANDOM_MOUSE); + random_harvest_queue(mi, sizeof *mi, RANDOM_MOUSE); mtx_lock(&sysmouse_lock); switch (mi->operation) { diff --git a/sys/fs/tmpfs/tmpfs_subr.c b/sys/fs/tmpfs/tmpfs_subr.c index 6b7e4351d8d5..80711da58774 100644 --- a/sys/fs/tmpfs/tmpfs_subr.c +++ b/sys/fs/tmpfs/tmpfs_subr.c @@ -1823,7 +1823,7 @@ tmpfs_itimes(struct vnode *vp, const struct timespec *acc, TMPFS_NODE_UNLOCK(node); /* XXX: FIX? 
The entropy here is desirable, but the harvesting may be expensive */ - random_harvest_queue(node, sizeof(*node), 1, RANDOM_FS_ATIME); + random_harvest_queue(node, sizeof(*node), RANDOM_FS_ATIME); } void diff --git a/sys/kern/kern_intr.c b/sys/kern/kern_intr.c index 2944c89f5825..266698676c1d 100644 --- a/sys/kern/kern_intr.c +++ b/sys/kern/kern_intr.c @@ -868,7 +868,7 @@ intr_event_schedule_thread(struct intr_event *ie) if (ie->ie_flags & IE_ENTROPY) { entropy.event = (uintptr_t)ie; entropy.td = ctd; - random_harvest_queue(&entropy, sizeof(entropy), 2, RANDOM_INTERRUPT); + random_harvest_queue(&entropy, sizeof(entropy), RANDOM_INTERRUPT); } KASSERT(td->td_proc != NULL, ("ithread %s has no process", ie->ie_name)); @@ -958,7 +958,7 @@ swi_sched(void *cookie, int flags) entropy.event = (uintptr_t)ih; entropy.td = curthread; - random_harvest_queue(&entropy, sizeof(entropy), 1, RANDOM_SWI); + random_harvest_queue(&entropy, sizeof(entropy), RANDOM_SWI); /* * Set ih_need for this handler so that if the ithread is already diff --git a/sys/kern/subr_bus.c b/sys/kern/subr_bus.c index 72bf63eb295c..27b00656fc2c 100644 --- a/sys/kern/subr_bus.c +++ b/sys/kern/subr_bus.c @@ -2925,6 +2925,7 @@ int device_attach(device_t dev) { uint64_t attachtime; + uint16_t attachentropy; int error; if (resource_disabled(dev->driver->name, dev->unit)) { @@ -2951,19 +2952,11 @@ device_attach(device_t dev) return (error); } dev->flags |= DF_ATTACHED_ONCE; - attachtime = get_cyclecount() - attachtime; - /* - * 4 bits per device is a reasonable value for desktop and server - * hardware with good get_cyclecount() implementations, but WILL - * need to be adjusted on other platforms. + /* We only need the low bits of this time, but ranges from tens to thousands + * have been seen, so keep 2 bytes' worth. 
*/ -#define RANDOM_PROBE_BIT_GUESS 4 - if (bootverbose) - printf("random: harvesting attach, %zu bytes (%d bits) from %s%d\n", - sizeof(attachtime), RANDOM_PROBE_BIT_GUESS, - dev->driver->name, dev->unit); - random_harvest_direct(&attachtime, sizeof(attachtime), - RANDOM_PROBE_BIT_GUESS, RANDOM_ATTACH); + attachentropy = (uint16_t)(get_cyclecount() - attachtime); + random_harvest_direct(&attachentropy, sizeof(attachentropy), RANDOM_ATTACH); device_sysctl_update(dev); if (dev->busy) dev->state = DS_BUSY; diff --git a/sys/mips/cavium/octeon_rnd.c b/sys/mips/cavium/octeon_rnd.c index cf82057ec63b..9df4539ebba6 100644 --- a/sys/mips/cavium/octeon_rnd.c +++ b/sys/mips/cavium/octeon_rnd.c @@ -128,8 +128,7 @@ octeon_rnd_harvest(void *arg) for (i = 0; i < OCTEON_RND_WORDS; i++) sc->sc_entropy[i] = cvmx_rng_get_random64(); /* MarkM: FIX!! Check that this does not swamp the harvester! */ - random_harvest_queue(sc->sc_entropy, sizeof sc->sc_entropy, - (sizeof(sc->sc_entropy)*8)/2, RANDOM_PURE_OCTEON); + random_harvest_queue(sc->sc_entropy, sizeof sc->sc_entropy, RANDOM_PURE_OCTEON); callout_reset(&sc->sc_callout, hz * 5, octeon_rnd_harvest, sc); } diff --git a/sys/modules/Makefile b/sys/modules/Makefile index 483b25089508..59203a394612 100644 --- a/sys/modules/Makefile +++ b/sys/modules/Makefile @@ -321,7 +321,6 @@ SUBDIR= \ ral \ ${_ralfw} \ ${_random_fortuna} \ - ${_random_yarrow} \ ${_random_other} \ rc4 \ ${_rdma} \ @@ -435,7 +434,6 @@ SUBDIR+= opensolaris _crypto= crypto _cryptodev= cryptodev _random_fortuna=random_fortuna -_random_yarrow= random_yarrow _random_other= random_other .endif .endif diff --git a/sys/modules/random_yarrow/Makefile b/sys/modules/random_yarrow/Makefile deleted file mode 100644 index 90ff3597a728..000000000000 --- a/sys/modules/random_yarrow/Makefile +++ /dev/null @@ -1,11 +0,0 @@ -# $FreeBSD$ - -.PATH: ${SRCTOP}/sys/dev/random - -KMOD = random_yarrow -SRCS = randomdev.c hash.c yarrow.c -SRCS += opt_param.h bus_if.h device_if.h -SRCS += opt_ddb.h 
-CFLAGS += -DRANDOM_LOADABLE - -.include diff --git a/sys/net/if_ethersubr.c b/sys/net/if_ethersubr.c index f00952f49323..28dac295e077 100644 --- a/sys/net/if_ethersubr.c +++ b/sys/net/if_ethersubr.c @@ -514,7 +514,7 @@ ether_input_internal(struct ifnet *ifp, struct mbuf *m) } eh = mtod(m, struct ether_header *); etype = ntohs(eh->ether_type); - random_harvest_queue_ether(m, sizeof(*m), 2); + random_harvest_queue_ether(m, sizeof(*m)); CURVNET_SET_QUIET(ifp->if_vnet); diff --git a/sys/net/if_tun.c b/sys/net/if_tun.c index cc3a25d72740..cf404012a2e8 100644 --- a/sys/net/if_tun.c +++ b/sys/net/if_tun.c @@ -910,7 +910,7 @@ tunwrite(struct cdev *dev, struct uio *uio, int flag) m_freem(m); return (EAFNOSUPPORT); } - random_harvest_queue(m, sizeof(*m), 2, RANDOM_NET_TUN); + random_harvest_queue(m, sizeof(*m), RANDOM_NET_TUN); if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); CURVNET_SET(ifp->if_vnet); diff --git a/sys/netgraph/ng_iface.c b/sys/netgraph/ng_iface.c index bef19a0b8b53..7f3b8b16c0a4 100644 --- a/sys/netgraph/ng_iface.c +++ b/sys/netgraph/ng_iface.c @@ -720,7 +720,7 @@ ng_iface_rcvdata(hook_p hook, item_p item) m_freem(m); return (EAFNOSUPPORT); } - random_harvest_queue(m, sizeof(*m), 2, RANDOM_NET_NG); + random_harvest_queue(m, sizeof(*m), RANDOM_NET_NG); M_SETFIB(m, ifp->if_fib); netisr_dispatch(isr, m); return (0); diff --git a/sys/sys/random.h b/sys/sys/random.h index 59a3281c6641..891672a33ec4 100644 --- a/sys/sys/random.h +++ b/sys/sys/random.h @@ -35,12 +35,6 @@ #ifdef _KERNEL -#if !defined(KLD_MODULE) -#if defined(RANDOM_LOADABLE) && defined(RANDOM_YARROW) -#error "Cannot define both RANDOM_LOADABLE and RANDOM_YARROW" -#endif -#endif - struct uio; #if defined(DEV_RANDOM) @@ -108,57 +102,54 @@ enum random_entropy_source { #if defined(DEV_RANDOM) extern u_int hc_source_mask; -void random_harvest_queue_(const void *, u_int, u_int, enum random_entropy_source); -void random_harvest_fast_(const void *, u_int, 
u_int); -void random_harvest_direct_(const void *, u_int, u_int, enum random_entropy_source); +void random_harvest_queue_(const void *, u_int, enum random_entropy_source); +void random_harvest_fast_(const void *, u_int); +void random_harvest_direct_(const void *, u_int, enum random_entropy_source); static __inline void -random_harvest_queue(const void *entropy, u_int size, u_int bits, - enum random_entropy_source origin) +random_harvest_queue(const void *entropy, u_int size, enum random_entropy_source origin) { if (hc_source_mask & (1 << origin)) - random_harvest_queue_(entropy, size, bits, origin); + random_harvest_queue_(entropy, size, origin); } static __inline void -random_harvest_fast(const void *entropy, u_int size, u_int bits, - enum random_entropy_source origin) +random_harvest_fast(const void *entropy, u_int size, enum random_entropy_source origin) { if (hc_source_mask & (1 << origin)) - random_harvest_fast_(entropy, size, bits); + random_harvest_fast_(entropy, size); } static __inline void -random_harvest_direct(const void *entropy, u_int size, u_int bits, - enum random_entropy_source origin) +random_harvest_direct(const void *entropy, u_int size, enum random_entropy_source origin) { if (hc_source_mask & (1 << origin)) - random_harvest_direct_(entropy, size, bits, origin); + random_harvest_direct_(entropy, size, origin); } void random_harvest_register_source(enum random_entropy_source); void random_harvest_deregister_source(enum random_entropy_source); #else -#define random_harvest_queue(a, b, c, d) do {} while (0) -#define random_harvest_fast(a, b, c, d) do {} while (0) -#define random_harvest_direct(a, b, c, d) do {} while (0) +#define random_harvest_queue(a, b, c) do {} while (0) +#define random_harvest_fast(a, b, c) do {} while (0) +#define random_harvest_direct(a, b, c) do {} while (0) #define random_harvest_register_source(a) do {} while (0) #define random_harvest_deregister_source(a) do {} while (0) #endif #if defined(RANDOM_ENABLE_UMA) -#define 
random_harvest_fast_uma(a, b, c, d) random_harvest_fast(a, b, c, d) +#define random_harvest_fast_uma(a, b, c) random_harvest_fast(a, b, c) #else /* !defined(RANDOM_ENABLE_UMA) */ -#define random_harvest_fast_uma(a, b, c, d) do {} while (0) +#define random_harvest_fast_uma(a, b, c) do {} while (0) #endif /* defined(RANDOM_ENABLE_UMA) */ #if defined(RANDOM_ENABLE_ETHER) -#define random_harvest_queue_ether(a, b, c) random_harvest_queue(a, b, c, RANDOM_NET_ETHER) +#define random_harvest_queue_ether(a, b) random_harvest_queue(a, b, RANDOM_NET_ETHER) #else /* !defined(RANDOM_ENABLE_ETHER) */ -#define random_harvest_queue_ether(a, b, c) do {} while (0) +#define random_harvest_queue_ether(a, b) do {} while (0) #endif /* defined(RANDOM_ENABLE_ETHER) */ diff --git a/sys/ufs/ffs/ffs_inode.c b/sys/ufs/ffs/ffs_inode.c index 0ec662179880..a1be71511434 100644 --- a/sys/ufs/ffs/ffs_inode.c +++ b/sys/ufs/ffs/ffs_inode.c @@ -149,12 +149,12 @@ ffs_update(vp, waitfor) *((struct ufs1_dinode *)bp->b_data + ino_to_fsbo(fs, ip->i_number)) = *ip->i_din1; /* XXX: FIX? The entropy here is desirable, but the harvesting may be expensive */ - random_harvest_queue(&(ip->i_din1), sizeof(ip->i_din1), 1, RANDOM_FS_ATIME); + random_harvest_queue(&(ip->i_din1), sizeof(ip->i_din1), RANDOM_FS_ATIME); } else { *((struct ufs2_dinode *)bp->b_data + ino_to_fsbo(fs, ip->i_number)) = *ip->i_din2; /* XXX: FIX? 
The entropy here is desirable, but the harvesting may be expensive */ - random_harvest_queue(&(ip->i_din2), sizeof(ip->i_din2), 1, RANDOM_FS_ATIME); + random_harvest_queue(&(ip->i_din2), sizeof(ip->i_din2), RANDOM_FS_ATIME); } if (waitfor) error = bwrite(bp); diff --git a/sys/vm/uma_core.c b/sys/vm/uma_core.c index d971c3c76a61..908349379763 100644 --- a/sys/vm/uma_core.c +++ b/sys/vm/uma_core.c @@ -2363,7 +2363,7 @@ uma_zalloc_arg(uma_zone_t zone, void *udata, int flags) #endif /* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */ - random_harvest_fast_uma(&zone, sizeof(zone), 1, RANDOM_UMA); + random_harvest_fast_uma(&zone, sizeof(zone), RANDOM_UMA); /* This is the fast path allocation */ CTR4(KTR_UMA, "uma_zalloc_arg thread %x zone %s(%p) flags %d", @@ -2572,7 +2572,7 @@ uma_zalloc_domain(uma_zone_t zone, void *udata, int domain, int flags) { /* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */ - random_harvest_fast_uma(&zone, sizeof(zone), 1, RANDOM_UMA); + random_harvest_fast_uma(&zone, sizeof(zone), RANDOM_UMA); /* This is the fast path allocation */ CTR5(KTR_UMA, @@ -3032,7 +3032,7 @@ uma_zfree_arg(uma_zone_t zone, void *item, void *udata) #endif /* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */ - random_harvest_fast_uma(&zone, sizeof(zone), 1, RANDOM_UMA); + random_harvest_fast_uma(&zone, sizeof(zone), RANDOM_UMA); CTR2(KTR_UMA, "uma_zfree_arg thread %x zone %s", curthread, zone->uz_name); @@ -3208,7 +3208,7 @@ uma_zfree_domain(uma_zone_t zone, void *item, void *udata) { /* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */ - random_harvest_fast_uma(&zone, sizeof(zone), 1, RANDOM_UMA); + random_harvest_fast_uma(&zone, sizeof(zone), RANDOM_UMA); CTR2(KTR_UMA, "uma_zfree_domain thread %x zone %s", curthread, zone->uz_name); diff --git a/tools/tools/sysdoc/tunables.mdoc b/tools/tools/sysdoc/tunables.mdoc index 7dc2f8fa4e96..0926c2556c81 100644 --- a/tools/tools/sysdoc/tunables.mdoc +++ 
b/tools/tools/sysdoc/tunables.mdoc @@ -1121,21 +1121,6 @@ kern.random.sys.harvest.swi --- kern.random.sys.seeded ---- -kern.random.yarrow.bins - ---- -kern.random.yarrow.fastthresh - ---- -kern.random.yarrow.gengateinterval - ---- -kern.random.yarrow.slowoverthresh - ---- -kern.random.yarrow.slowthresh - --- kern.randompid From 01a9c32322c8fa0c8133aa6abf1ac60f89e08223 Mon Sep 17 00:00:00 2001 From: Sean Bruno Date: Sun, 26 Aug 2018 17:05:43 +0000 Subject: [PATCH 18/51] r338270 had the side effect of no longer installing libmd.so into /lib. For users who have a separate zfs mount of /usr or /usr/lib, this will cause dynamic loading failures when attempting to execute zfs mount on bootup. E.g. the system won't boot. Including sets SHLIBDIR, so SHLIBDIR?= has no effect. The other lib/ Makefiles solve this problem by moving the SHLIBDIR assignment to before .include . Submitted by: jilles Reviewed by: allanjude Approved by: re (rgrimes) Differential Revision: https://reviews.freebsd.org/D16910 --- lib/libmd/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/libmd/Makefile b/lib/libmd/Makefile index e90569dd1782..edeb308c82d6 100644 --- a/lib/libmd/Makefile +++ b/lib/libmd/Makefile @@ -1,11 +1,12 @@ # $FreeBSD$ +SHLIBDIR?= /lib + .include PACKAGE=lib${LIB} LIB= md SHLIB_MAJOR= 6 -SHLIBDIR?= /lib SRCS= md4c.c md5c.c md4hl.c md5hl.c \ rmd160c.c rmd160hl.c \ sha0c.c sha0hl.c sha1c.c sha1hl.c \ From a29173be5349307a59cbf9292f765bf179db39f9 Mon Sep 17 00:00:00 2001 From: Xin LI Date: Sun, 26 Aug 2018 18:04:54 +0000 Subject: [PATCH 19/51] Remove arc4random_stir and arc4random_addrandom from stdlib.h. Users of arc4random(3) should never call them directly. All ports tree usage was fixed as part of bug 230756.
Relnotes: yes Approved by: re (marius), exp-run (bug 230756 by portmgr antoine) --- include/stdlib.h | 6 ------ sys/sys/param.h | 2 +- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/include/stdlib.h b/include/stdlib.h index 51f003171451..b2264784ed80 100644 --- a/include/stdlib.h +++ b/include/stdlib.h @@ -254,12 +254,6 @@ void arc4random_buf(void *, size_t); __uint32_t arc4random_uniform(__uint32_t); -#if !defined(BURN_BRIDGES) -/* Deprecated arc4random() functions */ -#define arc4random_stir() -#define arc4random_addrandom(a,b) -#endif - #ifdef __BLOCKS__ int atexit_b(void (^ _Nonnull)(void)); void *bsearch_b(const void *, const void *, size_t, diff --git a/sys/sys/param.h b/sys/sys/param.h index 53fa207bf9ac..91b296c56304 100644 --- a/sys/sys/param.h +++ b/sys/sys/param.h @@ -60,7 +60,7 @@ * in the range 5 to 9. */ #undef __FreeBSD_version -#define __FreeBSD_version 1200082 /* Master, propagated to newvers */ +#define __FreeBSD_version 1200083 /* Master, propagated to newvers */ /* * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD, From 9ea0458663f0e58baf933e9919eed9a2de571828 Mon Sep 17 00:00:00 2001 From: Andrew Turner Date: Mon, 27 Aug 2018 10:08:27 +0000 Subject: [PATCH 20/51] Use the correct register when storing the arm VFP state. Previously we have been lucky where the state was already in r0, however this is not guaranteed. Use the passed in register as the location to store the upper half of the arm VFP registers rather than relying on it being r0. Approved by: re (kib) --- sys/arm/arm/vfp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/arm/arm/vfp.c b/sys/arm/arm/vfp.c index d42660d7e4f9..b9495b36b308 100644 --- a/sys/arm/arm/vfp.c +++ b/sys/arm/arm/vfp.c @@ -293,7 +293,7 @@ vfp_store(struct vfp_state *vfpsave, boolean_t disable_vfp) " .fpu vfpv3\n" " vstmia %0!, {d0-d15}\n" /* d0-d15 */ " cmp %1, #0\n" /* -D16 or -D32? 
*/ - " vstmiane r0!, {d16-d31}\n" /* d16-d31 */ + " vstmiane %0!, {d16-d31}\n" /* d16-d31 */ " addeq %0, %0, #128\n" /* skip missing regs */ : "+&r" (vfpsave) : "r" (is_d32) : "cc" ); From 78da60464191371524fd3d85da0a9efe63369497 Mon Sep 17 00:00:00 2001 From: Andrew Turner Date: Mon, 27 Aug 2018 11:14:49 +0000 Subject: [PATCH 21/51] Ensure we have a large enough stack for the lua loader Lua has a few places where it allocates a large buffer on the stack. This is normally fine, except there are a few places where there can be multiple frames with this buffer. This can cause a stack overflow on some arm64 SoCs. Fix this by allocating our own stack in loader.efi large enough for these objects. The required size has been found by tracing how the stack pointer changes in a virtual machine and found to be no larger than 50kB. A larger stack is allocated to reduce the likelihood of overflow from future changes. Reviewed by: kevans Approved by: re (kib) Differential Revision: https://reviews.freebsd.org/D16886 --- stand/efi/boot1/Makefile | 1 + stand/efi/loader/arch/arm64/start.S | 17 +++++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/stand/efi/boot1/Makefile b/stand/efi/boot1/Makefile index 57610292bd83..88bbb312970c 100644 --- a/stand/efi/boot1/Makefile +++ b/stand/efi/boot1/Makefile @@ -6,6 +6,7 @@ PROG= boot1.sym INTERNALPROG= WARNS?= 6 +CFLAGS+= -DEFI_BOOT1 # We implement a slightly non-standard %S in that it always takes a # CHAR16 that's common in UEFI-land instead of a wchar_t. This only # seems to matter on arm64 where wchar_t defaults to an int instead diff --git a/stand/efi/loader/arch/arm64/start.S b/stand/efi/loader/arch/arm64/start.S index b58c2c50be9f..bddc2d088a64 100644 --- a/stand/efi/loader/arch/arm64/start.S +++ b/stand/efi/loader/arch/arm64/start.S @@ -160,6 +160,23 @@ _start: ldp x0, x1, [sp], #16 +#ifndef EFI_BOOT1 + /* + * Load the stack to use. The default stack may be too small for + * the lua loader. 
+ */ + adr x2, initstack_end + mov sp, x2 +#endif + bl efi_main 1: b 1b + +#ifndef EFI_BOOT1 +.bss + .align 4 +initstack: + .space (64 * 1024) +initstack_end: +#endif From d81347652cf4b3bedee180a55758857a5a7ac7da Mon Sep 17 00:00:00 2001 From: Kirk McKusick Date: Mon, 27 Aug 2018 15:20:42 +0000 Subject: [PATCH 22/51] When doing a -S "safe copy", the install command should do an fsync(2) system call after copying the installed file to ensure that it is on stable storage. PR: 230851 Reviewed by: kib Approved by: re (marius) --- usr.bin/xinstall/xinstall.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/usr.bin/xinstall/xinstall.c b/usr.bin/xinstall/xinstall.c index 512ec962d4a1..880766b34623 100644 --- a/usr.bin/xinstall/xinstall.c +++ b/usr.bin/xinstall/xinstall.c @@ -1265,6 +1265,12 @@ copy(int from_fd, const char *from_name, int to_fd, const char *to_name, err(EX_OSERR, "%s", from_name); } } + if (safecopy && fsync(to_fd) == -1) { + serrno = errno; + (void)unlink(to_name); + errno = serrno; + err(EX_OSERR, "fsync failed for %s", to_name); + } return (digest_end(&ctx, NULL)); } From 4b82a7b62f167734f760a1a886581721c25f1eae Mon Sep 17 00:00:00 2001 From: Andrew Gallatin Date: Mon, 27 Aug 2018 18:13:20 +0000 Subject: [PATCH 23/51] Reject IPv4 SO_REUSEPORT_LB groups when looking up an IPv6 listening socket Similar to how the IPv4 code will reject an IPv6 LB group, we must ignore IPv4 LB groups when looking up an IPv6 listening socket. If this is not done, a port only match may return an IPv4 socket, which causes problems (like sending IPv6 packets with a hopcount of 0, making them unrouteable). Thanks to rrs for all the work to diagnose this. 
Approved by: re (rgrimes) Sponsored by: Netflix Differential Revision: https://reviews.freebsd.org/D16899 --- sys/netinet6/in6_pcb.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sys/netinet6/in6_pcb.c b/sys/netinet6/in6_pcb.c index 205bc2af91ed..b45d04bf1be5 100644 --- a/sys/netinet6/in6_pcb.c +++ b/sys/netinet6/in6_pcb.c @@ -901,6 +901,10 @@ in6_pcblookup_lbgroup(const struct inpcbinfo *pcbinfo, * - Load balanced does not contain IPv4 mapped INET6 wild sockets. */ LIST_FOREACH(grp, hdr, il_list) { +#ifdef INET + if (!(grp->il_vflag & INP_IPV6)) + continue; +#endif if (grp->il_lport == lport) { idx = 0; int pkt_hash = INP_PCBLBGROUP_PKTHASH( From fd239e7ff8367445f10b535cfc44feeff9519f75 Mon Sep 17 00:00:00 2001 From: Kyle Evans Date: Mon, 27 Aug 2018 19:34:50 +0000 Subject: [PATCH 24/51] Fix bsdbox build WITH_OFED hostapd requires libpcap, which links against libmlx5 and libibverbs when building WITH_OFED. These were not pulled in to bsdbox and most bsdbox builds were WITHOUT_OFED up until recently, so it was not noticed. 
Approved by: re (gjb) --- tools/bsdbox/Makefile.hostapd | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/bsdbox/Makefile.hostapd b/tools/bsdbox/Makefile.hostapd index c0ea9c7c0d45..631643188bf1 100644 --- a/tools/bsdbox/Makefile.hostapd +++ b/tools/bsdbox/Makefile.hostapd @@ -3,6 +3,7 @@ # # $FreeBSD$ # +.include CRUNCH_PROGS_usr.sbin+= hostapd hostapd_cli CRUNCH_SRCDIR_hostapd= $(.CURDIR)/../../usr.sbin/wpa/hostapd CRUNCH_SRCDIR_hostapd_cli= $(.CURDIR)/../../usr.sbin/wpa/hostapd_cli @@ -11,5 +12,9 @@ CRUNCH_SRCDIR_hostapd_cli= $(.CURDIR)/../../usr.sbin/wpa/hostapd_cli #CRUNCH_SRCDIR_wpa_supplicant= $(.CURDIR)/../../usr.sbin/wpa/wpa_supplicant #CRUNCH_SRCDIR_wpa_cli= $(.CURDIR)/../../usr.sbin/wpa/wpa_cli +.if ${MK_OFED} != "no" +# libpcap dependencies if OFED is enabled +CRUNCH_LIBS+= -lmlx5 -libverbs +.endif CRUNCH_LIBS+= -lpcap From c0386fa3af821799856779b1b5c13d0f471a9abd Mon Sep 17 00:00:00 2001 From: Warner Losh Date: Tue, 28 Aug 2018 14:46:49 +0000 Subject: [PATCH 25/51] Put building of drm and drm2 modules behind options. Make the building of drm dependent on MK_MODULE_DRM and the building of module drm2 on MK_MODULE_DRM2. The defaults are unchanged. 
Approved by: re@ (gjb) Differential Review: https://reviews.freebsd.org/D16894 --- sys/conf/kern.opts.mk | 2 ++ sys/modules/Makefile | 8 ++++++++ tools/build/options/WITHOUT_MODULE_DRM | 3 +++ tools/build/options/WITHOUT_MODULE_DRM2 | 2 ++ tools/build/options/WITH_MODULE_DRM | 2 ++ tools/build/options/WITH_MODULE_DRM2 | 2 ++ 6 files changed, 19 insertions(+) create mode 100644 tools/build/options/WITHOUT_MODULE_DRM create mode 100644 tools/build/options/WITHOUT_MODULE_DRM2 create mode 100644 tools/build/options/WITH_MODULE_DRM create mode 100644 tools/build/options/WITH_MODULE_DRM2 diff --git a/sys/conf/kern.opts.mk b/sys/conf/kern.opts.mk index 31720088d7fd..0a229822607c 100644 --- a/sys/conf/kern.opts.mk +++ b/sys/conf/kern.opts.mk @@ -38,6 +38,8 @@ __DEFAULT_YES_OPTIONS = \ IPSEC_SUPPORT \ ISCSI \ KERNEL_SYMBOLS \ + MODULE_DRM \ + MODULE_DRM2 \ NETGRAPH \ PF \ SOURCELESS_HOST \ diff --git a/sys/modules/Makefile b/sys/modules/Makefile index 59203a394612..726144753c8b 100644 --- a/sys/modules/Makefile +++ b/sys/modules/Makefile @@ -583,8 +583,12 @@ _cpuctl= cpuctl _cpufreq= cpufreq _cs= cs _dpms= dpms +.if ${MK_MODULE_DRM} != "no" _drm= drm +.endif +.if ${MK_MODULE_DRM2} != "no" _drm2= drm2 +.endif _ed= ed _em= em _ena= ena @@ -781,7 +785,9 @@ _cardbus= cardbus _cbb= cbb _cfi= cfi _cpufreq= cpufreq +.if ${MK_MODULE_DRM} != "no" _drm= drm +.endif _exca= exca _ffec= ffec _nvd= nvd @@ -791,7 +797,9 @@ _wi= wi .endif .if ${MACHINE_ARCH} == "powerpc64" +.if ${MK_MODULE_DRM2} != "no" _drm2= drm2 +.endif _ipmi= ipmi .endif .if ${MACHINE_ARCH} == "powerpc64" || ${MACHINE_ARCH} == "powerpc" diff --git a/tools/build/options/WITHOUT_MODULE_DRM b/tools/build/options/WITHOUT_MODULE_DRM new file mode 100644 index 000000000000..f588566edb45 --- /dev/null +++ b/tools/build/options/WITHOUT_MODULE_DRM @@ -0,0 +1,3 @@ +.\" $FreeBSD$ +Disable creation of old drm video modules. 
+ diff --git a/tools/build/options/WITHOUT_MODULE_DRM2 b/tools/build/options/WITHOUT_MODULE_DRM2 new file mode 100644 index 000000000000..44d401d87619 --- /dev/null +++ b/tools/build/options/WITHOUT_MODULE_DRM2 @@ -0,0 +1,2 @@ +.\" $FreeBSD$ +Disable creation of old drm2 video modules. diff --git a/tools/build/options/WITH_MODULE_DRM b/tools/build/options/WITH_MODULE_DRM new file mode 100644 index 000000000000..0034dd6af550 --- /dev/null +++ b/tools/build/options/WITH_MODULE_DRM @@ -0,0 +1,2 @@ +.\" $FreeBSD$ +Enable creation of old drm video modules. diff --git a/tools/build/options/WITH_MODULE_DRM2 b/tools/build/options/WITH_MODULE_DRM2 new file mode 100644 index 000000000000..68e304d48296 --- /dev/null +++ b/tools/build/options/WITH_MODULE_DRM2 @@ -0,0 +1,2 @@ +.\" $FreeBSD$ +Enable creation of old drm2 video modules. From 264d4ffdf12137201d68b7dd07451b3ccdfffa8b Mon Sep 17 00:00:00 2001 From: Warner Losh Date: Tue, 28 Aug 2018 14:46:55 +0000 Subject: [PATCH 26/51] Add big, nasty abandonware tags to this code. This code works for some people, but hasn't been updated in a long time. Still allow people to use this code for the moment, but put a big, nasty obsolete message to inform and encourage people to move to the port. 
Approved by: re@ (gjb) Differential Review: https://reviews.freebsd.org/D16894 --- sys/dev/drm/drm.h | 11 +++++++++++ sys/dev/drm/drm_drv.c | 3 ++- sys/dev/drm2/drm_os_freebsd.c | 3 ++- sys/dev/drm2/drm_os_freebsd.h | 14 ++++++++++++++ 4 files changed, 29 insertions(+), 2 deletions(-) diff --git a/sys/dev/drm/drm.h b/sys/dev/drm/drm.h index a2612049bb0d..026907f2026d 100644 --- a/sys/dev/drm/drm.h +++ b/sys/dev/drm/drm.h @@ -1145,4 +1145,15 @@ typedef struct drm_mm_init_arg drm_mm_init_arg_t; typedef enum drm_bo_type drm_bo_type_t; #endif +#define DRM_PORT "graphics/drm-legacy-kmod" + +#define DRM_OBSOLETE(dev) \ + do { \ + device_printf(dev, "=======================================================\n"); \ + device_printf(dev, "This code is obsolete abandonware. Install the " DRM_PORT " pkg\n"); \ + device_printf(dev, "=======================================================\n"); \ + gone_in_dev(dev, 13, "drm drivers"); \ + } while (0) + + #endif diff --git a/sys/dev/drm/drm_drv.c b/sys/dev/drm/drm_drv.c index 6874d45d0335..20d7d973aed1 100644 --- a/sys/dev/drm/drm_drv.c +++ b/sys/dev/drm/drm_drv.c @@ -174,7 +174,8 @@ int drm_probe(device_t kdev, drm_pci_id_list_t *idlist) DRM_DEBUG("desc : %s\n", device_get_desc(kdev)); device_set_desc(kdev, id_entry->name); } - return 0; + DRM_OBSOLETE(kdev); + return BUS_PROBE_GENERIC; } return ENXIO; diff --git a/sys/dev/drm2/drm_os_freebsd.c b/sys/dev/drm2/drm_os_freebsd.c index 4ce8081887ba..8489ca848027 100644 --- a/sys/dev/drm2/drm_os_freebsd.c +++ b/sys/dev/drm2/drm_os_freebsd.c @@ -126,7 +126,8 @@ drm_probe_helper(device_t kdev, const drm_pci_id_list_t *idlist) device_get_nameunit(kdev), id_entry->name); device_set_desc(kdev, id_entry->name); } - return (0); + DRM_OBSOLETE(kdev); + return (-BUS_PROBE_GENERIC); } return (-ENXIO); diff --git a/sys/dev/drm2/drm_os_freebsd.h b/sys/dev/drm2/drm_os_freebsd.h index 11c9feb9b4aa..b7e81c56b94c 100644 --- a/sys/dev/drm2/drm_os_freebsd.h +++ b/sys/dev/drm2/drm_os_freebsd.h @@ -154,6 
+154,20 @@ typedef void irqreturn_t; *(volatile u_int64_t *)(((vm_offset_t)(map)->handle) + \ (vm_offset_t)(offset)) = htole64(val) +#ifdef __LP64__ +#define DRM_PORT "graphics/drm-stable-kmod" +#else +#define DRM_PORT "graphics/drm-legacy-kmod" +#endif + +#define DRM_OBSOLETE(dev) \ + do { \ + device_printf(dev, "=======================================================\n"); \ + device_printf(dev, "This code is obsolete abandonware. Install the " DRM_PORT " pkg\n"); \ + device_printf(dev, "=======================================================\n"); \ + gone_in_dev(dev, 13, "drm2 drivers"); \ + } while (0) + /* DRM_READMEMORYBARRIER() prevents reordering of reads. * DRM_WRITEMEMORYBARRIER() prevents reordering of writes. * DRM_MEMORYBARRIER() prevents reordering of reads and writes. From 303233568fda2a786a7dc3d73592fd1064ca8704 Mon Sep 17 00:00:00 2001 From: Warner Losh Date: Tue, 28 Aug 2018 14:53:03 +0000 Subject: [PATCH 27/51] Regen src.conf.5 after r338347. Approved by: re@ (gjb) --- share/man/man5/src.conf.5 | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/share/man/man5/src.conf.5 b/share/man/man5/src.conf.5 index fd992e81c66f..628a0e7d38e8 100644 --- a/share/man/man5/src.conf.5 +++ b/share/man/man5/src.conf.5 @@ -1,6 +1,6 @@ .\" DO NOT EDIT-- this file is @generated by tools/build/options/makeman. .\" $FreeBSD$ -.Dd August 16, 2018 +.Dd August 28, 2018 .Dt SRC.CONF 5 .Os .Sh NAME @@ -1184,8 +1184,14 @@ Enable firewire support in /boot/loader on x86. This option is a nop on all other platforms. .It Va WITHOUT_LOADER_GELI Disable inclusion of GELI crypto support in the boot chain binaries. +.Pp +This is a default setting on +sparc64/sparc64. .It Va WITH_LOADER_LUA Set to build LUA bindings for the boot loader. 
+.Pp +This is a default setting on +amd64/amd64, arm/arm, arm/armv6, arm/armv7, arm64/aarch64, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64, powerpc/powerpcspe, riscv/riscv64 and riscv/riscv64sf. .It Va WITHOUT_LOADER_OFW Disable building of openfirmware bootloader components. .Pp @@ -1342,6 +1348,11 @@ Set to build .Pp This is a default setting on amd64/amd64, arm64/aarch64, i386/i386, powerpc/powerpc64 and sparc64/sparc64. +.It Va WITHOUT_MODULE_DRM +Disable creation of old drm video modules. + +.It Va WITHOUT_MODULE_DRM2 +Disable creation of old drm2 video modules. .It Va WITH_NAND Set to build the NAND Flash components. .It Va WITHOUT_NDIS From 670c1e4b1c135be8b99c24761e504446c32077ec Mon Sep 17 00:00:00 2001 From: Mark Johnston Date: Tue, 28 Aug 2018 15:18:14 +0000 Subject: [PATCH 28/51] Add missing endpwent() and endgrent() calls to nfsuserd(8). PR: 230937 Submitted by: Peter Eriksson Reviewed by: rmacklem Approved by: re (gjb) MFC after: 1 week --- usr.sbin/nfsuserd/nfsuserd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/usr.sbin/nfsuserd/nfsuserd.c b/usr.sbin/nfsuserd/nfsuserd.c index 413e5cedbb9b..cfe9fa6c7eb7 100644 --- a/usr.sbin/nfsuserd/nfsuserd.c +++ b/usr.sbin/nfsuserd/nfsuserd.c @@ -334,6 +334,7 @@ main(int argc, char *argv[]) #endif i++; } + endgrent(); /* * Loop around adding all users. @@ -382,6 +383,7 @@ main(int argc, char *argv[]) #endif i++; } + endpwent(); /* * I should feel guilty for not calling this for all the above exit() From 0b5cab4e0f6e2b0e813e9e116d69ae912f49bf78 Mon Sep 17 00:00:00 2001 From: Marcin Wojtas Date: Tue, 28 Aug 2018 17:09:41 +0000 Subject: [PATCH 29/51] Use ip/ipv6 structures in al_eth only if they are supported The ip/ipv6 header files are included only if the appropriate definition exists, but the driver was missing similar checks when using the ip and ip6_hdr structures. 
If the kernel was not built with the INET or INET6 option, the driver was preventing kernel from being built. To fix that, the missing ifdef checks were added to the driver. PR: Bug 230886 Submitted by: Michal Krawczyk Reported by: O. Hartmann Approved by: re (gjb) Obtained from: Semihalf MFC after: 1 week Sponsored by: Amazon, Inc. --- sys/dev/al_eth/al_eth.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sys/dev/al_eth/al_eth.c b/sys/dev/al_eth/al_eth.c index e949191ccfc2..80e4a8e47444 100644 --- a/sys/dev/al_eth/al_eth.c +++ b/sys/dev/al_eth/al_eth.c @@ -1202,8 +1202,12 @@ al_eth_tx_csum(struct al_eth_ring *tx_ring, struct al_eth_tx_buffer *tx_info, uint32_t mss = m->m_pkthdr.tso_segsz; struct ether_vlan_header *eh; uint16_t etype; +#ifdef INET struct ip *ip; +#endif +#ifdef INET6 struct ip6_hdr *ip6; +#endif struct tcphdr *th = NULL; int ehdrlen, ip_hlen = 0; uint8_t ipproto = 0; @@ -1243,6 +1247,7 @@ al_eth_tx_csum(struct al_eth_ring *tx_ring, struct al_eth_tx_buffer *tx_info, } switch (etype) { +#ifdef INET case ETHERTYPE_IP: ip = (struct ip *)(m->m_data + ehdrlen); ip_hlen = ip->ip_hl << 2; @@ -1256,6 +1261,8 @@ al_eth_tx_csum(struct al_eth_ring *tx_ring, struct al_eth_tx_buffer *tx_info, else hal_pkt->l4_proto_idx = AL_ETH_PROTO_ID_UDP; break; +#endif /* INET */ +#ifdef INET6 case ETHERTYPE_IPV6: ip6 = (struct ip6_hdr *)(m->m_data + ehdrlen); hal_pkt->l3_proto_idx = AL_ETH_PROTO_ID_IPv6; @@ -1267,6 +1274,7 @@ al_eth_tx_csum(struct al_eth_ring *tx_ring, struct al_eth_tx_buffer *tx_info, else hal_pkt->l4_proto_idx = AL_ETH_PROTO_ID_UDP; break; +#endif /* INET6 */ default: break; } From c7c5d8e387b4b92dfed82450e988297da20e9d57 Mon Sep 17 00:00:00 2001 From: Philip Paeps Date: Tue, 28 Aug 2018 17:10:19 +0000 Subject: [PATCH 30/51] Add libxo(3) support to last(1). 
Reviewed by: kp Approved by: re (gjb) MFC after: 1 week Relnotes: yes Differential Revision: https://reviews.freebsd.org/D16922 --- usr.bin/last/Makefile | 1 + usr.bin/last/last.1 | 16 ++++++++- usr.bin/last/last.c | 79 ++++++++++++++++++++++++++++--------------- 3 files changed, 67 insertions(+), 29 deletions(-) diff --git a/usr.bin/last/Makefile b/usr.bin/last/Makefile index fd63cd989b2e..3edb032fd6c0 100644 --- a/usr.bin/last/Makefile +++ b/usr.bin/last/Makefile @@ -2,6 +2,7 @@ # $FreeBSD$ PROG= last +LIBADD= xo NO_WFORMAT= diff --git a/usr.bin/last/last.1 b/usr.bin/last/last.1 index dba8ccd53b50..eccec95eb1fc 100644 --- a/usr.bin/last/last.1 +++ b/usr.bin/last/last.1 @@ -28,7 +28,7 @@ .\" @(#)last.1 8.1 (Berkeley) 6/6/93 .\" $FreeBSD$ .\" -.Dd June 1, 2018 +.Dd August 28, 2018 .Dt LAST 1 .Os .Sh NAME @@ -36,6 +36,7 @@ .Nd indicate last logins of users and ttys .Sh SYNOPSIS .Nm +.Op Fl -libxo .Op Fl swy .Oo .Fl d @@ -72,6 +73,13 @@ will so indicate. .Pp The following options are available: .Bl -tag -width indent-two +.It Fl -libxo +Generate output via +.Xr libxo 3 +in a selection of different human and machine readable formats. +See +.Xr xo_parse_args 3 +for details on command line arguments. .It Fl d Ar date Specify the snapshot date and time. All users logged in at the snapshot date and time will @@ -201,6 +209,8 @@ login data base .Xr getutxent 3 , .Xr ac 8 , .Xr lastlogin 8 +.Xr libxo 3 , +.Xr xo_parse_args 3 .Sh HISTORY .Nm utility first appeared in @@ -211,6 +221,10 @@ The original version was written by .An Howard P. Katseff ; .An Keith Bostic rewrote it in 1986/87 to add functionality and to improve code quality. +.An Philip Paeps +added +.Xr libxo 3 +support in August 2018. 
.Sh BUGS If a login shell should terminate abnormally for some reason, it is likely that a logout record will not be written to the diff --git a/usr.bin/last/last.c b/usr.bin/last/last.c index 42177e278718..188f393ae3fc 100644 --- a/usr.bin/last/last.c +++ b/usr.bin/last/last.c @@ -3,6 +3,7 @@ * * Copyright (c) 1987, 1993, 1994 * The Regents of the University of California. All rights reserved. + * Copyright (c) 2018 Philip Paeps * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -62,6 +63,8 @@ __FBSDID("$FreeBSD$"); #include #include +#include + #define NO 0 /* false/no */ #define YES 1 /* true/yes */ #define ATOI2(ar) ((ar)[0] - '0') * 10 + ((ar)[1] - '0'); (ar) += 2; @@ -112,7 +115,7 @@ static void wtmp(void); static void usage(void) { - (void)fprintf(stderr, + xo_error( "usage: last [-swy] [-d [[CC]YY][MMDD]hhmm[.SS]] [-f file] [-h host]\n" " [-n maxrec] [-t tty] [user ...]\n"); exit(1); @@ -127,6 +130,11 @@ main(int argc, char *argv[]) (void) setlocale(LC_TIME, ""); d_first = (*nl_langinfo(D_MD_ORDER) == 'd'); + argc = xo_parse_args(argc, argv); + if (argc < 0) + exit(1); + atexit(xo_finish_atexit); + maxrec = -1; snaptime = 0; while ((ch = getopt(argc, argv, "0123456789d:f:h:n:st:wy")) != -1) @@ -161,7 +169,7 @@ main(int argc, char *argv[]) maxrec = strtol(optarg, &p, 10); if (p == optarg || *p != '\0' || errno != 0 || maxrec <= 0) - errx(1, "%s: bad line count", optarg); + xo_errx(1, "%s: bad line count", optarg); break; case 's': sflag++; /* Show delta as seconds */ @@ -181,17 +189,17 @@ main(int argc, char *argv[]) } if (caph_limit_stdio() < 0) - err(1, "can't limit stdio rights"); + xo_err(1, "can't limit stdio rights"); caph_cache_catpages(); caph_cache_tzdata(); /* Cache UTX database. */ if (setutxdb(UTXDB_LOG, file) != 0) - err(1, "%s", file != NULL ? file : "(default utx db)"); + xo_err(1, "%s", file != NULL ? 
file : "(default utx db)"); if (caph_enter() < 0) - err(1, "cap_enter"); + xo_err(1, "cap_enter"); if (sflag && width == 8) usage(); @@ -229,12 +237,14 @@ wtmp(void) SLIST_INIT(&idlist); (void)time(&t); + xo_open_container("last-information"); + /* Load the last entries from the file. */ while ((ut = getutxent()) != NULL) { if (amount % 128 == 0) { buf = realloc(buf, (amount + 128) * sizeof *ut); if (buf == NULL) - err(1, "realloc"); + xo_err(1, "realloc"); } memcpy(&buf[amount++], ut, sizeof *ut); if (t > ut->ut_tv.tv_sec) @@ -243,12 +253,17 @@ wtmp(void) endutxent(); /* Display them in reverse order. */ + xo_open_list("last"); while (amount > 0) doentry(&buf[--amount]); + xo_close_list("last"); free(buf); tm = localtime(&t); (void) strftime(ct, sizeof(ct), "%+", tm); - printf("\n%s begins %s\n", ((file == NULL) ? "utx.log" : file), ct); + xo_emit("\n{:utxdb/%s}", (file == NULL) ? "utx.log" : file); + xo_attr("seconds", "%lu", (unsigned long) t); + xo_emit(" begins {:begins/%s}\n", ct); + xo_close_container("last-information"); } /* @@ -303,7 +318,7 @@ doentry(struct utmpx *bp) /* add new one */ tt = malloc(sizeof(struct idtab)); if (tt == NULL) - errx(1, "malloc failure"); + xo_errx(1, "malloc failure"); tt->logout = currentout; memcpy(tt->id, bp->ut_id, sizeof bp->ut_id); SLIST_INSERT_HEAD(&idlist, tt, list); @@ -339,6 +354,7 @@ printentry(struct utmpx *bp, struct idtab *tt) if (maxrec != -1 && !maxrec--) exit(0); + xo_open_instance("last"); t = bp->ut_tv.tv_sec; tm = localtime(&t); (void) strftime(ct, sizeof(ct), d_first ? @@ -346,48 +362,55 @@ printentry(struct utmpx *bp, struct idtab *tt) (yflag ? 
"%a %b %e %Y %R" : "%a %b %e %R"), tm); switch (bp->ut_type) { case BOOT_TIME: - printf("%-42s", "boot time"); + xo_emit("{:user/%-42s/%s}", "boot time"); break; case SHUTDOWN_TIME: - printf("%-42s", "shutdown time"); + xo_emit("{:user/%-42s/%s}", "shutdown time"); break; case OLD_TIME: - printf("%-42s", "old time"); + xo_emit("{:user/%-42s/%s}", "old time"); break; case NEW_TIME: - printf("%-42s", "new time"); + xo_emit("{:user/%-42s/%s}", "new time"); break; case USER_PROCESS: - printf("%-10s %-8s %-22.22s", + xo_emit("{:user/%-10s/%s} {:tty/%-8s/%s} {:from/%-22.22s/%s}", bp->ut_user, bp->ut_line, bp->ut_host); break; } - printf(" %s%c", ct, tt == NULL ? '\n' : ' '); + xo_attr("seconds", "%lu", (unsigned long)t); + xo_emit(" {:login-time/%s%c/%s}", ct, tt == NULL ? '\n' : ' '); if (tt == NULL) - return; + goto end; if (!tt->logout) { - puts(" still logged in"); - return; + xo_emit(" {:logout-time/still logged in}\n"); + goto end; } if (tt->logout < 0) { tt->logout = -tt->logout; - printf("- %s", crmsg); + xo_emit("- {:logout-reason/%s}", crmsg); } else { tm = localtime(&tt->logout); (void) strftime(ct, sizeof(ct), "%R", tm); - printf("- %s", ct); + xo_attr("seconds", "%lu", (unsigned long)tt->logout); + xo_emit("- {:logout-time/%s}", ct); } delta = tt->logout - bp->ut_tv.tv_sec; + xo_attr("seconds", "%ld", (long)delta); if (sflag) { - printf(" (%8ld)\n", (long)delta); + xo_emit(" ({:session-length/%8ld})\n", (long)delta); } else { tm = gmtime(&delta); (void) strftime(ct, sizeof(ct), width >= 8 ? 
"%T" : "%R", tm); if (delta < 86400) - printf(" (%s)\n", ct); + xo_emit(" ({:session-length/%s})\n", ct); else - printf(" (%ld+%s)\n", (long)delta / 86400, ct); + xo_emit(" ({:session-length/%ld+%s})\n", + (long)delta / 86400, ct); } + +end: + xo_close_instance("last"); } /* @@ -438,7 +461,7 @@ addarg(int type, char *arg) ARG *cur; if ((cur = malloc(sizeof(ARG))) == NULL) - errx(1, "malloc failure"); + xo_errx(1, "malloc failure"); cur->next = arglist; cur->type = type; cur->name = arg; @@ -463,7 +486,7 @@ hostconv(char *arg) if (first) { first = 0; if (gethostname(name, sizeof(name))) - err(1, "gethostname"); + xo_err(1, "gethostname"); hostdot = strchr(name, '.'); } if (hostdot && !strcasecmp(hostdot, argdot)) @@ -486,7 +509,7 @@ ttyconv(char *arg) if (strlen(arg) == 2) { /* either 6 for "ttyxx" or 8 for "console" */ if ((mval = malloc(8)) == NULL) - errx(1, "malloc failure"); + xo_errx(1, "malloc failure"); if (!strcmp(arg, "co")) (void)strcpy(mval, "console"); else { @@ -516,9 +539,9 @@ dateconv(char *arg) /* Start with the current time. */ if (time(&timet) < 0) - err(1, "time"); + xo_err(1, "time"); if ((t = localtime(&timet)) == NULL) - err(1, "localtime"); + xo_err(1, "localtime"); /* [[CC]YY]MMDDhhmm[.SS] */ if ((p = strchr(arg, '.')) == NULL) @@ -567,7 +590,7 @@ dateconv(char *arg) t->tm_isdst = -1; /* Figure out DST. */ timet = mktime(t); if (timet == -1) -terr: errx(1, +terr: xo_errx(1, "out of range or illegal time specification: [[CC]YY]MMDDhhmm[.SS]"); return timet; } From 427b88d77eb888fad96ba984ca413a89f30252d7 Mon Sep 17 00:00:00 2001 From: Philip Paeps Date: Tue, 28 Aug 2018 17:12:37 +0000 Subject: [PATCH 31/51] Add libxo(3) support to lastlogin(8). 
Reviewed by: kp Approved by: re (gjb) MFC after: 1 week Relnotes: yes Differential Revision: https://reviews.freebsd.org/D16919 --- usr.sbin/lastlogin/Makefile | 1 + usr.sbin/lastlogin/lastlogin.8 | 16 +++++++++++++++- usr.sbin/lastlogin/lastlogin.c | 33 ++++++++++++++++++++++++++------- 3 files changed, 42 insertions(+), 8 deletions(-) diff --git a/usr.sbin/lastlogin/Makefile b/usr.sbin/lastlogin/Makefile index 715badd29341..7fe146ffd492 100644 --- a/usr.sbin/lastlogin/Makefile +++ b/usr.sbin/lastlogin/Makefile @@ -2,5 +2,6 @@ PROG= lastlogin MAN= lastlogin.8 +LIBADD= xo .include diff --git a/usr.sbin/lastlogin/lastlogin.8 b/usr.sbin/lastlogin/lastlogin.8 index fdbc871dcf17..be0a560b057f 100644 --- a/usr.sbin/lastlogin/lastlogin.8 +++ b/usr.sbin/lastlogin/lastlogin.8 @@ -31,7 +31,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd June 6, 2011 +.Dd August 28, 2018 .Dt LASTLOGIN 8 .Os .Sh NAME @@ -39,6 +39,7 @@ .Nd indicate last login time of users .Sh SYNOPSIS .Nm +.Op Fl -libxo .Op Fl f Ar file .Op Fl rt .Op Ar user ... @@ -68,6 +69,13 @@ The last login database is never turned over or deleted in standard usage. .Pp The following options are available: .Bl -tag -width indent +.It Fl -libxo +Generate output via +.Xr libxo 3 +in a selection of different human and machine readable formats. +See +.Xr xo_parse_args 3 +for details on command line arguments. .It Fl f Ar file Open last login database .Ar file @@ -86,9 +94,15 @@ last login database .Xr last 1 , .Xr getutxent 3 , .Xr ac 8 +.Xr libxo 3 , +.Xr xo_parse_args 3 .Sh AUTHORS +.An -nosplit .An John M. Vinopal wrote this program in January 1996 and contributed it to the .Nx project. +.An Philip Paeps added +.Xr libxo 3 +support in August 2018. 
diff --git a/usr.sbin/lastlogin/lastlogin.c b/usr.sbin/lastlogin/lastlogin.c index 8b62eb31b100..35aee1dcb2c5 100644 --- a/usr.sbin/lastlogin/lastlogin.c +++ b/usr.sbin/lastlogin/lastlogin.c @@ -2,6 +2,7 @@ * SPDX-License-Identifier: BSD-4-Clause * * Copyright (c) 1996 John M. Vinopal + * Copyright (c) 2018 Philip Paeps * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -46,6 +47,8 @@ __RCSID("$NetBSD: lastlogin.c,v 1.4 1998/02/03 04:45:35 perry Exp $"); #include #include +#include + int main(int, char **); static void output(struct utmpx *); static void usage(void); @@ -79,6 +82,10 @@ main(int argc, char *argv[]) int ch, i, ulistsize; struct utmpx *u, *ulist; + argc = xo_parse_args(argc, argv); + if (argc < 0) + exit(1); + while ((ch = getopt(argc, argv, "f:rt")) != -1) { switch (ch) { case 'f': @@ -97,13 +104,16 @@ main(int argc, char *argv[]) argc -= optind; argv += optind; + xo_open_container("lastlogin-information"); + xo_open_list("lastlogin"); + if (argc > 0) { /* Process usernames given on the command line. */ for (i = 0; i < argc; i++) { if (setutxdb(UTXDB_LASTLOGIN, file) != 0) - err(1, "failed to open lastlog database"); + xo_err(1, "failed to open lastlog database"); if ((u = getutxuser(argv[i])) == NULL) { - warnx("user '%s' not found", argv[i]); + xo_warnx("user '%s' not found", argv[i]); continue; } output(u); @@ -112,7 +122,7 @@ main(int argc, char *argv[]) } else { /* Read all lastlog entries, looking for active ones. 
*/ if (setutxdb(UTXDB_LASTLOGIN, file) != 0) - err(1, "failed to open lastlog database"); + xo_err(1, "failed to open lastlog database"); ulist = NULL; ulistsize = 0; while ((u = getutxent()) != NULL) { @@ -122,7 +132,7 @@ main(int argc, char *argv[]) ulist = realloc(ulist, (ulistsize + 16) * sizeof(struct utmpx)); if (ulist == NULL) - err(1, "malloc"); + xo_err(1, "malloc"); } ulist[ulistsize++] = *u; } @@ -133,6 +143,10 @@ main(int argc, char *argv[]) output(&ulist[i]); } + xo_close_list("lastlogin"); + xo_close_container("lastlogin-information"); + xo_finish(); + exit(0); } @@ -142,13 +156,18 @@ output(struct utmpx *u) { time_t t = u->ut_tv.tv_sec; - printf("%-10s %-8s %-22.22s %s", - u->ut_user, u->ut_line, u->ut_host, ctime(&t)); + xo_open_instance("lastlogin"); + xo_emit("{:user/%-10s/%s} {:tty/%-8s/%s} {:from/%-22.22s/%s}", + u->ut_user, u->ut_line, u->ut_host); + xo_attr("seconds", "%lu", (unsigned long)t); + xo_emit(" {:login-time/%.24s/%.24s}\n", ctime(&t)); + xo_close_instance("lastlogin"); } static void usage(void) { - fprintf(stderr, "usage: lastlogin [-f file] [-rt] [user ...]\n"); + xo_error("usage: lastlogin [-f file] [-rt] [user ...]\n"); + xo_finish(); exit(1); } From f6a0fd4490a0fac2734b590b88dfcd93fccd6352 Mon Sep 17 00:00:00 2001 From: Mark Johnston Date: Tue, 28 Aug 2018 17:14:46 +0000 Subject: [PATCH 32/51] Document the cpu_microcode_* tunables. Reviewed by: bcr (previous version), kib Approved by: re (gjb) Sponsored by: The FreeBSD Foundation Differential Revision: https://reviews.freebsd.org/D16923 --- stand/defaults/loader.conf.5 | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/stand/defaults/loader.conf.5 b/stand/defaults/loader.conf.5 index 57bf45431dad..d46736223e1c 100644 --- a/stand/defaults/loader.conf.5 +++ b/stand/defaults/loader.conf.5 @@ -23,7 +23,7 @@ .\" SUCH DAMAGE. 
.\" .\" $FreeBSD$ -.Dd March 23, 2018 +.Dd August 28, 2018 .Dt LOADER.CONF 5 .Os .Sh NAME @@ -289,6 +289,29 @@ See the entropy entries in .Pq Dq /boot/entropy The name of the very early boot-time entropy cache file. +.It Va cpu_microcode_load +.Pq Dq NO +If set to +.Dq YES , +the microcode update file specified by +.Va cpu_microcode_name +will be loaded and applied very early during boot. +This provides functionality similar to +.Xr cpucontrol 8 +but ensures that CPU features enabled by microcode updates can be +used by the kernel. +The update will be re-applied automatically when resuming from an +ACPI sleep state. +If the update file contains updates for multiple processor models, +the kernel will search for and extract a matching update. +Currently this setting is supported only on Intel +.Dv i386 +and +.Dv amd64 +processors. +It has no effect on other processor types. +.It Va cpu_microcode_name +A path to a microcode update file. .El .Sh OTHER SETTINGS Other settings that may be used in @@ -319,6 +342,7 @@ machine-specific settings for sites with a common loader.conf. .Sh SEE ALSO .Xr rc.conf 5 , .Xr boot 8 , +.Xr cpucontrol 8 , .Xr loader 8 , .Xr loader.4th 8 .Sh HISTORY From d6ddb0848c95a9b11f184ee1af68718cbf5ff1de Mon Sep 17 00:00:00 2001 From: Navdeep Parhar Date: Tue, 28 Aug 2018 18:16:02 +0000 Subject: [PATCH 33/51] cxgbe/tom: Unregister shared CPL handlers on module unload. This fixes a panic with INVARIANTS that occurs when t4_tom is unloaded and reloaded. 
Approved by: re@ (kib@) --- sys/dev/cxgbe/tom/t4_ddp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sys/dev/cxgbe/tom/t4_ddp.c b/sys/dev/cxgbe/tom/t4_ddp.c index cba39f0c83dc..02c906c47d22 100644 --- a/sys/dev/cxgbe/tom/t4_ddp.c +++ b/sys/dev/cxgbe/tom/t4_ddp.c @@ -1970,6 +1970,8 @@ t4_ddp_mod_unload(void) taskqueue_drain(taskqueue_thread, &ddp_orphan_task); MPASS(TAILQ_EMPTY(&ddp_orphan_pagesets)); mtx_destroy(&ddp_orphan_pagesets_lock); + t4_register_shared_cpl_handler(CPL_SET_TCB_RPL, NULL, CPL_COOKIE_DDP0); + t4_register_shared_cpl_handler(CPL_SET_TCB_RPL, NULL, CPL_COOKIE_DDP1); t4_register_cpl_handler(CPL_RX_DATA_DDP, NULL); t4_register_cpl_handler(CPL_RX_DDP_COMPLETE, NULL); } From d3672361837848f1ec2ded14573de520d92a3992 Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Tue, 28 Aug 2018 18:47:02 +0000 Subject: [PATCH 34/51] Several bug fixes and robustness improvements for the AP boot page table allocation. At the time that mp_bootaddress() is called, the phys_avail[] array does not reflect some memory reservations already made, such as kernel placement. Recent changes to DMAP protection, which make kernel text read-only in the DMAP, revealed this: on some machines the AP boot page table selection appears to intersect with the kernel itself. Fix this by checking the addresses selected using the same algorithm as bootaddr_rwx(). Also, try to chomp pages for the page table not only at the start of the contiguous range, but also at the end. This should improve robustness when the only suitable range is already consumed by the kernel.
Reported and tested by: Michael Gmelin Reviewed by: jhb MFC after: 1 week Sponsored by: The FreeBSD Foundation Approved by: re (gjb) Differential revision: https://reviews.freebsd.org/D16907 --- sys/amd64/amd64/mp_machdep.c | 72 +++++++++++++++++++++++++++--------- 1 file changed, 54 insertions(+), 18 deletions(-) diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c index ef8b885a65d5..f7f18f2b0826 100644 --- a/sys/amd64/amd64/mp_machdep.c +++ b/sys/amd64/amd64/mp_machdep.c @@ -86,6 +86,8 @@ __FBSDID("$FreeBSD$"); #define GiB(v) (v ## ULL << 30) +#define AP_BOOTPT_SZ (PAGE_SIZE * 3) + extern struct pcpu __pcpu[]; /* Temporary variables for init_secondary() */ @@ -100,45 +102,79 @@ char *dbg_stack; static int start_ap(int apic_id); +static bool +is_kernel_paddr(vm_paddr_t pa) +{ + + return (pa >= trunc_2mpage(btext - KERNBASE) && + pa < round_page(_end - KERNBASE)); +} + +static bool +is_mpboot_good(vm_paddr_t start, vm_paddr_t end) +{ + + return (start + AP_BOOTPT_SZ <= GiB(4) && atop(end) < Maxmem); +} + /* * Calculate usable address in base memory for AP trampoline code. */ void mp_bootaddress(vm_paddr_t *physmap, unsigned int *physmap_idx) { + vm_paddr_t start, end; unsigned int i; bool allocated; alloc_ap_trampoline(physmap, physmap_idx); + /* + * Find a memory region big enough below the 4GB boundary to + * store the initial page tables. Region must be mapped by + * the direct map. + * + * Note that it needs to be aligned to a page boundary. + */ allocated = false; for (i = *physmap_idx; i <= *physmap_idx; i -= 2) { /* - * Find a memory region big enough below the 4GB - * boundary to store the initial page tables. Region - * must be mapped by the direct map. - * - * Note that it needs to be aligned to a page - * boundary. + * First, try to chomp at the start of the physmap region. + * Kernel binary might claim it already. 
*/ - if (physmap[i] >= GiB(4) || physmap[i + 1] - - round_page(physmap[i]) < PAGE_SIZE * 3 || - atop(physmap[i + 1]) > Maxmem) - continue; + start = round_page(physmap[i]); + end = start + AP_BOOTPT_SZ; + if (start < end && end <= physmap[i + 1] && + is_mpboot_good(start, end) && + !is_kernel_paddr(start) && !is_kernel_paddr(end - 1)) { + allocated = true; + physmap[i] = end; + break; + } - allocated = true; - mptramp_pagetables = round_page(physmap[i]); - physmap[i] = round_page(physmap[i]) + (PAGE_SIZE * 3); + /* + * Second, try to chomp at the end. Again, check + * against kernel. + */ + end = trunc_page(physmap[i + 1]); + start = end - AP_BOOTPT_SZ; + if (start < end && start >= physmap[i] && + is_mpboot_good(start, end) && + !is_kernel_paddr(start) && !is_kernel_paddr(end - 1)) { + allocated = true; + physmap[i + 1] = start; + break; + } + } + if (allocated) { + mptramp_pagetables = start; if (physmap[i] == physmap[i + 1] && *physmap_idx != 0) { memmove(&physmap[i], &physmap[i + 2], sizeof(*physmap) * (*physmap_idx - i + 2)); *physmap_idx -= 2; } - break; - } - - if (!allocated) { - mptramp_pagetables = trunc_page(boot_address) - (PAGE_SIZE * 3); + } else { + mptramp_pagetables = trunc_page(boot_address) - AP_BOOTPT_SZ; if (bootverbose) printf( "Cannot find enough space for the initial AP page tables, placing them at %#x", From 1444bf7c81d0042a81379e5453ce010524058500 Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Tue, 28 Aug 2018 18:49:39 +0000 Subject: [PATCH 35/51] Fix compat32 ftruncate cap mode after ino64. 
Reported by: asomers PR: 230120 Sponsored by: The FreeBSD Foundation Approved by: re (gjb) --- sys/compat/freebsd32/capabilities.conf | 1 + 1 file changed, 1 insertion(+) diff --git a/sys/compat/freebsd32/capabilities.conf b/sys/compat/freebsd32/capabilities.conf index 532597486fa8..c96dab7aa9d4 100644 --- a/sys/compat/freebsd32/capabilities.conf +++ b/sys/compat/freebsd32/capabilities.conf @@ -108,6 +108,7 @@ freebsd32_fstat fstatfs fsync ftruncate +freebsd32_ftruncate freebsd32_futimens freebsd32_futimes getaudit From 4ec2e460b5e0391307105c4280d802035399bf99 Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Tue, 28 Aug 2018 18:50:34 +0000 Subject: [PATCH 36/51] Regen after r338357. Approved by: re (gjb) --- sys/compat/freebsd32/freebsd32_sysent.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sys/compat/freebsd32/freebsd32_sysent.c b/sys/compat/freebsd32/freebsd32_sysent.c index 1a745e1068b9..27caed81eefa 100644 --- a/sys/compat/freebsd32/freebsd32_sysent.c +++ b/sys/compat/freebsd32/freebsd32_sysent.c @@ -257,7 +257,7 @@ struct sysent freebsd32_sysent[] = { { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 198 = __syscall */ { compat6(AS(freebsd6_freebsd32_lseek_args),freebsd32_lseek), AUE_LSEEK, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 199 = freebsd6 freebsd32_lseek */ { compat6(AS(freebsd6_freebsd32_truncate_args),freebsd32_truncate), AUE_TRUNCATE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 200 = freebsd6 freebsd32_truncate */ - { compat6(AS(freebsd6_freebsd32_ftruncate_args),freebsd32_ftruncate), AUE_FTRUNCATE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 201 = freebsd6 freebsd32_ftruncate */ + { compat6(AS(freebsd6_freebsd32_ftruncate_args),freebsd32_ftruncate), AUE_FTRUNCATE, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 201 = freebsd6 freebsd32_ftruncate */ { AS(freebsd32_sysctl_args), (sy_call_t *)freebsd32_sysctl, AUE_SYSCTL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 202 = freebsd32_sysctl */ { 
AS(mlock_args), (sy_call_t *)sys_mlock, AUE_MLOCK, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 203 = mlock */ { AS(munlock_args), (sy_call_t *)sys_munlock, AUE_MUNLOCK, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 204 = munlock */ @@ -537,14 +537,14 @@ struct sysent freebsd32_sysent[] = { { AS(freebsd32_mmap_args), (sy_call_t *)freebsd32_mmap, AUE_MMAP, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 477 = freebsd32_mmap */ { AS(freebsd32_lseek_args), (sy_call_t *)freebsd32_lseek, AUE_LSEEK, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 478 = freebsd32_lseek */ { AS(freebsd32_truncate_args), (sy_call_t *)freebsd32_truncate, AUE_TRUNCATE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 479 = freebsd32_truncate */ - { AS(freebsd32_ftruncate_args), (sy_call_t *)freebsd32_ftruncate, AUE_FTRUNCATE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 480 = freebsd32_ftruncate */ + { AS(freebsd32_ftruncate_args), (sy_call_t *)freebsd32_ftruncate, AUE_FTRUNCATE, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 480 = freebsd32_ftruncate */ #else { AS(freebsd32_pread_args), (sy_call_t *)freebsd32_pread, AUE_PREAD, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 475 = freebsd32_pread */ { AS(freebsd32_pwrite_args), (sy_call_t *)freebsd32_pwrite, AUE_PWRITE, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 476 = freebsd32_pwrite */ { AS(freebsd32_mmap_args), (sy_call_t *)freebsd32_mmap, AUE_MMAP, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 477 = freebsd32_mmap */ { AS(freebsd32_lseek_args), (sy_call_t *)freebsd32_lseek, AUE_LSEEK, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 478 = freebsd32_lseek */ { AS(freebsd32_truncate_args), (sy_call_t *)freebsd32_truncate, AUE_TRUNCATE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 479 = freebsd32_truncate */ - { AS(freebsd32_ftruncate_args), (sy_call_t *)freebsd32_ftruncate, AUE_FTRUNCATE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 480 = freebsd32_ftruncate */ + { AS(freebsd32_ftruncate_args), (sy_call_t *)freebsd32_ftruncate, AUE_FTRUNCATE, NULL, 0, 0, SYF_CAPENABLED, 
SY_THR_STATIC }, /* 480 = freebsd32_ftruncate */ #endif { AS(thr_kill2_args), (sy_call_t *)sys_thr_kill2, AUE_THR_KILL2, NULL, 0, 0, 0, SY_THR_STATIC }, /* 481 = thr_kill2 */ { AS(shm_open_args), (sy_call_t *)sys_shm_open, AUE_SHMOPEN, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 482 = shm_open */ From c208cb9923a9e92c801f6eebe13d191906b2719e Mon Sep 17 00:00:00 2001 From: Mark Johnston Date: Tue, 28 Aug 2018 20:21:36 +0000 Subject: [PATCH 37/51] Allow multiple FBT probes to share a tracepoint. With GNU ifuncs, multiple FBT probes may correspond to the same instruction. fbt_invop() assumed that this could not happen and would return after the first probe found in the global FBT hash table, which might not be the one that's enabled. Fix the problem on x86 by linking probes that share a tracepoint and having each linked probe fire when the tracepoint is hit. PR: 230846 Approved by: re (gjb) Sponsored by: The FreeBSD Foundation Differential Revision: https://reviews.freebsd.org/D16921 --- sys/cddl/dev/fbt/aarch64/fbt_isa.c | 2 +- sys/cddl/dev/fbt/arm/fbt_isa.c | 2 +- sys/cddl/dev/fbt/fbt.c | 92 +++++++++++++++++++----------- sys/cddl/dev/fbt/fbt.h | 14 ++++- sys/cddl/dev/fbt/mips/fbt_isa.c | 2 +- sys/cddl/dev/fbt/powerpc/fbt_isa.c | 2 +- sys/cddl/dev/fbt/riscv/fbt_isa.c | 2 +- sys/cddl/dev/fbt/x86/fbt_isa.c | 28 ++++++--- 8 files changed, 97 insertions(+), 47 deletions(-) diff --git a/sys/cddl/dev/fbt/aarch64/fbt_isa.c b/sys/cddl/dev/fbt/aarch64/fbt_isa.c index 1231140a4253..581f390baf89 100644 --- a/sys/cddl/dev/fbt/aarch64/fbt_isa.c +++ b/sys/cddl/dev/fbt/aarch64/fbt_isa.c @@ -152,7 +152,7 @@ fbt_provide_module_function(linker_file_t lf, int symindx, fbt->fbtp_id = dtrace_probe_create(fbt_id, modname, name, FBT_RETURN, 3, fbt); } else { - retfbt->fbtp_next = fbt; + retfbt->fbtp_probenext = fbt; fbt->fbtp_id = retfbt->fbtp_id; } retfbt = fbt; diff --git a/sys/cddl/dev/fbt/arm/fbt_isa.c b/sys/cddl/dev/fbt/arm/fbt_isa.c index 96dd13029d6a..dc1abf3cf786 100644 --- 
a/sys/cddl/dev/fbt/arm/fbt_isa.c +++ b/sys/cddl/dev/fbt/arm/fbt_isa.c @@ -165,7 +165,7 @@ fbt_provide_module_function(linker_file_t lf, int symindx, fbt->fbtp_id = dtrace_probe_create(fbt_id, modname, name, FBT_RETURN, 2, fbt); } else { - retfbt->fbtp_next = fbt; + retfbt->fbtp_probenext = fbt; fbt->fbtp_id = retfbt->fbtp_id; } retfbt = fbt; diff --git a/sys/cddl/dev/fbt/fbt.c b/sys/cddl/dev/fbt/fbt.c index df543df4cbdc..590967ade7b3 100644 --- a/sys/cddl/dev/fbt/fbt.c +++ b/sys/cddl/dev/fbt/fbt.c @@ -156,7 +156,7 @@ fbt_doubletrap(void) for (i = 0; i < fbt_probetab_size; i++) { fbt = fbt_probetab[i]; - for (; fbt != NULL; fbt = fbt->fbtp_next) + for (; fbt != NULL; fbt = fbt->fbtp_probenext) fbt_patch_tracepoint(fbt, fbt->fbtp_savedval); } } @@ -204,40 +204,53 @@ fbt_provide_module(void *arg, modctl_t *lf) (void) linker_file_function_listall(lf, fbt_provide_module_function, modname); } +static void +fbt_destroy_one(fbt_probe_t *fbt) +{ + fbt_probe_t *hash, *hashprev, *next; + int ndx; + + ndx = FBT_ADDR2NDX(fbt->fbtp_patchpoint); + for (hash = fbt_probetab[ndx], hashprev = NULL; hash != NULL; + hash = hash->fbtp_hashnext, hashprev = hash) { + if (hash == fbt) { + if ((next = fbt->fbtp_tracenext) != NULL) + next->fbtp_hashnext = hash->fbtp_hashnext; + else + next = hash->fbtp_hashnext; + if (hashprev != NULL) + hashprev->fbtp_hashnext = next; + else + fbt_probetab[ndx] = next; + goto free; + } else if (hash->fbtp_patchpoint == fbt->fbtp_patchpoint) { + for (next = hash; next->fbtp_tracenext != NULL; + next = next->fbtp_tracenext) { + if (fbt == next->fbtp_tracenext) { + next->fbtp_tracenext = + fbt->fbtp_tracenext; + goto free; + } + } + } + } + panic("probe %p not found in hash table", fbt); +free: + free(fbt, M_FBT); +} + static void fbt_destroy(void *arg, dtrace_id_t id, void *parg) { - fbt_probe_t *fbt = parg, *next, *hash, *last; + fbt_probe_t *fbt = parg, *next; modctl_t *ctl; - int ndx; do { ctl = fbt->fbtp_ctl; - ctl->fbt_nentries--; - /* - * Now we need to 
remove this probe from the fbt_probetab. - */ - ndx = FBT_ADDR2NDX(fbt->fbtp_patchpoint); - last = NULL; - hash = fbt_probetab[ndx]; - - while (hash != fbt) { - ASSERT(hash != NULL); - last = hash; - hash = hash->fbtp_hashnext; - } - - if (last != NULL) { - last->fbtp_hashnext = fbt->fbtp_hashnext; - } else { - fbt_probetab[ndx] = fbt->fbtp_hashnext; - } - - next = fbt->fbtp_next; - free(fbt, M_FBT); - + next = fbt->fbtp_probenext; + fbt_destroy_one(fbt); fbt = next; } while (fbt != NULL); } @@ -265,14 +278,16 @@ fbt_enable(void *arg, dtrace_id_t id, void *parg) return; } - for (; fbt != NULL; fbt = fbt->fbtp_next) + for (; fbt != NULL; fbt = fbt->fbtp_probenext) { fbt_patch_tracepoint(fbt, fbt->fbtp_patchval); + fbt->fbtp_enabled++; + } } static void fbt_disable(void *arg, dtrace_id_t id, void *parg) { - fbt_probe_t *fbt = parg; + fbt_probe_t *fbt = parg, *hash; modctl_t *ctl = fbt->fbtp_ctl; ASSERT(ctl->nenabled > 0); @@ -281,8 +296,21 @@ fbt_disable(void *arg, dtrace_id_t id, void *parg) if ((ctl->loadcnt != fbt->fbtp_loadcnt)) return; - for (; fbt != NULL; fbt = fbt->fbtp_next) - fbt_patch_tracepoint(fbt, fbt->fbtp_savedval); + for (; fbt != NULL; fbt = fbt->fbtp_probenext) { + fbt->fbtp_enabled--; + + for (hash = fbt_probetab[FBT_ADDR2NDX(fbt->fbtp_patchpoint)]; + hash != NULL; hash = hash->fbtp_hashnext) { + if (hash->fbtp_patchpoint == fbt->fbtp_patchpoint) { + for (; hash != NULL; hash = hash->fbtp_tracenext) + if (hash->fbtp_enabled > 0) + break; + break; + } + } + if (hash == NULL) + fbt_patch_tracepoint(fbt, fbt->fbtp_savedval); + } } static void @@ -296,7 +324,7 @@ fbt_suspend(void *arg, dtrace_id_t id, void *parg) if ((ctl->loadcnt != fbt->fbtp_loadcnt)) return; - for (; fbt != NULL; fbt = fbt->fbtp_next) + for (; fbt != NULL; fbt = fbt->fbtp_probenext) fbt_patch_tracepoint(fbt, fbt->fbtp_savedval); } @@ -311,7 +339,7 @@ fbt_resume(void *arg, dtrace_id_t id, void *parg) if ((ctl->loadcnt != fbt->fbtp_loadcnt)) return; - for (; fbt != NULL; fbt = 
fbt->fbtp_next) + for (; fbt != NULL; fbt = fbt->fbtp_probenext) fbt_patch_tracepoint(fbt, fbt->fbtp_patchval); } diff --git a/sys/cddl/dev/fbt/fbt.h b/sys/cddl/dev/fbt/fbt.h index f34025917b88..aa9bce564fa0 100644 --- a/sys/cddl/dev/fbt/fbt.h +++ b/sys/cddl/dev/fbt/fbt.h @@ -34,9 +34,18 @@ #include "fbt_isa.h" +/* + * fbt_probe is a bit of a misnomer. One of these structures is created for + * each trace point of an FBT probe. A probe might have multiple trace points + * (e.g., a function with multiple return instructions), and different probes + * might have a trace point at the same address (e.g., GNU ifuncs). + */ typedef struct fbt_probe { - struct fbt_probe *fbtp_hashnext; - fbt_patchval_t *fbtp_patchpoint; + struct fbt_probe *fbtp_hashnext; /* global hash table linkage */ + struct fbt_probe *fbtp_tracenext; /* next probe for tracepoint */ + struct fbt_probe *fbtp_probenext; /* next tracepoint for probe */ + int fbtp_enabled; + fbt_patchval_t *fbtp_patchpoint; int8_t fbtp_rval; fbt_patchval_t fbtp_patchval; fbt_patchval_t fbtp_savedval; @@ -46,7 +55,6 @@ typedef struct fbt_probe { modctl_t *fbtp_ctl; int fbtp_loadcnt; int fbtp_symindx; - struct fbt_probe *fbtp_next; } fbt_probe_t; struct linker_file; diff --git a/sys/cddl/dev/fbt/mips/fbt_isa.c b/sys/cddl/dev/fbt/mips/fbt_isa.c index f1213655d659..4dfc13498925 100644 --- a/sys/cddl/dev/fbt/mips/fbt_isa.c +++ b/sys/cddl/dev/fbt/mips/fbt_isa.c @@ -142,7 +142,7 @@ fbt_provide_module_function(linker_file_t lf, int symindx, fbt->fbtp_id = dtrace_probe_create(fbt_id, modname, name, FBT_RETURN, 3, fbt); } else { - retfbt->fbtp_next = fbt; + retfbt->fbtp_probenext = fbt; fbt->fbtp_id = retfbt->fbtp_id; } retfbt = fbt; diff --git a/sys/cddl/dev/fbt/powerpc/fbt_isa.c b/sys/cddl/dev/fbt/powerpc/fbt_isa.c index 502d427da57a..6072737fa1e2 100644 --- a/sys/cddl/dev/fbt/powerpc/fbt_isa.c +++ b/sys/cddl/dev/fbt/powerpc/fbt_isa.c @@ -208,7 +208,7 @@ fbt_provide_module_function(linker_file_t lf, int symindx, fbt->fbtp_id = 
dtrace_probe_create(fbt_id, modname, name, FBT_RETURN, FBT_AFRAMES, fbt); } else { - retfbt->fbtp_next = fbt; + retfbt->fbtp_probenext = fbt; fbt->fbtp_id = retfbt->fbtp_id; } diff --git a/sys/cddl/dev/fbt/riscv/fbt_isa.c b/sys/cddl/dev/fbt/riscv/fbt_isa.c index 981017727c18..519784bbfcdf 100644 --- a/sys/cddl/dev/fbt/riscv/fbt_isa.c +++ b/sys/cddl/dev/fbt/riscv/fbt_isa.c @@ -141,7 +141,7 @@ fbt_provide_module_function(linker_file_t lf, int symindx, fbt->fbtp_id = dtrace_probe_create(fbt_id, modname, name, FBT_RETURN, 3, fbt); } else { - retfbt->fbtp_next = fbt; + retfbt->fbtp_probenext = fbt; fbt->fbtp_id = retfbt->fbtp_id; } retfbt = fbt; diff --git a/sys/cddl/dev/fbt/x86/fbt_isa.c b/sys/cddl/dev/fbt/x86/fbt_isa.c index 794747618cec..74de00a3f00b 100644 --- a/sys/cddl/dev/fbt/x86/fbt_isa.c +++ b/sys/cddl/dev/fbt/x86/fbt_isa.c @@ -67,6 +67,7 @@ fbt_invop(uintptr_t addr, struct trapframe *frame, uintptr_t rval) uintptr_t *stack; uintptr_t arg0, arg1, arg2, arg3, arg4; fbt_probe_t *fbt; + int8_t fbtrval; #ifdef __amd64__ stack = (uintptr_t *)frame->tf_rsp; @@ -78,7 +79,11 @@ fbt_invop(uintptr_t addr, struct trapframe *frame, uintptr_t rval) cpu = &solaris_cpu[curcpu]; fbt = fbt_probetab[FBT_ADDR2NDX(addr)]; for (; fbt != NULL; fbt = fbt->fbtp_hashnext) { - if ((uintptr_t)fbt->fbtp_patchpoint == addr) { + if ((uintptr_t)fbt->fbtp_patchpoint != addr) + continue; + fbtrval = fbt->fbtp_rval; + for (; fbt != NULL; fbt = fbt->fbtp_tracenext) { + ASSERT(fbt->fbtp_rval == fbtrval); if (fbt->fbtp_roffset == 0) { #ifdef __amd64__ /* fbt->fbtp_rval == DTRACE_INVOP_PUSHQ_RBP */ @@ -135,9 +140,8 @@ fbt_invop(uintptr_t addr, struct trapframe *frame, uintptr_t rval) rval, 0, 0, 0); cpu->cpu_dtrace_caller = 0; } - - return (fbt->fbtp_rval); } + return (fbtrval); } return (0); @@ -162,7 +166,7 @@ fbt_provide_module_function(linker_file_t lf, int symindx, { char *modname = opaque; const char *name = symval->name; - fbt_probe_t *fbt, *retfbt; + fbt_probe_t *fbt, *hash, *retfbt; int j; 
int size; uint8_t *instr, *limit; @@ -224,8 +228,18 @@ fbt_provide_module_function(linker_file_t lf, int symindx, fbt->fbtp_patchval = FBT_PATCHVAL; fbt->fbtp_symindx = symindx; - fbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(instr)]; - fbt_probetab[FBT_ADDR2NDX(instr)] = fbt; + for (hash = fbt_probetab[FBT_ADDR2NDX(instr)]; hash != NULL; + hash = hash->fbtp_hashnext) { + if (hash->fbtp_patchpoint == fbt->fbtp_patchpoint) { + fbt->fbtp_tracenext = hash->fbtp_tracenext; + hash->fbtp_tracenext = fbt; + break; + } + } + if (hash == NULL) { + fbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(instr)]; + fbt_probetab[FBT_ADDR2NDX(instr)] = fbt; + } lf->fbt_nentries++; @@ -301,7 +315,7 @@ fbt_provide_module_function(linker_file_t lf, int symindx, fbt->fbtp_id = dtrace_probe_create(fbt_id, modname, name, FBT_RETURN, 3, fbt); } else { - retfbt->fbtp_next = fbt; + retfbt->fbtp_probenext = fbt; fbt->fbtp_id = retfbt->fbtp_id; } From fd036deac1695c4188a12f075bdf1280dc260b22 Mon Sep 17 00:00:00 2001 From: John Baldwin Date: Tue, 28 Aug 2018 21:09:19 +0000 Subject: [PATCH 38/51] Dynamically allocate IRQ ranges on x86. Previously, x86 used static ranges of IRQ values for different types of I/O interrupts. Interrupt pins on I/O APICs and 8259A PICs used IRQ values from 0 to 254. MSI interrupts used a compile-time-defined range starting at 256, and Xen event channels used a compile-time-defined range after MSI. Some recent systems have more than 255 I/O APIC interrupt pins which resulted in those IRQ values overflowing into the MSI range triggering an assertion failure. Replace statically assigned ranges with dynamic ranges. Do a single pass computing the sizes of the IRQ ranges (PICs, MSI, Xen) to determine the total number of IRQs required. Allocate the interrupt source and interrupt count arrays dynamically once this pass has completed. To minimize runtime complexity these arrays are only sized once during bootup. The PIC range is determined by the PICs present in the system. 
The MSI and Xen ranges continue to use a fixed size, though this does make it possible to turn the MSI range size into a tunable in the future. As a result, various places are updated to use dynamic limits instead of constants. In addition, the vmstat(8) utility has been taught to understand that some kernels may treat 'intrcnt' and 'intrnames' as pointers rather than arrays when extracting interrupt stats from a crashdump. This is determined by the presence (vs absence) of a global 'nintrcnt' symbol. This change reverts r189404 which worked around a buggy BIOS which enumerated an I/O APIC twice (using the same memory mapped address for both entries but using an IRQ base of 256 for one entry and a valid IRQ base for the second entry). Making the "base" of MSI IRQ values dynamic avoids the panic that r189404 worked around, and there may now be valid I/O APICs with an IRQ base above 256 which this workaround would incorrectly skip. If in the future the issue reported in PR 130483 reoccurs, we will have to add a pass over the I/O APIC entries in the MADT to detect duplicates using the memory mapped address and use some strategy to choose the "correct" one. While here, reserve room in intrcnts for the Hyper-V counters. 
PR: 229429, 130483 Reviewed by: kib, royger, cem Tested by: royger (Xen), kib (DMAR) Approved by: re (gjb) MFC after: 2 weeks Differential Revision: https://reviews.freebsd.org/D16861 --- sys/sys/interrupt.h | 5 ++ sys/x86/acpica/madt.c | 4 -- sys/x86/include/apicvar.h | 8 +-- sys/x86/include/intr_machdep.h | 70 +++++++++------------ sys/x86/iommu/intel_intrmap.c | 2 +- sys/x86/isa/atpic.c | 57 ++++++++++------- sys/x86/x86/intr_machdep.c | 112 +++++++++++++++++++++++++++------ sys/x86/x86/io_apic.c | 46 +++++++++----- sys/x86/x86/local_apic.c | 66 +++++++++++++------ sys/x86/x86/msi.c | 28 +++++---- sys/x86/x86/nexus.c | 4 +- sys/x86/xen/xen_intr.c | 55 ++++++++++------ sys/x86/xen/xen_msi.c | 9 ++- sys/x86/xen/xen_nexus.c | 2 +- usr.bin/vmstat/vmstat.c | 36 ++++++++++- 15 files changed, 340 insertions(+), 164 deletions(-) diff --git a/sys/sys/interrupt.h b/sys/sys/interrupt.h index d3432c078f51..105bb968a6b3 100644 --- a/sys/sys/interrupt.h +++ b/sys/sys/interrupt.h @@ -154,8 +154,13 @@ extern struct intr_event *clk_intr_event; extern void *vm_ih; /* Counts and names for statistics (defined in MD code). 
*/ +#if defined(__amd64__) || defined(__i386__) +extern u_long *intrcnt; /* counts for for each device and stray */ +extern char *intrnames; /* string table containing device names */ +#else extern u_long intrcnt[]; /* counts for for each device and stray */ extern char intrnames[]; /* string table containing device names */ +#endif extern size_t sintrcnt; /* size of intrcnt table */ extern size_t sintrnames; /* size of intrnames table */ diff --git a/sys/x86/acpica/madt.c b/sys/x86/acpica/madt.c index d9b58d3e7442..d590a639bf71 100644 --- a/sys/x86/acpica/madt.c +++ b/sys/x86/acpica/madt.c @@ -428,10 +428,6 @@ madt_parse_apics(ACPI_SUBTABLE_HEADER *entry, void *arg __unused) apic->Id); if (ioapics[apic->Id].io_apic != NULL) panic("%s: Double APIC ID %u", __func__, apic->Id); - if (apic->GlobalIrqBase >= FIRST_MSI_INT) { - printf("MADT: Ignoring bogus I/O APIC ID %u", apic->Id); - break; - } ioapics[apic->Id].io_apic = ioapic_create(apic->Address, apic->Id, apic->GlobalIrqBase); ioapics[apic->Id].io_vector = apic->GlobalIrqBase; diff --git a/sys/x86/include/apicvar.h b/sys/x86/include/apicvar.h index aecae5467309..7c80f62d0813 100644 --- a/sys/x86/include/apicvar.h +++ b/sys/x86/include/apicvar.h @@ -158,10 +158,10 @@ #define APIC_BUS_PCI 2 #define APIC_BUS_MAX APIC_BUS_PCI -#define IRQ_EXTINT (NUM_IO_INTS + 1) -#define IRQ_NMI (NUM_IO_INTS + 2) -#define IRQ_SMI (NUM_IO_INTS + 3) -#define IRQ_DISABLED (NUM_IO_INTS + 4) +#define IRQ_EXTINT -1 +#define IRQ_NMI -2 +#define IRQ_SMI -3 +#define IRQ_DISABLED -4 /* * An APIC enumerator is a pseudo bus driver that enumerates APIC's including diff --git a/sys/x86/include/intr_machdep.h b/sys/x86/include/intr_machdep.h index ee1c4418e026..d9e7f881ffde 100644 --- a/sys/x86/include/intr_machdep.h +++ b/sys/x86/include/intr_machdep.h @@ -34,55 +34,41 @@ #ifdef _KERNEL /* - * The maximum number of I/O interrupts we allow. This number is rather - * arbitrary as it is just the maximum IRQ resource value. 
The interrupt - * source for a given IRQ maps that I/O interrupt to device interrupt - * source whether it be a pin on an interrupt controller or an MSI interrupt. - * The 16 ISA IRQs are assigned fixed IDT vectors, but all other device - * interrupts allocate IDT vectors on demand. Currently we have 191 IDT - * vectors available for device interrupts. On many systems with I/O APICs, - * a lot of the IRQs are not used, so this number can be much larger than - * 191 and still be safe since only interrupt sources in actual use will - * allocate IDT vectors. + * Values used in determining the allocation of IRQ values among + * different types of I/O interrupts. These values are used as + * indices into an interrupt source array to map I/O interrupts to a + * device interrupt source whether it be a pin on an interrupt + * controller or an MSI interrupt. The 16 ISA IRQs are assigned fixed + * IDT vectors, but all other device interrupts allocate IDT vectors + * on demand. Currently we have 191 IDT vectors available for device + * interrupts on each CPU. On many systems with I/O APICs, a lot of + * the IRQs are not used, so the total number of IRQ values reserved + * can exceed the number of available IDT slots. * - * The first 255 IRQs (0 - 254) are reserved for ISA IRQs and PCI intline IRQs. - * IRQ values from 256 to 767 are used by MSI. When running under the Xen - * Hypervisor, IRQ values from 768 to 4863 are available for binding to - * event channel events. We leave 255 unused to avoid confusion since 255 is - * used in PCI to indicate an invalid IRQ. + * The first 16 IRQs (0 - 15) are reserved for ISA IRQs. Interrupt + * pins on I/O APICs for non-ISA interrupts use IRQ values starting at + * IRQ 16. This layout matches the GSI numbering used by ACPI so that + * IRQ values returned by ACPI methods such as _CRS can be used + * directly by the ACPI bus driver.
+ * + * MSI interrupts allocate a block of interrupts starting at either + * the end of the I/O APIC range or 256, whichever is higher. When + * running under the Xen Hypervisor, an additional range of IRQ values + * are available for binding to event channel events. We use 256 as + * the minimum IRQ value for MSI interrupts to attempt to leave 255 + * unused since 255 is used in PCI to indicate an invalid INTx IRQ. */ #define NUM_MSI_INTS 512 -#define FIRST_MSI_INT 256 -#ifdef XENHVM -#include -#include -#define NUM_EVTCHN_INTS NR_EVENT_CHANNELS -#define FIRST_EVTCHN_INT \ - (FIRST_MSI_INT + NUM_MSI_INTS) -#define LAST_EVTCHN_INT \ - (FIRST_EVTCHN_INT + NUM_EVTCHN_INTS - 1) -#else -#define NUM_EVTCHN_INTS 0 -#endif -#define NUM_IO_INTS (FIRST_MSI_INT + NUM_MSI_INTS + NUM_EVTCHN_INTS) +#define MINIMUM_MSI_INT 256 + +extern u_int first_msi_irq; +extern u_int num_io_irqs; /* * Default base address for MSI messages on x86 platforms. */ #define MSI_INTEL_ADDR_BASE 0xfee00000 -/* - * - 1 ??? dummy counter. - * - 2 counters for each I/O interrupt. - * - 1 counter for each CPU for lapic timer. - * - 8 counters for each CPU for IPI counters for SMP. - */ -#ifdef SMP -#define INTRCNT_COUNT (1 + NUM_IO_INTS * 2 + (1 + 8) * MAXCPU) -#else -#define INTRCNT_COUNT (1 + NUM_IO_INTS * 2 + 1) -#endif - #ifndef LOCORE typedef void inthand_t(void); @@ -97,6 +83,7 @@ struct intsrc; * return the vector associated with this source. 
*/ struct pic { + void (*pic_register_sources)(struct pic *); void (*pic_enable_source)(struct intsrc *); void (*pic_disable_source)(struct intsrc *, int); void (*pic_eoi_source)(struct intsrc *); @@ -184,6 +171,9 @@ int msi_map(int irq, uint64_t *addr, uint32_t *data); int msi_release(int *irqs, int count); int msix_alloc(device_t dev, int *irq); int msix_release(int irq); +#ifdef XENHVM +void xen_intr_alloc_irqs(void); +#endif #endif /* !LOCORE */ #endif /* _KERNEL */ diff --git a/sys/x86/iommu/intel_intrmap.c b/sys/x86/iommu/intel_intrmap.c index 32e11a8347e8..89d06b23a503 100644 --- a/sys/x86/iommu/intel_intrmap.c +++ b/sys/x86/iommu/intel_intrmap.c @@ -337,7 +337,7 @@ dmar_init_irt(struct dmar_unit *unit) "QI disabled, disabling interrupt remapping\n"); return (0); } - unit->irte_cnt = clp2(NUM_IO_INTS); + unit->irte_cnt = clp2(num_io_irqs); unit->irt = (dmar_irte_t *)(uintptr_t)kmem_alloc_contig( unit->irte_cnt * sizeof(dmar_irte_t), M_ZERO | M_WAITOK, 0, dmar_high, PAGE_SIZE, 0, DMAR_IS_COHERENT(unit) ? 
diff --git a/sys/x86/isa/atpic.c b/sys/x86/isa/atpic.c index 33a0dd4ca266..8560793df503 100644 --- a/sys/x86/isa/atpic.c +++ b/sys/x86/isa/atpic.c @@ -95,6 +95,7 @@ inthand_t #define ATPIC(io, base, eoi) { \ .at_pic = { \ + .pic_register_sources = atpic_register_sources, \ .pic_enable_source = atpic_enable_source, \ .pic_disable_source = atpic_disable_source, \ .pic_eoi_source = (eoi), \ @@ -133,6 +134,7 @@ struct atpic_intsrc { u_long at_straycount; }; +static void atpic_register_sources(struct pic *pic); static void atpic_enable_source(struct intsrc *isrc); static void atpic_disable_source(struct intsrc *isrc, int eoi); static void atpic_eoi_master(struct intsrc *isrc); @@ -202,6 +204,36 @@ _atpic_eoi_slave(struct intsrc *isrc) #endif } +static void +atpic_register_sources(struct pic *pic) +{ + struct atpic *ap = (struct atpic *)pic; + struct atpic_intsrc *ai; + int i; + + /* + * If any of the ISA IRQs have an interrupt source already, then + * assume that the I/O APICs are being used and don't register any + * of our interrupt sources. This makes sure we don't accidentally + * use mixed mode. The "accidental" use could otherwise occur on + * machines that route the ACPI SCI interrupt to a different ISA + * IRQ (at least one machine routes it to IRQ 13) thus disabling + * that APIC ISA routing and allowing the ATPIC source for that IRQ + * to leak through. We used to depend on this feature for routing + * IRQ0 via mixed mode, but now we don't use mixed mode at all. + */ + for (i = 0; i < NUM_ISA_IRQS; i++) + if (intr_lookup_source(i) != NULL) + return; + + /* Loop through all interrupt sources and add them. 
*/ + for (i = 0, ai = atintrs + ap->at_irqbase; i < 8; i++, ai++) { + if (ap->at_irqbase + i == ICU_SLAVEID) + continue; + intr_register_source(&ai->at_intsrc); + } +} + static void atpic_enable_source(struct intsrc *isrc) { @@ -467,8 +499,6 @@ atpic_startup(void) static void atpic_init(void *dummy __unused) { - struct atpic_intsrc *ai; - int i; /* * Register our PICs, even if we aren't going to use any of their @@ -478,27 +508,8 @@ atpic_init(void *dummy __unused) intr_register_pic(&atpics[1].at_pic) != 0) panic("Unable to register ATPICs"); - /* - * If any of the ISA IRQs have an interrupt source already, then - * assume that the APICs are being used and don't register any - * of our interrupt sources. This makes sure we don't accidentally - * use mixed mode. The "accidental" use could otherwise occur on - * machines that route the ACPI SCI interrupt to a different ISA - * IRQ (at least one machines routes it to IRQ 13) thus disabling - * that APIC ISA routing and allowing the ATPIC source for that IRQ - * to leak through. We used to depend on this feature for routing - * IRQ0 via mixed mode, but now we don't use mixed mode at all. - */ - for (i = 0; i < NUM_ISA_IRQS; i++) - if (intr_lookup_source(i) != NULL) - return; - - /* Loop through all interrupt sources and add them. 
*/ - for (i = 0, ai = atintrs; i < NUM_ISA_IRQS; i++, ai++) { - if (i == ICU_SLAVEID) - continue; - intr_register_source(&ai->at_intsrc); - } + if (num_io_irqs == 0) + num_io_irqs = NUM_ISA_IRQS; } SYSINIT(atpic_init, SI_SUB_INTR, SI_ORDER_FOURTH, atpic_init, NULL); diff --git a/sys/x86/x86/intr_machdep.c b/sys/x86/x86/intr_machdep.c index a05b0ef36496..a879d616d17a 100644 --- a/sys/x86/x86/intr_machdep.c +++ b/sys/x86/x86/intr_machdep.c @@ -38,6 +38,7 @@ #include "opt_atpic.h" #include "opt_ddb.h" +#include "opt_smp.h" #include #include @@ -45,6 +46,7 @@ #include #include #include +#include #include #include #include @@ -78,10 +80,9 @@ typedef void (*mask_fn)(void *); static int intrcnt_index; -static struct intsrc *interrupt_sources[NUM_IO_INTS]; +static struct intsrc **interrupt_sources; #ifdef SMP -static struct intsrc *interrupt_sorted[NUM_IO_INTS]; -CTASSERT(sizeof(interrupt_sources) == sizeof(interrupt_sorted)); +static struct intsrc **interrupt_sorted; static int intrbalance; SYSCTL_INT(_hw, OID_AUTO, intrbalance, CTLFLAG_RW, &intrbalance, 0, "Interrupt auto-balance interval (seconds). 
Zero disables."); @@ -91,15 +92,19 @@ static struct sx intrsrc_lock; static struct mtx intrpic_lock; static struct mtx intrcnt_lock; static TAILQ_HEAD(pics_head, pic) pics; +u_int num_io_irqs; #if defined(SMP) && !defined(EARLY_AP_STARTUP) static int assign_cpu; #endif -u_long intrcnt[INTRCNT_COUNT]; -char intrnames[INTRCNT_COUNT * (MAXCOMLEN + 1)]; +u_long *intrcnt; +char *intrnames; size_t sintrcnt = sizeof(intrcnt); size_t sintrnames = sizeof(intrnames); +int nintrcnt; + +static MALLOC_DEFINE(M_INTR, "intr", "Interrupt Sources"); static int intr_assign_cpu(void *arg, int cpu); static void intr_disable_src(void *arg); @@ -109,6 +114,18 @@ static void intrcnt_setname(const char *name, int index); static void intrcnt_updatename(struct intsrc *is); static void intrcnt_register(struct intsrc *is); +/* + * SYSINIT levels for SI_SUB_INTR: + * + * SI_ORDER_FIRST: Initialize locks and pics TAILQ, xen_hvm_cpu_init + * SI_ORDER_SECOND: Xen PICs + * SI_ORDER_THIRD: Add I/O APIC PICs, alloc MSI and Xen IRQ ranges + * SI_ORDER_FOURTH: Add 8259A PICs + * SI_ORDER_FOURTH + 1: Finalize interrupt count and add interrupt sources + * SI_ORDER_MIDDLE: SMP interrupt counters + * SI_ORDER_ANY: Enable interrupts on BSP + */ + static int intr_pic_registered(struct pic *pic) { @@ -143,6 +160,58 @@ intr_register_pic(struct pic *pic) return (error); } +/* + * Allocate interrupt source arrays and register interrupt sources + * once the number of interrupts is known. + */ +static void +intr_init_sources(void *arg) +{ + struct pic *pic; + + MPASS(num_io_irqs > 0); + + interrupt_sources = mallocarray(num_io_irqs, sizeof(*interrupt_sources), + M_INTR, M_WAITOK | M_ZERO); + interrupt_sorted = mallocarray(num_io_irqs, sizeof(*interrupt_sorted), + M_INTR, M_WAITOK | M_ZERO); + + /* + * - 1 ??? dummy counter. + * - 2 counters for each I/O interrupt. + * - 1 counter for each CPU for lapic timer. + * - 1 counter for each CPU for the Hyper-V vmbus driver. 
+ * - 8 counters for each CPU for IPI counters for SMP. + */ + nintrcnt = 1 + num_io_irqs * 2 + mp_ncpus * 2; +#ifdef COUNT_IPIS + if (mp_ncpus > 1) + nintrcnt += 8 * mp_ncpus; +#endif + intrcnt = mallocarray(nintrcnt, sizeof(u_long), M_INTR, M_WAITOK | + M_ZERO); + intrnames = mallocarray(nintrcnt, MAXCOMLEN + 1, M_INTR, M_WAITOK | + M_ZERO); + sintrcnt = nintrcnt * sizeof(u_long); + sintrnames = nintrcnt * (MAXCOMLEN + 1); + + intrcnt_setname("???", 0); + intrcnt_index = 1; + + /* + * NB: intrpic_lock is not held here to avoid LORs due to + * malloc() in intr_register_source(). However, we are still + * single-threaded at this point in startup so the list of + * PICs shouldn't change. + */ + TAILQ_FOREACH(pic, &pics, pics) { + if (pic->pic_register_sources != NULL) + pic->pic_register_sources(pic); + } +} +SYSINIT(intr_init_sources, SI_SUB_INTR, SI_ORDER_FOURTH + 1, intr_init_sources, + NULL); + /* * Register a new interrupt source with the global interrupt system. * The global interrupts need to be disabled when this function is @@ -155,6 +224,8 @@ intr_register_source(struct intsrc *isrc) KASSERT(intr_pic_registered(isrc->is_pic), ("unregistered PIC")); vector = isrc->is_pic->pic_vector(isrc); + KASSERT(vector < num_io_irqs, ("IRQ %d too large (%u irqs)", vector, + num_io_irqs)); if (interrupt_sources[vector] != NULL) return (EEXIST); error = intr_event_create(&isrc->is_event, isrc, 0, vector, @@ -180,7 +251,7 @@ struct intsrc * intr_lookup_source(int vector) { - if (vector < 0 || vector >= nitems(interrupt_sources)) + if (vector < 0 || vector >= num_io_irqs) return (NULL); return (interrupt_sources[vector]); } @@ -378,6 +449,7 @@ intrcnt_register(struct intsrc *is) KASSERT(is->is_event != NULL, ("%s: isrc with no event", __func__)); mtx_lock_spin(&intrcnt_lock); + MPASS(intrcnt_index + 2 <= nintrcnt); is->is_index = intrcnt_index; intrcnt_index += 2; snprintf(straystr, MAXCOMLEN + 1, "stray irq%d", @@ -394,6 +466,7 @@ intrcnt_add(const char *name, u_long 
**countp) { mtx_lock_spin(&intrcnt_lock); + MPASS(intrcnt_index < nintrcnt); *countp = &intrcnt[intrcnt_index]; intrcnt_setname(name, intrcnt_index); intrcnt_index++; @@ -404,8 +477,6 @@ static void intr_init(void *dummy __unused) { - intrcnt_setname("???", 0); - intrcnt_index = 1; TAILQ_INIT(&pics); mtx_init(&intrpic_lock, "intrpic", NULL, MTX_DEF); sx_init(&intrsrc_lock, "intrsrc"); @@ -471,10 +542,10 @@ void intr_reprogram(void) { struct intsrc *is; - int v; + u_int v; sx_xlock(&intrsrc_lock); - for (v = 0; v < NUM_IO_INTS; v++) { + for (v = 0; v < num_io_irqs; v++) { is = interrupt_sources[v]; if (is == NULL) continue; @@ -491,14 +562,15 @@ intr_reprogram(void) DB_SHOW_COMMAND(irqs, db_show_irqs) { struct intsrc **isrc; - int i, verbose; + u_int i; + int verbose; if (strcmp(modif, "v") == 0) verbose = 1; else verbose = 0; isrc = interrupt_sources; - for (i = 0; i < NUM_IO_INTS && !db_pager_quit; i++, isrc++) + for (i = 0; i < num_io_irqs && !db_pager_quit; i++, isrc++) if (*isrc != NULL) db_dump_intr_event((*isrc)->is_event, verbose); } @@ -606,8 +678,7 @@ static void intr_shuffle_irqs(void *arg __unused) { struct intsrc *isrc; - u_int cpu; - int i; + u_int cpu, i; intr_init_cpus(); /* Don't bother on UP. */ @@ -617,7 +688,7 @@ intr_shuffle_irqs(void *arg __unused) /* Round-robin assign a CPU to each enabled source. 
*/ sx_xlock(&intrsrc_lock); assign_cpu = 1; - for (i = 0; i < NUM_IO_INTS; i++) { + for (i = 0; i < num_io_irqs; i++) { isrc = interrupt_sources[i]; if (isrc != NULL && isrc->is_handlers > 0) { /* @@ -652,8 +723,8 @@ sysctl_hw_intrs(SYSCTL_HANDLER_ARGS) { struct sbuf sbuf; struct intsrc *isrc; + u_int i; int error; - int i; error = sysctl_wire_old_buffer(req, 0); if (error != 0) @@ -661,7 +732,7 @@ sysctl_hw_intrs(SYSCTL_HANDLER_ARGS) sbuf_new_for_sysctl(&sbuf, NULL, 128, req); sx_slock(&intrsrc_lock); - for (i = 0; i < NUM_IO_INTS; i++) { + for (i = 0; i < num_io_irqs; i++) { isrc = interrupt_sources[i]; if (isrc == NULL) continue; @@ -720,8 +791,9 @@ intr_balance(void *dummy __unused, int pending __unused) * Sort interrupts according to count. */ sx_xlock(&intrsrc_lock); - memcpy(interrupt_sorted, interrupt_sources, sizeof(interrupt_sorted)); - qsort(interrupt_sorted, NUM_IO_INTS, sizeof(interrupt_sorted[0]), + memcpy(interrupt_sorted, interrupt_sources, num_io_irqs * + sizeof(interrupt_sorted[0])); + qsort(interrupt_sorted, num_io_irqs, sizeof(interrupt_sorted[0]), intrcmp); /* @@ -733,7 +805,7 @@ intr_balance(void *dummy __unused, int pending __unused) /* * Assign round-robin from most loaded to least. 
*/ - for (i = NUM_IO_INTS - 1; i >= 0; i--) { + for (i = num_io_irqs - 1; i >= 0; i--) { isrc = interrupt_sorted[i]; if (isrc == NULL || isrc->is_event->ie_cpu != NOCPU) continue; diff --git a/sys/x86/x86/io_apic.c b/sys/x86/x86/io_apic.c index 81d91044aa24..f33a55d7acf4 100644 --- a/sys/x86/x86/io_apic.c +++ b/sys/x86/x86/io_apic.c @@ -80,7 +80,7 @@ static MALLOC_DEFINE(M_IOAPIC, "io_apic", "I/O APIC structures"); struct ioapic_intsrc { struct intsrc io_intsrc; - u_int io_irq; + int io_irq; u_int io_intpin:8; u_int io_vector:8; u_int io_cpu; @@ -112,6 +112,7 @@ static u_int ioapic_read(volatile ioapic_t *apic, int reg); static void ioapic_write(volatile ioapic_t *apic, int reg, u_int val); static const char *ioapic_bus_string(int bus_type); static void ioapic_print_irq(struct ioapic_intsrc *intpin); +static void ioapic_register_sources(struct pic *pic); static void ioapic_enable_source(struct intsrc *isrc); static void ioapic_disable_source(struct intsrc *isrc, int eoi); static void ioapic_eoi_source(struct intsrc *isrc); @@ -128,6 +129,7 @@ static void ioapic_reprogram_intpin(struct intsrc *isrc); static STAILQ_HEAD(,ioapic) ioapic_list = STAILQ_HEAD_INITIALIZER(ioapic_list); struct pic ioapic_template = { + .pic_register_sources = ioapic_register_sources, .pic_enable_source = ioapic_enable_source, .pic_disable_source = ioapic_disable_source, .pic_eoi_source = ioapic_eoi_source, @@ -142,7 +144,7 @@ struct pic ioapic_template = { .pic_reprogram_pin = ioapic_reprogram_intpin, }; -static int next_ioapic_base; +static u_int next_ioapic_base; static u_int next_id; static int enable_extint; @@ -250,7 +252,7 @@ ioapic_print_irq(struct ioapic_intsrc *intpin) printf("SMI"); break; default: - printf("%s IRQ %u", ioapic_bus_string(intpin->io_bus), + printf("%s IRQ %d", ioapic_bus_string(intpin->io_bus), intpin->io_irq); } } @@ -318,7 +320,7 @@ ioapic_program_intpin(struct ioapic_intsrc *intpin) * been enabled yet, just ensure that the pin is masked. 
*/ mtx_assert(&icu_lock, MA_OWNED); - if (intpin->io_irq == IRQ_DISABLED || (intpin->io_irq < NUM_IO_INTS && + if (intpin->io_irq == IRQ_DISABLED || (intpin->io_irq >= 0 && intpin->io_vector == 0)) { low = ioapic_read(io->io_addr, IOAPIC_REDTBL_LO(intpin->io_intpin)); @@ -651,6 +653,8 @@ ioapic_create(vm_paddr_t addr, int32_t apic_id, int intbase) io->io_id, intbase, next_ioapic_base); io->io_intbase = intbase; next_ioapic_base = intbase + numintr; + if (next_ioapic_base > num_io_irqs) + num_io_irqs = next_ioapic_base; io->io_numintr = numintr; io->io_addr = apic; io->io_paddr = addr; @@ -759,7 +763,7 @@ ioapic_remap_vector(void *cookie, u_int pin, int vector) io = (struct ioapic *)cookie; if (pin >= io->io_numintr || vector < 0) return (EINVAL); - if (io->io_pins[pin].io_irq >= NUM_IO_INTS) + if (io->io_pins[pin].io_irq < 0) return (EINVAL); io->io_pins[pin].io_irq = vector; if (bootverbose) @@ -778,7 +782,7 @@ ioapic_set_bus(void *cookie, u_int pin, int bus_type) io = (struct ioapic *)cookie; if (pin >= io->io_numintr) return (EINVAL); - if (io->io_pins[pin].io_irq >= NUM_IO_INTS) + if (io->io_pins[pin].io_irq < 0) return (EINVAL); if (io->io_pins[pin].io_bus == bus_type) return (0); @@ -799,7 +803,7 @@ ioapic_set_nmi(void *cookie, u_int pin) return (EINVAL); if (io->io_pins[pin].io_irq == IRQ_NMI) return (0); - if (io->io_pins[pin].io_irq >= NUM_IO_INTS) + if (io->io_pins[pin].io_irq < 0) return (EINVAL); io->io_pins[pin].io_bus = APIC_BUS_UNKNOWN; io->io_pins[pin].io_irq = IRQ_NMI; @@ -822,7 +826,7 @@ ioapic_set_smi(void *cookie, u_int pin) return (EINVAL); if (io->io_pins[pin].io_irq == IRQ_SMI) return (0); - if (io->io_pins[pin].io_irq >= NUM_IO_INTS) + if (io->io_pins[pin].io_irq < 0) return (EINVAL); io->io_pins[pin].io_bus = APIC_BUS_UNKNOWN; io->io_pins[pin].io_irq = IRQ_SMI; @@ -845,7 +849,7 @@ ioapic_set_extint(void *cookie, u_int pin) return (EINVAL); if (io->io_pins[pin].io_irq == IRQ_EXTINT) return (0); - if (io->io_pins[pin].io_irq >= NUM_IO_INTS) + 
if (io->io_pins[pin].io_irq < 0) return (EINVAL); io->io_pins[pin].io_bus = APIC_BUS_UNKNOWN; io->io_pins[pin].io_irq = IRQ_EXTINT; @@ -870,7 +874,7 @@ ioapic_set_polarity(void *cookie, u_int pin, enum intr_polarity pol) io = (struct ioapic *)cookie; if (pin >= io->io_numintr || pol == INTR_POLARITY_CONFORM) return (EINVAL); - if (io->io_pins[pin].io_irq >= NUM_IO_INTS) + if (io->io_pins[pin].io_irq < 0) return (EINVAL); activehi = (pol == INTR_POLARITY_HIGH); if (io->io_pins[pin].io_activehi == activehi) @@ -891,7 +895,7 @@ ioapic_set_triggermode(void *cookie, u_int pin, enum intr_trigger trigger) io = (struct ioapic *)cookie; if (pin >= io->io_numintr || trigger == INTR_TRIGGER_CONFORM) return (EINVAL); - if (io->io_pins[pin].io_irq >= NUM_IO_INTS) + if (io->io_pins[pin].io_irq < 0) return (EINVAL); edgetrigger = (trigger == INTR_TRIGGER_EDGE); if (io->io_pins[pin].io_edgetrigger == edgetrigger) @@ -927,12 +931,26 @@ ioapic_register(void *cookie) /* * Reprogram pins to handle special case pins (such as NMI and - * SMI) and register valid pins as interrupt sources. + * SMI) and disable normal pins until a handler is registered. */ intr_register_pic(&io->io_pic); - for (i = 0, pin = io->io_pins; i < io->io_numintr; i++, pin++) { + for (i = 0, pin = io->io_pins; i < io->io_numintr; i++, pin++) ioapic_reprogram_intpin(&pin->io_intsrc); - if (pin->io_irq < NUM_IO_INTS) +} + +/* + * Add interrupt sources for I/O APIC interrupt pins. 
+ */ +static void +ioapic_register_sources(struct pic *pic) +{ + struct ioapic_intsrc *pin; + struct ioapic *io; + int i; + + io = (struct ioapic *)pic; + for (i = 0, pin = io->io_pins; i < io->io_numintr; i++, pin++) { + if (pin->io_irq >= 0) intr_register_source(&pin->io_intsrc); } } diff --git a/sys/x86/x86/local_apic.c b/sys/x86/x86/local_apic.c index 7e3fcac1e78e..92e71977da4f 100644 --- a/sys/x86/x86/local_apic.c +++ b/sys/x86/x86/local_apic.c @@ -92,11 +92,16 @@ CTASSERT(APIC_TIMER_INT < APIC_LOCAL_INTS); CTASSERT(APIC_LOCAL_INTS == 240); CTASSERT(IPI_STOP < APIC_SPURIOUS_INT); -/* Magic IRQ values for the timer and syscalls. */ -#define IRQ_TIMER (NUM_IO_INTS + 1) -#define IRQ_SYSCALL (NUM_IO_INTS + 2) -#define IRQ_DTRACE_RET (NUM_IO_INTS + 3) -#define IRQ_EVTCHN (NUM_IO_INTS + 4) +/* + * I/O interrupts use non-negative IRQ values. These values are used + * to mark unused IDT entries or IDT entries reserved for a non-I/O + * interrupt. + */ +#define IRQ_FREE -1 +#define IRQ_TIMER -2 +#define IRQ_SYSCALL -3 +#define IRQ_DTRACE_RET -4 +#define IRQ_EVTCHN -5 enum lat_timer_mode { LAT_MODE_UNDEF = 0, @@ -648,7 +653,7 @@ native_lapic_create(u_int apic_id, int boot_cpu) lapics[apic_id].la_elvts[i].lvt_active = 0; } for (i = 0; i <= APIC_NUM_IOINTS; i++) - lapics[apic_id].la_ioint_irqs[i] = -1; + lapics[apic_id].la_ioint_irqs[i] = IRQ_FREE; lapics[apic_id].la_ioint_irqs[IDT_SYSCALL - APIC_IO_INTS] = IRQ_SYSCALL; lapics[apic_id].la_ioint_irqs[APIC_TIMER_INT - APIC_IO_INTS] = IRQ_TIMER; @@ -751,7 +756,6 @@ native_lapic_setup(int boot) uint32_t version; uint32_t maxlvt; register_t saveintr; - char buf[MAXCOMLEN + 1]; int elvt_count; int i; @@ -780,15 +784,11 @@ native_lapic_setup(int boot) LAPIC_LVT_PCINT)); } - /* Program timer LVT and setup handler. */ + /* Program timer LVT. 
*/ la->lvt_timer_base = lvt_mode(la, APIC_LVT_TIMER, lapic_read32(LAPIC_LVT_TIMER)); la->lvt_timer_last = la->lvt_timer_base; lapic_write32(LAPIC_LVT_TIMER, la->lvt_timer_base); - if (boot) { - snprintf(buf, sizeof(buf), "cpu%d:timer", PCPU_GET(cpuid)); - intrcnt_add(buf, &la->la_timer_count); - } /* Calibrate the timer parameters using BSP. */ if (boot && IS_BSP()) { @@ -842,6 +842,28 @@ native_lapic_setup(int boot) intr_restore(saveintr); } +static void +native_lapic_intrcnt(void *dummy __unused) +{ + struct pcpu *pc; + struct lapic *la; + char buf[MAXCOMLEN + 1]; + + /* If there are no APICs, skip this function. */ + if (lapics == NULL) + return; + + STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { + la = &lapics[pc->pc_apic_id]; + KASSERT(la->la_present, ("missing APIC structure")); + + snprintf(buf, sizeof(buf), "cpu%d:timer", pc->pc_cpuid); + intrcnt_add(buf, &la->la_timer_count); + } +} +SYSINIT(native_lapic_intrcnt, SI_SUB_INTR, SI_ORDER_MIDDLE, native_lapic_intrcnt, + NULL); + static void native_lapic_reenable_pmc(void) { @@ -1493,7 +1515,7 @@ native_apic_alloc_vector(u_int apic_id, u_int irq) { u_int vector; - KASSERT(irq < NUM_IO_INTS, ("Invalid IRQ %u", irq)); + KASSERT(irq < num_io_irqs, ("Invalid IRQ %u", irq)); /* * Search for a free vector. 
Currently we just use a very simple @@ -1501,7 +1523,7 @@ native_apic_alloc_vector(u_int apic_id, u_int irq) */ mtx_lock_spin(&icu_lock); for (vector = 0; vector < APIC_NUM_IOINTS; vector++) { - if (lapics[apic_id].la_ioint_irqs[vector] != -1) + if (lapics[apic_id].la_ioint_irqs[vector] != IRQ_FREE) continue; lapics[apic_id].la_ioint_irqs[vector] = irq; mtx_unlock_spin(&icu_lock); @@ -1527,7 +1549,7 @@ native_apic_alloc_vectors(u_int apic_id, u_int *irqs, u_int count, u_int align) KASSERT(align >= count, ("align < count")); #ifdef INVARIANTS for (run = 0; run < count; run++) - KASSERT(irqs[run] < NUM_IO_INTS, ("Invalid IRQ %u at index %u", + KASSERT(irqs[run] < num_io_irqs, ("Invalid IRQ %u at index %u", irqs[run], run)); #endif @@ -1541,7 +1563,7 @@ native_apic_alloc_vectors(u_int apic_id, u_int *irqs, u_int count, u_int align) for (vector = 0; vector < APIC_NUM_IOINTS; vector++) { /* Vector is in use, end run. */ - if (lapics[apic_id].la_ioint_irqs[vector] != -1) { + if (lapics[apic_id].la_ioint_irqs[vector] != IRQ_FREE) { run = 0; first = 0; continue; @@ -1622,7 +1644,7 @@ native_apic_free_vector(u_int apic_id, u_int vector, u_int irq) KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL && vector <= APIC_IO_INTS + APIC_NUM_IOINTS, ("Vector %u does not map to an IRQ line", vector)); - KASSERT(irq < NUM_IO_INTS, ("Invalid IRQ %u", irq)); + KASSERT(irq < num_io_irqs, ("Invalid IRQ %u", irq)); KASSERT(lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] == irq, ("IRQ mismatch")); #ifdef KDTRACE_HOOKS @@ -1643,7 +1665,7 @@ native_apic_free_vector(u_int apic_id, u_int vector, u_int irq) thread_unlock(td); } mtx_lock_spin(&icu_lock); - lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] = -1; + lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] = IRQ_FREE; mtx_unlock_spin(&icu_lock); if (!rebooting) { thread_lock(td); @@ -1694,7 +1716,7 @@ DB_SHOW_COMMAND(apic, db_show_apic) db_printf("Interrupts bound to lapic %u\n", apic_id); for (i = 0; i < APIC_NUM_IOINTS + 
1 && !db_pager_quit; i++) { irq = lapics[apic_id].la_ioint_irqs[i]; - if (irq == -1 || irq == IRQ_SYSCALL) + if (irq == IRQ_FREE || irq == IRQ_SYSCALL) continue; #ifdef KDTRACE_HOOKS if (irq == IRQ_DTRACE_RET) @@ -1707,7 +1729,7 @@ DB_SHOW_COMMAND(apic, db_show_apic) db_printf("vec 0x%2x -> ", i + APIC_IO_INTS); if (irq == IRQ_TIMER) db_printf("lapic timer\n"); - else if (irq < NUM_IO_INTS) { + else if (irq < num_io_irqs) { isrc = intr_lookup_source(irq); if (isrc == NULL || verbose == 0) db_printf("IRQ %u\n", irq); @@ -1934,6 +1956,10 @@ apic_setup_io(void *dummy __unused) /* Enable the MSI "pic". */ init_ops.msi_init(); + +#ifdef XENHVM + xen_intr_alloc_irqs(); +#endif } SYSINIT(apic_setup_io, SI_SUB_INTR, SI_ORDER_THIRD, apic_setup_io, NULL); diff --git a/sys/x86/x86/msi.c b/sys/x86/x86/msi.c index 0890ab31f947..1c5502cedde6 100644 --- a/sys/x86/x86/msi.c +++ b/sys/x86/x86/msi.c @@ -120,7 +120,7 @@ struct msi_intsrc { u_int msi_cpu; /* Local APIC ID. (g) */ u_int msi_count:8; /* Messages in this group. (g) */ u_int msi_maxcount:8; /* Alignment for this group. (g) */ - int *msi_irqs; /* Group's IRQ list. (g) */ + u_int *msi_irqs; /* Group's IRQ list. 
(g) */ u_int msi_remap_cookie; }; @@ -151,6 +151,8 @@ struct pic msi_pic = { .pic_reprogram_pin = NULL, }; +u_int first_msi_irq; + #ifdef SMP /** * Xen hypervisors prior to 4.6.0 do not properly handle updates to @@ -168,7 +170,7 @@ SYSCTL_INT(_machdep, OID_AUTO, disable_msix_migration, CTLFLAG_RDTUN, #endif static int msi_enabled; -static int msi_last_irq; +static u_int msi_last_irq; static struct mtx msi_lock; static void @@ -329,6 +331,10 @@ msi_init(void) } #endif + MPASS(num_io_irqs > 0); + first_msi_irq = max(MINIMUM_MSI_INT, num_io_irqs); + num_io_irqs = first_msi_irq + NUM_MSI_INTS; + msi_enabled = 1; intr_register_pic(&msi_pic); mtx_init(&msi_lock, "msi", NULL, MTX_DEF); @@ -345,7 +351,7 @@ msi_create_source(void) mtx_unlock(&msi_lock); return; } - irq = msi_last_irq + FIRST_MSI_INT; + irq = msi_last_irq + first_msi_irq; msi_last_irq++; mtx_unlock(&msi_lock); @@ -363,8 +369,8 @@ int msi_alloc(device_t dev, int count, int maxcount, int *irqs) { struct msi_intsrc *msi, *fsrc; - u_int cpu, domain; - int cnt, i, *mirqs, vector; + u_int cpu, domain, *mirqs; + int cnt, i, vector; #ifdef ACPI_DMAR u_int cookies[count]; int error; @@ -385,7 +391,7 @@ msi_alloc(device_t dev, int count, int maxcount, int *irqs) /* Try to find 'count' free IRQs. */ cnt = 0; - for (i = FIRST_MSI_INT; i < FIRST_MSI_INT + NUM_MSI_INTS; i++) { + for (i = first_msi_irq; i < first_msi_irq + NUM_MSI_INTS; i++) { msi = (struct msi_intsrc *)intr_lookup_source(i); /* End of allocated sources, so break. */ @@ -404,7 +410,7 @@ msi_alloc(device_t dev, int count, int maxcount, int *irqs) /* Do we need to create some new sources? */ if (cnt < count) { /* If we would exceed the max, give up. 
*/ - if (i + (count - cnt) >= FIRST_MSI_INT + NUM_MSI_INTS) { + if (i + (count - cnt) >= first_msi_irq + NUM_MSI_INTS) { mtx_unlock(&msi_lock); free(mirqs, M_MSI); return (ENXIO); @@ -579,8 +585,8 @@ msi_map(int irq, uint64_t *addr, uint32_t *data) #ifdef ACPI_DMAR if (!msi->msi_msix) { - for (k = msi->msi_count - 1, i = FIRST_MSI_INT; k > 0 && - i < FIRST_MSI_INT + NUM_MSI_INTS; i++) { + for (k = msi->msi_count - 1, i = first_msi_irq; k > 0 && + i < first_msi_irq + NUM_MSI_INTS; i++) { if (i == msi->msi_irq) continue; msi1 = (struct msi_intsrc *)intr_lookup_source(i); @@ -630,7 +636,7 @@ msix_alloc(device_t dev, int *irq) mtx_lock(&msi_lock); /* Find a free IRQ. */ - for (i = FIRST_MSI_INT; i < FIRST_MSI_INT + NUM_MSI_INTS; i++) { + for (i = first_msi_irq; i < first_msi_irq + NUM_MSI_INTS; i++) { msi = (struct msi_intsrc *)intr_lookup_source(i); /* End of allocated sources, so break. */ @@ -645,7 +651,7 @@ msix_alloc(device_t dev, int *irq) /* Do we need to create a new source? */ if (msi == NULL) { /* If we would exceed the max, give up. */ - if (i + 1 >= FIRST_MSI_INT + NUM_MSI_INTS) { + if (i + 1 >= first_msi_irq + NUM_MSI_INTS) { mtx_unlock(&msi_lock); return (ENXIO); } diff --git a/sys/x86/x86/nexus.c b/sys/x86/x86/nexus.c index 9bceff204863..4762d5ae2cfb 100644 --- a/sys/x86/x86/nexus.c +++ b/sys/x86/x86/nexus.c @@ -223,7 +223,7 @@ nexus_init_resources(void) irq_rman.rm_start = 0; irq_rman.rm_type = RMAN_ARRAY; irq_rman.rm_descr = "Interrupt request lines"; - irq_rman.rm_end = NUM_IO_INTS - 1; + irq_rman.rm_end = num_io_irqs - 1; if (rman_init(&irq_rman)) panic("nexus_init_resources irq_rman"); @@ -231,7 +231,7 @@ nexus_init_resources(void) * We search for regions of existing IRQs and add those to the IRQ * resource manager. 
*/ - for (irq = 0; irq < NUM_IO_INTS; irq++) + for (irq = 0; irq < num_io_irqs; irq++) if (intr_lookup_source(irq) != NULL) if (rman_manage_region(&irq_rman, irq, irq) != 0) panic("nexus_init_resources irq_rman add"); diff --git a/sys/x86/xen/xen_intr.c b/sys/x86/xen/xen_intr.c index f823b9303f62..559f3192255a 100644 --- a/sys/x86/xen/xen_intr.c +++ b/sys/x86/xen/xen_intr.c @@ -59,6 +59,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include #include @@ -72,6 +73,8 @@ __FBSDID("$FreeBSD$"); static MALLOC_DEFINE(M_XENINTR, "xen_intr", "Xen Interrupt Services"); +static u_int first_evtchn_irq; + /** * Per-cpu event channel processing state. */ @@ -187,7 +190,7 @@ struct pic xen_intr_pirq_pic = { }; static struct mtx xen_intr_isrc_lock; -static int xen_intr_auto_vector_count; +static u_int xen_intr_auto_vector_count; static struct xenisrc *xen_intr_port_to_isrc[NR_EVENT_CHANNELS]; static u_long *xen_intr_pirq_eoi_map; static boolean_t xen_intr_pirq_eoi_map_enabled; @@ -276,7 +279,7 @@ xen_intr_find_unused_isrc(enum evtchn_type type) struct xenisrc *isrc; u_int vector; - vector = FIRST_EVTCHN_INT + isrc_idx; + vector = first_evtchn_irq + isrc_idx; isrc = (struct xenisrc *)intr_lookup_source(vector); if (isrc != NULL && isrc->xi_type == EVTCHN_TYPE_UNBOUND) { @@ -314,7 +317,7 @@ xen_intr_alloc_isrc(enum evtchn_type type, int vector) } if (type != EVTCHN_TYPE_PIRQ) { - vector = FIRST_EVTCHN_INT + xen_intr_auto_vector_count; + vector = first_evtchn_irq + xen_intr_auto_vector_count; xen_intr_auto_vector_count++; } @@ -473,8 +476,8 @@ xen_intr_isrc(xen_intr_handle_t handle) return (NULL); vector = *(int *)handle; - KASSERT(vector >= FIRST_EVTCHN_INT && - vector < (FIRST_EVTCHN_INT + xen_intr_auto_vector_count), + KASSERT(vector >= first_evtchn_irq && + vector < (first_evtchn_irq + xen_intr_auto_vector_count), ("Xen interrupt vector is out of range")); return ((struct xenisrc *)intr_lookup_source(vector)); @@ -631,17 +634,13 @@ xen_intr_init(void *dummy 
__unused) mtx_init(&xen_intr_isrc_lock, "xen-irq-lock", NULL, MTX_DEF); /* - * Register interrupt count manually as we aren't - * guaranteed to see a call to xen_intr_assign_cpu() - * before our first interrupt. Also set the per-cpu - * mask of CPU#0 to enable all, since by default - * all event channels are bound to CPU#0. + * Set the per-cpu mask of CPU#0 to enable all, since by default all + * event channels are bound to CPU#0. */ CPU_FOREACH(i) { pcpu = DPCPU_ID_PTR(i, xen_intr_pcpu); memset(pcpu->evtchn_enabled, i == 0 ? ~0 : 0, sizeof(pcpu->evtchn_enabled)); - xen_intr_intrcnt_add(i); } for (i = 0; i < nitems(s->evtchn_mask); i++) @@ -666,6 +665,31 @@ xen_intr_init(void *dummy __unused) } SYSINIT(xen_intr_init, SI_SUB_INTR, SI_ORDER_SECOND, xen_intr_init, NULL); +static void +xen_intrcnt_init(void *dummy __unused) +{ + unsigned int i; + + if (!xen_domain()) + return; + + /* + * Register interrupt count manually as we aren't guaranteed to see a + * call to xen_intr_assign_cpu() before our first interrupt. + */ + CPU_FOREACH(i) + xen_intr_intrcnt_add(i); +} +SYSINIT(xen_intrcnt_init, SI_SUB_INTR, SI_ORDER_MIDDLE, xen_intrcnt_init, NULL); + +void +xen_intr_alloc_irqs(void) +{ + + first_evtchn_irq = num_io_irqs; + num_io_irqs += NR_EVENT_CHANNELS; +} + /*--------------------------- Common PIC Functions ---------------------------*/ /** * Prepare this PIC for system suspension. 
@@ -768,7 +792,7 @@ xen_intr_resume(struct pic *unused, bool suspend_cancelled) for (isrc_idx = 0; isrc_idx < xen_intr_auto_vector_count; isrc_idx++) { u_int vector; - vector = FIRST_EVTCHN_INT + isrc_idx; + vector = first_evtchn_irq + isrc_idx; isrc = (struct xenisrc *)intr_lookup_source(vector); if (isrc != NULL) { isrc->xi_port = 0; @@ -872,7 +896,6 @@ xen_intr_assign_cpu(struct intsrc *base_isrc, u_int apic_id) to_cpu = apic_cpuid(apic_id); vcpu_id = pcpu_find(to_cpu)->pc_vcpu_id; - xen_intr_intrcnt_add(to_cpu); mtx_lock(&xen_intr_isrc_lock); isrc = (struct xenisrc *)base_isrc; @@ -1273,9 +1296,6 @@ xen_intr_bind_virq(device_t dev, u_int virq, u_int cpu, struct evtchn_bind_virq bind_virq = { .virq = virq, .vcpu = vcpu_id }; int error; - /* Ensure the target CPU is ready to handle evtchn interrupts. */ - xen_intr_intrcnt_add(cpu); - isrc = NULL; error = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, &bind_virq); if (error != 0) { @@ -1338,9 +1358,6 @@ xen_intr_alloc_and_bind_ipi(u_int cpu, driver_filter_t filter, char name[MAXCOMLEN + 1]; int error; - /* Ensure the target CPU is ready to handle evtchn interrupts. 
*/ - xen_intr_intrcnt_add(cpu); - isrc = NULL; error = HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, &bind_ipi); if (error != 0) { diff --git a/sys/x86/xen/xen_msi.c b/sys/x86/xen/xen_msi.c index 0f678b164344..0d2544d293c9 100644 --- a/sys/x86/xen/xen_msi.c +++ b/sys/x86/xen/xen_msi.c @@ -44,16 +44,21 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include static struct mtx msi_lock; -static int msi_last_irq; +static u_int msi_last_irq; void xen_msi_init(void) { + MPASS(num_io_irqs > 0); + first_msi_irq = max(MINIMUM_MSI_INT, num_io_irqs); + num_io_irqs = first_msi_irq + NUM_MSI_INTS; + mtx_init(&msi_lock, "msi", NULL, MTX_DEF); } @@ -75,7 +80,7 @@ xen_msi_alloc(device_t dev, int count, int maxcount, int *irqs) /* Allocate MSI vectors */ for (i = 0; i < count; i++) - irqs[i] = FIRST_MSI_INT + msi_last_irq++; + irqs[i] = first_msi_irq + msi_last_irq++; mtx_unlock(&msi_lock); diff --git a/sys/x86/xen/xen_nexus.c b/sys/x86/xen/xen_nexus.c index 73506fc955f0..65d4281a1426 100644 --- a/sys/x86/xen/xen_nexus.c +++ b/sys/x86/xen/xen_nexus.c @@ -99,7 +99,7 @@ nexus_xen_config_intr(device_t dev, int irq, enum intr_trigger trig, * ISA and PCI intline IRQs are not preregistered on Xen, so * intercept calls to configure those and register them on the fly. 
*/ - if ((irq < FIRST_MSI_INT) && (intr_lookup_source(irq) == NULL)) { + if ((irq < first_msi_irq) && (intr_lookup_source(irq) == NULL)) { ret = xen_register_pirq(irq, trig, pol); if (ret != 0) return (ret); diff --git a/usr.bin/vmstat/vmstat.c b/usr.bin/vmstat/vmstat.c index 75f012f293e1..01b258bf668e 100644 --- a/usr.bin/vmstat/vmstat.c +++ b/usr.bin/vmstat/vmstat.c @@ -86,7 +86,7 @@ __FBSDID("$FreeBSD$"); static char da[] = "da"; enum x_stats { X_SUM, X_HZ, X_STATHZ, X_NCHSTATS, X_INTRNAMES, X_SINTRNAMES, - X_INTRCNT, X_SINTRCNT }; + X_INTRCNT, X_SINTRCNT, X_NINTRCNT }; static struct nlist namelist[] = { [X_SUM] = { .n_name = "_vm_cnt", }, @@ -97,6 +97,7 @@ static struct nlist namelist[] = { [X_SINTRNAMES] = { .n_name = "_sintrnames", }, [X_INTRCNT] = { .n_name = "_intrcnt", }, [X_SINTRCNT] = { .n_name = "_sintrcnt", }, + [X_NINTRCNT] = { .n_name = "_nintrcnt", }, { .n_name = NULL, }, }; @@ -196,6 +197,7 @@ static void domemstat_malloc(void); static void domemstat_zone(void); static void kread(int, void *, size_t); static void kreado(int, void *, size_t, size_t); +static void kreadptr(uintptr_t, void *, size_t); static void needhdr(int); static void needresize(int); static void doresize(void); @@ -318,6 +320,13 @@ main(int argc, char *argv[]) goto retry_nlist; } + /* + * 'nintrcnt' doesn't exist in older kernels, but + * that isn't fatal. 
+ */ + if (namelist[X_NINTRCNT].n_type == 0 && c == 1) + goto nlist_ok; + for (c = 0; c < (int)(nitems(namelist)); c++) if (namelist[c].n_type == 0) bufsize += strlen(namelist[c].n_name) @@ -341,6 +350,7 @@ main(int argc, char *argv[]) xo_finish(); exit(1); } +nlist_ok: if (kd && Pflag) xo_errx(1, "Cannot use -P with crash dumps"); @@ -1232,12 +1242,18 @@ static unsigned int read_intrcnts(unsigned long **intrcnts) { size_t intrcntlen; + uintptr_t kaddr; if (kd != NULL) { kread(X_SINTRCNT, &intrcntlen, sizeof(intrcntlen)); if ((*intrcnts = malloc(intrcntlen)) == NULL) err(1, "malloc()"); - kread(X_INTRCNT, *intrcnts, intrcntlen); + if (namelist[X_NINTRCNT].n_type == 0) + kread(X_INTRCNT, *intrcnts, intrcntlen); + else { + kread(X_INTRCNT, &kaddr, sizeof(kaddr)); + kreadptr(kaddr, *intrcnts, intrcntlen); + } } else { for (*intrcnts = NULL, intrcntlen = 1024; ; intrcntlen *= 2) { *intrcnts = reallocf(*intrcnts, intrcntlen); @@ -1294,6 +1310,7 @@ dointr(unsigned int interval, int reps) char *intrname, *intrnames; long long period_ms, old_uptime, uptime; size_t clen, inamlen, istrnamlen; + uintptr_t kaddr; unsigned int nintr; old_intrcnts = NULL; @@ -1304,7 +1321,12 @@ dointr(unsigned int interval, int reps) kread(X_SINTRNAMES, &inamlen, sizeof(inamlen)); if ((intrnames = malloc(inamlen)) == NULL) xo_err(1, "malloc()"); - kread(X_INTRNAMES, intrnames, inamlen); + if (namelist[X_NINTRCNT].n_type == 0) + kread(X_INTRNAMES, intrnames, inamlen); + else { + kread(X_INTRNAMES, &kaddr, sizeof(kaddr)); + kreadptr(kaddr, intrnames, inamlen); + } } else { for (intrnames = NULL, inamlen = 1024; ; inamlen *= 2) { if ((intrnames = reallocf(intrnames, inamlen)) == NULL) @@ -1646,6 +1668,14 @@ kread(int nlx, void *addr, size_t size) kreado(nlx, addr, size, 0); } +static void +kreadptr(uintptr_t addr, void *buf, size_t size) +{ + + if ((size_t)kvm_read(kd, addr, buf, size) != size) + xo_errx(1, "%s", kvm_geterr(kd)); +} + static void __dead2 usage(void) { From 
d8c069708e15b6b9ae4c57ebe891d9f20845a0b2 Mon Sep 17 00:00:00 2001 From: Brad Davis Date: Tue, 28 Aug 2018 22:22:06 +0000 Subject: [PATCH 39/51] Add beforeinstallconfig to bsd.confs.mk to enable running commands prior to the installconfig target. Approved by: re (rgrimes), bapt (mentor) Differential Revision: https://reviews.freebsd.org/D16874 --- share/mk/bsd.confs.mk | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/share/mk/bsd.confs.mk b/share/mk/bsd.confs.mk index 87aa8602d092..839631cf7756 100644 --- a/share/mk/bsd.confs.mk +++ b/share/mk/bsd.confs.mk @@ -29,8 +29,11 @@ all: buildconfig . if !target(afterinstallconfig) afterinstallconfig: . endif -installconfig: realinstallconfig afterinstallconfig -.ORDER: realinstallconfig afterinstallconfig +. if !target(beforeinstallconfig) +beforeinstallconfig: +. endif +installconfig: beforeinstallconfig realinstallconfig afterinstallconfig +.ORDER: beforeinstallconfig realinstallconfig afterinstallconfig ${group}OWN?= ${SHAREOWN} ${group}GRP?= ${SHAREGRP} From b4d1ec6c6965fe486cfd1320d7f2371a511be38d Mon Sep 17 00:00:00 2001 From: Brad Davis Date: Tue, 28 Aug 2018 22:51:45 +0000 Subject: [PATCH 40/51] Fix the install of /root/.login missed as part of r337849. 
Approved by: re (rgrimes) --- bin/csh/Makefile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/bin/csh/Makefile b/bin/csh/Makefile index 3aaa50d4a648..78d6fba3791c 100644 --- a/bin/csh/Makefile +++ b/bin/csh/Makefile @@ -8,8 +8,11 @@ .include -CONFGROUPS= ETC +CONFGROUPS= ETC ROOT ETC= csh.cshrc csh.login csh.logout +ROOT= dot.login +ROOTDIR= /root +ROOTNAME_dot.login= .login PACKAGE=runtime TCSHDIR= ${SRCTOP}/contrib/tcsh .PATH: ${TCSHDIR} From 26ffc1ab460e125b2fb78bad6a412f0f00761e25 Mon Sep 17 00:00:00 2001 From: Li-Wen Hsu Date: Tue, 28 Aug 2018 23:56:52 +0000 Subject: [PATCH 41/51] Fix 'install: symlink usr/src/sys -> /sys: File exists' in distributeworld Follow r334617, specify ${DISTDIR} (by ${INSTALL_DDIR}), '/base' and add ${INSTALLFLAGS} while installing the '/sys' symbolic link. Reviewed by: bapt (earlier version), markj Approved by: re (gjb), markj (mentor) Sponsored by: The FreeBSD Foundation Differential Revision: https://reviews.freebsd.org/D16877 --- Makefile.inc1 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.inc1 b/Makefile.inc1 index cc3ab0f5eda2..8a86727467ac 100644 --- a/Makefile.inc1 +++ b/Makefile.inc1 @@ -1334,7 +1334,7 @@ distributeworld installworld stageworld: _installcheck_world .PHONY METALOG=${METALOG} ${IMAKE_INSTALL} ${IMAKE_MTREE} \ DISTBASE=/base DESTDIR=${DESTDIR}/${DISTDIR}/base \ LOCAL_MTREE=${LOCAL_MTREE:Q} distrib-dirs - ${INSTALL_SYMLINK} usr/src/sys ${DESTDIR}/sys + ${INSTALL_SYMLINK} ${INSTALLFLAGS} usr/src/sys ${INSTALL_DDIR}/base/sys .endif ${_+_}cd ${.CURDIR}; ${IMAKE} re${.TARGET:S/world$//}; \ ${IMAKEENV} rm -rf ${INSTALLTMP} From 394e8d20d9f3626e31d0d1125f31ca23c9b749e3 Mon Sep 17 00:00:00 2001 From: Mark Johnston Date: Wed, 29 Aug 2018 02:49:18 +0000 Subject: [PATCH 42/51] Add a sysctl for the ZFS abd_scatter_enabled setting. 
Submitted by: Yamagi Burmeister (original version) Approved by: re (rgrimes) MFC after: 3 days --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/abd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/abd.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/abd.c index 2cd87c2b87bf..f6e4a92dd4c2 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/abd.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/abd.c @@ -153,6 +153,8 @@ size_t zfs_abd_chunk_size = 4096; #if defined(__FreeBSD__) && defined(_KERNEL) SYSCTL_DECL(_vfs_zfs); +SYSCTL_INT(_vfs_zfs, OID_AUTO, abd_scatter_enabled, CTLFLAG_RWTUN, + &zfs_abd_scatter_enabled, 0, "Enable scattered ARC data buffers"); SYSCTL_ULONG(_vfs_zfs, OID_AUTO, abd_chunk_size, CTLFLAG_RDTUN, &zfs_abd_chunk_size, 0, "The size of the chunks ABD allocates"); #endif From e32cd65c5b0a38a14ee9be061a0352c5fe03a6ad Mon Sep 17 00:00:00 2001 From: Navdeep Parhar Date: Wed, 29 Aug 2018 04:37:53 +0000 Subject: [PATCH 43/51] cxgbe/iw_cxgbe: Fix iWARP RDMA + VIMAGE operation by setting the VNET properly in a couple of places in the driver. 
Submitted by: Krishnamraju Eraparaju @ Chelsio Approved by: re@ (rgrimes@) Sponsored by: Chelsio Communications --- sys/dev/cxgbe/iw_cxgbe/cm.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/sys/dev/cxgbe/iw_cxgbe/cm.c b/sys/dev/cxgbe/iw_cxgbe/cm.c index ce42a90ad59b..9eab1750bb11 100644 --- a/sys/dev/cxgbe/iw_cxgbe/cm.c +++ b/sys/dev/cxgbe/iw_cxgbe/cm.c @@ -76,6 +76,7 @@ struct cpl_set_tcb_rpl; #include #include #include +#include static spinlock_t req_lock; static TAILQ_HEAD(c4iw_ep_list, c4iw_ep_common) req_list; @@ -2523,6 +2524,8 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) struct c4iw_dev *dev = to_c4iw_dev(cm_id->device); struct c4iw_ep *ep = NULL; struct ifnet *nh_ifp; /* Logical egress interface */ + struct rdma_cm_id *rdma_id = (struct rdma_cm_id*)cm_id->context; + struct vnet *vnet = rdma_id->route.addr.dev_addr.net; CTR2(KTR_IW_CXGBE, "%s:ccB %p", __func__, cm_id); @@ -2568,7 +2571,10 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) ref_qp(ep); ep->com.thread = curthread; + CURVNET_SET(vnet); err = get_ifnet_from_raddr(&cm_id->remote_addr, &nh_ifp); + CURVNET_RESTORE(); + if (err) { CTR2(KTR_IW_CXGBE, "%s:cc7 %p", __func__, ep); @@ -2811,7 +2817,10 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) if (!ep->parent_ep) ep->com.state = MORIBUND; + + CURVNET_SET(ep->com.so->so_vnet); sodisconnect(ep->com.so); + CURVNET_RESTORE(); } } From 76f6651cf08f1907a7cdb2501d1c891fbdc4f4d0 Mon Sep 17 00:00:00 2001 From: Cy Schubert Date: Wed, 29 Aug 2018 06:04:54 +0000 Subject: [PATCH 44/51] Avoid printing extraneous function names when searching man page database (apropos, man -k). This commit Replaces .SS with .SH, similar to the man page provided by original heimdal (as in port). 
PR: 230573 Submitted by: yuripv@yuripv.net Approved by: re (rgrimes@) MFC after: 3 days --- crypto/heimdal/doc/doxyout/krb5/man/man3/krb5.3 | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/crypto/heimdal/doc/doxyout/krb5/man/man3/krb5.3 b/crypto/heimdal/doc/doxyout/krb5/man/man3/krb5.3 index 9fe76079a2af..499365690406 100644 --- a/crypto/heimdal/doc/doxyout/krb5/man/man3/krb5.3 +++ b/crypto/heimdal/doc/doxyout/krb5/man/man3/krb5.3 @@ -2,9 +2,8 @@ .ad l .nh .SH NAME -Heimdal Kerberos 5 library \- -.SS "Functions" - +krb5 \- Heimdal Kerberos 5 library +.SH SYNOPSIS .in +1c .ti -1c .RI "KRB5_LIB_FUNCTION krb5_error_code KRB5_LIB_CALL \fBkrb5_add_et_list\fP (krb5_context context, void(*func)(struct et_list **))" From f0165b1ca68bff74bd6196d796b3fa5b7a732ca2 Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Wed, 29 Aug 2018 12:24:19 +0000 Subject: [PATCH 45/51] Remove {max/min}_offset() macros, use vm_map_{max/min}() inlines. Exposing max_offset and min_offset defines in public headers is causing clashes with variable names, for example when building QEMU. 
Based on the submission by: royger Reviewed by: alc, markj (previous version) Sponsored by: The FreeBSD Foundation (kib) MFC after: 1 week Approved by: re (marius) Differential revision: https://reviews.freebsd.org/D16881 --- sys/amd64/amd64/pmap.c | 12 ++++----- sys/arm/arm/pmap-v6.c | 14 +++++------ sys/arm64/arm64/pmap.c | 12 ++++----- .../uts/common/fs/zfs/sys/zfs_context.h | 7 ------ sys/i386/i386/pmap.c | 12 ++++----- sys/mips/mips/pmap.c | 12 ++++----- sys/riscv/riscv/pmap.c | 12 ++++----- sys/vm/vm_glue.c | 2 +- sys/vm/vm_init.c | 4 +-- sys/vm/vm_map.c | 22 ++++++++-------- sys/vm/vm_map.h | 25 +++++++++++++------ 11 files changed, 68 insertions(+), 66 deletions(-) diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index 3d70532b7fd3..cbd76aa48585 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -3094,8 +3094,8 @@ pmap_growkernel(vm_offset_t addr) return; addr = roundup2(addr, NBPDR); - if (addr - 1 >= kernel_map->max_offset) - addr = kernel_map->max_offset; + if (addr - 1 >= vm_map_max(kernel_map)) + addr = vm_map_max(kernel_map); while (kernel_vm_end < addr) { pdpe = pmap_pdpe(kernel_pmap, kernel_vm_end); if ((*pdpe & X86_PG_V) == 0) { @@ -3115,8 +3115,8 @@ pmap_growkernel(vm_offset_t addr) pde = pmap_pdpe_to_pde(pdpe, kernel_vm_end); if ((*pde & X86_PG_V) != 0) { kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK; - if (kernel_vm_end - 1 >= kernel_map->max_offset) { - kernel_vm_end = kernel_map->max_offset; + if (kernel_vm_end - 1 >= vm_map_max(kernel_map)) { + kernel_vm_end = vm_map_max(kernel_map); break; } continue; @@ -3134,8 +3134,8 @@ pmap_growkernel(vm_offset_t addr) pde_store(pde, newpdir); kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK; - if (kernel_vm_end - 1 >= kernel_map->max_offset) { - kernel_vm_end = kernel_map->max_offset; + if (kernel_vm_end - 1 >= vm_map_max(kernel_map)) { + kernel_vm_end = vm_map_max(kernel_map); break; } } diff --git a/sys/arm/arm/pmap-v6.c b/sys/arm/arm/pmap-v6.c index 
8dd74f15d925..ceedc75cb941 100644 --- a/sys/arm/arm/pmap-v6.c +++ b/sys/arm/arm/pmap-v6.c @@ -2043,21 +2043,21 @@ pmap_growkernel(vm_offset_t addr) * not called, it could be first unused KVA (which is not * rounded up to PTE1_SIZE), * - * (2) when all KVA space is mapped and kernel_map->max_offset + * (2) when all KVA space is mapped and vm_map_max(kernel_map) * address is not rounded up to PTE1_SIZE. (For example, * it could be 0xFFFFFFFF.) */ kernel_vm_end = pte1_roundup(kernel_vm_end); mtx_assert(&kernel_map->system_mtx, MA_OWNED); addr = roundup2(addr, PTE1_SIZE); - if (addr - 1 >= kernel_map->max_offset) - addr = kernel_map->max_offset; + if (addr - 1 >= vm_map_max(kernel_map)) + addr = vm_map_max(kernel_map); while (kernel_vm_end < addr) { pte1 = pte1_load(kern_pte1(kernel_vm_end)); if (pte1_is_valid(pte1)) { kernel_vm_end += PTE1_SIZE; - if (kernel_vm_end - 1 >= kernel_map->max_offset) { - kernel_vm_end = kernel_map->max_offset; + if (kernel_vm_end - 1 >= vm_map_max(kernel_map)) { + kernel_vm_end = vm_map_max(kernel_map); break; } continue; @@ -2099,8 +2099,8 @@ pmap_growkernel(vm_offset_t addr) pmap_kenter_pte1(kernel_vm_end, PTE1_LINK(pt2_pa)); kernel_vm_end = kernel_vm_end_new; - if (kernel_vm_end - 1 >= kernel_map->max_offset) { - kernel_vm_end = kernel_map->max_offset; + if (kernel_vm_end - 1 >= vm_map_max(kernel_map)) { + kernel_vm_end = vm_map_max(kernel_map); break; } } diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c index 89d6178da5e1..390bfbe307c6 100644 --- a/sys/arm64/arm64/pmap.c +++ b/sys/arm64/arm64/pmap.c @@ -1744,8 +1744,8 @@ pmap_growkernel(vm_offset_t addr) mtx_assert(&kernel_map->system_mtx, MA_OWNED); addr = roundup2(addr, L2_SIZE); - if (addr - 1 >= kernel_map->max_offset) - addr = kernel_map->max_offset; + if (addr - 1 >= vm_map_max(kernel_map)) + addr = vm_map_max(kernel_map); while (kernel_vm_end < addr) { l0 = pmap_l0(kernel_pmap, kernel_vm_end); KASSERT(pmap_load(l0) != 0, @@ -1768,8 +1768,8 @@ 
pmap_growkernel(vm_offset_t addr) l2 = pmap_l1_to_l2(l1, kernel_vm_end); if ((pmap_load(l2) & ATTR_AF) != 0) { kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET; - if (kernel_vm_end - 1 >= kernel_map->max_offset) { - kernel_vm_end = kernel_map->max_offset; + if (kernel_vm_end - 1 >= vm_map_max(kernel_map)) { + kernel_vm_end = vm_map_max(kernel_map); break; } continue; @@ -1787,8 +1787,8 @@ pmap_growkernel(vm_offset_t addr) pmap_invalidate_page(kernel_pmap, kernel_vm_end); kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET; - if (kernel_vm_end - 1 >= kernel_map->max_offset) { - kernel_vm_end = kernel_map->max_offset; + if (kernel_vm_end - 1 >= vm_map_max(kernel_map)) { + kernel_vm_end = vm_map_max(kernel_map); break; } } diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_context.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_context.h index 04606bda48db..a3c0e4c31d0d 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_context.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_context.h @@ -104,13 +104,6 @@ extern "C" { #include #include #include -/* There is clash. vm_map.h defines the two below and vdev_cache.c use them. 
*/ -#ifdef min_offset -#undef min_offset -#endif -#ifdef max_offset -#undef max_offset -#endif #include #include diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c index 5368c7cc2432..0c1437df5187 100644 --- a/sys/i386/i386/pmap.c +++ b/sys/i386/i386/pmap.c @@ -2229,13 +2229,13 @@ pmap_growkernel(vm_offset_t addr) mtx_assert(&kernel_map->system_mtx, MA_OWNED); addr = roundup2(addr, NBPDR); - if (addr - 1 >= kernel_map->max_offset) - addr = kernel_map->max_offset; + if (addr - 1 >= vm_map_max(kernel_map)) + addr = vm_map_max(kernel_map); while (kernel_vm_end < addr) { if (pdir_pde(PTD, kernel_vm_end)) { kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK; - if (kernel_vm_end - 1 >= kernel_map->max_offset) { - kernel_vm_end = kernel_map->max_offset; + if (kernel_vm_end - 1 >= vm_map_max(kernel_map)) { + kernel_vm_end = vm_map_max(kernel_map); break; } continue; @@ -2257,8 +2257,8 @@ pmap_growkernel(vm_offset_t addr) pmap_kenter_pde(kernel_vm_end, newpdir); kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK; - if (kernel_vm_end - 1 >= kernel_map->max_offset) { - kernel_vm_end = kernel_map->max_offset; + if (kernel_vm_end - 1 >= vm_map_max(kernel_map)) { + kernel_vm_end = vm_map_max(kernel_map); break; } } diff --git a/sys/mips/mips/pmap.c b/sys/mips/mips/pmap.c index 56afb4a3ca74..a0de635ff396 100644 --- a/sys/mips/mips/pmap.c +++ b/sys/mips/mips/pmap.c @@ -1255,8 +1255,8 @@ pmap_growkernel(vm_offset_t addr) mtx_assert(&kernel_map->system_mtx, MA_OWNED); req_class = VM_ALLOC_INTERRUPT; addr = roundup2(addr, NBSEG); - if (addr - 1 >= kernel_map->max_offset) - addr = kernel_map->max_offset; + if (addr - 1 >= vm_map_max(kernel_map)) + addr = vm_map_max(kernel_map); while (kernel_vm_end < addr) { pdpe = pmap_segmap(kernel_pmap, kernel_vm_end); #ifdef __mips_n64 @@ -1272,8 +1272,8 @@ pmap_growkernel(vm_offset_t addr) pde = pmap_pdpe_to_pde(pdpe, kernel_vm_end); if (*pde != 0) { kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK; - if (kernel_vm_end - 1 >= 
kernel_map->max_offset) { - kernel_vm_end = kernel_map->max_offset; + if (kernel_vm_end - 1 >= vm_map_max(kernel_map)) { + kernel_vm_end = vm_map_max(kernel_map); break; } continue; @@ -1305,8 +1305,8 @@ pmap_growkernel(vm_offset_t addr) pte[i] = PTE_G; kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK; - if (kernel_vm_end - 1 >= kernel_map->max_offset) { - kernel_vm_end = kernel_map->max_offset; + if (kernel_vm_end - 1 >= vm_map_max(kernel_map)) { + kernel_vm_end = vm_map_max(kernel_map); break; } } diff --git a/sys/riscv/riscv/pmap.c b/sys/riscv/riscv/pmap.c index 6d5d7cf844d8..8582667b27b9 100644 --- a/sys/riscv/riscv/pmap.c +++ b/sys/riscv/riscv/pmap.c @@ -1424,8 +1424,8 @@ pmap_growkernel(vm_offset_t addr) mtx_assert(&kernel_map->system_mtx, MA_OWNED); addr = roundup2(addr, L2_SIZE); - if (addr - 1 >= kernel_map->max_offset) - addr = kernel_map->max_offset; + if (addr - 1 >= vm_map_max(kernel_map)) + addr = vm_map_max(kernel_map); while (kernel_vm_end < addr) { l1 = pmap_l1(kernel_pmap, kernel_vm_end); if (pmap_load(l1) == 0) { @@ -1452,8 +1452,8 @@ pmap_growkernel(vm_offset_t addr) l2 = pmap_l1_to_l2(l1, kernel_vm_end); if ((pmap_load(l2) & PTE_A) != 0) { kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET; - if (kernel_vm_end - 1 >= kernel_map->max_offset) { - kernel_vm_end = kernel_map->max_offset; + if (kernel_vm_end - 1 >= vm_map_max(kernel_map)) { + kernel_vm_end = vm_map_max(kernel_map); break; } continue; @@ -1478,8 +1478,8 @@ pmap_growkernel(vm_offset_t addr) pmap_invalidate_page(kernel_pmap, kernel_vm_end); kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET; - if (kernel_vm_end - 1 >= kernel_map->max_offset) { - kernel_vm_end = kernel_map->max_offset; + if (kernel_vm_end - 1 >= vm_map_max(kernel_map)) { + kernel_vm_end = vm_map_max(kernel_map); break; } } diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c index 25db4ad18a4b..7952c81a1afe 100644 --- a/sys/vm/vm_glue.c +++ b/sys/vm/vm_glue.c @@ -122,7 +122,7 @@ kernacc(void *addr, int len, int 
rw) KASSERT((rw & ~VM_PROT_ALL) == 0, ("illegal ``rw'' argument to kernacc (%x)\n", rw)); - if ((vm_offset_t)addr + len > kernel_map->max_offset || + if ((vm_offset_t)addr + len > vm_map_max(kernel_map) || (vm_offset_t)addr + len < (vm_offset_t)addr) return (FALSE); diff --git a/sys/vm/vm_init.c b/sys/vm/vm_init.c index 19262daf00e3..09e87ed231ed 100644 --- a/sys/vm/vm_init.c +++ b/sys/vm/vm_init.c @@ -259,8 +259,8 @@ vm_ksubmap_init(struct kva_md_info *kmi) * Discount the physical memory larger than the size of kernel_map * to avoid eating up all of KVA space. */ - physmem_est = lmin(physmem, btoc(kernel_map->max_offset - - kernel_map->min_offset)); + physmem_est = lmin(physmem, btoc(vm_map_max(kernel_map) - + vm_map_min(kernel_map))); v = kern_vfs_bio_buffer_alloc(v, physmem_est); diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c index c2a7128137a2..9dacd649aaea 100644 --- a/sys/vm/vm_map.c +++ b/sys/vm/vm_map.c @@ -339,8 +339,8 @@ vmspace_dofree(struct vmspace *vm) * Delete all of the mappings and pages they hold, then call * the pmap module to reclaim anything left. */ - (void)vm_map_remove(&vm->vm_map, vm->vm_map.min_offset, - vm->vm_map.max_offset); + (void)vm_map_remove(&vm->vm_map, vm_map_min(&vm->vm_map), + vm_map_max(&vm->vm_map)); pmap_release(vmspace_pmap(vm)); vm->vm_map.pmap = NULL; @@ -799,8 +799,8 @@ _vm_map_init(vm_map_t map, pmap_t pmap, vm_offset_t min, vm_offset_t max) map->needs_wakeup = FALSE; map->system_map = 0; map->pmap = pmap; - map->min_offset = min; - map->max_offset = max; + map->header.end = min; + map->header.start = max; map->flags = 0; map->root = NULL; map->timestamp = 0; @@ -1198,7 +1198,8 @@ vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset, /* * Check that the start and end points are not bogus. 
*/ - if (start < map->min_offset || end > map->max_offset || start >= end) + if (start < vm_map_min(map) || end > vm_map_max(map) || + start >= end) return (KERN_INVALID_ADDRESS); /* @@ -1401,9 +1402,8 @@ vm_map_findspace(vm_map_t map, vm_offset_t start, vm_size_t length, * Request must fit within min/max VM address and must avoid * address wrap. */ - if (start < map->min_offset) - start = map->min_offset; - if (start + length > map->max_offset || start + length < start) + start = MAX(start, vm_map_min(map)); + if (start + length > vm_map_max(map) || start + length < start) return (1); /* Empty tree means wide open address space. */ @@ -3429,7 +3429,7 @@ vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_charge) old_map = &vm1->vm_map; /* Copy immutable fields of vm1 to vm2. */ - vm2 = vmspace_alloc(old_map->min_offset, old_map->max_offset, NULL); + vm2 = vmspace_alloc(vm_map_min(old_map), vm_map_max(old_map), NULL); if (vm2 == NULL) return (NULL); vm2->vm_taddr = vm1->vm_taddr; @@ -4329,14 +4329,14 @@ vm_offset_t vm_map_max_KBI(const struct vm_map *map) { - return (map->max_offset); + return (vm_map_max(map)); } vm_offset_t vm_map_min_KBI(const struct vm_map *map) { - return (map->min_offset); + return (vm_map_min(map)); } pmap_t diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h index 7502b95ee1a8..23548bf7d9b5 100644 --- a/sys/vm/vm_map.h +++ b/sys/vm/vm_map.h @@ -173,19 +173,26 @@ vm_map_entry_system_wired_count(vm_map_entry_t entry) * A map is a set of map entries. These map entries are * organized both as a binary search tree and as a doubly-linked * list. Both structures are ordered based upon the start and - * end addresses contained within each map entry. The list - * header has max start value and min end value to act as - * sentinels for sequential search of the doubly-linked list. + * end addresses contained within each map entry. 
+ * + * Counterintuitively, the map's min offset value is stored in + * map->header.end, and its max offset value is stored in + * map->header.start. + * + * The list header has max start value and min end value to act + * as sentinels for sequential search of the doubly-linked list. * Sleator and Tarjan's top-down splay algorithm is employed to * control height imbalance in the binary search tree. * - * List of locks + * List of locks * (c) const until freed */ struct vm_map { struct vm_map_entry header; /* List of entries */ -#define min_offset header.end /* (c) */ -#define max_offset header.start /* (c) */ +/* + map min_offset header.end (c) + map max_offset header.start (c) +*/ struct sx lock; /* Lock for map data */ struct mtx system_mtx; int nentries; /* Number of entries */ @@ -214,13 +221,15 @@ struct vm_map { static __inline vm_offset_t vm_map_max(const struct vm_map *map) { - return (map->max_offset); + + return (map->header.start); } static __inline vm_offset_t vm_map_min(const struct vm_map *map) { - return (map->min_offset); + + return (map->header.end); } static __inline pmap_t From b83d10091fde9d180ab4ce2e533b712e97e5a2e0 Mon Sep 17 00:00:00 2001 From: Emmanuel Vadot Date: Wed, 29 Aug 2018 14:01:27 +0000 Subject: [PATCH 46/51] arm64: GENERIC-MMCCAM: Fix build and module depend Fix the build of the GENERIC-MMCCAM kernel config after the sdhci_xenon driver was committed.
While here correct sdhci_fdt and tegra_sdhci, even with MMCCAM they do need to depend on sdhci(4) Reported by: Reshetnikov Dmitriy Approved by: re (kib) Sponsored by: Rubicon Communications, LLC ("NetGate") --- sys/arm/nvidia/tegra_sdhci.c | 2 +- sys/dev/sdhci/sdhci_fdt.c | 2 +- sys/dev/sdhci/sdhci_xenon.c | 5 +++++ 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/sys/arm/nvidia/tegra_sdhci.c b/sys/arm/nvidia/tegra_sdhci.c index 9693ea8101e3..9870285b3af2 100644 --- a/sys/arm/nvidia/tegra_sdhci.c +++ b/sys/arm/nvidia/tegra_sdhci.c @@ -465,7 +465,7 @@ static DEFINE_CLASS_0(sdhci, tegra_sdhci_driver, tegra_sdhci_methods, sizeof(struct tegra_sdhci_softc)); DRIVER_MODULE(sdhci_tegra, simplebus, tegra_sdhci_driver, tegra_sdhci_devclass, NULL, NULL); -#ifndef MMCCAM MODULE_DEPEND(sdhci_tegra, sdhci, 1, 1, 1); +#ifndef MMCCAM MMC_DECLARE_BRIDGE(sdhci); #endif diff --git a/sys/dev/sdhci/sdhci_fdt.c b/sys/dev/sdhci/sdhci_fdt.c index 488a91fd16eb..78c17363c35b 100644 --- a/sys/dev/sdhci/sdhci_fdt.c +++ b/sys/dev/sdhci/sdhci_fdt.c @@ -353,7 +353,7 @@ static devclass_t sdhci_fdt_devclass; DRIVER_MODULE(sdhci_fdt, simplebus, sdhci_fdt_driver, sdhci_fdt_devclass, NULL, NULL); -#ifndef MMCCAM MODULE_DEPEND(sdhci_fdt, sdhci, 1, 1, 1); +#ifndef MMCCAM MMC_DECLARE_BRIDGE(sdhci_fdt); #endif diff --git a/sys/dev/sdhci/sdhci_xenon.c b/sys/dev/sdhci/sdhci_xenon.c index d8c13d26f5b9..a3b76c30557f 100644 --- a/sys/dev/sdhci/sdhci_xenon.c +++ b/sys/dev/sdhci/sdhci_xenon.c @@ -61,6 +61,8 @@ __FBSDID("$FreeBSD$"); #include "mmcbr_if.h" #include "sdhci_if.h" +#include "opt_mmccam.h" + #define MAX_SLOTS 6 static struct ofw_compat_data compat_data[] = { @@ -542,5 +544,8 @@ static devclass_t sdhci_xenon_devclass; DRIVER_MODULE(sdhci_xenon, simplebus, sdhci_xenon_driver, sdhci_xenon_devclass, NULL, NULL); + MODULE_DEPEND(sdhci_xenon, sdhci, 1, 1, 1); +#ifndef MMCCAM MMC_DECLARE_BRIDGE(sdhci_xenon); +#endif From a520f8b6febebde86aef71045a013bfa9fa5c295 Mon Sep 17 00:00:00 2001 From: 
Stephen Hurd Date: Wed, 29 Aug 2018 15:55:25 +0000 Subject: [PATCH 47/51] Fix potential data corruption in iflib The MP ring may have txq pointers enqueued. Previously, these were passed to m_free() when IFC_QFLUSH was set. This patch checks for the value and doesn't call m_free(). Reviewed by: gallatin Approved by: re (gjb) Sponsored by: Limelight Networks Differential Revision: https://reviews.freebsd.org/D16882 --- sys/net/iflib.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sys/net/iflib.c b/sys/net/iflib.c index bf361955ddf0..b71da1fdfc83 100644 --- a/sys/net/iflib.c +++ b/sys/net/iflib.c @@ -3636,7 +3636,8 @@ iflib_txq_drain(struct ifmp_ring *r, uint32_t cidx, uint32_t pidx) if (__predict_false(ctx->ifc_flags & IFC_QFLUSH)) { DBG_COUNTER_INC(txq_drain_flushing); for (i = 0; i < avail; i++) { - m_free(r->items[(cidx + i) & (r->size-1)]); + if (__predict_true(r->items[(cidx + i) & (r->size-1)] != (void *)txq) + m_free(r->items[(cidx + i) & (r->size-1)]); r->items[(cidx + i) & (r->size-1)] = NULL; } return (avail); From bc0e855bd9c000b7c2ad7d1507ea8f76858e2f0f Mon Sep 17 00:00:00 2001 From: Stephen Hurd Date: Wed, 29 Aug 2018 16:21:34 +0000 Subject: [PATCH 48/51] Fix compile error due to missing parenthesis in r338372 Approved by: re (gjb) --- sys/net/iflib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/net/iflib.c b/sys/net/iflib.c index b71da1fdfc83..faee06d02a83 100644 --- a/sys/net/iflib.c +++ b/sys/net/iflib.c @@ -3636,7 +3636,7 @@ iflib_txq_drain(struct ifmp_ring *r, uint32_t cidx, uint32_t pidx) if (__predict_false(ctx->ifc_flags & IFC_QFLUSH)) { DBG_COUNTER_INC(txq_drain_flushing); for (i = 0; i < avail; i++) { - if (__predict_true(r->items[(cidx + i) & (r->size-1)] != (void *)txq) + if (__predict_true(r->items[(cidx + i) & (r->size-1)] != (void *)txq)) m_free(r->items[(cidx + i) & (r->size-1)]); r->items[(cidx + i) & (r->size-1)] = NULL; } From 94ec7ec758aad2cdfd976ef5385ca2b345a6d64d Mon Sep 17 00:00:00 
2001 From: Brad Davis Date: Wed, 29 Aug 2018 16:59:19 +0000 Subject: [PATCH 49/51] Finish moving dot.cshrc and dot.profile to bin/csh/ and bin/sh/. Approved by: re (gjb), will (mentor) Differential Revision: https://reviews.freebsd.org/D16770 --- bin/csh/Makefile | 9 ++++++++- {etc/root => bin/csh}/dot.cshrc | 0 bin/sh/Makefile | 10 +++++++++- {etc/root => bin/sh}/dot.profile | 0 etc/Makefile | 12 ------------ 5 files changed, 17 insertions(+), 14 deletions(-) rename {etc/root => bin/csh}/dot.cshrc (100%) rename {etc/root => bin/sh}/dot.profile (100%) diff --git a/bin/csh/Makefile b/bin/csh/Makefile index 78d6fba3791c..b0d4435d970c 100644 --- a/bin/csh/Makefile +++ b/bin/csh/Makefile @@ -10,8 +10,9 @@ CONFGROUPS= ETC ROOT ETC= csh.cshrc csh.login csh.logout -ROOT= dot.login +ROOT= dot.cshrc dot.login ROOTDIR= /root +ROOTNAME_dot.cshrc= .cshrc ROOTNAME_dot.login= .login PACKAGE=runtime TCSHDIR= ${SRCTOP}/contrib/tcsh @@ -153,4 +154,10 @@ tc.const.h: tc.const.c sh.char.h config.h config_f.h sh.types.h sh.err.h ${BUILD sort >> ${.TARGET} @echo '#endif /* _h_tc_const */' >> ${.TARGET} +beforeinstallconfig: + rm -f ${DESTDIR}/.cshrc + +afterinstallconfig: + ${INSTALL_LINK} ${TAG_ARGS} ${DESTDIR}/root/.cshrc ${DESTDIR}/.cshrc + .include diff --git a/etc/root/dot.cshrc b/bin/csh/dot.cshrc similarity index 100% rename from etc/root/dot.cshrc rename to bin/csh/dot.cshrc diff --git a/bin/sh/Makefile b/bin/sh/Makefile index 652e9ddc3dbf..abd756bb8e6d 100644 --- a/bin/sh/Makefile +++ b/bin/sh/Makefile @@ -3,7 +3,9 @@ .include -CONFS= profile +CONFS= dot.profile profile +CONFSDIR_dot.profile= /root +CONFSNAME_dot.profile= .profile PACKAGE=runtime PROG= sh INSTALLFLAGS= -S @@ -61,4 +63,10 @@ token.h: mktokens HAS_TESTS= SUBDIR.${MK_TESTS}+= tests +beforeinstallconfig: + rm -f ${DESTDIR}/.profile + +afterinstallconfig: + ${INSTALL_LINK} ${TAG_ARGS} ${DESTDIR}/root/.profile ${DESTDIR}/.profile + .include diff --git a/etc/root/dot.profile b/bin/sh/dot.profile similarity index 100% 
rename from etc/root/dot.profile rename to bin/sh/dot.profile diff --git a/etc/Makefile b/etc/Makefile index cf08a4e5155b..5b32e3a5664b 100644 --- a/etc/Makefile +++ b/etc/Makefile @@ -155,18 +155,6 @@ distribution: ${INSTALL} -o ${BINOWN} -g ${BINGRP} -m 644 \ dot.k5login ${DESTDIR}/root/.k5login; .endif - cd ${.CURDIR}/root; \ - ${INSTALL} -o ${BINOWN} -g ${BINGRP} -m 644 \ - dot.profile ${DESTDIR}/root/.profile; \ - rm -f ${DESTDIR}/.profile; \ - ${INSTALL_LINK} ${DESTDIR}/root/.profile ${DESTDIR}/.profile -.if ${MK_TCSH} != "no" - cd ${.CURDIR}/root; \ - ${INSTALL} -o ${BINOWN} -g ${BINGRP} -m 644 \ - dot.cshrc ${DESTDIR}/root/.cshrc; \ - rm -f ${DESTDIR}/.cshrc; \ - ${INSTALL_LINK} ${DESTDIR}/root/.cshrc ${DESTDIR}/.cshrc -.endif .if ${MK_MAIL} != "no" cd ${.CURDIR}/mail; ${INSTALL} -o ${BINOWN} -g ${BINGRP} -m 644 \ From b48050264837c9246eb6f1ef686f73883c0320d9 Mon Sep 17 00:00:00 2001 From: Mark Johnston Date: Wed, 29 Aug 2018 17:09:03 +0000 Subject: [PATCH 50/51] sed: Fix -i option behavior with 'q' command. Don't just exit when encountering the 'q' command if we edit file inplace, and give mf_fgets() a chance to actually handle the inplace case. Also add a regression test. 
Submitted by: Yuri Pankov Approved by: re (kib) MFC after: 1 week Differential Revision: https://reviews.freebsd.org/D16798 --- usr.bin/sed/extern.h | 2 ++ usr.bin/sed/main.c | 5 +++-- usr.bin/sed/process.c | 12 ++++++++---- usr.bin/sed/tests/sed2_test.sh | 18 ++++++++++++++++++ 4 files changed, 31 insertions(+), 6 deletions(-) diff --git a/usr.bin/sed/extern.h b/usr.bin/sed/extern.h index a104c31dcaea..4d88650417a3 100644 --- a/usr.bin/sed/extern.h +++ b/usr.bin/sed/extern.h @@ -46,6 +46,8 @@ extern int aflag, eflag, nflag; extern const char *fname, *outfname; extern FILE *infile, *outfile; extern int rflags; /* regex flags to use */ +extern const char *inplace; +extern int quit; void cfclose(struct s_command *, struct s_command *); void compile(void); diff --git a/usr.bin/sed/main.c b/usr.bin/sed/main.c index e48c85ea9ae4..96994ec4469d 100644 --- a/usr.bin/sed/main.c +++ b/usr.bin/sed/main.c @@ -102,6 +102,7 @@ FILE *outfile; /* Current output file */ int aflag, eflag, nflag; int rflags = 0; +int quit = 0; static int rval; /* Exit status */ static int ispan; /* Whether inplace editing spans across files */ @@ -115,7 +116,7 @@ const char *fname; /* File name. */ const char *outfname; /* Output file name */ static char oldfname[PATH_MAX]; /* Old file name (for in-place editing) */ static char tmpfname[PATH_MAX]; /* Temporary file name (for in-place editing) */ -static const char *inplace; /* Inplace edit file extension. */ +const char *inplace; /* Inplace edit file extension. 
*/ u_long linenum; static void add_compunit(enum e_cut, char *); @@ -338,7 +339,7 @@ mf_fgets(SPACE *sp, enum e_spflag spflag) } for (;;) { - if (infile != NULL && (c = getc(infile)) != EOF) { + if (infile != NULL && (c = getc(infile)) != EOF && !quit) { (void)ungetc(c, infile); break; } diff --git a/usr.bin/sed/process.c b/usr.bin/sed/process.c index e79d187be35f..261c6239c273 100644 --- a/usr.bin/sed/process.c +++ b/usr.bin/sed/process.c @@ -210,10 +210,14 @@ process(void) } break; case 'q': - if (!nflag && !pd) - OUT(); - flush_appends(); - exit(0); + if (inplace == NULL) { + if (!nflag && !pd) + OUT(); + flush_appends(); + exit(0); + } + quit = 1; + break; case 'r': if (appendx >= appendnum) if ((appends = realloc(appends, diff --git a/usr.bin/sed/tests/sed2_test.sh b/usr.bin/sed/tests/sed2_test.sh index a32f09d4bbab..9acd628a8f1e 100755 --- a/usr.bin/sed/tests/sed2_test.sh +++ b/usr.bin/sed/tests/sed2_test.sh @@ -38,6 +38,7 @@ inplace_hardlink_src_body() atf_check ln a b atf_check sed -i '' -e 's,foo,bar,g' b atf_check -o 'inline:bar\n' -s exit:0 cat b + atf_check -s not-exit:0 stat -q '.!'* } atf_test_case inplace_symlink_src @@ -50,10 +51,27 @@ inplace_symlink_src_body() echo foo > a atf_check ln -s a b atf_check -e not-empty -s not-exit:0 sed -i '' -e 's,foo,bar,g' b + atf_check -s not-exit:0 stat -q '.!'* +} + +atf_test_case inplace_command_q +inplace_command_q_head() +{ + atf_set "descr" "Verify -i works correctly with the 'q' command" +} +inplace_command_q_body() +{ + printf '1\n2\n3\n' > a + atf_check -o 'inline:1\n2\n' sed '2q' a + atf_check sed -i.bak '2q' a + atf_check -o 'inline:1\n2\n' cat a + atf_check -o 'inline:1\n2\n3\n' cat a.bak + atf_check -s not-exit:0 stat -q '.!'* } atf_init_test_cases() { + atf_add_test_case inplace_command_q atf_add_test_case inplace_hardlink_src atf_add_test_case inplace_symlink_src } From 0fb707ecc9b14d8af8d189ced26a84be5a488687 Mon Sep 17 00:00:00 2001 From: Warner Losh Date: Wed, 29 Aug 2018 17:37:23 +0000 Subject: 
[PATCH 51/51] Tweak typos in UPDATING Approved by: re@ (gjb) --- UPDATING | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/UPDATING b/UPDATING index 34a043871d06..bd314d07df12 100644 --- a/UPDATING +++ b/UPDATING @@ -36,8 +36,8 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 12.x IS SLOW: supported by its designers since at least 2003. Fortuna has been the default since FreeBSD-11. -20170822: - devctl freeze/that have gone into the tree, the rc scripts have been +20180822: + devctl freeze/thaw have gone into the tree, the rc scripts have been updated to use them and devmatch has been changed. You should update kernel, userland and rc scripts all at the same time.