From 64143619ab52bac3705de2671e9c789d119312df Mon Sep 17 00:00:00 2001 From: Justin Hibbits Date: Wed, 13 Feb 2019 03:11:12 +0000 Subject: [PATCH 01/89] powerpc/booke: Use the 'tlbilx' instruction on newer cores Newer cores have the 'tlbilx' instruction, which doesn't broadcast over CoreNet. This is significantly faster than walking the TLB to invalidate the PID mappings. tlbilx with the arguments given takes 131 clock cycles to complete, as opposed to 512 iterations through the loop plus tlbre/tlbwe at each iteration. MFC after: 3 weeks --- sys/powerpc/booke/pmap.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/sys/powerpc/booke/pmap.c b/sys/powerpc/booke/pmap.c index 931814497d2f..19b9b36b491c 100644 --- a/sys/powerpc/booke/pmap.c +++ b/sys/powerpc/booke/pmap.c @@ -4325,6 +4325,21 @@ tid_flush(tlbtid_t tid) msr = mfmsr(); __asm __volatile("wrteei 0"); + /* + * Newer (e500mc and later) have tlbilx, which doesn't broadcast, so use + * it for PID invalidation. + */ + switch ((mfpvr() >> 16) & 0xffff) { + case FSL_E500mc: + case FSL_E5500: + case FSL_E6500: + mtspr(SPR_MAS6, tid << MAS6_SPID0_SHIFT); + /* tlbilxpid */ + __asm __volatile("isync; .long 0x7c000024; isync; msync"); + mtmsr(msr); + return; + } + for (way = 0; way < TLB0_WAYS; way++) for (entry = 0; entry < TLB0_ENTRIES_PER_WAY; entry++) { From be7dd423764c16ee320770f715fea5da105bad13 Mon Sep 17 00:00:00 2001 From: Kyle Evans Date: Wed, 13 Feb 2019 04:19:08 +0000 Subject: [PATCH 02/89] libbe(3): Fix be_destroy behavior w.r.t. deep BE snapshots and -o be_destroy is documented to recursively destroy a boot environment. In the case of snapshots, one would take this to mean that these are also recursively destroyed. However, this was previously not the case. be_destroy would descend into the be_destroy callback and attempt to zfs_iter_children on the top-level snapshot, which is bogus. Our alternative approach is to take note of the snapshot name and iterate through all of fs children of the BE to try destruction in the children. The -o option is also fixed to work properly with deep BEs. If the BE was created with `bectl create -e otherDeepBE newDeepBE`, for instance, then a recursive snapshot of otherDeepBE would have been taken for construction of newDeepBE but a subsequent destroy with BE_DESTROY_ORIGIN set would only clean up the snapshot at the root of otherDeepBE: ${BEROOT}/otherDeepBE@... The most recent iteration instead pretends not to know how these things work, verifies that the origin is another BE and then passes that back through be_destroy to DTRT when snapshots and deep BEs may be in play. MFC after: 1 week --- lib/libbe/be.c | 100 ++++++++++++++++++++++++++++++++----------- lib/libbe/be.h | 1 + lib/libbe/be_error.c | 3 ++ lib/libbe/libbe.3 | 4 +- 4 files changed, 83 insertions(+), 25 deletions(-) diff --git a/lib/libbe/be.c b/lib/libbe/be.c index cf13e9fec9bd..13c1f66d3628 100644 --- a/lib/libbe/be.c +++ b/lib/libbe/be.c @@ -45,6 +45,11 @@ __FBSDID("$FreeBSD$"); #include "be.h" #include "be_impl.h" +struct be_destroy_data { + libbe_handle_t *lbh; + char *snapname; +}; + #if SOON static int be_create_child_noent(libbe_handle_t *lbh, const char *active, const char *child_path); @@ -186,12 +191,38 @@ be_nicenum(uint64_t num, char *buf, size_t buflen) static int be_destroy_cb(zfs_handle_t *zfs_hdl, void *data) { + char path[BE_MAXPATHLEN]; + struct be_destroy_data *bdd; + zfs_handle_t *snap; int err; - if ((err = zfs_iter_children(zfs_hdl, be_destroy_cb, data)) != 0) - return (err); - if ((err = zfs_destroy(zfs_hdl, false)) != 0) + bdd = (struct be_destroy_data *)data; + if (bdd->snapname == NULL) { + err = zfs_iter_children(zfs_hdl, be_destroy_cb, data); + if (err != 0) + return (err); + return (zfs_destroy(zfs_hdl, false)); + } + /* If we're dealing with snapshots instead, delete that one alone */ + err = zfs_iter_filesystems(zfs_hdl, be_destroy_cb, data); + if (err != 0) return (err); + /* + * This part is intentionally glossing over any potential errors, + * because there's a lot less potential for errors when we're cleaning + * up snapshots rather than a full deep BE. The primary error case + * here being if the snapshot doesn't exist in the first place, which + * the caller will likely deem insignificant as long as it doesn't + * exist after the call. Thus, such a missing snapshot shouldn't jam + * up the destruction. + */ + snprintf(path, sizeof(path), "%s@%s", zfs_get_name(zfs_hdl), + bdd->snapname); + if (!zfs_dataset_exists(bdd->lbh->lzh, path, ZFS_TYPE_SNAPSHOT)) + return (0); + snap = zfs_open(bdd->lbh->lzh, path, ZFS_TYPE_SNAPSHOT); + if (snap != NULL) + zfs_destroy(snap, false); return (0); } @@ -199,22 +230,26 @@ be_destroy_cb(zfs_handle_t *zfs_hdl, void *data) * Destroy the boot environment or snapshot specified by the name * parameter. Options are or'd together with the possible values: * BE_DESTROY_FORCE : forces operation on mounted datasets + * BE_DESTROY_ORIGIN: destroy the origin snapshot as well */ int be_destroy(libbe_handle_t *lbh, const char *name, int options) { + struct be_destroy_data bdd; char origin[BE_MAXPATHLEN], path[BE_MAXPATHLEN]; zfs_handle_t *fs; - char *p; + char *snapdelim; int err, force, mounted; + size_t rootlen; - p = path; + bdd.lbh = lbh; + bdd.snapname = NULL; force = options & BE_DESTROY_FORCE; *origin = '\0'; be_root_concat(lbh, name, path); - if (strchr(name, '@') == NULL) { + if ((snapdelim = strchr(path, '@')) == NULL) { if (!zfs_dataset_exists(lbh->lzh, path, ZFS_TYPE_FILESYSTEM)) return (set_error(lbh, BE_ERR_NOENT)); @@ -222,9 +257,10 @@ be_destroy(libbe_handle_t *lbh, const char *name, int options) strcmp(path, lbh->bootfs) == 0) return (set_error(lbh, BE_ERR_DESTROYACT)); - fs = zfs_open(lbh->lzh, p, ZFS_TYPE_FILESYSTEM); + fs = zfs_open(lbh->lzh, path, ZFS_TYPE_FILESYSTEM); if (fs == NULL) return (set_error(lbh, BE_ERR_ZFSOPEN)); + if ((options & BE_DESTROY_ORIGIN) != 0 && zfs_prop_get(fs, ZFS_PROP_ORIGIN, origin, sizeof(origin), NULL, NULL, 0, 1) != 0) @@ -233,41 +269,57 @@ be_destroy(libbe_handle_t *lbh, const char *name, int options) if (!zfs_dataset_exists(lbh->lzh, path, ZFS_TYPE_SNAPSHOT)) return (set_error(lbh, BE_ERR_NOENT)); - fs = zfs_open(lbh->lzh, p, ZFS_TYPE_SNAPSHOT); - if (fs == NULL) + bdd.snapname = strdup(snapdelim + 1); + if (bdd.snapname == NULL) + return (set_error(lbh, BE_ERR_NOMEM)); + *snapdelim = '\0'; + fs = zfs_open(lbh->lzh, path, ZFS_TYPE_DATASET); + if (fs == NULL) { + free(bdd.snapname); return (set_error(lbh, BE_ERR_ZFSOPEN)); + } } /* Check if mounted, unmount if force is specified */ if ((mounted = zfs_is_mounted(fs, NULL)) != 0) { - if (force) + if (force) { zfs_unmount(fs, NULL, 0); - else + } else { + free(bdd.snapname); return (set_error(lbh, BE_ERR_DESTROYMNT)); + } } - if ((err = be_destroy_cb(fs, NULL)) != 0) { + err = be_destroy_cb(fs, &bdd); + zfs_close(fs); + free(bdd.snapname); + if (err != 0) { /* Children are still present or the mount is referenced */ if (err == EBUSY) return (set_error(lbh, BE_ERR_DESTROYMNT)); return (set_error(lbh, BE_ERR_UNKNOWN)); } - if (*origin != '\0') { - fs = zfs_open(lbh->lzh, origin, ZFS_TYPE_SNAPSHOT); - if (fs == NULL) - return (set_error(lbh, BE_ERR_ZFSOPEN)); - err = zfs_destroy(fs, false); - if (err == EBUSY) - return (set_error(lbh, BE_ERR_DESTROYMNT)); - else if (err != 0) - return (set_error(lbh, BE_ERR_UNKNOWN)); - } + if ((options & BE_DESTROY_ORIGIN) == 0) + return (0); - return (0); + /* The origin can't possibly be shorter than the BE root */ + rootlen = strlen(lbh->root); + if (*origin == '\0' || strlen(origin) <= rootlen + 1) + return (set_error(lbh, BE_ERR_INVORIGIN)); + + /* + * We'll be chopping off the BE root and running this back through + * be_destroy, so that we properly handle the origin snapshot whether + * it be that of a deep BE or not. + */ + if (strncmp(origin, lbh->root, rootlen) != 0 || origin[rootlen] != '/') + return (0); + + return (be_destroy(lbh, origin + rootlen + 1, + options & ~BE_DESTROY_ORIGIN)); } - int be_snapshot(libbe_handle_t *lbh, const char *source, const char *snap_name, bool recursive, char *result) diff --git a/lib/libbe/be.h b/lib/libbe/be.h index dcf336d7423c..e1aced8d983d 100644 --- a/lib/libbe/be.h +++ b/lib/libbe/be.h @@ -59,6 +59,7 @@ typedef enum be_error { BE_ERR_NOPOOL, /* operation not supported on this pool */ BE_ERR_NOMEM, /* insufficient memory */ BE_ERR_UNKNOWN, /* unknown error */ + BE_ERR_INVORIGIN, /* invalid origin */ } be_error_t; diff --git a/lib/libbe/be_error.c b/lib/libbe/be_error.c index 746d873f8a39..04e062936a9d 100644 --- a/lib/libbe/be_error.c +++ b/lib/libbe/be_error.c @@ -105,6 +105,9 @@ libbe_error_description(libbe_handle_t *lbh) case BE_ERR_UNKNOWN: return ("unknown error"); + case BE_ERR_INVORIGIN: + return ("invalid origin"); + default: assert(lbh->error == BE_ERR_SUCCESS); return ("no error"); diff --git a/lib/libbe/libbe.3 b/lib/libbe/libbe.3 index ada024374eb9..750a6ec261e5 100644 --- a/lib/libbe/libbe.3 +++ b/lib/libbe/libbe.3 @@ -28,7 +28,7 @@ .\" .\" $FreeBSD$ .\" -.Dd February 11, 2019 +.Dd February 12, 2019 .Dt LIBBE 3 .Os .Sh NAME @@ -489,6 +489,8 @@ BE_ERR_NOPOOL BE_ERR_NOMEM .It BE_ERR_UNKNOWN +.It +BE_ERR_INVORIGIN .El .Sh SEE ALSO .Xr bectl 8 From 60cc4a3e2db22ccab371a8ede6298b6ea117c3f5 Mon Sep 17 00:00:00 2001 From: "David E. O'Brien" Date: Wed, 13 Feb 2019 04:52:01 +0000 Subject: [PATCH 03/89] Note that readpassphrase() came into FreeBSD's libc at 4.6. --- lib/libc/gen/readpassphrase.3 | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/libc/gen/readpassphrase.3 b/lib/libc/gen/readpassphrase.3 index 3bcee6f5638d..174639263997 100644 --- a/lib/libc/gen/readpassphrase.3 +++ b/lib/libc/gen/readpassphrase.3 @@ -178,4 +178,6 @@ extension and should not be used if portability is desired. The .Fn readpassphrase function first appeared in +.Fx 4.6 +and .Ox 2.9 . From 64339c4130f344f01f8f36cbc3fda93f96c69d75 Mon Sep 17 00:00:00 2001 From: Martin Matuska Date: Wed, 13 Feb 2019 07:35:18 +0000 Subject: [PATCH 04/89] Update vendor/libarchive/dist to git 3532bc32819b14bfd8a3a5e3d3554ce14d939940 archive_read_disk_posix.c: initialize delayed_errno --- libarchive/archive_read_disk_posix.c | 1 + 1 file changed, 1 insertion(+) diff --git a/libarchive/archive_read_disk_posix.c b/libarchive/archive_read_disk_posix.c index 09c366f5febe..31d2429c41a5 100644 --- a/libarchive/archive_read_disk_posix.c +++ b/libarchive/archive_read_disk_posix.c @@ -860,6 +860,7 @@ next_entry(struct archive_read_disk *a, struct tree *t, struct archive_string delayed_str; delayed = ARCHIVE_OK; + delayed_errno = 0; archive_string_init(&delayed_str); st = NULL; From fa91f845028358800c1a848b68fff7c7822ac6fb Mon Sep 17 00:00:00 2001 From: Randall Stewart Date: Wed, 13 Feb 2019 14:57:59 +0000 Subject: [PATCH 05/89] This commit adds the missing release mechanism for the ratelimiting code. The two modules (lagg and vlan) did have allocation routines, and even though they are indirect (and vector down to the underlying interfaces) they both need to have a free routine (that also vectors down to the actual interface). Sponsored by: Netflix Inc. Differential Revision: https://reviews.freebsd.org/D19032 --- sys/net/if_lagg.c | 9 +++++++++ sys/net/if_vlan.c | 8 ++++++++ 2 files changed, 17 insertions(+) diff --git a/sys/net/if_lagg.c b/sys/net/if_lagg.c index 3bea2e95d69b..5b7c6eb0aca1 100644 --- a/sys/net/if_lagg.c +++ b/sys/net/if_lagg.c @@ -133,6 +133,7 @@ static int lagg_ioctl(struct ifnet *, u_long, caddr_t); static int lagg_snd_tag_alloc(struct ifnet *, union if_snd_tag_alloc_params *, struct m_snd_tag **); +static void lagg_snd_tag_free(struct m_snd_tag *); #endif static int lagg_setmulti(struct lagg_port *); static int lagg_clrmulti(struct lagg_port *); @@ -514,6 +515,7 @@ lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params) ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST | IFF_MULTICAST; #ifdef RATELIMIT ifp->if_snd_tag_alloc = lagg_snd_tag_alloc; + ifp->if_snd_tag_free = lagg_snd_tag_free; #endif ifp->if_capenable = ifp->if_capabilities = IFCAP_HWSTATS; @@ -1568,6 +1570,13 @@ lagg_snd_tag_alloc(struct ifnet *ifp, /* forward allocation request */ return (ifp->if_snd_tag_alloc(ifp, params, ppmt)); } + +static void +lagg_snd_tag_free(struct m_snd_tag *tag) +{ + tag->ifp->if_snd_tag_free(tag); +} + #endif static int diff --git a/sys/net/if_vlan.c b/sys/net/if_vlan.c index 535ce8cf7dcb..cf8b4ff1790a 100644 --- a/sys/net/if_vlan.c +++ b/sys/net/if_vlan.c @@ -267,6 +267,7 @@ static int vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr); #ifdef RATELIMIT static int vlan_snd_tag_alloc(struct ifnet *, union if_snd_tag_alloc_params *, struct m_snd_tag **); +static void vlan_snd_tag_free(struct m_snd_tag *); #endif static void vlan_qflush(struct ifnet *ifp); static int vlan_setflag(struct ifnet *ifp, int flag, int status, @@ -1047,6 +1048,7 @@ vlan_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) ifp->if_ioctl = vlan_ioctl; #ifdef RATELIMIT ifp->if_snd_tag_alloc = vlan_snd_tag_alloc; + ifp->if_snd_tag_free = vlan_snd_tag_free; #endif ifp->if_flags = VLAN_IFFLAGS; ether_ifattach(ifp, eaddr); @@ -1934,4 +1936,10 @@ vlan_snd_tag_alloc(struct ifnet *ifp, /* forward allocation request */ return (ifp->if_snd_tag_alloc(ifp, params, ppmt)); } + +static void +vlan_snd_tag_free(struct m_snd_tag *tag) +{ + tag->ifp->if_snd_tag_free(tag); +} #endif From c7ee62fcd54734db427f0d1e9f20caf2d201b6ec Mon Sep 17 00:00:00 2001 From: "Andrey V. Elsukov" Date: Wed, 13 Feb 2019 15:46:05 +0000 Subject: [PATCH 06/89] In r335015 PCB destroing was made deferred using epoch_call(). But ipsec_delete_pcbpolicy() uses some VNET-virtualized variables, and thus it needs VNET context, that is missing during gtaskqueue executing. Use inp_vnet context to set curvnet in in_pcbfree_deferred(). PR: 235684 MFC after: 1 week --- sys/netinet/in_pcb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c index b13e1e36518c..41b3c812d97f 100644 --- a/sys/netinet/in_pcb.c +++ b/sys/netinet/in_pcb.c @@ -1565,6 +1565,7 @@ in_pcbfree_deferred(epoch_context_t ctx) inp = __containerof(ctx, struct inpcb, inp_epoch_ctx); INP_WLOCK(inp); + CURVNET_SET(inp->inp_vnet); #ifdef INET struct ip_moptions *imo = inp->inp_moptions; inp->inp_moptions = NULL; @@ -1597,6 +1598,7 @@ in_pcbfree_deferred(epoch_context_t ctx) #ifdef INET inp_freemoptions(imo); #endif + CURVNET_RESTORE(); } /* From f6893f09d515a7db685556a61e76074021e4d6d4 Mon Sep 17 00:00:00 2001 From: Mark Johnston Date: Wed, 13 Feb 2019 17:19:37 +0000 Subject: [PATCH 07/89] Implement transparent 2MB superpage promotion for RISC-V. This includes support for pmap_enter(..., psind=1) as described in the commit log message for r321378. The changes are largely modelled after amd64. arm64 has more stringent requirements around superpage creation to avoid the possibility of TLB conflict aborts, and these requirements do not apply to RISC-V, which like amd64 permits simultaneous caching of 4KB and 2MB translations for a given page. RISC-V's PTE format includes only two software bits, and as these are already consumed we do not have an analogue for amd64's PG_PROMOTED. Instead, pmap_remove_l2() always invalidates the entire 2MB address range. pmap_ts_referenced() is modified to clear PTE_A, now that we support both hardware- and software-managed reference and dirty bits. Also fix pmap_fault_fixup() so that it does not set PTE_A or PTE_D on kernel mappings. Reviewed by: kib (earlier version) Discussed with: jhb Sponsored by: The FreeBSD Foundation Differential Revision: https://reviews.freebsd.org/D18863 Differential Revision: https://reviews.freebsd.org/D18864 Differential Revision: https://reviews.freebsd.org/D18865 Differential Revision: https://reviews.freebsd.org/D18866 Differential Revision: https://reviews.freebsd.org/D18867 Differential Revision: https://reviews.freebsd.org/D18868 --- sys/riscv/include/param.h | 2 +- sys/riscv/include/pmap.h | 4 + sys/riscv/include/pte.h | 5 +- sys/riscv/include/vmparam.h | 4 +- sys/riscv/riscv/pmap.c | 1469 +++++++++++++++++++++++++++++------ sys/vm/vm_fault.c | 8 +- 6 files changed, 1247 insertions(+), 245 deletions(-) diff --git a/sys/riscv/include/param.h b/sys/riscv/include/param.h index f22e747ffdcc..4b4610bea8d1 100644 --- a/sys/riscv/include/param.h +++ b/sys/riscv/include/param.h @@ -82,7 +82,7 @@ #define PAGE_SIZE (1 << PAGE_SHIFT) /* Page size */ #define PAGE_MASK (PAGE_SIZE - 1) -#define MAXPAGESIZES 1 /* maximum number of supported page sizes */ +#define MAXPAGESIZES 3 /* maximum number of supported page sizes */ #ifndef KSTACK_PAGES #define KSTACK_PAGES 4 /* pages of kernel stack (with pcb) */ diff --git a/sys/riscv/include/pmap.h b/sys/riscv/include/pmap.h index 79c5dc2a3da2..92eeee26331f 100644 --- a/sys/riscv/include/pmap.h +++ b/sys/riscv/include/pmap.h @@ -44,6 +44,8 @@ #include #include +#include + #ifdef _KERNEL #define vtophys(va) pmap_kextract((vm_offset_t)(va)) @@ -80,6 +82,7 @@ struct pmap { pd_entry_t *pm_l1; TAILQ_HEAD(,pv_chunk) pm_pvchunk; /* list of mappings in pmap */ LIST_ENTRY(pmap) pm_list; /* List of all pmaps */ + struct vm_radix pm_root; }; typedef struct pv_entry { @@ -139,6 +142,7 @@ void pmap_kenter_device(vm_offset_t, vm_size_t, vm_paddr_t); vm_paddr_t pmap_kextract(vm_offset_t va); void pmap_kremove(vm_offset_t); void pmap_kremove_device(vm_offset_t, vm_size_t); +bool pmap_ps_enabled(pmap_t); void *pmap_mapdev(vm_offset_t, vm_size_t); void *pmap_mapbios(vm_paddr_t, vm_size_t); diff --git a/sys/riscv/include/pte.h b/sys/riscv/include/pte.h index 723c9cf26ffb..a88566d890ae 100644 --- a/sys/riscv/include/pte.h +++ b/sys/riscv/include/pte.h @@ -62,7 +62,8 @@ typedef uint64_t pn_t; /* page number */ #define L3_SIZE (1 << L3_SHIFT) #define L3_OFFSET (L3_SIZE - 1) -#define Ln_ENTRIES (1 << 9) +#define Ln_ENTRIES_SHIFT 9 +#define Ln_ENTRIES (1 << Ln_ENTRIES_SHIFT) #define Ln_ADDR_MASK (Ln_ENTRIES - 1) /* Bits 9:8 are reserved for software */ @@ -79,6 +80,8 @@ typedef uint64_t pn_t; /* page number */ #define PTE_RWX (PTE_R | PTE_W | PTE_X) #define PTE_RX (PTE_R | PTE_X) #define PTE_KERN (PTE_V | PTE_R | PTE_W | PTE_A | PTE_D) +#define PTE_PROMOTE (PTE_V | PTE_RWX | PTE_D | PTE_A | PTE_G | PTE_U | \ + PTE_SW_MANAGED | PTE_SW_WIRED) #define PTE_PPN0_S 10 #define PTE_PPN1_S 19 diff --git a/sys/riscv/include/vmparam.h b/sys/riscv/include/vmparam.h index 49c720e681c2..ee03f7b09cc2 100644 --- a/sys/riscv/include/vmparam.h +++ b/sys/riscv/include/vmparam.h @@ -99,10 +99,10 @@ #define VM_NFREEORDER 12 /* - * Disable superpage reservations. + * Enable superpage reservations: 1 level. */ #ifndef VM_NRESERVLEVEL -#define VM_NRESERVLEVEL 0 +#define VM_NRESERVLEVEL 1 #endif /* diff --git a/sys/riscv/riscv/pmap.c b/sys/riscv/riscv/pmap.c index 2e9c3a12579b..3929fe1b3e51 100644 --- a/sys/riscv/riscv/pmap.c +++ b/sys/riscv/riscv/pmap.c @@ -118,6 +118,7 @@ __FBSDID("$FreeBSD$"); */ #include +#include #include #include #include @@ -145,6 +146,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -154,9 +156,8 @@ __FBSDID("$FreeBSD$"); #include #include -#define NPDEPG (PAGE_SIZE/(sizeof (pd_entry_t))) -#define NUPDE (NPDEPG * NPDEPG) -#define NUSERPGTBLS (NUPDE + NPDEPG) +#define NUL1E (Ln_ENTRIES * Ln_ENTRIES) +#define NUL2E (Ln_ENTRIES * NUL1E) #if !defined(DIAGNOSTIC) #ifdef __GNUC_GNU_INLINE__ @@ -175,11 +176,12 @@ __FBSDID("$FreeBSD$"); #endif #define pmap_l2_pindex(v) ((v) >> L2_SHIFT) +#define pa_to_pvh(pa) (&pv_table[pa_index(pa)]) #define NPV_LIST_LOCKS MAXCPU #define PHYS_TO_PV_LIST_LOCK(pa) \ - (&pv_list_locks[pa_index(pa) % NPV_LIST_LOCKS]) + (&pv_list_locks[pmap_l2_pindex(pa) % NPV_LIST_LOCKS]) #define CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa) do { \ struct rwlock **_lockp = (lockp); \ @@ -230,12 +232,51 @@ CTASSERT((DMAP_MAX_ADDRESS & ~L1_OFFSET) == DMAP_MAX_ADDRESS); static struct rwlock_padalign pvh_global_lock; static struct mtx_padalign allpmaps_lock; +static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, + "VM/pmap parameters"); + +static int superpages_enabled = 1; +SYSCTL_INT(_vm_pmap, OID_AUTO, superpages_enabled, + CTLFLAG_RDTUN, &superpages_enabled, 0, + "Enable support for transparent superpages"); + +static SYSCTL_NODE(_vm_pmap, OID_AUTO, l2, CTLFLAG_RD, 0, + "2MB page mapping counters"); + +static u_long pmap_l2_demotions; +SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, demotions, CTLFLAG_RD, + &pmap_l2_demotions, 0, + "2MB page demotions"); + +static u_long pmap_l2_mappings; +SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, mappings, CTLFLAG_RD, + &pmap_l2_mappings, 0, + "2MB page mappings"); + +static u_long pmap_l2_p_failures; +SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, p_failures, CTLFLAG_RD, + &pmap_l2_p_failures, 0, + "2MB page promotion failures"); + +static u_long pmap_l2_promotions; +SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, promotions, CTLFLAG_RD, + &pmap_l2_promotions, 0, + "2MB page promotions"); + /* * Data for the pv entry allocation mechanism */ static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks); static struct mtx pv_chunks_mutex; static struct rwlock pv_list_locks[NPV_LIST_LOCKS]; +static struct md_page *pv_table; +static struct md_page pv_dummy; + +/* + * Internal flags for pmap_enter()'s helper functions. + */ +#define PMAP_ENTER_NORECLAIM 0x1000000 /* Don't reclaim PV entries. */ +#define PMAP_ENTER_NOREPLACE 0x2000000 /* Don't replace mappings. */ static void free_pv_chunk(struct pv_chunk *pc); static void free_pv_entry(pmap_t pmap, pv_entry_t pv); @@ -244,6 +285,11 @@ static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp); static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va); static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va); +static bool pmap_demote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t va); +static bool pmap_demote_l2_locked(pmap_t pmap, pd_entry_t *l2, + vm_offset_t va, struct rwlock **lockp); +static int pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t new_l2, + u_int flags, vm_page_t m, struct rwlock **lockp); static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp); static int pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t sva, @@ -254,9 +300,9 @@ static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, static vm_page_t _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp); -static void _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, +static void _pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free); -static int pmap_unuse_l3(pmap_t, vm_offset_t, pd_entry_t, struct spglist *); +static int pmap_unuse_pt(pmap_t, vm_offset_t, pd_entry_t, struct spglist *); #define pmap_clear(pte) pmap_store(pte, 0) #define pmap_clear_bits(pte, bits) atomic_clear_64(pte, bits) @@ -636,7 +682,8 @@ pmap_page_init(vm_page_t m) void pmap_init(void) { - int i; + vm_size_t s; + int i, pv_npg; /* * Initialize the pv chunk and pmap list mutexes. @@ -649,6 +696,24 @@ pmap_init(void) */ for (i = 0; i < NPV_LIST_LOCKS; i++) rw_init(&pv_list_locks[i], "pmap pv list"); + + /* + * Calculate the size of the pv head table for superpages. + */ + pv_npg = howmany(vm_phys_segs[vm_phys_nsegs - 1].end, L2_SIZE); + + /* + * Allocate memory for the pv head table for superpages. + */ + s = (vm_size_t)(pv_npg * sizeof(struct md_page)); + s = round_page(s); + pv_table = (struct md_page *)kmem_malloc(s, M_WAITOK | M_ZERO); + for (i = 0; i < pv_npg; i++) + TAILQ_INIT(&pv_table[i].pv_list); + TAILQ_INIT(&pv_dummy.pv_list); + + if (superpages_enabled) + pagesizes[1] = L2_SIZE; } #ifdef SMP @@ -999,6 +1064,13 @@ pmap_qremove(vm_offset_t sva, int count) pmap_invalidate_range(kernel_pmap, sva, va); } +bool +pmap_ps_enabled(pmap_t pmap __unused) +{ + + return (superpages_enabled); +} + /*************************************************** * Page table page management routines..... ***************************************************/ @@ -1018,6 +1090,34 @@ pmap_add_delayed_free_list(vm_page_t m, struct spglist *free, m->flags &= ~PG_ZERO; SLIST_INSERT_HEAD(free, m, plinks.s.ss); } + +/* + * Inserts the specified page table page into the specified pmap's collection + * of idle page table pages. Each of a pmap's page table pages is responsible + * for mapping a distinct range of virtual addresses. The pmap's collection is + * ordered by this virtual address range. + */ +static __inline int +pmap_insert_pt_page(pmap_t pmap, vm_page_t ml3) +{ + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + return (vm_radix_insert(&pmap->pm_root, ml3)); +} + +/* + * Removes the page table page mapping the specified virtual address from the + * specified pmap's collection of idle page table pages, and returns it. + * Otherwise, returns NULL if there is no page table page corresponding to the + * specified virtual address. + */ +static __inline vm_page_t +pmap_remove_pt_page(pmap_t pmap, vm_offset_t va) +{ + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + return (vm_radix_remove(&pmap->pm_root, pmap_l2_pindex(va))); +} /* * Decrements a page table page's wire count, which is used to record the @@ -1026,12 +1126,12 @@ pmap_add_delayed_free_list(vm_page_t m, struct spglist *free, * page table page was unmapped and FALSE otherwise. */ static inline boolean_t -pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) +pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) { --m->wire_count; if (m->wire_count == 0) { - _pmap_unwire_l3(pmap, va, m, free); + _pmap_unwire_ptp(pmap, va, m, free); return (TRUE); } else { return (FALSE); @@ -1039,36 +1139,30 @@ pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) } static void -_pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) +_pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) { vm_paddr_t phys; PMAP_LOCK_ASSERT(pmap, MA_OWNED); - /* - * unmap the page table page - */ - if (m->pindex >= NUPDE) { - /* PD page */ + if (m->pindex >= NUL1E) { pd_entry_t *l1; l1 = pmap_l1(pmap, va); pmap_clear(l1); pmap_distribute_l1(pmap, pmap_l1_index(va), 0); } else { - /* PTE page */ pd_entry_t *l2; l2 = pmap_l2(pmap, va); pmap_clear(l2); } pmap_resident_count_dec(pmap, 1); - if (m->pindex < NUPDE) { + if (m->pindex < NUL1E) { pd_entry_t *l1; - /* We just released a PT, unhold the matching PD */ vm_page_t pdpg; l1 = pmap_l1(pmap, va); phys = PTE_TO_PHYS(pmap_load(l1)); pdpg = PHYS_TO_VM_PAGE(phys); - pmap_unwire_l3(pmap, va, pdpg, free); + pmap_unwire_ptp(pmap, va, pdpg, free); } pmap_invalidate_page(pmap, va); @@ -1082,24 +1176,20 @@ _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) } /* - * After removing an l3 entry, this routine is used to + * After removing a page table entry, this routine is used to * conditionally free the page, and manage the hold/wire counts. */ static int -pmap_unuse_l3(pmap_t pmap, vm_offset_t va, pd_entry_t ptepde, +pmap_unuse_pt(pmap_t pmap, vm_offset_t va, pd_entry_t ptepde, struct spglist *free) { - vm_paddr_t phys; vm_page_t mpte; if (va >= VM_MAXUSER_ADDRESS) return (0); KASSERT(ptepde != 0, ("pmap_unuse_pt: ptepde != 0")); - - phys = PTE_TO_PHYS(ptepde); - - mpte = PHYS_TO_VM_PAGE(phys); - return (pmap_unwire_l3(pmap, va, mpte, free)); + mpte = PHYS_TO_VM_PAGE(PTE_TO_PHYS(ptepde)); + return (pmap_unwire_ptp(pmap, va, mpte, free)); } void @@ -1140,6 +1230,8 @@ pmap_pinit(pmap_t pmap) LIST_INSERT_HEAD(&allpmaps, pmap, pm_list); mtx_unlock(&allpmaps_lock); + vm_radix_init(&pmap->pm_root); + return (1); } @@ -1193,11 +1285,11 @@ _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp) * it isn't already there. */ - if (ptepindex >= NUPDE) { + if (ptepindex >= NUL1E) { pd_entry_t *l1; vm_pindex_t l1index; - l1index = ptepindex - NUPDE; + l1index = ptepindex - NUL1E; l1 = &pmap->pm_l1[l1index]; pn = (VM_PAGE_TO_PHYS(m) / PAGE_SIZE); @@ -1213,7 +1305,7 @@ _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp) l1 = &pmap->pm_l1[l1index]; if (pmap_load(l1) == 0) { /* recurse for allocating page dir */ - if (_pmap_alloc_l3(pmap, NUPDE + l1index, + if (_pmap_alloc_l3(pmap, NUL1E + l1index, lockp) == NULL) { vm_page_unwire_noq(m); vm_page_free_zero(m); @@ -1240,6 +1332,29 @@ _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp) return (m); } +static vm_page_t +pmap_alloc_l2(pmap_t pmap, vm_offset_t va, struct rwlock **lockp) +{ + pd_entry_t *l1; + vm_page_t l2pg; + vm_pindex_t l2pindex; + +retry: + l1 = pmap_l1(pmap, va); + if (l1 != NULL && (pmap_load(l1) & PTE_RWX) == 0) { + /* Add a reference to the L2 page. */ + l2pg = PHYS_TO_VM_PAGE(PTE_TO_PHYS(pmap_load(l1))); + l2pg->wire_count++; + } else { + /* Allocate a L2 page. */ + l2pindex = pmap_l2_pindex(va) >> Ln_ENTRIES_SHIFT; + l2pg = _pmap_alloc_l3(pmap, NUL2E + l2pindex, lockp); + if (l2pg == NULL && lockp != NULL) + goto retry; + } + return (l2pg); +} + static vm_page_t pmap_alloc_l3(pmap_t pmap, vm_offset_t va, struct rwlock **lockp) { @@ -1598,6 +1713,79 @@ get_pv_entry(pmap_t pmap, struct rwlock **lockp) return (pv); } +/* + * Ensure that the number of spare PV entries in the specified pmap meets or + * exceeds the given count, "needed". + * + * The given PV list lock may be released. + */ +static void +reserve_pv_entries(pmap_t pmap, int needed, struct rwlock **lockp) +{ + struct pch new_tail; + struct pv_chunk *pc; + vm_page_t m; + int avail, free; + bool reclaimed; + + rw_assert(&pvh_global_lock, RA_LOCKED); + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + KASSERT(lockp != NULL, ("reserve_pv_entries: lockp is NULL")); + + /* + * Newly allocated PV chunks must be stored in a private list until + * the required number of PV chunks have been allocated. Otherwise, + * reclaim_pv_chunk() could recycle one of these chunks. In + * contrast, these chunks must be added to the pmap upon allocation. + */ + TAILQ_INIT(&new_tail); +retry: + avail = 0; + TAILQ_FOREACH(pc, &pmap->pm_pvchunk, pc_list) { + bit_count((bitstr_t *)pc->pc_map, 0, + sizeof(pc->pc_map) * NBBY, &free); + if (free == 0) + break; + avail += free; + if (avail >= needed) + break; + } + for (reclaimed = false; avail < needed; avail += _NPCPV) { + m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | + VM_ALLOC_WIRED); + if (m == NULL) { + m = reclaim_pv_chunk(pmap, lockp); + if (m == NULL) + goto retry; + reclaimed = true; + } + /* XXX PV STATS */ +#if 0 + dump_add_page(m->phys_addr); +#endif + pc = (void *)PHYS_TO_DMAP(m->phys_addr); + pc->pc_pmap = pmap; + pc->pc_map[0] = PC_FREE0; + pc->pc_map[1] = PC_FREE1; + pc->pc_map[2] = PC_FREE2; + TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); + TAILQ_INSERT_TAIL(&new_tail, pc, pc_lru); + + /* + * The reclaim might have freed a chunk from the current pmap. + * If that chunk contained available entries, we need to + * re-count the number of available entries. + */ + if (reclaimed) + goto retry; + } + if (!TAILQ_EMPTY(&new_tail)) { + mtx_lock(&pv_chunks_mutex); + TAILQ_CONCAT(&pv_chunks, &new_tail, pc_lru); + mtx_unlock(&pv_chunks_mutex); + } +} + /* * First find and then remove the pv entry for the specified pmap and virtual * address from the specified pv list. Returns the pv entry if found and NULL @@ -1632,7 +1820,7 @@ pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va) pv = pmap_pvh_remove(pvh, pmap, va); - KASSERT(pv != NULL, ("pmap_pvh_free: pv not found")); + KASSERT(pv != NULL, ("pmap_pvh_free: pv not found for %#lx", va)); free_pv_entry(pmap, pv); } @@ -1659,6 +1847,222 @@ pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m, return (FALSE); } +/* + * After demotion from a 2MB page mapping to 512 4KB page mappings, + * destroy the pv entry for the 2MB page mapping and reinstantiate the pv + * entries for each of the 4KB page mappings. + */ +static void __unused +pmap_pv_demote_l2(pmap_t pmap, vm_offset_t va, vm_paddr_t pa, + struct rwlock **lockp) +{ + struct md_page *pvh; + struct pv_chunk *pc; + pv_entry_t pv; + vm_page_t m; + vm_offset_t va_last; + int bit, field; + + rw_assert(&pvh_global_lock, RA_LOCKED); + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa); + + /* + * Transfer the 2mpage's pv entry for this mapping to the first + * page's pv list. Once this transfer begins, the pv list lock + * must not be released until the last pv entry is reinstantiated. + */ + pvh = pa_to_pvh(pa); + va &= ~L2_OFFSET; + pv = pmap_pvh_remove(pvh, pmap, va); + KASSERT(pv != NULL, ("pmap_pv_demote_l2: pv not found")); + m = PHYS_TO_VM_PAGE(pa); + TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); + m->md.pv_gen++; + /* Instantiate the remaining 511 pv entries. */ + va_last = va + L2_SIZE - PAGE_SIZE; + for (;;) { + pc = TAILQ_FIRST(&pmap->pm_pvchunk); + KASSERT(pc->pc_map[0] != 0 || pc->pc_map[1] != 0 || + pc->pc_map[2] != 0, ("pmap_pv_demote_l2: missing spare")); + for (field = 0; field < _NPCM; field++) { + while (pc->pc_map[field] != 0) { + bit = ffsl(pc->pc_map[field]) - 1; + pc->pc_map[field] &= ~(1ul << bit); + pv = &pc->pc_pventry[field * 64 + bit]; + va += PAGE_SIZE; + pv->pv_va = va; + m++; + KASSERT((m->oflags & VPO_UNMANAGED) == 0, + ("pmap_pv_demote_l2: page %p is not managed", m)); + TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); + m->md.pv_gen++; + if (va == va_last) + goto out; + } + } + TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); + TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); + } +out: + if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 && pc->pc_map[2] == 0) { + TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); + TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); + } + /* XXX PV stats */ +} + +#if VM_NRESERVLEVEL > 0 +static void +pmap_pv_promote_l2(pmap_t pmap, vm_offset_t va, vm_paddr_t pa, + struct rwlock **lockp) +{ + struct md_page *pvh; + pv_entry_t pv; + vm_page_t m; + vm_offset_t va_last; + + rw_assert(&pvh_global_lock, RA_LOCKED); + KASSERT((va & L2_OFFSET) == 0, + ("pmap_pv_promote_l2: misaligned va %#lx", va)); + + CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa); + + m = PHYS_TO_VM_PAGE(pa); + pv = pmap_pvh_remove(&m->md, pmap, va); + KASSERT(pv != NULL, ("pmap_pv_promote_l2: pv for %#lx not found", va)); + pvh = pa_to_pvh(pa); + TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); + pvh->pv_gen++; + + va_last = va + L2_SIZE - PAGE_SIZE; + do { + m++; + va += PAGE_SIZE; + pmap_pvh_free(&m->md, pmap, va); + } while (va < va_last); +} +#endif /* VM_NRESERVLEVEL > 0 */ + +/* + * Create the PV entry for a 2MB page mapping. Always returns true unless the + * flag PMAP_ENTER_NORECLAIM is specified. If that flag is specified, returns + * false if the PV entry cannot be allocated without resorting to reclamation. + */ +static bool +pmap_pv_insert_l2(pmap_t pmap, vm_offset_t va, pd_entry_t l2e, u_int flags, + struct rwlock **lockp) +{ + struct md_page *pvh; + pv_entry_t pv; + vm_paddr_t pa; + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + /* Pass NULL instead of the lock pointer to disable reclamation. */ + if ((pv = get_pv_entry(pmap, (flags & PMAP_ENTER_NORECLAIM) != 0 ? + NULL : lockp)) == NULL) + return (false); + pv->pv_va = va; + pa = PTE_TO_PHYS(l2e); + CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa); + pvh = pa_to_pvh(pa); + TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); + pvh->pv_gen++; + return (true); +} + +static void +pmap_remove_kernel_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t va) +{ + pt_entry_t newl2, oldl2; + vm_page_t ml3; + vm_paddr_t ml3pa; + + KASSERT(!VIRT_IN_DMAP(va), ("removing direct mapping of %#lx", va)); + KASSERT(pmap == kernel_pmap, ("pmap %p is not kernel_pmap", pmap)); + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + + ml3 = pmap_remove_pt_page(pmap, va); + if (ml3 == NULL) + panic("pmap_remove_kernel_l2: Missing pt page"); + + ml3pa = VM_PAGE_TO_PHYS(ml3); + newl2 = ml3pa | PTE_V; + + /* + * Initialize the page table page. + */ + pagezero((void *)PHYS_TO_DMAP(ml3pa)); + + /* + * Demote the mapping. + */ + oldl2 = pmap_load_store(l2, newl2); + KASSERT(oldl2 == 0, ("%s: found existing mapping at %p: %#lx", + __func__, l2, oldl2)); +} + +/* + * pmap_remove_l2: Do the things to unmap a level 2 superpage. + */ +static int +pmap_remove_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t sva, + pd_entry_t l1e, struct spglist *free, struct rwlock **lockp) +{ + struct md_page *pvh; + pt_entry_t oldl2; + vm_offset_t eva, va; + vm_page_t m, ml3; + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + KASSERT((sva & L2_OFFSET) == 0, ("pmap_remove_l2: sva is not aligned")); + oldl2 = pmap_load_clear(l2); + KASSERT((oldl2 & PTE_RWX) != 0, + ("pmap_remove_l2: L2e %lx is not a superpage mapping", oldl2)); + + /* + * The sfence.vma documentation states that it is sufficient to specify + * a single address within a superpage mapping. However, since we do + * not perform any invalidation upon promotion, TLBs may still be + * caching 4KB mappings within the superpage, so we must invalidate the + * entire range. + */ + pmap_invalidate_range(pmap, sva, sva + L2_SIZE); + if ((oldl2 & PTE_SW_WIRED) != 0) + pmap->pm_stats.wired_count -= L2_SIZE / PAGE_SIZE; + pmap_resident_count_dec(pmap, L2_SIZE / PAGE_SIZE); + if ((oldl2 & PTE_SW_MANAGED) != 0) { + CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, PTE_TO_PHYS(oldl2)); + pvh = pa_to_pvh(PTE_TO_PHYS(oldl2)); + pmap_pvh_free(pvh, pmap, sva); + eva = sva + L2_SIZE; + for (va = sva, m = PHYS_TO_VM_PAGE(PTE_TO_PHYS(oldl2)); + va < eva; va += PAGE_SIZE, m++) { + if ((oldl2 & PTE_D) != 0) + vm_page_dirty(m); + if ((oldl2 & PTE_A) != 0) + vm_page_aflag_set(m, PGA_REFERENCED); + if (TAILQ_EMPTY(&m->md.pv_list) && + TAILQ_EMPTY(&pvh->pv_list)) + vm_page_aflag_clear(m, PGA_WRITEABLE); + } + } + if (pmap == kernel_pmap) { + pmap_remove_kernel_l2(pmap, l2, sva); + } else { + ml3 = pmap_remove_pt_page(pmap, sva); + if (ml3 != NULL) { + pmap_resident_count_dec(pmap, 1); + KASSERT(ml3->wire_count == Ln_ENTRIES, + ("pmap_remove_l2: l3 page wire count error")); + ml3->wire_count = 1; + vm_page_unwire_noq(ml3); + pmap_add_delayed_free_list(ml3, free, FALSE); + } + } + return (pmap_unuse_pt(pmap, sva, l1e, free)); +} + /* * pmap_remove_l3: do the things to unmap a page in a process */ @@ -1687,7 +2091,7 @@ pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t va, pmap_pvh_free(&m->md, pmap, va); } - return (pmap_unuse_l3(pmap, va, l2e, free)); + return (pmap_unuse_pt(pmap, va, l2e, free)); } /* @@ -1699,11 +2103,11 @@ pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t va, void pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { + struct spglist free; struct rwlock *lock; vm_offset_t va, va_next; - pd_entry_t *l1, *l2; - pt_entry_t l3_pte, *l3; - struct spglist free; + pd_entry_t *l1, *l2, l2e; + pt_entry_t *l3; /* * Perform an unsynchronized read. This is, however, safe. @@ -1739,16 +2143,22 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) l2 = pmap_l1_to_l2(l1, sva); if (l2 == NULL) continue; - - l3_pte = pmap_load(l2); - - /* - * Weed out invalid mappings. - */ - if (l3_pte == 0) - continue; - if ((pmap_load(l2) & PTE_RX) != 0) + if ((l2e = pmap_load(l2)) == 0) continue; + if ((l2e & PTE_RWX) != 0) { + if (sva + L2_SIZE == va_next && eva >= va_next) { + (void)pmap_remove_l2(pmap, l2, sva, + pmap_load(l1), &free, &lock); + continue; + } else if (!pmap_demote_l2_locked(pmap, l2, sva, + &lock)) { + /* + * The large page mapping was destroyed. + */ + continue; + } + l2e = pmap_load(l2); + } /* * Limit our scan to either the end of the va represented @@ -1761,8 +2171,6 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) va = va_next; for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++, sva += L3_SIZE) { - if (l3 == NULL) - panic("l3 == NULL"); if (pmap_load(l3) == 0) { if (va != va_next) { pmap_invalidate_range(pmap, va, sva); @@ -1772,8 +2180,7 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) } if (va == va_next) va = sva; - if (pmap_remove_l3(pmap, l3, sva, l3_pte, &free, - &lock)) { + if (pmap_remove_l3(pmap, l3, sva, l2e, &free, &lock)) { sva += L3_SIZE; break; } @@ -1783,7 +2190,7 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) } if (lock != NULL) rw_wunlock(lock); - rw_runlock(&pvh_global_lock); + rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); vm_page_free_pages_toq(&free, false); } @@ -1804,42 +2211,54 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) void pmap_remove_all(vm_page_t m) { - pv_entry_t pv; - pmap_t pmap; - pt_entry_t *l3, tl3; - pd_entry_t *l2, tl2; struct spglist free; + struct md_page *pvh; + pmap_t pmap; + pt_entry_t *l3, l3e; + pd_entry_t *l2, l2e; + pv_entry_t pv; + vm_offset_t va; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_remove_all: page %p is not managed", m)); SLIST_INIT(&free); + pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : + pa_to_pvh(VM_PAGE_TO_PHYS(m)); + rw_wlock(&pvh_global_lock); + while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) { + pmap = PV_PMAP(pv); + PMAP_LOCK(pmap); + va = pv->pv_va; + l2 = pmap_l2(pmap, va); + (void)pmap_demote_l2(pmap, l2, va); + PMAP_UNLOCK(pmap); + } while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pmap_resident_count_dec(pmap, 1); l2 = pmap_l2(pmap, pv->pv_va); KASSERT(l2 != NULL, ("pmap_remove_all: no l2 table found")); - tl2 = pmap_load(l2); + l2e = pmap_load(l2); - KASSERT((tl2 & PTE_RX) == 0, - ("pmap_remove_all: found a table when expecting " - "a block in %p's pv list", m)); + KASSERT((l2e & PTE_RX) == 0, + ("pmap_remove_all: found a superpage in %p's pv list", m)); l3 = pmap_l2_to_l3(l2, pv->pv_va); - tl3 = pmap_load_clear(l3); + l3e = pmap_load_clear(l3); pmap_invalidate_page(pmap, pv->pv_va); - if (tl3 & PTE_SW_WIRED) + if (l3e & PTE_SW_WIRED) pmap->pm_stats.wired_count--; - if ((tl3 & PTE_A) != 0) + if ((l3e & PTE_A) != 0) vm_page_aflag_set(m, PGA_REFERENCED); /* * Update the vm_page_t clean and reference bits. */ - if ((tl3 & PTE_D) != 0) + if ((l3e & PTE_D) != 0) vm_page_dirty(m); - pmap_unuse_l3(pmap, pv->pv_va, pmap_load(l2), &free); + pmap_unuse_pt(pmap, pv->pv_va, pmap_load(l2), &free); TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; free_pv_entry(pmap, pv); @@ -1857,10 +2276,12 @@ pmap_remove_all(vm_page_t m) void pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) { - pd_entry_t *l1, *l2; + pd_entry_t *l1, *l2, l2e; pt_entry_t *l3, l3e, mask; vm_page_t m; - vm_offset_t va_next; + vm_paddr_t pa; + vm_offset_t va, va_next; + bool anychanged, pv_lists_locked; if ((prot & VM_PROT_READ) == VM_PROT_NONE) { pmap_remove(pmap, sva, eva); @@ -1871,12 +2292,14 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) (VM_PROT_WRITE | VM_PROT_EXECUTE)) return; + anychanged = false; + pv_lists_locked = false; mask = 0; if ((prot & VM_PROT_WRITE) == 0) mask |= PTE_W | PTE_D; if ((prot & VM_PROT_EXECUTE) == 0) mask |= PTE_X; - +resume: PMAP_LOCK(pmap); for (; sva < eva; sva = va_next) { l1 = pmap_l1(pmap, sva); @@ -1892,10 +2315,41 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) va_next = eva; l2 = pmap_l1_to_l2(l1, sva); - if (l2 == NULL || pmap_load(l2) == 0) - continue; - if ((pmap_load(l2) & PTE_RX) != 0) + if (l2 == NULL || (l2e = pmap_load(l2)) == 0) continue; + if ((l2e & PTE_RWX) != 0) { + if (sva + L2_SIZE == va_next && eva >= va_next) { +retryl2: + if ((l2e & (PTE_SW_MANAGED | PTE_D)) == + (PTE_SW_MANAGED | PTE_D)) { + pa = PTE_TO_PHYS(l2e); + for (va = sva, m = PHYS_TO_VM_PAGE(pa); + va < va_next; m++, va += PAGE_SIZE) + vm_page_dirty(m); + } + if (!atomic_fcmpset_long(l2, &l2e, l2e & ~mask)) + goto retryl2; + anychanged = true; + } else { + if (!pv_lists_locked) { + pv_lists_locked = true; + if (!rw_try_rlock(&pvh_global_lock)) { + if (anychanged) + pmap_invalidate_all( + pmap); + PMAP_UNLOCK(pmap); + rw_rlock(&pvh_global_lock); + goto resume; + } + } + if (!pmap_demote_l2(pmap, l2, sva)) { + /* + * The large page mapping was destroyed. + */ + continue; + } + } + } if (va_next > eva) va_next = eva; @@ -1903,7 +2357,7 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++, sva += L3_SIZE) { l3e = pmap_load(l3); -retry: +retryl3: if ((l3e & PTE_V) == 0) continue; if ((prot & VM_PROT_WRITE) == 0 && @@ -1913,59 +2367,235 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) vm_page_dirty(m); } if (!atomic_fcmpset_long(l3, &l3e, l3e & ~mask)) - goto retry; - /* XXX: Use pmap_invalidate_range */ - pmap_invalidate_page(pmap, sva); + goto retryl3; + anychanged = true; } } + if (anychanged) + pmap_invalidate_all(pmap); + if (pv_lists_locked) + rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); } int pmap_fault_fixup(pmap_t pmap, vm_offset_t va, vm_prot_t ftype) { - pt_entry_t orig_l3; - pt_entry_t new_l3; - pt_entry_t *l3; + pd_entry_t *l2, l2e; + pt_entry_t bits, *pte, oldpte; int rv; rv = 0; - PMAP_LOCK(pmap); - - l3 = pmap_l3(pmap, va); - if (l3 == NULL) - goto done; - - orig_l3 = pmap_load(l3); - if ((orig_l3 & PTE_V) == 0 || - (ftype == VM_PROT_WRITE && (orig_l3 & PTE_W) == 0) || - (ftype == VM_PROT_EXECUTE && (orig_l3 & PTE_X) == 0) || - (ftype == VM_PROT_READ && (orig_l3 & PTE_R) == 0)) - goto done; - - new_l3 = orig_l3 | PTE_A; - if (ftype == VM_PROT_WRITE) - new_l3 |= PTE_D; - - if (orig_l3 != new_l3) { - pmap_store(l3, new_l3); - pmap_invalidate_page(pmap, va); - rv = 1; + l2 = pmap_l2(pmap, va); + if (l2 == NULL || ((l2e = pmap_load(l2)) & PTE_V) == 0) goto done; + if ((l2e & PTE_RWX) == 0) { + pte = pmap_l2_to_l3(l2, va); + if (pte == NULL || ((oldpte = pmap_load(pte) & PTE_V)) == 0) + goto done; + } else { + pte = l2; + oldpte = l2e; } - /* - * XXX: This case should never happen since it means - * the PTE shouldn't have resulted in a fault. - */ + if ((pmap != kernel_pmap && (oldpte & PTE_U) == 0) || + (ftype == VM_PROT_WRITE && (oldpte & PTE_W) == 0) || + (ftype == VM_PROT_EXECUTE && (oldpte & PTE_X) == 0) || + (ftype == VM_PROT_READ && (oldpte & PTE_R) == 0)) + goto done; + bits = PTE_A; + if (ftype == VM_PROT_WRITE) + bits |= PTE_D; + + /* + * Spurious faults can occur if the implementation caches invalid + * entries in the TLB, or if simultaneous accesses on multiple CPUs + * race with each other. + */ + if ((oldpte & bits) != bits) + pmap_store_bits(pte, bits); + sfence_vma(); + rv = 1; done: PMAP_UNLOCK(pmap); - return (rv); } +static bool +pmap_demote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t va) +{ + struct rwlock *lock; + bool rv; + + lock = NULL; + rv = pmap_demote_l2_locked(pmap, l2, va, &lock); + if (lock != NULL) + rw_wunlock(lock); + return (rv); +} + +/* + * Tries to demote a 2MB page mapping. If demotion fails, the 2MB page + * mapping is invalidated. + */ +static bool +pmap_demote_l2_locked(pmap_t pmap, pd_entry_t *l2, vm_offset_t va, + struct rwlock **lockp) +{ + struct spglist free; + vm_page_t mpte; + pd_entry_t newl2, oldl2; + pt_entry_t *firstl3, newl3; + vm_paddr_t mptepa; + int i; + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + + oldl2 = pmap_load(l2); + KASSERT((oldl2 & PTE_RWX) != 0, + ("pmap_demote_l2_locked: oldl2 is not a leaf entry")); + if ((oldl2 & PTE_A) == 0 || (mpte = pmap_remove_pt_page(pmap, va)) == + NULL) { + if ((oldl2 & PTE_A) == 0 || (mpte = vm_page_alloc(NULL, + pmap_l2_pindex(va), (VIRT_IN_DMAP(va) ? VM_ALLOC_INTERRUPT : + VM_ALLOC_NORMAL) | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == + NULL) { + SLIST_INIT(&free); + (void)pmap_remove_l2(pmap, l2, va & ~L2_OFFSET, + pmap_load(pmap_l1(pmap, va)), &free, lockp); + vm_page_free_pages_toq(&free, true); + CTR2(KTR_PMAP, "pmap_demote_l2_locked: " + "failure for va %#lx in pmap %p", va, pmap); + return (false); + } + if (va < VM_MAXUSER_ADDRESS) + pmap_resident_count_inc(pmap, 1); + } + mptepa = VM_PAGE_TO_PHYS(mpte); + firstl3 = (pt_entry_t *)PHYS_TO_DMAP(mptepa); + newl2 = ((mptepa / PAGE_SIZE) << PTE_PPN0_S) | PTE_V; + KASSERT((oldl2 & PTE_A) != 0, + ("pmap_demote_l2_locked: oldl2 is missing PTE_A")); + KASSERT((oldl2 & (PTE_D | PTE_W)) != PTE_W, + ("pmap_demote_l2_locked: oldl2 is missing PTE_D")); + newl3 = oldl2; + + /* + * If the page table page is new, initialize it. + */ + if (mpte->wire_count == 1) { + mpte->wire_count = Ln_ENTRIES; + for (i = 0; i < Ln_ENTRIES; i++) + pmap_store(firstl3 + i, newl3 + (i << PTE_PPN0_S)); + } + KASSERT(PTE_TO_PHYS(pmap_load(firstl3)) == PTE_TO_PHYS(newl3), + ("pmap_demote_l2_locked: firstl3 and newl3 map different physical " + "addresses")); + + /* + * If the mapping has changed attributes, update the page table + * entries. + */ + if ((pmap_load(firstl3) & PTE_PROMOTE) != (newl3 & PTE_PROMOTE)) + for (i = 0; i < Ln_ENTRIES; i++) + pmap_store(firstl3 + i, newl3 + (i << PTE_PPN0_S)); + + /* + * The spare PV entries must be reserved prior to demoting the + * mapping, that is, prior to changing the L2 entry. Otherwise, the + * state of the L2 entry and the PV lists will be inconsistent, which + * can result in reclaim_pv_chunk() attempting to remove a PV entry from + * the wrong PV list and pmap_pv_demote_l2() failing to find the + * expected PV entry for the 2MB page mapping that is being demoted. + */ + if ((oldl2 & PTE_SW_MANAGED) != 0) + reserve_pv_entries(pmap, Ln_ENTRIES - 1, lockp); + + /* + * Demote the mapping. + */ + pmap_store(l2, newl2); + + /* + * Demote the PV entry. + */ + if ((oldl2 & PTE_SW_MANAGED) != 0) + pmap_pv_demote_l2(pmap, va, PTE_TO_PHYS(oldl2), lockp); + + atomic_add_long(&pmap_l2_demotions, 1); + CTR2(KTR_PMAP, "pmap_demote_l2_locked: success for va %#lx in pmap %p", + va, pmap); + return (true); +} + +#if VM_NRESERVLEVEL > 0 +static void +pmap_promote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t va, + struct rwlock **lockp) +{ + pt_entry_t *firstl3, *l3; + vm_paddr_t pa; + vm_page_t ml3; + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + + va &= ~L2_OFFSET; + KASSERT((pmap_load(l2) & PTE_RWX) == 0, + ("pmap_promote_l2: invalid l2 entry %p", l2)); + + firstl3 = (pt_entry_t *)PHYS_TO_DMAP(PTE_TO_PHYS(pmap_load(l2))); + pa = PTE_TO_PHYS(pmap_load(firstl3)); + if ((pa & L2_OFFSET) != 0) { + CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx pmap %p", + va, pmap); + atomic_add_long(&pmap_l2_p_failures, 1); + return; + } + + pa += PAGE_SIZE; + for (l3 = firstl3 + 1; l3 < firstl3 + Ln_ENTRIES; l3++) { + if (PTE_TO_PHYS(pmap_load(l3)) != pa) { + CTR2(KTR_PMAP, + "pmap_promote_l2: failure for va %#lx pmap %p", + va, pmap); + atomic_add_long(&pmap_l2_p_failures, 1); + return; + } + if ((pmap_load(l3) & PTE_PROMOTE) != + (pmap_load(firstl3) & PTE_PROMOTE)) { + CTR2(KTR_PMAP, + "pmap_promote_l2: failure for va %#lx pmap %p", + va, pmap); + atomic_add_long(&pmap_l2_p_failures, 1); + return; + } + pa += PAGE_SIZE; + } + + ml3 = PHYS_TO_VM_PAGE(PTE_TO_PHYS(pmap_load(l2))); + KASSERT(ml3->pindex == pmap_l2_pindex(va), + ("pmap_promote_l2: page table page's pindex is wrong")); + if (pmap_insert_pt_page(pmap, ml3)) { + CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx pmap %p", + va, pmap); + atomic_add_long(&pmap_l2_p_failures, 1); + return; + } + + if ((pmap_load(firstl3) & PTE_SW_MANAGED) != 0) + pmap_pv_promote_l2(pmap, va, PTE_TO_PHYS(pmap_load(firstl3)), + lockp); + + pmap_store(l2, pmap_load(firstl3)); + + atomic_add_long(&pmap_l2_promotions, 1); + CTR2(KTR_PMAP, "pmap_promote_l2: success for va %#lx in pmap %p", va, + pmap); +} +#endif + /* * Insert the given physical page (p) at * the specified virtual address (v) in the @@ -1980,20 +2610,19 @@ pmap_fault_fixup(pmap_t pmap, vm_offset_t va, vm_prot_t ftype) */ int pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, - u_int flags, int8_t psind __unused) + u_int flags, int8_t psind) { struct rwlock *lock; - pd_entry_t *l1, *l2; + pd_entry_t *l1, *l2, l2e; pt_entry_t new_l3, orig_l3; pt_entry_t *l3; pv_entry_t pv; vm_paddr_t opa, pa, l2_pa, l3_pa; vm_page_t mpte, om, l2_m, l3_m; - boolean_t nosleep; pt_entry_t entry; - pn_t l2_pn; - pn_t l3_pn; - pn_t pn; + pn_t l2_pn, l3_pn, pn; + int rv; + bool nosleep; va = trunc_page(va); if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m)) @@ -2008,7 +2637,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, new_l3 |= PTE_D; if (prot & VM_PROT_WRITE) new_l3 |= PTE_W; - if ((va >> 63) == 0) + if (va < VM_MAX_USER_ADDRESS) new_l3 |= PTE_U; new_l3 |= (pn << PTE_PPN0_S); @@ -2028,13 +2657,29 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, CTR2(KTR_PMAP, "pmap_enter: %.16lx -> %.16lx", va, pa); - mpte = NULL; - lock = NULL; + mpte = NULL; rw_rlock(&pvh_global_lock); PMAP_LOCK(pmap); + if (psind == 1) { + /* Assert the required virtual and physical alignment. */ + KASSERT((va & L2_OFFSET) == 0, + ("pmap_enter: va %#lx unaligned", va)); + KASSERT(m->psind > 0, ("pmap_enter: m->psind < psind")); + rv = pmap_enter_l2(pmap, va, new_l3, flags, m, &lock); + goto out; + } - if (va < VM_MAXUSER_ADDRESS) { + l2 = pmap_l2(pmap, va); + if (l2 != NULL && ((l2e = pmap_load(l2)) & PTE_V) != 0 && + ((l2e & PTE_RWX) == 0 || pmap_demote_l2_locked(pmap, l2, + va, &lock))) { + l3 = pmap_l2_to_l3(l2, va); + if (va < VM_MAXUSER_ADDRESS) { + mpte = PHYS_TO_VM_PAGE(PTE_TO_PHYS(pmap_load(l2))); + mpte->wire_count++; + } + } else if (va < VM_MAXUSER_ADDRESS) { nosleep = (flags & PMAP_ENTER_NOSLEEP) != 0; mpte = pmap_alloc_l3(pmap, va, nosleep ? NULL : &lock); if (mpte == NULL && nosleep) { @@ -2050,7 +2695,6 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, l3 = pmap_l3(pmap, va); /* TODO: This is not optimal, but should mostly work */ if (l3 == NULL) { - l2 = pmap_l2(pmap, va); if (l2 == NULL) { l2_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | @@ -2071,9 +2715,6 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, l2 = pmap_l1_to_l2(l1, va); } - KASSERT(l2 != NULL, - ("No l2 table after allocating one")); - l3_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (l3_m == NULL) @@ -2162,6 +2803,8 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_aflag_set(om, PGA_REFERENCED); CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, opa); pv = pmap_pvh_remove(&om->md, pmap, va); + KASSERT(pv != NULL, + ("pmap_enter: no PV entry for %#lx", va)); if ((new_l3 & PTE_SW_MANAGED) == 0) free_pv_entry(pmap, pv); if ((om->aflags & PGA_WRITEABLE) != 0 && @@ -2216,10 +2859,163 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, pmap_store(l3, new_l3); } +#if VM_NRESERVLEVEL > 0 + if (mpte != NULL && mpte->wire_count == Ln_ENTRIES && + pmap_ps_enabled(pmap) && + (m->flags & PG_FICTITIOUS) == 0 && + vm_reserv_level_iffullpop(m) == 0) + pmap_promote_l2(pmap, l2, va, &lock); +#endif + + rv = KERN_SUCCESS; +out: if (lock != NULL) rw_wunlock(lock); rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); + return (rv); +} + +/* + * Tries to create a read- and/or execute-only 2MB page mapping. Returns true + * if successful. Returns false if (1) a page table page cannot be allocated + * without sleeping, (2) a mapping already exists at the specified virtual + * address, or (3) a PV entry cannot be allocated without reclaiming another + * PV entry. + */ +static bool +pmap_enter_2mpage(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, + struct rwlock **lockp) +{ + pd_entry_t new_l2; + pn_t pn; + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + + pn = VM_PAGE_TO_PHYS(m) / PAGE_SIZE; + new_l2 = (pd_entry_t)((pn << PTE_PPN0_S) | PTE_R | PTE_V); + if ((m->oflags & VPO_UNMANAGED) == 0) + new_l2 |= PTE_SW_MANAGED; + if ((prot & VM_PROT_EXECUTE) != 0) + new_l2 |= PTE_X; + if (va < VM_MAXUSER_ADDRESS) + new_l2 |= PTE_U; + return (pmap_enter_l2(pmap, va, new_l2, PMAP_ENTER_NOSLEEP | + PMAP_ENTER_NOREPLACE | PMAP_ENTER_NORECLAIM, NULL, lockp) == + KERN_SUCCESS); +} + +/* + * Tries to create the specified 2MB page mapping. Returns KERN_SUCCESS if + * the mapping was created, and either KERN_FAILURE or KERN_RESOURCE_SHORTAGE + * otherwise. Returns KERN_FAILURE if PMAP_ENTER_NOREPLACE was specified and + * a mapping already exists at the specified virtual address. Returns + * KERN_RESOURCE_SHORTAGE if PMAP_ENTER_NOSLEEP was specified and a page table + * page allocation failed. Returns KERN_RESOURCE_SHORTAGE if + * PMAP_ENTER_NORECLAIM was specified and a PV entry allocation failed. + * + * The parameter "m" is only used when creating a managed, writeable mapping. + */ +static int +pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t new_l2, u_int flags, + vm_page_t m, struct rwlock **lockp) +{ + struct spglist free; + pd_entry_t *l2, *l3, oldl2; + vm_offset_t sva; + vm_page_t l2pg, mt; + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + + if ((l2pg = pmap_alloc_l2(pmap, va, (flags & PMAP_ENTER_NOSLEEP) != 0 ? + NULL : lockp)) == NULL) { + CTR2(KTR_PMAP, "pmap_enter_l2: failure for va %#lx in pmap %p", + va, pmap); + return (KERN_RESOURCE_SHORTAGE); + } + + l2 = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(l2pg)); + l2 = &l2[pmap_l2_index(va)]; + if ((oldl2 = pmap_load(l2)) != 0) { + KASSERT(l2pg->wire_count > 1, + ("pmap_enter_l2: l2pg's wire count is too low")); + if ((flags & PMAP_ENTER_NOREPLACE) != 0) { + l2pg->wire_count--; + CTR2(KTR_PMAP, + "pmap_enter_l2: failure for va %#lx in pmap %p", + va, pmap); + return (KERN_FAILURE); + } + SLIST_INIT(&free); + if ((oldl2 & PTE_RWX) != 0) + (void)pmap_remove_l2(pmap, l2, va, + pmap_load(pmap_l1(pmap, va)), &free, lockp); + else + for (sva = va; sva < va + L2_SIZE; sva += PAGE_SIZE) { + l3 = pmap_l2_to_l3(l2, sva); + if ((pmap_load(l3) & PTE_V) != 0 && + pmap_remove_l3(pmap, l3, sva, oldl2, &free, + lockp) != 0) + break; + } + vm_page_free_pages_toq(&free, true); + if (va >= VM_MAXUSER_ADDRESS) { + mt = PHYS_TO_VM_PAGE(PTE_TO_PHYS(pmap_load(l2))); + if (pmap_insert_pt_page(pmap, mt)) { + /* + * XXX Currently, this can't happen bacuse + * we do not perform pmap_enter(psind == 1) + * on the kernel pmap. + */ + panic("pmap_enter_l2: trie insert failed"); + } + } else + KASSERT(pmap_load(l2) == 0, + ("pmap_enter_l2: non-zero L2 entry %p", l2)); + } + + if ((new_l2 & PTE_SW_MANAGED) != 0) { + /* + * Abort this mapping if its PV entry could not be created. + */ + if (!pmap_pv_insert_l2(pmap, va, new_l2, flags, lockp)) { + SLIST_INIT(&free); + if (pmap_unwire_ptp(pmap, va, l2pg, &free)) { + /* + * Although "va" is not mapped, paging-structure + * caches could nonetheless have entries that + * refer to the freed page table pages. + * Invalidate those entries. + */ + pmap_invalidate_page(pmap, va); + vm_page_free_pages_toq(&free, true); + } + CTR2(KTR_PMAP, + "pmap_enter_l2: failure for va %#lx in pmap %p", + va, pmap); + return (KERN_RESOURCE_SHORTAGE); + } + if ((new_l2 & PTE_W) != 0) + for (mt = m; mt < &m[L2_SIZE / PAGE_SIZE]; mt++) + vm_page_aflag_set(mt, PGA_WRITEABLE); + } + + /* + * Increment counters. + */ + if ((new_l2 & PTE_SW_WIRED) != 0) + pmap->pm_stats.wired_count += L2_SIZE / PAGE_SIZE; + pmap->pm_stats.resident_count += L2_SIZE / PAGE_SIZE; + + /* + * Map the superpage. + */ + pmap_store(l2, new_l2); + + atomic_add_long(&pmap_l2_mappings, 1); + CTR2(KTR_PMAP, "pmap_enter_l2: success for va %#lx in pmap %p", + va, pmap); + return (KERN_SUCCESS); } @@ -2254,7 +3050,13 @@ pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, PMAP_LOCK(pmap); while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { va = start + ptoa(diff); - mpte = pmap_enter_quick_locked(pmap, va, m, prot, mpte, &lock); + if ((va & L2_OFFSET) == 0 && va + L2_SIZE <= end && + m->psind == 1 && pmap_ps_enabled(pmap) && + pmap_enter_2mpage(pmap, va, m, prot, &lock)) + m = &m[L2_SIZE / PAGE_SIZE - 1]; + else + mpte = pmap_enter_quick_locked(pmap, va, m, prot, mpte, + &lock); m = TAILQ_NEXT(m, listq); } if (lock != NULL) @@ -2365,7 +3167,7 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, !pmap_try_insert_pv_entry(pmap, va, m, lockp)) { if (mpte != NULL) { SLIST_INIT(&free); - if (pmap_unwire_l3(pmap, va, mpte, &free)) { + if (pmap_unwire_ptp(pmap, va, mpte, &free)) { pmap_invalidate_page(pmap, va); vm_page_free_pages_toq(&free, false); } @@ -2429,11 +3231,12 @@ void pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t va_next; - pd_entry_t *l1, *l2; - pt_entry_t *l3; - boolean_t pv_lists_locked; + pd_entry_t *l1, *l2, l2e; + pt_entry_t *l3, l3e; + bool pv_lists_locked; - pv_lists_locked = FALSE; + pv_lists_locked = false; +retry: PMAP_LOCK(pmap); for (; sva < eva; sva = va_next) { l1 = pmap_l1(pmap, sva); @@ -2449,25 +3252,46 @@ pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) va_next = eva; l2 = pmap_l1_to_l2(l1, sva); - if (pmap_load(l2) == 0) + if ((l2e = pmap_load(l2)) == 0) continue; + if ((l2e & PTE_RWX) != 0) { + if (sva + L2_SIZE == va_next && eva >= va_next) { + if ((l2e & PTE_SW_WIRED) == 0) + panic("pmap_unwire: l2 %#jx is missing " + "PTE_SW_WIRED", (uintmax_t)l2e); + pmap_clear_bits(l2, PTE_SW_WIRED); + continue; + } else { + if (!pv_lists_locked) { + pv_lists_locked = true; + if (!rw_try_rlock(&pvh_global_lock)) { + PMAP_UNLOCK(pmap); + rw_rlock(&pvh_global_lock); + /* Repeat sva. */ + goto retry; + } + } + if (!pmap_demote_l2(pmap, l2, sva)) + panic("pmap_unwire: demotion failed"); + } + } if (va_next > eva) va_next = eva; for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++, sva += L3_SIZE) { - if (pmap_load(l3) == 0) + if ((l3e = pmap_load(l3)) == 0) continue; - if ((pmap_load(l3) & PTE_SW_WIRED) == 0) + if ((l3e & PTE_SW_WIRED) == 0) panic("pmap_unwire: l3 %#jx is missing " - "PTE_SW_WIRED", (uintmax_t)*l3); + "PTE_SW_WIRED", (uintmax_t)l3e); /* * PG_W must be cleared atomically. Although the pmap * lock synchronizes access to PG_W, another processor * could be setting PG_M and/or PG_A concurrently. */ - atomic_clear_long(l3, PTE_SW_WIRED); + pmap_clear_bits(l3, PTE_SW_WIRED); pmap->pm_stats.wired_count--; } } @@ -2595,6 +3419,7 @@ pmap_quick_remove_page(vm_offset_t addr) boolean_t pmap_page_exists_quick(pmap_t pmap, vm_page_t m) { + struct md_page *pvh; struct rwlock *lock; pv_entry_t pv; int loops = 0; @@ -2615,6 +3440,18 @@ pmap_page_exists_quick(pmap_t pmap, vm_page_t m) if (loops >= 16) break; } + if (!rv && loops < 16 && (m->flags & PG_FICTITIOUS) == 0) { + pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); + TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { + if (PV_PMAP(pv) == pmap) { + rv = TRUE; + break; + } + loops++; + if (loops >= 16) + break; + } + } rw_runlock(lock); rw_runlock(&pvh_global_lock); return (rv); @@ -2629,11 +3466,13 @@ pmap_page_exists_quick(pmap_t pmap, vm_page_t m) int pmap_page_wired_mappings(vm_page_t m) { + struct md_page *pvh; struct rwlock *lock; pmap_t pmap; + pd_entry_t *l2; pt_entry_t *l3; pv_entry_t pv; - int count, md_gen; + int count, md_gen, pvh_gen; if ((m->oflags & VPO_UNMANAGED) != 0) return (0); @@ -2659,11 +3498,72 @@ pmap_page_wired_mappings(vm_page_t m) count++; PMAP_UNLOCK(pmap); } + if ((m->flags & PG_FICTITIOUS) == 0) { + pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); + TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { + pmap = PV_PMAP(pv); + if (!PMAP_TRYLOCK(pmap)) { + md_gen = m->md.pv_gen; + pvh_gen = pvh->pv_gen; + rw_runlock(lock); + PMAP_LOCK(pmap); + rw_rlock(lock); + if (md_gen != m->md.pv_gen || + pvh_gen != pvh->pv_gen) { + PMAP_UNLOCK(pmap); + goto restart; + } + } + l2 = pmap_l2(pmap, pv->pv_va); + if ((pmap_load(l2) & PTE_SW_WIRED) != 0) + count++; + PMAP_UNLOCK(pmap); + } + } rw_runlock(lock); rw_runlock(&pvh_global_lock); return (count); } +static void +pmap_remove_pages_pv(pmap_t pmap, vm_page_t m, pv_entry_t pv, + struct spglist *free, bool superpage) +{ + struct md_page *pvh; + vm_page_t mpte, mt; + + if (superpage) { + pmap_resident_count_dec(pmap, Ln_ENTRIES); + pvh = pa_to_pvh(m->phys_addr); + TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); + pvh->pv_gen++; + if (TAILQ_EMPTY(&pvh->pv_list)) { + for (mt = m; mt < &m[Ln_ENTRIES]; mt++) + if (TAILQ_EMPTY(&mt->md.pv_list) && + (mt->aflags & PGA_WRITEABLE) != 0) + vm_page_aflag_clear(mt, PGA_WRITEABLE); + } + mpte = pmap_remove_pt_page(pmap, pv->pv_va); + if (mpte != NULL) { + pmap_resident_count_dec(pmap, 1); + KASSERT(mpte->wire_count == Ln_ENTRIES, + ("pmap_remove_pages: pte page wire count error")); + mpte->wire_count = 0; + pmap_add_delayed_free_list(mpte, free, FALSE); + } + } else { + pmap_resident_count_dec(pmap, 1); + TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); + m->md.pv_gen++; + if (TAILQ_EMPTY(&m->md.pv_list) && + (m->aflags & PGA_WRITEABLE) != 0) { + pvh = pa_to_pvh(m->phys_addr); + if (TAILQ_EMPTY(&pvh->pv_list)) + vm_page_aflag_clear(m, PGA_WRITEABLE); + } + } +} + /* * Destroy all managed, non-wired mappings in the given user-space * pmap. This pmap cannot be active on any processor besides the @@ -2683,17 +3583,17 @@ pmap_page_wired_mappings(vm_page_t m) void pmap_remove_pages(pmap_t pmap) { - pd_entry_t ptepde, *l2; - pt_entry_t *l3, tl3; struct spglist free; - vm_page_t m; + pd_entry_t ptepde; + pt_entry_t *pte, tpte; + vm_page_t m, mt; pv_entry_t pv; struct pv_chunk *pc, *npc; struct rwlock *lock; int64_t bit; uint64_t inuse, bitmask; int allfree, field, freed, idx; - vm_paddr_t pa; + bool superpage; lock = NULL; @@ -2712,53 +3612,57 @@ pmap_remove_pages(pmap_t pmap) pv = &pc->pc_pventry[idx]; inuse &= ~bitmask; - l2 = pmap_l2(pmap, pv->pv_va); - ptepde = pmap_load(l2); - l3 = pmap_l2_to_l3(l2, pv->pv_va); - tl3 = pmap_load(l3); + pte = pmap_l1(pmap, pv->pv_va); + ptepde = pmap_load(pte); + pte = pmap_l1_to_l2(pte, pv->pv_va); + tpte = pmap_load(pte); + if ((tpte & PTE_RWX) != 0) { + superpage = true; + } else { + ptepde = tpte; + pte = pmap_l2_to_l3(pte, pv->pv_va); + tpte = pmap_load(pte); + superpage = false; + } /* * We cannot remove wired pages from a * process' mapping at this time. */ - if (tl3 & PTE_SW_WIRED) { + if (tpte & PTE_SW_WIRED) { allfree = 0; continue; } - pa = PTE_TO_PHYS(tl3); - m = PHYS_TO_VM_PAGE(pa); - KASSERT(m->phys_addr == pa, - ("vm_page_t %p phys_addr mismatch %016jx %016jx", - m, (uintmax_t)m->phys_addr, - (uintmax_t)tl3)); - + m = PHYS_TO_VM_PAGE(PTE_TO_PHYS(tpte)); KASSERT((m->flags & PG_FICTITIOUS) != 0 || m < &vm_page_array[vm_page_array_size], - ("pmap_remove_pages: bad l3 %#jx", - (uintmax_t)tl3)); + ("pmap_remove_pages: bad pte %#jx", + (uintmax_t)tpte)); - pmap_clear(l3); + pmap_clear(pte); /* * Update the vm_page_t clean/reference bits. */ - if ((tl3 & PTE_D) != 0) - vm_page_dirty(m); + if ((tpte & (PTE_D | PTE_W)) == + (PTE_D | PTE_W)) { + if (superpage) + for (mt = m; + mt < &m[Ln_ENTRIES]; mt++) + vm_page_dirty(mt); + else + vm_page_dirty(m); + } CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m); /* Mark free */ pc->pc_map[field] |= bitmask; - pmap_resident_count_dec(pmap, 1); - TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); - m->md.pv_gen++; - if (TAILQ_EMPTY(&m->md.pv_list) && - (m->aflags & PGA_WRITEABLE) != 0) - vm_page_aflag_clear(m, PGA_WRITEABLE); - - pmap_unuse_l3(pmap, pv->pv_va, ptepde, &free); + pmap_remove_pages_pv(pmap, m, pv, &free, + superpage); + pmap_unuse_pt(pmap, pv->pv_va, ptepde, &free); freed++; } } @@ -2778,20 +3682,23 @@ pmap_remove_pages(pmap_t pmap) vm_page_free_pages_toq(&free, false); } -/* - * This is used to check if a page has been accessed or modified. As we - * don't have a bit to see if it has been modified we have to assume it - * has been if the page is read/write. - */ -static boolean_t +static bool pmap_page_test_mappings(vm_page_t m, boolean_t accessed, boolean_t modified) { + struct md_page *pvh; struct rwlock *lock; + pd_entry_t *l2; + pt_entry_t *l3, mask; pv_entry_t pv; - pt_entry_t *l3, mask, value; pmap_t pmap; - int md_gen; - boolean_t rv; + int md_gen, pvh_gen; + bool rv; + + mask = 0; + if (modified) + mask |= PTE_D; + if (accessed) + mask |= PTE_A; rv = FALSE; rw_rlock(&pvh_global_lock); @@ -2811,33 +3718,34 @@ pmap_page_test_mappings(vm_page_t m, boolean_t accessed, boolean_t modified) } } l3 = pmap_l3(pmap, pv->pv_va); - mask = 0; - value = 0; - if (modified) { - mask |= PTE_D; - value |= PTE_D; - } - if (accessed) { - mask |= PTE_A; - value |= PTE_A; - } - -#if 0 - if (modified) { - mask |= ATTR_AP_RW_BIT; - value |= ATTR_AP(ATTR_AP_RW); - } - if (accessed) { - mask |= ATTR_AF | ATTR_DESCR_MASK; - value |= ATTR_AF | L3_PAGE; - } -#endif - - rv = (pmap_load(l3) & mask) == value; + rv = (pmap_load(l3) & mask) == mask; PMAP_UNLOCK(pmap); if (rv) goto out; } + if ((m->flags & PG_FICTITIOUS) == 0) { + pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); + TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { + pmap = PV_PMAP(pv); + if (!PMAP_TRYLOCK(pmap)) { + md_gen = m->md.pv_gen; + pvh_gen = pvh->pv_gen; + rw_runlock(lock); + PMAP_LOCK(pmap); + rw_rlock(lock); + if (md_gen != m->md.pv_gen || + pvh_gen != pvh->pv_gen) { + PMAP_UNLOCK(pmap); + goto restart; + } + } + l2 = pmap_l2(pmap, pv->pv_va); + rv = (pmap_load(l2) & mask) == mask; + PMAP_UNLOCK(pmap); + if (rv) + goto out; + } + } out: rw_runlock(lock); rw_runlock(&pvh_global_lock); @@ -2911,12 +3819,14 @@ pmap_is_referenced(vm_page_t m) void pmap_remove_write(vm_page_t m) { - pmap_t pmap; + struct md_page *pvh; struct rwlock *lock; - pv_entry_t pv; - pt_entry_t *l3, oldl3; - pt_entry_t newl3; - int md_gen; + pmap_t pmap; + pd_entry_t *l2; + pt_entry_t *l3, oldl3, newl3; + pv_entry_t next_pv, pv; + vm_offset_t va; + int md_gen, pvh_gen; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_remove_write: page %p is not managed", m)); @@ -2929,18 +3839,43 @@ pmap_remove_write(vm_page_t m) VM_OBJECT_ASSERT_WLOCKED(m->object); if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) return; - rw_rlock(&pvh_global_lock); lock = VM_PAGE_TO_PV_LIST_LOCK(m); + pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : + pa_to_pvh(VM_PAGE_TO_PHYS(m)); + rw_rlock(&pvh_global_lock); retry_pv_loop: rw_wlock(lock); + TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) { + pmap = PV_PMAP(pv); + if (!PMAP_TRYLOCK(pmap)) { + pvh_gen = pvh->pv_gen; + rw_wunlock(lock); + PMAP_LOCK(pmap); + rw_wlock(lock); + if (pvh_gen != pvh->pv_gen) { + PMAP_UNLOCK(pmap); + rw_wunlock(lock); + goto retry_pv_loop; + } + } + va = pv->pv_va; + l2 = pmap_l2(pmap, va); + if ((pmap_load(l2) & PTE_W) != 0) + (void)pmap_demote_l2_locked(pmap, l2, va, &lock); + KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m), + ("inconsistent pv lock %p %p for page %p", + lock, VM_PAGE_TO_PV_LIST_LOCK(m), m)); + PMAP_UNLOCK(pmap); + } TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { + pvh_gen = pvh->pv_gen; md_gen = m->md.pv_gen; rw_wunlock(lock); PMAP_LOCK(pmap); rw_wlock(lock); - if (md_gen != m->md.pv_gen) { + if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); rw_wunlock(lock); goto retry_pv_loop; @@ -2964,13 +3899,6 @@ pmap_remove_write(vm_page_t m) rw_runlock(&pvh_global_lock); } -static __inline boolean_t -safe_to_clear_referenced(pmap_t pmap, pt_entry_t pte) -{ - - return (FALSE); -} - /* * pmap_ts_referenced: * @@ -2990,38 +3918,104 @@ safe_to_clear_referenced(pmap_t pmap, pt_entry_t pte) int pmap_ts_referenced(vm_page_t m) { + struct spglist free; + struct md_page *pvh; + struct rwlock *lock; pv_entry_t pv, pvf; pmap_t pmap; - struct rwlock *lock; - pd_entry_t *l2; - pt_entry_t *l3, old_l3; + pd_entry_t *l2, l2e; + pt_entry_t *l3, l3e; vm_paddr_t pa; - int cleared, md_gen, not_cleared; - struct spglist free; + vm_offset_t va; + int md_gen, pvh_gen, ret; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_ts_referenced: page %p is not managed", m)); SLIST_INIT(&free); - cleared = 0; + ret = 0; pa = VM_PAGE_TO_PHYS(m); + pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : pa_to_pvh(pa); + lock = PHYS_TO_PV_LIST_LOCK(pa); rw_rlock(&pvh_global_lock); rw_wlock(lock); retry: - not_cleared = 0; + if ((pvf = TAILQ_FIRST(&pvh->pv_list)) == NULL) + goto small_mappings; + pv = pvf; + do { + pmap = PV_PMAP(pv); + if (!PMAP_TRYLOCK(pmap)) { + pvh_gen = pvh->pv_gen; + rw_wunlock(lock); + PMAP_LOCK(pmap); + rw_wlock(lock); + if (pvh_gen != pvh->pv_gen) { + PMAP_UNLOCK(pmap); + goto retry; + } + } + va = pv->pv_va; + l2 = pmap_l2(pmap, va); + l2e = pmap_load(l2); + if ((l2e & (PTE_W | PTE_D)) == (PTE_W | PTE_D)) { + /* + * Although l2e is mapping a 2MB page, because + * this function is called at a 4KB page granularity, + * we only update the 4KB page under test. + */ + vm_page_dirty(m); + } + if ((l2e & PTE_A) != 0) { + /* + * Since this reference bit is shared by 512 4KB + * pages, it should not be cleared every time it is + * tested. Apply a simple "hash" function on the + * physical page number, the virtual superpage number, + * and the pmap address to select one 4KB page out of + * the 512 on which testing the reference bit will + * result in clearing that reference bit. This + * function is designed to avoid the selection of the + * same 4KB page for every 2MB page mapping. + * + * On demotion, a mapping that hasn't been referenced + * is simply destroyed. To avoid the possibility of a + * subsequent page fault on a demoted wired mapping, + * always leave its reference bit set. Moreover, + * since the superpage is wired, the current state of + * its reference bit won't affect page replacement. + */ + if ((((pa >> PAGE_SHIFT) ^ (pv->pv_va >> L2_SHIFT) ^ + (uintptr_t)pmap) & (Ln_ENTRIES - 1)) == 0 && + (l2e & PTE_SW_WIRED) == 0) { + pmap_clear_bits(l2, PTE_A); + pmap_invalidate_page(pmap, va); + } + ret++; + } + PMAP_UNLOCK(pmap); + /* Rotate the PV list if it has more than one entry. */ + if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) { + TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); + TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); + pvh->pv_gen++; + } + if (ret >= PMAP_TS_REFERENCED_MAX) + goto out; + } while ((pv = TAILQ_FIRST(&pvh->pv_list)) != pvf); +small_mappings: if ((pvf = TAILQ_FIRST(&m->md.pv_list)) == NULL) goto out; pv = pvf; do { - if (pvf == NULL) - pvf = pv; pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { + pvh_gen = pvh->pv_gen; md_gen = m->md.pv_gen; rw_wunlock(lock); PMAP_LOCK(pmap); rw_wlock(lock); - if (md_gen != m->md.pv_gen) { + if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); goto retry; } @@ -3032,36 +4026,21 @@ pmap_ts_referenced(vm_page_t m) ("pmap_ts_referenced: found an invalid l2 table")); l3 = pmap_l2_to_l3(l2, pv->pv_va); - old_l3 = pmap_load(l3); - if ((old_l3 & PTE_D) != 0) + l3e = pmap_load(l3); + if ((l3e & PTE_D) != 0) vm_page_dirty(m); - if ((old_l3 & PTE_A) != 0) { - if (safe_to_clear_referenced(pmap, old_l3)) { - /* - * TODO: We don't handle the access flag - * at all. We need to be able to set it in - * the exception handler. - */ - panic("RISCVTODO: safe_to_clear_referenced\n"); - } else if ((old_l3 & PTE_SW_WIRED) == 0) { + if ((l3e & PTE_A) != 0) { + if ((l3e & PTE_SW_WIRED) == 0) { /* * Wired pages cannot be paged out so * doing accessed bit emulation for * them is wasted effort. We do the * hard work for unwired pages only. */ - pmap_remove_l3(pmap, l3, pv->pv_va, - pmap_load(l2), &free, &lock); + pmap_clear_bits(l3, PTE_A); pmap_invalidate_page(pmap, pv->pv_va); - cleared++; - if (pvf == pv) - pvf = NULL; - pv = NULL; - KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m), - ("inconsistent pv lock %p %p for page %p", - lock, VM_PAGE_TO_PV_LIST_LOCK(m), m)); - } else - not_cleared++; + } + ret++; } PMAP_UNLOCK(pmap); /* Rotate the PV list if it has more than one entry. */ @@ -3070,13 +4049,13 @@ pmap_ts_referenced(vm_page_t m) TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; } - } while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && cleared + - not_cleared < PMAP_TS_REFERENCED_MAX); + } while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && ret < + PMAP_TS_REFERENCED_MAX); out: rw_wunlock(lock); rw_runlock(&pvh_global_lock); vm_page_free_pages_toq(&free, false); - return (cleared + not_cleared); + return (ret); } /* @@ -3163,7 +4142,7 @@ pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa) l2 = pmap_l2(pmap, addr); if (l2 != NULL && ((tpte = pmap_load(l2)) & PTE_V) != 0) { - if ((tpte & (PTE_R | PTE_W | PTE_X)) != 0) { + if ((tpte & PTE_RWX) != 0) { pa = PTE_TO_PHYS(tpte) | (addr & L2_OFFSET); val = MINCORE_INCORE | MINCORE_SUPER; } else { @@ -3241,6 +4220,20 @@ void pmap_align_superpage(vm_object_t object, vm_ooffset_t offset, vm_offset_t *addr, vm_size_t size) { + vm_offset_t superpage_offset; + + if (size < L2_SIZE) + return; + if (object != NULL && (object->flags & OBJ_COLORED) != 0) + offset += ptoa(object->pg_color); + superpage_offset = offset & L2_OFFSET; + if (size - ((L2_SIZE - superpage_offset) & L2_OFFSET) < L2_SIZE || + (*addr & L2_OFFSET) == superpage_offset) + return; + if ((*addr & L2_OFFSET) < superpage_offset) + *addr = (*addr & ~L2_OFFSET) + superpage_offset; + else + *addr = ((*addr + L2_OFFSET) & ~L2_OFFSET) + superpage_offset; } /** diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c index e095ccc69edf..4bf8869b997f 100644 --- a/sys/vm/vm_fault.c +++ b/sys/vm/vm_fault.c @@ -271,7 +271,8 @@ vm_fault_soft_fast(struct faultstate *fs, vm_offset_t vaddr, vm_prot_t prot, { vm_page_t m, m_map; #if (defined(__aarch64__) || defined(__amd64__) || (defined(__arm__) && \ - __ARM_ARCH >= 6) || defined(__i386__)) && VM_NRESERVLEVEL > 0 + __ARM_ARCH >= 6) || defined(__i386__) || defined(__riscv)) && \ + VM_NRESERVLEVEL > 0 vm_page_t m_super; int flags; #endif @@ -286,7 +287,8 @@ vm_fault_soft_fast(struct faultstate *fs, vm_offset_t vaddr, vm_prot_t prot, m_map = m; psind = 0; #if (defined(__aarch64__) || defined(__amd64__) || (defined(__arm__) && \ - __ARM_ARCH >= 6) || defined(__i386__)) && VM_NRESERVLEVEL > 0 + __ARM_ARCH >= 6) || defined(__i386__) || defined(__riscv)) && \ + VM_NRESERVLEVEL > 0 if ((m->flags & PG_FICTITIOUS) == 0 && (m_super = vm_reserv_to_superpage(m)) != NULL && rounddown2(vaddr, pagesizes[m_super->psind]) >= fs->entry->start && @@ -463,7 +465,7 @@ vm_fault_populate(struct faultstate *fs, vm_prot_t prot, int fault_type, pidx += npages, m = vm_page_next(&m[npages - 1])) { vaddr = fs->entry->start + IDX_TO_OFF(pidx) - fs->entry->offset; #if defined(__aarch64__) || defined(__amd64__) || (defined(__arm__) && \ - __ARM_ARCH >= 6) || defined(__i386__) + __ARM_ARCH >= 6) || defined(__i386__) || defined(__riscv) psind = m->psind; if (psind > 0 && ((vaddr & (pagesizes[psind] - 1)) != 0 || pidx + OFF_TO_IDX(pagesizes[psind]) - 1 > pager_last || From 91c85dd88b978ea956824fd4f49c14f0a1a59c8f Mon Sep 17 00:00:00 2001 From: Mark Johnston Date: Wed, 13 Feb 2019 17:38:47 +0000 Subject: [PATCH 08/89] Implement pmap_clear_modify() for RISC-V. Reviewed by: kib Sponsored by: The FreeBSD Foundation Differential Revision: https://reviews.freebsd.org/D18875 --- sys/riscv/riscv/pmap.c | 82 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 80 insertions(+), 2 deletions(-) diff --git a/sys/riscv/riscv/pmap.c b/sys/riscv/riscv/pmap.c index 3929fe1b3e51..18400128790e 100644 --- a/sys/riscv/riscv/pmap.c +++ b/sys/riscv/riscv/pmap.c @@ -4074,6 +4074,14 @@ pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice) void pmap_clear_modify(vm_page_t m) { + struct md_page *pvh; + struct rwlock *lock; + pmap_t pmap; + pv_entry_t next_pv, pv; + pd_entry_t *l2, oldl2; + pt_entry_t *l3, oldl3; + vm_offset_t va; + int md_gen, pvh_gen; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_clear_modify: page %p is not managed", m)); @@ -4088,8 +4096,78 @@ pmap_clear_modify(vm_page_t m) */ if ((m->aflags & PGA_WRITEABLE) == 0) return; - - /* RISCVTODO: We lack support for tracking if a page is modified */ + pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : + pa_to_pvh(VM_PAGE_TO_PHYS(m)); + lock = VM_PAGE_TO_PV_LIST_LOCK(m); + rw_rlock(&pvh_global_lock); + rw_wlock(lock); +restart: + TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) { + pmap = PV_PMAP(pv); + if (!PMAP_TRYLOCK(pmap)) { + pvh_gen = pvh->pv_gen; + rw_wunlock(lock); + PMAP_LOCK(pmap); + rw_wlock(lock); + if (pvh_gen != pvh->pv_gen) { + PMAP_UNLOCK(pmap); + goto restart; + } + } + va = pv->pv_va; + l2 = pmap_l2(pmap, va); + oldl2 = pmap_load(l2); + if ((oldl2 & PTE_W) != 0) { + if (pmap_demote_l2_locked(pmap, l2, va, &lock)) { + if ((oldl2 & PTE_SW_WIRED) == 0) { + /* + * Write protect the mapping to a + * single page so that a subsequent + * write access may repromote. + */ + va += VM_PAGE_TO_PHYS(m) - + PTE_TO_PHYS(oldl2); + l3 = pmap_l2_to_l3(l2, va); + oldl3 = pmap_load(l3); + if ((oldl3 & PTE_V) != 0) { + while (!atomic_fcmpset_long(l3, + &oldl3, oldl3 & ~(PTE_D | + PTE_W))) + cpu_spinwait(); + vm_page_dirty(m); + pmap_invalidate_page(pmap, va); + } + } + } + } + PMAP_UNLOCK(pmap); + } + TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { + pmap = PV_PMAP(pv); + if (!PMAP_TRYLOCK(pmap)) { + md_gen = m->md.pv_gen; + pvh_gen = pvh->pv_gen; + rw_wunlock(lock); + PMAP_LOCK(pmap); + rw_wlock(lock); + if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) { + PMAP_UNLOCK(pmap); + goto restart; + } + } + l2 = pmap_l2(pmap, pv->pv_va); + KASSERT((pmap_load(l2) & PTE_RWX) == 0, + ("pmap_clear_modify: found a 2mpage in page %p's pv list", + m)); + l3 = pmap_l2_to_l3(l2, pv->pv_va); + if ((pmap_load(l3) & (PTE_D | PTE_W)) == (PTE_D | PTE_W)) { + pmap_clear_bits(l3, PTE_D); + pmap_invalidate_page(pmap, pv->pv_va); + } + PMAP_UNLOCK(pmap); + } + rw_wunlock(lock); + rw_runlock(&pvh_global_lock); } void * From 35c91b0c278e38eaaa7fb3bfe0d18ead2e65b4b6 Mon Sep 17 00:00:00 2001 From: Mark Johnston Date: Wed, 13 Feb 2019 17:50:01 +0000 Subject: [PATCH 09/89] Implement per-CPU pmap activation tracking for RISC-V. This reduces the overhead of TLB invalidations by ensuring that we only interrupt CPUs which are using the given pmap. Tracking is performed in pmap_activate(), which gets called during context switches: from cpu_throw(), if a thread is exiting or an AP is starting, or cpu_switch() for a regular context switch. For now, pmap_sync_icache() still must interrupt all CPUs. Reviewed by: kib (earlier version), jhb Sponsored by: The FreeBSD Foundation Differential Revision: https://reviews.freebsd.org/D18874 --- sys/riscv/include/pcb.h | 1 - sys/riscv/include/pcpu.h | 1 + sys/riscv/include/pmap.h | 7 +++ sys/riscv/riscv/genassym.c | 1 - sys/riscv/riscv/machdep.c | 4 -- sys/riscv/riscv/mp_machdep.c | 4 ++ sys/riscv/riscv/pmap.c | 88 ++++++++++++++++++++++++++---------- sys/riscv/riscv/swtch.S | 72 +++++++++++------------------ sys/riscv/riscv/vm_machdep.c | 3 -- 9 files changed, 104 insertions(+), 77 deletions(-) diff --git a/sys/riscv/include/pcb.h b/sys/riscv/include/pcb.h index 27737a4bdf24..6cc85198a417 100644 --- a/sys/riscv/include/pcb.h +++ b/sys/riscv/include/pcb.h @@ -55,7 +55,6 @@ struct pcb { #define PCB_FP_STARTED 0x1 #define PCB_FP_USERMASK 0x1 uint64_t pcb_sepc; /* Supervisor exception pc */ - vm_offset_t pcb_l1addr; /* L1 page tables base address */ vm_offset_t pcb_onfault; /* Copyinout fault handler */ }; diff --git a/sys/riscv/include/pcpu.h b/sys/riscv/include/pcpu.h index b91c48eef1cd..d570a68138c5 100644 --- a/sys/riscv/include/pcpu.h +++ b/sys/riscv/include/pcpu.h @@ -45,6 +45,7 @@ #define ALT_STACK_SIZE 128 #define PCPU_MD_FIELDS \ + struct pmap *pc_curpmap; /* Currently active pmap */ \ uint32_t pc_pending_ipis; /* IPIs pending to this CPU */ \ char __pad[61] diff --git a/sys/riscv/include/pmap.h b/sys/riscv/include/pmap.h index 92eeee26331f..2d55b764e9e8 100644 --- a/sys/riscv/include/pmap.h +++ b/sys/riscv/include/pmap.h @@ -41,6 +41,7 @@ #ifndef LOCORE #include +#include #include #include @@ -80,6 +81,8 @@ struct pmap { struct mtx pm_mtx; struct pmap_statistics pm_stats; /* pmap statictics */ pd_entry_t *pm_l1; + u_long pm_satp; /* value for SATP register */ + cpuset_t pm_active; /* active on cpus */ TAILQ_HEAD(,pv_chunk) pm_pvchunk; /* list of mappings in pmap */ LIST_ENTRY(pmap) pm_list; /* List of all pmaps */ struct vm_radix pm_root; @@ -137,6 +140,10 @@ extern vm_offset_t virtual_end; #define L1_MAPPABLE_P(va, pa, size) \ ((((va) | (pa)) & L1_OFFSET) == 0 && (size) >= L1_SIZE) +struct thread; + +void pmap_activate_boot(pmap_t); +void pmap_activate_sw(struct thread *); void pmap_bootstrap(vm_offset_t, vm_paddr_t, vm_size_t); void pmap_kenter_device(vm_offset_t, vm_size_t, vm_paddr_t); vm_paddr_t pmap_kextract(vm_offset_t va); diff --git a/sys/riscv/riscv/genassym.c b/sys/riscv/riscv/genassym.c index 8336769743ba..f69a3b86470f 100644 --- a/sys/riscv/riscv/genassym.c +++ b/sys/riscv/riscv/genassym.c @@ -63,7 +63,6 @@ ASSYM(TDF_ASTPENDING, TDF_ASTPENDING); ASSYM(TDF_NEEDRESCHED, TDF_NEEDRESCHED); ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault)); -ASSYM(PCB_L1ADDR, offsetof(struct pcb, pcb_l1addr)); ASSYM(PCB_SIZE, sizeof(struct pcb)); ASSYM(PCB_RA, offsetof(struct pcb, pcb_ra)); ASSYM(PCB_SP, offsetof(struct pcb, pcb_sp)); diff --git a/sys/riscv/riscv/machdep.c b/sys/riscv/riscv/machdep.c index e98c90392f62..d5159c72cb51 100644 --- a/sys/riscv/riscv/machdep.c +++ b/sys/riscv/riscv/machdep.c @@ -871,10 +871,6 @@ initriscv(struct riscv_bootparams *rvbp) init_proc0(rvbp->kern_stack); - /* set page table base register for thread0 */ - thread0.td_pcb->pcb_l1addr = \ - (rvbp->kern_l1pt - KERNBASE + rvbp->kern_phys); - msgbufinit(msgbufp, msgbufsize); mutex_init(); init_param2(physmem); diff --git a/sys/riscv/riscv/mp_machdep.c b/sys/riscv/riscv/mp_machdep.c index 0cd95fd1f79e..609d32b30f95 100644 --- a/sys/riscv/riscv/mp_machdep.c +++ b/sys/riscv/riscv/mp_machdep.c @@ -58,6 +58,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -255,6 +256,9 @@ init_secondary(uint64_t cpu) /* Enable external (PLIC) interrupts */ csr_set(sie, SIE_SEIE); + /* Activate process 0's pmap. */ + pmap_activate_boot(vmspace_pmap(proc0.p_vmspace)); + mtx_lock_spin(&ap_boot_mtx); atomic_add_rel_32(&smp_cpus, 1); diff --git a/sys/riscv/riscv/pmap.c b/sys/riscv/riscv/pmap.c index 18400128790e..d25224f75e79 100644 --- a/sys/riscv/riscv/pmap.c +++ b/sys/riscv/riscv/pmap.c @@ -118,9 +118,10 @@ __FBSDID("$FreeBSD$"); */ #include +#include #include #include -#include +#include #include #include #include @@ -566,6 +567,8 @@ pmap_bootstrap(vm_offset_t l1pt, vm_paddr_t kernstart, vm_size_t kernlen) rw_init(&pvh_global_lock, "pmap pv global"); + CPU_FILL(&kernel_pmap->pm_active); + /* Assume the address we were loaded to is a valid physical address. */ min_pa = max_pa = kernstart; @@ -723,9 +726,6 @@ pmap_init(void) * In general, the calling thread uses a plain fence to order the * writes to the page tables before invoking an SBI callback to invoke * sfence_vma() on remote CPUs. - * - * Since the riscv pmap does not yet have a pm_active field, IPIs are - * sent to all CPUs in the system. */ static void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) @@ -733,10 +733,11 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va) cpuset_t mask; sched_pin(); - mask = all_cpus; + mask = pmap->pm_active; CPU_CLR(PCPU_GET(cpuid), &mask); fence(); - sbi_remote_sfence_vma(mask.__bits, va, 1); + if (!CPU_EMPTY(&mask) && smp_started) + sbi_remote_sfence_vma(mask.__bits, va, 1); sfence_vma_page(va); sched_unpin(); } @@ -747,10 +748,11 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) cpuset_t mask; sched_pin(); - mask = all_cpus; + mask = pmap->pm_active; CPU_CLR(PCPU_GET(cpuid), &mask); fence(); - sbi_remote_sfence_vma(mask.__bits, sva, eva - sva + 1); + if (!CPU_EMPTY(&mask) && smp_started) + sbi_remote_sfence_vma(mask.__bits, sva, eva - sva + 1); /* * Might consider a loop of sfence_vma_page() for a small @@ -766,16 +768,17 @@ pmap_invalidate_all(pmap_t pmap) cpuset_t mask; sched_pin(); - mask = all_cpus; + mask = pmap->pm_active; CPU_CLR(PCPU_GET(cpuid), &mask); - fence(); /* * XXX: The SBI doc doesn't detail how to specify x0 as the * address to perform a global fence. BBL currently treats * all sfence_vma requests as global however. */ - sbi_remote_sfence_vma(mask.__bits, 0, 0); + fence(); + if (!CPU_EMPTY(&mask) && smp_started) + sbi_remote_sfence_vma(mask.__bits, 0, 0); sfence_vma(); sched_unpin(); } @@ -1199,6 +1202,9 @@ pmap_pinit0(pmap_t pmap) PMAP_LOCK_INIT(pmap); bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); pmap->pm_l1 = kernel_pmap->pm_l1; + pmap->pm_satp = SATP_MODE_SV39 | (vtophys(pmap->pm_l1) >> PAGE_SHIFT); + CPU_ZERO(&pmap->pm_active); + pmap_activate_boot(pmap); } int @@ -1216,12 +1222,15 @@ pmap_pinit(pmap_t pmap) l1phys = VM_PAGE_TO_PHYS(l1pt); pmap->pm_l1 = (pd_entry_t *)PHYS_TO_DMAP(l1phys); + pmap->pm_satp = SATP_MODE_SV39 | (l1phys >> PAGE_SHIFT); if ((l1pt->flags & PG_ZERO) == 0) pagezero(pmap->pm_l1); bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); + CPU_ZERO(&pmap->pm_active); + /* Install kernel pagetables */ memcpy(pmap->pm_l1, kernel_pmap->pm_l1, PAGE_SIZE); @@ -1411,6 +1420,8 @@ pmap_release(pmap_t pmap) KASSERT(pmap->pm_stats.resident_count == 0, ("pmap_release: pmap resident count %ld != 0", pmap->pm_stats.resident_count)); + KASSERT(CPU_EMPTY(&pmap->pm_active), + ("releasing active pmap %p", pmap)); mtx_lock(&allpmaps_lock); LIST_REMOVE(pmap, pm_list); @@ -4251,26 +4262,56 @@ pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa) return (val); } +void +pmap_activate_sw(struct thread *td) +{ + pmap_t oldpmap, pmap; + u_int cpu; + + oldpmap = PCPU_GET(curpmap); + pmap = vmspace_pmap(td->td_proc->p_vmspace); + if (pmap == oldpmap) + return; + load_satp(pmap->pm_satp); + + cpu = PCPU_GET(cpuid); +#ifdef SMP + CPU_SET_ATOMIC(cpu, &pmap->pm_active); + CPU_CLR_ATOMIC(cpu, &oldpmap->pm_active); +#else + CPU_SET(cpu, &pmap->pm_active); + CPU_CLR(cpu, &oldpmap->pm_active); +#endif + PCPU_SET(curpmap, pmap); + + sfence_vma(); +} + void pmap_activate(struct thread *td) { - pmap_t pmap; - uint64_t reg; critical_enter(); - pmap = vmspace_pmap(td->td_proc->p_vmspace); - td->td_pcb->pcb_l1addr = vtophys(pmap->pm_l1); - - reg = SATP_MODE_SV39; - reg |= (td->td_pcb->pcb_l1addr >> PAGE_SHIFT); - load_satp(reg); - - pmap_invalidate_all(pmap); + pmap_activate_sw(td); critical_exit(); } void -pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz) +pmap_activate_boot(pmap_t pmap) +{ + u_int cpu; + + cpu = PCPU_GET(cpuid); +#ifdef SMP + CPU_SET_ATOMIC(cpu, &pmap->pm_active); +#else + CPU_SET(cpu, &pmap->pm_active); +#endif + PCPU_SET(curpmap, pmap); +} + +void +pmap_sync_icache(pmap_t pmap, vm_offset_t va, vm_size_t sz) { cpuset_t mask; @@ -4286,7 +4327,8 @@ pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz) mask = all_cpus; CPU_CLR(PCPU_GET(cpuid), &mask); fence(); - sbi_remote_fence_i(mask.__bits); + if (!CPU_EMPTY(&mask) && smp_started) + sbi_remote_fence_i(mask.__bits); sched_unpin(); } diff --git a/sys/riscv/riscv/swtch.S b/sys/riscv/riscv/swtch.S index c80e9877a9cf..b57c0487610b 100644 --- a/sys/riscv/riscv/swtch.S +++ b/sys/riscv/riscv/swtch.S @@ -207,28 +207,21 @@ ENTRY(fpe_state_clear) END(fpe_state_clear) /* - * void cpu_throw(struct thread *old, struct thread *new) + * void cpu_throw(struct thread *old __unused, struct thread *new) */ ENTRY(cpu_throw) + /* Activate the new thread's pmap. */ + mv s0, a1 + mv a0, a1 + call _C_LABEL(pmap_activate_sw) + mv a0, s0 + /* Store the new curthread */ - sd a1, PC_CURTHREAD(gp) + sd a0, PC_CURTHREAD(gp) /* And the new pcb */ - ld x13, TD_PCB(a1) + ld x13, TD_PCB(a0) sd x13, PC_CURPCB(gp) - sfence.vma - - /* Switch to the new pmap */ - ld t0, PCB_L1ADDR(x13) - srli t0, t0, PAGE_SHIFT - li t1, SATP_MODE_SV39 - or t0, t0, t1 - csrw satp, t0 - - /* TODO: Invalidate the TLB */ - - sfence.vma - /* Load registers */ ld ra, (PCB_RA)(x13) ld sp, (PCB_SP)(x13) @@ -250,7 +243,7 @@ ENTRY(cpu_throw) #ifdef FPE /* Is FPE enabled for new thread? */ - ld t0, TD_FRAME(a1) + ld t0, TD_FRAME(a0) ld t1, (TF_SSTATUS)(t0) li t2, SSTATUS_FS_MASK and t3, t1, t2 @@ -324,39 +317,28 @@ ENTRY(cpu_switch) 1: #endif + /* Activate the new thread's pmap */ + mv s0, a0 + mv s1, a1 + mv s2, a2 + mv a0, a1 + call _C_LABEL(pmap_activate_sw) + mv a1, s1 + + /* Release the old thread */ + sd s2, TD_LOCK(s0) +#if defined(SCHED_ULE) && defined(SMP) + /* Spin if TD_LOCK points to a blocked_lock */ + la s2, _C_LABEL(blocked_lock) +1: + ld t0, TD_LOCK(a1) + beq t0, s2, 1b +#endif /* * Restore the saved context. */ ld x13, TD_PCB(a1) - /* - * TODO: We may need to flush the cache here if switching - * to a user process. - */ - - sfence.vma - - /* Switch to the new pmap */ - ld t0, PCB_L1ADDR(x13) - srli t0, t0, PAGE_SHIFT - li t1, SATP_MODE_SV39 - or t0, t0, t1 - csrw satp, t0 - - /* TODO: Invalidate the TLB */ - - sfence.vma - - /* Release the old thread */ - sd a2, TD_LOCK(a0) -#if defined(SCHED_ULE) && defined(SMP) - /* Spin if TD_LOCK points to a blocked_lock */ - la a2, _C_LABEL(blocked_lock) -1: - ld t0, TD_LOCK(a1) - beq t0, a2, 1b -#endif - /* Restore the registers */ ld tp, (PCB_TP)(x13) ld ra, (PCB_RA)(x13) diff --git a/sys/riscv/riscv/vm_machdep.c b/sys/riscv/riscv/vm_machdep.c index 14bb0ee3430f..c1801f01f2f1 100644 --- a/sys/riscv/riscv/vm_machdep.c +++ b/sys/riscv/riscv/vm_machdep.c @@ -92,9 +92,6 @@ cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags) td2->td_pcb = pcb2; bcopy(td1->td_pcb, pcb2, sizeof(*pcb2)); - td2->td_pcb->pcb_l1addr = - vtophys(vmspace_pmap(td2->td_proc->p_vmspace)->pm_l1); - tf = (struct trapframe *)STACKALIGN((struct trapframe *)pcb2 - 1); bcopy(td1->td_frame, tf, sizeof(*tf)); From 9031358d6797dc94dc81dab74f11e084aae9a377 Mon Sep 17 00:00:00 2001 From: Leandro Lupori Date: Wed, 13 Feb 2019 18:28:53 +0000 Subject: [PATCH 10/89] silence cast-align warnings from clang on powerpc64 silence the following warning when compiling libthr with clang 8 for powerpc64 architecture: usr/src/lib/libthr/arch/powerpc/include/pthread_md.h:82:10: error: cast from 'uint8_t *' (aka 'unsigned char *') to 'struct tcb *' increases required alignment from 1 to 8 [-Werror,-Wcast-align] 82: return ((struct tcb *)(_tp - TP_OFFSET)); Submitted by: alfredo.junior_eldorado.org.br Reviewed by: git_bdragon.rtk0.net, emaste, kib, jhibbits, luporl Differential Revision: https://reviews.freebsd.org/D18807 --- lib/libthr/arch/powerpc/include/pthread_md.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/lib/libthr/arch/powerpc/include/pthread_md.h b/lib/libthr/arch/powerpc/include/pthread_md.h index a9923d7337d8..939d7d2670e9 100644 --- a/lib/libthr/arch/powerpc/include/pthread_md.h +++ b/lib/libthr/arch/powerpc/include/pthread_md.h @@ -72,14 +72,15 @@ _tcb_set(struct tcb *tcb) static __inline struct tcb * _tcb_get(void) { - register uint8_t *_tp; + register struct tcb *tcb; + #ifdef __powerpc64__ - __asm __volatile("mr %0,13" : "=r"(_tp)); + __asm __volatile("addi %0,13,%1" : "=r"(tcb) : "i"(-TP_OFFSET)); #else - __asm __volatile("mr %0,2" : "=r"(_tp)); + __asm __volatile("addi %0,2,%1" : "=r"(tcb) : "i"(-TP_OFFSET)); #endif - return ((struct tcb *)(_tp - TP_OFFSET)); + return (tcb); } static __inline struct pthread * From 381ab04f4f5d40a9354267c5169ac1c0bf798ebd Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Wed, 13 Feb 2019 20:13:40 +0000 Subject: [PATCH 11/89] Pull in r353907 from upstream llvm trunk (by Reid Kleckner): [MC] Make symbol version errors non-fatal We stil don't have a source location, which is pretty lame, but at least we won't tell the user to file a clang bug report anymore. Fixes PR40712 This will make errors for symbols with @@ versions that are not defined non-fatal. For example: void f(void) { __asm__(".symver foo,bar@@baz"); } will now result in: error: versioned symbol bar@@baz must be defined instead of clang crashing with a diagnostic report. PR: 234671 Upstream PR: https://bugs.llvm.org/show_bug.cgi?id=40712 MFC after: 3 days --- contrib/llvm/lib/MC/ELFObjectWriter.cpp | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/contrib/llvm/lib/MC/ELFObjectWriter.cpp b/contrib/llvm/lib/MC/ELFObjectWriter.cpp index db531f75c87c..b23333e47566 100644 --- a/contrib/llvm/lib/MC/ELFObjectWriter.cpp +++ b/contrib/llvm/lib/MC/ELFObjectWriter.cpp @@ -1258,14 +1258,20 @@ void ELFObjectWriter::executePostLayoutBinding(MCAssembler &Asm, if (!Symbol.isUndefined() && !Rest.startswith("@@@")) continue; - // FIXME: produce a better error message. + // FIXME: Get source locations for these errors or diagnose them earlier. if (Symbol.isUndefined() && Rest.startswith("@@") && - !Rest.startswith("@@@")) - report_fatal_error("A @@ version cannot be undefined"); + !Rest.startswith("@@@")) { + Asm.getContext().reportError(SMLoc(), "versioned symbol " + AliasName + + " must be defined"); + continue; + } - if (Renames.count(&Symbol) && Renames[&Symbol] != Alias) - report_fatal_error(llvm::Twine("Multiple symbol versions defined for ") + - Symbol.getName()); + if (Renames.count(&Symbol) && Renames[&Symbol] != Alias) { + Asm.getContext().reportError( + SMLoc(), llvm::Twine("multiple symbol versions defined for ") + + Symbol.getName()); + continue; + } Renames.insert(std::make_pair(&Symbol, Alias)); } From 5864456d1ae16e8352d3e455a52301733ab4e3da Mon Sep 17 00:00:00 2001 From: Andriy Voskoboinyk Date: Thu, 14 Feb 2019 09:21:19 +0000 Subject: [PATCH 12/89] Add UPDATING entry for IEEE80211_AMPDU_AGE and AH_SUPPORT_AR5416 options removal Notified by: ian --- UPDATING | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/UPDATING b/UPDATING index c4338e1efdab..c0f96e97b58f 100644 --- a/UPDATING +++ b/UPDATING @@ -38,6 +38,12 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 13.x IS SLOW: modules on kernels not having 'device iflib', the iflib.ko module is loaded automatically. +20190125: + The IEEE80211_AMPDU_AGE and AH_SUPPORT_AR5416 kernel configuration + options no longer exist since r343219 and r343427 respectively; + nothing uses them, so they should be just removed from custom + kernel config files. + 20181230: r342635 changes the way efibootmgr(8) works by requiring users to add the -b (bootnum) parameter for commands where the bootnum was previously From 642bb66b63cf5d199e99df898a26238d7aed86da Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Thu, 14 Feb 2019 13:53:11 +0000 Subject: [PATCH 13/89] Provide userspace versions of do_cpuid() and cpuid_count() on i386. Some older compilers, when generating PIC code, cannot handle inline asm that clobbers %ebx (because %ebx is used as the GOT offset register). Userspace versions avoid clobbering %ebx by saving it to stack before executing the CPUID instruction. Sponsored by: The FreeBSD Foundation MFC after: 1 week --- sys/i386/include/cpufunc.h | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/sys/i386/include/cpufunc.h b/sys/i386/include/cpufunc.h index da9a3e3a0869..0e935e8c7d72 100644 --- a/sys/i386/include/cpufunc.h +++ b/sys/i386/include/cpufunc.h @@ -108,21 +108,47 @@ disable_intr(void) __asm __volatile("cli" : : : "memory"); } +#ifdef _KERNEL static __inline void do_cpuid(u_int ax, u_int *p) { __asm __volatile("cpuid" - : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3]) - : "0" (ax)); + : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3]) + : "0" (ax)); } static __inline void cpuid_count(u_int ax, u_int cx, u_int *p) { __asm __volatile("cpuid" - : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3]) - : "0" (ax), "c" (cx)); + : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3]) + : "0" (ax), "c" (cx)); } +#else +static __inline void +do_cpuid(u_int ax, u_int *p) +{ + __asm __volatile( + "pushl\t%%ebx\n\t" + "cpuid\n\t" + "movl\t%%ebx,%1\n\t" + "popl\t%%ebx" + : "=a" (p[0]), "=DS" (p[1]), "=c" (p[2]), "=d" (p[3]) + : "0" (ax)); +} + +static __inline void +cpuid_count(u_int ax, u_int cx, u_int *p) +{ + __asm __volatile( + "pushl\t%%ebx\n\t" + "cpuid\n\t" + "movl\t%%ebx,%1\n\t" + "popl\t%%ebx" + : "=a" (p[0]), "=DS" (p[1]), "=c" (p[2]), "=d" (p[3]) + : "0" (ax), "c" (cx)); +} +#endif static __inline void enable_intr(void) From b5d72efb7a3edaf7a1bfd0f36c75151aa412e4c2 Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Thu, 14 Feb 2019 13:59:00 +0000 Subject: [PATCH 14/89] x86 __vdso_gettc(): use machine/cpufunc.h function for CPUID. Based on the discussion with: jkim Sponsored by: The FreeBSD Foundation MFC after: 1 week --- lib/libc/x86/sys/__vdso_gettc.c | 29 ++--------------------------- 1 file changed, 2 insertions(+), 27 deletions(-) diff --git a/lib/libc/x86/sys/__vdso_gettc.c b/lib/libc/x86/sys/__vdso_gettc.c index 3a56b4af7bd9..0781e97e2ce5 100644 --- a/lib/libc/x86/sys/__vdso_gettc.c +++ b/lib/libc/x86/sys/__vdso_gettc.c @@ -53,31 +53,6 @@ __FBSDID("$FreeBSD$"); #include #include "libc_private.h" -static void -cpuidp(u_int leaf, u_int p[4]) -{ - - __asm __volatile( -#if defined(__i386__) - " pushl %%ebx\n" -#endif - " cpuid\n" -#if defined(__i386__) - " movl %%ebx,%1\n" - " popl %%ebx" -#endif - : "=a" (p[0]), -#if defined(__i386__) - "=r" (p[1]), -#elif defined(__amd64__) - "=b" (p[1]), -#else -#error "Arch" -#endif - "=c" (p[2]), "=d" (p[3]) - : "0" (leaf)); -} - static void rdtsc_mb_lfence(void) { @@ -100,12 +75,12 @@ rdtsc_mb_none(void) DEFINE_UIFUNC(static, void, rdtsc_mb, (void), static) { u_int p[4]; - /* Not a typo, string matches our cpuidp() registers use. */ + /* Not a typo, string matches our do_cpuid() registers use. */ static const char intel_id[] = "GenuntelineI"; if ((cpu_feature & CPUID_SSE2) == 0) return (rdtsc_mb_none); - cpuidp(0, p); + do_cpuid(0, p); return (memcmp(p + 1, intel_id, sizeof(intel_id) - 1) == 0 ? rdtsc_mb_lfence : rdtsc_mb_mfence); } From 071bca67ee8eeca078945099ba4637cefa2ed949 Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Thu, 14 Feb 2019 14:02:33 +0000 Subject: [PATCH 15/89] Unify i386 and amd64 getcontextx.c, and use ifuncs while there. In particular, use ifuncs for __getcontextx_size(), also calculate the size of the extended save area in resolver. Same for __fillcontextx2(). Sponsored by: The FreeBSD Foundation MFC after: 1 week --- lib/libc/Makefile | 1 + lib/libc/amd64/gen/Makefile.inc | 2 +- lib/libc/i386/gen/Makefile.inc | 2 +- lib/libc/i386/gen/getcontextx.c | 145 ---------------------- lib/libc/x86/gen/Makefile.inc | 6 + lib/libc/{amd64 => x86}/gen/getcontextx.c | 85 ++++++++----- 6 files changed, 65 insertions(+), 176 deletions(-) delete mode 100644 lib/libc/i386/gen/getcontextx.c create mode 100644 lib/libc/x86/gen/Makefile.inc rename lib/libc/{amd64 => x86}/gen/getcontextx.c (62%) diff --git a/lib/libc/Makefile b/lib/libc/Makefile index 4a456f0e4957..b545a2e81e98 100644 --- a/lib/libc/Makefile +++ b/lib/libc/Makefile @@ -122,6 +122,7 @@ NOASM= .endif .if ${LIBC_ARCH} == "i386" || ${LIBC_ARCH} == "amd64" .include "${LIBC_SRCTOP}/x86/sys/Makefile.inc" +.include "${LIBC_SRCTOP}/x86/gen/Makefile.inc" .endif .if ${MK_NIS} != "no" CFLAGS+= -DYP diff --git a/lib/libc/amd64/gen/Makefile.inc b/lib/libc/amd64/gen/Makefile.inc index fb4f7f4d30f0..30fb05f89cb7 100644 --- a/lib/libc/amd64/gen/Makefile.inc +++ b/lib/libc/amd64/gen/Makefile.inc @@ -2,7 +2,7 @@ # $FreeBSD$ SRCS+= _setjmp.S _set_tp.c rfork_thread.S setjmp.S sigsetjmp.S \ - fabs.S getcontextx.c \ + fabs.S \ infinity.c ldexp.c makecontext.c signalcontext.c \ flt_rounds.c fpgetmask.c fpsetmask.c fpgetprec.c fpsetprec.c \ fpgetround.c fpsetround.c fpgetsticky.c diff --git a/lib/libc/i386/gen/Makefile.inc b/lib/libc/i386/gen/Makefile.inc index 73dcabbece53..45e69cad1d0f 100644 --- a/lib/libc/i386/gen/Makefile.inc +++ b/lib/libc/i386/gen/Makefile.inc @@ -2,5 +2,5 @@ # $FreeBSD$ SRCS+= _ctx_start.S _setjmp.S _set_tp.c fabs.S \ - flt_rounds.c getcontextx.c infinity.c ldexp.c makecontext.c \ + flt_rounds.c infinity.c ldexp.c makecontext.c \ rfork_thread.S setjmp.S signalcontext.c sigsetjmp.S diff --git a/lib/libc/i386/gen/getcontextx.c b/lib/libc/i386/gen/getcontextx.c deleted file mode 100644 index aedf3f021a7f..000000000000 --- a/lib/libc/i386/gen/getcontextx.c +++ /dev/null @@ -1,145 +0,0 @@ -/*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD - * - * Copyright (c) 2011 Konstantin Belousov - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include -__FBSDID("$FreeBSD$"); - -#include -#include -#include -#include -#include -#include -#include -#include - -static int xstate_sz = -1; - -int -__getcontextx_size(void) -{ - u_int p[4]; - int cpuid_supported; - - if (xstate_sz == -1) { - __asm __volatile( - " pushfl\n" - " popl %%eax\n" - " movl %%eax,%%ecx\n" - " xorl $0x200000,%%eax\n" - " pushl %%eax\n" - " popfl\n" - " pushfl\n" - " popl %%eax\n" - " xorl %%eax,%%ecx\n" - " je 1f\n" - " movl $1,%0\n" - " jmp 2f\n" - "1: movl $0,%0\n" - "2:\n" - : "=r" (cpuid_supported) : : "eax", "ecx"); - if (cpuid_supported) { - __asm __volatile( - " pushl %%ebx\n" - " cpuid\n" - " movl %%ebx,%1\n" - " popl %%ebx\n" - : "=a" (p[0]), "=r" (p[1]), "=c" (p[2]), "=d" (p[3]) - : "0" (0x1)); - if ((p[2] & CPUID2_OSXSAVE) != 0) { - __asm __volatile( - " pushl %%ebx\n" - " cpuid\n" - " movl %%ebx,%1\n" - " popl %%ebx\n" - : "=a" (p[0]), "=r" (p[1]), "=c" (p[2]), - "=d" (p[3]) - : "0" (0xd), "2" (0x0)); - xstate_sz = p[1] - sizeof(struct savexmm); - } else - xstate_sz = 0; - } else - xstate_sz = 0; - } - - return (sizeof(ucontext_t) + xstate_sz); -} - -int -__fillcontextx2(char *ctx) -{ - struct i386_get_xfpustate xfpu; - ucontext_t *ucp; - - ucp = (ucontext_t *)ctx; - if (xstate_sz != 0) { - xfpu.addr = (char *)(ucp + 1); - xfpu.len = xstate_sz; - if (sysarch(I386_GET_XFPUSTATE, &xfpu) == -1) - return (-1); - ucp->uc_mcontext.mc_xfpustate = (__register_t)xfpu.addr; - ucp->uc_mcontext.mc_xfpustate_len = xstate_sz; - ucp->uc_mcontext.mc_flags |= _MC_HASFPXSTATE; - } else { - ucp->uc_mcontext.mc_xfpustate = 0; - ucp->uc_mcontext.mc_xfpustate_len = 0; - } - return (0); -} - -int -__fillcontextx(char *ctx) -{ - ucontext_t *ucp; - - ucp = (ucontext_t *)ctx; - if (getcontext(ucp) == -1) - return (-1); - __fillcontextx2(ctx); - return (0); -} - -__weak_reference(__getcontextx, getcontextx); - -ucontext_t * -__getcontextx(void) -{ - char *ctx; - int error; - - ctx = malloc(__getcontextx_size()); - if (ctx == NULL) - return (NULL); - if (__fillcontextx(ctx) == -1) { - error = errno; - free(ctx); - errno = error; - return (NULL); - } - return ((ucontext_t *)ctx); -} diff --git a/lib/libc/x86/gen/Makefile.inc b/lib/libc/x86/gen/Makefile.inc new file mode 100644 index 000000000000..0943fdb04bbf --- /dev/null +++ b/lib/libc/x86/gen/Makefile.inc @@ -0,0 +1,6 @@ +# $FreeBSD$ + +.PATH: ${LIBC_SRCTOP}/x86/gen + +SRCS+= \ + getcontextx.c diff --git a/lib/libc/amd64/gen/getcontextx.c b/lib/libc/x86/gen/getcontextx.c similarity index 62% rename from lib/libc/amd64/gen/getcontextx.c rename to lib/libc/x86/gen/getcontextx.c index a109bfe738a6..e9060bac44ad 100644 --- a/lib/libc/amd64/gen/getcontextx.c +++ b/lib/libc/x86/gen/getcontextx.c @@ -35,51 +35,78 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include #include #include +#include +#include -static int xstate_sz = -1; +#if defined __i386__ +#define X86_GET_XFPUSTATE I386_GET_XFPUSTATE +typedef struct savexmm savex86_t ; +typedef struct i386_get_xfpustate x86_get_xfpustate_t; +#elif defined __amd64__ +#define X86_GET_XFPUSTATE AMD64_GET_XFPUSTATE +typedef struct savefpu savex86_t; +typedef struct amd64_get_xfpustate x86_get_xfpustate_t; +#else +#error "Wrong arch" +#endif -int -__getcontextx_size(void) +static int xstate_sz = 0; + +static int +__getcontextx_size_xfpu(void) { - u_int p[4]; - - if (xstate_sz == -1) { - do_cpuid(1, p); - if ((p[2] & CPUID2_OSXSAVE) != 0) { - cpuid_count(0xd, 0x0, p); - xstate_sz = p[1] - sizeof(struct savefpu); - } else - xstate_sz = 0; - } return (sizeof(ucontext_t) + xstate_sz); } -int -__fillcontextx2(char *ctx) +DEFINE_UIFUNC(, int, __getcontextx_size, (void), static) { - struct amd64_get_xfpustate xfpu; + u_int p[4]; + + if ((cpu_feature2 & CPUID2_OSXSAVE) != 0) { + cpuid_count(0xd, 0x0, p); + xstate_sz = p[1] - sizeof(savex86_t); + } + return (__getcontextx_size_xfpu); +} + +static int +__fillcontextx2_xfpu(char *ctx) +{ + x86_get_xfpustate_t xfpu; ucontext_t *ucp; ucp = (ucontext_t *)ctx; - if (xstate_sz != 0) { - xfpu.addr = (char *)(ucp + 1); - xfpu.len = xstate_sz; - if (sysarch(AMD64_GET_XFPUSTATE, &xfpu) == -1) - return (-1); - ucp->uc_mcontext.mc_xfpustate = (__register_t)xfpu.addr; - ucp->uc_mcontext.mc_xfpustate_len = xstate_sz; - ucp->uc_mcontext.mc_flags |= _MC_HASFPXSTATE; - } else { - ucp->uc_mcontext.mc_xfpustate = 0; - ucp->uc_mcontext.mc_xfpustate_len = 0; - } + xfpu.addr = (char *)(ucp + 1); + xfpu.len = xstate_sz; + if (sysarch(X86_GET_XFPUSTATE, &xfpu) == -1) + return (-1); + ucp->uc_mcontext.mc_xfpustate = (__register_t)xfpu.addr; + ucp->uc_mcontext.mc_xfpustate_len = xstate_sz; + ucp->uc_mcontext.mc_flags |= _MC_HASFPXSTATE; return (0); } +static int +__fillcontextx2_noxfpu(char *ctx) +{ + ucontext_t *ucp; + + ucp = (ucontext_t *)ctx; + ucp->uc_mcontext.mc_xfpustate = 0; + ucp->uc_mcontext.mc_xfpustate_len = 0; + return (0); +} + +DEFINE_UIFUNC(, int, __fillcontextx2, (char *), static) +{ + + return ((cpu_feature2 & CPUID2_OSXSAVE) != 0 ? __fillcontextx2_xfpu : + __fillcontextx2_noxfpu); +} + int __fillcontextx(char *ctx) { From 72091bb39382abba0d71dc23738684bfb4bc2574 Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Thu, 14 Feb 2019 14:44:53 +0000 Subject: [PATCH 16/89] Enable enabling ASLR on non-x86 architectures. Discussed with: emaste Sponsored by: The FreeBSD Foundation --- sys/arm/arm/elf_machdep.c | 2 +- sys/arm64/arm64/elf_machdep.c | 3 ++- sys/mips/mips/elf_machdep.c | 4 ++-- sys/powerpc/powerpc/elf32_machdep.c | 2 +- sys/powerpc/powerpc/elf64_machdep.c | 2 +- sys/riscv/riscv/elf_machdep.c | 2 +- sys/sparc64/sparc64/elf_machdep.c | 2 +- 7 files changed, 9 insertions(+), 8 deletions(-) diff --git a/sys/arm/arm/elf_machdep.c b/sys/arm/arm/elf_machdep.c index 50e53bd93020..08fd72f83546 100644 --- a/sys/arm/arm/elf_machdep.c +++ b/sys/arm/arm/elf_machdep.c @@ -84,7 +84,7 @@ struct sysentvec elf32_freebsd_sysvec = { #if __ARM_ARCH >= 6 SV_ASLR | SV_SHP | SV_TIMEKEEP | #endif - SV_ABI_FREEBSD | SV_ILP32, + SV_ABI_FREEBSD | SV_ILP32 | SV_ASLR, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, diff --git a/sys/arm64/arm64/elf_machdep.c b/sys/arm64/arm64/elf_machdep.c index ab46995dd119..c3f24fe893ea 100644 --- a/sys/arm64/arm64/elf_machdep.c +++ b/sys/arm64/arm64/elf_machdep.c @@ -79,7 +79,8 @@ static struct sysentvec elf64_freebsd_sysvec = { .sv_setregs = exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, - .sv_flags = SV_SHP | SV_TIMEKEEP | SV_ABI_FREEBSD | SV_LP64, + .sv_flags = SV_SHP | SV_TIMEKEEP | SV_ABI_FREEBSD | SV_LP64 | + SV_ASLR, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, diff --git a/sys/mips/mips/elf_machdep.c b/sys/mips/mips/elf_machdep.c index e8791f821a62..e603c8120e63 100644 --- a/sys/mips/mips/elf_machdep.c +++ b/sys/mips/mips/elf_machdep.c @@ -76,7 +76,7 @@ struct sysentvec elf64_freebsd_sysvec = { .sv_setregs = exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, - .sv_flags = SV_ABI_FREEBSD | SV_LP64, + .sv_flags = SV_ABI_FREEBSD | SV_LP64 | SV_ASLR, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, @@ -131,7 +131,7 @@ struct sysentvec elf32_freebsd_sysvec = { .sv_setregs = exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, - .sv_flags = SV_ABI_FREEBSD | SV_ILP32, + .sv_flags = SV_ABI_FREEBSD | SV_ILP32 | SV_ASLR, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, diff --git a/sys/powerpc/powerpc/elf32_machdep.c b/sys/powerpc/powerpc/elf32_machdep.c index 18ae37318fdd..ec5b565dcf1e 100644 --- a/sys/powerpc/powerpc/elf32_machdep.c +++ b/sys/powerpc/powerpc/elf32_machdep.c @@ -115,7 +115,7 @@ struct sysentvec elf32_freebsd_sysvec = { .sv_fixlimit = NULL, #endif .sv_maxssiz = NULL, - .sv_flags = SV_ABI_FREEBSD | SV_ILP32 | SV_SHP, + .sv_flags = SV_ABI_FREEBSD | SV_ILP32 | SV_SHP | SV_ASLR, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_shared_page_base = FREEBSD32_SHAREDPAGE, diff --git a/sys/powerpc/powerpc/elf64_machdep.c b/sys/powerpc/powerpc/elf64_machdep.c index 7198241a2b30..9c4b3db39c0b 100644 --- a/sys/powerpc/powerpc/elf64_machdep.c +++ b/sys/powerpc/powerpc/elf64_machdep.c @@ -79,7 +79,7 @@ struct sysentvec elf64_freebsd_sysvec_v1 = { .sv_setregs = exec_setregs_funcdesc, .sv_fixlimit = NULL, .sv_maxssiz = NULL, - .sv_flags = SV_ABI_FREEBSD | SV_LP64 | SV_SHP, + .sv_flags = SV_ABI_FREEBSD | SV_LP64 | SV_SHP | SV_ASLR, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, diff --git a/sys/riscv/riscv/elf_machdep.c b/sys/riscv/riscv/elf_machdep.c index d4904b1c2cd9..da56ad395971 100644 --- a/sys/riscv/riscv/elf_machdep.c +++ b/sys/riscv/riscv/elf_machdep.c @@ -82,7 +82,7 @@ struct sysentvec elf64_freebsd_sysvec = { .sv_setregs = exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, - .sv_flags = SV_ABI_FREEBSD | SV_LP64 | SV_SHP, + .sv_flags = SV_ABI_FREEBSD | SV_LP64 | SV_SHP | SV_ASLR, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, diff --git a/sys/sparc64/sparc64/elf_machdep.c b/sys/sparc64/sparc64/elf_machdep.c index 1fbd09fa945e..8f7e55bc9b30 100644 --- a/sys/sparc64/sparc64/elf_machdep.c +++ b/sys/sparc64/sparc64/elf_machdep.c @@ -80,7 +80,7 @@ static struct sysentvec elf64_freebsd_sysvec = { .sv_setregs = exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, - .sv_flags = SV_ABI_FREEBSD | SV_LP64, + .sv_flags = SV_ABI_FREEBSD | SV_LP64 | SV_ASLR, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, From 59621b207c8dddba1abe9dd3455178e5186f5a8c Mon Sep 17 00:00:00 2001 From: Leandro Lupori Date: Thu, 14 Feb 2019 15:15:32 +0000 Subject: [PATCH 17/89] [PPC64] Fix mismatch between thread flags and MSR When sigreturn() restored a thread's context, SRR1 was being restored to its previous value, but pcb_flags was not being touched. This could cause a mismatch between the thread's MSR and its pcb_flags. For instance, when the thread used the FPU for the first time inside the signal handler, sigreturn() would clear SRR1, but not pcb_flags. Then, the thread would return with the FPU bit cleared in MSR and, the next time it tried to use the FPU, it would fail on a KASSERT that checked if the FPU was disabled. This change clears the FPU bit in both pcb_flags and frame->srr1, as the code that restores the context expects to use the FPU trap to re-enable it. PR: 234539 Reported by: sbruno Reviewed by: jhibbits, sbruno Differential Revision: https://reviews.freebsd.org/D19166 --- sys/powerpc/powerpc/exec_machdep.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sys/powerpc/powerpc/exec_machdep.c b/sys/powerpc/powerpc/exec_machdep.c index 4cb9fb8bbae5..3ae1640b2642 100644 --- a/sys/powerpc/powerpc/exec_machdep.c +++ b/sys/powerpc/powerpc/exec_machdep.c @@ -474,6 +474,10 @@ set_mcontext(struct thread *td, mcontext_t *mcp) else tf->fixreg[2] = tls; + /* Disable FPU */ + tf->srr1 &= ~PSL_FP; + pcb->pcb_flags &= ~PCB_FPU; + if (mcp->mc_flags & _MC_FP_VALID) { /* enable_fpu() will happen lazily on a fault */ pcb->pcb_flags |= PCB_FPREGS; From 484e9d0322ecc53bf22bf2d1a44913a6ae37174d Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Thu, 14 Feb 2019 15:45:53 +0000 Subject: [PATCH 18/89] Make anon clustering more compatible. Make the clustering enabling knob more fine-grained by providing a setting where the allocation with hint is not clustered. This is aimed to be somewhat more compatible with e.g. go 1.4 which expects that hinted mmap without MAP_FIXED does not change the allocation address. Now the vm.cluster_anon can be set to 1 to only cluster when no hints, and to 2 to always cluster. Default value is 1. Requested by: peter Reviewed by: emaste, markj Sponsored by: The FreeBSD Foundation MFC after: 1 month Differential revision: https://reviews.freebsd.org/D19194 --- sys/vm/vm_map.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c index 01544c8bf007..7fa738f40156 100644 --- a/sys/vm/vm_map.c +++ b/sys/vm/vm_map.c @@ -1487,7 +1487,22 @@ static const int aslr_pages_rnd_32[2] = {0x100, 0x4}; static int cluster_anon = 1; SYSCTL_INT(_vm, OID_AUTO, cluster_anon, CTLFLAG_RW, &cluster_anon, 0, - "Cluster anonymous mappings"); + "Cluster anonymous mappings: 0 = no, 1 = yes if no hint, 2 = always"); + +static bool +clustering_anon_allowed(vm_offset_t addr) +{ + + switch (cluster_anon) { + case 0: + return (false); + case 1: + return (addr == 0); + case 2: + default: + return (true); + } +} static long aslr_restarts; SYSCTL_LONG(_vm, OID_AUTO, aslr_restarts, CTLFLAG_RD, @@ -1593,7 +1608,7 @@ vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset, } else alignment = 0; en_aslr = (map->flags & MAP_ASLR) != 0; - update_anon = cluster = cluster_anon != 0 && + update_anon = cluster = clustering_anon_allowed(*addr) && (map->flags & MAP_IS_SUB_MAP) == 0 && max_addr == 0 && find_space != VMFS_NO_SPACE && object == NULL && (cow & (MAP_INHERIT_SHARE | MAP_STACK_GROWS_UP | From deb17a3ba3a5d71d13357b9ee072932102faaa3d Mon Sep 17 00:00:00 2001 From: Warner Losh Date: Thu, 14 Feb 2019 17:04:04 +0000 Subject: [PATCH 19/89] Fix small typo. Differential Review: https://reviews.freebsd.org/D19193 --- UPDATING | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/UPDATING b/UPDATING index c0f96e97b58f..58da2b7af0af 100644 --- a/UPDATING +++ b/UPDATING @@ -237,7 +237,7 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 13.x IS SLOW: 20180719: ARM64 now have efifb support, if you want to have serial console on your arm64 board when an screen is connected and the bootloader - setup a frambuffer for us to use, just add : + setup a framebuffer for us to use, just add : boot_serial=YES boot_multicons=YES in /boot/loader.conf From af06fa2652fb2b62ddb1647d7c036a32276fc2e5 Mon Sep 17 00:00:00 2001 From: Eric Joyner Date: Thu, 14 Feb 2019 18:02:37 +0000 Subject: [PATCH 20/89] ixl: Fix panic caused by bug exposed by r344062 Don't use a struct if_irq for IFLIB_INTR_IOV type interrupts since that results in get_core_offset() being called on them, and get_core_offset() doesn't handle IFLIB_INTR_IOV type interrupts, which results in an assert() being triggered in iflib_irq_set_affinity(). PR: 235730 Reported by: Jeffrey Pieper MFC after: 1 day Sponsored by: Intel Corporation --- sys/dev/ixl/if_ixl.c | 2 +- sys/dev/ixl/ixl_pf.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/sys/dev/ixl/if_ixl.c b/sys/dev/ixl/if_ixl.c index fc9ad8e11d7c..ed1513f3a514 100644 --- a/sys/dev/ixl/if_ixl.c +++ b/sys/dev/ixl/if_ixl.c @@ -932,7 +932,7 @@ ixl_if_msix_intr_assign(if_ctx_t ctx, int msix) return (err); } /* Create soft IRQ for handling VFLRs */ - iflib_softirq_alloc_generic(ctx, &pf->iov_irq, IFLIB_INTR_IOV, pf, 0, "iov"); + iflib_softirq_alloc_generic(ctx, NULL, IFLIB_INTR_IOV, pf, 0, "iov"); /* Now set up the stations */ for (i = 0, vector = 1; i < vsi->shared->isc_nrxqsets; i++, vector++, rx_que++) { diff --git a/sys/dev/ixl/ixl_pf.h b/sys/dev/ixl/ixl_pf.h index 99992e1ec608..b72277233e48 100644 --- a/sys/dev/ixl/ixl_pf.h +++ b/sys/dev/ixl/ixl_pf.h @@ -138,7 +138,6 @@ struct ixl_pf { struct ixl_vf *vfs; int num_vfs; uint16_t veb_seid; - struct if_irq iov_irq; }; /* From 23e5e43ccd0fc9211757a56cd9d1b7386cfd1415 Mon Sep 17 00:00:00 2001 From: Bruce Evans Date: Thu, 14 Feb 2019 19:07:08 +0000 Subject: [PATCH 21/89] Finish the fix for overflow in calcru1(). The previous fix was unnecessarily very slow up to 105 hours where the simple formula used previously worked, and unnecessarily slow by a factor of about 5/3 up to 388 days, and didn't work above 388 days. 388 days is not a long time, since it is a reasonable uptime, and for processes the times being calculated are aggregated over all threads, so with N CPUs running the same thread a runtime of 388 days is reachable after only 388 / N physical days. The PRs document overflow at 388 days, but don't try to fix it. Use the simple formula up to 76 hours. Then use a complicated general method that reduces to the simple formula up to a bit less than 105 hours, then reduces to the previous method without its extra work up to almost 388 days, then does more complicated reductions, usually many bits at a time so that this is not slow. This works up to half of maximum representable time (292271 years), with accumulated rounding errors of at most 32 usec. amd64 can do all this with no avoidable rounding errors in an inline asm with 2 instructions, but this is too special to use. __uint128_t can do the same with 100's of instructions on 64-bit arches. Long doubles with at least 64 bits of precision are the easiest method to use on i386 userland, but are hard to use in the kernel. PR: 76972 and duplicates Reviewed by: kib --- sys/kern/kern_resource.c | 93 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 87 insertions(+), 6 deletions(-) diff --git a/sys/kern/kern_resource.c b/sys/kern/kern_resource.c index a4de84c311c3..4264a1253644 100644 --- a/sys/kern/kern_resource.c +++ b/sys/kern/kern_resource.c @@ -863,13 +863,88 @@ rufetchtd(struct thread *td, struct rusage *ru) calcru1(p, &td->td_rux, &ru->ru_utime, &ru->ru_stime); } +/* XXX: the MI version is too slow to use: */ +#ifndef __HAVE_INLINE_FLSLL +#define flsll(x) (fls((x) >> 32) != 0 ? fls((x) >> 32) + 32 : fls(x)) +#endif + static uint64_t mul64_by_fraction(uint64_t a, uint64_t b, uint64_t c) { + uint64_t acc, bh, bl; + int i, s, sa, sb; + /* - * Compute floor(a * (b / c)) without overflowing, (b / c) <= 1.0. + * Calculate (a * b) / c accurately enough without overflowing. c + * must be nonzero, and its top bit must be 0. a or b must be + * <= c, and the implementation is tuned for b <= c. + * + * The comments about times are for use in calcru1() with units of + * microseconds for 'a' and stathz ticks at 128 Hz for b and c. + * + * Let n be the number of top zero bits in c. Each iteration + * either returns, or reduces b by right shifting it by at least n. + * The number of iterations is at most 1 + 64 / n, and the error is + * at most the number of iterations. + * + * It is very unusual to need even 2 iterations. Previous + * implementations overflowed essentially by returning early in the + * first iteration, with n = 38 giving overflow at 105+ hours and + * n = 32 giving overlow at at 388+ days despite a more careful + * calculation. 388 days is a reasonable uptime, and the calculation + * needs to work for the uptime times the number of CPUs since 'a' + * is per-process. */ - return ((a / c) * b + (a % c) * (b / c) + (a % c) * (b % c) / c); + if (a >= (uint64_t)1 << 63) + return (0); /* Unsupported arg -- can't happen. */ + acc = 0; + for (i = 0; i < 128; i++) { + sa = flsll(a); + sb = flsll(b); + if (sa + sb <= 64) + /* Up to 105 hours on first iteration. */ + return (acc + (a * b) / c); + if (a >= c) { + /* + * This reduction is based on a = q * c + r, with the + * remainder r < c. 'a' may be large to start, and + * moving bits from b into 'a' at the end of the loop + * sets the top bit of 'a', so the reduction makes + * significant progress. + */ + acc += (a / c) * b; + a %= c; + sa = flsll(a); + if (sa + sb <= 64) + /* Up to 388 days on first iteration. */ + return (acc + (a * b) / c); + } + + /* + * This step writes a * b as a * ((bh << s) + bl) = + * a * (bh << s) + a * bl = (a << s) * bh + a * bl. The 2 + * additive terms are handled separately. Splitting in + * this way is linear except for rounding errors. + * + * s = 64 - sa is the maximum such that a << s fits in 64 + * bits. Since a < c and c has at least 1 zero top bit, + * sa < 64 and s > 0. Thus this step makes progress by + * reducing b (it increases 'a', but taking remainders on + * the next iteration completes the reduction). + * + * Finally, the choice for s is just what is needed to keep + * a * bl from overflowing, so we don't need complications + * like a recursive call mul64_by_fraction(a, bl, c) to + * handle the second additive term. + */ + s = 64 - sa; + bh = b >> s; + bl = b - (bh << s); + acc += (a * bl) / c; + a <<= s; + b = bh; + } + return (0); /* Algorithm failure -- can't happen. */ } static void @@ -896,15 +971,23 @@ calcru1(struct proc *p, struct rusage_ext *ruxp, struct timeval *up, tu = ruxp->rux_tu; } + /* Subdivide tu. Avoid overflow in the multiplications. */ + if (__predict_true(tu <= ((uint64_t)1 << 38) && tt <= (1 << 26))) { + /* Up to 76 hours when stathz is 128. */ + uu = (tu * ut) / tt; + su = (tu * st) / tt; + } else { + uu = mul64_by_fraction(tu, ut, tt); + su = mul64_by_fraction(tu, ut, st); + } + if (tu >= ruxp->rux_tu) { /* * The normal case, time increased. * Enforce monotonicity of bucketed numbers. */ - uu = mul64_by_fraction(tu, ut, tt); if (uu < ruxp->rux_uu) uu = ruxp->rux_uu; - su = mul64_by_fraction(tu, st, tt); if (su < ruxp->rux_su) su = ruxp->rux_su; } else if (tu + 3 > ruxp->rux_tu || 101 * tu > 100 * ruxp->rux_tu) { @@ -933,8 +1016,6 @@ calcru1(struct proc *p, struct rusage_ext *ruxp, struct timeval *up, "to %ju usec for pid %d (%s)\n", (uintmax_t)ruxp->rux_tu, (uintmax_t)tu, p->p_pid, p->p_comm); - uu = mul64_by_fraction(tu, ut, tt); - su = mul64_by_fraction(tu, st, tt); } ruxp->rux_uu = uu; From a99bc4c3eb8c281a3a0616a037481a5666732745 Mon Sep 17 00:00:00 2001 From: Sean Eric Fagan Date: Fri, 15 Feb 2019 03:46:39 +0000 Subject: [PATCH 22/89] Add CBC-MAC authentication. This adds the CBC-MAC code to the kernel, but does not hook it up to anything (that comes in the next commit). https://tools.ietf.org/html/rfc3610 describes the algorithm. Note that this is a software-only implementation, which means it is fairly slow. Sponsored by: iXsystems Inc Differential Revision: https://reviews.freebsd.org/D18592 --- sys/conf/files | 2 + sys/modules/crypto/Makefile | 2 + sys/opencrypto/cbc_mac.c | 252 +++++++++++++++++++++++++++++++++ sys/opencrypto/cbc_mac.h | 67 +++++++++ sys/opencrypto/cryptodev.h | 7 +- sys/opencrypto/xform_cbc_mac.c | 55 +++++++ 6 files changed, 384 insertions(+), 1 deletion(-) create mode 100644 sys/opencrypto/cbc_mac.c create mode 100644 sys/opencrypto/cbc_mac.h create mode 100644 sys/opencrypto/xform_cbc_mac.c diff --git a/sys/conf/files b/sys/conf/files index 74cabfb486fb..aa30c0cc64f0 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -4847,6 +4847,8 @@ crypto/libsodium/randombytes.c optional crypto \ compile-with "${NORMAL_C} -I$S/contrib/libsodium/src/libsodium/include -I$S/crypto/libsodium" crypto/libsodium/utils.c optional crypto \ compile-with "${NORMAL_C} -I$S/contrib/libsodium/src/libsodium/include -I$S/crypto/libsodium" +opencrypto/cbc_mac.c optional crypto +opencrypto/xform_cbc_mac.c optional crypto rpc/auth_none.c optional krpc | nfslockd | nfscl | nfsd rpc/auth_unix.c optional krpc | nfslockd | nfscl | nfsd rpc/authunix_prot.c optional krpc | nfslockd | nfscl | nfsd diff --git a/sys/modules/crypto/Makefile b/sys/modules/crypto/Makefile index 0e66e4c3827a..09c5d710e985 100644 --- a/sys/modules/crypto/Makefile +++ b/sys/modules/crypto/Makefile @@ -68,5 +68,7 @@ CFLAGS.utils.c += -I${LIBSODIUM_INC} -I${LIBSODIUM_COMPAT} SRCS += opt_param.h cryptodev_if.h bus_if.h device_if.h SRCS += opt_ddb.h +SRCS += cbc_mac.c +SRCS += xform_cbc_mac.c .include diff --git a/sys/opencrypto/cbc_mac.c b/sys/opencrypto/cbc_mac.c new file mode 100644 index 000000000000..e75e5df369ba --- /dev/null +++ b/sys/opencrypto/cbc_mac.c @@ -0,0 +1,252 @@ +/* + * Copyright (c) 2018-2019 iXsystems Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$); + +#include +#include +#include +#include +#include +#include + +/* + * Given two CCM_CBC_BLOCK_LEN blocks, xor + * them into dst, and then encrypt dst. + */ +static void +xor_and_encrypt(struct aes_cbc_mac_ctx *ctx, + const uint8_t *src, uint8_t *dst) +{ + const uint64_t *b1; + uint64_t *b2; + uint64_t temp_block[CCM_CBC_BLOCK_LEN/sizeof(uint64_t)]; + + b1 = (const uint64_t*)src; + b2 = (uint64_t*)dst; + + for (size_t count = 0; + count < CCM_CBC_BLOCK_LEN/sizeof(uint64_t); + count++) { + temp_block[count] = b1[count] ^ b2[count]; + } + rijndaelEncrypt(ctx->keysched, ctx->rounds, (void*)temp_block, dst); +} + +void +AES_CBC_MAC_Init(struct aes_cbc_mac_ctx *ctx) +{ + bzero(ctx, sizeof(*ctx)); +} + +void +AES_CBC_MAC_Setkey(struct aes_cbc_mac_ctx *ctx, const uint8_t *key, uint16_t klen) +{ + ctx->rounds = rijndaelKeySetupEnc(ctx->keysched, key, klen * 8); +} + +/* + * This is called to set the nonce, aka IV. + * Before this call, the authDataLength and cryptDataLength fields + * MUST have been set. Sadly, there's no way to return an error. + * + * The CBC-MAC algorithm requires that the first block contain the + * nonce, as well as information about the sizes and lengths involved. + */ +void +AES_CBC_MAC_Reinit(struct aes_cbc_mac_ctx *ctx, const uint8_t *nonce, uint16_t nonceLen) +{ + uint8_t b0[CCM_CBC_BLOCK_LEN]; + uint8_t *bp = b0, flags = 0; + uint8_t L = 0; + uint64_t dataLength = ctx->cryptDataLength; + + KASSERT(ctx->authDataLength != 0 || ctx->cryptDataLength != 0, + ("Auth Data and Data lengths cannot both be 0")); + + KASSERT(nonceLen >= 7 && nonceLen <= 13, + ("nonceLen must be between 7 and 13 bytes")); + + ctx->nonce = nonce; + ctx->nonceLength = nonceLen; + + ctx->authDataCount = 0; + ctx->blockIndex = 0; + explicit_bzero(ctx->staging_block, sizeof(ctx->staging_block)); + + /* + * Need to determine the L field value. This is the number of + * bytes needed to specify the length of the message; the length + * is whatever is left in the 16 bytes after specifying flags and + * the nonce. + */ + L = 15 - nonceLen; + + flags = ((ctx->authDataLength > 0) << 6) + + (((AES_CBC_MAC_HASH_LEN - 2) / 2) << 3) + + L - 1; + /* + * Now we need to set up the first block, which has flags, nonce, + * and the message length. + */ + b0[0] = flags; + bcopy(nonce, b0 + 1, nonceLen); + bp = b0 + 1 + nonceLen; + + /* Need to copy L' [aka L-1] bytes of cryptDataLength */ + for (uint8_t *dst = b0 + sizeof(b0) - 1; dst >= bp; dst--) { + *dst = dataLength; + dataLength >>= 8; + } + /* Now need to encrypt b0 */ + rijndaelEncrypt(ctx->keysched, ctx->rounds, b0, ctx->block); + /* If there is auth data, we need to set up the staging block */ + if (ctx->authDataLength) { + if (ctx->authDataLength < ((1<<16) - (1<<8))) { + uint16_t sizeVal = htobe16(ctx->authDataLength); + bcopy(&sizeVal, ctx->staging_block, sizeof(sizeVal)); + ctx->blockIndex = sizeof(sizeVal); + } else if (ctx->authDataLength < (1UL<<32)) { + uint32_t sizeVal = htobe32(ctx->authDataLength); + ctx->staging_block[0] = 0xff; + ctx->staging_block[1] = 0xfe; + bcopy(&sizeVal, ctx->staging_block+2, sizeof(sizeVal)); + ctx->blockIndex = 2 + sizeof(sizeVal); + } else { + uint64_t sizeVal = htobe64(ctx->authDataLength); + ctx->staging_block[0] = 0xff; + ctx->staging_block[1] = 0xff; + bcopy(&sizeVal, ctx->staging_block+2, sizeof(sizeVal)); + ctx->blockIndex = 2 + sizeof(sizeVal); + } + } +} + +int +AES_CBC_MAC_Update(struct aes_cbc_mac_ctx *ctx, const uint8_t *data, + uint16_t length) +{ + size_t copy_amt; + + /* + * This will be called in one of two phases: + * (1) Applying authentication data, or + * (2) Applying the payload data. + * + * Because CBC-MAC puts the authentication data size before the + * data, subsequent calls won't be block-size-aligned. Which + * complicates things a fair bit. + * + * The payload data doesn't have that problem. + */ + + if (ctx->authDataCount < ctx->authDataLength) { + /* + * We need to process data as authentication data. + * Since we may be out of sync, we may also need + * to pad out the staging block. + */ + const uint8_t *ptr = data; + while (length > 0) { + + copy_amt = MIN(length, + sizeof(ctx->staging_block) - ctx->blockIndex); + + bcopy(ptr, ctx->staging_block + ctx->blockIndex, + copy_amt); + ptr += copy_amt; + length -= copy_amt; + ctx->authDataCount += copy_amt; + ctx->blockIndex += copy_amt; + ctx->blockIndex %= sizeof(ctx->staging_block); + if (ctx->authDataCount == ctx->authDataLength) + length = 0; + if (ctx->blockIndex == 0 || + ctx->authDataCount >= ctx->authDataLength) { + /* + * We're done with this block, so we + * xor staging_block with block, and then + * encrypt it. + */ + xor_and_encrypt(ctx, ctx->staging_block, ctx->block); + bzero(ctx->staging_block, sizeof(ctx->staging_block)); + ctx->blockIndex = 0; + } + } + return (0); + } + /* + * If we're here, then we're encoding payload data. + * This is marginally easier, except that _Update can + * be called with non-aligned update lengths. As a result, + * we still need to use the staging block. + */ + KASSERT((length + ctx->cryptDataCount) <= ctx->cryptDataLength, + ("More encryption data than allowed")); + + while (length) { + uint8_t *ptr; + + copy_amt = MIN(sizeof(ctx->staging_block) - ctx->blockIndex, + length); + ptr = ctx->staging_block + ctx->blockIndex; + bcopy(data, ptr, copy_amt); + data += copy_amt; + ctx->blockIndex += copy_amt; + ctx->cryptDataCount += copy_amt; + length -= copy_amt; + if (ctx->blockIndex == sizeof(ctx->staging_block)) { + /* We've got a full block */ + xor_and_encrypt(ctx, ctx->staging_block, ctx->block); + ctx->blockIndex = 0; + bzero(ctx->staging_block, sizeof(ctx->staging_block)); + } + } + return (0); +} + +void +AES_CBC_MAC_Final(uint8_t *buf, struct aes_cbc_mac_ctx *ctx) +{ + uint8_t s0[CCM_CBC_BLOCK_LEN]; + + /* + * We first need to check to see if we've got any data + * left over to encrypt. + */ + if (ctx->blockIndex != 0) { + xor_and_encrypt(ctx, ctx->staging_block, ctx->block); + ctx->cryptDataCount += ctx->blockIndex; + ctx->blockIndex = 0; + explicit_bzero(ctx->staging_block, sizeof(ctx->staging_block)); + } + bzero(s0, sizeof(s0)); + s0[0] = (15 - ctx->nonceLength) - 1; + bcopy(ctx->nonce, s0 + 1, ctx->nonceLength); + rijndaelEncrypt(ctx->keysched, ctx->rounds, s0, s0); + for (size_t indx = 0; indx < AES_CBC_MAC_HASH_LEN; indx++) + buf[indx] = ctx->block[indx] ^ s0[indx]; + explicit_bzero(s0, sizeof(s0)); +} diff --git a/sys/opencrypto/cbc_mac.h b/sys/opencrypto/cbc_mac.h new file mode 100644 index 000000000000..33e61cc1a123 --- /dev/null +++ b/sys/opencrypto/cbc_mac.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2014 The FreeBSD Foundation + * Copyright (c) 2018, iXsystems Inc. + * All rights reserved. + * + * This software was developed by Sean Eric Fagan, with lots of references + * to existing AES-CCM (gmac) code. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + * + */ + +#ifndef _CBC_CCM_H +# define _CBC_CCM_H + +# include +# include + +# define CCM_CBC_BLOCK_LEN 16 /* 128 bits */ +# define CCM_CBC_MAX_DIGEST_LEN 16 +# define CCM_CBC_MIN_DIGEST_LEN 4 + +/* + * This is the authentication context structure; + * the encryption one is similar. + */ +struct aes_cbc_mac_ctx { + uint64_t authDataLength, authDataCount; + uint64_t cryptDataLength, cryptDataCount; + int blockIndex; + uint8_t staging_block[CCM_CBC_BLOCK_LEN]; + uint8_t block[CCM_CBC_BLOCK_LEN]; + const uint8_t *nonce; + int nonceLength; /* This one is in bytes, not bits! */ + /* AES state data */ + int rounds; + uint32_t keysched[4*(RIJNDAEL_MAXNR+1)]; +}; + +void AES_CBC_MAC_Init(struct aes_cbc_mac_ctx *); +void AES_CBC_MAC_Setkey(struct aes_cbc_mac_ctx *, const uint8_t *, uint16_t); +void AES_CBC_MAC_Reinit(struct aes_cbc_mac_ctx *, const uint8_t *, uint16_t); +int AES_CBC_MAC_Update(struct aes_cbc_mac_ctx *, const uint8_t *, uint16_t); +void AES_CBC_MAC_Final(uint8_t *, struct aes_cbc_mac_ctx *); + +#endif /* _CBC_CCM_H */ diff --git a/sys/opencrypto/cryptodev.h b/sys/opencrypto/cryptodev.h index fe13539eca01..336271a19252 100644 --- a/sys/opencrypto/cryptodev.h +++ b/sys/opencrypto/cryptodev.h @@ -86,6 +86,7 @@ #define SHA1_KPDK_HASH_LEN 20 #define AES_GMAC_HASH_LEN 16 #define POLY1305_HASH_LEN 16 +#define AES_CBC_MAC_HASH_LEN 16 /* Maximum hash algorithm result length */ #define HASH_MAX_LEN SHA2_512_HASH_LEN /* Keep this updated */ @@ -107,6 +108,9 @@ #define AES_128_GMAC_KEY_LEN 16 #define AES_192_GMAC_KEY_LEN 24 #define AES_256_GMAC_KEY_LEN 32 +#define AES_128_CBC_MAC_KEY_LEN 16 +#define AES_192_CBC_MAC_KEY_LEN 24 +#define AES_256_CBC_MAC_KEY_LEN 32 #define POLY1305_KEY_LEN 32 @@ -199,7 +203,8 @@ #define CRYPTO_SHA2_384 36 #define CRYPTO_SHA2_512 37 #define CRYPTO_POLY1305 38 -#define CRYPTO_ALGORITHM_MAX 38 /* Keep updated - see below */ +#define CRYPTO_AES_CCM_CBC_MAC 39 /* auth side */ +#define CRYPTO_ALGORITHM_MAX 39 /* Keep updated - see below */ #define CRYPTO_ALGO_VALID(x) ((x) >= CRYPTO_ALGORITHM_MIN && \ (x) <= CRYPTO_ALGORITHM_MAX) diff --git a/sys/opencrypto/xform_cbc_mac.c b/sys/opencrypto/xform_cbc_mac.c new file mode 100644 index 000000000000..60afe9501547 --- /dev/null +++ b/sys/opencrypto/xform_cbc_mac.c @@ -0,0 +1,55 @@ +#include +__FBSDID("$FreeBSD$"); + +#include +#include + +/* Authentication instances */ +struct auth_hash auth_hash_ccm_cbc_mac_128 = { + .type = CRYPTO_AES_CCM_CBC_MAC, + .name = "CBC-CCM-AES-128", + .keysize = AES_128_CBC_MAC_KEY_LEN, + .hashsize = AES_CBC_MAC_HASH_LEN, + .ctxsize = sizeof(struct aes_cbc_mac_ctx), + .blocksize = CCM_CBC_BLOCK_LEN, + .Init = (void (*)(void *)) AES_CBC_MAC_Init, + .Setkey = + (void (*)(void *, const u_int8_t *, u_int16_t))AES_CBC_MAC_Setkey, + .Reinit = + (void (*)(void *, const u_int8_t *, u_int16_t)) AES_CBC_MAC_Reinit, + .Update = + (int (*)(void *, const u_int8_t *, u_int16_t)) AES_CBC_MAC_Update, + .Final = (void (*)(u_int8_t *, void *)) AES_CBC_MAC_Final, +}; +struct auth_hash auth_hash_ccm_cbc_mac_192 = { + .type = CRYPTO_AES_CCM_CBC_MAC, + .name = "CBC-CCM-AES-192", + .keysize = AES_192_CBC_MAC_KEY_LEN, + .hashsize = AES_CBC_MAC_HASH_LEN, + .ctxsize = sizeof(struct aes_cbc_mac_ctx), + .blocksize = CCM_CBC_BLOCK_LEN, + .Init = (void (*)(void *)) AES_CBC_MAC_Init, + .Setkey = + (void (*)(void *, const u_int8_t *, u_int16_t)) AES_CBC_MAC_Setkey, + .Reinit = + (void (*)(void *, const u_int8_t *, u_int16_t)) AES_CBC_MAC_Reinit, + .Update = + (int (*)(void *, const u_int8_t *, u_int16_t)) AES_CBC_MAC_Update, + .Final = (void (*)(u_int8_t *, void *)) AES_CBC_MAC_Final, +}; +struct auth_hash auth_hash_ccm_cbc_mac_256 = { + .type = CRYPTO_AES_CCM_CBC_MAC, + .name = "CBC-CCM-AES-256", + .keysize = AES_256_CBC_MAC_KEY_LEN, + .hashsize = AES_CBC_MAC_HASH_LEN, + .ctxsize = sizeof(struct aes_cbc_mac_ctx), + .blocksize = CCM_CBC_BLOCK_LEN, + .Init = (void (*)(void *)) AES_CBC_MAC_Init, + .Setkey = + (void (*)(void *, const u_int8_t *, u_int16_t)) AES_CBC_MAC_Setkey, + .Reinit = + (void (*)(void *, const u_int8_t *, u_int16_t)) AES_CBC_MAC_Reinit, + .Update = + (int (*)(void *, const u_int8_t *, u_int16_t)) AES_CBC_MAC_Update, + .Final = (void (*)(u_int8_t *, void *)) AES_CBC_MAC_Final, +}; From 507281e55e3903e8ea1007ddf201f6703860189d Mon Sep 17 00:00:00 2001 From: Sean Eric Fagan Date: Fri, 15 Feb 2019 03:53:03 +0000 Subject: [PATCH 23/89] Add AES-CCM encryption, and plumb into OCF. This commit essentially has three parts: * Add the AES-CCM encryption hooks. This is in and of itself fairly small, as there is only a small difference between CCM and the other ICM-based algorithms. * Hook the code into the OpenCrypto framework. This is the bulk of the changes, as the algorithm type has to be checked for, and the differences between it and GCM dealt with. * Update the cryptocheck tool to be aware of it. This is invaluable for confirming that the code works. This is a software-only implementation, meaning that the performance is very low. Sponsored by: iXsystems Inc. Differential Revision: https://reviews.freebsd.org/D19090 --- sys/opencrypto/cryptodev.c | 27 +++- sys/opencrypto/cryptodev.h | 4 +- sys/opencrypto/cryptosoft.c | 86 +++++++++++- sys/opencrypto/xform_aes_icm.c | 28 ++++ sys/opencrypto/xform_auth.h | 5 + sys/opencrypto/xform_enc.h | 1 + tools/tools/crypto/cryptocheck.c | 228 ++++++++++++++++++++++++++++++- 7 files changed, 371 insertions(+), 8 deletions(-) diff --git a/sys/opencrypto/cryptodev.c b/sys/opencrypto/cryptodev.c index cfdf28cd90a4..df055fbc26d0 100644 --- a/sys/opencrypto/cryptodev.c +++ b/sys/opencrypto/cryptodev.c @@ -444,6 +444,9 @@ cryptof_ioctl( case CRYPTO_CHACHA20: txform = &enc_xform_chacha20; break; + case CRYPTO_AES_CCM_16: + txform = &enc_xform_ccm; + break; default: CRYPTDEB("invalid cipher"); @@ -488,6 +491,25 @@ cryptof_ioctl( thash = &auth_hash_nist_gmac_aes_256; break; + case CRYPTO_AES_CCM_CBC_MAC: + switch (sop->keylen) { + case 16: + thash = &auth_hash_ccm_cbc_mac_128; + break; + case 24: + thash = &auth_hash_ccm_cbc_mac_192; + break; + case 32: + thash = &auth_hash_ccm_cbc_mac_256; + break; + default: + CRYPTDEB("Invalid CBC MAC key size %d", + sop->keylen); + SDT_PROBE1(opencrypto, dev, ioctl, + error, __LINE__); + return (EINVAL); + } + break; #ifdef notdef case CRYPTO_MD5: thash = &auth_hash_md5; @@ -1003,12 +1025,13 @@ cryptodev_aead( } /* - * For GCM, crd_len covers only the AAD. For other ciphers + * For GCM/CCM, crd_len covers only the AAD. For other ciphers * chained with an HMAC, crd_len covers both the AAD and the * cipher text. */ crda->crd_skip = 0; - if (cse->cipher == CRYPTO_AES_NIST_GCM_16) + if (cse->cipher == CRYPTO_AES_NIST_GCM_16 || + cse->cipher == CRYPTO_AES_CCM_16) crda->crd_len = caead->aadlen; else crda->crd_len = caead->aadlen + caead->len; diff --git a/sys/opencrypto/cryptodev.h b/sys/opencrypto/cryptodev.h index 336271a19252..bd71e518c576 100644 --- a/sys/opencrypto/cryptodev.h +++ b/sys/opencrypto/cryptodev.h @@ -133,6 +133,7 @@ #define ARC4_IV_LEN 1 #define AES_GCM_IV_LEN 12 +#define AES_CCM_IV_LEN 12 #define AES_XTS_IV_LEN 8 #define AES_XTS_ALPHA 0x87 /* GF(2^128) generator polynomial */ @@ -204,7 +205,8 @@ #define CRYPTO_SHA2_512 37 #define CRYPTO_POLY1305 38 #define CRYPTO_AES_CCM_CBC_MAC 39 /* auth side */ -#define CRYPTO_ALGORITHM_MAX 39 /* Keep updated - see below */ +#define CRYPTO_AES_CCM_16 40 /* cipher side */ +#define CRYPTO_ALGORITHM_MAX 40 /* Keep updated - see below */ #define CRYPTO_ALGO_VALID(x) ((x) >= CRYPTO_ALGORITHM_MIN && \ (x) <= CRYPTO_ALGORITHM_MAX) diff --git a/sys/opencrypto/cryptosoft.c b/sys/opencrypto/cryptosoft.c index 431628df198c..2d064ffa15e8 100644 --- a/sys/opencrypto/cryptosoft.c +++ b/sys/opencrypto/cryptosoft.c @@ -62,6 +62,9 @@ __FBSDID("$FreeBSD$"); #include #include "cryptodev_if.h" +_Static_assert(AES_CCM_IV_LEN == AES_GCM_IV_LEN, + "AES_GCM_IV_LEN must currently be the same as AES_CCM_IV_LEN"); + static int32_t swcr_id; u_int8_t hmac_ipad_buffer[HMAC_MAX_BLOCK_LEN]; @@ -506,6 +509,7 @@ swcr_authenc(struct cryptop *crp) caddr_t buf = (caddr_t)crp->crp_buf; uint32_t *blkp; int aadlen, blksz, i, ivlen, len, iskip, oskip, r; + int isccm = 0; ivlen = blksz = iskip = oskip = 0; @@ -520,13 +524,18 @@ swcr_authenc(struct cryptop *crp) sw = &ses->swcr_algorithms[i]; switch (sw->sw_alg) { + case CRYPTO_AES_CCM_16: case CRYPTO_AES_NIST_GCM_16: case CRYPTO_AES_NIST_GMAC: swe = sw; crde = crd; exf = swe->sw_exf; - ivlen = 12; + /* AES_CCM_IV_LEN and AES_GCM_IV_LEN are both 12 */ + ivlen = AES_CCM_IV_LEN; break; + case CRYPTO_AES_CCM_CBC_MAC: + isccm = 1; + /* FALLTHROUGH */ case CRYPTO_AES_128_NIST_GMAC: case CRYPTO_AES_192_NIST_GMAC: case CRYPTO_AES_256_NIST_GMAC: @@ -544,8 +553,26 @@ swcr_authenc(struct cryptop *crp) } if (crde == NULL || crda == NULL) return (EINVAL); + /* + * We need to make sure that the auth algorithm matches the + * encr algorithm. Specifically, for AES-GCM must go with + * AES NIST GMAC, and AES-CCM must go with CBC-MAC. + */ + if (crde->crd_alg == CRYPTO_AES_NIST_GCM_16) { + switch (crda->crd_alg) { + case CRYPTO_AES_128_NIST_GMAC: + case CRYPTO_AES_192_NIST_GMAC: + case CRYPTO_AES_256_NIST_GMAC: + break; /* Good! */ + default: + return (EINVAL); /* Not good! */ + } + } else if (crde->crd_alg == CRYPTO_AES_CCM_16 && + crda->crd_alg != CRYPTO_AES_CCM_CBC_MAC) + return (EINVAL); - if (crde->crd_alg == CRYPTO_AES_NIST_GCM_16 && + if ((crde->crd_alg == CRYPTO_AES_NIST_GCM_16 || + crde->crd_alg == CRYPTO_AES_CCM_16) && (crde->crd_flags & CRD_F_IV_EXPLICIT) == 0) return (EINVAL); @@ -576,6 +603,15 @@ swcr_authenc(struct cryptop *crp) } } + if (swa->sw_alg == CRYPTO_AES_CCM_CBC_MAC) { + /* + * AES CCM-CBC needs to know the length of + * both the auth data, and payload data, before + * doing the auth computation. + */ + ctx.aes_cbc_mac_ctx.authDataLength = crda->crd_len; + ctx.aes_cbc_mac_ctx.cryptDataLength = crde->crd_len; + } /* Supply MAC with IV */ if (axf->Reinit) axf->Reinit(&ctx, iv, ivlen); @@ -610,16 +646,30 @@ swcr_authenc(struct cryptop *crp) bzero(blk, blksz); crypto_copydata(crp->crp_flags, buf, crde->crd_skip + i, len, blk); + /* + * One of the problems with CCM+CBC is that the authentication + * is done on the unecncrypted data. As a result, we have + * to do the authentication update at different times, + * depending on whether it's CCM or not. + */ if (crde->crd_flags & CRD_F_ENCRYPT) { + if (isccm) + axf->Update(&ctx, blk, len); if (exf->encrypt_multi != NULL) exf->encrypt_multi(swe->sw_kschedule, blk, len); else exf->encrypt(swe->sw_kschedule, blk); - axf->Update(&ctx, blk, len); + if (!isccm) + axf->Update(&ctx, blk, len); crypto_copyback(crp->crp_flags, buf, crde->crd_skip + i, len, blk); } else { + if (isccm) { + KASSERT(exf->encrypt_multi == NULL, + ("assume CCM is single-block only")); + exf->decrypt(swe->sw_kschedule, blk); + } axf->Update(&ctx, blk, len); } } @@ -650,6 +700,11 @@ swcr_authenc(struct cryptop *crp) r = timingsafe_bcmp(aalg, uaalg, axf->hashsize); if (r == 0) { /* tag matches, decrypt data */ + if (isccm) { + KASSERT(exf->reinit != NULL, + ("AES-CCM reinit function must be set")); + exf->reinit(swe->sw_kschedule, iv); + } for (i = 0; i < crde->crd_len; i += blksz) { len = MIN(crde->crd_len - i, blksz); if (len < blksz) @@ -799,6 +854,9 @@ swcr_newsession(device_t dev, crypto_session_t cses, struct cryptoini *cri) case CRYPTO_AES_NIST_GCM_16: txf = &enc_xform_aes_nist_gcm; goto enccommon; + case CRYPTO_AES_CCM_16: + txf = &enc_xform_ccm; + goto enccommon; case CRYPTO_AES_NIST_GMAC: txf = &enc_xform_aes_nist_gmac; swd->sw_exf = txf; @@ -943,6 +1001,22 @@ swcr_newsession(device_t dev, crypto_session_t cses, struct cryptoini *cri) swd->sw_axf = axf; break; + case CRYPTO_AES_CCM_CBC_MAC: + switch (cri->cri_klen) { + case 128: + axf = &auth_hash_ccm_cbc_mac_128; + break; + case 192: + axf = &auth_hash_ccm_cbc_mac_192; + break; + case 256: + axf = &auth_hash_ccm_cbc_mac_256; + break; + default: + swcr_freesession(dev, cses); + return EINVAL; + } + goto auth4common; case CRYPTO_AES_128_NIST_GMAC: axf = &auth_hash_nist_gmac_aes_128; goto auth4common; @@ -1042,6 +1116,7 @@ swcr_freesession(device_t dev, crypto_session_t cses) case CRYPTO_CAMELLIA_CBC: case CRYPTO_NULL_CBC: case CRYPTO_CHACHA20: + case CRYPTO_AES_CCM_16: txf = swd->sw_exf; if (swd->sw_kschedule) @@ -1056,6 +1131,7 @@ swcr_freesession(device_t dev, crypto_session_t cses) case CRYPTO_SHA2_512_HMAC: case CRYPTO_RIPEMD160_HMAC: case CRYPTO_NULL_HMAC: + case CRYPTO_AES_CCM_CBC_MAC: axf = swd->sw_axf; if (swd->sw_ictx) { @@ -1201,6 +1277,8 @@ swcr_process(device_t dev, struct cryptop *crp, int hint) case CRYPTO_AES_128_NIST_GMAC: case CRYPTO_AES_192_NIST_GMAC: case CRYPTO_AES_256_NIST_GMAC: + case CRYPTO_AES_CCM_16: + case CRYPTO_AES_CCM_CBC_MAC: crp->crp_etype = swcr_authenc(crp); goto done; @@ -1291,6 +1369,8 @@ swcr_attach(device_t dev) REGISTER(CRYPTO_BLAKE2B); REGISTER(CRYPTO_BLAKE2S); REGISTER(CRYPTO_CHACHA20); + REGISTER(CRYPTO_AES_CCM_16); + REGISTER(CRYPTO_AES_CCM_CBC_MAC); REGISTER(CRYPTO_POLY1305); #undef REGISTER diff --git a/sys/opencrypto/xform_aes_icm.c b/sys/opencrypto/xform_aes_icm.c index 8d3694fa23a0..052be5a779ae 100644 --- a/sys/opencrypto/xform_aes_icm.c +++ b/sys/opencrypto/xform_aes_icm.c @@ -57,6 +57,7 @@ static void aes_icm_crypt(caddr_t, u_int8_t *); static void aes_icm_zerokey(u_int8_t **); static void aes_icm_reinit(caddr_t, u_int8_t *); static void aes_gcm_reinit(caddr_t, u_int8_t *); +static void aes_ccm_reinit(caddr_t, u_int8_t *); /* Encryption instances */ struct enc_xform enc_xform_aes_icm = { @@ -79,6 +80,18 @@ struct enc_xform enc_xform_aes_nist_gcm = { aes_gcm_reinit, }; +struct enc_xform enc_xform_ccm = { + .type = CRYPTO_AES_CCM_16, + .name = "AES-CCM", + .blocksize = AES_ICM_BLOCK_LEN, .ivsize = AES_CCM_IV_LEN, + .minkey = AES_MIN_KEY, .maxkey = AES_MAX_KEY, + .encrypt = aes_icm_crypt, + .decrypt = aes_icm_crypt, + .setkey = aes_icm_setkey, + .zerokey = aes_icm_zerokey, + .reinit = aes_ccm_reinit, +}; + /* * Encryption wrapper routines. */ @@ -104,6 +117,21 @@ aes_gcm_reinit(caddr_t key, u_int8_t *iv) ctx->ac_block[AESICM_BLOCKSIZE - 1] = 2; } +static void +aes_ccm_reinit(caddr_t key, u_int8_t *iv) +{ + struct aes_icm_ctx *ctx; + + ctx = (struct aes_icm_ctx*)key; + + /* CCM has flags, then the IV, then the counter, which starts at 1 */ + bzero(ctx->ac_block, sizeof(ctx->ac_block)); + /* 3 bytes for length field; this gives a nonce of 12 bytes */ + ctx->ac_block[0] = (15 - AES_CCM_IV_LEN) - 1; + bcopy(iv, ctx->ac_block+1, AES_CCM_IV_LEN); + ctx->ac_block[AESICM_BLOCKSIZE - 1] = 1; +} + static void aes_icm_crypt(caddr_t key, u_int8_t *data) { diff --git a/sys/opencrypto/xform_auth.h b/sys/opencrypto/xform_auth.h index 9af0f8e6d155..9b0726257d77 100644 --- a/sys/opencrypto/xform_auth.h +++ b/sys/opencrypto/xform_auth.h @@ -42,6 +42,7 @@ #include #include #include +#include #include #include @@ -85,6 +86,9 @@ extern struct auth_hash auth_hash_nist_gmac_aes_256; extern struct auth_hash auth_hash_blake2b; extern struct auth_hash auth_hash_blake2s; extern struct auth_hash auth_hash_poly1305; +extern struct auth_hash auth_hash_ccm_cbc_mac_128; +extern struct auth_hash auth_hash_ccm_cbc_mac_192; +extern struct auth_hash auth_hash_ccm_cbc_mac_256; union authctx { MD5_CTX md5ctx; @@ -95,6 +99,7 @@ union authctx { SHA384_CTX sha384ctx; SHA512_CTX sha512ctx; struct aes_gmac_ctx aes_gmac_ctx; + struct aes_cbc_mac_ctx aes_cbc_mac_ctx; }; #endif /* _CRYPTO_XFORM_AUTH_H_ */ diff --git a/sys/opencrypto/xform_enc.h b/sys/opencrypto/xform_enc.h index 545e0ec25497..2797ca980fb9 100644 --- a/sys/opencrypto/xform_enc.h +++ b/sys/opencrypto/xform_enc.h @@ -84,6 +84,7 @@ extern struct enc_xform enc_xform_aes_xts; extern struct enc_xform enc_xform_arc4; extern struct enc_xform enc_xform_camellia; extern struct enc_xform enc_xform_chacha20; +extern struct enc_xform enc_xform_ccm; struct aes_icm_ctx { u_int32_t ac_ek[4*(RIJNDAEL_MAXNR + 1)]; diff --git a/tools/tools/crypto/cryptocheck.c b/tools/tools/crypto/cryptocheck.c index c8a1be69a3aa..47c6bc0cfcf3 100644 --- a/tools/tools/crypto/cryptocheck.c +++ b/tools/tools/crypto/cryptocheck.c @@ -105,6 +105,9 @@ * aes-gcm 128-bit aes gcm * aes-gcm192 192-bit aes gcm * aes-gcm256 256-bit aes gcm + * aes-ccm 128-bit aes ccm + * aes-ccm192 192-bit aes ccm + * aes-ccm256 256-bit aes ccm */ #include @@ -131,7 +134,7 @@ struct alg { const char *name; int cipher; int mac; - enum { T_HASH, T_HMAC, T_BLKCIPHER, T_AUTHENC, T_GCM } type; + enum { T_HASH, T_HMAC, T_BLKCIPHER, T_AUTHENC, T_GCM, T_CCM } type; const EVP_CIPHER *(*evp_cipher)(void); const EVP_MD *(*evp_md)(void); } algs[] = { @@ -186,6 +189,15 @@ struct alg { { .name = "aes-gcm256", .cipher = CRYPTO_AES_NIST_GCM_16, .mac = CRYPTO_AES_256_NIST_GMAC, .type = T_GCM, .evp_cipher = EVP_aes_256_gcm }, + { .name = "aes-ccm", .cipher = CRYPTO_AES_CCM_16, + .mac = CRYPTO_AES_CCM_CBC_MAC, .type = T_CCM, + .evp_cipher = EVP_aes_128_ccm }, + { .name = "aes-ccm192", .cipher = CRYPTO_AES_CCM_16, + .mac = CRYPTO_AES_CCM_CBC_MAC, .type = T_CCM, + .evp_cipher = EVP_aes_192_ccm }, + { .name = "aes-ccm256", .cipher = CRYPTO_AES_CCM_16, + .mac = CRYPTO_AES_CCM_CBC_MAC, .type = T_CCM, + .evp_cipher = EVP_aes_256_ccm }, }; static bool verbose; @@ -1158,6 +1170,214 @@ run_gcm_test(struct alg *alg, size_t size) free(key); } +static void +openssl_ccm_encrypt(struct alg *alg, const EVP_CIPHER *cipher, const char *key, + const char *iv, size_t iv_len, const char *aad, size_t aad_len, + const char *input, char *output, size_t size, char *tag) +{ + EVP_CIPHER_CTX *ctx; + int outl, total; + + ctx = EVP_CIPHER_CTX_new(); + if (ctx == NULL) + errx(1, "OpenSSL %s (%zu) ctx new failed: %s", alg->name, + size, ERR_error_string(ERR_get_error(), NULL)); + if (EVP_EncryptInit_ex(ctx, cipher, NULL, NULL, NULL) != 1) + errx(1, "OpenSSL %s (%zu) ctx init failed: %s", alg->name, + size, ERR_error_string(ERR_get_error(), NULL)); + if (EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_CCM_SET_TAG, AES_CBC_MAC_HASH_LEN, NULL) != 1) + errx(1, "OpenSSL %s (%zu) setting tag length failed: %s", alg->name, + size, ERR_error_string(ERR_get_error(), NULL)); + if (EVP_EncryptInit_ex(ctx, NULL, NULL, (const u_char *)key, + (const u_char *)iv) != 1) + errx(1, "OpenSSL %s (%zu) ctx init failed: %s", alg->name, + size, ERR_error_string(ERR_get_error(), NULL)); + if (EVP_EncryptUpdate(ctx, NULL, &outl, NULL, size) != 1) + errx(1, "OpenSSL %s (%zu) unable to set data length: %s", alg->name, + size, ERR_error_string(ERR_get_error(), NULL)); + + if (aad != NULL) { + if (EVP_EncryptUpdate(ctx, NULL, &outl, (const u_char *)aad, + aad_len) != 1) + errx(1, "OpenSSL %s (%zu) aad update failed: %s", + alg->name, size, + ERR_error_string(ERR_get_error(), NULL)); + } + if (EVP_EncryptUpdate(ctx, (u_char *)output, &outl, + (const u_char *)input, size) != 1) + errx(1, "OpenSSL %s (%zu) encrypt update failed: %s", alg->name, + size, ERR_error_string(ERR_get_error(), NULL)); + total = outl; + if (EVP_EncryptFinal_ex(ctx, (u_char *)output + outl, &outl) != 1) + errx(1, "OpenSSL %s (%zu) encrypt final failed: %s", alg->name, + size, ERR_error_string(ERR_get_error(), NULL)); + total += outl; + if (total != size) + errx(1, "OpenSSL %s (%zu) encrypt size mismatch: %d", alg->name, + size, total); + if (EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_CCM_GET_TAG, AES_CBC_MAC_HASH_LEN, + tag) != 1) + errx(1, "OpenSSL %s (%zu) get tag failed: %s", alg->name, + size, ERR_error_string(ERR_get_error(), NULL)); + EVP_CIPHER_CTX_free(ctx); +} + +static bool +ocf_ccm(struct alg *alg, const char *key, size_t key_len, const char *iv, + size_t iv_len, const char *aad, size_t aad_len, const char *input, + char *output, size_t size, char *tag, int enc, int *cridp) +{ + struct session2_op sop; + struct crypt_aead caead; + int fd; + bool rv; + + memset(&sop, 0, sizeof(sop)); + memset(&caead, 0, sizeof(caead)); + sop.crid = crid; + sop.keylen = key_len; + sop.key = (char *)key; + sop.cipher = alg->cipher; + sop.mackeylen = key_len; + sop.mackey = (char *)key; + sop.mac = alg->mac; + fd = crget(); + if (ioctl(fd, CIOCGSESSION2, &sop) < 0) { + warn("cryptodev %s not supported for device %s", + alg->name, crfind(crid)); + close(fd); + return (false); + } + + caead.ses = sop.ses; + caead.op = enc ? COP_ENCRYPT : COP_DECRYPT; + caead.len = size; + caead.aadlen = aad_len; + caead.ivlen = iv_len; + caead.src = (char *)input; + caead.dst = output; + caead.aad = (char *)aad; + caead.tag = tag; + caead.iv = (char *)iv; + + if (ioctl(fd, CIOCCRYPTAEAD, &caead) < 0) { + warn("cryptodev %s (%zu) failed for device %s", + alg->name, size, crfind(crid)); + rv = false; + } else + rv = true; + + if (ioctl(fd, CIOCFSESSION, &sop.ses) < 0) + warn("ioctl(CIOCFSESSION)"); + + close(fd); + *cridp = sop.crid; + return (rv); +} + +static void +run_ccm_test(struct alg *alg, size_t size) +{ + const EVP_CIPHER *cipher; + char *aad, *buffer, *cleartext, *ciphertext; + char *iv, *key; + u_int iv_len, key_len; + int crid; + char control_tag[AES_CBC_MAC_HASH_LEN], test_tag[AES_CBC_MAC_HASH_LEN]; + + cipher = alg->evp_cipher(); + if (size % EVP_CIPHER_block_size(cipher) != 0) { + if (verbose) + printf( + "%s (%zu): invalid buffer size (block size %d)\n", + alg->name, size, EVP_CIPHER_block_size(cipher)); + return; + } + + memset(control_tag, 0x3c, sizeof(control_tag)); + memset(test_tag, 0x3c, sizeof(test_tag)); + + /* + * We only have one algorithm constant for CBC-MAC; however, the + * alg structure uses the different openssl types, which gives us + * the key length. We need that for the OCF code. + */ + key_len = EVP_CIPHER_key_length(cipher); + + /* + * AES-CCM can have varying IV lengths; however, for the moment + * we only support AES_CCM_IV_LEN (12). So if the sizes are + * different, we'll fail. + */ + iv_len = EVP_CIPHER_iv_length(cipher); + if (iv_len != AES_CCM_IV_LEN) { + if (verbose) + printf("OpenSSL CCM IV length (%d) != AES_CCM_IV_LEN", + iv_len); + return; + } + + key = alloc_buffer(key_len); + iv = generate_iv(iv_len, alg); + cleartext = alloc_buffer(size); + buffer = malloc(size); + ciphertext = malloc(size); + if (aad_len != 0) + aad = alloc_buffer(aad_len); + else + aad = NULL; + + /* OpenSSL encrypt */ + openssl_ccm_encrypt(alg, cipher, key, iv, iv_len, aad, aad_len, cleartext, + ciphertext, size, control_tag); + + /* OCF encrypt */ + if (!ocf_ccm(alg, key, key_len, iv, iv_len, aad, aad_len, cleartext, + buffer, size, test_tag, 1, &crid)) + goto out; + if (memcmp(ciphertext, buffer, size) != 0) { + printf("%s (%zu) encryption mismatch:\n", alg->name, size); + printf("control:\n"); + hexdump(ciphertext, size, NULL, 0); + printf("test (cryptodev device %s):\n", crfind(crid)); + hexdump(buffer, size, NULL, 0); + goto out; + } + if (memcmp(control_tag, test_tag, sizeof(control_tag)) != 0) { + printf("%s (%zu) enc tag mismatch:\n", alg->name, size); + printf("control:\n"); + hexdump(control_tag, sizeof(control_tag), NULL, 0); + printf("test (cryptodev device %s):\n", crfind(crid)); + hexdump(test_tag, sizeof(test_tag), NULL, 0); + goto out; + } + + /* OCF decrypt */ + if (!ocf_ccm(alg, key, key_len, iv, iv_len, aad, aad_len, ciphertext, + buffer, size, control_tag, 0, &crid)) + goto out; + if (memcmp(cleartext, buffer, size) != 0) { + printf("%s (%zu) decryption mismatch:\n", alg->name, size); + printf("control:\n"); + hexdump(cleartext, size, NULL, 0); + printf("test (cryptodev device %s):\n", crfind(crid)); + hexdump(buffer, size, NULL, 0); + goto out; + } + + if (verbose) + printf("%s (%zu) matched (cryptodev device %s)\n", + alg->name, size, crfind(crid)); + +out: + free(aad); + free(ciphertext); + free(buffer); + free(cleartext); + free(iv); + free(key); +} + static void run_test(struct alg *alg, size_t size) { @@ -1178,6 +1398,9 @@ run_test(struct alg *alg, size_t size) case T_GCM: run_gcm_test(alg, size); break; + case T_CCM: + run_ccm_test(alg, size); + break; } } @@ -1247,7 +1470,8 @@ run_aead_tests(size_t *sizes, u_int nsizes) u_int i; for (i = 0; i < nitems(algs); i++) - if (algs[i].type == T_GCM) + if (algs[i].type == T_GCM || + algs[i].type == T_CCM) run_test_sizes(&algs[i], sizes, nsizes); } From 72309077ebc4b0fad0057ea3a4ede6d894878b76 Mon Sep 17 00:00:00 2001 From: Sean Eric Fagan Date: Fri, 15 Feb 2019 04:01:59 +0000 Subject: [PATCH 24/89] Pasting in a source control line missed the last quote. Fixed. --- sys/opencrypto/cbc_mac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/opencrypto/cbc_mac.c b/sys/opencrypto/cbc_mac.c index e75e5df369ba..93f4d1854582 100644 --- a/sys/opencrypto/cbc_mac.c +++ b/sys/opencrypto/cbc_mac.c @@ -23,7 +23,7 @@ */ #include -__FBSDID("$FreeBSD$); +__FBSDID("$FreeBSD$"); #include #include From 1357a3bc19a3094c6ddd0a477e70b4239fee63d9 Mon Sep 17 00:00:00 2001 From: Sean Eric Fagan Date: Fri, 15 Feb 2019 04:15:43 +0000 Subject: [PATCH 25/89] Fix another issue from r344141, having to do with size of a shift amount. This did not show up in my testing. Differential Revision: https://reviews.freebsd.org/D18592 --- sys/opencrypto/cbc_mac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/opencrypto/cbc_mac.c b/sys/opencrypto/cbc_mac.c index 93f4d1854582..bd7f9cb00d79 100644 --- a/sys/opencrypto/cbc_mac.c +++ b/sys/opencrypto/cbc_mac.c @@ -128,7 +128,7 @@ AES_CBC_MAC_Reinit(struct aes_cbc_mac_ctx *ctx, const uint8_t *nonce, uint16_t n uint16_t sizeVal = htobe16(ctx->authDataLength); bcopy(&sizeVal, ctx->staging_block, sizeof(sizeVal)); ctx->blockIndex = sizeof(sizeVal); - } else if (ctx->authDataLength < (1UL<<32)) { + } else if (ctx->authDataLength < (1ULL<<32)) { uint32_t sizeVal = htobe32(ctx->authDataLength); ctx->staging_block[0] = 0xff; ctx->staging_block[1] = 0xfe; From e82fdca156aaeb03cdb9555d4cb6228e6209092f Mon Sep 17 00:00:00 2001 From: Michael Tuexen Date: Fri, 15 Feb 2019 09:45:17 +0000 Subject: [PATCH 26/89] Fix a byte ordering issue for the advertised receiver window in ACK segments sent in TIMEWAIT state, which I introduced in r336937. MFC after: 3 days Sponsored by: Netflix, Inc. --- sys/netinet/tcp_timewait.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/netinet/tcp_timewait.c b/sys/netinet/tcp_timewait.c index 0384fc54ca8d..04729763a858 100644 --- a/sys/netinet/tcp_timewait.c +++ b/sys/netinet/tcp_timewait.c @@ -302,7 +302,7 @@ tcp_twstart(struct tcpcb *tp) if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt) && recwin < (tp->rcv_adv - tp->rcv_nxt)) recwin = (tp->rcv_adv - tp->rcv_nxt); - tw->last_win = htons((u_short)(recwin >> tp->rcv_scale)); + tw->last_win = (u_short)(recwin >> tp->rcv_scale); /* * Set t_recent if timestamps are used on the connection. From c51a229ca754eba7e83a4a5e8c18a3517263ba2f Mon Sep 17 00:00:00 2001 From: Marcin Wojtas Date: Fri, 15 Feb 2019 10:34:27 +0000 Subject: [PATCH 27/89] Fix validation of the Rx OOO completion in the ENA Requested ID should be validated when the packet is received and not when the driver is repopulating the mbufs. Submitted by: Michal Krawczyk Obtained from: Semihalf Sponsored by: Amazon, Inc. MFC after: 1 week --- sys/dev/ena/ena.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/sys/dev/ena/ena.c b/sys/dev/ena/ena.c index 12a9e25d3078..6a82d9a15fc6 100644 --- a/sys/dev/ena/ena.c +++ b/sys/dev/ena/ena.c @@ -1046,10 +1046,6 @@ ena_refill_rx_bufs(struct ena_ring *rx_ring, uint32_t num) "RX buffer - next to use: %d", next_to_use); req_id = rx_ring->free_rx_ids[next_to_use]; - rc = validate_rx_req_id(rx_ring, req_id); - if (unlikely(rc != 0)) - break; - rx_info = &rx_ring->rx_buffer_info[req_id]; rc = ena_alloc_rx_mbuf(adapter, rx_ring, rx_info); @@ -1472,6 +1468,7 @@ ena_rx_mbuf(struct ena_ring *rx_ring, struct ena_com_rx_buf_info *ena_bufs, struct ena_rx_buffer *rx_info; struct ena_adapter *adapter; unsigned int descs = ena_rx_ctx->descs; + int rc; uint16_t ntc, len, req_id, buf = 0; ntc = *next_to_clean; @@ -1485,6 +1482,10 @@ ena_rx_mbuf(struct ena_ring *rx_ring, struct ena_com_rx_buf_info *ena_bufs, len = ena_bufs[buf].len; req_id = ena_bufs[buf].req_id; + rc = validate_rx_req_id(rx_ring, req_id); + if (unlikely(rc != 0)) + return (NULL); + rx_info = &rx_ring->rx_buffer_info[req_id]; ena_trace(ENA_DBG | ENA_RXPTH, "rx_info %p, mbuf %p, paddr %jx", @@ -1517,6 +1518,16 @@ ena_rx_mbuf(struct ena_ring *rx_ring, struct ena_com_rx_buf_info *ena_bufs, ++buf; len = ena_bufs[buf].len; req_id = ena_bufs[buf].req_id; + rc = validate_rx_req_id(rx_ring, req_id); + if (unlikely(rc != 0)) { + /* + * If the req_id is invalid, then the device will be + * reset. In that case we must free all mbufs that + * were already gathered. + */ + m_freem(mbuf); + return (NULL); + } rx_info = &rx_ring->rx_buffer_info[req_id]; if (unlikely(rx_info->mbuf == NULL)) { From 1d65b4c095cdb9e511857c97556403851f517118 Mon Sep 17 00:00:00 2001 From: Marcin Wojtas Date: Fri, 15 Feb 2019 10:40:41 +0000 Subject: [PATCH 28/89] Do not use ntc for obtaining buffer on Rx in the ENA In out of order mode Rx buffer are accesses by req_id. Accessing and validating mbuf using ntc is causing false error. Increase driver revision after latest RX OOO completion fixes. Submitted by: Rafal Kozik Obtained from: Semihalf Sponsored by: Amazon, Inc. MFC after: 1 week --- sys/dev/ena/ena.c | 10 ++++------ sys/dev/ena/ena.h | 2 +- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/sys/dev/ena/ena.c b/sys/dev/ena/ena.c index 6a82d9a15fc6..49213d12b9d9 100644 --- a/sys/dev/ena/ena.c +++ b/sys/dev/ena/ena.c @@ -1473,12 +1473,6 @@ ena_rx_mbuf(struct ena_ring *rx_ring, struct ena_com_rx_buf_info *ena_bufs, ntc = *next_to_clean; adapter = rx_ring->adapter; - rx_info = &rx_ring->rx_buffer_info[ntc]; - - if (unlikely(rx_info->mbuf == NULL)) { - device_printf(adapter->pdev, "NULL mbuf in rx_info"); - return (NULL); - } len = ena_bufs[buf].len; req_id = ena_bufs[buf].req_id; @@ -1487,6 +1481,10 @@ ena_rx_mbuf(struct ena_ring *rx_ring, struct ena_com_rx_buf_info *ena_bufs, return (NULL); rx_info = &rx_ring->rx_buffer_info[req_id]; + if (unlikely(rx_info->mbuf == NULL)) { + device_printf(adapter->pdev, "NULL mbuf in rx_info"); + return (NULL); + } ena_trace(ENA_DBG | ENA_RXPTH, "rx_info %p, mbuf %p, paddr %jx", rx_info, rx_info->mbuf, (uintmax_t)rx_info->ena_buf.paddr); diff --git a/sys/dev/ena/ena.h b/sys/dev/ena/ena.h index b0ef5e47d818..574cd57b9cda 100644 --- a/sys/dev/ena/ena.h +++ b/sys/dev/ena/ena.h @@ -41,7 +41,7 @@ #define DRV_MODULE_VER_MAJOR 0 #define DRV_MODULE_VER_MINOR 8 -#define DRV_MODULE_VER_SUBMINOR 2 +#define DRV_MODULE_VER_SUBMINOR 3 #define DRV_MODULE_NAME "ena" From 85cf19adc5de9585cc49a0a97d73c2be89b96226 Mon Sep 17 00:00:00 2001 From: "Rodney W. Grimes" Date: Fri, 15 Feb 2019 16:20:21 +0000 Subject: [PATCH 29/89] In r340042 an attempt to quiet coverity warning cid 1305412 was overdone. nopt is the only allocated space, xopt and cp are aliases into that allocated space. Remove the 2 unneeded free's Reported by: Patrick Mooney (@pmooney_pfmooney.com) Reviewed by: jhb (maintainer), Patrick Mooney (joyent/illumos) Approved by: bde (mentor) CID: 1305412 MFC after: 3 days MFC with: 340042 Differential Revision: https://reviews.freebsd.org/D19200 --- usr.sbin/bhyve/block_if.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/usr.sbin/bhyve/block_if.c b/usr.sbin/bhyve/block_if.c index dc57217ddaa3..c94548ba5fdb 100644 --- a/usr.sbin/bhyve/block_if.c +++ b/usr.sbin/bhyve/block_if.c @@ -576,8 +576,6 @@ blockif_open(const char *optstr, const char *ident) err: if (fd >= 0) close(fd); - free(cp); - free(xopts); free(nopt); return (NULL); } From 11e67b92b5782066894420581b7046424f7d31e2 Mon Sep 17 00:00:00 2001 From: "Rodney W. Grimes" Date: Fri, 15 Feb 2019 16:48:15 +0000 Subject: [PATCH 30/89] In r340044 an attempt to quiet coverity warning cid 1357336 was incorrectly implemented leading to a possible double free. It is possible for both the conditional free, and the unconditional free added in r340044 to be done, fix that by initializing uopt to NULL, removing the conditional free, and only using the unconditional free at the end. Reported by: Patrick Mooney (patrick.mooney@joyent.com) Reviewed by: jhb (maintainer), Patrick Mooney (joyent/illumos) Approved by: bde (mentor) CID: 1357336 MFC after: 3 days MFC with: 340044 Differential Revision: https://reviews.freebsd.org/D19202 --- usr.sbin/bhyve/pci_xhci.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/usr.sbin/bhyve/pci_xhci.c b/usr.sbin/bhyve/pci_xhci.c index 3c0a798a0112..de18739e9573 100644 --- a/usr.sbin/bhyve/pci_xhci.c +++ b/usr.sbin/bhyve/pci_xhci.c @@ -2626,6 +2626,7 @@ pci_xhci_parse_opts(struct pci_xhci_softc *sc, char *opts) char *uopt, *xopts, *config; int usb3_port, usb2_port, i; + uopt = NULL; usb3_port = sc->usb3_port_start - 1; usb2_port = sc->usb2_port_start - 1; devices = NULL; @@ -2700,8 +2701,6 @@ pci_xhci_parse_opts(struct pci_xhci_softc *sc, char *opts) sc->ndevices++; } - if (uopt != NULL) - free(uopt); portsfinal: sc->portregs = calloc(XHCI_MAX_DEVS, sizeof(struct pci_xhci_portregs)); From e49f2c66d1289fccf4a99fa20acad4125fba8ce9 Mon Sep 17 00:00:00 2001 From: Kyle Evans Date: Fri, 15 Feb 2019 18:28:51 +0000 Subject: [PATCH 31/89] stand: dev_net: correct net_open's interpretation of params net_open previously casted the first vararg to a char * and this was half-OK: at first, it is passed to netif_open, which would cast it back to the struct devdesc * that it really is and use it properly. It is then strdup()d and used as the netdev_name, which is objectively wrong. Correct it so that the first vararg is properly casted to a struct devdesc * and the netdev_name gets set properly to make it more clear at a glance that it's not doing something horribly wrong. Reported by: mmel Reviewed by: imp, mmel, tsoome MFC after: 1 week Differential Revision: https://reviews.freebsd.org/D19206 --- stand/common/dev_net.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/stand/common/dev_net.c b/stand/common/dev_net.c index d461f747adaf..1fa955ac1cf1 100644 --- a/stand/common/dev_net.c +++ b/stand/common/dev_net.c @@ -122,13 +122,15 @@ net_open(struct open_file *f, ...) { struct iodesc *d; va_list args; - char *devname; /* Device part of file name (or NULL). */ + struct devdesc *dev; + const char *devname; /* Device part of file name (or NULL). */ int error = 0; va_start(args, f); - devname = va_arg(args, char*); + dev = va_arg(args, struct devdesc *); va_end(args); + devname = dev->d_dev->dv_name; /* Before opening another interface, close the previous one first. */ if (netdev_sock >= 0 && strcmp(devname, netdev_name) != 0) net_cleanup(); @@ -137,7 +139,7 @@ net_open(struct open_file *f, ...) if (netdev_opens == 0) { /* Find network interface. */ if (netdev_sock < 0) { - netdev_sock = netif_open(devname); + netdev_sock = netif_open(dev); if (netdev_sock < 0) { printf("net_open: netif_open() failed\n"); return (ENXIO); From ca62461bc6525f4d25d276714b4b0a2947e183a0 Mon Sep 17 00:00:00 2001 From: Stephen Hurd Date: Fri, 15 Feb 2019 18:51:43 +0000 Subject: [PATCH 32/89] iflib: Improve return values of interrupt handlers. iflib was returning FILTER_HANDLED, in cases where FILTER_STRAY was more correct. This potentially caused issues with shared legacy interrupts. Driver filters returning FILTER_STRAY are now properly handled. Submitted by: Augustin Cavalier Reviewed by: marius, gallatin Obtained from: Haiku (a84bb9, 4947d1) MFC after: 1 week Sponsored by: Limelight Networks Differential Revision: https://reviews.freebsd.org/D19201 --- sys/net/iflib.c | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/sys/net/iflib.c b/sys/net/iflib.c index fb0c420f1dab..51d980cd1026 100644 --- a/sys/net/iflib.c +++ b/sys/net/iflib.c @@ -1468,12 +1468,17 @@ iflib_fast_intr(void *arg) { iflib_filter_info_t info = arg; struct grouptask *gtask = info->ifi_task; + int result; + if (!iflib_started) - return (FILTER_HANDLED); + return (FILTER_STRAY); DBG_COUNTER_INC(fast_intrs); - if (info->ifi_filter != NULL && info->ifi_filter(info->ifi_filter_arg) == FILTER_HANDLED) - return (FILTER_HANDLED); + if (info->ifi_filter != NULL) { + result = info->ifi_filter(info->ifi_filter_arg); + if ((result & FILTER_SCHEDULE_THREAD) == 0) + return (result); + } GROUPTASK_ENQUEUE(gtask); return (FILTER_HANDLED); @@ -1488,15 +1493,18 @@ iflib_fast_intr_rxtx(void *arg) iflib_rxq_t rxq = (iflib_rxq_t)info->ifi_ctx; iflib_txq_t txq; void *sc; - int i, cidx; + int i, cidx, result; qidx_t txqid; if (!iflib_started) - return (FILTER_HANDLED); + return (FILTER_STRAY); DBG_COUNTER_INC(fast_intrs); - if (info->ifi_filter != NULL && info->ifi_filter(info->ifi_filter_arg) == FILTER_HANDLED) - return (FILTER_HANDLED); + if (info->ifi_filter != NULL) { + result = info->ifi_filter(info->ifi_filter_arg); + if ((result & FILTER_SCHEDULE_THREAD) == 0) + return (result); + } ctx = rxq->ifr_ctx; sc = ctx->ifc_softc; @@ -1531,13 +1539,17 @@ iflib_fast_intr_ctx(void *arg) { iflib_filter_info_t info = arg; struct grouptask *gtask = info->ifi_task; + int result; if (!iflib_started) - return (FILTER_HANDLED); + return (FILTER_STRAY); DBG_COUNTER_INC(fast_intrs); - if (info->ifi_filter != NULL && info->ifi_filter(info->ifi_filter_arg) == FILTER_HANDLED) - return (FILTER_HANDLED); + if (info->ifi_filter != NULL) { + result = info->ifi_filter(info->ifi_filter_arg); + if ((result & FILTER_SCHEDULE_THREAD) == 0) + return (result); + } GROUPTASK_ENQUEUE(gtask); return (FILTER_HANDLED); From bcf99d2d99910404f734b910c5214854e0fe626d Mon Sep 17 00:00:00 2001 From: Ed Maste Date: Fri, 15 Feb 2019 22:22:38 +0000 Subject: [PATCH 33/89] Add WITH_PIE knob to build Position Independent Executables Building binaries as PIE allows the executable itself to be loaded at a random address when ASLR is enabled (not just its shared libraries). With this change PIE objects have a .pieo extension and INTERNALLIB libraries libXXX_pie.a. MK_PIE is disabled for some kerberos5 tools, Clang, and Subversion, as they explicitly reference .a libraries in their Makefiles. These can be addressed on an individual basis later. MK_PIE is also disabled for rtld-elf because it is already position-independent using bespoke Makefile rules. Currently only dynamically linked binaries will be built as PIE. Discussed with: dim Reviewed by: kib MFC after: 1 month Relnotes: Yes Sponsored by: The FreeBSD Foundation Differential Revision: https://reviews.freebsd.org/D18423 --- kerberos5/tools/asn1_compile/Makefile | 1 + kerberos5/tools/slc/Makefile | 1 + lib/clang/Makefile.inc | 2 ++ libexec/rtld-elf/Makefile | 1 + share/mk/bsd.lib.mk | 38 ++++++++++++++++++-- share/mk/bsd.opts.mk | 1 + share/mk/bsd.prog.mk | 6 ++++ share/mk/src.libnames.mk | 50 +++++++++++++++------------ stand/i386/Makefile.inc | 1 + tools/build/options/WITHOUT_PIE | 3 ++ tools/build/options/WITH_PIE | 3 ++ usr.bin/clang/Makefile.inc | 2 ++ usr.bin/svn/Makefile.inc | 2 ++ 13 files changed, 87 insertions(+), 24 deletions(-) create mode 100644 tools/build/options/WITHOUT_PIE create mode 100644 tools/build/options/WITH_PIE diff --git a/kerberos5/tools/asn1_compile/Makefile b/kerberos5/tools/asn1_compile/Makefile index 68715facfcbd..5e9cbfa5ea45 100644 --- a/kerberos5/tools/asn1_compile/Makefile +++ b/kerberos5/tools/asn1_compile/Makefile @@ -6,6 +6,7 @@ LIBROKEN_A= ${.OBJDIR:H:H}/lib/libroken/libroken.a LIBADD= vers LDADD= ${LIBROKEN_A} DPADD= ${LIBROKEN_A} +MK_PIE:= no SRCS= \ asn1parse.y \ diff --git a/kerberos5/tools/slc/Makefile b/kerberos5/tools/slc/Makefile index 34092a566443..df64d829d080 100644 --- a/kerberos5/tools/slc/Makefile +++ b/kerberos5/tools/slc/Makefile @@ -6,6 +6,7 @@ LIBADD= vers LDADD= ${LIBROKEN_A} DPADD= ${LIBROKEN_A} MAN= +MK_PIE:= no SRCS= roken.h \ slc-gram.y \ diff --git a/lib/clang/Makefile.inc b/lib/clang/Makefile.inc index a0b4eea76882..3d49ea44c991 100644 --- a/lib/clang/Makefile.inc +++ b/lib/clang/Makefile.inc @@ -2,6 +2,8 @@ .include +MK_PIE:= no # Explicit libXXX.a references + .if ${COMPILER_TYPE} == "clang" DEBUG_FILES_CFLAGS= -gline-tables-only .else diff --git a/libexec/rtld-elf/Makefile b/libexec/rtld-elf/Makefile index 71b75b9273b3..59e97c93cd83 100644 --- a/libexec/rtld-elf/Makefile +++ b/libexec/rtld-elf/Makefile @@ -7,6 +7,7 @@ .include PACKAGE= clibs MK_BIND_NOW= no +MK_PIE= no # Always position independent using local rules MK_SSP= no CONFS= libmap.conf diff --git a/share/mk/bsd.lib.mk b/share/mk/bsd.lib.mk index 5c1a38ce7ccd..244d68762155 100644 --- a/share/mk/bsd.lib.mk +++ b/share/mk/bsd.lib.mk @@ -91,13 +91,16 @@ CTFFLAGS+= -g # prefer .s to a .c, add .po, remove stuff not used in the BSD libraries # .pico used for PIC object files # .nossppico used for NOSSP PIC object files -.SUFFIXES: .out .o .bc .ll .po .pico .nossppico .S .asm .s .c .cc .cpp .cxx .C .f .y .l .ln +# .pieo used for PIE object files +.SUFFIXES: .out .o .bc .ll .po .pico .nossppico .pieo .S .asm .s .c .cc .cpp .cxx .C .f .y .l .ln .if !defined(PICFLAG) .if ${MACHINE_CPUARCH} == "sparc64" PICFLAG=-fPIC +PIEFLAG=-fPIE .else PICFLAG=-fpic +PIEFLAG=-fpie .endif .endif @@ -115,6 +118,10 @@ PO_FLAG=-pg ${CC} ${PICFLAG} -DPIC ${SHARED_CFLAGS:C/^-fstack-protector.*$//} ${CFLAGS:C/^-fstack-protector.*$//} -c ${.IMPSRC} -o ${.TARGET} ${CTFCONVERT_CMD} +.c.pieo: + ${CC} ${PIEFLAG} -DPIC ${SHARED_CFLAGS} ${CFLAGS} -c ${.IMPSRC} -o ${.TARGET} + ${CTFCONVERT_CMD} + .cc.po .C.po .cpp.po .cxx.po: ${CXX} ${PO_FLAG} ${STATIC_CXXFLAGS} ${PO_CXXFLAGS} -c ${.IMPSRC} -o ${.TARGET} @@ -124,6 +131,9 @@ PO_FLAG=-pg .cc.nossppico .C.nossppico .cpp.nossppico .cxx.nossppico: ${CXX} ${PICFLAG} -DPIC ${SHARED_CXXFLAGS:C/^-fstack-protector.*$//} ${CXXFLAGS:C/^-fstack-protector.*$//} -c ${.IMPSRC} -o ${.TARGET} +.cc.pieo .C.pieo .cpp.pieo .cxx.pieo: + ${CXX} ${PIEFLAG} ${SHARED_CXXFLAGS} ${CXXFLAGS} -c ${.IMPSRC} -o ${.TARGET} + .f.po: ${FC} -pg ${FFLAGS} -o ${.TARGET} -c ${.IMPSRC} ${CTFCONVERT_CMD} @@ -136,7 +146,7 @@ PO_FLAG=-pg ${FC} ${PICFLAG} -DPIC ${FFLAGS:C/^-fstack-protector.*$//} -o ${.TARGET} -c ${.IMPSRC} ${CTFCONVERT_CMD} -.s.po .s.pico .s.nossppico: +.s.po .s.pico .s.nossppico .s.pieo: ${AS} ${AFLAGS} -o ${.TARGET} ${.IMPSRC} ${CTFCONVERT_CMD} @@ -155,6 +165,11 @@ PO_FLAG=-pg ${CFLAGS:C/^-fstack-protector.*$//} ${ACFLAGS} -c ${.IMPSRC} -o ${.TARGET} ${CTFCONVERT_CMD} +.asm.pieo: + ${CC:N${CCACHE_BIN}} -x assembler-with-cpp ${PIEFLAG} -DPIC \ + ${CFLAGS} ${ACFLAGS} -c ${.IMPSRC} -o ${.TARGET} + ${CTFCONVERT_CMD} + .S.po: ${CC:N${CCACHE_BIN}} -DPROF ${PO_CFLAGS} ${ACFLAGS} -c ${.IMPSRC} \ -o ${.TARGET} @@ -170,6 +185,11 @@ PO_FLAG=-pg -c ${.IMPSRC} -o ${.TARGET} ${CTFCONVERT_CMD} +.S.pieo: + ${CC:N${CCACHE_BIN}} ${PIEFLAG} -DPIC ${CFLAGS} ${ACFLAGS} \ + -c ${.IMPSRC} -o ${.TARGET} + ${CTFCONVERT_CMD} + _LIBDIR:=${LIBDIR} _SHLIBDIR:=${SHLIBDIR} @@ -334,6 +354,20 @@ lib${LIB_PRIVATE}${LIB}_nossp_pic.a: ${NOSSPSOBJS} .endif # !defined(INTERNALLIB) +.if defined(INTERNALLIB) && ${MK_PIE} != "no" +PIEOBJS+= ${OBJS:.o=.pieo} +DEPENDOBJS+= ${PIEOBJS} +CLEANFILES+= ${PIEOBJS} + +_LIBS+= lib${LIB_PRIVATE}${LIB}_pie.a + +lib${LIB_PRIVATE}${LIB}_pie.a: ${PIEOBJS} + @${ECHO} building pie ${LIB} library + @rm -f ${.TARGET} + ${AR} ${ARFLAGS} ${.TARGET} ${PIEOBJS} ${ARADD} + ${RANLIB} ${RANLIBFLAGS} ${.TARGET} +.endif + .if defined(_SKIP_BUILD) all: .else diff --git a/share/mk/bsd.opts.mk b/share/mk/bsd.opts.mk index 5a5bf811ddf9..8c410617a33a 100644 --- a/share/mk/bsd.opts.mk +++ b/share/mk/bsd.opts.mk @@ -73,6 +73,7 @@ __DEFAULT_NO_OPTIONS = \ CCACHE_BUILD \ CTF \ INSTALL_AS_USER \ + PIE \ RETPOLINE \ STALE_STAGED diff --git a/share/mk/bsd.prog.mk b/share/mk/bsd.prog.mk index 2e5fe36e6f9c..3811bb5b67a2 100644 --- a/share/mk/bsd.prog.mk +++ b/share/mk/bsd.prog.mk @@ -38,6 +38,12 @@ MK_DEBUG_FILES= no .if ${MK_BIND_NOW} != "no" LDFLAGS+= -Wl,-znow .endif +.if ${MK_PIE} != "no" && \ + !defined(NO_SHARED) || ${NO_SHARED} == "no" || ${NO_SHARED} == "NO" +CFLAGS+= -fPIE +CXXFLAGS+= -fPIE +LDFLAGS+= -pie +.endif .if ${MK_RETPOLINE} != "no" CFLAGS+= -mretpoline CXXFLAGS+= -mretpoline diff --git a/share/mk/src.libnames.mk b/share/mk/src.libnames.mk index 886922b94e0b..ac0e37f0669e 100644 --- a/share/mk/src.libnames.mk +++ b/share/mk/src.libnames.mk @@ -368,6 +368,10 @@ LDADD_atf_cxx= -lprivateatf-c++ LIB${_l:tu}?= ${LIBDESTDIR}${LIBDIR_BASE}/libprivate${_l}.a .endfor +.if ${MK_PIE} != "no" +PIE_SUFFIX= _pie +.endif + .for _l in ${_LIBRARIES} .if ${_INTERNALLIBS:M${_l}} || !defined(SYSROOT) LDADD_${_l}_L+= -L${LIB${_l:tu}DIR} @@ -375,6 +379,8 @@ LDADD_${_l}_L+= -L${LIB${_l:tu}DIR} DPADD_${_l}?= ${LIB${_l:tu}} .if ${_PRIVATELIBS:M${_l}} LDADD_${_l}?= -lprivate${_l} +.elif ${_INTERNALLIBS:M${_l}} +LDADD_${_l}?= ${LDADD_${_l}_L} -l${_l:S/${PIE_SUFFIX}//}${PIE_SUFFIX} .else LDADD_${_l}?= ${LDADD_${_l}_L} -l${_l} .endif @@ -418,69 +424,69 @@ LDADD+= ${LDADD_${_l}} # INTERNALLIB definitions. LIBELFTCDIR= ${OBJTOP}/lib/libelftc -LIBELFTC?= ${LIBELFTCDIR}/libelftc.a +LIBELFTC?= ${LIBELFTCDIR}/libelftc${PIE_SUFFIX}.a LIBPEDIR= ${OBJTOP}/lib/libpe -LIBPE?= ${LIBPEDIR}/libpe.a +LIBPE?= ${LIBPEDIR}/libpe${PIE_SUFFIX}.a LIBOPENBSDDIR= ${OBJTOP}/lib/libopenbsd -LIBOPENBSD?= ${LIBOPENBSDDIR}/libopenbsd.a +LIBOPENBSD?= ${LIBOPENBSDDIR}/libopenbsd${PIE_SUFFIX}.a LIBSMDIR= ${OBJTOP}/lib/libsm -LIBSM?= ${LIBSMDIR}/libsm.a +LIBSM?= ${LIBSMDIR}/libsm${PIE_SUFFIX}.a LIBSMDBDIR= ${OBJTOP}/lib/libsmdb -LIBSMDB?= ${LIBSMDBDIR}/libsmdb.a +LIBSMDB?= ${LIBSMDBDIR}/libsmdb${PIE_SUFFIX}.a LIBSMUTILDIR= ${OBJTOP}/lib/libsmutil -LIBSMUTIL?= ${LIBSMUTILDIR}/libsmutil.a +LIBSMUTIL?= ${LIBSMUTILDIR}/libsmutil${PIE_SUFFIX}.a LIBNETBSDDIR?= ${OBJTOP}/lib/libnetbsd -LIBNETBSD?= ${LIBNETBSDDIR}/libnetbsd.a +LIBNETBSD?= ${LIBNETBSDDIR}/libnetbsd${PIE_SUFFIX}.a LIBVERSDIR?= ${OBJTOP}/kerberos5/lib/libvers -LIBVERS?= ${LIBVERSDIR}/libvers.a +LIBVERS?= ${LIBVERSDIR}/libvers${PIE_SUFFIX}.a LIBSLDIR= ${OBJTOP}/kerberos5/lib/libsl -LIBSL?= ${LIBSLDIR}/libsl.a +LIBSL?= ${LIBSLDIR}/libsl${PIE_SUFFIX}.a LIBIPFDIR= ${OBJTOP}/sbin/ipf/libipf -LIBIPF?= ${LIBIPFDIR}/libipf.a +LIBIPF?= ${LIBIPFDIR}/libipf${PIE_SUFFIX}.a LIBTELNETDIR= ${OBJTOP}/lib/libtelnet -LIBTELNET?= ${LIBTELNETDIR}/libtelnet.a +LIBTELNET?= ${LIBTELNETDIR}/libtelnet${PIE_SUFFIX}.a LIBCRONDIR= ${OBJTOP}/usr.sbin/cron/lib -LIBCRON?= ${LIBCRONDIR}/libcron.a +LIBCRON?= ${LIBCRONDIR}/libcron${PIE_SUFFIX}.a LIBNTPDIR= ${OBJTOP}/usr.sbin/ntp/libntp -LIBNTP?= ${LIBNTPDIR}/libntp.a +LIBNTP?= ${LIBNTPDIR}/libntp${PIE_SUFFIX}.a LIBNTPEVENTDIR= ${OBJTOP}/usr.sbin/ntp/libntpevent -LIBNTPEVENT?= ${LIBNTPEVENTDIR}/libntpevent.a +LIBNTPEVENT?= ${LIBNTPEVENTDIR}/libntpevent${PIE_SUFFIX}.a LIBOPTSDIR= ${OBJTOP}/usr.sbin/ntp/libopts -LIBOPTS?= ${LIBOPTSDIR}/libopts.a +LIBOPTS?= ${LIBOPTSDIR}/libopts${PIE_SUFFIX}.a LIBPARSEDIR= ${OBJTOP}/usr.sbin/ntp/libparse -LIBPARSE?= ${LIBPARSEDIR}/libparse.a +LIBPARSE?= ${LIBPARSEDIR}/libparse${PIE_SUFFIX}.a LIBLPRDIR= ${OBJTOP}/usr.sbin/lpr/common_source -LIBLPR?= ${LIBLPRDIR}/liblpr.a +LIBLPR?= ${LIBLPRDIR}/liblpr${PIE_SUFFIX}.a LIBFIFOLOGDIR= ${OBJTOP}/usr.sbin/fifolog/lib -LIBFIFOLOG?= ${LIBFIFOLOGDIR}/libfifolog.a +LIBFIFOLOG?= ${LIBFIFOLOGDIR}/libfifolog${PIE_SUFFIX}.a LIBBSNMPTOOLSDIR= ${OBJTOP}/usr.sbin/bsnmpd/tools/libbsnmptools -LIBBSNMPTOOLS?= ${LIBBSNMPTOOLSDIR}/libbsnmptools.a +LIBBSNMPTOOLS?= ${LIBBSNMPTOOLSDIR}/libbsnmptools${PIE_SUFFIX}.a LIBAMUDIR= ${OBJTOP}/usr.sbin/amd/libamu -LIBAMU?= ${LIBAMUDIR}/libamu.a +LIBAMU?= ${LIBAMUDIR}/libamu${PIE_SUFFIX}.a -LIBBE?= ${LIBBEDIR}/libbe.a +LIBBE?= ${LIBBEDIR}/libbe${PIE_SUFFIX}.a LIBPMCSTATDIR= ${OBJTOP}/lib/libpmcstat -LIBPMCSTAT?= ${LIBPMCSTATDIR}/libpmcstat.a +LIBPMCSTAT?= ${LIBPMCSTATDIR}/libpmcstat${PIE_SUFFIX}.a LIBC_NOSSP_PICDIR= ${OBJTOP}/lib/libc LIBC_NOSSP_PIC?= ${LIBC_NOSSP_PICDIR}/libc_nossp_pic.a diff --git a/stand/i386/Makefile.inc b/stand/i386/Makefile.inc index 583b94ffb24b..e2723dd79cdb 100644 --- a/stand/i386/Makefile.inc +++ b/stand/i386/Makefile.inc @@ -7,6 +7,7 @@ LOADER_ADDRESS?=0x200000 LDFLAGS+= -nostdlib LDFLAGS.lld+= -Wl,--no-rosegment +MK_PIE:= no # BTX components BTXDIR= ${BOOTOBJ}/i386/btx diff --git a/tools/build/options/WITHOUT_PIE b/tools/build/options/WITHOUT_PIE new file mode 100644 index 000000000000..69cc4843587f --- /dev/null +++ b/tools/build/options/WITHOUT_PIE @@ -0,0 +1,3 @@ +.\" $FreeBSD$ +Do not build dynamically linked binaries as +Position-Independent Executable (PIE). diff --git a/tools/build/options/WITH_PIE b/tools/build/options/WITH_PIE new file mode 100644 index 000000000000..c328a79e07cc --- /dev/null +++ b/tools/build/options/WITH_PIE @@ -0,0 +1,3 @@ +.\" $FreeBSD$ +Build dynamically linked binaries as +Position-Independent Executable (PIE). diff --git a/usr.bin/clang/Makefile.inc b/usr.bin/clang/Makefile.inc index dfff7ea35508..ba32f0c1032d 100644 --- a/usr.bin/clang/Makefile.inc +++ b/usr.bin/clang/Makefile.inc @@ -4,6 +4,8 @@ WARNS?= 0 .include +MK_PIE:= no # Explicit libXXX.a references + .if ${COMPILER_TYPE} == "clang" DEBUG_FILES_CFLAGS= -gline-tables-only .else diff --git a/usr.bin/svn/Makefile.inc b/usr.bin/svn/Makefile.inc index 2650c4f3ddd2..32eee3399ceb 100644 --- a/usr.bin/svn/Makefile.inc +++ b/usr.bin/svn/Makefile.inc @@ -2,6 +2,8 @@ .include +MK_PIE:= no # Explicit libXXX.a references + .if ${MK_SVN} == "yes" SVNLITE?= .else From b3a27c81f3b408a0dff34a442ad7e9a2107fedfe Mon Sep 17 00:00:00 2001 From: Ed Maste Date: Fri, 15 Feb 2019 22:28:34 +0000 Subject: [PATCH 34/89] Regen src.conf.5 after r344179 --- share/man/man5/src.conf.5 | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/share/man/man5/src.conf.5 b/share/man/man5/src.conf.5 index a4cb42b41e68..4f004b6718c8 100644 --- a/share/man/man5/src.conf.5 +++ b/share/man/man5/src.conf.5 @@ -1,6 +1,6 @@ .\" DO NOT EDIT-- this file is @generated by tools/build/options/makeman. .\" $FreeBSD$ -.Dd January 31, 2019 +.Dd February 15, 2019 .Dt SRC.CONF 5 .Os .Sh NAME @@ -406,7 +406,8 @@ Set to build the Clang C/C++ compiler during the bootstrap phase of the build. This is a default setting on amd64/amd64, arm/arm, arm/armv6, arm/armv7, arm64/aarch64 and i386/i386. .It Va WITH_CLANG_EXTRAS -Set to build additional clang and llvm tools, such as bugpoint. +Set to build additional clang and llvm tools, such as bugpoint and +clang-format. .It Va WITHOUT_CLANG_FULL Set to avoid building the ARCMigrate, Rewriter and StaticAnalyzer components of the Clang C/C++ compiler. @@ -1542,6 +1543,9 @@ When set, it enforces these options: .It .Va WITHOUT_AUTHPF .El +.It Va WITH_PIE +Build dynamically linked binaries as +Position-Independent Executable (PIE). .It Va WITHOUT_PKGBOOTSTRAP Set to not build .Xr pkg 7 From 8094f70b778f5c1d490790c815da50a8fc4190bc Mon Sep 17 00:00:00 2001 From: Ed Maste Date: Fri, 15 Feb 2019 22:30:09 +0000 Subject: [PATCH 35/89] Fix Makefile conditional after r344179 --- share/mk/bsd.prog.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/share/mk/bsd.prog.mk b/share/mk/bsd.prog.mk index 3811bb5b67a2..942292b1b84b 100644 --- a/share/mk/bsd.prog.mk +++ b/share/mk/bsd.prog.mk @@ -39,7 +39,7 @@ MK_DEBUG_FILES= no LDFLAGS+= -Wl,-znow .endif .if ${MK_PIE} != "no" && \ - !defined(NO_SHARED) || ${NO_SHARED} == "no" || ${NO_SHARED} == "NO" + (!defined(NO_SHARED) || ${NO_SHARED} == "no" || ${NO_SHARED} == "NO") CFLAGS+= -fPIE CXXFLAGS+= -fPIE LDFLAGS+= -pie From 447b492eb82745278f1592206d38b7d1ffa86f76 Mon Sep 17 00:00:00 2001 From: Ed Maste Date: Fri, 15 Feb 2019 22:48:50 +0000 Subject: [PATCH 36/89] Use make's :tl instead of checking "no" and "NO" Suggested by: kevans Reviewed by: kevans --- share/mk/bsd.prog.mk | 7 +++---- share/mk/src.libnames.mk | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/share/mk/bsd.prog.mk b/share/mk/bsd.prog.mk index 942292b1b84b..4500f2858553 100644 --- a/share/mk/bsd.prog.mk +++ b/share/mk/bsd.prog.mk @@ -38,8 +38,7 @@ MK_DEBUG_FILES= no .if ${MK_BIND_NOW} != "no" LDFLAGS+= -Wl,-znow .endif -.if ${MK_PIE} != "no" && \ - (!defined(NO_SHARED) || ${NO_SHARED} == "no" || ${NO_SHARED} == "NO") +.if ${MK_PIE} != "no" && (!defined(NO_SHARED) || ${NO_SHARED:tl} == "no") CFLAGS+= -fPIE CXXFLAGS+= -fPIE LDFLAGS+= -pie @@ -48,7 +47,7 @@ LDFLAGS+= -pie CFLAGS+= -mretpoline CXXFLAGS+= -mretpoline # retpolineplt is broken with static linking (PR 233336) -.if !defined(NO_SHARED) || ${NO_SHARED} == "no" || ${NO_SHARED} == "NO" +.if !defined(NO_SHARED) || ${NO_SHARED:tl} == "no" LDFLAGS+= -Wl,-zretpolineplt .endif .endif @@ -74,7 +73,7 @@ TAGS+= package=${PACKAGE:Uruntime} TAG_ARGS= -T ${TAGS:[*]:S/ /,/g} .endif -.if defined(NO_SHARED) && (${NO_SHARED} != "no" && ${NO_SHARED} != "NO") +.if defined(NO_SHARED) && (${NO_SHARED:tl} != "no" LDFLAGS+= -static .endif diff --git a/share/mk/src.libnames.mk b/share/mk/src.libnames.mk index ac0e37f0669e..daf420fb377a 100644 --- a/share/mk/src.libnames.mk +++ b/share/mk/src.libnames.mk @@ -386,7 +386,7 @@ LDADD_${_l}?= ${LDADD_${_l}_L} -l${_l} .endif # Add in all dependencies for static linkage. .if defined(_DP_${_l}) && (${_INTERNALLIBS:M${_l}} || \ - (defined(NO_SHARED) && (${NO_SHARED} != "no" && ${NO_SHARED} != "NO"))) + (defined(NO_SHARED) && ${NO_SHARED:tl} != "no")) .for _d in ${_DP_${_l}} DPADD_${_l}+= ${DPADD_${_d}} LDADD_${_l}+= ${LDADD_${_d}} From 78a7722fbca9957189eeac93d13f42b9bcfb4c2e Mon Sep 17 00:00:00 2001 From: Conrad Meyer Date: Fri, 15 Feb 2019 22:49:15 +0000 Subject: [PATCH 37/89] FUSE: Respect userspace FS "do-not-cache" of file attributes The FUSE protocol demands that kernel implementations cache user filesystem file attributes (vattr data) for a maximum period of time in the range of [0, ULONG_MAX] seconds. In practice, typical requests are for 0, 1, or 10 seconds; or "a long time" to represent indefinite caching. Historically, FreeBSD FUSE has ignored this client directive entirely. This works fine for local-only filesystems, but causes consistency issues with multi-writer network filesystems. For now, respect 0 second cache TTLs and do not cache such metadata. Non-zero metadata caching TTLs in the range [0.000000001, ULONG_MAX] seconds are still cached indefinitely, because it is unclear how a userspace filesystem could do anything sensible with those semantics even if implemented. In the future, as an optimization, we should implement notify_inval_entry, etc, which provide userspace filesystems a way of evicting the kernel cache. One potentially bogus access to invalid cached attribute data was left in fuse_io_strategy. It is restricted behind the undocumented and non-default "vfs.fuse.fix_broken_io" sysctl or "brokenio" mount option; maybe these are deadcode and can be eliminated? Some minor APIs changed to facilitate this: 1. Attribute cache validity is tracked in FUSE inodes ("fuse_vnode_data"). 2. cache_attrs() respects the provided TTL and only caches in the FUSE inode if TTL > 0. It also grows an "out" argument, which, if non-NULL, stores the translated fuse_attr (even if not suitable for caching). 3. FUSE VTOVA(vp) returns NULL if the vnode's cache is invalid, to help avoid programming mistakes. 4. A VOP_LINK check for potential nlink overflow prior to invoking the FUSE link op was weakened (only performed when we have a valid attr cache). The check is racy in a multi-writer network filesystem anyway -- classic TOCTOU. We have to trust any userspace filesystem that rejects local caching to account for it correctly. PR: 230258 (inspired by; does not fix) --- sys/fs/fuse/fuse_internal.c | 23 +++++------------ sys/fs/fuse/fuse_internal.h | 50 +++++++++++++++++++++++++++++-------- sys/fs/fuse/fuse_io.c | 1 + sys/fs/fuse/fuse_node.c | 1 + sys/fs/fuse/fuse_node.h | 5 +++- sys/fs/fuse/fuse_vnops.c | 24 +++++++++++------- 6 files changed, 66 insertions(+), 38 deletions(-) diff --git a/sys/fs/fuse/fuse_internal.c b/sys/fs/fuse/fuse_internal.c index af8b89e7ac24..f9111449a400 100644 --- a/sys/fs/fuse/fuse_internal.c +++ b/sys/fs/fuse/fuse_internal.c @@ -373,7 +373,6 @@ fuse_internal_readdir_processdata(struct uio *uio, /* remove */ -#define INVALIDATE_CACHED_VATTRS_UPON_UNLINK 1 int fuse_internal_remove(struct vnode *dvp, struct vnode *vp, @@ -381,15 +380,11 @@ fuse_internal_remove(struct vnode *dvp, enum fuse_opcode op) { struct fuse_dispatcher fdi; + struct fuse_vnode_data *fvdat; + int err; - struct vattr *vap = VTOVA(vp); - -#if INVALIDATE_CACHED_VATTRS_UPON_UNLINK - int need_invalidate = 0; - uint64_t target_nlink = 0; - -#endif - int err = 0; + err = 0; + fvdat = VTOFUD(vp); debug_printf("dvp=%p, cnp=%p, op=%d\n", vp, cnp, op); @@ -399,13 +394,6 @@ fuse_internal_remove(struct vnode *dvp, memcpy(fdi.indata, cnp->cn_nameptr, cnp->cn_namelen); ((char *)fdi.indata)[cnp->cn_namelen] = '\0'; -#if INVALIDATE_CACHED_VATTRS_UPON_UNLINK - if (vap->va_nlink > 1) { - need_invalidate = 1; - target_nlink = vap->va_nlink; - } -#endif - err = fdisp_wait_answ(&fdi); fdisp_destroy(&fdi); return err; @@ -489,7 +477,7 @@ fuse_internal_newentry_core(struct vnode *dvp, feo->nodeid, 1); return err; } - cache_attrs(*vpp, feo); + cache_attrs(*vpp, feo, NULL); return err; } @@ -563,6 +551,7 @@ fuse_internal_vnode_disappear(struct vnode *vp) ASSERT_VOP_ELOCKED(vp, "fuse_internal_vnode_disappear"); fvdat->flag |= FN_REVOKED; + fvdat->valid_attr_cache = false; cache_purge(vp); } diff --git a/sys/fs/fuse/fuse_internal.h b/sys/fs/fuse/fuse_internal.h index 94ede8453754..ae986a81ca48 100644 --- a/sys/fs/fuse/fuse_internal.h +++ b/sys/fs/fuse/fuse_internal.h @@ -200,15 +200,47 @@ fuse_internal_access(struct vnode *vp, /* attributes */ +/* + * Cache FUSE attributes 'fat', with nominal expiration + * 'attr_valid'.'attr_valid_nsec', in attr cache associated with vnode 'vp'. + * Optionally, if argument 'vap' is not NULL, store a copy of the converted + * attributes there as well. + * + * If the nominal attribute cache TTL is zero, do not cache on the 'vp' (but do + * return the result to the caller). + */ static __inline void -fuse_internal_attr_fat2vat(struct mount *mp, +fuse_internal_attr_fat2vat(struct vnode *vp, struct fuse_attr *fat, + uint64_t attr_valid, + uint32_t attr_valid_nsec, struct vattr *vap) { + struct mount *mp; + struct fuse_vnode_data *fvdat; + struct vattr *vp_cache_at; + + mp = vnode_mount(vp); + fvdat = VTOFUD(vp); + DEBUGX(FUSE_DEBUG_INTERNAL, "node #%ju, mode 0%o\n", (uintmax_t)fat->ino, fat->mode); + /* Honor explicit do-not-cache requests from user filesystems. */ + if (attr_valid == 0 && attr_valid_nsec == 0) + fvdat->valid_attr_cache = false; + else + fvdat->valid_attr_cache = true; + + vp_cache_at = VTOVA(vp); + + if (vap == NULL && vp_cache_at == NULL) + return; + + if (vap == NULL) + vap = vp_cache_at; + vattr_null(vap); vap->va_fsid = mp->mnt_stat.f_fsid.val[0]; @@ -227,21 +259,17 @@ fuse_internal_attr_fat2vat(struct mount *mp, vap->va_ctime.tv_nsec = fat->ctimensec; vap->va_blocksize = PAGE_SIZE; vap->va_type = IFTOVT(fat->mode); - -#if (S_BLKSIZE == 512) - /* Optimize this case */ - vap->va_bytes = fat->blocks << 9; -#else vap->va_bytes = fat->blocks * S_BLKSIZE; -#endif - vap->va_flags = 0; + + if (vap != vp_cache_at && vp_cache_at != NULL) + memcpy(vp_cache_at, vap, sizeof(*vap)); } -#define cache_attrs(vp, fuse_out) \ - fuse_internal_attr_fat2vat(vnode_mount(vp), &(fuse_out)->attr, \ - VTOVA(vp)); +#define cache_attrs(vp, fuse_out, vap_out) \ + fuse_internal_attr_fat2vat((vp), &(fuse_out)->attr, \ + (fuse_out)->attr_valid, (fuse_out)->attr_valid_nsec, (vap_out)) /* fsync */ diff --git a/sys/fs/fuse/fuse_io.c b/sys/fs/fuse/fuse_io.c index 494ea52c3f71..8fa7dc06f79d 100644 --- a/sys/fs/fuse/fuse_io.c +++ b/sys/fs/fuse/fuse_io.c @@ -655,6 +655,7 @@ fuse_io_strategy(struct vnode *vp, struct buf *bp) uiop->uio_offset = ((off_t)bp->b_blkno) * biosize; error = fuse_read_directbackend(vp, uiop, cred, fufh); + /* XXXCEM: Potentially invalid access to cached_attrs here */ if ((!error && uiop->uio_resid) || (fsess_opt_brokenio(vnode_mount(vp)) && error == EIO && uiop->uio_offset < fvdat->filesize && fvdat->filesize > 0 && diff --git a/sys/fs/fuse/fuse_node.c b/sys/fs/fuse/fuse_node.c index a6bb66ac0fa3..8bc58f08e3cd 100644 --- a/sys/fs/fuse/fuse_node.c +++ b/sys/fs/fuse/fuse_node.c @@ -147,6 +147,7 @@ fuse_vnode_init(struct vnode *vp, struct fuse_vnode_data *fvdat, int i; fvdat->nid = nodeid; + vattr_null(&fvdat->cached_attrs); if (nodeid == FUSE_ROOT_ID) { vp->v_vflag |= VV_ROOT; } diff --git a/sys/fs/fuse/fuse_node.h b/sys/fs/fuse/fuse_node.h index bb80be5378ed..81db2ea66758 100644 --- a/sys/fs/fuse/fuse_node.h +++ b/sys/fs/fuse/fuse_node.h @@ -86,6 +86,7 @@ struct fuse_vnode_data { uint32_t flag; /** meta **/ + bool valid_attr_cache; struct vattr cached_attrs; off_t filesize; uint64_t nlookup; @@ -95,7 +96,9 @@ struct fuse_vnode_data { #define VTOFUD(vp) \ ((struct fuse_vnode_data *)((vp)->v_data)) #define VTOI(vp) (VTOFUD(vp)->nid) -#define VTOVA(vp) (&(VTOFUD(vp)->cached_attrs)) +#define VTOVA(vp) \ + (VTOFUD(vp)->valid_attr_cache ? \ + &(VTOFUD(vp)->cached_attrs) : NULL) #define VTOILLU(vp) ((uint64_t)(VTOFUD(vp) ? VTOI(vp) : 0)) #define FUSE_NULL_ID 0 diff --git a/sys/fs/fuse/fuse_vnops.c b/sys/fs/fuse/fuse_vnops.c index 75de97bc226d..62b3f2009467 100644 --- a/sys/fs/fuse/fuse_vnops.c +++ b/sys/fs/fuse/fuse_vnops.c @@ -518,10 +518,8 @@ fuse_vnop_getattr(struct vop_getattr_args *ap) } goto out; } - cache_attrs(vp, (struct fuse_attr_out *)fdi.answ); - if (vap != VTOVA(vp)) { - memcpy(vap, VTOVA(vp), sizeof(*vap)); - } + + cache_attrs(vp, (struct fuse_attr_out *)fdi.answ, vap); if (vap->va_type != vnode_vtype(vp)) { fuse_internal_vnode_disappear(vp); err = ENOENT; @@ -628,9 +626,15 @@ fuse_vnop_link(struct vop_link_args *ap) if (vnode_mount(tdvp) != vnode_mount(vp)) { return EXDEV; } - if (vap->va_nlink >= FUSE_LINK_MAX) { + + /* + * This is a seatbelt check to protect naive userspace filesystems from + * themselves and the limitations of the FUSE IPC protocol. If a + * filesystem does not allow attribute caching, assume it is capable of + * validating that nlink does not overflow. + */ + if (vap != NULL && vap->va_nlink >= FUSE_LINK_MAX) return EMLINK; - } fli.oldnodeid = VTOI(vp); fdisp_init(&fdi, 0); @@ -966,9 +970,11 @@ fuse_vnop_lookup(struct vop_lookup_args *ap) } if (op == FUSE_GETATTR) { - cache_attrs(*vpp, (struct fuse_attr_out *)fdi.answ); + cache_attrs(*vpp, (struct fuse_attr_out *)fdi.answ, + NULL); } else { - cache_attrs(*vpp, (struct fuse_entry_out *)fdi.answ); + cache_attrs(*vpp, (struct fuse_entry_out *)fdi.answ, + NULL); } /* Insert name into cache if appropriate. */ @@ -1644,7 +1650,7 @@ fuse_vnop_setattr(struct vop_setattr_args *ap) } } if (!err && !sizechanged) { - cache_attrs(vp, (struct fuse_attr_out *)fdi.answ); + cache_attrs(vp, (struct fuse_attr_out *)fdi.answ, NULL); } out: fdisp_destroy(&fdi); From 09176f096b48d4e766fc4445623075ced92929b3 Mon Sep 17 00:00:00 2001 From: Conrad Meyer Date: Fri, 15 Feb 2019 22:50:31 +0000 Subject: [PATCH 38/89] FUSE: Respect userspace FS "do-not-cache" of path components The FUSE protocol demands that kernel implementations cache user filesystem path components (lookup/cnp data) for a maximum period of time in the range of [0, ULONG_MAX] seconds. In practice, typical requests are for 0, 1, or 10 seconds; or "a long time" to represent indefinite caching. Historically, FreeBSD FUSE has ignored this client directive entirely. This works fine for local-only filesystems, but causes consistency issues with multi-writer network filesystems. For now, respect 0 second cache TTLs and do not cache such metadata. Non-zero metadata caching TTLs in the range [0.000000001, ULONG_MAX] seconds are still cached indefinitely, because it is unclear how a userspace filesystem could do anything sensible with those semantics even if implemented. Pass fuse_entry_out to fuse_vnode_get when available and only cache lookup if the user filesystem did not set a zero second TTL. PR: 230258 (inspired by; does not fix) --- sys/fs/fuse/fuse_internal.c | 2 +- sys/fs/fuse/fuse_node.c | 5 ++++- sys/fs/fuse/fuse_node.h | 1 + sys/fs/fuse/fuse_vfsops.c | 3 ++- sys/fs/fuse/fuse_vnops.c | 30 +++++++++--------------------- 5 files changed, 17 insertions(+), 24 deletions(-) diff --git a/sys/fs/fuse/fuse_internal.c b/sys/fs/fuse/fuse_internal.c index f9111449a400..d556dc3fd96a 100644 --- a/sys/fs/fuse/fuse_internal.c +++ b/sys/fs/fuse/fuse_internal.c @@ -471,7 +471,7 @@ fuse_internal_newentry_core(struct vnode *dvp, if ((err = fuse_internal_checkentry(feo, vtyp))) { return err; } - err = fuse_vnode_get(mp, feo->nodeid, dvp, vpp, cnp, vtyp); + err = fuse_vnode_get(mp, feo, feo->nodeid, dvp, vpp, cnp, vtyp); if (err) { fuse_internal_forget_send(mp, cnp->cn_thread, cnp->cn_cred, feo->nodeid, 1); diff --git a/sys/fs/fuse/fuse_node.c b/sys/fs/fuse/fuse_node.c index 8bc58f08e3cd..e8b16335272d 100644 --- a/sys/fs/fuse/fuse_node.c +++ b/sys/fs/fuse/fuse_node.c @@ -241,6 +241,7 @@ fuse_vnode_alloc(struct mount *mp, int fuse_vnode_get(struct mount *mp, + struct fuse_entry_out *feo, uint64_t nodeid, struct vnode *dvp, struct vnode **vpp, @@ -261,7 +262,9 @@ fuse_vnode_get(struct mount *mp, MPASS(!(cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.')); fuse_vnode_setparent(*vpp, dvp); } - if (dvp != NULL && cnp != NULL && (cnp->cn_flags & MAKEENTRY) != 0) { + if (dvp != NULL && cnp != NULL && (cnp->cn_flags & MAKEENTRY) != 0 && + feo != NULL && + (feo->entry_valid != 0 || feo->entry_valid_nsec != 0)) { ASSERT_VOP_LOCKED(*vpp, "fuse_vnode_get"); ASSERT_VOP_LOCKED(dvp, "fuse_vnode_get"); cache_enter(dvp, *vpp, cnp); diff --git a/sys/fs/fuse/fuse_node.h b/sys/fs/fuse/fuse_node.h index 81db2ea66758..eab470fd3b94 100644 --- a/sys/fs/fuse/fuse_node.h +++ b/sys/fs/fuse/fuse_node.h @@ -117,6 +117,7 @@ fuse_vnode_setparent(struct vnode *vp, struct vnode *dvp) void fuse_vnode_destroy(struct vnode *vp); int fuse_vnode_get(struct mount *mp, + struct fuse_entry_out *feo, uint64_t nodeid, struct vnode *dvp, struct vnode **vpp, diff --git a/sys/fs/fuse/fuse_vfsops.c b/sys/fs/fuse/fuse_vfsops.c index 5685b04ee0d8..ae17768ad2e6 100644 --- a/sys/fs/fuse/fuse_vfsops.c +++ b/sys/fs/fuse/fuse_vfsops.c @@ -444,7 +444,8 @@ fuse_vfsop_root(struct mount *mp, int lkflags, struct vnode **vpp) if (err == 0) *vpp = data->vroot; } else { - err = fuse_vnode_get(mp, FUSE_ROOT_ID, NULL, vpp, NULL, VDIR); + err = fuse_vnode_get(mp, NULL, FUSE_ROOT_ID, NULL, vpp, NULL, + VDIR); if (err == 0) { FUSE_LOCK(); MPASS(data->vroot == NULL || data->vroot == *vpp); diff --git a/sys/fs/fuse/fuse_vnops.c b/sys/fs/fuse/fuse_vnops.c index 62b3f2009467..618b01944712 100644 --- a/sys/fs/fuse/fuse_vnops.c +++ b/sys/fs/fuse/fuse_vnops.c @@ -384,7 +384,7 @@ fuse_vnop_create(struct vop_create_args *ap) if ((err = fuse_internal_checkentry(feo, VREG))) { goto out; } - err = fuse_vnode_get(mp, feo->nodeid, dvp, vpp, cnp, VREG); + err = fuse_vnode_get(mp, feo, feo->nodeid, dvp, vpp, cnp, VREG); if (err) { struct fuse_release_in *fri; uint64_t nodeid = feo->nodeid; @@ -857,8 +857,8 @@ fuse_vnop_lookup(struct vop_lookup_args *ap) vref(dvp); *vpp = dvp; } else { - err = fuse_vnode_get(dvp->v_mount, nid, dvp, - &vp, cnp, IFTOVT(fattr->mode)); + err = fuse_vnode_get(dvp->v_mount, feo, nid, + dvp, &vp, cnp, IFTOVT(fattr->mode)); if (err) goto out; *vpp = vp; @@ -893,12 +893,8 @@ fuse_vnop_lookup(struct vop_lookup_args *ap) err = EISDIR; goto out; } - err = fuse_vnode_get(vnode_mount(dvp), - nid, - dvp, - &vp, - cnp, - IFTOVT(fattr->mode)); + err = fuse_vnode_get(vnode_mount(dvp), feo, nid, dvp, + &vp, cnp, IFTOVT(fattr->mode)); if (err) { goto out; } @@ -936,12 +932,8 @@ fuse_vnop_lookup(struct vop_lookup_args *ap) } } VOP_UNLOCK(dvp, 0); - err = fuse_vnode_get(vnode_mount(dvp), - nid, - NULL, - &vp, - cnp, - IFTOVT(fattr->mode)); + err = fuse_vnode_get(vnode_mount(dvp), feo, nid, NULL, + &vp, cnp, IFTOVT(fattr->mode)); vfs_unbusy(mp); vn_lock(dvp, ltype | LK_RETRY); if ((dvp->v_iflag & VI_DOOMED) != 0) { @@ -956,12 +948,8 @@ fuse_vnop_lookup(struct vop_lookup_args *ap) vref(dvp); *vpp = dvp; } else { - err = fuse_vnode_get(vnode_mount(dvp), - nid, - dvp, - &vp, - cnp, - IFTOVT(fattr->mode)); + err = fuse_vnode_get(vnode_mount(dvp), feo, nid, dvp, + &vp, cnp, IFTOVT(fattr->mode)); if (err) { goto out; } From 194e691aaf96c98d44ff04ea634fc4acf9d86a19 Mon Sep 17 00:00:00 2001 From: Conrad Meyer Date: Fri, 15 Feb 2019 22:51:09 +0000 Subject: [PATCH 39/89] FUSE: Only "dirty" cached file size when data is dirty Most users of fuse_vnode_setsize() set the cached fvdat->filesize and update the buf cache bounds as a result of either a read from the underlying FUSE filesystem, or as part of a write-through type operation (like truncate => VOP_SETATTR). In these cases, do not set the FN_SIZECHANGE flag, which indicates that an inode's data is dirty (in particular, that the local buf cache and fvdat->filesize have dirty extended data). PR: 230258 (related) --- sys/fs/fuse/fuse_io.c | 5 ++++- sys/fs/fuse/fuse_node.c | 1 + sys/fs/fuse/fuse_vnops.c | 5 +++-- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/sys/fs/fuse/fuse_io.c b/sys/fs/fuse/fuse_io.c index 8fa7dc06f79d..25f63a9511fc 100644 --- a/sys/fs/fuse/fuse_io.c +++ b/sys/fs/fuse/fuse_io.c @@ -362,8 +362,11 @@ fuse_write_directbackend(struct vnode *vp, struct uio *uio, } uio->uio_resid += diff; uio->uio_offset -= diff; - if (uio->uio_offset > fvdat->filesize) + if (uio->uio_offset > fvdat->filesize && + fuse_data_cache_enable) { fuse_vnode_setsize(vp, cred, uio->uio_offset); + fvdat->flag &= ~FN_SIZECHANGE; + } } fdisp_destroy(&fdi); diff --git a/sys/fs/fuse/fuse_node.c b/sys/fs/fuse/fuse_node.c index e8b16335272d..dee7f328b5ac 100644 --- a/sys/fs/fuse/fuse_node.c +++ b/sys/fs/fuse/fuse_node.c @@ -375,6 +375,7 @@ fuse_vnode_refreshsize(struct vnode *vp, struct ucred *cred) struct vattr va; if ((fvdat->flag & FN_SIZECHANGE) != 0 || + fuse_data_cache_enable == 0 || (fuse_refresh_size == 0 && fvdat->filesize != 0)) return; diff --git a/sys/fs/fuse/fuse_vnops.c b/sys/fs/fuse/fuse_vnops.c index 618b01944712..6ab93bb16f36 100644 --- a/sys/fs/fuse/fuse_vnops.c +++ b/sys/fs/fuse/fuse_vnops.c @@ -538,6 +538,7 @@ fuse_vnop_getattr(struct vop_getattr_args *ap) if (fvdat->filesize != new_filesize) { fuse_vnode_setsize(vp, cred, new_filesize); + fvdat->flag &= ~FN_SIZECHANGE; } } debug_printf("fuse_getattr e: returning 0\n"); @@ -1637,9 +1638,9 @@ fuse_vnop_setattr(struct vop_setattr_args *ap) err = EAGAIN; } } - if (!err && !sizechanged) { + if (err == 0) cache_attrs(vp, (struct fuse_attr_out *)fdi.answ, NULL); - } + out: fdisp_destroy(&fdi); if (!err && sizechanged) { From c4af8b173ae69e60ca698b754e71d4bb89687dfc Mon Sep 17 00:00:00 2001 From: Conrad Meyer Date: Fri, 15 Feb 2019 22:52:49 +0000 Subject: [PATCH 40/89] FUSE: The FUSE design expects writethrough caching At least prior to 7.23 (which adds FUSE_WRITEBACK_CACHE), the FUSE protocol specifies only clean data to be cached. Prior to this change, we implement and default to writeback caching. This is ok enough for local only filesystems without hardlinks, but violates the general design contract with FUSE and breaks distributed filesystems or concurrent access to hardlinks of the same inode. In this change, add cache mode as an extension of cache enable/disable. The new modes are UC (was: cache disabled), WT (default), and WB (was: cache enabled). For now, WT caching is implemented as write-around, which meets the goal of only caching clean data. WT can be better than WA for workloads that frequently read data that was recently written, but WA is trivial to implement. Note that this has no effect on O_WRONLY-opened files, which were already coerced to write-around. Refs: * https://sourceforge.net/p/fuse/mailman/message/8902254/ * https://github.com/vgough/encfs/issues/315 PR: 230258 (inspired by) --- sys/fs/fuse/fuse_io.c | 10 ++++++++-- sys/fs/fuse/fuse_ipc.h | 12 +++++++++--- sys/fs/fuse/fuse_node.c | 37 +++++++++++++++++++++++++++++++------ 3 files changed, 48 insertions(+), 11 deletions(-) diff --git a/sys/fs/fuse/fuse_io.c b/sys/fs/fuse/fuse_io.c index 25f63a9511fc..eb89db221773 100644 --- a/sys/fs/fuse/fuse_io.c +++ b/sys/fs/fuse/fuse_io.c @@ -155,7 +155,13 @@ fuse_io_dispatch(struct vnode *vp, struct uio *uio, int ioflag, } break; case UIO_WRITE: - if (directio) { + /* + * Kludge: simulate write-through caching via write-around + * caching. Same effect, as far as never caching dirty data, + * but slightly pessimal in that newly written data is not + * cached. + */ + if (directio || fuse_data_cache_mode == FUSE_CACHE_WT) { FS_DEBUG("direct write of vnode %ju via file handle %ju\n", (uintmax_t)VTOILLU(vp), (uintmax_t)fufh->fh_id); err = fuse_write_directbackend(vp, uio, cred, fufh, ioflag); @@ -363,7 +369,7 @@ fuse_write_directbackend(struct vnode *vp, struct uio *uio, uio->uio_resid += diff; uio->uio_offset -= diff; if (uio->uio_offset > fvdat->filesize && - fuse_data_cache_enable) { + fuse_data_cache_mode != FUSE_CACHE_UC) { fuse_vnode_setsize(vp, cred, uio->uio_offset); fvdat->flag &= ~FN_SIZECHANGE; } diff --git a/sys/fs/fuse/fuse_ipc.h b/sys/fs/fuse/fuse_ipc.h index 49a14c2cc004..0ecbe5bf8d5a 100644 --- a/sys/fs/fuse/fuse_ipc.h +++ b/sys/fs/fuse/fuse_ipc.h @@ -214,7 +214,13 @@ struct fuse_data { #define FSESS_NO_MMAP 0x0800 /* disable mmap */ #define FSESS_BROKENIO 0x1000 /* fix broken io */ -extern int fuse_data_cache_enable; +enum fuse_data_cache_mode { + FUSE_CACHE_UC, + FUSE_CACHE_WT, + FUSE_CACHE_WB, +}; + +extern int fuse_data_cache_mode; extern int fuse_data_cache_invalidate; extern int fuse_mmap_enable; extern int fuse_sync_resize; @@ -248,7 +254,7 @@ fsess_opt_datacache(struct mount *mp) { struct fuse_data *data = fuse_get_mpdata(mp); - return (fuse_data_cache_enable || + return (fuse_data_cache_mode != FUSE_CACHE_UC && (data->dataflags & FSESS_NO_DATACACHE) == 0); } @@ -257,7 +263,7 @@ fsess_opt_mmap(struct mount *mp) { struct fuse_data *data = fuse_get_mpdata(mp); - if (!(fuse_mmap_enable && fuse_data_cache_enable)) + if (!fuse_mmap_enable || fuse_data_cache_mode == FUSE_CACHE_UC) return 0; return ((data->dataflags & (FSESS_NO_DATACACHE | FSESS_NO_MMAP)) == 0); } diff --git a/sys/fs/fuse/fuse_node.c b/sys/fs/fuse/fuse_node.c index dee7f328b5ac..f452a73b9faa 100644 --- a/sys/fs/fuse/fuse_node.c +++ b/sys/fs/fuse/fuse_node.c @@ -94,16 +94,19 @@ __FBSDID("$FreeBSD$"); MALLOC_DEFINE(M_FUSEVN, "fuse_vnode", "fuse vnode private data"); +static int sysctl_fuse_cache_mode(SYSCTL_HANDLER_ARGS); + static int fuse_node_count = 0; SYSCTL_INT(_vfs_fuse, OID_AUTO, node_count, CTLFLAG_RD, &fuse_node_count, 0, "Count of FUSE vnodes"); -int fuse_data_cache_enable = 1; +int fuse_data_cache_mode = FUSE_CACHE_WT; -SYSCTL_INT(_vfs_fuse, OID_AUTO, data_cache_enable, CTLFLAG_RW, - &fuse_data_cache_enable, 0, - "enable caching of FUSE file data (including dirty data)"); +SYSCTL_PROC(_vfs_fuse, OID_AUTO, data_cache_mode, CTLTYPE_INT|CTLFLAG_RW, + &fuse_data_cache_mode, 0, sysctl_fuse_cache_mode, "I", + "Zero: disable caching of FUSE file data; One: write-through caching " + "(default); Two: write-back caching (generally unsafe)"); int fuse_data_cache_invalidate = 0; @@ -116,7 +119,7 @@ int fuse_mmap_enable = 1; SYSCTL_INT(_vfs_fuse, OID_AUTO, mmap_enable, CTLFLAG_RW, &fuse_mmap_enable, 0, - "If non-zero, and data_cache_enable is also non-zero, enable mmap(2) of " + "If non-zero, and data_cache_mode is also non-zero, enable mmap(2) of " "FUSE files"); int fuse_refresh_size = 0; @@ -140,6 +143,28 @@ SYSCTL_INT(_vfs_fuse, OID_AUTO, fix_broken_io, CTLFLAG_RW, "If non-zero, print a diagnostic warning if a userspace filesystem returns" " EIO on reads of recently extended portions of files"); +static int +sysctl_fuse_cache_mode(SYSCTL_HANDLER_ARGS) +{ + int val, error; + + val = *(int *)arg1; + error = sysctl_handle_int(oidp, &val, 0, req); + if (error || !req->newptr) + return (error); + + switch (val) { + case FUSE_CACHE_UC: + case FUSE_CACHE_WT: + case FUSE_CACHE_WB: + *(int *)arg1 = val; + break; + default: + return (EDOM); + } + return (0); +} + static void fuse_vnode_init(struct vnode *vp, struct fuse_vnode_data *fvdat, uint64_t nodeid, enum vtype vtyp) @@ -375,7 +400,7 @@ fuse_vnode_refreshsize(struct vnode *vp, struct ucred *cred) struct vattr va; if ((fvdat->flag & FN_SIZECHANGE) != 0 || - fuse_data_cache_enable == 0 || + fuse_data_cache_mode == FUSE_CACHE_UC || (fuse_refresh_size == 0 && fvdat->filesize != 0)) return; From 3c324b9465710947abef23044cdcd8bfce8d982d Mon Sep 17 00:00:00 2001 From: Conrad Meyer Date: Fri, 15 Feb 2019 22:55:13 +0000 Subject: [PATCH 41/89] FUSE: Refresh cached file size when it changes (lookup) The cached fvdat->filesize is indepedent of the (mostly unused) cached_attrs, and we failed to update it when a cached (but perhaps inactive) vnode was found during VOP_LOOKUP to have a different size than cached. As noted in the code comment, this can occur in distributed filesystems or with other kinds of irregular file behavior (anything is possible in FUSE). We do something similar in fuse_vnop_getattr already. PR: 230258 (as reported in description; other issues explored in comments are not all resolved) Reported by: MooseFS FreeBSD Team Submitted by: Jakub Kruszona-Zawadzki (earlier version) --- sys/fs/fuse/fuse_vnops.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/sys/fs/fuse/fuse_vnops.c b/sys/fs/fuse/fuse_vnops.c index 6ab93bb16f36..6a263283c6dd 100644 --- a/sys/fs/fuse/fuse_vnops.c +++ b/sys/fs/fuse/fuse_vnops.c @@ -949,12 +949,45 @@ fuse_vnop_lookup(struct vop_lookup_args *ap) vref(dvp); *vpp = dvp; } else { + struct fuse_vnode_data *fvdat; + err = fuse_vnode_get(vnode_mount(dvp), feo, nid, dvp, &vp, cnp, IFTOVT(fattr->mode)); if (err) { goto out; } fuse_vnode_setparent(vp, dvp); + + /* + * In the case where we are looking up a FUSE node + * represented by an existing cached vnode, and the + * true size reported by FUSE_LOOKUP doesn't match + * the vnode's cached size, fix the vnode cache to + * match the real object size. + * + * This can occur via FUSE distributed filesystems, + * irregular files, etc. + */ + fvdat = VTOFUD(vp); + if (vnode_isreg(vp) && + fattr->size != fvdat->filesize) { + /* + * The FN_SIZECHANGE flag reflects a dirty + * append. If userspace lets us know our cache + * is invalid, that write was lost. (Dirty + * writes that do not cause append are also + * lost, but we don't detect them here.) + * + * XXX: Maybe disable WB caching on this mount. + */ + if (fvdat->flag & FN_SIZECHANGE) + printf("%s: WB cache incoherent on " + "%s!\n", __func__, + vnode_mount(vp)->mnt_stat.f_mntonname); + + (void)fuse_vnode_setsize(vp, cred, fattr->size); + fvdat->flag &= ~FN_SIZECHANGE; + } *vpp = vp; } From 66fb0b1ad7b9342699e890d311f36742a15660f7 Mon Sep 17 00:00:00 2001 From: Gleb Smirnoff Date: Fri, 15 Feb 2019 23:36:22 +0000 Subject: [PATCH 42/89] For 32-bit machines rollback the default number of vnode pager pbufs back to the lever before r343030. For 64-bit machines reduce it slightly, too. Together with r343030 I bumped the limit up to the value we use at Netflix to serve 100 Gbit/s of sendfile traffic, and it probably isn't a good default. Provide a loader tunable to change vnode pager pbufs count. Document it. --- lib/libc/sys/sendfile.2 | 26 ++++++++++++++++++++++---- sys/vm/vnode_pager.c | 12 +++++++++++- 2 files changed, 33 insertions(+), 5 deletions(-) diff --git a/lib/libc/sys/sendfile.2 b/lib/libc/sys/sendfile.2 index 85825b7fdecb..53fdaa5e6350 100644 --- a/lib/libc/sys/sendfile.2 +++ b/lib/libc/sys/sendfile.2 @@ -25,7 +25,7 @@ .\" .\" $FreeBSD$ .\" -.Dd January 25, 2019 +.Dd February 15, 2019 .Dt SENDFILE 2 .Os .Sh NAME @@ -48,6 +48,7 @@ The system call sends a regular file or shared memory object specified by descriptor .Fa fd +h out a stream socket specified by descriptor .Fa s . .Pp @@ -224,6 +225,19 @@ implementation of .Fn sendfile is "zero-copy", meaning that it has been optimized so that copying of the file data is avoided. .Sh TUNING +.Ss physical paging buffers +.Fn sendfile +uses vnode pager to read file pages into memory. +The pager uses a pool of physical buffers to run its I/O operations. +When system runs out of pbufs, sendfile will block and report state +.Dq Li zonelimit . +Size of the pool can be tuned with +.Va vm.vnode_pbufs +.Xr loader.conf 5 +tunable and can be checked with +.Xr sysctl 8 +OID of the same name at runtime. +.Ss sendfile(2) buffers On some architectures, this system call internally uses a special .Fn sendfile buffer @@ -279,9 +293,11 @@ buffers usage respectively. These values may also be viewed through .Nm netstat Fl m . .Pp -If a value of zero is reported for -.Va kern.ipc.nsfbufs , -your architecture does not need to use +If +.Xr sysctl 8 +OID +.Va kern.ipc.nsfbufs +doesn't exist, your architecture does not need to use .Fn sendfile buffers because their task can be efficiently performed by the generic virtual memory structures. @@ -363,11 +379,13 @@ does not support The socket peer has closed the connection. .El .Sh SEE ALSO +.Xr loader.conf 5 , .Xr netstat 1 , .Xr open 2 , .Xr send 2 , .Xr socket 2 , .Xr writev 2 , +.Xr sysctl 8 , .Xr tuning 7 .Rs .%A K. Elmeleegy diff --git a/sys/vm/vnode_pager.c b/sys/vm/vnode_pager.c index 3e71ab4436cc..ded9e65e4e4c 100644 --- a/sys/vm/vnode_pager.c +++ b/sys/vm/vnode_pager.c @@ -115,13 +115,23 @@ SYSCTL_PROC(_debug, OID_AUTO, vnode_domainset, CTLTYPE_STRING | CTLFLAG_RW, &vnode_domainset, 0, sysctl_handle_domainset, "A", "Default vnode NUMA policy"); +static int nvnpbufs; +SYSCTL_INT(_vm, OID_AUTO, vnode_pbufs, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, + &nvnpbufs, 0, "number of physical buffers allocated for vnode pager"); + static uma_zone_t vnode_pbuf_zone; static void vnode_pager_init(void *dummy) { - vnode_pbuf_zone = pbuf_zsecond_create("vnpbuf", nswbuf * 8); +#ifdef __LP64__ + nvnpbufs = nswbuf * 2; +#else + nvnpbufs = nswbuf / 2; +#endif + TUNABLE_INT_FETCH("vm.vnode_pbufs", &nvnpbufs); + vnode_pbuf_zone = pbuf_zsecond_create("vnpbuf", nvnpbufs); } SYSINIT(vnode_pager, SI_SUB_CPU, SI_ORDER_ANY, vnode_pager_init, NULL); From a335df48bdbacb3d28ad36369ad29c9fdb212e8e Mon Sep 17 00:00:00 2001 From: Conrad Meyer Date: Fri, 15 Feb 2019 23:41:54 +0000 Subject: [PATCH 43/89] Fixup bsd.prog.mk after r344182 Reported by: tinderbox Sponsored by: Dell EMC Isilon --- share/mk/bsd.prog.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/share/mk/bsd.prog.mk b/share/mk/bsd.prog.mk index 4500f2858553..0ce546278445 100644 --- a/share/mk/bsd.prog.mk +++ b/share/mk/bsd.prog.mk @@ -73,7 +73,7 @@ TAGS+= package=${PACKAGE:Uruntime} TAG_ARGS= -T ${TAGS:[*]:S/ /,/g} .endif -.if defined(NO_SHARED) && (${NO_SHARED:tl} != "no" +.if defined(NO_SHARED) && ${NO_SHARED:tl} != "no" LDFLAGS+= -static .endif From 5bfb2e008de43c0581d158477679e621ba19ae14 Mon Sep 17 00:00:00 2001 From: Gleb Smirnoff Date: Fri, 15 Feb 2019 23:46:34 +0000 Subject: [PATCH 44/89] Imaginary cat jumped my keyboard! --- lib/libc/sys/sendfile.2 | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/libc/sys/sendfile.2 b/lib/libc/sys/sendfile.2 index 53fdaa5e6350..07f97e3e0b54 100644 --- a/lib/libc/sys/sendfile.2 +++ b/lib/libc/sys/sendfile.2 @@ -48,7 +48,6 @@ The system call sends a regular file or shared memory object specified by descriptor .Fa fd -h out a stream socket specified by descriptor .Fa s . .Pp From 95e310720d7fd41ef9ad254382b249a7ea6a75f7 Mon Sep 17 00:00:00 2001 From: Warner Losh Date: Sat, 16 Feb 2019 00:15:02 +0000 Subject: [PATCH 45/89] Remove write-only s_flag. --- sbin/nvmecontrol/firmware.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sbin/nvmecontrol/firmware.c b/sbin/nvmecontrol/firmware.c index 9692af99bcfe..9321859d2a83 100644 --- a/sbin/nvmecontrol/firmware.c +++ b/sbin/nvmecontrol/firmware.c @@ -177,7 +177,7 @@ static void firmware(const struct nvme_function *nf, int argc, char *argv[]) { int fd = -1, slot = 0; - int a_flag, s_flag, f_flag; + int a_flag, f_flag; int activate_action, reboot_required; int opt; char *p, *image = NULL; @@ -188,7 +188,7 @@ firmware(const struct nvme_function *nf, int argc, char *argv[]) uint8_t fw_slot1_ro, fw_num_slots; struct nvme_controller_data cdata; - a_flag = s_flag = f_flag = false; + a_flag = f_flag = false; while ((opt = getopt(argc, argv, "af:s:")) != -1) { switch (opt) { @@ -214,7 +214,6 @@ firmware(const struct nvme_function *nf, int argc, char *argv[]) "7.\n", optarg); usage(nf); } - s_flag = true; break; case 'f': image = optarg; From 025816d9cecee9c9dc18bd4ef46b6ed7554d6cfc Mon Sep 17 00:00:00 2001 From: Sean Eric Fagan Date: Sat, 16 Feb 2019 00:15:54 +0000 Subject: [PATCH 46/89] Add support for a virtual hostname to nfsd Specifically, this allows (via "-V vhostname") telling nfsd what principal to use, instead of the hostname. This is used at iXsystems for fail-over in HA systems. Reviewed by: macklem Sponsored by: iXsystems Inc. Differential Revision: https://reviews.freebsd.org/D19191 --- libexec/rc/rc.d/nfsd | 5 +++++ usr.sbin/nfsd/nfsd.8 | 6 +++++- usr.sbin/nfsd/nfsd.c | 28 ++++++++++++++++++++-------- 3 files changed, 30 insertions(+), 9 deletions(-) diff --git a/libexec/rc/rc.d/nfsd b/libexec/rc/rc.d/nfsd index 33ad356e05e9..3c16416f23d9 100755 --- a/libexec/rc/rc.d/nfsd +++ b/libexec/rc/rc.d/nfsd @@ -13,6 +13,7 @@ name="nfsd" desc="Remote NFS server" rcvar="nfs_server_enable" command="/usr/sbin/${name}" +nfs_server_vhost="" load_rc_config $name start_precmd="nfsd_precmd" @@ -20,6 +21,7 @@ sig_stop="USR1" nfsd_precmd() { + local _vhost rc_flags="${nfs_server_flags}" # Load the modules now, so that the vfs.nfsd sysctl @@ -46,6 +48,9 @@ nfsd_precmd() force_depend rpcbind || return 1 force_depend mountd || return 1 + if [ -n "${nfs_server_vhost}" ]; then + command_args="-V \"${nfs_server_vhost}\"" + fi } run_rc_command "$1" diff --git a/usr.sbin/nfsd/nfsd.8 b/usr.sbin/nfsd/nfsd.8 index 6e5157bae969..156925b8fcca 100644 --- a/usr.sbin/nfsd/nfsd.8 +++ b/usr.sbin/nfsd/nfsd.8 @@ -28,7 +28,7 @@ .\" @(#)nfsd.8 8.4 (Berkeley) 3/29/95 .\" $FreeBSD$ .\" -.Dd August 5, 2018 +.Dd February 14, 2019 .Dt NFSD 8 .Os .Sh NAME @@ -43,6 +43,7 @@ server .Op Fl h Ar bindip .Op Fl p Ar pnfs_setup .Op Fl m Ar mirror_level +.Op Fl V Ar virtual_hostname .Op Fl Fl maxthreads Ar max_threads .Op Fl Fl minthreads Ar min_threads .Sh DESCRIPTION @@ -78,6 +79,9 @@ Unregister the service with .Xr rpcbind 8 without creating any servers. +.It Fl V Ar virtual_hostname +Specifies a hostname to be used as a principal name, instead of +the default hostname. .It Fl n Ar threads Specifies how many servers to create. This option is equivalent to specifying .Fl Fl maxthreads diff --git a/usr.sbin/nfsd/nfsd.c b/usr.sbin/nfsd/nfsd.c index c4a4396264d1..2325a10f804d 100644 --- a/usr.sbin/nfsd/nfsd.c +++ b/usr.sbin/nfsd/nfsd.c @@ -122,7 +122,7 @@ static void nonfs(int); static void reapchild(int); static int setbindhost(struct addrinfo **ia, const char *bindhost, struct addrinfo hints); -static void start_server(int, struct nfsd_nfsd_args *); +static void start_server(int, struct nfsd_nfsd_args *, const char *vhost); static void unregistration(void); static void usage(void); static void open_stable(int *, int *); @@ -176,6 +176,7 @@ main(int argc, char **argv) char **bindhost = NULL; pid_t pid; struct nfsd_nfsd_args nfsdargs; + const char *vhostname = NULL; nfsdargs.mirrorcnt = 1; nfsdargs.addr = NULL; @@ -183,16 +184,24 @@ main(int argc, char **argv) nfsdcnt = DEFNFSDCNT; unregister = reregister = tcpflag = maxsock = 0; bindanyflag = udpflag = connect_type_cnt = bindhostc = 0; - getopt_shortopts = "ah:n:rdtuep:m:"; + getopt_shortopts = "ah:n:rdtuep:m:V:"; getopt_usage = "usage:\n" " nfsd [-ardtue] [-h bindip]\n" " [-n numservers] [--minthreads #] [--maxthreads #]\n" - " [-p/--pnfs dsserver0:/dsserver0-mounted-on-dir,...," - "dsserverN:/dsserverN-mounted-on-dir] [-m mirrorlevel]\n"; + " [-p/--pnfs dsserver0:/dsserver0-mounted-on-dir,...,\n" + " [-V virtual_hostname]\n" + " dsserverN:/dsserverN-mounted-on-dir] [-m mirrorlevel]\n"; while ((ch = getopt_long(argc, argv, getopt_shortopts, longopts, &longindex)) != -1) switch (ch) { + case 'V': + if (strlen(optarg) <= MAXHOSTNAMELEN) + vhostname = optarg; + else + warnx("Virtual host name (%s) is too long", + optarg); + break; case 'a': bindanyflag = 1; break; @@ -473,7 +482,7 @@ main(int argc, char **argv) } else { (void)signal(SIGUSR1, child_cleanup); setproctitle("server"); - start_server(0, &nfsdargs); + start_server(0, &nfsdargs, vhostname); } } @@ -790,7 +799,7 @@ main(int argc, char **argv) * a "server" too. start_server will not return. */ if (!tcpflag) - start_server(1, &nfsdargs); + start_server(1, &nfsdargs, vhostname); /* * Loop forever accepting connections and passing the sockets @@ -987,7 +996,7 @@ get_tuned_nfsdcount(void) } static void -start_server(int master, struct nfsd_nfsd_args *nfsdargp) +start_server(int master, struct nfsd_nfsd_args *nfsdargp, const char *vhost) { char principal[MAXHOSTNAMELEN + 5]; int status, error; @@ -995,7 +1004,10 @@ start_server(int master, struct nfsd_nfsd_args *nfsdargp) struct addrinfo *aip, hints; status = 0; - gethostname(hostname, sizeof (hostname)); + if (vhost == NULL) + gethostname(hostname, sizeof (hostname)); + else + strlcpy(hostname, vhost, sizeof (hostname)); snprintf(principal, sizeof (principal), "nfs@%s", hostname); if ((cp = strchr(hostname, '.')) == NULL || *(cp + 1) == '\0') { From 06da0ce08476454412372bd8d48a9947a395a1c5 Mon Sep 17 00:00:00 2001 From: Andriy Voskoboinyk Date: Sat, 16 Feb 2019 01:48:38 +0000 Subject: [PATCH 47/89] GC ATA_REQUEST_TIMEOUT option remnants It was removed from code in r249083 and from sys/conf/options in r249213. PR: 222170 MFC after: 3 days --- sys/conf/NOTES | 8 -------- sys/dev/ata/ata-all.h | 4 ---- 2 files changed, 12 deletions(-) diff --git a/sys/conf/NOTES b/sys/conf/NOTES index 97ec484ae0d9..8c143205cee8 100644 --- a/sys/conf/NOTES +++ b/sys/conf/NOTES @@ -1759,14 +1759,6 @@ hint.ata.1.at="isa" hint.ata.1.port="0x170" hint.ata.1.irq="15" -# -# The following options are valid on the ATA driver: -# -# ATA_REQUEST_TIMEOUT: the number of seconds to wait for an ATA request -# before timing out. - -#options ATA_REQUEST_TIMEOUT=10 - # # Standard floppy disk controllers and floppy tapes, supports # the Y-E DATA External FDD (PC Card) diff --git a/sys/dev/ata/ata-all.h b/sys/dev/ata/ata-all.h index 980ed6344ca6..674f76c2b7cb 100644 --- a/sys/dev/ata/ata-all.h +++ b/sys/dev/ata/ata-all.h @@ -203,10 +203,6 @@ #define ATA_OP_FINISHED 1 #define ATA_MAX_28BIT_LBA 268435455UL -#ifndef ATA_REQUEST_TIMEOUT -#define ATA_REQUEST_TIMEOUT 10 -#endif - /* structure used for composite atomic operations */ #define MAX_COMPOSITES 32 /* u_int32_t bits */ struct ata_composite { From ebd13ba3d49d8bb5a5f46842f82686198ae93965 Mon Sep 17 00:00:00 2001 From: Andriy Voskoboinyk Date: Sat, 16 Feb 2019 03:49:48 +0000 Subject: [PATCH 48/89] Remove vi(1)-related files via 'make delete-old' when WITHOUT_VI=1 is set. MFC after: 5 days --- tools/build/mk/OptionalObsoleteFiles.inc | 26 ++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tools/build/mk/OptionalObsoleteFiles.inc b/tools/build/mk/OptionalObsoleteFiles.inc index 79dd1d189b43..51e5a0ce7497 100644 --- a/tools/build/mk/OptionalObsoleteFiles.inc +++ b/tools/build/mk/OptionalObsoleteFiles.inc @@ -9704,6 +9704,32 @@ OLD_FILES+=usr/share/man/man8/lastlogin.8.gz OLD_FILES+=usr/share/man/man8/utx.8.gz .endif +.if ${MK_VI} == no +OLD_FILES+=etc/rc.d/virecover +OLD_FILES+=rescue/ex +OLD_FILES+=rescue/vi +OLD_FILES+=usr/bin/ex +OLD_FILES+=usr/bin/nex +OLD_FILES+=usr/bin/nvi +OLD_FILES+=usr/bin/nview +OLD_FILES+=usr/bin/vi +OLD_FILES+=usr/bin/view +OLD_FILES+=usr/share/man/man1/ex.1.gz +OLD_FILES+=usr/share/man/man1/nex.1.gz +OLD_FILES+=usr/share/man/man1/nvi.1.gz +OLD_FILES+=usr/share/man/man1/nview.1.gz +OLD_FILES+=usr/share/man/man1/vi.1.gz +OLD_FILES+=usr/share/man/man1/view.1.gz +. if exists(${DESTDIR}/usr/share/vi) +VI_DIRS!=find ${DESTDIR}/usr/share/vi -type d \ + | sed -e 's,^${DESTDIR}/,,'; echo +VI_FILES!=find ${DESTDIR}/usr/share/vi \! -type d \ + | sed -e 's,^${DESTDIR}/,,'; echo +OLD_DIRS+=${VI_DIRS} +OLD_FILES+=${VI_FILES} +. endif +.endif + .if ${MK_WIRELESS} == no OLD_FILES+=etc/regdomain.xml OLD_FILES+=etc/rc.d/hostapd From 18f7e2b45eab25b7b83c340146a1c9a8828a27ce Mon Sep 17 00:00:00 2001 From: Justin Hibbits Date: Sat, 16 Feb 2019 04:16:10 +0000 Subject: [PATCH 49/89] powerpc/booke: Use DMAP where possible for page copy and zeroing This avoids several locks and pmap_kenter()'s, improving performance marginally. MFC after: 2 weeks --- sys/powerpc/booke/pmap.c | 103 +++++++++++++++++++++++++-------------- 1 file changed, 67 insertions(+), 36 deletions(-) diff --git a/sys/powerpc/booke/pmap.c b/sys/powerpc/booke/pmap.c index 19b9b36b491c..e5d20834adc9 100644 --- a/sys/powerpc/booke/pmap.c +++ b/sys/powerpc/booke/pmap.c @@ -2973,14 +2973,19 @@ mmu_booke_zero_page_area(mmu_t mmu, vm_page_t m, int off, int size) /* XXX KASSERT off and size are within a single page? */ - mtx_lock(&zero_page_mutex); - va = zero_page_va; + if (hw_direct_map) { + va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); + bzero((caddr_t)va + off, size); + } else { + mtx_lock(&zero_page_mutex); + va = zero_page_va; - mmu_booke_kenter(mmu, va, VM_PAGE_TO_PHYS(m)); - bzero((caddr_t)va + off, size); - mmu_booke_kremove(mmu, va); + mmu_booke_kenter(mmu, va, VM_PAGE_TO_PHYS(m)); + bzero((caddr_t)va + off, size); + mmu_booke_kremove(mmu, va); - mtx_unlock(&zero_page_mutex); + mtx_unlock(&zero_page_mutex); + } } /* @@ -2991,15 +2996,23 @@ mmu_booke_zero_page(mmu_t mmu, vm_page_t m) { vm_offset_t off, va; - mtx_lock(&zero_page_mutex); - va = zero_page_va; + if (hw_direct_map) { + va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); + } else { + va = zero_page_va; + mtx_lock(&zero_page_mutex); + + mmu_booke_kenter(mmu, va, VM_PAGE_TO_PHYS(m)); + } - mmu_booke_kenter(mmu, va, VM_PAGE_TO_PHYS(m)); for (off = 0; off < PAGE_SIZE; off += cacheline_size) __asm __volatile("dcbz 0,%0" :: "r"(va + off)); - mmu_booke_kremove(mmu, va); - mtx_unlock(&zero_page_mutex); + if (!hw_direct_map) { + mmu_booke_kremove(mmu, va); + + mtx_unlock(&zero_page_mutex); + } } /* @@ -3015,13 +3028,20 @@ mmu_booke_copy_page(mmu_t mmu, vm_page_t sm, vm_page_t dm) sva = copy_page_src_va; dva = copy_page_dst_va; - mtx_lock(©_page_mutex); - mmu_booke_kenter(mmu, sva, VM_PAGE_TO_PHYS(sm)); - mmu_booke_kenter(mmu, dva, VM_PAGE_TO_PHYS(dm)); + if (hw_direct_map) { + sva = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(sm)); + dva = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(dm)); + } else { + mtx_lock(©_page_mutex); + mmu_booke_kenter(mmu, sva, VM_PAGE_TO_PHYS(sm)); + mmu_booke_kenter(mmu, dva, VM_PAGE_TO_PHYS(dm)); + } memcpy((caddr_t)dva, (caddr_t)sva, PAGE_SIZE); - mmu_booke_kremove(mmu, dva); - mmu_booke_kremove(mmu, sva); - mtx_unlock(©_page_mutex); + if (!hw_direct_map) { + mmu_booke_kremove(mmu, dva); + mmu_booke_kremove(mmu, sva); + mtx_unlock(©_page_mutex); + } } static inline void @@ -3032,26 +3052,31 @@ mmu_booke_copy_pages(mmu_t mmu, vm_page_t *ma, vm_offset_t a_offset, vm_offset_t a_pg_offset, b_pg_offset; int cnt; - mtx_lock(©_page_mutex); - while (xfersize > 0) { - a_pg_offset = a_offset & PAGE_MASK; - cnt = min(xfersize, PAGE_SIZE - a_pg_offset); - mmu_booke_kenter(mmu, copy_page_src_va, - VM_PAGE_TO_PHYS(ma[a_offset >> PAGE_SHIFT])); - a_cp = (char *)copy_page_src_va + a_pg_offset; - b_pg_offset = b_offset & PAGE_MASK; - cnt = min(cnt, PAGE_SIZE - b_pg_offset); - mmu_booke_kenter(mmu, copy_page_dst_va, - VM_PAGE_TO_PHYS(mb[b_offset >> PAGE_SHIFT])); - b_cp = (char *)copy_page_dst_va + b_pg_offset; - bcopy(a_cp, b_cp, cnt); - mmu_booke_kremove(mmu, copy_page_dst_va); - mmu_booke_kremove(mmu, copy_page_src_va); - a_offset += cnt; - b_offset += cnt; - xfersize -= cnt; + if (hw_direct_map) { + bcopy((caddr_t)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(*ma)) + a_offset, + (caddr_t)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(*mb)), xfersize); + } else { + mtx_lock(©_page_mutex); + while (xfersize > 0) { + a_pg_offset = a_offset & PAGE_MASK; + cnt = min(xfersize, PAGE_SIZE - a_pg_offset); + mmu_booke_kenter(mmu, copy_page_src_va, + VM_PAGE_TO_PHYS(ma[a_offset >> PAGE_SHIFT])); + a_cp = (char *)copy_page_src_va + a_pg_offset; + b_pg_offset = b_offset & PAGE_MASK; + cnt = min(cnt, PAGE_SIZE - b_pg_offset); + mmu_booke_kenter(mmu, copy_page_dst_va, + VM_PAGE_TO_PHYS(mb[b_offset >> PAGE_SHIFT])); + b_cp = (char *)copy_page_dst_va + b_pg_offset; + bcopy(a_cp, b_cp, cnt); + mmu_booke_kremove(mmu, copy_page_dst_va); + mmu_booke_kremove(mmu, copy_page_src_va); + a_offset += cnt; + b_offset += cnt; + xfersize -= cnt; + } + mtx_unlock(©_page_mutex); } - mtx_unlock(©_page_mutex); } static vm_offset_t @@ -3064,6 +3089,9 @@ mmu_booke_quick_enter_page(mmu_t mmu, vm_page_t m) paddr = VM_PAGE_TO_PHYS(m); + if (hw_direct_map) + return (PHYS_TO_DMAP(paddr)); + flags = PTE_SR | PTE_SW | PTE_SX | PTE_WIRED | PTE_VALID; flags |= tlb_calc_wimg(paddr, pmap_page_get_memattr(m)) << PTE_MAS2_SHIFT; flags |= PTE_PS_4KB; @@ -3097,6 +3125,9 @@ mmu_booke_quick_remove_page(mmu_t mmu, vm_offset_t addr) { pte_t *pte; + if (hw_direct_map) + return; + pte = pte_find(mmu, kernel_pmap, addr); KASSERT(PCPU_GET(qmap_addr) == addr, From 0454ed97943bd3969ca5e6155fe6c6f61af3b0fc Mon Sep 17 00:00:00 2001 From: Justin Hibbits Date: Sat, 16 Feb 2019 04:38:34 +0000 Subject: [PATCH 50/89] powerpc/booke: depessimize MAS register updates We only need to isync before we actually use the MAS registers, so before and after the TLB read/write/sync/search operations. MFC after: 2 weeks --- sys/powerpc/booke/pmap.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/sys/powerpc/booke/pmap.c b/sys/powerpc/booke/pmap.c index e5d20834adc9..5e9b5be2508f 100644 --- a/sys/powerpc/booke/pmap.c +++ b/sys/powerpc/booke/pmap.c @@ -3911,29 +3911,23 @@ tlb1_write_entry_int(void *arg) mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(args->idx); mtspr(SPR_MAS0, mas0); - __asm __volatile("isync"); mtspr(SPR_MAS1, args->e->mas1); - __asm __volatile("isync"); mtspr(SPR_MAS2, args->e->mas2); - __asm __volatile("isync"); mtspr(SPR_MAS3, args->e->mas3); - __asm __volatile("isync"); switch ((mfpvr() >> 16) & 0xFFFF) { case FSL_E500mc: case FSL_E5500: case FSL_E6500: mtspr(SPR_MAS8, 0); - __asm __volatile("isync"); /* FALLTHROUGH */ case FSL_E500v2: mtspr(SPR_MAS7, args->e->mas7); - __asm __volatile("isync"); break; default: break; } - __asm __volatile("tlbwe; isync; msync"); + __asm __volatile("isync; tlbwe; isync; msync"); } @@ -4376,7 +4370,6 @@ tid_flush(tlbtid_t tid) mas0 = MAS0_TLBSEL(0) | MAS0_ESEL(way); mtspr(SPR_MAS0, mas0); - __asm __volatile("isync"); mas2 = entry << MAS2_TLB0_ENTRY_IDX_SHIFT; mtspr(SPR_MAS2, mas2); @@ -4453,7 +4446,6 @@ DB_SHOW_COMMAND(tlb0, tlb0_print_tlbentries) mas0 = MAS0_TLBSEL(0) | MAS0_ESEL(way); mtspr(SPR_MAS0, mas0); - __asm __volatile("isync"); mas2 = entryidx << MAS2_TLB0_ENTRY_IDX_SHIFT; mtspr(SPR_MAS2, mas2); From a9b033c2f326020985bac17e253f7617e1bb58ea Mon Sep 17 00:00:00 2001 From: Justin Hibbits Date: Sat, 16 Feb 2019 04:47:33 +0000 Subject: [PATCH 51/89] powerpc/booke: Fix 32-bit build MFC after: 2 weeks MFC with: 344202 --- sys/powerpc/booke/pmap.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sys/powerpc/booke/pmap.c b/sys/powerpc/booke/pmap.c index 5e9b5be2508f..5705c222c190 100644 --- a/sys/powerpc/booke/pmap.c +++ b/sys/powerpc/booke/pmap.c @@ -3053,8 +3053,11 @@ mmu_booke_copy_pages(mmu_t mmu, vm_page_t *ma, vm_offset_t a_offset, int cnt; if (hw_direct_map) { - bcopy((caddr_t)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(*ma)) + a_offset, - (caddr_t)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(*mb)), xfersize); + a_cp = (caddr_t)((uintptr_t)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(*ma)) + + a_offset); + b_cp = (caddr_t)((uintptr_t)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(*mb)) + + b_offset); + bcopy(a_cp, b_cp, xfersize); } else { mtx_lock(©_page_mutex); while (xfersize > 0) { From 48514fe19616cc089aacc2ae2d1db55ec65be722 Mon Sep 17 00:00:00 2001 From: Andriy Voskoboinyk Date: Sat, 16 Feb 2019 04:49:51 +0000 Subject: [PATCH 52/89] Allow to remove unused files via 'make delete-old(-libs)' when WITHOUT_OFED and / or WITHOUT_OFED_EXTRA src.conf(5) options are set. MFC after: 5 days --- tools/build/mk/OptionalObsoleteFiles.inc | 377 +++++++++++++++++++++++ 1 file changed, 377 insertions(+) diff --git a/tools/build/mk/OptionalObsoleteFiles.inc b/tools/build/mk/OptionalObsoleteFiles.inc index 51e5a0ce7497..f0934db10a12 100644 --- a/tools/build/mk/OptionalObsoleteFiles.inc +++ b/tools/build/mk/OptionalObsoleteFiles.inc @@ -7172,6 +7172,383 @@ OLD_FILES+=usr/share/man/man8/ntpq.8.gz OLD_FILES+=usr/share/man/man8/ntptime.8.gz .endif +.if ${MK_OFED} == no +OLD_FILES+=etc/newsyslog.conf.d/opensm.conf +OLD_FILES+=etc/rc.d/opensm +OLD_FILES+=usr/bin/ibstat +OLD_FILES+=usr/bin/ibv_asyncwatch +OLD_FILES+=usr/bin/ibv_devices +OLD_FILES+=usr/bin/ibv_devinfo +OLD_FILES+=usr/bin/ibv_rc_pingpong +OLD_FILES+=usr/bin/ibv_srq_pingpong +OLD_FILES+=usr/bin/ibv_uc_pingpong +OLD_FILES+=usr/bin/ibv_ud_pingpong +OLD_FILES+=usr/bin/mckey +OLD_FILES+=usr/bin/rping +OLD_FILES+=usr/bin/ucmatose +OLD_FILES+=usr/bin/udaddy +OLD_FILES+=usr/include/infiniband/marshall.h +OLD_FILES+=usr/include/infiniband/kern-abi.h +OLD_FILES+=usr/include/infiniband/umad_sm.h +OLD_FILES+=usr/include/infiniband/umad.h +OLD_FILES+=usr/include/infiniband/arch.h +OLD_FILES+=usr/include/infiniband/verbs.h +OLD_FILES+=usr/include/infiniband/ib.h +OLD_FILES+=usr/include/infiniband/cm.h +OLD_FILES+=usr/include/infiniband/opcode.h +OLD_FILES+=usr/include/infiniband/ibnetdisc.h +OLD_FILES+=usr/include/infiniband/driver.h +OLD_FILES+=usr/include/infiniband/mad_osd.h +OLD_FILES+=usr/include/infiniband/umad_types.h +OLD_FILES+=usr/include/infiniband/umad_cm.h +OLD_FILES+=usr/include/infiniband/cm_abi.h +OLD_FILES+=usr/include/infiniband/sa-kern-abi.h +OLD_FILES+=usr/include/infiniband/ibnetdisc_osd.h +OLD_FILES+=usr/include/infiniband/opensm/osm_event_plugin.h +OLD_FILES+=usr/include/infiniband/opensm/osm_console_io.h +OLD_FILES+=usr/include/infiniband/opensm/osm_ucast_cache.h +OLD_FILES+=usr/include/infiniband/opensm/osm_port.h +OLD_FILES+=usr/include/infiniband/opensm/osm_path.h +OLD_FILES+=usr/include/infiniband/opensm/osm_mtree.h +OLD_FILES+=usr/include/infiniband/opensm/osm_log.h +OLD_FILES+=usr/include/infiniband/opensm/osm_mcm_port.h +OLD_FILES+=usr/include/infiniband/opensm/osm_subnet.h +OLD_FILES+=usr/include/infiniband/opensm/osm_pkey.h +OLD_FILES+=usr/include/infiniband/opensm/osm_remote_sm.h +OLD_FILES+=usr/include/infiniband/opensm/osm_qos_policy.h +OLD_FILES+=usr/include/infiniband/opensm/osm_sm.h +OLD_FILES+=usr/include/infiniband/opensm/osm_node.h +OLD_FILES+=usr/include/infiniband/opensm/osm_mcast_mgr.h +OLD_FILES+=usr/include/infiniband/opensm/osm_madw.h +OLD_FILES+=usr/include/infiniband/opensm/osm_lid_mgr.h +OLD_FILES+=usr/include/infiniband/opensm/osm_congestion_control.h +OLD_FILES+=usr/include/infiniband/opensm/osm_port_profile.h +OLD_FILES+=usr/include/infiniband/opensm/osm_perfmgr.h +OLD_FILES+=usr/include/infiniband/opensm/osm_service.h +OLD_FILES+=usr/include/infiniband/opensm/osm_base.h +OLD_FILES+=usr/include/infiniband/opensm/osm_vl15intf.h +OLD_FILES+=usr/include/infiniband/opensm/st.h +OLD_FILES+=usr/include/infiniband/opensm/osm_attrib_req.h +OLD_FILES+=usr/include/infiniband/opensm/osm_ucast_mgr.h +OLD_FILES+=usr/include/infiniband/opensm/osm_db.h +OLD_FILES+=usr/include/infiniband/opensm/osm_sa_mad_ctrl.h +OLD_FILES+=usr/include/infiniband/opensm/osm_db_pack.h +OLD_FILES+=usr/include/infiniband/opensm/osm_opensm.h +OLD_FILES+=usr/include/infiniband/opensm/osm_mesh.h +OLD_FILES+=usr/include/infiniband/opensm/osm_mcast_tbl.h +OLD_FILES+=usr/include/infiniband/opensm/osm_sm_mad_ctrl.h +OLD_FILES+=usr/include/infiniband/opensm/osm_stats.h +OLD_FILES+=usr/include/infiniband/opensm/osm_mad_pool.h +OLD_FILES+=usr/include/infiniband/opensm/osm_switch.h +OLD_FILES+=usr/include/infiniband/opensm/osm_ucast_lash.h +OLD_FILES+=usr/include/infiniband/opensm/osm_errors.h +OLD_FILES+=usr/include/infiniband/opensm/osm_partition.h +OLD_FILES+=usr/include/infiniband/opensm/osm_prefix_route.h +OLD_FILES+=usr/include/infiniband/opensm/osm_helper.h +OLD_FILES+=usr/include/infiniband/opensm/osm_version.h +OLD_FILES+=usr/include/infiniband/opensm/osm_sa.h +OLD_FILES+=usr/include/infiniband/opensm/osm_config.h +OLD_FILES+=usr/include/infiniband/opensm/osm_multicast.h +OLD_FILES+=usr/include/infiniband/opensm/osm_file_ids.h +OLD_FILES+=usr/include/infiniband/opensm/osm_perfmgr_db.h +OLD_FILES+=usr/include/infiniband/opensm/osm_console.h +OLD_FILES+=usr/include/infiniband/opensm/osm_msgdef.h +OLD_FILES+=usr/include/infiniband/opensm/osm_router.h +OLD_FILES+=usr/include/infiniband/opensm/osm_guid.h +OLD_FILES+=usr/include/infiniband/opensm/osm_inform.h +OLD_DIRS+=usr/include/infiniband/opensm +OLD_FILES+=usr/include/infiniband/iba/ib_types.h +OLD_FILES+=usr/include/infiniband/iba/ib_cm_types.h +OLD_DIRS+=usr/include/infiniband/iba +OLD_FILES+=usr/include/infiniband/umad_str.h +OLD_FILES+=usr/include/infiniband/udma_barrier.h +OLD_FILES+=usr/include/infiniband/umad_sa.h +OLD_FILES+=usr/include/infiniband/mad.h +OLD_FILES+=usr/include/infiniband/sa.h +OLD_FILES+=usr/include/infiniband/byteorder.h +OLD_FILES+=usr/include/infiniband/types.h +OLD_FILES+=usr/include/infiniband/byteswap.h +OLD_FILES+=usr/include/infiniband/vendor/osm_pkt_randomizer.h +OLD_FILES+=usr/include/infiniband/vendor/osm_vendor_mlx_rmpp_ctx.h +OLD_FILES+=usr/include/infiniband/vendor/osm_vendor_mtl_hca_guid.h +OLD_FILES+=usr/include/infiniband/vendor/osm_vendor_mlx_txn.h +OLD_FILES+=usr/include/infiniband/vendor/osm_vendor_mlx.h +OLD_FILES+=usr/include/infiniband/vendor/osm_vendor_mlx_svc.h +OLD_FILES+=usr/include/infiniband/vendor/osm_vendor_test.h +OLD_FILES+=usr/include/infiniband/vendor/osm_vendor_mlx_inout.h +OLD_FILES+=usr/include/infiniband/vendor/osm_mtl_bind.h +OLD_FILES+=usr/include/infiniband/vendor/osm_vendor_mlx_hca.h +OLD_FILES+=usr/include/infiniband/vendor/osm_vendor_sa_api.h +OLD_FILES+=usr/include/infiniband/vendor/osm_vendor_mlx_sender.h +OLD_FILES+=usr/include/infiniband/vendor/osm_vendor.h +OLD_FILES+=usr/include/infiniband/vendor/osm_umadt.h +OLD_FILES+=usr/include/infiniband/vendor/osm_vendor_mtl_transaction_mgr.h +OLD_FILES+=usr/include/infiniband/vendor/osm_vendor_mlx_defs.h +OLD_FILES+=usr/include/infiniband/vendor/osm_vendor_mlx_dispatcher.h +OLD_FILES+=usr/include/infiniband/vendor/osm_vendor_api.h +OLD_FILES+=usr/include/infiniband/vendor/osm_vendor_mtl.h +OLD_FILES+=usr/include/infiniband/vendor/osm_vendor_mlx_transport.h +OLD_FILES+=usr/include/infiniband/vendor/osm_vendor_al.h +OLD_FILES+=usr/include/infiniband/vendor/osm_vendor_mlx_sar.h +OLD_FILES+=usr/include/infiniband/vendor/osm_vendor_umadt.h +OLD_FILES+=usr/include/infiniband/vendor/osm_ts_useraccess.h +OLD_FILES+=usr/include/infiniband/vendor/osm_vendor_ts.h +OLD_FILES+=usr/include/infiniband/vendor/osm_vendor_mlx_transport_anafa.h +OLD_FILES+=usr/include/infiniband/vendor/osm_vendor_ibumad.h +OLD_DIRS+=usr/include/infiniband/vendor +OLD_FILES+=usr/include/infiniband/endian.h +OLD_FILES+=usr/include/infiniband/complib/cl_byteswap.h +OLD_FILES+=usr/include/infiniband/complib/cl_types.h +OLD_FILES+=usr/include/infiniband/complib/cl_map.h +OLD_FILES+=usr/include/infiniband/complib/cl_packon.h +OLD_FILES+=usr/include/infiniband/complib/cl_timer.h +OLD_FILES+=usr/include/infiniband/complib/cl_thread_osd.h +OLD_FILES+=usr/include/infiniband/complib/cl_thread.h +OLD_FILES+=usr/include/infiniband/complib/cl_event.h +OLD_FILES+=usr/include/infiniband/complib/cl_byteswap_osd.h +OLD_FILES+=usr/include/infiniband/complib/cl_passivelock.h +OLD_FILES+=usr/include/infiniband/complib/cl_vector.h +OLD_FILES+=usr/include/infiniband/complib/cl_nodenamemap.h +OLD_FILES+=usr/include/infiniband/complib/cl_event_wheel.h +OLD_FILES+=usr/include/infiniband/complib/cl_log.h +OLD_FILES+=usr/include/infiniband/complib/cl_fleximap.h +OLD_FILES+=usr/include/infiniband/complib/cl_qlist.h +OLD_FILES+=usr/include/infiniband/complib/cl_timer_osd.h +OLD_FILES+=usr/include/infiniband/complib/cl_pool.h +OLD_FILES+=usr/include/infiniband/complib/cl_debug.h +OLD_FILES+=usr/include/infiniband/complib/cl_types_osd.h +OLD_FILES+=usr/include/infiniband/complib/cl_dispatcher.h +OLD_FILES+=usr/include/infiniband/complib/cl_ptr_vector.h +OLD_FILES+=usr/include/infiniband/complib/cl_atomic_osd.h +OLD_FILES+=usr/include/infiniband/complib/cl_qmap.h +OLD_FILES+=usr/include/infiniband/complib/cl_spinlock_osd.h +OLD_FILES+=usr/include/infiniband/complib/cl_qcomppool.h +OLD_FILES+=usr/include/infiniband/complib/cl_threadpool.h +OLD_FILES+=usr/include/infiniband/complib/cl_list.h +OLD_FILES+=usr/include/infiniband/complib/cl_debug_osd.h +OLD_FILES+=usr/include/infiniband/complib/cl_packoff.h +OLD_FILES+=usr/include/infiniband/complib/cl_qpool.h +OLD_FILES+=usr/include/infiniband/complib/cl_spinlock.h +OLD_FILES+=usr/include/infiniband/complib/cl_event_osd.h +OLD_FILES+=usr/include/infiniband/complib/cl_atomic.h +OLD_FILES+=usr/include/infiniband/complib/cl_math.h +OLD_FILES+=usr/include/infiniband/complib/cl_comppool.h +OLD_DIRS+=usr/include/infiniband/complib +OLD_DIRS+=usr/include/infiniband +OLD_FILES+=usr/lib/libcxgb4.a +OLD_FILES+=usr/lib/libcxgb4.so +OLD_LIBS+=usr/lib/libcxgb4.so.1 +OLD_FILES+=usr/lib/libcxgb4_p.a +OLD_FILES+=usr/lib/libibcm.a +OLD_FILES+=usr/lib/libibcm.so +OLD_LIBS+=usr/lib/libibcm.so.1 +OLD_FILES+=usr/lib/libibcm_p.a +OLD_FILES+=usr/lib/libibmad.a +OLD_FILES+=usr/lib/libibmad.so +OLD_LIBS+=usr/lib/libibmad.so.5 +OLD_FILES+=usr/lib/libibmad_p.a +OLD_FILES+=usr/lib/libibnetdisc.a +OLD_FILES+=usr/lib/libibnetdisc.so +OLD_LIBS+=usr/lib/libibnetdisc.so.5 +OLD_FILES+=usr/lib/libibnetdisc_p.a +OLD_FILES+=usr/lib/libibumad.a +OLD_FILES+=usr/lib/libibumad.so +OLD_LIBS+=usr/lib/libibumad.so.1 +OLD_FILES+=usr/lib/libibumad_p.a +OLD_FILES+=usr/lib/libibverbs.a +OLD_FILES+=usr/lib/libibverbs.so +OLD_LIBS+=usr/lib/libibverbs.so.1 +OLD_FILES+=usr/lib/libibverbs_p.a +OLD_FILES+=usr/lib/libmlx4.a +OLD_FILES+=usr/lib/libmlx4.so +OLD_LIBS+=usr/lib/libmlx4.so.1 +OLD_FILES+=usr/lib/libmlx4_p.a +OLD_FILES+=usr/lib/libmlx5.a +OLD_FILES+=usr/lib/libmlx5.so +OLD_LIBS+=usr/lib/libmlx5.so.1 +OLD_FILES+=usr/lib/libmlx5_p.a +OLD_FILES+=usr/lib/libopensm.a +OLD_FILES+=usr/lib/libopensm.so +OLD_LIBS+=usr/lib/libopensm.so.5 +OLD_FILES+=usr/lib/libopensm_p.a +OLD_FILES+=usr/lib/libosmcomp.a +OLD_FILES+=usr/lib/libosmcomp.so +OLD_LIBS+=usr/lib/libosmcomp.so.3 +OLD_FILES+=usr/lib/libosmcomp_p.a +OLD_FILES+=usr/lib/libosmvendor.a +OLD_FILES+=usr/lib/libosmvendor.so +OLD_LIBS+=usr/lib/libosmvendor.so.4 +OLD_FILES+=usr/lib/libosmvendor_p.a +OLD_FILES+=usr/lib/librdmacm.a +OLD_FILES+=usr/lib/librdmacm.so +OLD_LIBS+=usr/lib/librdmacm.so.1 +OLD_FILES+=usr/lib/librdmacm_p.a +OLD_FILES+=usr/share/man/man1/ibv_asyncwatch.1.gz +OLD_FILES+=usr/share/man/man1/ibv_devices.1.gz +OLD_FILES+=usr/share/man/man1/ibv_devinfo.1.gz +OLD_FILES+=usr/share/man/man1/ibv_rc_pingpong.1.gz +OLD_FILES+=usr/share/man/man1/ibv_srq_pingpong.1.gz +OLD_FILES+=usr/share/man/man1/ibv_uc_pingpong.1.gz +OLD_FILES+=usr/share/man/man1/ibv_ud_pingpong.1.gz +OLD_FILES+=usr/share/man/man1/mckey.1.gz +OLD_FILES+=usr/share/man/man1/rping.1.gz +OLD_FILES+=usr/share/man/man1/ucmatose.1.gz +OLD_FILES+=usr/share/man/man1/udaddy.1.gz +OLD_FILES+=usr/share/man/man3/ibnd_debug.3.gz +OLD_FILES+=usr/share/man/man3/ibnd_destroy_fabric.3.gz +OLD_FILES+=usr/share/man/man3/ibnd_discover_fabric.3.gz +OLD_FILES+=usr/share/man/man3/ibnd_find_node_dr.3.gz +OLD_FILES+=usr/share/man/man3/ibnd_find_node_guid.3.gz +OLD_FILES+=usr/share/man/man3/ibnd_iter_nodes.3.gz +OLD_FILES+=usr/share/man/man3/ibnd_iter_nodes_type.3.gz +OLD_FILES+=usr/share/man/man3/ibnd_show_progress.3.gz +OLD_FILES+=usr/share/man/man3/ibv_alloc_mw.3.gz +OLD_FILES+=usr/share/man/man3/ibv_alloc_pd.3.gz +OLD_FILES+=usr/share/man/man3/ibv_attach_mcast.3.gz +OLD_FILES+=usr/share/man/man3/ibv_bind_mw.3.gz +OLD_FILES+=usr/share/man/man3/ibv_create_ah.3.gz +OLD_FILES+=usr/share/man/man3/ibv_create_ah_from_wc.3.gz +OLD_FILES+=usr/share/man/man3/ibv_create_comp_channel.3.gz +OLD_FILES+=usr/share/man/man3/ibv_create_cq.3.gz +OLD_FILES+=usr/share/man/man3/ibv_create_cq_ex.3.gz +OLD_FILES+=usr/share/man/man3/ibv_create_flow.3.gz +OLD_FILES+=usr/share/man/man3/ibv_create_qp.3.gz +OLD_FILES+=usr/share/man/man3/ibv_create_qp_ex.3.gz +OLD_FILES+=usr/share/man/man3/ibv_create_rwq_ind_table.3.gz +OLD_FILES+=usr/share/man/man3/ibv_create_srq.3.gz +OLD_FILES+=usr/share/man/man3/ibv_create_srq_ex.3.gz +OLD_FILES+=usr/share/man/man3/ibv_create_wq.3.gz +OLD_FILES+=usr/share/man/man3/ibv_event_type_str.3.gz +OLD_FILES+=usr/share/man/man3/ibv_fork_init.3.gz +OLD_FILES+=usr/share/man/man3/ibv_get_async_event.3.gz +OLD_FILES+=usr/share/man/man3/ibv_get_cq_event.3.gz +OLD_FILES+=usr/share/man/man3/ibv_get_device_guid.3.gz +OLD_FILES+=usr/share/man/man3/ibv_get_device_list.3.gz +OLD_FILES+=usr/share/man/man3/ibv_get_device_name.3.gz +OLD_FILES+=usr/share/man/man3/ibv_get_srq_num.3.gz +OLD_FILES+=usr/share/man/man3/ibv_inc_rkey.3.gz +OLD_FILES+=usr/share/man/man3/ibv_modify_qp.3.gz +OLD_FILES+=usr/share/man/man3/ibv_modify_srq.3.gz +OLD_FILES+=usr/share/man/man3/ibv_modify_wq.3.gz +OLD_FILES+=usr/share/man/man3/ibv_open_device.3.gz +OLD_FILES+=usr/share/man/man3/ibv_open_qp.3.gz +OLD_FILES+=usr/share/man/man3/ibv_open_xrcd.3.gz +OLD_FILES+=usr/share/man/man3/ibv_poll_cq.3.gz +OLD_FILES+=usr/share/man/man3/ibv_post_recv.3.gz +OLD_FILES+=usr/share/man/man3/ibv_post_send.3.gz +OLD_FILES+=usr/share/man/man3/ibv_post_srq_recv.3.gz +OLD_FILES+=usr/share/man/man3/ibv_query_device.3.gz +OLD_FILES+=usr/share/man/man3/ibv_query_device_ex.3.gz +OLD_FILES+=usr/share/man/man3/ibv_query_gid.3.gz +OLD_FILES+=usr/share/man/man3/ibv_query_pkey.3.gz +OLD_FILES+=usr/share/man/man3/ibv_query_port.3.gz +OLD_FILES+=usr/share/man/man3/ibv_query_qp.3.gz +OLD_FILES+=usr/share/man/man3/ibv_query_rt_values_ex.3.gz +OLD_FILES+=usr/share/man/man3/ibv_query_srq.3.gz +OLD_FILES+=usr/share/man/man3/ibv_rate_to_mbps.3.gz +OLD_FILES+=usr/share/man/man3/ibv_rate_to_mult.3.gz +OLD_FILES+=usr/share/man/man3/ibv_reg_mr.3.gz +OLD_FILES+=usr/share/man/man3/ibv_req_notify_cq.3.gz +OLD_FILES+=usr/share/man/man3/ibv_rereg_mr.3.gz +OLD_FILES+=usr/share/man/man3/ibv_resize_cq.3.gz +OLD_FILES+=usr/share/man/man3/rdma_accept.3.gz +OLD_FILES+=usr/share/man/man3/rdma_ack_cm_event.3.gz +OLD_FILES+=usr/share/man/man3/rdma_bind_addr.3.gz +OLD_FILES+=usr/share/man/man3/rdma_connect.3.gz +OLD_FILES+=usr/share/man/man3/rdma_create_ep.3.gz +OLD_FILES+=usr/share/man/man3/rdma_create_event_channel.3.gz +OLD_FILES+=usr/share/man/man3/rdma_create_id.3.gz +OLD_FILES+=usr/share/man/man3/rdma_create_qp.3.gz +OLD_FILES+=usr/share/man/man3/rdma_create_srq.3.gz +OLD_FILES+=usr/share/man/man3/rdma_dereg_mr.3.gz +OLD_FILES+=usr/share/man/man3/rdma_destroy_ep.3.gz +OLD_FILES+=usr/share/man/man3/rdma_destroy_event_channel.3.gz +OLD_FILES+=usr/share/man/man3/rdma_destroy_id.3.gz +OLD_FILES+=usr/share/man/man3/rdma_destroy_qp.3.gz +OLD_FILES+=usr/share/man/man3/rdma_destroy_srq.3.gz +OLD_FILES+=usr/share/man/man3/rdma_disconnect.3.gz +OLD_FILES+=usr/share/man/man3/rdma_event_str.3.gz +OLD_FILES+=usr/share/man/man3/rdma_free_devices.3.gz +OLD_FILES+=usr/share/man/man3/rdma_get_cm_event.3.gz +OLD_FILES+=usr/share/man/man3/rdma_get_devices.3.gz +OLD_FILES+=usr/share/man/man3/rdma_get_dst_port.3.gz +OLD_FILES+=usr/share/man/man3/rdma_get_local_addr.3.gz +OLD_FILES+=usr/share/man/man3/rdma_get_peer_addr.3.gz +OLD_FILES+=usr/share/man/man3/rdma_get_recv_comp.3.gz +OLD_FILES+=usr/share/man/man3/rdma_get_request.3.gz +OLD_FILES+=usr/share/man/man3/rdma_get_send_comp.3.gz +OLD_FILES+=usr/share/man/man3/rdma_get_src_port.3.gz +OLD_FILES+=usr/share/man/man3/rdma_getaddrinfo.3.gz +OLD_FILES+=usr/share/man/man3/rdma_join_multicast.3.gz +OLD_FILES+=usr/share/man/man3/rdma_leave_multicast.3.gz +OLD_FILES+=usr/share/man/man3/rdma_listen.3.gz +OLD_FILES+=usr/share/man/man3/rdma_migrate_id.3.gz +OLD_FILES+=usr/share/man/man3/rdma_notify.3.gz +OLD_FILES+=usr/share/man/man3/rdma_post_read.3.gz +OLD_FILES+=usr/share/man/man3/rdma_post_readv.3.gz +OLD_FILES+=usr/share/man/man3/rdma_post_recv.3.gz +OLD_FILES+=usr/share/man/man3/rdma_post_recvv.3.gz +OLD_FILES+=usr/share/man/man3/rdma_post_send.3.gz +OLD_FILES+=usr/share/man/man3/rdma_post_sendv.3.gz +OLD_FILES+=usr/share/man/man3/rdma_post_ud_send.3.gz +OLD_FILES+=usr/share/man/man3/rdma_post_write.3.gz +OLD_FILES+=usr/share/man/man3/rdma_post_writev.3.gz +OLD_FILES+=usr/share/man/man3/rdma_reg_msgs.3.gz +OLD_FILES+=usr/share/man/man3/rdma_reg_read.3.gz +OLD_FILES+=usr/share/man/man3/rdma_reg_write.3.gz +OLD_FILES+=usr/share/man/man3/rdma_reject.3.gz +OLD_FILES+=usr/share/man/man3/rdma_resolve_addr.3.gz +OLD_FILES+=usr/share/man/man3/rdma_resolve_route.3.gz +OLD_FILES+=usr/share/man/man3/rdma_set_option.3.gz +OLD_FILES+=usr/share/man/man4/mlx4ib.4.gz +OLD_FILES+=usr/share/man/man4/mlx5ib.4.gz +OLD_FILES+=usr/share/man/man8/ibstat.8.gz +.endif + +.if ${MK_OFED_EXTRA} == no +OLD_FILES+=usr/bin/dump_fts +OLD_FILES+=usr/bin/ibaddr +OLD_FILES+=usr/bin/ibcacheedit +OLD_FILES+=usr/bin/ibccconfig +OLD_FILES+=usr/bin/ibccquery +OLD_FILES+=usr/bin/iblinkinfo +OLD_FILES+=usr/bin/ibmirror +OLD_FILES+=usr/bin/ibnetdiscover +OLD_FILES+=usr/bin/ibping +OLD_FILES+=usr/bin/ibportstate +OLD_FILES+=usr/bin/ibqueryerrors +OLD_FILES+=usr/bin/ibroute +OLD_FILES+=usr/bin/ibsysstat +OLD_FILES+=usr/bin/ibtracert +OLD_FILES+=usr/bin/opensm +OLD_FILES+=usr/bin/perfquery +OLD_FILES+=usr/bin/saquery +OLD_FILES+=usr/bin/sminfo +OLD_FILES+=usr/bin/smpdump +OLD_FILES+=usr/bin/smpquery +OLD_FILES+=usr/bin/vendstat +OLD_FILES+=usr/share/man/man8/dump_fts.8.gz +OLD_FILES+=usr/share/man/man8/ibaddr.8.gz +OLD_FILES+=usr/share/man/man8/ibcacheedit.8.gz +OLD_FILES+=usr/share/man/man8/ibccconfig.8.gz +OLD_FILES+=usr/share/man/man8/ibccquery.8.gz +OLD_FILES+=usr/share/man/man8/iblinkinfo.8.gz +OLD_FILES+=usr/share/man/man8/ibnetdiscover.8.gz +OLD_FILES+=usr/share/man/man8/ibping.8.gz +OLD_FILES+=usr/share/man/man8/ibportstate.8.gz +OLD_FILES+=usr/share/man/man8/ibqueryerrors.8.gz +OLD_FILES+=usr/share/man/man8/ibroute.8.gz +OLD_FILES+=usr/share/man/man8/ibsysstat.8.gz +OLD_FILES+=usr/share/man/man8/ibtracert.8.gz +OLD_FILES+=usr/share/man/man8/opensm.8.gz +OLD_FILES+=usr/share/man/man8/perfquery.8.gz +OLD_FILES+=usr/share/man/man8/saquery.8.gz +OLD_FILES+=usr/share/man/man8/sminfo.8.gz +OLD_FILES+=usr/share/man/man8/smpdump.8.gz +OLD_FILES+=usr/share/man/man8/smpquery.8.gz +OLD_FILES+=usr/share/man/man8/vendstat.8.gz +.endif + .if ${MK_OPENSSH} == no OLD_FILES+=etc/rc.d/sshd OLD_FILES+=etc/ssh/moduli From b94a281e1e50121c2e45e50fab981f811711430b Mon Sep 17 00:00:00 2001 From: Andriy Voskoboinyk Date: Sat, 16 Feb 2019 05:04:01 +0000 Subject: [PATCH 53/89] Add more rc.d scripts / empty directors / config files into OptionalObsoleteFiles.inc Note: only files with conditional installation logic were included from the PR. PR: 233046 Submitted by: MFC after: 5 days --- tools/build/mk/OptionalObsoleteFiles.inc | 28 ++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tools/build/mk/OptionalObsoleteFiles.inc b/tools/build/mk/OptionalObsoleteFiles.inc index f0934db10a12..feecb479def8 100644 --- a/tools/build/mk/OptionalObsoleteFiles.inc +++ b/tools/build/mk/OptionalObsoleteFiles.inc @@ -54,6 +54,10 @@ OLD_FILES+=usr/share/man/man8/acpidump.8.gz OLD_FILES+=usr/share/man/man8/iasl.8.gz .endif +.if ${MK_ACPI} == no && ${MK_APM} == no +OLD_FILES+=etc/rc.d/powerd +.endif + .if ${MK_AMD} == no OLD_FILES+=etc/amd.map OLD_FILES+=etc/newsyslog.conf.d/amd.conf @@ -434,6 +438,7 @@ OLD_FILES+=usr/bin/ld.bfd .endif .if ${MK_BLACKLIST} == no +OLD_FILES+=etc/blacklistd.conf OLD_FILES+=etc/rc.d/blacklistd OLD_FILES+=usr/include/blacklist.h OLD_FILES+=usr/lib/libblacklist.a @@ -655,6 +660,7 @@ OLD_FILES+=usr/share/man/man8/zfsloader.8.gz .endif .if ${MK_BOOTPARAMD} == no +OLD_FILES+=etc/rc.d/bootparams OLD_FILES+=usr/sbin/bootparamd OLD_FILES+=usr/share/man/man5/bootparams.5.gz OLD_FILES+=usr/share/man/man8/bootparamd.8.gz @@ -1131,6 +1137,7 @@ OLD_FILES+=etc/casper/system.grp OLD_FILES+=etc/casper/system.pwd OLD_FILES+=etc/casper/system.random OLD_FILES+=etc/casper/system.sysctl +OLD_DIRS+=etc/casper OLD_FILES+=etc/rc.d/casperd OLD_LIBS+=lib/libcapsicum.so.0 OLD_LIBS+=lib/libcasper.so.0 @@ -1293,6 +1300,8 @@ OLD_FILES+=etc/rc.d/zvol OLD_FILES+=etc/devd/zfs.conf OLD_FILES+=etc/periodic/daily/404.status-zfs OLD_FILES+=etc/periodic/daily/800.scrub-zfs +OLD_FILES+=etc/zfs/exports +OLD_DIRS+=etc/zfs OLD_LIBS+=lib/libzfs.so.2 OLD_LIBS+=lib/libzfs.so.3 OLD_LIBS+=lib/libzfs_core.so.2 @@ -2279,6 +2288,7 @@ OLD_DIRS+=usr/share/dict .if ${MK_DMAGENT} == no OLD_FILES+=etc/dma/dma.conf +OLD_DIRS+=etc/dma OLD_FILES+=usr/libexec/dma OLD_FILES+=usr/libexec/dma-mbox-create OLD_FILES+=usr/share/man/man8/dma.8.gz @@ -2914,6 +2924,7 @@ OLD_FILES+=usr/share/man/man8/gssd.8.gz .endif .if ${MK_HAST} == no +OLD_FILES+=etc/rc.d/hastd OLD_FILES+=sbin/hastctl OLD_FILES+=sbin/hastd OLD_FILES+=usr/share/examples/hast/ucarp.sh @@ -3106,6 +3117,7 @@ OLD_FILES+=rescue/rtsol .endif .if ${MK_INETD} == no +OLD_FILES+=etc/inetd.conf OLD_FILES+=etc/rc.d/inetd OLD_FILES+=usr/sbin/inetd OLD_FILES+=usr/share/man/man5/inetd.conf.5.gz @@ -3196,6 +3208,7 @@ OLD_FILES+=usr/share/man/man8/ippool.8.gz .endif .if ${MK_IPFW} == no +OLD_FILES+=etc/rc.d/ipfw OLD_FILES+=etc/periodic/security/500.ipfwdenied OLD_FILES+=etc/periodic/security/550.ipfwlimit OLD_FILES+=sbin/ipfw @@ -4256,6 +4269,8 @@ OLD_FILES+=usr/share/man/man1/host.1.gz .endif .if ${MK_LEGACY_CONSOLE} == no +OLD_FILES+=etc/rc.d/moused +OLD_FILES+=etc/rc.d/syscons OLD_FILES+=usr/sbin/kbdcontrol OLD_FILES+=usr/sbin/kbdmap OLD_FILES+=usr/sbin/moused @@ -6029,6 +6044,7 @@ OLD_FILES+=etc/mail.rc OLD_FILES+=etc/mail/aliases OLD_FILES+=etc/mail/mailer.conf OLD_FILES+=etc/periodic/daily/130.clean-msgs +OLD_FILES+=etc/rc.d/othermta OLD_FILES+=usr/bin/Mail OLD_FILES+=usr/bin/biff OLD_FILES+=usr/bin/from @@ -6501,6 +6517,10 @@ OLD_FILES+=usr/share/man/man8/nghook.8.gz OLD_FILES+=usr/share/man/man8/pppoed.8.gz .endif +.if ${MK_IPFW} == no || ${MK_NETGRAPH} == no +OLD_FILES+=etc/rc.d/ipfw_netflow +.endif + .if ${MK_NETGRAPH_SUPPORT} == no OLD_FILES+=usr/include/bsnmp/snmp_netgraph.h OLD_FILES+=usr/lib/snmp_netgraph.so @@ -6874,8 +6894,12 @@ OLD_FILES+=usr/share/man/man8/nscd.8.gz .endif .if ${MK_NTP} == no +OLD_FILES+=etc/ntp/leap-seconds +OLD_DIRS+=etc/ntp OLD_FILES+=etc/ntp.conf OLD_FILES+=etc/periodic/daily/480.status-ntpd +OLD_FILES+=etc/periodic/daily/480.leapfile-ntpd +OLD_FILES+=etc/rc.d/ntpd OLD_FILES+=usr/bin/ntpq OLD_FILES+=usr/sbin/ntp-keygen OLD_FILES+=usr/sbin/ntpd @@ -7554,6 +7578,7 @@ OLD_FILES+=etc/rc.d/sshd OLD_FILES+=etc/ssh/moduli OLD_FILES+=etc/ssh/ssh_config OLD_FILES+=etc/ssh/sshd_config +OLD_DIRS+=etc/ssh OLD_FILES+=usr/bin/scp OLD_FILES+=usr/bin/sftp OLD_FILES+=usr/bin/slogin @@ -8090,6 +8115,7 @@ OLD_FILES+=${RESCUE_FILES} .endif .if ${MK_ROUTED} == no +OLD_FILES+=etc/rc.d/routed OLD_FILES+=rescue/routed OLD_FILES+=rescue/rtquery OLD_FILES+=sbin/routed @@ -8105,6 +8131,7 @@ OLD_FILES+=etc/periodic/daily/150.clean-hoststat OLD_FILES+=etc/periodic/daily/440.status-mailq OLD_FILES+=etc/periodic/daily/460.status-mail-rejects OLD_FILES+=etc/periodic/daily/500.queuerun +OLD_FILES+=etc/rc.d/sendmail OLD_FILES+=bin/rmail OLD_FILES+=usr/bin/vacation OLD_FILES+=usr/include/libmilter/mfapi.h @@ -10067,6 +10094,7 @@ OLD_FILES+=usr/share/misc/usbdevs .if ${MK_UTMPX} == no OLD_FILES+=etc/periodic/monthly/200.accounting +OLD_FILES+=etc/rc.d/utx OLD_FILES+=usr/bin/last OLD_FILES+=usr/bin/users OLD_FILES+=usr/bin/who From 8323b6417cb5a1e2e9164dac5134d9db19ca4357 Mon Sep 17 00:00:00 2001 From: Ed Maste Date: Sat, 16 Feb 2019 12:49:55 +0000 Subject: [PATCH 54/89] wlandebug: disable PIE to fix build failure libifconfig is built as a static-only PRIVATELIB (and there is no _pie.a version) so disable PIE in libifconfig's consumer. Sponsored by: The FreeBSD Foundation --- usr.sbin/wlandebug/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/usr.sbin/wlandebug/Makefile b/usr.sbin/wlandebug/Makefile index 7ac993319947..8f3455f2ebac 100644 --- a/usr.sbin/wlandebug/Makefile +++ b/usr.sbin/wlandebug/Makefile @@ -2,6 +2,7 @@ PROG= wlandebug MAN= wlandebug.8 +MK_PIE:= no LIBADD+= ifconfig From 4a0225e5fe20d29203f85964a3d11e0a55c716b5 Mon Sep 17 00:00:00 2001 From: Andriy Voskoboinyk Date: Sat, 16 Feb 2019 16:01:23 +0000 Subject: [PATCH 55/89] Refresh OptionalObsoleteFiles.inc for MK_PMC: - Add missing /usr/sbin/pmc, pmcformat.h, libpmcstat.h and pmc.haswellxeon.3 to the list. - Correct man page section for pmcstudy.8. - Include recently added libipt and libopencsd for corresponding TARGET_ARCH MFC after: 5 days --- tools/build/mk/OptionalObsoleteFiles.inc | 72 +++++++++++++++++++++++- 1 file changed, 71 insertions(+), 1 deletion(-) diff --git a/tools/build/mk/OptionalObsoleteFiles.inc b/tools/build/mk/OptionalObsoleteFiles.inc index feecb479def8..100eef719c36 100644 --- a/tools/build/mk/OptionalObsoleteFiles.inc +++ b/tools/build/mk/OptionalObsoleteFiles.inc @@ -7763,8 +7763,76 @@ OLD_FILES+=usr/share/man/man7/pkg.7.gz .if ${MK_PMC} == no OLD_FILES+=usr/bin/pmcstudy +.if ${TARGET_ARCH} == "amd64" +OLD_FILES+=usr/include/libipt/pt_last_ip.h +OLD_FILES+=usr/include/libipt/intel-pt.h +OLD_FILES+=usr/include/libipt/pt_time.h +OLD_FILES+=usr/include/libipt/pt_cpu.h +OLD_FILES+=usr/include/libipt/pt_compiler.h +OLD_DIRS+=usr/include/libipt +.endif +.if ${TARGET_ARCH} == "aarch64" +OLD_FILES+=usr/include/opencsd/c_api/opencsd_c_api.h +OLD_FILES+=usr/include/opencsd/c_api/ocsd_c_api_cust_impl.h +OLD_FILES+=usr/include/opencsd/c_api/ocsd_c_api_types.h +OLD_FILES+=usr/include/opencsd/c_api/ocsd_c_api_cust_fact.h +OLD_FILES+=usr/include/opencsd/c_api/ocsd_c_api_custom.h +OLD_DIRS+=usr/include/opencsd/c_api +OLD_FILES+=usr/include/opencsd/ocsd_if_types.h +OLD_FILES+=usr/include/opencsd/ptm/trc_dcd_mngr_ptm.h +OLD_FILES+=usr/include/opencsd/ptm/trc_pkt_proc_ptm.h +OLD_FILES+=usr/include/opencsd/ptm/trc_cmp_cfg_ptm.h +OLD_FILES+=usr/include/opencsd/ptm/ptm_decoder.h +OLD_FILES+=usr/include/opencsd/ptm/trc_pkt_elem_ptm.h +OLD_FILES+=usr/include/opencsd/ptm/trc_pkt_decode_ptm.h +OLD_FILES+=usr/include/opencsd/ptm/trc_pkt_types_ptm.h +OLD_DIRS+=usr/include/opencsd/ptm +OLD_FILES+=usr/include/opencsd/trc_gen_elem_types.h +OLD_FILES+=usr/include/opencsd/etmv4/trc_pkt_proc_etmv4.h +OLD_FILES+=usr/include/opencsd/etmv4/trc_etmv4_stack_elem.h +OLD_FILES+=usr/include/opencsd/etmv4/etmv4_decoder.h +OLD_FILES+=usr/include/opencsd/etmv4/trc_pkt_elem_etmv4i.h +OLD_FILES+=usr/include/opencsd/etmv4/trc_dcd_mngr_etmv4i.h +OLD_FILES+=usr/include/opencsd/etmv4/trc_pkt_types_etmv4.h +OLD_FILES+=usr/include/opencsd/etmv4/trc_pkt_elem_etmv4d.h +OLD_FILES+=usr/include/opencsd/etmv4/trc_pkt_decode_etmv4i.h +OLD_FILES+=usr/include/opencsd/etmv4/trc_cmp_cfg_etmv4.h +OLD_DIRS+=usr/include/opencsd/etmv4 +OLD_FILES+=usr/include/opencsd/etmv3/trc_pkt_decode_etmv3.h +OLD_FILES+=usr/include/opencsd/etmv3/trc_cmp_cfg_etmv3.h +OLD_FILES+=usr/include/opencsd/etmv3/etmv3_decoder.h +OLD_FILES+=usr/include/opencsd/etmv3/trc_pkt_proc_etmv3.h +OLD_FILES+=usr/include/opencsd/etmv3/trc_pkt_elem_etmv3.h +OLD_FILES+=usr/include/opencsd/etmv3/trc_pkt_types_etmv3.h +OLD_FILES+=usr/include/opencsd/etmv3/trc_dcd_mngr_etmv3.h +OLD_DIRS+=usr/include/opencsd/etmv3 +OLD_FILES+=usr/include/opencsd/trc_pkt_types.h +OLD_FILES+=usr/include/opencsd/stm/trc_pkt_proc_stm.h +OLD_FILES+=usr/include/opencsd/stm/trc_pkt_types_stm.h +OLD_FILES+=usr/include/opencsd/stm/stm_decoder.h +OLD_FILES+=usr/include/opencsd/stm/trc_dcd_mngr_stm.h +OLD_FILES+=usr/include/opencsd/stm/trc_cmp_cfg_stm.h +OLD_FILES+=usr/include/opencsd/stm/trc_pkt_elem_stm.h +OLD_FILES+=usr/include/opencsd/stm/trc_pkt_decode_stm.h +OLD_DIRS+=usr/include/opencsd/stm +OLD_DIRS+=usr/include/opencsd +.endif OLD_FILES+=usr/include/pmc.h OLD_FILES+=usr/include/pmclog.h +OLD_FILES+=usr/include/pmcformat.h +OLD_FILES+=usr/include/libpmcstat.h +.if ${TARGET_ARCH} == "amd64" +OLD_FILES+=usr/lib/libipt.a +OLD_FILES+=usr/lib/libipt.so +OLD_LIBS+=lib/libipt.so.0 +OLD_FILES+=usr/lib/libipt_p.a +.endif +.if ${TARGET_ARCH} == "aarch64" +OLD_FILES+=usr/lib/libopencsd.a +OLD_FILES+=usr/lib/libopencsd.so +OLD_LIBS+=lib/libopencsd.so.0 +OLD_FILES+=usr/lib/libopencsd_p.a +.endif OLD_FILES+=usr/lib/libpmc.a OLD_FILES+=usr/lib/libpmc.so OLD_LIBS+=usr/lib/libpmc.so.5 @@ -7773,10 +7841,10 @@ OLD_FILES+=usr/lib32/libpmc.a OLD_FILES+=usr/lib32/libpmc.so OLD_LIBS+=usr/lib32/libpmc.so.5 OLD_FILES+=usr/lib32/libpmc_p.a +OLD_FILES+=usr/sbin/pmc OLD_FILES+=usr/sbin/pmcannotate OLD_FILES+=usr/sbin/pmccontrol OLD_FILES+=usr/sbin/pmcstat -OLD_FILES+=usr/share/man/man1/pmcstudy.1.gz OLD_FILES+=usr/share/man/man3/pmc.3.gz OLD_FILES+=usr/share/man/man3/pmc.atom.3.gz OLD_FILES+=usr/share/man/man3/pmc.atomsilvermont.3.gz @@ -7786,6 +7854,7 @@ OLD_FILES+=usr/share/man/man3/pmc.corei7.3.gz OLD_FILES+=usr/share/man/man3/pmc.corei7uc.3.gz OLD_FILES+=usr/share/man/man3/pmc.haswell.3.gz OLD_FILES+=usr/share/man/man3/pmc.haswelluc.3.gz +OLD_FILES+=usr/share/man/man3/pmc.haswellxeon.3.gz OLD_FILES+=usr/share/man/man3/pmc.iaf.3.gz OLD_FILES+=usr/share/man/man3/pmc.ivybridge.3.gz OLD_FILES+=usr/share/man/man3/pmc.ivybridgexeon.3.gz @@ -7845,6 +7914,7 @@ OLD_FILES+=usr/share/man/man3/pmclog_read.3.gz OLD_FILES+=usr/share/man/man8/pmcannotate.8.gz OLD_FILES+=usr/share/man/man8/pmccontrol.8.gz OLD_FILES+=usr/share/man/man8/pmcstat.8.gz +OLD_FILES+=usr/share/man/man8/pmcstudy.8.gz .endif .if ${MK_PORTSNAP} == no From fe3d954e9ab252562f5bc7c49aed4fd29526388d Mon Sep 17 00:00:00 2001 From: Andriy Voskoboinyk Date: Sat, 16 Feb 2019 16:17:46 +0000 Subject: [PATCH 56/89] Remove corresponding lib32/ files when WITHOUT_OFED=1 is set MFC after: 5 days MFC with: 344207 --- tools/build/mk/OptionalObsoleteFiles.inc | 50 ++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/tools/build/mk/OptionalObsoleteFiles.inc b/tools/build/mk/OptionalObsoleteFiles.inc index 100eef719c36..29fdeb0688e5 100644 --- a/tools/build/mk/OptionalObsoleteFiles.inc +++ b/tools/build/mk/OptionalObsoleteFiles.inc @@ -7407,6 +7407,56 @@ OLD_FILES+=usr/lib/librdmacm.a OLD_FILES+=usr/lib/librdmacm.so OLD_LIBS+=usr/lib/librdmacm.so.1 OLD_FILES+=usr/lib/librdmacm_p.a +.if ${TARGET_ARCH} == "amd64" || ${TARGET_ARCH} == "powerpc64" +OLD_FILES+=usr/lib32/libcxgb4.a +OLD_FILES+=usr/lib32/libcxgb4.so +OLD_LIBS+=usr/lib32/libcxgb4.so.1 +OLD_FILES+=usr/lib32/libcxgb4_p.a +OLD_FILES+=usr/lib32/libibcm.a +OLD_FILES+=usr/lib32/libibcm.so +OLD_LIBS+=usr/lib32/libibcm.so.1 +OLD_FILES+=usr/lib32/libibcm_p.a +OLD_FILES+=usr/lib32/libibmad.a +OLD_FILES+=usr/lib32/libibmad.so +OLD_LIBS+=usr/lib32/libibmad.so.5 +OLD_FILES+=usr/lib32/libibmad_p.a +OLD_FILES+=usr/lib32/libibnetdisc.a +OLD_FILES+=usr/lib32/libibnetdisc.so +OLD_LIBS+=usr/lib32/libibnetdisc.so.5 +OLD_FILES+=usr/lib32/libibnetdisc_p.a +OLD_FILES+=usr/lib32/libibumad.a +OLD_FILES+=usr/lib32/libibumad.so +OLD_LIBS+=usr/lib32/libibumad.so.1 +OLD_FILES+=usr/lib32/libibumad_p.a +OLD_FILES+=usr/lib32/libibverbs.a +OLD_FILES+=usr/lib32/libibverbs.so +OLD_LIBS+=usr/lib32/libibverbs.so.1 +OLD_FILES+=usr/lib32/libibverbs_p.a +OLD_FILES+=usr/lib32/libmlx4.a +OLD_FILES+=usr/lib32/libmlx4.so +OLD_LIBS+=usr/lib32/libmlx4.so.1 +OLD_FILES+=usr/lib32/libmlx4_p.a +OLD_FILES+=usr/lib32/libmlx5.a +OLD_FILES+=usr/lib32/libmlx5.so +OLD_LIBS+=usr/lib32/libmlx5.so.1 +OLD_FILES+=usr/lib32/libmlx5_p.a +OLD_FILES+=usr/lib32/libopensm.a +OLD_FILES+=usr/lib32/libopensm.so +OLD_LIBS+=usr/lib32/libopensm.so.5 +OLD_FILES+=usr/lib32/libopensm_p.a +OLD_FILES+=usr/lib32/libosmcomp.a +OLD_FILES+=usr/lib32/libosmcomp.so +OLD_LIBS+=usr/lib32/libosmcomp.so.3 +OLD_FILES+=usr/lib32/libosmcomp_p.a +OLD_FILES+=usr/lib32/libosmvendor.a +OLD_FILES+=usr/lib32/libosmvendor.so +OLD_LIBS+=usr/lib32/libosmvendor.so.4 +OLD_FILES+=usr/lib32/libosmvendor_p.a +OLD_FILES+=usr/lib32/librdmacm.a +OLD_FILES+=usr/lib32/librdmacm.so +OLD_LIBS+=usr/lib32/librdmacm.so.1 +OLD_FILES+=usr/lib32/librdmacm_p.a +.endif OLD_FILES+=usr/share/man/man1/ibv_asyncwatch.1.gz OLD_FILES+=usr/share/man/man1/ibv_devices.1.gz OLD_FILES+=usr/share/man/man1/ibv_devinfo.1.gz From 7588d90e4a22585cfff3b725fa7bf570cc7bd09d Mon Sep 17 00:00:00 2001 From: Andriy Voskoboinyk Date: Sat, 16 Feb 2019 16:34:23 +0000 Subject: [PATCH 57/89] Few more corrections to WITHOUT_OFED=1 make delete-old removal: - Drop profile libraries; MK_PROFILE=no is set in all Makefile's. - Correct library path to libmlx5.so.1 and libibverbs.so.1 MFC after: 5 days MFC with: 344207 --- tools/build/mk/OptionalObsoleteFiles.inc | 28 ++---------------------- 1 file changed, 2 insertions(+), 26 deletions(-) diff --git a/tools/build/mk/OptionalObsoleteFiles.inc b/tools/build/mk/OptionalObsoleteFiles.inc index 29fdeb0688e5..b496b590b210 100644 --- a/tools/build/mk/OptionalObsoleteFiles.inc +++ b/tools/build/mk/OptionalObsoleteFiles.inc @@ -7362,100 +7362,76 @@ OLD_DIRS+=usr/include/infiniband OLD_FILES+=usr/lib/libcxgb4.a OLD_FILES+=usr/lib/libcxgb4.so OLD_LIBS+=usr/lib/libcxgb4.so.1 -OLD_FILES+=usr/lib/libcxgb4_p.a OLD_FILES+=usr/lib/libibcm.a OLD_FILES+=usr/lib/libibcm.so OLD_LIBS+=usr/lib/libibcm.so.1 -OLD_FILES+=usr/lib/libibcm_p.a OLD_FILES+=usr/lib/libibmad.a OLD_FILES+=usr/lib/libibmad.so OLD_LIBS+=usr/lib/libibmad.so.5 -OLD_FILES+=usr/lib/libibmad_p.a OLD_FILES+=usr/lib/libibnetdisc.a OLD_FILES+=usr/lib/libibnetdisc.so OLD_LIBS+=usr/lib/libibnetdisc.so.5 -OLD_FILES+=usr/lib/libibnetdisc_p.a OLD_FILES+=usr/lib/libibumad.a OLD_FILES+=usr/lib/libibumad.so OLD_LIBS+=usr/lib/libibumad.so.1 -OLD_FILES+=usr/lib/libibumad_p.a OLD_FILES+=usr/lib/libibverbs.a OLD_FILES+=usr/lib/libibverbs.so -OLD_LIBS+=usr/lib/libibverbs.so.1 -OLD_FILES+=usr/lib/libibverbs_p.a +OLD_LIBS+=lib/libibverbs.so.1 OLD_FILES+=usr/lib/libmlx4.a OLD_FILES+=usr/lib/libmlx4.so OLD_LIBS+=usr/lib/libmlx4.so.1 -OLD_FILES+=usr/lib/libmlx4_p.a OLD_FILES+=usr/lib/libmlx5.a OLD_FILES+=usr/lib/libmlx5.so -OLD_LIBS+=usr/lib/libmlx5.so.1 -OLD_FILES+=usr/lib/libmlx5_p.a +OLD_LIBS+=lib/libmlx5.so.1 OLD_FILES+=usr/lib/libopensm.a OLD_FILES+=usr/lib/libopensm.so OLD_LIBS+=usr/lib/libopensm.so.5 -OLD_FILES+=usr/lib/libopensm_p.a OLD_FILES+=usr/lib/libosmcomp.a OLD_FILES+=usr/lib/libosmcomp.so OLD_LIBS+=usr/lib/libosmcomp.so.3 -OLD_FILES+=usr/lib/libosmcomp_p.a OLD_FILES+=usr/lib/libosmvendor.a OLD_FILES+=usr/lib/libosmvendor.so OLD_LIBS+=usr/lib/libosmvendor.so.4 -OLD_FILES+=usr/lib/libosmvendor_p.a OLD_FILES+=usr/lib/librdmacm.a OLD_FILES+=usr/lib/librdmacm.so OLD_LIBS+=usr/lib/librdmacm.so.1 -OLD_FILES+=usr/lib/librdmacm_p.a .if ${TARGET_ARCH} == "amd64" || ${TARGET_ARCH} == "powerpc64" OLD_FILES+=usr/lib32/libcxgb4.a OLD_FILES+=usr/lib32/libcxgb4.so OLD_LIBS+=usr/lib32/libcxgb4.so.1 -OLD_FILES+=usr/lib32/libcxgb4_p.a OLD_FILES+=usr/lib32/libibcm.a OLD_FILES+=usr/lib32/libibcm.so OLD_LIBS+=usr/lib32/libibcm.so.1 -OLD_FILES+=usr/lib32/libibcm_p.a OLD_FILES+=usr/lib32/libibmad.a OLD_FILES+=usr/lib32/libibmad.so OLD_LIBS+=usr/lib32/libibmad.so.5 -OLD_FILES+=usr/lib32/libibmad_p.a OLD_FILES+=usr/lib32/libibnetdisc.a OLD_FILES+=usr/lib32/libibnetdisc.so OLD_LIBS+=usr/lib32/libibnetdisc.so.5 -OLD_FILES+=usr/lib32/libibnetdisc_p.a OLD_FILES+=usr/lib32/libibumad.a OLD_FILES+=usr/lib32/libibumad.so OLD_LIBS+=usr/lib32/libibumad.so.1 -OLD_FILES+=usr/lib32/libibumad_p.a OLD_FILES+=usr/lib32/libibverbs.a OLD_FILES+=usr/lib32/libibverbs.so OLD_LIBS+=usr/lib32/libibverbs.so.1 -OLD_FILES+=usr/lib32/libibverbs_p.a OLD_FILES+=usr/lib32/libmlx4.a OLD_FILES+=usr/lib32/libmlx4.so OLD_LIBS+=usr/lib32/libmlx4.so.1 -OLD_FILES+=usr/lib32/libmlx4_p.a OLD_FILES+=usr/lib32/libmlx5.a OLD_FILES+=usr/lib32/libmlx5.so OLD_LIBS+=usr/lib32/libmlx5.so.1 -OLD_FILES+=usr/lib32/libmlx5_p.a OLD_FILES+=usr/lib32/libopensm.a OLD_FILES+=usr/lib32/libopensm.so OLD_LIBS+=usr/lib32/libopensm.so.5 -OLD_FILES+=usr/lib32/libopensm_p.a OLD_FILES+=usr/lib32/libosmcomp.a OLD_FILES+=usr/lib32/libosmcomp.so OLD_LIBS+=usr/lib32/libosmcomp.so.3 -OLD_FILES+=usr/lib32/libosmcomp_p.a OLD_FILES+=usr/lib32/libosmvendor.a OLD_FILES+=usr/lib32/libosmvendor.so OLD_LIBS+=usr/lib32/libosmvendor.so.4 -OLD_FILES+=usr/lib32/libosmvendor_p.a OLD_FILES+=usr/lib32/librdmacm.a OLD_FILES+=usr/lib32/librdmacm.so OLD_LIBS+=usr/lib32/librdmacm.so.1 -OLD_FILES+=usr/lib32/librdmacm_p.a .endif OLD_FILES+=usr/share/man/man1/ibv_asyncwatch.1.gz OLD_FILES+=usr/share/man/man1/ibv_devices.1.gz From c9b6ff9c980f9219220cceb18f8e58662da07ad8 Mon Sep 17 00:00:00 2001 From: Kyle Evans Date: Sat, 16 Feb 2019 23:57:38 +0000 Subject: [PATCH 58/89] mdmfs(8): use -o reserve with malloc-backed md(4) Mentioned in mdconfig(8), malloc-backed md(4) can be unstable unless required memory is allocated up front with -o reserve. Furthermore, panics have been observed with md used in fstab on 12.0-RELEASE. Choose the stable route and pass -o reserve. Submitted by: Paul Vixie MFC after: 1 week --- sbin/mdmfs/mdmfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sbin/mdmfs/mdmfs.c b/sbin/mdmfs/mdmfs.c index 6e4a39611a89..2dcf087079e5 100644 --- a/sbin/mdmfs/mdmfs.c +++ b/sbin/mdmfs/mdmfs.c @@ -196,6 +196,7 @@ main(int argc, char **argv) usage(); mdtype = MD_MALLOC; have_mdtype = true; + argappend(&mdconfig_arg, "-o reserve"); break; case 'm': argappend(&newfs_arg, "-m %s", optarg); From e1d8f44fd41f10ee693512bb5efa9ad63a72ffc0 Mon Sep 17 00:00:00 2001 From: Ganbold Tsagaankhuu Date: Sun, 17 Feb 2019 01:16:27 +0000 Subject: [PATCH 59/89] Add sysctl for setting battery charging current. The charging current can be set using steps from 0: 200mA to 13: 2800mA (200mA/step). While there, fix battery charging current related sensor descriptions. Reviewed by: manu Differential Revision: https://reviews.freebsd.org/D19212 --- sys/arm/allwinner/axp81x.c | 45 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 2 deletions(-) diff --git a/sys/arm/allwinner/axp81x.c b/sys/arm/allwinner/axp81x.c index 2913fe918786..8a09f8e0c925 100644 --- a/sys/arm/allwinner/axp81x.c +++ b/sys/arm/allwinner/axp81x.c @@ -120,6 +120,10 @@ MALLOC_DEFINE(M_AXP8XX_REG, "AXP8xx regulator", "AXP8xx power regulator"); #define AXP_VOLTCTL_MASK 0x7f #define AXP_POWERBAT 0x32 #define AXP_POWERBAT_SHUTDOWN (1 << 7) +#define AXP_CHARGERCTL1 0x33 +#define AXP_CHARGERCTL1_MIN 0 +#define AXP_CHARGERCTL1_MAX 13 +#define AXP_CHARGERCTL1_CMASK 0xf #define AXP_IRQEN1 0x40 #define AXP_IRQEN1_ACIN_HI (1 << 6) #define AXP_IRQEN1_ACIN_LO (1 << 5) @@ -614,13 +618,13 @@ static const struct axp8xx_sensors axp8xx_common_sensors[] = { .id = AXP_SENSOR_BATT_CHARGE_CURRENT, .name = "batchargecurrent", .format = "I", - .desc = "Battery Charging Current", + .desc = "Average Battery Charging Current", }, { .id = AXP_SENSOR_BATT_DISCHARGE_CURRENT, .name = "batdischargecurrent", .format = "I", - .desc = "Battery Discharging Current", + .desc = "Average Battery Discharging Current", }, { .id = AXP_SENSOR_BATT_CAPACITY_PERCENT, @@ -889,6 +893,33 @@ axp8xx_shutdown(void *devp, int howto) axp8xx_write(dev, AXP_POWERBAT, AXP_POWERBAT_SHUTDOWN); } +static int +axp8xx_sysctl_chargecurrent(SYSCTL_HANDLER_ARGS) +{ + device_t dev = arg1; + uint8_t data; + int val, error; + + error = axp8xx_read(dev, AXP_CHARGERCTL1, &data, 1); + if (error != 0) + return (error); + + if (bootverbose) + device_printf(dev, "Raw CHARGECTL1 val: 0x%0x\n", data); + val = (data & AXP_CHARGERCTL1_CMASK); + error = sysctl_handle_int(oidp, &val, 0, req); + if (error || !req->newptr) /* error || read request */ + return (error); + + if ((val < AXP_CHARGERCTL1_MIN) || (val > AXP_CHARGERCTL1_MAX)) + return (EINVAL); + + val |= (data & (AXP_CHARGERCTL1_CMASK << 4)); + axp8xx_write(dev, AXP_CHARGERCTL1, val); + + return (0); +} + static int axp8xx_sysctl(SYSCTL_HANDLER_ARGS) { @@ -1482,6 +1513,16 @@ axp8xx_attach(device_t dev) sc->sensors[i].format, sc->sensors[i].desc); } + SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), + SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), + OID_AUTO, "batchargecurrentstep", + CTLTYPE_INT | CTLFLAG_RW, + dev, 0, axp8xx_sysctl_chargecurrent, + "I", "Battery Charging Current Step, " + "0: 200mA, 1: 400mA, 2: 600mA, 3: 800mA, " + "4: 1000mA, 5: 1200mA, 6: 1400mA, 7: 1600mA, " + "8: 1800mA, 9: 2000mA, 10: 2200mA, 11: 2400mA, " + "12: 2600mA, 13: 2800mA"); /* Get thresholds */ if (axp8xx_read(dev, AXP_BAT_CAP_WARN, &val, 1) == 0) { From 07362361e0dbef989aacdf2067bed54bdbba690a Mon Sep 17 00:00:00 2001 From: Patrick Kelsey Date: Sun, 17 Feb 2019 03:35:15 +0000 Subject: [PATCH 60/89] Fix memory corruption bug introduced in r325310 The bug occurred when a bounce buffer was used and the requested read size was greater than the size of the bounce buffer. This commit also rewrites the read logic so that it is easier to systematically verify all alignment and size cases. Reviewed by: allanjude, tsoome MFC after: 3 months Differential Revision: https://reviews.freebsd.org/D19140 --- stand/libsa/zfs/zfs.c | 101 +++++++++++++++++++++++++++++++----------- 1 file changed, 75 insertions(+), 26 deletions(-) diff --git a/stand/libsa/zfs/zfs.c b/stand/libsa/zfs/zfs.c index bad0f5c747f1..f1202d2bf35d 100644 --- a/stand/libsa/zfs/zfs.c +++ b/stand/libsa/zfs/zfs.c @@ -363,51 +363,100 @@ static int vdev_read(vdev_t *vdev, void *priv, off_t offset, void *buf, size_t bytes) { int fd, ret; - size_t res, size, remainder, rb_size, blksz; - unsigned secsz; - off_t off; - char *bouncebuf, *rb_buf; + size_t res, head, tail, total_size, full_sec_size; + unsigned secsz, do_tail_read; + off_t start_sec; + char *outbuf, *bouncebuf; fd = (uintptr_t) priv; + outbuf = (char *) buf; bouncebuf = NULL; ret = ioctl(fd, DIOCGSECTORSIZE, &secsz); if (ret != 0) return (ret); - off = offset / secsz; - remainder = offset % secsz; - if (lseek(fd, off * secsz, SEEK_SET) == -1) - return (errno); + /* + * Handling reads of arbitrary offset and size - multi-sector case + * and single-sector case. + * + * Multi-sector Case + * (do_tail_read = true if tail > 0) + * + * |<----------------------total_size--------------------->| + * | | + * |<--head-->|<--------------bytes------------>|<--tail-->| + * | | | | + * | | |<~full_sec_size~>| | | + * +------------------+ +------------------+ + * | |0101010| . . . |0101011| | + * +------------------+ +------------------+ + * start_sec start_sec + n + * + * + * Single-sector Case + * (do_tail_read = false) + * + * |<------total_size = secsz----->| + * | | + * |<-head->|<---bytes--->|<-tail->| + * +-------------------------------+ + * | |0101010101010| | + * +-------------------------------+ + * start_sec + */ + start_sec = offset / secsz; + head = offset % secsz; + total_size = roundup2(head + bytes, secsz); + tail = total_size - (head + bytes); + do_tail_read = ((tail > 0) && (head + bytes > secsz)); + full_sec_size = total_size; + if (head > 0) + full_sec_size -= secsz; + if (do_tail_read) + full_sec_size -= secsz; - rb_buf = buf; - rb_size = bytes; - size = roundup2(bytes + remainder, secsz); - blksz = size; - if (remainder != 0 || size != bytes) { + /* Return of partial sector data requires a bounce buffer. */ + if ((head > 0) || do_tail_read) { bouncebuf = zfs_alloc(secsz); if (bouncebuf == NULL) { printf("vdev_read: out of memory\n"); return (ENOMEM); } - rb_buf = bouncebuf; - blksz = rb_size - remainder; } - while (bytes > 0) { - res = read(fd, rb_buf, rb_size); - if (res != rb_size) { + if (lseek(fd, start_sec * secsz, SEEK_SET) == -1) + return (errno); + + /* Partial data return from first sector */ + if (head > 0) { + res = read(fd, bouncebuf, secsz); + if (res != secsz) { ret = EIO; goto error; } - if (bytes < blksz) - blksz = bytes; - if (bouncebuf != NULL) - memcpy(buf, rb_buf + remainder, blksz); - buf = (void *)((uintptr_t)buf + blksz); - bytes -= blksz; - remainder = 0; - blksz = rb_size; + memcpy(outbuf, bouncebuf + head, secsz - head); + outbuf += secsz - head; + } + + /* Full data return from read sectors */ + if (full_sec_size > 0) { + res = read(fd, outbuf, full_sec_size); + if (res != full_sec_size) { + ret = EIO; + goto error; + } + outbuf += full_sec_size; + } + + /* Partial data return from last sector */ + if (do_tail_read) { + res = read(fd, bouncebuf, secsz); + if (res != secsz) { + ret = EIO; + goto error; + } + memcpy(outbuf, bouncebuf, secsz - tail); } ret = 0; From 861729a32e1a723ecfd2c5ba009d108879aa661b Mon Sep 17 00:00:00 2001 From: Patrick Kelsey Date: Sun, 17 Feb 2019 03:52:44 +0000 Subject: [PATCH 61/89] Remove whole-disk vdev support from zfsboot This is consistent with the removal of whole-disk vdev support from libsa/zfs/zfs.c in r342151, and is part way to having the LBAs read during probe be fully constrained by partition tables when present. Reviewed by: tsoome MFC after: 3 months Differential Revision: https://reviews.freebsd.org/D19142 --- stand/i386/zfsboot/zfsboot.c | 23 +++++------------------ 1 file changed, 5 insertions(+), 18 deletions(-) diff --git a/stand/i386/zfsboot/zfsboot.c b/stand/i386/zfsboot/zfsboot.c index 28a9e9a39760..fe3c5d1e6b07 100644 --- a/stand/i386/zfsboot/zfsboot.c +++ b/stand/i386/zfsboot/zfsboot.c @@ -545,32 +545,19 @@ probe_drive(struct zfsdsk *zdsk) char *sec; unsigned i; - /* - * If we find a vdev on the whole disk, stop here. - */ - if (vdev_probe(vdev_read2, zdsk, NULL) == 0) - return; - #ifdef LOADER_GELI_SUPPORT /* - * Taste the disk, if it is GELI encrypted, decrypt it and check to see if - * it is a usable vdev then. Otherwise dig - * out the partition table and probe each slice/partition - * in turn for a vdev or GELI encrypted vdev. + * Taste the disk, if it is GELI encrypted, decrypt it then dig out the + * partition table and probe each slice/partition in turn for a vdev or + * GELI encrypted vdev. */ elba = drvsize_ext(zdsk); if (elba > 0) { elba--; } zdsk->gdev = geli_taste(vdev_read, zdsk, elba, "disk%u:0:"); - if (zdsk->gdev != NULL) { - if (geli_havekey(zdsk->gdev) == 0 || - geli_passphrase(zdsk->gdev, gelipw) == 0) { - if (vdev_probe(vdev_read2, zdsk, NULL) == 0) { - return; - } - } - } + if ((zdsk->gdev != NULL) && (geli_havekey(zdsk->gdev) == 0)) + geli_passphrase(zdsk->gdev, gelipw); #endif /* LOADER_GELI_SUPPORT */ sec = dmadat->secbuf; From 602566044a5c23be8b96d90d95e9a7e62418ec33 Mon Sep 17 00:00:00 2001 From: Mark Johnston Date: Sun, 17 Feb 2019 16:35:19 +0000 Subject: [PATCH 62/89] Remove a redundant flag variable. Use the object pointer itself to determine whether the object is locked. No functional change intended. Reviewed by: kib MFC after: 1 week Sponsored by: Netflix Differential Revision: https://reviews.freebsd.org/D19215 --- sys/vm/vm_pageout.c | 43 ++++++++++--------------------------------- 1 file changed, 10 insertions(+), 33 deletions(-) diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c index c69e5e8bd9d7..d2d0a087fc3d 100644 --- a/sys/vm/vm_pageout.c +++ b/sys/vm/vm_pageout.c @@ -695,10 +695,9 @@ vm_pageout_launder(struct vm_domain *vmd, int launder, bool in_shortfall) vm_page_t m, marker; int act_delta, error, numpagedout, queue, starting_target; int vnodes_skipped; - bool obj_locked, pageout_ok; + bool pageout_ok; mtx = NULL; - obj_locked = false; object = NULL; starting_target = launder; vnodes_skipped = 0; @@ -760,22 +759,16 @@ vm_pageout_launder(struct vm_domain *vmd, int launder, bool in_shortfall) } if (object != m->object) { - if (obj_locked) { + if (object != NULL) VM_OBJECT_WUNLOCK(object); - obj_locked = false; - } object = m->object; - } - if (!obj_locked) { if (!VM_OBJECT_TRYWLOCK(object)) { mtx_unlock(mtx); /* Depends on type-stability. */ VM_OBJECT_WLOCK(object); - obj_locked = true; mtx_lock(mtx); goto recheck; - } else - obj_locked = true; + } } if (vm_page_busied(m)) @@ -897,17 +890,13 @@ vm_pageout_launder(struct vm_domain *vmd, int launder, bool in_shortfall) vnodes_skipped++; } mtx = NULL; - obj_locked = false; + object = NULL; } } - if (mtx != NULL) { + if (mtx != NULL) mtx_unlock(mtx); - mtx = NULL; - } - if (obj_locked) { + if (object != NULL) VM_OBJECT_WUNLOCK(object); - obj_locked = false; - } vm_pagequeue_lock(pq); vm_pageout_end_scan(&ss); vm_pagequeue_unlock(pq); @@ -1368,7 +1357,6 @@ vm_pageout_scan_inactive(struct vm_domain *vmd, int shortage, vm_object_t object; int act_delta, addl_page_shortage, deficit, page_shortage; int starting_page_shortage; - bool obj_locked; /* * The addl_page_shortage is an estimate of the number of temporarily @@ -1388,7 +1376,6 @@ vm_pageout_scan_inactive(struct vm_domain *vmd, int shortage, starting_page_shortage = page_shortage = shortage + deficit; mtx = NULL; - obj_locked = false; object = NULL; vm_batchqueue_init(&rq); @@ -1446,22 +1433,16 @@ vm_pageout_scan_inactive(struct vm_domain *vmd, int shortage, } if (object != m->object) { - if (obj_locked) { + if (object != NULL) VM_OBJECT_WUNLOCK(object); - obj_locked = false; - } object = m->object; - } - if (!obj_locked) { if (!VM_OBJECT_TRYWLOCK(object)) { mtx_unlock(mtx); /* Depends on type-stability. */ VM_OBJECT_WLOCK(object); - obj_locked = true; mtx_lock(mtx); goto recheck; - } else - obj_locked = true; + } } if (vm_page_busied(m)) { @@ -1563,14 +1544,10 @@ vm_pageout_scan_inactive(struct vm_domain *vmd, int shortage, reinsert: vm_pageout_reinsert_inactive(&ss, &rq, m); } - if (mtx != NULL) { + if (mtx != NULL) mtx_unlock(mtx); - mtx = NULL; - } - if (obj_locked) { + if (object != NULL) VM_OBJECT_WUNLOCK(object); - obj_locked = false; - } vm_pageout_reinsert_inactive(&ss, &rq, NULL); vm_pageout_reinsert_inactive(&ss, &ss.bq, NULL); vm_pagequeue_lock(pq); From 8cbc89c7d26e903fa88299478d661c6fba5d7f93 Mon Sep 17 00:00:00 2001 From: Mark Johnston Date: Sun, 17 Feb 2019 16:43:44 +0000 Subject: [PATCH 63/89] Fix refcount leaks in the SGX Linux compat ioctl handler. Some argument validation error paths would return without releasing the file reference obtained at the beginning of the function. While here, fix some style bugs and remove trivial debug prints. Reviewed by: kib MFC after: 1 week Sponsored by: The FreeBSD Foundation Differential Revision: https://reviews.freebsd.org/D19214 --- sys/amd64/sgx/sgx_linux.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/sys/amd64/sgx/sgx_linux.c b/sys/amd64/sgx/sgx_linux.c index f2963f28c663..60ebec0e0278 100644 --- a/sys/amd64/sgx/sgx_linux.c +++ b/sys/amd64/sgx/sgx_linux.c @@ -70,30 +70,26 @@ sgx_linux_ioctl(struct thread *td, struct linux_ioctl_args *args) cmd = args->cmd; args->cmd &= ~(LINUX_IOC_IN | LINUX_IOC_OUT); - if (cmd & LINUX_IOC_IN) + if ((cmd & LINUX_IOC_IN) != 0) args->cmd |= IOC_IN; - if (cmd & LINUX_IOC_OUT) + if ((cmd & LINUX_IOC_OUT) != 0) args->cmd |= IOC_OUT; len = IOCPARM_LEN(cmd); if (len > SGX_IOCTL_MAX_DATA_LEN) { - printf("%s: Can't copy data: cmd len is too big %d\n", - __func__, len); - return (EINVAL); + error = EINVAL; + goto out; } - if (cmd & LINUX_IOC_IN) { + if ((cmd & LINUX_IOC_IN) != 0) { error = copyin((void *)args->arg, data, len); - if (error) { - printf("%s: Can't copy data, error %d\n", - __func__, error); - return (EINVAL); - } + if (error != 0) + goto out; } - error = (fo_ioctl(fp, args->cmd, (caddr_t)data, td->td_ucred, td)); + error = fo_ioctl(fp, args->cmd, (caddr_t)data, td->td_ucred, td); +out: fdrop(fp, td); - return (error); } From 648890835c4420d928c495ee1693f3be20ca8100 Mon Sep 17 00:00:00 2001 From: Mark Johnston Date: Sun, 17 Feb 2019 16:56:41 +0000 Subject: [PATCH 64/89] Remove a write-only variable orphaned by r340677. --- sys/kern/sys_pipe.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/sys/kern/sys_pipe.c b/sys/kern/sys_pipe.c index 803c7c03415e..db5dfc68ae24 100644 --- a/sys/kern/sys_pipe.c +++ b/sys/kern/sys_pipe.c @@ -379,9 +379,7 @@ void pipe_dtor(struct pipe *dpipe) { struct pipe *peer; - ino_t ino; - ino = dpipe->pipe_ino; peer = (dpipe->pipe_state & PIPE_NAMED) != 0 ? dpipe->pipe_peer : NULL; funsetown(&dpipe->pipe_sigio); pipeclose(dpipe); From a0705597218bfca01b45135cafa9b2f5009f8d26 Mon Sep 17 00:00:00 2001 From: Patrick Kelsey Date: Sun, 17 Feb 2019 17:47:08 +0000 Subject: [PATCH 65/89] It turns out r344226 narrowed the overrun bug but did not eliminate it entirely This commit fixes a remaining output buffer overrun in the single-sector case when there is a non-zero tail. Reviewed by: allanjude, tsoome MFC after: 3 months MFC with: r344226 Differential Revision: https://reviews.freebsd.org/D19220 --- stand/libsa/zfs/zfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stand/libsa/zfs/zfs.c b/stand/libsa/zfs/zfs.c index f1202d2bf35d..8e9e50b6e85d 100644 --- a/stand/libsa/zfs/zfs.c +++ b/stand/libsa/zfs/zfs.c @@ -435,8 +435,8 @@ vdev_read(vdev_t *vdev, void *priv, off_t offset, void *buf, size_t bytes) ret = EIO; goto error; } - memcpy(outbuf, bouncebuf + head, secsz - head); - outbuf += secsz - head; + memcpy(outbuf, bouncebuf + head, min(secsz - head, bytes)); + outbuf += min(secsz - head, bytes); } /* Full data return from read sectors */ From d97753b5c8f1d32fbcdcbb0d129b49f808245865 Mon Sep 17 00:00:00 2001 From: Mariusz Zaborski Date: Sun, 17 Feb 2019 18:26:27 +0000 Subject: [PATCH 66/89] libnv: fix double free In r343986 we introduced a double free. The structure was already freed fixed in the r302966. This problem was introduced because the GitHub version was out of sync with the FreeBSD one. Submitted by: Mindaugas Rasiukevicius MFC with: r343986 --- sys/contrib/libnv/nvpair.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/sys/contrib/libnv/nvpair.c b/sys/contrib/libnv/nvpair.c index d45ac34f59c5..5f09eb070b06 100644 --- a/sys/contrib/libnv/nvpair.c +++ b/sys/contrib/libnv/nvpair.c @@ -229,14 +229,6 @@ nvpair_remove_nvlist_array(nvpair_t *nvp) nvlarray = __DECONST(nvlist_t **, nvpair_get_nvlist_array(nvp, &count)); for (i = 0; i < count; i++) { - nvlist_t *nvl; - nvpair_t *nnvp; - - nvl = nvlarray[i]; - nnvp = nvlist_get_array_next_nvpair(nvl); - if (nnvp != NULL) { - nvpair_free_structure(nnvp); - } nvlist_set_array_next(nvl, NULL); nvlist_set_parent(nvl, NULL); } From 3bea7b5b05f200df4cabee12e405b8feade16f0e Mon Sep 17 00:00:00 2001 From: Mariusz Zaborski Date: Sun, 17 Feb 2019 18:32:19 +0000 Subject: [PATCH 67/89] libnv: fix revert Reported by: jenkins --- sys/contrib/libnv/nvpair.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sys/contrib/libnv/nvpair.c b/sys/contrib/libnv/nvpair.c index 5f09eb070b06..f8981a7f4c34 100644 --- a/sys/contrib/libnv/nvpair.c +++ b/sys/contrib/libnv/nvpair.c @@ -229,8 +229,8 @@ nvpair_remove_nvlist_array(nvpair_t *nvp) nvlarray = __DECONST(nvlist_t **, nvpair_get_nvlist_array(nvp, &count)); for (i = 0; i < count; i++) { - nvlist_set_array_next(nvl, NULL); - nvlist_set_parent(nvl, NULL); + nvlist_set_array_next(nvlarray[i], NULL); + nvlist_set_parent(nvlarray[i], NULL); } } From 4d02caf7cb304853a14c726360bd78777f521b16 Mon Sep 17 00:00:00 2001 From: Ian Lepore Date: Sun, 17 Feb 2019 23:32:09 +0000 Subject: [PATCH 68/89] Restore loader(8)'s ability for lsdev to show partitions within a bsd slice. I'm pretty sure this used to work at one time, perhaps long ago. It has been failing recently because if you call disk_open() with dev->d_partition set to -1 when d_slice refers to a bsd slice, it assumes you want it to open the first partition within that slice. When you then pass that open dev instance to ptable_open(), it tries to read the start of the 'a' partition and decides there is no recognizable partition type there. This restores the old functionality by resetting d_offset to the start of the raw slice after disk_open() returns. For good measure, d_partition is also set back to -1, although that doesn't currently affect anything. I would have preferred to make disk_open() avoid such rude assumptions and if you ask for partition -1 you get the raw slice. But the commit history shows that someone already did that once (r239058), and had to revert it (r239232), so I didn't even try to go down that road. --- stand/common/disk.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/stand/common/disk.c b/stand/common/disk.c index ec714b24d85a..b0c8d29eac98 100644 --- a/stand/common/disk.c +++ b/stand/common/disk.c @@ -133,6 +133,13 @@ ptable_print(void *arg, const char *pname, const struct ptable_entry *part) dev.d_partition = -1; if (disk_open(&dev, part->end - part->start + 1, od->sectorsize) == 0) { + /* + * disk_open() for partition -1 on a bsd slice assumes + * you want the first bsd partition. Reset things so + * that we're looking at the start of the raw slice. + */ + dev.d_partition = -1; + dev.d_offset = part->start; table = ptable_open(&dev, part->end - part->start + 1, od->sectorsize, ptblread); if (table != NULL) { From 71c96def1c4bea9d6288adaab58ee29488235b75 Mon Sep 17 00:00:00 2001 From: Ian Lepore Date: Sun, 17 Feb 2019 23:38:17 +0000 Subject: [PATCH 69/89] Use a couple local variables to avoid repetitive long expressions that cause line-wrapping. --- stand/common/disk.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/stand/common/disk.c b/stand/common/disk.c index b0c8d29eac98..0f7a3543da23 100644 --- a/stand/common/disk.c +++ b/stand/common/disk.c @@ -112,15 +112,18 @@ ptable_print(void *arg, const char *pname, const struct ptable_entry *part) struct ptable *table; char line[80]; int res; + u_int sectsize; + uint64_t partsize; pa = (struct print_args *)arg; od = (struct open_disk *)pa->dev->dd.d_opendata; + sectsize = od->sectorsize; + partsize = part->end - part->start + 1; sprintf(line, " %s%s: %s", pa->prefix, pname, parttype2str(part->type)); if (pa->verbose) sprintf(line, "%-*s%s", PWIDTH, line, - display_size(part->end - part->start + 1, - od->sectorsize)); + display_size(partsize, sectsize)); strcat(line, "\n"); if (pager_output(line)) return 1; @@ -131,8 +134,7 @@ ptable_print(void *arg, const char *pname, const struct ptable_entry *part) dev.dd.d_unit = pa->dev->dd.d_unit; dev.d_slice = part->index; dev.d_partition = -1; - if (disk_open(&dev, part->end - part->start + 1, - od->sectorsize) == 0) { + if (disk_open(&dev, partsize, sectsize) == 0) { /* * disk_open() for partition -1 on a bsd slice assumes * you want the first bsd partition. Reset things so @@ -140,8 +142,7 @@ ptable_print(void *arg, const char *pname, const struct ptable_entry *part) */ dev.d_partition = -1; dev.d_offset = part->start; - table = ptable_open(&dev, part->end - part->start + 1, - od->sectorsize, ptblread); + table = ptable_open(&dev, partsize, sectsize, ptblread); if (table != NULL) { sprintf(line, " %s%s", pa->prefix, pname); bsd.dev = pa->dev; From 5b777dbfa5d6f94061629b7453bd3270616aadd7 Mon Sep 17 00:00:00 2001 From: Ian Lepore Date: Sun, 17 Feb 2019 23:46:11 +0000 Subject: [PATCH 70/89] Make lsdev -v output line up in neat columns by using a fixed width for the size field and a tab between the partition type and the size. Changes this disk devices: disk0 (MMC) disk0s1: DOS/Windows 49MB disk0s2: FreeBSD 14GB disk0s2a: FreeBSD UFS 14GB disk0s2b: Unknown 2048KB disk0s2d: FreeBSD UFS 2040KB to this disk devices: disk0 (MMC) disk0s1: DOS/Windows 49MB disk0s2: FreeBSD 14GB disk0s2a: FreeBSD UFS 14GB disk0s2b: Unknown 2048KB disk0s2d: FreeBSD UFS 2040KB --- stand/common/disk.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/stand/common/disk.c b/stand/common/disk.c index 0f7a3543da23..c008179cb025 100644 --- a/stand/common/disk.c +++ b/stand/common/disk.c @@ -75,7 +75,7 @@ display_size(uint64_t size, u_int sectorsize) size /= 1024; unit = 'M'; } - sprintf(buf, "%ld%cB", (long)size, unit); + sprintf(buf, "%4ld%cB", (long)size, unit); return (buf); } @@ -119,12 +119,9 @@ ptable_print(void *arg, const char *pname, const struct ptable_entry *part) od = (struct open_disk *)pa->dev->dd.d_opendata; sectsize = od->sectorsize; partsize = part->end - part->start + 1; - sprintf(line, " %s%s: %s", pa->prefix, pname, - parttype2str(part->type)); - if (pa->verbose) - sprintf(line, "%-*s%s", PWIDTH, line, - display_size(partsize, sectsize)); - strcat(line, "\n"); + sprintf(line, " %s%s: %s\t%s\n", pa->prefix, pname, + parttype2str(part->type), + pa->verbose ? display_size(partsize, sectsize) : ""); if (pager_output(line)) return 1; res = 0; From 7827bb1c2c17f886f675b4b49becf1a60f5a9f5f Mon Sep 17 00:00:00 2001 From: Ian Lepore Date: Sun, 17 Feb 2019 23:48:51 +0000 Subject: [PATCH 71/89] Garbage collection no-longer-used constant. --- stand/common/disk.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/stand/common/disk.c b/stand/common/disk.c index c008179cb025..560371fb2880 100644 --- a/stand/common/disk.c +++ b/stand/common/disk.c @@ -102,7 +102,6 @@ ptblread(void *d, void *buf, size_t blocks, uint64_t offset) blocks * od->sectorsize, (char *)buf, NULL)); } -#define PWIDTH 35 static int ptable_print(void *arg, const char *pname, const struct ptable_entry *part) { @@ -154,7 +153,6 @@ ptable_print(void *arg, const char *pname, const struct ptable_entry *part) return (res); } -#undef PWIDTH int disk_print(struct disk_devdesc *dev, char *prefix, int verbose) From bc4d122db033e9ff27fd230159e7e903634cf0bb Mon Sep 17 00:00:00 2001 From: Justin Hibbits Date: Mon, 18 Feb 2019 01:57:47 +0000 Subject: [PATCH 72/89] powerpc/boot: Move ubldr to /boot/uboot, and make this a separate filesystem Summary: Now that mpc85xx can boot via ubldr, move ubldr to a separate filesystem, mounted on /boot/uboot, so that a fresh install can boot correctly. Reviewed By: nwhitehorn Differential Revision: https://reviews.freebsd.org/D18709 --- etc/mtree/BSD.root.dist | 2 ++ stand/powerpc/uboot/Makefile | 1 + usr.sbin/bsdinstall/partedit/partedit_powerpc.c | 12 ++++++++---- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/etc/mtree/BSD.root.dist b/etc/mtree/BSD.root.dist index c154a2fbcf86..371530eb0490 100644 --- a/etc/mtree/BSD.root.dist +++ b/etc/mtree/BSD.root.dist @@ -26,6 +26,8 @@ .. modules .. + uboot + .. zfs .. .. diff --git a/stand/powerpc/uboot/Makefile b/stand/powerpc/uboot/Makefile index ccffd0ec1467..9124507397ad 100644 --- a/stand/powerpc/uboot/Makefile +++ b/stand/powerpc/uboot/Makefile @@ -11,6 +11,7 @@ LOADER_BZIP2_SUPPORT?= no .include +BINDIR= /boot/uboot PROG= ubldr NEWVERSWHAT= "U-Boot loader" ${MACHINE_ARCH} INSTALLFLAGS= -b diff --git a/usr.sbin/bsdinstall/partedit/partedit_powerpc.c b/usr.sbin/bsdinstall/partedit/partedit_powerpc.c index d18f2b77857d..5a3041df5b0c 100644 --- a/usr.sbin/bsdinstall/partedit/partedit_powerpc.c +++ b/usr.sbin/bsdinstall/partedit/partedit_powerpc.c @@ -94,9 +94,10 @@ bootpart_size(const char *part_type) return (0); if (strcmp(platform, "chrp") == 0) return (800*1024); - if (strcmp(platform, "ps3") == 0 || strcmp(platform, "powernv") == 0 || - strcmp(platform, "mpc85xx") == 0) + if (strcmp(platform, "ps3") == 0 || strcmp(platform, "powernv") == 0) return (512*1024*1024); + if (strcmp(platform, "mpc85xx") == 0) + return (16*1024*1024); return (0); } @@ -111,14 +112,17 @@ bootpart_type(const char *scheme, const char **mountpoint) return ("prep-boot"); if (strcmp(platform, "powermac") == 0) return ("apple-boot"); - if (strcmp(platform, "powernv") == 0 || strcmp(platform, "ps3") == 0 || - strcmp(platform, "mpc85xx") == 0) { + if (strcmp(platform, "powernv") == 0 || strcmp(platform, "ps3") == 0) { *mountpoint = "/boot"; if (strcmp(scheme, "GPT") == 0) return ("ms-basic-data"); else if (strcmp(scheme, "MBR") == 0) return ("fat32"); } + if (strcmp(platform, "mpc85xx") == 0) { + *mountpoint = "/boot/uboot"; + return ("fat16"); + } return ("freebsd-boot"); } From d61897abea6c619bf238b5a81d8ab7eda4e905c0 Mon Sep 17 00:00:00 2001 From: Kyle Evans Date: Mon, 18 Feb 2019 02:59:47 +0000 Subject: [PATCH 73/89] lualoader: only clear the screen before first password prompt This was previously an unconditional screen clear, regardless of whether or not we would be prompting for any passwords. This is pointless, given that the screen clear is only there to put our screen into a consistent state before we draw the prompts and do cursor manipulation. This is also the only screen clear besides that to draw the menu. One can now see early pre-loader and loader output with the menu disabled, which may be useful for diagnostics. Reported by: ian MFC after: 3 days --- stand/lua/password.lua | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/stand/lua/password.lua b/stand/lua/password.lua index 4dfaf1a6525d..40cf72811af0 100644 --- a/stand/lua/password.lua +++ b/stand/lua/password.lua @@ -38,6 +38,7 @@ local INCORRECT_PASSWORD = "loader: incorrect password" -- Asterisks as a password mask local show_password_mask = false local twiddle_chars = {"/", "-", "\\", "|"} +local screen_setup = false -- Module exports function password.read(prompt_length) @@ -80,8 +81,6 @@ function password.read(prompt_length) end function password.check() - screen.clear() - screen.defcursor() -- pwd is optionally supplied if we want to check it local function doPrompt(prompt, pwd) local attempts = 1 @@ -90,6 +89,12 @@ function password.check() printc("\r" .. string.rep(" ", #INCORRECT_PASSWORD)) end + if not screen_setup then + screen.clear() + screen.defcursor() + screen_setup = true + end + while true do if attempts > 1 then clear_incorrect_text_prompt() From 994558c335fabca7a2116fd94a4a371aaa72d87a Mon Sep 17 00:00:00 2001 From: Andriy Voskoboinyk Date: Mon, 18 Feb 2019 03:15:25 +0000 Subject: [PATCH 74/89] Fix memory / resource leaks in usr.sbin/rpc.ypupdated/update.c Re-apply r343909 to this file to get the issue fixed. PR: 204956 Reported by: David Binderman MFC after: 5 days --- usr.sbin/rpc.ypupdated/update.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/usr.sbin/rpc.ypupdated/update.c b/usr.sbin/rpc.ypupdated/update.c index 956b057ac9c3..f3d54b833d2f 100644 --- a/usr.sbin/rpc.ypupdated/update.c +++ b/usr.sbin/rpc.ypupdated/update.c @@ -263,11 +263,14 @@ localupdate(char *name, char *filename, u_int op, u_int keylen __unused, sprintf(tmpname, "%s.tmp", filename); rf = fopen(filename, "r"); if (rf == NULL) { - return (ERR_READ); + err = ERR_READ; + goto cleanup; } wf = fopen(tmpname, "w"); if (wf == NULL) { - return (ERR_WRITE); + fclose(rf); + err = ERR_WRITE; + goto cleanup; } err = -1; while (fgets(line, sizeof (line), rf)) { @@ -307,13 +310,17 @@ localupdate(char *name, char *filename, u_int op, u_int keylen __unused, fclose(rf); if (err == 0) { if (rename(tmpname, filename) < 0) { - return (ERR_DBASE); + err = ERR_DBASE; + goto cleanup; } } else { if (unlink(tmpname) < 0) { - return (ERR_DBASE); + err = ERR_DBASE; + goto cleanup; } } +cleanup: + free(tmpname); return (err); } From e5cb99d5a296449de843fcc3d3f2ddc77eac231b Mon Sep 17 00:00:00 2001 From: Andriy Voskoboinyk Date: Mon, 18 Feb 2019 03:23:10 +0000 Subject: [PATCH 75/89] snmp_hostres(3): fix a typo in sanity checks in handle_chunk() PR: 204253 Submitted by: David Binderman MFC after: 5 days --- usr.sbin/bsnmpd/modules/snmp_hostres/hostres_partition_tbl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/usr.sbin/bsnmpd/modules/snmp_hostres/hostres_partition_tbl.c b/usr.sbin/bsnmpd/modules/snmp_hostres/hostres_partition_tbl.c index e7548b6f1dc5..1992c93cdf3d 100644 --- a/usr.sbin/bsnmpd/modules/snmp_hostres/hostres_partition_tbl.c +++ b/usr.sbin/bsnmpd/modules/snmp_hostres/hostres_partition_tbl.c @@ -312,7 +312,7 @@ handle_chunk(int32_t ds_index, const char *chunk_name, off_t chunk_size) assert(chunk_name != NULL); assert(chunk_name[0] != '\0'); - if (chunk_name == NULL || chunk_name == '\0') + if (chunk_name == NULL || chunk_name[0] == '\0') return; HRDBG("ANALYZE chunk %s", chunk_name); From c0347e182cc23853aaca05004d2da9b4d7e47e4a Mon Sep 17 00:00:00 2001 From: Ed Maste Date: Mon, 18 Feb 2019 03:49:16 +0000 Subject: [PATCH 76/89] kdump: expand comment on reasons for CAPFAIL_LOOKUP Comment for CAPFAIL_LOOKUP refered only to paths containing ".." but it is returned for other restricted VFS lookup cases, such as absolute paths or openat(AT_FDCWD, ...). --- usr.bin/kdump/kdump.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/usr.bin/kdump/kdump.c b/usr.bin/kdump/kdump.c index 354b717e4889..25b2508f862f 100644 --- a/usr.bin/kdump/kdump.c +++ b/usr.bin/kdump/kdump.c @@ -2006,7 +2006,7 @@ ktrcapfail(struct ktr_cap_fail *ktr) printf("disallowed system call"); break; case CAPFAIL_LOOKUP: - /* used ".." in strict-relative mode */ + /* absolute or AT_FDCWD path, ".." path, etc. */ printf("restricted VFS lookup"); break; default: From 5a75fc4b11cb31ff90e0f4a93292861e26320ab3 Mon Sep 17 00:00:00 2001 From: Ian Lepore Date: Mon, 18 Feb 2019 04:44:52 +0000 Subject: [PATCH 77/89] Make uboot_devdesc properly alias disk_devdesc, so that parsing the u-boot loaderdev variable works correctly. The uboot_devdesc struct is variously cast back and forth between uboot_devdesc and disk_devdesc as pointers are handed off through various opaque interfaces. uboot_devdesc attempted to mimic the layout of disk_devdesc by having a devdesc struct, followed by a union of some device-specific stuff that included a struct that contains the same fields as a disk_devdesc. However, one of those fields inside the struct is 64-bit which causes the entire union to be 64-bit aligned -- 32 bits of padding is added between the struct devdesc and the union, so the whole mess ends up NOT properly mimicking a disk_devdesc after all. (In disk_devdesc there is also 32 bits of padding, but it shows up immediately before the d_offset field, rather than before the whole collection of d_* fields.) This fixes the problem by using an anonymous union to overlay the devdesc field uboot network devices need with the disk_devdesc that uboot storage devices need. This is a different solution than the one contributed with the PR (so if anything goes wrong, the blame goes to me), but 95% of the credit for this fix goes to Pawel Worach and Manuel Stuhn who analyzed the problem and proposed a fix. PR: 233097 --- stand/uboot/common/main.c | 12 ++++++------ stand/uboot/lib/libuboot.h | 18 +++++++----------- 2 files changed, 13 insertions(+), 17 deletions(-) diff --git a/stand/uboot/common/main.c b/stand/uboot/common/main.c index 50a12cd425fb..b1534b00d268 100644 --- a/stand/uboot/common/main.c +++ b/stand/uboot/common/main.c @@ -310,13 +310,13 @@ print_disk_probe_info() char slice[32]; char partition[32]; - if (currdev.d_disk.slice > 0) - sprintf(slice, "%d", currdev.d_disk.slice); + if (currdev.d_disk.d_slice > 0) + sprintf(slice, "%d", currdev.d_disk.d_slice); else strcpy(slice, ""); - if (currdev.d_disk.partition >= 0) - sprintf(partition, "%d", currdev.d_disk.partition); + if (currdev.d_disk.d_partition >= 0) + sprintf(partition, "%d", currdev.d_disk.d_partition); else strcpy(partition, ""); @@ -332,8 +332,8 @@ probe_disks(int devidx, int load_type, int load_unit, int load_slice, int open_result, unit; struct open_file f; - currdev.d_disk.slice = load_slice; - currdev.d_disk.partition = load_partition; + currdev.d_disk.d_slice = load_slice; + currdev.d_disk.d_partition = load_partition; f.f_devdata = &currdev; open_result = -1; diff --git a/stand/uboot/lib/libuboot.h b/stand/uboot/lib/libuboot.h index 8b81486b1fae..59438e711a65 100644 --- a/stand/uboot/lib/libuboot.h +++ b/stand/uboot/lib/libuboot.h @@ -27,18 +27,14 @@ * $FreeBSD$ */ -struct uboot_devdesc { - struct devdesc dd; /* Must be first. */ - union { - struct { - int slice; - int partition; - off_t offset; - } disk; - } d_kind; -}; +#include -#define d_disk d_kind.disk +struct uboot_devdesc { + union { + struct devdesc dd; + struct disk_devdesc d_disk; + }; +}; /* * Default network packet alignment in memory. On arm arches packets must be From 61250f78c4f025e308f266bbaea13a43f7302380 Mon Sep 17 00:00:00 2001 From: Toomas Soome Date: Mon, 18 Feb 2019 08:26:18 +0000 Subject: [PATCH 78/89] cd9660: dirmatch fails to unmatch when name is prefix for directory record Loader does fail to properly match the file name in directory record and does open file based on prefix match. For fix, we check the name lengths first. Reviewed by: allanjude MFC after: 2 weeks Differential Revision: https://reviews.freebsd.org/D19213 --- stand/libsa/cd9660.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/stand/libsa/cd9660.c b/stand/libsa/cd9660.c index 01fafffb43d8..60b830de483b 100644 --- a/stand/libsa/cd9660.c +++ b/stand/libsa/cd9660.c @@ -241,6 +241,10 @@ dirmatch(struct open_file *f, const char *path, struct iso_directory_record *dp, icase = 1; } else icase = 0; + + if (strlen(path) != len) + return (0); + for (i = len; --i >= 0; path++, cp++) { if (!*path || *path == '/') break; From 19a227ee354eee259f8a61ae537e8bc3f4c72a92 Mon Sep 17 00:00:00 2001 From: Ruslan Bukin Date: Mon, 18 Feb 2019 13:14:53 +0000 Subject: [PATCH 79/89] Avoid orphan sections between __bss_start and .(s)bss. Ensure __bss_start is associated with the next section in case orphan sections are placed directly after .sdata, as has been seen to happen with LLD. Submitted by: "J.R.T. Clarke" Differential Revision: https://reviews.freebsd.org/D18429 --- sys/conf/ldscript.riscv | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sys/conf/ldscript.riscv b/sys/conf/ldscript.riscv index 5fe32aea1782..3665b1058279 100644 --- a/sys/conf/ldscript.riscv +++ b/sys/conf/ldscript.riscv @@ -87,6 +87,10 @@ SECTIONS .sdata : { *(.sdata) } _edata = .; PROVIDE (edata = .); + /* Ensure __bss_start is associated with the next section in case orphan + sections are placed directly after .sdata, as has been seen to happen with + LLD. */ + . = .; __bss_start = .; .sbss : { *(.sbss) *(.scommon) } .bss : From 45100257c683b4421bbf134238df2c3b3aba6fad Mon Sep 17 00:00:00 2001 From: Vincenzo Maffione Date: Mon, 18 Feb 2019 14:21:41 +0000 Subject: [PATCH 80/89] netmap: don't schedule kqueue notify task when kqueue is not used This change adds a counter (kqueue_users) to keep track of how many kqueue users are referencing a given struct nm_selinfo. In this way, nm_os_selwakeup() can schedule the kevent notification task only when kqueue is actually being used. This is important to avoid wasting CPU in the common case where kqueue is not used. Reviewed by: Aleksandr Fedorov MFC after: 1 week Differential Revision: https://reviews.freebsd.org/D19177 --- sys/dev/netmap/netmap_freebsd.c | 29 ++++++++++++++++++++++------- sys/dev/netmap/netmap_kern.h | 3 +++ 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/sys/dev/netmap/netmap_freebsd.c b/sys/dev/netmap/netmap_freebsd.c index d99e526f3d10..c6cd24461b6b 100644 --- a/sys/dev/netmap/netmap_freebsd.c +++ b/sys/dev/netmap/netmap_freebsd.c @@ -105,6 +105,7 @@ int nm_os_selinfo_init(NM_SELINFO_T *si, const char *name) { snprintf(si->mtxname, sizeof(si->mtxname), "nmkl%s", name); mtx_init(&si->m, si->mtxname, NULL, MTX_DEF); knlist_init_mtx(&si->si.si_note, &si->m); + si->kqueue_users = 0; return (0); } @@ -1351,7 +1352,9 @@ void nm_os_selwakeup(struct nm_selinfo *si) { selwakeuppri(&si->si, PI_NET); - taskqueue_enqueue(si->ntfytq, &si->ntfytask); + if (si->kqueue_users > 0) { + taskqueue_enqueue(si->ntfytq, &si->ntfytask); + } } void @@ -1364,20 +1367,28 @@ static void netmap_knrdetach(struct knote *kn) { struct netmap_priv_d *priv = (struct netmap_priv_d *)kn->kn_hook; - struct selinfo *si = &priv->np_si[NR_RX]->si; + struct nm_selinfo *si = priv->np_si[NR_RX]; - nm_prinf("remove selinfo %p", si); - knlist_remove(&si->si_note, kn, /*islocked=*/0); + knlist_remove(&si->si.si_note, kn, /*islocked=*/0); + NMG_LOCK(); + KASSERT(si->kqueue_users > 0, ("kqueue_user underflow on %s", + si->mtxname)); + si->kqueue_users--; + nm_prinf("kqueue users for %s: %d", si->mtxname, si->kqueue_users); + NMG_UNLOCK(); } static void netmap_knwdetach(struct knote *kn) { struct netmap_priv_d *priv = (struct netmap_priv_d *)kn->kn_hook; - struct selinfo *si = &priv->np_si[NR_TX]->si; + struct nm_selinfo *si = priv->np_si[NR_TX]; - nm_prinf("remove selinfo %p", si); - knlist_remove(&si->si_note, kn, /*islocked=*/0); + knlist_remove(&si->si.si_note, kn, /*islocked=*/0); + NMG_LOCK(); + si->kqueue_users--; + nm_prinf("kqueue users for %s: %d", si->mtxname, si->kqueue_users); + NMG_UNLOCK(); } /* @@ -1465,6 +1476,10 @@ netmap_kqfilter(struct cdev *dev, struct knote *kn) kn->kn_fop = (ev == EVFILT_WRITE) ? &netmap_wfiltops : &netmap_rfiltops; kn->kn_hook = priv; + NMG_LOCK(); + si->kqueue_users++; + nm_prinf("kqueue users for %s: %d", si->mtxname, si->kqueue_users); + NMG_UNLOCK(); knlist_add(&si->si.si_note, kn, /*islocked=*/0); return 0; diff --git a/sys/dev/netmap/netmap_kern.h b/sys/dev/netmap/netmap_kern.h index f8db7a38ac9c..ef2b1bcf0fb5 100644 --- a/sys/dev/netmap/netmap_kern.h +++ b/sys/dev/netmap/netmap_kern.h @@ -132,11 +132,14 @@ struct netmap_adapter *netmap_getna(if_t ifp); #define MBUF_QUEUED(m) 1 struct nm_selinfo { + /* Support for select(2) and poll(2). */ struct selinfo si; + /* Support for kqueue(9). See comments in netmap_freebsd.c */ struct taskqueue *ntfytq; struct task ntfytask; struct mtx m; char mtxname[32]; + int kqueue_users; }; From 85c7139a7635e3dc8575b11fa1b33df313778bd4 Mon Sep 17 00:00:00 2001 From: Ian Lepore Date: Mon, 18 Feb 2019 15:09:19 +0000 Subject: [PATCH 81/89] Use DEV_TYP_NONE instead of -1 to indicate no device was specified. DEV_TYP_NONE has a value of zero, which makes more sense since the device type is a bunch of bits describing the device, crammed into an int. --- stand/uboot/common/main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/stand/uboot/common/main.c b/stand/uboot/common/main.c index b1534b00d268..f31ff0694931 100644 --- a/stand/uboot/common/main.c +++ b/stand/uboot/common/main.c @@ -156,7 +156,7 @@ get_device_type(const char *devstr, int *devtype) printf("Unknown device type '%s'\n", devstr); } - *devtype = -1; + *devtype = DEV_TYP_NONE; return (NULL); } @@ -202,7 +202,7 @@ get_load_device(int *type, int *unit, int *slice, int *partition) const char *p; char *endp; - *type = -1; + *type = DEV_TYP_NONE; *unit = -1; *slice = 0; *partition = -1; @@ -467,14 +467,14 @@ main(int argc, char **argv) currdev.dd.d_dev = devsw[i]; currdev.dd.d_unit = 0; - if ((load_type == -1 || (load_type & DEV_TYP_STOR)) && + if ((load_type == DEV_TYP_NONE || (load_type & DEV_TYP_STOR)) && strcmp(devsw[i]->dv_name, "disk") == 0) { if (probe_disks(i, load_type, load_unit, load_slice, load_partition) == 0) break; } - if ((load_type == -1 || (load_type & DEV_TYP_NET)) && + if ((load_type == DEV_TYP_NONE || (load_type & DEV_TYP_NET)) && strcmp(devsw[i]->dv_name, "net") == 0) break; } From e5c0d6a0cabea7137211b35dc62b700b3bc5eb45 Mon Sep 17 00:00:00 2001 From: Ian Lepore Date: Mon, 18 Feb 2019 15:28:12 +0000 Subject: [PATCH 82/89] Fix more places to use DEV_TYP_NONE instead of -1 to indicate 'no device'. --- stand/uboot/common/main.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/stand/uboot/common/main.c b/stand/uboot/common/main.c index f31ff0694931..9cd613ac673f 100644 --- a/stand/uboot/common/main.c +++ b/stand/uboot/common/main.c @@ -221,13 +221,13 @@ get_load_device(int *type, int *unit, int *slice, int *partition) p++; /* Unknown device name, or a known name without unit number. */ - if ((*type == -1) || (*p == '\0')) { + if ((*type == DEV_TYP_NONE) || (*p == '\0')) { return; } /* Malformed unit number. */ if (!isdigit(*p)) { - *type = -1; + *type = DEV_TYP_NONE; return; } @@ -242,7 +242,7 @@ get_load_device(int *type, int *unit, int *slice, int *partition) /* Device string is malformed beyond unit number. */ if (*p != ':') { - *type = -1; + *type = DEV_TYP_NONE; *unit = -1; return; } @@ -255,7 +255,7 @@ get_load_device(int *type, int *unit, int *slice, int *partition) /* Only DEV_TYP_STOR devices can have a slice specification. */ if (!(*type & DEV_TYP_STOR)) { - *type = -1; + *type = DEV_TYP_NONE; *unit = -1; return; } @@ -264,7 +264,7 @@ get_load_device(int *type, int *unit, int *slice, int *partition) /* Malformed slice number. */ if (p == endp) { - *type = -1; + *type = DEV_TYP_NONE; *unit = -1; *slice = 0; return; @@ -278,7 +278,7 @@ get_load_device(int *type, int *unit, int *slice, int *partition) /* Device string is malformed beyond slice number. */ if (*p != '.') { - *type = -1; + *type = DEV_TYP_NONE; *unit = -1; *slice = 0; return; @@ -298,7 +298,7 @@ get_load_device(int *type, int *unit, int *slice, int *partition) return; /* Junk beyond partition number. */ - *type = -1; + *type = DEV_TYP_NONE; *unit = -1; *slice = 0; *partition = -1; From 8cbe929be50963827adde71d4df8c292e0e42805 Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Mon, 18 Feb 2019 16:02:00 +0000 Subject: [PATCH 83/89] amd64: cleanup pmap_init_pat(). The pmap_works variable is always true for amd64. Remove it, the branch in the initialization taken when false, and corresponding sysctl. Remove pat_table[] local array, work on pat_index[] directly. Collapse whole initialization to not override already assigned values. Add comment explaining the choice for PAT4 and PAT7. Reviewed by: alc, markj Sponsored by: The FreeBSD Foundation MFC after: 1 week MFC note: Leave the sysctl around Differential revision: https://reviews.freebsd.org/D19225 --- sys/amd64/amd64/pmap.c | 56 ++++++++++++++---------------------------- 1 file changed, 18 insertions(+), 38 deletions(-) diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index 94ad7d1d856a..c45b4316622b 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -353,10 +353,6 @@ pt_entry_t pg_nx; static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters"); -static int pat_works = 1; -SYSCTL_INT(_vm_pmap, OID_AUTO, pat_works, CTLFLAG_RD, &pat_works, 1, - "Is page attribute table fully functional?"); - static int pg_ps_enabled = 1; SYSCTL_INT(_vm_pmap, OID_AUTO, pg_ps_enabled, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &pg_ps_enabled, 0, "Are large page mappings enabled?"); @@ -1222,7 +1218,6 @@ pmap_bootstrap(vm_paddr_t *firstaddr) void pmap_init_pat(void) { - int pat_table[PAT_INDEX_SIZE]; uint64_t pat_msr; u_long cr0, cr4; int i; @@ -1233,45 +1228,32 @@ pmap_init_pat(void) /* Set default PAT index table. */ for (i = 0; i < PAT_INDEX_SIZE; i++) - pat_table[i] = -1; - pat_table[PAT_WRITE_BACK] = 0; - pat_table[PAT_WRITE_THROUGH] = 1; - pat_table[PAT_UNCACHEABLE] = 3; - pat_table[PAT_WRITE_COMBINING] = 3; - pat_table[PAT_WRITE_PROTECTED] = 3; - pat_table[PAT_UNCACHED] = 3; + pat_index[i] = -1; + pat_index[PAT_WRITE_BACK] = 0; + pat_index[PAT_WRITE_THROUGH] = 1; + pat_index[PAT_UNCACHEABLE] = 3; + pat_index[PAT_WRITE_COMBINING] = 6; + pat_index[PAT_WRITE_PROTECTED] = 5; + pat_index[PAT_UNCACHED] = 2; - /* Initialize default PAT entries. */ + /* + * Initialize default PAT entries. + * Leave the indices 0-3 at the default of WB, WT, UC-, and UC. + * Program 5 and 6 as WP and WC. + * + * Leave 4 and 7 as WB and UC. Note that a recursive page table + * mapping for a 2M page uses a PAT value with the bit 3 set due + * to its overload with PG_PS. + */ pat_msr = PAT_VALUE(0, PAT_WRITE_BACK) | PAT_VALUE(1, PAT_WRITE_THROUGH) | PAT_VALUE(2, PAT_UNCACHED) | PAT_VALUE(3, PAT_UNCACHEABLE) | PAT_VALUE(4, PAT_WRITE_BACK) | - PAT_VALUE(5, PAT_WRITE_THROUGH) | - PAT_VALUE(6, PAT_UNCACHED) | + PAT_VALUE(5, PAT_WRITE_PROTECTED) | + PAT_VALUE(6, PAT_WRITE_COMBINING) | PAT_VALUE(7, PAT_UNCACHEABLE); - if (pat_works) { - /* - * Leave the indices 0-3 at the default of WB, WT, UC-, and UC. - * Program 5 and 6 as WP and WC. - * Leave 4 and 7 as WB and UC. - */ - pat_msr &= ~(PAT_MASK(5) | PAT_MASK(6)); - pat_msr |= PAT_VALUE(5, PAT_WRITE_PROTECTED) | - PAT_VALUE(6, PAT_WRITE_COMBINING); - pat_table[PAT_UNCACHED] = 2; - pat_table[PAT_WRITE_PROTECTED] = 5; - pat_table[PAT_WRITE_COMBINING] = 6; - } else { - /* - * Just replace PAT Index 2 with WC instead of UC-. - */ - pat_msr &= ~PAT_MASK(2); - pat_msr |= PAT_VALUE(2, PAT_WRITE_COMBINING); - pat_table[PAT_WRITE_COMBINING] = 2; - } - /* Disable PGE. */ cr4 = rcr4(); load_cr4(cr4 & ~CR4_PGE); @@ -1286,8 +1268,6 @@ pmap_init_pat(void) /* Update PAT and index table. */ wrmsr(MSR_PAT, pat_msr); - for (i = 0; i < PAT_INDEX_SIZE; i++) - pat_index[i] = pat_table[i]; /* Flush caches and TLBs again. */ wbinvd(); From 28ef0240e016add2f2e82d67417f7c1dffd87083 Mon Sep 17 00:00:00 2001 From: Ian Lepore Date: Mon, 18 Feb 2019 17:12:30 +0000 Subject: [PATCH 84/89] Allow the u-boot loaderdev env var to be formatted in the "usual" loader(8) way: device[s|p]. E.g., disk0s2a or disk3p12. The code first tries to parse the variable in this format using the standard disk_parsedev(). If that fails, it falls back to parsing the legacy format that has been supported by ubldr for years. In addition to 'disk', all the valid uboot device names can also be used: mmc, sata, usb, ide, scsi. The 'disk' device serves as an alias for all those types and will match the Nth storage-type device found (where N is the unit number). --- stand/uboot/common/main.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/stand/uboot/common/main.c b/stand/uboot/common/main.c index 9cd613ac673f..d420aa95a598 100644 --- a/stand/uboot/common/main.c +++ b/stand/uboot/common/main.c @@ -182,6 +182,14 @@ device_typename(int type) * The returned values for slice and partition are interpreted by * disk_open(). * + * The device string can be a standard loader(8) disk specifier: + * + * disks disk0s1 + * disks disk1s2a + * diskp disk0p4 + * + * or one of the following formats: + * * Valid device strings: For device types: * * DEV_TYP_STOR, DEV_TYP_NET @@ -198,6 +206,7 @@ device_typename(int type) static void get_load_device(int *type, int *unit, int *slice, int *partition) { + struct disk_devdesc dev; char *devstr; const char *p; char *endp; @@ -216,6 +225,19 @@ get_load_device(int *type, int *unit, int *slice, int *partition) p = get_device_type(devstr, type); + /* + * If type is DEV_TYP_STOR we have a disk-like device. If we can parse + * the remainder of the string as a standard unit+slice+partition (e.g., + * 0s2a or 1p12), return those results. Otherwise we'll fall through to + * the code that parses the legacy format. + */ + if ((*type & DEV_TYP_STOR) && disk_parsedev(&dev, p, NULL) == 0) { + *unit = dev.dd.d_unit; + *slice = dev.d_slice; + *partition = dev.d_partition; + return; + } + /* Ignore optional spaces after the device name. */ while (*p == ' ') p++; From 993b827f31878d30ae6281184c9253c8d23a9810 Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Mon, 18 Feb 2019 18:34:13 +0000 Subject: [PATCH 85/89] Pull in r345199 from upstream libc++ trunk (by Petr Hosek): Revert "Teach __libcpp_is_floating_point that __fp16 and _Float16 are floating-point types." This reverts commits r333103 and r333108. _Float16 and __fp16 are C11 extensions and compilers other than Clang don't define these for C++. Differential Revision: https://reviews.llvm.org/D53670 This prevents "_Float16 is not supported on this target" errors in libc++'s type_traits header, in some cases. Reported by: Charlie Li MFC after: 3 days --- contrib/libc++/include/type_traits | 6 ------ 1 file changed, 6 deletions(-) diff --git a/contrib/libc++/include/type_traits b/contrib/libc++/include/type_traits index 30bfb161c0d4..479c2e039ff1 100644 --- a/contrib/libc++/include/type_traits +++ b/contrib/libc++/include/type_traits @@ -733,12 +733,6 @@ _LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_integral_v // is_floating_point template struct __libcpp_is_floating_point : public false_type {}; -#ifdef __clang__ -template <> struct __libcpp_is_floating_point<__fp16> : public true_type {}; -#endif -#ifdef __FLT16_MANT_DIG__ -template <> struct __libcpp_is_floating_point<_Float16> : public true_type {}; -#endif template <> struct __libcpp_is_floating_point : public true_type {}; template <> struct __libcpp_is_floating_point : public true_type {}; template <> struct __libcpp_is_floating_point : public true_type {}; From 40af08b3a3fdfef0ded102014c933afb4f97cce7 Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Mon, 18 Feb 2019 19:07:15 +0000 Subject: [PATCH 86/89] Add one additional file to libllvmminimal, since in some cases (e.g. upgrading from stable/10 to stable/11) symbols from it are needed to link llvm-tblgen and clang-tblgen. Reported by: kib MFC after: 3 days --- lib/clang/libllvmminimal/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/clang/libllvmminimal/Makefile b/lib/clang/libllvmminimal/Makefile index 248b8e38fc21..6ec0e0a2a249 100644 --- a/lib/clang/libllvmminimal/Makefile +++ b/lib/clang/libllvmminimal/Makefile @@ -18,6 +18,7 @@ SRCS+= Support/Errno.cpp SRCS+= Support/Error.cpp SRCS+= Support/ErrorHandling.cpp SRCS+= Support/FoldingSet.cpp +SRCS+= Support/FormatVariadic.cpp SRCS+= Support/FormattedStream.cpp SRCS+= Support/Hashing.cpp SRCS+= Support/Host.cpp From 256c1bca9efa15d92c5e381bc845a080bd9d8f50 Mon Sep 17 00:00:00 2001 From: Ian Lepore Date: Mon, 18 Feb 2019 19:50:53 +0000 Subject: [PATCH 87/89] Add a compatible string to match recent changes in the upstream dts. --- sys/arm/freescale/imx/imx6_snvs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sys/arm/freescale/imx/imx6_snvs.c b/sys/arm/freescale/imx/imx6_snvs.c index 457cc7671a9d..158de9edcfd5 100644 --- a/sys/arm/freescale/imx/imx6_snvs.c +++ b/sys/arm/freescale/imx/imx6_snvs.c @@ -73,6 +73,7 @@ struct snvs_softc { }; static struct ofw_compat_data compat_data[] = { + {"fsl,sec-v4.0-mon-rtc-lp", true}, {"fsl,sec-v4.0-mon", true}, {NULL, false} }; From 63f582e0f5ad0fe006d712e1f680e0be9c532d21 Mon Sep 17 00:00:00 2001 From: Toomas Soome Date: Mon, 18 Feb 2019 20:29:19 +0000 Subject: [PATCH 88/89] loader: ptable_close() should check its argument If the passed in table is NULL, just return. --- stand/common/part.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/stand/common/part.c b/stand/common/part.c index a1fa5e69fde7..672389dad3aa 100644 --- a/stand/common/part.c +++ b/stand/common/part.c @@ -788,6 +788,9 @@ ptable_close(struct ptable *table) { struct pentry *entry; + if (table == NULL) + return; + while (!STAILQ_EMPTY(&table->entries)) { entry = STAILQ_FIRST(&table->entries); STAILQ_REMOVE_HEAD(&table->entries, entry); From aa432983093fc696dcf72c7d3557bbbefee231f9 Mon Sep 17 00:00:00 2001 From: Gleb Smirnoff Date: Mon, 18 Feb 2019 21:27:13 +0000 Subject: [PATCH 89/89] With r343051 UMA switched from atomic counts to counter(9) and now kernel reports snap counts of how much a zone alloced and how much it freed. It may happen that snap values doesn't match, e.g alloced - freed < 0. Workaround that in memstat library. Reported by: pho --- lib/libmemstat/memstat_uma.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/lib/libmemstat/memstat_uma.c b/lib/libmemstat/memstat_uma.c index 87a6b573307b..89b394c151ad 100644 --- a/lib/libmemstat/memstat_uma.c +++ b/lib/libmemstat/memstat_uma.c @@ -213,6 +213,15 @@ memstat_sysctl_uma(struct memory_type_list *list, int flags) mtp->mt_numfrees += upsp->ups_frees; } + /* + * Values for uth_allocs and uth_frees frees are snap. + * It may happen that kernel reports that number of frees + * is greater than number of allocs. See counter(9) for + * details. + */ + if (mtp->mt_numallocs < mtp->mt_numfrees) + mtp->mt_numallocs = mtp->mt_numfrees; + mtp->mt_size = uthp->uth_size; mtp->mt_rsize = uthp->uth_rsize; mtp->mt_memalloced = mtp->mt_numallocs * uthp->uth_size;