From 227ddfdd8f8accdba28cd357261011c5ace2ed72 Mon Sep 17 00:00:00 2001 From: Xin LI Date: Sat, 4 Oct 2014 07:49:06 +0000 Subject: [PATCH 01/59] 5150 zfs clone of a defer_destroy snapshot causes strangeness Reviewed by: Christopher Siden Reviewed by: George Wilson Reviewed by: Max Grossman Reviewed by: Saso Kiselkov Reviewed by: Richard Elling Approved by: Robert Mustacchi Author: Matthew Ahrens illumos/illumos-gate@42fcb65ea4f2c6f8cc5a3c6142a486cb49871fd2 --- uts/common/fs/zfs/dsl_dataset.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/uts/common/fs/zfs/dsl_dataset.c b/uts/common/fs/zfs/dsl_dataset.c index c075c6ac3dd5..f1b92f3eaa33 100644 --- a/uts/common/fs/zfs/dsl_dataset.c +++ b/uts/common/fs/zfs/dsl_dataset.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2013, 2014 by Delphix. All rights reserved. + * Copyright (c) 2011, 2014 by Delphix. All rights reserved. * Copyright (c) 2014, Joyent, Inc. All rights reserved. * Copyright (c) 2014 RackTop Systems. */ @@ -692,7 +692,13 @@ dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin, dsphys->ds_uncompressed_bytes = origin->ds_phys->ds_uncompressed_bytes; dsphys->ds_bp = origin->ds_phys->ds_bp; - dsphys->ds_flags |= origin->ds_phys->ds_flags; + + /* + * Inherit flags that describe the dataset's contents + * (INCONSISTENT) or properties (Case Insensitive). 
+ */ + dsphys->ds_flags |= origin->ds_phys->ds_flags & + (DS_FLAG_INCONSISTENT | DS_FLAG_CI_DATASET); dmu_buf_will_dirty(origin->ds_dbuf, tx); origin->ds_phys->ds_num_children++; From 39eeb9830d131782c80cb4581c8c164f1c70c846 Mon Sep 17 00:00:00 2001 From: Xin LI Date: Sat, 4 Oct 2014 07:50:06 +0000 Subject: [PATCH 02/59] 5177 remove dead code from dsl_scan.c Reviewed by: Christopher Siden Reviewed by: George Wilson Reviewed by: Richard Elling Reviewed by: Richard Lowe Approved by: Robert Mustacchi Author: Matthew Ahrens illumos/illumos-gate@5f37736ac8f99922368294d745d3fefa22b49d11 --- uts/common/fs/zfs/dsl_scan.c | 80 ++++++++++++++++-------------------- 1 file changed, 36 insertions(+), 44 deletions(-) diff --git a/uts/common/fs/zfs/dsl_scan.c b/uts/common/fs/zfs/dsl_scan.c index 7638ee183a6f..2392b7f33695 100644 --- a/uts/common/fs/zfs/dsl_scan.c +++ b/uts/common/fs/zfs/dsl_scan.c @@ -351,13 +351,12 @@ dsl_scan_cancel(dsl_pool_t *dp) dsl_scan_cancel_sync, NULL, 3, ZFS_SPACE_CHECK_RESERVED)); } -static void dsl_scan_visitbp(blkptr_t *bp, - const zbookmark_phys_t *zb, dnode_phys_t *dnp, arc_buf_t *pbuf, - dsl_dataset_t *ds, dsl_scan_t *scn, dmu_objset_type_t ostype, - dmu_tx_t *tx); +static void dsl_scan_visitbp(blkptr_t *bp, const zbookmark_phys_t *zb, + dnode_phys_t *dnp, dsl_dataset_t *ds, dsl_scan_t *scn, + dmu_objset_type_t ostype, dmu_tx_t *tx); static void dsl_scan_visitdnode(dsl_scan_t *, dsl_dataset_t *ds, dmu_objset_type_t ostype, - dnode_phys_t *dnp, arc_buf_t *buf, uint64_t object, dmu_tx_t *tx); + dnode_phys_t *dnp, uint64_t object, dmu_tx_t *tx); void dsl_free(dsl_pool_t *dp, uint64_t txg, const blkptr_t *bp) @@ -590,7 +589,7 @@ dsl_scan_check_resume(dsl_scan_t *scn, const dnode_phys_t *dnp, static int dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype, dnode_phys_t *dnp, const blkptr_t *bp, - const zbookmark_phys_t *zb, dmu_tx_t *tx, arc_buf_t **bufp) + const zbookmark_phys_t *zb, dmu_tx_t *tx) { dsl_pool_t *dp = 
scn->scn_dp; int zio_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCAN_THREAD; @@ -601,76 +600,72 @@ dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype, int i; blkptr_t *cbp; int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT; + arc_buf_t *buf; - err = arc_read(NULL, dp->dp_spa, bp, arc_getbuf_func, bufp, + err = arc_read(NULL, dp->dp_spa, bp, arc_getbuf_func, &buf, ZIO_PRIORITY_ASYNC_READ, zio_flags, &flags, zb); if (err) { scn->scn_phys.scn_errors++; return (err); } - for (i = 0, cbp = (*bufp)->b_data; i < epb; i++, cbp++) { - dsl_scan_prefetch(scn, *bufp, cbp, zb->zb_objset, + for (i = 0, cbp = buf->b_data; i < epb; i++, cbp++) { + dsl_scan_prefetch(scn, buf, cbp, zb->zb_objset, zb->zb_object, zb->zb_blkid * epb + i); } - for (i = 0, cbp = (*bufp)->b_data; i < epb; i++, cbp++) { + for (i = 0, cbp = buf->b_data; i < epb; i++, cbp++) { zbookmark_phys_t czb; SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object, zb->zb_level - 1, zb->zb_blkid * epb + i); dsl_scan_visitbp(cbp, &czb, dnp, - *bufp, ds, scn, ostype, tx); - } - } else if (BP_GET_TYPE(bp) == DMU_OT_USERGROUP_USED) { - uint32_t flags = ARC_WAIT; - - err = arc_read(NULL, dp->dp_spa, bp, arc_getbuf_func, bufp, - ZIO_PRIORITY_ASYNC_READ, zio_flags, &flags, zb); - if (err) { - scn->scn_phys.scn_errors++; - return (err); + ds, scn, ostype, tx); } + (void) arc_buf_remove_ref(buf, &buf); } else if (BP_GET_TYPE(bp) == DMU_OT_DNODE) { uint32_t flags = ARC_WAIT; dnode_phys_t *cdnp; int i, j; int epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT; + arc_buf_t *buf; - err = arc_read(NULL, dp->dp_spa, bp, arc_getbuf_func, bufp, + err = arc_read(NULL, dp->dp_spa, bp, arc_getbuf_func, &buf, ZIO_PRIORITY_ASYNC_READ, zio_flags, &flags, zb); if (err) { scn->scn_phys.scn_errors++; return (err); } - for (i = 0, cdnp = (*bufp)->b_data; i < epb; i++, cdnp++) { + for (i = 0, cdnp = buf->b_data; i < epb; i++, cdnp++) { for (j = 0; j < cdnp->dn_nblkptr; j++) { blkptr_t *cbp = &cdnp->dn_blkptr[j]; - dsl_scan_prefetch(scn, *bufp, cbp, + 
dsl_scan_prefetch(scn, buf, cbp, zb->zb_objset, zb->zb_blkid * epb + i, j); } } - for (i = 0, cdnp = (*bufp)->b_data; i < epb; i++, cdnp++) { + for (i = 0, cdnp = buf->b_data; i < epb; i++, cdnp++) { dsl_scan_visitdnode(scn, ds, ostype, - cdnp, *bufp, zb->zb_blkid * epb + i, tx); + cdnp, zb->zb_blkid * epb + i, tx); } + (void) arc_buf_remove_ref(buf, &buf); } else if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) { uint32_t flags = ARC_WAIT; objset_phys_t *osp; + arc_buf_t *buf; - err = arc_read(NULL, dp->dp_spa, bp, arc_getbuf_func, bufp, + err = arc_read(NULL, dp->dp_spa, bp, arc_getbuf_func, &buf, ZIO_PRIORITY_ASYNC_READ, zio_flags, &flags, zb); if (err) { scn->scn_phys.scn_errors++; return (err); } - osp = (*bufp)->b_data; + osp = buf->b_data; dsl_scan_visitdnode(scn, ds, osp->os_type, - &osp->os_meta_dnode, *bufp, DMU_META_DNODE_OBJECT, tx); + &osp->os_meta_dnode, DMU_META_DNODE_OBJECT, tx); - if (OBJSET_BUF_HAS_USERUSED(*bufp)) { + if (OBJSET_BUF_HAS_USERUSED(buf)) { /* * We also always visit user/group accounting * objects, and never skip them, even if we are @@ -678,12 +673,13 @@ dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype, * deltas from this txg get integrated. */ dsl_scan_visitdnode(scn, ds, osp->os_type, - &osp->os_groupused_dnode, *bufp, + &osp->os_groupused_dnode, DMU_GROUPUSED_OBJECT, tx); dsl_scan_visitdnode(scn, ds, osp->os_type, - &osp->os_userused_dnode, *bufp, + &osp->os_userused_dnode, DMU_USERUSED_OBJECT, tx); } + (void) arc_buf_remove_ref(buf, &buf); } return (0); @@ -691,7 +687,7 @@ dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype, static void dsl_scan_visitdnode(dsl_scan_t *scn, dsl_dataset_t *ds, - dmu_objset_type_t ostype, dnode_phys_t *dnp, arc_buf_t *buf, + dmu_objset_type_t ostype, dnode_phys_t *dnp, uint64_t object, dmu_tx_t *tx) { int j; @@ -702,7 +698,7 @@ dsl_scan_visitdnode(dsl_scan_t *scn, dsl_dataset_t *ds, SET_BOOKMARK(&czb, ds ? 
ds->ds_object : 0, object, dnp->dn_nlevels - 1, j); dsl_scan_visitbp(&dnp->dn_blkptr[j], - &czb, dnp, buf, ds, scn, ostype, tx); + &czb, dnp, ds, scn, ostype, tx); } if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) { @@ -710,7 +706,7 @@ dsl_scan_visitdnode(dsl_scan_t *scn, dsl_dataset_t *ds, SET_BOOKMARK(&czb, ds ? ds->ds_object : 0, object, 0, DMU_SPILL_BLKID); dsl_scan_visitbp(&dnp->dn_spill, - &czb, dnp, buf, ds, scn, ostype, tx); + &czb, dnp, ds, scn, ostype, tx); } } @@ -720,9 +716,8 @@ dsl_scan_visitdnode(dsl_scan_t *scn, dsl_dataset_t *ds, */ static void dsl_scan_visitbp(blkptr_t *bp, const zbookmark_phys_t *zb, - dnode_phys_t *dnp, arc_buf_t *pbuf, - dsl_dataset_t *ds, dsl_scan_t *scn, dmu_objset_type_t ostype, - dmu_tx_t *tx) + dnode_phys_t *dnp, dsl_dataset_t *ds, dsl_scan_t *scn, + dmu_objset_type_t ostype, dmu_tx_t *tx) { dsl_pool_t *dp = scn->scn_dp; arc_buf_t *buf = NULL; @@ -742,16 +737,15 @@ dsl_scan_visitbp(blkptr_t *bp, const zbookmark_phys_t *zb, scn->scn_visited_this_txg++; dprintf_bp(bp, - "visiting ds=%p/%llu zb=%llx/%llx/%llx/%llx buf=%p bp=%p", + "visiting ds=%p/%llu zb=%llx/%llx/%llx/%llx bp=%p", ds, ds ? ds->ds_object : 0, zb->zb_objset, zb->zb_object, zb->zb_level, zb->zb_blkid, - pbuf, bp); + bp); if (bp->blk_birth <= scn->scn_phys.scn_cur_min_txg) return; - if (dsl_scan_recurse(scn, ds, ostype, dnp, &bp_toread, zb, tx, - &buf) != 0) + if (dsl_scan_recurse(scn, ds, ostype, dnp, &bp_toread, zb, tx) != 0) return; /* @@ -775,8 +769,6 @@ dsl_scan_visitbp(blkptr_t *bp, const zbookmark_phys_t *zb, if (BP_PHYSICAL_BIRTH(bp) <= scn->scn_phys.scn_cur_max_txg) { scan_funcs[scn->scn_phys.scn_func](dp, bp, zb); } - if (buf) - (void) arc_buf_remove_ref(buf, &buf); } static void @@ -787,7 +779,7 @@ dsl_scan_visit_rootbp(dsl_scan_t *scn, dsl_dataset_t *ds, blkptr_t *bp, SET_BOOKMARK(&zb, ds ? 
ds->ds_object : DMU_META_OBJSET, ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID); - dsl_scan_visitbp(bp, &zb, NULL, NULL, + dsl_scan_visitbp(bp, &zb, NULL, ds, scn, DMU_OST_NONE, tx); dprintf_ds(ds, "finished scan%s", ""); From e3cadfdb321604d343e2315e46b988e428811d32 Mon Sep 17 00:00:00 2001 From: "Alexander V. Chernikov" Date: Sat, 4 Oct 2014 12:46:26 +0000 Subject: [PATCH 03/59] Bump max rule size to 512 opcodes. --- sys/netpfil/ipfw/ip_fw_sockopt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/netpfil/ipfw/ip_fw_sockopt.c b/sys/netpfil/ipfw/ip_fw_sockopt.c index f5fbd15ed25f..4e5b56a7683f 100644 --- a/sys/netpfil/ipfw/ip_fw_sockopt.c +++ b/sys/netpfil/ipfw/ip_fw_sockopt.c @@ -940,7 +940,7 @@ ipfw_getrules(struct ip_fw_chain *chain, void *buf, size_t space) int ipfw_ctl(struct sockopt *sopt) { -#define RULE_MAXSIZE (256*sizeof(u_int32_t)) +#define RULE_MAXSIZE (512*sizeof(u_int32_t)) int error; size_t size, len, valsize; struct ip_fw *buf, *rule; From 657be2acf7c94bdc7e51115b4764e0c1e294d163 Mon Sep 17 00:00:00 2001 From: Andrew Turner Date: Sat, 4 Oct 2014 13:14:37 +0000 Subject: [PATCH 04/59] Add movw and movt relocations to the list of relocations against function names that must not be adjusted. This fixes a bug where code such as: movw r2, :lower16:symbol movt r2, :upper16:symbol It is common for clang to generate such code when targeting armv7. 
--- contrib/binutils/gas/config/tc-arm.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/contrib/binutils/gas/config/tc-arm.c b/contrib/binutils/gas/config/tc-arm.c index 1d9e27c0e834..8b470c0335e0 100644 --- a/contrib/binutils/gas/config/tc-arm.c +++ b/contrib/binutils/gas/config/tc-arm.c @@ -19395,6 +19395,12 @@ arm_fix_adjustable (fixS * fixP) || fixP->fx_r_type == BFD_RELOC_ARM_LDR_PC_G0) return 0; + if (fixP->fx_r_type == BFD_RELOC_ARM_MOVW + || fixP->fx_r_type == BFD_RELOC_ARM_MOVT + || fixP->fx_r_type == BFD_RELOC_ARM_THUMB_MOVW + || fixP->fx_r_type == BFD_RELOC_ARM_THUMB_MOVT) + return 0; + return 1; } #endif /* defined (OBJ_ELF) || defined (OBJ_COFF) */ From 92720216a934288de13f3de464731bc0bf194985 Mon Sep 17 00:00:00 2001 From: "Bjoern A. Zeeb" Date: Sat, 4 Oct 2014 14:17:30 +0000 Subject: [PATCH 05/59] Put an #ifdef _KERNEL around the #include for opt_capsicum.h to hopefully allow the build to finish after r272505. --- sys/sys/filedesc.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sys/sys/filedesc.h b/sys/sys/filedesc.h index cda6c4ec7303..3b3241cbae3e 100644 --- a/sys/sys/filedesc.h +++ b/sys/sys/filedesc.h @@ -33,7 +33,9 @@ #ifndef _SYS_FILEDESC_H_ #define _SYS_FILEDESC_H_ +#ifdef _KERNEL #include "opt_capsicum.h" +#endif #include #include From e0d0c7b8f369b6b975cfe28b684718c608b938da Mon Sep 17 00:00:00 2001 From: Andrew Turner Date: Sat, 4 Oct 2014 14:30:16 +0000 Subject: [PATCH 06/59] Silence a warning about Tag_Virtualization_use being unknown. We don't handle merging this tag correctly, however it's unused. 
--- contrib/binutils/bfd/elf32-arm.c | 3 ++- contrib/binutils/include/elf/arm.h | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/contrib/binutils/bfd/elf32-arm.c b/contrib/binutils/bfd/elf32-arm.c index 02dd21b59468..83acfe564714 100644 --- a/contrib/binutils/bfd/elf32-arm.c +++ b/contrib/binutils/bfd/elf32-arm.c @@ -6965,7 +6965,8 @@ elf32_arm_merge_eabi_attributes (bfd *ibfd, bfd *obfd) for (; in_list; in_list = in_list->next) { - if ((in_list->tag & 128) < 64) + if ((in_list->tag & 128) < 64 + && in_list->tag != Tag_Virtualization_use) { _bfd_error_handler (_("Warning: %B: Unknown EABI object attribute %d"), diff --git a/contrib/binutils/include/elf/arm.h b/contrib/binutils/include/elf/arm.h index e70188338786..b10bb72eb643 100644 --- a/contrib/binutils/include/elf/arm.h +++ b/contrib/binutils/include/elf/arm.h @@ -271,6 +271,8 @@ enum Tag_ABI_optimization_goals, Tag_ABI_FP_optimization_goals, /* 32 is generic. */ + + Tag_Virtualization_use = 68, }; #endif From 4bb264ae15a1e40296ca33753a618535d51dc8a4 Mon Sep 17 00:00:00 2001 From: Xin LI Date: Sat, 4 Oct 2014 15:42:52 +0000 Subject: [PATCH 07/59] Don't make nested definition for range_seg_cache. 
Reported by: ian MFC after: 1 week X-MFC-With: r272506 --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c index 3eced1aea686..1a1d4d8960ce 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c @@ -2584,6 +2584,7 @@ arc_reclaim_needed(void) extern kmem_cache_t *zio_buf_cache[]; extern kmem_cache_t *zio_data_buf_cache[]; +extern kmem_cache_t *range_seg_cache; static void __noinline arc_kmem_reap_now(arc_reclaim_strategy_t strat) @@ -2591,7 +2592,6 @@ arc_kmem_reap_now(arc_reclaim_strategy_t strat) size_t i; kmem_cache_t *prev_cache = NULL; kmem_cache_t *prev_data_cache = NULL; - extern kmem_cache_t *range_seg_cache; DTRACE_PROBE(arc__kmem_reap_start); #ifdef _KERNEL From 41e8f7efbe02be269cee820fc90a2ef8799646cc Mon Sep 17 00:00:00 2001 From: Ian Lepore Date: Sat, 4 Oct 2014 15:59:15 +0000 Subject: [PATCH 08/59] Make kevent(2) periodic timer events more reliably periodic. The event callout is now scheduled using the C_ABSOLUTE flag, and the absolute time of each event is calculated as the time the previous event was scheduled for plus the interval. This ensures that latency in processing a given event doesn't perturb the arrival time of any subsequent events. 
Reviewed by: jhb --- sys/kern/kern_event.c | 14 +++++++++----- sys/sys/event.h | 1 + 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/sys/kern/kern_event.c b/sys/kern/kern_event.c index 7488652c20de..55bffe77a9a6 100644 --- a/sys/kern/kern_event.c +++ b/sys/kern/kern_event.c @@ -569,9 +569,10 @@ filt_timerexpire(void *knx) if ((kn->kn_flags & EV_ONESHOT) != EV_ONESHOT) { calloutp = (struct callout *)kn->kn_hook; - callout_reset_sbt_on(calloutp, - timer2sbintime(kn->kn_sdata, kn->kn_sfflags), 0, - filt_timerexpire, kn, PCPU_GET(cpuid), 0); + *kn->kn_ptr.p_nexttime += timer2sbintime(kn->kn_sdata, + kn->kn_sfflags); + callout_reset_sbt_on(calloutp, *kn->kn_ptr.p_nexttime, 0, + filt_timerexpire, kn, PCPU_GET(cpuid), C_ABSOLUTE); } } @@ -607,11 +608,13 @@ filt_timerattach(struct knote *kn) kn->kn_flags |= EV_CLEAR; /* automatically set */ kn->kn_status &= ~KN_DETACHED; /* knlist_add clears it */ + kn->kn_ptr.p_nexttime = malloc(sizeof(sbintime_t), M_KQUEUE, M_WAITOK); calloutp = malloc(sizeof(*calloutp), M_KQUEUE, M_WAITOK); callout_init(calloutp, CALLOUT_MPSAFE); kn->kn_hook = calloutp; - callout_reset_sbt_on(calloutp, to, 0, - filt_timerexpire, kn, PCPU_GET(cpuid), 0); + *kn->kn_ptr.p_nexttime = to + sbinuptime(); + callout_reset_sbt_on(calloutp, *kn->kn_ptr.p_nexttime, 0, + filt_timerexpire, kn, PCPU_GET(cpuid), C_ABSOLUTE); return (0); } @@ -625,6 +628,7 @@ filt_timerdetach(struct knote *kn) calloutp = (struct callout *)kn->kn_hook; callout_drain(calloutp); free(calloutp, M_KQUEUE); + free(kn->kn_ptr.p_nexttime, M_KQUEUE); old = atomic_fetch_sub_explicit(&kq_ncallouts, 1, memory_order_relaxed); KASSERT(old > 0, ("Number of callouts cannot become negative")); kn->kn_status |= KN_DETACHED; /* knlist_remove sets it */ diff --git a/sys/sys/event.h b/sys/sys/event.h index 47ebd7667086..c712f76ac34c 100644 --- a/sys/sys/event.h +++ b/sys/sys/event.h @@ -221,6 +221,7 @@ struct knote { struct proc *p_proc; /* proc pointer */ struct aiocblist *p_aio; /* AIO job 
pointer */ struct aioliojob *p_lio; /* LIO job pointer */ + sbintime_t *p_nexttime; /* next timer event fires at */ void *p_v; /* generic other pointer */ } kn_ptr; struct filterops *kn_fop; From 79ec4cf466f57667df3caf49e5ef02adabc8bf1f Mon Sep 17 00:00:00 2001 From: "Pedro F. Giffuni" Date: Sat, 4 Oct 2014 18:00:15 +0000 Subject: [PATCH 09/59] Minor doc format fix. Submitted by: Yonghyeon PYUN --- lib/libc/stdtime/strptime.3 | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/libc/stdtime/strptime.3 b/lib/libc/stdtime/strptime.3 index 27054460e03e..2b50f0efd109 100644 --- a/lib/libc/stdtime/strptime.3 +++ b/lib/libc/stdtime/strptime.3 @@ -79,7 +79,8 @@ and .Fa \&%D , are now interpreted as beginning at 1969 per POSIX requirements. Years 69-00 are interpreted in the 20th century (1969-2000), years -01-68 in the 21st century (2001-2068). The +01-68 in the 21st century (2001-2068). +The .Fa \&%U and .Fa %W From e3d6feceb170bf1115f92f7ff456610b8e68170c Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Sat, 4 Oct 2014 18:28:27 +0000 Subject: [PATCH 10/59] Add IO_RANGELOCKED flag for vn_rdwr(9), which specifies that vnode is not locked, but range is. 
Tested by: pho Sponsored by: The FreeBSD Foundation MFC after: 2 weeks --- sys/kern/vfs_vnops.c | 17 ++++++++++------- sys/sys/vnode.h | 1 + 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c index 617bda0d4ce6..65b537142d48 100644 --- a/sys/kern/vfs_vnops.c +++ b/sys/kern/vfs_vnops.c @@ -504,13 +504,16 @@ vn_rdwr(enum uio_rw rw, struct vnode *vp, void *base, int len, off_t offset, error = 0; if ((ioflg & IO_NODELOCKED) == 0) { - if (rw == UIO_READ) { - rl_cookie = vn_rangelock_rlock(vp, offset, - offset + len); - } else { - rl_cookie = vn_rangelock_wlock(vp, offset, - offset + len); - } + if ((ioflg & IO_RANGELOCKED) == 0) { + if (rw == UIO_READ) { + rl_cookie = vn_rangelock_rlock(vp, offset, + offset + len); + } else { + rl_cookie = vn_rangelock_wlock(vp, offset, + offset + len); + } + } else + rl_cookie = NULL; mp = NULL; if (rw == UIO_WRITE) { if (vp->v_type != VCHR && diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h index 6b0729510bc1..8610aca03de3 100644 --- a/sys/sys/vnode.h +++ b/sys/sys/vnode.h @@ -305,6 +305,7 @@ struct vattr { #define IO_NORMAL 0x0800 /* operate on regular data */ #define IO_NOMACCHECK 0x1000 /* MAC checks unnecessary */ #define IO_BUFLOCKED 0x2000 /* ffs flag; indir buf is locked */ +#define IO_RANGELOCKED 0x4000 /* range locked */ #define IO_SEQMAX 0x7F /* seq heuristic max value */ #define IO_SEQSHIFT 16 /* seq heuristic in upper 16 bits */ From 539c9eef121048744297b62672ff49374b847946 Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Sat, 4 Oct 2014 18:35:00 +0000 Subject: [PATCH 11/59] Fixes for i/o during coredumping: - Do not dump into system files. - Do not acquire write reference to the mount point where img.core is written, in the coredump(). The vn_rdwr() calls from ELF imgact request the write ref from vn_rdwr(). Recursive acquisition of the write ref deadlocks with the unmount. - Instead, take the range lock for the whole core file. 
This prevents parallel dumping from two processes executing the same image, converting the useless interleaved dump into sequential dumping, with second core overwriting the first. Tested by: pho Sponsored by: The FreeBSD Foundation MFC after: 2 weeks --- sys/kern/imgact_elf.c | 8 ++++---- sys/kern/kern_sig.c | 29 +++++++++++------------------ 2 files changed, 15 insertions(+), 22 deletions(-) diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c index 634a50c0a1bd..e3494f69d07b 100644 --- a/sys/kern/imgact_elf.c +++ b/sys/kern/imgact_elf.c @@ -1112,8 +1112,8 @@ core_output(struct vnode *vp, void *base, size_t len, off_t offset, #endif } else { error = vn_rdwr_inchunks(UIO_WRITE, vp, base, len, offset, - UIO_USERSPACE, IO_UNIT | IO_DIRECT, active_cred, file_cred, - NULL, td); + UIO_USERSPACE, IO_UNIT | IO_DIRECT | IO_RANGELOCKED, + active_cred, file_cred, NULL, td); } return (error); } @@ -1160,8 +1160,8 @@ sbuf_drain_core_output(void *arg, const char *data, int len) #endif error = vn_rdwr_inchunks(UIO_WRITE, p->vp, __DECONST(void *, data), len, p->offset, UIO_SYSSPACE, - IO_UNIT | IO_DIRECT, p->active_cred, p->file_cred, NULL, - p->td); + IO_UNIT | IO_DIRECT | IO_RANGELOCKED, p->active_cred, + p->file_cred, NULL, p->td); if (locked) PROC_LOCK(p->td->td_proc); if (error != 0) diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c index 1bb042fbc02d..5cdc2cedf1d8 100644 --- a/sys/kern/kern_sig.c +++ b/sys/kern/kern_sig.c @@ -3214,8 +3214,8 @@ coredump(struct thread *td) struct flock lf; struct vattr vattr; int error, error1, locked; - struct mount *mp; char *name; /* name of corefile */ + void *rl_cookie; off_t limit; int compress; @@ -3248,39 +3248,33 @@ coredump(struct thread *td) } PROC_UNLOCK(p); -restart: error = corefile_open(p->p_comm, cred->cr_uid, p->p_pid, td, compress, &vp, &name); if (error != 0) return (error); - /* Don't dump to non-regular files or files with links. */ + /* + * Don't dump to non-regular files or files with links. 
+ * Do not dump into system files. + */ if (vp->v_type != VREG || VOP_GETATTR(vp, &vattr, cred) != 0 || - vattr.va_nlink != 1) { + vattr.va_nlink != 1 || (vp->v_vflag & VV_SYSTEM) != 0) { VOP_UNLOCK(vp, 0); error = EFAULT; goto close; } VOP_UNLOCK(vp, 0); + + /* Postpone other writers, including core dumps of other processes. */ + rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); + lf.l_whence = SEEK_SET; lf.l_start = 0; lf.l_len = 0; lf.l_type = F_WRLCK; locked = (VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &lf, F_FLOCK) == 0); - if (vn_start_write(vp, &mp, V_NOWAIT) != 0) { - lf.l_type = F_UNLCK; - if (locked) - VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_FLOCK); - if ((error = vn_close(vp, FWRITE, cred, td)) != 0) - goto out; - if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) - goto out; - free(name, M_TEMP); - goto restart; - } - VATTR_NULL(&vattr); vattr.va_size = 0; if (set_core_nodump_flag) @@ -3288,7 +3282,6 @@ coredump(struct thread *td) vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); VOP_SETATTR(vp, &vattr, cred); VOP_UNLOCK(vp, 0); - vn_finished_write(mp); PROC_LOCK(p); p->p_acflag |= ACORE; PROC_UNLOCK(p); @@ -3304,11 +3297,11 @@ coredump(struct thread *td) lf.l_type = F_UNLCK; VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_FLOCK); } + vn_rangelock_unlock(vp, rl_cookie); close: error1 = vn_close(vp, FWRITE, cred, td); if (error == 0) error = error1; -out: #ifdef AUDIT audit_proc_coredump(td, name, error); #endif From b76278407dc1b20319f598998ce72d13ada7cc6b Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Sat, 4 Oct 2014 18:38:14 +0000 Subject: [PATCH 12/59] Add kernel option KSTACK_USAGE_PROF to sample the stack depth on interrupts and report the largest value seen as sysctl debug.max_kstack_used. Useful to estimate how close the kernel stack size is to overflow. 
In collaboration with: Larry Baird Sponsored by: The FreeBSD Foundation (kib) MFC after: 1 week --- sys/conf/NOTES | 1 + sys/conf/options | 1 + sys/kern/kern_intr.c | 5 +++++ sys/sys/systm.h | 2 ++ sys/vm/vm_glue.c | 49 ++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 58 insertions(+) diff --git a/sys/conf/NOTES b/sys/conf/NOTES index 5baa306c99db..5cc146eb6b49 100644 --- a/sys/conf/NOTES +++ b/sys/conf/NOTES @@ -2958,6 +2958,7 @@ options SC_RENDER_DEBUG # syscons rendering debugging options VFS_BIO_DEBUG # VFS buffer I/O debugging options KSTACK_MAX_PAGES=32 # Maximum pages to give the kernel stack +options KSTACK_USAGE_PROF # Adaptec Array Controller driver options options AAC_DEBUG # Debugging levels: diff --git a/sys/conf/options b/sys/conf/options index 42113c35bd3e..83375217f89a 100644 --- a/sys/conf/options +++ b/sys/conf/options @@ -136,6 +136,7 @@ KDTRACE_FRAME opt_kdtrace.h KN_HASHSIZE opt_kqueue.h KSTACK_MAX_PAGES KSTACK_PAGES +KSTACK_USAGE_PROF KTRACE KTRACE_REQUEST_POOL opt_ktrace.h LIBICONV diff --git a/sys/kern/kern_intr.c b/sys/kern/kern_intr.c index 6e9a4e8e48d2..d6de6111bb1f 100644 --- a/sys/kern/kern_intr.c +++ b/sys/kern/kern_intr.c @@ -28,6 +28,7 @@ __FBSDID("$FreeBSD$"); #include "opt_ddb.h" +#include "opt_kstack_usage_prof.h" #include #include @@ -1396,6 +1397,10 @@ intr_event_handle(struct intr_event *ie, struct trapframe *frame) td = curthread; +#ifdef KSTACK_USAGE_PROF + intr_prof_stack_use(td, frame); +#endif + /* An interrupt with no event or handlers is a stray interrupt. 
*/ if (ie == NULL || TAILQ_EMPTY(&ie->ie_handlers)) return (EINVAL); diff --git a/sys/sys/systm.h b/sys/sys/systm.h index 0f2732c06601..c484b7b3374a 100644 --- a/sys/sys/systm.h +++ b/sys/sys/systm.h @@ -443,4 +443,6 @@ bitcount16(uint32_t x) return (x); } +void intr_prof_stack_use(struct thread *td, struct trapframe *frame); + #endif /* !_SYS_SYSTM_H_ */ diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c index 61c003bec8b7..c9ee890824a5 100644 --- a/sys/vm/vm_glue.c +++ b/sys/vm/vm_glue.c @@ -62,6 +62,7 @@ __FBSDID("$FreeBSD$"); #include "opt_vm.h" #include "opt_kstack_pages.h" #include "opt_kstack_max_pages.h" +#include "opt_kstack_usage_prof.h" #include #include @@ -98,6 +99,8 @@ __FBSDID("$FreeBSD$"); #include #include +#include + #ifndef NO_SWAPPING static int swapout(struct proc *); static void swapclear(struct proc *); @@ -486,6 +489,52 @@ kstack_cache_init(void *nulll) SYSINIT(vm_kstacks, SI_SUB_KTHREAD_INIT, SI_ORDER_ANY, kstack_cache_init, NULL); +#ifdef KSTACK_USAGE_PROF +/* + * Track maximum stack used by a thread in kernel. + */ +static int max_kstack_used; + +SYSCTL_INT(_debug, OID_AUTO, max_kstack_used, CTLFLAG_RD, + &max_kstack_used, 0, + "Maxiumum stack depth used by a thread in kernel"); + +void +intr_prof_stack_use(struct thread *td, struct trapframe *frame) +{ + vm_offset_t stack_top; + vm_offset_t current; + int used, prev_used; + + /* + * Testing for interrupted kernel mode isn't strictly + * needed. It optimizes the execution, since interrupts from + * usermode will have only the trap frame on the stack. + */ + if (TRAPF_USERMODE(frame)) + return; + + stack_top = td->td_kstack + td->td_kstack_pages * PAGE_SIZE; + current = (vm_offset_t)(uintptr_t)&stack_top; + + /* + * Try to detect if interrupt is using kernel thread stack. + * Hardware could use a dedicated stack for interrupt handling. 
+ */ + if (stack_top <= current || current < td->td_kstack) + return; + + used = stack_top - current; + for (;;) { + prev_used = max_kstack_used; + if (prev_used >= used) + break; + if (atomic_cmpset_int(&max_kstack_used, prev_used, used)) + break; + } +} +#endif /* KSTACK_USAGE_PROF */ + #ifndef NO_SWAPPING /* * Allow a thread's kernel stack to be paged out. From 0d064468125c2cb477c8e8baed30d95219a20d5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-S=C3=A9bastien=20P=C3=A9dron?= Date: Sat, 4 Oct 2014 18:40:40 +0000 Subject: [PATCH 13/59] vt(4): Don't recalculate buffer size if we don't know screen size When the screen size is unknown, it's set to 0x0. We can't use that as the buffer size, otherwise, functions such as vtbuf_fill() will fail. This fixes a panic on RaspberryPi, where there's no vt(4) backend configured early in boot. PR: 193981 Tested by: danilo@ MFC after: 3 days --- sys/dev/vt/vt_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sys/dev/vt/vt_core.c b/sys/dev/vt/vt_core.c index 2dd7e3f134bd..67d43489ea07 100644 --- a/sys/dev/vt/vt_core.c +++ b/sys/dev/vt/vt_core.c @@ -1269,7 +1269,8 @@ vtterm_cnprobe(struct terminal *tm, struct consdev *cp) * that we have the real viewable size, fix it in the static * buffer. */ - vt_termsize(vd, vw->vw_font, &vw->vw_buf.vb_scr_size); + if (vd->vd_width != 0 && vd->vd_height != 0) + vt_termsize(vd, vw->vw_font, &vw->vw_buf.vb_scr_size); vtbuf_init_early(&vw->vw_buf); vt_winsize(vd, vw->vw_font, &wsz); From 4142462eebe7a9c5e96816d11d920573433fc3f8 Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Sat, 4 Oct 2014 18:51:55 +0000 Subject: [PATCH 14/59] Slightly reword comment. Move code, which is described by the comment, after it. 
Discussed with: bde Sponsored by: The FreeBSD Foundation MFC after: 1 week --- sys/kern/vfs_vnops.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c index 65b537142d48..b86ffa726a29 100644 --- a/sys/kern/vfs_vnops.c +++ b/sys/kern/vfs_vnops.c @@ -2237,12 +2237,10 @@ vn_utimes_perm(struct vnode *vp, struct vattr *vap, struct ucred *cred, { int error; - error = VOP_ACCESSX(vp, VWRITE_ATTRIBUTES, cred, td); - /* - * From utimes(2): - * Grant permission if the caller is the owner of the file or - * the super-user. If the time pointer is null, then write + * Grant permission if the caller is the owner of the file, or + * the super-user, or has ACL_WRITE_ATTRIBUTES permission on + * on the file. If the time pointer is null, then write * permission on the file is also sufficient. * * From NFSv4.1, draft 21, 6.2.1.3.1, Discussion of Mask Attributes: @@ -2250,6 +2248,7 @@ vn_utimes_perm(struct vnode *vp, struct vattr *vap, struct ucred *cred, * will be allowed to set the times [..] to the current * server time. */ + error = VOP_ACCESSX(vp, VWRITE_ATTRIBUTES, cred, td); if (error != 0 && (vap->va_vaflags & VA_UTIMES_NULL) != 0) error = VOP_ACCESS(vp, VWRITE, cred, td); return (error); From b328e7bf72f2c27cbf47fda5a5eec80a33c0394b Mon Sep 17 00:00:00 2001 From: Gavin Atkinson Date: Sat, 4 Oct 2014 23:56:25 +0000 Subject: [PATCH 15/59] Include urndis(4) in list of devices for which we generate hardware notes. 
MFC after: 3 days --- release/doc/en_US.ISO8859-1/hardware/article.xml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/release/doc/en_US.ISO8859-1/hardware/article.xml b/release/doc/en_US.ISO8859-1/hardware/article.xml index 444e3cab796a..09cb3bea9f7f 100644 --- a/release/doc/en_US.ISO8859-1/hardware/article.xml +++ b/release/doc/en_US.ISO8859-1/hardware/article.xml @@ -908,6 +908,8 @@ &hwlist.udav; + &hwlist.urndis; + &hwlist.vge; &hwlist.vr; From 25108069ec57827e35a52743363400b0f960a521 Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Sun, 5 Oct 2014 02:16:53 +0000 Subject: [PATCH 16/59] Get rid of crshared. --- sys/kern/kern_prot.c | 12 +----------- sys/sys/ucred.h | 1 - 2 files changed, 1 insertion(+), 12 deletions(-) diff --git a/sys/kern/kern_prot.c b/sys/kern/kern_prot.c index 7552363ac520..fd2b7f4021f8 100644 --- a/sys/kern/kern_prot.c +++ b/sys/kern/kern_prot.c @@ -1883,16 +1883,6 @@ crfree(struct ucred *cr) } } -/* - * Check to see if this ucred is shared. - */ -int -crshared(struct ucred *cr) -{ - - return (cr->cr_ref > 1); -} - /* * Copy a ucred's contents from a template. Does not block. 
*/ @@ -1900,7 +1890,7 @@ void crcopy(struct ucred *dest, struct ucred *src) { - KASSERT(crshared(dest) == 0, ("crcopy of shared ucred")); + KASSERT(dest->cr_ref == 1, ("crcopy of shared ucred")); bcopy(&src->cr_startcopy, &dest->cr_startcopy, (unsigned)((caddr_t)&src->cr_endcopy - (caddr_t)&src->cr_startcopy)); diff --git a/sys/sys/ucred.h b/sys/sys/ucred.h index e1648d4b3649..81e45209b223 100644 --- a/sys/sys/ucred.h +++ b/sys/sys/ucred.h @@ -108,7 +108,6 @@ void cred_update_thread(struct thread *td); void crfree(struct ucred *cr); struct ucred *crget(void); struct ucred *crhold(struct ucred *cr); -int crshared(struct ucred *cr); void cru2x(struct ucred *cr, struct xucred *xcr); void crsetgroups(struct ucred *cr, int n, gid_t *groups); int groupmember(gid_t gid, struct ucred *cred); From 6d47816791b8deb91352f7b71d1266ca7d453ef3 Mon Sep 17 00:00:00 2001 From: Hiroki Sato Date: Sun, 5 Oct 2014 02:34:21 +0000 Subject: [PATCH 17/59] - Move L2 addr configuration for the primary port to a taskqueue. This fixes LOR of softc rmlock in iflladdr_event handlers. - Call if_delmulti_ifma() after LACP_UNLOCK(). This fixes another LOR. - Fix a panic in lacp_transit_expire(). - Fix a panic in lagg_input() upon shutting down a port. 
--- sys/net/ieee8023ad_lacp.c | 7 ++++-- sys/net/if_lagg.c | 50 ++++++++++++++++++++++++++++----------- sys/net/if_lagg.h | 4 ++++ 3 files changed, 45 insertions(+), 16 deletions(-) diff --git a/sys/net/ieee8023ad_lacp.c b/sys/net/ieee8023ad_lacp.c index 548e81b8baa8..106df68e807a 100644 --- a/sys/net/ieee8023ad_lacp.c +++ b/sys/net/ieee8023ad_lacp.c @@ -579,12 +579,13 @@ lacp_port_destroy(struct lagg_port *lgp) lacp_disable_distributing(lp); lacp_unselect(lp); + LIST_REMOVE(lp, lp_next); + LACP_UNLOCK(lsc); + /* The address may have already been removed by if_purgemaddrs() */ if (!lgp->lp_detaching) if_delmulti_ifma(lp->lp_ifma); - LIST_REMOVE(lp, lp_next); - LACP_UNLOCK(lsc); free(lp, M_DEVBUF); } @@ -745,7 +746,9 @@ lacp_transit_expire(void *vp) LACP_LOCK_ASSERT(lsc); + CURVNET_SET(lsc->lsc_softc->sc_ifp->if_vnet); LACP_TRACE(NULL); + CURVNET_RESTORE(); lsc->lsc_suppress_distributing = FALSE; } diff --git a/sys/net/if_lagg.c b/sys/net/if_lagg.c index 67895de1895a..7f7e4b5fc870 100644 --- a/sys/net/if_lagg.c +++ b/sys/net/if_lagg.c @@ -569,15 +569,15 @@ lagg_clone_destroy(struct ifnet *ifp) static void lagg_lladdr(struct lagg_softc *sc, uint8_t *lladdr) { - struct ifnet *ifp = sc->sc_ifp; + struct lagg_port lp; - if (memcmp(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN) == 0) - return; + LAGG_WLOCK_ASSERT(sc); - bcopy(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN); - /* Let the protocol know the MAC has changed */ - lagg_proto_lladdr(sc); - EVENTHANDLER_INVOKE(iflladdr_event, ifp); + bzero(&lp, sizeof(lp)); + lp.lp_ifp = sc->sc_ifp; + lp.lp_softc = sc; + + lagg_port_lladdr(&lp, lladdr); } static void @@ -648,6 +648,7 @@ lagg_port_lladdr(struct lagg_port *lp, uint8_t *lladdr) /* Update the lladdr even if pending, it may have changed */ llq->llq_ifp = ifp; + llq->llq_primary = (sc->sc_primary->lp_ifp == ifp) ? 
1 : 0; bcopy(lladdr, llq->llq_lladdr, ETHER_ADDR_LEN); if (!pending) @@ -680,14 +681,35 @@ lagg_port_setlladdr(void *arg, int pending) for (llq = head; llq != NULL; llq = head) { ifp = llq->llq_ifp; - /* Set the link layer address */ CURVNET_SET(ifp->if_vnet); - error = if_setlladdr(ifp, llq->llq_lladdr, ETHER_ADDR_LEN); + if (llq->llq_primary == 0) { + /* + * Set the link layer address on the laggport interface. + * if_setlladdr() triggers gratuitous ARPs for INET. + */ + error = if_setlladdr(ifp, llq->llq_lladdr, + ETHER_ADDR_LEN); + if (error) + printf("%s: setlladdr failed on %s\n", __func__, + ifp->if_xname); + } else { + /* + * Set the link layer address on the lagg interface. + * lagg_proto_lladdr() notifies the MAC change to + * the aggregation protocol. iflladdr_event handler + * may trigger gratuitous ARPs for INET. + */ + if (memcmp(llq->llq_lladdr, IF_LLADDR(ifp), + ETHER_ADDR_LEN) != 0) { + bcopy(llq->llq_lladdr, IF_LLADDR(ifp), + ETHER_ADDR_LEN); + LAGG_WLOCK(sc); + lagg_proto_lladdr(sc); + LAGG_WUNLOCK(sc); + EVENTHANDLER_INVOKE(iflladdr_event, ifp); + } + } CURVNET_RESTORE(); - if (error) - printf("%s: setlladdr failed on %s\n", __func__, - ifp->if_xname); - head = SLIST_NEXT(llq, llq_entries); free(llq, M_DEVBUF); } @@ -1639,7 +1661,7 @@ lagg_input(struct ifnet *ifp, struct mbuf *m) ETHER_BPF_MTAP(scifp, m); - m = lagg_proto_input(sc, lp, m); + m = (lp->lp_detaching == 0) ? 
lagg_proto_input(sc, lp, m) : NULL; if (m != NULL) { if (scifp->if_flags & IFF_MONITOR) { diff --git a/sys/net/if_lagg.h b/sys/net/if_lagg.h index bbb3497f96cd..e86ed06fcca7 100644 --- a/sys/net/if_lagg.h +++ b/sys/net/if_lagg.h @@ -159,6 +159,9 @@ struct lagg_reqopts { #define SIOCGLAGGOPTS _IOWR('i', 152, struct lagg_reqopts) #define SIOCSLAGGOPTS _IOW('i', 153, struct lagg_reqopts) +#define LAGG_OPT_BITS "\020\001USE_FLOWID\005LACP_STRICT" \ + "\006LACP_TXTEST\007LACP_RXTEST" + #ifdef _KERNEL /* @@ -203,6 +206,7 @@ struct lagg_mc { struct lagg_llq { struct ifnet *llq_ifp; uint8_t llq_lladdr[ETHER_ADDR_LEN]; + uint8_t llq_primary; SLIST_ENTRY(lagg_llq) llq_entries; }; From 7eb756fab1a9abde6b55d7f83d525b5b5593077a Mon Sep 17 00:00:00 2001 From: Hiroki Sato Date: Sun, 5 Oct 2014 02:37:01 +0000 Subject: [PATCH 18/59] Use printb() for boolean flags in ro_opts and actor_state for LACP. --- sbin/ifconfig/iflagg.c | 17 +++++------------ sys/net/ieee8023ad_lacp.h | 2 ++ 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/sbin/ifconfig/iflagg.c b/sbin/ifconfig/iflagg.c index 9c478b3dfc5f..51a6faa61963 100644 --- a/sbin/ifconfig/iflagg.c +++ b/sbin/ifconfig/iflagg.c @@ -17,6 +17,7 @@ static const char rcsid[] = #include #include #include +#include #include #include @@ -246,18 +247,9 @@ lagg_status(int s) putchar('\n'); if (verbose) { printf("\tlagg options:\n"); - printf("\t\tuse_flowid: %d\n", - (ro.ro_opts & LAGG_OPT_USE_FLOWID) ? 1 : 0); + printb("\t\tflags", ro.ro_opts, LAGG_OPT_BITS); + putchar('\n'); printf("\t\tflowid_shift: %d\n", ro.ro_flowid_shift); - switch (ra.ra_proto) { - case LAGG_PROTO_LACP: - printf("\t\tlacp_strict: %d\n", - (ro.ro_opts & LAGG_OPT_LACP_STRICT) ? 1 : 0); - printf("\t\tlacp_rxtest: %d\n", - (ro.ro_opts & LAGG_OPT_LACP_RXTEST) ? 1 : 0); - printf("\t\tlacp_txtest: %d\n", - (ro.ro_opts & LAGG_OPT_LACP_TXTEST) ? 
1 : 0); - } printf("\tlagg statistics:\n"); printf("\t\tactive ports: %d\n", ro.ro_active); printf("\t\tflapping: %u\n", ro.ro_flapping); @@ -272,7 +264,8 @@ lagg_status(int s) printf("\tlaggport: %s ", rpbuf[i].rp_portname); printb("flags", rpbuf[i].rp_flags, LAGG_PORT_BITS); if (verbose && ra.ra_proto == LAGG_PROTO_LACP) - printf(" state=%X", lp->actor_state); + printb(" state", lp->actor_state, + LACP_STATE_BITS); putchar('\n'); if (verbose && ra.ra_proto == LAGG_PROTO_LACP) printf("\t\t%s\n", diff --git a/sys/net/ieee8023ad_lacp.h b/sys/net/ieee8023ad_lacp.h index 535cf1fd1dfa..e814f8321f81 100644 --- a/sys/net/ieee8023ad_lacp.h +++ b/sys/net/ieee8023ad_lacp.h @@ -75,6 +75,7 @@ "\007DEFAULTED" \ "\010EXPIRED" +#ifdef _KERNEL /* * IEEE802.3 slow protocols * @@ -336,3 +337,4 @@ lacp_isdistributing(struct lagg_port *lgp) #define LACP_LAGIDSTR_MAX \ (1 + LACP_PARTNERSTR_MAX + 1 + LACP_PARTNERSTR_MAX + 1) #define LACP_STATESTR_MAX (255) /* XXX */ +#endif /* _KERNEL */ From 6e5254e0d70b8c7e4bbfaca0640b962937d2c41c Mon Sep 17 00:00:00 2001 From: Bryan Venteicher Date: Sun, 5 Oct 2014 03:18:30 +0000 Subject: [PATCH 19/59] Remove stray uma_mtx lock/unlock in zone_drain_wait() Callers of zone_drain_wait(M_WAITOK) do not need to hold (and were not) the uma_mtx, but we would attempt to unlock and relock the mutex if we had to sleep because the zone was already draining. The M_NOWAIT callers may hold the uma_mtx, but we do not sleep in that case. 
Reviewed by: jhb MFC after: 3 days --- sys/vm/uma_core.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/sys/vm/uma_core.c b/sys/vm/uma_core.c index 81b714a9f32d..c96da5c3e73a 100644 --- a/sys/vm/uma_core.c +++ b/sys/vm/uma_core.c @@ -897,9 +897,7 @@ zone_drain_wait(uma_zone_t zone, int waitok) while (zone->uz_flags & UMA_ZFLAG_DRAINING) { if (waitok == M_NOWAIT) goto out; - mtx_unlock(&uma_mtx); msleep(zone, zone->uz_lockptr, PVM, "zonedrain", 1); - mtx_lock(&uma_mtx); } zone->uz_flags |= UMA_ZFLAG_DRAINING; bucket_cache_drain(zone); From 7db9f2ba58c614492e20403af329a7aaaf707c53 Mon Sep 17 00:00:00 2001 From: Cy Schubert Date: Sun, 5 Oct 2014 03:41:47 +0000 Subject: [PATCH 20/59] ipfilter bug #550 filter rule list corrupted with inserted rules Obtained from: ipfilter CVS repo (r1.128); NetBSD CVS repo (r1.15) --- sys/contrib/ipfilter/netinet/fil.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/sys/contrib/ipfilter/netinet/fil.c b/sys/contrib/ipfilter/netinet/fil.c index 2adfe26bdd23..f600092d015d 100644 --- a/sys/contrib/ipfilter/netinet/fil.c +++ b/sys/contrib/ipfilter/netinet/fil.c @@ -4496,7 +4496,15 @@ frrequest(softc, unit, req, data, set, makecopy) fp = f; f = NULL; + fp->fr_next = NULL; fp->fr_dnext = NULL; + fp->fr_pnext = NULL; + fp->fr_pdnext = NULL; + fp->fr_grp = NULL; + fp->fr_grphead = NULL; + fp->fr_icmpgrp = NULL; + fp->fr_isc = (void *)-1; + fp->fr_ptr = NULL; fp->fr_ref = 0; fp->fr_flags |= FR_COPIED; } else { @@ -5000,7 +5008,9 @@ frrequest(softc, unit, req, data, set, makecopy) if (f->fr_collect > fp->fr_collect) break; ftail = &f->fr_next; + fprev = ftail; } + ftail = fprev; f = NULL; ptr = NULL; } else if (req == (ioctlcmd_t)SIOCINAFR || @@ -5091,6 +5101,8 @@ frrequest(softc, unit, req, data, set, makecopy) fp->fr_ref = 1; fp->fr_pnext = ftail; fp->fr_next = *ftail; + if (fp->fr_next != NULL) + fp->fr_next->fr_pnext = &fp->fr_next; *ftail = fp; if (addrem == 0) ipf_fixskip(ftail, fp, 1); From 
a7bd2acdab065b90c6c6661dd99c9da8799b390e Mon Sep 17 00:00:00 2001 From: Cy Schubert Date: Sun, 5 Oct 2014 03:45:19 +0000 Subject: [PATCH 21/59] ipfilter bug #554 Determining why a ipf rule matches is hard -- replace ipfilter rule compare with new ipf_rule_compare() function. Obtained from: ipfilter CVS rep (r1.129) --- sys/contrib/ipfilter/netinet/fil.c | 45 ++++++++++++++++++++++-------- 1 file changed, 34 insertions(+), 11 deletions(-) diff --git a/sys/contrib/ipfilter/netinet/fil.c b/sys/contrib/ipfilter/netinet/fil.c index f600092d015d..4d86e47d9fb6 100644 --- a/sys/contrib/ipfilter/netinet/fil.c +++ b/sys/contrib/ipfilter/netinet/fil.c @@ -4435,6 +4435,39 @@ ipf_matchicmpqueryreply(v, ic, icmp, rev) } +/* ------------------------------------------------------------------------ */ +/* Function: ipf_rule_compare */ +/* Parameters: fr1(I) - first rule structure to compare */ +/* fr2(I) - second rule structure to compare */ +/* Returns: int - 0 == rules are the same, else mismatch */ +/* */ +/* Compare two rules and return 0 if they match or a number indicating */ +/* which of the individual checks failed. 
*/ +/* ------------------------------------------------------------------------ */ +static int +ipf_rule_compare(frentry_t *fr1, frentry_t *fr2) +{ + if (fr1->fr_cksum != fr2->fr_cksum) + return 1; + if (fr1->fr_size != fr2->fr_size) + return 2; + if (fr1->fr_dsize != fr2->fr_dsize) + return 3; + if (bcmp((char *)&fr1->fr_func, (char *)&fr2->fr_func, + fr1->fr_size - offsetof(struct frentry, fr_func)) != 0) + return 4; + if (fr1->fr_data && !fr2->fr_data) + return 5; + if (!fr1->fr_data && fr2->fr_data) + return 6; + if (fr1->fr_data) { + if (bcmp(fr1->fr_caddr, fr2->fr_caddr, fr1->fr_dsize)) + return 7; + } + return 0; +} + + /* ------------------------------------------------------------------------ */ /* Function: frrequest */ /* Returns: int - 0 == success, > 0 == errno value */ @@ -4928,17 +4961,7 @@ frrequest(softc, unit, req, data, set, makecopy) } for (; (f = *ftail) != NULL; ftail = &f->fr_next) { - DT2(rule_cmp, frentry_t *, fp, frentry_t *, f); - if ((fp->fr_cksum != f->fr_cksum) || - (fp->fr_size != f->fr_size) || - (f->fr_dsize != fp->fr_dsize)) - continue; - if (bcmp((char *)&f->fr_func, (char *)&fp->fr_func, - fp->fr_size - offsetof(struct frentry, fr_func)) != 0) - continue; - if ((!ptr && !f->fr_data) || - (ptr && f->fr_data && - !bcmp((char *)ptr, (char *)f->fr_data, f->fr_dsize))) + if (ipf_rule_compare(fp, f) == 0) break; } From 685545cdc727c97c955b92463c27b8ea08ebed27 Mon Sep 17 00:00:00 2001 From: Cy Schubert Date: Sun, 5 Oct 2014 03:48:09 +0000 Subject: [PATCH 22/59] ipfilter bug #538 ipf_p_dns_del should return void Obtained from: ipfilter cvs repo (r1.8) --- sys/contrib/ipfilter/netinet/ip_dns_pxy.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sys/contrib/ipfilter/netinet/ip_dns_pxy.c b/sys/contrib/ipfilter/netinet/ip_dns_pxy.c index df863b8f6fb6..ff1bc792bd59 100644 --- a/sys/contrib/ipfilter/netinet/ip_dns_pxy.c +++ b/sys/contrib/ipfilter/netinet/ip_dns_pxy.c @@ -29,7 +29,7 @@ typedef struct ipf_dns_softc_s { int 
ipf_p_dns_allow_query __P((ipf_dns_softc_t *, dnsinfo_t *)); int ipf_p_dns_ctl __P((ipf_main_softc_t *, void *, ap_ctl_t *)); -int ipf_p_dns_del __P((ipf_main_softc_t *, ap_session_t *)); +void ipf_p_dns_del __P((ipf_main_softc_t *, ap_session_t *)); int ipf_p_dns_get_name __P((ipf_dns_softc_t *, char *, int, char *, int)); int ipf_p_dns_inout __P((void *, fr_info_t *, ap_session_t *, nat_t *)); int ipf_p_dns_match __P((fr_info_t *, ap_session_t *, nat_t *)); @@ -214,7 +214,7 @@ ipf_p_dns_new(arg, fin, aps, nat) /* ARGSUSED */ -int +void ipf_p_dns_del(softc, aps) ipf_main_softc_t *softc; ap_session_t *aps; @@ -227,7 +227,6 @@ ipf_p_dns_del(softc, aps) KFREES(aps->aps_data, aps->aps_psiz); aps->aps_data = NULL; aps->aps_psiz = 0; - return 0; } From 3a77b75120868c80a0b0f7636973a652003dbd84 Mon Sep 17 00:00:00 2001 From: Cy Schubert Date: Sun, 5 Oct 2014 03:52:09 +0000 Subject: [PATCH 23/59] ipfilter bug #534 destination list hashing not endian neutral Obtained from: ipfilter CVS repo (r1.26), NetBSD CVS repo (r1.8) --- sys/contrib/ipfilter/netinet/ip_dstlist.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sys/contrib/ipfilter/netinet/ip_dstlist.c b/sys/contrib/ipfilter/netinet/ip_dstlist.c index d516556c39cc..dc7dacb99fb8 100644 --- a/sys/contrib/ipfilter/netinet/ip_dstlist.c +++ b/sys/contrib/ipfilter/netinet/ip_dstlist.c @@ -1193,7 +1193,7 @@ ipf_dstlist_select(fin, d) MD5Update(&ctx, (u_char *)&fin->fin_dst6, sizeof(fin->fin_dst6)); MD5Final((u_char *)hash, &ctx); - x = hash[0] % d->ipld_nodes; + x = ntohl(hash[0]) % d->ipld_nodes; sel = d->ipld_dests[x]; break; @@ -1203,7 +1203,7 @@ ipf_dstlist_select(fin, d) MD5Update(&ctx, (u_char *)&fin->fin_src6, sizeof(fin->fin_src6)); MD5Final((u_char *)hash, &ctx); - x = hash[0] % d->ipld_nodes; + x = ntohl(hash[0]) % d->ipld_nodes; sel = d->ipld_dests[x]; break; @@ -1213,7 +1213,7 @@ ipf_dstlist_select(fin, d) MD5Update(&ctx, (u_char *)&fin->fin_dst6, sizeof(fin->fin_dst6)); MD5Final((u_char 
*)hash, &ctx); - x = hash[0] % d->ipld_nodes; + x = ntohl(hash[0]) % d->ipld_nodes; sel = d->ipld_dests[x]; break; From 2777bfabc064d70f4ca6e94be8c188547188c699 Mon Sep 17 00:00:00 2001 From: Cy Schubert Date: Sun, 5 Oct 2014 03:58:30 +0000 Subject: [PATCH 24/59] ipfilter bug #537 NAT rules with sticky have incorrect hostmap IP address. This fixes when an IP address mapping is put in the hostmap table for sticky NAT rules, it ends up having the wrong byte order. Obtained from: ipfilter CVS repo (r1.102), NetBSD CVS repo (r1.12) --- sys/contrib/ipfilter/netinet/ip_nat.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sys/contrib/ipfilter/netinet/ip_nat.c b/sys/contrib/ipfilter/netinet/ip_nat.c index d6647085a7eb..6c93810c215a 100644 --- a/sys/contrib/ipfilter/netinet/ip_nat.c +++ b/sys/contrib/ipfilter/netinet/ip_nat.c @@ -2946,10 +2946,11 @@ ipf_nat_newrdr(fin, nat, ni) */ if (np->in_flags & IPN_SPLIT) { in.s_addr = np->in_dnip; + inb.s_addr = htonl(in.s_addr); if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) == IPN_STICKY) { hm = ipf_nat_hostmap(softn, NULL, fin->fin_src, - fin->fin_dst, in, (u_32_t)dport); + fin->fin_dst, inb, (u_32_t)dport); if (hm != NULL) { in.s_addr = hm->hm_ndstip.s_addr; move = 0; @@ -3050,13 +3051,14 @@ ipf_nat_newrdr(fin, nat, ni) return -1; } + inb.s_addr = htonl(in.s_addr); nat->nat_ndstaddr = htonl(in.s_addr); nat->nat_odstip = fin->fin_dst; nat->nat_nsrcip = fin->fin_src; nat->nat_osrcip = fin->fin_src; if ((nat->nat_hm == NULL) && ((np->in_flags & IPN_STICKY) != 0)) nat->nat_hm = ipf_nat_hostmap(softn, np, fin->fin_src, - fin->fin_dst, in, (u_32_t)dport); + fin->fin_dst, inb, (u_32_t)dport); if (flags & IPN_TCPUDP) { nat->nat_odport = dport; From 34f50f4c342c8c44b80a638549fb563dabc4b494 Mon Sep 17 00:00:00 2001 From: "Andrey V. Elsukov" Date: Sun, 5 Oct 2014 06:00:22 +0000 Subject: [PATCH 25/59] Add a bit more debug messages. 
--- sys/boot/common/part.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sys/boot/common/part.c b/sys/boot/common/part.c index 445761969563..f9786634bdd7 100644 --- a/sys/boot/common/part.c +++ b/sys/boot/common/part.c @@ -301,6 +301,7 @@ ptable_gptread(struct ptable *table, void *dev, diskread_t dread) } } } + DEBUG("GPT detected"); if (pri == 0 && sec == 0) { /* Both primary and backup tables are invalid. */ table->type = PTABLE_NONE; @@ -378,6 +379,7 @@ ptable_ebrread(struct ptable *table, void *dev, diskread_t dread) buf = malloc(table->sectorsize); if (buf == NULL) return (table); + DEBUG("EBR detected"); for (i = 0; i < MAXEBRENTRIES; i++) { #if 0 /* Some BIOSes return an incorrect number of sectors */ if (offset >= table->sectors) @@ -470,6 +472,7 @@ ptable_bsdread(struct ptable *table, void *dev, diskread_t dread) DEBUG("invalid number of partitions"); goto out; } + DEBUG("BSD detected"); part = &dl->d_partitions[0]; raw_offset = le32toh(part[RAW_PART].p_offset); for (i = 0; i < dl->d_npartitions; i++, part++) { @@ -553,6 +556,7 @@ ptable_vtoc8read(struct ptable *table, void *dev, diskread_t dread) DEBUG("invalid geometry"); goto out; } + DEBUG("VTOC8 detected"); for (i = 0; i < VTOC8_NPARTS; i++) { dl->part[i].tag = be16toh(dl->part[i].tag); if (i == VTOC_RAW_PART || @@ -665,6 +669,7 @@ ptable_open(void *dev, off_t sectors, uint16_t sectorsize, #endif #ifdef LOADER_MBR_SUPPORT /* Read MBR. */ + DEBUG("MBR detected"); table->type = PTABLE_MBR; for (i = has_ext = 0; i < NDOSPART; i++) { if (dp[i].dp_typ == 0) From 4118113fc1d7ee8bcc5d38017a0908f65ea34c00 Mon Sep 17 00:00:00 2001 From: "Andrey V. Elsukov" Date: Sun, 5 Oct 2014 06:04:47 +0000 Subject: [PATCH 26/59] Rework bootparttest to use more code from sys/boot. Use disk_open() call to emulate loader behavior. 
--- sys/boot/common/disk.c | 4 +- tools/tools/bootparttest/Makefile | 13 +- tools/tools/bootparttest/bootparttest.c | 174 +++++++----------- tools/tools/bootparttest/{malloc.c => stub.c} | 8 + 4 files changed, 87 insertions(+), 112 deletions(-) rename tools/tools/bootparttest/{malloc.c => stub.c} (95%) diff --git a/sys/boot/common/disk.c b/sys/boot/common/disk.c index a62f6fd45e98..a541cb0199aa 100644 --- a/sys/boot/common/disk.c +++ b/sys/boot/common/disk.c @@ -90,7 +90,7 @@ disk_lookup(struct disk_devdesc *dev) entry->d_partition == dev->d_partition) { dev->d_offset = entry->d_offset; DEBUG("%s offset %lld", disk_fmtdev(dev), - dev->d_offset); + (long long)dev->d_offset); #ifdef DISK_DEBUG entry->count++; #endif @@ -367,7 +367,7 @@ disk_open(struct disk_devdesc *dev, off_t mediasize, u_int sectorsize, dev->d_slice = slice; dev->d_partition = partition; DEBUG("%s offset %lld => %p", disk_fmtdev(dev), - dev->d_offset, od); + (long long)dev->d_offset, od); } return (rc); } diff --git a/tools/tools/bootparttest/Makefile b/tools/tools/bootparttest/Makefile index 46a8d70764b3..c7de091a9cb0 100644 --- a/tools/tools/bootparttest/Makefile +++ b/tools/tools/bootparttest/Makefile @@ -7,13 +7,14 @@ BINDIR?= /usr/bin PROG= bootparttest MAN= -SRCS= bootparttest.c crc32.c malloc.c part.c +SRCS= bootparttest.c crc32.c stub.c part.c disk.c -CFLAGS= -I${.CURDIR}/../../../sys/boot/common -I. 
\ - -DLOADER_GPT_SUPPORT -DLOADER_MBR_SUPPORT -DPART_DEBUG +CFLAGS= -I${.CURDIR}/../../../sys/boot/common \ + -DLOADER_GPT_SUPPORT -DLOADER_MBR_SUPPORT -DPART_DEBUG \ + -DDISK_DEBUG -DPADD+= ${LIBGEOM} ${LIBUTIL} -LDADD+= ${LIBGEOM} ${LIBUTIL} -LDFLAGS+= -lgeom -lutil +DPADD+= ${LIBGEOM} +LDADD+= ${LIBGEOM} +LDFLAGS+= -lgeom .include diff --git a/tools/tools/bootparttest/bootparttest.c b/tools/tools/bootparttest/bootparttest.c index f0fb8a9bab03..086248f058ff 100644 --- a/tools/tools/bootparttest/bootparttest.c +++ b/tools/tools/bootparttest/bootparttest.c @@ -33,140 +33,106 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include +#include #include #include +#include #include +static int disk_strategy(void *devdata, int rw, daddr_t blk, + size_t size, char *buf, size_t *rsize); + +/* stub struct devsw */ +struct devsw { + const char dv_name[8]; + int dv_type; + void *dv_init; + int (*dv_strategy)(void *devdata, int rw, daddr_t blk, + size_t size, char *buf, size_t *rsize); + void *dv_open; + void *dv_close; + void *dv_ioctl; + void *dv_print; + void *dv_cleanupa; +} udisk = { + .dv_name = "disk", + .dv_strategy = disk_strategy +}; + struct disk { - const char *name; uint64_t mediasize; uint16_t sectorsize; int fd; int file; - off_t offset; -}; +} disk; static int -diskread(void *arg, void *buf, size_t blocks, off_t offset) +disk_strategy(void *devdata, int rw, daddr_t blk, size_t size, char *buf, + size_t *rsize) { - struct disk *dp; + struct disk_devdesc *dev = devdata; + int ret; - dp = (struct disk *)arg; - printf("%s: read %lu blocks from the offset %jd [+%jd]\n", dp->name, - blocks, offset, dp->offset); - if (offset >= dp->mediasize / dp->sectorsize) + if (rw != 1 /* F_READ */) return (-1); - - return (pread(dp->fd, buf, blocks * dp->sectorsize, - (offset + dp->offset) * dp->sectorsize) != blocks * dp->sectorsize); -} - -static const char* -ptable_type2str(const struct ptable *table) -{ - - switch (ptable_gettype(table)) { - case PTABLE_NONE: - return 
("None"); - case PTABLE_BSD: - return ("BSD"); - case PTABLE_MBR: - return ("MBR"); - case PTABLE_GPT: - return ("GPT"); - case PTABLE_VTOC8: - return ("VTOC8"); - }; - return ("Unknown"); -} - -#define PWIDTH 35 -static void -ptable_print(void *arg, const char *pname, const struct ptable_entry *part) -{ - struct ptable *table; - struct disk *dp, bsd; - char line[80], size[6]; - - dp = (struct disk *)arg; - sprintf(line, " %s%s: %s", dp->file ? "disk0": dp->name, pname, - parttype2str(part->type)); - humanize_number(size, sizeof(size), - (part->end - part->start + 1) * dp->sectorsize, "", - HN_AUTOSCALE, HN_B | HN_NOSPACE | HN_DECIMAL); - printf("%-*s%s\n", PWIDTH, line, size); - if (part->type == PART_FREEBSD) { - sprintf(line, "%s%s", dp->file ? "disk0": dp->name, pname); - bsd.name = line; - bsd.fd = dp->fd; - bsd.file = 0; /* to use dp->name in the next sprintf */ - bsd.offset = dp->offset + part->start; - bsd.sectorsize = dp->sectorsize; - bsd.mediasize = (part->end - part->start + 1) * dp->sectorsize; - table = ptable_open(&bsd, bsd.mediasize / bsd.sectorsize, - bsd.sectorsize, diskread); - if (table == NULL) - return; - ptable_iterate(table, &bsd, ptable_print); - ptable_close(table); - } -} -#undef PWIDTH - -static void -inspect_disk(struct disk *dp) -{ - struct ptable *table; - - table = ptable_open(dp, dp->mediasize / dp->sectorsize, - dp->sectorsize, diskread); - if (table == NULL) { - printf("ptable_open failed\n"); - return; - } - printf("Partition table detected: %s\n", ptable_type2str(table)); - ptable_iterate(table, dp, ptable_print); - ptable_close(table); + if (rsize) + *rsize = 0; + printf("read %lu bytes from the block %ld [+%ld]\n", size, + blk, dev->d_offset); + ret = pread(disk.fd, buf, size, + (blk + dev->d_offset) * disk.sectorsize); + if (ret != size) + return (-1); + return (0); } int main(int argc, char **argv) { + struct disk_devdesc dev; struct stat sb; - struct disk d; + const char *p; if (argc < 2) errx(1, "Usage: %s | " "", 
argv[0]); - d.name = argv[1]; - if (stat(d.name, &sb) == 0 && S_ISREG(sb.st_mode)) { - d.fd = open(d.name, O_RDONLY); - if (d.fd < 0) - err(1, "open %s", d.name); - d.mediasize = sb.st_size; - d.sectorsize = 512; - d.file = 1; + memset(&disk, 0, sizeof(disk)); + memset(&dev, 0, sizeof(dev)); + dev.d_dev = &udisk; + dev.d_slice = -1; + dev.d_partition = -1; + if (stat(argv[1], &sb) == 0 && S_ISREG(sb.st_mode)) { + disk.fd = open(argv[1], O_RDONLY); + if (disk.fd < 0) + err(1, "open %s", argv[1]); + disk.mediasize = sb.st_size; + disk.sectorsize = 512; + disk.file = 1; } else { - d.fd = g_open(d.name, 0); - if (d.fd < 0) - err(1, "g_open %s", d.name); - d.mediasize = g_mediasize(d.fd); - d.sectorsize = g_sectorsize(d.fd); - d.file = 0; + disk.fd = g_open(argv[1], 0); + if (disk.fd < 0) + err(1, "g_open %s", argv[1]); + disk.mediasize = g_mediasize(disk.fd); + disk.sectorsize = g_sectorsize(disk.fd); + p = strpbrk(argv[1], "0123456789"); + if (p != NULL) + disk_parsedev(&dev, p, NULL); } - d.offset = 0; - printf("%s \"%s\" opened\n", d.file ? "Disk image": "GEOM provider", - d.name); + printf("%s \"%s\" opened\n", disk.file ? 
"Disk image": "GEOM provider", + argv[1]); printf("Mediasize: %ju Bytes (%ju sectors)\nSectorsize: %u Bytes\n", - d.mediasize, d.mediasize / d.sectorsize, d.sectorsize); + disk.mediasize, disk.mediasize / disk.sectorsize, disk.sectorsize); - inspect_disk(&d); + if (disk_open(&dev, disk.mediasize, disk.sectorsize, 0) != 0) + errx(1, "disk_open failed"); + printf("\tdisk0:\n"); + disk_print(&dev, "\tdisk0", 1); + disk_close(&dev); - if (d.file) - close(d.fd); + if (disk.file) + close(disk.fd); else - g_close(d.fd); + g_close(disk.fd); return (0); } diff --git a/tools/tools/bootparttest/malloc.c b/tools/tools/bootparttest/stub.c similarity index 95% rename from tools/tools/bootparttest/malloc.c rename to tools/tools/bootparttest/stub.c index bc5d65f466ec..dccf42522f15 100644 --- a/tools/tools/bootparttest/malloc.c +++ b/tools/tools/bootparttest/stub.c @@ -28,6 +28,7 @@ __FBSDID("$FreeBSD$"); #include +#include void* Malloc(size_t size, const char *file, int line) @@ -42,3 +43,10 @@ Free(void *ptr, const char *file, int line) return (free(ptr)); } + +void +pager_output(const char *s) +{ + + printf("%s", s); +} From 260ac36209a9f23f95d683f01cc6b2bc36be1cd0 Mon Sep 17 00:00:00 2001 From: "Andrey V. Elsukov" Date: Sun, 5 Oct 2014 06:06:48 +0000 Subject: [PATCH 27/59] Fix typo. 
--- tools/tools/bootparttest/bootparttest.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/tools/bootparttest/bootparttest.c b/tools/tools/bootparttest/bootparttest.c index 086248f058ff..ecb8946108c8 100644 --- a/tools/tools/bootparttest/bootparttest.c +++ b/tools/tools/bootparttest/bootparttest.c @@ -53,7 +53,7 @@ struct devsw { void *dv_close; void *dv_ioctl; void *dv_print; - void *dv_cleanupa; + void *dv_cleanup; } udisk = { .dv_name = "disk", .dv_strategy = disk_strategy From 6c572040c677bfbb31b650653cde9f35b4c56c5d Mon Sep 17 00:00:00 2001 From: Robert Watson Date: Sun, 5 Oct 2014 06:28:53 +0000 Subject: [PATCH 28/59] Eliminate use of M_EXT in IP6_EXTHDR_CHECK() by trimming a redundant 'if'/'else' case: it matches the simple 'else' case that follows. This reduces awareness of external-storage mechanics outside of the mbuf allocator. Reviewed by: bz MFC after: 3 days Sponsored by: EMC / Isilon Storage Division Differential Revision: https://reviews.freebsd.org/D900 --- sys/netinet/ip6.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/sys/netinet/ip6.h b/sys/netinet/ip6.h index 8f498410733b..ff870579bbc7 100644 --- a/sys/netinet/ip6.h +++ b/sys/netinet/ip6.h @@ -277,12 +277,6 @@ do { \ (((m) = m_pullup((m), (off) + (hlen))) == NULL)) { \ IP6STAT_INC(ip6s_exthdrtoolong); \ return ret; \ - } else if ((m)->m_flags & M_EXT) { \ - if ((m)->m_len < (off) + (hlen)) { \ - IP6STAT_INC(ip6s_exthdrtoolong); \ - m_freem(m); \ - return ret; \ - } \ } else { \ if ((m)->m_len < (off) + (hlen)) { \ IP6STAT_INC(ip6s_exthdrtoolong); \ From bad2520a2b8d970d8623c199d0d57e667ca40069 Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Sun, 5 Oct 2014 07:21:41 +0000 Subject: [PATCH 29/59] Avoid unnecessary ppeers_lock acquisition in exit1. 
MFC after: 1 week --- sys/kern/kern_exit.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c index 1dbb9974bdb8..1e4c0956474d 100644 --- a/sys/kern/kern_exit.c +++ b/sys/kern/kern_exit.c @@ -265,8 +265,8 @@ exit1(struct thread *td, int rv) AUDIT_SYSCALL_EXIT(0, td); #endif - /* Are we a task leader? */ - if (p == p->p_leader) { + /* Are we a task leader with peers? */ + if (p->p_peers != NULL && p == p->p_leader) { mtx_lock(&ppeers_lock); q = p->p_peers; while (q != NULL) { @@ -337,15 +337,17 @@ exit1(struct thread *td, int rv) /* * Remove ourself from our leader's peer list and wake our leader. */ - mtx_lock(&ppeers_lock); - if (p->p_leader->p_peers) { - q = p->p_leader; - while (q->p_peers != p) - q = q->p_peers; - q->p_peers = p->p_peers; - wakeup(p->p_leader); + if (p->p_leader->p_peers != NULL) { + mtx_lock(&ppeers_lock); + if (p->p_leader->p_peers != NULL) { + q = p->p_leader; + while (q->p_peers != p) + q = q->p_peers; + q->p_peers = p->p_peers; + wakeup(p->p_leader); + } + mtx_unlock(&ppeers_lock); } - mtx_unlock(&ppeers_lock); vmspace_exit(td); From 513798cc9ce57dcf2899af77dffe3103fb33722b Mon Sep 17 00:00:00 2001 From: Yoshihiro Takahashi Date: Sun, 5 Oct 2014 07:27:05 +0000 Subject: [PATCH 30/59] - Refactor defining variables. - Merge common modules both i386 and amd64 into one if-endif. - Sort. - There are no functional changes. 
--- sys/modules/Makefile | 475 ++++++++++++++++--------------------------- 1 file changed, 174 insertions(+), 301 deletions(-) diff --git a/sys/modules/Makefile b/sys/modules/Makefile index 58ddecca06f4..a510658e623a 100644 --- a/sys/modules/Makefile +++ b/sys/modules/Makefile @@ -384,32 +384,7 @@ SUBDIR= \ ${_xe} \ xl \ ${_zfs} \ - zlib \ - -.if ${MACHINE_CPUARCH} == "i386" || ${MACHINE_CPUARCH} == "amd64" -_filemon= filemon -_vmware= vmware -.endif - -.if ${MACHINE_CPUARCH} != "powerpc" && ${MACHINE_CPUARCH} != "arm" && \ - ${MACHINE_CPUARCH} != "mips" -_syscons= syscons -_vpo= vpo -.endif - -.if ${MACHINE_CPUARCH} != "arm" && ${MACHINE_CPUARCH} != "mips" -# no BUS_SPACE_UNSPECIFIED -# No barrier instruction support (specific to this driver) -_sym= sym -# intr_disable() is a macro, causes problems -.if ${MK_SOURCELESS_UCODE} != "no" -_cxgb= cxgb -.endif -.endif - -.if ${MK_SOURCELESS_UCODE} != "no" -_cxgbe= cxgbe -.endif + zlib .if ${MK_CRYPT} != "no" || defined(ALL_MODULES) .if exists(${.CURDIR}/../opencrypto) @@ -423,23 +398,20 @@ _random= random .if (${MK_INET_SUPPORT} != "no" || ${MK_INET6_SUPPORT} != "no") || \ defined(ALL_MODULES) -_carp= carp +_carp= carp _toecore= toecore .endif .if ${MK_INET_SUPPORT} != "no" || defined(ALL_MODULES) _if_gre= if_gre +_ipdivert= ipdivert +_ipfw= ipfw .endif .if ${MK_IPFILTER} != "no" || defined(ALL_MODULES) _ipfilter= ipfilter .endif -.if ${MK_INET_SUPPORT} != "no" || defined(ALL_MODULES) -_ipdivert= ipdivert -_ipfw= ipfw -.endif - .if ${MK_NAND} != "no" || defined(ALL_MODULES) _nandfs= nandfs _nandsim= nandsim @@ -460,6 +432,7 @@ _pfsync= pfsync .if ${MK_SOURCELESS_UCODE} != "no" _bce= bce +_cxgbe= cxgbe _fatm= fatm _fxp= fxp _ispfw= ispfw @@ -471,21 +444,178 @@ _ti= ti _txp= txp .endif + +.if ${MACHINE_CPUARCH} != "arm" && ${MACHINE_CPUARCH} != "mips" && \ + ${MACHINE_CPUARCH} != "powerpc" +_syscons= syscons +_vpo= vpo +.endif + +.if ${MACHINE_CPUARCH} != "arm" && ${MACHINE_CPUARCH} != "mips" +# no BUS_SPACE_UNSPECIFIED 
+# No barrier instruction support (specific to this driver) +_sym= sym +# intr_disable() is a macro, causes problems +.if ${MK_SOURCELESS_UCODE} != "no" +_cxgb= cxgb +.endif +.endif + +.if ${MACHINE_CPUARCH} == "i386" || ${MACHINE_CPUARCH} == "amd64" +_agp= agp +_an= an +_aout= aout +_bktr= bktr +_bxe= bxe +_cardbus= cardbus +_cbb= cbb +_cpuctl= cpuctl +_cpufreq= cpufreq +.if ${MK_CDDL} != "no" || defined(ALL_MODULES) +_cyclic= cyclic +.endif +_dpms= dpms +_drm= drm +_drm2= drm2 +.if ${MK_CDDL} != "no" || defined(ALL_MODULES) +_dtrace= dtrace +.endif +_ed= ed +_em= em +_et= et +_exca= exca +_ext2fs= ext2fs +_filemon= filemon +_i2c= i2c +.if ${MK_OFED} != "no" || defined(ALL_MODULES) +_ibcore= ibcore +.endif +_if_ndis= if_ndis +_igb= igb +_io= io +.if ${MK_OFED} != "no" || defined(ALL_MODULES) +_ipoib= ipoib +.endif +_linprocfs= linprocfs +_linsysfs= linsysfs +_linux= linux +_ndis= ndis +.if ${MK_CDDL} != "no" || defined(ALL_MODULES) +_opensolaris= opensolaris +.endif +_pccard= pccard +.if ${MK_OFED} != "no" || defined(ALL_MODULES) +_rdma= rdma +.endif +_safe= safe +_scsi_low= scsi_low +_si= si +_smbfs= smbfs +_sound= sound +_speaker= speaker +_splash= splash +_sppp= sppp +_vmware= vmware +_vxge= vxge +_wbwd= wbwd +_wi= wi +.if ${MK_ZFS} != "no" || defined(ALL_MODULES) +_zfs= zfs +.endif +.if ${MACHINE} != "pc98" +_aac= aac +_aacraid= aacraid +_acpi= acpi +.if ${MK_CRYPT} != "no" || defined(ALL_MODULES) +_aesni= aesni +.endif +_amdsbwd= amdsbwd +_amdtemp= amdtemp +_arcmsr= arcmsr +_asmc= asmc +_ciss= ciss +_cmx= cmx +_coretemp= coretemp +.if ${MK_SOURCELESS_HOST} != "no" +_hpt27xx= hpt27xx +.endif +_hptiop= hptiop +.if ${MK_SOURCELESS_HOST} != "no" +_hptmv= hptmv +_hptnr= hptnr +_hptrr= hptrr +.endif +_hyperv= hyperv +_ichwd= ichwd +_ida= ida +_iir= iir +_ipmi= ipmi +_ips= ips +_ipw= ipw +.if ${MK_SOURCELESS_UCODE} != "no" +_ipwfw= ipwfw +.endif +_isci= isci +_iwi= iwi +.if ${MK_SOURCELESS_UCODE} != "no" +_iwifw= iwifw +.endif +_iwn= iwn +.if ${MK_SOURCELESS_UCODE} 
!= "no" +_iwnfw= iwnfw +.endif +_ixgb= ixgb +_ixgbe= ixgbe +.if ${MK_OFED} != "no" || defined(ALL_MODULES) +_mlx4= mlx4 +_mlx4ib= mlx4ib +_mlxen= mlxen +.endif +_mly= mly +.if ${MK_OFED} != "no" || defined(ALL_MODULES) +_mthca= mthca +.endif +_nfe= nfe +_nvd= nvd +_nvme= nvme +_nvram= nvram +_nxge= nxge +.if ${MK_CRYPT} != "no" || defined(ALL_MODULES) +_padlock= padlock +.endif +_s3= s3 +_tpm= tpm +_twa= twa +_vesa= vesa +_viawd= viawd +_virtio= virtio +_wpi= wpi +.if ${MK_SOURCELESS_UCODE} != "no" +_wpifw= wpifw +.endif +_x86bios= x86bios +.endif +.endif + +.if ${MACHINE_CPUARCH} == "amd64" +_ixl= ixl +_ixlv= ixlv +_ntb= ntb +_qlxge= qlxge +_qlxgb= qlxgb +_qlxgbe= qlxgbe +_sfxge= sfxge +_vmm= vmm +.endif + .if ${MACHINE_CPUARCH} == "i386" # XXX some of these can move to the general case when de-i386'ed # XXX some of these can move now, but are untested on other architectures. _3dfx= 3dfx _3dfx_linux= 3dfx_linux -_agp= agp _aic= aic -_an= an -_aout= aout _apm= apm _arcnet= arcnet -_bktr= bktr -_bxe= bxe -_cardbus= cardbus -_cbb= cbb .if ${MK_SOURCELESS_UCODE} != "no" _ce= ce .endif @@ -493,159 +623,39 @@ _coff= coff .if ${MK_SOURCELESS_UCODE} != "no" _cp= cp .endif -_cpuctl= cpuctl -_cpufreq= cpufreq _cs= cs -.if ${MK_CDDL} != "no" || defined(ALL_MODULES) -_cyclic= cyclic -.endif -_dpms= dpms -_drm= drm -_drm2= drm2 -.if ${MK_CDDL} != "no" || defined(ALL_MODULES) -_dtrace= dtrace -.endif -_ed= ed _elink= elink -_em= em _ep= ep -_et= et -_exca= exca -_ext2fs= ext2fs _fe= fe _glxiic= glxiic _glxsb= glxsb -_i2c= i2c -.if ${MK_OFED} != "no" || defined(ALL_MODULES) -_ibcore= ibcore -.endif _ibcs2= ibcs2 _ie= ie -_if_ndis= if_ndis -_igb= igb -_io= io -.if ${MK_OFED} != "no" || defined(ALL_MODULES) -_ipoib= ipoib -.endif -_linprocfs= linprocfs -_linsysfs= linsysfs -_linux= linux _mse= mse -.if ${MK_OFED} != "no" || defined(ALL_MODULES) -_mlx4= mlx4 -_mlx4ib= mlx4ib -_mlxen= mlxen -_mthca= mthca -.endif _ncr= ncr _ncv= ncv -_ndis= ndis _nsp= nsp -.if ${MK_CDDL} != "no" || 
defined(ALL_MODULES) -_opensolaris= opensolaris -.endif -_pccard= pccard _pcfclock= pcfclock _pst= pst -.if ${MK_OFED} != "no" || defined(ALL_MODULES) -_rdma= rdma -.endif -_safe= safe _sbni= sbni -_scsi_low= scsi_low -_si= si -_smbfs= smbfs -_sound= sound -_speaker= speaker -_splash= splash -_sppp= sppp -_stg= stg _streams= streams +_stg= stg _svr4= svr4 -_vxge= vxge -_wbwd= wbwd _wds= wds -_wi= wi _xe= xe -.if ${MK_ZFS} != "no" || defined(ALL_MODULES) -_zfs= zfs -.endif .if ${MACHINE} == "i386" -_aac= aac -_aacraid= aacraid -_acpi= acpi -.if ${MK_CRYPT} != "no" || defined(ALL_MODULES) -_aesni= aesni -.endif .if ${MK_EISA} != "no" _ahb= ahb .endif -_amdsbwd= amdsbwd -_amdtemp= amdtemp -_arcmsr= arcmsr -_asmc= asmc _asr= asr _bios= bios -_ciss= ciss _cm= cm -_cmx= cmx -_coretemp= coretemp .if ${MK_SOURCELESS_UCODE} != "no" _ctau= ctau .endif _dpt= dpt _ex= ex -.if ${MK_SOURCELESS_HOST} != "no" -_hpt27xx= hpt27xx -.endif -_hptiop= hptiop -.if ${MK_SOURCELESS_HOST} != "no" -_hptmv= hptmv -_hptnr= hptnr -_hptrr= hptrr -.endif -_hyperv= hyperv -_ichwd= ichwd -_ida= ida -_iir= iir -_ipmi= ipmi -_ips= ips -_ipw= ipw -.if ${MK_SOURCELESS_UCODE} != "no" -_ipwfw= ipwfw -.endif -_isci= isci -_iwi= iwi -.if ${MK_SOURCELESS_UCODE} != "no" -_iwifw= iwifw -.endif -_iwn= iwn -.if ${MK_SOURCELESS_UCODE} != "no" -_iwnfw= iwnfw -.endif -_ixgb= ixgb -_ixgbe= ixgbe -_mly= mly -_nfe= nfe -_nvd= nvd -_nvme= nvme -_nvram= nvram -_nxge= nxge -_tpm= tpm -_viawd= viawd _wl= wl -_wpi= wpi -.if ${MK_SOURCELESS_UCODE} != "no" -_wpifw= wpifw -.endif -.if ${MK_CRYPT} != "no" || defined(ALL_MODULES) -_padlock= padlock -.endif -_s3= s3 -_twa= twa -_vesa= vesa -_virtio= virtio -_x86bios= x86bios .elif ${MACHINE} == "pc98" _canbepm= canbepm _canbus= canbus @@ -655,144 +665,6 @@ _snc= snc .endif .endif -.if ${MACHINE_CPUARCH} == "amd64" -_aac= aac -_aacraid= aacraid -_aout= aout -_acpi= acpi -.if ${MK_CRYPT} != "no" || defined(ALL_MODULES) -_aesni= aesni -.endif -_agp= agp -_an= an -_amdsbwd= amdsbwd 
-_amdtemp= amdtemp -_arcmsr= arcmsr -_asmc= asmc -_bktr= bktr -_bxe= bxe -_cardbus= cardbus -_cbb= cbb -_cmx= cmx -_ciss= ciss -_coretemp= coretemp -_cpuctl= cpuctl -_cpufreq= cpufreq -.if ${MK_CDDL} != "no" || defined(ALL_MODULES) -_cyclic= cyclic -.endif -_dpms= dpms -_drm= drm -_drm2= drm2 -.if ${MK_CDDL} != "no" || defined(ALL_MODULES) -_dtrace= dtrace -.endif -_ed= ed -_et= et -_em= em -_exca= exca -_ext2fs= ext2fs -.if ${MK_SOURCELESS_HOST} != "no" -_hpt27xx= hpt27xx -.endif -_hptiop= hptiop -.if ${MK_SOURCELESS_HOST} != "no" -_hptmv= hptmv -_hptnr= hptnr -_hptrr= hptrr -.endif -_hyperv= hyperv -_i2c= i2c -.if ${MK_OFED} != "no" || defined(ALL_MODULES) -_ibcore= ibcore -.endif -_ichwd= ichwd -_ida= ida -_if_ndis= if_ndis -_igb= igb -_iir= iir -_io= io -_ipmi= ipmi -.if ${MK_OFED} != "no" || defined(ALL_MODULES) -_ipoib= ipoib -.endif -_ips= ips -_ipw= ipw -.if ${MK_SOURCELESS_UCODE} != "no" -_ipwfw= ipwfw -.endif -_isci= isci -_iwi= iwi -.if ${MK_SOURCELESS_UCODE} != "no" -_iwifw= iwifw -.endif -_iwn= iwn -.if ${MK_SOURCELESS_UCODE} != "no" -_iwnfw= iwnfw -.endif -_ixgb= ixgb -_ixgbe= ixgbe -_ixl= ixl -_ixlv= ixlv -_linprocfs= linprocfs -_linsysfs= linsysfs -_linux= linux -_mly= mly -.if ${MK_OFED} != "no" || defined(ALL_MODULES) -_mlx4= mlx4 -_mlx4ib= mlx4ib -_mlxen= mlxen -_mthca= mthca -.endif -_ndis= ndis -_nfe= nfe -_ntb= ntb -_nvd= nvd -_nvme= nvme -_nvram= nvram -_nxge= nxge -.if ${MK_CDDL} != "no" || defined(ALL_MODULES) -_opensolaris= opensolaris -.endif -.if ${MK_CRYPT} != "no" || defined(ALL_MODULES) -_padlock= padlock -.endif -_pccard= pccard -_qlxge= qlxge -_qlxgb= qlxgb -_qlxgbe= qlxgbe -.if ${MK_OFED} != "no" || defined(ALL_MODULES) -_rdma= rdma -.endif -_s3= s3 -_safe= safe -_scsi_low= scsi_low -_sfxge= sfxge -_si= si -_smbfs= smbfs -_sound= sound -_speaker= speaker -_splash= splash -_sppp= sppp -_tpm= tpm -_twa= twa -_vesa= vesa -_viawd= viawd -_virtio= virtio -_vmm= vmm -_vxge= vxge -_x86bios= x86bios -_wbwd= wbwd -_wi= wi -_wpi= wpi -.if 
${MK_SOURCELESS_UCODE} != "no" -_wpifw= wpifw -.endif -.if ${MK_ZFS} != "no" || defined(ALL_MODULES) -_zfs= zfs -.endif -.endif - .if ${MACHINE_CPUARCH} == "arm" _cfi= cfi _cpsw= cpsw @@ -816,12 +688,12 @@ _dtrace= dtrace .endif _exca= exca _nvram= powermac_nvram -_pccard= pccard -_smbfs= smbfs -_sound= sound .if ${MK_CDDL} != "no" || defined(ALL_MODULES) _opensolaris= opensolaris .endif +_pccard= pccard +_smbfs= smbfs +_sound= sound _wi= wi .endif @@ -846,6 +718,7 @@ _sound= sound _zfs= zfs .endif .endif + .endif .for reject in ${WITHOUT_MODULES} From 420bc6cee4a3a10dc44f54079e96b270dac5c8be Mon Sep 17 00:00:00 2001 From: "Andrey A. Chernov" Date: Sun, 5 Oct 2014 07:29:50 +0000 Subject: [PATCH 31/59] 1) For %Z format, understand "UTC" name too. 2) Return NULL if timegm() fails, because it means we cannot convert what we have in GMT to the local time needed. --- lib/libc/stdtime/strptime.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/libc/stdtime/strptime.c b/lib/libc/stdtime/strptime.c index 8576bdb3750a..55c9960ff2c9 100644 --- a/lib/libc/stdtime/strptime.c +++ b/lib/libc/stdtime/strptime.c @@ -552,7 +552,8 @@ _strptime(const char *buf, const char *fmt, struct tm *tm, int *GMTp, strncpy(zonestr, buf, cp - buf); zonestr[cp - buf] = '\0'; tzset(); - if (0 == strcmp(zonestr, "GMT")) { + if (0 == strcmp(zonestr, "GMT") || + 0 == strcmp(zonestr, "UTC")) { *GMTp = 1; } else if (0 == strcmp(zonestr, tzname[0])) { tm->tm_isdst = 0; @@ -674,6 +675,9 @@ strptime_l(const char * __restrict buf, const char * __restrict fmt, ret = _strptime(buf, fmt, tm, &gmt, loc); if (ret && gmt) { time_t t = timegm(tm); + + if (t == -1) + return (NULL); localtime_r(&t, tm); } From 9514a9d16114da3d501490e515b986d8617f8e53 Mon Sep 17 00:00:00 2001 From: Andrew Turner Date: Sun, 5 Oct 2014 11:06:22 +0000 Subject: [PATCH 32/59] Merge the big-endian ARM targets together, and the little-endian ARM targets.
With this we assume any ARM target containing eb is big-endian, otherwise it is little-endian. --- contrib/binutils/bfd/config.bfd | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/contrib/binutils/bfd/config.bfd b/contrib/binutils/bfd/config.bfd index 4717091593f5..3d18dfac3b0f 100755 --- a/contrib/binutils/bfd/config.bfd +++ b/contrib/binutils/bfd/config.bfd @@ -273,19 +273,11 @@ case "${targ}" in targ_defvec=bfd_elf32_littlearm_vec targ_selvecs=bfd_elf32_bigarm_vec ;; - armeb-*-freebsd*) + arm*eb*-*-freebsd*) targ_defvec=bfd_elf32_bigarm_vec targ_selvecs=bfd_elf32_littlearm_vec ;; - armv6eb-*-freebsd*) - targ_defvec=bfd_elf32_bigarm_vec - targ_selvecs=bfd_elf32_littlearm_vec - ;; - armv6-*-freebsd* | armv6hf-*-freebsd*) - targ_defvec=bfd_elf32_littlearm_vec - targ_selvecs=bfd_elf32_bigarm_vec - ;; - arm-*-elf | arm-*-freebsd* | arm*-*-linux-* | arm*-*-conix* | \ + arm-*-elf | arm*-*-freebsd* | arm*-*-linux-* | arm*-*-conix* | \ arm*-*-uclinux* | arm-*-kfreebsd*-gnu | \ arm*-*-eabi* ) targ_defvec=bfd_elf32_littlearm_vec From d6a0c11b7a97803003107dd1c1a502b267014ea7 Mon Sep 17 00:00:00 2001 From: "Andrey V. Elsukov" Date: Sun, 5 Oct 2014 11:16:16 +0000 Subject: [PATCH 33/59] Fix format string warnings. 
--- tools/tools/bootparttest/bootparttest.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/tools/bootparttest/bootparttest.c b/tools/tools/bootparttest/bootparttest.c index ecb8946108c8..0987652d4d37 100644 --- a/tools/tools/bootparttest/bootparttest.c +++ b/tools/tools/bootparttest/bootparttest.c @@ -78,8 +78,8 @@ disk_strategy(void *devdata, int rw, daddr_t blk, size_t size, char *buf, return (-1); if (rsize) *rsize = 0; - printf("read %lu bytes from the block %ld [+%ld]\n", size, - blk, dev->d_offset); + printf("read %zu bytes from the block %lld [+%lld]\n", size, + (long long)blk, (long long)dev->d_offset); ret = pread(disk.fd, buf, size, (blk + dev->d_offset) * disk.sectorsize); if (ret != size) From 57c2505e658d735c8117f24ebeab191323d2a218 Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Sun, 5 Oct 2014 17:35:59 +0000 Subject: [PATCH 34/59] On error, sbuf_bcat() returns -1. Some callers returned this -1 to the upper layers, which interpret it as errno value, which happens to be ERESTART. The result was spurious restarts of the sysctls in loop, e.g. kern.proc.proc, instead of returning ENOMEM to caller. Convert -1 from sbuf_bcat() to ENOMEM, when returning to the callers expecting errno. In collaboration with: pho Sponsored by: The FreeBSD Foundation (kib) MFC after: 1 week --- sys/kern/kern_descrip.c | 2 +- sys/kern/kern_proc.c | 22 ++++++++++++++-------- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c index 5f6aa6fc0696..72a7a8f96bba 100644 --- a/sys/kern/kern_descrip.c +++ b/sys/kern/kern_descrip.c @@ -3097,7 +3097,7 @@ export_kinfo_to_sb(struct export_fd_buf *efbuf) } efbuf->remainder -= kif->kf_structsize; } - return (sbuf_bcat(efbuf->sb, kif, kif->kf_structsize)); + return (sbuf_bcat(efbuf->sb, kif, kif->kf_structsize) == 0 ? 
0 : ENOMEM); } static int diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c index 96510c9f66a9..41cd3043bf44 100644 --- a/sys/kern/kern_proc.c +++ b/sys/kern/kern_proc.c @@ -1208,21 +1208,25 @@ kern_proc_out(struct proc *p, struct sbuf *sb, int flags) #ifdef COMPAT_FREEBSD32 if ((flags & KERN_PROC_MASK32) != 0) { freebsd32_kinfo_proc_out(&ki, &ki32); - error = sbuf_bcat(sb, &ki32, sizeof(ki32)); + if (sbuf_bcat(sb, &ki32, sizeof(ki32)) != 0) + error = ENOMEM; } else #endif - error = sbuf_bcat(sb, &ki, sizeof(ki)); + if (sbuf_bcat(sb, &ki, sizeof(ki)) != 0) + error = ENOMEM; } else { FOREACH_THREAD_IN_PROC(p, td) { fill_kinfo_thread(td, &ki, 1); #ifdef COMPAT_FREEBSD32 if ((flags & KERN_PROC_MASK32) != 0) { freebsd32_kinfo_proc_out(&ki, &ki32); - error = sbuf_bcat(sb, &ki32, sizeof(ki32)); + if (sbuf_bcat(sb, &ki32, sizeof(ki32)) != 0) + error = ENOMEM; } else #endif - error = sbuf_bcat(sb, &ki, sizeof(ki)); - if (error) + if (sbuf_bcat(sb, &ki, sizeof(ki)) != 0) + error = ENOMEM; + if (error != 0) break; } } @@ -1777,7 +1781,8 @@ proc_getauxv(struct thread *td, struct proc *p, struct sbuf *sb) else #endif size = vsize * sizeof(Elf_Auxinfo); - error = sbuf_bcat(sb, auxv, size); + if (sbuf_bcat(sb, auxv, size) != 0) + error = ENOMEM; free(auxv, M_TEMP); } return (error); @@ -2363,9 +2368,10 @@ kern_proc_vmmap_out(struct proc *p, struct sbuf *sb) strlen(kve->kve_path) + 1; kve->kve_structsize = roundup(kve->kve_structsize, sizeof(uint64_t)); - error = sbuf_bcat(sb, kve, kve->kve_structsize); + if (sbuf_bcat(sb, kve, kve->kve_structsize) != 0) + error = ENOMEM; vm_map_lock_read(map); - if (error) + if (error != 0) break; if (last_timestamp != map->timestamp) { vm_map_lookup_entry(map, addr - 1, &tmp_entry); From 2b4a2528d748ab5934bf58bc8faa16c5c847f3bb Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Sun, 5 Oct 2014 19:40:29 +0000 Subject: [PATCH 35/59] filedesc: fix up breakage introduced in 272505 Include sequence counter support unconditionally [1].
This fixes reported build problems with e.g. nvidia driver due to missing opt_capsicum.h. Replace fishy looking sizeof with offsetof. Make fde_seq the last member in order to simplify calculations. Suggested by: kib [1] X-MFC: with 272505 --- sys/kern/kern_descrip.c | 10 +++++----- sys/sys/filedesc.h | 18 ++---------------- 2 files changed, 7 insertions(+), 21 deletions(-) diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c index 72a7a8f96bba..c51a876feedf 100644 --- a/sys/kern/kern_descrip.c +++ b/sys/kern/kern_descrip.c @@ -295,7 +295,7 @@ _fdfree(struct filedesc *fdp, int fd, int last) filecaps_free(&fde->fde_caps); if (last) return; - bzero(fde_change(fde), fde_change_size); + bzero(fde, fde_change_size); fdunused(fdp, fd); #ifdef CAPABILITIES seq_write_end(&fde->fde_seq); @@ -894,7 +894,7 @@ do_dup(struct thread *td, int flags, int old, int new, seq_write_begin(&newfde->fde_seq); #endif filecaps_free(&newfde->fde_caps); - memcpy(fde_change(newfde), fde_change(oldfde), fde_change_size); + memcpy(newfde, oldfde, fde_change_size); filecaps_copy(&oldfde->fde_caps, &newfde->fde_caps); if ((flags & DUP_CLOEXEC) != 0) newfde->fde_flags = oldfde->fde_flags | UF_EXCLOSE; @@ -2778,7 +2778,7 @@ dupfdopen(struct thread *td, struct filedesc *fdp, int dfd, int mode, #ifdef CAPABILITIES seq_write_begin(&newfde->fde_seq); #endif - memcpy(fde_change(newfde), fde_change(oldfde), fde_change_size); + memcpy(newfde, oldfde, fde_change_size); filecaps_copy(&oldfde->fde_caps, &newfde->fde_caps); #ifdef CAPABILITIES seq_write_end(&newfde->fde_seq); @@ -2793,8 +2793,8 @@ dupfdopen(struct thread *td, struct filedesc *fdp, int dfd, int mode, #ifdef CAPABILITIES seq_write_begin(&newfde->fde_seq); #endif - memcpy(fde_change(newfde), fde_change(oldfde), fde_change_size); - bzero(fde_change(oldfde), fde_change_size); + memcpy(newfde, oldfde, fde_change_size); + bzero(oldfde, fde_change_size); fdunused(fdp, dfd); #ifdef CAPABILITIES seq_write_end(&newfde->fde_seq); diff --git
a/sys/sys/filedesc.h b/sys/sys/filedesc.h index 3b3241cbae3e..0065852c7ac4 100644 --- a/sys/sys/filedesc.h +++ b/sys/sys/filedesc.h @@ -33,10 +33,6 @@ #ifndef _SYS_FILEDESC_H_ #define _SYS_FILEDESC_H_ -#ifdef _KERNEL -#include "opt_capsicum.h" -#endif - #include #include #include @@ -55,24 +51,16 @@ struct filecaps { }; struct filedescent { -#ifdef CAPABILITIES - seq_t fde_seq; /* if you need fde_file and fde_caps in sync */ -#endif struct file *fde_file; /* file structure for open file */ struct filecaps fde_caps; /* per-descriptor rights */ uint8_t fde_flags; /* per-process open file flags */ + seq_t fde_seq; /* if you need fde_file and fde_caps in sync */ }; #define fde_rights fde_caps.fc_rights #define fde_fcntls fde_caps.fc_fcntls #define fde_ioctls fde_caps.fc_ioctls #define fde_nioctls fde_caps.fc_nioctls -#ifdef CAPABILITIES -#define fde_change(fde) ((char *)(fde) + sizeof(seq_t)) -#define fde_change_size (sizeof(struct filedescent) - sizeof(seq_t)) -#else -#define fde_change(fde) ((fde)) -#define fde_change_size (sizeof(struct filedescent)) -#endif +#define fde_change_size (offsetof(struct filedescent, fde_seq)) /* * This structure is used for the management of descriptors. It may be @@ -97,9 +85,7 @@ struct filedesc { int fd_holdleaderscount; /* block fdfree() for shared close() */ int fd_holdleaderswakeup; /* fdfree() needs wakeup */ }; -#ifdef CAPABILITIES #define fd_seq(fdp, fd) (&(fdp)->fd_ofiles[(fd)].fde_seq) -#endif /* * Structure to keep track of (process leader, struct fildedesc) tuples. From c51275260b318cc7899ebea4b469f1a8df5c4fe1 Mon Sep 17 00:00:00 2001 From: Hiroki Sato Date: Sun, 5 Oct 2014 19:43:37 +0000 Subject: [PATCH 36/59] Virtualize if_bridge(4) cloner. 
--- sys/net/if_bridge.c | 198 ++++++++++++++++++++++++++++---------------- 1 file changed, 126 insertions(+), 72 deletions(-) diff --git a/sys/net/if_bridge.c b/sys/net/if_bridge.c index 252404f328e6..6c169ddb4852 100644 --- a/sys/net/if_bridge.c +++ b/sys/net/if_bridge.c @@ -226,7 +226,8 @@ struct bridge_softc { u_char sc_defaddr[6]; /* Default MAC address */ }; -static struct mtx bridge_list_mtx; +static VNET_DEFINE(struct mtx, bridge_list_mtx); +#define V_bridge_list_mtx VNET(bridge_list_mtx) eventhandler_tag bridge_detach_cookie = NULL; int bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD; @@ -350,36 +351,64 @@ static struct bstp_cb_ops bridge_ops = { SYSCTL_DECL(_net_link); static SYSCTL_NODE(_net_link, IFT_BRIDGE, bridge, CTLFLAG_RW, 0, "Bridge"); -static int pfil_onlyip = 1; /* only pass IP[46] packets when pfil is enabled */ -static int pfil_bridge = 1; /* run pfil hooks on the bridge interface */ -static int pfil_member = 1; /* run pfil hooks on the member interface */ -static int pfil_ipfw = 0; /* layer2 filter with ipfw */ -static int pfil_ipfw_arp = 0; /* layer2 filter with ipfw */ -static int pfil_local_phys = 0; /* run pfil hooks on the physical interface for - locally destined packets */ -static int log_stp = 0; /* log STP state changes */ -static int bridge_inherit_mac = 0; /* share MAC with first bridge member */ -SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_onlyip, CTLFLAG_RWTUN, - &pfil_onlyip, 0, "Only pass IP packets when pfil is enabled"); -SYSCTL_INT(_net_link_bridge, OID_AUTO, ipfw_arp, CTLFLAG_RWTUN, - &pfil_ipfw_arp, 0, "Filter ARP packets through IPFW layer2"); -SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_bridge, CTLFLAG_RWTUN, - &pfil_bridge, 0, "Packet filter on the bridge interface"); -SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_member, CTLFLAG_RWTUN, - &pfil_member, 0, "Packet filter on the member interface"); -SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_local_phys, CTLFLAG_RWTUN, - &pfil_local_phys, 0, +/* only pass IP[46] 
packets when pfil is enabled */ +static VNET_DEFINE(int, pfil_onlyip) = 1; +#define V_pfil_onlyip VNET(pfil_onlyip) +SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_onlyip, + CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(pfil_onlyip), 0, + "Only pass IP packets when pfil is enabled"); + +/* run pfil hooks on the bridge interface */ +static VNET_DEFINE(int, pfil_bridge) = 1; +#define V_pfil_bridge VNET(pfil_bridge) +SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_bridge, + CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(pfil_bridge), 0, + "Packet filter on the bridge interface"); + +/* layer2 filter with ipfw */ +static VNET_DEFINE(int, pfil_ipfw); +#define V_pfil_ipfw VNET(pfil_ipfw) + +/* layer2 ARP filter with ipfw */ +static VNET_DEFINE(int, pfil_ipfw_arp); +#define V_pfil_ipfw_arp VNET(pfil_ipfw_arp) +SYSCTL_INT(_net_link_bridge, OID_AUTO, ipfw_arp, + CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(pfil_ipfw_arp), 0, + "Filter ARP packets through IPFW layer2"); + +/* run pfil hooks on the member interface */ +static VNET_DEFINE(int, pfil_member) = 1; +#define V_pfil_member VNET(pfil_member) +SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_member, + CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(pfil_member), 0, + "Packet filter on the member interface"); + +/* run pfil hooks on the physical interface for locally destined packets */ +static VNET_DEFINE(int, pfil_local_phys); +#define V_pfil_local_phys VNET(pfil_local_phys) +SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_local_phys, + CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(pfil_local_phys), 0, "Packet filter on the physical interface for locally destined packets"); -SYSCTL_INT(_net_link_bridge, OID_AUTO, log_stp, CTLFLAG_RWTUN, - &log_stp, 0, "Log STP state changes"); -SYSCTL_INT(_net_link_bridge, OID_AUTO, inherit_mac, CTLFLAG_RWTUN, - &bridge_inherit_mac, 0, + +/* log STP state changes */ +static VNET_DEFINE(int, log_stp); +#define V_log_stp VNET(log_stp) +SYSCTL_INT(_net_link_bridge, OID_AUTO, log_stp, + CTLFLAG_RWTUN | CTLFLAG_VNET, 
&VNET_NAME(log_stp), 0, + "Log STP state changes"); + +/* share MAC with first bridge member */ +static VNET_DEFINE(int, bridge_inherit_mac); +#define V_bridge_inherit_mac VNET(bridge_inherit_mac) +SYSCTL_INT(_net_link_bridge, OID_AUTO, inherit_mac, + CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(bridge_inherit_mac), 0, "Inherit MAC address from the first bridge member"); static VNET_DEFINE(int, allow_llz_overlap) = 0; #define V_allow_llz_overlap VNET(allow_llz_overlap) -SYSCTL_VNET_INT(_net_link_bridge, OID_AUTO, allow_llz_overlap, CTLFLAG_RW, - &VNET_NAME(allow_llz_overlap), 0, "Allow overlap of link-local scope " +SYSCTL_VNET_INT(_net_link_bridge, OID_AUTO, allow_llz_overlap, + CTLFLAG_RW | CTLFLAG_VNET, &VNET_NAME(allow_llz_overlap), 0, + "Allow overlap of link-local scope " "zones of a bridge interface and the member interfaces"); struct bridge_control { @@ -477,27 +506,52 @@ const struct bridge_control bridge_control_table[] = { BC_F_COPYIN|BC_F_SUSER }, }; -const int bridge_control_table_size = - sizeof(bridge_control_table) / sizeof(bridge_control_table[0]); +const int bridge_control_table_size = nitems(bridge_control_table); -LIST_HEAD(, bridge_softc) bridge_list; +static VNET_DEFINE(LIST_HEAD(, bridge_softc), bridge_list); +#define V_bridge_list VNET(bridge_list) +#define BRIDGE_LIST_LOCK_INIT(x) mtx_init(&V_bridge_list_mtx, \ + "if_bridge list", NULL, MTX_DEF) +#define BRIDGE_LIST_LOCK_DESTROY(x) mtx_destroy(&V_bridge_list_mtx) +#define BRIDGE_LIST_LOCK(x) mtx_lock(&V_bridge_list_mtx) +#define BRIDGE_LIST_UNLOCK(x) mtx_unlock(&V_bridge_list_mtx) + +static VNET_DEFINE(struct if_clone *, bridge_cloner); +#define V_bridge_cloner VNET(bridge_cloner) -static struct if_clone *bridge_cloner; static const char bridge_name[] = "bridge"; +static void +vnet_bridge_init(const void *unused __unused) +{ + + BRIDGE_LIST_LOCK_INIT(); + LIST_INIT(&V_bridge_list); + V_bridge_cloner = if_clone_simple(bridge_name, + bridge_clone_create, bridge_clone_destroy, 0); +} 
+VNET_SYSINIT(vnet_bridge_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, + vnet_bridge_init, NULL); + +static void +vnet_bridge_uninit(const void *unused __unused) +{ + + if_clone_detach(V_bridge_cloner); + BRIDGE_LIST_LOCK_DESTROY(); +} +VNET_SYSUNINIT(vnet_bridge_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, + vnet_bridge_uninit, NULL); + static int bridge_modevent(module_t mod, int type, void *data) { switch (type) { case MOD_LOAD: - mtx_init(&bridge_list_mtx, "if_bridge list", NULL, MTX_DEF); - bridge_cloner = if_clone_simple(bridge_name, - bridge_clone_create, bridge_clone_destroy, 0); bridge_rtnode_zone = uma_zcreate("bridge_rtnode", sizeof(struct bridge_rtnode), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); - LIST_INIT(&bridge_list); bridge_input_p = bridge_input; bridge_output_p = bridge_output; bridge_dn_p = bridge_dummynet; @@ -509,13 +563,11 @@ bridge_modevent(module_t mod, int type, void *data) case MOD_UNLOAD: EVENTHANDLER_DEREGISTER(ifnet_departure_event, bridge_detach_cookie); - if_clone_detach(bridge_cloner); uma_zdestroy(bridge_rtnode_zone); bridge_input_p = NULL; bridge_output_p = NULL; bridge_dn_p = NULL; bridge_linkstate_p = NULL; - mtx_destroy(&bridge_list_mtx); break; default: return (EOPNOTSUPP); @@ -538,14 +590,14 @@ MODULE_DEPEND(if_bridge, bridgestp, 1, 1, 1); static int sysctl_pfil_ipfw(SYSCTL_HANDLER_ARGS) { - int enable = pfil_ipfw; + int enable = V_pfil_ipfw; int error; error = sysctl_handle_int(oidp, &enable, 0, req); - enable = (enable) ? 1 : 0; + enable &= 1; - if (enable != pfil_ipfw) { - pfil_ipfw = enable; + if (enable != V_pfil_ipfw) { + V_pfil_ipfw = enable; /* * Disable pfil so that ipfw doesnt run twice, if the user @@ -553,17 +605,19 @@ sysctl_pfil_ipfw(SYSCTL_HANDLER_ARGS) * pfil_member. Also allow non-ip packets as ipfw can filter by * layer2 type. 
*/ - if (pfil_ipfw) { - pfil_onlyip = 0; - pfil_bridge = 0; - pfil_member = 0; + if (V_pfil_ipfw) { + V_pfil_onlyip = 0; + V_pfil_bridge = 0; + V_pfil_member = 0; } } return (error); } -SYSCTL_PROC(_net_link_bridge, OID_AUTO, ipfw, CTLTYPE_INT|CTLFLAG_RW, - &pfil_ipfw, 0, &sysctl_pfil_ipfw, "I", "Layer2 filter with IPFW"); +SYSCTL_PROC(_net_link_bridge, OID_AUTO, ipfw, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_VNET, + &VNET_NAME(pfil_ipfw), 0, &sysctl_pfil_ipfw, "I", + "Layer2 filter with IPFW"); /* * bridge_clone_create: @@ -632,8 +686,8 @@ bridge_clone_create(struct if_clone *ifc, int unit, caddr_t params) fb = 1; retry = 0; - mtx_lock(&bridge_list_mtx); - LIST_FOREACH(sc2, &bridge_list, sc_list) { + BRIDGE_LIST_LOCK(); + LIST_FOREACH(sc2, &V_bridge_list, sc_list) { bifp = sc2->sc_ifp; if (memcmp(sc->sc_defaddr, IF_LLADDR(bifp), ETHER_ADDR_LEN) == 0) { @@ -641,7 +695,7 @@ bridge_clone_create(struct if_clone *ifc, int unit, caddr_t params) break; } } - mtx_unlock(&bridge_list_mtx); + BRIDGE_LIST_UNLOCK(); } while (retry == 1); bstp_attach(&sc->sc_stp, &bridge_ops); @@ -650,9 +704,9 @@ bridge_clone_create(struct if_clone *ifc, int unit, caddr_t params) ifp->if_baudrate = 0; ifp->if_type = IFT_BRIDGE; - mtx_lock(&bridge_list_mtx); - LIST_INSERT_HEAD(&bridge_list, sc, sc_list); - mtx_unlock(&bridge_list_mtx); + BRIDGE_LIST_LOCK(); + LIST_INSERT_HEAD(&V_bridge_list, sc, sc_list); + BRIDGE_LIST_UNLOCK(); return (0); } @@ -684,9 +738,9 @@ bridge_clone_destroy(struct ifnet *ifp) callout_drain(&sc->sc_brcallout); - mtx_lock(&bridge_list_mtx); + BRIDGE_LIST_LOCK(); LIST_REMOVE(sc, sc_list); - mtx_unlock(&bridge_list_mtx); + BRIDGE_LIST_UNLOCK(); bstp_detach(&sc->sc_stp); ether_ifdetach(ifp); @@ -952,7 +1006,7 @@ bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif, * the mac address of the bridge to the address of the next member, or * to its default address if no members are left. 
*/ - if (bridge_inherit_mac && sc->sc_ifaddr == ifs) { + if (V_bridge_inherit_mac && sc->sc_ifaddr == ifs) { if (LIST_EMPTY(&sc->sc_iflist)) { bcopy(sc->sc_defaddr, IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN); @@ -1118,7 +1172,7 @@ bridge_ioctl_add(struct bridge_softc *sc, void *arg) * member and the MAC address of the bridge has not been changed from * the default randomly generated one. */ - if (bridge_inherit_mac && LIST_EMPTY(&sc->sc_iflist) && + if (V_bridge_inherit_mac && LIST_EMPTY(&sc->sc_iflist) && !memcmp(IF_LLADDR(sc->sc_ifp), sc->sc_defaddr, ETHER_ADDR_LEN)) { bcopy(IF_LLADDR(ifs), IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN); sc->sc_ifaddr = ifs; @@ -1757,8 +1811,8 @@ bridge_ifdetach(void *arg __unused, struct ifnet *ifp) } /* Check if the interface is a span port */ - mtx_lock(&bridge_list_mtx); - LIST_FOREACH(sc, &bridge_list, sc_list) { + BRIDGE_LIST_LOCK(); + LIST_FOREACH(sc, &V_bridge_list, sc_list) { BRIDGE_LOCK(sc); LIST_FOREACH(bif, &sc->sc_spanlist, bif_next) if (ifp == bif->bif_ifp) { @@ -1768,7 +1822,7 @@ bridge_ifdetach(void *arg __unused, struct ifnet *ifp) BRIDGE_UNLOCK(sc); } - mtx_unlock(&bridge_list_mtx); + BRIDGE_LIST_UNLOCK(); } /* @@ -2350,7 +2404,7 @@ bridge_input(struct ifnet *ifp, struct mbuf *m) if_inc_counter(iface, IFCOUNTER_IPACKETS, 1); \ if_inc_counter(iface, IFCOUNTER_IBYTES, m->m_pkthdr.len); \ /* Filter on the physical interface. 
*/ \ - if (pfil_local_phys && \ + if (V_pfil_local_phys && \ (PFIL_HOOKED(&V_inet_pfil_hook) \ OR_PFIL_HOOKED_INET6)) { \ if (bridge_pfil(&m, NULL, ifp, \ @@ -3004,7 +3058,7 @@ bridge_state_change(struct ifnet *ifp, int state) "discarding" }; - if (log_stp) + if (V_log_stp) log(LOG_NOTICE, "%s: state changed to %s on %s\n", sc->sc_ifp->if_xname, stpstates[state], ifp->if_xname); } @@ -3032,7 +3086,7 @@ bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir) KASSERT(M_WRITABLE(*mp), ("%s: modifying a shared mbuf", __func__)); #endif - if (pfil_bridge == 0 && pfil_member == 0 && pfil_ipfw == 0) + if (V_pfil_bridge == 0 && V_pfil_member == 0 && V_pfil_ipfw == 0) return (0); /* filtering is disabled */ i = min((*mp)->m_pkthdr.len, max_protohdr); @@ -3074,7 +3128,7 @@ bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir) switch (ether_type) { case ETHERTYPE_ARP: case ETHERTYPE_REVARP: - if (pfil_ipfw_arp == 0) + if (V_pfil_ipfw_arp == 0) return (0); /* Automatically pass */ break; @@ -3089,12 +3143,12 @@ bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir) * packets, these will not be checked by pfil(9) and * passed unconditionally so the default is to drop. 
*/ - if (pfil_onlyip) + if (V_pfil_onlyip) goto bad; } /* Run the packet through pfil before stripping link headers */ - if (PFIL_HOOKED(&V_link_pfil_hook) && pfil_ipfw != 0 && + if (PFIL_HOOKED(&V_link_pfil_hook) && V_pfil_ipfw != 0 && dir == PFIL_OUT && ifp != NULL) { error = pfil_run_hooks(&V_link_pfil_hook, mp, ifp, dir, NULL); @@ -3147,21 +3201,21 @@ bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir) * Keep the order: * in_if -> bridge_if -> out_if */ - if (pfil_bridge && dir == PFIL_OUT && bifp != NULL) + if (V_pfil_bridge && dir == PFIL_OUT && bifp != NULL) error = pfil_run_hooks(&V_inet_pfil_hook, mp, bifp, dir, NULL); if (*mp == NULL || error != 0) /* filter may consume */ break; - if (pfil_member && ifp != NULL) + if (V_pfil_member && ifp != NULL) error = pfil_run_hooks(&V_inet_pfil_hook, mp, ifp, dir, NULL); if (*mp == NULL || error != 0) /* filter may consume */ break; - if (pfil_bridge && dir == PFIL_IN && bifp != NULL) + if (V_pfil_bridge && dir == PFIL_IN && bifp != NULL) error = pfil_run_hooks(&V_inet_pfil_hook, mp, bifp, dir, NULL); @@ -3169,7 +3223,7 @@ bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir) break; /* check if we need to fragment the packet */ - if (pfil_member && ifp != NULL && dir == PFIL_OUT) { + if (V_pfil_member && ifp != NULL && dir == PFIL_OUT) { i = (*mp)->m_pkthdr.len; if (i > ifp->if_mtu) { error = bridge_fragment(ifp, *mp, &eh2, snap, @@ -3199,21 +3253,21 @@ bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir) break; #ifdef INET6 case ETHERTYPE_IPV6: - if (pfil_bridge && dir == PFIL_OUT && bifp != NULL) + if (V_pfil_bridge && dir == PFIL_OUT && bifp != NULL) error = pfil_run_hooks(&V_inet6_pfil_hook, mp, bifp, dir, NULL); if (*mp == NULL || error != 0) /* filter may consume */ break; - if (pfil_member && ifp != NULL) + if (V_pfil_member && ifp != NULL) error = pfil_run_hooks(&V_inet6_pfil_hook, mp, ifp, dir, NULL); if (*mp == NULL || error != 0) 
/* filter may consume */ break; - if (pfil_bridge && dir == PFIL_IN && bifp != NULL) + if (V_pfil_bridge && dir == PFIL_IN && bifp != NULL) error = pfil_run_hooks(&V_inet6_pfil_hook, mp, bifp, dir, NULL); break; From 0837bbe4c146de9be71edd26f8d455c93b8bee87 Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Sun, 5 Oct 2014 19:44:40 +0000 Subject: [PATCH 37/59] Keep struct filedescent comments within 80-char limit. --- sys/sys/filedesc.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sys/sys/filedesc.h b/sys/sys/filedesc.h index 0065852c7ac4..ac1781980143 100644 --- a/sys/sys/filedesc.h +++ b/sys/sys/filedesc.h @@ -51,10 +51,10 @@ struct filecaps { }; struct filedescent { - struct file *fde_file; /* file structure for open file */ - struct filecaps fde_caps; /* per-descriptor rights */ - uint8_t fde_flags; /* per-process open file flags */ - seq_t fde_seq; /* if you need fde_file and fde_caps in sync */ + struct file *fde_file; /* file structure for open file */ + struct filecaps fde_caps; /* per-descriptor rights */ + uint8_t fde_flags; /* per-process open file flags */ + seq_t fde_seq; /* keep file and caps in sync */ }; #define fde_rights fde_caps.fc_rights #define fde_fcntls fde_caps.fc_fcntls From d6f59204efb03de3c3d34f53c0c367a2350de8b7 Mon Sep 17 00:00:00 2001 From: Hiroki Sato Date: Sun, 5 Oct 2014 19:46:52 +0000 Subject: [PATCH 38/59] Virtualize if_disc(4) cloner. 
--- sys/net/if_disc.c | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/sys/net/if_disc.c b/sys/net/if_disc.c index 4cce4847d02b..de36886fcae4 100644 --- a/sys/net/if_disc.c +++ b/sys/net/if_disc.c @@ -50,6 +50,7 @@ #include #include #include +#include #include "opt_inet.h" #include "opt_inet6.h" @@ -74,7 +75,8 @@ static void disc_clone_destroy(struct ifnet *); static const char discname[] = "disc"; static MALLOC_DEFINE(M_DISC, discname, "Discard interface"); -static struct if_clone *disc_cloner; +static VNET_DEFINE(struct if_clone *, disc_cloner); +#define V_disc_cloner VNET(disc_cloner) static int disc_clone_create(struct if_clone *ifc, int unit, caddr_t params) @@ -129,17 +131,32 @@ disc_clone_destroy(struct ifnet *ifp) free(sc, M_DISC); } +static void +vnet_disc_init(const void *unused __unused) +{ + + V_disc_cloner = if_clone_simple(discname, disc_clone_create, + disc_clone_destroy, 0); +} +VNET_SYSINIT(vnet_disc_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, + vnet_disc_init, NULL); + +static void +vnet_disc_uninit(const void *unused __unused) +{ + + if_clone_detach(V_disc_cloner); +} +VNET_SYSUNINIT(vnet_disc_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, + vnet_disc_uninit, NULL); + static int disc_modevent(module_t mod, int type, void *data) { switch (type) { case MOD_LOAD: - disc_cloner = if_clone_simple(discname, disc_clone_create, - disc_clone_destroy, 0); - break; case MOD_UNLOAD: - if_clone_detach(disc_cloner); break; default: return (EOPNOTSUPP); @@ -185,6 +202,7 @@ discoutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, static void discrtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info) { + RT_LOCK_ASSERT(rt); rt->rt_mtu = DSMTU; } @@ -200,7 +218,6 @@ discioctl(struct ifnet *ifp, u_long cmd, caddr_t data) int error = 0; switch (cmd) { - case SIOCSIFADDR: ifp->if_flags |= IFF_UP; ifa = (struct ifaddr *)data; @@ -210,7 +227,6 @@ discioctl(struct ifnet *ifp, 
u_long cmd, caddr_t data) * Everything else is done at a higher level. */ break; - case SIOCADDMULTI: case SIOCDELMULTI: if (ifr == 0) { @@ -218,7 +234,6 @@ discioctl(struct ifnet *ifp, u_long cmd, caddr_t data) break; } switch (ifr->ifr_addr.sa_family) { - #ifdef INET case AF_INET: break; @@ -227,17 +242,14 @@ discioctl(struct ifnet *ifp, u_long cmd, caddr_t data) case AF_INET6: break; #endif - default: error = EAFNOSUPPORT; break; } break; - case SIOCSIFMTU: ifp->if_mtu = ifr->ifr_mtu; break; - default: error = EINVAL; } From 041353aba4ae72473bff3a16a6338992bb54dfb8 Mon Sep 17 00:00:00 2001 From: Michael Tuexen Date: Sun, 5 Oct 2014 20:30:49 +0000 Subject: [PATCH 39/59] Remove unused MC_ALIGN macro as suggested by Robert. MFC after: 1 week --- sys/netinet/sctp_output.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/sys/netinet/sctp_output.c b/sys/netinet/sctp_output.c index c9b7722f2367..8260ba7cd97e 100644 --- a/sys/netinet/sctp_output.c +++ b/sys/netinet/sctp_output.c @@ -6784,13 +6784,6 @@ sctp_sendall_completes(void *ptr, uint32_t val SCTP_UNUSED) SCTP_FREE(ca, SCTP_M_COPYAL); } - -#define MC_ALIGN(m, len) do { \ - SCTP_BUF_RESV_UF(m, ((MCLBYTES - (len)) & ~(sizeof(long) - 1)); \ -} while (0) - - - static struct mbuf * sctp_copy_out_all(struct uio *uio, int len) { From 3b4b7de5066e9de2f2bcf02847f09080185aebfc Mon Sep 17 00:00:00 2001 From: Hiroki Sato Date: Sun, 5 Oct 2014 21:27:26 +0000 Subject: [PATCH 40/59] Virtualize if_edsc(4). 
--- sys/net/if_edsc.c | 51 ++++++++++++++++++++++++++++++----------------- 1 file changed, 33 insertions(+), 18 deletions(-) diff --git a/sys/net/if_edsc.c b/sys/net/if_edsc.c index b75f134c9bc6..a58cc4466b43 100644 --- a/sys/net/if_edsc.c +++ b/sys/net/if_edsc.c @@ -51,6 +51,7 @@ #include /* network interface cloning */ #include /* IFT_ETHER and friends */ #include /* kernel-only part of ifnet(9) */ +#include static const char edscname[] = "edsc"; @@ -69,7 +70,8 @@ struct edsc_softc { /* * Attach to the interface cloning framework. */ -static struct if_clone *edsc_cloner; +static VNET_DEFINE(struct if_clone *, edsc_cloner); +#define V_edsc_cloner VNET(edsc_cloner) static int edsc_clone_create(struct if_clone *, int, caddr_t); static void edsc_clone_destroy(struct ifnet *); @@ -307,6 +309,36 @@ edsc_start(struct ifnet *ifp) */ } +static void +vnet_edsc_init(const void *unused __unused) +{ + + /* + * Connect to the network interface cloning framework. + * The last argument is the number of units to be created + * from the outset. It's also the minimum number of units + * allowed. We don't want any units created as soon as the + * driver is loaded. + */ + V_edsc_cloner = if_clone_simple(edscname, edsc_clone_create, + edsc_clone_destroy, 0); +} +VNET_SYSINIT(vnet_edsc_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, + vnet_edsc_init, NULL); + +static void +vnet_edsc_uninit(const void *unused __unused) +{ + + /* + * Disconnect from the cloning framework. + * Existing interfaces will be disposed of properly. + */ + if_clone_detach(V_edsc_cloner); +} +VNET_SYSUNINIT(vnet_edsc_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, + vnet_edsc_uninit, NULL); + /* * This function provides handlers for module events, namely load and unload. */ @@ -316,25 +348,8 @@ edsc_modevent(module_t mod, int type, void *data) switch (type) { case MOD_LOAD: - /* - * Connect to the network interface cloning framework. 
- * The last argument is the number of units to be created - * from the outset. It's also the minimum number of units - * allowed. We don't want any units created as soon as the - * driver is loaded. - */ - edsc_cloner = if_clone_simple(edscname, edsc_clone_create, - edsc_clone_destroy, 0); - break; - case MOD_UNLOAD: - /* - * Disconnect from the cloning framework. - * Existing interfaces will be disposed of properly. - */ - if_clone_detach(edsc_cloner); break; - default: /* * There are other event types, but we don't handle them. From 111fbcd5ed8d52029eb6df75418bb70d01ef949d Mon Sep 17 00:00:00 2001 From: Bryan Venteicher Date: Sun, 5 Oct 2014 21:34:56 +0000 Subject: [PATCH 41/59] Change the UMA mutex into a rwlock Acquire the lock in read mode when just needed to ensure the stability of the keg list. The UMA lock may be held for a long time (relatively speaking) in uma_reclaim() on machines with lots of zones/kegs. If the uma_timeout() would fire during that period, subsequent callouts on that CPU may be significantly delayed. Reviewed by: jhb --- sys/vm/uma_core.c | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/sys/vm/uma_core.c b/sys/vm/uma_core.c index c96da5c3e73a..8527b091b311 100644 --- a/sys/vm/uma_core.c +++ b/sys/vm/uma_core.c @@ -135,8 +135,8 @@ static LIST_HEAD(,uma_keg) uma_kegs = LIST_HEAD_INITIALIZER(uma_kegs); static LIST_HEAD(,uma_zone) uma_cachezones = LIST_HEAD_INITIALIZER(uma_cachezones); -/* This mutex protects the keg list */ -static struct mtx_padalign uma_mtx; +/* This RW lock protects the keg list */ +static struct rwlock_padalign uma_rwlock; /* Linked list of boot time pages */ static LIST_HEAD(,uma_slab) uma_boot_pages = @@ -904,7 +904,7 @@ zone_drain_wait(uma_zone_t zone, int waitok) ZONE_UNLOCK(zone); /* * The DRAINING flag protects us from being freed while - * we're running. Normally the uma_mtx would protect us but we + * we're running. 
Normally the uma_rwlock would protect us but we * must be able to release and acquire the right lock for each keg. */ zone_foreach_keg(zone, &keg_drain); @@ -1540,9 +1540,9 @@ keg_ctor(void *mem, int size, void *udata, int flags) LIST_INSERT_HEAD(&keg->uk_zones, zone, uz_link); - mtx_lock(&uma_mtx); + rw_wlock(&uma_rwlock); LIST_INSERT_HEAD(&uma_kegs, keg, uk_link); - mtx_unlock(&uma_mtx); + rw_wunlock(&uma_rwlock); return (0); } @@ -1592,9 +1592,9 @@ zone_ctor(void *mem, int size, void *udata, int flags) zone->uz_release = arg->release; zone->uz_arg = arg->arg; zone->uz_lockptr = &zone->uz_lock; - mtx_lock(&uma_mtx); + rw_wlock(&uma_rwlock); LIST_INSERT_HEAD(&uma_cachezones, zone, uz_link); - mtx_unlock(&uma_mtx); + rw_wunlock(&uma_rwlock); goto out; } @@ -1611,7 +1611,7 @@ zone_ctor(void *mem, int size, void *udata, int flags) zone->uz_fini = arg->fini; zone->uz_lockptr = &keg->uk_lock; zone->uz_flags |= UMA_ZONE_SECONDARY; - mtx_lock(&uma_mtx); + rw_wlock(&uma_rwlock); ZONE_LOCK(zone); LIST_FOREACH(z, &keg->uk_zones, uz_link) { if (LIST_NEXT(z, uz_link) == NULL) { @@ -1620,7 +1620,7 @@ zone_ctor(void *mem, int size, void *udata, int flags) } } ZONE_UNLOCK(zone); - mtx_unlock(&uma_mtx); + rw_wunlock(&uma_rwlock); } else if (keg == NULL) { if ((keg = uma_kcreate(zone, arg->size, arg->uminit, arg->fini, arg->align, arg->flags)) == NULL) @@ -1718,9 +1718,9 @@ zone_dtor(void *arg, int size, void *udata) if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL)) cache_drain(zone); - mtx_lock(&uma_mtx); + rw_wlock(&uma_rwlock); LIST_REMOVE(zone, uz_link); - mtx_unlock(&uma_mtx); + rw_wunlock(&uma_rwlock); /* * XXX there are some races here where * the zone can be drained but zone lock @@ -1742,9 +1742,9 @@ zone_dtor(void *arg, int size, void *udata) * We only destroy kegs from non secondary zones. 
*/ if (keg != NULL && (zone->uz_flags & UMA_ZONE_SECONDARY) == 0) { - mtx_lock(&uma_mtx); + rw_wlock(&uma_rwlock); LIST_REMOVE(keg, uk_link); - mtx_unlock(&uma_mtx); + rw_wunlock(&uma_rwlock); zone_free_item(kegs, keg, NULL, SKIP_NONE); } ZONE_LOCK_FINI(zone); @@ -1766,12 +1766,12 @@ zone_foreach(void (*zfunc)(uma_zone_t)) uma_keg_t keg; uma_zone_t zone; - mtx_lock(&uma_mtx); + rw_rlock(&uma_rwlock); LIST_FOREACH(keg, &uma_kegs, uk_link) { LIST_FOREACH(zone, &keg->uk_zones, uz_link) zfunc(zone); } - mtx_unlock(&uma_mtx); + rw_runlock(&uma_rwlock); } /* Public functions */ @@ -1787,7 +1787,7 @@ uma_startup(void *bootmem, int boot_pages) #ifdef UMA_DEBUG printf("Creating uma keg headers zone and keg.\n"); #endif - mtx_init(&uma_mtx, "UMA lock", NULL, MTX_DEF); + rw_init(&uma_rwlock, "UMA lock"); /* "manually" create the initial zone */ memset(&args, 0, sizeof(args)); @@ -3362,12 +3362,12 @@ sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS) int count; count = 0; - mtx_lock(&uma_mtx); + rw_rlock(&uma_rwlock); LIST_FOREACH(kz, &uma_kegs, uk_link) { LIST_FOREACH(z, &kz->uk_zones, uz_link) count++; } - mtx_unlock(&uma_mtx); + rw_runlock(&uma_rwlock); return (sysctl_handle_int(oidp, &count, 0, req)); } @@ -3392,7 +3392,7 @@ sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS) sbuf_new_for_sysctl(&sbuf, NULL, 128, req); count = 0; - mtx_lock(&uma_mtx); + rw_rlock(&uma_rwlock); LIST_FOREACH(kz, &uma_kegs, uk_link) { LIST_FOREACH(z, &kz->uk_zones, uz_link) count++; @@ -3468,7 +3468,7 @@ sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS) ZONE_UNLOCK(z); } } - mtx_unlock(&uma_mtx); + rw_runlock(&uma_rwlock); error = sbuf_finish(&sbuf); sbuf_delete(&sbuf); return (error); From 7775dfac2f59299743ac1f631d95503171b916eb Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Sun, 5 Oct 2014 21:39:50 +0000 Subject: [PATCH 42/59] seq_t needs to be visible to userspace Pointy hat to: mjg Reported by: bz X-MFC: with r272567 --- sys/sys/seq.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff 
--git a/sys/sys/seq.h b/sys/sys/seq.h index ba6e80295a48..799b3a994b36 100644 --- a/sys/sys/seq.h +++ b/sys/sys/seq.h @@ -28,6 +28,16 @@ #ifndef _SYS_SEQ_H_ #define _SYS_SEQ_H_ +#ifdef _KERNEL +#include +#endif +#include + +/* + * seq_t may be included in structs visible to userspace + */ +typedef uint32_t seq_t; + #ifdef _KERNEL /* @@ -54,10 +64,7 @@ * foo(lobj); */ -typedef uint32_t seq_t; - /* A hack to get MPASS macro */ -#include #include #include From 33c5acf038f88dc8b4307620a0fb73ea2987f943 Mon Sep 17 00:00:00 2001 From: Jilles Tjoelker Date: Sun, 5 Oct 2014 21:51:36 +0000 Subject: [PATCH 43/59] sh: Eliminate some gotos. --- bin/sh/eval.c | 12 +++++------- bin/sh/expand.c | 15 +++++---------- bin/sh/jobs.c | 8 +++----- bin/sh/redir.c | 29 ++++++++++++++--------------- bin/sh/trap.c | 35 ++++++++++++++++------------------- 5 files changed, 43 insertions(+), 56 deletions(-) diff --git a/bin/sh/eval.c b/bin/sh/eval.c index 3fd3050111da..ea2d2641e7e9 100644 --- a/bin/sh/eval.c +++ b/bin/sh/eval.c @@ -316,9 +316,10 @@ evalloop(union node *n, int flags) loopnest++; status = 0; for (;;) { - evaltree(n->nbinary.ch1, EV_TESTED); + if (!evalskip) + evaltree(n->nbinary.ch1, EV_TESTED); if (evalskip) { -skipping: if (evalskip == SKIPCONT && --skipcount <= 0) { + if (evalskip == SKIPCONT && --skipcount <= 0) { evalskip = 0; continue; } @@ -337,8 +338,6 @@ skipping: if (evalskip == SKIPCONT && --skipcount <= 0) { } evaltree(n->nbinary.ch2, flags); status = exitstatus; - if (evalskip) - goto skipping; } loopnest--; exitstatus = status; @@ -648,15 +647,15 @@ evalbackcmd(union node *n, struct backcmd *result) struct jmploc *savehandler; struct localvar *savelocalvars; - setstackmark(&smark); result->fd = -1; result->buf = NULL; result->nleft = 0; result->jp = NULL; if (n == NULL) { exitstatus = 0; - goto out; + return; } + setstackmark(&smark); exitstatus = oexitstatus; if (is_valid_fast_cmdsubst(n)) { savelocalvars = localvars; @@ -698,7 +697,6 @@ evalbackcmd(union node *n, 
struct backcmd *result) result->fd = pip[0]; result->jp = jp; } -out: popstackmark(&smark); TRACE(("evalbackcmd done: fd=%d buf=%p nleft=%d jp=%p\n", result->fd, result->buf, result->nleft, result->jp)); diff --git a/bin/sh/expand.c b/bin/sh/expand.c index 15afc9ce67fe..b716f5cdc88a 100644 --- a/bin/sh/expand.c +++ b/bin/sh/expand.c @@ -328,24 +328,19 @@ exptilde(char *p, int flag) done: *p = '\0'; if (*(startp+1) == '\0') { - if ((home = lookupvar("HOME")) == NULL) - goto lose; + home = lookupvar("HOME"); } else { - if ((pw = getpwnam(startp+1)) == NULL) - goto lose; - home = pw->pw_dir; + pw = getpwnam(startp+1); + home = pw != NULL ? pw->pw_dir : NULL; } - if (*home == '\0') - goto lose; *p = c; + if (home == NULL || *home == '\0') + return (startp); if (quotes) STPUTS_QUOTES(home, SQSYNTAX, expdest); else STPUTS(home, expdest); return (p); -lose: - *p = c; - return (startp); } diff --git a/bin/sh/jobs.c b/bin/sh/jobs.c index e58310b57f06..218e63d2485b 100644 --- a/bin/sh/jobs.c +++ b/bin/sh/jobs.c @@ -373,13 +373,13 @@ showjob(struct job *jp, int mode) strcat(statestr, " (core dumped)"); } - for (ps = jp->ps ; ; ps++) { /* for each process */ + for (ps = jp->ps ; procno > 0 ; ps++, procno--) { /* for each process */ if (mode == SHOWJOBS_PIDS || mode == SHOWJOBS_PGIDS) { out1fmt("%d\n", (int)ps->pid); - goto skip; + continue; } if (mode != SHOWJOBS_VERBOSE && ps != jp->ps) - goto skip; + continue; if (jobno == curr && ps == jp->ps) c = '+'; else if (jobno == prev && ps == jp->ps) @@ -410,8 +410,6 @@ showjob(struct job *jp, int mode) out1c('\n'); } else printjobcmd(jp); -skip: if (--procno <= 0) - break; } } diff --git a/bin/sh/redir.c b/bin/sh/redir.c index 6127e8603305..95d3238daf1b 100644 --- a/bin/sh/redir.c +++ b/bin/sh/redir.c @@ -173,21 +173,12 @@ openredirect(union node *redir, char memory[10]) fname = redir->nfile.expfname; if ((f = open(fname, O_RDONLY)) < 0) error("cannot open %s: %s", fname, strerror(errno)); -movefd: - if (f != fd) { - if (dup2(f, 
fd) == -1) { - e = errno; - close(f); - error("%d: %s", fd, strerror(e)); - } - close(f); - } break; case NFROMTO: fname = redir->nfile.expfname; if ((f = open(fname, O_RDWR|O_CREAT, 0666)) < 0) error("cannot create %s: %s", fname, strerror(errno)); - goto movefd; + break; case NTO: if (Cflag) { fname = redir->nfile.expfname; @@ -205,19 +196,19 @@ openredirect(union node *redir, char memory[10]) } else error("cannot create %s: %s", fname, strerror(EEXIST)); - goto movefd; + break; } /* FALLTHROUGH */ case NCLOBBER: fname = redir->nfile.expfname; if ((f = open(fname, O_WRONLY|O_CREAT|O_TRUNC, 0666)) < 0) error("cannot create %s: %s", fname, strerror(errno)); - goto movefd; + break; case NAPPEND: fname = redir->nfile.expfname; if ((f = open(fname, O_WRONLY|O_CREAT|O_APPEND, 0666)) < 0) error("cannot create %s: %s", fname, strerror(errno)); - goto movefd; + break; case NTOFD: case NFROMFD: if (redir->ndup.dupfd >= 0) { /* if not ">&-" */ @@ -231,14 +222,22 @@ openredirect(union node *redir, char memory[10]) } else { close(fd); } - break; + return; case NHERE: case NXHERE: f = openhere(redir); - goto movefd; + break; default: abort(); } + if (f != fd) { + if (dup2(f, fd) == -1) { + e = errno; + close(f); + error("%d: %s", fd, strerror(e)); + } + close(f); + } } diff --git a/bin/sh/trap.c b/bin/sh/trap.c index e5a2a9135576..8ea3b126e1b1 100644 --- a/bin/sh/trap.c +++ b/bin/sh/trap.c @@ -510,28 +510,25 @@ exitshell_savedstatus(void) exiting_exitstatus = oexitstatus; } exitstatus = oexitstatus = exiting_exitstatus; - if (setjmp(loc1.loc)) { - goto l1; + if (!setjmp(loc1.loc)) { + handler = &loc1; + if ((p = trap[0]) != NULL && *p != '\0') { + /* + * Reset evalskip, or the trap on EXIT could be + * interrupted if the last command was a "return". 
+ */ + evalskip = 0; + trap[0] = NULL; + evalstring(p, 0); + } } - if (setjmp(loc2.loc)) { - goto l2; - } - handler = &loc1; - if ((p = trap[0]) != NULL && *p != '\0') { - /* - * Reset evalskip, or the trap on EXIT could be - * interrupted if the last command was a "return". - */ - evalskip = 0; - trap[0] = NULL; - evalstring(p, 0); - } -l1: handler = &loc2; /* probably unnecessary */ - flushall(); + if (!setjmp(loc2.loc)) { + handler = &loc2; /* probably unnecessary */ + flushall(); #if JOBS - setjobctl(0); + setjobctl(0); #endif -l2: + } if (sig != 0 && sig != SIGSTOP && sig != SIGTSTP && sig != SIGTTIN && sig != SIGTTOU) { signal(sig, SIG_DFL); From 064291755a4925cb8653d6d3144db455aacbbeea Mon Sep 17 00:00:00 2001 From: Mark Johnston Date: Sun, 5 Oct 2014 22:13:13 +0000 Subject: [PATCH 44/59] Fix dependency errors when linking libproc. Reported by: Oliver Hartmann X-MFC-With: r272488 --- Makefile.inc1 | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Makefile.inc1 b/Makefile.inc1 index c3b6d197789a..c87fa015663c 100644 --- a/Makefile.inc1 +++ b/Makefile.inc1 @@ -1536,6 +1536,7 @@ _prebuild_libs= ${_kerberos5_lib_libasn1} \ ${_cddl_lib_libuutil} \ ${_cddl_lib_libavl} \ ${_cddl_lib_libzfs_core} \ + ${_cddl_lib_libctf} \ lib/libutil lib/libpjdlog ${_lib_libypclnt} lib/libz lib/msun \ ${_secure_lib_libcrypto} ${_lib_libldns} \ ${_secure_lib_libssh} ${_secure_lib_libssl} @@ -1586,9 +1587,11 @@ _cddl_lib_libnvpair= cddl/lib/libnvpair _cddl_lib_libavl= cddl/lib/libavl _cddl_lib_libuutil= cddl/lib/libuutil _cddl_lib_libzfs_core= cddl/lib/libzfs_core +_cddl_lib_libctf= cddl/lib/libctf _cddl_lib= cddl/lib cddl/lib/libzfs_core__L: cddl/lib/libnvpair__L cddl/lib/libzfs__L: lib/libgeom__L +cddl/lib/libctf__L: lib/libz__L .endif .if ${MK_CRYPT} != "no" From 4630309ca3c035e1d4206f76edc3ef20f2bfd7bc Mon Sep 17 00:00:00 2001 From: Brad Davis Date: Mon, 6 Oct 2014 00:16:23 +0000 Subject: [PATCH 45/59] - Add some tests for modifying groups Reviewed by: will --- 
usr.sbin/pw/tests/Makefile | 2 +- usr.sbin/pw/tests/pw_modify.sh | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 1 deletion(-) create mode 100755 usr.sbin/pw/tests/pw_modify.sh diff --git a/usr.sbin/pw/tests/Makefile b/usr.sbin/pw/tests/Makefile index 577ea9323919..3003c8f247fb 100644 --- a/usr.sbin/pw/tests/Makefile +++ b/usr.sbin/pw/tests/Makefile @@ -2,7 +2,7 @@ TESTSDIR= ${TESTSBASE}/usr.sbin/pw -ATF_TESTS_SH= pw_delete +ATF_TESTS_SH= pw_delete pw_modify FILES= group helper_functions.shin master.passwd FILESDIR= ${TESTSDIR} diff --git a/usr.sbin/pw/tests/pw_modify.sh b/usr.sbin/pw/tests/pw_modify.sh new file mode 100755 index 000000000000..28a89e1c24a7 --- /dev/null +++ b/usr.sbin/pw/tests/pw_modify.sh @@ -0,0 +1,34 @@ +# $FreeBSD$ + +# Import helper functions +. $(atf_get_srcdir)/helper_functions.shin + + +# Test adding & removing a user from a group +atf_test_case groupmod_user +groupmod_user_body() { + populate_etc_skel + atf_check -s exit:0 pw -V ${HOME} addgroup test + atf_check -s exit:0 pw -V ${HOME} groupmod test -m root + atf_check -s exit:0 -o match:"^test:\*:1001:root$" \ + grep "^test:\*:.*:root$" $HOME/group + atf_check -s exit:0 pw -V ${HOME} groupmod test -d root + atf_check -s exit:0 -o match:"^test:\*:1001:$" \ + grep "^test:\*:.*:$" $HOME/group +} + + +# Test adding and removing a user that does not exist +atf_test_case groupmod_invalid_user +groupmod_invalid_user_body() { + populate_etc_skel + atf_check -s exit:0 pw -V ${HOME} addgroup test + atf_check -s exit:67 -e match:"does not exist" pw -V ${HOME} groupmod test -m foo + atf_check -s exit:0 pw -V ${HOME} groupmod test -d foo +} + + +atf_init_test_cases() { + atf_add_test_case groupmod_user + atf_add_test_case groupmod_invalid_user +} From 1769d465c89d074cb52a64a558147e6dfbb9e03a Mon Sep 17 00:00:00 2001 From: Bryan Drewery Date: Mon, 6 Oct 2014 00:25:10 +0000 Subject: [PATCH 46/59] Bump .Dd missed in r271424 --- share/man/man5/rc.conf.5 | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/share/man/man5/rc.conf.5 b/share/man/man5/rc.conf.5 index 47d9ef8e5520..721fef01c1c2 100644 --- a/share/man/man5/rc.conf.5 +++ b/share/man/man5/rc.conf.5 @@ -24,7 +24,7 @@ .\" .\" $FreeBSD$ .\" -.Dd August 27, 2014 +.Dd September 11, 2014 .Dt RC.CONF 5 .Os .Sh NAME From dd2390be68e0245e45e2289f91b70e844a076078 Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Mon, 6 Oct 2014 02:31:33 +0000 Subject: [PATCH 47/59] Convert racct stubs to inline functions. This saves some symbols and function calls for kernel without RACCT. MFC after: 1 week --- sys/kern/kern_racct.c | 84 ----------------------------------------- sys/sys/racct.h | 87 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 87 insertions(+), 84 deletions(-) diff --git a/sys/kern/kern_racct.c b/sys/kern/kern_racct.c index 26ebc4e6f272..84fa31225189 100644 --- a/sys/kern/kern_racct.c +++ b/sys/kern/kern_racct.c @@ -1203,88 +1203,4 @@ racct_init(void) } SYSINIT(racct, SI_SUB_RACCT, SI_ORDER_FIRST, racct_init, NULL); -#else /* !RACCT */ - -int -racct_add(struct proc *p, int resource, uint64_t amount) -{ - - return (0); -} - -void -racct_add_cred(struct ucred *cred, int resource, uint64_t amount) -{ -} - -void -racct_add_force(struct proc *p, int resource, uint64_t amount) -{ - - return; -} - -int -racct_set(struct proc *p, int resource, uint64_t amount) -{ - - return (0); -} - -void -racct_set_force(struct proc *p, int resource, uint64_t amount) -{ -} - -void -racct_sub(struct proc *p, int resource, uint64_t amount) -{ -} - -void -racct_sub_cred(struct ucred *cred, int resource, uint64_t amount) -{ -} - -uint64_t -racct_get_limit(struct proc *p, int resource) -{ - - return (UINT64_MAX); -} - -uint64_t -racct_get_available(struct proc *p, int resource) -{ - - return (UINT64_MAX); -} - -void -racct_create(struct racct **racctp) -{ -} - -void -racct_destroy(struct racct **racctp) -{ -} - -int -racct_proc_fork(struct proc *parent, struct proc *child) -{ - - return 
(0); -} - -void -racct_proc_fork_done(struct proc *child) -{ -} - -void -racct_proc_exit(struct proc *p) -{ -} - #endif /* !RACCT */ diff --git a/sys/sys/racct.h b/sys/sys/racct.h index 3b34891af14c..362529c4857b 100644 --- a/sys/sys/racct.h +++ b/sys/sys/racct.h @@ -37,6 +37,7 @@ #define _RACCT_H_ #include +#include #include #include @@ -141,6 +142,8 @@ struct racct { LIST_HEAD(, rctl_rule_link) r_rule_links; }; +#ifdef RACCT + int racct_add(struct proc *p, int resource, uint64_t amount); void racct_add_cred(struct ucred *cred, int resource, uint64_t amount); void racct_add_force(struct proc *p, int resource, uint64_t amount); @@ -162,4 +165,88 @@ void racct_proc_ucred_changed(struct proc *p, struct ucred *oldcred, struct ucred *newcred); void racct_move(struct racct *dest, struct racct *src); +#else + +static inline int +racct_add(struct proc *p, int resource, uint64_t amount) +{ + + return (0); +} + +static inline void +racct_add_cred(struct ucred *cred, int resource, uint64_t amount) +{ +} + +static inline void +racct_add_force(struct proc *p, int resource, uint64_t amount) +{ +} + +static inline int +racct_set(struct proc *p, int resource, uint64_t amount) +{ + + return (0); +} + +static inline void +racct_set_force(struct proc *p, int resource, uint64_t amount) +{ +} + +static inline void +racct_sub(struct proc *p, int resource, uint64_t amount) +{ +} + +static inline void +racct_sub_cred(struct ucred *cred, int resource, uint64_t amount) +{ +} + +static inline uint64_t +racct_get_limit(struct proc *p, int resource) +{ + + return (UINT64_MAX); +} + +static inline uint64_t +racct_get_available(struct proc *p, int resource) +{ + + return (UINT64_MAX); +} + +static inline void +racct_create(struct racct **racctp) +{ +} + +static inline void +racct_destroy(struct racct **racctp) +{ +} + +static inline int +racct_proc_fork(struct proc *parent, struct proc *child) +{ + + return (0); +} + +static inline void +racct_proc_fork_done(struct proc *child) +{ +} + +static 
inline void +racct_proc_exit(struct proc *p) +{ +} + +#endif + #endif /* !_RACCT_H_ */ From 58c4ebeee879b4542c9b334b4bf9434b42aee179 Mon Sep 17 00:00:00 2001 From: Xin LI Date: Mon, 6 Oct 2014 05:54:39 +0000 Subject: [PATCH 48/59] 5176 lock contention on godfather zio Reviewed by: Adam Leventhal Reviewed by: Alex Reece Reviewed by: Christopher Siden Reviewed by: George Wilson Reviewed by: Richard Elling Reviewed by: Bayard Bell Approved by: Garrett D'Amore Author: Matthew Ahrens illumos/illumos-gate@6f834bc197c703a6568554c889157fb345bac079 --- cmd/zdb/zdb.c | 10 ++++++---- uts/common/fs/zfs/spa.c | 22 +++++++++++++++++----- uts/common/fs/zfs/sys/spa_impl.h | 3 ++- uts/common/fs/zfs/zio.c | 2 +- 4 files changed, 26 insertions(+), 11 deletions(-) diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c index 2def6241b711..c886a9dc7298 100644 --- a/cmd/zdb/zdb.c +++ b/cmd/zdb/zdb.c @@ -2565,10 +2565,12 @@ dump_block_stats(spa_t *spa) * all async I/Os to complete. */ if (dump_opt['c']) { - (void) zio_wait(spa->spa_async_zio_root); - spa->spa_async_zio_root = zio_root(spa, NULL, NULL, - ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | - ZIO_FLAG_GODFATHER); + for (int i = 0; i < max_ncpus; i++) { + (void) zio_wait(spa->spa_async_zio_root[i]); + spa->spa_async_zio_root[i] = zio_root(spa, NULL, NULL, + ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | + ZIO_FLAG_GODFATHER); + } } if (zcb.zcb_haderrors) { diff --git a/uts/common/fs/zfs/spa.c b/uts/common/fs/zfs/spa.c index 72870b589891..be308e2f8730 100644 --- a/uts/common/fs/zfs/spa.c +++ b/uts/common/fs/zfs/spa.c @@ -1222,7 +1222,9 @@ spa_unload(spa_t *spa) * Wait for any outstanding async I/O to complete. 
*/ if (spa->spa_async_zio_root != NULL) { - (void) zio_wait(spa->spa_async_zio_root); + for (int i = 0; i < max_ncpus; i++) + (void) zio_wait(spa->spa_async_zio_root[i]); + kmem_free(spa->spa_async_zio_root, max_ncpus * sizeof (void *)); spa->spa_async_zio_root = NULL; } @@ -2141,8 +2143,13 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config, /* * Create "The Godfather" zio to hold all async IOs */ - spa->spa_async_zio_root = zio_root(spa, NULL, NULL, - ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_GODFATHER); + spa->spa_async_zio_root = kmem_alloc(max_ncpus * sizeof (void *), + KM_SLEEP); + for (int i = 0; i < max_ncpus; i++) { + spa->spa_async_zio_root[i] = zio_root(spa, NULL, NULL, + ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | + ZIO_FLAG_GODFATHER); + } /* * Parse the configuration into a vdev tree. We explicitly set the @@ -3481,8 +3488,13 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, /* * Create "The Godfather" zio to hold all async IOs */ - spa->spa_async_zio_root = zio_root(spa, NULL, NULL, - ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_GODFATHER); + spa->spa_async_zio_root = kmem_alloc(max_ncpus * sizeof (void *), + KM_SLEEP); + for (int i = 0; i < max_ncpus; i++) { + spa->spa_async_zio_root[i] = zio_root(spa, NULL, NULL, + ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | + ZIO_FLAG_GODFATHER); + } /* * Create the root vdev. 
diff --git a/uts/common/fs/zfs/sys/spa_impl.h b/uts/common/fs/zfs/sys/spa_impl.h index 739b95988c25..48b28eb5a139 100644 --- a/uts/common/fs/zfs/sys/spa_impl.h +++ b/uts/common/fs/zfs/sys/spa_impl.h @@ -204,7 +204,8 @@ struct spa { uint64_t spa_failmode; /* failure mode for the pool */ uint64_t spa_delegation; /* delegation on/off */ list_t spa_config_list; /* previous cache file(s) */ - zio_t *spa_async_zio_root; /* root of all async I/O */ + /* per-CPU array of root of async I/O: */ + zio_t **spa_async_zio_root; zio_t *spa_suspend_zio_root; /* root of all suspended I/O */ kmutex_t spa_suspend_lock; /* protects suspend_zio_root */ kcondvar_t spa_suspend_cv; /* notification of resume */ diff --git a/uts/common/fs/zfs/zio.c b/uts/common/fs/zfs/zio.c index d8833f67079d..b154cb639d5d 100644 --- a/uts/common/fs/zfs/zio.c +++ b/uts/common/fs/zfs/zio.c @@ -1367,7 +1367,7 @@ zio_nowait(zio_t *zio) */ spa_t *spa = zio->io_spa; - zio_add_child(spa->spa_async_zio_root, zio); + zio_add_child(spa->spa_async_zio_root[CPU_SEQID], zio); } zio_execute(zio); From f9ac70702ff72d1ee5df5686cca48ce7e51a4293 Mon Sep 17 00:00:00 2001 From: Xin LI Date: Mon, 6 Oct 2014 06:00:50 +0000 Subject: [PATCH 49/59] 5178 zdb -vvvvv on old-format pool fails in dump_deadlist() Reviewed by: Christopher Siden Reviewed by: George Wilson Reviewed by: Richard Lowe Reviewed by: Saso Kiselkov Reviewed by: Richard Elling Reviewed by: Alek Pinchuk Approved by: Garrett D'Amore Author: Matthew Ahrens illumos/illumos-gate@90c76c66a2e21307c5370ae1bd3c7145a60c874f --- cmd/zdb/zdb.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c index c886a9dc7298..616c0b80195c 100644 --- a/cmd/zdb/zdb.c +++ b/cmd/zdb/zdb.c @@ -1434,6 +1434,11 @@ dump_deadlist(dsl_deadlist_t *dl) if (dump_opt['d'] < 3) return; + if (dl->dl_oldfmt) { + dump_bpobj(&dl->dl_bpobj, "old-format deadlist", 0); + return; + } + zdb_nicenum(dl->dl_phys->dl_used, bytes); zdb_nicenum(dl->dl_phys->dl_comp, comp); 
zdb_nicenum(dl->dl_phys->dl_uncomp, uncomp); From ce78d06b49e5462b3afb4cb522b019a8c95f39ae Mon Sep 17 00:00:00 2001 From: Xin LI Date: Mon, 6 Oct 2014 06:04:10 +0000 Subject: [PATCH 50/59] 5162 zfs recv should use loaned arc buffer to avoid copy Reviewed by: Christopher Siden Reviewed by: George Wilson Reviewed by: Bayard Bell Reviewed by: Richard Elling Approved by: Garrett D'Amore Author: Matthew Ahrens illumos/illumos-gate@8a9047098ad8ce5afa38b6d012c8b509bb619f40 --- uts/common/fs/zfs/dmu.c | 9 ++++++- uts/common/fs/zfs/dmu_send.c | 46 +++++++++++++++++++++++------------- 2 files changed, 37 insertions(+), 18 deletions(-) diff --git a/uts/common/fs/zfs/dmu.c b/uts/common/fs/zfs/dmu.c index 619b3b4aa458..4fdcb1475018 100644 --- a/uts/common/fs/zfs/dmu.c +++ b/uts/common/fs/zfs/dmu.c @@ -1264,7 +1264,14 @@ dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, arc_buf_t *buf, rw_exit(&dn->dn_struct_rwlock); DB_DNODE_EXIT(dbuf); - if (offset == db->db.db_offset && blksz == db->db.db_size) { + /* + * We can only assign if the offset is aligned, the arc buf is the + * same size as the dbuf, and the dbuf is not metadata. It + * can't be metadata because the loaned arc buf comes from the + * user-data kmem arena. 
+ */ + if (offset == db->db.db_offset && blksz == db->db.db_size && + DBUF_GET_BUFC_TYPE(db) == ARC_BUFC_DATA) { dbuf_assign_arcbuf(db, buf, tx); dbuf_rele(db, FTAG); } else { diff --git a/uts/common/fs/zfs/dmu_send.c b/uts/common/fs/zfs/dmu_send.c index 97c9926bd79b..9b03eff7a06b 100644 --- a/uts/common/fs/zfs/dmu_send.c +++ b/uts/common/fs/zfs/dmu_send.c @@ -1212,11 +1212,13 @@ free_guid_map_onexit(void *arg) } static void * -restore_read(struct restorearg *ra, int len) +restore_read(struct restorearg *ra, int len, char *buf) { - void *rv; int done = 0; + if (buf == NULL) + buf = ra->buf; + /* some things will require 8-byte alignment, so everything must */ ASSERT0(len % 8); @@ -1224,7 +1226,7 @@ restore_read(struct restorearg *ra, int len) ssize_t resid; ra->err = vn_rdwr(UIO_READ, ra->vp, - (caddr_t)ra->buf + done, len - done, + buf + done, len - done, ra->voff, UIO_SYSSPACE, FAPPEND, RLIM64_INFINITY, CRED(), &resid); @@ -1237,12 +1239,11 @@ restore_read(struct restorearg *ra, int len) } ASSERT3U(done, ==, len); - rv = ra->buf; if (ra->byteswap) - fletcher_4_incremental_byteswap(rv, len, &ra->cksum); + fletcher_4_incremental_byteswap(buf, len, &ra->cksum); else - fletcher_4_incremental_native(rv, len, &ra->cksum); - return (rv); + fletcher_4_incremental_native(buf, len, &ra->cksum); + return (buf); } static void @@ -1357,7 +1358,7 @@ restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro) return (SET_ERROR(EINVAL)); if (drro->drr_bonuslen) { - data = restore_read(ra, P2ROUNDUP(drro->drr_bonuslen, 8)); + data = restore_read(ra, P2ROUNDUP(drro->drr_bonuslen, 8), NULL); if (ra->err != 0) return (ra->err); } @@ -1454,19 +1455,30 @@ restore_write(struct restorearg *ra, objset_t *os, !DMU_OT_IS_VALID(drrw->drr_type)) return (SET_ERROR(EINVAL)); - data = restore_read(ra, drrw->drr_length); - if (data == NULL) - return (ra->err); - if (dmu_object_info(os, drrw->drr_object, NULL) != 0) return (SET_ERROR(EINVAL)); + dmu_buf_t *bonus; + if 
(dmu_bonus_hold(os, drrw->drr_object, FTAG, &bonus) != 0) + return (SET_ERROR(EINVAL)); + + arc_buf_t *abuf = dmu_request_arcbuf(bonus, drrw->drr_length); + + data = restore_read(ra, drrw->drr_length, abuf->b_data); + if (data == NULL) { + dmu_return_arcbuf(abuf); + dmu_buf_rele(bonus, FTAG); + return (ra->err); + } + tx = dmu_tx_create(os); dmu_tx_hold_write(tx, drrw->drr_object, drrw->drr_offset, drrw->drr_length); err = dmu_tx_assign(tx, TXG_WAIT); if (err != 0) { + dmu_return_arcbuf(abuf); + dmu_buf_rele(bonus, FTAG); dmu_tx_abort(tx); return (err); } @@ -1475,9 +1487,9 @@ restore_write(struct restorearg *ra, objset_t *os, DMU_OT_BYTESWAP(drrw->drr_type); dmu_ot_byteswap[byteswap].ob_func(data, drrw->drr_length); } - dmu_write(os, drrw->drr_object, - drrw->drr_offset, drrw->drr_length, data, tx); + dmu_assign_arcbuf(bonus, drrw->drr_offset, abuf, tx); dmu_tx_commit(tx); + dmu_buf_rele(bonus, FTAG); return (0); } @@ -1559,7 +1571,7 @@ restore_write_embedded(struct restorearg *ra, objset_t *os, if (drrwnp->drr_compression >= ZIO_COMPRESS_FUNCTIONS) return (EINVAL); - data = restore_read(ra, P2ROUNDUP(drrwnp->drr_psize, 8)); + data = restore_read(ra, P2ROUNDUP(drrwnp->drr_psize, 8), NULL); if (data == NULL) return (ra->err); @@ -1594,7 +1606,7 @@ restore_spill(struct restorearg *ra, objset_t *os, struct drr_spill *drrs) drrs->drr_length > SPA_MAXBLOCKSIZE) return (SET_ERROR(EINVAL)); - data = restore_read(ra, drrs->drr_length); + data = restore_read(ra, drrs->drr_length, NULL); if (data == NULL) return (ra->err); @@ -1735,7 +1747,7 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp, */ pcksum = ra.cksum; while (ra.err == 0 && - NULL != (drr = restore_read(&ra, sizeof (*drr)))) { + NULL != (drr = restore_read(&ra, sizeof (*drr), NULL))) { if (issig(JUSTLOOKING) && issig(FORREAL)) { ra.err = SET_ERROR(EINTR); goto out; From b228e6bf572c3d0a8103474c0a8b6d22383daa42 Mon Sep 17 00:00:00 2001 From: Hans Petter Selasky Date: Mon, 6 Oct 2014 06:19:54 
+0000 Subject: [PATCH 51/59] Minor code styling. Suggested by: glebius @ --- sys/netinet/tcp_output.c | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c index 069776754432..07dd5c359524 100644 --- a/sys/netinet/tcp_output.c +++ b/sys/netinet/tcp_output.c @@ -806,41 +806,40 @@ tcp_output(struct tcpcb *tp) * Check if we should limit by maximum segment * size and count: */ - if (if_hw_tsomaxsegcount != 0 && if_hw_tsomaxsegsize != 0) { + if (if_hw_tsomaxsegcount != 0 && + if_hw_tsomaxsegsize != 0) { max_len = 0; mb = sbsndmbuf(&so->so_snd, off, &moff); while (mb != NULL && (u_int)max_len < len) { - u_int cur_length; - u_int cur_frags; + u_int mlen; + u_int frags; /* * Get length of mbuf fragment - * and how many hardware - * frags, rounded up, it would - * use: + * and how many hardware frags, + * rounded up, it would use: */ - cur_length = (mb->m_len - moff); - cur_frags = (cur_length + if_hw_tsomaxsegsize - - 1) / if_hw_tsomaxsegsize; + mlen = (mb->m_len - moff); + frags = howmany(mlen, + if_hw_tsomaxsegsize); /* Handle special case: Zero Length Mbuf */ - if (cur_frags == 0) - cur_frags = 1; + if (frags == 0) + frags = 1; /* * Check if the fragment limit - * will be reached or - * exceeded: + * will be reached or exceeded: */ - if (cur_frags >= if_hw_tsomaxsegcount) { - max_len += min(cur_length, + if (frags >= if_hw_tsomaxsegcount) { + max_len += min(mlen, if_hw_tsomaxsegcount * if_hw_tsomaxsegsize); break; } - max_len += cur_length; - if_hw_tsomaxsegcount -= cur_frags; + max_len += mlen; + if_hw_tsomaxsegcount -= frags; moff = 0; mb = mb->m_next; } From 1d1b55fbbac270f9fcfa4bf66327974b84c9b614 Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Mon, 6 Oct 2014 06:20:35 +0000 Subject: [PATCH 52/59] devfs: don't take proctree_lock unconditionally in devfs_close MFC after: 1 week --- sys/fs/devfs/devfs_vnops.c | 23 +++++++++++++---------- 1 file changed, 13 
insertions(+), 10 deletions(-) diff --git a/sys/fs/devfs/devfs_vnops.c b/sys/fs/devfs/devfs_vnops.c index d0aabefe80a9..0317c695ef56 100644 --- a/sys/fs/devfs/devfs_vnops.c +++ b/sys/fs/devfs/devfs_vnops.c @@ -546,19 +546,22 @@ devfs_close(struct vop_close_args *ap) * plus the session), release the reference from the session. */ oldvp = NULL; - sx_xlock(&proctree_lock); if (td && vp == td->td_proc->p_session->s_ttyvp) { - SESS_LOCK(td->td_proc->p_session); - VI_LOCK(vp); - if (count_dev(dev) == 2 && (vp->v_iflag & VI_DOOMED) == 0) { - td->td_proc->p_session->s_ttyvp = NULL; - td->td_proc->p_session->s_ttydp = NULL; - oldvp = vp; + sx_xlock(&proctree_lock); + if (vp == td->td_proc->p_session->s_ttyvp) { + SESS_LOCK(td->td_proc->p_session); + VI_LOCK(vp); + if (count_dev(dev) == 2 && + (vp->v_iflag & VI_DOOMED) == 0) { + td->td_proc->p_session->s_ttyvp = NULL; + td->td_proc->p_session->s_ttydp = NULL; + oldvp = vp; + } + VI_UNLOCK(vp); + SESS_UNLOCK(td->td_proc->p_session); } - VI_UNLOCK(vp); - SESS_UNLOCK(td->td_proc->p_session); + sx_xunlock(&proctree_lock); } - sx_xunlock(&proctree_lock); if (oldvp != NULL) vrele(oldvp); /* From 5554eb9bfca7ff4379ae30b3a34b4c5458b62baa Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Mon, 6 Oct 2014 07:01:32 +0000 Subject: [PATCH 53/59] Fix length of Extended INQUIRY Data VPD page. 
MFC after: 3 days --- sys/cam/ctl/ctl.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sys/cam/ctl/ctl.c b/sys/cam/ctl/ctl.c index 3627a503c433..eaba50788541 100644 --- a/sys/cam/ctl/ctl.c +++ b/sys/cam/ctl/ctl.c @@ -9912,9 +9912,7 @@ ctl_inquiry_evpd_eid(struct ctl_scsiio *ctsio, int alloc_len) lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; - data_len = sizeof(struct scsi_vpd_mode_page_policy) + - sizeof(struct scsi_vpd_mode_page_policy_descr); - + data_len = sizeof(struct scsi_vpd_extended_inquiry_data); ctsio->kern_data_ptr = malloc(data_len, M_CTL, M_WAITOK | M_ZERO); eid_ptr = (struct scsi_vpd_extended_inquiry_data *)ctsio->kern_data_ptr; ctsio->kern_sg_entries = 0; From 3a222fe00084570292e6a97295716d447d4fd9b4 Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Mon, 6 Oct 2014 07:22:48 +0000 Subject: [PATCH 54/59] devfs: tidy up after 272596 This moves a var to an if statement, no functional changes. MFC after: 1 week --- sys/fs/devfs/devfs_vnops.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sys/fs/devfs/devfs_vnops.c b/sys/fs/devfs/devfs_vnops.c index 0317c695ef56..d7009a49dd45 100644 --- a/sys/fs/devfs/devfs_vnops.c +++ b/sys/fs/devfs/devfs_vnops.c @@ -545,8 +545,8 @@ devfs_close(struct vop_close_args *ap) * if the reference count is 2 (this last descriptor * plus the session), release the reference from the session. 
*/ - oldvp = NULL; if (td && vp == td->td_proc->p_session->s_ttyvp) { + oldvp = NULL; sx_xlock(&proctree_lock); if (vp == td->td_proc->p_session->s_ttyvp) { SESS_LOCK(td->td_proc->p_session); @@ -561,9 +561,9 @@ devfs_close(struct vop_close_args *ap) SESS_UNLOCK(td->td_proc->p_session); } sx_xunlock(&proctree_lock); + if (oldvp != NULL) + vrele(oldvp); } - if (oldvp != NULL) - vrele(oldvp); /* * We do not want to really close the device if it * is still in use unless we are trying to close it From 7f29c69aee963aedf5db477201e5c7e3356b2b64 Mon Sep 17 00:00:00 2001 From: Ganbold Tsagaankhuu Date: Mon, 6 Oct 2014 09:00:53 +0000 Subject: [PATCH 55/59] Use documented compat string for msm uart. Whilst here use tab instead of spaces. Approved by: stas (mentor) --- sys/dev/uart/uart_bus_fdt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/dev/uart/uart_bus_fdt.c b/sys/dev/uart/uart_bus_fdt.c index f52fec1e5936..65ecf760f49b 100644 --- a/sys/dev/uart/uart_bus_fdt.c +++ b/sys/dev/uart/uart_bus_fdt.c @@ -84,7 +84,7 @@ static struct ofw_compat_data compat_data[] = { {"fsl,imx21-uart", (uintptr_t)&uart_imx_class}, {"fsl,mvf600-uart", (uintptr_t)&uart_vybrid_class}, {"lpc,uart", (uintptr_t)&uart_lpc_class}, - {"qcom,uart-dm", (uintptr_t)&uart_msm_class}, + {"qcom,msm-uartdm", (uintptr_t)&uart_msm_class}, {"ti,ns16550", (uintptr_t)&uart_ti8250_class}, {"ns16550", (uintptr_t)&uart_ns8250_class}, {NULL, (uintptr_t)NULL}, From 9677c48e30c340e7dcb83bf34e11a3b0d23d2753 Mon Sep 17 00:00:00 2001 From: Andrew Turner Date: Mon, 6 Oct 2014 09:52:28 +0000 Subject: [PATCH 56/59] Disable generating vfp and NEON instructions in the arm kernel. 
--- sys/conf/Makefile.arm | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sys/conf/Makefile.arm b/sys/conf/Makefile.arm index 08e70f30a502..2727f815ebd9 100644 --- a/sys/conf/Makefile.arm +++ b/sys/conf/Makefile.arm @@ -42,6 +42,9 @@ STRIP_FLAGS = -S # We don't support gcc's thump interwork stuff, so disable it CFLAGS.gcc += -mno-thumb-interwork +# We generally don't want fpu instructions in the kernel. +CFLAGS.clang += -mfpu=none + .if !empty(DDB_ENABLED) CFLAGS += -funwind-tables # clang requires us to tell it to emit assembly with unwind information From 88971a900d9c277cd7f4fa710f3fbca619044fbd Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Mon, 6 Oct 2014 10:58:54 +0000 Subject: [PATCH 57/59] Use r271207 optimization only for MSI-enabled HBAs. It was found that VirtualBox' AHCI does not allow interrupt to be cleared before the interrupt status register is read, causing interrupt storm. AHCI specification allows to skip this register use when multi-vector MSI is enabled and so interrupting port is known. For single-vector MSI that is not stated explicitly, but if the port is only one, it is obviously known too. --- sys/dev/ahci/ahci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/dev/ahci/ahci.c b/sys/dev/ahci/ahci.c index 69fa76bfab42..ab4944a801fb 100644 --- a/sys/dev/ahci/ahci.c +++ b/sys/dev/ahci/ahci.c @@ -360,7 +360,7 @@ ahci_setup_interrupt(device_t dev) for (i = 0; i < ctlr->numirqs; i++) { ctlr->irqs[i].ctlr = ctlr; ctlr->irqs[i].r_irq_rid = i + (ctlr->msi ? 1 : 0); - if (ctlr->channels == 1 && !ctlr->ccc) + if (ctlr->channels == 1 && !ctlr->ccc && ctlr->msi) ctlr->irqs[i].mode = AHCI_IRQ_MODE_ONE; else if (ctlr->numirqs == 1 || i >= ctlr->channels || (ctlr->ccc && i == ctlr->cccv)) From 9bb47e7068d0c3efe415bd322b9a4694cf0d5c82 Mon Sep 17 00:00:00 2001 From: "Alexander V.
Chernikov" Date: Mon, 6 Oct 2014 11:00:47 +0000 Subject: [PATCH 58/59] Improve "reserved keywords" hack: we can't easily predict (in current parsing model) if the keyword is ipfw(8) reserved keyword or port name. Checking proto database via getprotobyname() consumes a lot of CPU and leads to tens of seconds for parsing large ruleset. Use list of reserved keywords and check them as pre-requisite before doing getprotobyname(). Obtained from: Yandex LLC --- sbin/ipfw/ipfw2.c | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/sbin/ipfw/ipfw2.c b/sbin/ipfw/ipfw2.c index 30fe6048d059..4ff815b7d711 100644 --- a/sbin/ipfw/ipfw2.c +++ b/sbin/ipfw/ipfw2.c @@ -2904,13 +2904,34 @@ add_dstip(ipfw_insn *cmd, char *av, int cblen, struct tidx *tstate) return cmd; } +static struct _s_x f_reserved_keywords[] = { + { "altq", TOK_OR }, + { "//", TOK_OR }, + { "diverted", TOK_OR }, + { "dst-port", TOK_OR }, + { "src-port", TOK_OR }, + { "established", TOK_OR }, + { "keep-state", TOK_OR }, + { "frag", TOK_OR }, + { "icmptypes", TOK_OR }, + { "in", TOK_OR }, + { "out", TOK_OR }, + { "ip6", TOK_OR }, + { "any", TOK_OR }, + { "to", TOK_OR }, + { "via", TOK_OR }, + { "{", TOK_OR }, + { NULL, 0 } /* terminator */ +}; + static ipfw_insn * add_ports(ipfw_insn *cmd, char *av, u_char proto, int opcode, int cblen) { - /* XXX "any" is trapped before. Perhaps "to" */ - if (_substrcmp(av, "any") == 0) { - return NULL; - } else if (fill_newports((ipfw_insn_u16 *)cmd, av, proto, cblen)) { + + if (match_token(f_reserved_keywords, av) != -1) + return (NULL); + + if (fill_newports((ipfw_insn_u16 *)cmd, av, proto, cblen)) { /* XXX todo: check that we have a protocol with ports */ cmd->opcode = opcode; return cmd; From 3615981425e3694ec50e7978d34e0eb21c156432 Mon Sep 17 00:00:00 2001 From: "Alexander V. Chernikov" Date: Mon, 6 Oct 2014 11:15:11 +0000 Subject: [PATCH 59/59] Fix O_TCPOPTS processing. 
Obtained from: luigi --- sys/netpfil/ipfw/ip_fw2.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/sys/netpfil/ipfw/ip_fw2.c b/sys/netpfil/ipfw/ip_fw2.c index a66f19042e24..3e680acfe915 100644 --- a/sys/netpfil/ipfw/ip_fw2.c +++ b/sys/netpfil/ipfw/ip_fw2.c @@ -1722,9 +1722,13 @@ do { \ break; case O_TCPOPTS: - PULLUP_LEN(hlen, ulp, (TCP(ulp)->th_off << 2)); - match = (proto == IPPROTO_TCP && offset == 0 && - tcpopts_match(TCP(ulp), cmd)); + if (proto == IPPROTO_TCP && ulp != NULL) { + PULLUP_LEN(hlen, ulp, + (TCP(ulp)->th_off << 2)); + match = (proto == IPPROTO_TCP && + offset == 0 && + tcpopts_match(TCP(ulp), cmd)); + } break; case O_TCPSEQ: