From 3b7175bdc706fa47075b410388eca46640e651aa Mon Sep 17 00:00:00 2001 From: "Pedro F. Giffuni" Date: Mon, 11 Apr 2016 17:24:26 +0000 Subject: [PATCH 01/29] rcp(1): replace 0 with NULL for pointers. Found with devel/coccinelle. --- bin/rcp/rcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/rcp/rcp.c b/bin/rcp/rcp.c index 186dde6d5e20..42f1702c3c64 100644 --- a/bin/rcp/rcp.c +++ b/bin/rcp/rcp.c @@ -447,7 +447,7 @@ rsource(char *name, struct stat *statp) return; } last = strrchr(name, '/'); - if (last == 0) + if (last == NULL) last = name; else last++; From f7d5087af21f187aa1b9be963b2e843f237a53e2 Mon Sep 17 00:00:00 2001 From: Mark Johnston Date: Mon, 11 Apr 2016 17:57:54 +0000 Subject: [PATCH 02/29] libdtrace: Add a missing unlock to an error handler. Submitted by: Jihyun Yu MFC after: 3 days --- cddl/contrib/opensolaris/lib/libdtrace/common/dt_module.c | 1 + 1 file changed, 1 insertion(+) diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_module.c b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_module.c index 5fe256503f5e..b9408944f8c5 100644 --- a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_module.c +++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_module.c @@ -711,6 +711,7 @@ dt_module_load_proc(dtrace_hdl_t *dtp, dt_module_t *dmp) arg.dpa_count = 0; if (Pobject_iter_resolved(p, dt_module_load_proc_count, &arg) != 0) { dt_dprintf("failed to iterate objects\n"); + dt_proc_unlock(dtp, p); dt_proc_release(dtp, p); return (dt_set_errno(dtp, EDT_CANTLOAD)); } From 3fc873ffe255283ae4ff5a53947d8c296d591ad8 Mon Sep 17 00:00:00 2001 From: "Pedro F. Giffuni" Date: Mon, 11 Apr 2016 18:08:12 +0000 Subject: [PATCH 03/29] libc: cleanup unnecessary semicolons. Found with devel/coccinelle. 
--- lib/libc/net/getservent.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/lib/libc/net/getservent.c b/lib/libc/net/getservent.c index 45a5355245bb..d373dd3364a6 100644 --- a/lib/libc/net/getservent.c +++ b/lib/libc/net/getservent.c @@ -321,7 +321,7 @@ files_servent(void *retval, void *mdata, va_list ap) break; default: return NS_NOTFOUND; - }; + } serv = va_arg(ap, struct servent *); buffer = va_arg(ap, char *); @@ -463,7 +463,7 @@ files_setservent(void *retval, void *mdata, va_list ap) break; default: break; - }; + } st->compat_mode_active = 0; return (NS_UNAVAIL); @@ -522,7 +522,7 @@ db_servent(void *retval, void *mdata, va_list ap) break; default: return NS_NOTFOUND; - }; + } serv = va_arg(ap, struct servent *); buffer = va_arg(ap, char *); @@ -641,7 +641,7 @@ db_setservent(void *retval, void *mdata, va_list ap) break; default: break; - }; + } return (NS_UNAVAIL); } @@ -694,7 +694,7 @@ nis_servent(void *retval, void *mdata, va_list ap) break; default: return NS_NOTFOUND; - }; + } serv = va_arg(ap, struct servent *); buffer = va_arg(ap, char *); @@ -781,7 +781,7 @@ nis_servent(void *retval, void *mdata, va_list ap) } } break; - }; + } rv = parse_result(serv, buffer, bufsize, resultbuf, resultbuflen, errnop); @@ -815,7 +815,7 @@ nis_setservent(void *result, void *mdata, va_list ap) break; default: break; - }; + } return (NS_UNAVAIL); } From 341f552d8c069e849fb1d974e487dcaccc297cc8 Mon Sep 17 00:00:00 2001 From: "Pedro F. Giffuni" Date: Mon, 11 Apr 2016 18:09:38 +0000 Subject: [PATCH 04/29] libc: cleanup unnecessary semicolons (part 2). Found with devel/coccinelle. 
--- lib/libc/rpc/clnt_bcast.c | 2 +- lib/libc/rpc/clnt_generic.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/libc/rpc/clnt_bcast.c b/lib/libc/rpc/clnt_bcast.c index a194ba6c77cc..99f54082d002 100644 --- a/lib/libc/rpc/clnt_bcast.c +++ b/lib/libc/rpc/clnt_bcast.c @@ -469,7 +469,7 @@ rpc_broadcast_exp(rpcprog_t prog, rpcvers_t vers, rpcproc_t proc, "broadcast packet"); stat = RPC_CANTSEND; continue; - }; + } #ifdef RPC_DEBUG if (!__rpc_lowvers) fprintf(stderr, "Broadcast packet sent " diff --git a/lib/libc/rpc/clnt_generic.c b/lib/libc/rpc/clnt_generic.c index 0c8a425750a4..0dea81d52eb5 100644 --- a/lib/libc/rpc/clnt_generic.c +++ b/lib/libc/rpc/clnt_generic.c @@ -402,7 +402,7 @@ clnt_tli_create(int fd, const struct netconfig *nconf, if (madefd) { (void) CLNT_CONTROL(cl, CLSET_FD_CLOSE, NULL); /* (void) CLNT_CONTROL(cl, CLSET_POP_TIMOD, NULL); */ - }; + } return (cl); From 6257b60dd05e0cf49ddf79ea98d7a2308384660e Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Mon, 11 Apr 2016 21:07:18 +0000 Subject: [PATCH 05/29] 6322 ZFS indirect block predictive prefetch Reviewed by: Matthew Ahrens Reviewed by: Paul Dagnelie Author: Alexander Motin Improve speculative prefetch of indirect blocks. Scalability of many operations on a wide ZFS pool can be limited by the requirement to prefetch indirect blocks first. The recently added asynchronous indirect block read partially helped, but did not solve the problem completely. This patch extends existing prefetcher functionality to explicitly work with indirect blocks. Before this change the prefetcher issued reads for up to 8MB of data in advance. With this change it also issues indirect block reads for up to 64MB of data in advance, so that when it is time to actually read that data, it can be done immediately. A similar effect can be achieved by just increasing the maximal data prefetch distance, but at a higher memory cost. 
Also this change introduces indirect block prefetch for rewrite operations, which was never done before. Previously, ARC misses for indirect blocks regularly blocked rewrites, converting perfectly aligned asynchronous operations into synchronous read-write pairs, significantly reducing maximal rewrite speed. While here, this issue was also fixed: - prefetch was done always, even if caching for the dataset was completely disabled. Testing on FreeBSD with zvol on top of 6x striped 2x mirrored pool of 12 assorted HDDs showed the following performance numbers: ------- BEFORE -------- Write 491363677 bytes/sec Read 312430631 bytes/sec Rewrite 97680464 bytes/sec -------- AFTER -------- Write 493524146 bytes/sec Read 438598079 bytes/sec Rewrite 277506044 bytes/sec Closes #65 Closes #80 openzfs/openzfs@792fd28ac04f78cc5e43ead2d72a96f244ea84e8 --- uts/common/fs/zfs/dbuf.c | 6 +-- uts/common/fs/zfs/dmu.c | 7 +-- uts/common/fs/zfs/dmu_zfetch.c | 82 ++++++++++++++++++++++++------ uts/common/fs/zfs/sys/dmu_zfetch.h | 9 +++- uts/common/fs/zfs/sys/dnode.h | 9 ++++ 5 files changed, 90 insertions(+), 23 deletions(-) diff --git a/uts/common/fs/zfs/dbuf.c b/uts/common/fs/zfs/dbuf.c index 9b462fd388ab..9d722e04c1d4 100644 --- a/uts/common/fs/zfs/dbuf.c +++ b/uts/common/fs/zfs/dbuf.c @@ -721,7 +721,7 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags) if (db->db_state == DB_CACHED) { mutex_exit(&db->db_mtx); if (prefetch) - dmu_zfetch(&dn->dn_zfetch, db->db_blkid, 1); + dmu_zfetch(&dn->dn_zfetch, db->db_blkid, 1, B_TRUE); if ((flags & DB_RF_HAVESTRUCT) == 0) rw_exit(&dn->dn_struct_rwlock); DB_DNODE_EXIT(db); @@ -735,7 +735,7 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags) /* dbuf_read_impl has dropped db_mtx for us */ if (prefetch) - dmu_zfetch(&dn->dn_zfetch, db->db_blkid, 1); + dmu_zfetch(&dn->dn_zfetch, db->db_blkid, 1, B_TRUE); if ((flags & DB_RF_HAVESTRUCT) == 0) rw_exit(&dn->dn_struct_rwlock); @@ -754,7 +754,7 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t 
flags) */ mutex_exit(&db->db_mtx); if (prefetch) - dmu_zfetch(&dn->dn_zfetch, db->db_blkid, 1); + dmu_zfetch(&dn->dn_zfetch, db->db_blkid, 1, B_TRUE); if ((flags & DB_RF_HAVESTRUCT) == 0) rw_exit(&dn->dn_struct_rwlock); DB_DNODE_EXIT(db); diff --git a/uts/common/fs/zfs/dmu.c b/uts/common/fs/zfs/dmu.c index ceb08e227fd0..0f3730739bbf 100644 --- a/uts/common/fs/zfs/dmu.c +++ b/uts/common/fs/zfs/dmu.c @@ -441,9 +441,10 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length, dbp[i] = &db->db; } - if ((flags & DMU_READ_NO_PREFETCH) == 0 && read && - length <= zfetch_array_rd_sz) { - dmu_zfetch(&dn->dn_zfetch, blkid, nblks); + if ((flags & DMU_READ_NO_PREFETCH) == 0 && + DNODE_META_IS_CACHEABLE(dn) && length <= zfetch_array_rd_sz) { + dmu_zfetch(&dn->dn_zfetch, blkid, nblks, + read && DNODE_IS_CACHEABLE(dn)); } rw_exit(&dn->dn_struct_rwlock); diff --git a/uts/common/fs/zfs/dmu_zfetch.c b/uts/common/fs/zfs/dmu_zfetch.c index f2cdf863d83c..de2360f580d7 100644 --- a/uts/common/fs/zfs/dmu_zfetch.c +++ b/uts/common/fs/zfs/dmu_zfetch.c @@ -49,6 +49,8 @@ uint32_t zfetch_max_streams = 8; uint32_t zfetch_min_sec_reap = 2; /* max bytes to prefetch per stream (default 8MB) */ uint32_t zfetch_max_distance = 8 * 1024 * 1024; +/* max bytes to prefetch indirects for per stream (default 64MB) */ +uint32_t zfetch_max_idistance = 64 * 1024 * 1024; /* max number of bytes in an array_read in which we allow prefetching (1MB) */ uint64_t zfetch_array_rd_sz = 1024 * 1024; @@ -186,6 +188,7 @@ dmu_zfetch_stream_create(zfetch_t *zf, uint64_t blkid) zstream_t *zs = kmem_zalloc(sizeof (*zs), KM_SLEEP); zs->zs_blkid = blkid; zs->zs_pf_blkid = blkid; + zs->zs_ipf_blkid = blkid; zs->zs_atime = gethrtime(); mutex_init(&zs->zs_lock, NULL, MUTEX_DEFAULT, NULL); @@ -193,13 +196,21 @@ dmu_zfetch_stream_create(zfetch_t *zf, uint64_t blkid) } /* - * This is the prefetch entry point. 
It calls all of the other dmu_zfetch - * routines to create, delete, find, or operate upon prefetch streams. + * This is the predictive prefetch entry point. It associates dnode access + * specified with blkid and nblks arguments with prefetch stream, predicts + * further accesses based on that stats and initiates speculative prefetch. + * fetch_data argument specifies whether actual data blocks should be fetched: + * FALSE -- prefetch only indirect blocks for predicted data blocks; + * TRUE -- prefetch predicted data blocks plus following indirect blocks. */ void -dmu_zfetch(zfetch_t *zf, uint64_t blkid, uint64_t nblks) +dmu_zfetch(zfetch_t *zf, uint64_t blkid, uint64_t nblks, boolean_t fetch_data) { zstream_t *zs; + int64_t pf_start, ipf_start, ipf_istart, ipf_iend; + int64_t pf_ahead_blks, max_blks; + int epbs, max_dist_blks, pf_nblks, ipf_nblks; + uint64_t end_of_access_blkid = blkid + nblks; if (zfs_prefetch_disable) return; @@ -236,7 +247,7 @@ dmu_zfetch(zfetch_t *zf, uint64_t blkid, uint64_t nblks) */ ZFETCHSTAT_BUMP(zfetchstat_misses); if (rw_tryupgrade(&zf->zf_rwlock)) - dmu_zfetch_stream_create(zf, blkid + nblks); + dmu_zfetch_stream_create(zf, end_of_access_blkid); rw_exit(&zf->zf_rwlock); return; } @@ -248,35 +259,74 @@ dmu_zfetch(zfetch_t *zf, uint64_t blkid, uint64_t nblks) * Normally, we start prefetching where we stopped * prefetching last (zs_pf_blkid). But when we get our first * hit on this stream, zs_pf_blkid == zs_blkid, we don't - * want to prefetch to block we just accessed. In this case, + * want to prefetch the block we just accessed. In this case, * start just after the block we just accessed. */ - int64_t pf_start = MAX(zs->zs_pf_blkid, blkid + nblks); + pf_start = MAX(zs->zs_pf_blkid, end_of_access_blkid); /* * Double our amount of prefetched data, but don't let the * prefetch get further ahead than zfetch_max_distance. 
*/ - int pf_nblks = - MIN((int64_t)zs->zs_pf_blkid - zs->zs_blkid + nblks, - zs->zs_blkid + nblks + - (zfetch_max_distance >> zf->zf_dnode->dn_datablkshift) - pf_start); + if (fetch_data) { + max_dist_blks = + zfetch_max_distance >> zf->zf_dnode->dn_datablkshift; + /* + * Previously, we were (zs_pf_blkid - blkid) ahead. We + * want to now be double that, so read that amount again, + * plus the amount we are catching up by (i.e. the amount + * read just now). + */ + pf_ahead_blks = zs->zs_pf_blkid - blkid + nblks; + max_blks = max_dist_blks - (pf_start - end_of_access_blkid); + pf_nblks = MIN(pf_ahead_blks, max_blks); + } else { + pf_nblks = 0; + } zs->zs_pf_blkid = pf_start + pf_nblks; - zs->zs_atime = gethrtime(); - zs->zs_blkid = blkid + nblks; /* - * dbuf_prefetch() issues the prefetch i/o - * asynchronously, but it may need to wait for an - * indirect block to be read from disk. Therefore - * we do not want to hold any locks while we call it. + * Do the same for indirects, starting from where we stopped last, + * or where we will stop reading data blocks (and the indirects + * that point to them). */ + ipf_start = MAX(zs->zs_ipf_blkid, zs->zs_pf_blkid); + max_dist_blks = zfetch_max_idistance >> zf->zf_dnode->dn_datablkshift; + /* + * We want to double our distance ahead of the data prefetch + * (or reader, if we are not prefetching data). Previously, we + * were (zs_ipf_blkid - blkid) ahead. To double that, we read + * that amount again, plus the amount we are catching up by + * (i.e. the amount read now + the amount of data prefetched now). 
+ */ + pf_ahead_blks = zs->zs_ipf_blkid - blkid + nblks + pf_nblks; + max_blks = max_dist_blks - (ipf_start - end_of_access_blkid); + ipf_nblks = MIN(pf_ahead_blks, max_blks); + zs->zs_ipf_blkid = ipf_start + ipf_nblks; + + epbs = zf->zf_dnode->dn_indblkshift - SPA_BLKPTRSHIFT; + ipf_istart = P2ROUNDUP(ipf_start, 1 << epbs) >> epbs; + ipf_iend = P2ROUNDUP(zs->zs_ipf_blkid, 1 << epbs) >> epbs; + + zs->zs_atime = gethrtime(); + zs->zs_blkid = end_of_access_blkid; mutex_exit(&zs->zs_lock); rw_exit(&zf->zf_rwlock); + + /* + * dbuf_prefetch() is asynchronous (even when it needs to read + * indirect blocks), but we still prefer to drop our locks before + * calling it to reduce the time we hold them. + */ + for (int i = 0; i < pf_nblks; i++) { dbuf_prefetch(zf->zf_dnode, 0, pf_start + i, ZIO_PRIORITY_ASYNC_READ, ARC_FLAG_PREDICTIVE_PREFETCH); } + for (int64_t iblk = ipf_istart; iblk < ipf_iend; iblk++) { + dbuf_prefetch(zf->zf_dnode, 1, iblk, + ZIO_PRIORITY_ASYNC_READ, ARC_FLAG_PREDICTIVE_PREFETCH); + } ZFETCHSTAT_BUMP(zfetchstat_hits); } diff --git a/uts/common/fs/zfs/sys/dmu_zfetch.h b/uts/common/fs/zfs/sys/dmu_zfetch.h index 6f61198ebc45..21a3ff3a2032 100644 --- a/uts/common/fs/zfs/sys/dmu_zfetch.h +++ b/uts/common/fs/zfs/sys/dmu_zfetch.h @@ -43,6 +43,13 @@ struct dnode; /* so we can reference dnode */ typedef struct zstream { uint64_t zs_blkid; /* expect next access at this blkid */ uint64_t zs_pf_blkid; /* next block to prefetch */ + + /* + * We will next prefetch the L1 indirect block of this level-0 + * block id. 
+ */ + uint64_t zs_ipf_blkid; + kmutex_t zs_lock; /* protects stream */ hrtime_t zs_atime; /* time last prefetch issued */ list_node_t zs_node; /* link for zf_stream */ @@ -59,7 +66,7 @@ void zfetch_fini(void); void dmu_zfetch_init(zfetch_t *, struct dnode *); void dmu_zfetch_fini(zfetch_t *); -void dmu_zfetch(zfetch_t *, uint64_t, uint64_t); +void dmu_zfetch(zfetch_t *, uint64_t, uint64_t, boolean_t); #ifdef __cplusplus diff --git a/uts/common/fs/zfs/sys/dnode.h b/uts/common/fs/zfs/sys/dnode.h index 69cc54dc272b..dfa3e576c58e 100644 --- a/uts/common/fs/zfs/sys/dnode.h +++ b/uts/common/fs/zfs/sys/dnode.h @@ -305,6 +305,15 @@ int dnode_next_offset(dnode_t *dn, int flags, uint64_t *off, void dnode_evict_dbufs(dnode_t *dn); void dnode_evict_bonus(dnode_t *dn); +#define DNODE_IS_CACHEABLE(_dn) \ + ((_dn)->dn_objset->os_primary_cache == ZFS_CACHE_ALL || \ + (DMU_OT_IS_METADATA((_dn)->dn_type) && \ + (_dn)->dn_objset->os_primary_cache == ZFS_CACHE_METADATA)) + +#define DNODE_META_IS_CACHEABLE(_dn) \ + ((_dn)->dn_objset->os_primary_cache == ZFS_CACHE_ALL || \ + (_dn)->dn_objset->os_primary_cache == ZFS_CACHE_METADATA) + #ifdef ZFS_DEBUG /* From 63208a7a29c48c3248ca26ce08056d14cd530c97 Mon Sep 17 00:00:00 2001 From: Bryan Drewery Date: Mon, 11 Apr 2016 21:10:14 +0000 Subject: [PATCH 06/29] META_MODE: Support targets that already have .OBJDIR in them for META_COOKIE. 
Sponsored by: EMC / Isilon Storage Division --- share/mk/local.meta.sys.mk | 1 + 1 file changed, 1 insertion(+) diff --git a/share/mk/local.meta.sys.mk b/share/mk/local.meta.sys.mk index e55d68de3953..773009f5534f 100644 --- a/share/mk/local.meta.sys.mk +++ b/share/mk/local.meta.sys.mk @@ -61,6 +61,7 @@ MACHINE_ARCH_LIST.$m?= ${TARGET_ARCHES_${m}:U$m} MACHINE_ARCH.$m?= ${MACHINE_ARCH_LIST.$m:[1]} BOOT_MACHINE_DIR.$m ?= boot/$m .endfor +ALL_MACHINE_LIST+= common .ifndef _TARGET_SPEC .if empty(MACHINE_ARCH) From 5528bda1b228800a8e10d7e3e0ee37c70446ceb9 Mon Sep 17 00:00:00 2001 From: Bryan Drewery Date: Mon, 11 Apr 2016 21:12:00 +0000 Subject: [PATCH 07/29] Revert r297833 which committed the wrong file --- share/mk/local.meta.sys.mk | 1 - 1 file changed, 1 deletion(-) diff --git a/share/mk/local.meta.sys.mk b/share/mk/local.meta.sys.mk index 773009f5534f..e55d68de3953 100644 --- a/share/mk/local.meta.sys.mk +++ b/share/mk/local.meta.sys.mk @@ -61,7 +61,6 @@ MACHINE_ARCH_LIST.$m?= ${TARGET_ARCHES_${m}:U$m} MACHINE_ARCH.$m?= ${MACHINE_ARCH_LIST.$m:[1]} BOOT_MACHINE_DIR.$m ?= boot/$m .endfor -ALL_MACHINE_LIST+= common .ifndef _TARGET_SPEC .if empty(MACHINE_ARCH) From 0457a4e053c5c5598c81639ed5b399a002f27d71 Mon Sep 17 00:00:00 2001 From: Bryan Drewery Date: Mon, 11 Apr 2016 21:12:24 +0000 Subject: [PATCH 08/29] META_MODE: Support targets that already have .OBJDIR in them for META_COOKIE. Sponsored by: EMC / Isilon Storage Division --- share/mk/local.sys.mk | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/share/mk/local.sys.mk b/share/mk/local.sys.mk index ced441fe26d8..f6a98245a857 100644 --- a/share/mk/local.sys.mk +++ b/share/mk/local.sys.mk @@ -33,7 +33,8 @@ OBJTOP?= ${.OBJDIR:S,${.CURDIR},,}${SRCTOP} # we can afford to use cookies to prevent some targets # re-running needlessly but only when using filemon. 
.if ${.MAKE.MODE:Mnofilemon} == "" -META_COOKIE= ${COOKIE.${.TARGET}:U${.OBJDIR}/${.TARGET}} +META_COOKIE_COND= empty(.TARGET:M${.OBJDIR}) +META_COOKIE= ${COOKIE.${.TARGET}:U${${META_COOKIE_COND}:?${.OBJDIR}/${.TARGET}:${.TARGET}}} META_COOKIE_RM= @rm -f ${META_COOKIE} META_COOKIE_TOUCH= @touch ${META_COOKIE} CLEANFILES+= ${META_TARGETS} From 306c3c4d9084ba42d118b6187e66074e0d12b31d Mon Sep 17 00:00:00 2001 From: Enji Cooper Date: Mon, 11 Apr 2016 21:15:48 +0000 Subject: [PATCH 09/29] Fix appending -O0 to CFLAGS The previous method would completely nerf CFLAGS once bsd.progs.mk had recursed into the per-PROG logic and make the CFLAGS for tap testcases to -O0, instead of appending to CFLAGS for all of the tap testcases. MFC after: 1 week Sponsored by: EMC / Isilon Storage Division --- lib/msun/tests/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/msun/tests/Makefile b/lib/msun/tests/Makefile index 8b114fde1c7c..b18c07d5a115 100644 --- a/lib/msun/tests/Makefile +++ b/lib/msun/tests/Makefile @@ -62,9 +62,9 @@ TAP_TESTS_C+= next_test TAP_TESTS_C+= rem_test TAP_TESTS_C+= trig_test -.for t in ${TAP_TESTS_C} -CFLAGS.$t+= -O0 -.endfor +.if !empty(PROG) && !empty(TAP_TESTS_C:M${PROG}) +CFLAGS+= -O0 +.endif CSTD= c99 From 84aa8a8ad100a103404402dcad21120ad48ca2e9 Mon Sep 17 00:00:00 2001 From: Rick Macklem Date: Mon, 11 Apr 2016 21:55:21 +0000 Subject: [PATCH 10/29] Bruce Evans reported that there was a performance regression between the old and new NFS clients. He did a good job of isolating the problem which was caused by the new NFS client not setting the post write mtime correctly. The new NFS client code was cloned from the old client, but was incorrect, because the mtime in the nfs vnode's cache wasn't yet updated. This patch fixes this problem. The patch also adds missing mutex locking. 
Reported and tested by: bde MFC after: 2 weeks --- sys/fs/nfs/nfsport.h | 8 +++++--- sys/fs/nfsclient/nfs_clrpcops.c | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/sys/fs/nfs/nfsport.h b/sys/fs/nfs/nfsport.h index 59e353dadb05..04bb27c54412 100644 --- a/sys/fs/nfs/nfsport.h +++ b/sys/fs/nfs/nfsport.h @@ -788,12 +788,14 @@ MALLOC_DECLARE(M_NEWNFSDSESSION); /* * Set the n_time in the client write rpc, as required. */ -#define NFSWRITERPC_SETTIME(w, n, v4) \ +#define NFSWRITERPC_SETTIME(w, n, a, v4) \ do { \ if (w) { \ - (n)->n_mtime = (n)->n_vattr.na_vattr.va_mtime; \ + mtx_lock(&((n)->n_mtx)); \ + (n)->n_mtime = (a)->na_mtime; \ if (v4) \ - (n)->n_change = (n)->n_vattr.na_vattr.va_filerev; \ + (n)->n_change = (a)->na_filerev; \ + mtx_unlock(&((n)->n_mtx)); \ } \ } while (0) diff --git a/sys/fs/nfsclient/nfs_clrpcops.c b/sys/fs/nfsclient/nfs_clrpcops.c index cd12fba7c2d4..6e6ece3da346 100644 --- a/sys/fs/nfsclient/nfs_clrpcops.c +++ b/sys/fs/nfsclient/nfs_clrpcops.c @@ -1734,7 +1734,7 @@ nfsrpc_writerpc(vnode_t vp, struct uio *uiop, int *iomode, } if (error) goto nfsmout; - NFSWRITERPC_SETTIME(wccflag, np, (nd->nd_flag & ND_NFSV4)); + NFSWRITERPC_SETTIME(wccflag, np, nap, (nd->nd_flag & ND_NFSV4)); mbuf_freem(nd->nd_mrep); nd->nd_mrep = NULL; tsiz -= len; From 2c6254c2878bb31c971818e767e2d13e5e06029f Mon Sep 17 00:00:00 2001 From: Alan Somers Date: Mon, 11 Apr 2016 22:14:29 +0000 Subject: [PATCH 11/29] Fix an intermittent bug in sbin/devd/client_test.stream In case where the two events were being received in separate reads, the event buffer was being null-terminated at the wrong offset. Also, factored out some common code between the tests, and fixed a comment. 
Submitted by: will MFC after: 3 days Sponsored by: Spectra Logic Corp --- sbin/devd/tests/client_test.c | 66 ++++++++++++++--------------------- 1 file changed, 27 insertions(+), 39 deletions(-) diff --git a/sbin/devd/tests/client_test.c b/sbin/devd/tests/client_test.c index dda9a899005d..daea14716b46 100644 --- a/sbin/devd/tests/client_test.c +++ b/sbin/devd/tests/client_test.c @@ -34,6 +34,10 @@ __FBSDID("$FreeBSD$"); #include #include + +const char create_pat[] = "!system=DEVFS subsystem=CDEV type=CREATE cdev=md"; +const char destroy_pat[] = "!system=DEVFS subsystem=CDEV type=DESTROY cdev=md"; + /* Helper functions*/ /* @@ -63,6 +67,24 @@ create_two_events(void) ATF_REQUIRE_EQ(0, pclose(destroy_stdout)); } +/* Setup and return an open client socket */ +static int +common_setup(int socktype, const char* sockpath) { + struct sockaddr_un devd_addr; + int s, error; + + memset(&devd_addr, 0, sizeof(devd_addr)); + devd_addr.sun_family = PF_LOCAL; + strlcpy(devd_addr.sun_path, sockpath, sizeof(devd_addr.sun_path)); + s = socket(PF_LOCAL, socktype, 0); + ATF_REQUIRE(s >= 0); + error = connect(s, (struct sockaddr*)&devd_addr, SUN_LEN(&devd_addr)); + ATF_REQUIRE_EQ(0, error); + + create_two_events(); + return (s); +} + /* * Test Cases */ @@ -75,27 +97,10 @@ ATF_TC_WITHOUT_HEAD(seqpacket); ATF_TC_BODY(seqpacket, tc) { int s; - int error; - struct sockaddr_un devd_addr; bool got_create_event = false; bool got_destroy_event = false; - const char create_pat[] = - "!system=DEVFS subsystem=CDEV type=CREATE cdev=md"; - const char destroy_pat[] = - "!system=DEVFS subsystem=CDEV type=DESTROY cdev=md"; - - memset(&devd_addr, 0, sizeof(devd_addr)); - devd_addr.sun_family = PF_LOCAL; - strlcpy(devd_addr.sun_path, "/var/run/devd.seqpacket.pipe", - sizeof(devd_addr.sun_path)); - - s = socket(PF_LOCAL, SOCK_SEQPACKET, 0); - ATF_REQUIRE(s >= 0); - error = connect(s, (struct sockaddr*)&devd_addr, SUN_LEN(&devd_addr)); - ATF_REQUIRE_EQ(0, error); - - create_two_events(); + s = 
common_setup(SOCK_SEQPACKET, "/var/run/devd.seqpacket.pipe"); /* * Loop until both events are detected on _different_ reads * There may be extra events due to unrelated system activity @@ -132,31 +137,14 @@ ATF_TC_WITHOUT_HEAD(stream); ATF_TC_BODY(stream, tc) { int s; - int error; - struct sockaddr_un devd_addr; bool got_create_event = false; bool got_destroy_event = false; - const char create_pat[] = - "!system=DEVFS subsystem=CDEV type=CREATE cdev=md"; - const char destroy_pat[] = - "!system=DEVFS subsystem=CDEV type=DESTROY cdev=md"; ssize_t len = 0; - memset(&devd_addr, 0, sizeof(devd_addr)); - devd_addr.sun_family = PF_LOCAL; - strlcpy(devd_addr.sun_path, "/var/run/devd.pipe", - sizeof(devd_addr.sun_path)); - - s = socket(PF_LOCAL, SOCK_STREAM, 0); - ATF_REQUIRE(s >= 0); - error = connect(s, (struct sockaddr*)&devd_addr, SUN_LEN(&devd_addr)); - ATF_REQUIRE_EQ(0, error); - - create_two_events(); - + s = common_setup(SOCK_STREAM, "/var/run/devd.pipe"); /* - * Loop until both events are detected on _different_ reads - * There may be extra events due to unrelated system activity + * Loop until both events are detected on the same or different reads. + * There may be extra events due to unrelated system activity. * If we never get both events, then the test will timeout. 
*/ while (!(got_create_event && got_destroy_event)) { @@ -169,7 +157,7 @@ ATF_TC_BODY(stream, tc) ATF_REQUIRE(newlen != -1); len += newlen; /* NULL terminate the result */ - event[newlen] = '\0'; + event[len] = '\0'; printf("%s", event); create_pos = strstr(event, create_pat); From 3d861d81b9599b97bf0aaaa97879fe492d9fea46 Mon Sep 17 00:00:00 2001 From: Sepherosa Ziehau Date: Tue, 12 Apr 2016 02:01:16 +0000 Subject: [PATCH 12/29] hyperv: Replace 0 w/ NULL Submitted by: pfg MFC after: 1 week Sponsored by: Microsoft OSTC --- sys/dev/hyperv/vmbus/hv_hv.c | 2 +- sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sys/dev/hyperv/vmbus/hv_hv.c b/sys/dev/hyperv/vmbus/hv_hv.c index b835a347ff02..797bb8c9b358 100644 --- a/sys/dev/hyperv/vmbus/hv_hv.c +++ b/sys/dev/hyperv/vmbus/hv_hv.c @@ -127,7 +127,7 @@ int hv_vmbus_init(void) { hv_vmbus_x64_msr_hypercall_contents hypercall_msr; - void* virt_addr = 0; + void* virt_addr = NULL; memset( hv_vmbus_g_context.syn_ic_event_page, diff --git a/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c b/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c index 8a07e4517b0c..01b35160f2b2 100644 --- a/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c +++ b/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c @@ -581,7 +581,7 @@ vmbus_bus_exit(void) smp_rendezvous(NULL, hv_vmbus_synic_cleanup, NULL, NULL); for(i = 0; i < 2 * MAXCPU; i++) { - if (setup_args.page_buffers[i] != 0) + if (setup_args.page_buffers[i] != NULL) free(setup_args.page_buffers[i], M_DEVBUF); } From 8a5f4396775074170d5c509be438d72ef266d0bd Mon Sep 17 00:00:00 2001 From: Bryan Drewery Date: Tue, 12 Apr 2016 02:45:19 +0000 Subject: [PATCH 13/29] META_MODE: Avoid changed build command every build. Because the file is generated with -f using another Makefile, 2 different Makefiles are trying to handle the .meta file for the target. The obvious .NOMETA_CMP or .NOMETA on the ${MAKE} targets don't work as they are very limited in scope in bmake. 
Using .PHONY fixes the problem and ensures that the ${MAKE} command is always run to check if it is outdated in the sub-make. An example of the problem in gnu/lib/libgcc (with make -dM): /usr/obj/root/git/freebsd/gnu/lib/libgcc/tm.h.meta: 2: a build command has changed TARGET_CPU_DEFAULT="" HEADERS="options.h i386/biarch64.h i386/i386.h i386/unix.h i386/att.h dbxelf.h elfos-undef.h elfos.h freebsd-native.h freebsd-spec.h freebsd.h i386/x86-64.h i386/freebsd.h i386/freebsd64.h defaults.h" DEFINES="" /bin/sh /root/git/freebsd/gnu/lib/libgcc/../../../contrib/gcc/mkconfig.sh tm.h vs (cd /root/git/freebsd/gnu/lib/libgcc; make -f /root/git/freebsd/gnu/lib/libgcc/../../usr.bin/cc/cc_tools/Makefile MFILE=/root/git/freebsd/gnu/lib/libgcc/../../usr.bin/cc/cc_tools/Makefile GCCDIR=/root/git/freebsd/gnu/lib/libgcc/../../../contrib/gcc tm.h) Skipping meta for tm.h: .NOMETA (cd /root/git/freebsd/gnu/lib/libgcc; make -f /root/git/freebsd/gnu/lib/libgcc/../../usr.bin/cc/cc_tools/Makefile MFILE=/root/git/freebsd/gnu/lib/libgcc/../../usr.bin/cc/cc_tools/Makefile GCCDIR=/root/git/freebsd/gnu/lib/libgcc/../../../contrib/gcc tm.h) `tm.h' is up to date. 
Sponsored by: EMC / Isilon Storage Division --- gnu/lib/csu/Makefile | 2 +- gnu/lib/libgcc/Makefile | 2 +- gnu/lib/libgcov/Makefile | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/gnu/lib/csu/Makefile b/gnu/lib/csu/Makefile index 431baac607c5..0e50152afd5d 100644 --- a/gnu/lib/csu/Makefile +++ b/gnu/lib/csu/Makefile @@ -71,7 +71,7 @@ crtendS.o: ${ENDSRC} -c -o ${.TARGET} ${.ALLSRC:N*.h} CLEANFILES+= tm.h tconfig.h options.h optionlist cs-tconfig.h cs-tm.h -tm.h tconfig.h options.h: ${CCDIR}/cc_tools/Makefile +tm.h tconfig.h options.h: ${CCDIR}/cc_tools/Makefile .PHONY (cd ${.CURDIR}; ${MAKE} -f ${.ALLSRC} MFILE=${.ALLSRC} GCCDIR=${GCCDIR} ${.TARGET}) .include diff --git a/gnu/lib/libgcc/Makefile b/gnu/lib/libgcc/Makefile index f824bbc6112c..04c3a55c63aa 100644 --- a/gnu/lib/libgcc/Makefile +++ b/gnu/lib/libgcc/Makefile @@ -343,7 +343,7 @@ ${_src:R:S/$/.So/}: ${_src} ${COMMONHDRS} # # Generated headers # -${COMMONHDRS}: ${.CURDIR}/../../usr.bin/cc/cc_tools/Makefile +${COMMONHDRS}: ${.CURDIR}/../../usr.bin/cc/cc_tools/Makefile .PHONY (cd ${.CURDIR}; ${MAKE} -f ${.ALLSRC} MFILE=${.ALLSRC} GCCDIR=${GCCDIR} ${.TARGET}) CLEANFILES += ${COMMONHDRS} diff --git a/gnu/lib/libgcov/Makefile b/gnu/lib/libgcov/Makefile index 7582720f53b7..34932aac2ed1 100644 --- a/gnu/lib/libgcov/Makefile +++ b/gnu/lib/libgcov/Makefile @@ -45,7 +45,7 @@ CC_S = ${CC} -c ${CFLAGS} ${PICFLAG} -DSHARED COMMONHDRS= tm.h tconfig.h gcov-iov.h options.h CLEANFILES+= ${COMMONHDRS} cs-tm.h cs-tconfig.h options.h optionlist -${COMMONHDRS}: ${.CURDIR}/../../usr.bin/cc/cc_tools/Makefile +${COMMONHDRS}: ${.CURDIR}/../../usr.bin/cc/cc_tools/Makefile .PHONY (cd ${.CURDIR}; ${MAKE} -f ${.ALLSRC} MFILE=${.ALLSRC} GCCDIR=${GCCDIR} ${.TARGET}) ${OBJS} beforedepend: ${COMMONHDRS} From 4cfc7d9db5682ab6161bef30f3dd24069f20dfb2 Mon Sep 17 00:00:00 2001 From: Bryan Drewery Date: Tue, 12 Apr 2016 03:37:42 +0000 Subject: [PATCH 14/29] Document the behavior of NO_DIRDEPS/NO_DIRDEPS_BELOW. 
Sponsored by: EMC / Isilon Storage Division --- tools/build/options/WITH_DIRDEPS_BUILD | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/build/options/WITH_DIRDEPS_BUILD b/tools/build/options/WITH_DIRDEPS_BUILD index eafe857dd460..981c1a93db11 100644 --- a/tools/build/options/WITH_DIRDEPS_BUILD +++ b/tools/build/options/WITH_DIRDEPS_BUILD @@ -19,7 +19,9 @@ computing a graph of tree dependencies from the current origin. Setting .Va NO_DIRDEPS will skip checking dirdep dependencies and will only build in the current -directory. +and child directories. +.Va NO_DIRDEPS_BELOW +will skip building any dirdeps and only build the current directory. .Pp As each target is made .Xr make 1 From d32e910cb2dedbe9d30651ede72fef3794103616 Mon Sep 17 00:00:00 2001 From: Bryan Drewery Date: Tue, 12 Apr 2016 03:40:13 +0000 Subject: [PATCH 15/29] Add some more content for WITH_META_MODE. Sponsored by: EMC / Isilon Storage Division --- tools/build/options/WITH_META_MODE | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tools/build/options/WITH_META_MODE b/tools/build/options/WITH_META_MODE index 6f2f296b5a43..906b557769c9 100644 --- a/tools/build/options/WITH_META_MODE +++ b/tools/build/options/WITH_META_MODE @@ -1,3 +1,12 @@ .\" $FreeBSD$ Create meta files when not doing DIRDEPS_BUILD. -The meta files can be useful for debugging. +When the +.Xr filemon 4 +module is loaded, dependencies will be tracked for all commands. +If any command, its dependencies, or files it generates are missing then +the target will be considered out-of-date and rebuilt. +The meta files can also be useful for debugging. +.Pp +The build will hide commands ran unless +.Va NO_SILENT +is defined. 
From 7f76a95cd0102ed4cf70e95399462439c42eeb6d Mon Sep 17 00:00:00 2001 From: Bryan Drewery Date: Tue, 12 Apr 2016 03:55:33 +0000 Subject: [PATCH 16/29] Regenerate --- share/man/man5/src.conf.5 | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/share/man/man5/src.conf.5 b/share/man/man5/src.conf.5 index 68e16217e48b..e6388033a5f2 100644 --- a/share/man/man5/src.conf.5 +++ b/share/man/man5/src.conf.5 @@ -1,7 +1,7 @@ .\" DO NOT EDIT-- this file is automatically generated. .\" from FreeBSD: head/tools/build/options/makeman 292283 2015-12-15 18:42:30Z bdrewery .\" $FreeBSD$ -.Dd March 30, 2016 +.Dd April 11, 2016 .Dt SRC.CONF 5 .Os .Sh NAME @@ -473,7 +473,7 @@ executable binary and shared library. .\" from FreeBSD: head/tools/build/options/WITHOUT_DICT 156932 2006-03-21 07:50:50Z ru Set to not build the Webster dictionary files. .It Va WITH_DIRDEPS_BUILD -.\" from FreeBSD: head/tools/build/options/WITH_DIRDEPS_BUILD 290816 2015-11-14 03:24:48Z sjg +.\" from FreeBSD: head/tools/build/options/WITH_DIRDEPS_BUILD 297843 2016-04-12 03:37:42Z bdrewery Enable building in meta mode. This is an experimental build feature. For details see @@ -494,7 +494,9 @@ computing a graph of tree dependencies from the current origin. Setting .Va NO_DIRDEPS will skip checking dirdep dependencies and will only build in the current -directory. +and child directories. +.Va NO_DIRDEPS_BELOW +will skip building any dirdeps and only build the current directory. .Pp As each target is made .Xr make 1 @@ -1057,9 +1059,18 @@ Set to not build utilities for manual pages, .Xr manctl 8 , and related support files. .It Va WITH_META_MODE -.\" from FreeBSD: head/tools/build/options/WITH_META_MODE 290816 2015-11-14 03:24:48Z sjg +.\" from FreeBSD: head/tools/build/options/WITH_META_MODE 297844 2016-04-12 03:40:13Z bdrewery Create meta files when not doing DIRDEPS_BUILD. -The meta files can be useful for debugging. 
+When the +.Xr filemon 4 +module is loaded, dependencies will be tracked for all commands. +If any command, its dependencies, or files it generates are missing then +the target will be considered out-of-date and rebuilt. +The meta files can also be useful for debugging. +.Pp +The build will hide commands ran unless +.Va NO_SILENT +is defined. .Pp This must be set in the environment, make command line, or .Pa /etc/src-env.conf , From 9054bcbce7237aa1390a5758fef47b2924c140cd Mon Sep 17 00:00:00 2001 From: Andriy Gapon Date: Tue, 12 Apr 2016 06:46:54 +0000 Subject: [PATCH 17/29] [amd64] dtrace_invop handler is to be called only for kernel exceptions DTrace-related exceptions in userland code are handled elsewhere. One practical problem was a crash in dtrace_invop_start() when saved %rsp pointed to a virtual address that was not backed. i386 code already ignored userland exceptions. Reviewed by: markj, kib MFC after: 2 weeks Differential Revision: https://reviews.freebsd.org/D5906 --- sys/amd64/amd64/exception.S | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sys/amd64/amd64/exception.S b/sys/amd64/amd64/exception.S index a855bd362919..caabfd9f0cd4 100644 --- a/sys/amd64/amd64/exception.S +++ b/sys/amd64/amd64/exception.S @@ -211,6 +211,8 @@ alltraps_pushregs_no_rdi: * interrupt. For all other trap types, just handle them in * the usual way. */ + testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */ + jnz calltrap /* ignore userland traps */ cmpl $T_BPTFLT,TF_TRAPNO(%rsp) jne calltrap From 6a50036052534bc6c066fddc9ae3c9bfe407c381 Mon Sep 17 00:00:00 2001 From: Andriy Gapon Date: Tue, 12 Apr 2016 06:54:18 +0000 Subject: [PATCH 18/29] Revert r297396 Modify "4958 zdb trips assert on pools with ashift >= 0xe" A better fix is following. 
--- .../contrib/opensolaris/uts/common/fs/zfs/zio.c | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c index 6dc0ad39239d..b706dd32f2ba 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c @@ -2777,19 +2777,10 @@ zio_vdev_io_start(zio_t *zio) (void) atomic_cas_64(&spa->spa_last_io, old, new); } -#ifdef illumos align = 1ULL << vd->vdev_top->vdev_ashift; if (!(zio->io_flags & ZIO_FLAG_PHYSICAL) && P2PHASE(zio->io_size, align) != 0) { -#else - if (zio->io_flags & ZIO_FLAG_PHYSICAL) - align = 1ULL << vd->vdev_top->vdev_logical_ashift; - else - align = 1ULL << vd->vdev_top->vdev_ashift; - - if (P2PHASE(zio->io_size, align) != 0) { -#endif /* Transform logical writes to be a full physical block size. */ uint64_t asize = P2ROUNDUP(zio->io_size, align); char *abuf = NULL; @@ -2805,7 +2796,6 @@ zio_vdev_io_start(zio_t *zio) zio_subblock); } -#ifdef illumos /* * If this is not a physical io, make sure that it is properly aligned * before proceeding. @@ -2821,10 +2811,6 @@ zio_vdev_io_start(zio_t *zio) ASSERT0(P2PHASE(zio->io_offset, SPA_MINBLOCKSIZE)); ASSERT0(P2PHASE(zio->io_size, SPA_MINBLOCKSIZE)); } -#else - ASSERT0(P2PHASE(zio->io_offset, align)); - ASSERT0(P2PHASE(zio->io_size, align)); -#endif VERIFY(zio->io_type == ZIO_TYPE_READ || spa_writeable(spa)); From c3249989efdc1723db3e19df665cdeee5996e796 Mon Sep 17 00:00:00 2001 From: Andriy Gapon Date: Tue, 12 Apr 2016 06:56:35 +0000 Subject: [PATCH 19/29] l2arc: make sure that all writes honor ashift of a cache device Previously uncompressed buffers did not obey that rule. Type of b_asize is changed to uint64_t for consistency, given that this is a zettabyte filesystem. l2arc_compress_buf is renamed to l2arc_transform_buf to better reflect its new utility.
Now not only we ensure that a compressed buffer has a size aligned to ashift, but we also allocate a properly sized temporary buffer if the original buffer is not compressed and it has an odd size. This ensures that all I/O to the cache device is always ashift-aligned, in terms of both a request offset and a request size. If the aligned data is larger than the original data, then we have to use a temporary buffer when reading it as well. Also, enhance physical zio alignment checks using vdev_logical_ashift. On FreeBSD we have this information, so we can make stricter assertions. Reviewed by: smh, mav MFC after: 1 month Sponsored by: ClusterHQ Differential Revision: https://reviews.freebsd.org/D2789 --- .../opensolaris/uts/common/fs/zfs/arc.c | 244 +++++++++++------- .../opensolaris/uts/common/fs/zfs/zio.c | 10 +- 2 files changed, 157 insertions(+), 97 deletions(-) diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c index 534dfb2cd682..c1a254aecac1 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c @@ -563,6 +563,7 @@ typedef struct arc_stats { kstat_named_t arcstat_l2_compress_successes; kstat_named_t arcstat_l2_compress_zeros; kstat_named_t arcstat_l2_compress_failures; + kstat_named_t arcstat_l2_padding_needed; kstat_named_t arcstat_l2_write_trylock_fail; kstat_named_t arcstat_l2_write_passed_headroom; kstat_named_t arcstat_l2_write_spa_mismatch; @@ -664,6 +665,7 @@ static arc_stats_t arc_stats = { { "l2_compress_successes", KSTAT_DATA_UINT64 }, { "l2_compress_zeros", KSTAT_DATA_UINT64 }, { "l2_compress_failures", KSTAT_DATA_UINT64 }, + { "l2_padding_needed", KSTAT_DATA_UINT64 }, { "l2_write_trylock_fail", KSTAT_DATA_UINT64 }, { "l2_write_passed_headroom", KSTAT_DATA_UINT64 }, { "l2_write_spa_mismatch", KSTAT_DATA_UINT64 }, @@ -837,7 +839,7 @@ typedef struct l1arc_buf_hdr { refcount_t b_refcnt; arc_callback_t *b_acb; - /* 
temporary buffer holder for in-flight compressed data */ + /* temporary buffer holder for in-flight compressed or padded data */ void *b_tmp_cdata; } l1arc_buf_hdr_t; @@ -1098,6 +1100,7 @@ typedef struct l2arc_read_callback { zbookmark_phys_t l2rcb_zb; /* original bookmark */ int l2rcb_flags; /* original flags */ enum zio_compress l2rcb_compress; /* applied compress */ + void *l2rcb_data; /* temporary buffer */ } l2arc_read_callback_t; typedef struct l2arc_write_callback { @@ -1128,7 +1131,7 @@ static uint32_t arc_bufc_to_flags(arc_buf_contents_t); static boolean_t l2arc_write_eligible(uint64_t, arc_buf_hdr_t *); static void l2arc_read_done(zio_t *); -static boolean_t l2arc_compress_buf(arc_buf_hdr_t *); +static boolean_t l2arc_transform_buf(arc_buf_hdr_t *, boolean_t); static void l2arc_decompress_zio(zio_t *, arc_buf_hdr_t *, enum zio_compress); static void l2arc_release_cdata_buf(arc_buf_hdr_t *); @@ -2215,6 +2218,8 @@ arc_buf_data_free(arc_buf_t *buf, void (*free_func)(void *, size_t)) static void arc_buf_l2_cdata_free(arc_buf_hdr_t *hdr) { + size_t align, asize, len; + ASSERT(HDR_HAS_L2HDR(hdr)); ASSERT(MUTEX_HELD(&hdr->b_l2hdr.b_dev->l2ad_mtx)); @@ -2236,16 +2241,15 @@ arc_buf_l2_cdata_free(arc_buf_hdr_t *hdr) } /* - * The header does not have compression enabled. This can be due - * to the buffer not being compressible, or because we're - * freeing the buffer before the second phase of - * l2arc_write_buffer() has started (which does the compression - * step). In either case, b_tmp_cdata does not point to a - * separately compressed buffer, so there's nothing to free (it - * points to the same buffer as the arc_buf_t's b_data field). + * The bufer has been chosen for writing to L2ARC, but it's + * not being written just yet. In other words, + * b_tmp_cdata points to exactly the same buffer as b_data, + * l2arc_transform_buf hasn't been called. 
*/ - if (hdr->b_l2hdr.b_compress == ZIO_COMPRESS_OFF) { - hdr->b_l1hdr.b_tmp_cdata = NULL; + if (hdr->b_l2hdr.b_daddr == L2ARC_ADDR_UNSET) { + ASSERT3P(hdr->b_l1hdr.b_tmp_cdata, ==, + hdr->b_l1hdr.b_buf->b_data); + ASSERT3U(hdr->b_l2hdr.b_compress, ==, ZIO_COMPRESS_OFF); return; } @@ -2258,12 +2262,18 @@ arc_buf_l2_cdata_free(arc_buf_hdr_t *hdr) return; } - ASSERT(L2ARC_IS_VALID_COMPRESS(hdr->b_l2hdr.b_compress)); - - arc_buf_free_on_write(hdr->b_l1hdr.b_tmp_cdata, - hdr->b_size, zio_data_buf_free); + /* + * Nothing to do if the temporary buffer was not required. + */ + if (hdr->b_l1hdr.b_tmp_cdata == NULL) + return; ARCSTAT_BUMP(arcstat_l2_cdata_free_on_write); + len = hdr->b_size; + align = (size_t)1 << hdr->b_l2hdr.b_dev->l2ad_vdev->vdev_ashift; + asize = P2ROUNDUP(len, align); + arc_buf_free_on_write(hdr->b_l1hdr.b_tmp_cdata, asize, + zio_data_buf_free); hdr->b_l1hdr.b_tmp_cdata = NULL; } @@ -4534,6 +4544,7 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done, !HDR_L2_WRITING(hdr) && !HDR_L2_EVICTED(hdr) && !(l2arc_noprefetch && HDR_PREFETCH(hdr))) { l2arc_read_callback_t *cb; + void* b_data; DTRACE_PROBE1(l2arc__hit, arc_buf_hdr_t *, hdr); ARCSTAT_BUMP(arcstat_l2_hits); @@ -4546,6 +4557,14 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done, cb->l2rcb_zb = *zb; cb->l2rcb_flags = zio_flags; cb->l2rcb_compress = b_compress; + if (b_asize > hdr->b_size) { + ASSERT3U(b_compress, ==, + ZIO_COMPRESS_OFF); + b_data = zio_data_buf_alloc(b_asize); + cb->l2rcb_data = b_data; + } else { + b_data = buf->b_data; + } ASSERT(addr >= VDEV_LABEL_START_SIZE && addr + size < vd->vdev_psize - @@ -4558,6 +4577,7 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done, * was squashed to zero size by compression. 
*/ if (b_compress == ZIO_COMPRESS_EMPTY) { + ASSERT3U(b_asize, ==, 0); rzio = zio_null(pio, spa, vd, l2arc_read_done, cb, zio_flags | ZIO_FLAG_DONT_CACHE | @@ -4566,7 +4586,7 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done, ZIO_FLAG_DONT_RETRY); } else { rzio = zio_read_phys(pio, vd, addr, - b_asize, buf->b_data, + b_asize, b_data, ZIO_CHECKSUM_OFF, l2arc_read_done, cb, priority, zio_flags | ZIO_FLAG_DONT_CACHE | @@ -6050,6 +6070,32 @@ l2arc_read_done(zio_t *zio) hdr = buf->b_hdr; ASSERT3P(hash_lock, ==, HDR_LOCK(hdr)); + /* + * If the data was read into a temporary buffer, + * move it and free the buffer. + */ + if (cb->l2rcb_data != NULL) { + ASSERT3U(hdr->b_size, <, zio->io_size); + ASSERT3U(cb->l2rcb_compress, ==, ZIO_COMPRESS_OFF); + if (zio->io_error == 0) + bcopy(cb->l2rcb_data, buf->b_data, hdr->b_size); + + /* + * The following must be done regardless of whether + * there was an error: + * - free the temporary buffer + * - point zio to the real ARC buffer + * - set zio size accordingly + * These are required because zio is either re-used for + * an I/O of the block in the case of the error + * or the zio is passed to arc_read_done() and it + * needs real data. + */ + zio_data_buf_free(cb->l2rcb_data, zio->io_size); + zio->io_size = zio->io_orig_size = hdr->b_size; + zio->io_data = zio->io_orig_data = buf->b_data; + } + /* * If the buffer was compressed, decompress it first. */ @@ -6334,6 +6380,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz, kmutex_t *hash_lock; uint64_t buf_sz; uint64_t buf_a_sz; + size_t align; if (arc_warm == B_FALSE) hdr_prev = multilist_sublist_next(mls, hdr); @@ -6371,7 +6418,8 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz, * disk block size. 
*/ buf_sz = hdr->b_size; - buf_a_sz = vdev_psize_to_asize(dev->l2ad_vdev, buf_sz); + align = (size_t)1 << dev->l2ad_vdev->vdev_ashift; + buf_a_sz = P2ROUNDUP(buf_sz, align); if ((write_asize + buf_a_sz) > target_sz) { full = B_TRUE; @@ -6474,27 +6522,16 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz, mutex_enter(&dev->l2ad_mtx); - /* - * Note that elsewhere in this file arcstat_l2_asize - * and the used space on l2ad_vdev are updated using b_asize, - * which is not necessarily rounded up to the device block size. - * Too keep accounting consistent we do the same here as well: - * stats_size accumulates the sum of b_asize of the written buffers, - * while write_asize accumulates the sum of b_asize rounded up - * to the device block size. - * The latter sum is used only to validate the corectness of the code. - */ - uint64_t stats_size = 0; - write_asize = 0; - /* * Now start writing the buffers. We're starting at the write head * and work backwards, retracing the course of the buffer selector * loop above. */ + write_asize = 0; for (hdr = list_prev(&dev->l2ad_buflist, head); hdr; hdr = list_prev(&dev->l2ad_buflist, hdr)) { uint64_t buf_sz; + boolean_t compress; /* * We rely on the L1 portion of the header below, so @@ -6513,22 +6550,26 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz, */ hdr->b_l2hdr.b_daddr = dev->l2ad_hand; - if ((HDR_L2COMPRESS(hdr)) && - hdr->b_l2hdr.b_asize >= buf_compress_minsz) { - if (l2arc_compress_buf(hdr)) { - /* - * If compression succeeded, enable headroom - * boost on the next scan cycle. - */ - *headroom_boost = B_TRUE; - } + /* + * Save a pointer to the original buffer data we had previously + * stashed away. + */ + buf_data = hdr->b_l1hdr.b_tmp_cdata; + + compress = HDR_L2COMPRESS(hdr) && + hdr->b_l2hdr.b_asize >= buf_compress_minsz; + if (l2arc_transform_buf(hdr, compress)) { + /* + * If compression succeeded, enable headroom + * boost on the next scan cycle. 
+ */ + *headroom_boost = B_TRUE; } /* - * Pick up the buffer data we had previously stashed away - * (and now potentially also compressed). + * Get the new buffer size that accounts for compression + * and padding. */ - buf_data = hdr->b_l1hdr.b_tmp_cdata; buf_sz = hdr->b_l2hdr.b_asize; /* @@ -6540,8 +6581,12 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz, /* Compression may have squashed the buffer to zero length. */ if (buf_sz != 0) { - uint64_t buf_a_sz; - + /* + * If the data was padded or compressed, then it + * it is in a new buffer. + */ + if (hdr->b_l1hdr.b_tmp_cdata != NULL) + buf_data = hdr->b_l1hdr.b_tmp_cdata; wzio = zio_write_phys(pio, dev->l2ad_vdev, dev->l2ad_hand, buf_sz, buf_data, ZIO_CHECKSUM_OFF, NULL, NULL, ZIO_PRIORITY_ASYNC_WRITE, @@ -6551,14 +6596,8 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz, zio_t *, wzio); (void) zio_nowait(wzio); - stats_size += buf_sz; - - /* - * Keep the clock hand suitably device-aligned. - */ - buf_a_sz = vdev_psize_to_asize(dev->l2ad_vdev, buf_sz); - write_asize += buf_a_sz; - dev->l2ad_hand += buf_a_sz; + write_asize += buf_sz; + dev->l2ad_hand += buf_sz; } } @@ -6568,8 +6607,8 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz, ARCSTAT_BUMP(arcstat_l2_writes_sent); ARCSTAT_INCR(arcstat_l2_write_bytes, write_asize); ARCSTAT_INCR(arcstat_l2_size, write_sz); - ARCSTAT_INCR(arcstat_l2_asize, stats_size); - vdev_space_update(dev->l2ad_vdev, stats_size, 0, 0); + ARCSTAT_INCR(arcstat_l2_asize, write_asize); + vdev_space_update(dev->l2ad_vdev, write_asize, 0, 0); /* * Bump device hand to the device start if it is approaching the end. @@ -6588,12 +6627,18 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz, } /* - * Compresses an L2ARC buffer. + * Transforms, possibly compresses and pads, an L2ARC buffer. * The data to be compressed must be prefilled in l1hdr.b_tmp_cdata and its * size in l2hdr->b_asize. 
This routine tries to compress the data and * depending on the compression result there are three possible outcomes: - * *) The buffer was incompressible. The original l2hdr contents were left - * untouched and are ready for writing to an L2 device. + * *) The buffer was incompressible. The buffer size was already ashift aligned. + * The original hdr contents were left untouched except for b_tmp_cdata, + * which is reset to NULL. The caller must keep a pointer to the original + * data. + * *) The buffer was incompressible. The buffer size was not ashift aligned. + * b_tmp_cdata was replaced with a temporary data buffer which holds a padded + * (aligned) copy of the data. Once writing is done, invoke + * l2arc_release_cdata_buf on this hdr to free the temporary buffer. * *) The buffer was all-zeros, so there is no need to write it to an L2 * device. To indicate this situation b_tmp_cdata is NULL'ed, b_asize is * set to zero and b_compress is set to ZIO_COMPRESS_EMPTY. @@ -6607,10 +6652,11 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz, * buffer was incompressible). 
*/ static boolean_t -l2arc_compress_buf(arc_buf_hdr_t *hdr) +l2arc_transform_buf(arc_buf_hdr_t *hdr, boolean_t compress) { void *cdata; - size_t csize, len, rounded; + size_t align, asize, csize, len, rounded; + ASSERT(HDR_HAS_L2HDR(hdr)); l2arc_buf_hdr_t *l2hdr = &hdr->b_l2hdr; @@ -6619,14 +6665,19 @@ l2arc_compress_buf(arc_buf_hdr_t *hdr) ASSERT(hdr->b_l1hdr.b_tmp_cdata != NULL); len = l2hdr->b_asize; - cdata = zio_data_buf_alloc(len); + align = (size_t)1 << l2hdr->b_dev->l2ad_vdev->vdev_ashift; + asize = P2ROUNDUP(len, align); + cdata = zio_data_buf_alloc(asize); ASSERT3P(cdata, !=, NULL); - csize = zio_compress_data(ZIO_COMPRESS_LZ4, hdr->b_l1hdr.b_tmp_cdata, - cdata, l2hdr->b_asize); + if (compress) + csize = zio_compress_data(ZIO_COMPRESS_LZ4, + hdr->b_l1hdr.b_tmp_cdata, cdata, len); + else + csize = len; if (csize == 0) { /* zero block, indicate that there's nothing to write */ - zio_data_buf_free(cdata, len); + zio_data_buf_free(cdata, asize); l2hdr->b_compress = ZIO_COMPRESS_EMPTY; l2hdr->b_asize = 0; hdr->b_l1hdr.b_tmp_cdata = NULL; @@ -6634,8 +6685,8 @@ l2arc_compress_buf(arc_buf_hdr_t *hdr) return (B_TRUE); } - rounded = P2ROUNDUP(csize, - (size_t)1 << l2hdr->b_dev->l2ad_vdev->vdev_ashift); + rounded = P2ROUNDUP(csize, align); + ASSERT3U(rounded, <=, asize); if (rounded < len) { /* * Compression succeeded, we'll keep the cdata around for @@ -6652,11 +6703,32 @@ l2arc_compress_buf(arc_buf_hdr_t *hdr) return (B_TRUE); } else { /* - * Compression failed, release the compressed buffer. - * l2hdr will be left unmodified. + * Compression did not save space. */ - zio_data_buf_free(cdata, len); - ARCSTAT_BUMP(arcstat_l2_compress_failures); + if (P2PHASE(len, align) != 0) { + /* + * Use compression buffer for a copy of data padded to + * the proper size. Compression algorithm remains set + * to ZIO_COMPRESS_OFF. 
+ */ + ASSERT3U(len, <, asize); + bcopy(hdr->b_l1hdr.b_tmp_cdata, cdata, len); + bzero((char *)cdata + len, asize - len); + l2hdr->b_asize = asize; + hdr->b_l1hdr.b_tmp_cdata = cdata; + ARCSTAT_BUMP(arcstat_l2_padding_needed); + } else { + ASSERT3U(len, ==, asize); + /* + * The original buffer is good as is, + * release the compressed buffer. + * l2hdr will be left unmodified except for b_tmp_cdata. + */ + zio_data_buf_free(cdata, asize); + hdr->b_l1hdr.b_tmp_cdata = NULL; + } + if (compress) + ARCSTAT_BUMP(arcstat_l2_compress_failures); return (B_FALSE); } } @@ -6725,44 +6797,30 @@ l2arc_decompress_zio(zio_t *zio, arc_buf_hdr_t *hdr, enum zio_compress c) /* * Releases the temporary b_tmp_cdata buffer in an l2arc header structure. - * This buffer serves as a temporary holder of compressed data while + * This buffer serves as a temporary holder of compressed or padded data while * the buffer entry is being written to an l2arc device. Once that is * done, we can dispose of it. */ static void l2arc_release_cdata_buf(arc_buf_hdr_t *hdr) { - ASSERT(HDR_HAS_L2HDR(hdr)); + size_t align, asize, len; enum zio_compress comp = hdr->b_l2hdr.b_compress; + ASSERT(HDR_HAS_L2HDR(hdr)); ASSERT(HDR_HAS_L1HDR(hdr)); ASSERT(comp == ZIO_COMPRESS_OFF || L2ARC_IS_VALID_COMPRESS(comp)); - if (comp == ZIO_COMPRESS_OFF) { - /* - * In this case, b_tmp_cdata points to the same buffer - * as the arc_buf_t's b_data field. We don't want to - * free it, since the arc_buf_t will handle that. - */ + if (hdr->b_l1hdr.b_tmp_cdata != NULL) { + ASSERT(comp != ZIO_COMPRESS_EMPTY); + len = hdr->b_size; + align = (size_t)1 << hdr->b_l2hdr.b_dev->l2ad_vdev->vdev_ashift; + asize = P2ROUNDUP(len, align); + zio_data_buf_free(hdr->b_l1hdr.b_tmp_cdata, asize); hdr->b_l1hdr.b_tmp_cdata = NULL; - } else if (comp == ZIO_COMPRESS_EMPTY) { - /* - * In this case, b_tmp_cdata was compressed to an empty - * buffer, thus there's nothing to free and b_tmp_cdata - * should have been set to NULL in l2arc_write_buffers(). 
- */ - ASSERT3P(hdr->b_l1hdr.b_tmp_cdata, ==, NULL); } else { - /* - * If the data was compressed, then we've allocated a - * temporary buffer for it, so now we need to release it. - */ - ASSERT(hdr->b_l1hdr.b_tmp_cdata != NULL); - zio_data_buf_free(hdr->b_l1hdr.b_tmp_cdata, - hdr->b_size); - hdr->b_l1hdr.b_tmp_cdata = NULL; + ASSERT(comp == ZIO_COMPRESS_OFF || comp == ZIO_COMPRESS_EMPTY); } - } /* diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c index b706dd32f2ba..9f1a98b09b63 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c @@ -2805,11 +2805,13 @@ zio_vdev_io_start(zio_t *zio) ASSERT0(P2PHASE(zio->io_size, align)); } else { /* - * For physical writes, we allow 512b aligned writes and assume - * the device will perform a read-modify-write as necessary. + * For the physical io we allow alignment + * to a logical block size. */ - ASSERT0(P2PHASE(zio->io_offset, SPA_MINBLOCKSIZE)); - ASSERT0(P2PHASE(zio->io_size, SPA_MINBLOCKSIZE)); + uint64_t log_align = + 1ULL << vd->vdev_top->vdev_logical_ashift; + ASSERT0(P2PHASE(zio->io_offset, log_align)); + ASSERT0(P2PHASE(zio->io_size, log_align)); } VERIFY(zio->io_type == ZIO_TYPE_READ || spa_writeable(spa)); From a82ccc6f1ef5be6fadb0cdcef98bd6f15d4dd5e9 Mon Sep 17 00:00:00 2001 From: Stanislav Galabov Date: Tue, 12 Apr 2016 07:18:48 +0000 Subject: [PATCH 20/29] Define PCI_RES_BUS for MIPS. This is done as part of the work on D5908, but as a separate commit. 
Approved by: adrian (mentor) Sponsored by: Smartcom - Bulgaria AD --- sys/mips/include/resource.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sys/mips/include/resource.h b/sys/mips/include/resource.h index c5b4283bb029..ec279cd1c513 100644 --- a/sys/mips/include/resource.h +++ b/sys/mips/include/resource.h @@ -42,5 +42,8 @@ #define SYS_RES_DRQ 2 /* isa dma lines */ #define SYS_RES_MEMORY 3 /* i/o memory */ #define SYS_RES_IOPORT 4 /* i/o ports */ +#ifdef NEW_PCIB +#define PCI_RES_BUS 5 +#endif #endif /* !_MACHINE_RESOURCE_H_ */ From 6573205a07980dff87ad8d688c2df0f8a9ade2a1 Mon Sep 17 00:00:00 2001 From: Stanislav Galabov Date: Tue, 12 Apr 2016 07:21:22 +0000 Subject: [PATCH 21/29] Move Mediatek/Ralink PCIe to NEW_PCIB This revision fixes minor issues and moves the Mediatek/Ralink PCIe support to use NEW_PCIB. https://svnweb.freebsd.org/changeset/base/297849 is the other part of this changeset. Approved by: adrian (mentor) Sponsored by: Smartcom - Bulgaria AD Differential Revision: https://reviews.freebsd.org/D5908 --- sys/mips/mediatek/mtk_pcie.c | 364 +++++++---------------------------- sys/mips/mediatek/mtk_pcie.h | 8 - 2 files changed, 69 insertions(+), 303 deletions(-) diff --git a/sys/mips/mediatek/mtk_pcie.c b/sys/mips/mediatek/mtk_pcie.c index a43d277ba192..e88b9ab92060 100644 --- a/sys/mips/mediatek/mtk_pcie.c +++ b/sys/mips/mediatek/mtk_pcie.c @@ -21,15 +21,6 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - * - * The pci allocator parts are based on code from sys/dev/arm/mv/: - * - * Copyright (c) 2008 MARVELL INTERNATIONAL LTD. - * Copyright (c) 2010 The FreeBSD Foundation - * Copyright (c) 2010-2012 Semihalf - * All rights reserved. - * - * Developed by Semihalf. 
*/ #include __FBSDID("$FreeBSD$"); @@ -72,6 +63,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include "ofw_bus_if.h" #include "pcib_if.h" #include "pic_if.h" @@ -98,7 +90,6 @@ static void mtk_pcie_phy_setup_slots(device_t); struct mtx mtk_pci_mtx; MTX_SYSINIT(mtk_pci_mtx, &mtk_pci_mtx, "MTK PCIe mutex", MTX_SPIN); -static int mtk_pcib_init(device_t, int, int); static int mtk_pci_intr(void *); static struct mtk_pci_softc *mt_sc = NULL; @@ -340,9 +331,6 @@ mtk_pci_attach(device_t dev) } } - /* Do generic PCIe initialization and resource allocation */ - mtk_pcib_init(dev, 0, PCI_SLOTMAX); - /* Attach our PCI child so bus enumeration can start */ if (device_add_child(dev, "pci", -1) == NULL) { device_printf(dev, "could not attach pci bus\n"); @@ -426,6 +414,9 @@ mtk_pci_alloc_resource(device_t bus, device_t child, int type, int *rid, struct rman *rm; switch (type) { + case PCI_RES_BUS: + return pci_domain_alloc_bus(0, child, rid, start, end, count, + flags); case SYS_RES_IRQ: rm = &sc->sc_irq_rman; break; @@ -456,6 +447,47 @@ mtk_pci_alloc_resource(device_t bus, device_t child, int type, int *rid, return (rv); } +static int +mtk_pci_release_resource(device_t bus, device_t child, int type, int rid, + struct resource *res) +{ + + if (type == PCI_RES_BUS) + return (pci_domain_release_bus(0, child, rid, res)); + + return (bus_generic_release_resource(bus, child, type, rid, res)); +} + +static int +mtk_pci_adjust_resource(device_t bus, device_t child, int type, + struct resource *res, rman_res_t start, rman_res_t end) +{ + struct mtk_pci_softc *sc = device_get_softc(bus); + struct rman *rm; + + switch (type) { + case PCI_RES_BUS: + return pci_domain_adjust_bus(0, child, res, start, end); + case SYS_RES_IRQ: + rm = &sc->sc_irq_rman; + break; + case SYS_RES_IOPORT: + rm = &sc->sc_io_rman; + break; + case SYS_RES_MEMORY: + rm = &sc->sc_mem_rman; + break; + default: + rm = NULL; + break; + } + + if (rm != NULL) + return (rman_adjust_resource(res, start, end)); + + return 
(bus_generic_adjust_resource(bus, child, type, res, start, end)); +} + static inline int mtk_idx_to_irq(int idx) { @@ -643,22 +675,15 @@ mtk_pci_write_config(device_t dev, u_int bus, u_int slot, u_int func, mtx_unlock_spin(&mtk_pci_mtx); } -#if 0 -/* We take care of interrupt routing in the allocator code below */ static int mtk_pci_route_interrupt(device_t pcib, device_t device, int pin) { - //struct mtk_pci_softc *sc = device_get_softc(pcib); int bus, sl, dev; - if (1) return PCI_INVALID_IRQ; - bus = pci_get_bus(device); sl = pci_get_slot(device); dev = pci_get_device(device); - printf("%s: for %d:%d:%d, int = %d\n", __FUNCTION__, bus, sl, dev, pin); - if (bus != 0) panic("Unexpected bus number %d\n", bus); @@ -672,7 +697,6 @@ mtk_pci_route_interrupt(device_t pcib, device_t device, int pin) return (-1); } -#endif static device_method_t mtk_pci_methods[] = { /* Device interface */ @@ -686,7 +710,8 @@ static device_method_t mtk_pci_methods[] = { DEVMETHOD(bus_read_ivar, mtk_pci_read_ivar), DEVMETHOD(bus_write_ivar, mtk_pci_write_ivar), DEVMETHOD(bus_alloc_resource, mtk_pci_alloc_resource), - DEVMETHOD(bus_release_resource, bus_generic_release_resource), + DEVMETHOD(bus_release_resource, mtk_pci_release_resource), + DEVMETHOD(bus_adjust_resource, mtk_pci_adjust_resource), DEVMETHOD(bus_activate_resource, bus_generic_activate_resource), DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource), DEVMETHOD(bus_setup_intr, mtk_pci_setup_intr), @@ -696,9 +721,14 @@ static device_method_t mtk_pci_methods[] = { DEVMETHOD(pcib_maxslots, mtk_pci_maxslots), DEVMETHOD(pcib_read_config, mtk_pci_read_config), DEVMETHOD(pcib_write_config, mtk_pci_write_config), -#if 0 DEVMETHOD(pcib_route_interrupt, mtk_pci_route_interrupt), -#endif + + /* OFW bus interface */ + DEVMETHOD(ofw_bus_get_compat, ofw_bus_gen_get_compat), + DEVMETHOD(ofw_bus_get_model, ofw_bus_gen_get_model), + DEVMETHOD(ofw_bus_get_name, ofw_bus_gen_get_name), + DEVMETHOD(ofw_bus_get_node, 
ofw_bus_gen_get_node), + DEVMETHOD(ofw_bus_get_type, ofw_bus_gen_get_type), DEVMETHOD_END }; @@ -713,276 +743,6 @@ static devclass_t mtk_pci_devclass; DRIVER_MODULE(mtk_pci, simplebus, mtk_pci_driver, mtk_pci_devclass, 0, 0); -/* Resource allocation code */ -static inline uint32_t -pcib_bit_get(uint32_t *map, uint32_t bit) -{ - uint32_t n = bit / BITS_PER_UINT32; - - bit = bit % BITS_PER_UINT32; - return (map[n] & (1 << bit)); -} - -static inline void -pcib_bit_set(uint32_t *map, uint32_t bit) -{ - uint32_t n = bit / BITS_PER_UINT32; - - bit = bit % BITS_PER_UINT32; - map[n] |= (1 << bit); -} - -static inline uint32_t -pcib_map_check(uint32_t *map, uint32_t start, uint32_t bits) -{ - uint32_t i; - - for (i = start; i < start + bits; i++) - if (pcib_bit_get(map, i)) - return (0); - - return (1); -} - -static inline void -pcib_map_set(uint32_t *map, uint32_t start, uint32_t bits) -{ - uint32_t i; - - for (i = start; i < start + bits; i++) - pcib_bit_set(map, i); -} - -static bus_addr_t -pcib_alloc(device_t dev, uint32_t smask) -{ - struct mtk_pci_softc *sc = device_get_softc(dev); - uint32_t bits, bits_limit, i, *map, min_alloc, size; - bus_addr_t addr = 0; - bus_addr_t base; - - if (smask & 1) { - base = sc->sc_io_base; - min_alloc = PCI_MIN_IO_ALLOC; - bits_limit = sc->sc_io_size / min_alloc; - map = sc->sc_io_map; - smask &= ~0x3; - } else { - base = sc->sc_mem_base; - min_alloc = PCI_MIN_MEM_ALLOC; - bits_limit = sc->sc_mem_size / min_alloc; - map = sc->sc_mem_map; - smask &= ~0xF; - } - - size = ~smask + 1; - bits = size / min_alloc; - - for (i = 0; i + bits <= bits_limit; i+= bits) - if (pcib_map_check(map, i, bits)) { - pcib_map_set(map, i, bits); - addr = base + (i * min_alloc); - return (addr); - } - - return (addr); -} - -static int -mtk_pcib_init_bar(device_t dev, int bus, int slot, int func, int barno) -{ - uint32_t addr, bar; - int reg, width; - - reg = PCIR_BAR(barno); - - mtk_pci_write_config(dev, bus, slot, func, reg, ~0, 4); - bar = 
mtk_pci_read_config(dev, bus, slot, func, reg, 4); - if (bar == 0) - return (1); - - /* Calculate BAR size: 64 or 32 bit (in 32-bit units) */ - width = ((bar & 7) == 4) ? 2 : 1; - - addr = pcib_alloc(dev, bar); - if (!addr) - return (-1); - - if (bootverbose) - printf("PCI %u:%u:%u: reg %x: smask=%08x: addr=%08x\n", - bus, slot, func, reg, bar, addr); - - mtk_pci_write_config(dev, bus, slot, func, reg, addr, 4); - if (width == 2) - mtk_pci_write_config(dev, bus, slot, func, reg + 4, 0, 4); - - return (width); -} - -static int -mtk_pcib_init_all_bars(device_t dev, int bus, int slot, int func, - int hdrtype) -{ - int maxbar, bar, i; - - maxbar = (hdrtype & PCIM_HDRTYPE) ? 0 : 6; - bar = 0; - - while (bar < maxbar) { - i = mtk_pcib_init_bar(dev, bus, slot, func, bar); - bar += i; - if (i < 0) { - device_printf(dev, "PCI IO/Memory space exhausted\n"); - return (ENOMEM); - } - } - - return (0); -} - -static void -mtk_pcib_init_bridge(device_t dev, int bus, int slot, int func) -{ - struct mtk_pci_softc *sc = device_get_softc(dev); - bus_addr_t io_base, mem_base; - uint32_t io_limit, mem_limit; - int secbus; - - if (bus == 0 && !mtk_pci_slot_has_link(dev, slot)) { - sc->sc_cur_secbus++; - device_printf(dev, "Skip bus %d due to no link\n", - sc->sc_cur_secbus); - return; - } - - io_base = sc->sc_io_base; - io_limit = io_base + sc->sc_io_size - 1; - mem_base = sc->sc_mem_base; - mem_limit = mem_base + sc->sc_mem_size - 1; - - mtk_pci_write_config(dev, bus, slot, func, PCIR_IOBASEL_1, - io_base >> 8, 1); - mtk_pci_write_config(dev, bus, slot, func, PCIR_IOBASEH_1, - io_base >> 16, 2); - mtk_pci_write_config(dev, bus, slot, func, PCIR_IOLIMITL_1, - io_limit >> 8, 1); - mtk_pci_write_config(dev, bus, slot, func, PCIR_IOLIMITH_1, - io_limit >> 16, 2); - - mtk_pci_write_config(dev, bus, slot, func, PCIR_MEMBASE_1, - mem_base >> 16, 2); - mtk_pci_write_config(dev, bus, slot, func, PCIR_MEMLIMIT_1, - mem_limit >> 16, 2); - - mtk_pci_write_config(dev, bus, slot, func, 
PCIR_PMBASEL_1, - 0x10, 2); - mtk_pci_write_config(dev, bus, slot, func, PCIR_PMBASEH_1, - 0x0, 4); - mtk_pci_write_config(dev, bus, slot, func, PCIR_PMLIMITL_1, - 0xF, 2); - mtk_pci_write_config(dev, bus, slot, func, PCIR_PMLIMITH_1, - 0x0, 4); - - mtk_pci_write_config(dev, bus, slot, func, PCIR_INTLINE, 0xff, 1); - - secbus = mtk_pci_read_config(dev, bus, slot, func, PCIR_SECBUS_1, 1); - - if (secbus == 0) { - sc->sc_cur_secbus++; - mtk_pci_write_config(dev, bus, slot, func, PCIR_SECBUS_1, - sc->sc_cur_secbus, 1); - mtk_pci_write_config(dev, bus, slot, func, PCIR_SUBBUS_1, - sc->sc_cur_secbus, 1); - secbus = sc->sc_cur_secbus; - } - - mtk_pcib_init(dev, secbus, PCI_SLOTMAX); -} - -static uint8_t -mtk_pci_get_int(device_t dev, int bus, int slot) -{ - - if (slot != 0) - return (PCI_INVALID_IRQ); - - switch (bus) { - case 1: - return (MTK_PCIE0_IRQ); - case 2: - return (MTK_PCIE1_IRQ); - case 3: - return (MTK_PCIE2_IRQ); - default: - device_printf(dev, "Bus %d out of range\n", slot); - return (PCI_INVALID_IRQ); - } - - /* Unreachable */ - return (PCI_INVALID_IRQ); -} - -static int -mtk_pcib_init(device_t dev, int bus, int maxslot) -{ - int slot, func, maxfunc, error; - uint8_t hdrtype, command, class, subclass; - - for (slot = 0; slot <= maxslot; slot++) { - maxfunc = 0; - for (func = 0; func <= maxfunc; func++) { - hdrtype = mtk_pci_read_config(dev, bus, slot, func, - PCIR_HDRTYPE, 1); - - if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE) - continue; - - if (func == 0 && (hdrtype & PCIM_MFDEV)) - maxfunc = PCI_FUNCMAX; - - command = mtk_pci_read_config(dev, bus, slot, func, - PCIR_COMMAND, 1); - command &= ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN); - mtk_pci_write_config(dev, bus, slot, func, - PCIR_COMMAND, command, 1); - - error = mtk_pcib_init_all_bars(dev, bus, slot, func, - hdrtype); - - if (error) - return (error); - - command |= PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN | - PCIM_CMD_PORTEN; - mtk_pci_write_config(dev, bus, slot, func, - PCIR_COMMAND, command, 1); - - 
mtk_pci_write_config(dev, bus, slot, func, - PCIR_CACHELNSZ, 16, 1); - - class = mtk_pci_read_config(dev, bus, slot, func, - PCIR_CLASS, 1); - subclass = mtk_pci_read_config(dev, bus, slot, func, - PCIR_SUBCLASS, 1); - - if (class != PCIC_BRIDGE || - subclass != PCIS_BRIDGE_PCI) { - uint8_t val; - - val = mtk_pci_get_int(dev, bus, slot); - - mtk_pci_write_config(dev, bus, slot, func, - PCIR_INTLINE, val, 1); /* XXX */ - continue; - } - - mtk_pcib_init_bridge(dev, bus, slot, func); - } - } - - return (0); -} - /* Our interrupt handler */ static int mtk_pci_intr(void *arg) @@ -1467,6 +1227,8 @@ mtk_pcie_phy_setup_slots(device_t dev) /* If slot has link - mark it */ if (MT_READ32(sc, MTK_PCIE_STATUS(i)) & 1) sc->pcie_link_status |= (1< Date: Tue, 12 Apr 2016 10:25:44 +0000 Subject: [PATCH 22/29] If off-page lookup failed, there is no memory to perform shared_mutex_init() upon. Sponsored by: The FreeBSD Foundation --- lib/libthr/thread/thr_mutex.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/libthr/thread/thr_mutex.c b/lib/libthr/thread/thr_mutex.c index 28fd9eea56f7..f75ea6f72f5c 100644 --- a/lib/libthr/thread/thr_mutex.c +++ b/lib/libthr/thread/thr_mutex.c @@ -476,7 +476,8 @@ check_and_init_mutex(pthread_mutex_t *mutex, struct pthread_mutex **m) *m = __thr_pshared_offpage(mutex, 0); if (*m == NULL) ret = EINVAL; - shared_mutex_init(*m, NULL); + else + shared_mutex_init(*m, NULL); } else if (__predict_false(*m <= THR_MUTEX_DESTROYED)) { if (*m == THR_MUTEX_DESTROYED) { ret = EINVAL; From 003c82d71372c02a18a87cd8a18bbd8aad16a457 Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Tue, 12 Apr 2016 11:48:50 +0000 Subject: [PATCH 23/29] Add couple missing memory barriers. 
--- sys/dev/isp/isp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sys/dev/isp/isp.c b/sys/dev/isp/isp.c index 2e69855578b3..95cd9b92b142 100644 --- a/sys/dev/isp/isp.c +++ b/sys/dev/isp/isp.c @@ -2802,12 +2802,13 @@ isp_getpdb(ispsoftc_t *isp, int chan, uint16_t id, isp_pdb_t *pdb) isp_prt(isp, ISP_LOGERR, sacq); return (-1); } - MEMORYBARRIER(isp, SYNC_SFORDEV, 0, sizeof (un), chan); + MEMORYBARRIER(isp, SYNC_SFORDEV, 0, sizeof(un), chan); isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { FC_SCRATCH_RELEASE(isp, chan); return (mbs.param[0] | (mbs.param[1] << 16)); } + MEMORYBARRIER(isp, SYNC_SFORCPU, 0, sizeof(un), chan); if (IS_24XX(isp)) { isp_get_pdb_24xx(isp, fcp->isp_scratch, &un.bill); pdb->handle = un.bill.pdb_handle; @@ -2875,6 +2876,7 @@ isp_gethandles(ispsoftc_t *isp, int chan, uint16_t *handles, int *num, int loop) FC_SCRATCH_RELEASE(isp, chan); return (mbs.param[0] | (mbs.param[1] << 16)); } + MEMORYBARRIER(isp, SYNC_SFORCPU, 0, ISP_FC_SCRLEN, chan); elp1 = fcp->isp_scratch; elp3 = fcp->isp_scratch; elp4 = fcp->isp_scratch; From cf4476eb39575e2ad601e85a5fe6796fd188ee6d Mon Sep 17 00:00:00 2001 From: Michael Tuexen Date: Tue, 12 Apr 2016 11:48:54 +0000 Subject: [PATCH 24/29] When processing an ICMP packet containing an SCTP packet, it is required to check the verification tag. However, this requires the verification tag to be not 0. Enforce this. For packets with a verification tag of 0, we need to check if it contains an INIT chunk and use the initiate tag for the validation. This will be a separate commit, since it touches also other code. 
MFC after: 1 week --- sys/netinet/sctp_usrreq.c | 56 ++++++++++++++++++++++----------------- sys/netinet/sctp_var.h | 4 --- 2 files changed, 31 insertions(+), 29 deletions(-) diff --git a/sys/netinet/sctp_usrreq.c b/sys/netinet/sctp_usrreq.c index 64d3a2629436..4024d1d29a16 100644 --- a/sys/netinet/sctp_usrreq.c +++ b/sys/netinet/sctp_usrreq.c @@ -147,26 +147,19 @@ static void sctp_notify_mbuf(struct sctp_inpcb *inp, struct sctp_tcb *stcb, struct sctp_nets *net, - struct ip *ip, - struct sctphdr *sh) + struct ip *ip) { struct icmp *icmph; int totsz, tmr_stopped = 0; uint16_t nxtsz; /* protection */ - if ((inp == NULL) || (stcb == NULL) || (net == NULL) || - (ip == NULL) || (sh == NULL)) { + if ((inp == NULL) || (stcb == NULL) || (net == NULL) || (ip == NULL)) { if (stcb != NULL) { SCTP_TCB_UNLOCK(stcb); } return; } - /* First job is to verify the vtag matches what I would send */ - if (ntohl(sh->v_tag) != (stcb->asoc.peer_vtag)) { - SCTP_TCB_UNLOCK(stcb); - return; - } icmph = (struct icmp *)((caddr_t)ip - (sizeof(struct icmp) - sizeof(struct ip))); if (icmph->icmp_type != ICMP_UNREACH) { @@ -213,10 +206,9 @@ sctp_notify_mbuf(struct sctp_inpcb *inp, SCTP_TCB_UNLOCK(stcb); } -void +static void sctp_notify(struct sctp_inpcb *inp, struct ip *ip, - struct sctphdr *sh, struct sockaddr *to, struct sctp_tcb *stcb, struct sctp_nets *net) @@ -228,17 +220,11 @@ sctp_notify(struct sctp_inpcb *inp, struct icmp *icmph; /* protection */ - if ((inp == NULL) || (stcb == NULL) || (net == NULL) || - (sh == NULL) || (to == NULL)) { + if ((inp == NULL) || (stcb == NULL) || (net == NULL) || (to == NULL)) { if (stcb) SCTP_TCB_UNLOCK(stcb); return; } - /* First job is to verify the vtag matches what I would send */ - if (ntohl(sh->v_tag) != (stcb->asoc.peer_vtag)) { - SCTP_TCB_UNLOCK(stcb); - return; - } icmph = (struct icmp *)((caddr_t)ip - (sizeof(struct icmp) - sizeof(struct ip))); if (icmph->icmp_type != ICMP_UNREACH) { @@ -304,10 +290,7 @@ sctp_notify(struct sctp_inpcb *inp, #ifdef 
INET void -sctp_ctlinput(cmd, sa, vip) - int cmd; - struct sockaddr *sa; - void *vip; +sctp_ctlinput(int cmd, struct sockaddr *sa, void *vip) { struct ip *ip = vip; struct sctphdr *sh; @@ -348,14 +331,37 @@ sctp_ctlinput(cmd, sa, vip) stcb = sctp_findassociation_addr_sa((struct sockaddr *)&to, (struct sockaddr *)&from, &inp, &net, 1, vrf_id); - if (stcb != NULL && inp && (inp->sctp_socket != NULL)) { + if ((stcb != NULL) && + (inp != NULL) && + (inp->sctp_socket != NULL)) { + /* Check the verification tag */ + if (ntohl(sh->v_tag) != 0) { + /* + * This must be the verification tag used + * for sending out packets. We don't + * consider packets reflecting the + * verification tag. + */ + if (ntohl(sh->v_tag) != (stcb->asoc.peer_vtag)) { + SCTP_TCB_UNLOCK(stcb); + return; + } + } else { + /* + * In this case we could check if we got an + * INIT chunk and if the initiate tag + * matches. But this is not there yet... + */ + SCTP_TCB_UNLOCK(stcb); + return; + } if (cmd != PRC_MSGSIZE) { - sctp_notify(inp, ip, sh, + sctp_notify(inp, ip, (struct sockaddr *)&to, stcb, net); } else { /* handle possible ICMP size messages */ - sctp_notify_mbuf(inp, stcb, net, ip, sh); + sctp_notify_mbuf(inp, stcb, net, ip); } } else { if ((stcb == NULL) && (inp != NULL)) { diff --git a/sys/netinet/sctp_var.h b/sys/netinet/sctp_var.h index 009ffdb40786..77e5da2edf07 100644 --- a/sys/netinet/sctp_var.h +++ b/sys/netinet/sctp_var.h @@ -344,10 +344,6 @@ void sctp_init(void); void sctp_finish(void); int sctp_flush(struct socket *, int); int sctp_shutdown(struct socket *); -void -sctp_notify(struct sctp_inpcb *, struct ip *ip, struct sctphdr *, - struct sockaddr *, struct sctp_tcb *, - struct sctp_nets *); int sctp_bindx(struct socket *, int, struct sockaddr_storage *, int, int, struct proc *); From 53791a95a48f5e6f0463e14837d26a065f212f39 Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Tue, 12 Apr 2016 12:31:41 +0000 Subject: [PATCH 25/29] Reimplement ISP_TSK_MGMT IOCTL via asynchronous 
request. I am not sure this code is not completely dead, but it used DMA scratch area without good reason and asked to be refactored. --- sys/dev/isp/isp_freebsd.c | 101 ++++++++++++++++++++++---------------- 1 file changed, 58 insertions(+), 43 deletions(-) diff --git a/sys/dev/isp/isp_freebsd.c b/sys/dev/isp/isp_freebsd.c index 23ec42a6b6c7..8787d94a643a 100644 --- a/sys/dev/isp/isp_freebsd.c +++ b/sys/dev/isp/isp_freebsd.c @@ -607,9 +607,10 @@ ispioctl(struct cdev *dev, u_long c, caddr_t addr, int flags, struct thread *td) nphdl = fct->loopid; ISP_LOCK(isp); if (IS_24XX(isp)) { - uint8_t local[QENTRY_LEN]; - isp24xx_tmf_t *tmf; - isp24xx_statusreq_t *sp; + void *reqp; + uint8_t resp[QENTRY_LEN]; + isp24xx_tmf_t tmf; + isp24xx_statusreq_t sp; fcparam *fcp = FCPARAM(isp, chan); fcportdb_t *lp; int i; @@ -625,39 +626,37 @@ ispioctl(struct cdev *dev, u_long c, caddr_t addr, int flags, struct thread *td) ISP_UNLOCK(isp); break; } - /* XXX VALIDATE LP XXX */ - tmf = (isp24xx_tmf_t *) local; - ISP_MEMZERO(tmf, QENTRY_LEN); - tmf->tmf_header.rqs_entry_type = RQSTYPE_TSK_MGMT; - tmf->tmf_header.rqs_entry_count = 1; - tmf->tmf_nphdl = lp->handle; - tmf->tmf_delay = 2; - tmf->tmf_timeout = 4; - tmf->tmf_tidlo = lp->portid; - tmf->tmf_tidhi = lp->portid >> 16; - tmf->tmf_vpidx = ISP_GET_VPIDX(isp, chan); - tmf->tmf_lun[1] = fct->lun & 0xff; + ISP_MEMZERO(&tmf, sizeof(tmf)); + tmf.tmf_header.rqs_entry_type = RQSTYPE_TSK_MGMT; + tmf.tmf_header.rqs_entry_count = 1; + tmf.tmf_nphdl = lp->handle; + tmf.tmf_delay = 2; + tmf.tmf_timeout = 4; + tmf.tmf_tidlo = lp->portid; + tmf.tmf_tidhi = lp->portid >> 16; + tmf.tmf_vpidx = ISP_GET_VPIDX(isp, chan); + tmf.tmf_lun[1] = fct->lun & 0xff; if (fct->lun >= 256) { - tmf->tmf_lun[0] = 0x40 | (fct->lun >> 8); + tmf.tmf_lun[0] = 0x40 | (fct->lun >> 8); } switch (fct->action) { case IPT_CLEAR_ACA: - tmf->tmf_flags = ISP24XX_TMF_CLEAR_ACA; + tmf.tmf_flags = ISP24XX_TMF_CLEAR_ACA; break; case IPT_TARGET_RESET: - tmf->tmf_flags = 
ISP24XX_TMF_TARGET_RESET; + tmf.tmf_flags = ISP24XX_TMF_TARGET_RESET; needmarker = 1; break; case IPT_LUN_RESET: - tmf->tmf_flags = ISP24XX_TMF_LUN_RESET; + tmf.tmf_flags = ISP24XX_TMF_LUN_RESET; needmarker = 1; break; case IPT_CLEAR_TASK_SET: - tmf->tmf_flags = ISP24XX_TMF_CLEAR_TASK_SET; + tmf.tmf_flags = ISP24XX_TMF_CLEAR_TASK_SET; needmarker = 1; break; case IPT_ABORT_TASK_SET: - tmf->tmf_flags = ISP24XX_TMF_ABORT_TASK_SET; + tmf.tmf_flags = ISP24XX_TMF_ABORT_TASK_SET; needmarker = 1; break; default: @@ -668,36 +667,52 @@ ispioctl(struct cdev *dev, u_long c, caddr_t addr, int flags, struct thread *td) ISP_UNLOCK(isp); break; } - MBSINIT(&mbs, MBOX_EXEC_COMMAND_IOCB_A64, MBLOGALL, - MBCMD_DEFAULT_TIMEOUT + tmf->tmf_timeout * 1000000); - mbs.param[1] = QENTRY_LEN; - mbs.param[2] = DMA_WD1(fcp->isp_scdma); - mbs.param[3] = DMA_WD0(fcp->isp_scdma); - mbs.param[6] = DMA_WD3(fcp->isp_scdma); - mbs.param[7] = DMA_WD2(fcp->isp_scdma); - if (FC_SCRATCH_ACQUIRE(isp, chan)) { + /* Prepare space for response in memory */ + memset(resp, 0xff, sizeof(resp)); + tmf.tmf_handle = isp_allocate_handle(isp, resp, + ISP_HANDLE_CTRL); + if (tmf.tmf_handle == 0) { + isp_prt(isp, ISP_LOGERR, + "%s: TMF of Chan %d out of handles", + __func__, chan); ISP_UNLOCK(isp); retval = ENOMEM; break; } - isp_put_24xx_tmf(isp, tmf, fcp->isp_scratch); - MEMORYBARRIER(isp, SYNC_SFORDEV, 0, QENTRY_LEN, chan); - sp = (isp24xx_statusreq_t *) local; - sp->req_completion_status = 1; - retval = isp_control(isp, ISPCTL_RUN_MBOXCMD, &mbs); - MEMORYBARRIER(isp, SYNC_SFORCPU, QENTRY_LEN, QENTRY_LEN, chan); - isp_get_24xx_response(isp, &((isp24xx_statusreq_t *)fcp->isp_scratch)[1], sp); - FC_SCRATCH_RELEASE(isp, chan); - if (retval || sp->req_completion_status != 0) { - FC_SCRATCH_RELEASE(isp, chan); + + /* Send request and wait for response. 
*/ + reqp = isp_getrqentry(isp); + if (reqp == NULL) { + isp_prt(isp, ISP_LOGERR, + "%s: TMF of Chan %d out of rqent", + __func__, chan); + isp_destroy_handle(isp, tmf.tmf_handle); + ISP_UNLOCK(isp); retval = EIO; + break; } - if (retval == 0) { - if (needmarker) { - fcp->sendmarker = 1; - } + isp_put_24xx_tmf(isp, &tmf, (isp24xx_tmf_t *)reqp); + if (isp->isp_dblev & ISP_LOGDEBUG1) + isp_print_bytes(isp, "IOCB TMF", QENTRY_LEN, reqp); + ISP_SYNC_REQUEST(isp); + if (msleep(resp, &isp->isp_lock, 0, "TMF", 5*hz) == EWOULDBLOCK) { + isp_prt(isp, ISP_LOGERR, + "%s: TMF of Chan %d timed out", + __func__, chan); + isp_destroy_handle(isp, tmf.tmf_handle); + ISP_UNLOCK(isp); + retval = EIO; + break; } + if (isp->isp_dblev & ISP_LOGDEBUG1) + isp_print_bytes(isp, "IOCB TMF response", QENTRY_LEN, resp); + isp_get_24xx_response(isp, (isp24xx_statusreq_t *)resp, &sp); + + if (sp.req_completion_status != 0) + retval = EIO; + else if (needmarker) + fcp->sendmarker = 1; } else { MBSINIT(&mbs, 0, MBLOGALL, 0); if (ISP_CAP_2KLOGIN(isp) == 0) { From 0d63fc3ed8bac59f7966921bfacf2cc3899b1ed3 Mon Sep 17 00:00:00 2001 From: Andriy Gapon Date: Tue, 12 Apr 2016 13:30:39 +0000 Subject: [PATCH 26/29] re-enable AMD Topology extension on certain models if disabled by BIOS Some BIOSes disable AMD Topology extension on AMD Family 15h notebook processors. We re-enable the extension, so that we can properly discover core and cache topology. Linux seems to do the same. 
Reported by: Johannes Dieterich Reviewed by: jhb, kib Tested by: Johannes Dieterich (earlier version) MFC after: 3 weeks Differential Revision: https://reviews.freebsd.org/D5883 --- sys/amd64/amd64/mp_machdep.c | 2 +- sys/i386/i386/mp_machdep.c | 2 +- sys/x86/include/specialreg.h | 1 + sys/x86/include/x86_var.h | 2 +- sys/x86/x86/identcpu.c | 39 +++++++++++++++++++++++++----------- 5 files changed, 31 insertions(+), 15 deletions(-) diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c index d87d38c198e6..f7d93de824e1 100644 --- a/sys/amd64/amd64/mp_machdep.c +++ b/sys/amd64/amd64/mp_machdep.c @@ -247,7 +247,7 @@ init_secondary(void) wrmsr(MSR_FSBASE, 0); /* User value */ wrmsr(MSR_GSBASE, (u_int64_t)pc); wrmsr(MSR_KGSBASE, (u_int64_t)pc); /* XXX User value while we're in the kernel */ - intel_fix_cpuid(); + fix_cpuid(); lidt(&r_idt); diff --git a/sys/i386/i386/mp_machdep.c b/sys/i386/i386/mp_machdep.c index be00559d9528..c2c200a09b64 100644 --- a/sys/i386/i386/mp_machdep.c +++ b/sys/i386/i386/mp_machdep.c @@ -242,7 +242,7 @@ init_secondary(void) pc->pc_prvspace = pc; pc->pc_curthread = 0; - intel_fix_cpuid(); + fix_cpuid(); gdt_segs[GPRIV_SEL].ssd_base = (int) pc; gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss; diff --git a/sys/x86/include/specialreg.h b/sys/x86/include/specialreg.h index c2d2c59f74c8..5f71b2dce368 100644 --- a/sys/x86/include/specialreg.h +++ b/sys/x86/include/specialreg.h @@ -816,6 +816,7 @@ #define MSR_P_STATE_CONFIG(n) (0xc0010064 + (n)) /* P-state Config */ #define MSR_SMM_ADDR 0xc0010112 /* SMM TSEG base address */ #define MSR_SMM_MASK 0xc0010113 /* SMM TSEG address mask */ +#define MSR_EXTFEATURES 0xc0011005 /* Extended CPUID Features override */ #define MSR_IC_CFG 0xc0011021 /* Instruction Cache Configuration */ #define MSR_K8_UCODE_UPDATE 0xc0010020 /* update microcode */ #define MSR_MC0_CTL_MASK 0xc0010044 diff --git a/sys/x86/include/x86_var.h b/sys/x86/include/x86_var.h index 46ce1a0facbc..07e9d800ba17 
100644 --- a/sys/x86/include/x86_var.h +++ b/sys/x86/include/x86_var.h @@ -103,7 +103,7 @@ void dump_drop_page(vm_paddr_t); void identify_cpu(void); void initializecpu(void); void initializecpucache(void); -bool intel_fix_cpuid(void); +bool fix_cpuid(void); void fillw(int /*u_short*/ pat, void *base, size_t cnt); int is_physical_memory(vm_paddr_t addr); int isa_nmi(int cd); diff --git a/sys/x86/x86/identcpu.c b/sys/x86/x86/identcpu.c index bc232a261ce3..41ba36a78dd1 100644 --- a/sys/x86/x86/identcpu.c +++ b/sys/x86/x86/identcpu.c @@ -1342,23 +1342,22 @@ identify_hypervisor(void) } } -/* - * Clear "Limit CPUID Maxval" bit and return true if the caller should - * get the largest standard CPUID function number again if it is set - * from BIOS. It is necessary for probing correct CPU topology later - * and for the correct operation of the AVX-aware userspace. - */ bool -intel_fix_cpuid(void) +fix_cpuid(void) { uint64_t msr; - if (cpu_vendor_id != CPU_VENDOR_INTEL) - return (false); - if ((CPUID_TO_FAMILY(cpu_id) == 0xf && + /* + * Clear "Limit CPUID Maxval" bit and return true if the caller should + * get the largest standard CPUID function number again if it is set + * from BIOS. It is necessary for probing correct CPU topology later + * and for the correct operation of the AVX-aware userspace. + */ + if (cpu_vendor_id == CPU_VENDOR_INTEL && + ((CPUID_TO_FAMILY(cpu_id) == 0xf && CPUID_TO_MODEL(cpu_id) >= 0x3) || (CPUID_TO_FAMILY(cpu_id) == 0x6 && - CPUID_TO_MODEL(cpu_id) >= 0xe)) { + CPUID_TO_MODEL(cpu_id) >= 0xe))) { msr = rdmsr(MSR_IA32_MISC_ENABLE); if ((msr & IA32_MISC_EN_LIMCPUID) != 0) { msr &= ~IA32_MISC_EN_LIMCPUID; @@ -1366,6 +1365,22 @@ intel_fix_cpuid(void) return (true); } } + + /* + * Re-enable AMD Topology Extension that could be disabled by BIOS + * on some notebook processors. Without the extension it's really + * hard to determine the correct CPU cache topology. 
+ * See BIOS and Kernel Developer’s Guide (BKDG) for AMD Family 15h + * Models 60h-6Fh Processors, Publication # 50742. + */ + if (cpu_vendor_id == CPU_VENDOR_AMD && CPUID_TO_FAMILY(cpu_id) == 0x15) { + msr = rdmsr(MSR_EXTFEATURES); + if ((msr & ((uint64_t)1 << 54)) == 0) { + msr |= (uint64_t)1 << 54; + wrmsr(MSR_EXTFEATURES, msr); + return (true); + } + } return (false); } @@ -1403,7 +1418,7 @@ identify_cpu(void) identify_hypervisor(); cpu_vendor_id = find_cpu_vendor_id(); - if (intel_fix_cpuid()) { + if (fix_cpuid()) { do_cpuid(0, regs); cpu_high = regs[0]; } From 4ff970c46236857f6a260a617044516a05ea4694 Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Tue, 12 Apr 2016 14:19:19 +0000 Subject: [PATCH 27/29] Allocate separate DMA area for synchronous IOCB execution. Usually IOCBs should be put on queue for asynchronous processing and should not require additional DMA memory. But there are some cases like aborts and resets that for external reasons has to be synchronous. Give those cases separate 2*64 byte DMA area to decouple them from other DMA scratch area users, using it for asynchronous requests. --- sys/dev/isp/isp.c | 60 +++++++++++++++------------------------ sys/dev/isp/isp_freebsd.h | 18 ++++++++++++ sys/dev/isp/isp_pci.c | 23 +++++++++++++-- sys/dev/isp/ispvar.h | 8 ++++++ 4 files changed, 70 insertions(+), 39 deletions(-) diff --git a/sys/dev/isp/isp.c b/sys/dev/isp/isp.c index 95cd9b92b142..2692ba80d542 100644 --- a/sys/dev/isp/isp.c +++ b/sys/dev/isp/isp.c @@ -4667,31 +4667,25 @@ isp_control(ispsoftc_t *isp, ispctl_t ctl, ...) 
tmf->tmf_tidlo = lp->portid; tmf->tmf_tidhi = lp->portid >> 16; tmf->tmf_vpidx = ISP_GET_VPIDX(isp, chan); + isp_put_24xx_tmf(isp, tmf, isp->isp_iocb); + MEMORYBARRIER(isp, SYNC_IFORDEV, 0, QENTRY_LEN, chan); + fcp->sendmarker = 1; + isp_prt(isp, ISP_LOGALL, "Chan %d Reset N-Port Handle 0x%04x @ Port 0x%06x", chan, lp->handle, lp->portid); MBSINIT(&mbs, MBOX_EXEC_COMMAND_IOCB_A64, MBLOGALL, MBCMD_DEFAULT_TIMEOUT + tmf->tmf_timeout * 1000000); mbs.param[1] = QENTRY_LEN; - mbs.param[2] = DMA_WD1(fcp->isp_scdma); - mbs.param[3] = DMA_WD0(fcp->isp_scdma); - mbs.param[6] = DMA_WD3(fcp->isp_scdma); - mbs.param[7] = DMA_WD2(fcp->isp_scdma); - - if (FC_SCRATCH_ACQUIRE(isp, chan)) { - isp_prt(isp, ISP_LOGERR, sacq); - break; - } - isp_put_24xx_tmf(isp, tmf, fcp->isp_scratch); - MEMORYBARRIER(isp, SYNC_SFORDEV, 0, QENTRY_LEN, chan); - fcp->sendmarker = 1; + mbs.param[2] = DMA_WD1(isp->isp_iocb_dma); + mbs.param[3] = DMA_WD0(isp->isp_iocb_dma); + mbs.param[6] = DMA_WD3(isp->isp_iocb_dma); + mbs.param[7] = DMA_WD2(isp->isp_iocb_dma); isp_mboxcmd(isp, &mbs); - if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { - FC_SCRATCH_RELEASE(isp, chan); + if (mbs.param[0] != MBOX_COMMAND_COMPLETE) break; - } - MEMORYBARRIER(isp, SYNC_SFORCPU, QENTRY_LEN, QENTRY_LEN, chan); + + MEMORYBARRIER(isp, SYNC_IFORCPU, QENTRY_LEN, QENTRY_LEN, chan); sp = (isp24xx_statusreq_t *) local; - isp_get_24xx_response(isp, &((isp24xx_statusreq_t *)fcp->isp_scratch)[1], sp); - FC_SCRATCH_RELEASE(isp, chan); + isp_get_24xx_response(isp, &((isp24xx_statusreq_t *)isp->isp_iocb)[1], sp); if (sp->req_completion_status == 0) { return (0); } @@ -4731,7 +4725,7 @@ isp_control(ispsoftc_t *isp, ispctl_t ctl, ...) break; } if (IS_24XX(isp)) { - isp24xx_abrt_t local, *ab = &local, *ab2; + isp24xx_abrt_t local, *ab = &local; fcparam *fcp; fcportdb_t *lp; @@ -4755,31 +4749,23 @@ isp_control(ispsoftc_t *isp, ispctl_t ctl, ...) 
ab->abrt_tidlo = lp->portid; ab->abrt_tidhi = lp->portid >> 16; ab->abrt_vpidx = ISP_GET_VPIDX(isp, chan); + isp_put_24xx_abrt(isp, ab, isp->isp_iocb); + MEMORYBARRIER(isp, SYNC_IFORDEV, 0, 2 * QENTRY_LEN, chan); ISP_MEMZERO(&mbs, sizeof (mbs)); MBSINIT(&mbs, MBOX_EXEC_COMMAND_IOCB_A64, MBLOGALL, 5000000); mbs.param[1] = QENTRY_LEN; - mbs.param[2] = DMA_WD1(fcp->isp_scdma); - mbs.param[3] = DMA_WD0(fcp->isp_scdma); - mbs.param[6] = DMA_WD3(fcp->isp_scdma); - mbs.param[7] = DMA_WD2(fcp->isp_scdma); + mbs.param[2] = DMA_WD1(isp->isp_iocb_dma); + mbs.param[3] = DMA_WD0(isp->isp_iocb_dma); + mbs.param[6] = DMA_WD3(isp->isp_iocb_dma); + mbs.param[7] = DMA_WD2(isp->isp_iocb_dma); - if (FC_SCRATCH_ACQUIRE(isp, chan)) { - isp_prt(isp, ISP_LOGERR, sacq); - break; - } - isp_put_24xx_abrt(isp, ab, fcp->isp_scratch); - ab2 = (isp24xx_abrt_t *) &((uint8_t *)fcp->isp_scratch)[QENTRY_LEN]; - ab2->abrt_nphdl = 0xdeaf; - MEMORYBARRIER(isp, SYNC_SFORDEV, 0, 2 * QENTRY_LEN, chan); isp_mboxcmd(isp, &mbs); - if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { - FC_SCRATCH_RELEASE(isp, chan); + if (mbs.param[0] != MBOX_COMMAND_COMPLETE) break; - } - MEMORYBARRIER(isp, SYNC_SFORCPU, QENTRY_LEN, QENTRY_LEN, chan); - isp_get_24xx_abrt(isp, ab2, ab); - FC_SCRATCH_RELEASE(isp, chan); + + MEMORYBARRIER(isp, SYNC_IFORCPU, QENTRY_LEN, QENTRY_LEN, chan); + isp_get_24xx_abrt(isp, &((isp24xx_abrt_t *)isp->isp_iocb)[1], ab); if (ab->abrt_nphdl == ISP24XX_ABRT_OKAY) { return (0); } diff --git a/sys/dev/isp/isp_freebsd.h b/sys/dev/isp/isp_freebsd.h index 3e7707705553..d6c62a291608 100644 --- a/sys/dev/isp/isp_freebsd.h +++ b/sys/dev/isp/isp_freebsd.h @@ -293,10 +293,12 @@ struct isposinfo { bus_dma_tag_t reqdmat; bus_dma_tag_t respdmat; bus_dma_tag_t atiodmat; + bus_dma_tag_t iocbdmat; bus_dma_tag_t scdmat; bus_dmamap_t reqmap; bus_dmamap_t respmap; bus_dmamap_t atiomap; + bus_dmamap_t iocbmap; /* * Command and transaction related related stuff @@ -441,6 +443,14 @@ case SYNC_ATIOQ: \ 
bus_dmamap_sync(isp->isp_osinfo.atiodmat, \ isp->isp_osinfo.atiomap, BUS_DMASYNC_POSTREAD); \ break; \ +case SYNC_IFORDEV: \ + bus_dmamap_sync(isp->isp_osinfo.iocbdmat, isp->isp_osinfo.iocbmap, \ + BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); \ + break; \ +case SYNC_IFORCPU: \ + bus_dmamap_sync(isp->isp_osinfo.iocbdmat, isp->isp_osinfo.iocbmap, \ + BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); \ + break; \ default: \ break; \ } @@ -469,6 +479,14 @@ case SYNC_REG: \ bus_barrier(isp->isp_osinfo.regs, offset, size, \ BUS_SPACE_BARRIER_WRITE); \ break; \ +case SYNC_IFORDEV: \ + bus_dmamap_sync(isp->isp_osinfo.iocbdmat, isp->isp_osinfo.iocbmap, \ + BUS_DMASYNC_PREWRITE); \ + break; \ +case SYNC_IFORCPU: \ + bus_dmamap_sync(isp->isp_osinfo.iocbdmat, isp->isp_osinfo.iocbmap, \ + BUS_DMASYNC_POSTWRITE); \ + break; \ default: \ break; \ } diff --git a/sys/dev/isp/isp_pci.c b/sys/dev/isp/isp_pci.c index 11386eedf388..0eefc16eaaef 100644 --- a/sys/dev/isp/isp_pci.c +++ b/sys/dev/isp/isp_pci.c @@ -1730,9 +1730,23 @@ isp_pci_mbxdma(ispsoftc_t *isp) if (IS_FC(isp)) { if (isp_dma_tag_create(isp->isp_osinfo.dmat, 64, slim, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, - ISP_FC_SCRLEN, 1, ISP_FC_SCRLEN, 0, &isp->isp_osinfo.scdmat)) { + 2*QENTRY_LEN, 1, 2*QENTRY_LEN, 0, &isp->isp_osinfo.iocbdmat)) { goto bad; } + if (bus_dmamem_alloc(isp->isp_osinfo.iocbdmat, + (void **)&base, BUS_DMA_COHERENT, &isp->isp_osinfo.iocbmap) != 0) + goto bad; + isp->isp_iocb = base; + im.error = 0; + if (bus_dmamap_load(isp->isp_osinfo.iocbdmat, isp->isp_osinfo.iocbmap, + base, 2*QENTRY_LEN, imc, &im, 0) || im.error) + goto bad; + isp->isp_iocb_dma = im.maddr; + + if (isp_dma_tag_create(isp->isp_osinfo.dmat, 64, slim, + BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, + ISP_FC_SCRLEN, 1, ISP_FC_SCRLEN, 0, &isp->isp_osinfo.scdmat)) + goto bad; for (cmap = 0; cmap < isp->isp_nchan; cmap++) { struct isp_fc *fc = ISP_FC_PC(isp, cmap); if (bus_dmamem_alloc(isp->isp_osinfo.scdmat, @@ -1791,7 +1805,8 @@ 
isp_pci_mbxdma(ispsoftc_t *isp) while (--cmap >= 0) { struct isp_fc *fc = ISP_FC_PC(isp, cmap); bus_dmamap_unload(isp->isp_osinfo.scdmat, fc->scmap); - bus_dmamem_free(isp->isp_osinfo.scdmat, base, fc->scmap); + bus_dmamem_free(isp->isp_osinfo.scdmat, + FCPARAM(isp, cmap)->isp_scratch, fc->scmap); while (fc->nexus_free_list) { struct isp_nexus *n = fc->nexus_free_list; fc->nexus_free_list = n->next; @@ -1799,6 +1814,10 @@ isp_pci_mbxdma(ispsoftc_t *isp) } } bus_dma_tag_destroy(isp->isp_osinfo.scdmat); + bus_dmamap_unload(isp->isp_osinfo.iocbdmat, isp->isp_osinfo.iocbmap); + bus_dmamem_free(isp->isp_osinfo.iocbdmat, isp->isp_iocb, + isp->isp_osinfo.iocbmap); + bus_dma_tag_destroy(isp->isp_osinfo.iocbdmat); } bad1: if (isp->isp_rquest_dma != 0) { diff --git a/sys/dev/isp/ispvar.h b/sys/dev/isp/ispvar.h index a7184e24e9d4..f0f8cf82cce7 100644 --- a/sys/dev/isp/ispvar.h +++ b/sys/dev/isp/ispvar.h @@ -130,6 +130,8 @@ struct ispmdvec { #define SYNC_SFORCPU 3 /* scratch, sync for CPU */ #define SYNC_REG 4 /* for registers */ #define SYNC_ATIOQ 5 /* atio result queue (24xx) */ +#define SYNC_IFORDEV 6 /* synchrounous IOCB, sync for ISP */ +#define SYNC_IFORCPU 7 /* synchrounous IOCB, sync for CPU */ /* * Request/Response Queue defines and macros. @@ -595,6 +597,12 @@ struct ispsoftc { isp_hdl_t *isp_xflist; isp_hdl_t *isp_xffree; + /* + * DMA mapped in area for synchronous IOCB requests. + */ + void * isp_iocb; + XS_DMA_ADDR_T isp_iocb_dma; + /* * request/result queue pointers and DMA handles for them. */ From e3188c2f317e138429d3063725fd8b5672ccaad6 Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Tue, 12 Apr 2016 14:43:17 +0000 Subject: [PATCH 28/29] Switch isp_getpdb() to synchronous IOCB DMA area. While technically it is not IOCB, it is synchronous and can be called from different places, so calling FC_SCRATCH_ACQUIRE() here is inconvenient. 
--- sys/dev/isp/isp.c | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/sys/dev/isp/isp.c b/sys/dev/isp/isp.c index 2692ba80d542..88ea7de5ea83 100644 --- a/sys/dev/isp/isp.c +++ b/sys/dev/isp/isp.c @@ -2776,7 +2776,6 @@ isp_port_logout(ispsoftc_t *isp, uint16_t handle, uint32_t portid) static int isp_getpdb(ispsoftc_t *isp, int chan, uint16_t id, isp_pdb_t *pdb) { - fcparam *fcp = FCPARAM(isp, chan); mbreg_t mbs; union { isp_pdb_21xx_t fred; @@ -2794,23 +2793,19 @@ isp_getpdb(ispsoftc_t *isp, int chan, uint16_t id, isp_pdb_t *pdb) } else { mbs.param[1] = id << 8; } - mbs.param[2] = DMA_WD1(fcp->isp_scdma); - mbs.param[3] = DMA_WD0(fcp->isp_scdma); - mbs.param[6] = DMA_WD3(fcp->isp_scdma); - mbs.param[7] = DMA_WD2(fcp->isp_scdma); - if (FC_SCRATCH_ACQUIRE(isp, chan)) { - isp_prt(isp, ISP_LOGERR, sacq); - return (-1); - } - MEMORYBARRIER(isp, SYNC_SFORDEV, 0, sizeof(un), chan); + mbs.param[2] = DMA_WD1(isp->isp_iocb_dma); + mbs.param[3] = DMA_WD0(isp->isp_iocb_dma); + mbs.param[6] = DMA_WD3(isp->isp_iocb_dma); + mbs.param[7] = DMA_WD2(isp->isp_iocb_dma); + MEMORYBARRIER(isp, SYNC_IFORDEV, 0, sizeof(un), chan); + isp_mboxcmd(isp, &mbs); - if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { - FC_SCRATCH_RELEASE(isp, chan); + if (mbs.param[0] != MBOX_COMMAND_COMPLETE) return (mbs.param[0] | (mbs.param[1] << 16)); - } - MEMORYBARRIER(isp, SYNC_SFORCPU, 0, sizeof(un), chan); + + MEMORYBARRIER(isp, SYNC_IFORCPU, 0, sizeof(un), chan); if (IS_24XX(isp)) { - isp_get_pdb_24xx(isp, fcp->isp_scratch, &un.bill); + isp_get_pdb_24xx(isp, isp->isp_iocb, &un.bill); pdb->handle = un.bill.pdb_handle; pdb->prli_word3 = un.bill.pdb_prli_svc3; pdb->portid = BITS2WORD_24XX(un.bill.pdb_portid_bits); @@ -2822,11 +2817,10 @@ isp_getpdb(ispsoftc_t *isp, int chan, uint16_t id, isp_pdb_t *pdb) un.bill.pdb_curstate); if (un.bill.pdb_curstate < PDB2400_STATE_PLOGI_DONE || un.bill.pdb_curstate > PDB2400_STATE_LOGGED_IN) { mbs.param[0] = MBOX_NOT_LOGGED_IN; - 
FC_SCRATCH_RELEASE(isp, chan); return (mbs.param[0]); } } else { - isp_get_pdb_21xx(isp, fcp->isp_scratch, &un.fred); + isp_get_pdb_21xx(isp, isp->isp_iocb, &un.fred); pdb->handle = un.fred.pdb_loopid; pdb->prli_word3 = un.fred.pdb_prli_svc3; pdb->portid = BITS2WORD(un.fred.pdb_portid_bits); @@ -2835,7 +2829,6 @@ isp_getpdb(ispsoftc_t *isp, int chan, uint16_t id, isp_pdb_t *pdb) isp_prt(isp, ISP_LOGDEBUG1, "Chan %d handle 0x%x Port 0x%06x", chan, id, pdb->portid); } - FC_SCRATCH_RELEASE(isp, chan); return (0); } From e321146fc5f5ebfc0fb2e5482d89297dfebb3d32 Mon Sep 17 00:00:00 2001 From: Edward Tomasz Napierala Date: Tue, 12 Apr 2016 16:07:41 +0000 Subject: [PATCH 29/29] Make the usage() mention the -u option added in r295212. MFC after: 1 month Sponsored by: The FreeBSD Foundation --- usr.sbin/ctld/ctld.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/usr.sbin/ctld/ctld.c b/usr.sbin/ctld/ctld.c index 6c8b4a89fe93..a2bc17d1ad95 100644 --- a/usr.sbin/ctld/ctld.c +++ b/usr.sbin/ctld/ctld.c @@ -66,7 +66,7 @@ static void usage(void) { - fprintf(stderr, "usage: ctld [-d][-f config-file]\n"); + fprintf(stderr, "usage: ctld [-d][-u][-f config-file]\n"); exit(1); }