1b7c1e5ce9
- After some ZIL changes 6 years ago, zil_slog_limit became partially broken because zl_itx_list_sz was not updated when async itxs were upgraded to sync. Because of other changes made around that time, zl_itx_list_sz is no longer required to implement the functionality, so this patch removes the unneeded broken code and variables.
- The original idea of zil_slog_limit was to reduce the chance of SLOG abuse by a single heavy logger, which increased latency for other (more latency-critical) loggers, by pushing the heavy log out into the main pool instead of the SLOG. Besides a huge latency increase for heavy writers, this implementation caused a double write of all data, since the log records were explicitly prepared for the SLOG. Since we now have an I/O scheduler, I've found it can be much more efficient to reduce the priority of heavy-logger SLOG writes from ZIO_PRIORITY_SYNC_WRITE to ZIO_PRIORITY_ASYNC_WRITE, while still leaving them on the SLOG.
- The existing ZIL implementation had a problem with space efficiency when it had to write large chunks of data into log blocks of limited size. In some cases efficiency dropped to almost as low as 50%. For a ZIL stored on spinning rust, that also cut log write speed in half, since the head had to uselessly fly over allocated but unwritten areas. This change improves the situation by offloading the problematic operations from z*_log_write() to zil_lwb_commit(), which knows the real state of log block allocation and can split large requests into pieces much more efficiently. As a side effect, it also removes one of the two data copy operations done by the ZIL code in the WR_COPIED case.
- While there, untangle and unify the code of the z*_log_write() functions. Also, zfs_log_write(), like zvol_log_write(), can now handle writes crossing a block boundary, which may also improve efficiency if the ZPL is made to do that.
Sponsored by: iXsystems, Inc.
Authored by: Alexander Motin <mav@FreeBSD.org> Reviewed by: Matthew Ahrens <mahrens@delphix.com> Reviewed by: Prakash Surya <prakash.surya@delphix.com> Reviewed by: Andriy Gapon <avg@FreeBSD.org> Reviewed by: Steven Hartland <steven.hartland@multiplay.co.uk> Reviewed by: Brad Lewis <brad.lewis@delphix.com> Reviewed by: Richard Elling <Richard.Elling@RichardElling.com> Approved by: Robert Mustacchi <rm@joyent.com> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Richard Yao <ryao@gentoo.org> Ported-by: Giuseppe Di Natale <dinatale2@llnl.gov> OpenZFS-issue: https://www.illumos.org/issues/7578 OpenZFS-commit: https://github.com/openzfs/openzfs/commit/aeb13ac Closes #6191
130 lines
4.7 KiB
C
130 lines
4.7 KiB
C
/*
|
|
* CDDL HEADER START
|
|
*
|
|
* The contents of this file are subject to the terms of the
|
|
* Common Development and Distribution License (the "License").
|
|
* You may not use this file except in compliance with the License.
|
|
*
|
|
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
|
* or http://www.opensolaris.org/os/licensing.
|
|
* See the License for the specific language governing permissions
|
|
* and limitations under the License.
|
|
*
|
|
* When distributing Covered Code, include this CDDL HEADER in each
|
|
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
|
* If applicable, add the following below this CDDL HEADER, with the
|
|
* fields enclosed by brackets "[]" replaced with your own identifying
|
|
* information: Portions Copyright [yyyy] [name of copyright owner]
|
|
*
|
|
* CDDL HEADER END
|
|
*/
|
|
|
|
/*
 * The rest of this header is compiled only when both _KERNEL and
 * HAVE_DECLARE_EVENT_CLASS are defined.
 */
#if defined(_KERNEL) && defined(HAVE_DECLARE_EVENT_CLASS)

/*
 * Tracepoint subsystem name.  It is #undef'd first so that a value left
 * behind by a previously included trace header does not clash.
 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM zfs

/*
 * NOTE(review): per the Linux tracepoint headers, TRACE_SYSTEM_VAR supplies
 * the identifier used for generated per-file variable names; confirm against
 * the kernel version being built.
 */
#undef TRACE_SYSTEM_VAR
#define TRACE_SYSTEM_VAR zfs_zil

/*
 * Include guard.  Unlike a plain guard, the body is processed again whenever
 * TRACE_HEADER_MULTI_READ is defined.
 */
#if !defined(_TRACE_ZIL_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_ZIL_H

#include <linux/tracepoint.h>
#include <sys/types.h>
|
|
|
|
/*
|
|
* Generic support for one argument tracepoints of the form:
|
|
*
|
|
* DTRACE_PROBE1(...,
|
|
* zilog_t *, ...);
|
|
*/
|
|
/* BEGIN CSTYLED */
|
|
DECLARE_EVENT_CLASS(zfs_zil_class,
|
|
TP_PROTO(zilog_t *zilog),
|
|
TP_ARGS(zilog),
|
|
TP_STRUCT__entry(
|
|
__field(uint64_t, zl_lr_seq)
|
|
__field(uint64_t, zl_commit_lr_seq)
|
|
__field(uint64_t, zl_destroy_txg)
|
|
__field(uint64_t, zl_replaying_seq)
|
|
__field(uint32_t, zl_suspend)
|
|
__field(uint8_t, zl_suspending)
|
|
__field(uint8_t, zl_keep_first)
|
|
__field(uint8_t, zl_replay)
|
|
__field(uint8_t, zl_stop_sync)
|
|
__field(uint8_t, zl_writer)
|
|
__field(uint8_t, zl_logbias)
|
|
__field(uint8_t, zl_sync)
|
|
__field(int, zl_parse_error)
|
|
__field(uint64_t, zl_parse_blk_seq)
|
|
__field(uint64_t, zl_parse_lr_seq)
|
|
__field(uint64_t, zl_parse_blk_count)
|
|
__field(uint64_t, zl_parse_lr_count)
|
|
__field(uint64_t, zl_next_batch)
|
|
__field(uint64_t, zl_com_batch)
|
|
__field(uint64_t, zl_cur_used)
|
|
__field(clock_t, zl_replay_time)
|
|
__field(uint64_t, zl_replay_blks)
|
|
),
|
|
TP_fast_assign(
|
|
__entry->zl_lr_seq = zilog->zl_lr_seq;
|
|
__entry->zl_commit_lr_seq = zilog->zl_commit_lr_seq;
|
|
__entry->zl_destroy_txg = zilog->zl_destroy_txg;
|
|
__entry->zl_replaying_seq = zilog->zl_replaying_seq;
|
|
__entry->zl_suspend = zilog->zl_suspend;
|
|
__entry->zl_suspending = zilog->zl_suspending;
|
|
__entry->zl_keep_first = zilog->zl_keep_first;
|
|
__entry->zl_replay = zilog->zl_replay;
|
|
__entry->zl_stop_sync = zilog->zl_stop_sync;
|
|
__entry->zl_writer = zilog->zl_writer;
|
|
__entry->zl_logbias = zilog->zl_logbias;
|
|
__entry->zl_sync = zilog->zl_sync;
|
|
__entry->zl_parse_error = zilog->zl_parse_error;
|
|
__entry->zl_parse_blk_seq = zilog->zl_parse_blk_seq;
|
|
__entry->zl_parse_lr_seq = zilog->zl_parse_lr_seq;
|
|
__entry->zl_parse_blk_count = zilog->zl_parse_blk_count;
|
|
__entry->zl_parse_lr_count = zilog->zl_parse_lr_count;
|
|
__entry->zl_next_batch = zilog->zl_next_batch;
|
|
__entry->zl_com_batch = zilog->zl_com_batch;
|
|
__entry->zl_cur_used = zilog->zl_cur_used;
|
|
__entry->zl_replay_time = zilog->zl_replay_time;
|
|
__entry->zl_replay_blks = zilog->zl_replay_blks;
|
|
),
|
|
TP_printk("zl { lr_seq %llu commit_lr_seq %llu destroy_txg %llu "
|
|
"replaying_seq %llu suspend %u suspending %u keep_first %u "
|
|
"replay %u stop_sync %u writer %u logbias %u sync %u "
|
|
"parse_error %u parse_blk_seq %llu parse_lr_seq %llu "
|
|
"parse_blk_count %llu parse_lr_count %llu next_batch %llu "
|
|
"com_batch %llu cur_used %llu replay_time %lu replay_blks %llu }",
|
|
__entry->zl_lr_seq, __entry->zl_commit_lr_seq,
|
|
__entry->zl_destroy_txg, __entry->zl_replaying_seq,
|
|
__entry->zl_suspend, __entry->zl_suspending, __entry->zl_keep_first,
|
|
__entry->zl_replay, __entry->zl_stop_sync, __entry->zl_writer,
|
|
__entry->zl_logbias, __entry->zl_sync, __entry->zl_parse_error,
|
|
__entry->zl_parse_blk_seq, __entry->zl_parse_lr_seq,
|
|
__entry->zl_parse_blk_count, __entry->zl_parse_lr_count,
|
|
__entry->zl_next_batch, __entry->zl_com_batch, __entry->zl_cur_used,
|
|
__entry->zl_replay_time, __entry->zl_replay_blks)
|
|
);
|
|
/* END CSTYLED */
|
|
|
|
/* BEGIN CSTYLED */
|
|
#define DEFINE_ZIL_EVENT(name) \
|
|
DEFINE_EVENT(zfs_zil_class, name, \
|
|
TP_PROTO(zilog_t *zilog), \
|
|
TP_ARGS(zilog))
|
|
DEFINE_ZIL_EVENT(zfs_zil__cw1);
|
|
DEFINE_ZIL_EVENT(zfs_zil__cw2);
|
|
/* END CSTYLED */
|
|
|
|
#endif /* _TRACE_ZIL_H */

/*
 * These definitions sit outside the _TRACE_ZIL_H guard, so they are
 * processed on every inclusion of this header.
 * NOTE(review): per the Linux tracepoint convention, <trace/define_trace.h>
 * uses TRACE_INCLUDE_PATH and TRACE_INCLUDE_FILE to locate and re-include
 * this header (here: sys/trace_zil.h); confirm the path matches the build's
 * include directories.
 */
#undef TRACE_INCLUDE_PATH
#undef TRACE_INCLUDE_FILE
#define TRACE_INCLUDE_PATH sys
#define TRACE_INCLUDE_FILE trace_zil
#include <trace/define_trace.h>

#endif /* _KERNEL && HAVE_DECLARE_EVENT_CLASS */
|