diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c index 763a086ac468..a329e4a8337e 100644 --- a/cmd/zdb/zdb.c +++ b/cmd/zdb/zdb.c @@ -4161,6 +4161,7 @@ static const char *zdb_ot_extname[] = { }; #define ZB_TOTAL DN_MAX_LEVELS +#define SPA_MAX_FOR_16M (SPA_MAXBLOCKSHIFT+1) typedef struct zdb_cb { zdb_blkstats_t zcb_type[ZB_TOTAL + 1][ZDB_OT_TOTAL + 1]; @@ -4168,6 +4169,15 @@ typedef struct zdb_cb { uint64_t zcb_checkpoint_size; uint64_t zcb_dedup_asize; uint64_t zcb_dedup_blocks; + uint64_t zcb_psize_count[SPA_MAX_FOR_16M]; + uint64_t zcb_lsize_count[SPA_MAX_FOR_16M]; + uint64_t zcb_asize_count[SPA_MAX_FOR_16M]; + uint64_t zcb_psize_len[SPA_MAX_FOR_16M]; + uint64_t zcb_lsize_len[SPA_MAX_FOR_16M]; + uint64_t zcb_asize_len[SPA_MAX_FOR_16M]; + uint64_t zcb_psize_total; + uint64_t zcb_lsize_total; + uint64_t zcb_asize_total; uint64_t zcb_embedded_blocks[NUM_BP_EMBEDDED_TYPES]; uint64_t zcb_embedded_histogram[NUM_BP_EMBEDDED_TYPES] [BPE_PAYLOAD_SIZE + 1]; @@ -4191,6 +4201,172 @@ same_metaslab(spa_t *spa, uint64_t vdev, uint64_t off1, uint64_t off2) return ((off1 >> ms_shift) == (off2 >> ms_shift)); } +/* + * Used to simplify reporting of the histogram data. + */ +typedef struct one_histo { + char *name; + uint64_t *count; + uint64_t *len; + uint64_t cumulative; +} one_histo_t; + +/* + * The number of separate histograms processed for psize, lsize and asize. + */ +#define NUM_HISTO 3 + +/* + * This routine will create a fixed column size output of three different + * histograms showing by blocksize of 512 - 2^ SPA_MAX_FOR_16M + * the count, length and cumulative length of the psize, lsize and + * asize blocks. + * + * All three types of blocks are listed on a single line + * + * By default the table is printed in nicenumber format (e.g. 123K) but + * if the '-P' parameter is specified then the full raw number (parseable) + * is printed out. 
+ */ +static void +dump_size_histograms(zdb_cb_t *zcb) +{ + /* + * A temporary buffer that allows us to convert a number into + * a string using zdb_nicenumber to allow either raw or human + * readable numbers to be output. + */ + char numbuf[32]; + + /* + * Define titles which are used in the headers of the tables + * printed by this routine. + */ + const char blocksize_title1[] = "block"; + const char blocksize_title2[] = "size"; + const char count_title[] = "Count"; + const char length_title[] = "Size"; + const char cumulative_title[] = "Cum."; + + /* + * Setup the histogram arrays (psize, lsize, and asize). + */ + one_histo_t parm_histo[NUM_HISTO]; + + parm_histo[0].name = "psize"; + parm_histo[0].count = zcb->zcb_psize_count; + parm_histo[0].len = zcb->zcb_psize_len; + parm_histo[0].cumulative = 0; + + parm_histo[1].name = "lsize"; + parm_histo[1].count = zcb->zcb_lsize_count; + parm_histo[1].len = zcb->zcb_lsize_len; + parm_histo[1].cumulative = 0; + + parm_histo[2].name = "asize"; + parm_histo[2].count = zcb->zcb_asize_count; + parm_histo[2].len = zcb->zcb_asize_len; + parm_histo[2].cumulative = 0; + + + (void) printf("\nBlock Size Histogram\n"); + /* + * Print the first line titles + */ + if (dump_opt['P']) + (void) printf("\n%s\t", blocksize_title1); + else + (void) printf("\n%7s ", blocksize_title1); + + for (int j = 0; j < NUM_HISTO; j++) { + if (dump_opt['P']) { + if (j < NUM_HISTO - 1) { + (void) printf("%s\t\t\t", parm_histo[j].name); + } else { + /* Don't print trailing spaces */ + (void) printf(" %s", parm_histo[j].name); + } + } else { + if (j < NUM_HISTO - 1) { + /* Left aligned strings in the output */ + (void) printf("%-7s ", + parm_histo[j].name); + } else { + /* Don't print trailing spaces */ + (void) printf("%s", parm_histo[j].name); + } + } + } + (void) printf("\n"); + + /* + * Print the second line titles + */ + if (dump_opt['P']) { + (void) printf("%s\t", blocksize_title2); + } else { + (void) printf("%7s ", blocksize_title2); + } + + for 
(int i = 0; i < NUM_HISTO; i++) { + if (dump_opt['P']) { + (void) printf("%s\t%s\t%s\t", + count_title, length_title, cumulative_title); + } else { + (void) printf("%7s%7s%7s", + count_title, length_title, cumulative_title); + } + } + (void) printf("\n"); + + /* + * Print the rows + */ + for (int i = SPA_MINBLOCKSHIFT; i < SPA_MAX_FOR_16M; i++) { + + /* + * Print the first column showing the blocksize + */ + zdb_nicenum((1ULL << i), numbuf, sizeof (numbuf)); + + if (dump_opt['P']) { + printf("%s", numbuf); + } else { + printf("%7s:", numbuf); + } + + /* + * Print the remaining set of 3 columns per size: + * for psize, lsize and asize + */ + for (int j = 0; j < NUM_HISTO; j++) { + parm_histo[j].cumulative += parm_histo[j].len[i]; + + zdb_nicenum(parm_histo[j].count[i], + numbuf, sizeof (numbuf)); + if (dump_opt['P']) + (void) printf("\t%s", numbuf); + else + (void) printf("%7s", numbuf); + + zdb_nicenum(parm_histo[j].len[i], + numbuf, sizeof (numbuf)); + if (dump_opt['P']) + (void) printf("\t%s", numbuf); + else + (void) printf("%7s", numbuf); + + zdb_nicenum(parm_histo[j].cumulative, + numbuf, sizeof (numbuf)); + if (dump_opt['P']) + (void) printf("\t%s", numbuf); + else + (void) printf("%7s", numbuf); + } + (void) printf("\n"); + } +} + static void zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp, dmu_object_type_t type) @@ -4284,6 +4460,28 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp, [BPE_GET_PSIZE(bp)]++; return; } + /* + * The binning histogram bins by powers of two up to + * SPA_MAXBLOCKSIZE rather than creating bins for + * every possible blocksize found in the pool. 
+ */ + int bin = highbit64(BP_GET_PSIZE(bp)) - 1; + + zcb->zcb_psize_count[bin]++; + zcb->zcb_psize_len[bin] += BP_GET_PSIZE(bp); + zcb->zcb_psize_total += BP_GET_PSIZE(bp); + + bin = highbit64(BP_GET_LSIZE(bp)) - 1; + + zcb->zcb_lsize_count[bin]++; + zcb->zcb_lsize_len[bin] += BP_GET_LSIZE(bp); + zcb->zcb_lsize_total += BP_GET_LSIZE(bp); + + bin = highbit64(BP_GET_ASIZE(bp)) - 1; + + zcb->zcb_asize_count[bin]++; + zcb->zcb_asize_len[bin] += BP_GET_ASIZE(bp); + zcb->zcb_asize_total += BP_GET_ASIZE(bp); if (dump_opt['L']) return; @@ -5645,6 +5843,11 @@ dump_block_stats(spa_t *spa) } } } + + /* Output a table summarizing block sizes in the pool */ + if (dump_opt['b'] >= 2) { + dump_size_histograms(&zcb); + } } (void) printf("\n"); diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run index d8c109eb7ddc..765ffea8a302 100644 --- a/tests/runfiles/common.run +++ b/tests/runfiles/common.run @@ -104,8 +104,9 @@ tags = ['functional', 'clean_mirror'] [tests/functional/cli_root/zdb] tests = ['zdb_002_pos', 'zdb_003_pos', 'zdb_004_pos', 'zdb_005_pos', - 'zdb_006_pos', 'zdb_args_neg', 'zdb_args_pos', 'zdb_checksum', 'zdb_decompress', - 'zdb_object_range_neg', 'zdb_object_range_pos', 'zdb_display_block', + 'zdb_006_pos', 'zdb_args_neg', 'zdb_args_pos', + 'zdb_block_size_histogram', 'zdb_checksum', 'zdb_decompress', + 'zdb_display_block', 'zdb_object_range_neg', 'zdb_object_range_pos', 'zdb_objset_id'] pre = post = diff --git a/tests/zfs-tests/tests/functional/cli_root/zdb/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zdb/Makefile.am index e332a91a856a..3cf13f3ae60e 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zdb/Makefile.am +++ b/tests/zfs-tests/tests/functional/cli_root/zdb/Makefile.am @@ -7,6 +7,7 @@ dist_pkgdata_SCRIPTS = \ zdb_006_pos.ksh \ zdb_args_neg.ksh \ zdb_args_pos.ksh \ + zdb_block_size_histogram.ksh \ zdb_checksum.ksh \ zdb_decompress.ksh \ zdb_object_range_neg.ksh \ diff --git 
a/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_block_size_histogram.ksh b/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_block_size_histogram.ksh new file mode 100755 index 000000000000..0c949f9839e1 --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_block_size_histogram.ksh @@ -0,0 +1,272 @@ +#!/bin/ksh -p + +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2017 by Delphix. All rights reserved. +# Copyright (c) 2020 by Lawrence Livermore National Security LLC. + +. $STF_SUITE/include/libtest.shlib + + +# +# DESCRIPTION: +# Create a pool and populate it with files of various +# recordsizes +# +# STRATEGY: +# 1. Create pool +# 2. Populate it +# 3. Run zdb -Pbbb on pool +# 4. Verify variance on blocksizes +# +function cleanup +{ + datasetexists $TESTPOOL && destroy_pool $TESTPOOL +} + +SPA_MAXBLOCKSHIFT=24 + +function histo_populate_test_pool +{ + if [ $# -ne 1 ]; then + log_note "histo_populate_test_pool: insufficient parameters" + log_fail "hptp: 1 requested $# received" + fi + typeset pool=$1 + + set -A recordsizes + typeset -i min_rsbits=9 #512 + typeset -i max_rsbits=SPA_MAXBLOCKSHIFT #16 MiB + typeset -i sum_filesizes=0 + re_number='^[0-9]+$' + + let histo_pool_size=$(get_pool_prop size ${pool}) + if [[ ! ${histo_pool_size} =~ ${re_number} ]]; then + log_fail "histo_pool_size is not numeric ${histo_pool_size}" + fi + let max_pool_record_size=$(get_prop recordsize ${pool}) + if [[ ! 
${max_pool_record_size} =~ ${re_number} ]]; then + log_fail "hptp: max_pool_record_size is not numeric ${max_pool_record_size}" + fi + + sum_filesizes=$(echo "2^21"|bc) + ((min_pool_size=12*sum_filesizes)) + if [ ${histo_pool_size} -lt ${min_pool_size} ]; then + log_note "hptp: Your pool size ${histo_pool_size}" + log_fail "hptp: is less than minimum ${min_pool_size}" + fi + this_ri=min_rsbits + filenum=0 + total_count=0 + ################### + # generate 10% + 20% + 30% + 31% = 91% of the filespace + # attempting to use 100% will lead to no space left on device + # Heuristic testing showed that 91% was the practical upper + # bound on the default 4G zpool (mirrored) that is used in + # testing. + # + # In order to expedite testing, we will only fill 2G (of 4G) + # of the test pool. You may want to modify this for + # standalone testing. + # + # In filling only 50% of the pool, we create one object on + # each "pass" below to achieve multiple objects per record + # size. Creating one file per object would lead to + # excessive file creation time. + ################### + # for pass in 10 20 30 31 # 91% + for pass in 20 20 10 # 50% + do + ((thiscount=(((histo_pool_size*pass)/100)/sum_filesizes))) + + ((total_count+=thiscount)) + for rb in $(seq ${min_rsbits} ${max_rsbits}) + do + this_rs=$(echo "2^${rb}" | bc) + if [ ${this_rs} -gt ${max_pool_record_size} ]; then + continue + fi + + if [ ! -d /${pool}/B_${this_rs} ]; then + zfs create ${pool}/B_${this_rs} + zfs set recordsize=${this_rs} \ + ${pool}/B_${this_rs} + fi + #################### + # Create the files in the devices and datasets + # of the right size. 
The files are filled + # with random data to defeat the compression + # + # Note that the dd output is suppressed unless + # there are errors + #################### + + dd if=/dev/urandom \ + of=/${pool}/B_${this_rs}/file_${filenum} \ + bs=${this_rs} count=${thiscount} \ + iflag=fullblock 2>&1 | \ + egrep -v -e "records in" -e "records out" \ + -e "bytes.*copied" + ((filenum+=1)) + done + done + + #################### + # Testing showed that on some devices, unless the pool is + # synchronized, that the block counts will be below the + # anticipated sizes since not all of the blocks will be flushed + # to the device. This 'sync' command prevents that from + # happening. + #################### + log_must zpool sync ${pool} +} +function histo_check_test_pool +{ + if [ $# -ne 1 ]; then + log_note "histo_check_test_pool: insufficient parameters" + log_fail "hctp: 1 requested $# received" + fi + typeset pool=$1 + + set -A recordsizes + set -A recordcounts + typeset -i rb + typeset -i min_rsbits=9 #512 + typeset -i max_rsbits=SPA_MAXBLOCKSHIFT+1 + typeset -i this_rs + typeset -i this_ri + typeset -i sum_filesizes=0 + typeset dumped + typeset stripped + + let histo_check_pool_size=$(get_pool_prop size ${pool}) + if [[ ! ${histo_check_pool_size} =~ ${re_number} ]]; then + log_fail "histo_check_pool_size is not numeric ${histo_check_pool_size}" + fi + let max_pool_record_size=$(get_prop recordsize ${pool}) + if [[ ! 
${max_pool_record_size} =~ ${re_number} ]]; then + log_fail "hctp: max_pool_record_size is not numeric ${max_pool_record_size}" + fi + + dumped="${TEST_BASE_DIR}/${pool}_dump.txt" + stripped="${TEST_BASE_DIR}/${pool}_stripped.txt" + + zdb -Pbbb ${pool} | \ + tee ${dumped} | \ + sed -e '1,/^block[ ][ ]*psize[ ][ ]*lsize.*$/d' \ + -e '/^size[ ]*Count/d' -e '/^$/,$d' \ + > ${stripped} + + sum_filesizes=$(echo "2^21"|bc) + + ################### + # generate 10% + 20% + 30% + 31% = 91% of the filespace + # attempting to use 100% will lead to no space left on device + # Heuristic testing showed that 91% was the practical upper + # bound on the default 4G zpool (mirrored) that is used in + # testing. + # + # In order to expedite testing, we will only fill 2G (of 4G) + # of the test pool. You may want to modify this for + # standalone testing. + # + # In filling only 50% of the pool, we create one object on + # each "pass" below to achieve multiple objects per record + # size. Creating one file per object would lead to + # excessive file creation time. + ################### + # for pass in 10 20 30 31 # 91% + for pass in 20 20 10 # 50% + do + ((thiscount=(((histo_check_pool_size*pass)/100)/sum_filesizes))) + + for rb in $(seq ${min_rsbits} ${max_rsbits}) + do + blksize=$(echo "2^$rb"|bc) + if [ $blksize -le $max_pool_record_size ]; then + ((recordcounts[$blksize]+=thiscount)) + fi + done + done + + ################### + # compare the above computed counts for blocks against + # lsize count. Since some devices have a minimum hardware + # blocksize > 512, we cannot compare against the asize count. + # E.G., if the HWBlocksize = 4096, then the asize counts for + # 512, 1024 and 2048 will be zero and rolled up into the + # 4096 blocksize count for asize. For verification we stick + # to just lsize counts. + # + # The max_variance is hard-coded here at 10%. 
testing so far + # has shown this to be in the range of 2%-8% so we leave a + # generous allowance... This might need changes in the future + ################### + let max_variance=10 + let fail_value=0 + let error_count=0 + log_note "Comparisons for ${pool}" + log_note "Bsize is the blocksize, Count is predicted value" + log_note "Bsize\tCount\tpsize\tlsize\tasize" + while read -r blksize pc pl pm lc ll lm ac al am + do + if [ $blksize -gt $max_pool_record_size ]; then + continue + fi + log_note \ + "$blksize\t${recordcounts[${blksize}]}\t$pc\t$lc\t$ac" + + ################### + # get the computed record count and compute the + # difference percentage in integer arithmetic + ################### + rc=${recordcounts[${blksize}]} + ((rclc=(rc-lc)<0?lc-rc:rc-lc)) # absolute value + ((dp=(rclc*100)/rc)) + + ################### + # Check against the allowed variance + ################### + if [ $dp -gt ${max_variance} ]; then + log_note \ + "Expected variance < ${max_variance}% observed ${dp}%" + if [ ${dp} -gt ${fail_value} ]; then + fail_value=${dp} + ((error_count++)) + fi + fi + done < ${stripped} + if [ ${fail_value} -gt 0 ]; then + if [ ${error_count} -eq 1 ]; then + log_note "hctp: There was ${error_count} error" + else + log_note "hctp: There were a total of ${error_count} errors" + fi + log_fail \ + "hctp: Max variance of ${max_variance}% exceeded, saw ${fail_value}%" + fi +} + +log_assert "Verify zdb -Pbbb (block histogram) works as expected" +log_onexit cleanup +verify_runnable "global" +verify_disk_count "$DISKS" 2 + +default_mirror_setup_noexit $DISKS + +histo_populate_test_pool $TESTPOOL + +histo_check_test_pool $TESTPOOL + +log_pass "Histogram for zdb"