Last active
September 21, 2017 23:17
-
-
Save szaydel/1e68ec3584bb3ebd7ec1 to your computer and use it in GitHub Desktop.
Dtrace snippets for observing behaviour of ZIL
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
dtrace -qn ' | |
/* Add a number greater than 0 after the script on the command line, to | |
* tell the script how often to print results to screen. If no value is | |
* given, script will fail immediately. Value must be positive and is in | |
* seconds. Results will be printed "this many" seconds apart, but will | |
* be normalized to a per-second result. | |
*/ | |
int ts; | |
BEGIN { ts = timestamp; } | |
::zfs_log_write:entry { | |
this->file_path = stringof(args[3]->z_vnode->v_path); | |
@[this->file_path] = quantize(arg5); | |
@cnt = count(); | |
@bytes = sum(arg5); | |
bypass_log = `zfs_slog_bypass_ok == 0 ? "will not" : "will"; | |
} | |
tick-$$1sec { | |
this->now = timestamp; | |
this->delta = (this->now - ts) / 1000000000; | |
/* Adjust data to reflect per second values */ | |
normalize(@, this->delta); | |
normalize(@cnt, this->delta); | |
normalize(@bytes, this->delta); | |
printf("----------------------------------------\n"); | |
printf("Summary:\n"); | |
printf("IOs >%d(kb) %s bypass log\n", | |
`zfs_immediate_write_sz >> 10, bypass_log); | |
printa("IOs through ZIL/sec: %@d\nBytes through ZIL/sec: %@d\n", | |
@cnt, @bytes); | |
printf("----------------------------------------\n"); | |
printa(" Filepath: %s\n IOsize\t\t\t\tIOs/sec %@d\n", @); | |
trunc(@); trunc(@cnt); trunc(@bytes); | |
ts = this->now; | |
}' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* Observe size buckets of IOs through the zfs_log_write function and measure zil_commit latency. */ | |
dtrace -qn ' | |
::zil_commit:entry { | |
self->zilog = args[0] ; | |
self->spa = args[0]->zl_spa ; | |
self->t = timestamp ; | |
} | |
::zfs_log_write:entry { | |
this->spa = args[0]->zl_spa ; | |
this->sync = args[0]->zl_sync == 0 ? "Async" : "Sync" ; | |
@x4[this->spa->spa_name, this->sync] = | |
sum(args[5] <= 0x1000 ? 1 : 0) ; | |
@x8[this->spa->spa_name, this->sync] = | |
sum((args[5] <= 0x2000 && args[5] > 0x1000) ? 1 : 0) ; | |
@x16[this->spa->spa_name, this->sync] = | |
sum((args[5] <= 0x4000 && args[5] > 0x2000) ? 1 : 0) ; | |
@x32[this->spa->spa_name, this->sync] = | |
sum((args[5] <= 0x8000 && args[5] > 0x4000) ? 1 : 0) ; | |
@x64[this->spa->spa_name, this->sync] = | |
sum((args[5] <= 0x10000 && args[5] > 0x8000) ? 1 : 0) ; | |
@x128[this->spa->spa_name, this->sync] = | |
sum((args[5] <= 0x20000 && args[5] > 0x10000) ? 1 : 0) ; | |
@xulim[this->spa->spa_name, this->sync] = | |
sum(args[5] > 0x20000 ? 1 : 0) ; | |
} | |
::zil_commit:return /self->t/ { | |
this->sync = self->zilog == 0 ? "Async" : "Sync" ; | |
@lathist[self->spa->spa_name, this->sync] = | |
quantize((timestamp - self->t) / 1000); | |
self->t = 0 ; self->spa = 0 ; self->zilog = 0 ; | |
} | |
END { | |
printa("\n Pool [%s] zil_commit %s IO record latency%@d\n", | |
@lathist); | |
printf("IO Count by Size:\n") ; | |
printf("pool type %-8s %-8s %-8s %-8s %-8s %-8s %-8s\n", | |
"4kb","8kb","16kb","32kb","64kb","128kb",">128kb") ; | |
printa("%-8s %-8s %-8@d %-8@d %-8@d %-8@d %-8@d %-8@d %-8@d\n", | |
@x4, @x8, @x16, @x32, @x64, @x128, @xulim) ; | |
}' | |
/* Sizes of IOs through the zfs_log_write */ | |
dtrace -qn '::zfs_log_write:entry { | |
this->ts = (walltimestamp / 1000000000) * 1000000000; | |
@sz[this->ts, args[5]] = count(); | |
} | |
END { | |
printf("timestamp,io.size,count\n"); | |
printa("%Y,%d,%@d\n", @sz); | |
}' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
dtrace -qn ' | |
/* | |
* Measure IO sizes and latency through the zil code. This script hides | |
* reality through effectively quantizing data to multiples of 4K | |
* numbers, where actual sizes may not be on those exact boundaries. | |
* We can get a sense for how much we are lied to by looking at the total | |
* count and comparing against the count given by base2 column, which counts | |
* each IO that is actually sized on the bucket boundaries. | |
*/ | |
BEGIN { | |
printf("%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n", | |
"timestamp","sz.1k","sz.4k","sz.8k","sz.16k","sz.32k","sz.64k", | |
"sz.128k","sz.256k","sz.512k", "sz.1m","unk", | |
"l1k","l4k","l8k","l16k","l32k","l64k","l128k","l256k","l512k", "l1m","lunk", "mult.4k","total.ios", "throughput"); | |
} | |
::zfs_log_write:entry { | |
/* | |
* this->wt reduces precision down to 1 second, to make result | |
* presentation a little bit easier. | |
*/ | |
this->wt = (walltimestamp / 1000000000) * 1000000000; | |
self->start = timestamp; | |
this->sz = args[5]; | |
this->m4k = this->sz%4096 == 0 ? 1 : 0; | |
this->l = this->sz > 0 && this->sz <= 0x400 ? "1k" : | |
this->sz > 0x400 && this->sz <= 0x1000 ? "4k" : | |
this->sz > 0x1000 && this->sz <= 0x2000 ? "8k" : | |
this->sz > 0x2000 && this->sz <= 0x4000 ? "16k" : | |
this->sz > 0x4000 && this->sz <= 0x8000 ? "32k" : | |
this->sz > 0x8000 && this->sz <= 0x10000 ? "64k" : | |
this->sz > 0x10000 && this->sz <= 0x20000 ? "128k" : | |
this->sz > 0x20000 && this->sz <= 0x40000 ? "256k" : | |
this->sz > 0x40000 && this->sz <= 0x80000 ? "512k" : | |
this->sz > 0x80000 && this->sz <= 0x100000 ? "1m" : | |
"unknown"; | |
@sz1k[this->wt] = sum(this->l == "1k" ? 1 : 0); | |
@sz4k[this->wt] = sum(this->l == "4k" ? 1 : 0); | |
@sz8k[this->wt] = sum(this->l == "8k" ? 1 : 0); | |
@sz16k[this->wt] = sum(this->l == "16k" ? 1 : 0); | |
@sz32k[this->wt] = sum(this->l == "32k" ? 1 : 0); | |
@sz64k[this->wt] = sum(this->l == "64k" ? 1 : 0); | |
@sz128k[this->wt] = sum(this->l == "128k" ? 1 : 0); | |
@sz256k[this->wt] = sum(this->l == "256k" ? 1 : 0); | |
@sz512k[this->wt] = sum(this->l == "512k" ? 1 : 0); | |
@sz1m[this->wt] = sum(this->l == "1m" ? 1 : 0); | |
@szu[this->wt] = sum(this->l == "unknown" ? 1 : 0); | |
@bucksize[this->wt] = sum(this->m4k); | |
/* IOPS, assuming regular interval sampling */ | |
@tot[this->wt] = count(); | |
/* Throughput assuming regular interval sampling */ | |
@tput[this->wt] = sum(this->sz); | |
} | |
::zfs_log_write:return /this->sz != NULL/ { | |
this->delta = (timestamp - self->start); /* note, this is nanoseconds */ | |
@l1k[this->wt] = avg(this->l == "1k" ? this->delta : 0); | |
@l4k[this->wt] = avg(this->l == "4k" ? this->delta : 0); | |
@l8k[this->wt] = avg(this->l == "8k" ? this->delta : 0); | |
@l16k[this->wt] = avg(this->l == "16k" ? this->delta : 0); | |
@l32k[this->wt] = avg(this->l == "32k" ? this->delta : 0); | |
@l64k[this->wt] = avg(this->l == "64k" ? this->delta : 0); | |
@l128k[this->wt] = avg(this->l == "128k" ? this->delta : 0); | |
@l256k[this->wt] = avg(this->l == "256k" ? this->delta : 0); | |
@l512k[this->wt] = avg(this->l == "512k" ? this->delta : 0); | |
@l1m[this->wt] = avg(this->l == "1m" ? this->delta : 0); | |
@lu[this->wt] = sum(this->l == "unknown" ? this->delta : 0); | |
} | |
tick-1sec { | |
printa("%Y,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d\n", | |
@sz1k, @sz4k, @sz8k, @sz16k, @sz32k, @sz64k, @sz128k, @sz256k, | |
@sz512k, @sz1m, @szu, @l1k, @l4k, @l8k, @l16k, @l32k, | |
@l64k, @l128k, @l256k, @l512k, @l1m, @lu, @bucksize, @tot, | |
@tput); | |
trunc(@sz1k); trunc(@sz4k); trunc(@sz8k); | |
trunc(@sz16k); trunc(@sz32k); trunc(@sz64k); trunc(@sz128k); trunc(@sz256k); | |
trunc(@sz512k); trunc(@sz1m); trunc(@szu); | |
trunc(@l1k); trunc(@l4k); trunc(@l8k); | |
trunc(@l16k); trunc(@l32k); trunc(@l64k); trunc(@l128k); trunc(@l256k); | |
trunc(@l512k); trunc(@l1m); trunc(@lu); | |
trunc(@bucksize); trunc(@tot); trunc(@tput); | |
}' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
dtrace -qn ' | |
BEGIN {totalbytes=0; total = 0;} | |
::zfs_log_write:entry { totalbytes+=args[5]; total++; @ = quantize(args[5]); } | |
END { | |
printf("\tTotal IOs: %d Total KBytes: %d\n", total, totalbytes >> 10); printa("\tIO size (bytes): %@d\n", @); }' | |
// IO size through the zil by process ID and execname. Includes both the size of IO, | |
// and amount of work done per microsecond of time spent in the zfs_log_write function. | |
dtrace -qn ' | |
self int64_t bytes; | |
self int64_t delta; | |
::zfs_log_write:entry { | |
@[pid, execname] = quantize(args[5]); | |
self->ts = timestamp; | |
self->bytes += args[5]; | |
} | |
::zfs_log_write:return /(timestamp - self->ts) > 0/ { | |
self->delta += (timestamp - self->ts); /* measure delta */ | |
} | |
::zfs_log_write:return /self->delta > 100000/ { | |
/* How many bytes per microsecond? */ | |
this->delta_us = self->delta / 1000; | |
@bytes_us[pid, execname] = quantize(self->bytes/this->delta_us); | |
self->ts = 0; self->bytes = 0; self->delta = 0; | |
} | |
tick-10sec { | |
printa("IO Size\npid: %d, name: %s %@d\n", @); | |
printa("Bytes/us\npid: %d, name: %s %@d\n", @bytes_us); | |
trunc(@); trunc(@bytes_us); | |
}' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/sbin/dtrace -Cs | |
#include <sys/fs/zfs.h> | |
#pragma D option quiet | |
/* | |
* CDDL HEADER START | |
* | |
* The contents of this file are subject to the terms of the | |
* Common Development and Distribution License (the "License"). | |
* You may not use this file except in compliance with the License. | |
* | |
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
* or http://www.opensolaris.org/os/licensing. | |
* See the License for the specific language governing permissions | |
* and limitations under the License. | |
* | |
* When distributing Covered Code, include this CDDL HEADER in each | |
* file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
* If applicable, add the following below this CDDL HEADER, with the | |
* fields enclosed by brackets "[]" replaced with your own identifying | |
* information: Portions Copyright [yyyy] [name of copyright owner] | |
* | |
* CDDL HEADER END | |
* | |
* Copyright (c) 2016, RackTop Systems. | |
*/ | |
string logbias[zfs_logbias_op_t]; | |
int x; | |
inline int BLKSZ_ABOVE_128K = -0xffff; /* Just a magic value */ | |
/* This is a good way to play with this a little bit. Just copy and paste in | |
* a separate shell and let it roll. Make sure /bp/test exists. | |
while :; do dd if=/dev/urandom of=/bp/test/out.bin bs=4096 count=$RANDOM ; | |
dd if=/dev/urandom of=/bp/test/out.bin bs=0x2000 count=$RANDOM ; | |
dd if=/dev/urandom of=/bp/test/out.bin bs=0x4000 count=$RANDOM ; | |
sleep 1; done | |
*/ | |
BEGIN { | |
printf("%s,%s,%s,%s,%s,%s,%s,%s,%s\n", "poolname", "bias", | |
"4K", "8K", "16K", "32K", "64K", "128K", "gt.128K" | |
); | |
logbias[ZFS_LOGBIAS_LATENCY] = "latency"; | |
logbias[ZFS_LOGBIAS_THROUGHPUT] = "throughput"; | |
} | |
::zil_lwb_write_start:entry { | |
this->pooln = stringof(args[0]->zl_dmu_pool->dp_spa->spa_name); | |
this->size = args[1]->lwb_sz; | |
this->lb = args[0]->zl_logbias; | |
/* We need to fit into one of these buckewith anything larger than | |
* 128K tagged with BLKSZ_ABOVE_128K, and ultimately resulting in | |
* being counted by @bgt128k aggregate. | |
*/ | |
this->buck = 0x1000; /* We assume this is smallest possible IO size. */ | |
this->buck = ( this->size > 0x1000 ) ? 8192 : this->buck ; | |
this->buck = ( this->size > 0x2000 ) ? 16384 : this->buck ; | |
this->buck = ( this->size >= 0x4000 ) ? 32768 : this->buck ; | |
this->buck = ( this->size >= 0x8000 ) ? 65536 : this->buck ; | |
this->buck = ( this->size > 0x10000 ) ? 131072 : this->buck ; | |
this->buck = ( this->size > 0x20000 ) ? BLKSZ_ABOVE_128K : this->buck ; | |
@b0x1000[this->pooln, logbias[this->lb]] = | |
sum(this->buck == 0x1000 ? 1 : 0); | |
@b0x2000[this->pooln, logbias[this->lb]] = | |
sum(this->buck == 0x2000 ? 1 : 0); | |
@b0x4000[this->pooln, logbias[this->lb]] = | |
sum(this->buck == 0x4000 ? 1 : 0); | |
@b0x8000[this->pooln, logbias[this->lb]] = | |
sum(this->buck == 0x8000 ? 1 : 0); | |
@b0x10000[this->pooln, logbias[this->lb]] = | |
sum(this->buck == 0x10000 ? 1 : 0); | |
@b0x20000[this->pooln, logbias[this->lb]] = | |
sum(this->buck == 0x20000 ? 1 : 0); | |
@bgt128k[this->pooln, logbias[this->lb]] = | |
sum(this->buck == BLKSZ_ABOVE_128K ? 1 : 0); | |
++x; | |
} | |
tick-1sec /x/ { | |
printa("%s,%s,%@d,%@d,%@d,%@d,%@d,%@d,%@d\n", | |
@b0x1000, @b0x2000, @b0x4000, @b0x8000, @b0x10000, @b0x20000, @bgt128k | |
); | |
trunc(@b0x1000); trunc(@b0x2000); | |
trunc(@b0x4000); trunc(@b0x8000); trunc(@b0x10000); | |
trunc(@b0x20000); trunc(@bgt128k); | |
x = 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment