Skip to content

Instantly share code, notes, and snippets.

@phiresky
Last active May 29, 2019 21:25
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save phiresky/7b7559623317640e17fd783c7b09ab39 to your computer and use it in GitHub Desktop.
Save phiresky/7b7559623317640e17fd783c7b09ab39 to your computer and use it in GitHub Desktop.
qdirstat server script

Use this to run qdirstat for a server on which it is not installed:

ssh root@server 'curl -sL https://git.io/fj42l | perl -- - / -' | qdirstat -c /dev/stdin

Yes, the arguments to perl are perl -- - dir_to_scan -. Beautiful, isn't it?

#!/usr/bin/perl -w
#
# qdirstat-cache-writer - script to write QDirStat cache files from cron jobs
#
# QDirStat can read its information from cache files. This is a lot faster than
# reading all the directories in a directory tree and obtaining detailed
# information (size, type, last modification time) for each file and directory
# with the opendir() / readdir() and lstat() system calls for each individual
# file and directory.
#
# QDirStat can also write those cache files ("Write Cache File..." from the
# "File" menu), but the whole point of cache files is being able to do that in
# the background when the user does not have to wait for it - like in a cron
# job running in the middle of the night. QDirStat itself cannot be used to do
# that because it is a KDE program and thus an X program that needs access to
# an X display - which cron does not provide.
#
# This is what this Perl script is for.
#
# Usage:
# qdirstat-cache-writer [-lvdh] <directory> [<cache-file-name>]
#
# If not specified, <cache-file-name> defaults to ".qdirstat.cache.gz"
# in <directory>.
#
# If <cache-file-name> ends with ".gz", it will be compressed with gzip.
# qdirstat can read gzipped and plain text cache files.
#
# -l long format - always add full path, even for plain files
# -m scan mounted file systems (cross file system boundaries)
# -v verbose
# -d debug
# -h help (usage message)
#
# Author: Stefan Hundhammer <Stefan.Hundhammer@gmx.de>
#
# This script is freeware. Fuck the lawyers and their legalese bullshit.
# All they ever contribute to software is those legalese headers that are just
# in the way of people working with the code.
# TO DO:
#
# - ensure to use UTF-8
use strict;
use English;
use Getopt::Std;
use Fcntl ':mode';
use Encode;
use vars qw( $opt_l $opt_m $opt_v $opt_d $opt_h );
# Forward declarations.
sub main();
# Global variables.
my $long_format = 0;
my $scan_mounted = 0;
my $verbose = 0;
my $debug = 0;
my $default_cache_file_name = ".qdirstat.cache.gz";
my $toplevel_dev_no = undef;
my $toplevel_dev_name = undef;
my $unsafe_chars = "\x00-\x20%";
my %escapes;
for (0..255) {
$escapes{chr($_)} = sprintf("%%%02X", $_);
}
# Call the main function and exit.
# DO NOT enter any other code outside a sub -
# any variables would otherwise be global.
main();
exit 0;
#-----------------------------------------------------------------------------
sub uri_escape
{
my ($text, $dontcare) = @_;
$text =~ s/([\x00-\x20%])/$escapes{$1}/ge;
return $text;
}
sub main()
{
# Extract command line options.
# This will set a variable opt_? for any option,
# e.g. opt_v if option '-v' is passed on the command line.
getopts('lmvdh');
usage() if $opt_h;
$long_format = 1 if $opt_l;
$scan_mounted = 1 if $opt_m;
$verbose = 1 if $opt_v;
$debug = 1 if $opt_d;
# One or two parameters are required
# (yes, Perl does weird counting)
usage() if $#ARGV < 0 || $#ARGV > 1;
my $toplevel_dir = shift @ARGV;
$toplevel_dir = absolute_path( $toplevel_dir );
my $cache_file_name;
if ( $#ARGV < 0 ) # No more command line arguments?
{
$cache_file_name = $toplevel_dir . "/" . $default_cache_file_name;
}
else
{
$cache_file_name = shift @ARGV;
}
write_cache_file( $toplevel_dir, $cache_file_name );
}
#-----------------------------------------------------------------------------
# Write a QDirStat cache.
#
# Parameters:
# $toplevel_dir
# $cache_file_name
sub write_cache_file()
{
my ( $toplevel_dir, $cache_file_name ) = @_;
my $start_time = time();
my $cache_file_name_part = $cache_file_name . ".part";
my $open_type = ">";
my $open_name = $cache_file_name_part;
if ( $cache_file_name =~ /.*\.gz$/ )
{
$open_type = "|-";
$open_name = "gzip > '$open_name'";
}
if ( $cache_file_name eq "-" )
{
open( CACHE, ">" . $cache_file_name ) or die "Can't open $cache_file_name";
}
else
{
open( CACHE, $open_type , $open_name ) or die "Can't open $cache_file_name";
}
binmode( CACHE, ":bytes" );
write_cache_header();
write_cache_tree( $toplevel_dir );
my $elapsed = time() - $start_time;
my ( $sec, $min, $hours ) = gmtime( $elapsed );
printf CACHE "# Elapsed time: %d:%02d:%02d\n", $hours, $min, $sec;
close( CACHE );
rename($cache_file_name_part, $cache_file_name);
}
#-----------------------------------------------------------------------------
# Write the cache file header
#
# Parameters:
# ---
sub write_cache_header()
{
print CACHE <<'EOF';
[qdirstat 1.0 cache file]
# Generated by qdirstat-cache-writer
# Do not edit!
#
# Type path size mtime <optional fields>
EOF
}
#-----------------------------------------------------------------------------
# Write cache entries for a directory tree.
#
# Parameters:
# $dir Starting directory
sub write_cache_tree($); # Need prototype for calling recursively
sub write_cache_tree($)
{
my ( $dir ) = @_;
logf( "Reading $dir" );
my @files;
my @subdirs;
my $success = opendir( DIR, $dir );
if ( ! $success )
{
my $msg = "Can't open $dir: $ERRNO\n";
print CACHE "# $msg\n";
logf( $msg );
return;
}
my $entry;
while ( $entry = readdir( DIR ) )
{
if ( $entry ne "." and
$entry ne ".." )
{
my $full_path = $dir . "/" . $entry;
if ( -d $full_path &&
! -l $full_path )
{
push @subdirs, $entry;
}
else
{
push @files, $entry;
}
}
}
closedir( DIR );
if ( write_dir_entry( $dir ) )
{
my $file;
foreach $file ( @files )
{
write_file_entry( $dir, $file );
}
my $subdir;
foreach $subdir ( @subdirs )
{
write_cache_tree( $dir . "/" . $subdir );
}
}
}
#-----------------------------------------------------------------------------
# Write a cache entry for a directory.
#
# If the device of this directory is not the same as the toplevel device
# (i.e., if this is a mount point and thus file system boundaries would be
# crossed) only a comment line is written and an error value '0' is returned
# unless the "-m" command line option was used.
#
# Parameters:
# $dir directory
#
# Return value:
# 1 OK to continue
# 0 don't continue, file system boundary would be crossed
sub write_dir_entry()
{
my ( $dir ) = @_;
my @lstat_result = lstat( $dir );
if ( scalar @lstat_result == 0 ) # Empty array -> lstat() failed
{
my $msg = "lstat() failed for $dir";
print CACHE "# $msg\n";
logf( $msg );
return;
}
my ( $dev_no,
$ino,
$mode,
$links,
$uid,
$gid,
$rdev,
$size,
$atime,
$mtime,
$ctime,
$blksize,
$blocks ) = @lstat_result;
$dir =~ s://+:/:g; # Replace multiple // with one
my $escaped_dir = uri_escape( $dir, $unsafe_chars );
# Write cache file entry for this directory (even if it's a mount point)
print CACHE "D $escaped_dir";
print CACHE "\t$size";
printf CACHE "\t0x%x\n", $mtime;
if ( ! defined( $toplevel_dev_no ) )
{
$toplevel_dev_no = $dev_no;
$toplevel_dev_name = device_name( $dir );
print CACHE "# Device: $toplevel_dev_name\n\n";
}
if ( $dev_no == $toplevel_dev_no || $scan_mounted )
{
return 1;
}
my $dev_name = device_name( $dir );
my $fs_boundary = $dev_name ne $toplevel_dev_name;
my $msg;
if ( $fs_boundary )
{
$msg = "File system boundary at mount point $dir on device $dev_name";
}
else
{
$msg = "Mount point $dir is still on the same device $dev_name";
}
print CACHE "# $msg\n\n";
logf( $msg );
return ! $fs_boundary;
}
#-----------------------------------------------------------------------------
# Get the device name where a directory is on from the 'df' command.
#
# Parameters:
# $dir directory
#
# Return value:
# device name ("/dev/sda3", "/dev/system/root")
sub device_name()
{
my ( $dir ) = @_;
my @df_output = `df "$dir" 2>/dev/null`;
return "<unknown>" if scalar @df_output < 1;
shift @df_output; # Remove header line
my ( $line ) = @df_output;
my ( $device_name ) = split( '\s+', $line );
deb( "Directory $dir is on device $device_name" );
return $device_name;
}
#-----------------------------------------------------------------------------
# Write a cache entry for a plain file (or other non-directory i-node)
#
# Parameters:
# $dir directory
# $name file name (without path)
sub write_file_entry()
{
my ( $dir, $name ) = @_;
my @lstat_result = lstat( $dir . "/" . $name );
if ( scalar @lstat_result == 0 ) # Empty array -> lstat() failed
{
my $msg = "lstat() failed for $dir/$name";
print CACHE "# $msg\n";
logf( $msg );
return;
}
my ( $dev,
$ino,
$mode,
$links,
$uid,
$gid,
$rdev,
$size,
$atime,
$mtime,
$ctime,
$blksize,
$blocks ) = @lstat_result;
my $type = "F";
if ( S_ISREG ( $mode ) ) { $type = "F"; }
elsif ( S_ISLNK ( $mode ) ) { $type = "L"; }
elsif ( S_ISBLK ( $mode ) ) { $type = "BlockDev"; }
elsif ( S_ISCHR ( $mode ) ) { $type = "CharDev"; }
elsif ( S_ISFIFO( $mode ) ) { $type = "FIFO"; }
elsif ( S_ISSOCK( $mode ) ) { $type = "Socket"; }
print CACHE "$type";
$name = uri_escape( $name, $unsafe_chars );
if ( $long_format )
{
$dir = uri_escape( $dir, $unsafe_chars );
my $full_path = $dir . "/" . $name;
$full_path =~ s://+:/:g; # Replace multiple // with one
print CACHE " $full_path";
}
else
{
print CACHE "\t$name";
}
print CACHE "\t$size";
printf CACHE "\t0x%x", $mtime;
print CACHE "\tblocks: $blocks" if $blocks > 0 && $blocks * 512 < $size; # Sparse file?
print CACHE "\tlinks: $links" if $links > 1;
print CACHE "\n";
}
#-----------------------------------------------------------------------------
# Make an absolute path of a possible relative path.
#
# Parameters:
# $dir relative or absolute path
#
# Return value:
# absolute path
sub absolute_path()
{
my ( $dir ) = @_;
return $dir if ( $dir =~ '^/' );
my $save_dir = $ENV{'PWD'};
chdir( $dir );
$dir = $ENV{'PWD'};
chdir $save_dir;
return $dir;
}
#-----------------------------------------------------------------------------
# Log a message to stdout if verbose mode is set
# (command line option '-v').
#
# Parameters:
# Messages to write (any number).
sub logf()
{
my $msg;
if ( $verbose )
{
foreach $msg( @_ )
{
print $msg . " ";
}
$OUTPUT_AUTOFLUSH = 1; # inhibit buffering
print "\n";
}
}
#-----------------------------------------------------------------------------
# Log a debugging message to stdout if debug mode is set
# (command line option '-d').
#
# Parameters:
# Messages to write (any number).
sub deb()
{
my $msg;
if ( $debug )
{
foreach $msg( @_ )
{
print $msg . " ";
}
$OUTPUT_AUTOFLUSH = 1; # inhibit buffering
print "\n";
}
}
#-----------------------------------------------------------------------------
# Print usage message and abort program.
#
# Parameters:
# ---
sub usage()
{
die <<"USAGE-END";
qdirstat-cache-writer - script to write QDirStat cache files from cron jobs
QDirStat can read its information from cache files. This is a lot faster than
reading all the directories in a directory tree and obtaining detailed
information (size, type, last modification time) for each file and directory
with the opendir() / readdir() and lstat() system calls for each individual
file and directory.
QDirStat can also write those cache files (\"Write Cache File...\" from the
\"File\" menu), but the whole point of cache files is being able to do that in
the background when the user does not have to wait for it - like in a cron
job running in the middle of the night. QDirStat itself cannot be used to do
that because it is a KDE program and thus an X program that needs access to
an X display - which cron does not provide.
This is what this Perl script is for.
Usage:
$0 [-ldvh] <directory> [<cache-file-name>]
If not specified, <cache-file-name> defaults to \"$default_cache_file_name\"
in <directory>.
If <cache-file-name> ends with \".gz\", it will be compressed with gzip.
qdirstat can read gzipped and plain text cache files.
-l long format - always add full path, even for plain files
-m scan mounted file systems (cross file system boundaries)
-v verbose
-d debug
-h help (this usage message)
USAGE-END
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment