Skip to content

Instantly share code, notes, and snippets.

@yoe
Last active February 27, 2018 10:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save yoe/1e0f11effb9ac9ec15cd1f119299ca30 to your computer and use it in GitHub Desktop.
Save yoe/1e0f11effb9ac9ec15cd1f119299ca30 to your computer and use it in GitHub Desktop.
Script that reads the Grid Engine accounting file, parses qstat output, and exports both as Prometheus metrics
#!/usr/bin/perl -w
use strict;
use warnings;
use Scalar::Util qw(looks_like_number);
# Accounting file that the main loop tails through the FH handle.
my $accounting_file = '/opt/sge/default/common/accounting';

# Three-argument open so the mode can never be taken from the file name.
# A failure is reported but is not fatal: the script can still publish the
# qstat-derived metrics without the accounting data.
open(FH, '<', $accounting_file)
    or warn "cannot open $accounting_file: $!\n";

# Names of the colon-separated columns of an accounting record, in file order.
my @fields = qw(
    qname hostname group owner job_name job_number account priority
    submission_time start_time end_time failed exit_status
    ru_wallclock ru_utime ru_stime ru_maxrss ru_ixrss ru_ismrss ru_idrss
    ru_isrss ru_minflt ru_majflt ru_nswap ru_inblock ru_oublock ru_msgsnd
    ru_msgrcv ru_nsignals ru_nvcsw ru_nivcsw
    project department granted_pe slots task_number cpu mem io category
    iow pe_taskid maxvmem arid ar_sub_time
);

# Accumulated metrics: metric name => { label string => running total }.
my %results;
$results{job_count} = {}; # number of jobs
$results{job_time} = {}; # total time between start and end time
$results{job_delay} = {}; # total time between submission and start time
$results{job_slots} = {}; # number of used slots
$results{job_cpu} = {}; # total CPU time used

# Environment needed by the qstat command that the main loop runs.
$ENV{SGE_ROOT} = "/opt/sge";
$ENV{SGE_CELL} = "default";

# Prepend the gridengine binaries. An unset or empty PATH is left out so the
# result never ends in ':' (which would put the current directory on the
# search path).
$ENV{PATH} = join ':', "$ENV{SGE_ROOT}/bin/lx-amd64",
    (defined $ENV{PATH} && length $ENV{PATH} ? $ENV{PATH} : ());
# Per-metric metadata, keyed by metric name (without the "gridengine_"
# prefix): %helps holds the HELP text and %types the Prometheus metric type.
# Both are filled from one table so a metric cannot have one without the other.
my %helps;
my %types;
for my $spec (
    [ job_count => "counter", "The total number of finished gridengine jobs" ],
    [ job_time  => "counter", "The total time between start and end time of jobs" ],
    [ job_delay => "counter", "The total time between submission and start time of jobs" ],
    [ job_slots => "counter", "The number of slots used for jobs" ],
    [ job_cpu   => "counter", "The CPU time in seconds used for all jobs" ],
    [ slots     => "gauge",   "The number of known slots" ],
    [ waiting   => "gauge",   "The number of jobs waiting to be scheduled" ],
) {
    my ($metric, $type, $help) = @{$spec};
    $types{$metric} = $type;
    $helps{$metric} = $help;
}
# add_val(HASHREF, LABEL, VALUE)
#
# Adds the absolute value of VALUE to the running total stored under LABEL
# in HASHREF, creating the entry at 0 first when it does not exist yet.
# A VALUE that does not look like a number (including undef) is ignored and
# the hash is left untouched.
sub add_val($$$) {
    my ($totals, $key, $amount) = @_;
    return unless looks_like_number($amount);
    $totals->{$key} = 0 if !exists $totals->{$key};
    return $totals->{$key} += abs($amount);
}
# Escapes a string for use as a Prometheus label value: backslashes and
# double quotes are prefixed with a backslash.
sub _label_value {
    my ($text) = @_;
    $text =~ s/([\\"])/\\$1/g;
    return $text;
}

# Main collection loop; it never terminates. Each pass:
#   1. reads the accounting records appended to FH since the previous pass
#      and adds them to the job_* totals in %results (these keep growing for
#      the lifetime of the process);
#   2. rebuilds the slots and waiting metrics from the output of qstat;
#   3. writes every metric to a temporary file and renames it over the
#      published .prom file;
#   4. sleeps for 40 seconds.
for (;;) {
    # Offset of the record about to be read, so that a record which is still
    # being written can be re-read in full on a later pass.
    my $record_start = tell(FH);

    RECORD:
    while (defined(my $record = <FH>)) {
        # No trailing newline: the writer has not finished this line yet.
        # Rewind to its start and pick it up next time instead of parsing a
        # truncated record.
        if ($record !~ /\n\z/) {
            seek(FH, $record_start, 0);
            last RECORD;
        }
        $record_start = tell(FH);

        next RECORD if $record =~ /^#/;
        chomp $record;

        # Map the colon-separated columns onto their names. The -1 limit
        # keeps empty trailing columns; columns beyond @fields are ignored.
        my %job;
        @job{@fields} = split /:/, $record, -1;

        # cpu is the last column used below; if it is missing the record is
        # too short to be a real accounting entry, so skip it rather than
        # count a job with undefined fields.
        next RECORD unless defined $job{cpu};

        # Label name => accounting column, in the order the labels are
        # written out.
        my $label = join ',',
            map { sprintf '%s="%s"', $_->[0], _label_value($job{ $_->[1] }) } (
                [ qname       => 'qname' ],
                [ hostname    => 'hostname' ],
                [ group       => 'group' ],
                [ owner       => 'owner' ],
                [ priority    => 'priority' ],
                [ exit_status => 'exit_status' ],
                [ department  => 'department' ],
                [ project     => 'project' ],
                [ pe          => 'granted_pe' ],
            );

        add_val($results{job_count}, $label, 1);
        add_val($results{job_time}, $label, $job{end_time} - $job{start_time});
        add_val($results{job_delay}, $label, $job{start_time} - $job{submission_time});
        add_val($results{job_slots}, $label, $job{slots});
        add_val($results{job_cpu}, $label, $job{cpu});
    }

    # The qstat-derived metrics describe the current state only, so they are
    # rebuilt from scratch on every pass.
    $results{slots}   = {};
    $results{waiting} = {};

    # List-form pipe open: qstat is executed directly, without a shell, so
    # the '*' user pattern needs no quoting.
    if (open(my $qstat, '-|', 'qstat', '-F', '-u', '*')) {
        # Report an explicit 0 when nothing is waiting instead of letting the
        # series disappear.
        $results{waiting}{""} = 0;

        while (defined(my $row = <$qstat>)) {
            # Rows of the form "<queue>@<host> ... /<used>/<total>".
            if ($row =~ /(.*)\@([-a-z0-9]+).*\/([0-9]+)\/([0-9]+)/) {
                my ($qname, $host, $slots_used, $total_slots) = ($1, $2, $3, $4);
                my $avail = $total_slots - $slots_used;
                add_val($results{slots}, "qname=\"$qname\",hostname=\"$host\",state=\"used\"", $slots_used);
                add_val($results{slots}, "qname=\"$qname\",hostname=\"$host\",state=\"available\"", $avail);
            }
            # Rows containing a " ?qw " state column are counted as waiting.
            if ($row =~ / .qw /) {
                add_val($results{waiting}, "", 1);
            }
        }
        close($qstat)
            or warn "qstat failed: " . ($! ? $! : "exit status $?") . "\n";
    }
    else {
        warn "cannot run qstat: $!\n";
    }

    # The temporary file lives next to the published file: rename() is not
    # reliable across filesystem boundaries, and a fixed name in the
    # world-writable /tmp could be pre-created by another user.
    # NOTE(review): the name does not end in ".prom", so presumably the
    # node-exporter textfile collector ignores it -- confirm.
    my $published = "/var/lib/prometheus/node-exporter/sge.prom";
    my $scratch   = "$published.tmp";

    open(my $out, '>', $scratch) or die "cannot write $scratch: $!";
    # Sorted so the file content is stable from one pass to the next.
    for my $metric (sort keys %results) {
        print {$out} "# TYPE gridengine_$metric $types{$metric}\n";
        print {$out} "# HELP gridengine_$metric $helps{$metric}\n";
        for my $labels (sort keys %{ $results{$metric} }) {
            my $series = 'gridengine_' . $metric;
            # An empty label string denotes a series without labels.
            $series .= '{' . $labels . '}' if length($labels) > 0;
            print {$out} "$series $results{$metric}{$labels}\n";
        }
    }

    # Publish only a completely written file; buffered write errors surface
    # at close.
    if (close($out)) {
        rename($scratch, $published)
            or warn "cannot rename $scratch to $published: $!\n";
    }
    else {
        warn "error writing $scratch: $!\n";
    }

    sleep 40;
    # Zero-offset relative seek: keeps the position but clears the
    # end-of-file condition so newly appended records can be read.
    seek(FH, 0, 1);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment