Skip to content

Instantly share code, notes, and snippets.

@LeoCavaille
Created March 18, 2015 20:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save LeoCavaille/1f5ec91aba8a6eebcd57 to your computer and use it in GitHub Desktop.
Save LeoCavaille/1f5ec91aba8a6eebcd57 to your computer and use it in GitHub Desktop.
#!/usr/bin/perl
use warnings;
use Data::Dumper;
use IO::Zlib;
use DBI;
# Database handle shared by insert_hash() below. Connects to the
# "datadog_test" PostgreSQL database on localhost:15432 with empty
# user name and password. Abort right away if the connection cannot be
# established: every later step needs a live handle, and without this check
# the failure would only show up as a method call on an undefined value.
my $dbh = DBI->connect("dbi:Pg:dbname=datadog_test;host=localhost;port=15432", "", "")
    or die "cannot connect to database datadog_test: $DBI::errstr\n";
# Insert a single row into a table.
#
#   $table        - name of the table to insert into
#   $field_values - hash reference mapping column name => value
#
# Values are passed as bind parameters; the table and column names are
# interpolated into the statement text as-is. Uses the file-level $dbh.
# Returns whatever the statement handle's execute() returns.
sub insert_hash {
    my ($table, $field_values) = @_;

    # Sorted column order keeps the statement text identical for the same
    # set of columns, so prepare_cached can hand back the same handle.
    my @columns      = sort keys %{$field_values};
    my $column_list  = join ",", @columns;
    my $placeholders = join ",", map { "?" } @columns;

    my $statement = "insert into $table ($column_list) values ($placeholders)";
    # print $statement."\n";

    my $handle = $dbh->prepare_cached($statement);
    return $handle->execute(map { $field_values->{$_} } @columns);
}
# ---------------------------------------------------------------------------
# Main script.
#
# Usage: <script> dogweb.log_<node>.gz
#
# Reads the gzipped log file given as first argument line by line and
# correlates three kinds of log lines through their query id:
#
#   1. "Running query | ..."      -> opens a record for the query
#   2. "cass_query(query_id:...)" -> adds the cass_* figures to the record
#   3. "raw_query[query_id:...]"  -> adds the final figures and closes it
#
# When a query is closed and a cass_query line was seen for it, the record
# is inserted into the 'cass_queries' table through insert_hash(). Records
# closed without a cass_query line are dropped.
# ---------------------------------------------------------------------------
my $filename = $ARGV[0];
die "usage: $0 dogweb.log_<node>.gz\n" unless defined $filename;

# The node id (i-xxxx) is taken from the file name. Refuse to continue when
# it cannot be extracted, otherwise every row would be stored without a node.
my ($node) = $filename =~ /dogweb\.log_(i-\w+)\.gz/
    or die "cannot extract node id from file name '$filename'\n";
print "Node $node\n";

my $fh = IO::Zlib->new($filename, 'rb')
    or die "cannot open '$filename': $!\n";

# Patterns are compiled once, outside the read loop.
#
# NOTE(review): in the "Running query" line the text after "id:" is captured
# as org_id and the text after "org:" as query_id. Presumably the labels in
# the log line are swapped relative to their content -- confirm against the
# code that writes the log before touching this pattern.
my $running_re = qr/(?<ts>\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}),\d{3}.*Running query \| id:(?<org_id>\d+) org:(?<query_id>\w+) range:\(EpochSpan\((?<start_date>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}) -> (?<end_date>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})\)\) int:(?<interval>\w+) query:'(?<query>.*)'$/;
my $cass_re = qr/(?<ts>\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}),\d{3}.*cass_query\(query_id:(?<query_id>\w+)\) fetched (?<num_series>\d+) series \(points:(?<num_points>\d+) cols:(?<num_cols>\d+) rollup_points:(?<num_rollup>\d+)\) in (?<ttotal>\d+\.\d+)s \(pre:(?<tpre>\d+\.\d+)s fetch:(?<tfetch>\d+\.\d+)s read:(?<tread>\d+\.\d+)s rollup:(?<trollup>\d+\.\d+)s\)/;
my $raw_re = qr/(?<ts>\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}),\d{3}.*raw_query\[query_id:(?<query_id>\w+)\] org:(?<org_id>\d+) - ran (?<queries>\d+) queries across (?<results>\d+) results \(cache reads:(?<cache_reads>\d+) writes:(?<cache_writes>\d+)\) in (?<ttotal>\d+\.\d+)s \(cache_read:(?<tcache_read>\d+\.\d+)s ctxs:(?<tctxs>\d+\.\d+)s live:(?<tlive>\d+\.\d+)s cass:(?<tcass>\d+\.\d+)s h5s3:(?<th5s3>\d+\.\d+)s merge:(?<tmerge>\d+\.\d+)s aggr:(?<taggr>\d+\.\d+)s post:(?<tpost>\d+\.\d+)s cache_write:(?<tcache_write>\d+\.\d+)s\)/;

# Queries that have started but not finished yet, keyed by query id.
my %byqueryid;
my $i = 0;

while (my $line = <$fh>) {

    # 1. Start of a query: open a fresh record.
    if ($line =~ $running_re) {
        # Copy the captures right away: any later successful match or
        # substitution replaces the contents of %+.
        my %m = %+;

        # Timestamps are stored as "date time", not ISO "dateTtime".
        (my $start_ts = $m{ts}) =~ s/T/ /;

        $byqueryid{ $m{query_id} } = {
            'start_ts'       => $start_ts,
            'org_id'         => $m{org_id},
            'query_start_ts' => $m{start_date},
            'query_end_ts'   => $m{end_date},
            # The log prints "None" when there is no interval; store 0.
            'interval'       => ($m{interval} eq "None" ? 0 : $m{interval}),
            'query'          => $m{query},
            'node'           => $node,
            # Flag (not a column): set once a cass_query line is seen.
            'cass'           => 0,
        };
    }

    # 2. cass_query figures for a query we saw starting.
    if ($line =~ $cass_re) {
        my %m = %+;

        # Lines for unknown query ids are ignored, but still counted below.
        if (exists $byqueryid{ $m{query_id} }) {
            my $record = $byqueryid{ $m{query_id} };
            (my $cass_ts = $m{ts}) =~ s/T/ /;

            $record->{cass}          = 1;
            $record->{cass_ts}       = $cass_ts;
            $record->{cass_series}   = $m{num_series};
            $record->{cass_points}   = $m{num_points};
            $record->{cass_cols}     = $m{num_cols};
            $record->{cass_rollup}   = $m{num_rollup};
            $record->{cass_t_total}  = $m{ttotal};
            $record->{cass_t_pre}    = $m{tpre};
            $record->{cass_t_fetch}  = $m{tfetch};
            $record->{cass_t_read}   = $m{tread};
            $record->{cass_t_rollup} = $m{trollup};
        }
    }

    # 3. End of a query: complete the record and store it.
    if ($line =~ $raw_re) {
        my %m        = %+;
        my $query_id = $m{query_id};

        if (exists $byqueryid{$query_id}) {
            # The query is finished either way, so take it out of the
            # pending set now.
            my $record = delete $byqueryid{$query_id};
            (my $end_ts = $m{ts}) =~ s/T/ /;

            $record->{end_ts}        = $end_ts;
            $record->{num_queries}   = $m{queries};
            $record->{num_results}   = $m{results};
            $record->{cache_reads}   = $m{cache_reads};
            $record->{cache_writes}  = $m{cache_writes};
            $record->{t_total}       = $m{ttotal};
            $record->{t_cache_read}  = $m{tcache_read};
            $record->{t_ctxs}        = $m{tctxs};
            $record->{t_live}        = $m{tlive};
            $record->{t_cass}        = $m{tcass};
            $record->{t_h5s3}        = $m{th5s3};
            $record->{t_merge}       = $m{tmerge};
            $record->{t_aggr}        = $m{taggr};
            $record->{t_post}        = $m{tpost};
            $record->{t_cache_write} = $m{tcache_write};

            # Only queries with a cass_query line go to PG. The 'cass' flag
            # is removed first because insert_hash() turns every key of the
            # hash into a column.
            if (delete $record->{cass}) {
                $record->{query_id} = $query_id;
                insert_hash('cass_queries', $record);
            }
        }
    }

    # Progress report every 1000 lines read.
    $i++;
    print "$node $i\n" unless $i % 1000;
}
$fh->close;
#print Dumper(\%byqueryid);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment