Skip to content

Instantly share code, notes, and snippets.

@leverich
Created September 19, 2015 20:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save leverich/8079867fd73b478e7e0a to your computer and use it in GitHub Desktop.
Save leverich/8079867fd73b478e7e0a to your computer and use it in GitHub Desktop.
Hacky tool to join timestamped logs
#!/usr/bin/perl
## assumptions...
### 1. time samples within some correlation coefficient are candidates
### for a matching. this correlation coefficient should be longer than
### the longest sample period.
### 2. the nearest sample (rounded up or down) shall be the matching
### sample. should some samples be collected at a faster rate than
### others, the slower samples will be matched multiple times.
### 3. big jumps or backwards jumps in time samples denote a new log
## each stream is an array so that time is binary searachable
# @streams = ( [ { time => secs.usecs, text => log string } ],
# )
$correlation_period = 1; ## output sample rate after correlation
$correlation_distance = undef;
$start_time = undef;
$stop_time = undef;
$zero = undef;
use Time::HiRes qw/clock_gettime/;
use Getopt::Long;
Getopt::Long::Configure("bundling");
$result = GetOptions("start|s=f" => \$start_time,
"stop|t=f" => \$stop_time,
"period|p=f" => \$correlation_period,
"zero|z" => \$zero);
@streams = ();
$current_stream = 0;
$current_time = undef;
$last_time = undef;
$current_entry = undef;
## input logs, separate into streams and collect multi-line log
## messages. cut-off times should probably be applied at this point to
## avoid further processing...
#print clock_gettime, "\n";
outer: while (<>) {
$line = $_;
# chomp $line;
# print "$line";
if (/^(\d+\.\d+)/) { ## new time
$check_time = $1;
if ($start_time - $check_time > 3600) {
for (0..600) { $x = <> or last outer; }
next outer;
}
if ($check_time - $stop_time > 3600) {
for (0..600) { $x = <> or last outer; }
next outer;
}
$last_time = $current_time;
$current_time = $1;
$line =~ s/^$1\s+//;
if (($last_time - $current_time) > 10) { # $current_time < $last_time) { ## backwards!!
# print "Starting new... $current_time $last_time\n";
$current_stream++;
}
if ($current_time && $current_time != $last_time) { ## new unique time..
$current_entry = { "time" => $current_time };
push @{$streams[$current_stream]}, $current_entry;
}
}
if (($start_time - $current_time) > 3600) {
}
$current_entry->{"text"} .= $line;
# print $current_entry->{"time"} . " x " . $current_entry->{"text"};
chomp $line;
}
#print clock_gettime, "\n";
if (!$start_time) { ## no start time specified... find earliest entry
for my $stream (@streams) {
if (!$start_time || $stream->[0]{"time"} < $start_time) {
$start_time = $stream->[0]{"time"};
}
}
}
if (!$stop_time) { ## no stop time specified... find latest entry
for my $stream (@streams) {
if (!$stop_time || $stream->[scalar @{$stream} - 1]{"time"} > $stop_time) {
$stop_time = $stream->[scalar @{$stream} - 1]{"time"};
}
}
}
@stream_pointers = ();
$current_time = $start_time;
map { $stream_pointers[$_] = 0 } 0 .. $#streams;
while ($current_time < $stop_time && grep { $_ eq 1 } map { $stream_pointers[$_] < scalar @{$streams[$_]} } 0 .. $#streams) {
@correlations = ();
## advance each log to >= $current_time
# print "OK starting...\n";
# print "start: $current_time " . join(" ", @stream_pointers) . "\n";
for my $n (0..$#streams) {
while (($stream_pointers[$n] < scalar @{$streams[$n]} - 1) && ($streams[$n][$stream_pointers[$n]]{"time"} < $current_time)) {
$stream_pointers[$n]++;
# print $n . " " . $streams[$n][$stream_pointers[$n]]{"time"} . " " . $current_time . " " . $stream_pointers[$n] . " " . (scalar @{$streams[$n]}) . "\n";
}
}
## this entry is either 1) last entry or 2) >= current_time
## previous entry is either 1) non-existant or 2) < current_time
## 1, 2 => prev < this < current_time => this
## 1, 1 => current_time <= this => this
## 2, 2 => prev < current_time <= this => depends
## 2, 1 => current_time <= this => this
for my $n (0..$#streams) {
$prev = 0;
# $fail = 0;
if ($stream_pointers[$n] == scalar @{$streams[$n]} - 1) { ## this is the last entry, its going to be the best correlation
} elsif ($stream_pointers[$n] == 0) { ## this is the first entry, its going to be the best correlation
} else { ## determine which is best based on time distance
$thistime = $streams[$n][$stream_pointers[$n]]{"time"};
$prevtime = $streams[$n][$stream_pointers[$n]-1]{"time"};
# if (abs($current_time - $prevtime) > $correlation_distance &&
# abs($current_time - $thistime) > $correlation_distance) {
# $fail = 1;
# }
if (abs($current_time - $prevtime) < abs($current_time - $thistime)) {
$prev = 1;
# prev
# } else {
# this
}
}
# if ($fail) {
# $correlations[$n] = -1;
# } elsif ($prev) {
if ($prev) {
$correlations[$n] = $stream_pointers[$n]-1;
} else {
$correlations[$n] = $stream_pointers[$n];
}
}
## is this what we want to output?
$print_time = $zero ? ($current_time - $start_time) : $current_time;
print $print_time . ": " . join(" | ", map { join(" ", split("\n", $streams[$_][$correlations[$_]]{"text"})) } 0..$#streams) . "\n";
$current_time += $correlation_period;
## RULE: to avoid livelock, at least one stream must advance every iteration
# map { $stream_pointers[$_]++ } 0 .. $#stream_pointers;
}
use Data::Dumper;
$Data::Dumper::Terse = 1;
$Data::Dumper::Indent = 1;
#print Dumper(\@streams);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment