Skip to content

Instantly share code, notes, and snippets.

@asigatchov
Last active November 4, 2016 14:40
Show Gist options
  • Save asigatchov/c499d1d21216ef8eedc0d13e1984b67b to your computer and use it in GitHub Desktop.
Save asigatchov/c499d1d21216ef8eedc0d13e1984b67b to your computer and use it in GitHub Desktop.
clickhouse parse acces log
#!/usr/bin/perl
use strict;
use warnings;
use Date::Parse;
use DateTime;
my $file = $ARGV[0];
my $hostname = $ARGV[1];
print $file, "\n";
open FILE, "zcat $file|" or die $!;
open(PIPE2,"|/usr/bin/clickhouse-client --database=default --query='INSERT INTO access_log FORMAT Values'");
my $i = 0;
my @parts = ();
while (my $line = <FILE>){
if ($line =~ /(https?):\/\/(.*?) (\d+\.\d+\.\d+\.\d+).*?\[(.*?)\] "(\w+) (.*?) HTTP.*?" (\d+) (\d+) "(.*?)" "(.*?)".*? (\d+\.\d+)$/){
my ( $schema, $domain, $ip, $date, $method, $path, $code, $size, $ref, $ua, $resp_time) =
($1, $2, $3 , $4, $5, $6,$7, $8, $9,$10,$11);
$ua =~ s/'/\\'/ig;
$path =~ s/'/\\'/ig;
$ref =~ s/'/\\'/ig;
my $epoch = str2time($date);
my $datetime = DateTime->from_epoch(epoch => $epoch);
my ($event_date, $tmp) = split('T', $datetime );
$datetime = join(' ', $event_date, $tmp);
push @parts, "(" . join(',', ( "'$event_date'", "'$hostname'", "'$schema'", "'$domain'",
"'$ip'", "'$datetime'", "'$method'", "'$path'", $code, $size, "'$ref'", "'$ua'", $resp_time)) . ")";
$i++;
}
if ($i % 10000 == 0){
print PIPE2 join(', ', @parts) ;
@parts = ();
print "insert $i rows\n";
}
}
print PIPE2 join(', ', @parts) ;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment