Skip to content

Instantly share code, notes, and snippets.

@levonet
Created November 8, 2017 11:16
Show Gist options
  • Save levonet/596b6a6f4db03f833a9adbcf91230b84 to your computer and use it in GitHub Desktop.
Save levonet/596b6a6f4db03f833a9adbcf91230b84 to your computer and use it in GitHub Desktop.
#!/usr/bin/env perl
# log_format analitiz '$remote_addr - $remote_user [$time_local] '
# '"$request" $status $body_bytes_sent '
# '"$http_referer" "$http_user_agent" '
# '"$scheme" "$host" $remote_port '
# '"$request_method" "$request_uri" "$uri" "$args" '
# '"$proxy_add_x_forwarded_for" "$http_x_real_ip" '
# '["$cookie_language"] '
# '"$msec" "$request_time" '
# '$pid [$tcpinfo_rtt,$tcpinfo_rttvar,$tcpinfo_snd_cwnd,$tcpinfo_rcv_space]';
use strict;
use warnings;
use Date::Parse;
use DateTime;
# Import an nginx access log (written with the log_format shown in the header
# comment) into the ClickHouse table `access_log`.
#
# Usage: script <access_log>
#
# Every line that matches the log pattern becomes one row; rows are streamed
# to `clickhouse-client` in small batches using the `Values` input format.
# Lines that do not match are echoed to STDOUT so they can be inspected.

# Path of the access log to import (required).
my $file = $ARGV[0];
die "Usage: $0 <access_log>\n" unless defined $file && length $file;
#my $hostname = $ARGV[1];

# Number of parsed rows written to clickhouse-client per batch.
my $BATCH_SIZE = 10;

# One log line. The pattern has exactly 24 capture groups, in the order of
# the variable list in the main loop ($remote_user is matched, not captured).
# Compiled once instead of on every iteration.
my $LINE_RE = qr/^([\d\.:abcdef]+) - [\w\d\._-]+ \[(.*)\] "(\w+ .*? HTTP.*?)" (\d+) (\d+) "(.*)" "(.*)" "(.*)" "(.*)" (\d+) "(.*)" "(.*)" "(.*)" "(.*)" "(.*)" "(.*)" \["(.*)"\] "(.*)" "(.*)" (\d+) \[(\d+),(\d+),(\d+),(\d+)\]$/;

# Return $value as a single-quoted literal for the Values format, with every
# embedded single quote backslash-escaped.
# NOTE(review): backslashes are passed through unchanged, exactly as the
# original code did. Presumably this relies on nginx's default log escaping
# (\xHH sequences) being decoded by ClickHouse -- confirm before using this
# with logs written by anything else.
sub sql_quote {
    my ($value) = @_;
    $value =~ s/'/\\'/g;
    return "'$value'";
}

# Write the pending rows in @$rows to $out as comma-separated tuples and
# empty the array. $need_sep is true when an earlier batch was already
# written, in which case a separator is emitted first so the whole stream
# stays one comma-separated list. Returns the new "something was written"
# flag. Does nothing when @$rows is empty.
sub send_batch {
    my ($out, $rows, $need_sep) = @_;
    return $need_sep unless @{$rows};
    my $payload = ($need_sep ? ', ' : '') . join(', ', @{$rows});
    print {$out} $payload or die "Cannot write to clickhouse-client: $!\n";
    @{$rows} = ();
    return 1;
}

print "Open ", $file, "\n";
# For gzipped logs use: open(my $log_fh, '-|', 'zcat', $file) or die ...;
# Three-argument open: the file name can never be interpreted as a mode.
open(my $log_fh, '<', $file) or die "Cannot open $file: $!\n";

# List-form pipe open: no shell is involved, so the query needs no quoting.
open(my $clickhouse, '|-',
    '/usr/bin/clickhouse-client',
    '--database=default',
    '--query=INSERT INTO access_log FORMAT Values',
) or die "Cannot start clickhouse-client: $!\n";

my $i        = 0;     # total number of rows parsed so far
my @parts    = ();    # rows of the current, not yet written, batch
my $sent_any = 0;     # true once at least one batch has been written

LINE:
while (my $line = <$log_fh>) {
    my @fields = $line =~ $LINE_RE;
    if (!@fields) {
        # Not in the expected format: report it and carry on.
        print "$line\n";
        next LINE;
    }

    my ($remote_addr, $clf_datetime, $request, $code, $size,
        $referer, $ua, $schema, $hostname, $remote_port,
        $method, $request_uri, $local_uri, $args, $forward_for,
        $real_ip, $cookie_language, $unix_time, $request_time, $pid,
        $tcpinfo_rtt, $tcpinfo_rttvar, $tcpinfo_snd_cwnd, $tcpinfo_rcv_space)
        = @fields;

    # str2time returns undef for a timestamp it cannot parse; skip such a
    # line instead of letting DateTime->from_epoch abort the whole import.
    my $epoch = str2time($clf_datetime);
    if (!defined $epoch) {
        warn "Skipping line $.: cannot parse time '$clf_datetime'\n";
        next LINE;
    }
    my $dt         = DateTime->from_epoch(epoch => $epoch);
    my $event_date = $dt->ymd;                     # YYYY-MM-DD
    my $datetime   = $event_date . ' ' . $dt->hms; # YYYY-MM-DD HH:MM:SS

    # Requests for one of the per-platform bundles are tagged with that
    # platform; everything else is 'common'.
    my $platform = 'common';
    if ($local_uri =~ m{^/(desktop|touch-pad|touch-phone)\.bundles/.*$}) {
        $platform = $1;
    }

    # Request URI without its query string.
    my $path = $request_uri =~ s/\?.*$//r;

    # NOTE(review): the value order presumably mirrors the column order of
    # the access_log table, which is not visible here -- keep it unchanged.
    # Every string value goes through sql_quote(); numeric values are
    # emitted bare, as before.
    push @parts, '(' . join(',',
        sql_quote($event_date),
        sql_quote($datetime),
        sql_quote($remote_addr),
        $remote_port,
        sql_quote($request),
        $code,
        sql_quote($hostname),
        sql_quote($schema),
        sql_quote($method),
        sql_quote($platform),
        sql_quote($request_uri),
        sql_quote($local_uri),
        sql_quote($path),
        sql_quote($args),
        sql_quote($cookie_language),
        sql_quote($referer),
        sql_quote($ua),
        $size,
        $unix_time,
        $request_time,
        sql_quote($forward_for),
        sql_quote($real_ip),
        $pid,
        $tcpinfo_rtt,
        $tcpinfo_rttvar,
        $tcpinfo_snd_cwnd,
        $tcpinfo_rcv_space,
    ) . ')';
    $i++;

    # Flush only when a full batch has actually been collected, so that
    # unmatched lines never trigger empty writes or bogus progress output.
    if (@parts >= $BATCH_SIZE) {
        $sent_any = send_batch($clickhouse, \@parts, $sent_any);
        print "insert $i rows\n";
    }
}
close $log_fh;

# Write whatever is left over from the last, partial batch.
$sent_any = send_batch($clickhouse, \@parts, $sent_any);

# Closing a pipe waits for the child; a false result means buffered data
# could not be written or clickhouse-client exited with a non-zero status.
close $clickhouse
    or die $!
        ? "Error closing pipe to clickhouse-client: $!\n"
        : "clickhouse-client exited with status " . ($? >> 8) . "\n";
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment