Skip to content

Instantly share code, notes, and snippets.

@bonobo78
Last active January 1, 2016 04:09
Show Gist options
  • Save bonobo78/8089753 to your computer and use it in GitHub Desktop.
Save bonobo78/8089753 to your computer and use it in GitHub Desktop.
Creates a json file ready for _bulk ES API tested with strawberry-perl-5.18.1.1-64bit on window$
#!/usr/bin/env perl
## Converts a CSV file into JSON lines (printed to STDOUT) ready for the Elasticsearch _bulk API
## Tested with strawberry-perl-5.18.1.1-64bit
## 2013-12-22 Author : FRN
use strict; use warnings;
use JSON; ## brings json_encode
use Tie::Array::CSV; ## brings @Array from csv file
use Tie::IxHash; ## brings ordered hashes
use Getopt::Long; ## brings GetOptions facilities
## Command-line settings, pre-loaded with their default values.
## Each one can be overridden by the option of the same name below.
my ($separator, $show_help) = (" ", 0);
my ($index, $type)          = ("test_index", "test_type");

## Option specifications paired with the variable each option fills in.
my @option_spec = (
    "--separator|s=s" => \$separator,
    "--help|h!"       => \$show_help,
    "--index=s"       => \$index,
    "--type=s"        => \$type,
);

## A parsing failure and an explicit --help both end in show_help(),
## which prints the usage line and exits.
GetOptions(@option_spec) or show_help();
$show_help and show_help();

## The first remaining argument is the CSV file to convert; it is mandatory.
my $csvfile = $ARGV[0];
die "input file name is required" unless defined $csvfile;
## Convert every data row of the CSV file into one Elasticsearch _bulk
## "index" action: an action line followed by the JSON document line.
##
## Reads  : $csvfile, $separator, $index, $type (set by the option parsing).
## Writes : two lines per CSV data row on STDOUT.
## Dies   : when the input file does not exist or cannot be tied.

## NOTE(review): Tie::Array::CSV is documented as built on Tie::File, whose
## default open mode includes O_CREAT - without this check a mistyped file
## name would presumably create an empty file and silently output nothing.
die "input file '$csvfile' does not exist\n" unless -e $csvfile;

## Create array() from csv file
tie my @file, 'Tie::Array::CSV', $csvfile, sep_char => $separator
    or die "cannot tie '$csvfile'\n";

## first line must contain fields name; it is copied once so that it is not
## fetched through the tie again for every row (an empty file has no header)
my @header = @file ? @{ $file[0] } : ();

## The action line is identical for every document, so it is built once.
## Encoding it with JSON instead of interpolating into a string keeps it
## valid even when the index or type name contains quotes or backslashes.
## canonical() gives a stable key order; utf8() is deliberately not used so
## the names given on the command line are passed through byte-for-byte.
my $action_line = JSON->new->canonical->encode(
    { index => { _index => $index, _type => $type } }
) . "\n";

## "YYYY/MM/DD hh:mm:ss" as found in the date and time columns
my $datetime_re = qr{(\d{4})/(\d{2})/(\d{2})\s+(\d{2}:\d{2}:\d{2})};

## starting iterate array of rows from second row (offset[1]) to avoid header
## $#file = last @file element
for my $rownum (1 .. $#file) {
    my @row = @{ $file[$rownum] };    ## getting the row

    tie my %r, "Tie::IxHash";         ## ordered hash: keeps the column order
    @r{@header} = @row;               ## assigning each value to its column name
    $r{'message'} = "";               ## reserves the message slot after the columns

    ## get into each assigned field to provide little transformation
    my @message_parts;
    for my $key (keys %r) {
        ## field message contains every value except itself
        ## (exact comparison: a column such as "error_message" is still included)
        next if $key eq 'message';

        my $value = $r{$key};
        ## skip fields missing from short lines, but keep legitimate "0" values
        ## TODO: better field number control
        next unless defined $value && length $value;

        ## special case : remove '[field name=value]' from value
        ## \Q..\E so that regex metacharacters in a column name match literally
        if ( $value =~ /\[\Q$key\E=(.+)\]/ ) {
            $r{$key} = $1;
        }

        ## the message is built from the value as read from the file
        push @message_parts, $value;
    }
    ## join() leaves no leading space in the message
    $r{'message'} = join ' ', @message_parts;

    ## timestamp from date and time fields; a missing column counts as empty
    ## so that no "uninitialized value" warning is raised
    ## TODO: try to catch from any field without knowing it
    ## TODO: add eventtreatedTime
    my ($date, $time) =
        map { my $field = $r{$_}; defined $field ? $field : '' } qw(date time);
    if ( "$date $time" =~ $datetime_re ) {
        $r{'@timestamp'} = sprintf '%s-%s-%sT%s', $1, $2, $3, $4;
    }

    ## output bulk method, then the json line of the document
    ## TODO: a lot to be more versatile
    print $action_line, encode_json(\%r), "\n";
    ## testing pretty output
    #print JSON->new->utf8->pretty->encode(\%r)."\n";
}
## Print the one-line command synopsis on standard output and terminate
## the program with exit status 1. Takes no arguments and never returns.
sub show_help {
    my $usage = "Usage: $0 [--separator=?] [--index=<index>] [--type=<type>] [--help] <file>\n";
    print $usage;
    exit 1;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment