Skip to content

Instantly share code, notes, and snippets.

@clintongormley
Created March 11, 2013 19:02
Show Gist options
Save clintongormley/5136749 to your computer and use it in GitHub Desktop.
#!/usr/bin/env perl
use strict;
use warnings;
use ElasticSearch;
# Gather the inputs: the CSV path is the first command-line argument and the
# target index comes from the INDEX environment variable. Both must be true
# values, otherwise the script dies with the usage message. TYPE is read from
# the environment as well, but is not validated here.
my $file = shift;
die usage() unless $file;

my $index = $ENV{INDEX};
die usage() unless $index;

my $type = $ENV{TYPE};

# Build the CSV record source first (its constructor opens the file), then the
# ElasticSearch client, and hand the source to reindex().
my $csv = CSV::Streamer->new( $file, $index, $type );
my $e   = ElasticSearch->new;
$e->reindex(
    source    => $csv,
    bulk_size => 1000,
    quiet     => 1,
);

print "Done\n";
#===================================
sub usage {
#===================================
    # Returns the one-line usage message (newline-terminated, so die() does
    # not append a file/line suffix). $0 interpolates the name this script
    # was invoked as.
    my $message = "Usage: INDEX=index_name TYPE=type_name $0 file.csv\n";
    return $message;
}
#===================================
package CSV::Streamer;
#===================================
use strict;
use warnings;
use Text::CSV_XS;
#===================================
sub new {
#===================================
    # Constructor. Opens $file for reading, consumes its first row as the
    # CSV header, and registers those names as the column names used by
    # getline_hr() in next().
    #
    # Arguments: $file  - path of the CSV file to read
    #            $index - stored as-is and echoed in every record from next()
    #            $type  - stored as-is and echoed in every record from next()
    # Returns:   a new object blessed into $class
    # Dies:      if the file cannot be opened, or if no header row can be read
    my ( $class, $file, $index, $type ) = @_;

    my $csv = Text::CSV_XS->new( { binary => 1 } );

    open my $fh, "<:encoding(utf8)", $file
        or die "$file: $!";

    my $cols = $csv->getline($fh);
    if ( !$cols ) {

        # A failed getline() is either end-of-file (empty input) or a parse
        # error. Neither sets $!, so ask the parser what happened instead.
        my $reason
            = $csv->eof
            ? 'file is empty'
            : scalar( $csv->error_diag );
        die "$file: cannot read CSV header row: $reason";
    }
    $csv->column_names(@$cols);

    # Two-argument bless so that subclasses calling ->new get an object of
    # their own class rather than always CSV::Streamer.
    return bless {
        csv   => $csv,
        index => $index,
        type  => $type,
        fh    => $fh,
    }, $class;
}
#===================================
sub next {
#===================================
    # Iterator step: reads the next CSV row as a hash keyed by column name
    # and wraps it in a record hash.
    #
    # Returns: a hashref with keys index, type, id and data, where id is the
    #          row's "id" column (removed from data; undef if the row has no
    #          such column) and data is the remaining columns. Returns
    #          nothing (empty list / undef) once the input is exhausted.
    # Dies:    if a row cannot be parsed. Previously a parse error looked
    #          exactly like end-of-file, so the stream ended early without
    #          any indication that rows had been dropped.
    my $self = shift;
    my $csv  = $self->{csv};

    my $data = $csv->getline_hr( $self->{fh} );
    if ( !$data ) {

        # getline_hr() returns false both at end-of-file and on a parse
        # error; eof() tells the two apart.
        return if $csv->eof;
        die "CSV parse error: " . scalar( $csv->error_diag );
    }

    my $id = delete $data->{id};
    return {
        index => $self->{index},
        type  => $self->{type},
        id    => $id,
        data  => $data,
    };
}
1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment