Skip to content

Instantly share code, notes, and snippets.

@bobular
Created March 3, 2020 14:43
Show Gist options
  • Save bobular/d1b9e9eb7e3de41c9f0654e7e82c14cc to your computer and use it in GitHub Desktop.
Save bobular/d1b9e9eb7e3de41c9f0654e7e82c14cc to your computer and use it in GitHub Desktop.
Preliminary Solr for site search
#!/usr/bin/env perl
# -*- mode: cperl -*-
use strict;
use warnings;
use Text::CSV_XS;
use JSON;
# Usage: <script> FILE.csv
#
# Set-up: take the CSV path from the command line, build the JSON encoder and
# the CSV parser, open the input, and read the first row as the header.
my ($file) = @ARGV;
die "Usage: $0 <file.csv>\n" unless defined $file && length $file;

# utf8:      encode() returns UTF-8 bytes. The input is decoded to characters
#            on read and STDOUT has no encoding layer, so without this
#            non-ASCII values would be printed as Latin-1 bytes or trigger
#            "Wide character" warnings.
# canonical: sort object keys so the output is reproducible between runs.
my $json = JSON->new->pretty->canonical->utf8;
my $csv = Text::CSV_XS->new ({ binary => 1, auto_diag => 1 });
open my $fh, "<:encoding(utf8)", $file or die "$file: $!";

# The first record is the header row; getline returns undef when the file is
# empty, which would otherwise surface as an obscure dereference error.
my $headers = $csv->getline($fh);
die "$file: no header row found (empty or unreadable CSV)\n"
    unless $headers && @$headers;

# header to index (if a header name is repeated, the last occurrence wins)
my %h2i;
@h2i{@$headers} = 0 .. $#$headers;
# Builds an accessor for one named CSV column. The column index is resolved
# once, here, so that a column missing from the header row is reported
# immediately; looking it up per row would yield an undef subscript, which
# Perl treats as index 0 and so would silently return the first column.
# Takes: the header name. Returns: a function from $row (array ref) to value.
my $column_reader = sub {
    my ($name) = @_;
    my $index = $h2i{$name};
    die "$file: required column '$name' not found in header row\n"
        unless defined $index;
    return sub { $_[0]->[$index] };
};
my $sample_id_of = $column_reader->("Sample ID");
my $species_of   = $column_reader->("Species");

my %SolrField2function =
(
 # Solr_Field_name => function that takes a $row object
 "id" => sub { "popbio_sample_".$sample_id_of->($_[0]) },
 "documentType" => sub { 'popbioSample' },
 "TEXT__popbio_species" => $species_of,
);
# Stream the remaining CSV rows to STDOUT as one JSON array of documents.
# Each document is printed as soon as it is built, to prevent unnecessary
# memory use; $count tracks how many have been written so that a separating
# comma is emitted before every document except the first.
my $count = 0;
print "[\n";
while (my $row = $csv->getline($fh)) {
    my $doc = { };
    foreach my $SolrField (keys %SolrField2function) {
        $doc->{$SolrField} = $SolrField2function{$SolrField}->($row);
    }
    print ",\n" if ($count++);
    print $json->encode($doc);
}
print "]\n";

# getline returns false both at end of file and on a parse error. Check which
# one ended the loop so that a malformed row makes the script exit non-zero
# instead of passing off truncated output as a successful run.
my $reached_eof = $csv->eof;
my $diagnostic  = $reached_eof ? '' : '' . $csv->error_diag;
close $fh;
die "$file: CSV parsing stopped after $count data row(s): $diagnostic\n"
    unless $reached_eof;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment