Skip to content

Instantly share code, notes, and snippets.

@netsensei
Created October 16, 2017 21:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save netsensei/bab8b3aa31a206d6b994626d94003353 to your computer and use it in GitHub Desktop.
Save netsensei/bab8b3aa31a206d6b994626d94003353 to your computer and use it in GitHub Desktop.
Add raw XML data from an OAI endpoint to a JSON dump in two passes.
#!perl
use JSON;
use Catmandu;
use Try::Tiny::ByClass;
use Data::Dumper;
# Harvest every record from the OAI endpoint and cache it in a local SQLite
# database via the Catmandu DBI store, so that process() can later look the
# records up in that store.
#
# Takes no arguments.
# Returns the number of records that were added to the store.
# Side effects: performs network requests against the OAI endpoint and writes
# to /tmp/index.oai_raw.sqlite.
sub prepare {
    my $store = Catmandu->store(
        'DBI',
        data_source => 'dbi:SQLite:/tmp/index.oai_raw.sqlite',
    );

    # The 'raw' handler is requested so the harvested metadata is kept as-is;
    # process() later reads it from the record's _metadata field.
    my $importer = Catmandu->importer(
        'OAI',
        url            => 'http://datahub.box/oai',
        handler        => 'raw',
        metadataPrefix => 'oai_lido',
    );

    # Resolve the default bag once instead of once per harvested record, and
    # let add_many drive the iteration over the importer.
    my $bag   = $store->bag();
    my $count = $bag->add_many($importer);

    # Flush anything the backend may still be buffering before we return.
    $bag->commit;

    return $count;
}
# Enrich each record of bulk.json with the raw OAI XML cached in the local
# SQLite store, and write the result as pretty-printed JSON using the
# exporter's default output.
#
# Takes no arguments.
# Returns the number of records that were exported.
# Side effects: calls prepare() when the SQLite cache file does not exist yet,
# reads bulk.json from the current directory and emits JSON.
sub process {
    # First pass: build the lookup cache if it is not there yet.
    # NOTE(review): this only checks that the file exists; a cache left
    # half-filled by an interrupted harvest is not detected.
    prepare() unless -e "/tmp/index.oai_raw.sqlite";

    my $importer = Catmandu->importer('JSON', file => 'bulk.json');

    # Second pass, per record:
    #   1. copy the record's data_pid into a scratch field 'raw';
    #   2. replace 'raw' with the matching record from the SQLite store;
    #   3. copy the stored record's _metadata (the raw XML) into 'xml';
    #   4. drop the scratch field again.
    my $fixer = Catmandu->fixer(
        'copy_field(data_pid, raw)',
        'lookup_in_store(raw, DBI, data_source: "dbi:SQLite:/tmp/index.oai_raw.sqlite")',
        'copy_field(raw._metadata, xml)',
        'remove_field(raw)'
    );

    my $exporter = Catmandu->exporter('JSON', pretty => 1);
    my $count    = $exporter->add_many($fixer->fix($importer));

    # The JSON exporter only finishes its output (closing the array) on
    # commit; without this call the emitted document can be left unterminated.
    $exporter->commit;

    return $count;
}
# Script entry point: run the enrichment as soon as the file is executed.
process();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment