Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save indraniel/a31f7ad3d0750c4a1d95 to your computer and use it in GitHub Desktop.
Save indraniel/a31f7ad3d0750c4a1d95 to your computer and use it in GitHub Desktop.
Filter for NCBI SRA documents (produced by "srasearch init-dump/increment-dump") that have been "Received" on or after a certain time point
#!/usr/bin/env perl
use warnings;
use strict;
use DateTime;
use DateTime::Format::Strptime;
use IO::Uncompress::Gunzip qw($GunzipError);
use JSON;
# Command-line entry point.
#
# Usage: <script> <sra-dump.gz> <YYYY-MM-DD>
#
# Both positional arguments are required; the script aborts with an "[err]"
# message as soon as one of them is found to be missing (or false).
my ($input_dump, $threshold_date) = @ARGV;

die "[err] Please supply an SRA Dump as input!\n"
    unless $input_dump;

die "[err] Please supply a threshold_date (YYYY-MM-DD) as input!\n"
    unless $threshold_date;

# Print the qualifying records, then exit successfully.
filter($input_dump, $threshold_date);
exit(0);
# filter($input_dump, $threshold_date)
#
# Reads a gzip-compressed SRA dump line by line and prints to STDOUT every
# record whose "Received" timestamp is accepted by keep(), i.e. is on or after
# 00:00:00 of $threshold_date.
#
#   $input_dump     - path to the gzipped dump; each line is "<id>,<json>"
#   $threshold_date - cut-off date, formatted YYYY-MM-DD
#
# Returns nothing.  Dies with a descriptive message when:
#   * the dump cannot be opened,
#   * $threshold_date does not parse,
#   * a line has no JSON part, holds invalid JSON, or is not a JSON object,
#   * a record's "Received" field is missing or does not match the
#     YYYY-MM-DDTHH:MM:SSZ pattern.
sub filter {
    my ($input_dump, $threshold_date) = @_;

    my $z = IO::Uncompress::Gunzip->new($input_dump)
        or die "IO::Uncompress::Gunzip failed: $GunzipError\n";

    my $strp = DateTime::Format::Strptime->new(pattern => '%Y-%m-%dT%H:%M:%SZ');

    # parse_datetime() hands back undef when the text does not match the
    # pattern, so validate here rather than failing later inside keep() with
    # an unrelated-looking subtraction error.
    my $threshold = $strp->parse_datetime(join('T', $threshold_date, '00:00:00Z'));
    die "[err] Invalid threshold_date '$threshold_date' (expected YYYY-MM-DD)\n"
        unless defined $threshold;

    # Counted by hand so every diagnostic can name the offending record.
    my $line_no = 0;

    while (my $line = <$z>) {
        $line_no++;
        chomp($line);

        # Split on the first comma only: the JSON payload itself has commas.
        # The leading id is not needed because the whole line is echoed.
        my (undef, $json) = split(/,/, $line, 2);
        die "[err] Line $line_no: expected '<id>,<json>'\n"
            unless defined $json;

        # A failed decode leaves $entry undefined and the reason in $@.
        my $entry = eval { decode_json($json) };
        die "[err] Line $line_no: cannot use JSON record: "
            . ($@ || "not a JSON object\n")
            unless ref $entry eq 'HASH';

        my $stamp    = $entry->{'Received'};
        my $received = defined $stamp ? $strp->parse_datetime($stamp) : undef;
        die "[err] Line $line_no: missing or unparsable 'Received' timestamp\n"
            unless defined $received;

        if (keep($received, $threshold)) {
            print $line, "\n";
        }
    }

    # Release the decompressor explicitly instead of relying on scope exit.
    $z->close();
    return;
}
# keep($received, $threshold)
#
# Decides whether a record passes the date filter.  Both arguments are the
# date objects produced by the Strptime parser in filter(); their difference
# is taken with the overloaded "-" operator and inspected for its sign.
#
# Returns 1 when $received is equal to or later than $threshold, 0 otherwise.
sub keep {
    my ($received, $threshold) = @_;

    my $elapsed = $received - $threshold;

    # Same instant counts as a match, as does anything later.
    return 1 if $elapsed->is_zero;
    return 1 if $elapsed->is_positive;

    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment