Skip to content

Instantly share code, notes, and snippets.

@dbolser
Created April 28, 2011 10:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dbolser/0dbebac2441aa9eee4b1 to your computer and use it in GitHub Desktop.
Save dbolser/0dbebac2441aa9eee4b1 to your computer and use it in GitHub Desktop.
#!/usr/bin/perl -w
use strict;
## for debugging
use Data::Dumper;
## parse command line options
use Getopt::Long;
use Bio::GFF3::LowLevel qw/ gff3_parse_feature gff3_format_feature /;
use Bio::Location::Simple;
## The superscaffold 'split-report' GFF mapper
use Chimera;
## HANDLE COMMAND LINE OPTIONS
my $verbose = 0;
my $debug = 0;
my $split_report_gff;
GetOptions( 'verbose+' => \$verbose,
'debug+' => \$debug,
'split_report_gff=s' => \$split_report_gff,
)
or die "failure to communcate\n";
## Check command line options
die "please pass a split_report.gff style file to map over (-s)\n"
unless defined ($split_report_gff) && -s $split_report_gff;
die "please pass a GFF file (or files) file to map\n"
unless @ARGV;
warn "debug: $debug\nverbose: $verbose\n"
if $verbose > 0;
## GO TIME
## Initalise the 'mapping' object
my $mapper = Chimera->
new( file => $split_report_gff );
warn "\nthe split report ('$split_report_gff') has ",
scalar $mapper->components, " components\n"
if $verbose > 0;
## NOTE, below we expect all sub-features to be preceeded by their
## parent features in the GFF
warn "\nprocessing GFF\n\n";
my (%features_mapped, %failed_to_map);
while(<>){
if (/^#/){
print;
next;
}
## Parse the GFF
my $feature = gff3_parse_feature( $_ );
warn Dumper $feature
if $debug > 1;
## Build a location object from the feature
my $feature_location = Bio::Location::Simple->
new( -seq_id => $feature->{seq_id},
-start => $feature->{start},
-end => $feature->{end},
-strand => $feature->{strand},
);
warn Dumper $feature_location
if $debug > 1;
## Try to map the feature location onto the new coordinates
my $new_feature_location =
$mapper->map( $feature_location );
warn Dumper $new_feature_location
if $debug > 1;
## After mapping, there are three possibilities:
## 1) The feature maps outside the coordinate space defined by the
## mapper and should be passed through unchanged.
## 2) The feature maps cleanly, and its coordinates should be
## updated to the new position.
## 3) The feature may span multiple regions, and should now be
## dropped.
## The mapping result object allows us to conveniently differentiate
## between these three cases...
my $num_gaps = $new_feature_location->each_gap;
my $num_match = $new_feature_location->each_match;
if(0){}
elsif($num_gaps == 1 && $num_match == 0){
## Case 1, nothing to do here
}
elsif($num_gaps == 0 && $num_match == 1){
## Case 2, adjust the feature coordinates
$feature->{seq_id} = $new_feature_location->seq_id;
$feature->{start} = $new_feature_location->start;
$feature->{end} = $new_feature_location->end;
$feature->{strand} = $new_feature_location->strand;
}
else{
## Case 3, perform a 'sanity check'...
if($num_gaps > 1 || $num_match > 1){
if($num_gaps > 2 || $num_match > 2){
warn Dumper $feature;
warn Dumper $new_feature_location;
die "failure of sanity!\n"
}
warn "spanning feature! : ",
$feature->{attributes}{ID}[0], "\n";
warn Dumper $feature
if $debug > 0;
warn Dumper $new_feature_location
if $debug > 0;
}
## log our 'failure'...
push @{$failed_to_map{$feature->{seq_id}}}, $feature;
## and move on without printing
next;
}
## Made it!
$features_mapped{$feature->{attributes}{ID}[0]}++;
## If this feature has a parent feature, check that the parent
## feature made it through, else, orphan the feature (so cruel!).
if($feature->{attributes}{Parent}){
unless($features_mapped{$feature->{attributes}{Parent}[0]}){
warn "removing unmapped parent from ",
$feature->{attributes}{ID}[0], "\n"
if $verbose > 0;
delete $feature->{attributes}{Parent};
}
}
## Write it
print gff3_format_feature( $feature );
}
warn "OK\n\n";
warn "error reporting\n\n";
for (sort keys %failed_to_map){
warn 'failed to map ',
scalar @{$failed_to_map{$_}}, " features on $_\n";
for(@{$failed_to_map{$_}}){
warn "\t", $_->{attributes}{ID}[0], "\n"
if $verbose > 0;
warn Dumper $_
if $debug > 0;
}
}
warn "OK\n";
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment