-
-
Save dbolser/0dbebac2441aa9eee4b1 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl -w | |
use strict; | |
## for debugging | |
use Data::Dumper; | |
## parse command line options | |
use Getopt::Long; | |
use Bio::GFF3::LowLevel qw/ gff3_parse_feature gff3_format_feature /; | |
use Bio::Location::Simple; | |
## The superscaffold 'split-report' GFF mapper | |
use Chimera; | |
## HANDLE COMMAND LINE OPTIONS | |
my $verbose = 0; | |
my $debug = 0; | |
my $split_report_gff; | |
GetOptions( 'verbose+' => \$verbose, | |
'debug+' => \$debug, | |
'split_report_gff=s' => \$split_report_gff, | |
) | |
or die "failure to communcate\n"; | |
## Check command line options | |
die "please pass a split_report.gff style file to map over (-s)\n" | |
unless defined ($split_report_gff) && -s $split_report_gff; | |
die "please pass a GFF file (or files) file to map\n" | |
unless @ARGV; | |
warn "debug: $debug\nverbose: $verbose\n" | |
if $verbose > 0; | |
## GO TIME | |
## Initalise the 'mapping' object | |
my $mapper = Chimera-> | |
new( file => $split_report_gff ); | |
warn "\nthe split report ('$split_report_gff') has ", | |
scalar $mapper->components, " components\n" | |
if $verbose > 0; | |
## NOTE, below we expect all sub-features to be preceeded by their | |
## parent features in the GFF | |
warn "\nprocessing GFF\n\n"; | |
my (%features_mapped, %failed_to_map); | |
while(<>){ | |
if (/^#/){ | |
print; | |
next; | |
} | |
## Parse the GFF | |
my $feature = gff3_parse_feature( $_ ); | |
warn Dumper $feature | |
if $debug > 1; | |
## Build a location object from the feature | |
my $feature_location = Bio::Location::Simple-> | |
new( -seq_id => $feature->{seq_id}, | |
-start => $feature->{start}, | |
-end => $feature->{end}, | |
-strand => $feature->{strand}, | |
); | |
warn Dumper $feature_location | |
if $debug > 1; | |
## Try to map the feature location onto the new coordinates | |
my $new_feature_location = | |
$mapper->map( $feature_location ); | |
warn Dumper $new_feature_location | |
if $debug > 1; | |
## After mapping, there are three possibilities: | |
## 1) The feature maps outside the coordinate space defined by the | |
## mapper and should be passed through unchanged. | |
## 2) The feature maps cleanly, and its coordinates should be | |
## updated to the new position. | |
## 3) The feature may span multiple regions, and should now be | |
## dropped. | |
## The mapping result object allows us to conveniently differentiate | |
## between these three cases... | |
my $num_gaps = $new_feature_location->each_gap; | |
my $num_match = $new_feature_location->each_match; | |
if(0){} | |
elsif($num_gaps == 1 && $num_match == 0){ | |
## Case 1, nothing to do here | |
} | |
elsif($num_gaps == 0 && $num_match == 1){ | |
## Case 2, adjust the feature coordinates | |
$feature->{seq_id} = $new_feature_location->seq_id; | |
$feature->{start} = $new_feature_location->start; | |
$feature->{end} = $new_feature_location->end; | |
$feature->{strand} = $new_feature_location->strand; | |
} | |
else{ | |
## Case 3, perform a 'sanity check'... | |
if($num_gaps > 1 || $num_match > 1){ | |
if($num_gaps > 2 || $num_match > 2){ | |
warn Dumper $feature; | |
warn Dumper $new_feature_location; | |
die "failure of sanity!\n" | |
} | |
warn "spanning feature! : ", | |
$feature->{attributes}{ID}[0], "\n"; | |
warn Dumper $feature | |
if $debug > 0; | |
warn Dumper $new_feature_location | |
if $debug > 0; | |
} | |
## log our 'failure'... | |
push @{$failed_to_map{$feature->{seq_id}}}, $feature; | |
## and move on without printing | |
next; | |
} | |
## Made it! | |
$features_mapped{$feature->{attributes}{ID}[0]}++; | |
## If this feature has a parent feature, check that the parent | |
## feature made it through, else, orphan the feature (so cruel!). | |
if($feature->{attributes}{Parent}){ | |
unless($features_mapped{$feature->{attributes}{Parent}[0]}){ | |
warn "removing unmapped parent from ", | |
$feature->{attributes}{ID}[0], "\n" | |
if $verbose > 0; | |
delete $feature->{attributes}{Parent}; | |
} | |
} | |
## Write it | |
print gff3_format_feature( $feature ); | |
} | |
warn "OK\n\n"; | |
warn "error reporting\n\n"; | |
for (sort keys %failed_to_map){ | |
warn 'failed to map ', | |
scalar @{$failed_to_map{$_}}, " features on $_\n"; | |
for(@{$failed_to_map{$_}}){ | |
warn "\t", $_->{attributes}{ID}[0], "\n" | |
if $verbose > 0; | |
warn Dumper $_ | |
if $debug > 0; | |
} | |
} | |
warn "OK\n"; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment