Created
June 22, 2012 15:00
-
-
Save epaule/2973286 to your computer and use it in GitHub Desktop.
strip out untouched CDS isoforms
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/env perl | |
# that will trim away any low-scoring isoforms from uncurated genes | |
use Ace; | |
use Ace::Sequence; | |
use strict; | |
my $db = Ace->connect(-path => shift()) ||die (Ace->error); | |
my $genes= $db->fetch_many(-query => 'Find Gene SMap;COUNT Corresponding_CDS>1'); | |
my $startGeneid=shift(); | |
while (my $gene = $genes->next){ | |
my $flip=0; | |
my @cdses = map {Ace::Sequence->new($_)} $gene->Corresponding_CDS; | |
map {$_->refseq($_->parent)}@cdses; | |
foreach my $cds(@cdses){ | |
my ($start,$stop)=sort {$a <=> $b} ($cds->start,$cds->stop); | |
my @overlappingCDSes = grep{ | |
my($tmp_start,$tmp_stop)=sort{$a<=>$b}($_->start,$_->end); | |
$tmp_start <= $stop && $tmp_stop >= $start | |
} @cdses; | |
if (scalar(@overlappingCDSes) == 1) { | |
# print "can't find overlapping CDSes in gene $gene / ${\$cds->name}($cds)\n"; | |
$flip=$cds; | |
} | |
} | |
if ($flip){ | |
renameCDS($flip); | |
$startGeneid++; | |
} | |
} | |
sub renameCDS { | |
my ($f)=@_; | |
my $from = $f->name; | |
my $to = "Bm$startGeneid"; | |
my $gene = "TAG$startGeneid"; | |
my $seq = $f->parent->name; | |
print <<HERE; | |
-R CDS : $from $to | |
CDS : $to | |
Gene $gene | |
Gene : $gene | |
Version 1 | |
Sequence $seq | |
Sequence_name $to | |
Public_name $to | |
Live | |
History Version_change 1 now WBPerson4055 Imported merge_split | |
Species "Brugia malayi" | |
Method Gene | |
HERE | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl | |
# go through all CDSes with Evidence, get their genes and delete connected CDSes without Evidence | |
use Ace; | |
use strict; | |
my $db= Ace->connect(-path => shift()) ||die (Ace->error); | |
my @cdses = $db->fetch(-query=>'find CDS Evidence ; follow gene ; follow Corresponding_CDS ; ! Evidence'); | |
foreach my $cds(@cdses){ | |
print<<HERE; | |
CDS : $cds | |
Method history | |
Remark "removed automatically based on curation tags" | |
CDS : $cds | |
-D Gene | |
-R CDS $cds $cds:232 | |
HERE | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl | |
# go through all CDSes with Evidence, get their genes and delete connected CDSes without Evidence | |
use Ace; | |
use strict; | |
my $db= Ace->connect(-path => shift()) ||die (Ace->error); | |
my $gene=shift(); | |
my @cdses = $db->fetch(-query=>"find gene $gene ; follow Corresponding_CDS ; ! Evidence"); | |
foreach my $cds(@cdses){ | |
print<<HERE; | |
CDS : $cds | |
Method history | |
Gene_history $gene | |
Remark "removed automatically based on curation tags" | |
CDS : $cds | |
-D Gene | |
-R CDS $cds $cds:bm7 | |
HERE | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/env perl | |
# that will trim away any low-scoring isoforms from uncurated genes | |
use Ace; | |
use strict; | |
my $db = Ace->connect(-path => shift()) ||die (Ace->error); | |
my $genes= $db->fetch_many(-query => 'Find Gene SMap;Corresponding_CDS'); | |
while (my $gene = $genes->next){ | |
my @a_; | |
my @b_; | |
my @cdses = sort {scalar(@a_=$b->CDS_predicted_by) <=> scalar(@b_=$a->CDS_predicted_by) } ($gene->Corresponding_CDS); | |
next if $cdses[0]->Evidence; | |
my @kill = grep {scalar(@a_= $_->CDS_predicted_by) < scalar(@b_=$cdses[0]->CDS_predicted_by) && scalar(@a_= $_->CDS_predicted_by)<7} @cdses; | |
map {historify_cds($_)} @kill; | |
} | |
sub historify_cds { | |
my ($c) = @_; | |
my $g = $c->Gene; | |
print<<HERE; | |
CDS : $c | |
Method history | |
Gene_history $g | |
Remark "removed automatically based on supporting predicitions count" | |
CDS : $c | |
-D Gene | |
-R CDS $c $c:bm11 | |
HERE | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment