Skip to content

Instantly share code, notes, and snippets.

@epaule
Created June 22, 2012 15:00
Show Gist options
  • Save epaule/2973286 to your computer and use it in GitHub Desktop.
Save epaule/2973286 to your computer and use it in GitHub Desktop.
strip out untouched CDS isoforms
#!/usr/bin/env perl
#
# add missing tags to curation databases
use Ace;
use strict;
my $db= Ace->connect(-path => shift()) ||die (Ace->error);
# From Laboratory tag
my @cdses = $db->fetch(-query=>"find CDS; Gene; SMap");
foreach my $cds(@cdses){
next if $cds->From_laboratory;
print "CDS : $cds\n";
print "From_laboratory ELG\n";
print "\n";
}
undef @cdses;
my @transcripts = $db->fetch(-query=>"find Transcript; Gene; SMap");
foreach my $transcript (@transcripts){
next if $transcript->From_laboratory;
print "Transcript : $transcript\n";
print "From_laboratory ELG\n";
print "\n";
}
undef @transcripts;
# isoform tag
my @genes = $db->fetch(-query=>"find Gene; Corresponding_CDS; SMap");
foreach my $gene (@genes) {
@cdses=$gene->Corresponding_CDS;
if (scalar @cdses >1 ){
foreach my $cds(@cdses) {
next if $cds->Isoform;
print "CDS : $cds\n";
print "Isoform Inferred_automatically\n";
print "\n"
}
}else{
next unless $cdses[0]->Isoform;
print "CDS : $cdses[0]\n";
print "-D Isoform\n";
print "\n";
}
}
# clean dodgy cdses
print "//CDSes without SMaps\n";
@cdses = $db->fetch(-query => 'find CDS; ! SMap');
foreach my $cds(@cdses){
next unless $cds->Species eq 'Brugia malayi';
print "-D CDS $cds\n";
}
print "\n//CDSes with hand_built method\n";
@cdses = $db->fetch(-query => 'find CDS; Method=hand_built');
foreach my $cds(@cdses){
print "-D CDS $cds\n";
}
print "\n//Transcripts with hand_built method\n";
@cdses = $db->fetch(-query => 'find Transcript; Method=hand_built');
foreach my $cds(@cdses){
print "-D Transcript $cds\n";
}
#!/bin/env perl
# that will trim away any low-scoring isoforms from uncurated genes
use Ace;
use Ace::Sequence;
use strict;
my $db = Ace->connect(-path => shift()) ||die (Ace->error);
my $genes= $db->fetch_many(-query => 'Find Gene SMap;COUNT Corresponding_CDS>1');
my $startGeneid=shift();
while (my $gene = $genes->next){
my $flip=0;
my @cdses = map {Ace::Sequence->new($_)} $gene->Corresponding_CDS;
map {$_->refseq($_->parent)}@cdses;
foreach my $cds(@cdses){
my ($start,$stop)=sort {$a <=> $b} ($cds->start,$cds->stop);
my @overlappingCDSes = grep{
my($tmp_start,$tmp_stop)=sort{$a<=>$b}($_->start,$_->end);
$tmp_start <= $stop && $tmp_stop >= $start
} @cdses;
if (scalar(@overlappingCDSes) == 1) {
# print "can't find overlapping CDSes in gene $gene / ${\$cds->name}($cds)\n";
$flip=$cds;
}
}
if ($flip){
renameCDS($flip);
$startGeneid++;
}
}
sub renameCDS {
my ($f)=@_;
my $from = $f->name;
my $to = "Bm$startGeneid";
my $gene = "TAG$startGeneid";
my $seq = $f->parent->name;
print <<HERE;
-R CDS : $from $to
CDS : $to
Gene $gene
Gene : $gene
Version 1
Sequence $seq
Sequence_name $to
Public_name $to
Live
History Version_change 1 now WBPerson4055 Imported merge_split
Species "Brugia malayi"
Method Gene
HERE
}
#!/usr/bin/env perl
# go through all CDSes with Evidence, get their genes and delete connected CDSes without Evidence
use Ace;
use strict;
my $db= Ace->connect(-path => shift()) ||die (Ace->error);
my @cdses = $db->fetch(-query=>'find CDS Evidence ; follow gene ; follow Corresponding_CDS ; ! Evidence');
foreach my $cds(@cdses){
print<<HERE;
CDS : $cds
Method history
Remark "removed automatically based on curation tags"
CDS : $cds
-D Gene
-R CDS $cds $cds:232
HERE
}
#!/usr/bin/env perl
# go through all CDSes with Evidence, get their genes and delete connected CDSes without Evidence
use Ace;
use strict;
my $db= Ace->connect(-path => shift()) ||die (Ace->error);
my $gene=shift();
my @cdses = $db->fetch(-query=>"find gene $gene ; follow Corresponding_CDS ; ! Evidence");
foreach my $cds(@cdses){
print<<HERE;
CDS : $cds
Method history
Gene_history $gene
Remark "removed automatically based on curation tags"
CDS : $cds
-D Gene
-R CDS $cds $cds:bm7
HERE
}
#!/bin/env perl
# that will trim away any low-scoring isoforms from uncurated genes
use Ace;
use strict;
my $db = Ace->connect(-path => shift()) ||die (Ace->error);
my $genes= $db->fetch_many(-query => 'Find Gene SMap;Corresponding_CDS');
while (my $gene = $genes->next){
my @a_;
my @b_;
my @cdses = sort {scalar(@a_=$b->CDS_predicted_by) <=> scalar(@b_=$a->CDS_predicted_by) } ($gene->Corresponding_CDS);
next if $cdses[0]->Evidence;
my @kill = grep {scalar(@a_= $_->CDS_predicted_by) < scalar(@b_=$cdses[0]->CDS_predicted_by) && scalar(@a_= $_->CDS_predicted_by)<7} @cdses;
map {historify_cds($_)} @kill;
}
sub historify_cds {
my ($c) = @_;
my $g = $c->Gene;
print<<HERE;
CDS : $c
Method history
Gene_history $g
Remark "removed automatically based on supporting predicitions count"
CDS : $c
-D Gene
-R CDS $c $c:bm11
HERE
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment