Created
April 20, 2013 14:18
-
-
Save standage/5426127 to your computer and use it in GitHub Desktop.
Several scripts/command for converting output of gene prediction programs to variants of GFF3 compatible with ParsEval.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash | |
# Usage: bash augustus-gff3-groom.sh genes-before.gff3 > genes-after.gff3 | |
sed $'s/\ttranscript\t/\tmRNA\t/' < $1 | grep -v $'\tintron\t' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash | |
# Usage: bash genemarkhmm3-gff3-groom.sh genes-before.gff3 > genes-after.gff3 | |
perl -ne 's/ID=cds(\d+)(.+)Parent=mRNA(\d+)/ID=cds$3$2Parent=mRNA$3/; print' < $1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash | |
# Usage: bash glimmerhmm-gff3-groom.sh genes-before.gff3 > genes-after.gff3 | |
perl -ne 'if(m/\tmRNA\t/){ $g = $_; $g =~ s/ID=([^;]+);/ID=$1.gene;/; $g =~ s/\tmRNA\t/\tgene\t/; print $g; $_ =~ s/ID=([^;]+)(\S+)/ID=$1$2;Parent=$1.gene/ } print' < $1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl | |
# Usage: perl snap2.gff3 < genes-before.gff > genes-after.gff3 | |
use strict; | |
my @exons; | |
sub by_position | |
{ | |
$a->[3] <=> $b->[3] or $a->[4] <=> $b->[4] | |
} | |
while(<STDIN>) | |
{ | |
chomp; | |
my @v = split(/\t/); | |
push(@exons, \@v); | |
if($v[2] eq "Eterm" or $v[2] eq "Esngl") | |
{ | |
@exons = sort by_position @exons; | |
my $numexons = scalar(@exons); | |
my $start = $exons[0]->[3]; | |
my $end = $exons[$numexons-1]->[4]; | |
printf | |
( | |
"%s\tSNAP\tgene\t%lu\t%lu\t.\t%s\t.\tID=%s.gene\n", | |
$exons[0]->[0], | |
$start, | |
$end, | |
$exons[0]->[6], | |
$exons[0]->[8], | |
); | |
printf | |
( | |
"%s\tSNAP\tmRNA\t%lu\t%lu\t.\t%s\t.\tID=%s.mrna;Parent=%s.gene\n", | |
$exons[0]->[0], | |
$start, | |
$end, | |
$exons[0]->[6], | |
$exons[0]->[8], | |
$exons[0]->[8], | |
); | |
my $start_codon = []; | |
my $stop_codon = []; | |
foreach my $exon(@exons) | |
{ | |
if($exon->[2] eq "Einit") | |
{ | |
if($exon->[6] eq "+") | |
{ | |
$start_codon = [$exon->[3], $exon->[3] + 2]; | |
} | |
else | |
{ | |
$start_codon = [$exon->[4] - 2, $exon->[4]]; | |
} | |
printf | |
( | |
"%s\tSNAP\tstart_codon\t%lu\t%lu\t.\t%s\t.\tParent=%s.mrna\n", | |
$exon->[0], | |
$start_codon->[0], | |
$start_codon->[1], | |
$exon->[6], | |
$exon->[8], | |
); | |
} | |
elsif($exon->[2] eq "Eterm") | |
{ | |
if($exon->[6] eq "+") | |
{ | |
$stop_codon = [$exon->[4] - 2, $exon->[4]]; | |
} | |
else | |
{ | |
$stop_codon = [$exon->[3], $exon->[3] + 2]; | |
} | |
printf | |
( | |
"%s\tSNAP\tstop_codon\t%lu\t%lu\t.\t%s\t.\tParent=%s.mrna\n", | |
$exon->[0], | |
$stop_codon->[0], | |
$stop_codon->[1], | |
$exon->[6], | |
$exon->[8], | |
); | |
} | |
elsif($exon->[2] eq "Esngl") | |
{ | |
if($exon->[6] eq "+") | |
{ | |
$start_codon = [$exon->[3], $exon->[3] + 2]; | |
$stop_codon = [$exon->[4] - 2, $exon->[4]]; | |
} | |
else | |
{ | |
$start_codon = [$exon->[4] - 2, $exon->[4]]; | |
$stop_codon = [$exon->[3], $exon->[3] + 2]; | |
} | |
printf | |
( | |
"%s\tSNAP\tstart_codon\t%lu\t%lu\t.\t%s\t.\tParent=%s.mrna\n", | |
$exon->[0], | |
$start_codon->[0], | |
$start_codon->[1], | |
$exon->[6], | |
$exon->[8], | |
); | |
printf | |
( | |
"%s\tSNAP\tstop_codon\t%lu\t%lu\t.\t%s\t.\tParent=%s.mrna\n", | |
$exon->[0], | |
$stop_codon->[0], | |
$stop_codon->[1], | |
$exon->[6], | |
$exon->[8], | |
); | |
} | |
$exon->[2] = "exon"; | |
$exon->[8] = "Parent=". $exon->[8] .".mrna"; | |
printf("%s\n", join("\t", @$exon)); | |
} | |
@exons = (); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment