Skip to content

Instantly share code, notes, and snippets.

@standage
Created April 20, 2013 14:18
Show Gist options
  • Save standage/5426127 to your computer and use it in GitHub Desktop.
Save standage/5426127 to your computer and use it in GitHub Desktop.
Several scripts/command for converting output of gene prediction programs to variants of GFF3 compatible with ParsEval.
#!/usr/bin/env bash
# Usage: bash augustus-gff3-groom.sh genes-before.gff3 > genes-after.gff3
sed $'s/\ttranscript\t/\tmRNA\t/' < $1 | grep -v $'\tintron\t'
#!/usr/bin/env bash
# Usage: bash genemarkhmm3-gff3-groom.sh genes-before.gff3 > genes-after.gff3
perl -ne 's/ID=cds(\d+)(.+)Parent=mRNA(\d+)/ID=cds$3$2Parent=mRNA$3/; print' < $1
#!/usr/bin/env bash
# Usage: bash glimmerhmm-gff3-groom.sh genes-before.gff3 > genes-after.gff3
perl -ne 'if(m/\tmRNA\t/){ $g = $_; $g =~ s/ID=([^;]+);/ID=$1.gene;/; $g =~ s/\tmRNA\t/\tgene\t/; print $g; $_ =~ s/ID=([^;]+)(\S+)/ID=$1$2;Parent=$1.gene/ } print' < $1
#!/usr/bin/env perl
# Usage: perl snap2.gff3 < genes-before.gff > genes-after.gff3
use strict;
my @exons;
sub by_position
{
$a->[3] <=> $b->[3] or $a->[4] <=> $b->[4]
}
while(<STDIN>)
{
chomp;
my @v = split(/\t/);
push(@exons, \@v);
if($v[2] eq "Eterm" or $v[2] eq "Esngl")
{
@exons = sort by_position @exons;
my $numexons = scalar(@exons);
my $start = $exons[0]->[3];
my $end = $exons[$numexons-1]->[4];
printf
(
"%s\tSNAP\tgene\t%lu\t%lu\t.\t%s\t.\tID=%s.gene\n",
$exons[0]->[0],
$start,
$end,
$exons[0]->[6],
$exons[0]->[8],
);
printf
(
"%s\tSNAP\tmRNA\t%lu\t%lu\t.\t%s\t.\tID=%s.mrna;Parent=%s.gene\n",
$exons[0]->[0],
$start,
$end,
$exons[0]->[6],
$exons[0]->[8],
$exons[0]->[8],
);
my $start_codon = [];
my $stop_codon = [];
foreach my $exon(@exons)
{
if($exon->[2] eq "Einit")
{
if($exon->[6] eq "+")
{
$start_codon = [$exon->[3], $exon->[3] + 2];
}
else
{
$start_codon = [$exon->[4] - 2, $exon->[4]];
}
printf
(
"%s\tSNAP\tstart_codon\t%lu\t%lu\t.\t%s\t.\tParent=%s.mrna\n",
$exon->[0],
$start_codon->[0],
$start_codon->[1],
$exon->[6],
$exon->[8],
);
}
elsif($exon->[2] eq "Eterm")
{
if($exon->[6] eq "+")
{
$stop_codon = [$exon->[4] - 2, $exon->[4]];
}
else
{
$stop_codon = [$exon->[3], $exon->[3] + 2];
}
printf
(
"%s\tSNAP\tstop_codon\t%lu\t%lu\t.\t%s\t.\tParent=%s.mrna\n",
$exon->[0],
$stop_codon->[0],
$stop_codon->[1],
$exon->[6],
$exon->[8],
);
}
elsif($exon->[2] eq "Esngl")
{
if($exon->[6] eq "+")
{
$start_codon = [$exon->[3], $exon->[3] + 2];
$stop_codon = [$exon->[4] - 2, $exon->[4]];
}
else
{
$start_codon = [$exon->[4] - 2, $exon->[4]];
$stop_codon = [$exon->[3], $exon->[3] + 2];
}
printf
(
"%s\tSNAP\tstart_codon\t%lu\t%lu\t.\t%s\t.\tParent=%s.mrna\n",
$exon->[0],
$start_codon->[0],
$start_codon->[1],
$exon->[6],
$exon->[8],
);
printf
(
"%s\tSNAP\tstop_codon\t%lu\t%lu\t.\t%s\t.\tParent=%s.mrna\n",
$exon->[0],
$stop_codon->[0],
$stop_codon->[1],
$exon->[6],
$exon->[8],
);
}
$exon->[2] = "exon";
$exon->[8] = "Parent=". $exon->[8] .".mrna";
printf("%s\n", join("\t", @$exon));
}
@exons = ();
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment