standage/augustus-gff3-groom.sh

## augustus-gff3-groom.sh
#!/usr/bin/env bash
# Usage: bash augustus-gff3-groom.sh genes-before.gff3 > genes-after.gff3

sed $'s/\ttranscript\t/\tmRNA\t/' < $1 | grep -v $'\tintron\t'

## genemarkhmm3-gff3-groom.sh
#!/usr/bin/env bash
# Usage: bash genemarkhmm3-gff3-groom.sh genes-before.gff3 > genes-after.gff3

perl -ne 's/ID=cds(\d+)(.+)Parent=mRNA(\d+)/ID=cds$3$2Parent=mRNA$3/; print' < $1

## glimmerhmm-gff3-groom.sh
#!/usr/bin/env bash
# Usage: bash glimmerhmm-gff3-groom.sh genes-before.gff3 > genes-after.gff3

perl -ne 'if(m/\tmRNA\t/){ $g = $_; $g =~ s/ID=([^;]+);/ID=$1.gene;/; $g =~ s/\tmRNA\t/\tgene\t/; print $g; $_ =~ s/ID=([^;]+)(\S+)/ID=$1$2;Parent=$1.gene/ } print' < $1

## snap2gff3.pl
#!/usr/bin/env perl
# Usage: perl snap2.gff3 < genes-before.gff > genes-after.gff3
use strict;

my @exons;
sub by_position
{
  $a->[3] <=> $b->[3] or $a->[4] <=> $b->[4]
}

while(<STDIN>)
{
  chomp;
  my @v = split(/\t/);
  push(@exons, \@v);
  if($v[2] eq "Eterm" or $v[2] eq "Esngl")
  {
    @exons = sort by_position @exons;
    my $numexons = scalar(@exons);
    my $start = $exons[0]->[3];
    my $end = $exons[$numexons-1]->[4];
    printf
    (
      "%s\tSNAP\tgene\t%lu\t%lu\t.\t%s\t.\tID=%s.gene\n",
      $exons[0]->[0],
      $start,
      $end,
      $exons[0]->[6],
      $exons[0]->[8],
    );
    printf
    (
      "%s\tSNAP\tmRNA\t%lu\t%lu\t.\t%s\t.\tID=%s.mrna;Parent=%s.gene\n",
      $exons[0]->[0],
      $start,
      $end,
      $exons[0]->[6],
      $exons[0]->[8],
      $exons[0]->[8],
    );
    my $start_codon = [];
    my $stop_codon = [];
    foreach my $exon(@exons)
    {
      if($exon->[2] eq "Einit")
      {
        if($exon->[6] eq "+")
        {
          $start_codon = [$exon->[3], $exon->[3] + 2];
        }
        else
        {
          $start_codon = [$exon->[4] - 2, $exon->[4]];
        }
        printf
        (
          "%s\tSNAP\tstart_codon\t%lu\t%lu\t.\t%s\t.\tParent=%s.mrna\n",
          $exon->[0],
          $start_codon->[0],
          $start_codon->[1],
          $exon->[6],
          $exon->[8],
        );
      }
      elsif($exon->[2] eq "Eterm")
      {
        if($exon->[6] eq "+")
        {
          $stop_codon = [$exon->[4] - 2, $exon->[4]];
        }
        else
        {
          $stop_codon = [$exon->[3], $exon->[3] + 2];
        }
        printf
        (
          "%s\tSNAP\tstop_codon\t%lu\t%lu\t.\t%s\t.\tParent=%s.mrna\n",
          $exon->[0],
          $stop_codon->[0],
          $stop_codon->[1],
          $exon->[6],
          $exon->[8],
        );
      }
      elsif($exon->[2] eq "Esngl")
      {
        if($exon->[6] eq "+")
        {
          $start_codon = [$exon->[3], $exon->[3] + 2];
          $stop_codon = [$exon->[4] - 2, $exon->[4]];
        }
        else
        {
          $start_codon = [$exon->[4] - 2, $exon->[4]];
          $stop_codon = [$exon->[3], $exon->[3] + 2];
        }
        printf
        (
          "%s\tSNAP\tstart_codon\t%lu\t%lu\t.\t%s\t.\tParent=%s.mrna\n",
          $exon->[0],
          $start_codon->[0],
          $start_codon->[1],
          $exon->[6],
          $exon->[8],
        );
        printf
        (
          "%s\tSNAP\tstop_codon\t%lu\t%lu\t.\t%s\t.\tParent=%s.mrna\n",
          $exon->[0],
          $stop_codon->[0],
          $stop_codon->[1],
          $exon->[6],
          $exon->[8],
        );
      }
      $exon->[2] = "exon";
      $exon->[8] = "Parent=". $exon->[8] .".mrna";
      printf("%s\n", join("\t", @$exon));
    }
    @exons = ();
  }
}
	#!/usr/bin/env bash
	# Usage: bash augustus-gff3-groom.sh genes-before.gff3 > genes-after.gff3

	sed $'s/\ttranscript\t/\tmRNA\t/' < $1 \| grep -v $'\tintron\t'
	#!/usr/bin/env bash
	# Usage: bash genemarkhmm3-gff3-groom.sh genes-before.gff3 > genes-after.gff3

	perl -ne 's/ID=cds(\d+)(.+)Parent=mRNA(\d+)/ID=cds$3$2Parent=mRNA$3/; print' < $1
	#!/usr/bin/env bash
	# Usage: bash glimmerhmm-gff3-groom.sh genes-before.gff3 > genes-after.gff3

	perl -ne 'if(m/\tmRNA\t/){ $g = $_; $g =~ s/ID=([^;]+);/ID=$1.gene;/; $g =~ s/\tmRNA\t/\tgene\t/; print $g; $_ =~ s/ID=([^;]+)(\S+)/ID=$1$2;Parent=$1.gene/ } print' < $1
	#!/usr/bin/env perl
	# Usage: perl snap2.gff3 < genes-before.gff > genes-after.gff3
	use strict;

	my @exons;
	sub by_position
	{
	$a->[3] <=> $b->[3] or $a->[4] <=> $b->[4]
	}

	while(<STDIN>)
	{
	chomp;
	my @v = split(/\t/);
	push(@exons, \@v);
	if($v[2] eq "Eterm" or $v[2] eq "Esngl")
	{
	@exons = sort by_position @exons;
	my $numexons = scalar(@exons);
	my $start = $exons[0]->[3];
	my $end = $exons[$numexons-1]->[4];
	printf
	(
	"%s\tSNAP\tgene\t%lu\t%lu\t.\t%s\t.\tID=%s.gene\n",
	$exons[0]->[0],
	$start,
	$end,
	$exons[0]->[6],
	$exons[0]->[8],
	);
	printf
	(
	"%s\tSNAP\tmRNA\t%lu\t%lu\t.\t%s\t.\tID=%s.mrna;Parent=%s.gene\n",
	$exons[0]->[0],
	$start,
	$end,
	$exons[0]->[6],
	$exons[0]->[8],
	$exons[0]->[8],
	);
	my $start_codon = [];
	my $stop_codon = [];
	foreach my $exon(@exons)
	{
	if($exon->[2] eq "Einit")
	{
	if($exon->[6] eq "+")
	{
	$start_codon = [$exon->[3], $exon->[3] + 2];
	}
	else
	{
	$start_codon = [$exon->[4] - 2, $exon->[4]];
	}
	printf
	(
	"%s\tSNAP\tstart_codon\t%lu\t%lu\t.\t%s\t.\tParent=%s.mrna\n",
	$exon->[0],
	$start_codon->[0],
	$start_codon->[1],
	$exon->[6],
	$exon->[8],
	);
	}
	elsif($exon->[2] eq "Eterm")
	{
	if($exon->[6] eq "+")
	{
	$stop_codon = [$exon->[4] - 2, $exon->[4]];
	}
	else
	{
	$stop_codon = [$exon->[3], $exon->[3] + 2];
	}
	printf
	(
	"%s\tSNAP\tstop_codon\t%lu\t%lu\t.\t%s\t.\tParent=%s.mrna\n",
	$exon->[0],
	$stop_codon->[0],
	$stop_codon->[1],
	$exon->[6],
	$exon->[8],
	);
	}
	elsif($exon->[2] eq "Esngl")
	{
	if($exon->[6] eq "+")
	{
	$start_codon = [$exon->[3], $exon->[3] + 2];
	$stop_codon = [$exon->[4] - 2, $exon->[4]];
	}
	else
	{
	$start_codon = [$exon->[4] - 2, $exon->[4]];
	$stop_codon = [$exon->[3], $exon->[3] + 2];
	}
	printf
	(
	"%s\tSNAP\tstart_codon\t%lu\t%lu\t.\t%s\t.\tParent=%s.mrna\n",
	$exon->[0],
	$start_codon->[0],
	$start_codon->[1],
	$exon->[6],
	$exon->[8],
	);
	printf
	(
	"%s\tSNAP\tstop_codon\t%lu\t%lu\t.\t%s\t.\tParent=%s.mrna\n",
	$exon->[0],
	$stop_codon->[0],
	$stop_codon->[1],
	$exon->[6],
	$exon->[8],
	);
	}
	$exon->[2] = "exon";
	$exon->[8] = "Parent=". $exon->[8] .".mrna";
	printf("%s\n", join("\t", @$exon));
	}
	@exons = ();
	}
	}