Created
August 27, 2013 14:22
-
-
Save avrilcoghlan/6354164 to your computer and use it in GitHub Desktop.
Perl script that renames genes in the maker gff files so that they have unique names.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl | |
=head1 NAME | |
rename_genes_in_maker_gff.pl | |
=head1 SYNOPSIS | |
rename_genes_in_maker_gff.pl input_gff output_gff outputdir species | |
where input_gff is the input gff file, | |
output_gff is the output gff file, | |
outputdir is the output directory for writing output files, | |
species is the abbreviation for the species name, eg. BPAG. | |
=head1 DESCRIPTION | |
This script takes an input gff file from maker (<input_gff>), and renames the genes | |
and transcripts, and writes the output gff file (<output_gff>) in directory | |
<outputdir>. This script can handle multiple transcripts per gene. | |
=head1 VERSION | |
Perl script last edited 9-May-2013. | |
=head1 CONTACT | |
alc@sanger.ac.uk (Avril Coghlan) | |
=cut | |
# | |
# Perl script rename_genes_in_maker_gff.pl | |
# Written by Avril Coghlan (alc@sanger.ac.uk) | |
# 9-May-13. | |
# Last edited 9-May-2013. | |
# SCRIPT SYNOPSIS: rename_genes_in_maker_gff.pl: renames genes in the maker gff files so that they have unique names. | |
# | |
#------------------------------------------------------------------# | |
# CHECK IF THERE ARE THE CORRECT NUMBER OF COMMAND-LINE ARGUMENTS: | |
use strict; | |
use warnings; | |
my $num_args = $#ARGV + 1; | |
if ($num_args != 4) | |
{ | |
print "Usage of rename_genes_in_maker_gff.pl\n\n"; | |
print "perl rename_genes_in_maker_gff.pl <input_gff> <output_gff> <outputdir> <species>\n"; | |
print "where <input_gff> is the input gff file,\n"; | |
print " <output_gff> is the output gff file,\n"; | |
print " <outputdir> is the output directory for writing output files,\n"; | |
print " <species> is the the abbreviation for the species name\n"; | |
print "For example, >perl rename_genes_in_maker_gff.pl round3.gff new_round3.gff\n"; | |
print "/lustre/scratch108/parasites/alc/ BPAG\n"; | |
exit; | |
} | |
# FIND THE PATH TO THE INPUT GFF FILE: | |
my $input_gff = $ARGV[0]; | |
# FIND THE PATH TO THE OUTPUT GFF FILE: | |
my $output_gff = $ARGV[1]; | |
# FIND THE DIRECTORY TO USE FOR OUTPUT FILES: | |
my $outputdir = $ARGV[2]; | |
# FIND THE ABBREVIATION FOR THE SPECIES NAME: | |
my $species = $ARGV[3]; | |
#------------------------------------------------------------------# | |
# TEST SUBROUTINES: | |
my $PRINT_TEST_DATA = 0; # SAYS WHETHER TO PRINT DATA USED DURING TESTING. | |
&test_check_whether_new_lines_complete; | |
&test_find_overlapping_features($outputdir); | |
&test_sort_gene_gff($outputdir); | |
&test_print_error; | |
&test_make_gene_gff($outputdir); | |
&test_read_genes_for_transcripts($outputdir); | |
&test_get_new_genename; | |
&test_assign_exon_numbers($outputdir); | |
&test_fix_gff_lines_for_gene($outputdir); | |
&test_count_lines($outputdir); | |
&test_make_gene_gff2($outputdir); | |
&test_run_main_program($outputdir); | |
print STDERR "Finished tests, running main code now...\n"; | |
#------------------------------------------------------------------# | |
# RUN THE MAIN PART OF THE CODE: | |
&run_main_program($outputdir,$input_gff,$output_gff,$species,0); | |
print STDERR "FINISHED.\n"; | |
#------------------------------------------------------------------# | |
# TEST THE MAIN PROGRAM: | |
sub test_run_main_program | |
{ | |
my $outputdir = $_[0]; # DIRECTORY TO WRITE OUTPUT FILES IN | |
my $input_gff; # INPUT GFF FILE | |
my $errorcode; # RETURNED AS 0 FROM A FUNCTION IF THERE IS NO ERROR | |
my $errormsg; # RETURNED AS 'none' FROM A FUNCTION IF THERE IS NO ERROR | |
my $output_gff; # OUTPUT GFF FILE | |
my $expected_output_gff; # FILE WITH EXPECTED CONTENTS OF $output_gff | |
my $differences; # DIFFERENCES BETWEEN $output_gff AND $expected_output_gff | |
my $length_differences; # LENGTH OF $differences | |
my $line; # | |
my @temp; # | |
($input_gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
open(INPUT_GFF,">$input_gff") || die "ERROR: test_run_main_program: cannot open $input_gff\n"; | |
print INPUT_GFF "##gff-version 3\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 . contig 1 117101 . . . ID=BPAG.contig.00154.117101;Name=BPAG.contig.00154.117101\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker gene 35582 39253 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.110\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker mRNA 35582 39253 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1;_AED=0.05;_eAED=0.05;_QI=0|0|0|1|1|1|6|0|309\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker exon 35582 35727 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:0;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker exon 36179 36341 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:1;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker exon 37052 37204 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:2;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker exon 38326 38535 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:3;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker exon 38781 38871 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:4;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker exon 39087 39253 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:5;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker CDS 35582 35727 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker CDS 36179 36341 . + 1 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker CDS 37052 37204 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker CDS 38326 38535 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker CDS 38781 38871 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker CDS 39087 39253 . + 2 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker gene 103190 104650 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.119;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.119\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker mRNA 103190 104650 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.119-mRNA-1;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.119;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.119-mRNA-1;_AED=0.06;_eAED=0.06;_QI=0|0|0|1|1|1|3|0|276\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker exon 103190 104650 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.119-mRNA-1:exon:1;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.119-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker CDS 103190 104650 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.119-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.119-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 blastx protein_match 1346 1510 223 + . ID=BPAG.contig.00154.117101:hit:88:2.10.0.0;Name=tr|J9EFX0|J9EFX0_WUCBA\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 blastx match_part 1346 1510 223 + . ID=BPAG.contig.00154.117101:hsp:88:2.10.0.0;Parent=BPAG.contig.00154.117101:hit:88:2.10.0.0;Target=tr|J9EFX0|J9EFX0_WUCBA 1 56;Gap=M24 I1 M31\n"; | |
print INPUT_GFF "##FASTA\n"; | |
print INPUT_GFF ">BPAG.contig.10610.1000\n"; | |
print INPUT_GFF "CCAAACGATGATTGAAAATTTATTCATTTCTTTGCTTTTAAAAGGTAATAGTTTAAAAGCAACATTCTCTTTCTTATATTTTCAAAGTTTAGAAGAAAGAAAATTTTAATAATTTTCATTAAAATAAAATTAGGTAAGATATTAGAAAGGTTCAGTTGAAGTATTCCAAAATTATTGCTATCCATTTATTATTTATTTAAAAAAAAAAAAAAAGATTGTTAATTACTTTCATTTCTTAAATTTGATTAATTATATCAAAGATTATGAAGAAAAGAAAAATATGCTTTTTGTATTTTTTAAATTTCTGTTTTCAATTATTTTACAAAAATTTTTGTTTATTTTATTAAATCATTTCTCTTTTTTTTTTTTTGCCATTTGATTTTGCCTATTATTTATACAAATAATCATAATTCACTTTAAATATTAATGGATGAATATTTAAAATTATTATTCCATTTGAATTTGAATGTTAGAAAAAAAAATTCTATATATGAAATGATGATTTCATATTATTAATTAATTTTTTTTTAATTAATTAATACTGAAATTATATTAGACGCAAATGCATTTGAAATATTTACAGGCATGAAGATAGGAATATATTAGGTGGCGTACAAATAATTGACGTTACGTAATACACGTAACTTGTTAAGAACGTCCATTCTGCAATGTTTCTATGCAAAAATATACCAAACGATAAACCGTAAATGTTGTGATAATAGCATAAAATTTGCAATATTAAATTTGGGATAAAAAATACGAACAATATTTAGAAGAAAAATATTTACATAAAAATTAAAAATCAAATTTGAACTTTTGCAAATATTTATTAGACAACAATTTTAAAATTACAACTTAACATAGCAAACATTGATTAAAAGTAAAAGCTCAAAGTAAACAAAAAAAAAAGAATTCATTTCTTCAGTTTAATTTTAAGCCCTGTAAATAGGAAAACAAAATTCTAAAATTATTGCAAATTTTCTAGAAATTTTTTAAAAAA\n"; | |
close(INPUT_GFF); | |
($output_gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
@temp = split(/\//,$output_gff); | |
$output_gff = $temp[$#temp]; | |
&run_main_program($outputdir,$input_gff,$output_gff,'SRAE',1); | |
($expected_output_gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
open(EXPECTED,">$expected_output_gff") || die "ERROR: test_run_main_program: failed test1\n"; | |
print EXPECTED "##gff-version 3\n"; | |
print EXPECTED "BPAG.contig.00154.117101 . contig 1 117101 . . . ID=BPAG.contig.00154.117101;Name=BPAG.contig.00154.117101\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker gene 35582 39253 . + . ID=SRAE_0000000001;Name=SRAE_0000000001\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker mRNA 35582 39253 . + . ID=SRAE_0000000001-mRNA-1;Parent=SRAE_0000000001;Name=SRAE_0000000001-mRNA-1;_AED=0.05;_eAED=0.05;_QI=0|0|0|1|1|1|6|0|309\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker exon 35582 35727 . + . ID=SRAE_0000000001-mRNA-1:exon:1;Parent=SRAE_0000000001-mRNA-1\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker exon 36179 36341 . + . ID=SRAE_0000000001-mRNA-1:exon:2;Parent=SRAE_0000000001-mRNA-1\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker exon 37052 37204 . + . ID=SRAE_0000000001-mRNA-1:exon:3;Parent=SRAE_0000000001-mRNA-1\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker exon 38326 38535 . + . ID=SRAE_0000000001-mRNA-1:exon:4;Parent=SRAE_0000000001-mRNA-1\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker exon 38781 38871 . + . ID=SRAE_0000000001-mRNA-1:exon:5;Parent=SRAE_0000000001-mRNA-1\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker exon 39087 39253 . + . ID=SRAE_0000000001-mRNA-1:exon:6;Parent=SRAE_0000000001-mRNA-1\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker CDS 35582 35727 . + 0 ID=SRAE_0000000001-mRNA-1:cds;Parent=SRAE_0000000001-mRNA-1\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker CDS 36179 36341 . + 1 ID=SRAE_0000000001-mRNA-1:cds;Parent=SRAE_0000000001-mRNA-1\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker CDS 37052 37204 . + 0 ID=SRAE_0000000001-mRNA-1:cds;Parent=SRAE_0000000001-mRNA-1\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker CDS 38326 38535 . + 0 ID=SRAE_0000000001-mRNA-1:cds;Parent=SRAE_0000000001-mRNA-1\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker CDS 38781 38871 . + 0 ID=SRAE_0000000001-mRNA-1:cds;Parent=SRAE_0000000001-mRNA-1\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker CDS 39087 39253 . + 2 ID=SRAE_0000000001-mRNA-1:cds;Parent=SRAE_0000000001-mRNA-1\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker gene 103190 104650 . + . ID=SRAE_0000000101;Name=SRAE_0000000101\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker mRNA 103190 104650 . + . ID=SRAE_0000000101-mRNA-1;Parent=SRAE_0000000101;Name=SRAE_0000000101-mRNA-1;_AED=0.06;_eAED=0.06;_QI=0|0|0|1|1|1|3|0|276\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker exon 103190 104650 . + 0 ID=SRAE_0000000101-mRNA-1:exon:1;Parent=SRAE_0000000101-mRNA-1\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker CDS 103190 104650 . + . ID=SRAE_0000000101-mRNA-1:cds;Parent=SRAE_0000000101-mRNA-1\n"; | |
print EXPECTED "BPAG.contig.00154.117101 blastx protein_match 1346 1510 223 + . ID=BPAG.contig.00154.117101:hit:88:2.10.0.0;Name=tr|J9EFX0|J9EFX0_WUCBA\n"; | |
print EXPECTED "BPAG.contig.00154.117101 blastx match_part 1346 1510 223 + . ID=BPAG.contig.00154.117101:hsp:88:2.10.0.0;Parent=BPAG.contig.00154.117101:hit:88:2.10.0.0;Target=tr|J9EFX0|J9EFX0_WUCBA 1 56;Gap=M24 I1 M31\n"; | |
print EXPECTED "##FASTA\n"; | |
print EXPECTED ">BPAG.contig.10610.1000\n"; | |
print EXPECTED "CCAAACGATGATTGAAAATTTATTCATTTCTTTGCTTTTAAAAGGTAATAGTTTAAAAGCAACATTCTCTTTCTTATATTTTCAAAGTTTAGAAGAAAGAAAATTTTAATAATTTTCATTAAAATAAAATTAGGTAAGATATTAGAAAGGTTCAGTTGAAGTATTCCAAAATTATTGCTATCCATTTATTATTTATTTAAAAAAAAAAAAAAAGATTGTTAATTACTTTCATTTCTTAAATTTGATTAATTATATCAAAGATTATGAAGAAAAGAAAAATATGCTTTTTGTATTTTTTAAATTTCTGTTTTCAATTATTTTACAAAAATTTTTGTTTATTTTATTAAATCATTTCTCTTTTTTTTTTTTTGCCATTTGATTTTGCCTATTATTTATACAAATAATCATAATTCACTTTAAATATTAATGGATGAATATTTAAAATTATTATTCCATTTGAATTTGAATGTTAGAAAAAAAAATTCTATATATGAAATGATGATTTCATATTATTAATTAATTTTTTTTTAATTAATTAATACTGAAATTATATTAGACGCAAATGCATTTGAAATATTTACAGGCATGAAGATAGGAATATATTAGGTGGCGTACAAATAATTGACGTTACGTAATACACGTAACTTGTTAAGAACGTCCATTCTGCAATGTTTCTATGCAAAAATATACCAAACGATAAACCGTAAATGTTGTGATAATAGCATAAAATTTGCAATATTAAATTTGGGATAAAAAATACGAACAATATTTAGAAGAAAAATATTTACATAAAAATTAAAAATCAAATTTGAACTTTTGCAAATATTTATTAGACAACAATTTTAAAATTACAACTTAACATAGCAAACATTGATTAAAAGTAAAAGCTCAAAGTAAACAAAAAAAAAAGAATTCATTTCTTCAGTTTAATTTTAAGCCCTGTAAATAGGAAAACAAAATTCTAAAATTATTGCAAATTTTCTAGAAATTTTTTAAAAAA\n"; | |
close(EXPECTED); | |
$output_gff = $outputdir."/".$output_gff; | |
$differences = ""; | |
open(TEMP,"diff $output_gff $expected_output_gff |"); | |
while(<TEMP>) | |
{ | |
$line = $_; | |
$differences = $differences.$line; | |
} | |
close(TEMP); | |
$length_differences = length($differences); | |
if ($length_differences != 0) { print STDERR "ERROR: test_run_main_program: failed test1 (output_gff $output_gff expected_output_gff $expected_output_gff)\n"; exit;} | |
system "rm -f $output_gff"; | |
system "rm -f $input_gff"; | |
system "rm -f $expected_output_gff"; | |
# EXAMPLE WITH MULTIPLE TRANSCRIPTS IN THE SAME GENE: | |
($input_gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
open(INPUT_GFF,">$input_gff") || die "ERROR: test_run_main_program: cannot open $input_gff\n"; | |
print INPUT_GFF "##gff-version 3\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 . contig 1 117101 . . . ID=BPAG.contig.00154.117101;Name=BPAG.contig.00154.117101\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker gene 35582 39253 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.110\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker mRNA 35582 39253 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1;_AED=0.05;_eAED=0.05;_QI=0|0|0|1|1|1|6|0|309\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker exon 35582 35727 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:0;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker exon 36179 36341 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:1;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker exon 37052 37204 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:2;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker exon 38326 38535 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:3;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker exon 38781 38871 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:4;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker exon 39087 39253 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:5;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker CDS 35582 35727 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker CDS 36179 36341 . + 1 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker CDS 37052 37204 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker CDS 38326 38535 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker CDS 38781 38871 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker CDS 39087 39253 . + 2 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker mRNA 35582 39253 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-2;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-2;_AED=0.05;_eAED=0.05;_QI=0|0|0|1|1|1|6|0|309\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker exon 35582 35727 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-2:exon:0;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-2\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker exon 36179 36341 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-2:exon:1;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-2\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker exon 37052 37204 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-2:exon:2;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-2\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker exon 38326 38535 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-2:exon:3;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-2\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker exon 38781 38871 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-2:exon:4;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-2\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker exon 39087 39253 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-2:exon:5;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-2\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker CDS 35582 35727 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-2:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-2\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker CDS 36179 36341 . + 1 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-2:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-2\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker CDS 37052 37204 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-2:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-2\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker CDS 38326 38535 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-2:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-2\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker CDS 38781 38871 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-2:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-2\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker CDS 39087 39253 . + 2 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-2:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-2\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker gene 103190 104650 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.119;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.119\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker mRNA 103190 104650 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.119-mRNA-1;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.119;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.119-mRNA-1;_AED=0.06;_eAED=0.06;_QI=0|0|0|1|1|1|3|0|276\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker exon 103190 104650 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.119-mRNA-1:exon:1;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.119-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker CDS 103190 104650 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.119-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.119-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 blastx protein_match 1346 1510 223 + . ID=BPAG.contig.00154.117101:hit:88:2.10.0.0;Name=tr|J9EFX0|J9EFX0_WUCBA\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 blastx match_part 1346 1510 223 + . ID=BPAG.contig.00154.117101:hsp:88:2.10.0.0;Parent=BPAG.contig.00154.117101:hit:88:2.10.0.0;Target=tr|J9EFX0|J9EFX0_WUCBA 1 56;Gap=M24 I1 M31\n"; | |
print INPUT_GFF "##FASTA\n"; | |
print INPUT_GFF ">BPAG.contig.10610.1000\n"; | |
print INPUT_GFF "CCAAACGATGATTGAAAATTTATTCATTTCTTTGCTTTTAAAAGGTAATAGTTTAAAAGCAACATTCTCTTTCTTATATTTTCAAAGTTTAGAAGAAAGAAAATTTTAATAATTTTCATTAAAATAAAATTAGGTAAGATATTAGAAAGGTTCAGTTGAAGTATTCCAAAATTATTGCTATCCATTTATTATTTATTTAAAAAAAAAAAAAAAGATTGTTAATTACTTTCATTTCTTAAATTTGATTAATTATATCAAAGATTATGAAGAAAAGAAAAATATGCTTTTTGTATTTTTTAAATTTCTGTTTTCAATTATTTTACAAAAATTTTTGTTTATTTTATTAAATCATTTCTCTTTTTTTTTTTTTGCCATTTGATTTTGCCTATTATTTATACAAATAATCATAATTCACTTTAAATATTAATGGATGAATATTTAAAATTATTATTCCATTTGAATTTGAATGTTAGAAAAAAAAATTCTATATATGAAATGATGATTTCATATTATTAATTAATTTTTTTTTAATTAATTAATACTGAAATTATATTAGACGCAAATGCATTTGAAATATTTACAGGCATGAAGATAGGAATATATTAGGTGGCGTACAAATAATTGACGTTACGTAATACACGTAACTTGTTAAGAACGTCCATTCTGCAATGTTTCTATGCAAAAATATACCAAACGATAAACCGTAAATGTTGTGATAATAGCATAAAATTTGCAATATTAAATTTGGGATAAAAAATACGAACAATATTTAGAAGAAAAATATTTACATAAAAATTAAAAATCAAATTTGAACTTTTGCAAATATTTATTAGACAACAATTTTAAAATTACAACTTAACATAGCAAACATTGATTAAAAGTAAAAGCTCAAAGTAAACAAAAAAAAAAGAATTCATTTCTTCAGTTTAATTTTAAGCCCTGTAAATAGGAAAACAAAATTCTAAAATTATTGCAAATTTTCTAGAAATTTTTTAAAAAA\n"; | |
close(INPUT_GFF); | |
($output_gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
@temp = split(/\//,$output_gff); | |
$output_gff = $temp[$#temp]; | |
&run_main_program($outputdir,$input_gff,$output_gff,'SRAE',1); | |
($expected_output_gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
open(EXPECTED,">$expected_output_gff") || die "ERROR: test_run_main_program: failed test2\n"; | |
print EXPECTED "##gff-version 3\n"; | |
print EXPECTED "BPAG.contig.00154.117101 . contig 1 117101 . . . ID=BPAG.contig.00154.117101;Name=BPAG.contig.00154.117101\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker gene 35582 39253 . + . ID=SRAE_0000000001;Name=SRAE_0000000001\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker mRNA 35582 39253 . + . ID=SRAE_0000000001-mRNA-1;Parent=SRAE_0000000001;Name=SRAE_0000000001-mRNA-1;_AED=0.05;_eAED=0.05;_QI=0|0|0|1|1|1|6|0|309\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker exon 35582 35727 . + . ID=SRAE_0000000001-mRNA-1:exon:1;Parent=SRAE_0000000001-mRNA-1\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker exon 36179 36341 . + . ID=SRAE_0000000001-mRNA-1:exon:2;Parent=SRAE_0000000001-mRNA-1\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker exon 37052 37204 . + . ID=SRAE_0000000001-mRNA-1:exon:3;Parent=SRAE_0000000001-mRNA-1\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker exon 38326 38535 . + . ID=SRAE_0000000001-mRNA-1:exon:4;Parent=SRAE_0000000001-mRNA-1\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker exon 38781 38871 . + . ID=SRAE_0000000001-mRNA-1:exon:5;Parent=SRAE_0000000001-mRNA-1\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker exon 39087 39253 . + . ID=SRAE_0000000001-mRNA-1:exon:6;Parent=SRAE_0000000001-mRNA-1\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker CDS 35582 35727 . + 0 ID=SRAE_0000000001-mRNA-1:cds;Parent=SRAE_0000000001-mRNA-1\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker CDS 36179 36341 . + 1 ID=SRAE_0000000001-mRNA-1:cds;Parent=SRAE_0000000001-mRNA-1\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker CDS 37052 37204 . + 0 ID=SRAE_0000000001-mRNA-1:cds;Parent=SRAE_0000000001-mRNA-1\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker CDS 38326 38535 . + 0 ID=SRAE_0000000001-mRNA-1:cds;Parent=SRAE_0000000001-mRNA-1\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker CDS 38781 38871 . + 0 ID=SRAE_0000000001-mRNA-1:cds;Parent=SRAE_0000000001-mRNA-1\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker CDS 39087 39253 . + 2 ID=SRAE_0000000001-mRNA-1:cds;Parent=SRAE_0000000001-mRNA-1\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker mRNA 35582 39253 . + . ID=SRAE_0000000001-mRNA-2;Parent=SRAE_0000000001;Name=SRAE_0000000001-mRNA-2;_AED=0.05;_eAED=0.05;_QI=0|0|0|1|1|1|6|0|309\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker exon 35582 35727 . + . ID=SRAE_0000000001-mRNA-2:exon:1;Parent=SRAE_0000000001-mRNA-2\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker exon 36179 36341 . + . ID=SRAE_0000000001-mRNA-2:exon:2;Parent=SRAE_0000000001-mRNA-2\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker exon 37052 37204 . + . ID=SRAE_0000000001-mRNA-2:exon:3;Parent=SRAE_0000000001-mRNA-2\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker exon 38326 38535 . + . ID=SRAE_0000000001-mRNA-2:exon:4;Parent=SRAE_0000000001-mRNA-2\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker exon 38781 38871 . + . ID=SRAE_0000000001-mRNA-2:exon:5;Parent=SRAE_0000000001-mRNA-2\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker exon 39087 39253 . + . ID=SRAE_0000000001-mRNA-2:exon:6;Parent=SRAE_0000000001-mRNA-2\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker CDS 35582 35727 . + 0 ID=SRAE_0000000001-mRNA-2:cds;Parent=SRAE_0000000001-mRNA-2\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker CDS 36179 36341 . + 1 ID=SRAE_0000000001-mRNA-2:cds;Parent=SRAE_0000000001-mRNA-2\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker CDS 37052 37204 . + 0 ID=SRAE_0000000001-mRNA-2:cds;Parent=SRAE_0000000001-mRNA-2\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker CDS 38326 38535 . + 0 ID=SRAE_0000000001-mRNA-2:cds;Parent=SRAE_0000000001-mRNA-2\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker CDS 38781 38871 . + 0 ID=SRAE_0000000001-mRNA-2:cds;Parent=SRAE_0000000001-mRNA-2\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker CDS 39087 39253 . + 2 ID=SRAE_0000000001-mRNA-2:cds;Parent=SRAE_0000000001-mRNA-2\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker gene 103190 104650 . + . ID=SRAE_0000000101;Name=SRAE_0000000101\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker mRNA 103190 104650 . + . ID=SRAE_0000000101-mRNA-1;Parent=SRAE_0000000101;Name=SRAE_0000000101-mRNA-1;_AED=0.06;_eAED=0.06;_QI=0|0|0|1|1|1|3|0|276\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker exon 103190 104650 . + 0 ID=SRAE_0000000101-mRNA-1:exon:1;Parent=SRAE_0000000101-mRNA-1\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker CDS 103190 104650 . + . ID=SRAE_0000000101-mRNA-1:cds;Parent=SRAE_0000000101-mRNA-1\n"; | |
print EXPECTED "BPAG.contig.00154.117101 blastx protein_match 1346 1510 223 + . ID=BPAG.contig.00154.117101:hit:88:2.10.0.0;Name=tr|J9EFX0|J9EFX0_WUCBA\n"; | |
print EXPECTED "BPAG.contig.00154.117101 blastx match_part 1346 1510 223 + . ID=BPAG.contig.00154.117101:hsp:88:2.10.0.0;Parent=BPAG.contig.00154.117101:hit:88:2.10.0.0;Target=tr|J9EFX0|J9EFX0_WUCBA 1 56;Gap=M24 I1 M31\n"; | |
print EXPECTED "##FASTA\n"; | |
print EXPECTED ">BPAG.contig.10610.1000\n"; | |
print EXPECTED "CCAAACGATGATTGAAAATTTATTCATTTCTTTGCTTTTAAAAGGTAATAGTTTAAAAGCAACATTCTCTTTCTTATATTTTCAAAGTTTAGAAGAAAGAAAATTTTAATAATTTTCATTAAAATAAAATTAGGTAAGATATTAGAAAGGTTCAGTTGAAGTATTCCAAAATTATTGCTATCCATTTATTATTTATTTAAAAAAAAAAAAAAAGATTGTTAATTACTTTCATTTCTTAAATTTGATTAATTATATCAAAGATTATGAAGAAAAGAAAAATATGCTTTTTGTATTTTTTAAATTTCTGTTTTCAATTATTTTACAAAAATTTTTGTTTATTTTATTAAATCATTTCTCTTTTTTTTTTTTTGCCATTTGATTTTGCCTATTATTTATACAAATAATCATAATTCACTTTAAATATTAATGGATGAATATTTAAAATTATTATTCCATTTGAATTTGAATGTTAGAAAAAAAAATTCTATATATGAAATGATGATTTCATATTATTAATTAATTTTTTTTTAATTAATTAATACTGAAATTATATTAGACGCAAATGCATTTGAAATATTTACAGGCATGAAGATAGGAATATATTAGGTGGCGTACAAATAATTGACGTTACGTAATACACGTAACTTGTTAAGAACGTCCATTCTGCAATGTTTCTATGCAAAAATATACCAAACGATAAACCGTAAATGTTGTGATAATAGCATAAAATTTGCAATATTAAATTTGGGATAAAAAATACGAACAATATTTAGAAGAAAAATATTTACATAAAAATTAAAAATCAAATTTGAACTTTTGCAAATATTTATTAGACAACAATTTTAAAATTACAACTTAACATAGCAAACATTGATTAAAAGTAAAAGCTCAAAGTAAACAAAAAAAAAAGAATTCATTTCTTCAGTTTAATTTTAAGCCCTGTAAATAGGAAAACAAAATTCTAAAATTATTGCAAATTTTCTAGAAATTTTTTAAAAAA\n"; | |
close(EXPECTED); | |
$output_gff = $outputdir."/".$output_gff; | |
$differences = ""; | |
open(TEMP,"diff $output_gff $expected_output_gff |"); | |
while(<TEMP>) | |
{ | |
$line = $_; | |
$differences = $differences.$line; | |
} | |
close(TEMP); | |
$length_differences = length($differences); | |
if ($length_differences != 0) { print STDERR "ERROR: test_run_main_program: failed test2 (output_gff $output_gff expected_output_gff $expected_output_gff)\n"; exit;} | |
system "rm -f $output_gff"; | |
system "rm -f $input_gff"; | |
system "rm -f $expected_output_gff"; | |
# EXAMPLE WITH TWO OVERLAPPING GENES WITH THE SAME NAME, ON OPPOSITE STRANDS: | |
($input_gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
open(INPUT_GFF,">$input_gff") || die "ERROR: test_run_main_program: cannot open $input_gff\n"; | |
print INPUT_GFF "BPAG.contig.08311.1317.reaprBIN . contig 1 1317 . . . ID=BPAG.contig.08311.1317.reaprBIN;Name=BPAG.contig.08311.1317.reaprBIN\n"; | |
print INPUT_GFF "BPAG.contig.08311.1317.reaprBIN maker gene 835 1248 . + . ID=augustus_masked-BPAG.contig.08311.1317.reaprBIN-processed-gene-0.0;Name=augustus_masked-BPAG.contig.08311.1317.reaprBIN-processed-gene-0.0\n"; | |
print INPUT_GFF "BPAG.contig.08311.1317.reaprBIN maker mRNA 835 1248 . + . ID=augustus_masked-BPAG.contig.08311.1317.reaprBIN-processed-gene-0.0-mRNA-1;Parent=augustus_masked-BPAG.contig.08311.1317.reaprBIN-processed-gene-0.0;Name=augustus_masked-BPAG.contig.08311.1317.reaprBIN-processed-gene-0.0-mRNA-1;_AED=0.03;_eAED=0.03;_QI=0|0|0|0.5|0|0|2|0|77\n"; | |
print INPUT_GFF "BPAG.contig.08311.1317.reaprBIN maker exon 835 837 . + . ID=augustus_masked-BPAG.contig.08311.1317.reaprBIN-processed-gene-0.0-mRNA-1:exon:9;Parent=augustus_masked-BPAG.contig.08311.1317.reaprBIN-processed-gene-0.0-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.08311.1317.reaprBIN maker exon 1018 1248 . + . ID=augustus_masked-BPAG.contig.08311.1317.reaprBIN-processed-gene-0.0-mRNA-1:exon:10;Parent=augustus_masked-BPAG.contig.08311.1317.reaprBIN-processed-gene-0.0-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.08311.1317.reaprBIN maker CDS 835 837 . + 0 ID=augustus_masked-BPAG.contig.08311.1317.reaprBIN-processed-gene-0.0-mRNA-1:cds;Parent=augustus_masked-BPAG.contig.08311.1317.reaprBIN-processed-gene-0.0-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.08311.1317.reaprBIN maker CDS 1018 1248 . + 0 ID=augustus_masked-BPAG.contig.08311.1317.reaprBIN-processed-gene-0.0-mRNA-1:cds;Parent=augustus_masked-BPAG.contig.08311.1317.reaprBIN-processed-gene-0.0-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.08311.1317.reaprBIN maker gene 1035 1265 . - . ID=augustus_masked-BPAG.contig.08311.1317.reaprBIN-processed-gene-0.0;Name=augustus_masked-BPAG.contig.08311.1317.reaprBIN-processed-gene-0.0\n"; | |
print INPUT_GFF "BPAG.contig.08311.1317.reaprBIN maker mRNA 1035 1265 . - . ID=augustus_masked-BPAG.contig.08311.1317.reaprBIN-processed-gene-0.0-mRNA-1;Parent=augustus_masked-BPAG.contig.08311.1317.reaprBIN-processed-gene-0.0;Name=augustus_masked-BPAG.contig.08311.1317.reaprBIN-processed-gene-0.0-mRNA-1;_AED=1.00;_eAED=1.00;_QI=0|-1|0|0|-1|1|1|0|76\n"; | |
print INPUT_GFF "BPAG.contig.08311.1317.reaprBIN maker exon 1035 1265 . - . ID=augustus_masked-BPAG.contig.08311.1317.reaprBIN-processed-gene-0.0-mRNA-1:exon:11;Parent=augustus_masked-BPAG.contig.08311.1317.reaprBIN-processed-gene-0.0-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.08311.1317.reaprBIN maker CDS 1035 1265 . - 0 ID=augustus_masked-BPAG.contig.08311.1317.reaprBIN-processed-gene-0.0-mRNA-1:cds;Parent=augustus_masked-BPAG.contig.08311.1317.reaprBIN-processed-gene-0.0-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.08311.1317.reaprBIN augustus_masked match 1035 1265 0.37 - . ID=BPAG.contig.08311.1317.reaprBIN:hit:56:3.5.0.0;Name=augustus_masked-BPAG.contig.08311.1317.reaprBIN-abinit-gene-0.0-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.08311.1317.reaprBIN augustus_masked match_part 1035 1265 0.37 - . ID=BPAG.contig.08311.1317.reaprBIN:hsp:116:3.5.0.0;Parent=BPAG.contig.08311.1317.reaprBIN:hit:56:3.5.0.0;Target=augustus_masked-BPAG.contig.08311.1317.reaprBIN-abinit-gene-0.0-mRNA-1 1 231 +;Gap=M231\n"; | |
print INPUT_GFF "BPAG.contig.08311.1317.reaprBIN model_gff:maker match 835 1248 . + . ID=BPAG.contig.08311.1317.reaprBIN:hit:57:3.5.0.0;Name=augustus_masked-BPAG.contig.08311.1317.reaprBIN-processed-gene-0.0-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.08311.1317.reaprBIN model_gff:maker match_part 835 837 . + . ID=BPAG.contig.08311.1317.reaprBIN:hsp:117:3.5.0.0;Parent=BPAG.contig.08311.1317.reaprBIN:hit:57:3.5.0.0;Target=augustus_masked-BPAG.contig.08311.1317.reaprBIN-processed-gene-0.0-mRNA-1 1 3 +;Gap=M3\n"; | |
print INPUT_GFF "BPAG.contig.08311.1317.reaprBIN model_gff:maker match_part 1018 1248 . + . ID=BPAG.contig.08311.1317.reaprBIN:hsp:118:3.5.0.0;Parent=BPAG.contig.08311.1317.reaprBIN:hit:57:3.5.0.0;Target=augustus_masked-BPAG.contig.08311.1317.reaprBIN-processed-gene-0.0-mRNA-1 4 234 +;Gap=M231\n"; | |
print INPUT_GFF "BPAG.contig.08311.1317.reaprBIN repeatmasker match 104 144 17 + . ID=BPAG.contig.08311.1317.reaprBIN:hit:37:1.3.0.0;Name=species:%2528ACCT%2529n|genus:Simple_repeat;Target=species:%2528ACCT%2529n|genus:Simple_repeat 1 41 +\n"; | |
print INPUT_GFF "BPAG.contig.08311.1317.reaprBIN repeatmasker match_part 104 144 17 + . ID=BPAG.contig.08311.1317.reaprBIN:hsp:97:1.3.0.0;Parent=BPAG.contig.08311.1317.reaprBIN:hit:37:1.3.0.0;Target=species:%2528ACCT%2529n|genus:Simple_repeat 1 41 +\n"; | |
print INPUT_GFF "BPAG.contig.08311.1317.reaprBIN blastx protein_match 1015 1239 287 + . ID=BPAG.contig.08311.1317.reaprBIN:hit:38:2.10.0.0;Name=BUX_s00036.29.1\n"; | |
print INPUT_GFF "BPAG.contig.08311.1317.reaprBIN blastx match_part 1015 1239 287 + . ID=BPAG.contig.08311.1317.reaprBIN:hsp:98:2.10.0.0;Parent=BPAG.contig.08311.1317.reaprBIN:hit:38:2.10.0.0;Target=BUX_s00036.29.1 1 74;Gap=M38 D1 M36\n"; | |
print INPUT_GFF "BPAG.contig.08311.1317.reaprBIN blastx protein_match 1018 1242 272 + . ID=BPAG.contig.08311.1317.reaprBIN:hit:39:2.10.0.0;Name=UniProtKB:H3I027\n"; | |
print INPUT_GFF "##FASTA\n"; | |
print INPUT_GFF ">BPAG.contig.08311.1317.reaprBIN\n"; | |
close(INPUT_GFF); | |
($output_gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
@temp = split(/\//,$output_gff); | |
$output_gff = $temp[$#temp]; | |
&run_main_program($outputdir,$input_gff,$output_gff,'SRAE',1); | |
($expected_output_gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
open(EXPECTED,">$expected_output_gff") || die "ERROR: test_run_main_program: failed test3\n"; | |
print EXPECTED "BPAG.contig.08311.1317.reaprBIN . contig 1 1317 . . . ID=BPAG.contig.08311.1317.reaprBIN;Name=BPAG.contig.08311.1317.reaprBIN\n"; | |
print EXPECTED "BPAG.contig.08311.1317.reaprBIN maker gene 835 1248 . + . ID=SRAE_0000000001;Name=SRAE_0000000001\n"; | |
print EXPECTED "BPAG.contig.08311.1317.reaprBIN maker mRNA 835 1248 . + . ID=SRAE_0000000001-mRNA-1;Parent=SRAE_0000000001;Name=SRAE_0000000001-mRNA-1;_AED=0.03;_eAED=0.03;_QI=0|0|0|0.5|0|0|2|0|77\n"; | |
print EXPECTED "BPAG.contig.08311.1317.reaprBIN maker exon 835 837 . + . ID=SRAE_0000000001-mRNA-1:exon:1;Parent=SRAE_0000000001-mRNA-1\n"; | |
print EXPECTED "BPAG.contig.08311.1317.reaprBIN maker exon 1018 1248 . + . ID=SRAE_0000000001-mRNA-1:exon:2;Parent=SRAE_0000000001-mRNA-1\n"; | |
print EXPECTED "BPAG.contig.08311.1317.reaprBIN maker CDS 835 837 . + 0 ID=SRAE_0000000001-mRNA-1:cds;Parent=SRAE_0000000001-mRNA-1\n"; | |
print EXPECTED "BPAG.contig.08311.1317.reaprBIN maker CDS 1018 1248 . + 0 ID=SRAE_0000000001-mRNA-1:cds;Parent=SRAE_0000000001-mRNA-1\n"; | |
print EXPECTED "BPAG.contig.08311.1317.reaprBIN maker gene 1035 1265 . - . ID=SRAE_0000000101;Name=SRAE_0000000101\n"; | |
print EXPECTED "BPAG.contig.08311.1317.reaprBIN maker mRNA 1035 1265 . - . ID=SRAE_0000000101-mRNA-1;Parent=SRAE_0000000101;Name=SRAE_0000000101-mRNA-1;_AED=1.00;_eAED=1.00;_QI=0|-1|0|0|-1|1|1|0|76\n"; | |
print EXPECTED "BPAG.contig.08311.1317.reaprBIN maker exon 1035 1265 . - . ID=SRAE_0000000101-mRNA-1:exon:1;Parent=SRAE_0000000101-mRNA-1\n"; | |
print EXPECTED "BPAG.contig.08311.1317.reaprBIN maker CDS 1035 1265 . - 0 ID=SRAE_0000000101-mRNA-1:cds;Parent=SRAE_0000000101-mRNA-1\n"; | |
print EXPECTED "BPAG.contig.08311.1317.reaprBIN augustus_masked match 1035 1265 0.37 - . ID=BPAG.contig.08311.1317.reaprBIN:hit:56:3.5.0.0;Name=augustus_masked-BPAG.contig.08311.1317.reaprBIN-abinit-gene-0.0-mRNA-1\n"; | |
print EXPECTED "BPAG.contig.08311.1317.reaprBIN augustus_masked match_part 1035 1265 0.37 - . ID=BPAG.contig.08311.1317.reaprBIN:hsp:116:3.5.0.0;Parent=BPAG.contig.08311.1317.reaprBIN:hit:56:3.5.0.0;Target=augustus_masked-BPAG.contig.08311.1317.reaprBIN-abinit-gene-0.0-mRNA-1 1 231 +;Gap=M231\n"; | |
print EXPECTED "BPAG.contig.08311.1317.reaprBIN model_gff:maker match 835 1248 . + . ID=BPAG.contig.08311.1317.reaprBIN:hit:57:3.5.0.0;Name=augustus_masked-BPAG.contig.08311.1317.reaprBIN-processed-gene-0.0-mRNA-1\n"; | |
print EXPECTED "BPAG.contig.08311.1317.reaprBIN model_gff:maker match_part 835 837 . + . ID=BPAG.contig.08311.1317.reaprBIN:hsp:117:3.5.0.0;Parent=BPAG.contig.08311.1317.reaprBIN:hit:57:3.5.0.0;Target=augustus_masked-BPAG.contig.08311.1317.reaprBIN-processed-gene-0.0-mRNA-1 1 3 +;Gap=M3\n"; | |
print EXPECTED "BPAG.contig.08311.1317.reaprBIN model_gff:maker match_part 1018 1248 . + . ID=BPAG.contig.08311.1317.reaprBIN:hsp:118:3.5.0.0;Parent=BPAG.contig.08311.1317.reaprBIN:hit:57:3.5.0.0;Target=augustus_masked-BPAG.contig.08311.1317.reaprBIN-processed-gene-0.0-mRNA-1 4 234 +;Gap=M231\n"; | |
print EXPECTED "BPAG.contig.08311.1317.reaprBIN repeatmasker match 104 144 17 + . ID=BPAG.contig.08311.1317.reaprBIN:hit:37:1.3.0.0;Name=species:%2528ACCT%2529n|genus:Simple_repeat;Target=species:%2528ACCT%2529n|genus:Simple_repeat 1 41 +\n"; | |
print EXPECTED "BPAG.contig.08311.1317.reaprBIN repeatmasker match_part 104 144 17 + . ID=BPAG.contig.08311.1317.reaprBIN:hsp:97:1.3.0.0;Parent=BPAG.contig.08311.1317.reaprBIN:hit:37:1.3.0.0;Target=species:%2528ACCT%2529n|genus:Simple_repeat 1 41 +\n"; | |
print EXPECTED "BPAG.contig.08311.1317.reaprBIN blastx protein_match 1015 1239 287 + . ID=BPAG.contig.08311.1317.reaprBIN:hit:38:2.10.0.0;Name=BUX_s00036.29.1\n"; | |
print EXPECTED "BPAG.contig.08311.1317.reaprBIN blastx match_part 1015 1239 287 + . ID=BPAG.contig.08311.1317.reaprBIN:hsp:98:2.10.0.0;Parent=BPAG.contig.08311.1317.reaprBIN:hit:38:2.10.0.0;Target=BUX_s00036.29.1 1 74;Gap=M38 D1 M36\n"; | |
print EXPECTED "BPAG.contig.08311.1317.reaprBIN blastx protein_match 1018 1242 272 + . ID=BPAG.contig.08311.1317.reaprBIN:hit:39:2.10.0.0;Name=UniProtKB:H3I027\n"; | |
print EXPECTED "##FASTA\n"; | |
print EXPECTED ">BPAG.contig.08311.1317.reaprBIN\n"; | |
close(EXPECTED); | |
$output_gff = $outputdir."/".$output_gff; | |
$differences = ""; | |
open(TEMP,"diff $output_gff $expected_output_gff |"); | |
while(<TEMP>) | |
{ | |
$line = $_; | |
$differences = $differences.$line; | |
} | |
close(TEMP); | |
$length_differences = length($differences); | |
if ($length_differences != 0) { print STDERR "ERROR: test_run_main_program: failed test3 (output_gff $output_gff expected_output_gff $expected_output_gff)\n"; exit;} | |
system "rm -f $output_gff"; | |
system "rm -f $input_gff"; | |
system "rm -f $expected_output_gff"; | |
# EXAMPLE WITH OVERLAPPING GENE PREDICTIONS ON THE SAME STRAND, WHERE ONE OF THE GENES HAS + AND - EXONS: | |
($input_gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
open(INPUT_GFF,">$input_gff") || die "ERROR: test_run_main_program: cannot open $input_gff\n"; | |
print INPUT_GFF "##gff-version 3\n"; | |
print INPUT_GFF "AG00002 maker gene 683300 686711 . - . ID=maker-AG00002-pred_gff_protein_coding-gene-7.12;Name=maker-AG00002-pred_gff_protein_coding-gene-7.12\n"; | |
print INPUT_GFF "AG00002 maker mRNA 683300 686711 . + . ID=F07C4.9;Parent=maker-AG00002-pred_gff_protein_coding-gene-7.12;Name=maker-AG00002-pred_gff_protein_coding-gene-7.12-mRNA-1;_AED=1.00;_eAED=1.00;_QI=84|0|0|0|1|1|6|0|282\n"; | |
print INPUT_GFF "AG00002 model_gff:maker match 683300 686711 . + . ID=AG00002:hit:2251:4.5.0.7;Name=maker-AG00002-pred_gff_protein_coding-gene-7.12-mRNA-1\n"; | |
print INPUT_GFF "AG00002 model_gff:maker match_part 683300 683550 . - . ID=AG00002:hsp:8272:4.5.0.7;Parent=AG00002:hit:2251:4.5.0.7;Target=maker-AG00002-pred_gff_protein_coding-gene-7.12-mRNA-1 1 251 +;Gap=M251\n"; | |
print INPUT_GFF "AG00002 model_gff:maker match_part 683670 683878 . - . ID=AG00002:hsp:8273:4.5.0.7;Parent=AG00002:hit:2251:4.5.0.7;Target=maker-AG00002-pred_gff_protein_coding-gene-7.12-mRNA-1 252 460 +;Gap=M209\n"; | |
print INPUT_GFF "AG00002 model_gff:maker match_part 684544 684548 . - . ID=AG00002:hsp:8274:4.5.0.7;Parent=AG00002:hit:2251:4.5.0.7;Target=maker-AG00002-pred_gff_protein_coding-gene-7.12-mRNA-1 461 465 +;Gap=M5\n"; | |
print INPUT_GFF "AG00002 model_gff:maker match_part 685452 685459 . + . ID=AG00002:hsp:8275:4.5.0.7;Parent=AG00002:hit:2251:4.5.0.7;Target=maker-AG00002-pred_gff_protein_coding-gene-7.12-mRNA-1 466 473 +;Gap=M8\n"; | |
print INPUT_GFF "AG00002 model_gff:maker match_part 686133 686341 . + . ID=AG00002:hsp:8276:4.5.0.7;Parent=AG00002:hit:2251:4.5.0.7;Target=maker-AG00002-pred_gff_protein_coding-gene-7.12-mRNA-1 474 682 +;Gap=M209\n"; | |
print INPUT_GFF "AG00002 model_gff:maker match_part 686461 686711 . + . ID=AG00002:hsp:8277:4.5.0.7;Parent=AG00002:hit:2251:4.5.0.7;Target=maker-AG00002-pred_gff_protein_coding-gene-7.12-mRNA-1 683 933 +;Gap=M251\n"; | |
print INPUT_GFF "AG00002 maker exon 684544 684548 . - . ID=F07C4.9:exon:255;Parent=F07C4.9\n"; | |
print INPUT_GFF "AG00002 maker exon 683670 683878 . - . ID=F07C4.9:exon:254;Parent=F07C4.9\n"; | |
print INPUT_GFF "AG00002 maker exon 683300 683550 . - . ID=F07C4.9:exon:253;Parent=F07C4.9\n"; | |
print INPUT_GFF "AG00002 maker exon 685452 685459 . + . ID=F07C4.9:exon:256;Parent=F07C4.9\n"; | |
print INPUT_GFF "AG00002 maker exon 686133 686341 . + . ID=F07C4.9:exon:257;Parent=F07C4.9\n"; | |
print INPUT_GFF "AG00002 maker exon 686461 686711 . + . ID=F07C4.9:exon:258;Parent=F07C4.9\n"; | |
print INPUT_GFF "AG00002 maker five_prime_UTR 683550 683633 . + . ID=F07C4.9:five_prime_utr;Parent=F07C4.9\n"; | |
print INPUT_GFF "AG00002 maker CDS 683300 683634 . + 0 ID=F07C4.9:cds;Parent=F07C4.9\n"; | |
print INPUT_GFF "AG00002 maker CDS 683670 683878 . + 1 ID=F07C4.9:cds;Parent=F07C4.9\n"; | |
print INPUT_GFF "AG00002 maker CDS 684544 684548 . + 2 ID=F07C4.9:cds;Parent=F07C4.9\n"; | |
print INPUT_GFF "AG00002 maker CDS 685452 685459 . + 0 ID=F07C4.9:cds;Parent=F07C4.9\n"; | |
print INPUT_GFF "AG00002 maker CDS 686133 686341 . + 1 ID=F07C4.9:cds;Parent=F07C4.9\n"; | |
print INPUT_GFF "AG00002 maker CDS 686461 686711 . + 2 ID=F07C4.9:cds;Parent=F07C4.9\n"; | |
print INPUT_GFF "AG00002 maker gene 683300 684111 . - . ID=augustus_masked-AG00002-processed-gene-6.5;Name=augustus_masked-AG00002-processed-gene-6.5\n"; | |
print INPUT_GFF "AG00002 maker mRNA 683300 684111 . - . ID=augustus_masked-AG00002-processed-gene-6.5-mRNA-1;Parent=augustus_masked-AG00002-processed-gene-6.5;Name=augustus_masked-AG00002-processed-gene-6.5-mRNA-1;_AED=0.23;_eAED=0.29;_QI=54|0|0|0.66|1|1|3|0|162\n"; | |
print INPUT_GFF "AG00002 maker exon 684077 684111 . - . ID=augustus_masked-AG00002-processed-gene-6.5-mRNA-1:exon:210;Parent=augustus_masked-AG00002-processed-gene-6.5-mRNA-1\n"; | |
print INPUT_GFF "AG00002 maker exon 683646 683902 . - . ID=augustus_masked-AG00002-processed-gene-6.5-mRNA-1:exon:209;Parent=augustus_masked-AG00002-processed-gene-6.5-mRNA-1\n"; | |
print INPUT_GFF "AG00002 maker exon 683300 683550 . - . ID=augustus_masked-AG00002-processed-gene-6.5-mRNA-1:exon:208;Parent=augustus_masked-AG00002-processed-gene-6.5-mRNA-1\n"; | |
print INPUT_GFF "AG00002 maker five_prime_UTR 684077 684111 . - . ID=augustus_masked-AG00002-processed-gene-6.5-mRNA-1:five_prime_utr;Parent=augustus_masked-AG00002-processed-gene-6.5-mRNA-1\n"; | |
print INPUT_GFF "AG00002 maker five_prime_UTR 683884 683902 . - . ID=augustus_masked-AG00002-processed-gene-6.5-mRNA-1:five_prime_utr;Parent=augustus_masked-AG00002-processed-gene-6.5-mRNA-1\n"; | |
print INPUT_GFF "AG00002 maker CDS 683646 683883 . - 0 ID=augustus_masked-AG00002-processed-gene-6.5-mRNA-1:cds;Parent=augustus_masked-AG00002-processed-gene-6.5-mRNA-1\n"; | |
print INPUT_GFF "AG00002 maker CDS 683300 683550 . - 2 ID=augustus_masked-AG00002-processed-gene-6.5-mRNA-1:cds;Parent=augustus_masked-AG00002-processed-gene-6.5-mRNA-1\n"; | |
close(INPUT_GFF); | |
($output_gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
@temp = split(/\//,$output_gff); | |
$output_gff = $temp[$#temp]; | |
&run_main_program($outputdir,$input_gff,$output_gff,'ASG',1); | |
($expected_output_gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
# WE DON'T KNOW THE GENE NAME FOR THE TRANSCRIPTS FOR GENE maker-AG00002-pred_gff_protein_coding-gene-7.12, BECAUSE THE mRNA LINE IS ON ANOTHER STRAND. AS A RESULT WE DON'T GET THE EXONS/UTR/CDS/mRNA/gene IN THE OUTPUT GFF, | |
# BECAUSE WE EXCLUDE GENES WITHOUT gene+CDS+exon+mRNA FROM THE OUTPUT GFF. | |
open(EXPECTED,">$expected_output_gff") || die "ERROR: test_run_main_program: failed test4\n"; | |
print EXPECTED "##gff-version 3\n"; | |
print EXPECTED "AG00002 maker gene 683300 684111 . - . ID=ASG_0000000001;Name=ASG_0000000001\n"; | |
print EXPECTED "AG00002 maker mRNA 683300 684111 . - . ID=ASG_0000000001-mRNA-1;Parent=ASG_0000000001;Name=ASG_0000000001-mRNA-1;_AED=0.23;_eAED=0.29;_QI=54|0|0|0.66|1|1|3|0|162\n"; | |
print EXPECTED "AG00002 maker exon 684077 684111 . - . ID=ASG_0000000001-mRNA-1:exon:1;Parent=ASG_0000000001-mRNA-1\n"; | |
print EXPECTED "AG00002 maker exon 683646 683902 . - . ID=ASG_0000000001-mRNA-1:exon:2;Parent=ASG_0000000001-mRNA-1\n"; | |
print EXPECTED "AG00002 maker exon 683300 683550 . - . ID=ASG_0000000001-mRNA-1:exon:3;Parent=ASG_0000000001-mRNA-1\n"; | |
print EXPECTED "AG00002 maker five_prime_UTR 684077 684111 . - . ID=ASG_0000000001-mRNA-1:five_prime_utr;Parent=ASG_0000000001-mRNA-1\n"; | |
print EXPECTED "AG00002 maker five_prime_UTR 683884 683902 . - . ID=ASG_0000000001-mRNA-1:five_prime_utr;Parent=ASG_0000000001-mRNA-1\n"; | |
print EXPECTED "AG00002 maker CDS 683646 683883 . - 0 ID=ASG_0000000001-mRNA-1:cds;Parent=ASG_0000000001-mRNA-1\n"; | |
print EXPECTED "AG00002 maker CDS 683300 683550 . - 2 ID=ASG_0000000001-mRNA-1:cds;Parent=ASG_0000000001-mRNA-1\n"; | |
print EXPECTED "AG00002 model_gff:maker match 683300 686711 . + . ID=AG00002:hit:2251:4.5.0.7;Name=maker-AG00002-pred_gff_protein_coding-gene-7.12-mRNA-1\n"; | |
print EXPECTED "AG00002 model_gff:maker match_part 683300 683550 . - . ID=AG00002:hsp:8272:4.5.0.7;Parent=AG00002:hit:2251:4.5.0.7;Target=maker-AG00002-pred_gff_protein_coding-gene-7.12-mRNA-1 1 251 +;Gap=M251\n"; | |
print EXPECTED "AG00002 model_gff:maker match_part 683670 683878 . - . ID=AG00002:hsp:8273:4.5.0.7;Parent=AG00002:hit:2251:4.5.0.7;Target=maker-AG00002-pred_gff_protein_coding-gene-7.12-mRNA-1 252 460 +;Gap=M209\n"; | |
print EXPECTED "AG00002 model_gff:maker match_part 684544 684548 . - . ID=AG00002:hsp:8274:4.5.0.7;Parent=AG00002:hit:2251:4.5.0.7;Target=maker-AG00002-pred_gff_protein_coding-gene-7.12-mRNA-1 461 465 +;Gap=M5\n"; | |
print EXPECTED "AG00002 model_gff:maker match_part 685452 685459 . + . ID=AG00002:hsp:8275:4.5.0.7;Parent=AG00002:hit:2251:4.5.0.7;Target=maker-AG00002-pred_gff_protein_coding-gene-7.12-mRNA-1 466 473 +;Gap=M8\n"; | |
print EXPECTED "AG00002 model_gff:maker match_part 686133 686341 . + . ID=AG00002:hsp:8276:4.5.0.7;Parent=AG00002:hit:2251:4.5.0.7;Target=maker-AG00002-pred_gff_protein_coding-gene-7.12-mRNA-1 474 682 +;Gap=M209\n"; | |
print EXPECTED "AG00002 model_gff:maker match_part 686461 686711 . + . ID=AG00002:hsp:8277:4.5.0.7;Parent=AG00002:hit:2251:4.5.0.7;Target=maker-AG00002-pred_gff_protein_coding-gene-7.12-mRNA-1 683 933 +;Gap=M251\n"; | |
close(EXPECTED); | |
$output_gff = $outputdir."/".$output_gff; | |
$differences = ""; | |
open(TEMP,"diff $output_gff $expected_output_gff |"); | |
while(<TEMP>) | |
{ | |
$line = $_; | |
$differences = $differences.$line; | |
} | |
close(TEMP); | |
$length_differences = length($differences); | |
if ($length_differences != 0) { print STDERR "ERROR: test_run_main_program: failed test4 (output_gff $output_gff expected_output_gff $expected_output_gff)\n"; exit;} | |
system "rm -f $output_gff"; | |
system "rm -f $input_gff"; | |
system "rm -f $expected_output_gff"; | |
} | |
#------------------------------------------------------------------# | |
# RUN THE MAIN PART OF THE CODE: | |
sub run_main_program | |
{ | |
my $outputdir = $_[0]; # DIRECTORY TO PUT OUTPUT FILES IN. | |
my $input_gff = $_[1]; # THE INPUT GFF FILE | |
my $output_gff = $_[2]; # THE OUTPUT GFF FILE | |
my $species = $_[3]; # SPECIES ABBREVIATION | |
my $testing = $_[4]; # SAYS WHETHER IT'S CALLED FROM A TESTING SUBROUTINE | |
my $errorcode; # RETURNED AS 0 IF THERE IS NO ERROR | |
my $errormsg; # RETURNED AS 'none' IF THERE IS NO ERROR | |
my $line; # | |
my @temp; # | |
my $end_of_gff = 0; # SAYS WHETHER WE HAVE REACHED THE END OF THE GFF | |
my $gene_count = 1; # THE GENE COUNT, USED TO MAKE THE NEW GENE NAME | |
my $new_gene_name; # NEW NAME FOR A GENE | |
my $new_lines; # NEW GFF FILE LINES, WITH NEW NAMES | |
my $gene_gff; # GFF FILE WITH JUST THE FEATURES PERTAINING TO GENES | |
my $gene; # NAME OF THE GENE | |
my $input_gff_linecnt; # NUMBER OF LINES IN $input_gff | |
my $output_gff_linecnt; # NUMBER OF LINES IN $output_gff | |
my $gene_gff2; # GFF FILE WITH JUST 'gene' FEATURES | |
my $FEATURES; # HASH TABLE OF OVERLAPPING FEATURES BETWEEN $gene_gff2, AND $gene_gff2 | |
my $features; # OVERLAPPING FEATURES FOR GENE $gene | |
my $scaffold; # SCAFFOLD NAME | |
my $comment_lines = 0; # NUMBER OF COMMENT LINES TO IGNORE | |
my $new_lines_complete; # SAYS WHETHER THE NEW GFF LINES FOR A GENE HAVE 'gene','exon','CDS','mRNA' FEATURES | |
# MAKE A TEMPORARY GFF FILE WITH JUST THE FEATURES PERTAINING TO GENES ('exon', 'CDS', 'mRNA', etc.): | |
($gene_gff,$errorcode,$errormsg) = &make_gene_gff($input_gff); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
# MAKE A TEMPORARY GFF FILE WITH JUST THE 'GENE' FEATURES: | |
($gene_gff2,$errorcode,$errormsg) = &make_gene_gff2($gene_gff); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
# FIND FEATURES THAT OVERLAP BETWEEN EACH GENE IN $gene_gff2, AND ALL THE GENE FEATURES ('exon', 'CDS', etc.) | |
# IN $gene_gff: | |
($FEATURES,$errorcode,$errormsg) = &find_overlapping_features($gene_gff,$gene_gff2); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
# SORT $gene_gff BY CHROMOSOME, THEN START POINT IN THE CHROMOSOME: | |
# (WE NEED TO DO THIS SO THAT GENES ARE NUMBERED 1,2,3,etc. IN ORDER ALONG A CHROMOSOME) | |
($gene_gff2,$errorcode,$errormsg) = &sort_gene_gff($outputdir,$gene_gff2); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
# OPEN THE OUTPUT GFF FILE: | |
$output_gff = $outputdir."/".$output_gff; | |
open(OUTPUT_GFF,">$output_gff") || die "ERROR: run_main_program: cannot open output $output_gff\n"; | |
# READ IN THE $input_gff FILE AND GET COMMENT LINES AND 'contig' LINES: | |
open(INPUT_GFF,"$input_gff") || die "ERROR: run_main_program: cannot open $input_gff\n"; | |
while(<INPUT_GFF>) | |
{ | |
$line = $_; | |
chomp $line; | |
if ($line eq '##FASTA') { $end_of_gff = 1;} | |
if (substr($line,0,1) ne '#' && $end_of_gff == 0) | |
{ | |
@temp = split(/\t+/,$line); | |
if ($temp[2] eq 'contig') | |
{ | |
print OUTPUT_GFF "$line\n"; | |
} | |
} | |
else | |
{ | |
if ($line ne '###') | |
{ | |
if ($end_of_gff == 0) { print OUTPUT_GFF "$line\n";} # PRINT COMMENT LINES | |
} | |
else { $comment_lines++;} # KEEP A COUNT OF COMMENT LINES TO IGNORE | |
} | |
} | |
close(INPUT_GFF); | |
# READ IN THE GENES IN THE SORTED 'gene' GFF FILE: | |
open(SORTED_GENES_GFF,"$gene_gff2") || die "ERROR: run_main_program: cannot open input $gene_gff2\n"; | |
while(<SORTED_GENES_GFF>) | |
{ | |
$line = $_; | |
chomp $line; | |
@temp = split(/\t+/,$line); | |
$scaffold = $temp[0]; | |
# FIND THE GENE NAME: | |
# BPAG.contig.00154.117101 maker gene 35582 39253 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.110 | |
$gene = $temp[8]; # eg. ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.110 | |
@temp = split(/ID=/,$gene); | |
$gene = $temp[1]; | |
@temp = split(/\;/,$gene); | |
$gene = $temp[0]; | |
# GET THE NEW NAME FOR THIS GENE: | |
($new_gene_name,$errorcode,$errormsg) = &get_new_genename($gene_count,$species); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
if ($testing == 0) { print STDERR "Getting new gene name for gene $gene_count in $species (scaffold $scaffold) ... $gene (old) = $new_gene_name (new)\n";} | |
# FIND THE OVERLAPPING GFF LINES FROM $gene_gff FOR THE GENE: | |
if (!($FEATURES->{$line})) { print STDERR "ERROR: do not know overlapping features for gene $gene (line $line)\n"; exit;} | |
$features = $FEATURES->{$line}; | |
# RENAME THIS GENE AND ALL ITS EXONS etc.: | |
($new_lines,$errorcode,$errormsg) = &fix_gff_lines_for_gene($features,$new_gene_name,$gene); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
# CHECK THAT $new_lines INCLUDES 'gene', 'mRNA', 'exon', AND 'CDS' FEATURES, ALL ON THE SAME STRAND: | |
($new_lines_complete,$errorcode,$errormsg) = &check_whether_new_lines_complete($new_lines); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
if ($new_lines_complete == 1) | |
{ | |
print OUTPUT_GFF "$new_lines"; | |
$gene_count = $gene_count + 100; # THE GENE COUNT, USED TO MAKE THE NEW GENE NAME | |
} | |
else | |
{ | |
if ($testing == 0) { print STDERR "WARNING: discarding gene $gene because it doesn't have all types of features (gene,mRNA,CDS,exon) on the same strand\n";} | |
} | |
} | |
close(SORTED_GENES_GFF); | |
# NOW ADD BACK THE FEATURE TYPES SUCH AS match, protein_match etc. | |
$end_of_gff = 0; | |
open(INPUT_GFF,"$input_gff") || die "ERROR: run_main_program: cannot open input $input_gff\n"; | |
while(<INPUT_GFF>) | |
{ | |
$line = $_; | |
chomp $line; | |
if ($line eq '##FASTA') { $end_of_gff = 1;} | |
if (substr($line,0,1) ne '#' && $end_of_gff == 0) | |
{ | |
@temp = split(/\t+/,$line); | |
if ($temp[2] eq 'protein_match' || $temp[2] eq 'match_part' || $temp[2] eq 'match' || $temp[2] eq 'expressed_sequence_match') | |
{ | |
print OUTPUT_GFF "$line\n"; | |
} | |
else | |
{ | |
if ($temp[2] ne 'gene' && $temp[2] ne 'contig' && $temp[2] ne 'exon' && $temp[2] ne 'CDS' && $temp[2] ne 'five_prime_UTR' && | |
$temp[2] ne 'three_prime_UTR' && $temp[2] ne 'mRNA') | |
{ | |
print STDERR "ERROR: temp[2] $temp[2] in line $line of $input_gff\n"; | |
exit; | |
} | |
} | |
} | |
} | |
close(INPUT_GFF); | |
# NOW ADD BACK THE FASTA SEQUENCE TO THE END OF THE GFF: | |
$end_of_gff = 0; | |
open(INPUT_GFF,"$input_gff") || die "ERROR: run_main_program: cannot open input $input_gff\n"; | |
while(<INPUT_GFF>) | |
{ | |
$line = $_; | |
chomp $line; | |
if ($line eq '##FASTA') { $end_of_gff = 1;} | |
if ($end_of_gff == 1) | |
{ | |
print OUTPUT_GFF "$line\n"; | |
} | |
} | |
close(OUTPUT_GFF); | |
# NOTE: I USED TO CHECK THAT THE INPUT AND OUTPUT GFF HAS THE SAME NUMBER OF LINES, ie. $output_gff_linecnt != $input_gff_linecnt - $comment_lines | |
# HOWEVER, THIS ASSUMPTION DOESN'T HOLD AS GENES THAT HAVE A MIX OF + AND - STRAND FEATURES ARE NOW DISCARDED. | |
# DELETE TEMPORARY FILES: | |
system "rm -f $gene_gff"; | |
system "rm -f $gene_gff2"; | |
} | |
#------------------------------------------------------------------# | |
# TEST &check_whether_new_lines_complete | |
sub test_check_whether_new_lines_complete | |
{ | |
my $new_lines; # NEW GFF LINES FOR GENE | |
my $errorcode; # RETURNED AS 0 FROM A FUNCTION IF THERE IS NO ERROR | |
my $errormsg; # RETURNED AS 'none' FROM A FUNCTION IF THERE IS NO ERROR | |
my $new_lines_complete; # SAYS WHETHER $new_lines INCLUDES gene,mRNA,CDS+exon FEATURES | |
$new_lines = "BPAG.contig.00154.117101 maker gene 35582 39253 . + . ID=SRAE_0000000001;Name=SRAE_0000000001\n"; | |
$new_lines = $new_lines."BPAG.contig.00154.117101 maker mRNA 35582 39253 . + . ID=SRAE_0000000001-mRNA-1;Parent=SRAE_0000000001;Name=SRAE_0000000001-mRNA-1;_AED=0.05;_eAED=0.05;_QI=0|0|0|1|1|1|6|0|309\n"; | |
$new_lines = $new_lines."BPAG.contig.00154.117101 maker exon 35582 35727 . + . ID=SRAE_0000000001-mRNA-1:exon:1;Parent=SRAE_0000000001-mRNA-1\n"; | |
$new_lines = $new_lines."BPAG.contig.00154.117101 maker CDS 35582 35727 . + 0 ID=SRAE_0000000001-mRNA-1:cds;Parent=SRAE_0000000001-mRNA-1"; | |
($new_lines_complete,$errorcode,$errormsg) = &check_whether_new_lines_complete($new_lines); | |
if ($errorcode != 0) { print STDERR "ERROR: test_check_whether_new_lines_complete: failed test1\n"; exit;} | |
if ($new_lines_complete != 1) { print STDERR "ERROR: test_whether_new_lines_complete: failed test1b (new_lines_complete $new_lines_complete)\n"; exit;} | |
$new_lines = "BPAG.contig.00154.117101 maker gene 35582 39253 . + . ID=SRAE_0000000001;Name=SRAE_0000000001"; | |
($new_lines_complete,$errorcode,$errormsg) = &check_whether_new_lines_complete($new_lines); | |
if ($errorcode != 0) { print STDERR "ERROR: test_check_whether_new_lines_complete: failed test2\n"; exit;} | |
if ($new_lines_complete != 0) { print STDERR "ERROR: test_whether_new_lines_complete: failed test2b\n"; exit;} | |
# AN EXAMPLE WHERE STRANDS DISAGREE: | |
$new_lines = "BPAG.contig.00154.117101 maker gene 35582 39253 . + . ID=SRAE_0000000001;Name=SRAE_0000000001\n"; | |
$new_lines = $new_lines."BPAG.contig.00154.117101 maker mRNA 35582 39253 . - . ID=SRAE_0000000001-mRNA-1;Parent=SRAE_0000000001;Name=SRAE_0000000001-mRNA-1;_AED=0.05;_eAED=0.05;_QI=0|0|0|1|1|1|6|0|309\n"; | |
$new_lines = $new_lines."BPAG.contig.00154.117101 maker exon 35582 35727 . + . ID=SRAE_0000000001-mRNA-1:exon:1;Parent=SRAE_0000000001-mRNA-1\n"; | |
$new_lines = $new_lines."BPAG.contig.00154.117101 maker CDS 35582 35727 . + 0 ID=SRAE_0000000001-mRNA-1:cds;Parent=SRAE_0000000001-mRNA-1"; | |
($new_lines_complete,$errorcode,$errormsg) = &check_whether_new_lines_complete($new_lines); | |
if ($errorcode != 0) { print STDERR "ERROR: test_check_whether_new_lines_complete: failed test3\n"; exit;} | |
if ($new_lines_complete != 0) { print STDERR "ERROR: test_whether_new_lines_complete: failed test3b (new_lines_complete $new_lines_complete)\n"; exit;} | |
# AN EXAMPLE WHERE STRANDS DISAGREE: | |
$new_lines = "BPAG.contig.00154.117101 maker gene 35582 39253 . + . ID=SRAE_0000000001;Name=SRAE_0000000001\n"; | |
$new_lines = $new_lines."BPAG.contig.00154.117101 maker mRNA 35582 39253 . + . ID=SRAE_0000000001-mRNA-1;Parent=SRAE_0000000001;Name=SRAE_0000000001-mRNA-1;_AED=0.05;_eAED=0.05;_QI=0|0|0|1|1|1|6|0|309\n"; | |
$new_lines = $new_lines."BPAG.contig.00154.117101 maker exon 35582 35727 . + . ID=SRAE_0000000001-mRNA-1:exon:1;Parent=SRAE_0000000001-mRNA-1\n"; | |
$new_lines = $new_lines."BPAG.contig.00154.117101 maker CDS 35582 35727 . - 0 ID=SRAE_0000000001-mRNA-1:cds;Parent=SRAE_0000000001-mRNA-1"; | |
($new_lines_complete,$errorcode,$errormsg) = &check_whether_new_lines_complete($new_lines); | |
if ($errorcode != 0) { print STDERR "ERROR: test_check_whether_new_lines_complete: failed test4\n"; exit;} | |
if ($new_lines_complete != 0) { print STDERR "ERROR: test_whether_new_lines_complete: failed test4b (new_lines_complete $new_lines_complete)\n"; exit;} | |
} | |
#------------------------------------------------------------------# | |
# CHECK WHETHER THE NEW GFF LINES FOR A GENE CONTAIN 'gene', 'mRNA', 'exon', 'CDS' FEATURES, ALL ON THE SAME STRAND: | |
sub check_whether_new_lines_complete | |
{ | |
my $new_lines = $_[0]; # NEW GFF LINES FOR A GENE | |
my $errorcode = 0; # RETURNED AS 0 IF THERE IS NO ERROR | |
my $errormsg = "none";# RETURNED AS 'none' IF THERE IS NO ERROR | |
my $new_lines_complete = 0; # SAYS WHETHER WE HAVE SEEN 'gene','mRNA','exon','CDS' FEATURES | |
my @new_lines; # ARRAY OF NEW LINES | |
my $i; # | |
my $new_line; # GFF LINE | |
my $feature_type; # FEATURE TYPE | |
my @temp; # | |
my $seen_gene = 0; # SAYS WHETHER WE SAW A 'gene' FEATURE | |
my $seen_mRNA = 0; # SAYS WHETHER WE SAW A 'mRNA' FEATURE | |
my $seen_exon = 0; # SAYS WHETHER WE SAW AN 'exon' FEATURE | |
my $seen_CDS = 0; # SAYS WHETHER WE SAW A 'CDS' FEATURE | |
my $the_strand = "none";# STRAND OF THE GENE | |
my $strands_agree = 1; # SAYS WHETHER ALL THE FEATURES HAVE THE SAME STRAND | |
my $strand; # STRAND OF A FEATURE | |
@new_lines = split(/\n+/,$new_lines); | |
for ($i = 0; $i <= $#new_lines; $i++) | |
{ | |
$new_line = $new_lines[$i]; | |
@temp = split(/\t+/,$new_line); | |
$feature_type = $temp[2]; | |
$strand = $temp[6]; | |
if ($the_strand eq 'none') { $the_strand = $strand;} | |
if ($feature_type eq 'gene') { $seen_gene = 1;} | |
elsif ($feature_type eq 'mRNA') { $seen_mRNA = 1;} | |
elsif ($feature_type eq 'exon') { $seen_exon = 1;} | |
elsif ($feature_type eq 'CDS') { $seen_CDS = 1;} | |
if ($strand ne $the_strand) { $strands_agree = 0;} | |
} | |
if ($seen_gene == 1 && $seen_mRNA == 1 && $seen_exon == 1 && $seen_CDS == 1) { $new_lines_complete = 1;} | |
if ($strands_agree == 0) { $new_lines_complete = 0;} | |
return($new_lines_complete,$errorcode,$errormsg); | |
} | |
#------------------------------------------------------------------# | |
# TEST &sort_gene_gff | |
sub test_sort_gene_gff | |
{ | |
my $outputdir = $_[0]; # DIRECTORY TO PUT OUTPUT FILES IN | |
my $errorcode; # RETURNED AS 0 FROM A FUNCTION IF THERE IS NO ERROR | |
my $errormsg; # RETURNED AS 'none' FROM A FUNCTION IF THERE IS NO ERROR | |
my $input_gff; # INPUT GFF FILE NAME | |
my $expected_input_gff; # FILE WITH EXPECTED CONTENTS OF $input_gff | |
my $differences; # DIFFERENCES BETWEEN $input_gff AND $expected_input_gff | |
my $length_differences; # LENGTH OF $differences | |
my $line; # | |
($input_gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
open(INPUT_GFF,">$input_gff") || die "ERROR: test_sort_gene_gff: cannot open $input_gff\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker gene 103190 104650 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.119;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.119\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker mRNA 103190 104650 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.119-mRNA-1;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.119;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.119-mRNA-1;_AED=0.06;_eAED=0.06;_QI=0|0|0|1|1|1|3|0|276\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker gene 35582 39253 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.110\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker mRNA 35582 39253 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1;_AED=0.05;_eAED=0.05;_QI=0|0|0|1|1|1|6|0|309\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker exon 35582 35727 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:0;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker exon 36179 36341 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:1;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker exon 37052 37204 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:2;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker exon 38326 38535 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:3;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker exon 38781 38871 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:4;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker exon 39087 39253 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:5;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker CDS 35582 35727 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker CDS 36179 36341 . + 1 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker CDS 37052 37204 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker CDS 38326 38535 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker CDS 38781 38871 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker CDS 39087 39253 . + 2 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
close(INPUT_GFF); | |
($input_gff,$errorcode,$errormsg) = &sort_gene_gff($outputdir,$input_gff); | |
if ($errorcode != 0) { print STDERR "ERROR: test_sort_gene_gff: failed test1\n"; exit;} | |
($expected_input_gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
open(EXPECTED,">$expected_input_gff") || die "ERROR: test_sort_gene_gff: failed test1\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker CDS 35582 35727 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker exon 35582 35727 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:0;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker gene 35582 39253 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.110\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker mRNA 35582 39253 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1;_AED=0.05;_eAED=0.05;_QI=0|0|0|1|1|1|6|0|309\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker CDS 36179 36341 . + 1 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker exon 36179 36341 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:1;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker CDS 37052 37204 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker exon 37052 37204 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:2;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker CDS 38326 38535 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker exon 38326 38535 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:3;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker CDS 38781 38871 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker exon 38781 38871 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:4;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker CDS 39087 39253 . + 2 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker exon 39087 39253 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:5;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker gene 103190 104650 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.119;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.119\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker mRNA 103190 104650 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.119-mRNA-1;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.119;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.119-mRNA-1;_AED=0.06;_eAED=0.06;_QI=0|0|0|1|1|1|3|0|276\n"; | |
close(EXPECTED); | |
$differences = ""; | |
open(TEMP,"diff $input_gff $expected_input_gff |"); | |
while(<TEMP>) | |
{ | |
$line = $_; | |
$differences = $differences.$line; | |
} | |
close(TEMP); | |
$length_differences = length($differences); | |
if ($length_differences != 0) { print STDERR "ERROR: test_sort_gene_gff: failed test1 (input_gff $input_gff expected_input_gff $expected_input_gff)\n"; exit;} | |
system "rm -f $input_gff"; | |
system "rm -f $expected_input_gff"; | |
} | |
#------------------------------------------------------------------# | |
# SORT $gene_gff BY CHROMOSOME, THEN START POINT IN THE CHROMOSOME: | |
# (WE NEED TO DO THIS SO THAT GENES ARE NUMBERED 1,2,3,etc. IN ORDER ALONG A CHROMOSOME) | |
# NOTE THE GENE GFF WILL JUST HAVE exon,gene,mRNA,CDS,five_prime_UTR,three_prime_UTR FEATURES, AND WON'T HAVE | |
# COMMENT-LINES OR SEQUENCE. | |
sub sort_gene_gff | |
{ | |
my $outputdir = $_[0]; # DIRECTORY TO WRITE OUTPUT FILES IN | |
my $input_gff = $_[1]; # GFF FILE | |
my $sorted_gff; # SORTED GFF FILE | |
my $cmd; # COMMAND TO RUN | |
my $errorcode = 0; # RETURNED AS 0 IF THERE IS NO ERROR | |
my $errormsg = "none";# RETURNED AS 'none' IF THERE IS NO ERROR | |
($sorted_gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
$cmd = "sort -k1,1 -k4,4n $input_gff > $sorted_gff"; | |
system "$cmd"; | |
system "rm -f $input_gff"; | |
return($sorted_gff,$errorcode,$errormsg); | |
} | |
#------------------------------------------------------------------# | |
# TEST &count_lines | |
sub test_count_lines | |
{ | |
my $outputdir = $_[0]; # DIRECTORY TO WRITE OUTPUT FILES IN | |
my $input_gff; # INPUT GFF FILE | |
my $errorcode; # RETURNED AS 0 FROM A FUNCTION IF THERE IS NO ERROR | |
my $errormsg; # RETURNED AS 'none' FROM A FUNCTION IF THERE IS NO ERROR | |
my $input_gff_linecnt; # NUMBER OF LINES IN $input_gff | |
($input_gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
open(INPUT_GFF,">$input_gff") || die "ERROR: test_run_main_program: cannot open $input_gff\n"; | |
print INPUT_GFF "##gff-version 3\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 . contig 1 117101 . . . ID=BPAG.contig.00154.117101;Name=BPAG.contig.00154.117101\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker gene 35582 39253 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.110\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker mRNA 35582 39253 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1;_AED=0.05;_eAED=0.05;_QI=0|0|0|1|1|1|6|0|309\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker exon 35582 35727 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:0;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker exon 36179 36341 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:1;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker exon 37052 37204 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:2;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker exon 38326 38535 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:3;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker exon 38781 38871 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:4;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker exon 39087 39253 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:5;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker CDS 35582 35727 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker CDS 36179 36341 . + 1 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker CDS 37052 37204 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker CDS 38326 38535 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker CDS 38781 38871 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker CDS 39087 39253 . + 2 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker gene 103190 104650 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.119;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.119\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker mRNA 103190 104650 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.119-mRNA-1;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.119;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.119-mRNA-1;_AED=0.06;_eAED=0.06;_QI=0|0|0|1|1|1|3|0|276\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 blastx protein_match 1346 1510 223 + . ID=BPAG.contig.00154.117101:hit:88:2.10.0.0;Name=tr|J9EFX0|J9EFX0_WUCBA\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 blastx match_part 1346 1510 223 + . ID=BPAG.contig.00154.117101:hsp:88:2.10.0.0;Parent=BPAG.contig.00154.117101:hit:88:2.10.0.0;Target=tr|J9EFX0|J9EFX0_WUCBA 1 56;Gap=M24 I1 M31\n"; | |
print INPUT_GFF "##FASTA\n"; | |
print INPUT_GFF ">BPAG.contig.10610.1000\n"; | |
print INPUT_GFF "CCAAACGATGATTGAAAATTTATTCATTTCTTTGCTTTTAAAAGGTAATAGTTTAAAAGCAACATTCTCTTTCTTATATTTTCAAAGTTTAGAAGAAAGAAAATTTTAATAATTTTCATTAAAATAAAATTAGGTAAGATATTAGAAAGGTTCAGTTGAAGTATTCCAAAATTATTGCTATCCATTTATTATTTATTTAAAAAAAAAAAAAAAGATTGTTAATTACTTTCATTTCTTAAATTTGATTAATTATATCAAAGATTATGAAGAAAAGAAAAATATGCTTTTTGTATTTTTTAAATTTCTGTTTTCAATTATTTTACAAAAATTTTTGTTTATTTTATTAAATCATTTCTCTTTTTTTTTTTTTGCCATTTGATTTTGCCTATTATTTATACAAATAATCATAATTCACTTTAAATATTAATGGATGAATATTTAAAATTATTATTCCATTTGAATTTGAATGTTAGAAAAAAAAATTCTATATATGAAATGATGATTTCATATTATTAATTAATTTTTTTTTAATTAATTAATACTGAAATTATATTAGACGCAAATGCATTTGAAATATTTACAGGCATGAAGATAGGAATATATTAGGTGGCGTACAAATAATTGACGTTACGTAATACACGTAACTTGTTAAGAACGTCCATTCTGCAATGTTTCTATGCAAAAATATACCAAACGATAAACCGTAAATGTTGTGATAATAGCATAAAATTTGCAATATTAAATTTGGGATAAAAAATACGAACAATATTTAGAAGAAAAATATTTACATAAAAATTAAAAATCAAATTTGAACTTTTGCAAATATTTATTAGACAACAATTTTAAAATTACAACTTAACATAGCAAACATTGATTAAAAGTAAAAGCTCAAAGTAAACAAAAAAAAAAGAATTCATTTCTTCAGTTTAATTTTAAGCCCTGTAAATAGGAAAACAAAATTCTAAAATTATTGCAAATTTTCTAGAAATTTTTTAAAAAA\n"; | |
close(INPUT_GFF); | |
($input_gff_linecnt,$errorcode,$errormsg) = &count_lines($input_gff); | |
if ($errorcode != 0) { print STDERR "ERROR: test_count_lines: failed test1\n"; exit;} | |
if ($input_gff_linecnt != 23) { print STDERR "ERROR: test_count_lines: failed test1b\n"; exit;} | |
system "rm -f $input_gff"; | |
} | |
#------------------------------------------------------------------# | |
# COUNT THE NUMBER OF LINES IN A FILE: | |
sub count_lines | |
{ | |
my $input_gff = $_[0]; # INPUT GFF FILE | |
my $linecnt = 0; # NUMBER OF LINES IN THE GFF FILE | |
my $errorcode = 0; # RETURNED AS 0 IF THERE IS NO ERROR | |
my $errormsg = "none";# RETURNED AS 'none' IF THERE IS NO ERROR | |
my @temp; # | |
my $line; # | |
open(TMP,"wc -l $input_gff |"); | |
while(<TMP>) | |
{ | |
$line = $_; | |
chomp $line; | |
@temp = split(/\s+/,$line); | |
if ($linecnt != 0) | |
{ | |
$errormsg = "ERROR: count_lines: linecnt is already $linecnt (line $line) for $input_gff\n"; | |
$errorcode = 56; # ERRORCODE=56 (SHOULDN'T HAPPEN SO CAN'T TEST FOR IT) | |
return($linecnt,$errorcode,$errormsg); | |
} | |
$linecnt = $temp[0]; | |
} | |
close(TMP); | |
if ($linecnt == 0) | |
{ | |
$errormsg = "ERROR: count_lines: linecnt is $linecnt for $input_gff\n"; | |
$errorcode = 57; # ERRORCODE=57 (SHOULDN'T HAPPEN, CAN'T TEST FOR) | |
return($linecnt,$errorcode,$errormsg); | |
} | |
return($linecnt,$errorcode,$errormsg); | |
} | |
#------------------------------------------------------------------# | |
# TEST &make_gene_gff | |
sub test_make_gene_gff | |
{ | |
my $outputdir = $_[0]; # DIRECTORY TO PUT OUTPUT FILES IN | |
my $input_gff; # THE INPUT GFF FILE | |
my $gene_gff; # THE GFF FILE WITH GENE FEATURES | |
my $errorcode; # RETURNED AS 0 FROM A FUNCTION IF THERE IS NO ERROR | |
my $errormsg; # RETURNED AS 'none' FROM A FUNCTION IF THERE IS NO ERROR | |
my $expected_gene_gff; # FILE WITH EXPECTED CONTENTS OF $gene_gff | |
my $differences; # DIFFERENCES BETWEEN $gene_gff AND $expected_gene_gff | |
my $length_differences; # LENGTH OF $differences | |
my $line; # | |
($input_gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
open(INPUT_GFF,">$input_gff") || die "ERROR: test_make_gene_gff: cannot open $input_gff\n"; | |
print INPUT_GFF "##gff-version 3\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 . contig 1 117101 . . . ID=BPAG.contig.00154.117101;Name=BPAG.contig.00154.117101\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker gene 35582 39253 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.110\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker mRNA 35582 39253 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1;_AED=0.05;_eAED=0.05;_QI=0|0|0|1|1|1|6|0|309\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker exon 35582 35727 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:0;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker exon 36179 36341 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:1;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker exon 37052 37204 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:2;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker exon 38326 38535 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:3;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker exon 38781 38871 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:4;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker exon 39087 39253 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:5;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker CDS 35582 35727 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker CDS 36179 36341 . + 1 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker CDS 37052 37204 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker CDS 38326 38535 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker CDS 38781 38871 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker CDS 39087 39253 . + 2 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker gene 103190 104650 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.119;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.119\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker mRNA 103190 104650 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.119-mRNA-1;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.119;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.119-mRNA-1;_AED=0.06;_eAED=0.06;_QI=0|0|0|1|1|1|3|0|276\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 blastx protein_match 1346 1510 223 + . ID=BPAG.contig.00154.117101:hit:88:2.10.0.0;Name=tr|J9EFX0|J9EFX0_WUCBA\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 blastx match_part 1346 1510 223 + . ID=BPAG.contig.00154.117101:hsp:88:2.10.0.0;Parent=BPAG.contig.00154.117101:hit:88:2.10.0.0;Target=tr|J9EFX0|J9EFX0_WUCBA 1 56;Gap=M24 I1 M31\n"; | |
print INPUT_GFF "##FASTA\n"; | |
print INPUT_GFF ">BPAG.contig.10610.1000\n"; | |
print INPUT_GFF "CCAAACGATGATTGAAAATTTATTCATTTCTTTGCTTTTAAAAGGTAATAGTTTAAAAGCAACATTCTCTTTCTTATATTTTCAAAGTTTAGAAGAAAGAAAATTTTAATAATTTTCATTAAAATAAAATTAGGTAAGATATTAGAAAGGTTCAGTTGAAGTATTCCAAAATTATTGCTATCCATTTATTATTTATTTAAAAAAAAAAAAAAAGATTGTTAATTACTTTCATTTCTTAAATTTGATTAATTATATCAAAGATTATGAAGAAAAGAAAAATATGCTTTTTGTATTTTTTAAATTTCTGTTTTCAATTATTTTACAAAAATTTTTGTTTATTTTATTAAATCATTTCTCTTTTTTTTTTTTTGCCATTTGATTTTGCCTATTATTTATACAAATAATCATAATTCACTTTAAATATTAATGGATGAATATTTAAAATTATTATTCCATTTGAATTTGAATGTTAGAAAAAAAAATTCTATATATGAAATGATGATTTCATATTATTAATTAATTTTTTTTTAATTAATTAATACTGAAATTATATTAGACGCAAATGCATTTGAAATATTTACAGGCATGAAGATAGGAATATATTAGGTGGCGTACAAATAATTGACGTTACGTAATACACGTAACTTGTTAAGAACGTCCATTCTGCAATGTTTCTATGCAAAAATATACCAAACGATAAACCGTAAATGTTGTGATAATAGCATAAAATTTGCAATATTAAATTTGGGATAAAAAATACGAACAATATTTAGAAGAAAAATATTTACATAAAAATTAAAAATCAAATTTGAACTTTTGCAAATATTTATTAGACAACAATTTTAAAATTACAACTTAACATAGCAAACATTGATTAAAAGTAAAAGCTCAAAGTAAACAAAAAAAAAAGAATTCATTTCTTCAGTTTAATTTTAAGCCCTGTAAATAGGAAAACAAAATTCTAAAATTATTGCAAATTTTCTAGAAATTTTTTAAAAAA\n"; | |
close(INPUT_GFF); | |
($gene_gff,$errorcode,$errormsg) = &make_gene_gff($input_gff); | |
if ($errorcode != 0) { print STDERR "ERROR: test_make_gene_gff: failed test1\n"; exit;} | |
($expected_gene_gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
open(EXPECTED,">$expected_gene_gff") || die "ERROR: test_make_gene_gff: failed test1\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker gene 35582 39253 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.110\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker mRNA 35582 39253 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1;_AED=0.05;_eAED=0.05;_QI=0|0|0|1|1|1|6|0|309\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker exon 35582 35727 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:0;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker exon 36179 36341 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:1;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker exon 37052 37204 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:2;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker exon 38326 38535 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:3;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker exon 38781 38871 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:4;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker exon 39087 39253 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:5;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker CDS 35582 35727 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker CDS 36179 36341 . + 1 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker CDS 37052 37204 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker CDS 38326 38535 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker CDS 38781 38871 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker CDS 39087 39253 . + 2 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker gene 103190 104650 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.119;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.119\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker mRNA 103190 104650 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.119-mRNA-1;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.119;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.119-mRNA-1;_AED=0.06;_eAED=0.06;_QI=0|0|0|1|1|1|3|0|276\n"; | |
close(EXPECTED); | |
$differences = ""; | |
open(TEMP,"diff $gene_gff $expected_gene_gff |"); | |
while(<TEMP>) | |
{ | |
$line = $_; | |
$differences = $differences.$line; | |
} | |
close(TEMP); | |
$length_differences = length($differences); | |
if ($length_differences != 0) { print STDERR "ERROR: test_make_gene_gff: failed test1 (gene_gff $gene_gff expected_gene_gff $expected_gene_gff)\n"; exit;} | |
system "rm -f $gene_gff"; | |
system "rm -f $input_gff"; | |
system "rm -f $expected_gene_gff"; | |
# TEST ERRORCODE=58 (UNEXPECETED FEATURE TYPE): | |
($input_gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
open(INPUT_GFF,">$input_gff") || die "ERROR: test_make_gene_gff: cannot open $input_gff\n"; | |
print INPUT_GFF "##gff-version 3\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 . contig 1 117101 . . . ID=BPAG.contig.00154.117101;Name=BPAG.contig.00154.117101\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker gene 35582 39253 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.110\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker mRNA 35582 39253 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1;_AED=0.05;_eAED=0.05;_QI=0|0|0|1|1|1|6|0|309\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker exon 35582 35727 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:0;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker myexon 36179 36341 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:1;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker exon 37052 37204 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:2;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker exon 38326 38535 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:3;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker exon 38781 38871 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:4;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker exon 39087 39253 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:5;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker CDS 35582 35727 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker CDS 36179 36341 . + 1 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker CDS 37052 37204 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker CDS 38326 38535 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker CDS 38781 38871 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker CDS 39087 39253 . + 2 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker gene 103190 104650 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.119;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.119\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker mRNA 103190 104650 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.119-mRNA-1;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.119;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.119-mRNA-1;_AED=0.06;_eAED=0.06;_QI=0|0|0|1|1|1|3|0|276\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 blastx protein_match 1346 1510 223 + . ID=BPAG.contig.00154.117101:hit:88:2.10.0.0;Name=tr|J9EFX0|J9EFX0_WUCBA\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 blastx match_part 1346 1510 223 + . ID=BPAG.contig.00154.117101:hsp:88:2.10.0.0;Parent=BPAG.contig.00154.117101:hit:88:2.10.0.0;Target=tr|J9EFX0|J9EFX0_WUCBA 1 56;Gap=M24 I1 M31\n"; | |
print INPUT_GFF "##FASTA\n"; | |
print INPUT_GFF ">BPAG.contig.10610.1000\n"; | |
print INPUT_GFF "CCAAACGATGATTGAAAATTTATTCATTTCTTTGCTTTTAAAAGGTAATAGTTTAAAAGCAACATTCTCTTTCTTATATTTTCAAAGTTTAGAAGAAAGAAAATTTTAATAATTTTCATTAAAATAAAATTAGGTAAGATATTAGAAAGGTTCAGTTGAAGTATTCCAAAATTATTGCTATCCATTTATTATTTATTTAAAAAAAAAAAAAAAGATTGTTAATTACTTTCATTTCTTAAATTTGATTAATTATATCAAAGATTATGAAGAAAAGAAAAATATGCTTTTTGTATTTTTTAAATTTCTGTTTTCAATTATTTTACAAAAATTTTTGTTTATTTTATTAAATCATTTCTCTTTTTTTTTTTTTGCCATTTGATTTTGCCTATTATTTATACAAATAATCATAATTCACTTTAAATATTAATGGATGAATATTTAAAATTATTATTCCATTTGAATTTGAATGTTAGAAAAAAAAATTCTATATATGAAATGATGATTTCATATTATTAATTAATTTTTTTTTAATTAATTAATACTGAAATTATATTAGACGCAAATGCATTTGAAATATTTACAGGCATGAAGATAGGAATATATTAGGTGGCGTACAAATAATTGACGTTACGTAATACACGTAACTTGTTAAGAACGTCCATTCTGCAATGTTTCTATGCAAAAATATACCAAACGATAAACCGTAAATGTTGTGATAATAGCATAAAATTTGCAATATTAAATTTGGGATAAAAAATACGAACAATATTTAGAAGAAAAATATTTACATAAAAATTAAAAATCAAATTTGAACTTTTGCAAATATTTATTAGACAACAATTTTAAAATTACAACTTAACATAGCAAACATTGATTAAAAGTAAAAGCTCAAAGTAAACAAAAAAAAAAGAATTCATTTCTTCAGTTTAATTTTAAGCCCTGTAAATAGGAAAACAAAATTCTAAAATTATTGCAAATTTTCTAGAAATTTTTTAAAAAA\n"; | |
close(INPUT_GFF); | |
($gene_gff,$errorcode,$errormsg) = &make_gene_gff($input_gff); | |
if ($errorcode != 58) { print STDERR "ERROR: test_make_gene_gff: failed test2\n"; exit;} | |
system "rm -f $gene_gff"; | |
system "rm -f $input_gff"; | |
} | |
#------------------------------------------------------------------# | |
# MAKE A TEMPORARY GFF FILE WITH JUST THE FEATURES PERTAINING TO GENES: | |
sub make_gene_gff | |
{ | |
my $input_gff = $_[0]; # INPUT GFF FILE | |
my $errorcode = 0; # RETURNED AS 0 IF THERE IS NO ERROR | |
my $errormsg = "none";# RETURNED AS 'none' IF THERE IS NO ERROR | |
my $gene_gff; # GFF FILE WITH FEATURES OF GENES | |
my $line; # | |
my @temp; # | |
my $feature_type; # GFF FEATURE TYPE | |
my $end_of_gff = 0; # SAYS WHETHER WE HAVE REACHED THE END OF THE GFF FILE | |
my $start; # FEATURE START | |
my $end; # FEATURE END | |
# OPEN THE GENE GFF OUTPUT FILE: | |
($gene_gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
open(GENE_GFF,">$gene_gff") || die "ERROR: make_gene_gff: cannot open $gene_gff\n"; | |
# READ IN THE LINES OF $input_gff THAT OVERLAP WITH THE GENE: | |
open(GFF,"$input_gff") || die "ERROR: make_gene_gff: cannot open $input_gff\n"; | |
while(<GFF>) | |
{ | |
$line = $_; | |
chomp $line; | |
if ($line eq '##FASTA') { $end_of_gff = 1;} | |
if (substr($line,0,1) ne '#' && $end_of_gff == 0) | |
{ | |
@temp = split(/\t+/,$line); | |
$feature_type = $temp[2]; | |
if ($feature_type eq 'gene' || $feature_type eq 'mRNA' || $feature_type eq 'CDS' || $feature_type eq 'exon' || $feature_type eq 'five_prime_UTR' || $feature_type eq 'three_prime_UTR' || $feature_type eq 'transcript') | |
{ | |
$start = $temp[3]; | |
$end = $temp[4]; | |
# SOMETIMES MAKER MAKES A PREDICTION WITH START AND/OR END POSITIONS OF -1 OR 0, THIS SHOULD BE 1 APPARENTLY (FROM TALKING TO THE DEVELOPER). | |
if ($start == 0 || $start == -1) { $start = 1;} | |
if ($end == 0 || $end == -1) { $end = 1;} | |
print GENE_GFF "$temp[0]\t$temp[1]\t$temp[2]\t$start\t$end\t$temp[5]\t$temp[6]\t$temp[7]\t$temp[8]\n"; | |
} | |
else | |
{ | |
if ($feature_type ne 'protein_match' && $feature_type ne 'match_part' && $feature_type ne 'contig' && $feature_type ne 'match' && $feature_type ne 'expressed_sequence_match') | |
{ | |
$errormsg = "ERROR: make_gene_gff: feature_type $feature_type line $line\n"; | |
$errorcode = 58; # ERRORCODE=58 (TESTED FOR) | |
return($gene_gff,$errorcode,$errormsg); | |
} | |
} | |
} | |
} | |
close(GFF); | |
close(GENE_GFF); | |
return($gene_gff,$errorcode,$errormsg); | |
} | |
#------------------------------------------------------------------# | |
# TEST &make_gene_gff2 | |
sub test_make_gene_gff2 | |
{ | |
my $outputdir = $_[0]; # DIRECTORY TO PUT OUTPUT FILES IN | |
my $input_gff; # THE INPUT GFF FILE | |
my $gene_gff; # THE GFF FILE WITH 'GENE' FEATURES | |
my $errorcode; # RETURNED AS 0 FROM A FUNCTION IF THERE IS NO ERROR | |
my $errormsg; # RETURNED AS 'none' FROM A FUNCTION IF THERE IS NO ERROR | |
my $expected_gene_gff; # FILE WITH EXPECTED CONTENTS OF $gene_gff | |
my $differences; # DIFFERENCES BETWEEN $gene_gff AND $expected_gene_gff | |
my $length_differences; # LENGTH OF $differences | |
my $line; # | |
($input_gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
open(INPUT_GFF,">$input_gff") || die "ERROR: test_make_gene_gff2: cannot open $input_gff\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker gene 35582 39253 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.110\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker mRNA 35582 39253 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1;_AED=0.05;_eAED=0.05;_QI=0|0|0|1|1|1|6|0|309\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker exon 35582 35727 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:0;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker exon 36179 36341 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:1;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker exon 37052 37204 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:2;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker exon 38326 38535 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:3;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker exon 38781 38871 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:4;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker exon 39087 39253 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:5;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker CDS 35582 35727 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker CDS 36179 36341 . + 1 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker CDS 37052 37204 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker CDS 38326 38535 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker CDS 38781 38871 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker CDS 39087 39253 . + 2 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker gene 103190 104650 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.119;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.119\n"; | |
print INPUT_GFF "BPAG.contig.00154.117101 maker mRNA 103190 104650 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.119-mRNA-1;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.119;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.119-mRNA-1;_AED=0.06;_eAED=0.06;_QI=0|0|0|1|1|1|3|0|276\n"; | |
close(INPUT_GFF); | |
($gene_gff,$errorcode,$errormsg) = &make_gene_gff2($input_gff); | |
if ($errorcode != 0) { print STDERR "ERROR: test_make_gene_gff2: failed test1\n"; exit;} | |
($expected_gene_gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
open(EXPECTED,">$expected_gene_gff") || die "ERROR: test_make_gene_gff: failed test1\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker gene 35582 39253 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.110\n"; | |
print EXPECTED "BPAG.contig.00154.117101 maker gene 103190 104650 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.119;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.119\n"; | |
close(EXPECTED); | |
$differences = ""; | |
open(TEMP,"diff $gene_gff $expected_gene_gff |"); | |
while(<TEMP>) | |
{ | |
$line = $_; | |
$differences = $differences.$line; | |
} | |
close(TEMP); | |
$length_differences = length($differences); | |
if ($length_differences != 0) { print STDERR "ERROR: test_make_gene_gff2: failed test1 (gene_gff $gene_gff expected_gene_gff $expected_gene_gff)\n"; exit;} | |
system "rm -f $gene_gff"; | |
system "rm -f $input_gff"; | |
system "rm -f $expected_gene_gff"; | |
} | |
#------------------------------------------------------------------# | |
# MAKE A TEMPORARY GFF FILE WITH JUST THE 'GENE' FEATURES: | |
sub make_gene_gff2 | |
{ | |
my $input_gff = $_[0]; # INPUT GFF FILE | |
my $errorcode = 0; # RETURNED AS 0 IF THERE IS NO ERROR | |
my $errormsg = "none";# RETURNED AS 'none' IF THERE IS NO ERROR | |
my $gene_gff; # GFF FILE WITH 'gene' FEATURES | |
my $line; # | |
my @temp; # | |
my $feature_type; # GFF FEATURE TYPE | |
my $end_of_gff = 0; # SAYS WHETHER WE HAVE REACHED THE END OF THE GFF FILE | |
# OPEN THE GENE GFF OUTPUT FILE: | |
($gene_gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
open(GENE_GFF,">$gene_gff") || die "ERROR: make_gene_gff: cannot open $gene_gff\n"; | |
# READ IN THE LINES OF $input_gff (FILE WITH GENE FEATURES): | |
open(GFF,"$input_gff") || die "ERROR: make_gene_gff: cannot open $input_gff\n"; | |
while(<GFF>) | |
{ | |
$line = $_; | |
chomp $line; | |
if ($line eq '##FASTA') { $end_of_gff = 1;} | |
if (substr($line,0,1) ne '#' && $end_of_gff == 0) | |
{ | |
@temp = split(/\t+/,$line); | |
$feature_type = $temp[2]; | |
if ($feature_type eq 'gene') | |
{ | |
print GENE_GFF "$line\n"; | |
} | |
} | |
} | |
close(GFF); | |
close(GENE_GFF); | |
return($gene_gff,$errorcode,$errormsg); | |
} | |
#------------------------------------------------------------------# | |
# TEST &get_new_genename | |
sub test_get_new_genename | |
{ | |
my $errorcode; # RETURNED AS 0 FROM A FUNCTION IF THERE IS NO ERROR | |
my $errormsg; # RETURNED AS 'none' FROM A FUNCTION IF THERE IS NO ERROR | |
my $new_gene_name; # NEW NAME FOR GENE | |
($new_gene_name,$errorcode,$errormsg) = &get_new_genename(326,"SRAE"); | |
if ($errorcode != 0) { print STDERR "ERROR: test_get_new_genename: failed test1\n"; exit;} | |
if ($new_gene_name ne 'SRAE_0000000326') { print STDERR "ERROR: test_get_new_genename: failed test1b (new_gene_name $new_gene_name)\n"; exit;} | |
($new_gene_name,$errorcode,$errormsg) = &get_new_genename(26,"SRAE"); | |
if ($errorcode != 0) { print STDERR "ERROR: test_get_new_genename: failed test2\n"; exit;} | |
if ($new_gene_name ne 'SRAE_0000000026') { print STDERR "ERROR: failed test2b\n"; exit;} | |
($new_gene_name,$errorcode,$errormsg) = &get_new_genename(6,"SRAE"); | |
if ($errorcode != 0) { print STDERR "ERROR: test_get_new_genename: failed test3\n"; exit;} | |
if ($new_gene_name ne 'SRAE_0000000006') { print STDERR "ERROR: failed test3b\n"; exit;} | |
($new_gene_name,$errorcode,$errormsg) = &get_new_genename(1326,"SRAE"); | |
if ($errorcode != 0) { print STDERR "ERROR: test_get_new_genename: failed test4\n"; exit;} | |
if ($new_gene_name ne 'SRAE_0000001326') { print STDERR "ERROR: failed test4b\n"; exit;} | |
($new_gene_name,$errorcode,$errormsg) = &get_new_genename(14326,"SRAE"); | |
if ($errorcode != 0) { print STDERR "ERROR: test_get_new_genename: failed test5\n"; exit;} | |
if ($new_gene_name ne 'SRAE_0000014326') { print STDERR "ERROR: failed test5b\n"; exit;} | |
($new_gene_name,$errorcode,$errormsg) = &get_new_genename(514326,"SRAE"); | |
if ($errorcode != 0) { print STDERR "ERROR: test_get_new_genename: failed test6\n"; exit;} | |
if ($new_gene_name ne 'SRAE_0000514326') { print STDERR "ERROR: failed test6b\n"; exit;} | |
($new_gene_name,$errorcode,$errormsg) = &get_new_genename(7514326,"SRAE"); | |
if ($errorcode != 0) { print STDERR "ERROR: test_get_new_genename: failed test7\n"; exit;} | |
if ($new_gene_name ne 'SRAE_0007514326') { print STDERR "ERROR: failed test7b\n"; exit;} | |
($new_gene_name,$errorcode,$errormsg) = &get_new_genename(17514326,"SRAE"); | |
if ($errorcode != 0) { print STDERR "ERROR: test_get_new_genename: failed test7\n"; exit;} | |
if ($new_gene_name ne 'SRAE_0017514326') { print STDERR "ERROR: failed test7b\n"; exit;} | |
($new_gene_name,$errorcode,$errormsg) = &get_new_genename(817514326,"SRAE"); | |
if ($errorcode != 0) { print STDERR "ERROR: test_get_new_genename: failed test8\n"; exit;} | |
if ($new_gene_name ne 'SRAE_0817514326') { print STDERR "ERROR: failed test8b\n"; exit;} | |
($new_gene_name,$errorcode,$errormsg) = &get_new_genename(9817514326,"SRAE"); | |
if ($errorcode != 0) { print STDERR "ERROR: test_get_new_genename: failed test9\n"; exit;} | |
if ($new_gene_name ne 'SRAE_9817514326') { print STDERR "ERROR: failed test9b\n"; exit;} | |
# TEST ERRORCODE=50: LENGTH OF LAST PART OF NAME IS >10: | |
($new_gene_name,$errorcode,$errormsg) = &get_new_genename(49817514326,"SRAE"); | |
if ($errorcode != 50) { print STDERR "ERROR: test_get_new_genename: failed test10\n"; exit;} | |
} | |
#------------------------------------------------------------------# | |
# GET THE NEW NAME FOR A GENE: | |
sub get_new_genename | |
{ | |
my $gene_count = $_[0]; # COUNT OF GENES SEEN SO FAR | |
my $species = $_[1]; # ABBREVIATION FOR SPECIES | |
my $errorcode = 0; # RETURNED AS 0 IF THERE IS NO ERROR | |
my $errormsg = "none";# RETURNED AS 'none' IF THERE IS NO ERROR | |
my $gene_name = ""; # GENE NAME | |
my $length_last_part; # LENGTH OF LAST PART OF GENE NAME | |
my $length_first_part; # LENGTH OF FIRST PART OF GENE NAME | |
my $i; # | |
# S. RATTI GENES ARE CALLED eg. SRAE_1243200, SRAE_X083500, etc. | |
$length_last_part = length($gene_count); | |
if ($length_last_part > 10) | |
{ | |
$errormsg = "ERROR: get_new_genename: gene count $gene_count\n"; | |
$errorcode = 50; # ERRORCODE=50 (TESTED FOR) | |
return($gene_name,$errorcode,$errormsg); | |
} | |
$length_first_part = 10 - $length_last_part; | |
for ($i = 1; $i <= $length_first_part; $i++) { $gene_name = $gene_name."0";} | |
$gene_name = $gene_name.$gene_count; | |
$gene_name = $species."_".$gene_name; | |
return($gene_name,$errorcode,$errormsg); | |
} | |
#------------------------------------------------------------------# | |
# TEST &find_overlapping_features | |
sub test_find_overlapping_features | |
{ | |
my $errorcode; # RETURNED AS 0 IF THERE IS NO ERROR | |
my $errormsg; # RETURNED AS 'none' IF THERE IS NO ERROR | |
my $features_gff; # GFF FILE WITH FEATURES OF GENES ('exon', 'CDS', etc.) | |
my $genes_gff; # GFF FILE WITH 'gene' FEATURES | |
my $FEATURES; # HASH TABLE OF OVERLAPPING FEATURES FOR GENES | |
my $features; # OVERLAPPING FEATURES FOR A GENE | |
my @features; # OVERLAPPING FEATURES FOR A GENE | |
($features_gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
open(FEATURES_GFF,">$features_gff") || die "ERROR: test_find_overlapping_features: cannot open $features_gff\n"; | |
print FEATURES_GFF "BPAG.contig.00154.117101 maker gene 35582 39253 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.110\n"; | |
print FEATURES_GFF "BPAG.contig.00154.117101 maker mRNA 35582 39253 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1;_AED=0.05;_eAED=0.05;_QI=0|0|0|1|1|1|6|0|309\n"; | |
print FEATURES_GFF "BPAG.contig.00154.117101 maker exon 35582 35727 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:0;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print FEATURES_GFF "BPAG.contig.00154.117101 maker exon 36179 36341 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:1;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print FEATURES_GFF "BPAG.contig.00154.117101 maker exon 37052 37204 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:2;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print FEATURES_GFF "BPAG.contig.00154.117101 maker exon 38326 38535 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:3;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print FEATURES_GFF "BPAG.contig.00154.117101 maker exon 38781 38871 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:4;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print FEATURES_GFF "BPAG.contig.00154.117101 maker exon 39087 39253 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:5;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print FEATURES_GFF "BPAG.contig.00154.117101 maker CDS 35582 35727 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print FEATURES_GFF "BPAG.contig.00154.117101 maker CDS 36179 36341 . + 1 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print FEATURES_GFF "BPAG.contig.00154.117101 maker CDS 37052 37204 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print FEATURES_GFF "BPAG.contig.00154.117101 maker CDS 38326 38535 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print FEATURES_GFF "BPAG.contig.00154.117101 maker CDS 38781 38871 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print FEATURES_GFF "BPAG.contig.00154.117101 maker CDS 39087 39253 . + 2 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print FEATURES_GFF "BPAG.contig.00154.117101 maker gene 103190 104650 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.119;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.119\n"; | |
print FEATURES_GFF "BPAG.contig.00154.117101 maker mRNA 103190 104650 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.119-mRNA-1;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.119;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.119-mRNA-1;_AED=0.06;_eAED=0.06;_QI=0|0|0|1|1|1|3|0|276\n"; | |
close(FEATURES_GFF); | |
($genes_gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
open(GENES_GFF,">$genes_gff") || die "ERROR: test_find_overlapping_features: cannot open $features_gff\n"; | |
print GENES_GFF "BPAG.contig.00154.117101 maker gene 35582 39253 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.110\n"; | |
print GENES_GFF "BPAG.contig.00154.117101 maker gene 103190 104650 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.119;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.119\n"; | |
close(GENES_GFF); | |
($FEATURES,$errorcode,$errormsg) = &find_overlapping_features($features_gff,$genes_gff); | |
if ($errorcode != 0) { print STDERR "ERROR: test_find_overlapping_features: failed test1\n"; exit;} | |
if (!($FEATURES->{"BPAG.contig.00154.117101 maker gene 35582 39253 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.110"})) { print STDERR "ERROR: test_find_overlapping_features: failed test1b\n"; exit;} | |
$features = $FEATURES->{"BPAG.contig.00154.117101 maker gene 35582 39253 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.110"}; | |
@features = split(/\n+/,$features); | |
if ($#features != 13) { print STDERR "ERROR: test_find_overlapping_features: failed test1c\n"; exit;} | |
if ($features[0] ne 'BPAG.contig.00154.117101 maker gene 35582 39253 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.110') { print STDERR "ERROR: test_find_overlapping_features: failed test1d\n"; exit;} | |
if ($features[1] ne 'BPAG.contig.00154.117101 maker mRNA 35582 39253 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1;_AED=0.05;_eAED=0.05;_QI=0|0|0|1|1|1|6|0|309') { print STDERR "ERROR: test_find_overlapping_features: failed test1e\n"; exit;} | |
if ($features[2] ne 'BPAG.contig.00154.117101 maker exon 35582 35727 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:0;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1') { print STDERR "ERROR: test_find_overlapping_features: failed test1f\n"; exit;} | |
if ($features[3] ne 'BPAG.contig.00154.117101 maker exon 36179 36341 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:1;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1') { print STDERR "ERROR: test_find_overlapping_features: failed test1g\n"; exit;} | |
if ($features[4] ne 'BPAG.contig.00154.117101 maker exon 37052 37204 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:2;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1') { print STDERR "ERROR: test_find_overlapping_features: failed test1h\n"; exit;} | |
if ($features[5] ne 'BPAG.contig.00154.117101 maker exon 38326 38535 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:3;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1') { print STDERR "ERROR: test_find_overlapping_features: failed test1i\n"; exit;} | |
if ($features[6] ne 'BPAG.contig.00154.117101 maker exon 38781 38871 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:4;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1') { print STDERR "ERROR: test_find_overlapping_features: failed test1j\n"; exit;} | |
if ($features[7] ne 'BPAG.contig.00154.117101 maker exon 39087 39253 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:5;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1') { print STDERR "ERROR: test_find_overlapping_features: failed test1k\n"; exit;} | |
if ($features[8] ne 'BPAG.contig.00154.117101 maker CDS 35582 35727 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1') { print STDERR "ERROR: test_find_overlapping_features: failed test1l\n"; exit;} | |
if ($features[9] ne 'BPAG.contig.00154.117101 maker CDS 36179 36341 . + 1 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1') { print STDERR "ERROR: test_find_overlapping_features: failed test1m\n"; exit;} | |
if ($features[10] ne 'BPAG.contig.00154.117101 maker CDS 37052 37204 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1') { print STDERR "ERROR: test_find_overlapping_features: failed test1n\n"; exit;} | |
if ($features[11] ne 'BPAG.contig.00154.117101 maker CDS 38326 38535 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1') { print STDERR "ERROR: test_find_overlapping_features: failed test1o\n"; exit;} | |
if ($features[12] ne 'BPAG.contig.00154.117101 maker CDS 38781 38871 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1') { print STDERR "ERROR: test_find_overlapping_features: failed test1p\n"; exit;} | |
if ($features[13] ne 'BPAG.contig.00154.117101 maker CDS 39087 39253 . + 2 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1') { print STDERR "ERROR: test_find_overlapping_features: failed test1q\n"; exit;} | |
if (!($FEATURES->{"BPAG.contig.00154.117101 maker gene 103190 104650 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.119;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.119"})) { print STDERR "ERROR: test_find_overalpping_features: failed test2\n"; exit;} | |
$features = $FEATURES->{"BPAG.contig.00154.117101 maker gene 103190 104650 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.119;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.119"}; | |
@features = split(/\n+/,$features); | |
if ($#features != 1) { print STDERR "ERROR: test_find_overlapping_features: failed test2b\n"; exit;} | |
if ($features[0] ne 'BPAG.contig.00154.117101 maker gene 103190 104650 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.119;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.119') { print STDERR "ERROR: test_find_overlapping_features: failed test2c\n"; exit;} | |
if ($features[1] ne 'BPAG.contig.00154.117101 maker mRNA 103190 104650 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.119-mRNA-1;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.119;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.119-mRNA-1;_AED=0.06;_eAED=0.06;_QI=0|0|0|1|1|1|3|0|276') { print STDERR "ERROR: test_find_overlapping_features: failed test2d\n"; exit;} | |
system "rm -f $features_gff"; | |
system "rm -f $genes_gff"; | |
# TEST ERRORCODE=100 (OVERLAPPING GENES OF THE SAME NAME ON THE SAME STRAND): | |
($features_gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
open(FEATURES_GFF,">$features_gff") || die "ERROR: test_find_overlapping_features: cannot open $features_gff\n"; | |
print FEATURES_GFF "BPAG.contig.00154.117101 maker gene 35582 39253 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.110\n"; | |
print FEATURES_GFF "BPAG.contig.00154.117101 maker mRNA 35582 39253 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1;_AED=0.05;_eAED=0.05;_QI=0|0|0|1|1|1|6|0|309\n"; | |
print FEATURES_GFF "BPAG.contig.00154.117101 maker exon 35582 35727 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:0;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print FEATURES_GFF "BPAG.contig.00154.117101 maker exon 36179 36341 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:1;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print FEATURES_GFF "BPAG.contig.00154.117101 maker exon 37052 37204 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:2;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print FEATURES_GFF "BPAG.contig.00154.117101 maker exon 38326 38535 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:3;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print FEATURES_GFF "BPAG.contig.00154.117101 maker exon 38781 38871 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:4;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print FEATURES_GFF "BPAG.contig.00154.117101 maker exon 39087 39253 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:5;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print FEATURES_GFF "BPAG.contig.00154.117101 maker CDS 35582 35727 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print FEATURES_GFF "BPAG.contig.00154.117101 maker CDS 36179 36341 . + 1 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print FEATURES_GFF "BPAG.contig.00154.117101 maker CDS 37052 37204 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print FEATURES_GFF "BPAG.contig.00154.117101 maker CDS 38326 38535 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print FEATURES_GFF "BPAG.contig.00154.117101 maker CDS 38781 38871 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print FEATURES_GFF "BPAG.contig.00154.117101 maker CDS 39087 39253 . + 2 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
print FEATURES_GFF "BPAG.contig.00154.117101 maker gene 35584 39255 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.110\n"; | |
print FEATURES_GFF "BPAG.contig.00154.117101 maker mRNA 35584 39255 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1;_AED=0.06;_eAED=0.06;_QI=0|0|0|1|1|1|3|0|276\n"; | |
close(FEATURES_GFF); | |
($genes_gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
open(GENES_GFF,">$genes_gff") || die "ERROR: test_find_overlapping_features: cannot open $features_gff\n"; | |
print GENES_GFF "BPAG.contig.00154.117101 maker gene 35582 39253 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.110\n"; | |
print GENES_GFF "BPAG.contig.00154.117101 maker gene 103190 104650 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.119;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.119\n"; | |
close(GENES_GFF); | |
($FEATURES,$errorcode,$errormsg) = &find_overlapping_features($features_gff,$genes_gff); | |
if ($errorcode != 100) { print STDERR "ERROR: test_find_overlapping_features: failed test3\n"; exit;} | |
system "rm -f $features_gff"; | |
system "rm -f $genes_gff"; | |
# TEST AN EXAMPLE WITH TWO OVERLAPPING GENES ON THE SAME STRAND, WHERE ONE OF THE GENES HAS BOTH POSITIVE AND NEGATIVE EXONS: | |
($features_gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
open(FEATURES_GFF,">$features_gff") || die "ERROR: test_find_overlapping_features: cannot open $features_gff\n"; | |
print FEATURES_GFF "AG00002 maker gene 683300 686711 . - . ID=maker-AG00002-pred_gff_protein_coding-gene-7.12;Name=maker-AG00002-pred_gff_protein_coding-gene-7.12\n"; | |
print FEATURES_GFF "AG00002 maker mRNA 683300 686711 . + . ID=F07C4.9;Parent=maker-AG00002-pred_gff_protein_coding-gene-7.12;Name=maker-AG00002-pred_gff_protein_coding-gene-7.12-mRNA-1;_AED=1.00;_eAED=1.00;_QI=84|0|0|0|1|1|6|0|282\n"; | |
print FEATURES_GFF "AG00002 model_gff:maker match 683300 686711 . + . ID=AG00002:hit:2251:4.5.0.7;Name=maker-AG00002-pred_gff_protein_coding-gene-7.12-mRNA-1\n"; | |
print FEATURES_GFF "AG00002 model_gff:maker match_part 683300 683550 . - . ID=AG00002:hsp:8272:4.5.0.7;Parent=AG00002:hit:2251:4.5.0.7;Target=maker-AG00002-pred_gff_protein_coding-gene-7.12-mRNA-1 1 251 +;Gap=M251\n"; | |
print FEATURES_GFF "AG00002 model_gff:maker match_part 683670 683878 . - . ID=AG00002:hsp:8273:4.5.0.7;Parent=AG00002:hit:2251:4.5.0.7;Target=maker-AG00002-pred_gff_protein_coding-gene-7.12-mRNA-1 252 460 +;Gap=M209\n"; | |
print FEATURES_GFF "AG00002 model_gff:maker match_part 684544 684548 . - . ID=AG00002:hsp:8274:4.5.0.7;Parent=AG00002:hit:2251:4.5.0.7;Target=maker-AG00002-pred_gff_protein_coding-gene-7.12-mRNA-1 461 465 +;Gap=M5\n"; | |
print FEATURES_GFF "AG00002 model_gff:maker match_part 685452 685459 . + . ID=AG00002:hsp:8275:4.5.0.7;Parent=AG00002:hit:2251:4.5.0.7;Target=maker-AG00002-pred_gff_protein_coding-gene-7.12-mRNA-1 466 473 +;Gap=M8\n"; | |
print FEATURES_GFF "AG00002 model_gff:maker match_part 686133 686341 . + . ID=AG00002:hsp:8276:4.5.0.7;Parent=AG00002:hit:2251:4.5.0.7;Target=maker-AG00002-pred_gff_protein_coding-gene-7.12-mRNA-1 474 682 +;Gap=M209\n"; | |
print FEATURES_GFF "AG00002 model_gff:maker match_part 686461 686711 . + . ID=AG00002:hsp:8277:4.5.0.7;Parent=AG00002:hit:2251:4.5.0.7;Target=maker-AG00002-pred_gff_protein_coding-gene-7.12-mRNA-1 683 933 +;Gap=M251\n"; | |
print FEATURES_GFF "AG00002 maker exon 684544 684548 . - . ID=F07C4.9:exon:255;Parent=F07C4.9\n"; | |
print FEATURES_GFF "AG00002 maker exon 683670 683878 . - . ID=F07C4.9:exon:254;Parent=F07C4.9\n"; | |
print FEATURES_GFF "AG00002 maker exon 683300 683550 . - . ID=F07C4.9:exon:253;Parent=F07C4.9\n"; | |
print FEATURES_GFF "AG00002 maker exon 685452 685459 . + . ID=F07C4.9:exon:256;Parent=F07C4.9\n"; | |
print FEATURES_GFF "AG00002 maker exon 686133 686341 . + . ID=F07C4.9:exon:257;Parent=F07C4.9\n"; | |
print FEATURES_GFF "AG00002 maker exon 686461 686711 . + . ID=F07C4.9:exon:258;Parent=F07C4.9\n"; | |
print FEATURES_GFF "AG00002 maker five_prime_UTR 683550 683633 . + . ID=F07C4.9:five_prime_utr;Parent=F07C4.9\n"; | |
print FEATURES_GFF "AG00002 maker CDS 683300 683634 . + 0 ID=F07C4.9:cds;Parent=F07C4.9\n"; | |
print FEATURES_GFF "AG00002 maker CDS 683670 683878 . + 1 ID=F07C4.9:cds;Parent=F07C4.9\n"; | |
print FEATURES_GFF "AG00002 maker CDS 684544 684548 . + 2 ID=F07C4.9:cds;Parent=F07C4.9\n"; | |
print FEATURES_GFF "AG00002 maker CDS 685452 685459 . + 0 ID=F07C4.9:cds;Parent=F07C4.9\n"; | |
print FEATURES_GFF "AG00002 maker CDS 686133 686341 . + 1 ID=F07C4.9:cds;Parent=F07C4.9\n"; | |
print FEATURES_GFF "AG00002 maker CDS 686461 686711 . + 2 ID=F07C4.9:cds;Parent=F07C4.9\n"; | |
print FEATURES_GFF "AG00002 maker gene 683300 684111 . - . ID=augustus_masked-AG00002-processed-gene-6.5;Name=augustus_masked-AG00002-processed-gene-6.5\n"; | |
print FEATURES_GFF "AG00002 maker mRNA 683300 684111 . - . ID=augustus_masked-AG00002-processed-gene-6.5-mRNA-1;Parent=augustus_masked-AG00002-processed-gene-6.5;Name=augustus_masked-AG00002-processed-gene-6.5-mRNA-1;_AED=0.23;_eAED=0.29;_QI=54|0|0|0.66|1|1|3|0|162\n"; | |
print FEATURES_GFF "AG00002 maker exon 684077 684111 . - . ID=augustus_masked-AG00002-processed-gene-6.5-mRNA-1:exon:210;Parent=augustus_masked-AG00002-processed-gene-6.5-mRNA-1\n"; | |
print FEATURES_GFF "AG00002 maker exon 683646 683902 . - . ID=augustus_masked-AG00002-processed-gene-6.5-mRNA-1:exon:209;Parent=augustus_masked-AG00002-processed-gene-6.5-mRNA-1\n"; | |
print FEATURES_GFF "AG00002 maker exon 683300 683550 . - . ID=augustus_masked-AG00002-processed-gene-6.5-mRNA-1:exon:208;Parent=augustus_masked-AG00002-processed-gene-6.5-mRNA-1\n"; | |
print FEATURES_GFF "AG00002 maker five_prime_UTR 684077 684111 . - . ID=augustus_masked-AG00002-processed-gene-6.5-mRNA-1:five_prime_utr;Parent=augustus_masked-AG00002-processed-gene-6.5-mRNA-1\n"; | |
print FEATURES_GFF "AG00002 maker five_prime_UTR 683884 683902 . - . ID=augustus_masked-AG00002-processed-gene-6.5-mRNA-1:five_prime_utr;Parent=augustus_masked-AG00002-processed-gene-6.5-mRNA-1\n"; | |
print FEATURES_GFF "AG00002 maker CDS 683646 683883 . - 0 ID=augustus_masked-AG00002-processed-gene-6.5-mRNA-1:cds;Parent=augustus_masked-AG00002-processed-gene-6.5-mRNA-1\n"; | |
print FEATURES_GFF "AG00002 maker CDS 683300 683550 . - 2 ID=augustus_masked-AG00002-processed-gene-6.5-mRNA-1:cds;Parent=augustus_masked-AG00002-processed-gene-6.5-mRNA-1\n"; | |
close(FEATURES_GFF); | |
($genes_gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
open(GENES_GFF,">$genes_gff") || die "ERROR: test_find_overlapping_features: cannot open $genes_gff\n"; | |
print GENES_GFF "AG00002 maker gene 683300 686711 . - . ID=maker-AG00002-pred_gff_protein_coding-gene-7.12;Name=maker-AG00002-pred_gff_protein_coding-gene-7.12\n"; | |
print GENES_GFF "AG00002 maker gene 683300 684111 . - . ID=augustus_masked-AG00002-processed-gene-6.5;Name=augustus_masked-AG00002-processed-gene-6.5\n"; | |
close(GENES_GFF); | |
($FEATURES,$errorcode,$errormsg) = &find_overlapping_features($features_gff,$genes_gff); | |
if ($errorcode != 0) { print STDERR "ERROR: test_find_overlapping_features: failed test4\n"; exit;} | |
if (!($FEATURES->{"AG00002 maker gene 683300 686711 . - . ID=maker-AG00002-pred_gff_protein_coding-gene-7.12;Name=maker-AG00002-pred_gff_protein_coding-gene-7.12"})) { print STDERR "ERROR: test_find_overlapping_features: failed test4b\n"; exit;} | |
$features = $FEATURES->{"AG00002 maker gene 683300 686711 . - . ID=maker-AG00002-pred_gff_protein_coding-gene-7.12;Name=maker-AG00002-pred_gff_protein_coding-gene-7.12"}; | |
@features = split(/\n+/,$features); | |
if ($#features != 15) { print STDERR "ERROR: test_find_overlapping_features: failed test4c ($#features):\n$features"; exit;} | |
if ($features[0] ne 'AG00002 maker gene 683300 686711 . - . ID=maker-AG00002-pred_gff_protein_coding-gene-7.12;Name=maker-AG00002-pred_gff_protein_coding-gene-7.12') { print STDERR "ERROR: test_find_overlapping_features: failed test4d\n"; exit;} | |
if ($features[1] ne 'AG00002 model_gff:maker match_part 683300 683550 . - . ID=AG00002:hsp:8272:4.5.0.7;Parent=AG00002:hit:2251:4.5.0.7;Target=maker-AG00002-pred_gff_protein_coding-gene-7.12-mRNA-1 1 251 +;Gap=M251') { print STDERR "ERROR: test_find_overlapping_features: failed test4e ($features[1])\n"; exit;} | |
if ($features[2] ne 'AG00002 model_gff:maker match_part 683670 683878 . - . ID=AG00002:hsp:8273:4.5.0.7;Parent=AG00002:hit:2251:4.5.0.7;Target=maker-AG00002-pred_gff_protein_coding-gene-7.12-mRNA-1 252 460 +;Gap=M209') { print STDERR "ERROR: test_find_overlapping_features: failed test4f\n"; exit;} | |
if ($features[3] ne 'AG00002 model_gff:maker match_part 684544 684548 . - . ID=AG00002:hsp:8274:4.5.0.7;Parent=AG00002:hit:2251:4.5.0.7;Target=maker-AG00002-pred_gff_protein_coding-gene-7.12-mRNA-1 461 465 +;Gap=M5') { print STDERR "ERROR: test_find_overlapping_features: failed test4g ($features[3])\n"; exit;} | |
if ($features[4] ne 'AG00002 maker exon 684544 684548 . - . ID=F07C4.9:exon:255;Parent=F07C4.9') { print STDERR "ERROR: test_find_overlapping_features: failed test4h\n"; exit;} | |
if ($features[5] ne 'AG00002 maker exon 683670 683878 . - . ID=F07C4.9:exon:254;Parent=F07C4.9') { print STDERR "ERROR: test_find_overlapping_features: failed test4i\n"; exit;} | |
if ($features[6] ne 'AG00002 maker exon 683300 683550 . - . ID=F07C4.9:exon:253;Parent=F07C4.9') { print STDERR "ERROR: test_find_overlapping_features: failed test4j\n"; exit;} | |
if ($features[7] ne 'AG00002 maker gene 683300 684111 . - . ID=augustus_masked-AG00002-processed-gene-6.5;Name=augustus_masked-AG00002-processed-gene-6.5') { print STDERR "ERROR: test_find_overlapping_features: failed test4k\n"; exit;} | |
if ($features[8] ne 'AG00002 maker mRNA 683300 684111 . - . ID=augustus_masked-AG00002-processed-gene-6.5-mRNA-1;Parent=augustus_masked-AG00002-processed-gene-6.5;Name=augustus_masked-AG00002-processed-gene-6.5-mRNA-1;_AED=0.23;_eAED=0.29;_QI=54|0|0|0.66|1|1|3|0|162') { print STDERR "ERROR: test_find_overlapping_features: failed test4l\n"; exit;} | |
if ($features[9] ne 'AG00002 maker exon 684077 684111 . - . ID=augustus_masked-AG00002-processed-gene-6.5-mRNA-1:exon:210;Parent=augustus_masked-AG00002-processed-gene-6.5-mRNA-1') { print STDERR "ERROR: test_find_overlapping_features: failed test4m\n"; exit;} | |
if ($features[10] ne 'AG00002 maker exon 683646 683902 . - . ID=augustus_masked-AG00002-processed-gene-6.5-mRNA-1:exon:209;Parent=augustus_masked-AG00002-processed-gene-6.5-mRNA-1') { print STDERR "ERROR: test_find_overlapping_features: failed test4n\n"; exit;} | |
if ($features[11] ne 'AG00002 maker exon 683300 683550 . - . ID=augustus_masked-AG00002-processed-gene-6.5-mRNA-1:exon:208;Parent=augustus_masked-AG00002-processed-gene-6.5-mRNA-1') { print STDERR "ERROR: test_find_overlapping_features: failed test4o\n"; exit;} | |
if ($features[12] ne 'AG00002 maker five_prime_UTR 684077 684111 . - . ID=augustus_masked-AG00002-processed-gene-6.5-mRNA-1:five_prime_utr;Parent=augustus_masked-AG00002-processed-gene-6.5-mRNA-1') { print STDERR "ERROR: test_find_overlapping_features: failed test4p\n"; exit;} | |
if ($features[13] ne 'AG00002 maker five_prime_UTR 683884 683902 . - . ID=augustus_masked-AG00002-processed-gene-6.5-mRNA-1:five_prime_utr;Parent=augustus_masked-AG00002-processed-gene-6.5-mRNA-1') { print STDERR "ERROR: test_find_overlapping_features: failed test4q\n"; exit;} | |
if ($features[14] ne 'AG00002 maker CDS 683646 683883 . - 0 ID=augustus_masked-AG00002-processed-gene-6.5-mRNA-1:cds;Parent=augustus_masked-AG00002-processed-gene-6.5-mRNA-1') { print STDERR "ERROR: test_find_overlapping_features: failed test4r\n"; exit;} | |
if ($features[15] ne 'AG00002 maker CDS 683300 683550 . - 2 ID=augustus_masked-AG00002-processed-gene-6.5-mRNA-1:cds;Parent=augustus_masked-AG00002-processed-gene-6.5-mRNA-1') { print STDERR "ERROR: test_find_overlapping_features: failed test4s\n"; exit;} | |
if (!($FEATURES->{"AG00002 maker gene 683300 684111 . - . ID=augustus_masked-AG00002-processed-gene-6.5;Name=augustus_masked-AG00002-processed-gene-6.5"})) { print STDERR "ERROR: test_find_overlapping_features: failed test4aa\n"; exit;} | |
$features = $FEATURES->{"AG00002 maker gene 683300 684111 . - . ID=augustus_masked-AG00002-processed-gene-6.5;Name=augustus_masked-AG00002-processed-gene-6.5"}; | |
@features = split(/\n+/,$features); | |
if ($#features != 13) { print STDERR "ERROR: test_find_overlapping_features: failed test4ab (($#features)\n"; exit;} | |
if ($features[0] ne 'AG00002 maker gene 683300 686711 . - . ID=maker-AG00002-pred_gff_protein_coding-gene-7.12;Name=maker-AG00002-pred_gff_protein_coding-gene-7.12') { print STDERR "ERROR: test_find_overlapping_features: failed test4ac\n"; exit;} | |
if ($features[1] ne 'AG00002 model_gff:maker match_part 683300 683550 . - . ID=AG00002:hsp:8272:4.5.0.7;Parent=AG00002:hit:2251:4.5.0.7;Target=maker-AG00002-pred_gff_protein_coding-gene-7.12-mRNA-1 1 251 +;Gap=M251') { print STDERR "ERROR: test_find_overlapping_features: failed test4ad ($features[3])\n"; exit;} | |
if ($features[2] ne 'AG00002 model_gff:maker match_part 683670 683878 . - . ID=AG00002:hsp:8273:4.5.0.7;Parent=AG00002:hit:2251:4.5.0.7;Target=maker-AG00002-pred_gff_protein_coding-gene-7.12-mRNA-1 252 460 +;Gap=M209') { print STDERR "ERROR: test_find_overlapping_features: failed testae\n"; exit;} | |
if ($features[3] ne 'AG00002 maker exon 683670 683878 . - . ID=F07C4.9:exon:254;Parent=F07C4.9') { print STDERR "ERROR: test_find_overlapping_features: failed test4af\n"; exit;} | |
if ($features[4] ne 'AG00002 maker exon 683300 683550 . - . ID=F07C4.9:exon:253;Parent=F07C4.9') { print STDERR "ERROR: test_find_overlapping_features: failed test4ag\n"; exit;} | |
if ($features[5] ne 'AG00002 maker gene 683300 684111 . - . ID=augustus_masked-AG00002-processed-gene-6.5;Name=augustus_masked-AG00002-processed-gene-6.5') { print STDERR "ERROR: test_find_overlapping_features: failed test4ah\n"; exit;} | |
if ($features[6] ne 'AG00002 maker mRNA 683300 684111 . - . ID=augustus_masked-AG00002-processed-gene-6.5-mRNA-1;Parent=augustus_masked-AG00002-processed-gene-6.5;Name=augustus_masked-AG00002-processed-gene-6.5-mRNA-1;_AED=0.23;_eAED=0.29;_QI=54|0|0|0.66|1|1|3|0|162') { print STDERR "ERROR: test_find_overlapping_features: failed test4ai\n"; exit;} | |
if ($features[7] ne 'AG00002 maker exon 684077 684111 . - . ID=augustus_masked-AG00002-processed-gene-6.5-mRNA-1:exon:210;Parent=augustus_masked-AG00002-processed-gene-6.5-mRNA-1') { print STDERR "ERROR: test_find_overlapping_features: failed test4aj\n"; exit;} | |
if ($features[8] ne 'AG00002 maker exon 683646 683902 . - . ID=augustus_masked-AG00002-processed-gene-6.5-mRNA-1:exon:209;Parent=augustus_masked-AG00002-processed-gene-6.5-mRNA-1') { print STDERR "ERROR: test_find_overlapping_features: failed test4ak\n"; exit;} | |
if ($features[9] ne 'AG00002 maker exon 683300 683550 . - . ID=augustus_masked-AG00002-processed-gene-6.5-mRNA-1:exon:208;Parent=augustus_masked-AG00002-processed-gene-6.5-mRNA-1') { print STDERR "ERROR: test_find_overlapping_features: failed test4al\n"; exit;} | |
if ($features[10] ne 'AG00002 maker five_prime_UTR 684077 684111 . - . ID=augustus_masked-AG00002-processed-gene-6.5-mRNA-1:five_prime_utr;Parent=augustus_masked-AG00002-processed-gene-6.5-mRNA-1') { print STDERR "ERROR: test_find_overlapping_features: failed test4am\n"; exit;} | |
if ($features[11] ne 'AG00002 maker five_prime_UTR 683884 683902 . - . ID=augustus_masked-AG00002-processed-gene-6.5-mRNA-1:five_prime_utr;Parent=augustus_masked-AG00002-processed-gene-6.5-mRNA-1') { print STDERR "ERROR: test_find_overlapping_features: failed test4an\n"; exit;} | |
if ($features[12] ne 'AG00002 maker CDS 683646 683883 . - 0 ID=augustus_masked-AG00002-processed-gene-6.5-mRNA-1:cds;Parent=augustus_masked-AG00002-processed-gene-6.5-mRNA-1') { print STDERR "ERROR: test_find_overlapping_features: failed test4ao\n"; exit;} | |
if ($features[13] ne 'AG00002 maker CDS 683300 683550 . - 2 ID=augustus_masked-AG00002-processed-gene-6.5-mRNA-1:cds;Parent=augustus_masked-AG00002-processed-gene-6.5-mRNA-1') { print STDERR "ERROR: test_find_overlapping_features: failed test4ap\n"; exit;} | |
system "rm -f $features_gff"; | |
system "rm -f $genes_gff"; | |
} | |
#------------------------------------------------------------------# | |
# FIND FEATURES THAT OVERLAP BETWEEN EACH GENE IN $gene_gff2, AND ALL THE GENE FEATURES ('exon', 'CDS', etc.) | |
# IN $gene_gff: | |
sub find_overlapping_features | |
{ | |
my $features_gff = $_[0]; # GFF FILE WITH FEATURES IN A GENE | |
my $genes_gff = $_[1]; # GFF FILE WITH 'gene' FEATURES ONLY | |
my $errorcode = 0; # RETURNED AS 0 IF THERE IS NO ERROR | |
my $errormsg = "none";# RETURNED AS 'none' IF THERE IS NO ERROR | |
my %FEATURES = (); # HASH TABLE WITH OVERLAPPING FEATURES FOR EACH GENE | |
my $overlaps; # FILE WITH OVERLAPS | |
my $line; # | |
my @temp; # | |
my $gene; # NAME OF GENE | |
my $cmd; # COMMAND TO RUN | |
my $gene1; # NAME OF THE FIRST GENE | |
my $start1; # START POSITION OF $gene1 | |
my $end1; # END POSITION OF $gene1 | |
my $feature_type2; # TYPE OF THE SECOND FEATURE | |
my $start2; # START OF THE SECOND FEATURE | |
my $end2; # END OF THE SECOND FEATURE | |
my $feature2; # NAME OF THE SECOND FEATURE | |
my $overlap; # GFF LINE FOR OVERLAPPING FEATURE | |
my $gene1_line; # GFF LINE FOR $gene1 | |
($overlaps,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
$cmd = "bedtools intersect -loj -s -a $genes_gff -b $features_gff > $overlaps"; | |
system "$cmd"; | |
# The -loj option means that for each feature in gff1, BEDTools will report each overlapping feature in gff2. | |
# -s MEANS THAT WE ONLY REPORT FEATURES ON THE SAME STRAND. THIS IS BECAUSE IN SOME CASES THERE ARE TWO | |
# OVERLAPPING GENES WITH THE SAME NAME IN AN INPUT GFF, BUT THEY ARE ON OPPOSITE STRANDS. | |
# READ IN THE OVERLAPPING FEATURES FOR EACH GENE: | |
open(OVERLAPS,"$overlaps") || die "ERROR: find_overlapping_features: cannot open $overlaps\n"; | |
while(<OVERLAPS>) | |
{ | |
$line = $_; | |
chomp $line; | |
@temp = split(/\t+/,$line); | |
# eg. BPAG.contig.00154.117101 maker gene 35582 39253 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.110 BPAG.contig.00154.117101 maker gene 35582 39253 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.110 | |
$gene1 = $temp[8]; | |
$start1 = $temp[3]; | |
$end1 = $temp[4]; | |
$feature_type2 = $temp[11]; | |
$start2 = $temp[12]; | |
$end2 = $temp[13]; | |
$feature2 = $temp[17]; | |
if ($feature_type2 eq 'gene') | |
{ | |
if ($gene1 eq $feature2 && !($start1 == $start2 && $end1 == $end2)) | |
{ | |
$errorcode = 100; # ERRORCODE=100 (TESTED FOR) | |
$errormsg = "ERROR: find_overlapping_features: gene $gene1 overlaps gene2 $feature2: line:\n$line\n"; | |
system "rm -f $overlaps"; | |
return(\%FEATURES,$errorcode,$errormsg); | |
} | |
} | |
# GET THE GFF LINE FOR THE OVERLAPPING FEATURE: | |
$overlap = $temp[9]."\t".$temp[10]."\t".$temp[11]."\t".$temp[12]."\t".$temp[13]."\t".$temp[14]."\t".$temp[15]."\t".$temp[16]."\t".$temp[17]."\n"; | |
# RECORD THE OVERLAPS FOR $gene1: | |
$gene1_line = $temp[0]."\t".$temp[1]."\t".$temp[2]."\t".$temp[3]."\t".$temp[4]."\t".$temp[5]."\t".$temp[6]."\t".$temp[7]."\t".$temp[8]; | |
if (!($FEATURES{$gene1_line})) { $FEATURES{$gene1_line} = $overlap; } | |
else { $FEATURES{$gene1_line} = $FEATURES{$gene1_line}.$overlap;} | |
} | |
close(OVERLAPS); | |
# DELETE TEMPORARY FILES: | |
system "rm -f $overlaps"; | |
return(\%FEATURES,$errorcode,$errormsg); | |
} | |
#------------------------------------------------------------------# | |
# TEST &fix_gff_lines_for_gene | |
sub test_fix_gff_lines_for_gene | |
{ | |
my $outputdir = $_[0]; # DIRECTORY TO WRITE OUTPUT FILES IN | |
my $gff_lines; # GFF LINES | |
my $errorcode; # RETURNED AS 0 FROM A FUNCTION IF THERE IS NO ERROR | |
my $errormsg; # RETURNED AS 'none' FROM A FUNCTION IF THERE IS NO ERROR | |
my $new_lines; # NEW LINES | |
my @temp; # | |
$gff_lines = "BPAG.contig.00154.117101 maker gene 35582 39253 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.110\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker mRNA 35582 39253 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1;_AED=0.05;_eAED=0.05;_QI=0|0|0|1|1|1|6|0|309\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker exon 35582 35727 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:0;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker exon 36179 36341 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:1;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker exon 37052 37204 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:2;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker exon 38326 38535 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:3;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker exon 38781 38871 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:4;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker exon 39087 39253 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:5;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker CDS 35582 35727 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker CDS 36179 36341 . + 1 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker CDS 37052 37204 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker CDS 38326 38535 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker CDS 38781 38871 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker CDS 39087 39253 . + 2 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
($new_lines,$errorcode,$errormsg) = &fix_gff_lines_for_gene($gff_lines,'SRAE_00003232','maker-BPAG.contig.00154.117101-augustus-gene-0.110'); | |
if ($errorcode != 0) { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test1 (errorcode $errorcode errormsg $errormsg)\n"; exit;} | |
@temp = split(/\n/,$new_lines); | |
if ($#temp != 13) { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test1b ($#temp)\n"; exit;} | |
if ($temp[0] ne 'BPAG.contig.00154.117101 maker gene 35582 39253 . + . ID=SRAE_00003232;Name=SRAE_00003232') { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test1c\n"; exit;} | |
if ($temp[1] ne 'BPAG.contig.00154.117101 maker mRNA 35582 39253 . + . ID=SRAE_00003232-mRNA-1;Parent=SRAE_00003232;Name=SRAE_00003232-mRNA-1;_AED=0.05;_eAED=0.05;_QI=0|0|0|1|1|1|6|0|309') { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test1d (temp1 $temp[1])\n"; exit;} | |
if ($temp[2] ne 'BPAG.contig.00154.117101 maker exon 35582 35727 . + . ID=SRAE_00003232-mRNA-1:exon:1;Parent=SRAE_00003232-mRNA-1') { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test1e (temp2 $temp[2])\n"; exit;} | |
if ($temp[3] ne 'BPAG.contig.00154.117101 maker exon 36179 36341 . + . ID=SRAE_00003232-mRNA-1:exon:2;Parent=SRAE_00003232-mRNA-1') { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test1f\n"; exit;} | |
if ($temp[4] ne 'BPAG.contig.00154.117101 maker exon 37052 37204 . + . ID=SRAE_00003232-mRNA-1:exon:3;Parent=SRAE_00003232-mRNA-1') { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test1g\n"; exit;} | |
if ($temp[5] ne 'BPAG.contig.00154.117101 maker exon 38326 38535 . + . ID=SRAE_00003232-mRNA-1:exon:4;Parent=SRAE_00003232-mRNA-1') { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test1h\n"; exit;} | |
if ($temp[6] ne 'BPAG.contig.00154.117101 maker exon 38781 38871 . + . ID=SRAE_00003232-mRNA-1:exon:5;Parent=SRAE_00003232-mRNA-1') { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test1i\n"; exit;} | |
if ($temp[7] ne 'BPAG.contig.00154.117101 maker exon 39087 39253 . + . ID=SRAE_00003232-mRNA-1:exon:6;Parent=SRAE_00003232-mRNA-1') { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test1j\n"; exit;} | |
if ($temp[8] ne 'BPAG.contig.00154.117101 maker CDS 35582 35727 . + 0 ID=SRAE_00003232-mRNA-1:cds;Parent=SRAE_00003232-mRNA-1') { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test1k (temp8 $temp[8])\n"; exit;} | |
if ($temp[9] ne 'BPAG.contig.00154.117101 maker CDS 36179 36341 . + 1 ID=SRAE_00003232-mRNA-1:cds;Parent=SRAE_00003232-mRNA-1') { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test1l\n"; exit;} | |
if ($temp[10] ne 'BPAG.contig.00154.117101 maker CDS 37052 37204 . + 0 ID=SRAE_00003232-mRNA-1:cds;Parent=SRAE_00003232-mRNA-1') { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test1m\n"; exit;} | |
if ($temp[11] ne 'BPAG.contig.00154.117101 maker CDS 38326 38535 . + 0 ID=SRAE_00003232-mRNA-1:cds;Parent=SRAE_00003232-mRNA-1') { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test1n\n"; exit;} | |
if ($temp[12] ne 'BPAG.contig.00154.117101 maker CDS 38781 38871 . + 0 ID=SRAE_00003232-mRNA-1:cds;Parent=SRAE_00003232-mRNA-1') { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test1o\n"; exit;} | |
if ($temp[13] ne 'BPAG.contig.00154.117101 maker CDS 39087 39253 . + 2 ID=SRAE_00003232-mRNA-1:cds;Parent=SRAE_00003232-mRNA-1') { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test1p\n"; exit;} | |
# EXAMPLE OF A GENE ON THE NEGATIVE STRAND: | |
$gff_lines = "BPAG.contig.00154.117101 maker gene 35582 39253 . - . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.110\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker mRNA 35582 39253 . - . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1;_AED=0.05;_eAED=0.05;_QI=0|0|0|1|1|1|6|0|309\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker exon 35582 35727 . - . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:0;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker exon 36179 36341 . - . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:1;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker exon 37052 37204 . - . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:2;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker exon 38326 38535 . - . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:3;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker exon 38781 38871 . - . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:4;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker exon 39087 39253 . - . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:5;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker CDS 35582 35727 . - 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker CDS 36179 36341 . - 1 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker CDS 37052 37204 . - 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker CDS 38326 38535 . - 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker CDS 38781 38871 . - 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker CDS 39087 39253 . - 2 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
($new_lines,$errorcode,$errormsg) = &fix_gff_lines_for_gene($gff_lines,'SRAE_00003232','maker-BPAG.contig.00154.117101-augustus-gene-0.110'); | |
if ($errorcode != 0) { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test2\n"; exit;} | |
@temp = split(/\n/,$new_lines); | |
if ($#temp != 13) { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test2b\n"; exit;} | |
if ($temp[0] ne 'BPAG.contig.00154.117101 maker gene 35582 39253 . - . ID=SRAE_00003232;Name=SRAE_00003232') { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test2c\n"; exit;} | |
if ($temp[1] ne 'BPAG.contig.00154.117101 maker mRNA 35582 39253 . - . ID=SRAE_00003232-mRNA-1;Parent=SRAE_00003232;Name=SRAE_00003232-mRNA-1;_AED=0.05;_eAED=0.05;_QI=0|0|0|1|1|1|6|0|309') { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test2d (temp1 $temp[1])\n"; exit;} | |
if ($temp[2] ne 'BPAG.contig.00154.117101 maker exon 35582 35727 . - . ID=SRAE_00003232-mRNA-1:exon:6;Parent=SRAE_00003232-mRNA-1') { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test2e (temp2 $temp[2])\n"; exit;} | |
if ($temp[3] ne 'BPAG.contig.00154.117101 maker exon 36179 36341 . - . ID=SRAE_00003232-mRNA-1:exon:5;Parent=SRAE_00003232-mRNA-1') { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test2f\n"; exit;} | |
if ($temp[4] ne 'BPAG.contig.00154.117101 maker exon 37052 37204 . - . ID=SRAE_00003232-mRNA-1:exon:4;Parent=SRAE_00003232-mRNA-1') { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test2g\n"; exit;} | |
if ($temp[5] ne 'BPAG.contig.00154.117101 maker exon 38326 38535 . - . ID=SRAE_00003232-mRNA-1:exon:3;Parent=SRAE_00003232-mRNA-1') { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test2h\n"; exit;} | |
if ($temp[6] ne 'BPAG.contig.00154.117101 maker exon 38781 38871 . - . ID=SRAE_00003232-mRNA-1:exon:2;Parent=SRAE_00003232-mRNA-1') { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test2i\n"; exit;} | |
if ($temp[7] ne 'BPAG.contig.00154.117101 maker exon 39087 39253 . - . ID=SRAE_00003232-mRNA-1:exon:1;Parent=SRAE_00003232-mRNA-1') { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test2j\n"; exit;} | |
if ($temp[8] ne 'BPAG.contig.00154.117101 maker CDS 35582 35727 . - 0 ID=SRAE_00003232-mRNA-1:cds;Parent=SRAE_00003232-mRNA-1') { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test2k (temp8 $temp[8])\n"; exit;} | |
if ($temp[9] ne 'BPAG.contig.00154.117101 maker CDS 36179 36341 . - 1 ID=SRAE_00003232-mRNA-1:cds;Parent=SRAE_00003232-mRNA-1') { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test2l\n"; exit;} | |
if ($temp[10] ne 'BPAG.contig.00154.117101 maker CDS 37052 37204 . - 0 ID=SRAE_00003232-mRNA-1:cds;Parent=SRAE_00003232-mRNA-1') { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test2m\n"; exit;} | |
if ($temp[11] ne 'BPAG.contig.00154.117101 maker CDS 38326 38535 . - 0 ID=SRAE_00003232-mRNA-1:cds;Parent=SRAE_00003232-mRNA-1') { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test2n\n"; exit;} | |
if ($temp[12] ne 'BPAG.contig.00154.117101 maker CDS 38781 38871 . - 0 ID=SRAE_00003232-mRNA-1:cds;Parent=SRAE_00003232-mRNA-1') { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test2o\n"; exit;} | |
if ($temp[13] ne 'BPAG.contig.00154.117101 maker CDS 39087 39253 . - 2 ID=SRAE_00003232-mRNA-1:cds;Parent=SRAE_00003232-mRNA-1') { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test2p\n"; exit;} | |
# TEST ERRORCODE=60 (UNEXPECTED FEATURE TYPE): | |
$gff_lines = "BPAG.contig.00154.117101 maker gene 35582 39253 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.110\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker mRNA 35582 39253 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1;_AED=0.05;_eAED=0.05;_QI=0|0|0|1|1|1|6|0|309\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker exon 35582 35727 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:0;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker exon 36179 36341 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:1;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker exon 37052 37204 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:2;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker exon 38326 38535 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:3;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker exon 38781 38871 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:4;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker exon 39087 39253 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:5;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker CDS 35582 35727 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker CDS 36179 36341 . + 1 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker CDS 37052 37204 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker CDS 38326 38535 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker CDS 38781 38871 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker contig 39087 39253 . + 2 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
($new_lines,$errorcode,$errormsg) = &fix_gff_lines_for_gene($gff_lines,'SRAE_00003232','maker-BPAG.contig.00154.117101-augustus-gene-0.110'); | |
if ($errorcode != 60) { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test3\n"; exit;} | |
# EXAMPLE OF A GENE THAT DOES NOT HAVE -mRNA- IN TRANSCRIPT NAMES: | |
$gff_lines = "BPAG.contig.00154.117101 maker gene 53633 55479 . + . ID=maker-BPAG.contig.00154.117101-pred_gff_protein_coding-gene-0.33;Name=maker-BPAG.contig.00154.117101-pred_gff_protein_coding-gene-0.33\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker mRNA 53633 55479 . + . ID=1:ZK795.3;Parent=maker-BPAG.contig.00154.117101-pred_gff_protein_coding-gene-0.33;Name=maker-BPAG.contig.00154.117101-pred_gff_protein_coding-gene-0.33-mRNA-1;_AED=0.13;_eAED=0.13;_QI=0|0|0|1|1|1|6|0|288\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker exon 53633 53816 . + . ID=1:ZK795.3:exon:18;Parent=1:ZK795.3\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker exon 54152 54249 . + . ID=1:ZK795.3:exon:19;Parent=1:ZK795.3\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker exon 54398 54487 . + . ID=1:ZK795.3:exon:20;Parent=1:ZK795.3\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker exon 54707 54904 . + . ID=1:ZK795.3:exon:21;Parent=1:ZK795.3\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker exon 55077 55245 . + . ID=1:ZK795.3:exon:22;Parent=1:ZK795.3\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker exon 55352 55479 . + . ID=1:ZK795.3:exon:23;Parent=1:ZK795.3\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker CDS 53633 53816 . + 0 ID=1:ZK795.3:cds;Parent=1:ZK795.3\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker CDS 54152 54249 . + 2 ID=1:ZK795.3:cds;Parent=1:ZK795.3\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker CDS 54398 54487 . + 0 ID=1:ZK795.3:cds;Parent=1:ZK795.3\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker CDS 54707 54904 . + 0 ID=1:ZK795.3:cds;Parent=1:ZK795.3\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker CDS 55077 55245 . + 0 ID=1:ZK795.3:cds;Parent=1:ZK795.3\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker CDS 55352 55479 . + 2 ID=1:ZK795.3:cds;Parent=1:ZK795.3\n"; | |
($new_lines,$errorcode,$errormsg) = &fix_gff_lines_for_gene($gff_lines,'SRAE_00003232','maker-BPAG.contig.00154.117101-pred_gff_protein_coding-gene-0.33'); | |
if ($errorcode != 0) { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test4\n"; exit;} | |
@temp = split(/\n/,$new_lines); | |
if ($#temp != 13) { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test4b ($#temp)\n"; exit;} | |
if ($temp[0] ne 'BPAG.contig.00154.117101 maker gene 53633 55479 . + . ID=SRAE_00003232;Name=SRAE_00003232') { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test4c\n"; exit;} | |
if ($temp[1] ne 'BPAG.contig.00154.117101 maker mRNA 53633 55479 . + . ID=SRAE_00003232-mRNA-1;Parent=SRAE_00003232;Name=SRAE_00003232-mRNA-1;_AED=0.13;_eAED=0.13;_QI=0|0|0|1|1|1|6|0|288') { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test4d\n"; exit;} | |
if ($temp[2] ne 'BPAG.contig.00154.117101 maker exon 53633 53816 . + . ID=SRAE_00003232-mRNA-1:exon:1;Parent=SRAE_00003232-mRNA-1') { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test4e (temp2 $temp[2])\n"; exit;} | |
if ($temp[3] ne 'BPAG.contig.00154.117101 maker exon 54152 54249 . + . ID=SRAE_00003232-mRNA-1:exon:2;Parent=SRAE_00003232-mRNA-1') { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test4f\n"; exit;} | |
if ($temp[4] ne 'BPAG.contig.00154.117101 maker exon 54398 54487 . + . ID=SRAE_00003232-mRNA-1:exon:3;Parent=SRAE_00003232-mRNA-1') { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test4g\n"; exit;} | |
if ($temp[5] ne 'BPAG.contig.00154.117101 maker exon 54707 54904 . + . ID=SRAE_00003232-mRNA-1:exon:4;Parent=SRAE_00003232-mRNA-1') { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test4h\n"; exit;} | |
if ($temp[6] ne 'BPAG.contig.00154.117101 maker exon 55077 55245 . + . ID=SRAE_00003232-mRNA-1:exon:5;Parent=SRAE_00003232-mRNA-1') { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test4i\n"; exit;} | |
if ($temp[7] ne 'BPAG.contig.00154.117101 maker exon 55352 55479 . + . ID=SRAE_00003232-mRNA-1:exon:6;Parent=SRAE_00003232-mRNA-1') { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test4j\n"; exit;} | |
if ($temp[8] ne 'BPAG.contig.00154.117101 maker CDS 53633 53816 . + 0 ID=SRAE_00003232-mRNA-1:cds;Parent=SRAE_00003232-mRNA-1') { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test4k\n"; exit;} | |
if ($temp[9] ne 'BPAG.contig.00154.117101 maker CDS 54152 54249 . + 2 ID=SRAE_00003232-mRNA-1:cds;Parent=SRAE_00003232-mRNA-1') { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test4l\n"; exit;} | |
if ($temp[10] ne 'BPAG.contig.00154.117101 maker CDS 54398 54487 . + 0 ID=SRAE_00003232-mRNA-1:cds;Parent=SRAE_00003232-mRNA-1') { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test4m\n"; exit;} | |
if ($temp[11] ne 'BPAG.contig.00154.117101 maker CDS 54707 54904 . + 0 ID=SRAE_00003232-mRNA-1:cds;Parent=SRAE_00003232-mRNA-1') { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test4n\n"; exit;} | |
if ($temp[12] ne 'BPAG.contig.00154.117101 maker CDS 55077 55245 . + 0 ID=SRAE_00003232-mRNA-1:cds;Parent=SRAE_00003232-mRNA-1') { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test4o\n"; exit;} | |
if ($temp[13] ne 'BPAG.contig.00154.117101 maker CDS 55352 55479 . + 2 ID=SRAE_00003232-mRNA-1:cds;Parent=SRAE_00003232-mRNA-1') { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test4p\n"; exit;} | |
# EXAMPLE OF DUPLICATE LINES IN THE INPUT: | |
$gff_lines = "BPAG.contig.00154.117101 maker gene 35582 39253 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.110\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker gene 35582 39253 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.110\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker mRNA 35582 39253 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1;_AED=0.05;_eAED=0.05;_QI=0|0|0|1|1|1|6|0|309\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker mRNA 35582 39253 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1;_AED=0.05;_eAED=0.05;_QI=0|0|0|1|1|1|6|0|309\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker exon 35582 35727 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:0;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker exon 36179 36341 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:1;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker exon 37052 37204 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:2;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker exon 38326 38535 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:3;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker exon 38781 38871 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:4;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker exon 39087 39253 . + . ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:5;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker CDS 35582 35727 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker CDS 36179 36341 . + 1 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker CDS 37052 37204 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker CDS 37052 37204 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker CDS 38326 38535 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker CDS 38781 38871 . + 0 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
$gff_lines = $gff_lines."BPAG.contig.00154.117101 maker CDS 39087 39253 . + 2 ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1\n"; | |
($new_lines,$errorcode,$errormsg) = &fix_gff_lines_for_gene($gff_lines,'SRAE_00003232','maker-BPAG.contig.00154.117101-augustus-gene-0.110'); | |
if ($errorcode != 0) { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test5 (errorcode $errorcode errormsg $errormsg)\n"; exit;} | |
@temp = split(/\n/,$new_lines); | |
if ($#temp != 13) { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test5b ($#temp)\n"; exit;} | |
if ($temp[0] ne 'BPAG.contig.00154.117101 maker gene 35582 39253 . + . ID=SRAE_00003232;Name=SRAE_00003232') { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test5c\n"; exit;} | |
if ($temp[1] ne 'BPAG.contig.00154.117101 maker mRNA 35582 39253 . + . ID=SRAE_00003232-mRNA-1;Parent=SRAE_00003232;Name=SRAE_00003232-mRNA-1;_AED=0.05;_eAED=0.05;_QI=0|0|0|1|1|1|6|0|309') { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test5d (temp1 $temp[1])\n"; exit;} | |
if ($temp[2] ne 'BPAG.contig.00154.117101 maker exon 35582 35727 . + . ID=SRAE_00003232-mRNA-1:exon:1;Parent=SRAE_00003232-mRNA-1') { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test5e (temp2 $temp[2])\n"; exit;} | |
if ($temp[3] ne 'BPAG.contig.00154.117101 maker exon 36179 36341 . + . ID=SRAE_00003232-mRNA-1:exon:2;Parent=SRAE_00003232-mRNA-1') { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test5f\n"; exit;} | |
if ($temp[4] ne 'BPAG.contig.00154.117101 maker exon 37052 37204 . + . ID=SRAE_00003232-mRNA-1:exon:3;Parent=SRAE_00003232-mRNA-1') { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test5g\n"; exit;} | |
if ($temp[5] ne 'BPAG.contig.00154.117101 maker exon 38326 38535 . + . ID=SRAE_00003232-mRNA-1:exon:4;Parent=SRAE_00003232-mRNA-1') { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test5h\n"; exit;} | |
if ($temp[6] ne 'BPAG.contig.00154.117101 maker exon 38781 38871 . + . ID=SRAE_00003232-mRNA-1:exon:5;Parent=SRAE_00003232-mRNA-1') { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test5i\n"; exit;} | |
if ($temp[7] ne 'BPAG.contig.00154.117101 maker exon 39087 39253 . + . ID=SRAE_00003232-mRNA-1:exon:6;Parent=SRAE_00003232-mRNA-1') { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test5j\n"; exit;} | |
if ($temp[8] ne 'BPAG.contig.00154.117101 maker CDS 35582 35727 . + 0 ID=SRAE_00003232-mRNA-1:cds;Parent=SRAE_00003232-mRNA-1') { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test5k (temp8 $temp[8])\n"; exit;} | |
if ($temp[9] ne 'BPAG.contig.00154.117101 maker CDS 36179 36341 . + 1 ID=SRAE_00003232-mRNA-1:cds;Parent=SRAE_00003232-mRNA-1') { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test5l\n"; exit;} | |
if ($temp[10] ne 'BPAG.contig.00154.117101 maker CDS 37052 37204 . + 0 ID=SRAE_00003232-mRNA-1:cds;Parent=SRAE_00003232-mRNA-1') { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test5m\n"; exit;} | |
if ($temp[11] ne 'BPAG.contig.00154.117101 maker CDS 38326 38535 . + 0 ID=SRAE_00003232-mRNA-1:cds;Parent=SRAE_00003232-mRNA-1') { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test5n\n"; exit;} | |
if ($temp[12] ne 'BPAG.contig.00154.117101 maker CDS 38781 38871 . + 0 ID=SRAE_00003232-mRNA-1:cds;Parent=SRAE_00003232-mRNA-1') { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test5o\n"; exit;} | |
if ($temp[13] ne 'BPAG.contig.00154.117101 maker CDS 39087 39253 . + 2 ID=SRAE_00003232-mRNA-1:cds;Parent=SRAE_00003232-mRNA-1') { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test5p\n"; exit;} | |
# EXAMPLE WITH MULTIPLE ALTERNATIVE TRANSCRIPTS: | |
$gff_lines = "Sratt_Chr00_000002 maker gene 371193 372952 . + . ID=maker-Sratt_Chr00_000002-exonerate_est2genome-gene-3.1;Name=maker-Sratt_Chr00_000002-exonerate_est2genome-gene-3.1\n"; | |
$gff_lines = $gff_lines."Sratt_Chr00_000002 maker mRNA 371193 372952 . + . ID=maker-Sratt_Chr00_000002-exonerate_est2genome-gene-3.1-mRNA-1;Parent=maker-Sratt_Chr00_000002-exonerate_est2genome-gene-3.1;Name=maker-Sratt_Chr00_000002-exonerate_est2genome-gene-3.1-mRNA-1;_AED=0.02;_eAED=0.02;_QI=550|1|1|1|0|0|3|60|350;score=3922\n"; | |
$gff_lines = $gff_lines."Sratt_Chr00_000002 maker mRNA 371415 372952 . + . ID=maker-Sratt_Chr00_000002-exonerate_est2genome-gene-3.1-mRNA-2;Parent=maker-Sratt_Chr00_000002-exonerate_est2genome-gene-3.1;Name=maker-Sratt_Chr00_000002-exonerate_est2genome-gene-3.1-mRNA-2;_AED=0.07;_eAED=0.07;_QI=378|1|1|1|0|0|2|60|350;score=7307\n"; | |
$gff_lines = $gff_lines."Sratt_Chr00_000002 maker exon 371193 371384 . + . ID=maker-Sratt_Chr00_000002-exonerate_est2genome-gene-3.1-mRNA-1:exon:229;Parent=maker-Sratt_Chr00_000002-exonerate_est2genome-gene-3.1-mRNA-1\n"; | |
$gff_lines = $gff_lines."Sratt_Chr00_000002 maker exon 371435 371821 . + . ID=maker-Sratt_Chr00_000002-exonerate_est2genome-gene-3.1-mRNA-1:exon:230;Parent=maker-Sratt_Chr00_000002-exonerate_est2genome-gene-3.1-mRNA-1\n"; | |
$gff_lines = $gff_lines."Sratt_Chr00_000002 maker exon 371869 372952 . + . ID=maker-Sratt_Chr00_000002-exonerate_est2genome-gene-3.1-mRNA-1:exon:231;Parent=maker-Sratt_Chr00_000002-exonerate_est2genome-gene-3.1-mRNA-1,maker-Sratt_Chr00_000002-exonerate_est2genome-gene-3.1-mRNA-2\n"; | |
$gff_lines = $gff_lines."Sratt_Chr00_000002 maker exon 371415 371821 . + . ID=maker-Sratt_Chr00_000002-exonerate_est2genome-gene-3.1-mRNA-2:exon:232;Parent=maker-Sratt_Chr00_000002-exonerate_est2genome-gene-3.1-mRNA-2\n"; | |
$gff_lines = $gff_lines."Sratt_Chr00_000002 maker five_prime_UTR 371193 371384 . + . ID=maker-Sratt_Chr00_000002-exonerate_est2genome-gene-3.1-mRNA-1:five_prime_utr;Parent=maker-Sratt_Chr00_000002-exonerate_est2genome-gene-3.1-mRNA-1\n"; | |
$gff_lines = $gff_lines."Sratt_Chr00_000002 maker five_prime_UTR 371435 371792 . + . ID=maker-Sratt_Chr00_000002-exonerate_est2genome-gene-3.1-mRNA-1:five_prime_utr;Parent=maker-Sratt_Chr00_000002-exonerate_est2genome-gene-3.1-mRNA-1\n"; | |
$gff_lines = $gff_lines."Sratt_Chr00_000002 maker CDS 371793 371821 . + 0 ID=maker-Sratt_Chr00_000002-exonerate_est2genome-gene-3.1-mRNA-1:cds;Parent=maker-Sratt_Chr00_000002-exonerate_est2genome-gene-3.1-mRNA-1\n"; | |
$gff_lines = $gff_lines."Sratt_Chr00_000002 maker CDS 371869 372892 . + 1 ID=maker-Sratt_Chr00_000002-exonerate_est2genome-gene-3.1-mRNA-1:cds;Parent=maker-Sratt_Chr00_000002-exonerate_est2genome-gene-3.1-mRNA-1\n"; | |
$gff_lines = $gff_lines."Sratt_Chr00_000002 maker three_prime_UTR 372893 372952 . + . ID=maker-Sratt_Chr00_000002-exonerate_est2genome-gene-3.1-mRNA-1:three_prime_utr;Parent=maker-Sratt_Chr00_000002-exonerate_est2genome-gene-3.1-mRNA-1\n"; | |
$gff_lines = $gff_lines."Sratt_Chr00_000002 maker five_prime_UTR 371415 371792 . + . ID=maker-Sratt_Chr00_000002-exonerate_est2genome-gene-3.1-mRNA-2:five_prime_utr;Parent=maker-Sratt_Chr00_000002-exonerate_est2genome-gene-3.1-mRNA-2\n"; | |
$gff_lines = $gff_lines."Sratt_Chr00_000002 maker CDS 371793 371821 . + 0 ID=maker-Sratt_Chr00_000002-exonerate_est2genome-gene-3.1-mRNA-2:cds;Parent=maker-Sratt_Chr00_000002-exonerate_est2genome-gene-3.1-mRNA-2\n"; | |
$gff_lines = $gff_lines."Sratt_Chr00_000002 maker CDS 371869 372892 . + 1 ID=maker-Sratt_Chr00_000002-exonerate_est2genome-gene-3.1-mRNA-2:cds;Parent=maker-Sratt_Chr00_000002-exonerate_est2genome-gene-3.1-mRNA-2\n"; | |
$gff_lines = $gff_lines."Sratt_Chr00_000002 maker three_prime_UTR 372893 372952 . + . ID=maker-Sratt_Chr00_000002-exonerate_est2genome-gene-3.1-mRNA-2:three_prime_utr;Parent=maker-Sratt_Chr00_000002-exonerate_est2genome-gene-3.1-mRNA-2\n"; | |
($new_lines,$errorcode,$errormsg) = &fix_gff_lines_for_gene($gff_lines,'SRAE_00003232','maker-Sratt_Chr00_000002-exonerate_est2genome-gene-3.1'); | |
if ($errorcode != 0) { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test6 (errorcode $errorcode errormsg $errormsg)\n"; exit;} | |
@temp = split(/\n/,$new_lines); | |
if ($#temp != 16) { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test6b ($#temp)\n"; exit;} | |
if ($temp[0] ne 'Sratt_Chr00_000002 maker gene 371193 372952 . + . ID=SRAE_00003232;Name=SRAE_00003232') { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test6c\n"; exit;} | |
if ($temp[1] ne 'Sratt_Chr00_000002 maker mRNA 371193 372952 . + . ID=SRAE_00003232-mRNA-1;Parent=SRAE_00003232;Name=SRAE_00003232-mRNA-1;_AED=0.02;_eAED=0.02;_QI=550|1|1|1|0|0|3|60|350;score=3922') { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test6d\n"; exit;} | |
if ($temp[2] ne 'Sratt_Chr00_000002 maker mRNA 371415 372952 . + . ID=SRAE_00003232-mRNA-2;Parent=SRAE_00003232;Name=SRAE_00003232-mRNA-2;_AED=0.07;_eAED=0.07;_QI=378|1|1|1|0|0|2|60|350;score=7307') { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test6e\n"; exit;} | |
if ($temp[3] ne 'Sratt_Chr00_000002 maker exon 371193 371384 . + . ID=SRAE_00003232-mRNA-1:exon:1;Parent=SRAE_00003232-mRNA-1') { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test6f\n"; exit;} | |
if ($temp[4] ne 'Sratt_Chr00_000002 maker exon 371435 371821 . + . ID=SRAE_00003232-mRNA-1:exon:2;Parent=SRAE_00003232-mRNA-1') { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test6g\n"; exit;} | |
if ($temp[5] ne 'Sratt_Chr00_000002 maker exon 371869 372952 . + . ID=SRAE_00003232-mRNA-1:exon:3;Parent=SRAE_00003232-mRNA-1') { print STDER "ERROR: test_fix_gff_lines_for_gene: failed test6h\n"; exit;} | |
if ($temp[6] ne 'Sratt_Chr00_000002 maker exon 371869 372952 . + . ID=SRAE_00003232-mRNA-2:exon:2;Parent=SRAE_00003232-mRNA-2') { print STDER "ERROR: test_fix_gff_lines_for_gene: failed test6i\n"; exit;} | |
if ($temp[7] ne 'Sratt_Chr00_000002 maker exon 371415 371821 . + . ID=SRAE_00003232-mRNA-2:exon:1;Parent=SRAE_00003232-mRNA-2') { print STDERR "ERROR: test_fix_gff_lines_for_gene: failed test6j\n"; exit;} | |
if ($temp[8] ne 'Sratt_Chr00_000002 maker five_prime_UTR 371193 371384 . + . ID=SRAE_00003232-mRNA-1:five_prime_utr;Parent=SRAE_00003232-mRNA-1') { print STDERR "ERROR: test_fix_gff_line_for_gene: failed test6k\n"; exit;} | |
if ($temp[9] ne 'Sratt_Chr00_000002 maker five_prime_UTR 371435 371792 . + . ID=SRAE_00003232-mRNA-1:five_prime_utr;Parent=SRAE_00003232-mRNA-1') { print STDERR "ERROR: test_fix_gff_line_for_gene: failed test6l\n"; exit;} | |
if ($temp[10] ne 'Sratt_Chr00_000002 maker CDS 371793 371821 . + 0 ID=SRAE_00003232-mRNA-1:cds;Parent=SRAE_00003232-mRNA-1') { print STDERR "ERROR: test_fix_gff_line_for_gene: failed test6m\n"; exit;} | |
if ($temp[11] ne 'Sratt_Chr00_000002 maker CDS 371869 372892 . + 1 ID=SRAE_00003232-mRNA-1:cds;Parent=SRAE_00003232-mRNA-1') { print STDERR "ERROR: test_fix_gff_line_for_gene: failed test6n\n"; exit;} | |
if ($temp[12] ne 'Sratt_Chr00_000002 maker three_prime_UTR 372893 372952 . + . ID=SRAE_00003232-mRNA-1:three_prime_utr;Parent=SRAE_00003232-mRNA-1') { print STDERR "ERROR: test_fix_gff_line_for_gene: failed test6o\n"; exit;} | |
if ($temp[13] ne 'Sratt_Chr00_000002 maker five_prime_UTR 371415 371792 . + . ID=SRAE_00003232-mRNA-2:five_prime_utr;Parent=SRAE_00003232-mRNA-2') { print STDERR "ERROR: test_fix_gff_line_for_gene: failed test6p\n"; exit;} | |
if ($temp[14] ne 'Sratt_Chr00_000002 maker CDS 371793 371821 . + 0 ID=SRAE_00003232-mRNA-2:cds;Parent=SRAE_00003232-mRNA-2') { print STDERR "ERROR: test_fix_gff_line_for_gene: failed test6q\n"; exit;} | |
if ($temp[15] ne 'Sratt_Chr00_000002 maker CDS 371869 372892 . + 1 ID=SRAE_00003232-mRNA-2:cds;Parent=SRAE_00003232-mRNA-2') { print STDERR "ERROR: test_fix_gff_line_for_gene: failed test6r\n"; exit;} | |
if ($temp[16] ne 'Sratt_Chr00_000002 maker three_prime_UTR 372893 372952 . + . ID=SRAE_00003232-mRNA-2:three_prime_utr;Parent=SRAE_00003232-mRNA-2') { print STDERR "ERROR: test_fix_gff_line_for_gene: failed test6s\n"; exit;} | |
} | |
#------------------------------------------------------------------# | |
# RENAME A GENE AND ALL ITS EXONS etc.: | |
sub fix_gff_lines_for_gene | |
{ | |
my $gff_lines = $_[0]; # GFF LINES THAT OVERLAP WITH THE GENE | |
my $new_gene_name = $_[1]; # NEW NAME FOR A GENE | |
my $old_gene_name = $_[2]; # OLD NAME FOR THE GENE | |
my $new_lines = ""; # REVISED GFF LINES FOR GENE | |
my $errorcode = 0; # RETURNED AS 0 IF THERE IS NO ERROR | |
my $errormsg = "none";# RETURNED AS 'none' IF THERE IS NO ERROR | |
my $GENE; # HASH TABLE OF GENE NAMES FOR TRANSCRIPTS | |
my $line; # | |
my @temp; # | |
my @temp2; # | |
my $new_line; # NEW GFF LINE | |
my $feature_type; # FEATURE TYPE | |
my $gene; # GENE NAME | |
my $transcript; # TRANSCRIPT NAME | |
my $new_transcript_name; # NEW NAME FOR TRANSCRIPT | |
my $aed_score; # AED SCORE INFORMATION | |
my $exon_start; # EXON START | |
my $exon_end; # EXON END | |
my $strand; # EXON STRAND | |
my %EXONS = (); # EXONS IN A TRANSCRIPT | |
my $EXONNUM; # HASH TABLE FOR STORING NUMBER OF AN EXON ALONG A TRANSCRIPT | |
my $exon_num; # EXON NUMBER FOR EXON | |
my $transcript_num; # TRANSCRIPT NUMBER IN THE GENE | |
my @new_lines; # THE NEW LINES | |
my $gene_gff_linecnt; # NUMBER OF LINES IN $gene_gff | |
my @gff_lines; # GFF LINES | |
my $i; # | |
my $scaffold; # SCAFFOLD | |
my %SEENLINE = (); # SAYS WHETHER WE HAVE ALREADY SEEN A NEW GFF LINE | |
my $j; # | |
my @temp3; # | |
my %SEEN = (); # HASH TABLE TO RECORD WHICH EXONS WE HAVE SEEN IN A TRANSCRIPT | |
# READ IN THE GENE NAME FOR EACH TRANSCRIPT IN $gff_lines (WHICH JUST CONTAINS FEATURES THAT OVERLAP GENE $old_gene_name): | |
($GENE,$errorcode,$errormsg) = &read_genes_for_transcripts($gff_lines); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
# READ IN THE GFF LINES THAT OVERLAP WITH THE GENE, AND ASSIGN NUMBERS TO THE EXONS: | |
# THE EXONS SHOULD BE NUMBERED 1-... FROM 5' TO 3' OF THE TRANSCRIPT. | |
@gff_lines = split(/\n+/,$gff_lines); | |
for ($i = 0; $i <= $#gff_lines; $i++) | |
{ | |
$line = $gff_lines[$i]; | |
chomp $line; | |
@temp = split(/\t+/,$line); | |
$scaffold = $temp[0]; | |
$feature_type = $temp[2]; | |
if ($feature_type eq 'gene' || $feature_type eq 'mRNA' || $feature_type eq 'CDS' || $feature_type eq 'exon' || $feature_type eq 'five_prime_UTR' || $feature_type eq 'three_prime_UTR') | |
{ | |
if ($feature_type eq 'exon') | |
{ | |
# ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:0;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1 | |
$exon_start = $temp[3]; | |
$exon_end = $temp[4]; | |
$strand = $temp[6]; | |
$transcript = $temp[8]; | |
@temp2 = split(/Parent=/,$transcript); | |
$transcript = $temp2[1]; | |
@temp2 = split(/\;/,$transcript); | |
$transcript = $temp2[0]; # eg. maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1 | |
if ($transcript =~ /\,/) | |
{ | |
@temp2 = split(/\,/,$transcript); | |
for ($j = 0; $j <= $#temp2; $j++) | |
{ | |
$transcript = $temp2[$j]; | |
# RECORD THE EXONS IN THE TRANSCRIPT: | |
# WE NEED TO RECORD WHETHER WE'VE SEEN A CERTAIN EXON IN THE TRANSCRIPT BEFORE, AS SOME MAKER LINES HAVE | |
# THE SAME TRANSCRIPT NAME TWICE: | |
# eg. N_americanus-1.0_Cont202 maker exon 178995 179125 . + . ID=1:ZK809.5b:exon:143;Parent=1:ZK809.5b,1:ZK809.5b | |
if (!($SEEN{$transcript."=".$exon_start."_".$exon_end."_".$strand})) | |
{ | |
$SEEN{$transcript."=".$exon_start."_".$exon_end."_".$strand} = 1; | |
if ($EXONS{$transcript}) { $EXONS{$transcript} = $EXONS{$transcript}.",".$transcript."=".$exon_start."=".$exon_end."=".$strand;} | |
else { $EXONS{$transcript} = $transcript."=".$exon_start."=".$exon_end."=".$strand; } | |
} | |
} | |
} | |
else | |
{ | |
# RECORD THE EXONS IN THE TRANSCRIPT: | |
if (!($SEEN{$transcript."=".$exon_start."_".$exon_end."_".$strand})) | |
{ | |
$SEEN{$transcript."=".$exon_start."_".$exon_end."_".$strand} = 1; | |
if ($EXONS{$transcript}) { $EXONS{$transcript} = $EXONS{$transcript}.",".$transcript."=".$exon_start."=".$exon_end."=".$strand;} | |
else { $EXONS{$transcript} = $transcript."=".$exon_start."=".$exon_end."=".$strand; } | |
} | |
} | |
} | |
} | |
else | |
{ | |
$errormsg = "ERROR: fix_gff_lines_for_gene: feature_type $feature_type line $line\n"; | |
$errorcode = 60; # ERRORCODE=60 (TESTED FOR) | |
return($new_lines,$errorcode,$errormsg); | |
} | |
} | |
# ASSIGN NUMBERS TO THE EXONS IN THE TRANSCRIPTS: | |
($EXONNUM,$errorcode,$errormsg) = &assign_exon_numbers(\%EXONS); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
# READ IN THE GFF LINES THAT OVERLAP WITH THE GENE: | |
for ($i = 0; $i <= $#gff_lines; $i++) | |
{ | |
$line = $gff_lines[$i]; | |
chomp $line; | |
@temp = split(/\t+/,$line); | |
$feature_type = $temp[2]; | |
if ($feature_type eq 'gene' || $feature_type eq 'mRNA' || $feature_type eq 'CDS' || $feature_type eq 'exon' || $feature_type eq 'five_prime_UTR' || $feature_type eq 'three_prime_UTR') | |
{ | |
if ($feature_type eq 'gene') | |
{ | |
$gene = $temp[8]; # ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110;Name=m... | |
@temp2 = split(/ID=/,$gene); | |
$gene = $temp2[1]; # maker-BPAG.contig.00154.117101-augustus-gene-0.110;Name=m... | |
@temp2 = split(/\;/,$gene); | |
$gene = $temp2[0]; | |
if ($gene eq $old_gene_name) | |
{ | |
$new_line = $temp[0]."\t".$temp[1]."\t".$temp[2]."\t".$temp[3]."\t".$temp[4]."\t".$temp[5]."\t".$temp[6]."\t".$temp[7]."\tID=$new_gene_name\;Name=$new_gene_name\n"; | |
if (!($SEENLINE{$new_line})) | |
{ | |
$SEENLINE{$new_line} = 1; | |
$new_lines = $new_lines.$new_line; | |
} | |
} | |
} | |
elsif ($feature_type eq 'mRNA') | |
{ | |
# eg. ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110;Name=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1;_AED=0.05;_eAED=0.05;_QI=0|0|0|1|1|1|6|0|309 | |
$transcript = $temp[8]; | |
@temp2 = split(/ID=/,$transcript); | |
$transcript = $temp2[1]; | |
@temp2 = split(/\;/,$transcript); | |
$transcript = $temp2[0]; # eg. maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1 | |
if ($transcript =~ /-mRNA-/) | |
{ | |
@temp2 = split(/-mRNA-/,$transcript); | |
$transcript_num= $temp2[1]; # eg. 1 | |
} | |
else | |
{ | |
$transcript_num= 1; | |
} | |
$transcript = $scaffold."=".$transcript; | |
if ($GENE->{$transcript}) # WE IGNORE TRANSCRIPTS FOR WHICH WE DON'T KNOW THE GENE NAME, AS IT MEANS THEY ARE NOT WITHIN THE GENE OF INTEREST | |
{ | |
$gene = $GENE->{$transcript}; | |
if ($gene eq $old_gene_name) | |
{ | |
$new_transcript_name = $new_gene_name."-mRNA-".$transcript_num; | |
# GET THE AED SCORE INFORMATION FOR THE TRANSCRIPT: | |
$transcript = $temp[8]; | |
if ($transcript =~ /AED/) # FOR MAKER | |
{ | |
@temp2 = split(/\;\_AED/,$transcript); | |
$aed_score = $temp2[1]; | |
$aed_score = "_AED".$aed_score; | |
$new_line = $temp[0]."\t".$temp[1]."\t".$temp[2]."\t".$temp[3]."\t".$temp[4]."\t".$temp[5]."\t".$temp[6]."\t".$temp[7]."\tID=$new_transcript_name\;Parent=$new_gene_name\;Name=$new_transcript_name\;$aed_score\n"; | |
} | |
else | |
{ | |
$new_line = $temp[0]."\t".$temp[1]."\t".$temp[2]."\t".$temp[3]."\t".$temp[4]."\t".$temp[5]."\t".$temp[6]."\t".$temp[7]."\tID=$new_transcript_name\;Parent=$new_gene_name\;Name=$new_transcript_name\n"; | |
} | |
if (!($SEENLINE{$new_line})) | |
{ | |
$SEENLINE{$new_line} = 1; | |
$new_lines = $new_lines.$new_line; | |
} | |
} | |
} | |
} | |
elsif ($feature_type eq 'CDS') | |
{ | |
# eg. ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:cds;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1 | |
# ID=1:ZK795.3:cds;Parent=1:ZK795.3 | |
$transcript = $temp[8]; | |
@temp2 = split(/Parent=/,$transcript); | |
$transcript = $temp2[1]; | |
@temp2 = split(/\;/,$transcript); | |
$transcript = $temp2[0]; # eg. maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1 | |
if ($transcript =~ /-mRNA-/) | |
{ | |
@temp2 = split(/-mRNA-/,$transcript); | |
$transcript_num= $temp2[1]; # eg. 1 | |
} | |
else | |
{ | |
$transcript_num = 1; | |
} | |
$transcript = $scaffold."=".$transcript; | |
if ($GENE->{$transcript}) # WE IGNORE TRANSCRIPTS FOR WHICH WE DON'T KNOW THE GENE NAME, AS IT MEANS THEY ARE NOT WITHIN THE GENE OF INTEREST | |
{ | |
$gene = $GENE->{$transcript}; | |
if ($gene eq $old_gene_name) | |
{ | |
$new_transcript_name = $new_gene_name."-mRNA-".$transcript_num; | |
$new_line = $temp[0]."\t".$temp[1]."\t".$temp[2]."\t".$temp[3]."\t".$temp[4]."\t".$temp[5]."\t".$temp[6]."\t".$temp[7]."\tID=$new_transcript_name:cds\;Parent=$new_transcript_name\n"; | |
if (!($SEENLINE{$new_line})) | |
{ | |
$SEENLINE{$new_line} = 1; | |
$new_lines = $new_lines.$new_line; | |
} | |
} | |
} | |
} | |
elsif ($feature_type eq 'exon') | |
{ | |
# ID=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1:exon:0;Parent=maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1 | |
$exon_start = $temp[3]; | |
$exon_end = $temp[4]; | |
$strand = $temp[6]; | |
$transcript = $temp[8]; | |
@temp2 = split(/Parent=/,$transcript); | |
$transcript = $temp2[1]; | |
@temp2 = split(/\;/,$transcript); | |
$transcript = $temp2[0]; # eg. maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1 | |
if ($transcript =~ /\,/) # HAPPENS OCCASIONALLY | |
{ | |
@temp2 = split(/\,/,$transcript); | |
for ($j = 0; $j <= $#temp2; $j++) | |
{ | |
$transcript = $temp2[$j]; | |
if ($transcript =~ /-mRNA-/) | |
{ | |
@temp3= split(/-mRNA-/,$transcript); | |
$transcript_num= $temp3[1]; # eg. 1 | |
} | |
else | |
{ | |
$transcript_num = 1; | |
} | |
# GET THE EXON NUMBER: | |
if (!($EXONNUM->{$transcript."=".$exon_start."=".$exon_end."=".$strand})) | |
{ | |
$errorcode = 63; # ERRORCODE=63 (CAN'T TEST FOR, SHOULDN'T HAPPEN) | |
$errormsg = "ERROR: fix_gff_lines_for_gene: do not know exon number for $transcript $exon_start $exon_end $strand\n"; | |
return($new_lines,$errorcode,$errormsg); | |
} | |
$exon_num= $EXONNUM->{$transcript."=".$exon_start."=".$exon_end."=".$strand}; | |
$transcript = $scaffold."=".$transcript; | |
if ($GENE->{$transcript}) # WE IGNORE TRANSCRIPTS FOR WHICH WE DON'T KNOW THE GENE NAME, AS IT MEANS THEY ARE NOT WITHIN THE GENE OF INTEREST | |
{ | |
$gene = $GENE->{$transcript}; | |
if ($gene eq $old_gene_name) | |
{ | |
$new_transcript_name = $new_gene_name."-mRNA-".$transcript_num; | |
$new_line = $temp[0]."\t".$temp[1]."\t".$temp[2]."\t".$temp[3]."\t".$temp[4]."\t".$temp[5]."\t".$temp[6]."\t".$temp[7]."\tID=$new_transcript_name:exon:$exon_num\;Parent=$new_transcript_name\n"; | |
if (!($SEENLINE{$new_line})) | |
{ | |
$SEENLINE{$new_line} = 1; | |
$new_lines = $new_lines.$new_line; | |
} | |
} | |
} | |
} | |
} | |
else | |
{ | |
if ($transcript =~ /-mRNA-/) | |
{ | |
@temp3= split(/-mRNA-/,$transcript); | |
$transcript_num= $temp3[1]; # eg. 1 | |
} | |
else | |
{ | |
$transcript_num = 1; | |
} | |
# GET THE EXON NUMBER: | |
if (!($EXONNUM->{$transcript."=".$exon_start."=".$exon_end."=".$strand})) | |
{ | |
$errorcode = 63; # ERRORCODE=63 (CAN'T TEST FOR, SHOULDN'T HAPPEN) | |
$errormsg = "ERROR: fix_gff_lines_for_gene: do not know exon number for $transcript $exon_start $exon_end $strand\n"; | |
return($new_lines,$errorcode,$errormsg); | |
} | |
$exon_num= $EXONNUM->{$transcript."=".$exon_start."=".$exon_end."=".$strand}; | |
$transcript = $scaffold."=".$transcript; | |
if ($GENE->{$transcript}) # WE IGNORE TRANSCRIPTS FOR WHICH WE DON'T KNOW THE GENE NAME, AS IT MEANS THEY ARE NOT WITHIN THE GENE OF INTEREST | |
{ | |
$gene = $GENE->{$transcript}; | |
if ($gene eq $old_gene_name) | |
{ | |
$new_transcript_name = $new_gene_name."-mRNA-".$transcript_num; | |
$new_line = $temp[0]."\t".$temp[1]."\t".$temp[2]."\t".$temp[3]."\t".$temp[4]."\t".$temp[5]."\t".$temp[6]."\t".$temp[7]."\tID=$new_transcript_name:exon:$exon_num\;Parent=$new_transcript_name\n"; | |
if (!($SEENLINE{$new_line})) | |
{ | |
$SEENLINE{$new_line} = 1; | |
$new_lines = $new_lines.$new_line; | |
} | |
} | |
} | |
} | |
} | |
elsif ($feature_type eq 'five_prime_UTR') | |
{ | |
# eg. ID=maker-BPAG.scaffold.00151.117925-snap-gene-0.187-mRNA-1:five_prime_utr;Parent=maker-BPAG.scaffold.00151.117925-snap-gene-0.187-mRNA-1 | |
$transcript = $temp[8]; | |
@temp2 = split(/Parent=/,$transcript); | |
$transcript = $temp2[1]; | |
@temp2 = split(/\;/,$transcript); | |
$transcript = $temp2[0]; # eg. maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1 | |
if ($transcript =~ /-mRNA-/) | |
{ | |
@temp2 = split(/-mRNA-/,$transcript); | |
$transcript_num= $temp2[1]; # eg. 1 | |
} | |
else | |
{ | |
$transcript_num = 1; | |
} | |
$transcript = $scaffold."=".$transcript; | |
if ($GENE->{$transcript}) # WE IGNORE TRANSCRIPTS FOR WHICH WE DON'T KNOW THE GENE NAME, AS IT MEANS THEY ARE NOT WITHIN THE GENE OF INTEREST | |
{ | |
$gene = $GENE->{$transcript}; | |
if ($gene eq $old_gene_name) | |
{ | |
$new_transcript_name = $new_gene_name."-mRNA-".$transcript_num; | |
$new_line = $temp[0]."\t".$temp[1]."\t".$temp[2]."\t".$temp[3]."\t".$temp[4]."\t".$temp[5]."\t".$temp[6]."\t".$temp[7]."\tID=$new_transcript_name:five_prime_utr\;Parent=$new_transcript_name\n"; | |
if (!($SEENLINE{$new_line})) | |
{ | |
$SEENLINE{$new_line} = 1; | |
$new_lines = $new_lines.$new_line; | |
} | |
} | |
} | |
} | |
elsif ($feature_type eq 'three_prime_UTR') | |
{ | |
# ID=maker-BPAG.contig.05972.1748-augustus-gene-0.1-mRNA-1:three_prime_utr;Parent=maker-BPAG.contig.05972.1748-augustus-gene-0.1-mRNA-1 | |
$transcript = $temp[8]; | |
@temp2 = split(/Parent=/,$transcript); | |
$transcript = $temp2[1]; | |
@temp2 = split(/\;/,$transcript); | |
$transcript = $temp2[0]; # eg. maker-BPAG.contig.00154.117101-augustus-gene-0.110-mRNA-1 | |
if ($transcript =~ /-mRNA-/) | |
{ | |
@temp2 = split(/-mRNA-/,$transcript); | |
$transcript_num= $temp2[1]; # eg. 1 | |
} | |
else | |
{ | |
$transcript_num = 1; | |
} | |
$transcript = $scaffold."=".$transcript; | |
if ($GENE->{$transcript}) # WE IGNORE TRANSCRIPTS FOR WHICH WE DON'T KNOW THE GENE NAME, AS IT MEANS THEY ARE NOT WITHIN THE GENE OF INTEREST | |
{ | |
$gene = $GENE->{$transcript}; | |
if ($gene eq $old_gene_name) | |
{ | |
$new_transcript_name = $new_gene_name."-mRNA-".$transcript_num; | |
$new_line = $temp[0]."\t".$temp[1]."\t".$temp[2]."\t".$temp[3]."\t".$temp[4]."\t".$temp[5]."\t".$temp[6]."\t".$temp[7]."\tID=$new_transcript_name:three_prime_utr\;Parent=$new_transcript_name\n"; | |
if (!($SEENLINE{$new_line})) | |
{ | |
$SEENLINE{$new_line} = 1; | |
$new_lines = $new_lines.$new_line; | |
} | |
} | |
} | |
} | |
} | |
else | |
{ | |
$errormsg = "ERROR: fix_gff_lines_for_gene: feature_type $feature_type line $line\n"; | |
$errorcode = 62; # ERRORCODE=62 (CAN'T TEST FOR, SHOULD BE CAUGHT BY ERRORCODE=60) | |
return($new_lines,$errorcode,$errormsg); | |
} | |
} | |
# NOTE: THERE MAY NOT BE THE SAME NUMBER OF LINES IN $new_lines AS IN $gff_lines, AS $gff_lines MAY NOT ALL | |
# CORRESPOND TO THE SAME GENE, WHILE $new_lines WILL. | |
return($new_lines,$errorcode,$errormsg); | |
} | |
#------------------------------------------------------------------# | |
# TEST &assign_exon_numbers | |
sub test_assign_exon_numbers | |
{ | |
my $errormsg; # RETURNED AS 'none' FROM A FUNCTION IF THERE IS NO ERROR | |
my $errorcode; # RETURNED AS 0 FROM A FUNCTION IF THERE IS NO ERROR | |
my %EXONS = (); # HASH TABLE OF EXONS IN TRANSCRIPTS | |
my $EXONNUM = (); # HASH TABLE OF EXON NUMBERS IN TRANSCRIPTS | |
$EXONS{"transcript1"} = "transcript1=20=40=+,transcript1=5=10=+,transcript1=60=70=+"; | |
$EXONS{"transcript2"} = "transcript2=20=40=-,transcript2=5=10=-,transcript2=60=70=-"; | |
($EXONNUM,$errorcode,$errormsg) = &assign_exon_numbers(\%EXONS); | |
if ($errorcode != 0) { print STDERR "ERROR: test_assign_exon_numbers: failed test1 (errorcode $errorcode errormsg $errormsg)\n"; exit;} | |
if ($EXONNUM->{"transcript1=5=10=+"} != 1) { print STDERR "ERROR: test_assign_exon_numbers: failed test1b\n"; exit;} | |
if ($EXONNUM->{"transcript1=20=40=+"} != 2) { print STDERR "ERROR: test_assign_exon_numbers: failed test1c\n"; exit;} | |
if ($EXONNUM->{"transcript1=60=70=+"} != 3) { print STDERR "ERROR: test_assign_exon_numbers: failed test1d\n"; exit;} | |
if ($EXONNUM->{"transcript2=5=10=-"} != 3) { print STDERR "ERROR: test_assign_exon_numbers: failed test1e\n"; exit;} | |
if ($EXONNUM->{"transcript2=20=40=-"} != 2) { print STDERR "ERROR: test_assign_exon_numbers: failed test1f\n"; exit;} | |
if ($EXONNUM->{"transcript2=60=70=-"} != 1) { print STDERR "ERROR: test_assign_exon_numbers: failed test1g\n"; exit;} | |
# TEST ERRORCODE=51 (REPEATED EXON IN SAME TRANSCRIPT): | |
%EXONS = (); | |
$EXONS{"transcript1"} = "transcript1=20=40=+,transcript1=5=10=+,transcript1=20=40=+"; | |
$EXONS{"transcript2"} = "transcript2=20=40=-,transcript2=5=10=-,transcript2=60=70=-"; | |
($EXONNUM,$errorcode,$errormsg) = &assign_exon_numbers(\%EXONS); | |
if ($errorcode != 51) { print STDERR "ERROR: test_assign_exon_numbers: failed test2 (errorcode $errorcode errormsg $errormsg)\n"; exit;} | |
# TEST ERRORCODE=52 (EXONS IN SAME TRANSCRIPT WITH DIFFERENT STRANDS): | |
%EXONS = (); | |
$EXONS{"transcript1"} = "transcript1=20=40=+,transcript1=5=10=-"; | |
$EXONS{"transcript2"} = "transcript2=20=40=-,transcript2=5=10=-,transcript2=60=70=-"; | |
($EXONNUM,$errorcode,$errormsg) = &assign_exon_numbers(\%EXONS); | |
if ($errorcode != 52) { print STDERR "ERROR: test_assign_exon_numbers: failed test3 (errorcode $errorcode errormsg $errormsg)\n"; exit;} | |
# TEST ERRORCODE=55 (STRAND OF AN EXON IS NOT +/-): | |
%EXONS = (); | |
$EXONS{"transcript1"} = "transcript1=20=40=A,transcript1=5=10=A,transcript1=50=60=A"; | |
$EXONS{"transcript2"} = "transcript2=20=40=-,transcript2=5=10=-,transcript2=60=70=-"; | |
($EXONNUM,$errorcode,$errormsg) = &assign_exon_numbers(\%EXONS); | |
if ($errorcode != 55) { print STDERR "ERROR: test_assign_exon_numbers: failed test4 (errorcode $errorcode errormsg $errormsg)\n"; exit;} | |
} | |
#------------------------------------------------------------------# | |
# ASSIGN NUMBERS TO THE EXONS IN THE TRANSCRIPTS: | |
sub assign_exon_numbers | |
{ | |
my $EXONS = $_[0]; # HASH TABLE OF EXONS IN TRANSCRIPTS | |
my $errorcode = 0; # RETURNED AS 0 IF THERE IS NO ERROR | |
my $errormsg = "none";# RETURNED AS 'none' IF THERE IS NO ERROR | |
my %EXONNUM = (); # HASH TABLE OF EXON NUMBERS IN TRANSCRIPTS | |
my $transcript; # TRANSCRIPT NAME | |
my $exons; # EXONS IN A TRANSCRIPT | |
my @exons; # EXONS IN A TRANSCRIPT | |
my $exon; # EXON IN A TRANSCRIPT | |
my $exon_start; # START POSITION OF AN EXON | |
my @temp; # | |
my $i; # | |
my $strand; # STRAND OF THE EXON | |
my %START = (); # HASH TABLE TO RECORD START POSITIONS OF EXONS | |
my %STRAND = (); # HASH TABLE TO RECORD STRAND OF A TRANSCRIPT | |
my $exon_num; # EXON NUMBER ALONG A TRANSCRIPT | |
my %SEEN = (); # HASH TABLE TO RECORD WHICH EXONS WE HAVE SEEN ALREADY | |
# FOR EACH TRANSCRIPT, NUMBER THE EXONS ALONG THE TRANSCRIPT FROM 5' TO 3': | |
foreach $transcript (keys %{$EXONS}) | |
{ | |
$exons = $EXONS->{$transcript}; | |
@exons = split(/\,/,$exons); | |
# RECORD THE START POSITION AND STRAND: | |
%START = (); | |
for ($i = 0; $i <= $#exons; $i++) | |
{ | |
$exon = $exons[$i]; | |
@temp = split(/=/,$exon); | |
$exon_start = $temp[1]; | |
$strand = $temp[3]; | |
if ($START{$exon}) | |
{ | |
$errormsg = "ERROR: assign_exon_numbers: already have start for exon $exon in transcript $transcript\n"; | |
$errorcode = 51; # ERRORCODE=51 (TESTED FOR) | |
return(\%EXONNUM,$errorcode,$errormsg); | |
} | |
$START{$exon} = $exon_start; | |
if ($STRAND{$transcript}) | |
{ | |
if ($STRAND{$transcript} ne $strand) | |
{ | |
$errormsg = "ERROR: assign_exon_numbers: stored $STRAND{$transcript} as strand of $transcript, not $strand\n"; | |
$errorcode = 52; # ERRORCODE=52 (TESTED FOR) | |
return(\%EXONNUM,$errorcode,$errormsg); | |
} | |
} | |
else | |
{ | |
$STRAND{$transcript} = $strand; | |
} | |
} | |
# SORT THE EXONS ALONG THE CHROMOSOME: | |
@exons = sort { $START{$b} <=> $START{$a} } keys %START; | |
# FIND THE STRAND OF THE TRANSCRIPT: | |
if (!($STRAND{$transcript})) | |
{ | |
$errormsg = "ERROR: assign_exon_numbers: do not know strand of $transcript\n"; | |
$errorcode = 53; # ERRORCODE=53 (SHOULDN'T HAPPEN, CAN'T TEST FOR) | |
return(\%EXONNUM,$errorcode,$errormsg); | |
} | |
$strand = $STRAND{$transcript}; | |
# NUMBER THE EXONS FROM 5' TO 3' ALONG THE TRANSCRIPT: | |
if ($strand eq '-') | |
{ | |
$exon_num = 0; | |
for ($i = 0; $i <= $#exons; $i++) | |
{ | |
$exon = $exons[$i]; | |
$exon_num++; | |
if ($EXONNUM{$exon}) | |
{ | |
$errormsg = "ERROR: assign_exon_numbers: already have exon number $EXONNUM{$exon} for exon $exon\n"; | |
$errorcode = 64; # ERRORCODE=64 (SHOULDN'T HAPPEN, CAN'T TEST FOR) | |
return(\%EXONNUM,$errorcode,$errormsg); | |
} | |
$EXONNUM{$exon} = $exon_num; | |
} | |
} | |
elsif ($strand eq '+') | |
{ | |
$exon_num = 0; | |
for ($i = $#exons; $i >= 0; $i--) | |
{ | |
$exon = $exons[$i]; | |
$exon_num++; | |
if ($EXONNUM{$exon}) | |
{ | |
$errormsg = "ERROR: assign_exon_numbers: already have exon number $EXONNUM{$exon} for exon $exon\n"; | |
$errorcode = 64; # ERRORCODE=64 (SHOULDN'T HAPPEN, CAN'T TEST FOR) | |
return(\%EXONNUM,$errorcode,$errormsg); | |
} | |
$EXONNUM{$exon} = $exon_num; | |
} | |
} | |
else | |
{ | |
$errormsg = "ERROR: assign_exon_numbers: strand $strand\n"; | |
$errorcode = 55; # ERRORCODE=55 (TESTED FOR) | |
return(\%EXONNUM,$errorcode,$errormsg); | |
} | |
} | |
return(\%EXONNUM,$errorcode,$errormsg); | |
} | |
#------------------------------------------------------------------# | |
# TEST &read_genes_for_transcripts | |
sub test_read_genes_for_transcripts | |
{ | |
my $outputdir = $_[0]; # DIRECTORY TO PUT OUTPUT FILES IN | |
my $GENE; # HASH TABLE OF GENE NAMES FOR TRANSCRIPTS | |
my $errorcode; # RETURNED AS 0 FROM A FUNCTION IF THERE IS NO ERROR | |
my $errormsg; # RETURNED AS 'none' FROM A FUNCTION IF THERE IS NO ERROR | |
my $gff_lines; # INPUT GFF LINES | |
$gff_lines = "scaff_000010\tmaker\tgene\t20476\t21654\t.\t+\t.\tID=augustus_masked-scaff_000010-processed-gene-0.3;Name=augustus_masked-scaff_000010-processed-gene-0.3\n"; | |
$gff_lines = $gff_lines."scaff_000010\tmaker\tmRNA\t20476\t21654\t.\t+\t.\tID=augustus_masked-scaff_000010-processed-gene-0.3-mRNA-1;Parent=augustus_masked-scaff_000010-processed-gene-0.3;Name=augustus_masked-scaff_000010-processed-gene-0.3-mRNA-1;_AED=0.00;_eAED=0.00;_QI=0|-1|0|1|-1|1|1|0|392\n"; | |
$gff_lines = $gff_lines."scaff_000010\tmaker\texon\t20476\t21654\t.\t+\t.\tID=augustus_masked-scaff_000010-processed-gene-0.3-mRNA-1:exon:0;Parent=augustus_masked-scaff_000010-processed-gene-0.3-mRNA-1\n"; | |
$gff_lines = $gff_lines."scaff_000010\tmaker\tCDS\t20476\t21654\t.\t+\t0\tID=augustus_masked-scaff_000010-processed-gene-0.3-mRNA-1:cds;Parent=augustus_masked-scaff_000010-processed-gene-0.3-mRNA-1\n"; | |
$gff_lines = $gff_lines."scaff_000010\tmaker\tgene\t29000\t29497\t.\t-\t.\tID=augustus_masked-scaff_000010-processed-gene-0.4;Name=augustus_masked-scaff_000010-processed-gene-0.4\n"; | |
$gff_lines = $gff_lines."scaff_000010\tmaker\tmRNA\t29000\t29497\t.\t-\t.\tID=augustus_masked-scaff_000010-processed-gene-0.4-mRNA-1;Parent=augustus_masked-scaff_000010-processed-gene-0.4;Name=augustus_masked-scaff_000010-processed-gene-0.4-mRNA-1;_AED=0.01;_eAED=0.01;_QI=0|-1|0|1|-1|1|1|0|165\n"; | |
$gff_lines = $gff_lines."scaff_000010\tmaker\texon\t9000\t29497\t.\t-\t.\tID=augustus_masked-scaff_000010-processed-gene-0.4-mRNA-1:exon:25;Parent=augustus_masked-scaff_000010-processed-gene-0.4-mRNA-1\n"; | |
$gff_lines = $gff_lines."scaff_000010\tmaker\tCDS\t29000\t29497\t.\t-\t0\tID=augustus_masked-scaff_000010-processed-gene-0.4-mRNA-1:cds;Parent=augustus_masked-scaff_000010-processed-gene-0.4-mRNA-1\n"; | |
$gff_lines = $gff_lines."scaff_000010\tmodel_gff:maker\tmatch\t20476\t21654\t.\t+\t.\tID=scaff_000010:hit:4104:3.5.0.0;Name=augustus_masked-scaff_000010-processed-gene-0.4-mRNA-1\n"; | |
$gff_lines = $gff_lines."scaff_000010\tmodel_gff:maker\tmatch_part\t20476\t21654\t.\t+\t.\tID=scaff_000010:hsp:7664:3.5.0.0;Parent=scaff_000010:hit:4104:3.5.0.0;Target=augustus_masked-scaff_000010-processed-gene-0.4-mRNA-1 1 1179 +;Gap=M1179\n"; | |
$gff_lines = $gff_lines."##FASTA\n"; | |
$gff_lines = $gff_lines.">seq1\n"; | |
$gff_lines = $gff_lines."ACAACACAGATAGATATATAGAGCA\n"; | |
close(GFF); | |
($GENE,$errorcode,$errormsg) = &read_genes_for_transcripts($gff_lines); | |
if ($errorcode != 0) { print STDERR "ERROR: test_read_genes_for_transcripst: failed test1\n"; exit;} | |
if ($GENE->{"scaff_000010=augustus_masked-scaff_000010-processed-gene-0.3-mRNA-1"} ne "augustus_masked-scaff_000010-processed-gene-0.3") { print STDERR "ERROR: test_read_genes_for_transcripts: failed test1b\n"; exit;} | |
if ($GENE->{"scaff_000010=augustus_masked-scaff_000010-processed-gene-0.4-mRNA-1"} ne "augustus_masked-scaff_000010-processed-gene-0.4") { print STDERR "ERROR: test_read_genes_for_transcripts: failed test1c\n"; exit;} | |
# TEST ERRORCODE=9 (mRNA APPEARS TWICE IN GFF): | |
$gff_lines = "scaff_000010\tmaker\tgene\t20476\t21654\t.\t+\t.\tID=augustus_masked-scaff_000010-processed-gene-0.3;Name=augustus_masked-scaff_000010-processed-gene-0.3\n"; | |
$gff_lines = $gff_lines."scaff_000010\tmaker\tmRNA\t20476\t21654\t.\t+\t.\tID=augustus_masked-scaff_000010-processed-gene-0.3-mRNA-1;Parent=augustus_masked-scaff_000010-processed-gene-0.3;Name=augustus_masked-scaff_000010-processed-gene-0.3-mRNA-1;_AED=0.00;_eAED=0.00;_QI=0|-1|0|1|-1|1|1|0|392\n"; | |
$gff_lines = $gff_lines."scaff_000010\tmaker\texon\t20476\t21654\t.\t+\t.\tID=augustus_masked-scaff_000010-processed-gene-0.3-mRNA-1:exon:0;Parent=augustus_masked-scaff_000010-processed-gene-0.3-mRNA-1\n"; | |
$gff_lines = $gff_lines."scaff_000010\tmaker\tCDS\t20476\t21654\t.\t+\t0\tID=augustus_masked-scaff_000010-processed-gene-0.3-mRNA-1:cds;Parent=augustus_masked-scaff_000010-processed-gene-0.3-mRNA-1\n"; | |
$gff_lines = $gff_lines."scaff_000010\tmaker\tgene\t29000\t29497\t.\t-\t.\tID=augustus_masked-scaff_000010-processed-gene-0.4;Name=augustus_masked-scaff_000010-processed-gene-0.4\n"; | |
$gff_lines = $gff_lines."scaff_000010\tmaker\tmRNA\t29000\t29497\t.\t-\t.\tID=augustus_masked-scaff_000010-processed-gene-0.3-mRNA-1;Parent=augustus_masked-scaff_000010-processed-gene-0.4;Name=augustus_masked-scaff_000010-processed-gene-0.4-mRNA-1;_AED=0.01;_eAED=0.01;_QI=0|-1|0|1|-1|1|1|0|165\n"; | |
$gff_lines = $gff_lines."scaff_000010\tmaker\texon\t9000\t29497\t.\t-\t.\tID=augustus_masked-scaff_000010-processed-gene-0.4-mRNA-1:exon:25;Parent=augustus_masked-scaff_000010-processed-gene-0.4-mRNA-1\n"; | |
$gff_lines = $gff_lines."scaff_000010\tmaker\tCDS\t29000\t29497\t.\t-\t0\tID=augustus_masked-scaff_000010-processed-gene-0.4-mRNA-1:cds;Parent=augustus_masked-scaff_000010-processed-gene-0.4-mRNA-1\n"; | |
$gff_lines = $gff_lines."scaff_000010\tmodel_gff:maker\tmatch\t20476\t21654\t.\t+\t.\tID=scaff_000010:hit:4104:3.5.0.0;Name=augustus_masked-scaff_000010-processed-gene-0.4-mRNA-1\n"; | |
$gff_lines = $gff_lines."scaff_000010\tmodel_gff:maker\tmatch_part\t20476\t21654\t.\t+\t.\tID=scaff_000010:hsp:7664:3.5.0.0;Parent=scaff_000010:hit:4104:3.5.0.0;Target=augustus_masked-scaff_000010-processed-gene-0.4-mRNA-1 1 1179 +;Gap=M1179\n"; | |
$gff_lines = $gff_lines."##FASTA\n"; | |
$gff_lines = $gff_lines.">seq1\n"; | |
$gff_lines = $gff_lines."ACAACACAGATAGATATATAGAGCA\n"; | |
($GENE,$errorcode,$errormsg) = &read_genes_for_transcripts($gff_lines); | |
if ($errorcode != 9) { print STDERR "ERROR: test_read_genes_for_transcripts: failed test2\n"; exit;} | |
# TEST ERRORCODE=3 (LINE DOES NOT HAVE 9 COLUMNS): | |
$gff_lines = "scaff_000010\tmaker\tgene\t20476\t21654\t.\t+\t.\tID=augustus_masked-scaff_000010-processed-gene-0.3;Name=augustus_masked-scaff_000010-processed-gene-0.3\n"; | |
$gff_lines = $gff_lines."scaff_000010\tmaker\tmRNA\t20476\t21654\t.\t+\t.\tID=augustus_masked-scaff_000010-processed-gene-0.3-mRNA-1;Parent=augustus_masked-scaff_000010-processed-gene-0.3;Name=augustus_masked-scaff_000010-processed-gene-0.3-mRNA-1;_AED=0.00;_eAED=0.00;_QI=0|-1|0|1|-1|1|1|0|392\n"; | |
$gff_lines = $gff_lines."scaff_000010\tmaker\texon\t20476\t21654\t.\t+\t.\tID=augustus_masked-scaff_000010-processed-gene-0.3-mRNA-1:exon:0;Parent=augustus_masked-scaff_000010-processed-gene-0.3-mRNA-1\n"; | |
$gff_lines = $gff_lines."scaff_000010\tmaker\tCDS\t20476\t21654\t.\t+\t0\tID=augustus_masked-scaff_000010-processed-gene-0.3-mRNA-1:cds;Parent=augustus_masked-scaff_000010-processed-gene-0.3-mRNA-1\n"; | |
$gff_lines = $gff_lines."scaff_000010\tmaker\tgene\t29000\t29497\t.\t-\t.\tID=augustus_masked-scaff_000010-processed-gene-0.4;Name=augustus_masked-scaff_000010-processed-gene-0.4\n"; | |
$gff_lines = $gff_lines."scaff_000010\tmaker\tmRNA\t29000\t29497\t.\t-\t.\tID=augustus_masked-scaff_000010-processed-gene-0.4-mRNA-1;Parent=augustus_masked-scaff_000010-processed-gene-0.4;Name=augustus_masked-scaff_000010-processed-gene-0.4-mRNA-1;_AED=0.01;_eAED=0.01;_QI=0|-1|0|1|-1|1|1|0|165\n"; | |
$gff_lines = $gff_lines."scaff_000010\tmaker\texon\t9000\t29497\t.\t-\t.\tID=augustus_masked-scaff_000010-processed-gene-0.4-mRNA-1:exon:25;Parent=augustus_masked-scaff_000010-processed-gene-0.4-mRNA-1\n"; | |
$gff_lines = $gff_lines."scaff_000010\tmaker\tCDS\t29000\t.\t-\t0\tID=augustus_masked-scaff_000010-processed-gene-0.4-mRNA-1:cds;Parent=augustus_masked-scaff_000010-processed-gene-0.4-mRNA-1\n"; | |
$gff_lines = $gff_lines."scaff_000010\tmodel_gff:maker\tmatch\t20476\t21654\t.\t+\t.\tID=scaff_000010:hit:4104:3.5.0.0;Name=augustus_masked-scaff_000010-processed-gene-0.4-mRNA-1\n"; | |
$gff_lines = $gff_lines."scaff_000010\tmodel_gff:maker\tmatch_part\t20476\t21654\t.\t+\t.\tID=scaff_000010:hsp:7664:3.5.0.0;Parent=scaff_000010:hit:4104:3.5.0.0;Target=augustus_masked-scaff_000010-processed-gene-0.4-mRNA-1 1 1179 +;Gap=M1179\n"; | |
$gff_lines = $gff_lines."##FASTA\n"; | |
$gff_lines = $gff_lines.">seq1\n"; | |
$gff_lines = $gff_lines."ACAACACAGATAGATATATAGAGCA\n"; | |
($GENE,$errorcode,$errormsg) = &read_genes_for_transcripts($gff_lines); | |
if ($errorcode != 3) { print STDERR "ERROR: test_read_genes_for_transcripts: failed test3\n"; exit;} | |
} | |
#------------------------------------------------------------------# | |
# READ IN THE GENE NAME FOR EACH TRANSCRIPT IN THE INPUT GFF FILE: | |
sub read_genes_for_transcripts | |
{ | |
my $gff_lines = $_[0]; # INPUT GFF LINES | |
my $errorcode = 0; # THIS IS RETURNED AS 0 IF NO ERROR OCCURRED. | |
my $errormsg = "none";# THIS IS RETURNED AS 'none' IF NO ERROR OCCURRED. | |
my %GENE = (); # HASH TABLE OF GENE NAMES FOR TRANSCRIPT NAMES. | |
my $line; # | |
my @temp; # | |
my $end_of_gff = 0; # SAYS WHETHER THE END OF THE GFF FILE HAS BEEN REACHED | |
my $name; # NAME OF mRNA | |
my $gene; # NAME OF GENE | |
my @gff_lines; # INPUT GFF LINES | |
my $i; # | |
my $scaffold; # | |
@gff_lines = split(/\n+/,$gff_lines); | |
for ($i = 0; $i <= $#gff_lines; $i++) | |
{ | |
$line = $gff_lines[$i]; | |
chomp $line; | |
if ($line eq '##FASTA') { $end_of_gff = 1;} | |
if (substr($line,0,1) ne '#' && $end_of_gff == 0) | |
{ | |
@temp = split(/\t+/,$line); | |
if ($#temp == 8) | |
{ | |
if ($temp[2] eq 'mRNA') | |
{ | |
# eg. scaff_000010 maker mRNA 88793 89828 . + . ID=maker-scaff_000010-augustus-gene-0.21-mRNA-1;Parent=maker-scaff_000010-augustus-gene-0.21;... | |
$scaffold = $temp[0]; | |
$name = $temp[8]; | |
$gene = $temp[8]; | |
@temp = split(/ID=/,$name); | |
$name = $temp[1]; # eg. maker-scaff_000010-augustus-gene-0.21-mRNA-1;... | |
@temp = split(/\;/,$name); | |
$name = $temp[0]; # eg. maker-scaff_000010-augustus-gene-0.21-mRNA-1 | |
# GET THE NAME OF THE GENE: | |
@temp = split(/Parent=/,$gene); | |
$gene = $temp[1]; # eg. maker-scaff_000010-augustus-gene-0.21;... | |
@temp = split(/\;/,$gene); | |
$gene = $temp[0]; # eg. maker-scaff_000010-augustus-gene-0.21 | |
$name = $scaffold."=".$name; | |
# RECORD THE GENE NAME FOR THE mRNA: | |
if ($GENE{$name}) | |
{ | |
if ($GENE{$name} ne $gene) | |
{ | |
$errormsg= "ERROR: read_genes_for_transcripts: already have gene name $GENE{$name} for $name (scaffold $scaffold)\n"; | |
$errorcode= 9; # ERRORCODE=9 (TESTED FOR) | |
return(\%GENE,$errorcode,$errormsg); | |
} | |
} | |
$GENE{$name} = $gene; | |
} | |
} | |
else | |
{ | |
$errormsg = "ERROR: read_genes_for_transcripts: line does not have 9 columns: $line\n"; | |
$errorcode = 3; # ERRORCODE=3 (TESTED FOR) | |
return(\%GENE,$errorcode,$errormsg); | |
} | |
} | |
} | |
return(\%GENE,$errorcode,$errormsg); | |
} | |
#------------------------------------------------------------------# | |
# SUBROUTINE TO MAKE A FILE NAME FOR A TEMPORARY FILE: | |
sub make_filename | |
{ | |
my $outputdir = $_[0]; # OUTPUT DIRECTORY TO WRITE TEMPORARY FILE NAME TO | |
my $found_name = 0; # SAYS WHETHER WE HAVE FOUND A FILE NAME YET | |
my $filename = "none";# NEW FILE NAME TO USE | |
my $errorcode = 0; # RETURNED AS 0 IF THERE IS NO ERROR | |
my $errormsg = "none";# RETURNED AS 'none' IF THERE IS NO ERROR | |
my $poss_filename; # POSSIBLE FILE NAME TO USE | |
my $random_number; # RANDOM NUMBER TO USE IN TEMPORARY FILE NAME | |
while($found_name == 0) | |
{ | |
$random_number = rand(); | |
$poss_filename = $outputdir."/tmp".$random_number; | |
if (!(-e $poss_filename)) | |
{ | |
$filename = $poss_filename; | |
$found_name = 1; | |
} | |
} | |
if ($found_name == 0 || $filename eq 'none') | |
{ | |
$errormsg = "ERROR: make_filename: found_name $found_name filename $filename\n"; | |
$errorcode = 6; # ERRORCODE=6 | |
return($filename,$errorcode,$errormsg); | |
} | |
return($filename,$errorcode,$errormsg); | |
} | |
#------------------------------------------------------------------# | |
# TEST &print_error | |
sub test_print_error | |
{ | |
my $errormsg; # RETURNED AS 'none' FROM A FUNCTION IF THERE WAS NO ERROR | |
my $errorcode; # RETURNED AS 0 FROM A FUNCTION IF THERE WAS NO ERROR | |
($errormsg,$errorcode) = &print_error(45,45,1); | |
if ($errorcode != 12) { print STDERR "ERROR: test_print_error: failed test1\n"; exit;} | |
($errormsg,$errorcode) = &print_error('My error message','My error message',1); | |
if ($errorcode != 11) { print STDERR "ERROR: test_print_error: failed test2\n"; exit;} | |
($errormsg,$errorcode) = &print_error('none',45,1); | |
if ($errorcode != 13) { print STDERR "ERROR: test_print_error: failed test3\n"; exit;} | |
($errormsg,$errorcode) = &print_error('My error message', 0, 1); | |
if ($errorcode != 13) { print STDERR "ERROR: test_print_error: failed test4\n"; exit;} | |
} | |
#------------------------------------------------------------------# | |
# PRINT OUT AN ERROR MESSAGE AND EXIT. | |
sub print_error | |
{ | |
my $errormsg = $_[0]; # THIS SHOULD BE NOT 'none' IF AN ERROR OCCURRED. | |
my $errorcode = $_[1]; # THIS SHOULD NOT BE 0 IF AN ERROR OCCURRED. | |
my $called_from_test = $_[2]; # SAYS WHETHER THIS WAS CALLED FROM test_print_error OR NOT | |
if ($errorcode =~ /[A-Z]/ || $errorcode =~ /[a-z]/) | |
{ | |
if ($called_from_test == 1) | |
{ | |
$errorcode = 11; $errormsg = "ERROR: print_error: the errorcode is $errorcode, should be a number.\n"; # ERRORCODE=11 | |
return($errormsg,$errorcode); | |
} | |
else | |
{ | |
print STDERR "ERROR: print_error: the errorcode is $errorcode, should be a number.\n"; | |
exit; | |
} | |
} | |
if (!($errormsg =~ /[A-Z]/ || $errormsg =~ /[a-z]/)) | |
{ | |
if ($called_from_test == 1) | |
{ | |
$errorcode = 12; $errormsg = "ERROR: print_error: the errormessage $errormsg does not seem to contain text.\n"; # ERRORCODE=12 | |
return($errormsg,$errorcode); | |
} | |
else | |
{ | |
print STDERR "ERROR: print_error: the errormessage $errormsg does not seem to contain text.\n"; | |
exit; | |
} | |
} | |
if ($errormsg eq 'none' || $errorcode == 0) | |
{ | |
if ($called_from_test == 1) | |
{ | |
$errorcode = 13; $errormsg = "ERROR: print_error: errormsg $errormsg, errorcode $errorcode.\n"; # ERRORCODE=13 | |
return($errormsg,$errorcode); | |
} | |
else | |
{ | |
print STDERR "ERROR: print_error: errormsg $errormsg, errorcode $errorcode.\n"; | |
exit; | |
} | |
} | |
else | |
{ | |
print STDERR "$errormsg"; | |
exit; | |
} | |
return($errormsg,$errorcode); | |
} | |
#------------------------------------------------------------------# | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hi I tried to run the script with all required arguments and got an error:
command:
perl rename_genes_in_maker_gff.pl SS_maker_all.gff renamed_maker_all.gff /shared/gene_rename/SS/ SS
Error:
Argument "" isn't numeric in numeric eq (==) at rename_genes_in_maker_gff.pl line 882.
ERROR: test_count_lines: failed test1
any help?