Created
August 27, 2013 14:25
-
-
Save avrilcoghlan/6354195 to your computer and use it in GitHub Desktop.
Perl script that finds the set of highest-scoring non-overlapping genes in a gff file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl | |
=head1 NAME | |
find_best_nonoverlapping_genes.pl | |
=head1 SYNOPSIS | |
find_best_nonoverlapping_genes.pl input_gff path_to_bedtools output_gff outputdir min_gene_score from_maker subtract_from | |
where input_gff is the input GeneWise gff file, | |
path_to_bedtools is the path to the bedtools installation, | |
output_gff is the output gff file, | |
outputdir is the output directory for writing output files, | |
min_gene_score is the minimum score for a gene to be retained, | |
from_maker says whether the gff file is from maker (yes/no), | |
substract_from is the number to subtract the score from (eg. 1 for maker, none if you don't want this option) | |
=head1 DESCRIPTION | |
This script takes a gff file of GeneWise predictions, and for each set | |
of overlapping predictions, it takes just the highest scoring gene prediction. | |
The highest scoring gene predictions are put in the output gff (<output_gff>). | |
Only genes with scores of >=min_gene_score are kept. | |
=head1 VERSION | |
Perl script last edited 9-Jan-2013. | |
=head1 CONTACT | |
alc@sanger.ac.uk (Avril Coghlan) | |
=cut | |
# | |
# Perl script find_best_nonoverlapping_genes.pl | |
# Written by Avril Coghlan (alc@sanger.ac.uk) | |
# 9-Jan-13. | |
# Last edited 9-Jan-2013. | |
# SCRIPT SYNOPSIS: find_best_nonoverlapping_genes.pl: finds set of highest-scoring non-overlapping genes in a gff file | |
# | |
#------------------------------------------------------------------# | |
# CHECK IF THERE ARE THE CORRECT NUMBER OF COMMAND-LINE ARGUMENTS: | |
use strict; | |
use warnings; | |
my $num_args = $#ARGV + 1; | |
if ($num_args != 7) | |
{ | |
print "Usage of find_best_nonoverlapping_genes.pl\n\n"; | |
print "perl find_best_nonoverlapping_genes.pl <input_gff> <path_to_bedtools> <output_gff> <outputdir> <min_gene_score> <from_maker> <subtract_from>\n"; | |
print "where <input_gff> is the input GeneWise gff file,\n"; | |
print " <path_to_bedtools> is the path to the bedtools installation,\n"; | |
print " <output_gff> is the output gff file,\n"; | |
print " <outputdir> is the output directory for writing output files,\n"; | |
print " <min_gene_score> is the minimum score for a gene to be retained,\n"; | |
print " <from_maker> says whether the gff file is from maker (yes/no)\n"; | |
print " <subtract_from> says the number to subtract the score from (eg. 1 for maker, none if you don't want this option)\n"; | |
print "For example, >perl find_best_nonoverlapping_genes.pl TF.gff\n"; | |
print "/software/pathogen/external/apps/usr/bin/bedtools\n"; | |
print "TF_output.gff /lustre/scratch108/parasites/alc/Genewise50Helminths/gff_file 10 no none\n"; | |
exit; | |
} | |
# FIND THE PATH TO THE INPUT GFF FILE: | |
my $input_gff = $ARGV[0]; | |
# FIND THE PATH TO THE BEDTOOLS INSTALLATION: | |
my $path_to_bedtools = $ARGV[1]; | |
# FIND THE OUTPUT GFF FILE: | |
my $output_gff = $ARGV[2]; | |
# FIND THE DIRECTORY TO USE FOR OUTPUT FILES: | |
my $outputdir = $ARGV[3]; | |
# FIND THE MINIMUM SCORE FOR A GENE TO BE RETAINED: | |
my $min_gene_score = $ARGV[4]; | |
# FIND OUT WHETHER THE GFF FILE IS FROM MAKER: | |
my $from_maker = $ARGV[5]; | |
# FIND THE NUMBER TO SUBTRACT THE SCORE FROM: | |
my $subtract_from = $ARGV[6]; | |
#------------------------------------------------------------------# | |
# TEST SUBROUTINES: | |
my $PRINT_TEST_DATA = 1; # SAYS WHETHER TO PRINT DATA USED DURING TESTING. | |
&test_read_maker_scores($outputdir); | |
&test_make_cds_gff($outputdir); | |
&test_print_error; | |
&test_read_genes_for_transcripts($outputdir); | |
&test_run_main_program($outputdir,$path_to_bedtools); | |
&test_write_output_gff($outputdir); | |
&test_find_overlapping_genes($outputdir,$path_to_bedtools); | |
&test_find_best_predictions; | |
print STDERR "Tests done, running main code...\n"; | |
#------------------------------------------------------------------# | |
# RUN THE MAIN PART OF THE CODE: | |
(my $errorcode,my $errormsg) = &run_main_program($outputdir,$input_gff,$output_gff,$path_to_bedtools,$min_gene_score,$from_maker,0,$subtract_from); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
print STDERR "FINISHED.\n"; | |
#------------------------------------------------------------------# | |
# TEST &run_main_program | |
sub test_run_main_program | |
{ | |
my $outputdir = $_[0]; # DIRECTORY TO PUT OUTPUT FILES INTO | |
my $path_to_bedtools = $_[1]; # PATH TO THE BEDTOOLS INSTALLATION | |
my $gff; # GFF FILE FOR THE SCAFFOLD | |
my $new_gff; # NEW GFF FILE FOR THIS SCAFFOLD | |
my $expected_new_gff; # FILE WITH EXPECTED CONTENTS OF $new_gff | |
my $differences; # DIFFERENES BETWEEN $new_gff AND $expected_new_gff | |
my $length_differences; # LENGTH OF $differences | |
my $line; # | |
my @temp; # | |
my $errorcode; # RETURNED AS 0 FROM A FUNCTION IF THERE IS NO ERROR | |
my $errormsg; # RETURNED AS 'none' FROM A FUNCTION IF THERE IS NO ERROR | |
# IN THIS EXAMPLE, SHOULD JUST HAVE ACAC.scaffold.00050.383204-TF101001-genewise-prediction-5 AND ACAC.scaffold.00050.383205-TF101001-genewise-prediction-5 AT THE END: | |
($gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
open(GFF,">$gff") || die "ERROR: test_run_main_program: cannot open gff $gff\n"; | |
print GFF "ACAC.scaffold.00050.383204\tGeneWise\tgene\t185915\t185921\t-24.36\t+\t.\tID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-1\n"; | |
print GFF "ACAC.scaffold.00050.383204\tGeneWise\tCDS\t185915\t185921\t-24.36\t+\t.\tID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-1\n"; | |
print GFF "ACAC.scaffold.00050.383204\tGeneWise\tgene\t185920\t185931\t-3.52\t+\t.\tID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-2\n"; | |
print GFF "ACAC.scaffold.00050.383204\tGeneWise\tCDS\t185920\t185931\t-3.52\t+\t.\tID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-2\n"; | |
print GFF "ACAC.scaffold.00050.383204\tGeneWise\tgene\t185910\t185916\t-3.42\t+\t.\tID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-3\n"; | |
print GFF "ACAC.scaffold.00050.383204\tGeneWise\tCDS\t185910\t185916\t-3.42\t+\t.\tID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-3\n"; | |
print GFF "ACAC.scaffold.00050.383204\tGeneWise\tgene\t206829\t214134\t200.15\t+\t.\tID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-4\n"; | |
print GFF "ACAC.scaffold.00050.383204\tGeneWise\tCDS\t206829\t214134\t200.15\t+\t.\tID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-4\n"; | |
print GFF "ACAC.scaffold.00050.383204\tGeneWise\tgene\t206829\t214134\t252.15\t-\t.\tID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-5\n"; | |
print GFF "ACAC.scaffold.00050.383204\tGeneWise\tCDS\t206829\t214134\t252.15\t-\t.\tID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-5\n"; | |
print GFF "ACAC.scaffold.00050.383204\tGeneWise\tintron\t206829\t214134\t252.15\t-\t.\tID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-5\n"; | |
print GFF "ACAC.scaffold.00050.383205\tGeneWise\tgene\t185915\t185921\t-24.36\t+\t.\tID=ACAC.scaffold.00050.383205-TF101001-genewise-prediction-1\n"; | |
print GFF "ACAC.scaffold.00050.383205\tGeneWise\tCDS\t185915\t185921\t-24.36\t+\t.\tID=ACAC.scaffold.00050.383205-TF101001-genewise-prediction-1\n"; | |
print GFF "ACAC.scaffold.00050.383205\tGeneWise\tgene\t185920\t185931\t-3.52\t+\t.\tID=ACAC.scaffold.00050.383205-TF101001-genewise-prediction-2\n"; | |
print GFF "ACAC.scaffold.00050.383205\tGeneWise\tCDS\t185920\t185931\t-3.52\t+\t.\tID=ACAC.scaffold.00050.383205-TF101001-genewise-prediction-2\n"; | |
print GFF "ACAC.scaffold.00050.383205\tGeneWise\tgene\t185910\t185916\t-3.42\t+\t.\tID=ACAC.scaffold.00050.383205-TF101001-genewise-prediction-3\n"; | |
print GFF "ACAC.scaffold.00050.383205\tGeneWise\tCDS\t185910\t185916\t-3.42\t+\t.\tID=ACAC.scaffold.00050.383205-TF101001-genewise-prediction-3\n"; | |
print GFF "ACAC.scaffold.00050.383205\tGeneWise\tgene\t206829\t214134\t200.15\t+\t.\tID=ACAC.scaffold.00050.383205-TF101001-genewise-prediction-4\n"; | |
print GFF "ACAC.scaffold.00050.383205\tGeneWise\tCDS\t206829\t214134\t200.15\t+\t.\tID=ACAC.scaffold.00050.383205-TF101001-genewise-prediction-4\n"; | |
print GFF "ACAC.scaffold.00050.383205\tGeneWise\tgene\t206829\t214134\t252.15\t-\t.\tID=ACAC.scaffold.00050.383205-TF101001-genewise-prediction-5\n"; | |
print GFF "ACAC.scaffold.00050.383205\tGeneWise\tCDS\t206829\t214134\t252.15\t-\t.\tID=ACAC.scaffold.00050.383205-TF101001-genewise-prediction-5\n"; | |
close(GFF); | |
($new_gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
@temp = split(/\//,$new_gff); | |
$new_gff = $temp[$#temp]; | |
($errorcode,$errormsg) = &run_main_program($outputdir,$gff,$new_gff,$path_to_bedtools,10,'no',1,'none'); | |
if ($errorcode != 0) { print STDERR "ERROR: test_run_main_program: failed test1\n"; exit;} | |
($expected_new_gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
open(EXPECTED,">$expected_new_gff") || die "ERROR: test_run_main_program: cannot open expected_new_gff $expected_new_gff\n"; | |
print EXPECTED "ACAC.scaffold.00050.383204\tGeneWise\tgene\t206829\t214134\t252.15\t-\t.\tID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-5\n"; | |
print EXPECTED "ACAC.scaffold.00050.383204\tGeneWise\tCDS\t206829\t214134\t252.15\t-\t.\tID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-5\n"; | |
print EXPECTED "ACAC.scaffold.00050.383204\tGeneWise\tintron\t206829\t214134\t252.15\t-\t.\tID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-5\n"; | |
print EXPECTED "ACAC.scaffold.00050.383205\tGeneWise\tgene\t206829\t214134\t252.15\t-\t.\tID=ACAC.scaffold.00050.383205-TF101001-genewise-prediction-5\n"; | |
print EXPECTED "ACAC.scaffold.00050.383205\tGeneWise\tCDS\t206829\t214134\t252.15\t-\t.\tID=ACAC.scaffold.00050.383205-TF101001-genewise-prediction-5\n"; | |
close(EXPECTED); | |
$differences = ""; | |
open(TEMP,"diff $new_gff $expected_new_gff |"); | |
while(<TEMP>) | |
{ | |
$line = $_; | |
$differences = $differences.$line; | |
} | |
close(TEMP); | |
$length_differences = length($differences); | |
if ($length_differences != 0) { print STDERR "ERROR: test_run_main_program: failed test1 (new_gff $new_gff expected_new_gff $expected_new_gff)\n"; exit;} | |
system "rm -f $new_gff"; | |
system "rm -f $expected_new_gff"; | |
system "rm -f $gff"; | |
# MAKER TEST CASE: | |
($gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
open(GFF,">$gff") || die "ERROR: test_run_main_program: cannot open gff $gff\n"; | |
print GFF "Contig_000100 maker gene 6027 7030 . + . ID=DME_0000799101;Name=DME_0000799101\n"; | |
print GFF "Contig_000100 maker mRNA 6027 7030 . + . ID=DME_0000799101-mRNA-1;Parent=DME_0000799101;Name=DME_0000799101-mRNA-1;_AED=0.15;_eAED=0.15;_QI=0|0|0|1|1|1|3|0|188\n"; | |
print GFF "Contig_000100 maker exon 6027 6166 . + . ID=DME_0000799101-mRNA-1:exon:1;Parent=DME_0000799101-mRNA-1\n"; | |
print GFF "Contig_000100 maker exon 6236 6410 . + . ID=DME_0000799101-mRNA-1:exon:2;Parent=DME_0000799101-mRNA-1\n"; | |
print GFF "Contig_000100 maker exon 6779 7030 . + . ID=DME_0000799101-mRNA-1:exon:3;Parent=DME_0000799101-mRNA-1\n"; | |
print GFF "Contig_000100 maker CDS 6027 6166 . + 0 ID=DME_0000799101-mRNA-1:cds;Parent=DME_0000799101-mRNA-1\n"; | |
print GFF "Contig_000100 maker CDS 6236 6410 . + 1 ID=DME_0000799101-mRNA-1:cds;Parent=DME_0000799101-mRNA-1\n"; | |
print GFF "Contig_000100 maker CDS 6779 7030 . + 0 ID=DME_0000799101-mRNA-1:cds;Parent=DME_0000799101-mRNA-1\n"; | |
print GFF "Contig_000100 maker gene 6027 7030 . + . ID=DME_0000799102;Name=DME_0000799102\n"; | |
print GFF "Contig_000100 maker mRNA 6027 7030 . + . ID=DME_0000799102-mRNA-1;Parent=DME_0000799102;Name=DME_0000799102-mRNA-1;_AED=0.12;_eAED=0.12;_QI=0|0|0|1|1|1|3|0|188\n"; | |
print GFF "Contig_000100 maker exon 6027 6166 . + . ID=DME_0000799102-mRNA-1:exon:1;Parent=DME_0000799102-mRNA-1\n"; | |
print GFF "Contig_000100 maker exon 6236 6410 . + . ID=DME_0000799102-mRNA-1:exon:2;Parent=DME_0000799102-mRNA-1\n"; | |
print GFF "Contig_000100 maker exon 6779 7030 . + . ID=DME_0000799102-mRNA-1:exon:3;Parent=DME_0000799102-mRNA-1\n"; | |
print GFF "Contig_000100 maker CDS 6027 6166 . + 0 ID=DME_0000799102-mRNA-1:cds;Parent=DME_0000799102-mRNA-1\n"; | |
print GFF "Contig_000100 maker CDS 6236 6410 . + 1 ID=DME_0000799102-mRNA-1:cds;Parent=DME_0000799102-mRNA-1\n"; | |
print GFF "Contig_000100 maker CDS 6779 7030 . + 0 ID=DME_0000799102-mRNA-1:cds;Parent=DME_0000799102-mRNA-1\n"; | |
print GFF "##FASTA\n"; | |
print GFF ">seq1\n"; | |
print GFF "ACAGAG\n"; | |
close(GFF); | |
($new_gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
@temp = split(/\//,$new_gff); | |
$new_gff = $temp[$#temp]; | |
($errorcode,$errormsg) = &run_main_program($outputdir,$gff,$new_gff,$path_to_bedtools,0,'yes',1,'none'); | |
if ($errorcode != 0) { print STDERR "ERROR: test_run_main_program: failed test2\n"; exit;} | |
($expected_new_gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
open(EXPECTED,">$expected_new_gff") || die "ERROR: test_run_main_program: cannot open expected_new_gff $expected_new_gff\n"; | |
print EXPECTED "Contig_000100 maker gene 6027 7030 . + . ID=DME_0000799101;Name=DME_0000799101\n"; | |
print EXPECTED "Contig_000100 maker mRNA 6027 7030 . + . ID=DME_0000799101-mRNA-1;Parent=DME_0000799101;Name=DME_0000799101-mRNA-1;_AED=0.15;_eAED=0.15;_QI=0|0|0|1|1|1|3|0|188\n"; | |
print EXPECTED "Contig_000100 maker exon 6027 6166 . + . ID=DME_0000799101-mRNA-1:exon:1;Parent=DME_0000799101-mRNA-1\n"; | |
print EXPECTED "Contig_000100 maker exon 6236 6410 . + . ID=DME_0000799101-mRNA-1:exon:2;Parent=DME_0000799101-mRNA-1\n"; | |
print EXPECTED "Contig_000100 maker exon 6779 7030 . + . ID=DME_0000799101-mRNA-1:exon:3;Parent=DME_0000799101-mRNA-1\n"; | |
print EXPECTED "Contig_000100 maker CDS 6027 6166 . + 0 ID=DME_0000799101-mRNA-1:cds;Parent=DME_0000799101-mRNA-1\n"; | |
print EXPECTED "Contig_000100 maker CDS 6236 6410 . + 1 ID=DME_0000799101-mRNA-1:cds;Parent=DME_0000799101-mRNA-1\n"; | |
print EXPECTED "Contig_000100 maker CDS 6779 7030 . + 0 ID=DME_0000799101-mRNA-1:cds;Parent=DME_0000799101-mRNA-1\n"; | |
print EXPECTED "##FASTA\n"; | |
print EXPECTED ">seq1\n"; | |
print EXPECTED "ACAGAG\n"; | |
close(EXPECTED); | |
$differences = ""; | |
$new_gff = $outputdir."/".$new_gff; | |
open(TEMP,"diff $new_gff $expected_new_gff |"); | |
while(<TEMP>) | |
{ | |
$line = $_; | |
$differences = $differences.$line; | |
} | |
close(TEMP); | |
$length_differences = length($differences); | |
if ($length_differences != 0) { print STDERR "ERROR: test_run_main_program: failed test2 (new_gff $new_gff expected_new_gff $expected_new_gff)\n"; exit;} | |
system "rm -f $new_gff"; | |
system "rm -f $expected_new_gff"; | |
system "rm -f $gff"; | |
# TEST ERRORCODE=3 (from_maker IS NOT yes/no): | |
($gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
open(GFF,">$gff") || die "ERROR: test_run_main_program: cannot open gff $gff\n"; | |
print GFF "SSTP.scaffold.00055.227916 maker gene 7006 10673 . - . ID=maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.52;Name=maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.52;score=1000\n"; | |
print GFF "SSTP.scaffold.00055.227916 maker mRNA 7006 10673 . - . ID=1:SRAE_2028500;Parent=maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.52;Name=maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.52-mRNA-1;_AED=0.00;_eAED=0.00;_QI=0|0|0|1|1|1|2|0|1206;score=1000\n"; | |
print GFF "SSTP.scaffold.00055.227916 pred_gff:embl match 7006 10673 1000 - . ID=SSTP.scaffold.00055.227916:hit:3043:3.5.0.0;Name=maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.52-mRNA-1;score=1000\n"; | |
print GFF "SSTP.scaffold.00055.227916 pred_gff:embl match_part 10537 10673 1000 - . ID=SSTP.scaffold.00055.227916:hsp:5479:3.5.0.0;Parent=SSTP.scaffold.00055.227916:hit:3043:3.5.0.0;Target=maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.52-mRNA-1 3485 3621 +;Gap=M137\n"; | |
print GFF "SSTP.scaffold.00055.227916 pred_gff:embl match_part 7006 10489 1000 - . ID=SSTP.scaffold.00055.227916:hsp:5480:3.5.0.0;Parent=SSTP.scaffold.00055.227916:hit:3043:3.5.0.0;Target=maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.52-mRNA-1 1 3484 +;Gap=M3484\n"; | |
print GFF "SSTP.scaffold.00055.227916 maker gene 6942 8068 . + . ID=maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.69;Name=maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.69;score=1000\n"; | |
print GFF "SSTP.scaffold.00055.227916 maker mRNA 6942 8068 . + . ID=1:SRAE_2028600;Parent=maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.69;Name=maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.69-mRNA-1;_AED=1.00;_eAED=1.00;_QI=106|0|0|0|1|1|2|884|43;score=1000\n"; | |
print GFF "SSTP.scaffold.00055.227916 pred_gff:embl match 6942 8068 1000 + . ID=SSTP.scaffold.00055.227916:hit:3060:3.5.0.0;Name=maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.69-mRNA-1;score=1000\n"; | |
print GFF "SSTP.scaffold.00055.227916 pred_gff:embl match_part 6942 6956 1000 + . ID=SSTP.scaffold.00055.227916:hsp:5520:3.5.0.0;Parent=SSTP.scaffold.00055.227916:hit:3060:3.5.0.0;Target=maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.69-mRNA-1 1 15 +;Gap=M15\n"; | |
print GFF "SSTP.scaffold.00055.227916 pred_gff:embl match_part 6962 8068 1000 + . ID=SSTP.scaffold.00055.227916:hsp:5521:3.5.0.0;Parent=SSTP.scaffold.00055.227916:hit:3060:3.5.0.0;Target=maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.69-mRNA-1 16 1122 +;Gap=M1107\n"; | |
close(GFF); | |
($new_gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
@temp = split(/\//,$new_gff); | |
$new_gff = $temp[$#temp]; | |
($errorcode,$errormsg) = &run_main_program($outputdir,$gff,$new_gff,$path_to_bedtools,10,'hello',1,'none'); | |
if ($errorcode != 3) { print STDERR "ERROR: test_run_main_program: failed test3\n"; exit;} | |
system "rm -f $new_gff"; | |
system "rm -f $gff"; | |
} | |
#------------------------------------------------------------------# | |
# RUN THE MAIN PART OF THE CODE: | |
# TO FIND THE HIGHEST-SCORING NON-OVERLAPPING GENES, THIS DOES THE FOLLOWING: | |
# (i) SORTS THE GENES IN ORDER OF DESCENDING SCORES | |
# (ii) REMOVE ALL GENES FROM THE GFF THAT OVERLAP THE HIGHEST-SCORING GENE -> GIVES GFF1 | |
# (iii) REMOVE ALL THE GENES FROM THE GFF1 THAT OVERLAP THE SECOND HIGHEST-SCORING GENE -> GIVES GFF2 | |
# (iv) REMOVE ALL THE GENES FROM THE GFF2 THAT OVERLAP THE THIRD HIGHEST-SCORING GENE -> GIVES GFF3 | |
# (v) etc. | |
sub run_main_program | |
{ | |
my $outputdir = $_[0]; ## DIRECTORY TO PUT OUTPUT FILES IN. | |
my $input_gff = $_[1]; ## THE INPUT GFF FILE | |
my $output_gff = $_[2]; ## THE OUTPUT GFF FILE | |
my $path_to_bedtools = $_[3]; ## PATH TO THE BEDTOOLS INSTALLATION | |
my $min_gene_score = $_[4]; ## MINIMUM SCORE FOR A GENE TO BE RETAINED | |
my $from_maker = $_[5]; ## SAYS WHETHER THE GFF FILE IS FROM MAKER (yes/no) | |
my $testing = $_[6]; ## SAYS WHETHER THIS IS BEING CALLED FROM A TESTING SUBROUTINE | |
my $subtract_from = $_[7]; ## NUMBER TO SUBTRACT THE SCORE FROM (eg. 1 FOR MAKER) | |
my $errorcode = 0; ## RETURNED AS 0 IF THERE IS NO ERROR. | |
my $errormsg = "none";## RETURNED AS 'none' IF THERE IS NO ERROR. | |
my $scaffolds_file; ## A FILE WITH THE NAMES OF ALL SCAFFOLDS IN $new_gff | |
my $scaffold; ## NAME OF A SCAFFOLD | |
my $scaffold_gff; ## GFF FILE FOR SCAFFOLD $scaffold | |
my $scaffold_gff2; ## TEMPORARY GFF FILE FOR SCAFFOLD $scaffold | |
my $new_gff; ## GFF FILE OF GENES | |
my %SCORE = (); ## HASH TABLE WITH MAKER SCORES FOR GENES | |
my $GENE; ## HASH TABLE OF GENE NAMES FOR TRANSCRIPT NAMES | |
my $OVERLAP; ## HASH TABLE WITH OVERLAPPING GENES | |
my $DISCARD; ## HASH TABLE OF GENES TO DISCARD | |
my $DELETE_TRANSCRIPT; ## HASH TABLE OF TRANSCRIPTS TO DISCARD | |
# CHECK THAT $from_maker IS yes/no: | |
if ($from_maker ne 'yes' && $from_maker ne 'no') | |
{ | |
$errormsg = "ERROR: run_main_program: from_maker $from_maker: should be yes/no\n"; | |
$errorcode = 3; # ERRORCODE=3 (TESTED FOR) | |
return($errorcode,$errormsg); | |
} | |
# READ IN THE GENE NAME FOR EACH TRANSCRIPT IN THE INPUT GFF FILE: | |
($GENE,$errorcode,$errormsg) = &read_genes_for_transcripts($input_gff); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
# MAKE A GFF FILE WITH JUST THE 'cds' FEATURES (NOT 'gene' OR 'intron' FEATURES): | |
($new_gff,$errorcode,$errormsg) = &make_cds_gff($input_gff,$outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
# FIND THE CODING OVERLAPS BETWEEN ALL THE GENES, BY LOOKING FOR OVERLAPS IN 'cds' FEATURES: | |
($OVERLAP,$errorcode,$errormsg) = &find_overlapping_genes($new_gff,$path_to_bedtools,$outputdir,$GENE); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
# READ IN THE GENE SCORES FOR THE GENES: | |
if ($from_maker eq 'yes') # IT IS A MAKER GFF FILE | |
{ | |
($DELETE_TRANSCRIPT,$errorcode,$errormsg) = &read_maker_scores($input_gff,\%SCORE,$subtract_from,0); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
} | |
else # IT IS NOT A MAKER GFF FILE | |
{ | |
($errorcode,$errormsg) = &read_scores($input_gff,\%SCORE,$subtract_from); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
} | |
# FILTER ON $min_gene_score | |
# FIND THE HIGHEST-SCORING NON-OVERLAPPING GENE PREDICTIONS: | |
($DISCARD,$errorcode,$errormsg) = &find_best_predictions($testing,$OVERLAP,\%SCORE,$min_gene_score); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
# WRITE THE OUTPUT FILE: | |
$output_gff = $outputdir."/".$output_gff; | |
($errorcode,$errormsg) = &write_output_gff($input_gff,$GENE,$output_gff,$DISCARD,$from_maker,$DELETE_TRANSCRIPT); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
return($errorcode,$errormsg); | |
} | |
#------------------------------------------------------------------# | |
# TEST &write_output_gff | |
sub test_write_output_gff | |
{ | |
my $outputdir = $_[0]; # DIRECTORY TO WRITE OUTPUT FILES IN | |
my $gff; # INPUT GFF FILE | |
my $errorcode; # RETURNED AS 0 FROM A FUNCTION IF THERE IS NO ERROR | |
my $errormsg; # RETURNED AS 'none' FROM A FUCNCTION IF THERE IS NO ERROR | |
my %GENE = (); # HASH TABLE OF GENE NAMES FOR TRANSCRIPTS | |
my $output_gff; # OUTPUT GFF FILE | |
my %DISCARD = (); # HASH TABLE OF GENES TO DISCARD | |
my $expected_output_gff; # EXPECTED OUTPUT GFF FILE | |
my $differences; # DIFFERENCES BETWEEN $output_gff AND $expected_output_gff | |
my $length_differences; # LENGTH OF $differences | |
my $line; # | |
my %DELETE_TRANSCRIPT = (); # HASH TABLE OF TRANSCRIPTS TO DELETE | |
($gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
open(GFF,">$gff") || die "ERROR: test_write_output_gff: cannot open gff $gff\n"; | |
print GFF "Contig_000100 maker gene 6027 7030 . + . ID=DME_0000799101;Name=DME_0000799101\n"; | |
print GFF "Contig_000100 maker mRNA 6027 7030 . + . ID=DME_0000799101-mRNA-1;Parent=DME_0000799101;Name=DME_0000799101-mRNA-1;_AED=0.15;_eAED=0.15;_QI=0|0|0|1|1|1|3|0|188\n"; | |
print GFF "Contig_000100 maker exon 6027 6166 . + . ID=DME_0000799101-mRNA-1:exon:1;Parent=DME_0000799101-mRNA-1\n"; | |
print GFF "Contig_000100 maker exon 6236 6410 . + . ID=DME_0000799101-mRNA-1:exon:2;Parent=DME_0000799101-mRNA-1\n"; | |
print GFF "Contig_000100 maker exon 6779 7030 . + . ID=DME_0000799101-mRNA-1:exon:3;Parent=DME_0000799101-mRNA-1\n"; | |
print GFF "Contig_000100 maker CDS 6027 6166 . + 0 ID=DME_0000799101-mRNA-1:cds;Parent=DME_0000799101-mRNA-1\n"; | |
print GFF "Contig_000100 maker CDS 6236 6410 . + 1 ID=DME_0000799101-mRNA-1:cds;Parent=DME_0000799101-mRNA-1\n"; | |
print GFF "Contig_000100 maker CDS 6779 7030 . + 0 ID=DME_0000799101-mRNA-1:cds;Parent=DME_0000799101-mRNA-1\n"; | |
print GFF "Contig_000100 maker gene 6027 7030 . + . ID=DME_0000799102;Name=DME_0000799102\n"; | |
print GFF "Contig_000100 maker mRNA 6027 7030 . + . ID=DME_0000799102-mRNA-1;Parent=DME_0000799102;Name=DME_0000799102-mRNA-1;_AED=0.12;_eAED=0.12;_QI=0|0|0|1|1|1|3|0|188\n"; | |
print GFF "Contig_000100 maker exon 6027 6166 . + . ID=DME_0000799102-mRNA-1:exon:1;Parent=DME_0000799102-mRNA-1\n"; | |
print GFF "Contig_000100 maker exon 6236 6410 . + . ID=DME_0000799102-mRNA-1:exon:2;Parent=DME_0000799102-mRNA-1\n"; | |
print GFF "Contig_000100 maker exon 6779 7030 . + . ID=DME_0000799102-mRNA-1:exon:3;Parent=DME_0000799102-mRNA-1\n"; | |
print GFF "Contig_000100 maker CDS 6027 6166 . + 0 ID=DME_0000799102-mRNA-1:cds;Parent=DME_0000799102-mRNA-1\n"; | |
print GFF "Contig_000100 maker CDS 6236 6410 . + 1 ID=DME_0000799102-mRNA-1:cds;Parent=DME_0000799102-mRNA-1\n"; | |
print GFF "Contig_000100 maker CDS 6779 7030 . + 0 ID=DME_0000799102-mRNA-1:cds;Parent=DME_0000799102-mRNA-1\n"; | |
print GFF "##FASTA\n"; | |
print GFF ">seq1\n"; | |
print GFF "ACAGAG\n"; | |
close(GFF); | |
$GENE{"DME_0000799101-mRNA-1"} = "DME_0000799101"; | |
$GENE{"DME_0000799102-mRNA-1"} = "DME_0000799102"; | |
($output_gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
$DISCARD{"DME_0000799102"} = 1; | |
($errorcode,$errormsg) = &write_output_gff($gff,\%GENE,$output_gff,\%DISCARD,"yes",\%DELETE_TRANSCRIPT); | |
if ($errorcode != 0) { print STDERR "ERROR: test_write_output_gff: failed test1\n"; exit;} | |
($expected_output_gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
open(EXPECTED,">$expected_output_gff") || die "ERROR: test_write_output_gff: cannot open expected_output_gff $expected_output_gff\n"; | |
print EXPECTED "Contig_000100 maker gene 6027 7030 . + . ID=DME_0000799101;Name=DME_0000799101\n"; | |
print EXPECTED "Contig_000100 maker mRNA 6027 7030 . + . ID=DME_0000799101-mRNA-1;Parent=DME_0000799101;Name=DME_0000799101-mRNA-1;_AED=0.15;_eAED=0.15;_QI=0|0|0|1|1|1|3|0|188\n"; | |
print EXPECTED "Contig_000100 maker exon 6027 6166 . + . ID=DME_0000799101-mRNA-1:exon:1;Parent=DME_0000799101-mRNA-1\n"; | |
print EXPECTED "Contig_000100 maker exon 6236 6410 . + . ID=DME_0000799101-mRNA-1:exon:2;Parent=DME_0000799101-mRNA-1\n"; | |
print EXPECTED "Contig_000100 maker exon 6779 7030 . + . ID=DME_0000799101-mRNA-1:exon:3;Parent=DME_0000799101-mRNA-1\n"; | |
print EXPECTED "Contig_000100 maker CDS 6027 6166 . + 0 ID=DME_0000799101-mRNA-1:cds;Parent=DME_0000799101-mRNA-1\n"; | |
print EXPECTED "Contig_000100 maker CDS 6236 6410 . + 1 ID=DME_0000799101-mRNA-1:cds;Parent=DME_0000799101-mRNA-1\n"; | |
print EXPECTED "Contig_000100 maker CDS 6779 7030 . + 0 ID=DME_0000799101-mRNA-1:cds;Parent=DME_0000799101-mRNA-1\n"; | |
print EXPECTED "##FASTA\n"; | |
print EXPECTED ">seq1\n"; | |
print EXPECTED "ACAGAG\n"; | |
close(EXPECTED); | |
$differences = ""; | |
open(TEMP,"diff $output_gff $expected_output_gff |"); | |
while(<TEMP>) | |
{ | |
$line = $_; | |
$differences = $differences.$line; | |
} | |
close(TEMP); | |
$length_differences = length($differences); | |
if ($length_differences != 0) { print STDERR "ERROR: test_write_output_gff: failed test1 (output_gff $output_gff expected_output_gff $expected_output_gff)\n"; exit;} | |
system "rm -f $gff"; | |
system "rm -f $expected_output_gff"; | |
system "rm -f $output_gff"; | |
($gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
open(GFF,">$gff") || die "ERROR: test_write_output_gff: cannot open gff $gff\n"; | |
print GFF "Contig_000100 maker gene 6027 7030 . + . ID=DME_0000799101;Name=DME_0000799101\n"; | |
print GFF "Contig_000100 maker mRNA 6027 7030 . + . ID=DME_0000799101-mRNA-1;Parent=DME_0000799101;Name=DME_0000799101-mRNA-1;_AED=0.15;_eAED=0.15;_QI=0|0|0|1|1|1|3|0|188\n"; | |
print GFF "Contig_000100 maker exon 6027 6166 . + . ID=DME_0000799101-mRNA-1:exon:1;Parent=DME_0000799101-mRNA-1\n"; | |
print GFF "Contig_000100 maker exon 6236 6410 . + . ID=DME_0000799101-mRNA-1:exon:2;Parent=DME_0000799101-mRNA-1\n"; | |
print GFF "Contig_000100 maker exon 6779 7030 . + . ID=DME_0000799101-mRNA-1:exon:3;Parent=DME_0000799101-mRNA-1\n"; | |
print GFF "Contig_000100 maker CDS 6027 6166 . + 0 ID=DME_0000799101-mRNA-1:cds;Parent=DME_0000799101-mRNA-1\n"; | |
print GFF "Contig_000100 maker CDS 6236 6410 . + 1 ID=DME_0000799101-mRNA-1:cds;Parent=DME_0000799101-mRNA-1\n"; | |
print GFF "Contig_000100 maker CDS 6779 7030 . + 0 ID=DME_0000799101-mRNA-1:cds;Parent=DME_0000799101-mRNA-1\n"; | |
print GFF "Contig_000100 maker gene 6027 7030 . + . ID=DME_0000799102;Name=DME_0000799102\n"; | |
print GFF "Contig_000100 maker mRNA 6027 7030 . + . ID=DME_0000799102-mRNA-1;Parent=DME_0000799102;Name=DME_0000799102-mRNA-1;_AED=0.12;_eAED=0.12;_QI=0|0|0|1|1|1|3|0|188\n"; | |
print GFF "Contig_000100 maker exon 6027 6166 . + . ID=DME_0000799102-mRNA-1:exon:1;Parent=DME_0000799102-mRNA-1\n"; | |
print GFF "Contig_000100 maker exon 6236 6410 . + . ID=DME_0000799102-mRNA-1:exon:2;Parent=DME_0000799102-mRNA-1\n"; | |
print GFF "Contig_000100 maker exon 6779 7030 . + . ID=DME_0000799102-mRNA-1:exon:3;Parent=DME_0000799102-mRNA-1\n"; | |
print GFF "Contig_000100 maker CDS 6027 6166 . + 0 ID=DME_0000799102-mRNA-1:cds;Parent=DME_0000799102-mRNA-1\n"; | |
print GFF "Contig_000100 maker CDS 6236 6410 . + 1 ID=DME_0000799102-mRNA-1:cds;Parent=DME_0000799102-mRNA-1\n"; | |
print GFF "Contig_000100 maker CDS 6779 7030 . + 0 ID=DME_0000799102-mRNA-1:cds;Parent=DME_0000799102-mRNA-1\n"; | |
print GFF "##FASTA\n"; | |
print GFF ">seq1\n"; | |
print GFF "ACAGAG\n"; | |
close(GFF); | |
$GENE{"DME_0000799101-mRNA-1"} = "DME_0000799101"; | |
$GENE{"DME_0000799102-mRNA-1"} = "DME_0000799102"; | |
($output_gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
%DISCARD = (); | |
$DISCARD{"DME_0000799101"} = 1; | |
($errorcode,$errormsg) = &write_output_gff($gff,\%GENE,$output_gff,\%DISCARD,"yes",\%DELETE_TRANSCRIPT); | |
if ($errorcode != 0) { print STDERR "ERROR: test_write_output_gff: failed test2\n"; exit;} | |
($expected_output_gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
open(EXPECTED,">$expected_output_gff") || die "ERROR: test_write_output_gff: cannot open expected_output_gff $expected_output_gff\n"; | |
print EXPECTED "Contig_000100 maker gene 6027 7030 . + . ID=DME_0000799102;Name=DME_0000799102\n"; | |
print EXPECTED "Contig_000100 maker mRNA 6027 7030 . + . ID=DME_0000799102-mRNA-1;Parent=DME_0000799102;Name=DME_0000799102-mRNA-1;_AED=0.12;_eAED=0.12;_QI=0|0|0|1|1|1|3|0|188\n"; | |
print EXPECTED "Contig_000100 maker exon 6027 6166 . + . ID=DME_0000799102-mRNA-1:exon:1;Parent=DME_0000799102-mRNA-1\n"; | |
print EXPECTED "Contig_000100 maker exon 6236 6410 . + . ID=DME_0000799102-mRNA-1:exon:2;Parent=DME_0000799102-mRNA-1\n"; | |
print EXPECTED "Contig_000100 maker exon 6779 7030 . + . ID=DME_0000799102-mRNA-1:exon:3;Parent=DME_0000799102-mRNA-1\n"; | |
print EXPECTED "Contig_000100 maker CDS 6027 6166 . + 0 ID=DME_0000799102-mRNA-1:cds;Parent=DME_0000799102-mRNA-1\n"; | |
print EXPECTED "Contig_000100 maker CDS 6236 6410 . + 1 ID=DME_0000799102-mRNA-1:cds;Parent=DME_0000799102-mRNA-1\n"; | |
print EXPECTED "Contig_000100 maker CDS 6779 7030 . + 0 ID=DME_0000799102-mRNA-1:cds;Parent=DME_0000799102-mRNA-1\n"; | |
print EXPECTED "##FASTA\n"; | |
print EXPECTED ">seq1\n"; | |
print EXPECTED "ACAGAG\n"; | |
close(EXPECTED); | |
$differences = ""; | |
open(TEMP,"diff $output_gff $expected_output_gff |"); | |
while(<TEMP>) | |
{ | |
$line = $_; | |
$differences = $differences.$line; | |
} | |
close(TEMP); | |
$length_differences = length($differences); | |
if ($length_differences != 0) { print STDERR "ERROR: test_write_output_gff: failed test2 (output_gff $output_gff expected_output_gff $expected_output_gff)\n"; exit;} | |
system "rm -f $gff"; | |
system "rm -f $expected_output_gff"; | |
system "rm -f $output_gff"; | |
# TEST ERRORCODE=104 (DON'T KNOW GENE NAME FOR TRANSCRIPT): | |
($gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
open(GFF,">$gff") || die "ERROR: test_write_output_gff: cannot open gff $gff\n"; | |
print GFF "Contig_000100 maker gene 6027 7030 . + . ID=DME_0000799101;Name=DME_0000799101\n"; | |
print GFF "Contig_000100 maker mRNA 6027 7030 . + . ID=DME_0000799101-mRNA-1;Parent=DME_0000799101;Name=DME_0000799101-mRNA-1;_AED=0.15;_eAED=0.15;_QI=0|0|0|1|1|1|3|0|188\n"; | |
print GFF "Contig_000100 maker exon 6027 6166 . + . ID=DME_0000799101-mRNA-1:exon:1;Parent=DME_0000799101-mRNA-1\n"; | |
print GFF "Contig_000100 maker exon 6236 6410 . + . ID=DME_0000799101-mRNA-1:exon:2;Parent=DME_0000799101-mRNA-1\n"; | |
print GFF "Contig_000100 maker exon 6779 7030 . + . ID=DME_0000799101-mRNA-1:exon:3;Parent=DME_0000799101-mRNA-1\n"; | |
print GFF "Contig_000100 maker CDS 6027 6166 . + 0 ID=DME_0000799101-mRNA-1:cds;Parent=DME_0000799101-mRNA-1\n"; | |
print GFF "Contig_000100 maker CDS 6236 6410 . + 1 ID=DME_0000799101-mRNA-1:cds;Parent=DME_0000799101-mRNA-1\n"; | |
print GFF "Contig_000100 maker CDS 6779 7030 . + 0 ID=DME_0000799101-mRNA-1:cds;Parent=DME_0000799101-mRNA-1\n"; | |
print GFF "Contig_000100 maker gene 6027 7030 . + . ID=DME_0000799102;Name=DME_0000799102\n"; | |
print GFF "Contig_000100 maker mRNA 6027 7030 . + . ID=DME_0000799102-mRNA-1;Parent=DME_0000799102;Name=DME_0000799102-mRNA-1;_AED=0.12;_eAED=0.12;_QI=0|0|0|1|1|1|3|0|188\n"; | |
print GFF "Contig_000100 maker exon 6027 6166 . + . ID=DME_0000799102-mRNA-1:exon:1;Parent=DME_0000799102-mRNA-1\n"; | |
print GFF "Contig_000100 maker exon 6236 6410 . + . ID=DME_0000799102-mRNA-1:exon:2;Parent=DME_0000799102-mRNA-1\n"; | |
print GFF "Contig_000100 maker exon 6779 7030 . + . ID=DME_0000799102-mRNA-1:exon:3;Parent=DME_0000799102-mRNA-1\n"; | |
print GFF "Contig_000100 maker CDS 6027 6166 . + 0 ID=DME_0000799102-mRNA-1:cds;Parent=DME_0000799102-mRNA-1\n"; | |
print GFF "Contig_000100 maker CDS 6236 6410 . + 1 ID=DME_0000799102-mRNA-1:cds;Parent=DME_0000799102-mRNA-1\n"; | |
print GFF "Contig_000100 maker CDS 6779 7030 . + 0 ID=DME_0000799102-mRNA-1:cds;Parent=DME_0000799102-mRNA-1\n"; | |
print GFF "##FASTA\n"; | |
print GFF ">seq1\n"; | |
print GFF "ACAGAG\n"; | |
close(GFF); | |
%GENE = (); | |
($output_gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
%DISCARD = (); | |
$DISCARD{"DME_0000799101"} = 1; | |
($errorcode,$errormsg) = &write_output_gff($gff,\%GENE,$output_gff,\%DISCARD,"yes",\%DELETE_TRANSCRIPT); | |
if ($errorcode != 104) { print STDERR "ERROR: test_write_output_gff: failed test3\n"; exit;} | |
system "rm -f $gff"; | |
system "rm -f $expected_output_gff"; | |
system "rm -f $output_gff"; | |
# TEST ERRORCODE=105 (UNUSUAL FEATURE IN GFF FILE): | |
($gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
open(GFF,">$gff") || die "ERROR: test_write_output_gff: cannot open gff $gff\n"; | |
print GFF "Contig_000100 maker gene 6027 7030 . + . ID=DME_0000799101;Name=DME_0000799101\n"; | |
print GFF "Contig_000100 maker mRNA 6027 7030 . + . ID=DME_0000799101-mRNA-1;Parent=DME_0000799101;Name=DME_0000799101-mRNA-1;_AED=0.15;_eAED=0.15;_QI=0|0|0|1|1|1|3|0|188\n"; | |
print GFF "Contig_000100 maker exon 6027 6166 . + . ID=DME_0000799101-mRNA-1:exon:1;Parent=DME_0000799101-mRNA-1\n"; | |
print GFF "Contig_000100 maker exon 6236 6410 . + . ID=DME_0000799101-mRNA-1:exon:2;Parent=DME_0000799101-mRNA-1\n"; | |
print GFF "Contig_000100 maker exon 6779 7030 . + . ID=DME_0000799101-mRNA-1:exon:3;Parent=DME_0000799101-mRNA-1\n"; | |
print GFF "Contig_000100 maker CDS 6027 6166 . + 0 ID=DME_0000799101-mRNA-1:cds;Parent=DME_0000799101-mRNA-1\n"; | |
print GFF "Contig_000100 maker CDS 6236 6410 . + 1 ID=DME_0000799101-mRNA-1:cds;Parent=DME_0000799101-mRNA-1\n"; | |
print GFF "Contig_000100 maker CDS 6779 7030 . + 0 ID=DME_0000799101-mRNA-1:cds;Parent=DME_0000799101-mRNA-1\n"; | |
print GFF "Contig_000100 maker hello 6027 7030 . + . ID=DME_0000799102;Name=DME_0000799102\n"; | |
print GFF "Contig_000100 maker mRNA 6027 7030 . + . ID=DME_0000799102-mRNA-1;Parent=DME_0000799102;Name=DME_0000799102-mRNA-1;_AED=0.12;_eAED=0.12;_QI=0|0|0|1|1|1|3|0|188\n"; | |
print GFF "Contig_000100 maker exon 6027 6166 . + . ID=DME_0000799102-mRNA-1:exon:1;Parent=DME_0000799102-mRNA-1\n"; | |
print GFF "Contig_000100 maker exon 6236 6410 . + . ID=DME_0000799102-mRNA-1:exon:2;Parent=DME_0000799102-mRNA-1\n"; | |
print GFF "Contig_000100 maker exon 6779 7030 . + . ID=DME_0000799102-mRNA-1:exon:3;Parent=DME_0000799102-mRNA-1\n"; | |
print GFF "Contig_000100 maker CDS 6027 6166 . + 0 ID=DME_0000799102-mRNA-1:cds;Parent=DME_0000799102-mRNA-1\n"; | |
print GFF "Contig_000100 maker CDS 6236 6410 . + 1 ID=DME_0000799102-mRNA-1:cds;Parent=DME_0000799102-mRNA-1\n"; | |
print GFF "Contig_000100 maker CDS 6779 7030 . + 0 ID=DME_0000799102-mRNA-1:cds;Parent=DME_0000799102-mRNA-1\n"; | |
print GFF "##FASTA\n"; | |
print GFF ">seq1\n"; | |
print GFF "ACAGAG\n"; | |
close(GFF); | |
$GENE{"DME_0000799101-mRNA-1"} = "DME_0000799101"; | |
$GENE{"DME_0000799102-mRNA-1"} = "DME_0000799102"; | |
($output_gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
%DISCARD = (); | |
$DISCARD{"DME_0000799101"} = 1; | |
($errorcode,$errormsg) = &write_output_gff($gff,\%GENE,$output_gff,\%DISCARD,"yes",\%DELETE_TRANSCRIPT); | |
if ($errorcode != 105) { print STDERR "ERROR: test_write_output_gff: failed test4\n"; exit;} | |
system "rm -f $gff"; | |
system "rm -f $expected_output_gff"; | |
system "rm -f $output_gff"; | |
# TEST ERRORCODE=106 (WRONG NUMBER OF COLUMNS IN GFF FILE): | |
($gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
open(GFF,">$gff") || die "ERROR: test_write_output_gff: cannot open gff $gff\n"; | |
print GFF "Contig_000100 maker gene 6027 7030 . + . ID=DME_0000799101;Name=DME_0000799101\n"; | |
print GFF "Contig_000100 maker mRNA 6027 7030 . + . ID=DME_0000799101-mRNA-1;Parent=DME_0000799101;Name=DME_0000799101-mRNA-1;_AED=0.15;_eAED=0.15;_QI=0|0|0|1|1|1|3|0|188\n"; | |
print GFF "Contig_000100 maker exon 6027 6166 . + . ID=DME_0000799101-mRNA-1:exon:1;Parent=DME_0000799101-mRNA-1\n"; | |
print GFF "Contig_000100 maker exon 6236 6410 . + . ID=DME_0000799101-mRNA-1:exon:2;Parent=DME_0000799101-mRNA-1\n"; | |
print GFF "Contig_000100 maker exon 6779 7030 . + . ID=DME_0000799101-mRNA-1:exon:3;Parent=DME_0000799101-mRNA-1\n"; | |
print GFF "Contig_000100 maker CDS 6027 6166 . + 0 ID=DME_0000799101-mRNA-1:cds;Parent=DME_0000799101-mRNA-1\n"; | |
print GFF "Contig_000100 maker CDS 6236 6410 . + 1 ID=DME_0000799101-mRNA-1:cds;Parent=DME_0000799101-mRNA-1\n"; | |
print GFF "Contig_000100 maker CDS 6779 7030 . + 0 ID=DME_0000799101-mRNA-1:cds;Parent=DME_0000799101-mRNA-1\n"; | |
print GFF "Contig_000100 maker gene 6027 7030 . + . ID=DME_0000799102;Name=DME_0000799102\n"; | |
print GFF "Contig_000100 maker mRNA 7030 . + . ID=DME_0000799102-mRNA-1;Parent=DME_0000799102;Name=DME_0000799102-mRNA-1;_AED=0.12;_eAED=0.12;_QI=0|0|0|1|1|1|3|0|188\n"; | |
print GFF "Contig_000100 maker exon 6027 6166 . + . ID=DME_0000799102-mRNA-1:exon:1;Parent=DME_0000799102-mRNA-1\n"; | |
print GFF "Contig_000100 maker exon 6236 6410 . + . ID=DME_0000799102-mRNA-1:exon:2;Parent=DME_0000799102-mRNA-1\n"; | |
print GFF "Contig_000100 maker exon 6779 7030 . + . ID=DME_0000799102-mRNA-1:exon:3;Parent=DME_0000799102-mRNA-1\n"; | |
print GFF "Contig_000100 maker CDS 6027 6166 . + 0 ID=DME_0000799102-mRNA-1:cds;Parent=DME_0000799102-mRNA-1\n"; | |
print GFF "Contig_000100 maker CDS 6236 6410 . + 1 ID=DME_0000799102-mRNA-1:cds;Parent=DME_0000799102-mRNA-1\n"; | |
print GFF "Contig_000100 maker CDS 6779 7030 . + 0 ID=DME_0000799102-mRNA-1:cds;Parent=DME_0000799102-mRNA-1\n"; | |
print GFF "##FASTA\n"; | |
print GFF ">seq1\n"; | |
print GFF "ACAGAG\n"; | |
close(GFF); | |
$GENE{"DME_0000799101-mRNA-1"} = "DME_0000799101"; | |
$GENE{"DME_0000799102-mRNA-1"} = "DME_0000799102"; | |
($output_gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
%DISCARD = (); | |
$DISCARD{"DME_0000799101"} = 1; | |
($errorcode,$errormsg) = &write_output_gff($gff,\%GENE,$output_gff,\%DISCARD,"yes",\%DELETE_TRANSCRIPT); | |
if ($errorcode != 106) { print STDERR "ERROR: test_write_output_gff: failed test5 (errorcode=$errorcode errormsg=$errormsg)\n"; exit;} | |
system "rm -f $gff"; | |
system "rm -f $expected_output_gff"; | |
system "rm -f $output_gff"; | |
# EXAMPLE WITH TWO TRANSCRIPTS IN A GENE, ONE WITH HIGHER SCORE: | |
($gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
open(GFF,">$gff") || die "ERROR: test_write_output_gff: cannot open gff $gff\n"; | |
open(GFF,">$gff") || die "ERROR: test_read_maker_scores: cannot open gff $gff\n"; | |
print GFF "Sratt_Chr00_000002 maker gene 371193 372952 . + . ID=SRAE_0000033601;Name=SRAE_0000033601\n"; | |
print GFF "Sratt_Chr00_000002 maker mRNA 371193 372952 . + . ID=SRAE_0000033601-mRNA-1;Parent=SRAE_0000033601;Name=SRAE_0000033601-mRNA-1;_AED=0.02;_eAED=0.02;_QI=550|1|1|1|0|0|3|60|350;score=3922\n"; | |
print GFF "Sratt_Chr00_000002 maker mRNA 371415 372952 . + . ID=SRAE_0000033601-mRNA-2;Parent=SRAE_0000033601;Name=SRAE_0000033601-mRNA-2;_AED=0.07;_eAED=0.07;_QI=378|1|1|1|0|0|2|60|350;score=7307\n"; | |
print GFF "Sratt_Chr00_000002 maker exon 371193 371384 . + . ID=SRAE_0000033601-mRNA-1:exon:1;Parent=SRAE_0000033601-mRNA-1\n"; | |
print GFF "Sratt_Chr00_000002 maker exon 371435 371821 . + . ID=SRAE_0000033601-mRNA-1:exon:2;Parent=SRAE_0000033601-mRNA-1\n"; | |
print GFF "Sratt_Chr00_000002 maker exon 371415 371821 . + . ID=SRAE_0000033601-mRNA-2:exon:1;Parent=SRAE_0000033601-mRNA-2\n"; | |
print GFF "Sratt_Chr00_000002 maker five_prime_UTR 371193 371384 . + . ID=SRAE_0000033601-mRNA-1:five_prime_utr;Parent=SRAE_0000033601-mRNA-1\n"; | |
print GFF "Sratt_Chr00_000002 maker five_prime_UTR 371435 371792 . + . ID=SRAE_0000033601-mRNA-1:five_prime_utr;Parent=SRAE_0000033601-mRNA-1\n"; | |
print GFF "Sratt_Chr00_000002 maker CDS 371793 371821 . + 0 ID=SRAE_0000033601-mRNA-1:cds;Parent=SRAE_0000033601-mRNA-1\n"; | |
print GFF "Sratt_Chr00_000002 maker CDS 371869 372892 . + 1 ID=SRAE_0000033601-mRNA-1:cds;Parent=SRAE_0000033601-mRNA-1\n"; | |
print GFF "Sratt_Chr00_000002 maker three_prime_UTR 372893 372952 . + . ID=SRAE_0000033601-mRNA-1:three_prime_utr;Parent=SRAE_0000033601-mRNA-1\n"; | |
print GFF "Sratt_Chr00_000002 maker five_prime_UTR 371415 371792 . + . ID=SRAE_0000033601-mRNA-2:five_prime_utr;Parent=SRAE_0000033601-mRNA-2\n"; | |
print GFF "Sratt_Chr00_000002 maker CDS 371793 371821 . + 0 ID=SRAE_0000033601-mRNA-2:cds;Parent=SRAE_0000033601-mRNA-2\n"; | |
print GFF "Sratt_Chr00_000002 maker CDS 371869 372892 . + 1 ID=SRAE_0000033601-mRNA-2:cds;Parent=SRAE_0000033601-mRNA-2\n"; | |
print GFF "Sratt_Chr00_000002 maker three_prime_UTR 372893 372952 . + . ID=SRAE_0000033601-mRNA-2:three_prime_utr;Parent=SRAE_0000033601-mRNA-2\n"; | |
print GFF "##FASTA\n"; | |
print GFF ">seq1\n"; | |
print GFF "ACAGAG\n"; | |
close(GFF); | |
$GENE{"SRAE_0000033601-mRNA-1"} = "SRAE_0000033601"; | |
$GENE{"SRAE_0000033601-mRNA-2"} = "SRAE_0000033601"; | |
($output_gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
$DELETE_TRANSCRIPT{"SRAE_0000033601-mRNA-1"} = 1; | |
($errorcode,$errormsg) = &write_output_gff($gff,\%GENE,$output_gff,\%DISCARD,"yes",\%DELETE_TRANSCRIPT); | |
if ($errorcode != 0) { print STDERR "ERROR: test_write_output_gff: failed test1\n"; exit;} | |
($expected_output_gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
open(EXPECTED,">$expected_output_gff") || die "ERROR: test_write_output_gff: cannot open expected_output_gff $expected_output_gff\n"; | |
print EXPECTED "Sratt_Chr00_000002 maker gene 371193 372952 . + . ID=SRAE_0000033601;Name=SRAE_0000033601\n"; | |
print EXPECTED "Sratt_Chr00_000002 maker mRNA 371415 372952 . + . ID=SRAE_0000033601-mRNA-2;Parent=SRAE_0000033601;Name=SRAE_0000033601-mRNA-2;_AED=0.07;_eAED=0.07;_QI=378|1|1|1|0|0|2|60|350;score=7307\n"; | |
print EXPECTED "Sratt_Chr00_000002 maker exon 371415 371821 . + . ID=SRAE_0000033601-mRNA-2:exon:1;Parent=SRAE_0000033601-mRNA-2\n"; | |
print EXPECTED "Sratt_Chr00_000002 maker five_prime_UTR 371415 371792 . + . ID=SRAE_0000033601-mRNA-2:five_prime_utr;Parent=SRAE_0000033601-mRNA-2\n"; | |
print EXPECTED "Sratt_Chr00_000002 maker CDS 371793 371821 . + 0 ID=SRAE_0000033601-mRNA-2:cds;Parent=SRAE_0000033601-mRNA-2\n"; | |
print EXPECTED "Sratt_Chr00_000002 maker CDS 371869 372892 . + 1 ID=SRAE_0000033601-mRNA-2:cds;Parent=SRAE_0000033601-mRNA-2\n"; | |
print EXPECTED "Sratt_Chr00_000002 maker three_prime_UTR 372893 372952 . + . ID=SRAE_0000033601-mRNA-2:three_prime_utr;Parent=SRAE_0000033601-mRNA-2\n"; | |
print EXPECTED "##FASTA\n"; | |
print EXPECTED ">seq1\n"; | |
print EXPECTED "ACAGAG\n"; | |
close(EXPECTED); | |
$differences = ""; | |
open(TEMP,"diff $output_gff $expected_output_gff |"); | |
while(<TEMP>) | |
{ | |
$line = $_; | |
$differences = $differences.$line; | |
} | |
close(TEMP); | |
$length_differences = length($differences); | |
if ($length_differences != 0) { print STDERR "ERROR: test_write_output_gff: failed test6 (output_gff $output_gff expected_output_gff $expected_output_gff)\n"; exit;} | |
system "rm -f $gff"; | |
system "rm -f $expected_output_gff"; | |
system "rm -f $output_gff"; | |
} | |
#------------------------------------------------------------------# | |
# WRITE THE OUTPUT FILE: | |
sub write_output_gff | |
{ | |
my $input_gff = $_[0]; # INPUT GFF FILE | |
my $GENE = $_[1]; # HASH TABLE OF GENE NAMES FOR TRANSCRIPTS | |
my $output_gff = $_[2]; # OUTPUT GFF FILE | |
my $DISCARD = $_[3]; # HASH TABLE OF GENES TO DISCARD | |
my $from_maker = $_[4]; # SAYS WHETHER THE INPUT GFF IS FROM MAKER | |
my $DELETE_TRANSCRIPT = $_[5]; # HASH TABLE OF TRANSCRIPTS TO DELETE | |
my $errorcode = 0; # RETURNED AS 0 IF THERE IS NO ERROR | |
my $errormsg = "none";# RETURNED AS 'none' IF THERE IS NO ERROR | |
my $line; # | |
my @temp; # | |
my $end_of_gff = 0; # SAYS WHETHER WE HAVE REACHED THE END OF THE INPUT GFF | |
my $gene; # GENE NAME | |
my $feature; # FEATURE TYPE | |
my $transcript; # TRANSCRIPT NAME | |
my $name; # FEATURE NAME | |
# OPEN THE OUTPUT GFF: | |
open(OUTPUT,">$output_gff") || die "ERROR: write_output_gff: cannot open output_gff $output_gff\n"; | |
# OPEN THE INPUT GFF: | |
open(INPUT_GFF,"$input_gff") || die "ERROR: write_output_gff: cannot open $input_gff\n"; | |
while(<INPUT_GFF>) | |
{ | |
$line = $_; | |
chomp $line; | |
if ($line eq '##FASTA') { $end_of_gff = 1;} | |
if ($end_of_gff == 0) | |
{ | |
@temp = split(/\t+/,$line); | |
if ($#temp == 8) | |
{ | |
$name = $temp[8]; | |
$feature = $temp[2]; | |
if ($feature eq 'mRNA' || $feature eq 'exon' || $feature eq 'three_prime_UTR' || $feature eq 'five_prime_UTR' || $feature eq 'CDS' || $feature eq 'gene' || $feature eq 'intron') | |
{ | |
@temp = split(/ID=/,$name); | |
$name = $temp[1]; | |
if ($feature eq 'mRNA' && $from_maker eq 'yes') | |
{ | |
@temp = split(/Parent=/,$name); | |
$gene = $temp[1]; # eg. maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.52;Name=maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.52-mRNA-1;_AED=0.00;_eAED=0.00;_QI=0|0|0|1|1|1|2|0|1206;score=1000 | |
@temp = split(/\;/,$gene); | |
$gene = $temp[0]; # eg. maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.52 | |
# GET THE TRANSCRIPT NAME: | |
@temp = split(/\;/,$name); | |
$transcript = $temp[0]; | |
} | |
elsif (($feature eq 'exon' || $feature eq 'three_prime_UTR' || $feature eq 'five_prime_UTR' || $feature eq 'CDS') && $from_maker eq 'yes') | |
{ | |
@temp = split(/Parent=/,$name); | |
$transcript = $temp[1]; # eg. augustus_masked-SSTP.scaffold.00055.227916-processed-gene-0.264-mRNA-1 | |
@temp = split(/\;/,$transcript); | |
$transcript = $temp[0]; # eg. augustus_masked-SSTP.scaffold.00055.227916-processed-gene-0.264 | |
# FIND THE GENE NAME FOR THIS TRANSCRIPT: | |
if (!($GENE->{$transcript})) | |
{ | |
$errormsg = "ERROR: write_output_gff: do not know gene name for transcript $transcript (line $line)\n"; | |
$errorcode = 104; # ERRORCODE=104 (TESTED FOR) | |
return($errorcode,$errormsg); | |
} | |
$gene = $GENE->{$transcript}; | |
} | |
elsif ($feature eq 'gene') | |
{ | |
@temp = split(/\;/,$name); | |
$gene = $temp[0]; | |
$transcript = $gene; | |
} | |
if (!($DISCARD->{$gene}) && !($DELETE_TRANSCRIPT->{$transcript})) | |
{ | |
print OUTPUT "$line\n"; | |
} | |
} | |
else | |
{ | |
if (!($feature eq 'protein_match' || $feature eq 'expressed_sequence_match' || $feature eq 'contig' || $feature eq 'match' || $feature eq 'match_part') && $from_maker eq 'yes') | |
{ | |
$errormsg = "ERROR: write_output_gff: feature $feature line $line\n"; | |
$errorcode = 105; # ERRORCODE=105 (TESTED FOR) | |
return($errorcode,$errormsg); | |
} | |
# protein_match AND expressed_sequence_match FEATURES ARE NOT PART OF MAKER GENE PREDICTIONS | |
if ($from_maker eq 'yes' && ($feature eq 'protein_match' || $feature eq 'expressed_sequence_match' || $feature eq 'contig' || $feature eq 'match_part' || $feature eq 'match')) | |
{ | |
print OUTPUT "$line\n"; | |
} | |
} | |
} | |
else | |
{ | |
if (substr($line,0,1) ne '#') | |
{ | |
$errormsg = "ERROR: write_output_gff: line $line\n"; | |
$errorcode = 106; # ERRORCODE=106 (TESTED FOR) | |
return($errorcode,$errormsg); | |
} | |
print OUTPUT "$line\n"; | |
} | |
} | |
else | |
{ | |
print OUTPUT "$line\n"; | |
} | |
} | |
close(INPUT_GFF); | |
close(OUTPUT); | |
return($errorcode,$errormsg); | |
} | |
#------------------------------------------------------------------# | |
# TEST &find_overlapping_genes | |
sub test_find_overlapping_genes | |
{ | |
my $output_dir = $_[0]; # DIRECTORY TO WRITE OUTPUT FILES IN | |
my $path_to_bedtools = $_[1]; # PATH TO BEDTOOLS | |
my $OVERLAP; # HASH TABLE OF OVERLAPS FOR GENES | |
my $gff; # INPUT GFF FILE | |
my %GENE = (); # HASH TABLE OF GENE NAMES FOR TRANSCRIPTS | |
my $errorcode; # RETURNED AS 0 FROM A FUNCTION IF THERE IS NO ERROR | |
my $errormsg; # RETURNED AS 'none' FROM A FUNCTION IF THERE IS NO ERROR | |
($gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
open(GFF,">$gff") || die "ERROR: test_find_overlapping_genes: cannot open gff $gff\n"; | |
print GFF "Contig_000100 maker CDS 6027 6166 . + 0 ID=DME_0000799101-mRNA-1:cds;Parent=DME_0000799101-mRNA-1\n"; | |
print GFF "Contig_000100 maker CDS 6236 6410 . + 1 ID=DME_0000799101-mRNA-1:cds;Parent=DME_0000799101-mRNA-1\n"; | |
print GFF "Contig_000100 maker CDS 6779 7030 . + 0 ID=DME_0000799101-mRNA-1:cds;Parent=DME_0000799101-mRNA-1\n"; | |
print GFF "Contig_000100 maker CDS 6027 6166 . + 0 ID=DME_0000799102-mRNA-1:cds;Parent=DME_0000799102-mRNA-1\n"; | |
print GFF "Contig_000100 maker CDS 6236 6410 . + 1 ID=DME_0000799102-mRNA-1:cds;Parent=DME_0000799102-mRNA-1\n"; | |
print GFF "Contig_000100 maker CDS 6779 7030 . + 0 ID=DME_0000799102-mRNA-1:cds;Parent=DME_0000799102-mRNA-1\n"; | |
print GFF "Contig_000100 maker CDS 6025 6167 . + 0 ID=DME_0000799103-mRNA-1:cds;Parent=DME_0000799103-mRNA-1\n"; | |
print GFF "Contig_000100 maker CDS 6237 6411 . + 1 ID=DME_0000799103-mRNA-1:cds;Parent=DME_0000799103-mRNA-1\n"; | |
print GFF "Contig_000100 maker CDS 6783 7035 . + 0 ID=DME_0000799103-mRNA-1:cds;Parent=DME_0000799103-mRNA-1\n"; | |
close(GFF); | |
$GENE{"DME_0000799101-mRNA-1"} = "DME_0000799101"; | |
$GENE{"DME_0000799102-mRNA-1"} = "DME_0000799102"; | |
$GENE{"DME_0000799103-mRNA-1"} = "DME_0000799103"; | |
($OVERLAP,$errorcode,$errormsg) = &find_overlapping_genes($gff,$path_to_bedtools,$outputdir,\%GENE); | |
if ($errorcode != 0) { print STDERR "ERROR: test_find_overlapping_genes: failed test1 (errorcode=$errorcode errormsg=$errormsg)\n"; exit;} | |
if ($OVERLAP->{"DME_0000799101"} ne "DME_0000799102,DME_0000799103") { print STDERR "ERROR: test_find_overlapping_genes: failed test1b\n"; exit;} | |
if ($OVERLAP->{"DME_0000799102"} ne "DME_0000799101,DME_0000799103") { print STDERR "ERROR: test_find_overlapping_genes: failed test1c\n"; exit;} | |
if ($OVERLAP->{"DME_0000799103"} ne "DME_0000799101,DME_0000799102") { print STDERR "ERROR: test_find_overlapping_genes: failed test1d\n"; exit;} | |
# TEST CASE WHERE THERE ARE NO OVERLAPS: | |
($gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
open(GFF,">$gff") || die "ERROR: test_find_overlapping_genes: cannot open gff $gff\n"; | |
print GFF "Contig_000100 maker CDS 6027 6166 . + 0 ID=DME_0000799101-mRNA-1:cds;Parent=DME_0000799101-mRNA-1\n"; | |
print GFF "Contig_000100 maker CDS 6236 6410 . + 1 ID=DME_0000799101-mRNA-1:cds;Parent=DME_0000799101-mRNA-1\n"; | |
print GFF "Contig_000100 maker CDS 6779 7030 . + 0 ID=DME_0000799101-mRNA-1:cds;Parent=DME_0000799101-mRNA-1\n"; | |
print GFF "Contig_000101 maker CDS 6025 6167 . + 0 ID=DME_0000799103-mRNA-1:cds;Parent=DME_0000799103-mRNA-1\n"; | |
print GFF "Contig_000101 maker CDS 6237 6411 . + 1 ID=DME_0000799103-mRNA-1:cds;Parent=DME_0000799103-mRNA-1\n"; | |
print GFF "Contig_000101 maker CDS 6783 7035 . + 0 ID=DME_0000799103-mRNA-1:cds;Parent=DME_0000799103-mRNA-1\n"; | |
close(GFF); | |
($OVERLAP,$errorcode,$errormsg) = &find_overlapping_genes($gff,$path_to_bedtools,$outputdir,\%GENE); | |
if ($errorcode != 0) { print STDERR "ERROR: test_find_overlapping_genes: failed test2\n"; exit;} | |
if (defined($OVERLAP->{"DME_0000799101"})) { print STDERR "ERROR: test_find_overlapping_genes: failed test2b\n"; exit;} | |
if (defined($OVERLAP->{"DME_0000799103"})) { print STDERR "ERROR: test_find_overlapping_genes: failed test2c\n"; exit;} | |
# TEST ERRORCODE=103 (DO NOT KNOW GENE NAME FOR TRANSCRIPT): | |
($gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
open(GFF,">$gff") || die "ERROR: test_find_overlapping_genes: cannot open gff $gff\n"; | |
print GFF "Contig_000100 maker CDS 6027 6166 . + 0 ID=DME_0000799101-mRNA-1:cds;Parent=DME_0000799101-mRNA-1\n"; | |
print GFF "Contig_000100 maker CDS 6236 6410 . + 1 ID=DME_0000799101-mRNA-1:cds;Parent=DME_0000799101-mRNA-1\n"; | |
print GFF "Contig_000100 maker CDS 6779 7030 . + 0 ID=DME_0000799101-mRNA-1:cds;Parent=DME_0000799101-mRNA-1\n"; | |
print GFF "Contig_000100 maker CDS 6027 6166 . + 0 ID=DME_0000799102-mRNA-1:cds;Parent=DME_0000799102-mRNA-1\n"; | |
print GFF "Contig_000100 maker CDS 6236 6410 . + 1 ID=DME_0000799102-mRNA-1:cds;Parent=DME_0000799102-mRNA-1\n"; | |
print GFF "Contig_000100 maker CDS 6779 7030 . + 0 ID=DME_0000799102-mRNA-1:cds;Parent=DME_0000799102-mRNA-1\n"; | |
print GFF "Contig_000100 maker CDS 6025 6167 . + 0 ID=DME_0000799103-mRNA-1:cds;Parent=DME_0000799103-mRNA-1\n"; | |
print GFF "Contig_000100 maker CDS 6237 6411 . + 1 ID=DME_0000799103-mRNA-1:cds;Parent=DME_0000799103-mRNA-1\n"; | |
print GFF "Contig_000100 maker CDS 6783 7035 . + 0 ID=DME_0000799103-mRNA-1:cds;Parent=DME_0000799103-mRNA-1\n"; | |
close(GFF); | |
%GENE = (); | |
$GENE{"DME_0000799102-mRNA-1"} = "DME_0000799102"; | |
($OVERLAP,$errorcode,$errormsg) = &find_overlapping_genes($gff,$path_to_bedtools,$outputdir,\%GENE); | |
if ($errorcode != 103) { print STDERR "ERROR: test_find_overlapping_genes: failed test3\n"; exit;} | |
} | |
#------------------------------------------------------------------# | |
# FIND THE OVERLAPS BETWEEN ALL THE GENE FEATURES: | |
sub find_overlapping_genes | |
{ | |
my $gff = $_[0]; # INPUT GENE GFF FILE WITH 'cds' FEATURES | |
my $path_to_bedtools = $_[1]; # PATH TO BEDTOOLS EXECUTABLE | |
my $outputdir = $_[2]; # DIRECTORY TO PUT OUTPUT FILES INTO | |
my $GENE = $_[3]; # HASH TABLE WITH GENE NAMES FOR TRANSCRIPTS | |
my $errorcode = 0; # RETURNED AS 0 IF THERE IS NO ERROR | |
my $errormsg = "none";# RETURNED AS 'none' IF THERE IS NO ERROR | |
my %OVERLAP = (); # HASH TABLE OF OVERLAPPING GENES | |
my $overlaps; # FILE WITH OVERLAPS BETWEEN GENES | |
my $line; # | |
my @temp; # | |
my $cmd; # COMMAND TO RUN | |
my $gene1; # FIRST GENE | |
my $gene2; # SECOND GENE | |
my %SEEN = (); # HASH TABLE TO RECORD WHICH GENE PAIRS WE HAVE SEEN ALREADY | |
my $pair1; # A PAIR OF GENES | |
my $pair2; # A PAIR OF GENES | |
# FIND ALL THE OVERLAPPING GENES: | |
($overlaps,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
$cmd = $path_to_bedtools." intersect -loj -a $gff -b $gff > $overlaps"; | |
system "$cmd"; | |
open(OVERLAPS,"$overlaps") || die "ERROR: find_overlapping_genes: cannot open overlaps $overlaps\n"; | |
while(<OVERLAPS>) | |
{ | |
$line = $_; | |
chomp $line; | |
@temp = split(/\t+/,$line); | |
$gene1 = $temp[8]; # eg. ID=DME_0000799301-mRNA-1:cds;Parent=DME_0000799301-mRNA-1 | |
$gene2 = $temp[17]; | |
@temp = split(/ID=/,$gene1); | |
$gene1 = $temp[1]; | |
if ($gene1 =~ /Parent=/) | |
{ | |
@temp = split(/Parent=/,$gene1); | |
$gene1 = $temp[1]; | |
# FIND THE GENE NAME FOR THIS TRANSCRIPT: | |
if (!($GENE->{$gene1})) | |
{ | |
$errormsg = "ERROR: find_overlapping_genes: do not know gene name for transcript $gene1 (line $line)\n"; | |
$errorcode = 103; # ERRORCODE=103 (TESTED FOR) | |
return(\%OVERLAP,$errorcode,$errormsg); | |
} | |
$gene1 = $GENE->{$gene1}; | |
} | |
@temp = split(/ID=/,$gene2); | |
$gene2 = $temp[1]; | |
if ($gene2 =~ /Parent=/) | |
{ | |
@temp = split(/Parent=/,$gene2); | |
$gene2 = $temp[1]; | |
# FIND THE GENE NAME FOR THIS TRANSCRIPT: | |
if (!($GENE->{$gene2})) | |
{ | |
$errormsg = "ERROR: find_overlapping_genes: do not know gene name for transcript $gene2 (line $line)\n"; | |
$errorcode = 103; # ERRORCODE=103 (TESTED FOR) | |
return(\%OVERLAP,$errorcode,$errormsg); | |
} | |
$gene2 = $GENE->{$gene2}; | |
} | |
if ($gene1 ne $gene2) | |
{ | |
$pair1 = $gene1."=".$gene2; | |
$pair2 = $gene2."=".$gene1; | |
if (!($SEEN{$pair1}) && !($SEEN{$pair2})) | |
{ | |
# RECORD OVERLAPS FOR $gene1: | |
if (!($OVERLAP{$gene1})) { $OVERLAP{$gene1} = $gene2;} | |
else {$OVERLAP{$gene1} = $OVERLAP{$gene1}.",".$gene2;} | |
# RECORD OVERLAPS FOR $gene2: | |
if (!($OVERLAP{$gene2})) { $OVERLAP{$gene2} = $gene1;} | |
else {$OVERLAP{$gene2} = $OVERLAP{$gene2}.",".$gene1;} | |
$SEEN{$pair1} = 1; | |
$SEEN{$pair2} = 1; | |
} | |
} | |
} | |
close(OVERLAPS); | |
return(\%OVERLAP,$errorcode,$errormsg); | |
} | |
#------------------------------------------------------------------# | |
# TEST &read_genes_for_transcripts | |
sub test_read_genes_for_transcripts | |
{ | |
my $outputdir = $_[0]; # DIRECTORY TO PUT OUTPUT FILES IN | |
my $GENE; # HASH TABLE OF GENE NAMES FOR TRANSCRIPTS | |
my $input_gff; # INPUT GFF FILE | |
my $errorcode; # RETURNED AS 0 FROM A FUNCTION IF THERE IS NO ERROR | |
my $errormsg; # RETURNED AS 'none' FROM A FUNCTION IF THERE IS NO ERROR | |
($input_gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
open(INPUT_GFF,">$input_gff") || die "ERROR: test_read_genes_for_transcripts: cannot open $input_gff\n"; | |
print INPUT_GFF "scaff_000010\tmaker\tgene\t20476\t21654\t.\t+\t.\tID=augustus_masked-scaff_000010-processed-gene-0.3;Name=augustus_masked-scaff_000010-processed-gene-0.3\n"; | |
print INPUT_GFF "scaff_000010\tmaker\tmRNA\t20476\t21654\t.\t+\t.\tID=augustus_masked-scaff_000010-processed-gene-0.3-mRNA-1;Parent=augustus_masked-scaff_000010-processed-gene-0.3;Name=augustus_masked-scaff_000010-processed-gene-0.3-mRNA-1;_AED=0.00;_eAED=0.00;_QI=0|-1|0|1|-1|1|1|0|392\n"; | |
print INPUT_GFF "scaff_000010\tmaker\texon\t20476\t21654\t.\t+\t.\tID=augustus_masked-scaff_000010-processed-gene-0.3-mRNA-1:exon:0;Parent=augustus_masked-scaff_000010-processed-gene-0.3-mRNA-1\n"; | |
print INPUT_GFF "scaff_000010\tmaker\tCDS\t20476\t21654\t.\t+\t0\tID=augustus_masked-scaff_000010-processed-gene-0.3-mRNA-1:cds;Parent=augustus_masked-scaff_000010-processed-gene-0.3-mRNA-1\n"; | |
print INPUT_GFF "scaff_000010\tmaker\tgene\t29000\t29497\t.\t-\t.\tID=augustus_masked-scaff_000010-processed-gene-0.4;Name=augustus_masked-scaff_000010-processed-gene-0.4\n"; | |
print INPUT_GFF "scaff_000010\tmaker\tmRNA\t29000\t29497\t.\t-\t.\tID=augustus_masked-scaff_000010-processed-gene-0.4-mRNA-1;Parent=augustus_masked-scaff_000010-processed-gene-0.4;Name=augustus_masked-scaff_000010-processed-gene-0.4-mRNA-1;_AED=0.01;_eAED=0.01;_QI=0|-1|0|1|-1|1|1|0|165\n"; | |
print INPUT_GFF "scaff_000010\tmaker\texon\t9000\t29497\t.\t-\t.\tID=augustus_masked-scaff_000010-processed-gene-0.4-mRNA-1:exon:25;Parent=augustus_masked-scaff_000010-processed-gene-0.4-mRNA-1\n"; | |
print INPUT_GFF "scaff_000010\tmaker\tCDS\t29000\t29497\t.\t-\t0\tID=augustus_masked-scaff_000010-processed-gene-0.4-mRNA-1:cds;Parent=augustus_masked-scaff_000010-processed-gene-0.4-mRNA-1\n"; | |
print INPUT_GFF "scaff_000010\tmodel_gff:maker\tmatch\t20476\t21654\t.\t+\t.\tID=scaff_000010:hit:4104:3.5.0.0;Name=augustus_masked-scaff_000010-processed-gene-0.4-mRNA-1\n"; | |
print INPUT_GFF "scaff_000010\tmodel_gff:maker\tmatch_part\t20476\t21654\t.\t+\t.\tID=scaff_000010:hsp:7664:3.5.0.0;Parent=scaff_000010:hit:4104:3.5.0.0;Target=augustus_masked-scaff_000010-processed-gene-0.4-mRNA-1 1 1179 +;Gap=M1179\n"; | |
print INPUT_GFF "##FASTA\n"; | |
print INPUT_GFF ">seq1\n"; | |
print INPUT_GFF "ACAACACAGATAGATATATAGAGCA\n"; | |
close(INPUT_GFF); | |
($GENE,$errorcode,$errormsg) = &read_genes_for_transcripts($input_gff); | |
if ($errorcode != 0) { print STDERR "ERROR: test_read_genes_for_transcripst: failed test1\n"; exit;} | |
if ($GENE->{"augustus_masked-scaff_000010-processed-gene-0.3-mRNA-1"} ne "augustus_masked-scaff_000010-processed-gene-0.3") { print STDERR "ERROR: test_read_genes_for_transcripts: failed test1b\n"; exit;} | |
if ($GENE->{"augustus_masked-scaff_000010-processed-gene-0.4-mRNA-1"} ne "augustus_masked-scaff_000010-processed-gene-0.4") { print STDERR "ERROR: test_read_genes_for_transcripts: failed test1c\n"; exit;} | |
system "rm -f $input_gff"; | |
# TEST ERRORCODE=9 (mRNA APPEARS TWICE IN GFF): | |
($input_gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
open(INPUT_GFF,">$input_gff") || die "ERROR: test_read_genes_for_transcripts: cannot open $input_gff\n"; | |
print INPUT_GFF "scaff_000010\tmaker\tgene\t20476\t21654\t.\t+\t.\tID=augustus_masked-scaff_000010-processed-gene-0.3;Name=augustus_masked-scaff_000010-processed-gene-0.3\n"; | |
print INPUT_GFF "scaff_000010\tmaker\tmRNA\t20476\t21654\t.\t+\t.\tID=augustus_masked-scaff_000010-processed-gene-0.3-mRNA-1;Parent=augustus_masked-scaff_000010-processed-gene-0.3;Name=augustus_masked-scaff_000010-processed-gene-0.3-mRNA-1;_AED=0.00;_eAED=0.00;_QI=0|-1|0|1|-1|1|1|0|392\n"; | |
print INPUT_GFF "scaff_000010\tmaker\texon\t20476\t21654\t.\t+\t.\tID=augustus_masked-scaff_000010-processed-gene-0.3-mRNA-1:exon:0;Parent=augustus_masked-scaff_000010-processed-gene-0.3-mRNA-1\n"; | |
print INPUT_GFF "scaff_000010\tmaker\tCDS\t20476\t21654\t.\t+\t0\tID=augustus_masked-scaff_000010-processed-gene-0.3-mRNA-1:cds;Parent=augustus_masked-scaff_000010-processed-gene-0.3-mRNA-1\n"; | |
print INPUT_GFF "scaff_000010\tmaker\tgene\t29000\t29497\t.\t-\t.\tID=augustus_masked-scaff_000010-processed-gene-0.4;Name=augustus_masked-scaff_000010-processed-gene-0.4\n"; | |
print INPUT_GFF "scaff_000010\tmaker\tmRNA\t29000\t29497\t.\t-\t.\tID=augustus_masked-scaff_000010-processed-gene-0.3-mRNA-1;Parent=augustus_masked-scaff_000010-processed-gene-0.4;Name=augustus_masked-scaff_000010-processed-gene-0.4-mRNA-1;_AED=0.01;_eAED=0.01;_QI=0|-1|0|1|-1|1|1|0|165\n"; | |
print INPUT_GFF "scaff_000010\tmaker\texon\t9000\t29497\t.\t-\t.\tID=augustus_masked-scaff_000010-processed-gene-0.4-mRNA-1:exon:25;Parent=augustus_masked-scaff_000010-processed-gene-0.4-mRNA-1\n"; | |
print INPUT_GFF "scaff_000010\tmaker\tCDS\t29000\t29497\t.\t-\t0\tID=augustus_masked-scaff_000010-processed-gene-0.4-mRNA-1:cds;Parent=augustus_masked-scaff_000010-processed-gene-0.4-mRNA-1\n"; | |
print INPUT_GFF "scaff_000010\tmodel_gff:maker\tmatch\t20476\t21654\t.\t+\t.\tID=scaff_000010:hit:4104:3.5.0.0;Name=augustus_masked-scaff_000010-processed-gene-0.4-mRNA-1\n"; | |
print INPUT_GFF "scaff_000010\tmodel_gff:maker\tmatch_part\t20476\t21654\t.\t+\t.\tID=scaff_000010:hsp:7664:3.5.0.0;Parent=scaff_000010:hit:4104:3.5.0.0;Target=augustus_masked-scaff_000010-processed-gene-0.4-mRNA-1 1 1179 +;Gap=M1179\n"; | |
print INPUT_GFF "##FASTA\n"; | |
print INPUT_GFF ">seq1\n"; | |
print INPUT_GFF "ACAACACAGATAGATATATAGAGCA\n"; | |
close(INPUT_GFF); | |
($GENE,$errorcode,$errormsg) = &read_genes_for_transcripts($input_gff); | |
if ($errorcode != 9) { print STDERR "ERROR: test_read_genes_for_transcripts: failed test2\n"; exit;} | |
system "rm -f $input_gff"; | |
# TEST ERRORCODE=3 (LINE DOES NOT HAVE 9 COLUMNS): | |
($input_gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
open(INPUT_GFF,">$input_gff") || die "ERROR: test_read_genes_for_transcripts: cannot open $input_gff\n"; | |
print INPUT_GFF "scaff_000010\tmaker\tgene\t20476\t21654\t.\t+\t.\tID=augustus_masked-scaff_000010-processed-gene-0.3;Name=augustus_masked-scaff_000010-processed-gene-0.3\n"; | |
print INPUT_GFF "scaff_000010\tmaker\tmRNA\t20476\t21654\t.\t+\t.\tID=augustus_masked-scaff_000010-processed-gene-0.3-mRNA-1;Parent=augustus_masked-scaff_000010-processed-gene-0.3;Name=augustus_masked-scaff_000010-processed-gene-0.3-mRNA-1;_AED=0.00;_eAED=0.00;_QI=0|-1|0|1|-1|1|1|0|392\n"; | |
print INPUT_GFF "scaff_000010\tmaker\texon\t20476\t21654\t.\t+\t.\tID=augustus_masked-scaff_000010-processed-gene-0.3-mRNA-1:exon:0;Parent=augustus_masked-scaff_000010-processed-gene-0.3-mRNA-1\n"; | |
print INPUT_GFF "scaff_000010\tmaker\tCDS\t20476\t21654\t.\t+\t0\tID=augustus_masked-scaff_000010-processed-gene-0.3-mRNA-1:cds;Parent=augustus_masked-scaff_000010-processed-gene-0.3-mRNA-1\n"; | |
print INPUT_GFF "scaff_000010\tmaker\tgene\t29000\t29497\t.\t-\t.\tID=augustus_masked-scaff_000010-processed-gene-0.4;Name=augustus_masked-scaff_000010-processed-gene-0.4\n"; | |
print INPUT_GFF "scaff_000010\tmaker\tmRNA\t29000\t29497\t.\t-\t.\tID=augustus_masked-scaff_000010-processed-gene-0.4-mRNA-1;Parent=augustus_masked-scaff_000010-processed-gene-0.4;Name=augustus_masked-scaff_000010-processed-gene-0.4-mRNA-1;_AED=0.01;_eAED=0.01;_QI=0|-1|0|1|-1|1|1|0|165\n"; | |
print INPUT_GFF "scaff_000010\tmaker\texon\t9000\t29497\t.\t-\t.\tID=augustus_masked-scaff_000010-processed-gene-0.4-mRNA-1:exon:25;Parent=augustus_masked-scaff_000010-processed-gene-0.4-mRNA-1\n"; | |
print INPUT_GFF "scaff_000010\tmaker\tCDS\t29000\t.\t-\t0\tID=augustus_masked-scaff_000010-processed-gene-0.4-mRNA-1:cds;Parent=augustus_masked-scaff_000010-processed-gene-0.4-mRNA-1\n"; | |
print INPUT_GFF "scaff_000010\tmodel_gff:maker\tmatch\t20476\t21654\t.\t+\t.\tID=scaff_000010:hit:4104:3.5.0.0;Name=augustus_masked-scaff_000010-processed-gene-0.4-mRNA-1\n"; | |
print INPUT_GFF "scaff_000010\tmodel_gff:maker\tmatch_part\t20476\t21654\t.\t+\t.\tID=scaff_000010:hsp:7664:3.5.0.0;Parent=scaff_000010:hit:4104:3.5.0.0;Target=augustus_masked-scaff_000010-processed-gene-0.4-mRNA-1 1 1179 +;Gap=M1179\n"; | |
print INPUT_GFF "##FASTA\n"; | |
print INPUT_GFF ">seq1\n"; | |
print INPUT_GFF "ACAACACAGATAGATATATAGAGCA\n"; | |
close(INPUT_GFF); | |
($GENE,$errorcode,$errormsg) = &read_genes_for_transcripts($input_gff); | |
if ($errorcode != 3) { print STDERR "ERROR: test_read_genes_for_transcripts: failed test3\n"; exit;} | |
system "rm -f $input_gff"; | |
# TEST MAKER EXAMPLE: | |
($input_gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
open(INPUT_GFF,">$input_gff") || die "ERROR: test_read_genes_for_transcripts: cannot open $input_gff\n"; | |
print INPUT_GFF "SSTP.scaffold.00055.227916\tmaker\tmRNA\t28133\t41313\t.\t+\t.\tID=1:SRAE_2027700;Parent=maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.59;Name=maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.59-mRNA-1;_AED=0.00;_eAED=0.00;_QI=0|0|0|1|1|1|2|0|4380;score=1000\n"; | |
print INPUT_GFF "SSTP.scaffold.00055.227916\tmaker\tmRNA\t49678\t50700\t.\t+\t.\tID=augustus_masked-SSTP.scaffold.00055.227916-processed-gene-0.264-mRNA-1;Parent=augustus_masked-SSTP.scaffold.00055.227916-processed-gene-0.264;Name=augustus_masked-SSTP.scaffold.00055.227916-processed-gene-0.264-mRNA-1;_AED=0.00;_eAED=0.00;_QI=0|-1|0|1|-1|1|1|0|340\n"; | |
print INPUT_GFF "##FASTA\n"; | |
print INPUT_GFF ">seq1\n"; | |
print INPUT_GFF "ACAACACAGATAGATATATAGAGCA\n"; | |
close(INPUT_GFF); | |
($GENE,$errorcode,$errormsg) = &read_genes_for_transcripts($input_gff); | |
if ($errorcode != 0) { print STDERR "ERROR: test_read_genes_for_transcripst: failed test4\n"; exit;} | |
if ($GENE->{"1:SRAE_2027700"} ne "maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.59") { print STDERR "ERROR: test_read_genes_for_transcripts: failed test4b\n"; exit;} | |
if ($GENE->{"maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.59-mRNA-1"} ne "maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.59") { print STDERR "ERROR: test_read_genes_for_transcripts: failed test4c\n"; exit;} | |
if ($GENE->{"augustus_masked-SSTP.scaffold.00055.227916-processed-gene-0.264-mRNA-1"} ne "augustus_masked-SSTP.scaffold.00055.227916-processed-gene-0.264") { print STDERR "ERROR: test_read_genes_for_transcripts: failed test4d\n"; exit;} | |
system "rm -f $input_gff"; | |
} | |
#------------------------------------------------------------------# | |
# READ IN THE GENE NAME FOR EACH TRANSCRIPT IN THE INPUT GFF FILE: | |
sub read_genes_for_transcripts | |
{ | |
my $input_gff = $_[0]; # INPUT GFF FILE | |
my $errorcode = 0; # THIS IS RETURNED AS 0 IF NO ERROR OCCURRED. | |
my $errormsg = "none";# THIS IS RETURNED AS 'none' IF NO ERROR OCCURRED. | |
my %GENE = (); # HASH TABLE OF GENE NAMES FOR TRANSCRIPT NAMES. | |
my $line; # | |
my @temp; # | |
my $end_of_gff = 0; # SAYS WHETHER THE END OF THE GFF FILE HAS BEEN REACHED | |
my $name; # NAME OF mRNA | |
my $gene; # NAME OF GENE | |
my $name2; # NAME OF mRNA | |
open(INPUT_GFF,"$input_gff") || die "ERROR: read_genes_for_transcripts: cannot open $input_gff\n"; | |
while(<INPUT_GFF>) | |
{ | |
$line = $_; | |
chomp $line; | |
if ($line eq '##FASTA') { $end_of_gff = 1;} | |
if (substr($line,0,1) ne '#' && $end_of_gff == 0) | |
{ | |
@temp = split(/\t+/,$line); | |
if ($#temp == 8) | |
{ | |
if ($temp[2] eq 'mRNA') | |
{ | |
# eg. scaff_000010 maker mRNA 88793 89828 . + . ID=maker-scaff_000010-augustus-gene-0.21-mRNA-1;Parent=maker-scaff_000010-augustus-gene-0.21;... | |
$name = $temp[8]; | |
$gene = $temp[8]; | |
@temp = split(/ID=/,$name); | |
$name = $temp[1]; # eg. maker-scaff_000010-augustus-gene-0.21-mRNA-1;... | |
@temp = split(/\;/,$name); | |
$name = $temp[0]; # eg. maker-scaff_000010-augustus-gene-0.21-mRNA-1 | |
# GET THE NAME OF THE GENE: | |
@temp = split(/Parent=/,$gene); | |
$gene = $temp[1]; # eg. maker-scaff_000010-augustus-gene-0.21;... | |
@temp = split(/\;/,$gene); | |
$gene = $temp[0]; # eg. maker-scaff_000010-augustus-gene-0.21 | |
# RECORD THE GENE NAME FOR THE mRNA: | |
if ($GENE{$name}) | |
{ | |
$errormsg= "ERROR: read_genes_for_transcripts: already have gene name for $name\n"; | |
$errorcode= 9; # ERRORCODE=9 (TESTED FOR) | |
return(\%GENE,$errorcode,$errormsg); | |
} | |
$GENE{$name} = $gene; | |
# SEE WHAT NAME IS STORED IN THE Name= PART OF THE LINE: | |
@temp = split(/\t+/,$line); | |
$name2 = $temp[8]; | |
@temp = split(/Name=/,$name2); | |
$name2 = $temp[1]; | |
@temp = split(/\;/,$name2); | |
$name2 = $temp[0]; | |
if ($name2 ne $name) | |
{ | |
if ($GENE{$name2}) | |
{ | |
$errormsg= "ERROR: read_genes_for_transcripts: already have gene name for $name2\n"; | |
$errorcode= 9; # ERRORCODE=9 (TESTED FOR) | |
return(\%GENE,$errorcode,$errormsg); | |
} | |
$GENE{$name2} = $gene; | |
} | |
} | |
} | |
else | |
{ | |
$errormsg = "ERROR: read_genes_for_transcripts: line does not have 9 columns: $line\n"; | |
$errorcode = 3; # ERRORCODE=3 (TESTED FOR) | |
return(\%GENE,$errorcode,$errormsg); | |
} | |
} | |
} | |
close(INPUT_GFF); | |
return(\%GENE,$errorcode,$errormsg); | |
} | |
#------------------------------------------------------------------# | |
# TEST &read_maker_scores | |
sub test_read_maker_scores | |
{ | |
my $outputdir = $_[0]; # DIRECTORY TO PUT OUTPUT FILES IN | |
my $gff; # GFF FILE | |
my $errorcode; # RETURNED AS 0 FROM A FUNCTION IF THERE IS NO ERROR | |
my $errormsg; # RETURNED AS 'none' FROM A FUNCTION IF THERE IS NO ERROR | |
my %SCORE = (); # HASH TABLE WITH SCORES FOR GENES | |
my $DELETE_TRANSCRIPT; # HASH TABLE OF TRANSCRIPTS TO DELETE | |
($gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
open(GFF,">$gff") || die "ERROR: test_read_maker_scores: cannot open gff $gff\n"; | |
print GFF "SSTP.scaffold.00055.227916 maker gene 7006 10673 . - . ID=maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.52;Name=maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.52;score=1000\n"; | |
print GFF "SSTP.scaffold.00055.227916 maker mRNA 7006 10673 . - . ID=1:SRAE_2028500;Parent=maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.52;Name=maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.52-mRNA-1;_AED=0.00;_eAED=0.00;_QI=0|0|0|1|1|1|2|0|1206;score=1000\n"; | |
print GFF "SSTP.scaffold.00055.227916 pred_gff:embl match 7006 10673 1000 - . ID=SSTP.scaffold.00055.227916:hit:3043:3.5.0.0;Name=maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.52-mRNA-1;score=1000\n"; | |
print GFF "SSTP.scaffold.00055.227916 pred_gff:embl match_part 10537 10673 1000 - . ID=SSTP.scaffold.00055.227916:hsp:5479:3.5.0.0;Parent=SSTP.scaffold.00055.227916:hit:3043:3.5.0.0;Target=maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.52-mRNA-1 3485 3621 +;Gap=M137\n"; | |
print GFF "SSTP.scaffold.00055.227916 pred_gff:embl match_part 7006 10489 1000 - . ID=SSTP.scaffold.00055.227916:hsp:5480:3.5.0.0;Parent=SSTP.scaffold.00055.227916:hit:3043:3.5.0.0;Target=maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.52-mRNA-1 1 3484 +;Gap=M3484\n"; | |
print GFF "SSTP.scaffold.00055.227916 maker gene 6942 8068 . + . ID=maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.69;Name=maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.69;score=1000\n"; | |
print GFF "SSTP.scaffold.00055.227916 maker mRNA 6942 8068 . + . ID=1:SRAE_2028600;Parent=maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.69;Name=maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.69-mRNA-1;_AED=1.00;_eAED=1.00;_QI=106|0|0|0|1|1|2|884|43;score=1000\n"; | |
print GFF "SSTP.scaffold.00055.227916 pred_gff:embl match 6942 8068 1000 + . ID=SSTP.scaffold.00055.227916:hit:3060:3.5.0.0;Name=maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.69-mRNA-1;score=1000\n"; | |
print GFF "SSTP.scaffold.00055.227916 pred_gff:embl match_part 6942 6956 1000 + . ID=SSTP.scaffold.00055.227916:hsp:5520:3.5.0.0;Parent=SSTP.scaffold.00055.227916:hit:3060:3.5.0.0;Target=maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.69-mRNA-1 1 15 +;Gap=M15\n"; | |
print GFF "SSTP.scaffold.00055.227916 pred_gff:embl match_part 6962 8068 1000 + . ID=SSTP.scaffold.00055.227916:hsp:5521:3.5.0.0;Parent=SSTP.scaffold.00055.227916:hit:3060:3.5.0.0;Target=maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.69-mRNA-1 16 1122 +;Gap=M1107\n"; | |
close(GFF); | |
($DELETE_TRANSCRIPT,$errorcode,$errormsg) = &read_maker_scores($gff,\%SCORE,"none",1); | |
if ($errorcode != 0) { print STDERR "ERROR: test_read_maker_scores: failed test1\n"; exit;} | |
if ($SCORE{"maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.69"} ne '1.00') { print STDERR "ERROR: test_read_maker_scores: failed test1b\n"; exit;} | |
if ($SCORE{"maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.52"} != 0) { print STDERR "ERROR: test_read_maker_scores: failed test1c\n"; exit;} | |
system "rm -f $gff"; | |
# TEST FOR ERRORCODE=1 (mRNA DOESN'T HAVE AED SCORE): | |
($gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
open(GFF,">$gff") || die "ERROR: test_read_maker_scores: cannot open gff $gff\n"; | |
print GFF "SSTP.scaffold.00055.227916 maker gene 7006 10673 . - . ID=maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.52;Name=maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.52;score=1000\n"; | |
print GFF "SSTP.scaffold.00055.227916 maker mRNA 7006 10673 . - . ID=1:SRAE_2028500;Parent=maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.52;Name=maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.52-mRNA-1;_QI=0|0|0|1|1|1|2|0|1206;score=1000\n"; | |
print GFF "SSTP.scaffold.00055.227916 pred_gff:embl match 7006 10673 1000 - . ID=SSTP.scaffold.00055.227916:hit:3043:3.5.0.0;Name=maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.52-mRNA-1;score=1000\n"; | |
print GFF "SSTP.scaffold.00055.227916 pred_gff:embl match_part 10537 10673 1000 - . ID=SSTP.scaffold.00055.227916:hsp:5479:3.5.0.0;Parent=SSTP.scaffold.00055.227916:hit:3043:3.5.0.0;Target=maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.52-mRNA-1 3485 3621 +;Gap=M137\n"; | |
print GFF "SSTP.scaffold.00055.227916 pred_gff:embl match_part 7006 10489 1000 - . ID=SSTP.scaffold.00055.227916:hsp:5480:3.5.0.0;Parent=SSTP.scaffold.00055.227916:hit:3043:3.5.0.0;Target=maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.52-mRNA-1 1 3484 +;Gap=M3484\n"; | |
print GFF "SSTP.scaffold.00055.227916 maker gene 6942 8068 . + . ID=maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.69;Name=maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.69;score=1000\n"; | |
print GFF "SSTP.scaffold.00055.227916 maker mRNA 6942 8068 . + . ID=1:SRAE_2028600;Parent=maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.69;Name=maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.69-mRNA-1;_AED=1.00;_eAED=1.00;_QI=106|0|0|0|1|1|2|884|43;score=1000\n"; | |
print GFF "SSTP.scaffold.00055.227916 pred_gff:embl match 6942 8068 1000 + . ID=SSTP.scaffold.00055.227916:hit:3060:3.5.0.0;Name=maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.69-mRNA-1;score=1000\n"; | |
print GFF "SSTP.scaffold.00055.227916 pred_gff:embl match_part 6942 6956 1000 + . ID=SSTP.scaffold.00055.227916:hsp:5520:3.5.0.0;Parent=SSTP.scaffold.00055.227916:hit:3060:3.5.0.0;Target=maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.69-mRNA-1 1 15 +;Gap=M15\n"; | |
print GFF "SSTP.scaffold.00055.227916 pred_gff:embl match_part 6962 8068 1000 + . ID=SSTP.scaffold.00055.227916:hsp:5521:3.5.0.0;Parent=SSTP.scaffold.00055.227916:hit:3060:3.5.0.0;Target=maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.69-mRNA-1 16 1122 +;Gap=M1107\n"; | |
close(GFF); | |
($DELETE_TRANSCRIPT,$errorcode,$errormsg) = &read_maker_scores($gff,\%SCORE,"none",1); | |
if ($errorcode != 1) { print STDERR "ERROR: test_read_maker_scores: failed test2 (errorcode $errorcode errormsg $errormsg)\n"; exit;} | |
system "rm -f $gff"; | |
# TEST ERRORCODE=4 (GFF FILE DOES NOT HAVE 8 COLUMNS): | |
($gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
open(GFF,">$gff") || die "ERROR: test_read_maker_scores: cannot open gff $gff\n"; | |
print GFF "SSTP.scaffold.00055.227916 maker gene 7006 10673 . - . ID=maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.52;Name=maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.52;score=1000\n"; | |
print GFF "SSTP.scaffold.00055.227916 maker mRNA 7006 10673 . - . ID=1:SRAE_2028500;Parent=maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.52;Name=maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.52-mRNA-1;_AED=0.00;_eAED=0.00;_QI=0|0|0|1|1|1|2|0|1206;score=1000\n"; | |
print GFF "SSTP.scaffold.00055.227916 pred_gff:embl match 7006 10673 1000 - . ID=SSTP.scaffold.00055.227916:hit:3043:3.5.0.0;Name=maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.52-mRNA-1;score=1000\n"; | |
print GFF "SSTP.scaffold.00055.227916 pred_gff:embl match_part 10537 10673 1000 - . ID=SSTP.scaffold.00055.227916:hsp:5479:3.5.0.0;Parent=SSTP.scaffold.00055.227916:hit:3043:3.5.0.0;Target=maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.52-mRNA-1 3485 3621 +;Gap=M137\n"; | |
print GFF "SSTP.scaffold.00055.227916 pred_gff:embl match_part 7006 10489 1000 - . ID=SSTP.scaffold.00055.227916:hsp:5480:3.5.0.0;Parent=SSTP.scaffold.00055.227916:hit:3043:3.5.0.0;Target=maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.52-mRNA-1 1 3484 +;Gap=M3484\n"; | |
print GFF "SSTP.scaffold.00055.227916 maker gene 6942 8068 . + . ID=maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.69;Name=maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.69;score=1000\n"; | |
print GFF "SSTP.scaffold.00055.227916 maker mRNA 6942 8068 . + . ID=1:SRAE_2028600;Parent=maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.69;Name=maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.69-mRNA-1;_AED=1.00;_eAED=1.00;_QI=106|0|0|0|1|1|2|884|43;score=1000\n"; | |
print GFF "SSTP.scaffold.00055.227916 pred_gff:embl 6942 8068 1000 + . ID=SSTP.scaffold.00055.227916:hit:3060:3.5.0.0;Name=maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.69-mRNA-1;score=1000\n"; | |
print GFF "SSTP.scaffold.00055.227916 pred_gff:embl match_part 6942 6956 1000 + . ID=SSTP.scaffold.00055.227916:hsp:5520:3.5.0.0;Parent=SSTP.scaffold.00055.227916:hit:3060:3.5.0.0;Target=maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.69-mRNA-1 1 15 +;Gap=M15\n"; | |
print GFF "SSTP.scaffold.00055.227916 pred_gff:embl match_part 6962 8068 1000 + . ID=SSTP.scaffold.00055.227916:hsp:5521:3.5.0.0;Parent=SSTP.scaffold.00055.227916:hit:3060:3.5.0.0;Target=maker-SSTP.scaffold.00055.227916-pred_gff_embl-gene-0.69-mRNA-1 16 1122 +;Gap=M1107\n"; | |
close(GFF); | |
($DELETE_TRANSCRIPT,$errorcode,$errormsg) = &read_maker_scores($gff,\%SCORE,"none",1); | |
if ($errorcode != 4) { print STDERR "ERROR: test_read_maker_scores: failed test3\n"; exit;} | |
system "rm -f $gff"; | |
# EXAMPLE WITH TWO TRANSCRIPTS IN A GENE, ONE WITH HIGHER SCORE: | |
($gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
open(GFF,">$gff") || die "ERROR: test_read_maker_scores: cannot open gff $gff\n"; | |
print GFF "Sratt_Chr00_000002 maker gene 371193 372952 . + . ID=SRAE_0000033601;Name=SRAE_0000033601\n"; | |
print GFF "Sratt_Chr00_000002 maker mRNA 371193 372952 . + . ID=SRAE_0000033601-mRNA-1;Parent=SRAE_0000033601;Name=SRAE_0000033601-mRNA-1;_AED=0.02;_eAED=0.02;_QI=550|1|1|1|0|0|3|60|350;score=3922\n"; | |
print GFF "Sratt_Chr00_000002 maker mRNA 371415 372952 . + . ID=SRAE_0000033601-mRNA-2;Parent=SRAE_0000033601;Name=SRAE_0000033601-mRNA-2;_AED=0.07;_eAED=0.07;_QI=378|1|1|1|0|0|2|60|350;score=7307\n"; | |
print GFF "Sratt_Chr00_000002 maker exon 371193 371384 . + . ID=SRAE_0000033601-mRNA-1:exon:1;Parent=SRAE_0000033601-mRNA-1\n"; | |
print GFF "Sratt_Chr00_000002 maker exon 371435 371821 . + . ID=SRAE_0000033601-mRNA-1:exon:2;Parent=SRAE_0000033601-mRNA-1\n"; | |
print GFF "Sratt_Chr00_000002 maker exon 371415 371821 . + . ID=SRAE_0000033601-mRNA-2:exon:1;Parent=SRAE_0000033601-mRNA-2\n"; | |
print GFF "Sratt_Chr00_000002 maker five_prime_UTR 371193 371384 . + . ID=SRAE_0000033601-mRNA-1:five_prime_utr;Parent=SRAE_0000033601-mRNA-1\n"; | |
print GFF "Sratt_Chr00_000002 maker five_prime_UTR 371435 371792 . + . ID=SRAE_0000033601-mRNA-1:five_prime_utr;Parent=SRAE_0000033601-mRNA-1\n"; | |
print GFF "Sratt_Chr00_000002 maker CDS 371793 371821 . + 0 ID=SRAE_0000033601-mRNA-1:cds;Parent=SRAE_0000033601-mRNA-1\n"; | |
print GFF "Sratt_Chr00_000002 maker CDS 371869 372892 . + 1 ID=SRAE_0000033601-mRNA-1:cds;Parent=SRAE_0000033601-mRNA-1\n"; | |
print GFF "Sratt_Chr00_000002 maker three_prime_UTR 372893 372952 . + . ID=SRAE_0000033601-mRNA-1:three_prime_utr;Parent=SRAE_0000033601-mRNA-1\n"; | |
print GFF "Sratt_Chr00_000002 maker five_prime_UTR 371415 371792 . + . ID=SRAE_0000033601-mRNA-2:five_prime_utr;Parent=SRAE_0000033601-mRNA-2\n"; | |
print GFF "Sratt_Chr00_000002 maker CDS 371793 371821 . + 0 ID=SRAE_0000033601-mRNA-2:cds;Parent=SRAE_0000033601-mRNA-2\n"; | |
print GFF "Sratt_Chr00_000002 maker CDS 371869 372892 . + 1 ID=SRAE_0000033601-mRNA-2:cds;Parent=SRAE_0000033601-mRNA-2\n"; | |
print GFF "Sratt_Chr00_000002 maker three_prime_UTR 372893 372952 . + . ID=SRAE_0000033601-mRNA-2:three_prime_utr;Parent=SRAE_0000033601-mRNA-2\n"; | |
close(GFF); | |
($DELETE_TRANSCRIPT,$errorcode,$errormsg) = &read_maker_scores($gff,\%SCORE,"none",1); | |
if ($errorcode != 0) { print STDERR "ERROR: test_read_maker_scores: failed test4\n"; exit;} | |
my $score = $SCORE{"SRAE_0000033601"}; | |
if ($SCORE{"SRAE_0000033601"} ne '0.07') { print STDERR "ERROR: test_read_maker_scores: failed test4b\n"; exit;} | |
system "rm -f $gff"; | |
} | |
#------------------------------------------------------------------# | |
# IF IT IS A MAKER GFF FILE, READ IN THE SCORE INFO. FROM THE GFF FILE: | |
sub read_maker_scores | |
{ | |
my $input_gff = $_[0]; # INPUT GFF FILE | |
my $SCORE = $_[1]; # HASH TABLE OF SCORES FOR MAKER GENES | |
my $subtract_from = $_[2]; # NUMBER TO SUBTRACT THE MAKER SCORES FROM | |
my $testing = $_[3]; # SAYS WHETHER THIS IS BEING CALLED FROM A TESTING SUBROUTINE | |
my $errorcode = 0; # RETURNED AS 0 IF THERE IS NO ERROR | |
my $errormsg = "none";# RETURNED AS 'none' IF THERE IS NO ERROR | |
my $line; # | |
my @temp; # | |
my $name; # NAME FOR mRNA | |
my $gene; # GENE NAME | |
my $score; # SCORE FOR GENE | |
my $end_of_gff = 0; # SAYS WHETHER WE HAVE REACHED THE END OF THE GFF FILE | |
my $current_score; # CURRENTLY STORED SCORE FOR A GENE | |
my $transcript; # TRANSCRIPT NAME | |
my $current_transcript; # CURRENTLY STORED HIGHEST-SCORING TRANSCRIPT FOR A GENE | |
my %DELETE_TRANSCRIPT = (); # HASH TABLE OF TRANSCRIPTS TO DELETE | |
my %SCORE2 = (); # HASH TABLE OF SCORES | |
# THE MAKER OUTPUT FILE HAS AN AED SCORE FOR EACH mRNA FEATURE. | |
# IF THERE ARE SEVERAL mRNAs IN ONE GENE, TAKE THE MAXIMUM SCORE OF ALL THE mRNAs IN THE GENE AS THE GENE SCORE. | |
open(INPUT_GFF,"$input_gff") || die "ERROR: read_maker_scores: cannot open $input_gff\n"; | |
while(<INPUT_GFF>) | |
{ | |
$line = $_; | |
chomp $line; | |
if ($line eq '##FASTA') { $end_of_gff = 1;} # END OF GFF FILE | |
if (substr($line,0,1) ne '#' && $end_of_gff == 0) | |
{ | |
@temp = split(/\t+/,$line); | |
if ($#temp == 8) | |
{ | |
if ($temp[2] eq 'mRNA') # mRNA FEATURE | |
{ | |
# eg. SSTP.scaffold.00055.227916 maker mRNA 49678 50700 . + . ID=augustus_masked-SSTP.scaffold.00055.227916-processed-gene-0.264-mRNA-1;Parent=augustus_masked-SSTP.scaffold.00055.227916-processed-gene-0.264;Name=augustus_masked-SSTP.scaffold.00055.227916-processed-gene-0.264-mRNA-1;_AED=0.00;_eAED=0.00;_QI=0|-1|0|1|-1|1|1|0|340 | |
$name = $temp[8]; # eg. ID=augustus_masked-SSTP.scaffold.00055.227916-processed-gene-0.264-mRNA-1;Parent=augustus_masked-SSTP.scaffold.00055.227916-processed-gene-0.264;Name=augustus_masked-SSTP.scaffold.00055.227916-processed-gene-0.264-mRNA-1;_AED=0.00;_eAED=0.00;_QI=0|-1|0|1|-1|1|1|0|340 | |
@temp = split(/Parent=/,$name); | |
$gene = $temp[1]; # eg. augustus_masked-SSTP.scaffold.00055.227916-processed-gene-0.264;Name=augustus_masked-SSTP.scaffold.00055.227916-processed-gene-0.264-mRNA-1;_AED=0.00;_eAED=0.00;_QI=0|-1|0|1|-1|1|1|0|340 | |
@temp = split(/\;/,$gene); | |
$gene = $temp[0]; # eg. augustus_masked-SSTP.scaffold.00055.227916-processed-gene-0.264; | |
@temp = split(/ID=/,$name); | |
$transcript = $temp[1]; # eg. augustus_masked-SSTP.scaffold.00055.227916-processed-gene-0.264-mRNA-1;Pa... | |
@temp = split(/\;/,$transcript); | |
$transcript = $temp[0]; # eg. augustus_masked-SSTP.scaffold.00055.227916-processed-gene-0.264-mRNA-1 | |
# NOW GET THE SCORE: | |
if (!($name =~ /AED=/)) | |
{ | |
$errormsg = "ERROR: read_maker_scores: name $name does not seem to have AED score\n"; | |
$errorcode = 1; # ERRORCODE=1 (TESTED FOR) | |
return(\%DELETE_TRANSCRIPT,$errorcode,$errormsg); | |
} | |
@temp = split(/AED=/,$name); | |
$score = $temp[1]; # eg. 0.00;_eAED=0.00;_QI=0|-1|0|1|-1|1|1|0|340 | |
@temp = split(/\;/,$score); | |
$score = $temp[0]; # eg. 0.00 | |
if ($subtract_from ne 'none') # eg. FOR MAKER, WE WANT TO SUBTRACT THE SCORE FROM 1 | |
{ | |
$score = $subtract_from - $score; | |
} | |
if (!($SCORE2{$gene})) # THERE IS NO SCORE STORED FOR THIS GENE YET | |
{ | |
$SCORE->{$gene} = $score; | |
$SCORE2{$gene} = $score."=".$transcript; | |
} | |
else | |
{ | |
$current_score = $SCORE2{$gene}; | |
@temp = split(/=/,$current_score); | |
$current_score = $temp[0]; | |
$current_transcript = $temp[1]; | |
if ($score > $current_score) | |
{ | |
$SCORE->{$gene} = $score; | |
$SCORE2{$gene} = $score."=".$transcript; | |
$DELETE_TRANSCRIPT{$current_transcript} = 1; # DELETE $current_transcript, AS $transcript HAS A HIGHER SCORE | |
if ($testing == 0) { print STDERR "WARNING: discarding transcript $current_transcript, as $transcript has a higher score...\n";} | |
} | |
else | |
{ | |
$DELETE_TRANSCRIPT{$transcript} = 1; # DELETE $transcript, AS $current_transcript HAS A HIGHER SCORE | |
if ($testing == 0) { print STDERR "WARNING: discarding transcript $transcript, as $current_transcript has a higher score...\n";} | |
} | |
} | |
} | |
} | |
else | |
{ | |
$errormsg = "ERROR: read_maker_scores: gff line $line does not seem to have 8 columns\n"; | |
$errorcode = 4; # ERRORCODE=4 (TESTED FOR) | |
return(\%DELETE_TRANSCRIPT,$errorcode,$errormsg); | |
} | |
} | |
} | |
close(INPUT_GFF); | |
return(\%DELETE_TRANSCRIPT,$errorcode,$errormsg); | |
} | |
#------------------------------------------------------------------# | |
# TEST &find_best_predictions | |
sub test_find_best_predictions | |
{ | |
my %OVERLAP = (); # HASH TABLE WITH OVERLAPS BETWEEN GENES | |
my %SCORE = (); # HASH TABLE WITH SCORES OF GENES | |
my $DISCARD; # HASH TABLE OF GENES TO DISCARD | |
my $errorcode; # RETURNED AS 0 FROM A FUNCTION IF THERE IS NO ERROR | |
my $errormsg; # RETURNED AS 'none' FROM A FUNCTION IF THERE IS NO ERROR | |
$SCORE{"gene1"} = 10; | |
$SCORE{"gene2"} = 9.5; | |
$SCORE{"gene3"} = 9.0; | |
$SCORE{"gene4"} = 8.5; | |
$SCORE{"gene5"} = 8.0; | |
$SCORE{"gene6"} = 1.0; | |
# ---gene1------ | |
# -----gene2---- | |
# -----gene3---- | |
# ---gene4--- | |
# ---gene5--- | |
# --gene6-- | |
$OVERLAP{"gene1"} = "gene2"; | |
$OVERLAP{"gene2"} = "gene1,gene3"; | |
$OVERLAP{"gene3"} = "gene2"; | |
$OVERLAP{"gene4"} = "gene5"; | |
$OVERLAP{"gene5"} = "gene4"; | |
($DISCARD,$errorcode,$errormsg) = &find_best_predictions(1,\%OVERLAP,\%SCORE,2); | |
if ($errorcode != 0) { print STDERR "ERROR: test_find_best_predictions: failed test1\n"; exit;} | |
if ($DISCARD->{"gene1"}) { print STDERR "ERROR: test_find_best_predictions: failed test1b\n"; exit;} | |
if (!($DISCARD->{"gene2"})) { print STDERR "ERROR: test_find_best_predictions: failed test1c\n"; exit;} | |
if ($DISCARD->{"gene3"}) { print STDERR "ERROR: test_find_best_predictions: failed test1d\n"; exit;} | |
if ($DISCARD->{"gene4"}) { print STDERR "ERROR: test_find_best_predictions: failed test1e\n"; exit;} | |
if (!($DISCARD->{"gene5"})) { print STDERR "ERROR: test_find_best_predictions: failed test1f\n"; exit;} | |
if (!($DISCARD->{"gene6"})) { print STDERR "ERROR: test_find_best_predictions: failed test1g\n"; exit;} | |
# TEST ERRORCODE=107 (SCORE NOT DEFINED FOR A GENE): | |
%SCORE = (); | |
# ---gene1------ | |
# -----gene2---- | |
# -----gene3---- | |
# ---gene4--- | |
# ---gene5--- | |
# --gene6-- | |
$SCORE{"gene1"} = 10; | |
$OVERLAP{"gene1"} = "gene2"; | |
$OVERLAP{"gene2"} = "gene1,gene3"; | |
$OVERLAP{"gene3"} = "gene2"; | |
$OVERLAP{"gene4"} = "gene5"; | |
$OVERLAP{"gene5"} = "gene4"; | |
($DISCARD,$errorcode,$errormsg) = &find_best_predictions(1,\%OVERLAP,\%SCORE,2); | |
if ($errorcode != 107) { print STDERR "ERROR: test_find_best_predictions: failed test2 (errorcode $errorcode errormsg $errormsg)\n"; exit;} | |
} | |
#------------------------------------------------------------------# | |
# FIND THE HIGHEST-SCORING NON-OVERLAPPING GENE PREDICTIONS: | |
sub find_best_predictions | |
{ | |
my $testing = $_[0]; # SAYS WHETHER THIS IS BEING CALLED FROM A TESTING SUBROUTINE | |
my $OVERLAP = $_[1]; # HASH TABLE OF OVERLAPPING GENES | |
my $SCORE = $_[2]; # HASH TABLE OF GENE SCORES | |
my $min_gene_score = $_[3]; # MINIMUM SCORE OF GENES TO TAKE | |
my $errorcode = 0; # RETURNED AS 0 IF THERE IS NO ERROR | |
my $errormsg = "none";# RETURNED AS 'none' IF THERE IS NO ERROR | |
my @genes; # ARRAY OF ALL THE GENES | |
my $i; # | |
my $gene; # GENE | |
my $score; # GENE SCORE | |
my $overlaps; # OVERLAPPING GENES FOR $gene | |
my @overlaps; # OVERLAPPING GENES FOR $gene | |
my $j; # | |
my $overlap; # AN OVERLAPPING GENE FOR $gene | |
my $overlap_score; # SCORE FOR $overlap | |
my %DISCARD = (); # HASH TABLE OF GENES TO DISCARD | |
# GET AN ARRAY OF ALL THE GENES: | |
@genes = keys %{$SCORE}; | |
# NOW SORT THE ARRAY BY SCORE: | |
@genes = sort { $SCORE->{$a} <=> $SCORE->{$b} } @genes; | |
# NOW LOOP THROUGH THE GENES, TAKING THE HIGHEST SCORING GENE FIRST: | |
for ($i = $#genes; $i >= 0; $i--) | |
{ | |
$gene = $genes[$i]; | |
$score = $SCORE->{$gene}; | |
# IGNORE THIS GENE IF WE HAVE DECIDED TO DISCARD IT: | |
if (!($DISCARD{$gene})) | |
{ | |
# FIND THE GENES OVERLAPPING $gene: | |
if ($OVERLAP->{$gene}) | |
{ | |
$overlaps = $OVERLAP->{$gene}; | |
@overlaps = split(/\,/,$overlaps); | |
for ($j = 0; $j <= $#overlaps; $j++) | |
{ | |
$overlap = $overlaps[$j]; | |
if (!(defined($SCORE->{$overlap}))) | |
{ | |
$errorcode = 107; # ERRORCODE=107 (TESTED FOR) | |
$errormsg = "ERROR: find_best_predictions: do not know score for $overlap\n"; | |
return(\%DISCARD,$errorcode,$errormsg); | |
} | |
$overlap_score = $SCORE->{$overlap}; | |
# IF $overlap HAS A LOWER SCORE, WE WANT TO DELETE IT: | |
if ($overlap_score < $score) | |
{ | |
$DISCARD{$overlap} = 1; | |
if ($testing == 0) { print STDERR "Discarding $overlap ($overlap_score) as it overlaps $gene ($score)...\n";} | |
} | |
} | |
} | |
} | |
# IF THE SCORE IS < $min_gene_score, WE WANT TO DISCARD THE GENE: | |
if ($score < $min_gene_score) | |
{ | |
$DISCARD{$gene} = 1; | |
if ($testing == 0) { print STDERR "Discarding $gene has it has score $score (<$min_gene_score)...\n";} | |
} | |
} | |
return(\%DISCARD,$errorcode,$errormsg); | |
} | |
#------------------------------------------------------------------# | |
# TEST &read_scores | |
sub test_read_scores | |
{ | |
my $outputdir = $_[0]; # DIRECTORY TO PUT OUTPUT FILES IN | |
my $errorcode; # RETURNED AS 0 FROM A FUNCTION IF THERE IS NO ERROR | |
my $errormsg; # RETURNED AS 'none' FROM A FUNCTION IF THERE IS NO ERROR | |
my $gff; # INPUT GFF FILE | |
my %SCORE = (); # HASH TABLE OF GENE SCORES | |
($gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
open(GFF,">$gff") || die "ERROR: test_run_main_program: cannot open gff $gff\n"; | |
print GFF "ACAC.scaffold.00050.383204\tGeneWise\tgene\t185915\t185921\t-24.36\t+\t.\tID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-1\n"; | |
print GFF "ACAC.scaffold.00050.383204\tGeneWise\tCDS\t185915\t185921\t-24.36\t+\t.\tID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-1\n"; | |
print GFF "ACAC.scaffold.00050.383204\tGeneWise\tgene\t185920\t185931\t-3.52\t+\t.\tID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-2\n"; | |
print GFF "ACAC.scaffold.00050.383204\tGeneWise\tCDS\t185920\t185931\t-3.52\t+\t.\tID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-2\n"; | |
print GFF "ACAC.scaffold.00050.383204\tGeneWise\tgene\t185910\t185916\t-3.42\t+\t.\tID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-3\n"; | |
print GFF "ACAC.scaffold.00050.383204\tGeneWise\tCDS\t185910\t185916\t-3.42\t+\t.\tID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-3\n"; | |
print GFF "ACAC.scaffold.00050.383204\tGeneWise\tgene\t206829\t214134\t200.15\t+\t.\tID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-4\n"; | |
print GFF "ACAC.scaffold.00050.383204\tGeneWise\tCDS\t206829\t214134\t200.15\t+\t.\tID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-4\n"; | |
print GFF "ACAC.scaffold.00050.383204\tGeneWise\tgene\t206829\t214134\t252.15\t-\t.\tID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-5\n"; | |
print GFF "ACAC.scaffold.00050.383204\tGeneWise\tCDS\t206829\t214134\t252.15\t-\t.\tID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-5\n"; | |
print GFF "ACAC.scaffold.00050.383204\tGeneWise\tintron\t206829\t214134\t252.15\t-\t.\tID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-5\n"; | |
print GFF "ACAC.scaffold.00050.383205\tGeneWise\tgene\t185915\t185921\t-24.36\t+\t.\tID=ACAC.scaffold.00050.383205-TF101001-genewise-prediction-1\n"; | |
print GFF "ACAC.scaffold.00050.383205\tGeneWise\tCDS\t185915\t185921\t-24.36\t+\t.\tID=ACAC.scaffold.00050.383205-TF101001-genewise-prediction-1\n"; | |
print GFF "ACAC.scaffold.00050.383205\tGeneWise\tgene\t185920\t185931\t-3.52\t+\t.\tID=ACAC.scaffold.00050.383205-TF101001-genewise-prediction-2\n"; | |
print GFF "ACAC.scaffold.00050.383205\tGeneWise\tCDS\t185920\t185931\t-3.52\t+\t.\tID=ACAC.scaffold.00050.383205-TF101001-genewise-prediction-2\n"; | |
print GFF "ACAC.scaffold.00050.383205\tGeneWise\tgene\t185910\t185916\t-3.42\t+\t.\tID=ACAC.scaffold.00050.383205-TF101001-genewise-prediction-3\n"; | |
print GFF "ACAC.scaffold.00050.383205\tGeneWise\tCDS\t185910\t185916\t-3.42\t+\t.\tID=ACAC.scaffold.00050.383205-TF101001-genewise-prediction-3\n"; | |
print GFF "ACAC.scaffold.00050.383205\tGeneWise\tgene\t206829\t214134\t200.15\t+\t.\tID=ACAC.scaffold.00050.383205-TF101001-genewise-prediction-4\n"; | |
print GFF "ACAC.scaffold.00050.383205\tGeneWise\tCDS\t206829\t214134\t200.15\t+\t.\tID=ACAC.scaffold.00050.383205-TF101001-genewise-prediction-4\n"; | |
print GFF "ACAC.scaffold.00050.383205\tGeneWise\tgene\t206829\t214134\t252.15\t-\t.\tID=ACAC.scaffold.00050.383205-TF101001-genewise-prediction-5\n"; | |
print GFF "ACAC.scaffold.00050.383205\tGeneWise\tCDS\t206829\t214134\t252.15\t-\t.\tID=ACAC.scaffold.00050.383205-TF101001-genewise-prediction-5\n"; | |
close(GFF); | |
($errorcode,$errormsg) = &read_scores($input_gff,\%SCORE,0); | |
if ($errorcode != 0) { print STDERR "ERROR: test_read_scores: failed test1\n"; exit;} | |
if ($SCORE{"ACAC.scaffold.00050.383204-TF101001-genewise-prediction-1"} != -24.36) { print STDERR "ERROR: test_read_scores: failed test1b\n"; exit;} | |
if ($SCORE{"ACAC.scaffold.00050.383204-TF101001-genewise-prediction-2"} != -3.52) { print STDERR "ERROR: test_read_scores: failed test1c\n"; exit;} | |
if ($SCORE{"ACAC.scaffold.00050.383204-TF101001-genewise-prediction-3"} != -3.42) { print STDERR "ERROR: test_read_scores: failed test1d\n"; exit;} | |
if ($SCORE{"ACAC.scaffold.00050.383204-TF101001-genewise-prediction-4"} != 200.15) { print STDERR "ERROR: test_read_scores: failed test1e\n"; exit;} | |
if ($SCORE{"ACAC.scaffold.00050.383204-TF101001-genewise-prediction-5"} != 252.15) { print STDERR "ERROR: test_read_scores: failed test1f\n"; exit;} | |
if ($SCORE{"ACAC.scaffold.00050.383205-TF101001-genewise-prediction-1"} != -24.36) { print STDERR "ERROR: test_read_scores: failed test1g\n"; exit;} | |
if ($SCORE{"ACAC.scaffold.00050.383205-TF101001-genewise-prediction-2"} != -3.52) { print STDERR "ERROR: test_read_scores: failed test1h\n"; exit;} | |
if ($SCORE{"ACAC.scaffold.00050.383205-TF101001-genewise-prediction-3"} != -3.42) { print STDERR "ERROR: test_read_scores: failed test1i\n"; exit;} | |
if ($SCORE{"ACAC.scaffold.00050.383205-TF101001-genewise-prediction-4"} != 200.15) { print STDERR "ERROR: test_read_scores: failed test1j\n"; exit;} | |
if ($SCORE{"ACAC.scaffold.00050.383205-TF101001-genewise-prediction-5"} != 252.15) { print STDERR "ERROR: test_read_scores: failed test1k\n"; exit;} | |
system "rm -f $gff"; | |
# TEST ERRORCODE=101 (DO NOT KNOW SCORE FOR GENE): | |
($gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
open(GFF,">$gff") || die "ERROR: test_run_main_program: cannot open gff $gff\n"; | |
print GFF "ACAC.scaffold.00050.383204\tGeneWise\tgene\t185915\t185921\t-24.36\t+\t.\tID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-1\n"; | |
print GFF "ACAC.scaffold.00050.383204\tGeneWise\tCDS\t185915\t185921\t-24.36\t+\t.\tID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-1\n"; | |
print GFF "ACAC.scaffold.00050.383204\tGeneWise\tgene\t185920\t185931\t-3.52\t+\t.\tID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-2\n"; | |
print GFF "ACAC.scaffold.00050.383204\tGeneWise\tCDS\t185920\t185931\t-3.52\t+\t.\tID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-2\n"; | |
print GFF "ACAC.scaffold.00050.383204\tGeneWise\tgene\t185910\t185916\t-3.42\t+\t.\tID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-3\n"; | |
print GFF "ACAC.scaffold.00050.383204\tGeneWise\tCDS\t185910\t185916\t-3.42\t+\t.\tID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-3\n"; | |
print GFF "ACAC.scaffold.00050.383204\tGeneWise\tgene\t206829\t214134\t200.15\t+\t.\tID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-4\n"; | |
print GFF "ACAC.scaffold.00050.383204\tGeneWise\tCDS\t206829\t214134\t200.15\t+\t.\tID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-4\n"; | |
print GFF "ACAC.scaffold.00050.383204\tGeneWise\tgene\t206829\t214134\t252.15\t-\t.\tID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-5\n"; | |
print GFF "ACAC.scaffold.00050.383204\tGeneWise\tCDS\t206829\t214134\t252.15\t-\t.\tID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-5\n"; | |
print GFF "ACAC.scaffold.00050.383204\tGeneWise\tintron\t206829\t214134\t252.15\t-\t.\tID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-5\n"; | |
print GFF "ACAC.scaffold.00050.383205\tGeneWise\tgene\t185915\t185921\t-24.36\t+\t.\tID=ACAC.scaffold.00050.383205-TF101001-genewise-prediction-1\n"; | |
print GFF "ACAC.scaffold.00050.383205\tGeneWise\tCDS\t185915\t185921\t-24.36\t+\t.\tID=ACAC.scaffold.00050.383205-TF101001-genewise-prediction-1\n"; | |
print GFF "ACAC.scaffold.00050.383205\tGeneWise\tgene\t185920\t185931\t-3.52\t+\t.\tID=ACAC.scaffold.00050.383205-TF101001-genewise-prediction-2\n"; | |
print GFF "ACAC.scaffold.00050.383205\tGeneWise\tCDS\t185920\t185931\t-3.52\t+\t.\tID=ACAC.scaffold.00050.383205-TF101001-genewise-prediction-2\n"; | |
print GFF "ACAC.scaffold.00050.383205\tGeneWise\tgene\t185910\t185916\t-3.42\t+\t.\tID=ACAC.scaffold.00050.383205-TF101001-genewise-prediction-3\n"; | |
print GFF "ACAC.scaffold.00050.383205\tGeneWise\tCDS\t185910\t185916\t-3.42\t+\t.\tID=ACAC.scaffold.00050.383205-TF101001-genewise-prediction-3\n"; | |
print GFF "ACAC.scaffold.00050.383205\tGeneWise\tgene\t206829\t214134\t200.15\t+\t.\tID=ACAC.scaffold.00050.383205-TF101001-genewise-prediction-4\n"; | |
print GFF "ACAC.scaffold.00050.383205\tGeneWise\tCDS\t206829\t214134\t200.15\t+\t.\tID=ACAC.scaffold.00050.383205-TF101001-genewise-prediction-4\n"; | |
print GFF "ACAC.scaffold.00050.383205\tGeneWise\tgene\t206829\t214134\t252.15\t-\t.\tID=ACAC.scaffold.00050.383205-TF101001-genewise-prediction-5\n"; | |
print GFF "ACAC.scaffold.00050.383205\tGeneWise\tCDS\t206829\t214134\t252.15\t-\t.\tID=ACAC.scaffold.00050.383205-TF101001-genewise-prediction-5\n"; | |
close(GFF); | |
$SCORE{"ACAC.scaffold.00050.383204-TF101001-genewise-prediction-1"} = 101; | |
($errorcode,$errormsg) = &read_scores($input_gff,\%SCORE,0); | |
if ($errorcode != 101) { print STDERR "ERROR: test_read_scores: failed test2\n"; exit;} | |
system "rm -f $gff"; | |
# TEST ERRORCODE=102 (GFF DOES NOT HAVE 8 COLUMNS): | |
($gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
open(GFF,">$gff") || die "ERROR: test_run_main_program: cannot open gff $gff\n"; | |
print GFF "ACAC.scaffold.00050.383204\tGeneWise\tgene\t185915\t185921\t-24.36\t+\t.\tID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-1\n"; | |
print GFF "ACAC.scaffold.00050.383204\tGeneWise\tCDS\t185915\t185921\t-24.36\t+\t.\tID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-1\n"; | |
print GFF "ACAC.scaffold.00050.383204\tGeneWise\tgene\t185920\t185931\t-3.52\t+\t.\tID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-2\n"; | |
print GFF "ACAC.scaffold.00050.383204\tGeneWise\tCDS\t185920\t185931\t-3.52\t+\t.\tID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-2\n"; | |
print GFF "ACAC.scaffold.00050.383204\tGeneWise\tgene\t185910\t185916\t-3.42\t+\t.\tID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-3\n"; | |
print GFF "ACAC.scaffold.00050.383204\tGeneWise\tCDS\t185910\t185916\t-3.42\t+\t.\tID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-3\n"; | |
print GFF "ACAC.scaffold.00050.383204\tGeneWise\tgene\t206829\t214134\t200.15\t+\t.\tID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-4\n"; | |
print GFF "ACAC.scaffold.00050.383204\tGeneWise\t206829\t214134\t200.15\t+\t.\tID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-4\n"; | |
print GFF "ACAC.scaffold.00050.383204\tGeneWise\tgene\t206829\t214134\t252.15\t-\t.\tID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-5\n"; | |
print GFF "ACAC.scaffold.00050.383204\tGeneWise\tCDS\t206829\t214134\t252.15\t-\t.\tID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-5\n"; | |
print GFF "ACAC.scaffold.00050.383204\tGeneWise\tintron\t206829\t214134\t252.15\t-\t.\tID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-5\n"; | |
print GFF "ACAC.scaffold.00050.383205\tGeneWise\tgene\t185915\t185921\t-24.36\t+\t.\tID=ACAC.scaffold.00050.383205-TF101001-genewise-prediction-1\n"; | |
print GFF "ACAC.scaffold.00050.383205\tGeneWise\tCDS\t185915\t185921\t-24.36\t+\t.\tID=ACAC.scaffold.00050.383205-TF101001-genewise-prediction-1\n"; | |
print GFF "ACAC.scaffold.00050.383205\tGeneWise\tgene\t185920\t185931\t-3.52\t+\t.\tID=ACAC.scaffold.00050.383205-TF101001-genewise-prediction-2\n"; | |
print GFF "ACAC.scaffold.00050.383205\tGeneWise\tCDS\t185920\t185931\t-3.52\t+\t.\tID=ACAC.scaffold.00050.383205-TF101001-genewise-prediction-2\n"; | |
print GFF "ACAC.scaffold.00050.383205\tGeneWise\tgene\t185910\t185916\t-3.42\t+\t.\tID=ACAC.scaffold.00050.383205-TF101001-genewise-prediction-3\n"; | |
print GFF "ACAC.scaffold.00050.383205\tGeneWise\tCDS\t185910\t185916\t-3.42\t+\t.\tID=ACAC.scaffold.00050.383205-TF101001-genewise-prediction-3\n"; | |
print GFF "ACAC.scaffold.00050.383205\tGeneWise\tgene\t206829\t214134\t200.15\t+\t.\tID=ACAC.scaffold.00050.383205-TF101001-genewise-prediction-4\n"; | |
print GFF "ACAC.scaffold.00050.383205\tGeneWise\tCDS\t206829\t214134\t200.15\t+\t.\tID=ACAC.scaffold.00050.383205-TF101001-genewise-prediction-4\n"; | |
print GFF "ACAC.scaffold.00050.383205\tGeneWise\tgene\t206829\t214134\t252.15\t-\t.\tID=ACAC.scaffold.00050.383205-TF101001-genewise-prediction-5\n"; | |
print GFF "ACAC.scaffold.00050.383205\tGeneWise\tCDS\t206829\t214134\t252.15\t-\t.\tID=ACAC.scaffold.00050.383205-TF101001-genewise-prediction-5\n"; | |
close(GFF); | |
($errorcode,$errormsg) = &read_scores($input_gff,\%SCORE,0); | |
if ($errorcode != 102) { print STDERR "ERROR: test_read_scores: failed test3\n"; exit;} | |
system "rm -f $gff"; | |
} | |
#------------------------------------------------------------------# | |
# READ IN THE GENE SCORES: | |
sub read_scores | |
{ | |
my $input_gff = $_[0]; # INPUT GFF FILE | |
my $SCORE = $_[1]; # HASH TABLE FOR STORING GENE SCORES | |
my $subtract_from = $_[2]; # SUBTRACT THE SCORE FROM THIS NUMBER | |
my $errorcode = 0; # RETURNED AS 0 IF THERE IS NO ERROR | |
my $errormsg = "none";# RETURNED AS 'none' IF THERE IS NO ERROR | |
my $end_of_gff = 0; # SAYS WHETHER WE HAVE REACHED THE END OF THE GFF FILE | |
my $score; # GENE SCORE | |
my $gene; # GENE NAME | |
my $line; # | |
my @temp; # | |
open(INPUT_GFF,"$input_gff") || die "ERROR: read_scores: cannot open $input_gff\n"; | |
while(<INPUT_GFF>) | |
{ | |
$line = $_; | |
chomp $line; | |
if ($line eq '##FASTA') { $end_of_gff = 1;} # END OF GFF FILE | |
if (substr($line,0,1) ne '#' && $end_of_gff == 0) | |
{ | |
@temp = split(/\t+/,$line); | |
if ($#temp == 8) | |
{ | |
if ($temp[2] eq 'gene') # gene FEATURE | |
{ | |
$score = $temp[5]; | |
$gene = $temp[8]; | |
@temp = split(/ID=/,$gene); | |
$gene = $temp[1]; | |
if ($subtract_from ne 'none') # eg. FOR MAKER, WE WANT TO SUBTRACT THE SCORE FROM 1 | |
{ | |
$score = $subtract_from - $score; | |
} | |
if ($SCORE->{$gene}) | |
{ | |
$errormsg = "ERROR: read_scores: already have score for $gene line $line\n"; | |
$errorcode = 101; # ERRORCODE=101 (TESTED FOR) | |
return($errorcode,$errormsg); | |
} | |
$SCORE->{$gene} = $score; | |
} | |
} | |
else | |
{ | |
$errormsg = "ERROR: read_scores: gff line $line does not seem to have 8 columns\n"; | |
$errorcode = 102; # ERRORCODE=102 (TESTED FOR) | |
return($errorcode,$errormsg); | |
} | |
} | |
} | |
close(INPUT_GFF); | |
return($errorcode,$errormsg); | |
} | |
#------------------------------------------------------------------# | |
# TEST &make_cds_gff | |
sub test_make_cds_gff | |
{ | |
my $outputdir = $_[0]; # DIRECTORY TO WRITE OUTPUT FILES INTO | |
my $input_gff; # INPUT GFF FILE | |
my $new_gff; # NEW GFF FILE | |
my $errorcode; # RETURNED AS 0 FROM A FUNCTION IF THERE IS NO ERROR | |
my $errormsg; # RETUREND AS 'none' FROM A FUNCTION IF THERE IS NO ERROR | |
my $expected_new_gff; # FILE WITH EXPECTED CONTENTS OF $new_gff | |
my $differences; # DIFFERENCES BETWEEN $new_gff AND $expected_new_gff | |
my $length_differences; # LENGTH OF $differences | |
my $line; # | |
($input_gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
open(INPUT_GFF,">$input_gff") || die "ERROR: test_make_cds_gff: cannot open input_gff $input_gff\n"; | |
print INPUT_GFF "ACAC.scaffold.00050.383204 GeneWise gene 185915 185917 -24.36 + . ID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-1\n"; | |
print INPUT_GFF "ACAC.scaffold.00050.383204 GeneWise CDS 185915 185917 0.00 + 0 ID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-1\n"; | |
print INPUT_GFF "ACAC.scaffold.00050.383204 GeneWise gene 185920 185931 -3.42 + . ID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-2\n"; | |
print INPUT_GFF "ACAC.scaffold.00050.383204 GeneWise CDS 185920 185931 0.00 + 0 ID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-2\n"; | |
print INPUT_GFF "ACAC.scaffold.00050.383204 GeneWise gene 206829 214134 252.15 - . ID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-5\n"; | |
print INPUT_GFF "ACAC.scaffold.00050.383204 GeneWise CDS 214021 214134 0.00 - 0 ID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-5\n"; | |
print INPUT_GFF "ACAC.scaffold.00050.383204 GeneWise intron 213951 214020 0.00 - . ID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-5\n"; | |
print INPUT_GFF "ACAC.scaffold.00050.383204 GeneWise CDS 213864 213950 0.00 - 0 ID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-5\n"; | |
print INPUT_GFF "ACAC.scaffold.00050.383204 GeneWise intron 213848 213863 0.00 - . ID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-5\n"; | |
print INPUT_GFF "ACAC.scaffold.00050.383204 GeneWise CDS 213764 213847 0.00 - 0 ID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-5\n"; | |
print INPUT_GFF "ACAC.scaffold.00050.383204 GeneWise intron 211581 213763 0.00 - . ID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-5\n"; | |
print INPUT_GFF "ACAC.scaffold.00050.383204 GeneWise CDS 211467 211580 0.00 - 0 ID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-5\n"; | |
print INPUT_GFF "ACAC.scaffold.00050.383204 GeneWise intron 211407 211466 0.00 - . ID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-5\n"; | |
print INPUT_GFF "ACAC.scaffold.00050.383204 GeneWise CDS 211285 211406 0.00 - 0 ID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-5\n"; | |
print INPUT_GFF "ACAC.scaffold.00050.383204 GeneWise intron 211224 211284 0.00 - . ID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-5\n"; | |
print INPUT_GFF "ACAC.scaffold.00050.383204 GeneWise CDS 211098 211223 0.00 - 1 ID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-5\n"; | |
print INPUT_GFF "ACAC.scaffold.00050.383204 GeneWise intron 211035 211097 0.00 - . ID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-5\n"; | |
print INPUT_GFF "ACAC.scaffold.00050.383204 GeneWise CDS 210932 211034 0.00 - 1 ID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-5\n"; | |
print INPUT_GFF "ACAC.scaffold.00050.383204 GeneWise intron 210844 210931 0.00 - . ID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-5\n"; | |
print INPUT_GFF "ACAC.scaffold.00050.383204 GeneWise CDS 210731 210843 0.00 - 0 ID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-5\n"; | |
print INPUT_GFF "ACAC.scaffold.00050.383204 GeneWise intron 207010 210730 0.00 - . ID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-5\n"; | |
print INPUT_GFF "ACAC.scaffold.00050.383204 GeneWise CDS 206829 207009 0.00 - 1 ID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-5\n"; | |
close(INPUT_GFF); | |
($new_gff,$errorcode,$errormsg) = &make_cds_gff($input_gff,$outputdir); | |
if ($errorcode != 0) { print STDERR "ERROR: test_make_cds_gff: failed test1\n"; exit;} | |
($expected_new_gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
open(EXPECTED,">$expected_new_gff") || die "ERROR: test_make_cds_gff: cannot open $expected_new_gff\n"; | |
print EXPECTED "ACAC.scaffold.00050.383204 GeneWise CDS 185915 185917 0.00 + 0 ID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-1\n"; | |
print EXPECTED "ACAC.scaffold.00050.383204 GeneWise CDS 185920 185931 0.00 + 0 ID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-2\n"; | |
print EXPECTED "ACAC.scaffold.00050.383204 GeneWise CDS 214021 214134 0.00 - 0 ID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-5\n"; | |
print EXPECTED "ACAC.scaffold.00050.383204 GeneWise CDS 213864 213950 0.00 - 0 ID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-5\n"; | |
print EXPECTED "ACAC.scaffold.00050.383204 GeneWise CDS 213764 213847 0.00 - 0 ID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-5\n"; | |
print EXPECTED "ACAC.scaffold.00050.383204 GeneWise CDS 211467 211580 0.00 - 0 ID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-5\n"; | |
print EXPECTED "ACAC.scaffold.00050.383204 GeneWise CDS 211285 211406 0.00 - 0 ID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-5\n"; | |
print EXPECTED "ACAC.scaffold.00050.383204 GeneWise CDS 211098 211223 0.00 - 1 ID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-5\n"; | |
print EXPECTED "ACAC.scaffold.00050.383204 GeneWise CDS 210932 211034 0.00 - 1 ID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-5\n"; | |
print EXPECTED "ACAC.scaffold.00050.383204 GeneWise CDS 210731 210843 0.00 - 0 ID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-5\n"; | |
print EXPECTED "ACAC.scaffold.00050.383204 GeneWise CDS 206829 207009 0.00 - 1 ID=ACAC.scaffold.00050.383204-TF101001-genewise-prediction-5\n"; | |
close(EXPECTED); | |
$differences = ""; | |
open(TEMP,"diff $new_gff $expected_new_gff |"); | |
while(<TEMP>) | |
{ | |
$line = $_; | |
$differences = $differences.$line; | |
} | |
close(TEMP); | |
$length_differences = length($differences); | |
if ($length_differences != 0) { print STDERR "ERROR: test_make_cds_gff: failed test1 (new_gff $new_gff expected_new_gff $expected_new_gff)\n"; exit;} | |
system "rm -f $input_gff"; | |
system "rm -f $new_gff"; | |
system "rm -f $expected_new_gff"; | |
} | |
#------------------------------------------------------------------# | |
# MAKE A GFF FILE WITH JUST THE 'cds' FEATURES (NOT 'gene' OR 'intron' FEATURES): | |
sub make_cds_gff | |
{ | |
my $input_gff = $_[0]; # INPUT GFF FILE | |
my $outputdir = $_[1]; # DIRECTORY FOR PUTTING OUTPUT FILES INTO | |
my $new_gff; # NEW GFF FILE WITH JUST THE 'cds' FEATURES | |
my $errorcode = 0; # RETURNED AS 0 IF THERE IS NO ERROR | |
my $errormsg = "none";# RETURNED AS 'none' IF THERE IS NO ERROR | |
my $line; # | |
my @temp; # | |
my $end_of_gff = 0; # SAYS WHETHER WE HAVE REACHED THE END OF THE GFF FILE | |
# JUST TAKE THE 'cds' FEATURES FROM $input_gff: | |
($new_gff,$errorcode,$errormsg) = &make_filename($outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
open(NEW_GFF,">$new_gff") || die "ERROR: make_cds_gff: cannot open $new_gff\n"; | |
open(INPUT_GFF,"$input_gff") || die "ERROR: make_cds_gff: cannot open $input_gff\n"; | |
while(<INPUT_GFF>) | |
{ | |
$line = $_; | |
chomp $line; | |
if ($line eq '##FASTA') { $end_of_gff = 1;} # WE HAVE REACHED THE END OF THE GFF FILE | |
if (substr($line,0,1) ne '#' && $end_of_gff == 0) | |
{ | |
@temp = split(/\t+/,$line); | |
if ($temp[2] eq 'CDS') { print NEW_GFF "$line\n";} | |
} | |
} | |
close(INPUT_GFF); | |
close(NEW_GFF); | |
return($new_gff,$errorcode,$errormsg); | |
} | |
#------------------------------------------------------------------# | |
# SUBROUTINE TO MAKE A FILE NAME FOR A TEMPORARY FILE: | |
sub make_filename | |
{ | |
my $outputdir = $_[0]; # OUTPUT DIRECTORY TO WRITE TEMPORARY FILE NAME TO | |
my $found_name = 0; # SAYS WHETHER WE HAVE FOUND A FILE NAME YET | |
my $filename = "none";# NEW FILE NAME TO USE | |
my $errorcode = 0; # RETURNED AS 0 IF THERE IS NO ERROR | |
my $errormsg = "none";# RETURNED AS 'none' IF THERE IS NO ERROR | |
my $poss_filename; # POSSIBLE FILE NAME TO USE | |
my $random_number; # RANDOM NUMBER TO USE IN TEMPORARY FILE NAME | |
while($found_name == 0) | |
{ | |
$random_number = rand(); | |
$poss_filename = $outputdir."/tmp".$random_number; | |
if (!(-e $poss_filename)) | |
{ | |
$filename = $poss_filename; | |
$found_name = 1; | |
} | |
} | |
if ($found_name == 0 || $filename eq 'none') | |
{ | |
$errormsg = "ERROR: make_filename: found_name $found_name filename $filename\n"; | |
$errorcode = 6; # ERRORCODE=6 | |
return($filename,$errorcode,$errormsg); | |
} | |
return($filename,$errorcode,$errormsg); | |
} | |
#------------------------------------------------------------------# | |
# TEST &print_error | |
sub test_print_error | |
{ | |
my $errormsg; # RETURNED AS 'none' FROM A FUNCTION IF THERE WAS NO ERROR | |
my $errorcode; # RETURNED AS 0 FROM A FUNCTION IF THERE WAS NO ERROR | |
($errormsg,$errorcode) = &print_error(45,45,1); | |
if ($errorcode != 12) { print STDERR "ERROR: test_print_error: failed test1\n"; exit;} | |
($errormsg,$errorcode) = &print_error('My error message','My error message',1); | |
if ($errorcode != 11) { print STDERR "ERROR: test_print_error: failed test2\n"; exit;} | |
($errormsg,$errorcode) = &print_error('none',45,1); | |
if ($errorcode != 13) { print STDERR "ERROR: test_print_error: failed test3\n"; exit;} | |
($errormsg,$errorcode) = &print_error('My error message', 0, 1); | |
if ($errorcode != 13) { print STDERR "ERROR: test_print_error: failed test4\n"; exit;} | |
} | |
#------------------------------------------------------------------# | |
# PRINT OUT AN ERROR MESSAGE AND EXIT. | |
sub print_error | |
{ | |
my $errormsg = $_[0]; # THIS SHOULD BE NOT 'none' IF AN ERROR OCCURRED. | |
my $errorcode = $_[1]; # THIS SHOULD NOT BE 0 IF AN ERROR OCCURRED. | |
my $called_from_test = $_[2]; # SAYS WHETHER THIS WAS CALLED FROM test_print_error OR NOT | |
if ($errorcode =~ /[A-Z]/ || $errorcode =~ /[a-z]/) | |
{ | |
if ($called_from_test == 1) | |
{ | |
$errorcode = 11; $errormsg = "ERROR: print_error: the errorcode is $errorcode, should be a number.\n"; # ERRORCODE=11 | |
return($errormsg,$errorcode); | |
} | |
else | |
{ | |
print STDERR "ERROR: print_error: the errorcode is $errorcode, should be a number.\n"; | |
exit; | |
} | |
} | |
if (!($errormsg =~ /[A-Z]/ || $errormsg =~ /[a-z]/)) | |
{ | |
if ($called_from_test == 1) | |
{ | |
$errorcode = 12; $errormsg = "ERROR: print_error: the errormessage $errormsg does not seem to contain text.\n"; # ERRORCODE=12 | |
return($errormsg,$errorcode); | |
} | |
else | |
{ | |
print STDERR "ERROR: print_error: the errormessage $errormsg does not seem to contain text.\n"; | |
exit; | |
} | |
} | |
if ($errormsg eq 'none' || $errorcode == 0) | |
{ | |
if ($called_from_test == 1) | |
{ | |
$errorcode = 13; $errormsg = "ERROR: print_error: errormsg $errormsg, errorcode $errorcode.\n"; # ERRORCODE=13 | |
return($errormsg,$errorcode); | |
} | |
else | |
{ | |
print STDERR "ERROR: print_error: errormsg $errormsg, errorcode $errorcode.\n"; | |
exit; | |
} | |
} | |
else | |
{ | |
print STDERR "$errormsg"; | |
exit; | |
} | |
return($errormsg,$errorcode); | |
} | |
#------------------------------------------------------------------# | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment