Avril Coghlan avrilcoghlan

## fix_embl_file.py
import sys
import os
from collections import defaultdict

#====================================================================#

# define a function to read in the genes that are in families, for our species of interest:

def find_genes_in_families(families_file, our_species_name, locus_tag):
    """read in the genes that are in families, for our species of interest """

## find_internal_stops.pl
#!/usr/bin/env perl

=head1 NAME

    find_internal_stops.pl

=head1 SYNOPSIS

    find_internal_stops.pl input_fasta
        where input_fasta is the input fasta file of protein translations.

## submit_crispresso_jobs_for_subsetsoffastq.py
import os
import sys

#====================================================================#

def submit_crispresso_jobs(sample_name, num_subsets):

    # need to submit a crispresso job for each subset of the data:
    for x in range(num_subsets):
        subset = x + 1 # eg. if num_subsets is 17, 'subset' goes from 1 to 17

## filter_fastq_files_using_trimmomatic.py
import os
import sys

#====================================================================#

def run_trimmomatic_for_subsets_of_data(sample_name, num_subsets):

    # need to run trimmomatic for each subset of the data:
    for x in range(num_subsets):
        subset = x + 1 # eg. if num_subsets is 17, 'subset' goes from 1 to 17

## split_up_fastq.py
import sys
import os
import gzip
from collections import defaultdict

#====================================================================#

# now read in the input fastq and split it up:

def read_fastq_file_and_split(input_fastq_file, seqs_per_output_file, output_file_prefix):

## calc_information_content_for_GO_terms.py
from collections import defaultdict
import sys
import os
import math

#====================================================================#

# define a function to record the children of each GO term in the GO hierarchy:

def read_go_children(input_go_obo_file):

## comparaFamiliesAnalysis.py
#!/usr/bin/env python3
#25-Feb-2015 dr7
#Analysis suite for exploring Compara families. The purpose is to gather information about Compara trees so that we can mine this large set of information and select the most interesting families to study.

import sys
import os
import re
import gzip
import random
import pickle

## submit_crispresso_jobs.py
import sys
import os
from collections import defaultdict

#====================================================================#

# define a function to read in a list of files:

def read_file_list(input_file_list):


## get_ltr_retrotransposon_seqs.pl
#!/usr/local/bin/perl

$fasta = $ARGV[0];
$gff = $ARGV[1];

# read in the gff file to find which sequences to take:
%TAKE = ();
$num_to_take = 0;
open(GFF,"$gff");
while(<GFF>)

## find_lca_of_go_terms.py
import sys
import os
from collections import defaultdict
import calc_dists_to_top_of_GO
import calc_dists_to_top_of_GO_using_bfs

class Error(Exception):
    """Exception type declared by this script; a plain subclass of Exception."""

#====================================================================#
	import sys
	import os
	from collections import defaultdict

	#====================================================================#

	# define a function to read in the genes that are in families, for our species of interest:

	def find_genes_in_families(families_file, our_species_name, locus_tag):
	"""read in the genes that are in families, for our species of interest """
	#!/usr/bin/env perl

	=head1 NAME

	find_internal_stops.pl

	=head1 SYNOPSIS

	find_internal_stops.pl input_fasta
	where input_fasta is the input fasta file of protein translations.
	import sys
	import os
	import gzip
	from collections import defaultdict

	#====================================================================#

	# now read in the input fastq and split it up:

	def read_fastq_file_and_split(input_fastq_file, seqs_per_output_file, output_file_prefix):
	#!/usr/bin/env python3
	#25-Feb-2015 dr7
	#Analysis suite for exploring Compara families. The purpose is to gather information about Compara trees so that we can mine this large set of information and select the most interesting families to study.

	import sys
	import os
	import re
	import gzip
	import random
	import pickle
	#!/usr/local/bin/perl

	$fasta = $ARGV[0];
	$gff = $ARGV[1];

	# read in the gff file to find which sequences to take:
	%TAKE = ();
	$num_to_take = 0;
	open(GFF,"$gff");
	while(<GFF>)