Skip to content

Instantly share code, notes, and snippets.

@hyphaltip
Last active December 17, 2018 05:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hyphaltip/8fec03236264341bf1a19027f0504c54 to your computer and use it in GitHub Desktop.
Save hyphaltip/8fec03236264341bf1a19027f0504c54 to your computer and use it in GitHub Desktop.
ortho2pattern
#!/usr/bin/env python3
import csv
# Default input table (tab-delimited, one orthogroup per row) and report path.
infile = 'Orthogroups.csv'
outfile = 'phyletic_patterns.txt'


def count_patterns(rows):
    """Count how many orthogroups share each species presence pattern.

    rows is an iterable of lists of strings (e.g. a csv.reader). The first
    item is the header: column 0 labels the orthogroup-ID column and the
    remaining columns are species names. Every following row holds the
    orthogroup ID in column 0 and, per species column, a ", "-separated
    list of genes (empty when the species has no gene in that orthogroup).

    Returns a dict mapping a pattern (the comma-joined, sorted names of the
    species that have at least one gene in the orthogroup) to the number of
    orthogroups showing that pattern. An empty input yields an empty dict.
    """
    rows = iter(rows)
    header = next(rows, None)
    patterns = dict()
    if header is None:
        # no header at all: nothing to count
        return patterns
    # header[0] belongs to the orthogroup-ID column, so the species names
    # start at header[1] and line up one-to-one with row[1:]
    species_names = header[1:]
    for row in rows:
        if not row:
            # csv.reader yields [] for blank lines; they are not orthogroups
            continue
        # a species is present as soon as its cell is non-blank; the
        # individual gene names do not matter for the pattern
        present = {name
                   for name, genes in zip(species_names, row[1:])
                   if genes.strip()}
        pattern = ",".join(sorted(present))
        patterns[pattern] = patterns.get(pattern, 0) + 1
    return patterns


def format_report(patterns):
    """Return the report lines ("<count>\\t<pattern>\\n"), most abundant first.

    Patterns with equal counts keep the order in which they were first seen.
    """
    # sort by abundance for a better report format
    # (eg. the most abundant pattern comes first)
    # https://github.com/biodataprog/Class_Examples/blob/master/Dictionaries/sort_dictionary_by_value.py
    ordered = sorted(patterns, key=patterns.__getitem__, reverse=True)
    return ["%d\t%s\n" % (patterns[pattern], pattern) for pattern in ordered]


def main(in_path=infile, out_path=outfile):
    """Read the orthogroup table at in_path and write the report to out_path."""
    # newline='' lets the csv module do its own line-ending handling
    with open(in_path, newline='') as csvfile:
        # columns with gene info by species are tab delimited
        patterns = count_patterns(csv.reader(csvfile, delimiter="\t"))
    with open(out_path, 'w') as rpt:
        rpt.writelines(format_report(patterns))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment