Skip to content

Instantly share code, notes, and snippets.

@walterst
Created September 18, 2014 23:00
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save walterst/9ddb926fece4b7c0e12c to your computer and use it in GitHub Desktop.
Save walterst/9ddb926fece4b7c0e12c to your computer and use it in GitHub Desktop.
# Usage: python parse_to_7_taxa_levels.py X Y # where X is the input taxonomy mapping file, Y is the output taxonomy mapping file # Purpose is to parse output of Mike Robeson's script to force taxa into # 7 levels.
#!/usr/bin/env python
from sys import argv
# Usage: python parse_to_7_taxa_levels.py X Y
# where X is the input taxonomy mapping file, Y is the output taxonomy mapping file
# Purpose is to parse output of Mike Robeson's script to force taxa into
# 7 levels.
def force_to_seven_levels(taxa):
    """Collapse or pad a list of taxonomy levels into a 7-level lineage.

    The depth of the lineage is the index of the first entry that is 5 or 6
    characters long, or ``len(taxa)`` when there is no such entry.  Entries
    of that length are treated as levels without a name (presumably bare
    rank prefixes such as "D_3__" or "D_10__" -- confirm against the
    upstream format).

    * depth == 7: the first 7 levels are kept as they are.
    * depth < 7: every level from ``depth`` up to the 7th gets the name of
      the last named level appended (the text between its first "__" and
      any following "__"), then the first 7 levels are kept.
    * depth > 7: the first 4 levels plus the last 3 named levels are kept.

    Args:
        taxa: list of level strings, as obtained by splitting a lineage
            on ";".  The list is not modified.

    Returns:
        The selected levels joined with ";".

    Raises:
        IndexError: if padding is needed but ``taxa`` has fewer than 7
            entries, or if the last named level contains no "__".
    """
    taxa_depth = len(taxa)
    for index, curr_taxa in enumerate(taxa):
        if len(curr_taxa) in (5, 6):
            taxa_depth = index
            break

    if taxa_depth == 7:
        return ";".join(taxa[0:7])

    if taxa_depth < 7:
        # Work on a copy so the caller's list is left untouched.
        padded = list(taxa)
        # NOTE(review): when the very first level is empty, taxa_depth - 1
        # is -1 and the name is taken from the last entry of the list.
        last_named_level = padded[taxa_depth - 1].split('__')[1]
        # Fill the unnamed levels with the name of the last named level.
        for n in range(taxa_depth, 7):
            padded[n] = padded[n] + last_named_level
        return ";".join(padded[0:7])

    # More than 7 named levels: first 4 levels plus the last 3 named levels.
    return ";".join(taxa[0:4] + taxa[taxa_depth - 3:taxa_depth])


def parse_taxonomy_line(line):
    """Convert one line of the taxonomy mapping into its 7-level form.

    The first whitespace-delimited field is the identifier; the remaining
    fields are re-joined with single spaces and split on ";" to obtain the
    taxonomy levels.

    Args:
        line: one line of the input taxonomy mapping file.

    Returns:
        A ``(identifier, lineage)`` tuple, or ``None`` when the line is
        empty or contains only whitespace.
    """
    fields = line.split()
    if not fields:
        # Blank lines carry no record; skip them instead of failing on
        # the identifier lookup.
        return None
    curr_id = fields[0]
    taxa = ' '.join(fields[1:]).split(';')
    return curr_id, force_to_seven_levels(taxa)


def main(args):
    """Read the mapping named by ``args[1]`` and write it to ``args[2]``.

    Each non-blank input line is written as "<id><TAB><lineage><NEWLINE>".

    Args:
        args: command-line argument list in ``sys.argv`` layout (program
            name, input path, output path).

    Raises:
        SystemExit: with a usage message when fewer than two paths are
            given.
    """
    if len(args) < 3:
        raise SystemExit("Usage: python parse_to_7_taxa_levels.py X Y")
    # Plain "r" mode: the "U" flag was removed in Python 3.11, and text mode
    # already applies universal-newline handling on Python 3.  The context
    # manager closes (and flushes) both files even if a line fails.
    with open(args[1], "r") as taxa_mapping, \
            open(args[2], "w") as parsed_taxa:
        for line in taxa_mapping:
            parsed = parse_taxonomy_line(line)
            if parsed is None:
                continue
            parsed_taxa.write("%s\t%s\n" % parsed)


if __name__ == "__main__":
    main(argv)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment