Skip to content

Instantly share code, notes, and snippets.

@eclarke
Created June 26, 2013 19:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save eclarke/5870729 to your computer and use it in GitHub Desktop.
Save eclarke/5870729 to your computer and use it in GitHub Desktop.
Taxonomy fixer
#!/usr/bin/env python
"""Usage: python taxonomy_fixer.py [FILE]
Converts an ITS taxonomy file to eliminate taxa marked as unidentified,
swaps [kpcofg]__unidentified;s__Fungi to k__Fungi, and eliminates species
taxa that are simply s__[genus]_sp.
Writes to stdout.
"""
# Erik Clarke <ecl@mail.med.upenn.edu>
import sys
# Zero-based position of the species entry in a lineage ordered
# kingdom;phylum;class;order;family;genus;species.
SPECIES_RANK_INDEX = 6


def fix_line(line):
    """Clean one ``<otu>\\t<taxon>;<taxon>;...`` record.

    The lineage is truncated at the first taxon that is uninformative:

    * ``k__unidentified`` is replaced by ``k__Fungi`` and ends the lineage;
    * any taxon containing ``unidentified`` or ``uncultured`` ends the
      lineage and is itself dropped;
    * a taxon at the species position that ends in ``_sp.`` is dropped.

    Args:
        line: One raw line of the taxonomy file, with or without its
            trailing newline (``\\n`` or ``\\r\\n``).

    Returns:
        The cleaned record without a trailing newline, or ``None`` when
        the line is blank.

    Raises:
        ValueError: If the line does not consist of exactly two
            tab-separated fields.
    """
    # Strip '\r' as well as '\n' so CRLF files do not leave a stray
    # carriage return that would defeat the '_sp.' suffix check.
    record = line.rstrip('\r\n')
    if not record.strip():
        return None

    fields = record.split('\t')
    if len(fields) != 2:
        raise ValueError(
            'expected exactly two tab-separated fields '
            '(OTU id and taxonomy), got %d: %r' % (len(fields), record))
    otu, lineage = fields

    kept = []
    for rank, taxon in enumerate(lineage.split(';')):
        if taxon == 'k__unidentified':
            kept.append('k__Fungi')
            break
        if 'unidentified' in taxon or 'uncultured' in taxon:
            break
        if rank == SPECIES_RANK_INDEX and taxon.endswith('_sp.'):
            break
        kept.append(taxon)
    return otu + '\t' + ';'.join(kept)


def main(argv=None):
    """Read a taxonomy file (or stdin) and write cleaned records to stdout.

    Args:
        argv: Argument vector; defaults to ``sys.argv``. ``argv[1]``, when
            present, is the path of the input file; otherwise stdin is read.

    If the input file cannot be opened, the module usage text is printed
    and the process exits with status 1.
    """
    if argv is None:
        argv = sys.argv
    try:
        inf = open(argv[1]) if len(argv) > 1 else sys.stdin
    except (IOError, OSError):
        # Only I/O failures mean "bad FILE argument"; anything else
        # (e.g. KeyboardInterrupt) should propagate normally.
        print(__doc__)
        sys.exit(1)

    try:
        for line in inf:
            fixed = fix_line(line)
            if fixed is not None:
                sys.stdout.write(fixed + '\n')
    finally:
        # Close the file we opened even if a malformed line raised;
        # stdin is left for the interpreter to clean up.
        if inf is not sys.stdin:
            inf.close()


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment