Skip to content

Instantly share code, notes, and snippets.

@jonchang
Created March 13, 2013 10:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jonchang/5151081 to your computer and use it in GitHub Desktop.
Save jonchang/5151081 to your computer and use it in GitHub Desktop.
Converts character data (e.g., DNA) from one format to another using DendroPy.
#!/usr/bin/env python
# convert_characters.py -- by Jonathan Chang (March 2013)
import argparse
import os.path
import multiprocessing
import functools
import dendropy
def get_args(argv=None):
    """Build the command-line parser and parse the arguments.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``, which is
            what the script entry point relies on.

    Returns:
        An ``argparse.Namespace`` with the attributes ``files``,
        ``input_format``, ``output_format``, ``type``, ``prefix``,
        ``basename`` and ``quiet``.
    """
    formats = ["nexus", "phylip", "fasta"]
    types = ["dna", "rna", "protein", "standard", "restriction", "infinite"]

    parser = argparse.ArgumentParser(
        description="convert character data from one format to another")
    parser.add_argument("files", nargs="+", help="list of file(s) to convert")

    # argparse lists every "--flag" under "optional arguments"; a dedicated
    # group makes the two mandatory flags stand out in --help.
    required = parser.add_argument_group("required arguments")
    required.add_argument(
        "--input-format", "-i", choices=formats, required=True,
        help="input format")
    required.add_argument(
        "--output-format", "-o", choices=formats, required=True,
        help="output format. also used as the extension for the output files")

    # --type has a default, so it belongs with the optional flags rather
    # than in the "required arguments" group.
    parser.add_argument(
        "--type", "-t", default="dna", choices=types,
        help="data type. ignored for nexus input. (default: dna)")
    parser.add_argument(
        "--prefix", default="",
        help="string to prepend to the output files.")
    parser.add_argument(
        "--basename", action="store_true",
        help="strips away any leading directory entries. useful when specifying a prefix that is a different directory.")
    parser.add_argument(
        "--quiet", action="store_true",
        help="don't send messages to stdout")
    return parser.parse_args(argv)
def convert(filename, args):
    """Convert one character-data file to ``args.output_format``.

    The output path is ``args.prefix`` + the input name with its extension
    replaced by the output format name (e.g. ``seqs.nex`` -> ``seqs.fasta``).

    Args:
        filename: Path of the file to read.
        args: Parsed options; reads ``input_format``, ``output_format``,
            ``type``, ``prefix``, ``basename`` and ``quiet``.

    Returns:
        None. The converted matrix is written to disk and, unless
        ``args.quiet`` is set, the output path is printed to stdout.
    """
    matrix = dendropy.CharacterMatrix.get_from_path(
        filename, args.input_format, data_type=args.type)

    if args.basename:
        # Drop leading directories so the prefix alone decides where the
        # output file ends up.
        filename = os.path.basename(filename)

    # Replace the extension with the output format name.
    stem = os.path.splitext(filename)[0]
    newname = args.prefix + stem + "." + args.output_format

    matrix.write_to_path(newname, args.output_format)

    # Honour --quiet ("don't send messages to stdout"). The parenthesised
    # single-argument form works on both Python 2 and Python 3.
    if not args.quiet:
        print(newname)
def main():
    """Parse the command line and convert every input file in parallel.

    Each file is handed to ``convert`` in a worker process of a
    ``multiprocessing.Pool`` created with the default worker count.
    """
    args = get_args()
    # Bind the shared options so the pool only has to supply the filename.
    worker = functools.partial(convert, args=args)

    pool = multiprocessing.Pool()
    try:
        pool.map(worker, args.files)
    finally:
        # Always release the worker processes, even if a conversion failed.
        pool.close()
        pool.join()
# Run only when executed as a script. The guard is also required by
# multiprocessing on platforms where worker processes re-import the main
# module: without it each worker would try to start the conversion again.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment