-
-
Save jasonsahl/990d2c56c23bb5c2909d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
"""randomly selects N number | |
of genomes from a reference | |
directory. All reference genomes | |
must end in .fasta. | |
written by Jason Sahl | |
""" | |
import random | |
import optparse | |
import sys | |
import os | |
import errno | |
import glob | |
def select_genomes(seq_dir, keep, out):
    """Randomly pick ``keep`` genome file names from ``seq_dir``.

    Only files matching ``*.fasta`` directly inside ``seq_dir`` are
    considered.

    Args:
        seq_dir: Directory that holds the reference ``.fasta`` files.
        keep: Number of genomes to select; anything ``int()`` accepts.
        out: Unused; kept so existing callers keep working.

    Returns:
        A list of ``keep`` distinct file base names (not full paths).

    Raises:
        ValueError: If ``keep`` is negative or larger than the number of
            ``.fasta`` files found.
    """
    # Sample from a sorted list rather than a set: random.sample() rejects
    # sets on Python 3.11+, and a stable order makes seeded runs repeatable.
    candidates = sorted(
        {os.path.basename(path)
         for path in glob.glob(os.path.join(seq_dir, "*.fasta"))}
    )
    wanted = int(keep)
    if wanted > len(candidates):
        raise ValueError(
            "cannot keep %d genomes: only %d .fasta files found in %s"
            % (wanted, len(candidates), seq_dir)
        )
    return random.sample(candidates, wanted)
def copy_hits(seq_dir, out_dir, outseqs):
    """Copy the selected genomes from ``seq_dir`` into ``out_dir``.

    Args:
        seq_dir: Directory containing the candidate ``.fasta`` files.
        out_dir: Destination path; when it is an existing directory each
            file is copied into it under its own name.
        outseqs: Iterable of file base names (as returned by
            ``select_genomes``) that should be copied.
    """
    # Imported here so the function brings its own dependency with it.
    import shutil

    # Build the lookup set once instead of scanning a list per file.
    selected = set(outseqs)
    for infile in glob.glob(os.path.join(seq_dir, "*.fasta")):
        if os.path.basename(infile) in selected:
            # shutil.copy never goes through a shell, so paths containing
            # spaces or shell metacharacters are copied correctly and a
            # failed copy raises instead of being silently ignored.
            shutil.copy(infile, out_dir)
def test_dir(option, opt_str, value, parser): | |
if os.path.exists(value): | |
setattr(parser.values, option.dest, value) | |
else: | |
print("directory of fastas cannot be found") | |
sys.exit() | |
def main(directory, keep, out_dir):
    """Create ``out_dir`` and copy ``keep`` randomly chosen genomes into it.

    Args:
        directory: Directory holding the reference ``.fasta`` files.
        keep: Number of genomes to select.
        out_dir: Output directory; it must not exist yet.

    Raises:
        SystemExit: With a non-zero status when ``out_dir`` already exists
            or cannot be created.
    """
    seq_dir = os.path.abspath(str(directory))
    out = os.path.abspath(str(out_dir))
    try:
        os.makedirs(out)
    except OSError as err:
        # Only report "exists" when that is the real cause; other failures
        # (permissions, bad path, ...) get their own message. A string
        # argument makes sys.exit() print to stderr and exit with status 1.
        if err.errno == errno.EEXIST:
            sys.exit("out directory exists...remove and try again")
        sys.exit("could not create out directory %s: %s" % (out, err.strerror))
    outseqs = select_genomes(seq_dir, keep, out)
    copy_hits(seq_dir, out, outseqs)
if __name__ == "__main__":
    # Command-line entry point: parse the three required options and run.
    usage = "usage: %prog [options]"
    parser = optparse.OptionParser(usage=usage)
    parser.add_option("-d", "--directory", dest="directory",
                      help="/path/to/fasta_directory [REQUIRED]",
                      type="string", action="callback", callback=test_dir)
    parser.add_option("-k", "--keep", dest="keep",
                      help="number of genomes to keep [REQUIRED]",
                      action="store", type="int")
    parser.add_option("-o", "--out_dir", dest="out_dir",
                      help="/path/to/output_directory [REQUIRED]",
                      type="string", action="store")
    options, args = parser.parse_args()
    mandatories = ["directory", "keep", "out_dir"]
    for m in mandatories:
        supplied = getattr(options, m, None)
        # Test for "not given" explicitly; a plain truthiness check would
        # report a legitimate ``--keep 0`` as a missing option.
        if supplied is None or supplied == "":
            print("\nMust provide %s.\n" % m)
            parser.print_help()
            # sys.exit rather than the site-provided exit() helper, which
            # is meant for interactive use and may be absent (python -S).
            sys.exit(-1)
    main(options.directory, options.keep, options.out_dir)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment