Skip to content

Instantly share code, notes, and snippets.

@jasonsahl
Last active October 13, 2021 16:35
Show Gist options
  • Save jasonsahl/990d2c56c23bb5c2909d to your computer and use it in GitHub Desktop.
Save jasonsahl/990d2c56c23bb5c2909d to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
"""randomly selects N number
of genomes from a reference
directory. All reference genomes
must end in .fasta.
written by Jason Sahl
"""
import errno
import glob
import optparse
import os
import random
import shutil
import sys
def select_genomes(seq_dir, keep, out):
    """Randomly pick ``keep`` FASTA file names from ``seq_dir``.

    Args:
        seq_dir: directory searched (non-recursively) for ``*.fasta`` files.
        keep: number of files to select; converted with ``int()``.
        out: unused; retained so existing callers keep working.

    Returns:
        A list of ``keep`` distinct file base names (no directory part),
        in random order.

    Raises:
        ValueError: if ``keep`` is negative or larger than the number of
            ``*.fasta`` files found in ``seq_dir``.
    """
    # Base names inside a single directory are already unique, so no set()
    # is needed.  random.sample() rejects sets on Python 3.11+, and sorting
    # makes seeded runs reproducible regardless of glob's ordering.
    candidates = sorted(
        os.path.basename(path)
        for path in glob.glob(os.path.join(seq_dir, '*.fasta'))
    )
    wanted = int(keep)
    if wanted < 0 or wanted > len(candidates):
        raise ValueError(
            "cannot select %d genomes from %d .fasta files in %s"
            % (wanted, len(candidates), seq_dir)
        )
    return random.sample(candidates, wanted)
def copy_hits(seq_dir, out_dir, outseqs):
    """Copy the selected FASTA files from ``seq_dir`` into ``out_dir``.

    Args:
        seq_dir: directory holding the candidate ``*.fasta`` files.
        out_dir: existing directory that receives the copies.
        outseqs: iterable of file base names (as returned by
            ``select_genomes``) to copy.

    Raises:
        OSError: if a file cannot be copied (e.g. ``out_dir`` is missing
            or not writable).
    """
    # Build the lookup once; membership tests against a list are O(n) each.
    wanted = set(outseqs)
    for path in glob.glob(os.path.join(seq_dir, '*.fasta')):
        name = os.path.basename(path)
        if name in wanted:
            # shutil.copy avoids the shell entirely, so paths containing
            # spaces or shell metacharacters are handled correctly and
            # failures raise instead of being silently ignored.
            shutil.copy(path, os.path.join(out_dir, name))
def test_dir(option, opt_str, value, parser):
    """optparse callback that accepts ``value`` only if it is a directory.

    Args:
        option: the ``optparse.Option`` being processed; its ``dest``
            names the attribute to set.
        opt_str: the option string seen on the command line (unused).
        value: the path supplied by the user.
        parser: the ``OptionParser``; the path is stored on
            ``parser.values``.

    Raises:
        SystemExit: with a non-zero status if ``value`` is not an
            existing directory.
    """
    if not os.path.isdir(value):
        # Passing a string makes sys.exit() write it to stderr and exit
        # with status 1, so calling scripts can detect the failure.
        sys.exit("directory of fastas cannot be found")
    setattr(parser.values, option.dest, value)


# The name matches pytest's default ``test_*`` pattern; mark it so test
# collectors never mistake this option callback for a test function.
test_dir.__test__ = False
def main(directory, keep, out_dir):
    """Create ``out_dir`` and copy ``keep`` random genomes into it.

    Args:
        directory: directory containing the reference ``*.fasta`` files.
        keep: number of genomes to select.
        out_dir: output directory; it must not exist yet.

    Raises:
        SystemExit: with a non-zero status if ``out_dir`` already exists
            or cannot be created.
    """
    seq_dir = os.path.abspath(directory)
    out = os.path.abspath(out_dir)
    try:
        os.makedirs(out)
    except OSError as exc:
        # Only report "exists" when that is the real cause; other failures
        # (permissions, bad path) get their own message.  A string argument
        # makes sys.exit() print to stderr and exit with status 1.
        if exc.errno == errno.EEXIST:
            sys.exit("out directory exists...remove and try again")
        sys.exit("could not create out directory %s: %s" % (out, exc))
    outseqs = select_genomes(seq_dir, keep, out)
    copy_hits(seq_dir, out, outseqs)
if __name__ == "__main__":
    # Command-line entry point: parse the three required options, check
    # that each was supplied, then hand off to main().
    usage = "usage: %prog [options]"
    parser = optparse.OptionParser(usage=usage)
    # -d is validated while parsing by the test_dir callback.
    parser.add_option("-d", "--directory", dest="directory",
                      help="/path/to/fasta_directory [REQUIRED]",
                      type="string", action="callback", callback=test_dir)
    parser.add_option("-k", "--keep", dest="keep",
                      help="number of genomes to keep [REQUIRED]",
                      action="store", type="int")
    parser.add_option("-o", "--out_dir", dest="out_dir",
                      help="/path/to/output_directory [REQUIRED]",
                      type="string", action="store")
    options, args = parser.parse_args()
    mandatories = ["directory", "keep", "out_dir"]
    for m in mandatories:
        # Falsy values (missing option, empty string, or 0 for --keep)
        # are all treated as "not provided".
        if not getattr(options, m, None):
            print("\nMust provide %s.\n" % m)
            parser.print_help()
            # sys.exit rather than the bare exit() builtin, which is only
            # injected by the site module and is absent under `python -S`.
            sys.exit(-1)
    main(options.directory, options.keep, options.out_dir)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment