-
-
Save jasonsahl/1281a2ae7f10382c773fec9bf7906d0c to your computer and use it in GitHub Desktop.
subsample fastq reads
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
#subsampling and read mapping
#for script information/arguments: "python enrichment_sub.py --help"
import argparse
import glob
import os
import random
import shutil
import sys
from subprocess import Popen, PIPE
# Parse the command line. All three options are marked required: the glob
# pattern and the seqtk invocations further down cannot be built without them,
# and argparse then reports a clear usage error instead of a late failure.
parser = argparse.ArgumentParser()
parser.add_argument("-d", "--directory", type=str, required=True,
                    help="directory of fastqs")
parser.add_argument("-i", "--iterations", type=int, required=True,
                    help="number of iterations")
parser.add_argument("-s", "--samplingdepth", type=int, required=True,
                    help="number of reads to subsample")
args = parser.parse_args()
# os.path.join inserts the path separator when needed, so the directory may be
# given with or without a trailing slash.
fastqs = glob.glob(os.path.join(args.directory, "*_R1_*fastq.gz"))
def is_tool(name):
    """Return True if an executable called *name* can be located, else False.

    shutil.which is used for the lookup because distutils, which previously
    supplied find_executable, was removed from the standard library in
    Python 3.12.
    """
    return shutil.which(name) is not None
# Abort before doing any work when seqtk is missing: every subsampling step
# below runs it as an external command.
result = is_tool("seqtk")
if not result:
    print("seqtk is not installed but needs to be...exiting")
    # Non-zero status so that calling shells/pipelines can detect the failure.
    sys.exit(1)
def _subsample(seed, fastq, depth, out_path):
    """Run `seqtk sample -s<seed> <fastq> <depth> | gzip` and save it to *out_path*.

    The two processes are connected with a pipe and started from argument
    lists, so file names containing spaces or shell metacharacters are passed
    through unchanged instead of being interpreted by a shell.
    """
    with open(out_path, "wb") as out_handle:
        seqtk = Popen(["seqtk", "sample", "-s%d" % seed, fastq, str(depth)],
                      stdout=PIPE)
        gzip_proc = Popen(["gzip"], stdin=seqtk.stdout, stdout=out_handle)
        # Drop our copy of the pipe so seqtk gets SIGPIPE if gzip exits early.
        seqtk.stdout.close()
        gzip_proc.wait()
        seqtk.wait()


# For every forward-read file, draw `--iterations` distinct seeds from 0..999
# and subsample both mates with the same seed so the read pairs stay in sync.
for r1_path in fastqs:
    r1_dir, r1_name = os.path.split(r1_path)
    # Swap the read tag in the file name only, never in the directory part.
    r2_name = r1_name.replace("_R1_", "_R2_")
    r2_path = os.path.join(r1_dir, r2_name)
    seeds = random.sample(range(1000), args.iterations)
    for seed in seeds:
        # Outputs go to the current working directory as "<seed>_<file name>".
        # The base name is used because str.strip() removes a set of
        # characters from both ends rather than a directory prefix.
        outfastqR1 = "%d_%s" % (seed, r1_name)
        outfastqR2 = "%d_%s" % (seed, r2_name)
        _subsample(seed, r1_path, args.samplingdepth, outfastqR1)
        _subsample(seed, r2_path, args.samplingdepth, outfastqR2)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment