Skip to content

Instantly share code, notes, and snippets.

@jasonsahl
Last active June 17, 2024 21:33
Show Gist options
  • Save jasonsahl/1281a2ae7f10382c773fec9bf7906d0c to your computer and use it in GitHub Desktop.
Save jasonsahl/1281a2ae7f10382c773fec9bf7906d0c to your computer and use it in GitHub Desktop.
subsample fastq reads
#!/usr/bin/env python
#subsample paired-end fastq reads with seqtk (no read mapping is done in this script)
#for script information/arguments: "python enrichment_sub.py --help"
import sys
import glob
from subprocess import Popen, PIPE
import random
import argparse
import os
# Parse the command line. All three options are required: without a directory
# the glob pattern below is meaningless, and without an iteration count or a
# sampling depth the seqtk calls further down cannot be built.
parser = argparse.ArgumentParser()
parser.add_argument("-d", "--directory", type=str, required=True,
                    help="directory of fastqs")
parser.add_argument("-i", "--iterations", type=int, required=True,
                    help="number of iterations")
parser.add_argument("-s", "--samplingdepth", type=int, required=True,
                    help="number of reads to subsample")
args = parser.parse_args()
# Build the pattern with os.path.join so it is correct whether or not the
# directory was given with a trailing path separator.
fastqs = glob.glob(os.path.join(args.directory, "*_R1_*fastq.gz"))
def is_tool(name):
    """Return True if an executable called `name` can be located, else False.

    shutil.which is preferred because distutils (the original source of
    find_executable) was removed from the standard library in Python 3.12.
    The distutils lookup is kept only as a fallback for interpreters that
    predate shutil.which.
    """
    try:
        from shutil import which
    except ImportError:
        # Old interpreters without shutil.which still ship distutils.
        from distutils.spawn import find_executable as which
    return which(name) is not None
# seqtk performs the actual subsampling, so stop before doing any work if it
# cannot be found.
result = is_tool("seqtk")
if not result:
    print("seqtk is not installed but needs to be...exiting")
    # Exit with a non-zero status so a calling shell or pipeline can detect
    # the failure (a bare sys.exit() reports success).
    sys.exit(1)
def _subsample(seed, in_path, depth, out_path):
    """Write a gzip-compressed seqtk subsample of `in_path` to `out_path`.

    Equivalent to ``seqtk sample -s<seed> <in_path> <depth> | gzip > <out_path>``
    but the two processes are connected directly instead of through a shell
    command string, so paths containing spaces or shell metacharacters are
    passed through unmodified.

    Returns True when both seqtk and gzip exit with status 0.
    """
    with open(out_path, "wb") as out_handle:
        seqtk_proc = Popen(
            ["seqtk", "sample", "-s%d" % seed, in_path, str(depth)],
            stdout=PIPE,
        )
        gzip_proc = Popen(["gzip"], stdin=seqtk_proc.stdout, stdout=out_handle)
        # Drop our copy of the pipe so seqtk gets SIGPIPE if gzip exits early.
        seqtk_proc.stdout.close()
        gzip_status = gzip_proc.wait()
        seqtk_status = seqtk_proc.wait()
    return seqtk_status == 0 and gzip_status == 0


# For every R1 file, draw `iterations` distinct seeds and write one subsampled
# R1/R2 pair per seed into the current working directory, named
# "<seed>_<original file name>".
for R1file in fastqs:
    # The mate file carries the same name with "_R1_" swapped for "_R2_";
    # this does not depend on the seed, so compute it once per sample.
    R2file = R1file.replace("_R1_", "_R2_")
    # random.sample yields distinct seeds; it raises ValueError when more
    # than 1000 iterations are requested.
    seeds = random.sample(range(1000), args.iterations)
    for seed in seeds:
        # Use basename rather than str.strip(args.directory): strip() removes
        # any of the given *characters* from both ends of the string, which
        # can eat leading/trailing characters of the file name itself.
        outfastqR1 = "%d_%s" % (seed, os.path.basename(R1file))
        outfastqR2 = "%d_%s" % (seed, os.path.basename(R2file))
        # Both mates are sampled with the same seed and depth.
        for in_path, out_path in ((R1file, outfastqR1), (R2file, outfastqR2)):
            if not _subsample(seed, in_path, args.samplingdepth, out_path):
                # Keep going (the original ignored failures) but say so.
                sys.stderr.write(
                    "warning: subsampling %s with seed %d failed\n"
                    % (in_path, seed)
                )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment