Created
September 30, 2021 21:37
-
-
Save bluegenes/0ddd24affdd380b4e6f8f9d4b5d59c14 to your computer and use it in GitHub Desktop.
Split a FASTQ file into separate files, one entry per file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python | |
import os | |
import sys | |
import argparse | |
import screed | |
def make_outdir(output_dirname):
    """Create *output_dirname*, including missing parents, if it is absent.

    An empty name is treated as "the current directory", so nothing is
    created. Calling this for a directory that already exists is a no-op.

    Raises:
        OSError: if the directory cannot be created (for example a
            permission problem, or the path exists but is not a directory).
    """
    if not output_dirname:
        # os.makedirs("") raises; an empty path means "write here".
        return
    # exist_ok=True avoids the check-then-create race and removes the need
    # to inspect errno by hand.
    os.makedirs(output_dirname, exist_ok=True)
def main(args):
    """Write each record of ``args.fasta`` to its own file and index them.

    Reads these attributes from *args*:
        fasta: path of the input sequence file, opened with ``screed.open``.
        output_dir: directory for the per-record files; empty means the
            current working directory.
        output_csv: path of the CSV index (``name,length,filename``).
        prefix: optional string; when set, ``"<prefix>."`` is prepended to
            every per-record file name.

    Each record name is truncated at the first space and ``/`` is replaced
    by ``.`` so that it is usable as a file name.
    """
    print(f"Splitting {args.fasta} by entry. Writing files to {args.output_dir} \n")
    output_dir = args.output_dir or ""
    prefix = f"{args.prefix}." if args.prefix else ""

    # An empty output dir means "current directory": nothing to create.
    if output_dir:
        make_outdir(output_dir)

    # Tracked separately from the loop index so that an empty input file
    # reports 0 instead of failing on an unbound loop variable.
    n_written = 0
    with open(args.output_csv, 'w') as info_out:
        # store name,length,filename in a csv
        info_out.write("name,length,filename\n")
        for n, record in enumerate(screed.open(args.fasta)):
            if n > 0 and n % 1000 == 0:
                print(f"working on {str(n+1)}th entry\n")
            name = record.name.split(" ")[0]
            name = name.replace("/", ".")
            length = len(record.sequence)
            # os.path.join keeps an empty output_dir relative; the previous
            # f"{output_dir}/..." form pointed at the filesystem root.
            this_filename = os.path.join(output_dir, f"{prefix}{name}.fq")
            # NOTE(review): records are written FASTA-style (">" header, no
            # quality line) although the extension is .fq -- confirm which
            # format is intended.
            with open(this_filename, 'w') as out_fq:
                out_fq.write(f">{name}\n{record.sequence}\n")
            info_out.write(f"{name},{length},{this_filename}\n")
            n_written = n + 1
    print(f"{n_written} entries written to individual fasta files\n")
def cmdline(sys_args):
    """Command line entry point: parse *sys_args* and hand them to ``main``.

    Args:
        sys_args: list of argument strings, without the program name
            (typically ``sys.argv[1:]``).

    Returns:
        Whatever ``main`` returns; the caller uses it as the exit status.
    """
    p = argparse.ArgumentParser(
        description="Split a sequence file into one file per entry."
    )
    p.add_argument("fasta", help="input sequence file")
    p.add_argument("--output-dir", default="",
                   help="directory for the per-entry files")
    p.add_argument("--output-csv", required=True,
                   help="CSV file listing name,length,filename per entry")
    p.add_argument("--prefix", help="optional output prefix")
    # Parse the list we were given rather than implicitly re-reading
    # sys.argv, so the function honours its own parameter.
    args = p.parse_args(sys_args)
    return main(args)
# Script entry point: forward the command-line arguments (minus the program
# name) to cmdline() and use its return value as the process exit status.
if __name__ == '__main__':
    sys.exit(cmdline(sys.argv[1:]))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment