Skip to content

Instantly share code, notes, and snippets.

@bede
Last active October 19, 2023 16:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bede/89c90755dd29490f33047e52f97078ad to your computer and use it in GitHub Desktop.
Save bede/89c90755dd29490f33047e52f97078ad to your computer and use it in GitHub Desktop.
Concatenate demultiplexed ONT FASTQs by barcode (for one or more runs)
"""
Purpose: Concatenate demultiplexed FASTQs by barcode for one or more ONT runs
Usage: python concat_by_barcode.py run1/fastq_pass run2/fastq_pass
Author: Bede Constantinides
"""
import subprocess
import sys
from collections import defaultdict
from pathlib import Path
def concatenate_fastqs_by_barcode(fastq_pass_dirs: list[Path]) -> None:
    """Concatenate per-barcode FASTQ files from one or more run directories.

    For each of ``barcode01`` to ``barcode96``, every file whose name ends in
    ``.fastq.gz`` and that sits directly inside ``<dir>/<barcode>/`` is
    appended to ``<barcode>.fastq.gz`` in the current working directory.
    Directories are processed in the order given; files within one directory
    are processed in sorted order so the result is reproducible. Barcodes
    with no matching input files produce no output file. An existing output
    file of the same name is replaced.

    Inputs are copied byte-for-byte in Python rather than through a shell
    command, so paths containing spaces or shell metacharacters are handled
    correctly and I/O failures raise instead of being silently ignored.

    Args:
        fastq_pass_dirs: Directories containing per-barcode subdirectories.

    Raises:
        OSError: If an input file cannot be read or an output file cannot be
            written.
    """
    import shutil  # function-scope import keeps this block self-contained

    fq_ext = ".fastq.gz"
    barcodes = [f"barcode{i:02d}" for i in range(1, 97)]

    # Map each barcode to the ordered list of input files found for it.
    barcodes_paths = defaultdict(list)
    for barcode in barcodes:
        for d in fastq_pass_dirs:
            barcode_dir = Path(d) / barcode
            # is_dir() rather than exists(): iterdir() fails on a plain file.
            if not barcode_dir.is_dir():
                continue
            barcodes_paths[barcode].extend(
                sorted(
                    str(f)
                    for f in barcode_dir.iterdir()
                    if f.name.endswith(fq_ext)
                )
            )

    for barcode, fastq_paths in barcodes_paths.items():
        if not fastq_paths:
            continue
        output_path = Path(f"{barcode}{fq_ext}")
        # Remove any stale output first so a pre-existing symlink or hard
        # link is replaced rather than written through.
        output_path.unlink(missing_ok=True)
        with open(output_path, "wb") as out_fh:
            for fastq_path in fastq_paths:
                with open(fastq_path, "rb") as in_fh:
                    shutil.copyfileobj(in_fh, out_fh)
        print(f"Created {output_path} from files {fastq_paths}", file=sys.stderr)
if __name__ == "__main__":
    # Every command-line argument is a run directory containing
    # per-barcode subdirectories.
    concatenate_fastqs_by_barcode(list(map(Path, sys.argv[1:])))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment