Skip to content

Instantly share code, notes, and snippets.

@thanhleviet
Last active May 3, 2023 20:52
Show Gist options
  • Save thanhleviet/64ea913b8e02b186b336258d113c21d8 to your computer and use it in GitHub Desktop.
Save thanhleviet/64ea913b8e02b186b336258d113c21d8 to your computer and use it in GitHub Desktop.
Simple Python script that scans a directory for paired-end (PE) files matching a list of patterns and writes the results to a CSV file with three columns: sample_id, forward, reverse
import os
import csv
import argparse
def scan_paired_end_files(dir_path, pattern_list):
    """Find paired-end read files in a directory and list them in a CSV.

    Each entry of ``dir_path`` whose name ends with one of the forward-read
    suffixes in ``pattern_list`` is treated as a forward file. The sample ID
    is the file name with that suffix removed, and the matching reverse file
    is expected at ``<sample_id>_2<suffix without its first two characters>``.
    Samples for which both files exist are written to
    ``<dir_path>/paired_end_files.csv`` with the header
    ``sample_id, forward, reverse``; the others are reported on stdout.

    Args:
        dir_path: Directory to scan. Only its direct entries are examined.
        pattern_list: Forward-read file name suffixes. Each suffix is assumed
            to start with a two-character read marker such as ``"_1"``,
            because the reverse suffix is built by replacing those two
            characters with ``"_2"``. Empty suffixes are ignored.

    Returns:
        The list of ``(sample_id, forward_path, reverse_path)`` tuples that
        was written to the CSV file, in file-name order.
    """
    samples = []

    # os.listdir() returns entries in arbitrary order; sorting keeps the CSV
    # reproducible from run to run and across file systems.
    for file_name in sorted(os.listdir(dir_path)):
        for pattern in pattern_list:
            # An empty suffix would match every entry, and file_name[:-0]
            # would produce an empty sample ID, so skip it outright.
            if not pattern or not file_name.endswith(pattern):
                continue

            sample_id = file_name[: -len(pattern)]
            forward_path = os.path.join(dir_path, file_name)
            # Swap the leading read marker (e.g. "_1") for "_2".
            reverse_path = os.path.join(dir_path, sample_id + "_2" + pattern[2:])

            # isfile() rather than exists(): a directory whose name happens
            # to end with the suffix is not a read file.
            if os.path.isfile(forward_path) and os.path.isfile(reverse_path):
                samples.append((sample_id, forward_path, reverse_path))
            else:
                print(f"Error: Paired-end files not found for sample ID {sample_id}")

    # Write the sample IDs and file paths to a csv file next to the reads.
    output_file = os.path.join(dir_path, "paired_end_files.csv")
    with open(output_file, "w", newline="", encoding="utf-8") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["sample_id", "forward", "reverse"])
        writer.writerows(samples)

    return samples
if __name__ == "__main__":
    # Command-line entry point: read the target directory and the optional
    # forward-read suffixes, then hand both to scan_paired_end_files().
    default_suffixes = ["_1.non_host.fastq.gz", "_1_non_host.fastq.gz"]

    cli = argparse.ArgumentParser(
        description="Scan paired-end fastq files and generate a CSV file.",
    )
    cli.add_argument(
        "dir_path",
        help="the directory containing the fastq files",
    )
    cli.add_argument(
        "--pattern",
        nargs="*",
        default=default_suffixes,
        help="the file name pattern(s) to search for",
    )

    options = cli.parse_args()
    scan_paired_end_files(options.dir_path, options.pattern)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment