Skip to content

Instantly share code, notes, and snippets.

@TTTPOB
Created April 28, 2024 08:45
Show Gist options
  • Save TTTPOB/8bce0682776dca031f26bf9bdb2b6fbc to your computer and use it in GitHub Desktop.
Save TTTPOB/8bce0682776dca031f26bf9bdb2b6fbc to your computer and use it in GitHub Desktop.
mark pam
#!/usr/bin/env python3
import argparse
import re
from pyfaidx import Fasta
mainchr_re = re.compile(r"chr[0-9XY]{1,2}$")
pam_re = re.compile(r"(?=(.GG))")
def is_main_chromosome(chrom):
return mainchr_re.match(chrom)
def find_pam_sites(seq, chrom):
def process_matches(seq, strand):
for match in pam_re.finditer(seq):
if strand == "+":
s = seq[match.start() : match.start() + 3]
start = match.start()
end = start + 1
else:
s = seq[match.start() : match.start() + 3]
end = len(seq) - match.start()
start = end - 1
print(f"{chrom}\t{start}\t{end}\t{s}\t.\t{strand}")
process_matches(seq, "+")
seq_rc = seq.translate(str.maketrans("ATCG", "TAGC"))[::-1]
process_matches(seq_rc, "-")
def process_fasta(fasta_file):
fasta = Fasta(fasta_file)
for chrom in fasta.keys():
if is_main_chromosome(chrom):
seq = str(fasta[chrom]).upper()
find_pam_sites(seq, chrom)
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="Find PAM sites in a genome FASTA file."
)
parser.add_argument("fasta_file", type=str, help="Path to the genome FASTA file")
args = parser.parse_args()
process_fasta(args.fasta_file)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment