Skip to content

Instantly share code, notes, and snippets.

@EthanHolleman
Created January 3, 2022 23:17
Show Gist options
  • Save EthanHolleman/2ac437ef3c94b28095e19b5ee46ae7eb to your computer and use it in GitHub Desktop.
Save EthanHolleman/2ac437ef3c94b28095e19b5ee46ae7eb to your computer and use it in GitHub Desktop.
Python script to simulate 100% efficient bisulfite treatment by converting all C nucleotides in a sequence to T nucleotides. Inputs fasta or genbank files.
#!/usr/bin/env python3
from pathlib import Path
from Bio import SeqIO
from Bio.Seq import Seq
from argparse import ArgumentParser
FORMAT_DICT = {
".fa": "fasta",
".fasta": "fasta",
".gb": "genbank",
".genbank": "genbank",
}
def read_seqs(filepath):
path = Path(filepath)
if path.suffix in FORMAT_DICT:
file_type = FORMAT_DICT[path.suffix]
else:
raise TypeError("File must be fasta or genbank")
return SeqIO.parse(str(path), file_type)
def treat_bisulfite(record):
"""Simulate 100% efficient bisulfite treatment by converting all C nucleotides
in a sequence record to T nucleotides.
Args:
record (SeqRecord): BioPython SeqRecord object.
"""
seq_string = str(record.seq).upper()
treated = record.seq.replace("C", "T")
record.seq = Seq(treated)
return record
def get_args():
parser = ArgumentParser(
"Simulate 100% efficient bisulfite treatment by \
converting all C nucleotides in a sequence record to T nucleotides." \
)
parser.add_argument(
"F",
help="Fasta or genbank file containing sequences \
to simulate bisulfite treatment.",
)
parser.add_argument(
"-O",
default=None,
help='Output path to write converted file. \
Default is input filepath with ".bisulfite" appended.',
)
args = parser.parse_args()
if args.O == None:
P = Path(args.F)
args.O = P.with_suffix(f".bisulfite{P.suffix}")
return args
def write_records(records, filepath):
path = Path(filepath)
SeqIO.write(records, str(path), format=FORMAT_DICT[path.suffix])
def main():
args = get_args()
print(args.O)
records = read_seqs(args.F)
treated_records = [treat_bisulfite(record) for record in records]
write_records(treated_records, args.O)
if __name__ == "__main__":
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment