Last active
September 20, 2019 17:20
-
-
Save rasi/2b79709968267f8b6a6b8e1f88222a77 to your computer and use it in GitHub Desktop.
Find MoClo-YTK overhangs in input sequence
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
"""Find MoClo-YTK overhangs in input sequence | |
@author: Arvind R. Subramaniam | |
@date: 15 Aug 2019 | |
These overhangs can be used for BsmBI-mediated | |
cloning into the pYTK001 part entry vector | |
to mutate or to combine fragments. | |
""" | |
import sys | |
from Bio.Seq import Seq | |
# see Table S4 in
# https://pubs.acs.org/doi/suppl/10.1021/acssynbio.8b00333/suppl_file/sb8b00333_si_001.pdf
# for these overhangs.
# Overhangs differing by one nucleotide or less from TCGG and GACC or their
# reverse complements were manually removed.
# NOTE(review): the original note read "<1nt"; "<=1 nt" is presumably what
# was meant -- confirm against the table.
sites = [
    'GGAG',
    'ATCA',
    'GATA',
    'TCTT',
    'GGCA',
    'AGGT',
    'CAAA',
    'AAGC',
    'GAGG',
    'GCAC',
    'CAGT',
    'CAAC',
    'GTAA',
    'AACG',
    'TCCA',
    'CGAA',
    'CACA',
    'GTCT',
    'GAAT',
    'ATAG',
    'CCAT',
    'AGTA',
]


def find_positions(seq, site):
    """Return the 1-based start of every occurrence of `site` in `seq`.

    Occurrences are reported in ascending order and may overlap
    (e.g. 'CACA' occurs at positions 1 and 3 of 'CACACA').  The search is
    case-sensitive; callers are expected to normalise case beforehand.
    An empty `site` yields an empty list.
    """
    positions = []
    if not site:
        # str.find('') matches at every index; that is never a real hit.
        return positions
    start = seq.find(site)
    while start != -1:
        positions.append(start + 1)
        # Resume one character further on so overlapping hits are kept.
        start = seq.find(site, start + 1)
    return positions


def format_hits(seq, overhangs):
    """Return one '<overhang> at pos <N>.' line per occurrence in `seq`.

    Lines are grouped by overhang, in the order given by `overhangs`, and
    sorted by position within each overhang.
    """
    lines = []
    for site in overhangs:
        for pos in find_positions(seq, site):
            lines.append(f'{site} at pos {pos}.')
    return lines


def main(argv):
    """Print every listed overhang (either strand) found in ``argv[1]``.

    `argv` has the shape of ``sys.argv``: program name followed by exactly
    one sequence.  Exits with an error message otherwise.  The sequence is
    upper-cased before searching.  Forward overhangs are reported first,
    then their reverse complements.
    """
    if len(argv) != 2:
        sys.exit("You need to give exactly one input sequence.")
    seq = argv[1].upper()
    sites_rc = [str(Seq(x).reverse_complement()) for x in sites]
    for line in format_hits(seq, sites + sites_rc):
        print(line)


if __name__ == "__main__":
    main(sys.argv)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment