Last active
December 6, 2021 19:46
-
-
Save fomightez/a23a4edba03a6d573dd7f8ca28c09023 to your computer and use it in GitHub Desktop.
reply_to_biostars_9500386
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
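{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Reply to Biostars post https://www.biostars.org/p/9500386/\n", | |
"\n", | |
"This notebook is a reply to https://www.biostars.org/p/9500386/ and demonstrates aligning two protein sequences with Clustal Omega (`clustalo`)." | |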
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Biopython is already present in sessions launched from this repo." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
" % Total % Received % Xferd Average Speed Time Time Time Current\n", | |
" Dload Upload Total Spent Left Speed\n", | |
"100 886 100 886 0 0 4789 0 --:--:-- --:--:-- --:--:-- 4789\n" | |
] | |
} | |
], | |
"source": [ | |
"# Get example sequences\n", | |
"!mv ../data/S288C_YMR054W_STV1_protein.fsa .\n", | |
"!curl -o S288C_YOR270C_VPH1_protein.fsa https://gist.githubusercontent.com/fomightez/f46b0624f1d8e3abb6ff908fc447e63b/raw/7ef7cfdaa2c9f9974f22fd60be3cfe7d1935cd86/ux_S288C_YOR270C_VPH1_protein.fsa" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# concatenate the two FASTA files into one multi-entry FASTA\n", | |
"!cat S288C_YMR054W_STV1_protein.fsa <(echo) S288C_YOR270C_VPH1_protein.fsa > seqs.fasta\n", | |
"# adding a newline between them (the `<(echo)` part) is based on https://stackoverflow.com/a/23549826/8508004 ; contrast with\n", | |
"#!cat S288C_YMR054W_STV1_protein.fsa S288C_YOR270C_VPH1_protein.fsa > seqs.fasta" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Collecting package metadata (current_repodata.json): done\n", | |
"Solving environment: done\n", | |
"\n", | |
"\n", | |
"==> WARNING: A newer version of conda exists. <==\n", | |
" current version: 4.9.2\n", | |
" latest version: 4.11.0\n", | |
"\n", | |
"Please update conda by running\n", | |
"\n", | |
" $ conda update -n base conda\n", | |
"\n", | |
"\n", | |
"\n", | |
"## Package Plan ##\n", | |
"\n", | |
" environment location: /srv/conda/envs/notebook\n", | |
"\n", | |
" added / updated specs:\n", | |
" - clustalo\n", | |
"\n", | |
"\n", | |
"The following packages will be downloaded:\n", | |
"\n", | |
" package | build\n", | |
" ---------------------------|-----------------\n", | |
" argtable2-2.13 | h14c3975_1001 2.7 MB conda-forge\n", | |
" ca-certificates-2021.10.8 | ha878542_0 139 KB conda-forge\n", | |
" certifi-2021.10.8 | py37h89c1867_1 145 KB conda-forge\n", | |
" clustalo-1.2.4 | h1b792b2_4 313 KB bioconda\n", | |
" ------------------------------------------------------------\n", | |
" Total: 3.3 MB\n", | |
"\n", | |
"The following NEW packages will be INSTALLED:\n", | |
"\n", | |
" argtable2 conda-forge/linux-64::argtable2-2.13-h14c3975_1001\n", | |
" clustalo bioconda/linux-64::clustalo-1.2.4-h1b792b2_4\n", | |
"\n", | |
"The following packages will be UPDATED:\n", | |
"\n", | |
" ca-certificates 2021.5.30-ha878542_0 --> 2021.10.8-ha878542_0\n", | |
" certifi 2021.5.30-py37h89c1867_0 --> 2021.10.8-py37h89c1867_1\n", | |
"\n", | |
"\n", | |
"\n", | |
"Downloading and Extracting Packages\n", | |
"certifi-2021.10.8 | 145 KB | ##################################### | 100% \n", | |
"argtable2-2.13 | 2.7 MB | ##################################### | 100% \n", | |
"ca-certificates-2021 | 139 KB | ##################################### | 100% \n", | |
"clustalo-1.2.4 | 313 KB | ##################################### | 100% \n", | |
"Preparing transaction: done\n", | |
"Verifying transaction: done\n", | |
"Executing transaction: done\n", | |
"\n", | |
"Note: you may need to restart the kernel to use updated packages.\n" | |
] | |
} | |
], | |
"source": [ | |
"# install clustalo\n", | |
"%conda install -c bioconda clustalo" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from Bio import SeqIO\n", | |
"from Bio.Align.Applications import ClustalOmegaCommandline\n", | |
"clustalomega_cline = ClustalOmegaCommandline(infile=\"seqs.fasta\", outfile=\"new_alignment.aln\", verbose=True, outfmt= \"clu\",\n", | |
"auto=True)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"clustalo -i seqs.fasta -o new_alignment.aln --outfmt clu --auto -v\n" | |
] | |
} | |
], | |
"source": [ | |
"print(clustalomega_cline)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Using 8 threads\n", | |
"Read 2 sequences (type: Protein) from seqs.fasta\n", | |
"not more sequences (2) than cluster-size (100), turn off mBed\n", | |
"Setting options automatically based on input sequence characteristics (might overwrite some of your options).\n", | |
"Auto settings: Enabling mBed.\n", | |
"Auto settings: Setting iteration to 1.\n", | |
"Progressive alignment progress done. CPU time: 0.11u 0.02s 00:00:00.13 Elapsed: 00:00:00\n", | |
"Iteration step 1 out of 1\n", | |
"Computing new guide tree (iteration step 1)\n", | |
"Computing HMM from alignment\n", | |
"Progressive alignment progress done. CPU time: 0.40u 0.03s 00:00:00.43 Elapsed: 00:00:00\n", | |
"Alignment written to new_alignment.aln\n" | |
] | |
} | |
], | |
"source": [ | |
"!clustalo -i seqs.fasta -o new_alignment.aln --outfmt clu --auto -v" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CLUSTAL O(1.2.4) multiple sequence alignment\r\n", | |
"\r\n", | |
"\r\n", | |
"STV1 -MNQEEAIFRSADMTYVQLYIPLEVIREVTFLLGKMSVFMVMDLNKDLTAFQRGYVNQLR\r\n", | |
"VPH1 MAEKEEAIFRSAEMALVQFYIPQEISRDSAYTLGQLGLVQFRDLNSKVRAFQRTFVNEIR\r\n", | |
" ::********:*: **:*** *: *: :: **::.:. . ***..: **** :**::*\r\n", | |
"\r\n", | |
"STV1 RFDEVERMVGFLNEVVEKHAAETWKYILHIDDEGNDIAQPDMADLINTMEPLSLENVNDM\r\n", | |
"VPH1 RLDNVERQYRYFYSLLKKHDIKLYEGDTDKYLDGS----------GELYVPPSGSVIDDY\r\n", | |
" *:*:*** :: .:::** : :: . :*. : * * . ::* \r\n", | |
"\r\n", | |
"STV1 VKEITDCESRARQLDESLDSLRSKLNDLLEQRQVIFECSKFIEVNPGIAGRATNPEIEQE\r\n", | |
"VPH1 VRNASYLEERLIQMEDATDQIEVQKNDLEQYRFILQSGDE--------------------\r\n", | |
" *:: : *.* *:::: *.:. : *** : * :: . .: \r\n", | |
"\r\n", | |
"STV1 ERDVDEFRMTPDDISETLSDAFSFDDETPQDRGALGNDLTRNQSVEDLSFLEQGYQHRYM\r\n", | |
"VPH1 ------FFLKGDN-----TDSTSYMDEDMIDAN--GENIAA-----------AIGASVNY\r\n", | |
" * :. *: :*: *: ** * . *:::: \r\n", | |
"\r\n", | |
"STV1 ITGSIRRTKVDILNRILWRLLRGNLIFQNFPIEEPLLEGK--EKVEKDCFIIFTHGETLL\r\n", | |
"VPH1 VTGVIARDKVATLEQILWRVLRGNLFFKTVEIEQPVYDVKTREYKHKNAFIVFSHGDLII\r\n", | |
" :** * * ** *::****:*****:*:.. **:*: : * * .*:.**:*:**: ::\r\n", | |
"\r\n", | |
"STV1 KKVKRVIDSLNGKIVSLNT---RSSELVDTLNRQIDDLQRILDTTEQTLHTELLVIHDQL\r\n", | |
"VPH1 KRIRKIAESLDANLYDVDSSNEGRSQQLAKVNKNLSDLYTVLKTTSTTLESELYAIAKEL\r\n", | |
" *::::: :**:.:: .::: *: : .:*:::.** :*.**. **.:** .* .:*\r\n", | |
"\r\n", | |
"STV1 PVWSAMTKREKYVYTTLNK--FQQESQGLIAEGWVPSTELIHLQDSLKDYIETLGSEYST\r\n", | |
"VPH1 DSWFQDVTREKAIFEILNKSNYDTNRKILIAEGWIPRDELATLQARLGEMIARLGIDVPS\r\n", | |
" * ..*** :: *** :: : : ******:* ** ** * : * ** : :\r\n", | |
"\r\n", | |
"STV1 VFNVILTNKLPPTYHRTNKFTQAFQSIVDAYGIATYKEINAGLATVVTFPFMFAIMFGDM\r\n", | |
"VPH1 IIQVLDTNHTPPTFHRTNKFTAGFQSICDCYGIAQYREINAGLPTIVTFPFMFAIMFGDM\r\n", | |
" :::*: **: ***:******* .**** *.**** *:****** *:**************\r\n", | |
"\r\n", | |
"STV1 GHGFILFLMALFLVLNERKFGAMHRDEIFDMAFTGRYVLLLMGAFSVYTGLLYNDIFSKS\r\n", | |
"VPH1 GHGFLMTLAALSLVLNEKKINKMKRGEIFDMAFTGRYIILLMGVFSMYTGFLYNDIFSKT\r\n", | |
" ****:: * ** *****:*:. *:*.***********::****.**:***:********:\r\n", | |
"\r\n", | |
"STV1 MTIFKSGWQWPSTFRKGESIEAKKTGVYPFGLDFAWHGTDNGLLFSNSYKMKLSILMGYA\r\n", | |
"VPH1 MTIFKSGWKWPDHWKKGESITATSVGTYPIGLDWAWHGTENALLFSNSYKMKLSILMGFI\r\n", | |
" ********:**. ::***** *...*.**:***:*****:*.****************: \r\n", | |
"\r\n", | |
"STV1 HMTYSFMFSYINYRAKNSKVDIIGNFIPGLVFMQSIFGYLSWAIVYKWSKDWIKDDKPAP\r\n", | |
"VPH1 HMTYSYFFSLANHLYFNSMIDIIGNFIPGLLFMQGIFGYLSVCIVYKWAVDWVKDGKPAP\r\n", | |
" *****::** *: ** :**********:***.****** .*****: **:**.****\r\n", | |
"\r\n", | |
"STV1 GLLNMLINMFLAPGTIDDQLYSGQAKLQVVLLLAALVCVPWLLLYKPLTLRRLNKNGGGG\r\n", | |
"VPH1 GLLNMLINMFLSPGTIDDELYPHQAKVQVFLLLMALVCIPWLLLVKPLHFKFTHKKKSHE\r\n", | |
" ***********:******:** ***:**.*** ****:***** *** :: :*: . \r\n", | |
"\r\n", | |
"STV1 RPHGYQSVGNIEHEEQIAQQRHSAEGFQGMIISDVASVADSINESVGGGEQGPFNFGDVM\r\n", | |
"VPH1 PLPSTEA-------------DASSEDLEAQQLISAMDADDAEEEEVGSGSHG-EDFGDIM\r\n", | |
" . :: *:*.::. : .. .. *: :*.**.*.:* :***:*\r\n", | |
"\r\n", | |
"STV1 IHQVIHTIEFCLNCISHTASYLRLWALSLAHAQLSSVLWDMTISNAFSSKNSGSPLAVMK\r\n", | |
"VPH1 IHQVIHTIEFCLNCVSHTASYLRLWALSLAHAQLSSVLWTMTIQIAFGFRGF---VGVFM\r\n", | |
" **************:************************ ***. **. :. :.*: \r\n", | |
"\r\n", | |
"STV1 VVFLFAMWFVLTVCILVFMEGTSAMLHALRLHWVEAMSKFFEGEGYAYEPFSFRAIIE*-\r\n", | |
"VPH1 TVALFAMWFALTCAVLVLMEGTSAMLHSLRLHWVESMSKFFVGEGLPYEPFAFEYKDMEV\r\n", | |
" .* ******.** .:**:*********:*******:***** *** ****:*. \r\n", | |
"\r\n", | |
"STV1 ------------\r\n", | |
"VPH1 AVASASSSASS*\r\n", | |
" \r\n" | |
] | |
} | |
], | |
"source": [ | |
"!cat new_alignment.aln" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"1.79\r\n" | |
] | |
} | |
], | |
"source": [ | |
"# show Biopython version present\n", | |
"!python -c \"import Bio; print(Bio.__version__)\"" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3 (ipykernel)", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.10" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment