Created
March 11, 2016 13:48
-
-
Save Swarchal/6e06beeb2857f3b21fc8 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Finding a protein motif\n", | |
"\n", | |
"### Problem:\n", | |
"\n", | |
"**Given:** At most 15 UniProt Protein Database access IDs\n", | |
"\n", | |
"**Return:** For each protein posessing the N-glycosylation motif, output its given access ID followed by a list of locations in the protein string where the motif can be found.\n", | |
"\n", | |
"\n", | |
"------------\n", | |
"\n", | |
"A protein motif is represented by a shorthand as follows:\n", | |
"- [XY] means 'either X or Y'\n", | |
"- {X} means 'any amino acid except X'\n", | |
"\n", | |
"The N-glycosylation motif is written as N{P}[ST]{P}" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import urllib2\n", | |
"from Bio import SeqIO\n", | |
"import re\n", | |
"\n", | |
"def id_to_fasta(ID):\n", | |
" url = \"http://www.uniprot.org/uniprot/%s.fasta\" % ID.strip()\n", | |
" url_out = urllib2.urlopen(url)\n", | |
" fasta = SeqIO.parse(url_out, \"fasta\")\n", | |
" return(fasta)\n", | |
"\n", | |
"def is_motif(s):\n", | |
" s = s.upper()\n", | |
" assert len(s) is int(4)\n", | |
" pattern = re.compile('N[^P][ST][^P]')\n", | |
" return bool(pattern.match(s))\n", | |
"\n", | |
"def name_and_position(ID):\n", | |
" fasta_gen = id_to_fasta(ID)\n", | |
" positions = []\n", | |
" for fasta in fasta_gen:\n", | |
" sequence = str(fasta.seq)\n", | |
" for i in xrange(len(sequence) - 4):\n", | |
" if is_motif(sequence[i:i + 4]):\n", | |
" positions.append(i + 1) \n", | |
" if len(positions) > 0:\n", | |
" print ID,\n", | |
" print ' '.join(str(num) for num in positions), \"\\n\"" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"B5ZC00\n", | |
"85 118 142 306 395 \n", | |
"\n", | |
"P07204_TRBM_HUMAN\n", | |
"47 115 116 382 409 \n", | |
"\n", | |
"P20840_SAG1_YEAST\n", | |
"79 109 135 248 306 348 364 402 485 501 614 \n", | |
"\n" | |
] | |
} | |
], | |
"source": [ | |
"for line in open(\"/home/scott/Dropbox/rosalind/mprt.txt\"):\n", | |
" name_and_position(line)" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 2", | |
"language": "python", | |
"name": "python2" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.6" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment