danielparton/gist:a7b83c85bc7e06dc5189

## gistfile1.py
def retrieve_uniprot(search_string, maxreadlength=100000000):
    '''
    Searches the UniProt database given a search string, and retrieves an XML
    file, which is returned as a string.

    Parameters
    ----------
    search_string : str
        UniProt query, e.g. 'domain:"Protein kinase" AND reviewed:yes'.
        Every '=' in the query is replaced with ':' before it is URL-encoded
        with msmseeder.core.encode_url_query.
    maxreadlength : int, optional
        Maximum size in bytes which will be read from the website
        (default 100MB). At most this many bytes of the response are returned.

    Returns
    -------
    str
        The XML page. The first occurrence of the xmlns attribute is removed
        (from the <uniprot> tag), as this makes xpath searching annoying.

    Raises
    ------
    urllib2.URLError
        Propagated unchanged from urllib2.urlopen if the request fails.
    '''
    # Imported locally (like msmseeder.core) so the function does not depend
    # on module-level imports being present.
    import contextlib
    import urllib2

    import msmseeder.core

    base_url = 'http://www.uniprot.org/uniprot/?query='
    search_string_encoded = msmseeder.core.encode_url_query(search_string.replace('=', ':'))
    query_url = base_url + search_string_encoded + '&format=xml'

    # urllib2 responses are not context managers in Python 2; closing() makes
    # sure the connection is released even if read() raises.
    with contextlib.closing(urllib2.urlopen(query_url)) as response:
        page = response.read(maxreadlength)

    # Only the first occurrence is stripped.
    page = page.replace('xmlns="http://uniprot.org/uniprot" ', '', 1)

    return page
	def retrieve_uniprot(search_string, maxreadlength=100000000):
		'''
		Searches the UniProt database given a search string, and retrieves an XML
		file, which is returned as a string.

		Parameters
		----------
		search_string : str
			UniProt query, e.g. 'domain:"Protein kinase" AND reviewed:yes'.
			Every '=' in the query is replaced with ':' before it is URL-encoded
			with msmseeder.core.encode_url_query.
		maxreadlength : int, optional
			Maximum size in bytes which will be read from the website
			(default 100MB). At most this many bytes of the response are returned.

		Returns
		-------
		str
			The XML page. The first occurrence of the xmlns attribute is removed
			(from the <uniprot> tag), as this makes xpath searching annoying.

		Raises
		------
		urllib2.URLError
			Propagated unchanged from urllib2.urlopen if the request fails.
		'''
		# Imported locally (like msmseeder.core) so the function does not depend
		# on module-level imports being present.
		import contextlib
		import urllib2

		import msmseeder.core

		base_url = 'http://www.uniprot.org/uniprot/?query='
		search_string_encoded = msmseeder.core.encode_url_query(search_string.replace('=', ':'))
		query_url = base_url + search_string_encoded + '&format=xml'

		# urllib2 responses are not context managers in Python 2; closing() makes
		# sure the connection is released even if read() raises.
		with contextlib.closing(urllib2.urlopen(query_url)) as response:
			page = response.read(maxreadlength)

		# Only the first occurrence is stripped.
		page = page.replace('xmlns="http://uniprot.org/uniprot" ', '', 1)

		return page