Skip to content

Instantly share code, notes, and snippets.

@woemler
Last active January 4, 2016 09:19
Show Gist options
  • Save woemler/8601450 to your computer and use it in GitHub Desktop.
Save woemler/8601450 to your computer and use it in GitHub Desktop.
Searches Sanger's COSMIC cell line database and retrieves basic sample metadata.
from BeautifulSoup import BeautifulSoup
import urllib2
import re
def fetch_page_soup(url):
    """Fetch page data from a URL and return a parsed BeautifulSoup object.

    Args:
        url: Address of the page to download.

    Returns:
        A ``BeautifulSoup`` object built from the response body.

    Raises:
        Whatever ``urllib2.urlopen`` or ``response.read`` raises; errors are
        propagated to the caller unchanged.
    """
    # Open the connection *before* entering the try block: if urlopen fails
    # there is nothing to close, and the original error reaches the caller
    # instead of being masked by an UnboundLocalError in the cleanup code.
    response = urllib2.urlopen(url)
    try:
        return BeautifulSoup(response.read())
    finally:
        response.close()
def find_cosmic_cell_line(cosmic_id):
    """Return a COSMIC cell line's annotation, given a COSMIC ID.

    Downloads the COSMIC sample overview page for ``cosmic_id`` and collects
    the ``<dt>``/``<dd>`` pairs found inside the "overview" section.

    Args:
        cosmic_id: COSMIC sample identifier; it is converted with ``str()``
            and substituted into the page URL.

    Returns:
        A dict mapping each ``<dt>`` text to the text of the ``<dd>`` at the
        same position. The dict is empty when the page has no "overview"
        section or the expected container inside it is missing.
    """
    url = r'http://cancer.sanger.ac.uk/cosmic/sample/overview?id=%s' % (str(cosmic_id))
    soup = fetch_page_soup(url)

    # The sample metadata is stored in the "overview" tab.
    overview = soup.find("div", id="overview")
    if overview is None:
        return {}

    # Within the tab, the definition list sits in a div whose class
    # matches "w75".
    details = overview.find("div", {"class": re.compile("w75")})
    if details is None:
        return {}

    # Zip the metadata up into a dictionary.
    labels = [term.string for term in details.findAll("dt")]
    values = [definition.string for definition in details.findAll("dd")]
    metadata = dict(zip(labels, values))

    # The sample name will not properly parse this way (its value is wrapped
    # in a link), so pluck it out separately. Each step is checked so an
    # unexpected layout keeps the zipped value instead of crashing on None.
    name_label = details.find(text="Sample name")
    name_cell = name_label.findNext("dd") if name_label is not None else None
    name_link = name_cell.find("a") if name_cell is not None else None
    if name_link is not None:
        metadata["Sample name"] = name_link.string

    return metadata
if __name__ == "__main__":
    # Example run: look up one COSMIC sample and print its metadata as
    # tab-separated "label<TAB>value" lines.
    cosmic_id = 905965
    # Pass the sample ID itself; the builtin ``id`` function is not a valid
    # COSMIC identifier and would produce a nonsense query string.
    results = find_cosmic_cell_line(cosmic_id)
    for label, value in results.items():
        # Single-argument parenthesised print gives the same output under the
        # Python 2 print statement.
        print("%s\t%s" % (label, value))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment