Skip to content

Instantly share code, notes, and snippets.

@kozo2
Created August 14, 2012 14:21
Show Gist options
  • Save kozo2/3349686 to your computer and use it in GitHub Desktop.
Save kozo2/3349686 to your computer and use it in GitHub Desktop.
import urllib2
from bs4 import BeautifulSoup
import re
# Report KEGG reaction IDs that appear on the organism-specific (hsa) HTML
# view of pathway map01100 but are absent from the matching KGML download.
#
# Approach:
#   1. On the reference map page, record the reaction IDs mentioned in each
#      <area> title, keyed by that area's "coords" string.
#   2. On the hsa page, every <area> whose coords match a recorded reference
#      area contributes that area's reaction IDs.
#   3. Collect the reaction IDs named by the <reaction> elements of the KGML.
#   4. Print the IDs found in step 2 but not in step 3, one per line.
#
# NOTE: this is a Python 2 script (it relies on urllib2) and it needs network
# access to www.genome.jp.
# NOTE(review): no parser is passed to BeautifulSoup, so bs4 picks whichever
# parser is installed; results may differ between environments.

MAP_HTML_URL = "http://www.genome.jp/kegg/pathway/map/map01100.html"
HSA_HTML_URL = "http://www.genome.jp/kegg-bin/show_pathway?org_name=hsa&mapno=01100&mapscale=1.0&show_description=show"
HSA_KGML_URL = "http://www.genome.jp/kegg-bin/download?entry=hsa01100&format=kgml"

# A reaction ID is the letter "R" followed by exactly five digits.
rid_pat = re.compile(r"R[0-9]{5}")


def _read_url(url):
    """Return the response body for *url*, always closing the connection."""
    response = urllib2.urlopen(url)
    try:
        return response.read()
    finally:
        response.close()


# Step 1: coords string -> reaction IDs named in that area's title.
soup = BeautifulSoup(_read_url(MAP_HTML_URL))
rids_for = {}
for tag in soup.find_all('area'):
    coords = tag.get('coords')
    # .get() keeps an <area> without a title/coords attribute from aborting
    # the whole run with a KeyError; such areas carry nothing to record.
    rids = rid_pat.findall(tag.get('title', ''))
    if coords is not None and rids:
        rids_for[coords] = rids

# Step 2: reaction IDs of every hsa-page area that matches a reference area.
soup = BeautifulSoup(_read_url(HSA_HTML_URL))
rids_in_html = []
for tag in soup.find_all('area'):
    rids_in_html.extend(rids_for.get(tag.get('coords'), ()))

# Step 3: reaction IDs listed in the "name" attribute of KGML <reaction> tags.
ksoup = BeautifulSoup(_read_url(HSA_KGML_URL))
rids_in_kgml = []
for tag in ksoup.find_all('reaction'):
    rids_in_kgml.extend(rid_pat.findall(tag.get('name', '')))

# Step 4: IDs present in the HTML view but missing from the KGML.
missing_rids = set(rids_in_html) - set(rids_in_kgml)
# Sorted so that repeated runs produce the same, diff-friendly output.
for rid in sorted(missing_rids):
    print(rid)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment