Skip to content

Instantly share code, notes, and snippets.

@krishnakummar
Created April 17, 2015 14:24
Show Gist options
  • Save krishnakummar/4c074b36ce83f74d586e to your computer and use it in GitHub Desktop.
Save krishnakummar/4c074b36ce83f74d586e to your computer and use it in GitHub Desktop.
Fetching medicine names from medindia
import string
import sys

import BeautifulSoup
import requests
def _print_manufacturer_products(session, name):
    """Fetch one manufacturer page and print a CSV row per trade name.

    ``name`` is the ``<a>`` tag of a manufacturer taken from the listing
    page.  Each printed row is
    ``manufacturer,product,trade name,manufacturer link``.

    Nothing is printed when the anchor has no single text child, or when
    the manufacturer page lacks the expected products container.

    Raises:
        KeyError: if the anchor has no ``href`` attribute.
        requests.RequestException: if the manufacturer page cannot be
            fetched.
    """
    manufacturer = name.string
    if manufacturer is None:
        # Without a plain-text label no row can be built for this anchor.
        return
    manufacturer = manufacturer.strip()

    phar_link = name["href"]
    pro_req = session.get(phar_link)
    pro_doc = BeautifulSoup.BeautifulSoup(pro_req.content)
    manu_products = pro_doc.find(
        "div", attrs={"class": "top-gray col-list clear-fix "})
    if manu_products is None:
        return

    product_names = manu_products.findAll("h3")
    trade_names = manu_products.findAll("div", attrs={"class": "links"})
    # The n-th <h3> is paired with the n-th "links" div; zip stops at the
    # shorter list instead of relying on a swallowed IndexError.
    for product, trades in zip(product_names, trade_names):
        if product.string is None:
            continue
        product_name = product.string.strip()
        for item in trades.contents:
            if item == '\n':
                continue
            text = item.string
            if text is None:
                # Tag without a single text child: skip just this entry
                # rather than abandoning the rest of the manufacturer.
                continue
            text = text.strip()
            if text == "|":
                # "|" is the separator between trade-name entries.
                continue
            row = ",".join(
                [manufacturer, product_name, text, phar_link.strip()])
            try:
                # Single parenthesised argument: valid as a Python 2 print
                # statement and as a Python 3 function call.
                print(row)
            except UnicodeError as exc:
                sys.stderr.write("skipping unprintable row: %r\n" % (exc,))


session = requests.session()
alphas = list(string.ascii_uppercase)
for letter in alphas:
    url_parse = ("http://www.medindia.net/drugs/manufacturers.asp?alpha="
                 + letter)
    req = session.get(url_parse)
    doc = BeautifulSoup.BeautifulSoup(req.content)
    manufacturers_div = doc.find(
        "div", attrs={"class": "headlines clear-fix"})
    if manufacturers_div is None:
        # Listing page without the expected container: skip this letter
        # instead of crashing the whole crawl on `for ... in None`.
        sys.stderr.write("no manufacturer list found for %r\n" % (letter,))
        continue
    for manu in manufacturers_div:
        # Collect every link under the parent of the element that follows
        # the first child of the container.
        tag = manu.next
        links = tag.parent
        pnames = links.findAll("a")
        for name in pnames:
            try:
                _print_manufacturer_products(session, name)
            except Exception as exc:
                # Best-effort crawl: report the failure and move on to the
                # next manufacturer.  Unlike a bare `except`, this lets
                # KeyboardInterrupt / SystemExit stop the script.
                sys.stderr.write("skipping manufacturer %r: %r\n"
                                 % (name.get("href"), exc))
        # NOTE(review): the source's indentation was lost; this `break` is
        # read as ending the child loop after the first child - confirm.
        break
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment