@kami4ka · Last active October 7, 2022
Medindia ScrapingAnt parser
import requests
from bs4 import BeautifulSoup

YOUR_API_KEY = '<YOUR_SCRAPINGANT_API_KEY>'


def get_page(page_url):
    # Fetch a page through the ScrapingAnt API, routed via an Indian proxy
    response = requests.get(
        url='https://api.scrapingant.com/v2/general',
        params={
            'browser': False,
            'url': page_url,
            'x-api-key': YOUR_API_KEY,
            'proxy_country': 'IN',
        },
    )
    # Decode the response as Windows-1252, the encoding used by Medindia pages
    return response.content.decode('windows-1252')


def parse_price_page(html):
    # Extract drug rows from the 'tblgrn' table on the drug-price listing page
    soup = BeautifulSoup(html, 'html.parser')
    data = []
    table = soup.find('table', attrs={'class': 'tblgrn'})
    rows = table.find_all('tr')
    for row in rows:
        # Skip the header row ('tblhead') and ad rows ('ads-row')
        if row.get('class') and row.get('class')[0] in ('tblhead', 'ads-row'):
            continue
        cols = row.find_all('td')
        data.append({
            'id': cols[0].text.strip(),
            'name': cols[1].text.strip(),
            'url': cols[1].find('a').get('href'),
            'manufacturer': cols[2].text.strip(),
            'type': cols[3].text.strip(),
            'price_url': cols[4].find('a').get('href'),
        })
    return data


def get_drug_price(html):
    # The price text lives in the 'ybox' block on the individual price page
    soup = BeautifulSoup(html, 'html.parser')
    return soup.find('div', attrs={'class': 'ybox'}).text.strip()


response = get_page('https://www.medindia.net/drug-price/ketamine.htm')
drug_data = parse_price_page(response)

for drug in drug_data:
    # Append the price to each drug record that links to a real price page
    if drug['price_url'] and drug['price_url'] != '#':
        drug['price'] = get_drug_price(get_page(drug['price_url']))

print(drug_data)