@boyank
Last active September 7, 2016 12:05
Python 2 script to download BSE Sofia (Bulgarian Stock Exchange) trades data from http://www.infostock.bg
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from bs4 import BeautifulSoup
import urllib2


def scrape_data(url, to_file):
    """Scrape one results page and append the trade rows as CSV lines to to_file."""
    soup = BeautifulSoup(urllib2.urlopen(url), 'lxml')
    for tr in soup.find('table', class_='homeTable noborders').find_all('tr'):
        # Keep only the data cells (class "left" or "right"); header and spacer cells are skipped.
        row = [td.text.strip().encode('utf-8') for td in tr.find_all('td')
               if td.has_attr('class') and td['class'] in ([u'left'], [u'right'])]
        if row:
            to_file.write('{}\n'.format(','.join(row)))


def create_url(ticker, date_from, date_to, data_page=0):
    """Build the transactions-history URL for a ticker, date range and page number."""
    base_url = 'http://www.infostock.bg/infostock/control/transactions/history/p'
    search_query = '?page={}&ticker={}&fromDate={}&toDate={}'.format(data_page, ticker, date_from, date_to)
    search_url = '{}{}'.format(base_url, search_query)
    return search_url


if __name__ == '__main__':
    ticker = '4CF'
    from_date = '01.01.2015'
    to_date = '16.03.2015'
    # Load the first results page to find out how many pages of trades exist.
    soup = BeautifulSoup(urllib2.urlopen(create_url(ticker, from_date, to_date)), 'lxml')
    pagination = soup.find('ul', class_='pagination')
    if pagination:
        # The pagination links are page numbers plus a "следваща »" ("next") link; take the highest number.
        pages = max([int(a.text) for li in pagination.find_all('li') for a in li.find_all('a')
                     if a.text != u' следваща »'])
    else:
        pages = 1
    with open('trades.csv', 'w') as f:
        for page in xrange(pages):
            search_url = create_url(ticker, from_date, to_date, page)
            print 'page {}'.format(page + 1)
            scrape_data(search_url, f)
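
The script above targets Python 2 (urllib2, the print statement, xrange). For reference, below is a minimal Python 3 sketch of the same fetch-and-parse step; it assumes the infostock.bg URL scheme and table markup are unchanged, and the helper names fetch_page and rows_from_page are illustrative, not part of the original gist.

# Minimal Python 3 sketch of the same scraping step (assumes the site's
# markup is unchanged; fetch_page / rows_from_page are illustrative names).
import csv
import urllib.request

from bs4 import BeautifulSoup


def fetch_page(url):
    # urllib.request replaces Python 2's urllib2; BeautifulSoup accepts the
    # raw response object just as in the original script.
    return BeautifulSoup(urllib.request.urlopen(url), 'lxml')


def rows_from_page(soup):
    # Same cell filtering as the original: keep only "left"/"right" data cells.
    table = soup.find('table', class_='homeTable noborders')
    for tr in table.find_all('tr'):
        row = [td.text.strip() for td in tr.find_all('td')
               if td.has_attr('class') and td['class'] in (['left'], ['right'])]
        if row:
            yield row


if __name__ == '__main__':
    url = ('http://www.infostock.bg/infostock/control/transactions/history/p'
           '?page=0&ticker=4CF&fromDate=01.01.2015&toDate=16.03.2015')
    with open('trades.csv', 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        for row in rows_from_page(fetch_page(url)):
            writer.writerow(row)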