@krishashok
Created May 19, 2016 14:51
Extract election results for 2016 state elections from the ECI website into a spreadsheet.
from bs4 import BeautifulSoup
import requests
from xlwt import Workbook, easyxf
#initialize a bloody excel workbook
wb = Workbook()
#initialize a sheet in that bloody workbook
ws = wb.add_sheet('0')
#Set column widths and fonts and shit
ws.col(0).width = 256 * 30
ws.col(1).width = 256 * 30
ws.col(2).width = 256 * 60
ws.col(3).width = 256 * 30
ws.col(4).width = 256 * 30
style_link = easyxf('font: underline single, name Arial, height 280, colour_index blue')
style_heading = easyxf('font: bold 1, name Arial, height 280; pattern: pattern solid, pattern_fore_colour yellow, pattern_back_colour yellow')
style_wrap = easyxf('align: wrap 1; font: height 280')
# Headings in proper MBA spreadsheet style - Bold with yellow background
ws.write(0,0,'Constituency',style_heading)
ws.write(0,1,'Const. No.',style_heading)
ws.write(0,2,'Leading Candidate',style_heading)
ws.write(0,3,'Leading Party',style_heading)
ws.write(0,4,'Trailing Candidate',style_heading)
ws.write(0,5,'Trailing Party',style_heading)
ws.write(0,6,'Margin',style_heading)
ws.write(0,7,'Status',style_heading)
# The damn ECI site uses a URL pattern that appends a page number to http://eciresults.nic.in/StatewiseS22 before the .htm for TN results (S22 is the ECI code for Tamil Nadu)
# If you want another state, set url_pattern below to that state's first-page URL, minus the .htm
url_pattern = 'http://eciresults.nic.in/StatewiseS22'
# The damn site has 24 pages of results for TN, so we loop through that nonsense
# If it's another state, change the 24 in range(24) to the number of pages the damn site has for that goddamned state (or auto-detect it with the sketch after the script)
row_count = 1
for i in range(24):
    # Get shit from ECI site for the current page
    if i == 0:
        url = url_pattern + ".htm"
    else:
        url = url_pattern + str(i) + ".htm"
    r = requests.get(url)
    data = r.text
    # Turn shit into Beautiful Soup
    soup = BeautifulSoup(data, "lxml")
    # Find the table on the damn page
    table = soup.find('table')
    # Loop through table and save shit in the bloody spreadsheet
    for row in table.find_all('tr'):
        if len(row) == 8:
            col_count = 0
            for cell in row.find_all('td'):
                ws.write(row_count, col_count, cell.text, style_wrap)
                col_count = col_count + 1
            row_count = row_count + 1
wb.save('election_results.xls')
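
If you are adapting this for another state, you don't have to hand-count the result pages. Below is a minimal sketch of one way to do it, assuming (unverified) that the ECI site answers with a non-200 status for a page number that doesn't exist; the count_pages helper is hypothetical, reuses the requests import at the top of the script, and just mirrors the URL logic in the loop above.

def count_pages(url_pattern):
    # Hypothetical helper: probe StatewiseSxx pages until one stops
    # answering with HTTP 200. If the site serves an error page with a
    # 200 status instead, this will overcount and you're back to counting by hand.
    n = 0
    while True:
        suffix = ".htm" if n == 0 else str(n) + ".htm"
        if requests.get(url_pattern + suffix).status_code != 200:
            return n
        n += 1

# Then replace the hardcoded range(24) above with:
# for i in range(count_pages(url_pattern)):

Run the script as-is with python and it writes election_results.xls to the current directory.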