Last active
August 29, 2015 13:57
-
-
Save faraday/9923477 to your computer and use it in GitHub Desktop.
Belediye meclisi vs. Büyükşehir oyları anomali kontrol
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse
import sys
import time
import urllib
from xml.etree import cElementTree

import html5lib
from html5lib import treebuilders
# Result-detail page on sts.chp.org.tr; %s is replaced by the id sent as "sid".
STS_URL = "http://sts.chp.org.tr/SonucDetay.aspx?sid=%s"
# ElementTree paths into the parsed XHTML tree: a tab panel div selected by its
# number (%s), and the vote-row divs looked up inside a panel.
TAB_PATH = ".//{http://www.w3.org/1999/xhtml}div[@id='TabContainer_TabPanel%s']"
ROW_PATH = ".//{http://www.w3.org/1999/xhtml}div[@class='chp-vote-row']"
# Overall id span covered by the table below: 205407, 217637
# Each value is a pair of page ids for one district. Neighbouring districts
# abut (e.g. 205482 / 205483), so the second number looks like the last id of
# the district rather than one past it.
LOCATIONS = {
    'akyurt': (205407, 205482),
    'altindag': (205483, 206246),
    'ayas': (206292, 206345),
    'bala': (206346, 206445),
    'beypazari': (206446, 206612),
    'camlidere': (206613, 206664),
    'cankaya': (206747, 208739),
    'cubuk': (208871, 209111),
    'elmadag': (209112, 209224),
    'etimesgut': (209277, 210240),
    'evren': (210272, 210287),
    'golbasi': (210288, 210555),
    'gudul': (210556, 210596),
    'haymana': (210597, 210738),
    'kalecik': (210739, 210807),
    'kazan': (210808, 210936),
    # NOTE(review): this pair was previously written high-before-low as
    # (211747, 211005), which makes range() over it empty. It is stored
    # low-first here. The real upper bound may be larger (the next district
    # only starts at 212890) -- TODO confirm against the site.
    'kecioren': (211005, 211747),
    'kizilcahamam': (212890, 213039),
    'mamak': (213040, 214337),
    'nallihan': (214338, 214471),
    'polatli': (214472, 214786),
    'pursaklar': (214787, 215055),
    'sincan': (215056, 216122),
    'sereflikochisar': (216123, 216239),
    'yenimahalle': (216275, 217637),
    # "HEPSI" spans from the first id of the table to the last one.
    'HEPSI': (205407, 217637),
}
# Result category -> number of the TabContainer_TabPanel<N> div holding it.
TABS = {
    'belediye_meclis': 3,
    'ilce': 2,
    'buyuksehir': 1,
}
# Command-line interface: the district to scan (--yer) and how large a vote
# gap must be before it is reported (--esik).
parser = argparse.ArgumentParser(
    description='Report STS result pages where a party has more '
                'municipal-council votes than metropolitan votes by more '
                'than a threshold.'
)
# sorted() keeps the choices in a stable order in --help and error messages.
parser.add_argument('--yer', required=True, choices=sorted(LOCATIONS), help='Yer (ilce)')
parser.add_argument('--esik', default=50, type=int, help='Anomali esik degeri (alt limit)')
def breakdown(row):
    """Extract the party name and vote count from one vote row.

    The party name is the last path segment of the ``src`` of the row's first
    ``<img>``, without a trailing ``.gif``. The vote count is the integer
    ``value`` of the row's first ``<input>``.

    Args:
        row: ElementTree element of one vote row (XHTML namespace).

    Returns:
        ``{'parti': <name>, 'oy': <int>}``, or ``{}`` when the row contains no
        image. The count is 0 when the input element or its ``value`` is
        missing or blank.

    Raises:
        KeyError: if the image has no ``src`` attribute.
        ValueError: if ``value`` is present but is not an integer.
    """
    xhtml = "{http://www.w3.org/1999/xhtml}"
    img = row.find(".//%simg" % xhtml)
    # Compare with None: an element without children is falsy.
    if img is None:
        return {}
    name = img.attrib['src'].split('/')[-1]
    # Remove only a literal ".gif" suffix. rstrip('.gif') strips any trailing
    # run of the characters ".", "g", "i", "f", so "akparti.gif" would become
    # "akpart".
    if name.endswith('.gif'):
        name = name[:-len('.gif')]
    field = row.find(".//%sinput" % xhtml)
    raw = '' if field is None else (field.attrib.get('value') or '').strip()
    return {'parti': name, 'oy': int(raw) if raw else 0}
def getResults(tabDoc):
    """Collect ``{party: votes}`` from the vote rows inside one tab panel.

    Args:
        tabDoc: ElementTree element of the tab panel to scan.

    Returns:
        dict mapping each row's ``'parti'`` to its ``'oy'`` as returned by
        ``breakdown``. If two rows share a party name, the later row wins.
    """
    results = {}
    for row in tabDoc.findall(ROW_PATH):
        entry = breakdown(row)
        if not entry:
            # The first row without a party image ends the scan; rows after
            # it are not read.
            break
        results[entry['parti']] = entry['oy']
    return results
def parse(feedHTML):
    """Parse a result page and return the vote tallies of every tab found.

    Args:
        feedHTML: the page's HTML, in a form accepted by
            ``html5lib.HTMLParser.parse``.

    Returns:
        dict keyed by the names in ``TABS``; each value is the
        ``{party: votes}`` dict built by ``getResults`` for that tab's panel.
        Tabs whose panel is not present in the page are left out, so the
        result may be empty or contain only some of the names.
    """
    builder = treebuilders.getTreeBuilder('etree', cElementTree)
    doc = html5lib.HTMLParser(tree=builder).parse(feedHTML)
    tabResults = {}
    for tabName, tabNo in TABS.items():
        panels = doc.findall(TAB_PATH % tabNo)
        if not panels:
            continue
        # Only the first matching panel is read.
        tabResults[tabName] = getResults(panels[0])
    return tabResults
def check(placeId, limit):
    """Fetch one result page and print parties with a suspicious vote gap.

    For every party present in both the 'buyuksehir' and 'belediye_meclis'
    tallies, a line is printed when the 'belediye_meclis' count exceeds the
    'buyuksehir' count by more than ``limit``.

    Args:
        placeId: id substituted into ``STS_URL``.
        limit: gap that must be exceeded (strictly) for a line to be printed.

    Returns:
        None. Output goes to stdout; the call then sleeps for one second.
    """
    response = urllib.urlopen(STS_URL % placeId)
    try:
        text = response.read()
    finally:
        # Release the connection even if reading fails.
        response.close()
    tabResults = parse(text)
    # parse() omits tabs it cannot find, so either tally may be missing; an
    # empty dict then simply yields no comparisons.
    meclisResults = tabResults.get('belediye_meclis', {})
    buyuksehirResults = tabResults.get('buyuksehir', {})
    for bKey, bVal in buyuksehirResults.items():
        if bKey not in meclisResults:
            continue
        diff = meclisResults[bKey] - bVal
        if diff > limit:
            # Report the id this call was given, not a caller's loop variable.
            # A parenthesised single argument works both as the Python 2 print
            # statement and as the Python 3 function.
            print("STS-ID %s --- %s --- belediye meclisi: %s, buyuksehir: %s --- FARK: %s" % (placeId, bKey.upper(), meclisResults[bKey], bVal, diff))
    # Pause between consecutive requests.
    time.sleep(1)
if __name__ == "__main__":
    args = parser.parse_args()
    # sorted() guards against a pair stored as (high, low), which would
    # otherwise produce an empty range.
    first, last = sorted(LOCATIONS[args.yer])
    # The bounds in LOCATIONS are inclusive (one district's second number is
    # directly followed by the next district's first), so the last id must be
    # visited as well.
    for i in range(first, last + 1):
        check(placeId=i, limit=args.esik)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment