Skip to content

Instantly share code, notes, and snippets.

@efaisal
Created December 3, 2013 03:28
Show Gist options
  • Save efaisal/7763434 to your computer and use it in GitHub Desktop.
Save efaisal/7763434 to your computer and use it in GitHub Desktop.
Work around the CSS captcha to automate Semakan Pemilih Bagi Isi Rumah at http://daftarj.spr.gov.my/semakdm/. The result is output as HTML; parsing it is left as an exercise for you :)
#!/usr/bin/env python
import io

import lxml.html
import requests
import tinycss
###############################################################################
# Base address of the site; the form page is fetched from here.
url = 'http://daftarj.spr.gov.my/semakdm/'
# Full address of the ASP handler that receives the form POST.
form = url + 'semakandm.asp'
###############################################################################
def get_cookies():
    """Fetch the form page and extract the first cookie it sets.

    Returns a ``(cookies, text)`` tuple: ``cookies`` is a one-entry dict
    built from the first ``name=value`` pair of the Set-Cookie header, and
    ``text`` is the decoded body of the page.

    Raises KeyError if the response carries no Set-Cookie header, and
    ValueError if the first cookie segment contains no '='.
    """
    r = requests.get(url)
    # Only the leading "name=value" pair is wanted; attributes such as
    # "path=/" follow after the first ';'.
    first_pair = r.headers['set-cookie'].split(';')[0].strip()
    # Split on the first '=' only: the cookie value may itself contain '='
    # (e.g. base64 padding), which would otherwise break the unpacking.
    k, v = first_pair.split('=', 1)
    return {k: v}, r.text
def parse_js(js):
    """Convert a ``document.write('...')`` snippet into a lookup table.

    The JavaScript wrapper is stripped and the remaining markup is parsed
    as an HTML fragment.

    js -- text of the inline script.

    Returns a dict where ``'root'`` maps to the first attribute value of the
    outer element, and the first attribute value of each direct child maps
    to that child's text.
    """
    markup = js.replace("document.write('", '').replace("');", '').strip()
    root = lxml.html.fromstring(markup)
    ids = {'root': root.values()[0]}
    # Children are added after 'root', so a child keyed 'root' would win.
    ids.update((child.values()[0], child.text) for child in root)
    return ids
def get_captcha(content):
    """Reassemble the captcha text from the page's inline script and CSS.

    The first <script> and first <style> elements found under the first
    child of <body> are read. parse_js() yields a mapping from identifiers
    to text fragments; every CSS rule whose selector contains one of those
    identifiers contributes its fragment, keyed by the first value token of
    the rule's ``left`` declaration. The fragments are joined in ascending
    key order.

    content -- HTML source of the form page.

    Returns the captcha string; it is empty when no rule matches.
    Raises StopIteration when the expected <script> or <style> is absent.
    """
    html = lxml.html.fromstring(content)
    container = html.body[0]
    # Builtin next() rather than the .next() method: the method exists only
    # on Python 2 iterators, while the builtin works on Python 2.6+ and 3.
    js = parse_js(next(container.iter('script')).text)
    css = tinycss.make_parser().parse_stylesheet(
        next(container.iter('style')).text
    )
    captcha = {}
    for r in css.rules:
        for s in r.selector:
            token = s.as_css()
            # A token that is a substring of the root identifier ends the
            # scan of this selector.
            if token in js['root']:
                break
            # Tokens shorter than two characters are skipped.
            if len(token) < 2:
                continue
            i = token.replace('.', '')
            if i in js:
                for d in r.declarations:
                    if d.name == 'left':
                        # Presumably the horizontal offset that fixes the
                        # on-screen order of the fragments; verify against
                        # the live page.
                        captcha[d.value[0].value] = js[i]
    # sorted() replaces keys() followed by .sort(): on Python 3 keys()
    # returns a view object that has no sort() method.
    return ''.join([captcha[k] for k in sorted(captcha)])
def post_page(cookies, payload):
    """POST the form fields to the handler and return the response text.

    cookies -- dict of cookies to send with the request.
    payload -- dict of form fields to submit.
    """
    return requests.post(form, data=payload, cookies=cookies).text
def semak(senarai):
    """Run one lookup for the given IC numbers and save the response.

    Fetches the form page, derives its captcha, posts the numbers as the
    fields dfnokp1, dfnokp2, ... in the order given, and writes the
    returned HTML to ``result.html`` in the current working directory.

    senarai -- iterable of IC number strings.
    """
    cookies, content = get_cookies()
    captcha = get_captcha(content)
    payload = {'SEMAK': 'SEMAK', 'CaptchaBox': captcha}
    # Form fields are numbered from 1.
    for position, ic in enumerate(senarai, 1):
        payload['dfnokp' + str(position)] = ic
    html = post_page(cookies, payload)
    # The response is unicode text. An explicit encoding avoids
    # UnicodeEncodeError on Python 2 for non-ASCII characters and
    # locale-dependent output on Python 3.
    with io.open('result.html', 'w', encoding='utf-8') as fp:
        fp.write(html)
if __name__ == '__main__':
    # Up to 5 IC numbers per run; put the real numbers in place of the
    # sample entries below.
    ic = [
        'xxxxxxxxxxxx',
        'yyyyyyyyyyyy',
    ]
    semak(ic)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment