Collection of KPU DB1 scraping scripts
# kpu-hasil: scrape the 2014 DB1 DPR recap tables from pemilu2014.kpu.go.id
import urllib2
from bs4 import BeautifulSoup
from time import sleep
import csv
import sys
import json
import ssl
# Python 2: force UTF-8 as the default string encoding
reload(sys)
sys.setdefaultencoding('utf8')
# disable TLS certificate verification (the KPU site's certificate is not trusted)
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE
headers = { 'User-Agent' : 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2403.107 Safari/537.36' }
url = "https://pemilu2014.kpu.go.id/db1_dpr.php"
req = urllib2.Request(url, None, headers)
res = urllib2.urlopen(req, context=ctx)
x = res.read()
dsoup = BeautifulSoup(x, 'html.parser')
# walk the dapil dropdown, then fetch each dapil's kabupaten/kota dropdown
nn = []
for i in dsoup.find('select').findAll('option'):
    if i['value'] != '':
        n = {}
        n['id'] = i['value']
        n['value'] = i.text
        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36',
                   'Cookie': "gov2pemilu2014=cookie[id]=&cookie[dapil_dpr]=" + str(i['value'])}
        url = "https://pemilu2014.kpu.go.id/db1_dpr.php?cmd=select_1&grandparent=0&parent=" + str(i['value'])
        req = urllib2.Request(url, None, headers)
        res = urllib2.urlopen(req, context=ctx)
        x = res.read()
        dsoup2 = BeautifulSoup(x, 'html.parser')
        n['kabkot'] = []
        for j in dsoup2.find('select').findAll('option'):
            if j['value'] != '':
                s = {}
                s['id'] = j['value']
                s['value'] = j.text
                n['kabkot'].append(s)
        nn.append(n)
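# nn now maps each dapil to its kabupaten/kota list, roughly:
# [{'id': '<dapil id>', 'value': '<dapil name>',
#   'kabkot': [{'id': '<kabkot id>', 'value': '<kabkot name>'}, ...]}, ...]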
# scrape the first recap table (index 0) for every dapil x kabupaten/kota
xxx = []
for i in nn:
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36',
               'Cookie': "gov2pemilu2014=cookie[id]=&cookie[dapil_dpr]=" + str(i['id'])}
    for j in i['kabkot']:
        xx = {}
        url = "https://pemilu2014.kpu.go.id/db1_dpr.php?cmd=select_2&grandparent=" + str(i['id']) + "&parent=" + str(j['id'])
        req = urllib2.Request(url, None, headers)
        res = urllib2.urlopen(req, context=ctx)
        x = res.read().decode("utf8")
        dsoup = BeautifulSoup(x, 'html.parser')
        # the 'infoboks' div flags documents that are under revision or unverified
        if dsoup.find('div', {'id': 'infoboks'}) is None:
            xx['dapil'] = str(i['value'])
            xx['keterangan'] = None
            xx['data'] = [k.text.replace('\t', '').split('\n') for k in dsoup.findAll('table')[0].findAll('tr') if not k.find('td', {'colspan': 2})]
            xx['kabkot'] = str(j['value'])
        elif str(dsoup.find('div', {'id': 'infoboks'}).text) == 'Dokumen sedang direvisi':  # document under revision
            xx['dapil'] = str(i['value'])
            xx['keterangan'] = 'Dokumen sedang direvisi'
            xx['data'] = None
            xx['kabkot'] = str(j['value'])
        elif str(dsoup.find('div', {'id': 'infoboks'}).text) == 'Dokumen belum diverifikasi':  # document not yet verified
            xx['dapil'] = str(i['value'])
            xx['keterangan'] = 'Dokumen belum diverifikasi'
            xx['data'] = [k.text.replace('\t', '').split('\n') for k in dsoup.findAll('table')[0].findAll('tr') if not k.find('td', {'colspan': 2})]
            xx['kabkot'] = str(j['value'])
        # left-pad short rows so every row has the same number of columns
        if xx['data'] is None:
            xx['data'] = [''] * 18
        else:
            for d in xx['data']:
                d[:0] = [''] * (max([len(r) for r in xx['data']]) - len(d))
        xxx.append(xx)
        print("finished processing dapil %s kota/kab %s: %s kec - %s" % (xx['dapil'], xx['kabkot'], ('None' if xx['data'] is None else len(xx['data'][2][3:-2])), xx['keterangan']))
# pengguna hak suara: voter-turnout table (index 1); note this overwrites xxx from the block above
xxx = []
for i in nn:
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36',
               'Cookie': "gov2pemilu2014=cookie[id]=&cookie[dapil_dpr]=" + str(i['id'])}
    for j in i['kabkot']:
        xx = {}
        url = "https://pemilu2014.kpu.go.id/db1_dpr.php?cmd=select_2&grandparent=" + str(i['id']) + "&parent=" + str(j['id'])
        req = urllib2.Request(url, None, headers)
        res = urllib2.urlopen(req, context=ctx)
        x = res.read().decode("utf8")
        dsoup = BeautifulSoup(x, 'html.parser')
        if dsoup.find('div', {'id': 'infoboks'}) is None:
            xx['dapil'] = str(i['value'])
            xx['keterangan'] = None
            xx['data'] = [k.text.replace('\t', '').split('\n') for k in dsoup.findAll('table')[1].findAll('tr') if not k.find('td', {'colspan': 2})]
            xx['kabkot'] = str(j['value'])
        elif str(dsoup.find('div', {'id': 'infoboks'}).text) == 'Dokumen sedang direvisi':
            xx['dapil'] = str(i['value'])
            xx['keterangan'] = 'Dokumen sedang direvisi'
            xx['data'] = None
            xx['kabkot'] = str(j['value'])
        elif str(dsoup.find('div', {'id': 'infoboks'}).text) == 'Dokumen belum diverifikasi':
            xx['dapil'] = str(i['value'])
            xx['keterangan'] = 'Dokumen belum diverifikasi'
            xx['data'] = [k.text.replace('\t', '').split('\n') for k in dsoup.findAll('table')[1].findAll('tr') if not k.find('td', {'colspan': 2})]
            xx['kabkot'] = str(j['value'])
        if xx['data'] is None:
            xx['data'] = [''] * 18
        else:
            for d in xx['data']:
                d[:0] = [''] * (max([len(r) for r in xx['data']]) - len(d))
        xxx.append(xx)
        print("finished processing dapil %s kota/kab %s: %s kec - %s" % (xx['dapil'], xx['kabkot'], ('None' if xx['data'] is None else len(xx['data'][2][3:-2])), xx['keterangan']))
# third recap table (index 2); overwrites xxx again, so only this table reaches the CSV below
xxx = []
for i in nn:
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36',
               'Cookie': "gov2pemilu2014=cookie[id]=&cookie[dapil_dpr]=" + str(i['id'])}
    for j in i['kabkot']:
        xx = {}
        url = "https://pemilu2014.kpu.go.id/db1_dpr.php?cmd=select_2&grandparent=" + str(i['id']) + "&parent=" + str(j['id'])
        req = urllib2.Request(url, None, headers)
        res = urllib2.urlopen(req, context=ctx)
        x = res.read().decode("utf8")
        dsoup = BeautifulSoup(x, 'html.parser')
        if dsoup.find('div', {'id': 'infoboks'}) is None:
            xx['dapil'] = str(i['value'])
            xx['keterangan'] = None
            xx['data'] = [k.text.replace('\t', '').split('\n') for k in dsoup.findAll('table')[2].findAll('tr') if not k.find('td', {'colspan': 2})]
            xx['kabkot'] = str(j['value'])
        elif str(dsoup.find('div', {'id': 'infoboks'}).text) == 'Dokumen sedang direvisi':
            xx['dapil'] = str(i['value'])
            xx['keterangan'] = 'Dokumen sedang direvisi'
            xx['data'] = None
            xx['kabkot'] = str(j['value'])
        elif str(dsoup.find('div', {'id': 'infoboks'}).text) == 'Dokumen belum diverifikasi':
            xx['dapil'] = str(i['value'])
            xx['keterangan'] = 'Dokumen belum diverifikasi'
            xx['data'] = [k.text.replace('\t', '').split('\n') for k in dsoup.findAll('table')[2].findAll('tr') if not k.find('td', {'colspan': 2})]
            xx['kabkot'] = str(j['value'])
        if xx['data'] is None:
            xx['data'] = [''] * 18
        else:
            for d in xx['data']:
                d[:0] = [''] * (max([len(r) for r in xx['data']]) - len(d))
        xxx.append(xx)
        print("finished processing dapil %s kota/kab %s: %s kec - %s" % (xx['dapil'], xx['kabkot'], ('None' if xx['data'] is None else len(xx['data'][2][3:-2])), xx['keterangan']))
kk = []
for i in xxx:
    for k in [list(j) for j in zip(*i['data'])]:
        k.insert(0, i['dapil'])
        k.insert(1, i['kabkot'])
        kk.append(k)
with open('filename-0.csv', 'wb') as myfile:
    wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
    for i in kk:
        wr.writerow(i)
####################
# pemilu2019: crawl the wilayah (region) JSON tree down to TPS level
import urllib2
import json
import ssl
import csv
import sys
reload(sys)
sys.setdefaultencoding('utf8')
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2403.107 Safari/537.36'}
url1 ='https://pemilu2019.kpu.go.id/static/json/wilayah/0.json'
req = urllib2.Request(url1, None, headers)
res = urllib2.urlopen(req, context=ctx)
x = json.loads(res.read())
urx = []
for i in x.keys():
    url1 = 'https://pemilu2019.kpu.go.id/static/json/wilayah/' + str(i) + '.json'
    req = urllib2.Request(url1, None, headers)
    res = urllib2.urlopen(req, context=ctx)
    x1 = json.loads(res.read())
    for j in x1.keys():
        url2 = 'https://pemilu2019.kpu.go.id/static/json/wilayah/' + str(i) + '/' + str(j) + '.json'
        req = urllib2.Request(url2, None, headers)
        res = urllib2.urlopen(req, context=ctx)
        x2 = json.loads(res.read())
        for k in x2.keys():
            url3 = 'https://pemilu2019.kpu.go.id/static/json/wilayah/' + str(i) + '/' + str(j) + '/' + str(k) + '.json'
            req = urllib2.Request(url3, None, headers)
            res = urllib2.urlopen(req, context=ctx)
            x3 = json.loads(res.read())
            for l in x3.keys():
                url4 = 'https://pemilu2019.kpu.go.id/static/json/wilayah/' + str(i) + '/' + str(j) + '/' + str(k) + '/' + str(l) + '.json'
                req = urllib2.Request(url4, None, headers)
                res = urllib2.urlopen(req, context=ctx)
                x4 = json.loads(res.read())
                for m in x4.keys():
                    d = {}
                    d['prov'] = x[i]['nama']
                    d['kabkot'] = x1[j]['nama']
                    d['kec'] = x2[k]['nama']
                    d['kel'] = x3[l]['nama']
                    d['TPS'] = x4[m]['nama']
                    d['path'] = "%s/%s/%s/%s/%s.json" % (i, j, k, l, m)
                    urx.append(d)
                    print("path: %s/%s/%s/%s/%s.json done" % (i, j, k, l, m))
# download the per-TPS presidential (ppwp) results for every path found above
from sys import stdout
rx = []
for i in urx:
    try:
        url1 = 'https://pemilu2019.kpu.go.id/static/json/hhcw/ppwp/' + i['path']
        req = urllib2.Request(url1, None, headers)
        res = urllib2.urlopen(req, context=ctx)
        x = json.loads(res.read())
        i['Jokowi-Maruf'] = x['chart']['21']
        i['Prabowo-Sandi'] = x['chart']['22']
        i['Pemilih Terdaftar (DPT)'] = x['pemilih_j']
        i['Pengguna hak pilih'] = x['pengguna_j']
        i['Suara sah'] = x['suara_sah']
        i['Suara tidak sah'] = x['suara_tidak_sah']
        i['Suara total'] = x['suara_total']
    except Exception:
        # record missing or unreachable TPS results as None
        i['Jokowi-Maruf'] = None
        i['Prabowo-Sandi'] = None
        i['Pemilih Terdaftar (DPT)'] = None
        i['Pengguna hak pilih'] = None
        i['Suara sah'] = None
        i['Suara tidak sah'] = None
        i['Suara total'] = None
    rx.append(i)
    stdout.write("\rTPS data for kelurahan %s done" % i['kel'])
    stdout.flush()
stdout.write("\n")
ch = urx[0].keys()  # fieldnames; all rows share the same keys
with open('test-pilpres2019.csv', 'wb') as o_f:
    d_w = csv.DictWriter(o_f, ch)
    d_w.writeheader()
    d_w.writerows(urx)
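# ------------------------------------------------------------------
# The download loop above turns every failure (including transient network
# errors) into None via its except clause. A retry helper would separate
# flaky requests from genuinely missing data. Sketch only; fetch_json_retry
# is a hypothetical name, and it reuses headers and ctx defined above:
from time import sleep

def fetch_json_retry(url, attempts=3, delay=2):
    for n in range(attempts):
        try:
            res = urllib2.urlopen(urllib2.Request(url, None, headers), context=ctx)
            return json.loads(res.read())
        except urllib2.URLError:
            sleep(delay * (n + 1))  # back off a little longer on each retry
    return None  # still failing after the last attempt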
########################
# build the kecamatan-level rekap URLs from the CSV written above
# (reuses urllib2, json, headers, and ctx from the previous section)
import csv
x = []
with open('test-pilpres2019.csv', mode='r') as cf:
    cr = csv.DictReader(cf)
    for r in cr:
        x.append(r)
# a rekap file covers the parent directory of each TPS path
for i in range(0, len(x)):
    x[i]['rekapurl'] = '/'.join(x[i]['path'].split('/')[:-1]) + ".json"
# keep one row per unique rekap URL
un = []
for i in x:
    if i['rekapurl'] not in [j['rekapurl'] for j in un]:
        un.append(i)
        print(i['rekapurl'])
# smoke test: fetch the first five rekap files (the parsed result is discarded)
for i in un[0:5]:
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2403.107 Safari/537.36'}
    url1 = 'https://pemilu2019.kpu.go.id/static/json/hr/ppwp/' + str(i['rekapurl'])
    req = urllib2.Request(url1, None, headers)
    res = urllib2.urlopen(req, context=ctx)
    json.loads(res.read())
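# Judging from how the "Rekap KPU" section below reads these files
# (rekap['table'][child_id]['21'/'22']), a sketch that accumulates the two
# candidates' totals instead of discarding the JSON could look like this
# (assumes the 'table' values are dicts with numeric '21'/'22' entries):
totals = {'21': 0, '22': 0}  # 21 = Jokowi-Maruf, 22 = Prabowo-Sandi (chart keys used earlier)
for i in un[0:5]:
    url1 = 'https://pemilu2019.kpu.go.id/static/json/hr/ppwp/' + str(i['rekapurl'])
    res = urllib2.urlopen(urllib2.Request(url1, None, headers), context=ctx)
    rekap = json.loads(res.read())
    for row in rekap.get('table', {}).values():
        totals['21'] += row.get('21') or 0
        totals['22'] += row.get('22') or 0
print(totals)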
# Rekap KPU: recursive crawl of the full region tree (province -> kabkot -> kecamatan -> kelurahan/desa -> TPS)
import urllib2
from bs4 import BeautifulSoup
from time import sleep
import csv
import sys
import json
import ssl
# Python 2 UTF-8 default and unverified-TLS context, as in the scripts above
reload(sys)
sys.setdefaultencoding('utf8')
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE

def ambil(url):  # ambil = "fetch"
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2403.107 Safari/537.36'}
    req = urllib2.Request(url, None, headers)
    res = urllib2.urlopen(req, context=ctx)
    return json.loads(res.read())
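# Example: the wilayah JSONs map child-region ids to detail dicts, so
# ambil('https://pemilu2019.kpu.go.id/static/json/wilayah/0.json')
# returns something like {u'<prov_id>': {u'nama': u'<PROVINCE NAME>', ...}, ...}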
url1 = 'https://pemilu2019.kpu.go.id/static/json/wilayah/0.json'
x = ambil(url1)
d = []
for i in [str(u) for u in x.keys()]:
    data = {}
    data['id'] = i
    data['nama'] = x[i]['nama']
    data['kabkot'] = []
    data['path'] = '/'.join(url1.split('/')[:-1]) + '/' + i + '.json'
    xx = ambil(data['path'])
    for j in [str(v) for v in xx.keys()]:
        kabkot = {}
        kabkot['id'] = j
        kabkot['nama'] = xx[j]['nama']
        kabkot['kec'] = []
        kabkot['path'] = '/'.join(url1.split('/')[:-1]) + '/' + i + '/' + j + '.json'
        data['kabkot'].append(kabkot)
        xxx = ambil(kabkot['path'])
        for k in [str(w) for w in xxx.keys()]:
            kec = {}
            kec['id'] = k
            kec['nama'] = xxx[k]['nama']
            kec['keldes'] = []
            kec['path'] = '/'.join(url1.split('/')[:-1]) + '/' + i + '/' + j + '/' + k + '.json'
            kabkot['kec'].append(kec)
            xxxx = ambil(kec['path'])
            for l in [str(r) for r in xxxx.keys()]:
                keldes = {}
                keldes['id'] = l
                keldes['nama'] = xxxx[l]['nama']
                keldes['hasil'] = {}
                keldes['path'] = '/'.join(url1.split('/')[:-1]) + '/' + i + '/' + j + '/' + k + '/' + l + '.json'
                try:
                    xxxxx = ambil(keldes['path'])
                    for m in xxxxx:
                        keldes['hasil'][xxxxx[m]['nama']] = {}
                        try:
                            # fetch the kelurahan rekap once per TPS and read both candidates (the original fetched it twice)
                            rekap = ambil('https://pemilu2019.kpu.go.id/static/json/hr/ppwp/' + i + '/' + j + '/' + k + '/' + l + '.json')
                            keldes['hasil'][xxxxx[m]['nama']]['paslon01'] = rekap['table'][m]['21']
                            keldes['hasil'][xxxxx[m]['nama']]['paslon02'] = rekap['table'][m]['22']
                        except Exception:
                            keldes['hasil'][xxxxx[m]['nama']]['paslon01'] = 'kosong'  # 'kosong' = empty
                            keldes['hasil'][xxxxx[m]['nama']]['paslon02'] = 'kosong'
                except Exception:
                    keldes['hasil'] = 'kode tps tidak dapat diakses'  # TPS list could not be fetched
                kec['keldes'].append(keldes)
                print('Prov: %s, kabkot: %s, kecamatan: %s, desa: %s' % (data['nama'], kabkot['nama'], kec['nama'], keldes['nama']))
    d.append(data)
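# d is now a nested tree:
#   province -> kabkot[] -> kec[] -> keldes[] -> hasil{<TPS name>: {'paslon01': ..., 'paslon02': ...}}
# with hasil replaced by the string 'kode tps tidak dapat diakses' where the
# TPS list for a kelurahan could not be fetched.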
# flatten the nested structure into a list of dictionaries (one row per TPS)
dl = []
for i in d:
    for j in i['kabkot']:
        for k in j['kec']:
            for l in k['keldes']:
                for m in l['hasil']:
                    n = {}
                    n['prov'] = i['nama']
                    n['id_prov_kpu'] = i['id']
                    n['kabkot'] = j['nama']
                    n['id_kab_kpu'] = j['id']
                    n['kec'] = k['nama']
                    n['id_kec_kpu'] = k['id']
                    n['keldes'] = l['nama']
                    n['id_keldes_kpu'] = l['id']
                    try:
                        n['TPS'] = m
                        n['hasil-01'] = l['hasil'][m]['paslon01']
                        n['hasil-02'] = l['hasil'][m]['paslon02']
                    except Exception:
                        # hasil is an error string here, so indexing by m fails
                        n['TPS'] = 'kosong'
                        n['hasil-01'] = 'kosong'
                        n['hasil-02'] = 'kosong'
                    dl.append(n)
# write to CSV
keys = dl[0].keys()
with open('rekapKPUDESA-final-tps.csv', 'wb') as o_f:
    d_w = csv.DictWriter(o_f, keys)
    d_w.writeheader()
    d_w.writerows(dl)
# cilacap 3301
# kebumen 3305
# klaten 3310
# bantul 3402
# wonogiri 3312
# purworejo 3306
# gunung kidul 3403
# pacitan 3501
# kulon progo 3401
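# ------------------------------------------------------------------
# A sketch for rolling the per-TPS CSV written above up to kabupaten/kota
# totals (standard library only; rows whose results are 'kosong' or empty
# are skipped; column names are the ones this script writes):
from collections import defaultdict

agg = defaultdict(lambda: {'01': 0, '02': 0})
with open('rekapKPUDESA-final-tps.csv') as f:
    for row in csv.DictReader(f):
        try:
            agg[row['kabkot']]['01'] += int(row['hasil-01'])
            agg[row['kabkot']]['02'] += int(row['hasil-02'])
        except (ValueError, TypeError):
            pass  # 'kosong' / missing values are not counted
for kabkot in sorted(agg):
    print("%s: 01=%d 02=%d" % (kabkot, agg[kabkot]['01'], agg[kabkot]['02']))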
# https://geoportal.esdm.go.id/monaresia/sharing/servers/e8cc8d1772c94b1a9576cec2f39df3ad/rest/services/Pusat/WIUP_Publish/MapServer/0/query?f=json&where=1%3D1&returnGeometry=false&spatialRel=esriSpatialRelIntersects&geometry=%7B%22xmin%22%3A9922038.667205488%2C%22ymin%22%3A-301015.9504604894%2C%22xmax%22%3A13341525.564570224%2C%22ymax%22%3A1362253.7850245037%2C%22spatialReference%22%3A%7B%22wkid%22%3A102100%7D%7D&geometryType=esriGeometryEnvelope&inSR=102100&outFields=*&orderByFields=objectid%20ASC&outSR=102100&resultOffset=0