Last active
September 10, 2019 15:53
-
-
Save kkprakasa/9fd0af1d4c2aa7fce7eb92de3ae5b99b to your computer and use it in GitHub Desktop.
kumpulan scraping db1 kpu
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# kpu-hasil
# Scraper for the 2014 KPU (Indonesian election commission) DB1 DPR recap.
# NOTE: Python 2 code (urllib2, reload/setdefaultencoding).
import urllib2
from bs4 import BeautifulSoup
from time import sleep
import csv
import sys
import json
import ssl

# Python 2 hack: force UTF-8 as the default str<->unicode codec so the
# Indonesian text in the pages does not raise UnicodeDecodeError.
reload(sys)
sys.setdefaultencoding('utf8')

# Disable TLS certificate verification -- the KPU host served an invalid
# certificate at scrape time. Insecure; acceptable only for this one-off crawl.
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE

headers = { 'User-Agent' : 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2403.107 Safari/537.36' }

# Fetch the landing page; its first <select> lists the electoral
# districts (dapil) for the DPR.
url = "https://pemilu2014.kpu.go.id/db1_dpr.php"
req = urllib2.Request(url, None, headers)
res = urllib2.urlopen(req, context=ctx)
x = res.read()
dsoup = BeautifulSoup(x)
# Build `nn`: one dict per dapil ({'id', 'value', 'kabkot': [...]}) where
# 'kabkot' holds the kabupaten/kota options belonging to that dapil.
nn = []
for i in dsoup.find('select').findAll('option'):
    if i['value'] != '':
        n = {}
        n['id'] = i['value']
        n['value'] = i.text
        # The site keys the kab/kota dropdown off a cookie that carries
        # the selected dapil id, so the cookie must be rebuilt per dapil.
        headers = { 'User-Agent' : 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36',
            'Cookie' : "gov2pemilu2014=cookie[id]=&cookie[dapil_dpr]=" + str(i['value'])
        }
        url = "https://pemilu2014.kpu.go.id/db1_dpr.php?cmd=select_1&grandparent=0&parent=" + str(i['value'])
        req = urllib2.Request(url, None, headers)
        res = urllib2.urlopen(req, context=ctx)
        x = res.read()
        dsoup2 = BeautifulSoup(x)
        n['kabkot'] = []
        for j in dsoup2.find('select').findAll('option'):
            if j['value'] != '':
                s = {}
                s['id'] = j['value']
                s['value'] = j.text
                n['kabkot'].append(s)
        nn.append(n)
# Scrape recap table 0 for every dapil x kab/kota pair into `xxx`.
# Each record: dapil/kabkot names, a status note (keterangan), and the
# table rows split on newlines.
xxx = []
for i in nn:
    headers = { 'User-Agent' : 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36',
        'Cookie' : "gov2pemilu2014=cookie[id]=&cookie[dapil_dpr]=" + str(i['id'])
    }
    for j in i['kabkot']:
        xx = {}
        url = "https://pemilu2014.kpu.go.id/db1_dpr.php?cmd=select_2&grandparent=" + str(i['id']) + "&parent=" + str(j['id'])
        req = urllib2.Request(url, None, headers)
        res = urllib2.urlopen(req, context=ctx)
        x = res.read().decode("utf8")
        dsoup = BeautifulSoup(x)
        xx['dapil'] = str(i['value'])
        xx['kabkot'] = str(j['value'])
        # Hoisted: the original re-ran the same find() in every branch.
        info = dsoup.find('div', {'id': 'infoboks'})
        status = None if info is None else str(info.text)
        if status is None or status == 'Dokumen belum diverifikasi':
            # Verified (no infobox) or unverified documents still expose the
            # table; skip header/summary rows marked with <td colspan=2>.
            xx['keterangan'] = status
            xx['data'] = [k.text.replace('\t', '').split('\n')
                          for k in dsoup.findAll('table')[0].findAll('tr')
                          if not k.find('td', {'colspan': 2})]
        else:
            # 'Dokumen sedang direvisi' or any other notice: no usable table.
            # (The original raised KeyError on unknown notices; record them.)
            xx['keterangan'] = status
            xx['data'] = None
        if xx['data'] is None:
            xx['data'] = [''] * 18
        else:
            # Left-pad every row with '' so all rows share the widest length
            # (max hoisted out of the loop -- it is invariant under padding).
            width = max(len(r) for r in xx['data'])
            for d in xx['data']:
                d[:0] = [''] * (width - len(d))
        xxx.append(xx)
        print("telah selesai memproses dapil %s kota/kab %s sebanyak: %s kec - %s" % (xx['dapil'], xx['kabkot'], ('None' if xx['data'] is None else len(xx['data'][2][3:-2])), xx['keterangan']))
# pengguna hak suara -- same crawl as above but scraping table index 1
# (voter-turnout table). NOTE: `xxx` is reset, so the previous pass's data
# must have been saved before this runs.
xxx = []
for i in nn:
    headers = { 'User-Agent' : 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36',
        'Cookie' : "gov2pemilu2014=cookie[id]=&cookie[dapil_dpr]=" + str(i['id'])
    }
    for j in i['kabkot']:
        xx = {}
        url = "https://pemilu2014.kpu.go.id/db1_dpr.php?cmd=select_2&grandparent=" + str(i['id']) + "&parent=" + str(j['id'])
        req = urllib2.Request(url, None, headers)
        res = urllib2.urlopen(req, context=ctx)
        x = res.read().decode("utf8")
        dsoup = BeautifulSoup(x)
        xx['dapil'] = str(i['value'])
        xx['kabkot'] = str(j['value'])
        # Hoisted: the original re-ran the same find() in every branch.
        info = dsoup.find('div', {'id': 'infoboks'})
        status = None if info is None else str(info.text)
        if status is None or status == 'Dokumen belum diverifikasi':
            xx['keterangan'] = status
            xx['data'] = [k.text.replace('\t', '').split('\n')
                          for k in dsoup.findAll('table')[1].findAll('tr')
                          if not k.find('td', {'colspan': 2})]
        else:
            # 'Dokumen sedang direvisi' or unknown notice: no usable table.
            xx['keterangan'] = status
            xx['data'] = None
        if xx['data'] is None:
            xx['data'] = [''] * 18
        else:
            # Left-pad rows to a uniform width.
            width = max(len(r) for r in xx['data'])
            for d in xx['data']:
                d[:0] = [''] * (width - len(d))
        xxx.append(xx)
        print("telah selesai memproses dapil %s kota/kab %s sebanyak: %s kec - %s" % (xx['dapil'], xx['kabkot'], ('None' if xx['data'] is None else len(xx['data'][2][3:-2])), xx['keterangan']))
# Third pass: identical crawl scraping table index 2. `xxx` is reset again,
# so save the previous pass's output first.
xxx = []
for i in nn:
    headers = { 'User-Agent' : 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36',
        'Cookie' : "gov2pemilu2014=cookie[id]=&cookie[dapil_dpr]=" + str(i['id'])
    }
    for j in i['kabkot']:
        xx = {}
        url = "https://pemilu2014.kpu.go.id/db1_dpr.php?cmd=select_2&grandparent=" + str(i['id']) + "&parent=" + str(j['id'])
        req = urllib2.Request(url, None, headers)
        res = urllib2.urlopen(req, context=ctx)
        x = res.read().decode("utf8")
        dsoup = BeautifulSoup(x)
        xx['dapil'] = str(i['value'])
        xx['kabkot'] = str(j['value'])
        # Hoisted: the original re-ran the same find() in every branch.
        info = dsoup.find('div', {'id': 'infoboks'})
        status = None if info is None else str(info.text)
        if status is None or status == 'Dokumen belum diverifikasi':
            xx['keterangan'] = status
            xx['data'] = [k.text.replace('\t', '').split('\n')
                          for k in dsoup.findAll('table')[2].findAll('tr')
                          if not k.find('td', {'colspan': 2})]
        else:
            # 'Dokumen sedang direvisi' or unknown notice: no usable table.
            xx['keterangan'] = status
            xx['data'] = None
        if xx['data'] is None:
            xx['data'] = [''] * 18
        else:
            # Left-pad rows to a uniform width.
            width = max(len(r) for r in xx['data'])
            for d in xx['data']:
                d[:0] = [''] * (width - len(d))
        xxx.append(xx)
        print("telah selesai memproses dapil %s kota/kab %s sebanyak: %s kec - %s" % (xx['dapil'], xx['kabkot'], ('None' if xx['data'] is None else len(xx['data'][2][3:-2])), xx['keterangan']))
# Transpose each kab/kota table (columns become rows), prefixing every row
# with the dapil and kab/kota names, then dump everything to one CSV.
kk = []
for i in xxx:
    for k in [list(j) for j in zip(*i['data'])]:
        k.insert(0, i['dapil'])
        k.insert(1, i['kabkot'])
        kk.append(k)

# 'wb' mode is correct for the csv module under Python 2.
with open('filename-0.csv', 'wb') as myfile:
    wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
    wr.writerows(kk)
####################
# Section 2: 2019 presidential election (pilpres).
# Walk the static wilayah JSON tree (province -> kab/kota -> kecamatan ->
# kelurahan/desa -> TPS) and collect one record per polling station (TPS).
# NOTE: `ssl` is only imported at the top of the file; this section depends
# on it having run.
import urllib2
import json
import sys

reload(sys)
sys.setdefaultencoding('utf8')

# Again skip TLS verification (invalid certificate on the KPU host).
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE
headers = { 'User-Agent' : 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2403.107 Safari/537.36' }

# Root of the region tree: all provinces.
url1 = 'https://pemilu2019.kpu.go.id/static/json/wilayah/0.json'
req = urllib2.Request(url1, None, headers)
res = urllib2.urlopen(req, context=ctx)
x = json.loads(res.read())

urx = []
for i in x.keys():  # provinces
    url1 = 'https://pemilu2019.kpu.go.id/static/json/wilayah/' + str(i) + '.json'
    req = urllib2.Request(url1, None, headers)
    res = urllib2.urlopen(req, context=ctx)
    x1 = json.loads(res.read())
    for j in x1.keys():  # kabupaten / kota
        url2 = 'https://pemilu2019.kpu.go.id/static/json/wilayah/' + str(i) + '/' + str(j) + '.json'
        req = urllib2.Request(url2, None, headers)
        res = urllib2.urlopen(req, context=ctx)
        x2 = json.loads(res.read())
        for k in x2.keys():  # kecamatan
            url3 = 'https://pemilu2019.kpu.go.id/static/json/wilayah/' + str(i) + '/' + str(j) + '/' + str(k) + '.json'
            req = urllib2.Request(url3, None, headers)
            res = urllib2.urlopen(req, context=ctx)
            x3 = json.loads(res.read())
            for l in x3.keys():  # kelurahan / desa
                url4 = 'https://pemilu2019.kpu.go.id/static/json/wilayah/' + str(i) + '/' + str(j) + '/' + str(k) + '/' + str(l) + '.json'
                req = urllib2.Request(url4, None, headers)
                res = urllib2.urlopen(req, context=ctx)
                x4 = json.loads(res.read())
                for m in x4.keys():  # TPS (polling stations)
                    d = {}
                    d['prov'] = x[i]['nama']
                    d['kabkot'] = x1[j]['nama']
                    d['kec'] = x2[k]['nama']
                    d['kel'] = x3[l]['nama']
                    d['TPS'] = x4[m]['nama']
                    d['path'] = "%s/%s/%s/%s/%s.json" % (i, j, k, l, m)
                    urx.append(d)
                    # BUG FIX: the original wrote `print("...") % (...)`,
                    # applying %-formatting to print's return value instead
                    # of to the string; format inside the call.
                    print("path : %s/%s/%s/%s/%s.json selesai" % (i, j, k, l, m))
from sys import stdout

# Download the per-TPS presidential result JSON (hhcw/ppwp) for every TPS in
# `urx`, annotating each record in place; failed downloads get None fields.
rx = []
for i in urx:
    try:
        url1 = 'https://pemilu2019.kpu.go.id/static/json/hhcw/ppwp/' + i['path']
        req = urllib2.Request(url1, None, headers)
        res = urllib2.urlopen(req, context=ctx)
        x = json.loads(res.read())
        i['Jokowi-Maruf'] = x['chart']['21']
        i['Prabowo-Sandi'] = x['chart']['22']
        i['Pemilih Terdaftar (DPT)'] = x['pemilih_j']
        i['Pengguna hak pilih'] = x['pengguna_j']
        i['Suara sah'] = x['suara_sah']
        i['Suara tidak sah'] = x['suara_tidak_sah']
        i['Suara total'] = x['suara_total']
    except Exception:
        # Narrowed from a bare `except:` so Ctrl-C can still stop a
        # multi-hour crawl; any fetch/parse failure yields None fields.
        i['Jokowi-Maruf'] = None
        i['Prabowo-Sandi'] = None
        i['Pemilih Terdaftar (DPT)'] = None
        i['Pengguna hak pilih'] = None
        i['Suara sah'] = None
        i['Suara tidak sah'] = None
        i['Suara total'] = None
    rx.append(i)
    # Progress line, overwritten in place with \r.
    stdout.write("\rdata tps di kelurahan %s selesai" % i['kel'])
    stdout.flush()
stdout.write("\n")
# BUG FIX: `urx` is a list of dicts, so the original `urx.keys()` raised
# AttributeError. Take the CSV fieldnames from the first record instead
# (all records share the same keys after the download loop above).
ch = urx[0].keys()
with open('test-pilpres2019.csv', 'wb') as o_f:
    d_w = csv.DictWriter(o_f, ch)
    d_w.writeheader()
    d_w.writerows(urx)
########################
import csv

# Re-load the per-TPS CSV, derive each row's parent (kelurahan-level) recap
# URL, and keep one representative row per unique recap URL.
x = []
with open('test-pilpres2019.csv', mode='r') as cf:
    cr = csv.DictReader(cf)
    # NOTE: the original kept an `lc` counter that was never incremented, so
    # its `if lc == 0` guard was always true -- every row was appended.
    for r in cr:
        x.append(r)

# Recap URL = the TPS path with the last component (the TPS id) dropped.
for i in range(0, len(x)):
    x[i]['rekapurl'] = '/'.join(x[i]['path'].split('/')[:-1]) + ".json"

# Deduplicate by recap URL. A seen-set replaces the original's O(n^2)
# rebuild-a-list-per-row membership test.
un = []
seen = set()
for i in x:
    if i['rekapurl'] not in seen:
        seen.add(i['rekapurl'])
        un.append(i)
        print(i['rekapurl'])

# Spot-check: fetch the recap JSON for the first five unique URLs.
for i in un[0:5]:
    headers = { 'User-Agent' : 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2403.107 Safari/537.36' }
    url1 = 'https://pemilu2019.kpu.go.id/static/json/hr/ppwp/' + str(i['rekapurl'])
    req = urllib2.Request(url1, None, headers)
    res = urllib2.urlopen(req, context=ctx)
    json.loads(res.read())
# Rekap KPU -- Section 3: kelurahan-level recap crawl (Python 2).
import urllib2
from bs4 import BeautifulSoup
from time import sleep
import csv
import sys
import json
import ssl

# Python 2 default-encoding hack (see section 1).
reload(sys)
sys.setdefaultencoding('utf8')

# Skip TLS verification -- invalid certificate on the KPU host.
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE

url = 'https://pemilu2019.kpu.go.id/static/json/wilayah/1.json'


def ambil(url):
    """Fetch `url` with a browser User-Agent (TLS unverified via the
    module-level `ctx`) and return the parsed JSON body."""
    headers = { 'User-Agent' : 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2403.107 Safari/537.36' }
    req = urllib2.Request(url, None, headers)
    res = urllib2.urlopen(req, context=ctx)
    return json.loads(res.read())
# Crawl the whole wilayah tree into nested dicts `d`:
# province -> kabkot -> kecamatan -> keldes -> per-TPS paslon 01/02 counts.
url1 = 'https://pemilu2019.kpu.go.id/static/json/wilayah/0.json'
x = ambil(url1)
base = '/'.join(url1.split('/')[:-1])  # .../static/json/wilayah
d = []
for i in [str(u) for u in x.keys()]:
    data = {}
    data['id'] = i
    data['nama'] = x[i]['nama']
    data['kabkot'] = []
    data['path'] = base + '/' + i + '.json'
    xx = ambil(data['path'])
    for j in [str(v) for v in xx.keys()]:
        kabkot = {}
        kabkot['id'] = j
        kabkot['nama'] = xx[j]['nama']
        kabkot['kec'] = []
        kabkot['path'] = base + '/' + i + '/' + j + '.json'
        data['kabkot'].append(kabkot)
        xxx = ambil(kabkot['path'])
        for k in [str(w) for w in xxx.keys()]:
            kec = {}
            kec['id'] = k
            kec['nama'] = xxx[k]['nama']
            kec['keldes'] = []
            kec['path'] = base + '/' + i + '/' + j + '/' + k + '.json'
            kabkot['kec'].append(kec)
            xxxx = ambil(kec['path'])
            for l in [str(r) for r in xxxx.keys()]:
                keldes = {}
                keldes['id'] = l
                keldes['nama'] = xxxx[l]['nama']
                keldes['hasil'] = {}
                keldes['path'] = base + '/' + i + '/' + j + '/' + k + '/' + l + '.json'
                try:
                    xxxxx = ambil(keldes['path'])
                    # Fetch the recap table ONCE per kelurahan; the original
                    # re-downloaded the identical hr/ppwp JSON twice per TPS.
                    rekap = None
                    for m in xxxxx:
                        keldes['hasil'][xxxxx[m]['nama']] = {}
                        try:
                            if rekap is None:
                                rekap = ambil('https://pemilu2019.kpu.go.id/static/json/hr/ppwp/' + i + '/' + j + '/' + k + '/' + l + '.json')
                            keldes['hasil'][xxxxx[m]['nama']]['paslon02'] = rekap['table'][m]['22']
                            keldes['hasil'][xxxxx[m]['nama']]['paslon01'] = rekap['table'][m]['21']
                        except Exception:
                            # Missing TPS entry or failed recap fetch.
                            keldes['hasil'][xxxxx[m]['nama']]['paslon02'] = 'kosong'
                            keldes['hasil'][xxxxx[m]['nama']]['paslon01'] = 'kosong'
                except Exception:
                    # The kelurahan's TPS list itself could not be fetched.
                    keldes['hasil'] = 'kode tps tidak dapat diakses'
                kec['keldes'].append(keldes)
                print('Prov: %s, kabkot: %s, kecamatan: %s, desa: %s' % (data['nama'], kabkot['nama'], kec['nama'], keldes['nama']))
    d.append(data)
# ubah ke list of dictionary -- flatten the nested prov/kabkot/kec/keldes
# tree into a flat list `dl`, one dict per TPS.
dl = []
for i in d:
    for j in i['kabkot']:
        for k in j['kec']:
            for l in k['keldes']:
                # `l['hasil']` is either a dict keyed by TPS name or the
                # error string 'kode tps tidak dapat diakses'; iterating the
                # string yields characters and the except below turns those
                # rows into 'kosong' placeholders.
                for m in l['hasil']:
                    n = {}
                    n['prov'] = i['nama']
                    n['id_prov_kpu'] = i['id']
                    n['kabkot'] = j['nama']
                    n['id_kab_kpu'] = j['id']
                    n['kec'] = k['nama']
                    n['id_kec_kpu'] = k['id']
                    n['keldes'] = l['nama']
                    n['id_keldes_kpu'] = l['id']
                    try:
                        n['TPS'] = m
                        n['hasil-01'] = l['hasil'][m]['paslon01']
                        n['hasil-02'] = l['hasil'][m]['paslon02']
                    except Exception:
                        # Narrowed from a bare `except:`.
                        n['TPS'] = 'kosong'
                        n['hasil-01'] = 'kosong'
                        n['hasil-02'] = 'kosong'
                    dl.append(n)
# tulis ke csv -- dump the flattened TPS rows; the header order follows the
# first row's keys (all rows share the same keys).
keys = dl[0].keys()
# 'wb' mode is correct for the csv module under Python 2.
with open('rekapKPUDESA-final-tps.csv', 'wb') as o_f:
    d_w = csv.DictWriter(o_f, keys)
    d_w.writeheader()
    d_w.writerows(dl)
# cilacap 3301 | |
# kebumen 3305 | |
# klaten 3310 | |
# bantul 3402 | |
# wonogiri 3312 | |
# purworejo 3306 | |
# gunung kidul 3403 | |
# pacitan 3501 | |
# kulon progo 3401 | |
# 3301 | |
# 3305 | |
# 3310 | |
# 3402 | |
# 3312 | |
# 3306 | |
# 3403 | |
# 3501 | |
# 3401 | |
# https://geoportal.esdm.go.id/monaresia/sharing/servers/e8cc8d1772c94b1a9576cec2f39df3ad/rest/services/Pusat/WIUP_Publish/MapServer/0/query?f=json&where=1%3D1&returnGeometry=false&spatialRel=esriSpatialRelIntersects&geometry=%7B%22xmin%22%3A9922038.667205488%2C%22ymin%22%3A-301015.9504604894%2C%22xmax%22%3A13341525.564570224%2C%22ymax%22%3A1362253.7850245037%2C%22spatialReference%22%3A%7B%22wkid%22%3A102100%7D%7D&geometryType=esriGeometryEnvelope&inSR=102100&outFields=*&orderByFields=objectid%20ASC&outSR=102100&resultOffset=0 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment