Skip to content

Instantly share code, notes, and snippets.

@kkprakasa
Created July 22, 2020 04:16
Show Gist options
  • Save kkprakasa/5275ec52a63b58e04353762824b7dd30 to your computer and use it in GitHub Desktop.
Save kkprakasa/5275ec52a63b58e04353762824b7dd30 to your computer and use it in GitHub Desktop.
#IDM
#!/bin/python3
import urllib3
from bs4 import BeautifulSoup
from time import sleep
import csv
import sys
import json
import ssl
import hashlib
from tqdm import tqdm
def ambilJson(url):
    """Fetch ``url`` with a GET request and return its body decoded as JSON.

    A browser-like ``User-Agent`` header is sent with the request.

    Raises:
        ValueError: (``json.JSONDecodeError``) if the body is not valid JSON.
    """
    http = urllib3.PoolManager()
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2403.107 Safari/537.36'}
    # ``headers`` must be passed by keyword: the third positional parameter of
    # PoolManager.request() is the request payload (fields/body), not the
    # headers, so a positional dict never reaches the server as a header.
    req = http.request('GET', url, headers=headers)
    return json.loads(req.data)
def ambil(url):
    """Fetch ``url`` with a GET request and return it parsed by BeautifulSoup.

    A browser-like ``User-Agent`` header is sent with the request.
    """
    http = urllib3.PoolManager()
    headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36'}
    # ``headers`` must be passed by keyword: the third positional parameter of
    # PoolManager.request() is the request payload (fields/body), not the
    # headers, so a positional dict never reaches the server as a header.
    req = http.request('GET', url, headers=headers)
    # NOTE(review): no parser is named, so BeautifulSoup picks whichever one is
    # installed; consider pinning one once the expected markup is confirmed.
    dsoup = BeautifulSoup(req.data)
    return dsoup
# Base address of the IDM site; every path used below is appended to it.
root = 'http://idm.kemendesa.go.id'
# Detail page of one fixed village/year, kept as a sample URL.
url = root+'/idm_data?id_prov=33&id_kabupaten=3321&id_kecamatan=332110&id_desa=3321102014&tahun=2019'

# Columns requested from the list endpoint, in request order.
_IDM_COLUMNS = (
    'tahun',
    'id_prov',
    'id_kabupaten',
    'id_kecamatan',
    'id_desa',
    'iks_2019',
    'ike_2019',
    'ikl_2019',
    'idm_2019',
    'idm_status_2019',
    'detail',
)
# Percent-encoded ``columns[i][...]`` parameters sent for a single column.
_COLUMN_QUERY = (
    'columns%5B{i}%5D%5Bdata%5D={name}'
    '&columns%5B{i}%5D%5Bname%5D='
    '&columns%5B{i}%5D%5Bsearchable%5D=true'
    '&columns%5B{i}%5D%5Borderable%5D={orderable}'
    '&columns%5B{i}%5D%5Bsearch%5D%5Bvalue%5D='
    '&columns%5B{i}%5D%5Bsearch%5D%5Bregex%5D=false'
)
# Full list-endpoint URL. It is assembled from the column list instead of being
# written as one literal: the literal used to be broken across two lines in the
# middle of ``idm_status_2019``, which is not valid Python.
# Every column is orderable except ``detail``.
jurl = (
    root
    + '/users/list_idm?draw=1&'
    + '&'.join(
        _COLUMN_QUERY.format(
            i=i,
            name=name,
            orderable='false' if name == 'detail' else 'true',
        )
        for i, name in enumerate(_IDM_COLUMNS)
    )
    + '&order%5B0%5D%5Bcolumn%5D=0&order%5B0%5D%5Bdir%5D=asc'
    + '&start=0&length=0&search%5Bvalue%5D=&search%5Bregex%5D=false'
    + '&_=1568821375769'
)
#
# Village list returned by the list endpoint (the "data" member of its JSON).
y = ambilJson(jurl)['data']

# Pull the IDM detail table of every village of one kabupaten.
idkab = '3319'
# [ i for i in idmy.find_one({},{'_id':0,'id_kabupaten':1,'detail':1})['id_kabupaten'].split('|')[0].strip() == '3319' ]

# Keys given to the cells of one detail-table row, in column order.
key0 = [
    'Indeks_komposit',
    'Dimensi',
    'skor',
    'Nilai',
    'Perangkat_Indikator',
    'skor_1',
    'Nilai_1',
    'Indikator_peritem',
    'skor_2',
    'kosong',
    'eksisting_dari_indikator_umum',
    'Kewenangan_pusat',
    'Kewenangan_provinsi',
    'Kewenangan_kabupaten',
    'Kewenangan_desa',
    'Kewenangan_sosial_alami',
]

# NOTE(review): ``idmy`` and ``idm`` are not defined in the visible source; they
# are used like MongoDB collections (find / find_one / insert_one) — confirm
# where they are created.
for u in tqdm(idmy.find({'id_kabupaten': idkab}, {'_id': 0})):
    if idm.find_one({'detail': u['detail']}) is not None:
        continue  # this village's detail page is already stored

    # The path is the text between the first pair of double quotes in
    # u['detail'].
    url0 = root + u['detail'].split('"')[1]
    n = ambil(url0)
    meta = list(u.values())
    kkk = []
    try:
        # Second div.row of the page, skipping its first two table rows.
        for i in n.findAll('div', {'class': 'row'})[1].findAll('tr')[2:]:
            kk = [j.text.replace('\n', '').strip() for j in i.findAll('td')]
            if len(kk) < len(key0):
                # Short row: borrow the missing leading cells from the previous
                # stored row. Stored rows start with the village values, so the
                # table cells begin at index len(meta) (previously hard-coded
                # as 16, which is only right for 16-field village documents).
                missing = len(key0) - len(kk)
                kk = kkk[-1][len(meta):len(meta) + missing] + kk
            # The last five cells are replaced by the file names (without
            # ".png") of the images found in the row.
            kk = kk[:-5] + [j['src'].split('/')[-1].replace('.png', '') for j in i.findAll('img')]
            kk = meta + kk
            kkk.append(kk)
    except Exception as exc:
        # Best effort: keep whatever rows were parsed and add a row holding only
        # the village values, but report the failure instead of hiding it.
        tqdm.write('warning: could not parse %s: %r' % (url0, exc), file=sys.stderr)
        kk = list(meta)
        kkk.append(kk)

    key1 = list(u.keys()) + key0  # village keys followed by the table keys
    for l in kkk:
        data = dict(zip(key1, l))
        idm.insert_one(data)
# NOTE(review): this repeats, for the current ``key1``/``l``, the same insert
# into ``idm`` that the statement two lines above already performs, so it
# presumably stores a duplicate document. Its intended nesting cannot be
# determined from the source — confirm whether it should be removed.
idm.insert_one(dict(zip(key1,l))) # turn the row into a dictionary, then insert it into mongo
# Download the IDM detail table of every village that is not stored yet.
# ``ik`` holds the id_desa values already present in ``idm``; it is built once
# and kept up to date in memory instead of re-reading the whole collection
# after every insert.
ik = {i['id_desa'] for i in idm.find({})}

# Keys given to the cells of one detail-table row, in column order.
key0 = [
    'Indeks_komposit',
    'Dimensi',
    'skor',
    'Nilai',
    'Perangkat_Indikator',
    'skor_1',
    'Nilai_1',
    'Indikator_peritem',
    'skor_2',
    'kosong',
    'eksisting_dari_indikator_umum',
    'Kewenangan_pusat',
    'Kewenangan_provinsi',
    'Kewenangan_kabupaten',
    'Kewenangan_desa',
    'Kewenangan_sosial_alami',
]

# NOTE(review): the original comment here said "sort descending", but no sort
# is applied to this query. ``idmy`` and ``idm`` are not defined in the visible
# source; they are used like MongoDB collections — confirm.
for u in idmy.find({}, {'_id': 0}):
    if u['id_desa'] in ik:
        print('data id %s sudah tersedia' % u['id_desa'])
        continue

    print('mengunduh %s' % u['detail'])
    # The path is the text between the first pair of double quotes in
    # u['detail'].
    url0 = root + u['detail'].split('"')[1]
    # Fetch this village's own page (url0). The previous code fetched the fixed
    # sample ``url``, so every village was stored with the same table.
    n = ambil(url0)
    meta = list(u.values())
    kkk = []
    try:
        # Second div.row of the page, skipping its first two table rows.
        for i in n.findAll('div', {'class': 'row'})[1].findAll('tr')[2:]:
            kk = [j.text.replace('\n', '').strip() for j in i.findAll('td')]
            if len(kk) < len(key0):
                # Short row: borrow the missing leading cells from the previous
                # stored row. Stored rows start with the village values, so the
                # table cells begin at index len(meta) (previously hard-coded
                # as 16, which is only right for 16-field village documents).
                missing = len(key0) - len(kk)
                kk = kkk[-1][len(meta):len(meta) + missing] + kk
            # The last five cells are replaced by the file names (without
            # ".png") of the images found in the row.
            kk = kk[:-5] + [j['src'].split('/')[-1].replace('.png', '') for j in i.findAll('img')]
            kk = meta + kk
            kkk.append(kk)
    except Exception as exc:
        # Best effort: keep whatever rows were parsed and add a row holding only
        # the village values, but report the failure instead of hiding it.
        print('warning: could not parse %s: %r' % (url0, exc), file=sys.stderr)
        kk = list(meta)
        kkk.append(kk)

    key1 = list(u.keys()) + key0  # village keys followed by the table keys
    for l in kkk:
        data = dict(zip(key1, l))
        #dt.append(data)
        #if idm.find_one({'id_desa': data['id_desa']},{'id_desa':1,'_id':0}) is None :
        idm.insert_one(data)
    if kkk:
        # Every inserted row carries this village's id_desa.
        ik.add(u['id_desa'])
# debug
import requests
def ambil(url):
    """Download ``url`` with ``requests`` and return it parsed by BeautifulSoup.

    TLS certificate verification is switched off (``verify=False``). From this
    point on this definition replaces the urllib3-based ``ambil`` defined
    earlier in the file.
    """
    response = requests.get(url, verify=False)
    return BeautifulSoup(response.content)
# Keys given to the cells of one detail-table row, in column order.
key0 = [
    'Indeks_komposit',
    'Dimensi',
    'skor',
    'Nilai',
    'Perangkat_Indikator',
    'skor_1',
    'Nilai_1',
    'Indikator_peritem',
    'skor_2',
    'kosong',
    'eksisting_dari_indikator_umum',
    'Kewenangan_pusat',
    'Kewenangan_provinsi',
    'Kewenangan_kabupaten',
    'Kewenangan_desa',
    'Kewenangan_sosial_alami',
]

# Pull the IDM detail table of every village whose id_prov is "11" into
# ``idm11``.
# NOTE(review): ``idmy`` and ``idm11`` are not defined in the visible source;
# they are used like MongoDB collections — confirm where they are created.
for u in tqdm(idmy.find({'id_prov': "11"}, {'_id': 0})):
    if idm11.find_one({'detail': u['detail']}) is not None:
        continue  # this village's detail page is already stored

    # The path is the text between the first pair of double quotes in
    # u['detail'].
    url0 = root + u['detail'].split('"')[1]
    n = ambil(url0)
    meta = list(u.values())
    kkk = []
    try:
        # Second div.row of the page, skipping its first two table rows.
        for i in n.findAll('div', {'class': 'row'})[1].findAll('tr')[2:]:
            kk = [j.text.replace('\n', '').strip() for j in i.findAll('td')]
            if len(kk) < len(key0):
                # Short row: borrow the missing leading cells from the previous
                # stored row. Stored rows start with the village values, so the
                # table cells begin at index len(meta) (previously hard-coded
                # as 16, which is only right for 16-field village documents).
                missing = len(key0) - len(kk)
                kk = kkk[-1][len(meta):len(meta) + missing] + kk
            # The last five cells are replaced by the file names (without
            # ".png") of the images found in the row.
            kk = kk[:-5] + [j['src'].split('/')[-1].replace('.png', '') for j in i.findAll('img')]
            kk = meta + kk
            kkk.append(kk)
    except Exception as exc:
        # Best effort: keep whatever rows were parsed and add a row holding only
        # the village values, but report the failure instead of hiding it.
        tqdm.write('warning: could not parse %s: %r' % (url0, exc), file=sys.stderr)
        kk = list(meta)
        kkk.append(kk)

    key1 = list(u.keys()) + key0  # village keys followed by the table keys
    for l in kkk:
        data = dict(zip(key1, l))
        idm11.insert_one(data)
# NOTE(review): ``idmmod`` is not defined in the visible source. This statement
# inserts the row built from the current ``key1``/``l`` into it; whether it was
# meant to run once per row or once after the loop cannot be determined from
# the source — confirm the intended nesting.
idmmod.insert_one(dict(zip(key1,l)))
# Split the "code | name" values held in the four region fields of every idmy
# document: the code stays under the original key (id_prov, ...) and the name is
# stored under the key without the "id_" prefix (prov, ...).
for u in tqdm(idmy.find({}, {'_id': 1, 'id_kabupaten': 1, 'id_prov': 1, 'id_desa': 1, 'id_kecamatan': 1, 'detail': 1})):
    codes = {}
    names = {}
    try:
        for field in ('id_prov', 'id_kabupaten', 'id_kecamatan', 'id_desa'):
            parts = u[field].split('|')
            codes[field] = parts[0].strip()
            names[field[len('id_'):]] = parts[1].strip()
    except (KeyError, IndexError, AttributeError):
        # Field missing, not a string, or without a '|' part — e.g. a document
        # that an earlier run of this loop has already split. Leave it as is.
        continue
    # Only parsing problems are skipped; the update runs outside the ``try`` so
    # that database errors are no longer silently discarded.
    idmy.update_one({'_id': u['_id']}, {'$set': {**codes, **names}})
# Set ``_flag`` to 0 on every document of ``idmy``. A single update_many with
# an empty filter does this in one request instead of one update_one round
# trip per document.
idmy.update_many({}, {'$set': {'_flag': 0}})
# Download the IDM detail table of every village that is not stored yet.
# ``ik`` holds the id_desa values already present in ``idm``; it is read once
# and kept up to date in memory instead of re-running the distinct query after
# every insert.
ik = set(idm.find({}).distinct('id_desa'))

# Keys given to the cells of one detail-table row, in column order.
key0 = [
    'Indeks_komposit',
    'Dimensi',
    'skor',
    'Nilai',
    'Perangkat_Indikator',
    'skor_1',
    'Nilai_1',
    'Indikator_peritem',
    'skor_2',
    'kosong',
    'eksisting_dari_indikator_umum',
    'Kewenangan_pusat',
    'Kewenangan_provinsi',
    'Kewenangan_kabupaten',
    'Kewenangan_desa',
    'Kewenangan_sosial_alami',
]

# NOTE(review): the original comment here said "sort descending", but no sort
# is applied to this query. ``idmy`` and ``idm`` are not defined in the visible
# source; they are used like MongoDB collections — confirm.
for u in idmy.find({}, {'_id': 0}):
    if u['id_desa'] in ik:
        print('data id %s sudah tersedia' % u['id_desa'])
        continue

    print('mengunduh %s' % u['detail'])
    # The path is the text between the first pair of double quotes in
    # u['detail'].
    url0 = root + u['detail'].split('"')[1]
    # Fetch this village's own page (url0). The previous code fetched the
    # unrelated module-level ``url``, so every village got the same content.
    n = ambil(url0)
    meta = list(u.values())
    kkk = []
    try:
        # Second div.row of the page, skipping its first two table rows.
        for i in n.findAll('div', {'class': 'row'})[1].findAll('tr')[2:]:
            kk = [j.text.replace('\n', '').strip() for j in i.findAll('td')]
            if len(kk) < len(key0):
                # Short row: borrow the missing leading cells from the previous
                # stored row. Stored rows start with the village values, so the
                # table cells begin at index len(meta) (previously hard-coded
                # as 16, which is only right for 16-field village documents).
                missing = len(key0) - len(kk)
                kk = kkk[-1][len(meta):len(meta) + missing] + kk
            # The last five cells are replaced by the file names (without
            # ".png") of the images found in the row.
            kk = kk[:-5] + [j['src'].split('/')[-1].replace('.png', '') for j in i.findAll('img')]
            kk = meta + kk
            kkk.append(kk)
    except Exception as exc:
        # Best effort: keep whatever rows were parsed and add a row holding only
        # the village values, but report the failure instead of hiding it.
        print('warning: could not parse %s: %r' % (url0, exc), file=sys.stderr)
        kk = list(meta)
        kkk.append(kk)

    key1 = list(u.keys()) + key0  # village keys followed by the table keys
    for l in kkk:
        data = dict(zip(key1, l))
        #dt.append(data)
        #if idm.find_one({'id_desa': data['id_desa']},{'id_desa':1,'_id':0}) is None :
        idm.insert_one(data)
    if kkk:
        # Every inserted row carries this village's id_desa.
        ik.add(u['id_desa'])
# x = ambilJson(jurl)['data']
# [ i['detail'].split('"')[1] for i in x]
# [i.text for i in n.findAll('div',{'class':'row'})[1].findAll('tr')[4].findAll('td')]
# n.findAll('div',{'class':'row'})[1].findAll('tr')[3].findAll('img')[0]['src'].split('/')[-1]
# for i in n.findAll('div',{'class':'row'})[1].findAll('tr')[2:]:
# [j['src'].split('/')[-1] for j in i.findAll('img')]
# [ i for j in kkx[0] for i in j.split('|')] #### split item didalam list
#
# kkx = []
# s=0
# for u in tqdm(y):
# url0 = root+u['detail'].split('"')[1]
# n=ambil(url)
# kkk=[]
# try:
# for i in n.findAll('div',{'class':'row'})[1].findAll('tr')[2:]:
# kk = [j.text for j in i.findAll('td')]
# if len(kk) <16:
# kk = kkk[-1][11:11+(16-len(kk))] + kk
# kk = kk[:-5] + [j['src'].split('/')[-1].replace('.png','') for j in i.findAll('img')]
# kk = list(u.values())+kk
# kkk.append(kk)
# except:
# kk = list(u.values())
# # kk.insert()
# kkk.append(kk)
# kkx = kkx + kkk
# for l,k in enumerate(kkx):
# kkx[l] = [ i for j in k for i in j.split('|')]
# idm = dbrem.idm
# keys = [str(i) for i in range(1,33)]
# for i in tqdm(kkx):
# idm.insert_one(dict(zip(keys,i)))
# with open('dmk.csv','w') as f:
# wr = csv.writer(f)
# for row in kkx:
# wr.writerow(row)
Content-Type: application/x-www-form-urlencoded
Origin: https://caribdt.dinsos.jatengprov.go.id
Referer: https://caribdt.dinsos.jatengprov.go.id/public/dashboard
Upgrade-Insecure-Requests: 1
User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36
POST /public/rekap-desa-proses HTTP/1.1
Host: caribdt.dinsos.jatengprov.go.id
Connection: keep-alive
Content-Length: 97
Cache-Control: max-age=0
Origin: https://caribdt.dinsos.jatengprov.go.id
Upgrade-Insecure-Requests: 1
Content-Type: application/x-www-form-urlencoded
User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
Referer: https://caribdt.dinsos.jatengprov.go.id/public/dashboard
Accept-Encoding: gzip, deflate, br
Accept-Language: en-US,en;q=0.9
Cookie: _ga=GA1.3.947129702.1583481586; _gid=GA1.3.289665751.1583481586; XSRF-TOKEN=eyJpdiI6IkNMSGVnV1BXVDFQaW8xYmMwSSt5c0E9PSIsInZhbHVlIjoiaVRFbUJIb1ZLRjhBckQxaXYwZnZ4ODJCVjlaZXdCaXF3WnJnbW1aaTZVVkp0TkVIOWtTdXhJQXBcL0hxS1M5SSsiLCJtYWMiOiI1ODU4Y2JmZTFhZDBhMmI0ZjkwOTNlNWIxZGZmNjk0OTU2NGM1NzFkMWYxNTAyY2I3MDVkNWZhYjZlMGE1MzFlIn0%3D; caribdt_session=eyJpdiI6IjJzb2hlZXA4QkZnSWo3akE4a3lSeWc9PSIsInZhbHVlIjoibk5YUUMxRTVhcGR2YzBoXC94Nm9BUDFtK3p3eWJOeWJvbDFmSXdIN21XSm1zMlMxVnhRR0xUUVJLQ1QxdXlZM1wvIiwibWFjIjoiMGUxYzk5ZWJmNGYyZTg3ZjZmZmNiNTU2ZGJlOWE2MzQ4NzUwOGY4YjNmMDhjMTk3NTk1ZjI2NGFkMzRiZDE3NSJ9
import requests
import json
# POST one village lookup form to the caribdt "rekap-desa-proses" endpoint.
# Note that this rebinds the module-level name ``url``.
url = 'https://caribdt.dinsos.jatengprov.go.id/public/rekap-desa-proses'
# Request headers copied from a browser session.
h = {
    'Host': 'caribdt.dinsos.jatengprov.go.id',
    'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:73.0) Gecko/20100101 Firefox/73.0',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.5',
    'Accept-Encoding': 'gzip, deflate, br',
    'Referer': 'https://caribdt.dinsos.jatengprov.go.id/public/dashboard',
    'Content-Type': 'application/x-www-form-urlencoded',
    'Content-Length': '89',
    'Origin': 'https://caribdt.dinsos.jatengprov.go.id',
    'Connection': 'keep-alive',
    'Upgrade-Insecure-Requests': '1',
    'Cache-Control': 'max-age=0, no-cache',
    'Pragma': 'no-cache',
}
# Form fields.
# NOTE(review): the ``_token`` value ends with '&', which looks like a form
# separator copied along with the token by accident — confirm against a fresh
# browser capture before relying on it.
p = {
    '_token': 'u7ER5WkjR6E2exdsvy17jENmrEYJPTitS3s0qiWk&',
    'kdkab': '08',
    'nmkec': 'KAJORAN',
    'nmdesa': 'BANJARETNO',
}
# Cookies copied from a browser session.
# NOTE(review): these (and ``_token``) are hard-coded session credentials;
# presumably they expire and should not be kept in source — confirm.
c = {
    'XSRF-TOKEN': 'eyJpdiI6ImRPUm1ZVDJhWXZSNnBhSGJBUkE5d3c9PSIsInZhbHVlIjoiNmsxRkJINmNUSTFYQitteEZFK1FcL1ZENHpPUWRBSzh4QzVzb21JaVwvdXl5QXAzXC9uM29QUXRqSTMzTW5IelZKUyIsIm1hYyI6IjhlMTk4MWE1OGMzYTIxNjMxOGRjYmNlZjNlZjAxNDUxYmY5M2NiMmQzNzVmNWUyNjgxZjJhODM0NTdlMzNhYWYifQ%3D%3D',
    'caribdt_session': 'eyJpdiI6Ijg1SW5hMFBYdiswdklOM0FmbU9IZnc9PSIsInZhbHVlIjoicHI5bWxXT1hEeWFcL1BEaDhvUlZ6ZzJvWmhzWVlRd09aUHJ1SlFoajZMYVlxUlN2Q01jdERwVTd6cDcyaHdGRkUiLCJtYWMiOiIzZTAwNjRhY2FiMGI3NGI2M2NhN2RkYjJkMTE5M2JlMWU0ZDAzZWU2MjZiMGEwNDYyMWIzZjMzMDk4YTYxMzc1In0%3D',
    '_ga': 'GA1.3.876487108.1583484300',
    '_gid': 'GA1.3.487186037.1583484300',
}
# The Content-Type header declares application/x-www-form-urlencoded, so the
# body has to be form-encoded. Passing the dict itself lets requests encode it;
# the previous json.dumps(p) sent a JSON string under a form content type.
r = requests.post(url, data=p, headers=h, cookies=c)
import re

# Add derived fields to every record of ``res`` in place:
#   slug-url    - base + 'detail/<id>/<title slug>'; the slug is the title with
#                 runs of non-word characters collapsed, lower-cased and joined
#                 with '-'
#   desc        - desc with every match of ``clean`` replaced by a space
#   slug-rubrik - base + 'rubrik/<rubrik>/<namarubrik slug>'
#   detail      - detail with every match of ``clean`` removed and the literal
#                 four-character sequence \r\n (backslashes included) stripped
# A plain loop replaces the former list comprehension, which was used only for
# its side effects and built a throw-away list of None values.
# NOTE(review): ``res``, ``base`` and ``clean`` are not defined in the visible
# source — confirm where they come from.
for i in res:
    i.update({
        'slug-url': base + 'detail/%s/%s' % (
            i['id'],
            re.sub(r'\W+', ' ', i['title']).strip().lower().replace(' ', '-'),
        ),
        'desc': re.sub(clean, ' ', i['desc']),
        'slug-rubrik': base + 'rubrik/%s/%s' % (
            i['rubrik'],
            i['namarubrik'].lower().replace(' ', '-'),
        ),
        'detail': re.sub(clean, '', i['detail']).replace('\\r\\n', ''),
    })
import cv2
import matplotlib.pyplot as plt
import cvlib as cv
from cvlib.object_detection import draw_bbox
# Detect common objects in cars2.jpg, show the annotated image and print how
# many of the detected labels are 'car'.
im = cv2.imread('cars2.jpg')
if im is None:
    # cv2.imread does not raise when the file is missing or unreadable; it
    # returns None, which would only fail later inside the detector.
    raise OSError("cv2.imread could not read 'cars2.jpg'")
bbox, label, conf = cv.detect_common_objects(im)
output_image = draw_bbox(im, bbox, label, conf)
# OpenCV loads images in BGR channel order while matplotlib displays RGB, so
# convert before showing; otherwise red and blue appear swapped.
plt.imshow(cv2.cvtColor(output_image, cv2.COLOR_BGR2RGB))
plt.show()
print(label.count('car'))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment