Skip to content

Instantly share code, notes, and snippets.

@haniefhan
Created August 30, 2021 22:37
Show Gist options
  • Save haniefhan/8eb52ca0ea4c05db4a17958432e877c6 to your computer and use it in GitHub Desktop.
Save haniefhan/8eb52ca0ea4c05db4a17958432e877c6 to your computer and use it in GitHub Desktop.
Parse Aceh CSV test 1
import csv
import json
# Result buckets, one list per administrative level.
# The Indonesian term for each level is given in parentheses.

# provinces (provinsi)
province = []
# regencies or cities (kabupaten or kota)
regency_or_city = []
# districts (kecamatan)
district = []
# administrative villages or villages (kelurahan or desa)
administrative_village_or_village = []
# check if string has a number
def hasANumber(inputString):
    """Return True when at least one character of inputString is a digit.

    An empty string yields False.
    """
    for symbol in inputString:
        if symbol.isdigit():
            return True
    return False
# get region parent
# example: 11.01.01.2001
# 11 is for province code
# 11.01 is for regency or city code
# 11.01.01 is for district code
# 11.01.01.2001 is for administrative village or village code
def getRegionParent(ret, code):
    """Store the cumulative region codes contained in a dotted code on ret.

    The code is split on "."; the first segment becomes "prov_code", the
    first two joined by "." become "kab_code", the first three "kec_code"
    and the first four "kel_code". Segments beyond the fourth are ignored.

    ret is modified in place and also returned.
    """
    level_keys = ("prov_code", "kab_code", "kec_code", "kel_code")
    segments = code.split(".")
    # Only as many levels as the code actually has segments for are written.
    for depth, key in enumerate(level_keys[:len(segments)], start=1):
        ret[key] = ".".join(segments[:depth])
    return ret
# parse region name
# because the region name sometimes carries an ordinal number we don't need
def parseName(name):
    """Return name with every digit character removed and the ends trimmed.

    Removing a leading or trailing ordinal number leaves the whitespace that
    separated it from the name (e.g. "1 KAB. X" -> " KAB. X"), so the result
    is stripped after the digits are dropped.
    """
    return "".join(ch for ch in name if not ch.isdigit()).strip()
# Parse the Aceh region CSV: classify every row by the column that holds its
# name and append a record to the matching module-level list.
# newline="" is what the csv module asks for on files passed to csv.reader.
with open("csv/11-aceh.csv", newline="") as csv_file:
    # read csv file
    csv_reader = csv.reader(csv_file, delimiter=",", dialect=csv.excel)
    for row in csv_reader:
        # Pad blank/short rows so the column lookups below (up to index 6)
        # cannot raise IndexError.
        row = row + [""] * (7 - len(row))
        code = row[0].strip()
        # Rows whose first column holds no digit carry no region code.
        if not hasANumber(code):
            continue
        if row[1] != '':
            # must be a province or a regency or city
            name = row[1].strip()
            if not hasANumber(name):
                # province: the name carries no number
                province.append({
                    "prov_code": code,
                    "prov_name": name
                })
            else:
                # regency or city: the name carries a number, which is removed
                name = parseName(name)
                kab_type = "Kabupaten"
                if "KAB. " not in name:
                    kab_type = "Kota"
                kab = {
                    "kab_code": code,
                    "kab_name": name,
                    "kab_type": kab_type,
                }
                # we need to know which parent region it belongs to
                kab = getRegionParent(kab, code)
                regency_or_city.append(kab)
        elif row[4] != '' and hasANumber(row[4]):
            # must be a district
            name = parseName(row[4].strip())
            district.append({
                'kec_code': code,
                'kec_name': name
            })
        elif row[5] != '' and hasANumber(row[5]):
            # administrative village (kelurahan): the name comes from the
            # same column that was just tested, i.e. column 5
            name = parseName(row[5].strip())
            administrative_village_or_village.append({
                'kel_code': code,
                'kel_name': name,
                'kel_type': "Kelurahan"
            })
        elif row[6] != '' and hasANumber(row[6]):
            # village (desa)
            name = parseName(row[6].strip())
            administrative_village_or_village.append({
                'kel_code': code,
                'kel_name': name,
                'kel_type': "Desa"
            })

# Bundle the four lists and write them out as a single JSON document.
data = {
    'province': province,
    'regency_or_city': regency_or_city,
    'district': district,
    'administrative_village_or_village': administrative_village_or_village,
}
with open('json_result/11-aceh.json', 'w') as outfile:
    json.dump(data, outfile)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment