Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save haniefhan/f569a08dc2794fb0adc51f98333cebf5 to your computer and use it in GitHub Desktop.
Save haniefhan/f569a08dc2794fb0adc51f98333cebf5 to your computer and use it in GitHub Desktop.
Extract every region's tables from a PDF document into CSV files using tabula
import os

import tabula
# Single source PDF; every region below is read from this one document.
pdf_path = "PMDN 72 TH 2019+lampiran.pdf"
# Prefix prepended verbatim to each CSV file name (note the trailing slash).
output_folder = "csv/"

# One (CSV file name, page range) pair per region, grouped by island group.
# Page ranges are inclusive "first-last" strings handed to tabula unchanged.
# NOTE(review): the two-digit file-name prefixes look like official province
# codes -- confirm against the source document.
_region_pages = (
    # Sumatra
    ("11-aceh.csv", "8-274"),
    ("12-sumut.csv", "287-582"),
    ("13-sumbar.csv", "601-653"),
    ("14-riau.csv", "662-786"),
    ("15-jambi.csv", "794-864"),
    ("16-sumsel.csv", "872-1022"),
    ("17-bengkulu.csv", "1034-1106"),
    ("18-lampung.csv", "1114-1233"),
    ("19-babel.csv", "1244-1259"),
    ("21-kepri.csv", "1264-1284"),
    # Java
    ("31-jakarta.csv", "1290-1299"),
    ("32-jabar.csv", "1304-1524"),
    ("33-jateng.csv", "1547-1823"),
    ("34-jogja.csv", "1843-1858"),
    ("35-jatim.csv", "1864-2144"),
    ("36-banten.csv", "2166-2228"),
    # Bali & Nusa Tenggara
    ("51-bali.csv", "2236-2260"),
    ("52-ntb.csv", "2265-2307"),
    ("53-ntt.csv", "2315-2468"),
    # Kalimantan
    ("61-kalbar.csv", "2481-2565"),
    ("62-kalteng.csv", "2574-2642"),
    ("63-kalsel.csv", "2650-2727"),
    ("64-kaltim.csv", "2735-2777"),
    ("65-kaltara.csv", "2784-2801"),
    # Sulawesi
    ("71-sulut.csv", "2806-2893"),
    ("72-sulteng.csv", "2902-3004"),
    ("73-sulsel.csv", "3014-3134"),
    ("74-sultra.csv", "3147-3268"),
    ("75-gorontalo.csv", "3280-3315"),
    ("76-sulbar.csv", "3321-3348"),
    # Maluku
    ("81-maluku.csv", "3353-3418"),
    ("82-malut.csv", "3426-3486"),
    # Papua
    ("91-papua.csv", "3494-3789"),
    ("92-pabar.csv", "3810-3924"),
)

# Conversion jobs: "file" is the destination CSV path, "pages" the page range
# of the PDF to extract into it.
region_list = [
    {"file": output_folder + csv_name, "pages": page_range}
    for csv_name, page_range in _region_pages
]
# Convert each region's page range of the source PDF into its own CSV file.
for region in region_list:
    # Create the destination directory up front: the output path is handed
    # straight to tabula, which is not assumed to create missing directories.
    target_dir = os.path.dirname(region["file"])
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    tabula.convert_into(
        input_path=pdf_path,
        output_path=region["file"],
        output_format="csv",
        pages=region["pages"],
    )
    # Progress message so a long multi-region run shows which file finished.
    print("generate file " + region["file"] + '.')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment