Skip to content

Instantly share code, notes, and snippets.

@fikriauliya
Last active August 29, 2015 14:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save fikriauliya/59b19b2ce8b08fa46644 to your computer and use it in GitHub Desktop.
Save fikriauliya/59b19b2ce8b08fa46644 to your computer and use it in GitHub Desktop.
# Re-joins wrapped lines of the raw text dump 'raw_rapbd.txt'.
#
# Every input line is stripped. A line that begins with an account code
# (digit, dot, two digits) or that matches one of EXCEPTIONS starts a new
# output line; any other line is treated as a continuation and is appended
# to the current output line after a single space.
contents = File.read('raw_rapbd.txt')
EXCEPTIONS = [/^Urusan Pemerintahan :/, /^Organisasi :/, /^Jumlah/, /^$/, /^Surplus\/\(Defisit\)/, /^Pembiayaan Netto/]
contents.each_line do |raw_line|
  text = raw_line.strip
  # A record boundary is either an account-code prefix or a known heading.
  starts_new_line = /^\d\.\d\d/.match(text) || EXCEPTIONS.any? { |pattern| pattern.match(text) }
  if starts_new_line
    print("\n#{text}")
  else
    print(" #{text}")
  end
end
# Converts the cleaned text in 'raw_rapbd_2.txt' into pipe-delimited rows.
#
# "Urusan Pemerintahan : ..." and "Organisasi : ..." lines update the current
# context (kept in instance variables) and produce no output. Every other
# non-blank line that contains an account code is printed as one row made of
# the current context, the account code, the description and, when the line
# ends in a dotted number, the amount. Lines without an account code are
# silently ignored.
contents = File.read('raw_rapbd_2.txt')
puts "Urusan Pemerintahan|Organisasi|Kode Rekening|Uraian|Jumlah"
contents.each_line do |raw_line|
  next if raw_line.strip.empty?

  # Context lines are matched against the unstripped line.
  if (heading = /^Urusan Pemerintahan : (.+)/.match(raw_line))
    @urusan_pemerintah = heading[1]
    next
  end
  if (heading = /^Organisasi : (.+)/.match(raw_line))
    @organisasi = heading[1]
    next
  end

  record = raw_line.strip
  code_match = /(\d\.\d{2} \d{3} \d{2} \d{3} (\d )?(\d )?(\d )?)(.+)/.match(record)
  next unless code_match

  kode_rekening = code_match[1].strip
  remaining = code_match[-1]
  # The amount, when present, is the trailing run of digits and dots.
  amount_match = /(.+?)([\.\d]+\d{3})$/.match(remaining)
  if amount_match
    uraian = amount_match[1].strip
    jumlah = amount_match[2].strip
    puts "#{@urusan_pemerintah}|#{@organisasi}|#{kode_rekening}|#{uraian}|#{jumlah}"
  else
    puts "#{@urusan_pemerintah}|#{@organisasi}|#{kode_rekening}|#{remaining}|"
  end
end
# Formats a number with "." as the thousands separator.
#
# The value is converted with +to_s+, so Integers and digit strings are both
# accepted. A leading minus sign is kept out of the digit grouping, so that
# e.g. -123 renders as "-123" rather than "-.123".
#
# @param number [Integer, String] value to format
# @return [String] the grouped representation, e.g. 1234567 -> "1.234.567"
def separate_with_dot(number)
  text = number.to_s
  negative = text.start_with?('-')
  digits = negative ? text[1..-1] : text
  # Group from the right: reverse, slice into threes, join, reverse back.
  grouped = digits.reverse.chars.each_slice(3).map(&:join).join('.').reverse
  negative ? "-#{grouped}" : grouped
end
# Rolls up the pipe-delimited rows of 'raw_rapbd_3.txt' into one summary row
# per group and prints them, with amounts formatted by separate_with_dot.
#
# Rows are classified by the part of "Kode Rekening" after its first 15
# characters (stripped):
#   length <= 1 : starts a new group; the previous group (if any) is printed
#   length == 3 : its amount is added to the group total
#   length == 5 : its amount is added to a child sum that is compared with
#                 the preceding length-3 amount when the next length-3 row
#                 arrives
# Independently of the level, rows whose description is exactly
# 'BELANJA BARANG DAN JASA', 'BELANJA PEGAWAI' or 'BELANJA MODAL' are added to
# the matching per-group accumulator.
contents = File.read('raw_rapbd_3.txt')
puts "Urusan Pemerintahan|Organisasi|Kode Rekening|Uraian|Belanja Barang dan Jasa|Belanja Modal|Belanja Pegawai|Total"
first_line = true
# @last_jumlah  : amount of the most recent length-3 row (0 when none pending)
# @total        : sum of length-3 amounts in the current group
# @total_child  : sum of length-5 amounts since the last length-3 row
@last_jumlah = @total = @total_child = 0
@belanja_barang_dan_jasa = @belanja_modal = @belanja_pegawai = 0
contents.each_line do |l|
urusan_pemerintahan, organisasi, kode_rekening, uraian, jumlah = l.split("|")
# Amounts use "." as thousands separator; drop the dots before to_i.
# NOTE(review): a line with fewer than five fields leaves jumlah nil and
# raises here — confirm the input never contains such lines.
jumlah.gsub!(/\./, '')
jumlah = jumlah.to_i
# The first line is skipped (presumably the header row — confirm).
if first_line then first_line = false
else
last_kode_rekening = kode_rekening[15..-1].strip
case uraian
when 'BELANJA BARANG DAN JASA'
@belanja_barang_dan_jasa += jumlah
when 'BELANJA PEGAWAI'
@belanja_pegawai += jumlah
when 'BELANJA MODAL'
@belanja_modal += jumlah
end
if last_kode_rekening.length <= 1 and @prev_kode_rekening
# A new group begins: emit the finished one and reset the accumulators.
# NOTE(review): @total_child is not reset here and the last length-3 row of
# the finished group is never checked against its children — confirm intent.
puts "#{@prev_urusan_pemerintahan}|#{@prev_organisasi}|#{@prev_kode_rekening}|#{@prev_uraian}|#{separate_with_dot(@belanja_barang_dan_jasa)}|#{separate_with_dot(@belanja_modal)}|#{separate_with_dot(@belanja_pegawai)}|#{separate_with_dot(@total)}"
@total = @last_jumlah = @belanja_modal = @belanja_pegawai = @belanja_barang_dan_jasa = 0
elsif last_kode_rekening.length == 3
# Consistency check: the previous length-3 amount should equal the sum of
# the length-5 rows seen since then.
if @last_jumlah != 0
if @last_jumlah != @total_child
puts "Error! #{@last_jumlah} != #{@total_child}"
end
@last_jumlah = 0
end
@total += jumlah
@last_jumlah = jumlah
@total_child = 0
elsif last_kode_rekening.length == 5
@total_child += jumlah
end
# Remember the group-starting row so it can be printed when the group ends.
if last_kode_rekening.length <= 1
@prev_urusan_pemerintahan, @prev_organisasi, @prev_kode_rekening, @prev_uraian, @prev_jumlah = urusan_pemerintahan, organisasi, kode_rekening, uraian, jumlah
end
# Original Algorithm by Arief:
#
# if (last_kode_rekening.length() <= 1) {
# print(kode_rekening + uraian + total);
# total = 0;
# }
# if (last_kode_rekening.length() == 3) {
# if (last_jumlah != 0) {
# if (last_jumlah != total_child)
# not equal (report mismatch)
# last_jumlah = 0;
# }
# total += jumlah;
# last_jumlah = jumlah;
# total_child = 0;
# }
# if (last_kode_rekening.length() == 5) {
# total_child += jumlah;
# }
end
end
# Emit the final group, which has no following group-start row to trigger it.
if @prev_kode_rekening
puts "#{@prev_urusan_pemerintahan}|#{@prev_organisasi}|#{@prev_kode_rekening}|#{@prev_uraian}|#{separate_with_dot(@belanja_barang_dan_jasa)}|#{separate_with_dot(@belanja_modal)}|#{separate_with_dot(@belanja_pegawai)}|#{separate_with_dot(@total)}"
end
# Formats a numeric value with "," as the thousands separator.
#
# The absolute value is grouped in threes from the right and a leading "-"
# is re-attached for negative input.
#
# @param number [Integer] value to format (must respond to +abs+ and +<+)
# @return [String] e.g. 1234567 -> "1,234,567", -1234 -> "-1,234"
def separate_with_comma(number)
  reversed_digits = number.abs.to_s.reverse
  grouped = reversed_digits.chars.each_slice(3).map(&:join).join(",").reverse
  number < 0 ? "-#{grouped}" : grouped
end
# Scans 'join_apbd.csv' and reports duplicate activity identifiers.
#
# Three lookup tables are filled while reading, each mapping to the row's
# pagu value (a later row overwrites an earlier one):
#   kode_nama_kegiatan_to_pagu : "kode_skpd_kode_kegiatan_nama_kegiatan"
#   kode_kegiatan_to_pagu      : kode_kegiatan
#   nama_kegiatan_to_pagu      : nama_kegiatan
# Before each insert a "Duplicate ..." line is printed when the key is
# already present. Blank (whitespace-only) codes or names are not indexed.
dprd_version = File.read('join_apbd.csv')
kode_kegiatan_to_pagu = {}
nama_kegiatan_to_pagu = {}
kode_nama_kegiatan_to_pagu = {}
# kode_kegiatan_to_line = Hash.new
# dprd_version.each_line do |l|
# _, kode_skpd, nama_skpd, komisi, kode_kegiatan, nama_kegiatan, pagu, tambah, kurang, _, _, _, hasil_pembahasan = /(.*)\|(.*)\|(.*)\|(.*)\|(.*)\|(.*)\|(.*)\|(.*)\|(.*)\|(.*)\|(.*)\|(.*)\|/.match(l).to_a
# unless kode_kegiatan.strip.empty?
# k = [kode_skpd, nama_skpd, komisi, kode_kegiatan].join("_")
# if kode_kegiatan_to_line.key?(k)
# if kode_kegiatan_to_line[k]['name'] == nama_kegiatan then print("* ") end
# puts "Duplicate keys: #{kode_skpd}|#{nama_skpd}|#{komisi}|#{kode_kegiatan} -> #{kode_kegiatan_to_line[k]['name']} (#{kode_kegiatan_to_line[k]['jumlah']}) vs #{nama_kegiatan} (#{hasil_pembahasan})"
# else
# kode_kegiatan_to_line[k] = Hash.new
# kode_kegiatan_to_line[k]['name'] = nama_kegiatan
# kode_kegiatan_to_line[k]['jumlah'] = hasil_pembahasan
# end
# end
# end
dprd_version.each_line do |row|
  captures = /(.*)\|(.*)\|(.*)\|(.*)\|(.*)\|(.*)\|(.*)\|(.*)\|(.*)\|(.*)\|(.*)\|(.*)\|/.match(row).to_a
  # Only four of the twelve captured fields are needed here.
  kode_skpd, kode_kegiatan, nama_kegiatan, pagu = captures.values_at(1, 4, 5, 6)

  has_kode = !kode_kegiatan.strip.empty?
  has_nama = !nama_kegiatan.strip.empty?

  if has_kode && has_nama
    combined_key = [kode_skpd, kode_kegiatan, nama_kegiatan].join("_")
    if kode_nama_kegiatan_to_pagu.key?(combined_key)
      puts "Duplicate kode & nama: #{combined_key} -> #{pagu} vs #{kode_nama_kegiatan_to_pagu[combined_key]}"
    end
    kode_nama_kegiatan_to_pagu[combined_key] = pagu
  end

  if has_kode
    puts "Duplicate kode: #{kode_kegiatan}" if kode_kegiatan_to_pagu.key?(kode_kegiatan)
    kode_kegiatan_to_pagu[kode_kegiatan] = pagu
  end

  if has_nama
    puts "Duplicate nama: #{nama_kegiatan}" if nama_kegiatan_to_pagu.key?(nama_kegiatan)
    nama_kegiatan_to_pagu[nama_kegiatan] = pagu
  end
end
# pemda_version = File.read('raw_rapbd_4.txt')
# puts("Urusan Pemerintahan|Organisasi|Kode Rekening|Uraian|Jumlah|Pagu_DPRD|Flag")
# first_line = true
# pemda_version.each_line do |l|
# if first_line then
# first_line = false
# else
# _, urusan_pemerintahan, organisasi, kode_rekening, uraian, jumlah = /(.*)\|(.*)\|(.*)\|(.*)\|(.*)/.match(l).to_a
# # 1.01 001 01 121 -> 1.01.01.001.121
# translatted_kode_rekening = kode_rekening[0..3] + "." + kode_rekening[9..10] + "." + kode_rekening[5..7] + "." + kode_rekening[12..-1]
# pagu = nil
# pagu ||= kode_nama_kegiatan_to_pagu[[translatted_kode_rekening, uraian].join("_")]
# pagu ||= kode_kegiatan_to_pagu[translatted_kode_rekening]
# # pagu ||= nama_kegiatan_to_pagu[uraian]
# jumlah.gsub!(/\./, ',')
# if pagu
# if pagu == jumlah
# flag = 0
# else
# flag = separate_with_comma(jumlah.gsub(/\,/, '').to_i - pagu.gsub(/\,/, '').to_i)
# end
# else
# flag = "Doesn't exist in DPRD"
# end
# print "#{urusan_pemerintahan}|#{organisasi}|#{kode_rekening}|#{uraian}|#{jumlah}|#{pagu}|#{flag}\n"
# end
# end
# This code needs refactoring
require 'fuzzy_match'
require 'set'
require 'amatch'
# Formats a numeric string with "," as the thousands separator.
#
# The string is converted with +to_i+, its absolute value is grouped in
# threes from the right, and a leading "-" is re-attached for negative
# values.
#
# @param number [String] digits to format (must respond to +empty?+)
# @return [String, Integer] the grouped text, or the Integer 0 when
#   +number+ is empty
def separate_with_comma(number)
  return 0 if number.empty?

  value = number.to_i
  reversed_digits = value.abs.to_s.reverse
  grouped = reversed_digits.chars.each_slice(3).map(&:join).join(",").reverse
  value < 0 ? "-#{grouped}" : grouped
end
# Indexes the totals found in 'mata.anggaran.csv' (first line skipped).
#
# Each data row contributes its total (with "," and "." removed) to three
# tables whose values are arrays, so repeated keys accumulate every total:
#   kode_rekening_program_to_total : "kode_rekening_<lowercased program>"
#   kode_rekening_to_total         : kode_rekening
#   program_to_total               : lowercased program
# all_programs collects every lowercased program name seen.
pemda_version = File.read('mata.anggaran.csv')
kode_rekening_program_to_total = {}
kode_rekening_to_total = {}
program_to_total = {}
first_line = true
all_programs = Set.new
pemda_version.each_line do |row|
  if first_line
    first_line = false
    next
  end

  captures = /(.*)\|(.*)\|(.*)\|(.*)\|(.*)\|(.*)\|(.*)\|(.*)\|(.*)/.match(row).to_a
  kode_rekening = captures[4]
  program = captures[5]
  total = captures[9].to_s.gsub(/\,/, "").gsub(/\./, "")
  # puts kode_rekening, program
  # if kode_rekening_to_total.key? kode_rekening then puts "Duplicate kode_rekening: #{kode_rekening}" end
  # if kode_rekening_program_to_total.key? [kode_rekening, program].join("_") then puts "Duplicate kode_rekening_program: #{kode_rekening}_#{program}" end
  # if program_to_total.key? program then puts "Duplicate program: #{program}" end
  program_key = program.downcase
  combined_key = [kode_rekening, program_key].join("_")

  (kode_rekening_program_to_total[combined_key] ||= []) << total
  (kode_rekening_to_total[kode_rekening] ||= []) << total
  (program_to_total[program_key] ||= []) << total
  all_programs.add(program_key)
end
# fm = FuzzyMatch.new(all_programs.to_a)
# Compares every data row of 'join_apbd.csv' with the totals indexed earlier
# in this file (kode_rekening_program_to_total, kode_rekening_to_total,
# program_to_total, all_programs) and prints one pipe-delimited report row
# per input row, preceded by a header line.
#
# Lookup order for a row: combined code + lowercased name, then code only,
# then lowercased name only; the flag column records which one matched.
# When nothing matches, the most similar known program name is suggested.
dprd_version = File.read('join_apbd.csv')
first_line = true
puts "no|kode_skpd|nama_skpd|komisi|kode_kegiatan|nama_kegiatan|pagu|tambah|kurang|hasil_pembahasan|hasil_pemprov_version|delta = hasil_pembahasan - hasil_pemprov_version|flag|kegiatan_versi_pemprov_yang_mirip|% kemiripan|hasil_pemprov_version_yang_mirip|delta = hasil_pemprov_version_yang_mirip - hasil_pemprov_version"
# Running row number printed in the first output column.
number = 1
dprd_version.each_line do |l|
# The first line is skipped (presumably the header row — confirm).
if first_line
first_line = false
else
_, kode_skpd, nama_skpd, komisi, kode_kegiatan, nama_kegiatan, pagu, tambah, kurang, hasil_pembahasan = /(.*)\|(.*)\|(.*)\|(.*)\|(.*)\|(.*)\|(.*)\|(.*)\|(.*)\|/.match(l).to_a
# Reorder the dotted code into the space-separated form used as lookup key:
# 1.01.01.001.121 -> 1.01 001 01 121
# puts kode_kegiatan
unless kode_kegiatan.empty?
translatted_kode_kegiatan = kode_kegiatan[0..3] + " " + kode_kegiatan[8..10] + " " + kode_kegiatan[5..6] + " " + kode_kegiatan[12..-1]
else
translatted_kode_kegiatan = ""
end
# puts translatted_kode_kegiatan, nama_kegiatan
# Default flag; overwritten by whichever lookup below succeeds first.
flag = "Not found in Pemprov version based on kode & nama kegiatan"
total = kode_rekening_program_to_total[[translatted_kode_kegiatan, nama_kegiatan.downcase].join("_")]
if total then flag = "Perfect match" end
if total.nil? then
total = kode_rekening_to_total[translatted_kode_kegiatan]
if total then flag = "Matched by kode kegiatan" end
end
if total.nil? then
total = program_to_total[nama_kegiatan.downcase]
if total then flag = "Matched by nama kegiatan" end
end
# Normalise the amount columns: strip "," and "." then re-group with commas.
pagu = separate_with_comma(pagu.gsub(/\,/, "").gsub(/\./, ""))
tambah = separate_with_comma(tambah.gsub(/\,/, "").gsub(/\./, ""))
kurang = separate_with_comma(kurang.gsub(/\,/, "").gsub(/\./, ""))
# Keep the integer value for the delta computations below.
original_hasil_pembahasan = hasil_pembahasan.gsub(/\,/, "").gsub(/\./, "").to_i
hasil_pembahasan = separate_with_comma(hasil_pembahasan.gsub(/\,/, "").gsub(/\./, ""))
recommended_nama_kegiatan = nil
recommended_total = nil
recommended_score = nil
if total.nil?
# No match at all: pick the known program name with the highest similarity
# score; on ties the first one encountered is kept (strict comparison).
# levenshtein_similar is not defined in this file — presumably provided by
# the amatch gem; confirm.
# recommended_nama_kegiatan = fm.find(nama_kegiatan)
recommended_nama_kegiatan = nil
recommended_score = -1
all_programs.each do |p|
score = p.levenshtein_similar(nama_kegiatan.downcase)
if recommended_score < score
recommended_score = score
recommended_nama_kegiatan = p
end
end
# NOTE(review): if all_programs is empty, recommended_nama_kegiatan stays nil
# and the lookup below returns nil, so .length raises — confirm the index is
# never empty.
# A delta is only computed when the suggestion has exactly one total.
if program_to_total[recommended_nama_kegiatan].length == 1
recommended_delta = separate_with_comma((original_hasil_pembahasan - program_to_total[recommended_nama_kegiatan][0].to_i).to_s)
end
# Up to three totals are listed joined by "/"; more are summarised.
if program_to_total[recommended_nama_kegiatan].length > 3
recommended_total = "Too many variants"
else
t = program_to_total[recommended_nama_kegiatan].map{|e| separate_with_comma(e)}
recommended_total = t.join("/")
end
end
if total.nil?
# Unmatched row: the Pemprov total column is left empty and the delta column
# repeats hasil_pembahasan.
puts "#{number}|#{kode_skpd}|#{nama_skpd}|#{komisi}|#{kode_kegiatan}|#{nama_kegiatan}|#{pagu}|#{tambah}|#{kurang}|#{hasil_pembahasan}||#{hasil_pembahasan}|#{flag}|#{recommended_nama_kegiatan}|#{recommended_score}|#{recommended_total}|#{recommended_delta}"
else
# Matched row: delta is only filled when exactly one total was indexed.
if total.length == 1
delta = separate_with_comma((original_hasil_pembahasan - total[0].to_i).to_s)
end
if total.length > 3
total = "Too many variants"
else
total = total.map{|e| separate_with_comma(e)}
total = total.join("/")
end
puts "#{number}|#{kode_skpd}|#{nama_skpd}|#{komisi}|#{kode_kegiatan}|#{nama_kegiatan}|#{pagu}|#{tambah}|#{kurang}|#{hasil_pembahasan}|#{total}|#{delta}|#{flag}|#{recommended_nama_kegiatan}|#{recommended_score}|#{recommended_total}|#{recommended_delta}"
end
number += 1
end
end
@djiebrats
Copy link

file raw_rapbd.txt, raw_rapbd_2.txt dapat dari mana ya mas. pengen nyobain

@fikriauliya
Copy link
Author

raw_rapbd_2.txt dari output 0_rapbd_cleaner.rb. raw_rapbd_3.txt dari output 1_rapbd_to_csv.rb.
raw_rapbd.txt dari copy paste PDF di sini www.jakarta.go.id/v2/news/2015/03/rapbd-pemprov-dki-dan-dprd#.VPLDDCwxs7w -> https://drive.google.com/file/d/0B9EJd8nA4QE8R0s1ellncWtPcmc/view?usp=sharing

@djiebrats
Copy link

terimakasih mas.. tak cobai melihat dengan cara yang berbeda

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment