Skip to content

Instantly share code, notes, and snippets.

@aivuk
Created December 5, 2015 00:05
Show Gist options
  • Save aivuk/20d0d639717a27ce43bc to your computer and use it in GitHub Desktop.
Save aivuk/20d0d639717a27ce43bc to your computer and use it in GitHub Desktop.
# Converts the RDF file at http://dados.gov.br/dataset/orcamento-federal/resource/edf2263a-f8d9-48d9-b25d-6863d16ce0b0?inner_span=True into a CSV file (loa.csv).
import re
# Each item in the input occupies 16 consecutive lines: 9 lines matched by
# item_re, 6 lines matched by item_value_re, and one trailing line that is
# read and ignored.
URI_LINES_PER_ITEM = 9
VALUE_LINES_PER_ITEM = 6

# Upper bound on the number of items converted in one run.
numero_items = 375960
# group(1): last path segment of the first <...> URI on the line;
# group(2): last path segment of the last <...> URI on the line.
item_re = re.compile(r'<[^>]+/([^>]+)>.*/([^>]+)>')
# group(2): the text found between ' "' and '"^^' on the line.
item_value_re = re.compile(r'<[^>]+/([^>]+)>.* "(.*)"\^\^.*')


def _match_line(pattern, raw):
    """Match *pattern* against one raw input line and return the match.

    Trailing whitespace and then the last two characters are removed before
    matching (presumably the ' .' statement terminator -- confirm against
    the input file).

    Raises:
        ValueError: if the input ended (empty *raw*) or the line does not
            match *pattern*.
    """
    if not raw:
        raise ValueError('input ended in the middle of an item')
    m = pattern.match(raw.rstrip()[:-2])
    if m is None:
        raise ValueError('unexpected line in input: {!r}'.format(raw))
    return m


def _read_item(loa_file):
    """Read one 16-line item and return its CSV row (without newline).

    Returns None when the file is already at end-of-file, so callers can stop
    cleanly instead of failing on a missing match.
    """
    first = loa_file.readline()
    if not first:
        return None
    m = _match_line(item_re, first)
    # The first line contributes both the item identifier and its first field.
    fields = [m.group(1), m.group(2)]
    for _ in range(URI_LINES_PER_ITEM - 1):
        fields.append(_match_line(item_re, loa_file.readline()).group(2))
    for _ in range(VALUE_LINES_PER_ITEM):
        fields.append(_match_line(item_value_re, loa_file.readline()).group(2))
    loa_file.readline()  # the 16th line of every item carries nothing we export
    return ', '.join(fields)


def convert(src='./loa.items', dst='loa.csv', limit=None):
    """Convert the item file *src* into the CSV file *dst*.

    Args:
        src: path of the input file.
        dst: path of the CSV file to write (overwritten).
        limit: maximum number of items to convert; defaults to numero_items.
            Conversion also stops when the input is exhausted.

    Returns:
        The number of rows written.

    Raises:
        ValueError: if a line has an unexpected format or the input ends in
            the middle of an item.
    """
    if limit is None:
        limit = numero_items
    rows = []
    with open(src) as loa_file:
        for _ in range(limit):
            row = _read_item(loa_file)
            if row is None:
                break
            rows.append(row + '\n')
    # The output is opened only after the input parsed successfully, so a bad
    # input never leaves a truncated CSV behind.
    with open(dst, 'w') as output:
        output.writelines(rows)
    return len(rows)


if __name__ == '__main__':
    convert()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment