Skip to content

Instantly share code, notes, and snippets.

@elvisgs
Created January 7, 2019 16:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save elvisgs/f43704b2ac16a9cb1f93c148567b6ac8 to your computer and use it in GitHub Desktop.
Save elvisgs/f43704b2ac16a9cb1f93c148567b6ac8 to your computer and use it in GitHub Desktop.
Script para dividir CSV de publicações do SIJ por edição e salvar como JSON
import csv
import itertools
import json
import os
import operator
import re
import sys
# Raise the csv parser's maximum field size to sys.maxsize so very long
# text fields do not trigger a "field larger than field limit" error.
csv.field_size_limit(sys.maxsize)

# Directory holding the input CSV, and the full path of the CSV itself.
cwd = '/home/elvis/playground/stm'
path_csv = os.path.join(cwd, 'publicacoes-stm.csv')
def divide_publicacoes():
    """Split the publications CSV into one JSON file per journal edition.

    Reads ``path_csv``, groups consecutive rows that share the same
    ``nome_jornal_edicao`` value, renames some keys on every row (see
    ``renomear_chaves``) and writes each group as an indented JSON array to
    ``<csv path without extension>_<date>_<edition number>.json``.
    One summary line is printed per edition.

    Raises:
        KeyError: If a row lacks ``nome_jornal_edicao`` or one of the keys
            that ``renomear_chaves`` renames.
        ValueError: If an edition name does not contain the expected
            date/number pattern.
    """
    # Derive output names from the CSV path minus its extension; a plain
    # str.replace('.csv', ...) would also rewrite a '.csv' occurring in a
    # directory name, and would return the CSV path itself (overwriting the
    # input) if the path had no '.csv' in it.
    base_path, _ = os.path.splitext(path_csv)

    # newline='' is what the csv module requires so that newlines embedded in
    # quoted fields are parsed correctly.
    # NOTE(review): assumes the CSV is UTF-8 encoded -- confirm for the export.
    with open(path_csv, 'r', encoding='utf-8', newline='') as csv_file:
        csv_reader = csv.DictReader(csv_file)
        # NOTE: itertools.groupby only merges *consecutive* rows with the same
        # key. If the CSV is not ordered by edition, the same edition yields
        # several groups and later ones overwrite the earlier JSON file.
        grouped = itertools.groupby(
            csv_reader, operator.itemgetter('nome_jornal_edicao'))
        for group, items in grouped:
            items = [renomear_chaves(item) for item in items]
            data, nro_edicao = obtem_data_e_numero_edicao(group)
            print(f'{data} ({nro_edicao}): {len(items)} publicações')
            # Serialize before opening the output so a serialization error
            # does not leave an empty/truncated file behind.
            json_content = json.dumps(items, indent=2, ensure_ascii=False)
            json_path = f'{base_path}_{data}_{nro_edicao}.json'
            # ensure_ascii=False emits raw non-ASCII characters, so the output
            # encoding is pinned instead of depending on the platform locale.
            with open(json_path, 'w', encoding='utf-8') as json_file:
                json_file.write(json_content)
def obtem_data_e_numero_edicao(jornal):
    """Extract the edition date and edition number from a journal edition name.

    Args:
        jornal: Text containing ``DD/MM/YYYY Nº <digits>`` somewhere in it.

    Returns:
        A ``(date, number)`` tuple of strings: the date rearranged as
        ``YYYY-MM-DD`` and the edition number exactly as it appears.

    Raises:
        ValueError: If ``jornal`` does not contain the expected pattern.
    """
    match = re.search(r'(\d{2}/\d{2}/\d{4}) Nº (\d+)', jornal)
    if match is None:
        # Fail with a message naming the offending value instead of the
        # opaque "'NoneType' object is not subscriptable".
        raise ValueError(
            f'edition name without "DD/MM/YYYY Nº <number>": {jornal!r}')
    dia, mes, ano = match[1].split('/')
    return f'{ano}-{mes}-{dia}', match[2]
def renomear_chaves(publicacao):
    """Rename three keys of a publication row in place and return the row.

    ``cod_publicacao`` becomes ``codigo``, ``texto_processo`` becomes
    ``textoprocesso`` and ``texto_despacho`` becomes ``textodespacho``.
    All other keys are left untouched.

    Args:
        publicacao: Mutable mapping holding the three original keys.

    Returns:
        The same ``publicacao`` object, mutated.

    Raises:
        KeyError: If one of the original keys is missing.
    """
    publicacao['codigo'] = publicacao.pop('cod_publicacao')
    publicacao['textoprocesso'] = publicacao.pop('texto_processo')
    publicacao['textodespacho'] = publicacao.pop('texto_despacho')
    return publicacao
# Run the split only when executed as a script, not when imported.
if __name__ == '__main__':
    divide_publicacoes()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment