Skip to content

Instantly share code, notes, and snippets.

@denilsonsa
Created June 14, 2016 10:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save denilsonsa/a3c9be44d3a29d7a6d15fc3e0d7af89d to your computer and use it in GitHub Desktop.
Save denilsonsa/a3c9be44d3a29d7a6d15fc3e0d7af89d to your computer and use it in GitHub Desktop.
moneylog_convert.py que usei até final de 2014
#!/usr/bin/env python2
# -*- coding: utf8 -*-
from __future__ import unicode_literals
import sys
from datetime import date, timedelta
import re
# Conversion state shared with the main loop.
# Date assigned to entries until the input supplies a date line.
current_date = date(2009, 7, 1)
# Total number of input lines read so far.
line_count = 0
# Number of input lines read since the last date/day header.
line_count_in_this_day = 0

# Let's suppose the input and the output are both in UTF-8, without Byte-Order-Mark
# Might be useful to run this at the input file:
# :set fenc=utf8
# :set nobomb
# :set ff=unix
input_file = sys.stdin
output_file = sys.stdout

# Modeline emitted once, as the first line of the output.
output_file.write('# vi:filetype=moneylog\n')

# Some regular expressions
#
# Patterns that are matched case-insensitively against decoded (unicode) text
# carry re.U in addition to re.I: on Python 2, re.I alone only folds ASCII
# letters and \b/\w only know ASCII, so 'TERÇA' or 'DÉB' would not match.

# A line holding nothing but an ISO date: YYYY-MM-DD.
re_isodate = re.compile(r'^(?P<year>\d\d\d\d)-(?P<month>\d\d)-(?P<day>\d\d)$')

# A day header: Portuguese weekday name followed by the day of the month.
re_dia = re.compile(
    r'^(?P<day>segunda|ter[çc]a|quarta|quinta|sexta|s[áa]bado|domingo) *(?P<date>\d+) *$',
    re.I | re.U
)

# An entry line: optional sign, amount (digits with '.' or ','), description.
re_custo = re.compile(r'^(?P<sinal>[-+]?)(?P<valor>[0-9.,]+) *(?P<nome>.*)$')

# Optional payment-method prefix at the start of an entry description.
re_pagamento = re.compile(r'^(?P<forma>din|d[eé]b|cr[eé]d?) (?P<resto>.*)$', re.I | re.U)

# Maps each payment-method prefix to the tags added to the entry.
forma_pagamento_tags = [
    (re.compile(r'^din$', re.I | re.U), ['dinheiro']),
    (re.compile(r'^d[eé]b$', re.I | re.U), ['debito']),
    (re.compile(r'^cr[eé]d?$', re.I | re.U), ['nubank']),
]

re_ricos_lanches = re.compile(r'(no )?rico.?s( lanches?)?', re.I | re.U)
# Main loop here
#
# Reads the diary line by line and writes moneylog entries:
#   * blank lines are copied through unchanged;
#   * an ISO date line or a "<weekday> <day>" line only updates current_date
#     (nothing is written for it);
#   * a line starting with an amount becomes
#     "<date>\t<sign><amount>\t<tags>| <description>";
#   * anything else is copied through unchanged.

# re.U is required together with re.I: on Python 2, \b and case folding are
# ASCII-only without it, so words starting with an accented letter (such as
# "ônibus") would never match.
_NAME_FLAGS = re.I | re.U

# Spelling/capitalisation fixes applied, in this order, to each description.
_NAME_FIXES = [
    (re.compile(r'\bsubway\b', _NAME_FLAGS), 'Subway'),
    (re.compile(r'\bBurgu?er King\b', _NAME_FLAGS), 'Burger King'),
    (re.compile(r'\bcarrefour\b', _NAME_FLAGS), 'Carrefour'),
    # The space belongs inside the optional group; otherwise "nas americanas"
    # would need two spaces to match.
    (re.compile(r'\bnas (?:lojas )?americanas\b', _NAME_FLAGS), 'nas Lojas Americanas'),
    (re.compile(r'\blojas americanas\b', _NAME_FLAGS), 'Lojas Americanas'),
    (re.compile(r"\bhabib'?s\b", _NAME_FLAGS), "Habib's"),
    (re.compile(r"\bbob'?s\b", _NAME_FLAGS), "Bob's"),
    (re.compile(r'\bsanduiche\b', _NAME_FLAGS), 'sanduíche'),
    (re.compile(r'\bt[aá]xi\b', _NAME_FLAGS), 'táxi'),
    (re.compile(r'\b[oô]nibus\b', _NAME_FLAGS), 'ônibus'),
]

# Descriptions starting with one of these prefixes (compared in upper case)
# get the 'onibus' tag.
_BUS_LINE_PREFIXES = ('SC01', 'SC02', 'SC03', 'SC04')

# Keyword rules, checked in this order against the normalised description.
# The order also defines the order of the tags in the output.
_TAG_RULES = [
    (re.compile(r'\bTáxi\b', _NAME_FLAGS), 'taxi'),
    (re.compile(r'\bÔnibus\b', _NAME_FLAGS), 'onibus'),
    (re.compile(
        r"\b(pizza|subway|sanduíches?|cachorro quente|hot dog|hambúrguer"
        r"|calabresa|salgado|milkshake|almoço|jantar|lanches?|habib's|bob's"
        r"|pão|pães|padaria|banana|biscoito|leite|comida|prato feito"
        r"|restaurante|sorvete|sorveteria|suco)\b",
        _NAME_FLAGS), 'alimentacao'),
    (re.compile(r'\brecarga\b.*\bcelular\b', _NAME_FLAGS), 'telefone'),
    (re.compile(r'\bpassagem\b.*\bviação\b', _NAME_FLAGS), 'viagem'),
    (re.compile(r'\btáxi\b.*\b(rodoviária|aeroporto)\b', _NAME_FLAGS), 'viagem'),
    (re.compile(r'\b(gog(\.com)?|steam|humble ?bundle|google play)\b', _NAME_FLAGS), 'compras'),
    (re.compile(r'\b(estacionamento)\b', _NAME_FLAGS), 'outros'),
]


def _normalize_name(nome):
    """Apply the spelling fixes and capitalise the first character of nome."""
    for pattern, replacement in _NAME_FIXES:
        nome = pattern.sub(replacement, nome)
    # Slicing (rather than indexing) keeps an empty description valid.
    return nome[0:1].upper() + nome[1:]


def _keyword_tags(nome):
    """Return the tags suggested by the normalised description (may repeat)."""
    is_bus_line = nome.upper().startswith(_BUS_LINE_PREFIXES)
    found = []
    for pattern, tag in _TAG_RULES:
        # A bus-line prefix counts as a hit for the 'onibus' rule, so the tag
        # keeps its place in the output order.
        if pattern.search(nome) or (is_bus_line and tag == 'onibus'):
            found.append(tag)
    return found


def _unique(items):
    """Return items without duplicates, keeping the first occurrence order."""
    kept = []
    for item in items:
        if item not in kept:
            kept.append(item)
    return kept


for line in input_file:
    line_count += 1
    line_count_in_this_day += 1

    line_strip = line.strip()

    # Empty line
    if line_strip == b'':
        output_file.write(line)
        continue

    # Line with just an ISO date
    match = re_isodate.search(line_strip)
    if match:
        line_count_in_this_day = 0
        current_date = date(
            int(match.group('year'), 10),
            int(match.group('month'), 10),
            int(match.group('day'), 10)
        )
        continue

    # New day
    match = re_dia.search(line_strip.decode('utf8'))
    if match:
        line_count_in_this_day = 0
        new_day = int(match.group('date'), 10)
        if new_day >= current_date.day:
            # Same month, later (or same) day.
            current_date = current_date.replace(day=new_day)
        elif current_date.month == 12:
            # The day number went backwards in December: next year begins.
            current_date = date(current_date.year + 1, 1, new_day)
        else:
            # The day number went backwards: next month begins.
            current_date = date(current_date.year, current_date.month + 1, new_day)
        continue

    # Some value
    match = re_custo.search(line_strip)
    # Decimal comma becomes a decimal point: '10,90' -> '10.90'
    valor = match.group('valor').replace(',', '.') if match else ''
    # A "value" made only of separators (e.g. a line starting with '...') is
    # not an amount; such a line is treated as unrecognized instead of
    # aborting the whole conversion.
    if valor.strip('.'):
        # Entries without an explicit sign are expenses.
        sinal = match.group('sinal') or '-'

        tags = []
        nome = match.group('nome').strip().decode('utf8')

        # An optional payment-method prefix is removed from the description
        # and turned into tags.
        forma_pagamento = re_pagamento.search(nome)
        if forma_pagamento:
            forma = forma_pagamento.group('forma')
            nome = forma_pagamento.group('resto')
            for forma_re, forma_tags in forma_pagamento_tags:
                if forma_re.match(forma):
                    tags.extend(forma_tags)

        nome = _normalize_name(nome)
        tags.extend(_keyword_tags(nome))
        # Removing duplicates while preserving the order.
        tags = _unique(tags)

        # Finally, the almighty output!
        output_file.write(
            (
                '%s\t%s%7s\t%s| %s\n'
                % (current_date.isoformat(), sinal, valor, ','.join(tags), nome)
            ).encode('utf8')
        )
        continue

    # Something else, any non-recognized line
    output_file.write(line)
2016-06-13
Terça 14
+50.00 mesada
Quarta 15
10.00 sanduiche no subway
35.00 taxi
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment