Created
June 14, 2016 10:36
-
-
Save denilsonsa/a3c9be44d3a29d7a6d15fc3e0d7af89d to your computer and use it in GitHub Desktop.
moneylog_convert.py que usei até final de 2014
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python2 | |
# -*- coding: utf8 -*- | |
from __future__ import unicode_literals | |
import sys | |
from datetime import date, timedelta | |
import re | |
# Some useful variables

# Date assumed for entries that appear before the first date line of the input.
current_date = date(2009, 7, 1)

# Let's suppose the input and the output are both in UTF-8, without Byte-Order-Mark
# Might be useful to run this at the input file:
#   :set fenc=utf8
#   :set nobomb
#   :set ff=unix
input_file = sys.stdin
output_file = sys.stdout

# Some regular expressions

# re.UNICODE makes \b and re.I treat accented letters as letters on Python 2
# as well; without it a pattern such as r'\bÔnibus\b' can never match there,
# because "Ô" is not considered a word character.
_FLAGS = re.I | re.U

re_isodate = re.compile(r'^(?P<year>\d\d\d\d)-(?P<month>\d\d)-(?P<day>\d\d)$')
re_dia = re.compile(r'^(?P<day>segunda|ter[çc]a|quarta|quinta|sexta|s[áa]bado|domingo) *(?P<date>\d+) *$', _FLAGS)
re_custo = re.compile(r'^(?P<sinal>[-+]?)(?P<valor>[0-9.,]+) *(?P<nome>.*)$')
re_pagamento = re.compile(r'^(?P<forma>din|d[eé]b|cr[eé]d?) (?P<resto>.*)$', _FLAGS)
forma_pagamento_tags = [
    (re.compile(r'^din$', _FLAGS), ['dinheiro']),
    (re.compile(r'^d[eé]b$', _FLAGS), ['debito']),
    (re.compile(r'^cr[eé]d?$', _FLAGS), ['nubank']),
]
# Not used by the conversion below; kept for backward compatibility.
re_ricos_lanches = re.compile(r'(no )?rico.?s( lanches?)?', re.I)

# Spelling/capitalisation fixes applied, in this order, to every entry name.
_NAME_FIXES = [
    (re.compile(r'\bsubway\b', _FLAGS), 'Subway'),
    (re.compile(r'\bBurgu?er King\b', _FLAGS), 'Burger King'),
    (re.compile(r'\bcarrefour\b', _FLAGS), 'Carrefour'),
    # "lojas" is optional *together with* its separating space, so that both
    # "nas americanas" and "nas lojas americanas" are recognised.
    (re.compile(r'\bnas +(lojas +)?americanas\b', _FLAGS), 'nas Lojas Americanas'),
    (re.compile(r'\blojas americanas\b', _FLAGS), 'Lojas Americanas'),
    (re.compile(r"\bhabib'?s\b", _FLAGS), "Habib's"),
    (re.compile(r"\bbob'?s\b", _FLAGS), "Bob's"),
    (re.compile(r'\bsanduiche\b', _FLAGS), 'sanduíche'),
    (re.compile(r'\bt[aá]xi\b', _FLAGS), 'táxi'),
    (re.compile(r'\b[oô]nibus\b', _FLAGS), 'ônibus'),
]

# Entry names starting with one of these (case-insensitively) are bus rides.
_BUS_LINE_PREFIXES = ('SC01', 'SC02', 'SC03', 'SC04')

# Words that mark an entry as food.
_FOOD_WORDS = (
    'pizza', 'subway', 'sanduíches?', 'cachorro quente', 'hot dog',
    'hambúrguer', 'calabresa', 'salgado', 'milkshake', 'almoço', 'jantar',
    'lanches?', "habib's", "bob's", 'pão', 'pães', 'padaria', 'banana',
    'biscoito', 'leite', 'comida', 'prato feito', 'restaurante', 'sorvete',
    'sorveteria', 'suco',
)


def _is_bus_line(nome):
    """Tell whether *nome* starts with one of the known bus line codes."""
    return nome.upper().startswith(_BUS_LINE_PREFIXES)


# (predicate, tag) pairs evaluated in order against the normalised name.
_TAG_RULES = [
    (re.compile(r'\bTáxi\b', _FLAGS).search, 'taxi'),
    (_is_bus_line, 'onibus'),
    (re.compile(r'\bÔnibus\b', _FLAGS).search, 'onibus'),
    (re.compile(r'\b(' + '|'.join(_FOOD_WORDS) + r')\b', _FLAGS).search, 'alimentacao'),
    (re.compile(r'\brecarga\b.*\bcelular\b', _FLAGS).search, 'telefone'),
    (re.compile(r'\bpassagem\b.*\bviação\b', _FLAGS).search, 'viagem'),
    (re.compile(r'\btáxi\b.*\b(rodoviária|aeroporto)\b', _FLAGS).search, 'viagem'),
    (re.compile(r'\b(gog(\.com)?|steam|humble ?bundle|google play)\b', _FLAGS).search, 'compras'),
    (re.compile(r'\b(estacionamento)\b', _FLAGS).search, 'outros'),
]


def _parse_date_line(text, today):
    """Return the date announced by *text*, or None if it is not a date line.

    Two forms are recognised: a full ISO date ("2016-06-13") and a weekday
    name followed by a day of the month ("Terça 14"), which is resolved
    relative to *today*.  Raises ValueError when the resulting date does not
    exist (e.g. day 31 of a 30-day month).
    """
    match = re_isodate.search(text)
    if match:
        return date(
            int(match.group('year')),
            int(match.group('month')),
            int(match.group('day')),
        )
    match = re_dia.search(text)
    if not match:
        return None
    new_day = int(match.group('date'))
    if new_day >= today.day:
        return today.replace(day=new_day)
    # A smaller day number means the month (and maybe the year) rolled over.
    if today.month == 12:
        return date(today.year + 1, 1, new_day)
    return date(today.year, today.month + 1, new_day)


def _describe(nome):
    """Return ``(tags, name)`` for the free-text part of an entry.

    A leading payment method ("din", "déb", "créd") is removed from the name
    and turned into tags; the name is then normalised and matched against the
    tagging rules.  Tags are unique and keep first-seen order.
    """
    found = []
    payment = re_pagamento.search(nome)
    if payment:
        forma = payment.group('forma')
        nome = payment.group('resto')
        for forma_re, forma_tags in forma_pagamento_tags:
            if forma_re.match(forma):
                found.extend(forma_tags)
    for pattern, replacement in _NAME_FIXES:
        nome = pattern.sub(replacement, nome)
    nome = nome[:1].upper() + nome[1:]
    found.extend(tag for applies, tag in _TAG_RULES if applies(nome))
    # Removing duplicates while preserving the order.
    tags = []
    for tag in found:
        if tag not in tags:
            tags.append(tag)
    return tags, nome


def _format_entry(match, today):
    """Build the moneylog output line for a ``re_custo`` match dated *today*."""
    # A value without an explicit sign is an expense.
    sinal = match.group('sinal') or '-'
    valor = match.group('valor').replace(',', '.')
    tags, nome = _describe(match.group('nome').strip())
    return '%s\t%s%7s\t%s| %s\n' % (
        today.isoformat(), sinal, valor, ','.join(tags), nome)


def convert(lines, out, start_date=None):
    """Convert free-form expense notes into moneylog entries.

    lines      -- iterable of input lines, either text or UTF-8 encoded bytes.
    out        -- object with a ``write`` method; each converted entry is
                  written in the same type (text or UTF-8 bytes) as the input
                  line it came from.
    start_date -- date used until the input provides one; defaults to the
                  module-level ``current_date``.

    Date lines update the running date and produce no output.  Value lines
    become ``date<TAB>value<TAB>tags| name`` entries.  Blank lines and
    anything not recognised (including "values" made only of punctuation,
    such as "...") are copied through untouched.  Each line is decoded once
    and stripped of surrounding whitespace before being matched.

    Raises ValueError, mentioning the 1-based input line number, when a date
    line describes a date that does not exist, and UnicodeDecodeError when a
    bytes line is not valid UTF-8.
    """
    today = current_date if start_date is None else start_date
    for line_number, raw in enumerate(lines, 1):
        is_bytes = isinstance(raw, bytes)
        text = (raw.decode('utf8') if is_bytes else raw).strip()
        if text:
            try:
                new_date = _parse_date_line(text, today)
            except ValueError as exc:
                raise ValueError('line %d: invalid date %r (%s)'
                                 % (line_number, text, exc))
            if new_date is not None:
                today = new_date
                continue
            match = re_custo.search(text)
            if match and re.search(r'\d', match.group('valor')):
                entry = _format_entry(match, today)
                out.write(entry.encode('utf8') if is_bytes else entry)
                continue
        # Empty line, or something else: any non-recognized line.
        out.write(raw)


def main():
    """Convert ``input_file`` (stdin) into a moneylog file on ``output_file``."""
    output_file.write('# vi:filetype=moneylog\n')
    convert(input_file, output_file)


if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
2016-06-13
Terça 14
+50.00 mesada
Quarta 15
10.00 sanduiche no subway
35.00 taxi
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment