Skip to content

Instantly share code, notes, and snippets.

@cuducos
Last active January 3, 2020 21:11
Show Gist options
  • Save cuducos/1ab59c9080f93f9aa30ec38b732cd3c0 to your computer and use it in GitHub Desktop.
Save cuducos/1ab59c9080f93f9aa30ec38b732cd3c0 to your computer and use it in GitHub Desktop.
Cartões Corporativos
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Análise de gastos do cartão corportativo do planalto"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from collections import namedtuple\n",
"from datetime import date, datetime\n",
"from pathlib import Path\n",
"from urllib.request import urlretrieve\n",
"from zipfile import ZipFile\n",
"\n",
"import pandas as pd\n",
"from tqdm import tqdm"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"Term = namedtuple('Term', 'name start end')\n",
"\n",
"DILMA = Term('Dilma', date(2011, 1, 1), date(2016, 5, 12))\n",
"TEMER = Term('Temer', date(2016, 5, 12), date(2019, 1, 1))\n",
"BOLSONARO = Term('Bolsonaro', date(2019, 1, 1), date.today())\n",
"\n",
"TERMS = (DILMA, TEMER, BOLSONARO)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Carregando os dados"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"def urls():\n",
" \"\"\"Generates all the URL to download the data\"\"\"\n",
" today = date.today()\n",
" for year in range(2013, today.year + 1):\n",
" for month in range(1, 13):\n",
" if date(year, month, 1) > today:\n",
" continue\n",
" \n",
" month = str(month).zfill(2)\n",
" yield f\"http://www.portaltransparencia.gov.br/download-de-dados/cpgf/{year}{month}\""
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"def load(url, **read_csv_kwargs):\n",
" \"\"\"Downloads the zip archive with data and load the first file in a Pandas DataFrame\"\"\"\n",
" directory = Path('data')\n",
" directory.mkdir(exist_ok=True)\n",
" \n",
" *_, filename = url.split(\"/\")\n",
" path = directory / f\"{filename}.zip\"\n",
" \n",
" if not path.exists():\n",
" urlretrieve(url, filename=path)\n",
" \n",
" with ZipFile(path) as archive:\n",
" archive.extractall()\n",
" data, *_ = archive.namelist()\n",
" df = pd.read_csv(data, **read_csv_kwargs)\n",
"\n",
" for name in archive.namelist():\n",
" Path(name).unlink()\n",
"\n",
" return df"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████████████████████████████████████████████████████████████████████████████| 84/84 [00:06<00:00, 13.33it/s]\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>CÓDIGO ÓRGÃO SUPERIOR</th>\n",
" <th>NOME ÓRGÃO SUPERIOR</th>\n",
" <th>CÓDIGO ÓRGÃO</th>\n",
" <th>NOME ÓRGÃO</th>\n",
" <th>CÓDIGO UNIDADE GESTORA</th>\n",
" <th>NOME UNIDADE GESTORA</th>\n",
" <th>ANO EXTRATO</th>\n",
" <th>MÊS EXTRATO</th>\n",
" <th>CPF PORTADOR</th>\n",
" <th>NOME PORTADOR</th>\n",
" <th>CNPJ OU CPF FAVORECIDO</th>\n",
" <th>NOME FAVORECIDO</th>\n",
" <th>TRANSAÇÃO</th>\n",
" <th>DATA TRANSAÇÃO</th>\n",
" <th>VALOR TRANSAÇÃO</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>0</td>\n",
" <td>63000</td>\n",
" <td>Advocacia-Geral da União</td>\n",
" <td>63000</td>\n",
" <td>Advocacia-Geral da União - Unidades com víncul...</td>\n",
" <td>110096</td>\n",
" <td>SUPERINTENDENCIA ADMINISTRACAO EM PERNAMBUCO</td>\n",
" <td>2013</td>\n",
" <td>1</td>\n",
" <td>***.419.804-**</td>\n",
" <td>ULISSES ANTONIO CAVALCANTI BIONES</td>\n",
" <td>-2</td>\n",
" <td>NAO SE APLICA</td>\n",
" <td>SAQUE CASH/ATM BB</td>\n",
" <td>27/11/2012</td>\n",
" <td>120.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>63000</td>\n",
" <td>Advocacia-Geral da União</td>\n",
" <td>63000</td>\n",
" <td>Advocacia-Geral da União - Unidades com víncul...</td>\n",
" <td>110096</td>\n",
" <td>SUPERINTENDENCIA ADMINISTRACAO EM PERNAMBUCO</td>\n",
" <td>2013</td>\n",
" <td>1</td>\n",
" <td>***.541.564-**</td>\n",
" <td>FELLIPE JOSE DA ROCHA MUNT</td>\n",
" <td>-2</td>\n",
" <td>NAO SE APLICA</td>\n",
" <td>SAQUE CASH/ATM BB</td>\n",
" <td>06/12/2012</td>\n",
" <td>190.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>63000</td>\n",
" <td>Advocacia-Geral da União</td>\n",
" <td>63000</td>\n",
" <td>Advocacia-Geral da União - Unidades com víncul...</td>\n",
" <td>110096</td>\n",
" <td>SUPERINTENDENCIA ADMINISTRACAO EM PERNAMBUCO</td>\n",
" <td>2013</td>\n",
" <td>1</td>\n",
" <td>***.541.564-**</td>\n",
" <td>FELLIPE JOSE DA ROCHA MUNT</td>\n",
" <td>-2</td>\n",
" <td>NAO SE APLICA</td>\n",
" <td>SAQUE CASH/ATM BB</td>\n",
" <td>19/12/2012</td>\n",
" <td>70.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3</td>\n",
" <td>63000</td>\n",
" <td>Advocacia-Geral da União</td>\n",
" <td>63000</td>\n",
" <td>Advocacia-Geral da União - Unidades com víncul...</td>\n",
" <td>110096</td>\n",
" <td>SUPERINTENDENCIA ADMINISTRACAO EM PERNAMBUCO</td>\n",
" <td>2013</td>\n",
" <td>1</td>\n",
" <td>***.804.561-**</td>\n",
" <td>JOSE DE JESUS</td>\n",
" <td>32782393000129</td>\n",
" <td>MOVIDROLAR LTDA</td>\n",
" <td>COMPRA A/V - R$ - APRES</td>\n",
" <td>27/11/2012</td>\n",
" <td>60.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>4</td>\n",
" <td>63000</td>\n",
" <td>Advocacia-Geral da União</td>\n",
" <td>63000</td>\n",
" <td>Advocacia-Geral da União - Unidades com víncul...</td>\n",
" <td>110096</td>\n",
" <td>SUPERINTENDENCIA ADMINISTRACAO EM PERNAMBUCO</td>\n",
" <td>2013</td>\n",
" <td>1</td>\n",
" <td>***.401.785-**</td>\n",
" <td>MARCELO DA SILVA CARUCA</td>\n",
" <td>4066259000196</td>\n",
" <td>MULTI UTILIDADES LTDA</td>\n",
" <td>COMPRA A/V - R$ - APRES</td>\n",
" <td>28/11/2012</td>\n",
" <td>84.4</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" CÓDIGO ÓRGÃO SUPERIOR NOME ÓRGÃO SUPERIOR CÓDIGO ÓRGÃO \\\n",
"0 63000 Advocacia-Geral da União 63000 \n",
"1 63000 Advocacia-Geral da União 63000 \n",
"2 63000 Advocacia-Geral da União 63000 \n",
"3 63000 Advocacia-Geral da União 63000 \n",
"4 63000 Advocacia-Geral da União 63000 \n",
"\n",
" NOME ÓRGÃO CÓDIGO UNIDADE GESTORA \\\n",
"0 Advocacia-Geral da União - Unidades com víncul... 110096 \n",
"1 Advocacia-Geral da União - Unidades com víncul... 110096 \n",
"2 Advocacia-Geral da União - Unidades com víncul... 110096 \n",
"3 Advocacia-Geral da União - Unidades com víncul... 110096 \n",
"4 Advocacia-Geral da União - Unidades com víncul... 110096 \n",
"\n",
" NOME UNIDADE GESTORA ANO EXTRATO MÊS EXTRATO \\\n",
"0 SUPERINTENDENCIA ADMINISTRACAO EM PERNAMBUCO 2013 1 \n",
"1 SUPERINTENDENCIA ADMINISTRACAO EM PERNAMBUCO 2013 1 \n",
"2 SUPERINTENDENCIA ADMINISTRACAO EM PERNAMBUCO 2013 1 \n",
"3 SUPERINTENDENCIA ADMINISTRACAO EM PERNAMBUCO 2013 1 \n",
"4 SUPERINTENDENCIA ADMINISTRACAO EM PERNAMBUCO 2013 1 \n",
"\n",
" CPF PORTADOR NOME PORTADOR CNPJ OU CPF FAVORECIDO \\\n",
"0 ***.419.804-** ULISSES ANTONIO CAVALCANTI BIONES -2 \n",
"1 ***.541.564-** FELLIPE JOSE DA ROCHA MUNT -2 \n",
"2 ***.541.564-** FELLIPE JOSE DA ROCHA MUNT -2 \n",
"3 ***.804.561-** JOSE DE JESUS 32782393000129 \n",
"4 ***.401.785-** MARCELO DA SILVA CARUCA 4066259000196 \n",
"\n",
" NOME FAVORECIDO TRANSAÇÃO DATA TRANSAÇÃO \\\n",
"0 NAO SE APLICA SAQUE CASH/ATM BB 27/11/2012 \n",
"1 NAO SE APLICA SAQUE CASH/ATM BB 06/12/2012 \n",
"2 NAO SE APLICA SAQUE CASH/ATM BB 19/12/2012 \n",
"3 MOVIDROLAR LTDA COMPRA A/V - R$ - APRES 27/11/2012 \n",
"4 MULTI UTILIDADES LTDA COMPRA A/V - R$ - APRES 28/11/2012 \n",
"\n",
" VALOR TRANSAÇÃO \n",
"0 120.0 \n",
"1 190.0 \n",
"2 70.0 \n",
"3 60.0 \n",
"4 84.4 "
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = pd.concat(\n",
" load(url, encoding=\"cp1252\", delimiter=\";\", decimal=\",\")\n",
" for url in tqdm(tuple(urls()))\n",
")\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(1049848, 15)"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.shape"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>CÓDIGO ÓRGÃO SUPERIOR</th>\n",
" <th>NOME ÓRGÃO SUPERIOR</th>\n",
" <th>CÓDIGO ÓRGÃO</th>\n",
" <th>NOME ÓRGÃO</th>\n",
" <th>CÓDIGO UNIDADE GESTORA</th>\n",
" <th>NOME UNIDADE GESTORA</th>\n",
" <th>ANO EXTRATO</th>\n",
" <th>MÊS EXTRATO</th>\n",
" <th>CPF PORTADOR</th>\n",
" <th>NOME PORTADOR</th>\n",
" <th>CNPJ OU CPF FAVORECIDO</th>\n",
" <th>NOME FAVORECIDO</th>\n",
" <th>TRANSAÇÃO</th>\n",
" <th>DATA TRANSAÇÃO</th>\n",
" <th>VALOR TRANSAÇÃO</th>\n",
" <th>expense_date</th>\n",
" <th>president</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>0</td>\n",
" <td>63000</td>\n",
" <td>Advocacia-Geral da União</td>\n",
" <td>63000</td>\n",
" <td>Advocacia-Geral da União - Unidades com víncul...</td>\n",
" <td>110096</td>\n",
" <td>SUPERINTENDENCIA ADMINISTRACAO EM PERNAMBUCO</td>\n",
" <td>2013</td>\n",
" <td>1</td>\n",
" <td>***.419.804-**</td>\n",
" <td>ULISSES ANTONIO CAVALCANTI BIONES</td>\n",
" <td>-2</td>\n",
" <td>NAO SE APLICA</td>\n",
" <td>SAQUE CASH/ATM BB</td>\n",
" <td>27/11/2012</td>\n",
" <td>120.0</td>\n",
" <td>2012-11-27</td>\n",
" <td>Dilma</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>63000</td>\n",
" <td>Advocacia-Geral da União</td>\n",
" <td>63000</td>\n",
" <td>Advocacia-Geral da União - Unidades com víncul...</td>\n",
" <td>110096</td>\n",
" <td>SUPERINTENDENCIA ADMINISTRACAO EM PERNAMBUCO</td>\n",
" <td>2013</td>\n",
" <td>1</td>\n",
" <td>***.541.564-**</td>\n",
" <td>FELLIPE JOSE DA ROCHA MUNT</td>\n",
" <td>-2</td>\n",
" <td>NAO SE APLICA</td>\n",
" <td>SAQUE CASH/ATM BB</td>\n",
" <td>06/12/2012</td>\n",
" <td>190.0</td>\n",
" <td>2012-12-06</td>\n",
" <td>Dilma</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>63000</td>\n",
" <td>Advocacia-Geral da União</td>\n",
" <td>63000</td>\n",
" <td>Advocacia-Geral da União - Unidades com víncul...</td>\n",
" <td>110096</td>\n",
" <td>SUPERINTENDENCIA ADMINISTRACAO EM PERNAMBUCO</td>\n",
" <td>2013</td>\n",
" <td>1</td>\n",
" <td>***.541.564-**</td>\n",
" <td>FELLIPE JOSE DA ROCHA MUNT</td>\n",
" <td>-2</td>\n",
" <td>NAO SE APLICA</td>\n",
" <td>SAQUE CASH/ATM BB</td>\n",
" <td>19/12/2012</td>\n",
" <td>70.0</td>\n",
" <td>2012-12-19</td>\n",
" <td>Dilma</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3</td>\n",
" <td>63000</td>\n",
" <td>Advocacia-Geral da União</td>\n",
" <td>63000</td>\n",
" <td>Advocacia-Geral da União - Unidades com víncul...</td>\n",
" <td>110096</td>\n",
" <td>SUPERINTENDENCIA ADMINISTRACAO EM PERNAMBUCO</td>\n",
" <td>2013</td>\n",
" <td>1</td>\n",
" <td>***.804.561-**</td>\n",
" <td>JOSE DE JESUS</td>\n",
" <td>32782393000129</td>\n",
" <td>MOVIDROLAR LTDA</td>\n",
" <td>COMPRA A/V - R$ - APRES</td>\n",
" <td>27/11/2012</td>\n",
" <td>60.0</td>\n",
" <td>2012-11-27</td>\n",
" <td>Dilma</td>\n",
" </tr>\n",
" <tr>\n",
" <td>4</td>\n",
" <td>63000</td>\n",
" <td>Advocacia-Geral da União</td>\n",
" <td>63000</td>\n",
" <td>Advocacia-Geral da União - Unidades com víncul...</td>\n",
" <td>110096</td>\n",
" <td>SUPERINTENDENCIA ADMINISTRACAO EM PERNAMBUCO</td>\n",
" <td>2013</td>\n",
" <td>1</td>\n",
" <td>***.401.785-**</td>\n",
" <td>MARCELO DA SILVA CARUCA</td>\n",
" <td>4066259000196</td>\n",
" <td>MULTI UTILIDADES LTDA</td>\n",
" <td>COMPRA A/V - R$ - APRES</td>\n",
" <td>28/11/2012</td>\n",
" <td>84.4</td>\n",
" <td>2012-11-28</td>\n",
" <td>Dilma</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" CÓDIGO ÓRGÃO SUPERIOR NOME ÓRGÃO SUPERIOR CÓDIGO ÓRGÃO \\\n",
"0 63000 Advocacia-Geral da União 63000 \n",
"1 63000 Advocacia-Geral da União 63000 \n",
"2 63000 Advocacia-Geral da União 63000 \n",
"3 63000 Advocacia-Geral da União 63000 \n",
"4 63000 Advocacia-Geral da União 63000 \n",
"\n",
" NOME ÓRGÃO CÓDIGO UNIDADE GESTORA \\\n",
"0 Advocacia-Geral da União - Unidades com víncul... 110096 \n",
"1 Advocacia-Geral da União - Unidades com víncul... 110096 \n",
"2 Advocacia-Geral da União - Unidades com víncul... 110096 \n",
"3 Advocacia-Geral da União - Unidades com víncul... 110096 \n",
"4 Advocacia-Geral da União - Unidades com víncul... 110096 \n",
"\n",
" NOME UNIDADE GESTORA ANO EXTRATO MÊS EXTRATO \\\n",
"0 SUPERINTENDENCIA ADMINISTRACAO EM PERNAMBUCO 2013 1 \n",
"1 SUPERINTENDENCIA ADMINISTRACAO EM PERNAMBUCO 2013 1 \n",
"2 SUPERINTENDENCIA ADMINISTRACAO EM PERNAMBUCO 2013 1 \n",
"3 SUPERINTENDENCIA ADMINISTRACAO EM PERNAMBUCO 2013 1 \n",
"4 SUPERINTENDENCIA ADMINISTRACAO EM PERNAMBUCO 2013 1 \n",
"\n",
" CPF PORTADOR NOME PORTADOR CNPJ OU CPF FAVORECIDO \\\n",
"0 ***.419.804-** ULISSES ANTONIO CAVALCANTI BIONES -2 \n",
"1 ***.541.564-** FELLIPE JOSE DA ROCHA MUNT -2 \n",
"2 ***.541.564-** FELLIPE JOSE DA ROCHA MUNT -2 \n",
"3 ***.804.561-** JOSE DE JESUS 32782393000129 \n",
"4 ***.401.785-** MARCELO DA SILVA CARUCA 4066259000196 \n",
"\n",
" NOME FAVORECIDO TRANSAÇÃO DATA TRANSAÇÃO \\\n",
"0 NAO SE APLICA SAQUE CASH/ATM BB 27/11/2012 \n",
"1 NAO SE APLICA SAQUE CASH/ATM BB 06/12/2012 \n",
"2 NAO SE APLICA SAQUE CASH/ATM BB 19/12/2012 \n",
"3 MOVIDROLAR LTDA COMPRA A/V - R$ - APRES 27/11/2012 \n",
"4 MULTI UTILIDADES LTDA COMPRA A/V - R$ - APRES 28/11/2012 \n",
"\n",
" VALOR TRANSAÇÃO expense_date president \n",
"0 120.0 2012-11-27 Dilma \n",
"1 190.0 2012-12-06 Dilma \n",
"2 70.0 2012-12-19 Dilma \n",
"3 60.0 2012-11-27 Dilma \n",
"4 84.4 2012-11-28 Dilma "
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def president(row):\n",
" for term in TERMS:\n",
" if term.start <= row.expense_date < term.end:\n",
" return term.name\n",
" \n",
" # fall back for blank dates\n",
" expense_date = date(row[\"ANO EXTRATO\"], row[\"MÊS EXTRATO\"], 1)\n",
" for term in TERMS:\n",
" if term.start <= expense_date < term.end:\n",
" return term.name\n",
"\n",
"df[\"expense_date\"] = pd.to_datetime(df[\"DATA TRANSAÇÃO\"], format='%d/%m/%Y')\n",
"df['president'] = df.apply(president, axis=1)\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array(['Dilma', 'Temer', 'Bolsonaro'], dtype=object)"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.president.unique()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Isolando gastos da presidência"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>CÓDIGO ÓRGÃO SUPERIOR</th>\n",
" <th>NOME ÓRGÃO SUPERIOR</th>\n",
" <th>CÓDIGO ÓRGÃO</th>\n",
" <th>NOME ÓRGÃO</th>\n",
" <th>CÓDIGO UNIDADE GESTORA</th>\n",
" <th>NOME UNIDADE GESTORA</th>\n",
" <th>ANO EXTRATO</th>\n",
" <th>MÊS EXTRATO</th>\n",
" <th>CPF PORTADOR</th>\n",
" <th>NOME PORTADOR</th>\n",
" <th>CNPJ OU CPF FAVORECIDO</th>\n",
" <th>NOME FAVORECIDO</th>\n",
" <th>TRANSAÇÃO</th>\n",
" <th>DATA TRANSAÇÃO</th>\n",
" <th>VALOR TRANSAÇÃO</th>\n",
" <th>expense_date</th>\n",
" <th>president</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>17305</td>\n",
" <td>20000</td>\n",
" <td>Presidência da República</td>\n",
" <td>20101</td>\n",
" <td>Presidência da República</td>\n",
" <td>110322</td>\n",
" <td>GABINETE DE SEGURANÇA INSTITUCIONAL/PR</td>\n",
" <td>2013</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>Sigiloso</td>\n",
" <td>-11</td>\n",
" <td>Sigiloso</td>\n",
" <td>Informações protegidas por sigilo</td>\n",
" <td>NaN</td>\n",
" <td>1129.80</td>\n",
" <td>NaT</td>\n",
" <td>Dilma</td>\n",
" </tr>\n",
" <tr>\n",
" <td>17306</td>\n",
" <td>20000</td>\n",
" <td>Presidência da República</td>\n",
" <td>20101</td>\n",
" <td>Presidência da República</td>\n",
" <td>110322</td>\n",
" <td>GABINETE DE SEGURANÇA INSTITUCIONAL/PR</td>\n",
" <td>2013</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>Sigiloso</td>\n",
" <td>-11</td>\n",
" <td>Sigiloso</td>\n",
" <td>Informações protegidas por sigilo</td>\n",
" <td>NaN</td>\n",
" <td>4352.25</td>\n",
" <td>NaT</td>\n",
" <td>Dilma</td>\n",
" </tr>\n",
" <tr>\n",
" <td>17307</td>\n",
" <td>20000</td>\n",
" <td>Presidência da República</td>\n",
" <td>20101</td>\n",
" <td>Presidência da República</td>\n",
" <td>110322</td>\n",
" <td>GABINETE DE SEGURANÇA INSTITUCIONAL/PR</td>\n",
" <td>2013</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>Sigiloso</td>\n",
" <td>-11</td>\n",
" <td>Sigiloso</td>\n",
" <td>Informações protegidas por sigilo</td>\n",
" <td>NaN</td>\n",
" <td>7061.25</td>\n",
" <td>NaT</td>\n",
" <td>Dilma</td>\n",
" </tr>\n",
" <tr>\n",
" <td>17308</td>\n",
" <td>20000</td>\n",
" <td>Presidência da República</td>\n",
" <td>20101</td>\n",
" <td>Presidência da República</td>\n",
" <td>110322</td>\n",
" <td>GABINETE DE SEGURANÇA INSTITUCIONAL/PR</td>\n",
" <td>2013</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>Sigiloso</td>\n",
" <td>-11</td>\n",
" <td>Sigiloso</td>\n",
" <td>Informações protegidas por sigilo</td>\n",
" <td>NaN</td>\n",
" <td>781.20</td>\n",
" <td>NaT</td>\n",
" <td>Dilma</td>\n",
" </tr>\n",
" <tr>\n",
" <td>17309</td>\n",
" <td>20000</td>\n",
" <td>Presidência da República</td>\n",
" <td>20101</td>\n",
" <td>Presidência da República</td>\n",
" <td>110322</td>\n",
" <td>GABINETE DE SEGURANÇA INSTITUCIONAL/PR</td>\n",
" <td>2013</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>Sigiloso</td>\n",
" <td>-11</td>\n",
" <td>Sigiloso</td>\n",
" <td>Informações protegidas por sigilo</td>\n",
" <td>NaN</td>\n",
" <td>570.00</td>\n",
" <td>NaT</td>\n",
" <td>Dilma</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" CÓDIGO ÓRGÃO SUPERIOR NOME ÓRGÃO SUPERIOR CÓDIGO ÓRGÃO \\\n",
"17305 20000 Presidência da República 20101 \n",
"17306 20000 Presidência da República 20101 \n",
"17307 20000 Presidência da República 20101 \n",
"17308 20000 Presidência da República 20101 \n",
"17309 20000 Presidência da República 20101 \n",
"\n",
" NOME ÓRGÃO CÓDIGO UNIDADE GESTORA \\\n",
"17305 Presidência da República 110322 \n",
"17306 Presidência da República 110322 \n",
"17307 Presidência da República 110322 \n",
"17308 Presidência da República 110322 \n",
"17309 Presidência da República 110322 \n",
"\n",
" NOME UNIDADE GESTORA ANO EXTRATO MÊS EXTRATO \\\n",
"17305 GABINETE DE SEGURANÇA INSTITUCIONAL/PR 2013 1 \n",
"17306 GABINETE DE SEGURANÇA INSTITUCIONAL/PR 2013 1 \n",
"17307 GABINETE DE SEGURANÇA INSTITUCIONAL/PR 2013 1 \n",
"17308 GABINETE DE SEGURANÇA INSTITUCIONAL/PR 2013 1 \n",
"17309 GABINETE DE SEGURANÇA INSTITUCIONAL/PR 2013 1 \n",
"\n",
" CPF PORTADOR NOME PORTADOR CNPJ OU CPF FAVORECIDO NOME FAVORECIDO \\\n",
"17305 NaN Sigiloso -11 Sigiloso \n",
"17306 NaN Sigiloso -11 Sigiloso \n",
"17307 NaN Sigiloso -11 Sigiloso \n",
"17308 NaN Sigiloso -11 Sigiloso \n",
"17309 NaN Sigiloso -11 Sigiloso \n",
"\n",
" TRANSAÇÃO DATA TRANSAÇÃO VALOR TRANSAÇÃO \\\n",
"17305 Informações protegidas por sigilo NaN 1129.80 \n",
"17306 Informações protegidas por sigilo NaN 4352.25 \n",
"17307 Informações protegidas por sigilo NaN 7061.25 \n",
"17308 Informações protegidas por sigilo NaN 781.20 \n",
"17309 Informações protegidas por sigilo NaN 570.00 \n",
"\n",
" expense_date president \n",
"17305 NaT Dilma \n",
"17306 NaT Dilma \n",
"17307 NaT Dilma \n",
"17308 NaT Dilma \n",
"17309 NaT Dilma "
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"presidents = df[df[\"NOME UNIDADE GESTORA\"] == \"GABINETE DE SEGURANÇA INSTITUCIONAL/PR\"]\n",
"presidents.head()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>president</th>\n",
" <th>total</th>\n",
" <th>days_in_office</th>\n",
" <th>expenses_per_day</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>0</td>\n",
" <td>Bolsonaro</td>\n",
" <td>1339610.36</td>\n",
" <td>363.0</td>\n",
" <td>3690.386667</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>Dilma</td>\n",
" <td>2174665.14</td>\n",
" <td>1958.0</td>\n",
" <td>1110.656353</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>Temer</td>\n",
" <td>55375.44</td>\n",
" <td>964.0</td>\n",
" <td>57.443402</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" president total days_in_office expenses_per_day\n",
"0 Bolsonaro 1339610.36 363.0 3690.386667\n",
"1 Dilma 2174665.14 1958.0 1110.656353\n",
"2 Temer 55375.44 964.0 57.443402"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def by_president(rows):\n",
" delta = None\n",
" for term in TERMS:\n",
" if term.name == rows.president.unique()[0]:\n",
" delta = term.end - term.start\n",
" \n",
" if not delta:\n",
" return\n",
" \n",
" total = rows[\"VALOR TRANSAÇÃO\"].sum()\n",
" return pd.Series({\n",
" \"total\": total,\n",
" \"days_in_office\": delta.days,\n",
" \"expenses_per_day\": total / delta.days \n",
" \n",
" })\n",
" \n",
"summary = presidents.groupby('president').apply(by_president).reset_index()\n",
"summary"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
@diraol
Copy link

diraol commented Dec 30, 2019

Não consegui terminar ainda, mas tá aqui uma função que pega o valor corrigido via IPCA (mensal):

import re

import requests
from bs4 import BeautifulSoup

def corrige_valor(valor_original, mes_origem, ano_origem, mes_final=11, ano_final=2019):

    """
    Função que corrige, de acordo com o IPCA, um valor monetário (float).

    Args:
        - valor_original (float): valor a ser corrigido que está na data de origem.
        - mes_origem (int): Número do mês de origem (1 a 12)
        - ano_origem (int): Ano de origem
        - mes_final (int): Número do mês para o qual o valor deverá ser corrigido (1 a 12): default: 11
        - ano_final (int): Ano para o qual o valor deverá ser corrigido - default: 2019
    Return:
        Valor corrigido
    """

    resp = requests.post('https://www3.bcb.gov.br/CALCIDADAO/publico/corrigirPorIndice.do?method=corrigirPorIndice',
                         data={'aba':1,
                               'selIndice': '00433IPCA',
                               'dataInicial': f'{mes_origem:0>2d}/{ano_origem}',
                               'dataFinal': f'{mes_final:0>2d}/{ano_final}',
                               'valorCorrecao': valor_original,
                               'idIndice': None,
                               'nomeIndicePeriodo': None
                              })
    valor = BeautifulSoup(resp.content)\
        .select('table[title^="Correção de valores"]', class_="tabela")[0]\
        .select('tbody')[0]\
        .select('td')[-1]\
        .text
    valor = float(re.findall(r"\d+,\d\d", valor.replace(".", ""))[0].replace(",", "."))
    return valor

Agora precisaria, das duas uma:

1 - Fazer a sumarização por mês/ano, fazer a correção e depois "juntar tudo"
2 - Fazer a correção para cada dado e depois "juntar tudo".

Como a função faz chamadas (http post) para cada vez que é chamada, o melhor é "agregar por mês", corrigir o valor e depois finalizar a sumarização.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment