This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import boto3 | |
client = boto3.client('textract') | |
response = client.analyze_document( | |
Document={ | |
'Bytes': b'bytes', | |
'S3Object': { | |
'Bucket': 'string', | |
'Name': 'string', | |
'Version': 'string' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import datetime, requests | |
import io, glob, os | |
import PyPDF2 | |
from bs4 import BeautifulSoup | |
# Today's date as YYYYMMDD; it is printed and also sliced into the URL below.
fecha = datetime.datetime.now().strftime('%Y%m%d')
print('FECHA:', fecha)
# The edition URL has the form .../impresa/YYYY/MM/DD/.
root = f'https://www.mercuriovalpo.cl/impresa/{fecha[:4]}/{fecha[4:6]}/{fecha[6:]}/'
base = root + 'papel'
We can't make this file beautiful and searchable because it's too large.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
OBJECTID,ID_CIRCUNSCRIPCION,CIRCUNCRIPCION_ELECTORAL,COD_LOCAL,LOCAL_VOTACION,DIRECCION,Latitud,Longitud,ELECTORES,CUT_PROV,PROVINCIA,CUT_REG,CUT_COM,REGION,COMUNA,CUT_COM_ENT,CUT_REG_ENT,ORD_REG,N_MESAS,CASOS_ACTIVOS,GOOGLE_MAPS | |
1,589,ALTO HOSPICIO,1036,ANEXO DE COLEGIO SAN ANTONIO DE MATILLA,LOS TAMARUGOS ESQUINA LOS KIWIS,-20.2687,-70.1029,3402,011,Iquique,01,01107,Tarapacá,Alto Hospicio,1107,1,2,10,64.0,"<a href=""https://www.google.cl/maps/dir//-20.2687,-70.1029"" target=_blank"">Ir con Google Maps</a>" | |
2,589,ALTO HOSPICIO,1025,COLEGIO MARISTA HERMANO FERNANDO,AVENIDA RICARDO LAGOS 4201,-20.2864,-70.0823,6386,011,Iquique,01,01107,Tarapacá,Alto Hospicio,1107,1,2,19,0.0,"<a href=""https://www.google.cl/maps/dir//-20.2864,-70.0823"" target=_blank"">Ir con Google Maps</a>" | |
3,589,ALTO HOSPICIO,1061,COLEGIO METODISTA WILLIAM TAYLOR,AVENIDA LOS ALAMOS 3025,-20.2678,-70.1044,6405,011,Iquique,01,01107,Tarapacá,Alto Hospicio,1107,1,2,19,64.0,"<a href=""https://www.google.cl/maps/dir//-20.2678,-70.1044"" target=_bl |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import matplotlib.pyplot as plt | |
import seaborn as sns | |
import altair as alt | |
# plot.ly, dash... | |
# Single 20x12-inch figure with one axes.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(20, 12))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import glob, fitz, pandas as pd | |
def _read_pdf_text(path):
    """Return the text of every page of the PDF at *path*, joined by spaces."""
    # Open the document in a context manager so its file handle is released
    # as soon as the text has been extracted.
    with fitz.open(path) as doc:
        return ' '.join(page.get_text() for page in doc)


files = glob.glob('folder/*.pdf')
texts = [_read_pdf_text(fn) for fn in files]
df = pd.DataFrame(dict(file=files, text=texts))
# remove_headandsentence and extract_fallo are not defined in this snippet;
# they must be in scope and accept the text of one document. They are passed
# directly to apply() so they are actually called on each text (the previous
# lambdas returned the function objects themselves).
df['cuerpo'] = df.text.apply(remove_headandsentence)
df['fallo'] = df.text.apply(extract_fallo)
df.to_csv('sentencias.csv', index=False)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from tqdm import tqdm | |
import os | |
import pickle | |
import pandas as pd | |
import torch | |
from torch.utils.data import TensorDataset | |
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler | |
from transformers import BertTokenizer, BertForSequenceClassification |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# shape source: https://geoine-ine-chile.opendata.arcgis.com/datasets/186b78e22db44f96a016f9254de44dd4_0 | |
import geopandas as gp | |
# Read the street layer. NOTE(review): the original call passed
# init={'epsg:3857'}, apparently to declare the source CRS; that is not a
# CRS declaration, so the CRS is set explicitly only when the file lacks one.
gdf = gp.read_file('MAESTRO_CALLES')
if gdf.crs is None:
    gdf = gdf.set_crs('epsg:3857')
# Reproject to WGS84 longitude/latitude.
gdf = gdf.to_crs('epsg:4326')
gdf.columns = [c.lower() for c in gdf.columns]
# Write one GeoJSON file per comuna, with spaces in the name replaced by "_".
for comuna, cdf in gdf.groupby('comuna'):
    ddf = cdf[['comuna', 'nombre_aux', 'geometry']]
    # GeoJSON is the output *driver* (it was passed as `engine`); the
    # undocumented single_file=True option has been dropped.
    ddf.to_file(f'CALLES/{comuna.replace(" ", "_")}.json', driver='GeoJSON')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import os, time | |
# Shell command templates. Each takes one integer, zero-padded to five digits
# by %05d, that names the PDF (A00000.pdf style).
# NOTE(review): presumably the integer is a comuna code from `comunas` -
# confirm against the code that fills these templates.
DOWNER = 'wget https://cdn.servel.cl/padron/A%05d.pdf'
UPPER = 'aws s3 cp A%05d.pdf s3://quantcldata/CLIENTES/SERVEL/2021/'
DELETE = 'rm A%05d.pdf'

# Comuna codes are read from the 'Código Comuna 2017' column of the workbook.
df = pd.read_excel('cut_2018_v03.xls')
comunas = df['Código Comuna 2017']
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import joypy | |
import matplotlib.pyplot as plt | |
df2 = pd.read_excel('EC_series.xlsx')
# Keep only November rows (mes == 11).
df2 = df2[df2.mes == 11]
# Early reference period: Novembers up to and including 1950.
odf = df2[df2.agno <= 1950]
# Recent period: Novembers from 1980 onwards (labelled "last 40" originally).
df2 = df2[df2.agno >= 1980]
# NOTE: optional step, currently disabled - express values as the deviation
# from the historic November mean:
# df2.valor -= 23.58
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<script src="https://code.jquery.com/jquery-3.3.1.slim.min.js" integrity="sha384-q8i/X+965DzO0rT7abK41JStQIAqVgRVzpbzo5smXKp4YfRvH+8abtTE1Pi6jizo" crossorigin="anonymous"></script> | |
<script src="https://cdnjs.cloudflare.com/ajax/libs/popper.js/1.14.7/umd/popper.min.js" integrity="sha384-UO2eT0CpHqdSJQ6hJty5KVphtPhzWj9WO1clHTMGa3JDZwrnQq4sF86dIHNDz0W1" crossorigin="anonymous"></script> | |
<script src="https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/js/bootstrap.min.js" integrity="sha384-JjSmVgyd0p3pXB1rRibZUAYoIIy6OrQ6VrjIEaFf/nJGzIxFDsf4x0xIM+B07jRM" crossorigin="anonymous"></script> | |
<script type="text/javascript"> | |
function getAgeFromRUT(rut) { | |
var today_date = new Date(); | |
var slope = 3.3363697569700348e-06 | |
var intercept = 1932.2573852507373 | |
var birth_date = rut * slope + intercept | |
var birth_date_year = Math.floor(birth_date) |