Skip to content

Instantly share code, notes, and snippets.

View sergiolucero's full-sized avatar
💭
coding the days away

Sergio Lucero sergiolucero

💭
coding the days away
View GitHub Profile
@sergiolucero
sergiolucero / botoTextract.py
Created September 11, 2021 00:30
Textract AWS boto3
import boto3
client = boto3.client('textract')
response = client.analyze_document(
Document={
'Bytes': b'bytes',
'S3Object': {
'Bucket': 'string',
'Name': 'string',
'Version': 'string'
@sergiolucero
sergiolucero / mercuvalpo.py
Last active March 7, 2022 13:00
Mercurio de Valpo
import datetime, requests
import io, glob, os
import PyPDF2
from bs4 import BeautifulSoup
# Today's date as a compact YYYYMMDD stamp.
fecha = f'{datetime.datetime.now():%Y%m%d}'
print('FECHA:', fecha)
# The edition URL is keyed by year/month/day path segments cut from the stamp.
root = 'https://www.mercuriovalpo.cl/impresa/' + '/'.join((fecha[:4], fecha[4:6], fecha[6:])) + '/'
base = f'{root}papel'
@sergiolucero
sergiolucero / locales_votacion_2021.csv
Created August 8, 2021 12:51
scraper locales votacion
We can't make this file beautiful and searchable because it's too large.
OBJECTID,ID_CIRCUNSCRIPCION,CIRCUNCRIPCION_ELECTORAL,COD_LOCAL,LOCAL_VOTACION,DIRECCION,Latitud,Longitud,ELECTORES,CUT_PROV,PROVINCIA,CUT_REG,CUT_COM,REGION,COMUNA,CUT_COM_ENT,CUT_REG_ENT,ORD_REG,N_MESAS,CASOS_ACTIVOS,GOOGLE_MAPS
1,589,ALTO HOSPICIO,1036,ANEXO DE COLEGIO SAN ANTONIO DE MATILLA,LOS TAMARUGOS ESQUINA LOS KIWIS,-20.2687,-70.1029,3402,011,Iquique,01,01107,Tarapacá,Alto Hospicio,1107,1,2,10,64.0,"<a href=""https://www.google.cl/maps/dir//-20.2687,-70.1029"" target=_blank"">Ir con Google Maps</a>"
2,589,ALTO HOSPICIO,1025,COLEGIO MARISTA HERMANO FERNANDO,AVENIDA RICARDO LAGOS 4201,-20.2864,-70.0823,6386,011,Iquique,01,01107,Tarapacá,Alto Hospicio,1107,1,2,19,0.0,"<a href=""https://www.google.cl/maps/dir//-20.2864,-70.0823"" target=_blank"">Ir con Google Maps</a>"
3,589,ALTO HOSPICIO,1061,COLEGIO METODISTA WILLIAM TAYLOR,AVENIDA LOS ALAMOS 3025,-20.2678,-70.1044,6405,011,Iquique,01,01107,Tarapacá,Alto Hospicio,1107,1,2,19,64.0,"<a href=""https://www.google.cl/maps/dir//-20.2678,-70.1044"" target=_bl
@sergiolucero
sergiolucero / myvizlibs.py
Created April 25, 2021 16:28
essential python plotting tools
import matplotlib.pyplot as plt
import seaborn as sns
import altair as alt
# plot.ly, dash...
# Create one figure holding a single Axes; figsize is (width, height) in inches.
fig, ax = plt.subplots(1, figsize=(20,12))
@sergiolucero
sergiolucero / pdf2txt.py
Last active April 12, 2023 13:49
pdf legal conversion
import glob, fitz, pandas as pd
# Gather every PDF in the input folder and flatten each one to a single string
# by joining the text of its pages with spaces.
files = glob.glob('folder/*.pdf')
texts = [' '.join(page.get_text() for page in fitz.open(fn))
         for fn in files]
df = pd.DataFrame(dict(file=files, text=texts))
# Hand the helpers themselves to apply() so they are called once per text.
# Wrapping them as `lambda t: helper` would return the function object without
# calling it, filling the column with function references instead of results.
# NOTE(review): remove_headandsentence and extract_fallo are not defined in
# this snippet; they are assumed to accept the document text — confirm.
df['cuerpo'] = df.text.apply(remove_headandsentence)
df['fallo'] = df.text.apply(extract_fallo)
df.to_csv('sentencias.csv', index=False)
@sergiolucero
sergiolucero / TorchBERT.py
Created April 7, 2021 15:00
Torch and BERT
from tqdm import tqdm
import os
import pickle
import pandas as pd
import torch
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
from transformers import BertTokenizer, BertForSequenceClassification
@sergiolucero
sergiolucero / street_splitter.py
Created March 24, 2021 12:48
Street splitter
# shape source: https://geoine-ine-chile.opendata.arcgis.com/datasets/186b78e22db44f96a016f9254de44dd4_0
import geopandas as gp
# NOTE(review): `init={'epsg:3857'}` is forwarded as an extra keyword to the
# underlying file reader; it looks like an attempt to declare the source CRS —
# confirm it has the intended effect.
gdf = gp.read_file('MAESTRO_CALLES',init={'epsg:3857'})
gdf = gdf.to_crs('epsg:4326')  # reproject to EPSG:4326 (WGS84 lon/lat)
gdf.columns = [c.lower() for c in gdf.columns]
# Write one GeoJSON file per comuna, keeping only the name and geometry columns.
for comuna, cdf in gdf.groupby('comuna'):
    ddf = cdf[['comuna','nombre_aux','geometry']]
    # The output format is selected with `driver`; `engine` only chooses the
    # I/O backend and does not accept a format name.
    # NOTE(review): single_file=True is not a documented to_file() argument and
    # is passed through to the backend unchanged — confirm it is needed.
    ddf.to_file(f'CALLES/{comuna.replace(" ","_")}.json', driver='GeoJSON', single_file=True)
@sergiolucero
sergiolucero / servel2s3.py
Last active January 30, 2021 08:03
descarga del padrón servel a s3
import pandas as pd
import os, time
# Shell command templates. Each `%05d` slot takes an integer rendered as five
# zero-padded digits.
# NOTE(review): presumably filled with the comuna codes loaded below — confirm
# against the code that formats and runs these commands.
# Download one PDF from the SERVEL CDN.
DOWNER = 'wget https://cdn.servel.cl/padron/A%05d.pdf'
# Copy the downloaded PDF into the S3 bucket.
UPPER = 'aws s3 cp A%05d.pdf s3://quantcldata/CLIENTES/SERVEL/2021/'
# Remove the local copy.
DELETE = 'rm A%05d.pdf'
# Load the spreadsheet and keep its 'Código Comuna 2017' column.
df=pd.read_excel('cut_2018_v03.xls')
comunas=df['Código Comuna 2017']
@sergiolucero
sergiolucero / joyplot.py
Created November 28, 2020 22:35
JoyPlot temperaturas máximas Quinta Normal
import pandas as pd
import joypy
import matplotlib.pyplot as plt
# Load the full series and narrow it to November observations (mes == 11).
series = pd.read_excel('EC_series.xlsx')
november = series.loc[series.mes == 11]
# Early reference period: Novembers up to and including 1950.
odf = november.loc[november.agno <= 1950]
# Recent period: Novembers from 1980 onward.
df2 = november.loc[november.agno >= 1980]
# df2.valor -= 23.58 # deviation from the historic November mean
@sergiolucero
sergiolucero / interpoladorFechas.js
Created May 10, 2020 10:47
edad aproximada por RUT
<script src="https://code.jquery.com/jquery-3.3.1.slim.min.js" integrity="sha384-q8i/X+965DzO0rT7abK41JStQIAqVgRVzpbzo5smXKp4YfRvH+8abtTE1Pi6jizo" crossorigin="anonymous"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/popper.js/1.14.7/umd/popper.min.js" integrity="sha384-UO2eT0CpHqdSJQ6hJty5KVphtPhzWj9WO1clHTMGa3JDZwrnQq4sF86dIHNDz0W1" crossorigin="anonymous"></script>
<script src="https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/js/bootstrap.min.js" integrity="sha384-JjSmVgyd0p3pXB1rRibZUAYoIIy6OrQ6VrjIEaFf/nJGzIxFDsf4x0xIM+B07jRM" crossorigin="anonymous"></script>
<script type="text/javascript">
function getAgeFromRUT(rut) {
var today_date = new Date();
var slope = 3.3363697569700348e-06
var intercept = 1932.2573852507373
var birth_date = rut * slope + intercept
var birth_date_year = Math.floor(birth_date)