Skip to content

Instantly share code, notes, and snippets.

Avatar
💭
coding the days away

Sergio Lucero sergiolucero

💭
coding the days away
View GitHub Profile
@sergiolucero
sergiolucero / myvizlibs.py
Created Apr 25, 2021
essential python plotting tools
View myvizlibs.py
import matplotlib.pyplot as plt
import seaborn as sns
import altair as alt
# plot.ly, dash...
# One figure holding a single axes, on a 20 x 12 canvas.
fig, ax = plt.subplots(nrows=1, figsize=(20, 12))
@sergiolucero
sergiolucero / pdf2txt.py
Last active Apr 24, 2021
pdf legal conversion
View pdf2txt.py
import glob, fitz, pandas as pd
# Collect every PDF in the input folder and flatten each document into one
# string by joining the text of its pages with spaces.
files = glob.glob('folder/*.pdf')
texts = [' '.join(page.getText() for page in fitz.open(fn)) for fn in files]
df = pd.DataFrame(dict(file=files, text=texts))
# apply() must receive the callable itself so it is invoked on every text; a
# lambda that merely names the helper would store the function object in each
# cell instead of the processed text.
# NOTE(review): remove_headandsentence and extract_fallo are not defined in the
# visible part of this file -- confirm they are in scope before running.
df['cuerpo'] = df.text.apply(remove_headandsentence)
df['fallo'] = df.text.apply(extract_fallo)
df.to_csv('sentencias.csv', index=False)
# Total number of extracted characters across all documents.
print(sum(len(txt) for txt in texts))
View TorchBERT.py
from tqdm import tqdm
import os
import pickle
import pandas as pd
import torch
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
from transformers import BertTokenizer, BertForSequenceClassification
View street_splitter.py
# shape source: https://geoine-ine-chile.opendata.arcgis.com/datasets/186b78e22db44f96a016f9254de44dd4_0
import geopandas as gp
# Load the MAESTRO_CALLES layer and reproject it to EPSG:4326.
# NOTE(review): `init` is forwarded to the underlying reader as an extra
# keyword -- confirm it actually sets the source CRS as intended.
gdf = gp.read_file('MAESTRO_CALLES', init={'epsg:3857'})
gdf = gdf.to_crs('epsg:4326')
# Normalise column names to lower case so the lookups below are stable.
gdf.columns = [c.lower() for c in gdf.columns]
# Write one GeoJSON file per comuna, keeping only the name and geometry columns.
for comuna, cdf in gdf.groupby('comuna'):
    ddf = cdf[['comuna', 'nombre_aux', 'geometry']]
    # The output format is selected with `driver`; `engine` names the I/O
    # backend and does not accept a format name.
    # NOTE(review): `single_file` is passed through to the writer unchanged --
    # confirm the GeoJSON driver honours it.
    ddf.to_file(f'CALLES/{comuna.replace(" ", "_")}.json', driver='GeoJSON', single_file=True)
@sergiolucero
sergiolucero / servel2s3.py
Last active Jan 30, 2021
descarga del padrón servel a s3
View servel2s3.py
import pandas as pd
import os, time
# Shell command templates. Each has one `%05d` slot, i.e. an integer rendered
# as a zero-padded five-digit number (presumably the comuna code loaded below
# -- confirm against the rest of the script).
DOWNER = 'wget https://cdn.servel.cl/padron/A%05d.pdf'
UPPER = 'aws s3 cp A%05d.pdf s3://quantcldata/CLIENTES/SERVEL/2021/'
DELETE = 'rm A%05d.pdf'

# Read the spreadsheet and pull out its comuna-code column.
df = pd.read_excel('cut_2018_v03.xls')
comunas = df['Código Comuna 2017']
@sergiolucero
sergiolucero / joyplot.py
Created Nov 28, 2020
JoyPlot temperaturas máximas Quinta Normal
View joyplot.py
import pandas as pd
import joypy
import matplotlib.pyplot as plt
# Load the series and keep only the November rows (mes == 11).
november = pd.read_excel('EC_series.xlsx')
november = november[november.mes == 11]
# odf: the early period, years up to and including 1950.
odf = november[november.agno <= 1950]
# df2: the recent period, 1980 onwards.
df2 = november[november.agno >= 1980]
# Optional, currently disabled: express values as a deviation from the
# historic November mean.
# df2.valor -= 23.58
View interpoladorFechas.js
<script src="https://code.jquery.com/jquery-3.3.1.slim.min.js" integrity="sha384-q8i/X+965DzO0rT7abK41JStQIAqVgRVzpbzo5smXKp4YfRvH+8abtTE1Pi6jizo" crossorigin="anonymous"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/popper.js/1.14.7/umd/popper.min.js" integrity="sha384-UO2eT0CpHqdSJQ6hJty5KVphtPhzWj9WO1clHTMGa3JDZwrnQq4sF86dIHNDz0W1" crossorigin="anonymous"></script>
<script src="https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/js/bootstrap.min.js" integrity="sha384-JjSmVgyd0p3pXB1rRibZUAYoIIy6OrQ6VrjIEaFf/nJGzIxFDsf4x0xIM+B07jRM" crossorigin="anonymous"></script>
<script type="text/javascript">
function getAgeFromRUT(rut) {
var today_date = new Date();
var slope = 3.3363697569700348e-06
var intercept = 1932.2573852507373
var birth_date = rut * slope + intercept
var birth_date_year = Math.floor(birth_date)
View lined_heatmap.py
import seaborn as sns; sns.set()
import matplotlib.pyplot as plt
# Reshape the long-format flights table into a month x year grid of passenger
# counts. pivot() is called with keywords because its arguments are
# keyword-only in pandas >= 2.0.
flights = sns.load_dataset("flights")
flights = flights.pivot(index="month", columns="year", values="passengers")
fig, ax = plt.subplots(1, figsize=(16, 8))
sns.heatmap(flights, cbar=False, annot=True, fmt='.0f', ax=ax)
# Overlay one line per heatmap row. Each row is divided by its own maximum, so
# the plotted values fall between ix and ix + 1.
n_rows, n_cols = flights.shape
x = range(n_cols)
for ix in range(n_rows):
    row = flights.iloc[ix]
    y = ix + 1 - row / row.max()
    h = plt.plot(x, y, lw=4)
plt.show()
View docker_install.sh
#!/bin/bash
# Install Docker and docker-compose from the apt repositories, then start the
# Docker service and enable it so it comes up on boot.
sudo apt-get update
sudo apt-get install -y docker.io
sudo apt-get install -y docker-compose
sudo systemctl start docker
sudo systemctl enable docker
# Print the installed version as a quick check that the install succeeded.
docker --version
View mapa_covid_italia.py
import folium, pandas as pd
from folium.plugins import MarkerCluster
# Load the dataset and keep only rows from the most recent date that report at
# least one case.
pdf = pd.read_json('https://tinyurl.com/covid19-github')
is_latest = pdf.data == pdf.data.max()
has_cases = pdf.totale_casi > 0
pdf = pdf[is_latest & has_cases]
# Centre the map on the median latitude/longitude (the '50%' row of describe()).
centroid = pdf.describe()[['lat', 'long']].loc['50%'].values
fm = folium.Map(
    location=centroid,
    zoom_start=6,
    tiles='stamentoner',
    width=800,
    height=600,
)
mc = MarkerCluster()