Skip to content

Instantly share code, notes, and snippets.

View sergiolucero's full-sized avatar
💭
coding the days away

Sergio Lucero sergiolucero

💭
coding the days away
View GitHub Profile
@sergiolucero
sergiolucero / cne_api.py
Last active July 27, 2023 17:41
Precios combustibles
import pandas as pd
from token import TOKEN # token personal: http://www.energiaabierta.cne.cl/
TIPOS = ['calefaccion','vehicular']
SOURCE = 'http://api.cne.cl/v3/combustibles/{}/estaciones?token={}'
COPY_VARS = ['nombre_comuna', 'id_region', 'direccion_calle', 'fecha_hora_actualizacion']
for tipo in TIPOS:
print(tipo)
df = pd.read_json(SOURCE.format(tipo, TOKEN))
out = pd.DataFrame()
@sergiolucero
sergiolucero / pdf2txt.py
Last active April 12, 2023 13:49
pdf legal conversion
import glob, fitz, pandas as pd
def _read_pdf_text(path):
    """Return the text of every page of the PDF at *path*, joined by spaces."""
    # Open through a context manager so the document is closed once read.
    with fitz.open(path) as doc:
        return ' '.join(page.get_text() for page in doc)


# Build one row per PDF in folder/: the file path and its extracted text.
files = glob.glob('folder/*.pdf')
texts = [_read_pdf_text(fn) for fn in files]
df = pd.DataFrame(dict(file=files, text=texts))
# Hand the helpers themselves to ``apply`` so they are called on each text.
# The previous ``lambda t: helper`` never called the helper: it returned the
# function object, so both columns ended up holding function references.
# NOTE(review): remove_headandsentence and extract_fallo are not defined in
# the visible part of this script -- confirm they are defined or imported
# before this point.
df['cuerpo'] = df['text'].apply(remove_headandsentence)
df['fallo'] = df['text'].apply(extract_fallo)
df.to_csv('sentencias.csv', index=False)
@sergiolucero
sergiolucero / metropolitana.py
Created June 3, 2018 17:30
Comunas de Chile
#!wget https://www.bcn.cl/siit/obtienearchivo?id=repositorio/10221/10396/1/division_comunal.zip
#!mv obtienearchivo\?id\=repositorio%2F10221%2F10396%2F1%2Fdivision_comunal.zip comunal.zip
#!unzip comunal
#!pip install geopandas
import geopandas as gp
# Load the commune-division shapefile and keep only the rows whose NOM_REG
# is the Santiago Metropolitan Region.
df = gp.read_file('division_comunal.shp')
# df.NOM_REG.unique() shows the distinct region names available to filter on.
in_rm = df['NOM_REG'] == 'Región Metropolitana de Santiago'
rmdf = df[in_rm]
n_comunas = len(rmdf)
print('Procesamos %d comunas en la Región Metropolitana' % n_comunas)
# Persist the subset for reuse (original note: 780K; upload to quant.cl for
# reference, just repeat the steps above).
rmdf.to_pickle('rm.pk')
@sergiolucero
sergiolucero / failed_transformer
Created March 31, 2023 11:25
¿Por qué diantres no funciona esto?
# A multi-line ``from ... import`` list must be parenthesized: ending a line
# with a bare comma and continuing on the next one is a SyntaxError.
from transformers import (
    AutoModelForQuestionAnswering,
    AutoTokenizer,
    Trainer,
    TrainingArguments,
    default_data_collator,
)
import datasets

# Checkpoint name shared by the tokenizer and the question-answering model.
model_name = 'dccuchile/bert-base-spanish-wwm-cased'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForQuestionAnswering.from_pretrained(model_name)
# First 80% of the 'train' split of the squad_es dataset, config v1.1.0.
train_data = datasets.load_dataset('squad_es', 'v1.1.0', split='train[:80%]')
@sergiolucero
sergiolucero / cancionero.py
Last active January 9, 2023 18:58
Cancionero de La Cuerda
import requests, sys
from bs4 import BeautifulSoup
from docx import Document
def ubs(url):
    """Fetch *url* and return its body parsed by BeautifulSoup with html5lib."""
    response = requests.get(url)
    return BeautifulSoup(response.text, 'html5lib')
def get_chords(artist = 'Manu Chao'):
fartist = '_'.join(s.lower() for s in artist.split()) # use map
url = f'https://acordes.lacuerda.net/{fartist}/'
@sergiolucero
sergiolucero / graphqlapp.py
Created June 20, 2019 13:33
basic flask-graphql app
from flask import Flask
from flask_sqlalchemy import SQLAlchemy
import os
import graphene
from graphene_sqlalchemy import SQLAlchemyObjectType, SQLAlchemyConnectionField
from flask_graphql import GraphQLView
#################################
app = Flask(__name__)
app.debug = True
@sergiolucero
sergiolucero / dcm_plotter
Last active August 9, 2022 02:26
view dcm folder
import pydicom as dicom
import matplotlib.pylab as plt
import glob
# Show up to rows*cols DICOM files from the current directory on a grid,
# one subplot per file, titled with the file name.
fig = plt.figure(figsize=(20, 12))
rows = 2
cols = 2
axes = []
# glob returns matches in arbitrary order; sorting makes both the selected
# files and their grid positions reproducible between runs.
dcm_files = sorted(glob.glob('*.dcm'))[:rows * cols]
for a, file in enumerate(dcm_files):
    ax = fig.add_subplot(rows, cols, a + 1)
    ax.set_title(file)
    ax.axis('off')
    # Draw on the subplot just created instead of relying on pyplot's
    # implicit "current axes".
    ax.imshow(dicom.dcmread(file).pixel_array)
    axes.append(ax)
@sergiolucero
sergiolucero / pdf_merger.py
Created July 24, 2022 22:31
concatenate and merge a list of PDF files
import glob
from PyPDF2 import PdfFileMerger
# Concatenate every PDF in the current directory into result.pdf.
# The output file is excluded from the inputs so that re-running the script
# does not fold a previous result back into the new one, and the inputs are
# sorted because glob returns matches in arbitrary order.
# NOTE(review): PdfFileMerger was replaced by PdfMerger in newer PyPDF2
# releases -- confirm the installed version still provides this name.
sources = sorted(fn for fn in glob.glob('*.pdf') if fn != 'result.pdf')
merger = PdfFileMerger()
try:
    for fn in sources:
        merger.append(fn)
    merger.write('result.pdf')
finally:
    # Always close the merger, even when appending or writing fails.
    merger.close()
import glob
import cv2
images = glob.glob('Plot*.png')
print('nImages:', len(images))
height, width, layers = cv2.imread(images[0]).shape
video = cv2.VideoWriter('video.avi', 0, 1, (width,height))
for image in images:
from diagrams import Diagram, Cluster
from diagrams.aws.compute import EC2
from diagrams.aws.network import ELB
from diagrams.aws.network import Route53
from diagrams.onprem.database import PostgreSQL # Would typically use RDS from aws.database
from diagrams.onprem.inmemory import Redis # Would typically use ElastiCache from aws.database
with Diagram("Simple Website Diagram", direction='LR') as diag: # It's LR by default, but you have a few options with the orientation
dns = Route53("dns")
load_balancer = ELB("Load Balancer")