Skip to content

Instantly share code, notes, and snippets.

View cperales's full-sized avatar

Carlos Perales cperales

View GitHub Profile
@cperales
cperales / rename_files.py
Created September 2, 2025 07:51
Script for renaming the files in a folder using Llama3 served in Ollama
import os
import fitz # PyMuPDF
from PIL import Image
import pytesseract
import ollama
import shutil
import sys
folder = "~/Test"
@cperales
cperales / sobol_integrate.py
Created April 2, 2020 14:53
Example of Sobol integration method
import numpy as np
from pyscenarios.sobol import sobol
import matplotlib.pyplot as plt
def example_func(x):
"""
Result of integrate this function
is approximately 0.63661977.
@cperales
cperales / counting_cpus.py
Created January 22, 2019 10:35
A graphic that shows how many real CPUs we have
import multiprocessing as mp
from time import perf_counter
import matplotlib.pyplot as plt
def counting_time(cpu, amount):
start = perf_counter()
list_amount = [amount] * cpu
with mp.Pool(cpu) as p:
p.map(f, list_amount)
@cperales
cperales / check_cpu.py
Created January 22, 2019 09:38
Python code to see if you can use 100% of your processors
import multiprocessing as mp
# CPU-burning worker: performs `huge_amount` iterations of trivial
# arithmetic so one process fully occupies one core.
# NOTE(review): indentation was lost in this scraped preview, and the
# gist preview may truncate the function (e.g. a trailing `return count`)
# — confirm against the original file.
def f(huge_amount):
count = 0
# Alternate between +1 and -1 so the loop does real (unoptimizable) work;
# odd i is truthy for `i % 2`, even i falls to the else branch.
for i in range(huge_amount):
if i % 2:
count += 1
else:
count -= 1
@cperales
cperales / wilcoxon.py
Created July 6, 2018 07:39
Simple code to perform the Wilcoxon test when you have CSVs with algorithms as columns and the first column as the dataset index
import os
import pandas as pd
from scipy.stats import wilcoxon, ranksums, mannwhitneyu
import numpy as np
def wilcoxon_csv(csv_name):
# Load the csv as DataFrame
df = pd.read_csv(csv_name, delimiter=';', index_col='Dataset')
# The name of the csv file is the name of the metric
@cperales
cperales / load_data.py
Created March 20, 2018 08:25
Simple script to load train-target data, typical in machine learning
import os
import pandas as pd
from sklearn import preprocessing
def load_data(folder,
dataset,
n_targ=None,
header=None,
sep='\s+'):
@cperales
cperales / update_all.sh
Created January 8, 2018 11:31
A very short script for updating a Debian-based system
# Refresh package lists, upgrade installed packages (including dist-upgrade
# changes that add/remove dependencies), then purge no-longer-needed packages.
# Chained with && so each step runs only if the previous one succeeded.
# Fixes: `update` takes no prompt, so `-y` was meaningless there; use
# `apt-get autoremove` instead of `apt autoremove` for consistency —
# `apt` warns it does not have a stable CLI interface for scripts.
sudo apt-get update && sudo apt-get upgrade -y && sudo apt-get dist-upgrade -y && sudo apt-get autoremove -y
@cperales
cperales / average_shortest_distances
Created June 19, 2016 12:27
average_shortest_distances
## This classifier compares the average of the shortest distances of the
# samples from target 1 with the average of the distances of the samples
# from target 2
from numpy.linalg import norm
from numpy import percentile
def average_shortest_distances(train_set,sample,per=20):
"""
"""
# Código para volver a descargar tweets que están mal parseados (pero poseemos su ID)
# Sería conveniente tener dos claves API, porque es normal que durante el proceso ocurra el error "rate limit"
# De ahí la necesidad de tantos try/except
infile = codecs.open('twitter.json', 'r', encoding='utf-8')
outfile = codecs.open('tweets_completos.json', 'w', encoding='utf-8')
errores = codecs.open('id_tweets_no_recuperados.txt', 'w', encoding='utf-8')
recuperados = 0
perdidos = 0
# -*- coding: utf-8 -*-
from bson import json_util as json
import codecs
import datetime
import sys
# Introduces por linea de comandos "python dict_to_json.py file.json"
# Pensado para tweets en diccionario de un archivo a un JSON