Skip to content

Instantly share code, notes, and snippets.

View cperales's full-sized avatar

Carlos Perales cperales

View GitHub Profile
@cperales
cperales / rename_files.py
Created September 2, 2025 07:51
Script for renaming the files in a folder using Llama3 served in Ollama
import os
import fitz # PyMuPDF
from PIL import Image
import pytesseract
import ollama
import shutil
import sys
folder = "~/Test"
@cperales
cperales / sobol_integrate.py
Created April 2, 2020 14:53
Example of Sobol integration method
import numpy as np
from pyscenarios.sobol import sobol
import matplotlib.pyplot as plt
def example_func(x):
"""
Result of integrate this function
is approximately 0.63661977.
@cperales
cperales / counting_cpus.py
Created January 22, 2019 10:35
A graphic that shows how many real CPUs we have
import multiprocessing as mp
from time import perf_counter
import matplotlib.pyplot as plt
def counting_time(cpu, amount):
start = perf_counter()
list_amount = [amount] * cpu
with mp.Pool(cpu) as p:
p.map(f, list_amount)
@cperales
cperales / check_cpu.py
Created January 22, 2019 09:38
Python code to see if you can use 100% of your processors
import multiprocessing as mp
# CPU-burning worker: performs `huge_amount` iterations of trivial
# arithmetic so one process fully occupies one core.
# NOTE(review): indentation was lost in this scraped preview, and the
# gist preview may truncate the function (e.g. a trailing `return count`)
# — confirm against the original file.
def f(huge_amount):
count = 0
# Alternate between +1 and -1 so the loop does real (unoptimizable) work;
# odd i is truthy for `i % 2`, even i falls to the else branch.
for i in range(huge_amount):
if i % 2:
count += 1
else:
count -= 1
@cperales
cperales / wilcoxon.py
Created July 6, 2018 07:39
Simple code to perform the Wilcoxon test when you have CSVs with algorithms as columns and the first column as the dataset index
import os
import pandas as pd
from scipy.stats import wilcoxon, ranksums, mannwhitneyu
import numpy as np
def wilcoxon_csv(csv_name):
# Load the csv as DataFrame
df = pd.read_csv(csv_name, delimiter=';', index_col='Dataset')
# The name of the csv file is the name of the metric
@cperales
cperales / load_data.py
Created March 20, 2018 08:25
Simple script to load train-target data, typical in machine learning
import os
import pandas as pd
from sklearn import preprocessing
def load_data(folder,
dataset,
n_targ=None,
header=None,
sep='\s+'):
@cperales
cperales / update_all.sh
Created January 8, 2018 11:31
A very short script for updating a Debian-based system
# Refresh package lists, upgrade installed packages (including dist-upgrade
# changes that add/remove dependencies), then purge no-longer-needed packages.
# Chained with && so each step runs only if the previous one succeeded.
# Fixes: `update` takes no prompt, so `-y` was meaningless there; use
# `apt-get autoremove` instead of `apt autoremove` for consistency —
# `apt` warns it does not have a stable CLI interface for scripts.
sudo apt-get update && sudo apt-get upgrade -y && sudo apt-get dist-upgrade -y && sudo apt-get autoremove -y
@cperales
cperales / average_shortest_distances
Created June 19, 2016 12:27
average_shortest_distances
## This classifier compares the average of the shortest distances of the
# samples from target 1 with the average of the distances of the samples
# from target 2
from numpy.linalg import norm
from numpy import percentile
def average_shortest_distances(train_set,sample,per=20):
"""
"""
# Código para volver a descargar tweets que están mal parseados (pero poseemos su ID)
# Sería conveniente tener dos claves API, porque es normal que durante el proceso ocurra el error "rate limit"
# De ahí la necesidad de tantos try/except
infile = codecs.open('twitter.json', 'r', encoding='utf-8')
outfile = codecs.open('tweets_completos.json', 'w', encoding='utf-8')
errores = codecs.open('id_tweets_no_recuperados.txt', 'w', encoding='utf-8')
recuperados = 0
perdidos = 0
# -*- coding: utf-8 -*-
from bson import json_util as json
import codecs
import datetime
import sys
# Introduces por linea de comandos "python dict_to_json.py file.json"
# Pensado para tweets en diccionario de un archivo a un JSON