Skip to content

Instantly share code, notes, and snippets.

View mesejo's full-sized avatar

Daniel Mesejo mesejo

  • Barcelona, Spain
View GitHub Profile
@mesejo
mesejo / single-tree-model.json
Last active February 19, 2024 14:20
Single Tree XGBoost Model
{
"learner": {
"attributes": {
"scikit_learn": "{\"_estimator_type\": \"classifier\"}"
},
"feature_names": [],
"feature_types": [],
"gradient_booster": {
"model": {
"gbtree_model_param": {
@mesejo
mesejo / defaultdict_performance.py
Created November 7, 2021 14:11
defaultdict performance
from collections import defaultdict
from itertools import groupby, cycle
from operator import itemgetter as ig
import random
import perfplot
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
@mesejo
mesejo / atomic.py
Created November 4, 2021 10:41
Atomic Groups vs Non-Atomic Groups
import perfplot
import re
import random
from string import ascii_lowercase
pattern = r"(?=(n(?:e(?:c(?:essary|k)|ed|xt|ighborly)|o(?:tebook|ndescript|rmal|is[ey])|a(?:ppy|me|useating|t(?:ion|ural))|i(?:fty|ne|ce|ppy)|u(?:mber(?:less)?|t))|k(?:ey|i(?:ck|nd|ll)|no(?:ck|t))|z(?:oo|any|i(?:nc|p)|ephyr)|v(?:o(?:ice|latile)|e(?:in|rsed?)|ulgar|a(?:nish|cuous|se)|iolet)|y(?:oung|a(?:rd|k)|ummy)|g(?:a(?:ze|rrulous)|e(?:ese|ntle)|l(?:ass|eaming|ue)|iant|o(?:od|ld|vernment)|u(?:itar|llible|ar(?:antee|ded)|sty)|host|r(?:ieving|ubby|e(?:en|a(?:sy|t))|o(?:u(?:chy|nd)|an|ovy)|a(?:ndfather|pe|ceful|t(?:is|e)|b)))|j(?:u(?:m(?:bled|p)|icy)|e(?:wel|lly)|a(?:il|zzy)|o(?:yous|lly|g))|u(?:mbrella|gliest|tter|sed?|n(?:becoming|ruly|fasten|kempt|used|wieldy|derstood|pack|a(?:ccountable|ble)|i(?:que|nterested|t)|equaled|s(?:ightly|uitable)|cle)|p)|i(?:tchy|cy|gnore|m(?:mense|p(?:erfect|o(?:rtant|lite|ssible)))|ll(?:\-informed|ustrious)|n(?:ject|v(?:incible|ention)|c(?:re(?:ase|dible)|ompetent)|nate|t(?:roduce|e(?:lligent|rest(?:ing
@mesejo
mesejo / nested_find.py
Created November 2, 2021 15:39
Code for reproducing the benchmarks in https://stackoverflow.com/a/69812299/4001592
from operator import eq
from functools import partial
import perfplot
import random
def nested_list_comprehension(names, needle="Matt"):
    """Return the entries equal to *needle* from each inner list of *names*.

    Args:
        names: An iterable of iterables of values (e.g. a list of lists of
            strings).
        needle: The value to look for; compared with ``==``.

    Returns:
        A list with one inner list per element of *names*, each holding only
        the items equal to *needle* (empty when there is no match).
    """
    # Kept as a nested comprehension on purpose: the function name says this
    # is the "nested list comprehension" variant.
    return [[name for name in lst if needle == name] for lst in names]
@mesejo
mesejo / weighted_sampling_without_replacement.py
Created October 19, 2021 16:56
Weighted sampling without replacement
from collections import defaultdict
from random import choices
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
sns.set_theme(style="whitegrid")
import random
import perfplot
from collections import defaultdict, Counter
from itertools import chain
from pyroaring import BitMap
def setup(k):
lss = [f"list{i}" for i in range(1, k + 1)]
labels = [f"label{i}" for i in range(1, 101)]
@mesejo
mesejo / reservoir_sampling.py
Created May 13, 2021 20:58
Implementation of Algorithm L for reservoir sampling
import random
import numpy as np
from numpy.random import default_rng
from itertools import islice
def reservoir_sampling(pop, k):
reservoir = []
stream = iter(pop)
We can make this file beautiful and searchable if this error is corrected: No commas found in this CSV file in line 0.
question
for the last years of his life galileo was under house arrest for espousing this mans theory
no 1912 olympian football star at carlisle indian school mlb seasons with the reds giants braves
the city of yuma in this state has record average of 4055 hours of sunshine each year
in 1963 live on the art linkletter show this company served its billionth burger
signer of the dec of indep framer of the constitution of mass second president of the united states
in the title of an aesop fable this insect shared billing with grasshopper
built in 312 bc to link rome the south of italy its still in use today
no 30 steals for the birmingham barons 2306 steals for the bulls
in the winter of 1971 72 record 1122 inches of snow fell at rainier paradise ranger station in this state
import pandas as pd
import numpy as np
# Load the whitespace-delimited file "test.txt" and convert it to a NumPy array.
# `sep=r"\s+"` is the documented replacement for the deprecated
# `delim_whitespace=True`; `to_numpy()` is the recommended spelling of `.values`.
arr = pd.read_csv("test.txt", sep=r"\s+").to_numpy()
print(arr)
class Cube:
def __init__(self, row_index, col_index, data):
import pandas as pd
from collections import defaultdict
# Example frame with three string columns (unit_0, unit_1, unit_2) of eight
# single-letter values each; each column is spelled as a string and split
# into characters.
df = pd.DataFrame(
    dict(
        unit_0=list("AAAAAAAA"),
        unit_1=list("BCCCDDEE"),
        unit_2=list("FGGHIIJI"),
    )
)