Skip to content

Instantly share code, notes, and snippets.

View TheLoneNut's full-sized avatar

Pascal Potvin TheLoneNut

View GitHub Profile
def my_list_processing(l):
    '''
    Map function for the multiprocessing demo: sum one sublist of elements.

    NOTE(review): deliberately raises ZeroDivisionError whenever 666 appears
    in the sublist -- this gist demonstrates how errors surface from worker
    processes, so the crash is intentional, not a bug to fix.
    '''
    if 666 in l:
        return 666 / 0  # intentional crash to exercise error propagation
    return sum(l)
# One thousand integers for the pool to chunk and process.
elements = list(range(1000))

# Accumulates one entry per processed chunk, filled by the reduce callback.
results = []


def reducing(e):
    '''Reduce callback: stash one chunk result as it comes back.'''
    results.append(e)
def my_list_processing(l):
    '''Map function: return the sum of one sublist of elements.'''
    return sum(l)


# Work items: the integers 0..999, to be split into chunks by the pool.
elements = list(range(1000))

# Per-chunk sums land here via the reduce callback below.
results = []


def reducing(e):
    '''Reduce callback: collect one mapped chunk result.'''
    results.append(e)
simpleMultiprocessing(elements, my_list_processing, reducing, verbose=True)
def my_list_processing(l):
    '''Map function: sum a single sublist of the work items.'''
    return sum(l)


elements = list(range(1000))  # 0..999, to be chunked across processes
results = []                  # filled by the reduce callback
# list.append used directly as the reduce callback -- no wrapper function
# needed.  simpleMultiprocessing is defined in a later gist on this page.
simpleMultiprocessing(elements, my_list_processing, results.append, verbose=True)
print(results)
# Combine the per-chunk sums into the grand total.
result = sum(results)
# NOTE(review): this class is truncated in this view -- the __init__ body and
# the rest of the class are not shown, so only the visible signature and
# docstring are annotated here.
class simpleMultiprocessing:
'''
This class makes multiprocessing easy.
:param elements: A list of elements that can be split in smaller chunks and processed in parallel.
:param f_map: A function which takes a list of elements (normally a sublist of "elements") and process it.
:param f_reduce: [Optional] A callback function called each time f_map return from processing sublist of elements. The function takes the return value of f_map as input.
:param nProcesses: [Optional] Number of processes to spawn, default is twice the number of available processors.
:param verbose: [Optional] When set to True, displays the steps of multiprocessing.
'''
# NOTE(review): the nProcesses default is evaluated ONCE, at class-definition
# time, and float(os.getenv('CPU_LIMIT')) raises TypeError when the
# CPU_LIMIT environment variable is unset -- consider
# os.getenv('CPU_LIMIT', '1') or computing the default inside __init__.
def __init__(self, elements, f_map, f_reduce=None, nProcesses=max(1, int(2.*float(os.getenv('CPU_LIMIT')))), verbose=True):
@TheLoneNut
TheLoneNut / bug.py
Created March 24, 2021 23:10
Just an example software bug
def append_y_words(y_words, base_list=None):
    '''
    Return a list of the words from base_list (if any) followed by the
    words in y_words that start with 'y'.

    :param y_words: iterable of candidate words.
    :param base_list: optional list to extend in place; when omitted a
        fresh list is created for each call.
    :return: base_list extended with the selected 'y' words.
    '''
    # Bug fix: the original signature used base_list=[], a mutable default
    # that is created once and shared across calls, so previous results
    # leaked into later calls.  A None sentinel gives a fresh list per call.
    if base_list is None:
        base_list = []
    y_words = [word for word in y_words if word.startswith('y')]
    base_list += y_words  # in-place extend kept for callers passing a list
    return base_list
print append_y_words(["yoyo", "player"]) # should print ['yoyo']
from sklearn.cluster import MiniBatchKMeans
# Bug fix: the metric was renamed calinski_harabaz_score ->
# calinski_harabasz_score in scikit-learn 0.20 and the old (misspelled)
# name has since been removed, making this import fail on modern sklearn.
from sklearn.metrics import calinski_harabasz_score

# Sweep candidate cluster counts and score each clustering; X (the feature
# matrix) is assumed to be defined earlier in the notebook -- TODO confirm.
num_clusters = range(10, 600, 10)
scores = []
for num_cluster in num_clusters:
    # init_size must exceed n_clusters for MiniBatchKMeans.
    km = MiniBatchKMeans(n_clusters=num_cluster, init_size=max(300, 3*num_cluster)).fit(X)
    labels = km.labels_
    # NOTE(review): the visible snippet never filled `scores`; the append
    # below restores the evident intent of the sweep (higher is better).
    scores.append(calinski_harabasz_score(X, labels))
# Project the feature matrix X to 2-D with supervised LDA, using the
# k-means cluster labels as the class targets (e.g. to visualize clusters).
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
clf1 = LinearDiscriminantAnalysis(n_components=2)
# X1 has shape (n_samples, 2) -- one point per row of X.
X1 = clf1.fit_transform(X, labels)
from sklearn.cluster import MiniBatchKMeans
# Final clustering at the chosen number of clusters.
num_clusters = 80
# init_size must exceed n_clusters for MiniBatchKMeans.
kn = MiniBatchKMeans(n_clusters=num_clusters, init_size=max(300, 3*num_clusters)).fit(X)
labels = kn.labels_
# NOTE(review): discounting a -1 label is a DBSCAN "noise" convention;
# k-means never emits -1, so this reduces to len(set(labels)) here --
# likely copied from a DBSCAN example.
n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
# Build the positive PMI matrix from the dataframe (pmi() is defined in a
# later gist on this page), L2-normalize it in place, then index the rows
# in a BallTree using the Euclidean metric (p=2).
X = pmi(df)
X = normalize(X, copy=False)
tree = BallTree(X, p=2)
# Fetch the 10 nearest rows (and their distances) to one example row.
# NOTE(review): <specific_example> is a placeholder, not valid Python --
# substitute the integer index of the row you want neighbours for.
knn_d, knn_ix = tree.query([X[<specific_example>]], k=10, return_distance=True)
@TheLoneNut
TheLoneNut / pmi.py
Created February 6, 2019 17:48
Calculation of the positive pointwise mutual information matrix from a pandas dataframe.
# NOTE(review): this function is truncated in this view -- only its opening
# lines are shown and annotated here.
def pmi(df):
'''
Calculate the positive pointwise mutual information score for each entry
https://en.wikipedia.org/wiki/Pointwise_mutual_information
We use the log( p(y|x)/p(y) ), y being the column, x being the row
'''
# NOTE(review): DataFrame.as_matrix() was deprecated in pandas 0.23 and
# removed in 1.0 -- modern code should use df.to_numpy() (or df.values).
# Get numpy array from pandas df
arr = df.as_matrix()
# p(y|x) probability of each t1 overlap within the row