This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def pmi(df): | |
''' | |
Calculate the positive pointwise mutal information score for each entry | |
https://en.wikipedia.org/wiki/Pointwise_mutual_information | |
We use the log( p(y|x)/p(y) ), y being the column, x being the row | |
''' | |
# Get numpy array from pandas df | |
arr = df.as_matrix() | |
# p(y|x) probability of each t1 overlap within the row |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def my_list_processing(l):
    '''
    Sum the numbers in a list, failing on purpose when the list contains 666.
    :param l: A list of numbers.
    :return: The sum of the numbers in "l".
    :raises ZeroDivisionError: Whenever 666 is one of the elements.
    '''
    if 666 in l:
        # Deliberate division by zero.
        # NOTE(review): presumably here to show how a failing worker is reported - confirm.
        return 666 / 0
    return sum(l)
# Work items: the integers 0 through 999.
elements = list(range(1000))
# Accumulator that the reduce callback appends to.
results = []
def reducing(e):
    '''
    Record one value in the module-level "results" list.
    :param e: The value to append.
    '''
    results.append(e)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def my_list_processing(l):
    '''
    Return the sum of the numbers in "l".
    :param l: A list of numbers.
    '''
    return sum(l)
# Work items: the integers 0 through 999.
elements = list(range(1000))
# Accumulator that the reduce callback appends to.
results = []
def reducing(e):
    '''
    Record one value in the module-level "results" list.
    :param e: The value to append.
    '''
    results.append(e)
# my_list_processing is the map function applied to sublists of "elements";
# reducing is the callback that receives each of its return values.
simpleMultiprocessing(elements, my_list_processing, reducing, verbose=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def my_list_processing(l):
    '''
    Return the sum of the numbers in "l".
    :param l: A list of numbers.
    '''
    return sum(l)
# Work items: the integers 0 through 999.
elements = list(range(1000))
results = []
# The bound method results.append serves directly as the reduce callback,
# so no separate wrapper function is needed to collect the map outputs.
simpleMultiprocessing(elements, my_list_processing, results.append, verbose=True)
print(results)
# Combine the collected partial values into a single total.
result = sum(results)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class simpleMultiprocessing: | |
''' | |
This class makes multiprocessing easy. | |
:param elements: A list of elements that can be split in smaller chunks and processed in parallel. | |
:param f_map: A function which takes a list of elements (normally a sublist of "elements") and process it. | |
:param f_reduce: [Optional] A callback function called each time f_map return from processing sublist of elements. The function takes the return value of f_map as input. | |
:param nProcesses: [Optional] Number of processes to spawn, default is twice the number of available processors. | |
:param verbose: [Optional] When set to True, displays the steps of multiprocessing. | |
''' | |
def __init__(self, elements, f_map, f_reduce=None, nProcesses=max(1, int(2.*float(os.getenv('CPU_LIMIT')))), verbose=True): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def append_y_words(y_words, base_list=None):
    '''
    Purpose: Return a list of words from the base_list (if any) followed by
    words starting with 'y' from the words list.
    :param y_words: Iterable of strings to filter.
    :param base_list: [Optional] List to extend. When supplied it is extended
        in place and returned; when omitted a fresh list is created per call.
    :return: base_list followed by every word of y_words that starts with 'y'.
    '''
    if base_list is None:
        # A literal [] default is built once at definition time and shared by
        # every call, so results of earlier calls would leak into later ones.
        base_list = []
    base_list += [word for word in y_words if word.startswith('y')]
    return base_list
# Parenthesised call form: valid on Python 3 and prints the same on Python 2.
print(append_y_words(["yoyo", "player"]))  # should print ['yoyo']
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def triplet_loss(y_true, y_pred, alpha = 0.2): | |
""" | |
Implementation of the triplet loss function | |
Arguments: | |
y_true -- true labels, required when you define a loss in Keras, not used in this function. | |
y_pred -- python list containing three objects: | |
anchor: the encodings for the anchor data | |
positive: the encodings for the positive data (similar to anchor) | |
negative: the encodings for the negative data (different from anchor) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.cluster import MiniBatchKMeans | |
from sklearn.metrics import calinski_harabaz_score | |
num_clusters = range(10, 600, 10) | |
scores = [] | |
for num_cluster in num_clusters: | |
km = MiniBatchKMeans(n_clusters=num_cluster, init_size=max(300, 3*num_cluster)).fit(X) | |
labels = km.labels_ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis | |
# Fit a linear discriminant model on X with "labels" as the class targets and
# keep the projection of X onto two discriminant components.
# NOTE(review): X and labels are defined outside this snippet - presumably the
# feature matrix and cluster assignments; confirm.
clf1 = LinearDiscriminantAnalysis(n_components=2)
X1 = clf1.fit_transform(X, labels)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.cluster import MiniBatchKMeans | |
num_clusters = 80
# init_size is the larger of 300 and three times the requested cluster count.
kn = MiniBatchKMeans(n_clusters=num_clusters, init_size=max(300, 3*num_clusters)).fit(X)
labels = kn.labels_
# Count the distinct labels, not counting -1 when it occurs.
# NOTE(review): -1 is the noise label of estimators such as DBSCAN; k-means is
# not expected to emit it, so this guard is probably a no-op here - confirm.
n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
Newer Older