This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
example: SortedStringTrie({'ate': 3, 'cat': 5, 'do': 1, 'dog': 2, 'gate': 4, 'the': 0}) | |
prefixes example: ['do', 'dog'] | |
starting.. | |
recoursively working on string: thedogatethecat | |
progress: t | |
progress: th | |
progress: the | |
found: ['the'] for: the | |
possible sentences {} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#https://pytrie.readthedocs.io/en/latest/#trie-methods | |
from pytrie import SortedStringTrie as Trie | |
s = Trie(the=0, do=1, dog=2, ate=3, gate=4, cat=5) | |
print('example:',s) | |
print('prefixes example:',s.keys(prefix='do')) | |
sentence = "thedogatethecat" | |
def generator_string(s): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
rolls = 10000 | |
n = 6 | |
E = [0] * rolls | |
for i in range(0,rolls): | |
flag = False | |
ar = [] | |
while flag == False: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
#nltk v1 | |
from nltk.tag.stanford import StanfordNERTagger | |
from nltk.tokenize import word_tokenize | |
path_to_data = './data/' | |
cities = pd.read_csv(path_to_data + 'us_cities_states_counties.csv') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
from nltk.parse import CoreNLPParser | |
import datetime | |
path_to_data = './data/' | |
cities = pd.read_csv(path_to_data + 'us_cities_states_counties.csv') | |
cities['City alias'] = cities['City alias'].apply(lambda x: str(x)) | |
ner_tagger = CoreNLPParser(url='http://localhost:9000', tagtype='ner') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
import spacy | |
import en_core_web_sm | |
path_to_data = './data/' | |
spacy.prefer_gpu() | |
nlp = en_core_web_sm.load() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def plot3clusters(X, title, vtitle): | |
plt.figure() | |
colors = ['navy', 'turquoise', 'darkorange'] | |
lw = 2 | |
for color, i, target_name in zip(colors, [0, 1, 2], target_names): | |
plt.scatter(X[y == i, 0], X[y == i, 1], color=color, alpha=1., lw=lw, | |
label=target_name) | |
plt.legend(loc='best', shadow=False, scatterpoints=1) | |
plt.title(title) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
labels_true = y | |
titles = ['PCA', 'Linear AE', 'Sigmoid AE', 'Relu AE'] | |
for n_clusters_ in [2,3]: | |
estimators = [('PCA' , KMeans(n_clusters=n_clusters_), pca_transformed), | |
('AE linear' , KMeans(n_clusters=n_clusters_), encoded_data), | |
('AE sigmoid' , KMeans(n_clusters=n_clusters_), encoded_data2), | |
('AE relu', KMeans(n_clusters=n_clusters_), encoded_data3)] | |
print(type(y)) | |
print('Number of clusters: %d' % n_clusters_) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
plot3clusters(pca_transformed[:,:2], 'PCA', 'PC') | |
plot3clusters(encoded_data[:,:2], 'Linear AE', 'AE') | |
plot3clusters(encoded_data2[:,:2], 'Non-Linear sigmoid-based AE', 'AE') | |
plot3clusters(encoded_data3[:,:2], 'Non-Linear relu-based AE', 'AE') |
NewerOlder