Skip to content

Instantly share code, notes, and snippets.

View orico's full-sized avatar

Ori Cohen orico

View GitHub Profile
example: SortedStringTrie({'ate': 3, 'cat': 5, 'do': 1, 'dog': 2, 'gate': 4, 'the': 0})
prefixes example: ['do', 'dog']
starting..
recoursively working on string: thedogatethecat
progress: t
progress: th
progress: the
found: ['the'] for: the
possible sentences {}
# Reference for the trie methods used below:
# https://pytrie.readthedocs.io/en/latest/#trie-methods
from pytrie import SortedStringTrie as Trie
# Build a trie whose keys are the six vocabulary words, each mapped to an int.
s = Trie(the=0, do=1, dog=2, ate=3, gate=4, cat=5)
print('example:',s)
# keys(prefix=...) lists the stored keys that start with the given prefix.
print('prefixes example:',s.keys(prefix='do'))
# Input string with no separators between the words.
sentence = "thedogatethecat"
def generator_string(s):
import numpy as np
# Number of iterations of the outer loop that follows.
rolls = 10000
# NOTE(review): the use of n is not visible in this excerpt — presumably the
# number of distinct outcomes per draw; confirm against the full script.
n = 6
# One zero-initialised slot per iteration.
E = [0] * rolls
for i in range(0,rolls):
flag = False
ar = []
while flag == False:
@orico
orico / stanfordNERold.py
Created April 9, 2019 11:10
Stanford NER Location old API
import pandas as pd
import numpy as np
#nltk v1
from nltk.tag.stanford import StanfordNERTagger
from nltk.tokenize import word_tokenize
# Directory prefix prepended to the CSV file name below.
path_to_data = './data/'
# Read the cities CSV (path relative to the working directory) with pandas.
cities = pd.read_csv(path_to_data + 'us_cities_states_counties.csv')
@orico
orico / stanfordNER.py
Last active April 9, 2019 05:37
Stanford NER LOCATION test
import pandas as pd
import numpy as np
from nltk.parse import CoreNLPParser
import datetime
# Directory prefix prepended to the CSV file name below.
path_to_data = './data/'
# Read the cities CSV (path relative to the working directory) with pandas.
cities = pd.read_csv(path_to_data + 'us_cities_states_counties.csv')
# Convert every value in the 'City alias' column to str.
cities['City alias'] = cities['City alias'].apply(lambda x: str(x))
# Tagger configured with tagtype 'ner' for the CoreNLP server URL below.
# NOTE(review): presumably the server must already be running locally — confirm.
ner_tagger = CoreNLPParser(url='http://localhost:9000', tagtype='ner')
@orico
orico / spacy.py
Last active April 14, 2019 08:36
Spacy GPE test
import pandas as pd
import numpy as np
import spacy
import en_core_web_sm
# Directory prefix for data files; not used in the lines visible here.
path_to_data = './data/'
# Ask spaCy to use the GPU when one is available; the return value is ignored.
spacy.prefer_gpu()
# Load the pipeline shipped in the en_core_web_sm package.
nlp = en_core_web_sm.load()
@orico
orico / learning_curves.png
Created August 25, 2018 10:17 — forked from ogrisel/learning_curves.png
Learning Curves for under/overfitting evaluation
learning_curves.png
@orico
orico / PCAAE-plotclusters.py
Created April 24, 2018 06:44
PCAAE-plotclusters.py
def plot3clusters(X, title, vtitle):
    """Scatter-plot the first two columns of ``X``, one colour per class.

    Relies on names defined outside this function: ``plt`` (the plotting
    API), ``y`` (per-row class labels, compared against 0, 1 and 2) and
    ``target_names`` (the legend label for each class).

    Args:
        X: 2-D indexable array. Rows are selected with the boolean mask
            ``y == i``; column 0 is plotted against column 1.
        title: Text passed to ``plt.title``.
        vtitle: Accepted but not used by the statements visible here.
            NOTE(review): presumably an axis-label prefix — confirm against
            the complete gist before relying on it.
    """
    plt.figure()
    colors = ['navy', 'turquoise', 'darkorange']
    lw = 2
    # zip stops at the shortest input, so at most three classes are drawn.
    for color, i, target_name in zip(colors, [0, 1, 2], target_names):
        # Build the row mask once and reuse it for both coordinates.
        mask = y == i
        plt.scatter(X[mask, 0], X[mask, 1], color=color, alpha=1., lw=lw,
                    label=target_name)
    plt.legend(loc='best', shadow=False, scatterpoints=1)
    plt.title(title)
@orico
orico / PCAAE-KMEANS.py
Last active April 24, 2018 07:00
PCAAE-KMEANS
# Alias for y, which is defined outside this excerpt.
# NOTE(review): presumably the ground-truth class labels — confirm.
labels_true = y
# Display names for the four representations; not used in the lines visible here.
titles = ['PCA', 'Linear AE', 'Sigmoid AE', 'Relu AE']
# Run once with 2 clusters and once with 3.
for n_clusters_ in [2,3]:
# One (name, new KMeans instance, data) triple per representation; the data
# arrays are defined outside this excerpt.
estimators = [('PCA' , KMeans(n_clusters=n_clusters_), pca_transformed),
('AE linear' , KMeans(n_clusters=n_clusters_), encoded_data),
('AE sigmoid' , KMeans(n_clusters=n_clusters_), encoded_data2),
('AE relu', KMeans(n_clusters=n_clusters_), encoded_data3)]
# NOTE(review): prints the type of y on every iteration — looks like leftover
# debugging output; confirm it is still wanted.
print(type(y))
print('Number of clusters: %d' % n_clusters_)
@orico
orico / PCAAE-graphs.py
Last active April 24, 2018 06:56
PCAAE-graphs
# Plot the first two columns of each representation. plot3clusters and the
# four arrays are defined outside this excerpt. The third argument is 'PC'
# for the PCA output and 'AE' for the three autoencoder outputs.
plot3clusters(pca_transformed[:,:2], 'PCA', 'PC')
plot3clusters(encoded_data[:,:2], 'Linear AE', 'AE')
plot3clusters(encoded_data2[:,:2], 'Non-Linear sigmoid-based AE', 'AE')
plot3clusters(encoded_data3[:,:2], 'Non-Linear relu-based AE', 'AE')