Ori Cohen orico

## output.txt
example: SortedStringTrie({'ate': 3, 'cat': 5, 'do': 1, 'dog': 2, 'gate': 4, 'the': 0})
prefixes example: ['do', 'dog']
starting..

recoursively working on string: thedogatethecat
progress: t
progress: th
progress: the
found: ['the'] for: the
possible sentences {}

## solution.py
#https://pytrie.readthedocs.io/en/latest/#trie-methods
from pytrie import SortedStringTrie as Trie

# Build a small trie keyed by the known words; each word maps to an integer 0-5.
s = Trie(the=0, do=1, dog=2, ate=3, gate=4, cat=5)
print('example:',s)
# keys(prefix=...) lists the stored keys that start with the given prefix
# (for 'do' the recorded output is ['do', 'dog']).
print('prefixes example:',s.keys(prefix='do'))

# Input string with no separators between its words.
sentence = "thedogatethecat"

def generator_string(s):

## MonteCarlo.py
import numpy as np

rolls = 10000  # number of iterations of the simulation loop
n = 6  # NOTE(review): presumably the number of faces/outcomes per roll - confirm against the loop body
E = [0] * rolls  # one zero-initialised slot per iteration

for i in range(0,rolls):
  flag = False
  ar = []
  while flag == False:

## stanfordNERold.py
import pandas as pd
import numpy as np

#nltk v1
from nltk.tag.stanford import StanfordNERTagger
from nltk.tokenize import word_tokenize

# Directory prefix for the input files, relative to the current working directory.
path_to_data = './data/'

# Load the cities/states/counties CSV into a DataFrame using read_csv defaults.
cities = pd.read_csv(path_to_data + 'us_cities_states_counties.csv')

## stanfordNER.py
import pandas as pd
import numpy as np

from nltk.parse import CoreNLPParser
import datetime

# Directory prefix for the input files, relative to the current working directory.
path_to_data = './data/'
# Load the cities/states/counties CSV into a DataFrame using read_csv defaults.
cities = pd.read_csv(path_to_data + 'us_cities_states_counties.csv')
# Force every 'City alias' entry to a Python str so the column holds text only.
cities['City alias'] = cities['City alias'].apply(lambda x: str(x))
# Client for a CoreNLP server at localhost:9000, configured for NER tagging.
# NOTE(review): presumably the server must already be running before tagging - confirm.
ner_tagger = CoreNLPParser(url='http://localhost:9000', tagtype='ner')

## spacy.py
import pandas as pd
import numpy as np

import spacy
import en_core_web_sm

# Directory prefix for the input files, relative to the current working directory.
path_to_data = './data/'

# Request GPU execution from spaCy; the return value is ignored here.
spacy.prefer_gpu()
# Load the pipeline shipped in the en_core_web_sm package.
nlp = en_core_web_sm.load()

## learning_curves.png

      
4 files · 0 forks · 0 comments · 0 stars

orico / learning_curves.png
Created August 25, 2018 10:17 — forked from ogrisel/learning_curves.png

Learning Curves for under/overfitting evaluation
              
          
## PCAAE-plotclusters.py
def plot3clusters(X, title, vtitle):
  """Scatter columns 0 and 1 of X on a new figure, one colour per class.

  Rows are selected by comparing the module-level array ``y`` against the
  class indices 0, 1 and 2; legend labels come from the module-level
  ``target_names``. ``vtitle`` is accepted but not used in this body.
  """
  palette = ('navy', 'turquoise', 'darkorange')
  line_width = 2

  plt.figure()
  for class_id, (shade, name) in enumerate(zip(palette, target_names)):
    # Boolean row selector for the current class, shared by both axes.
    members = y == class_id
    plt.scatter(X[members, 0], X[members, 1],
                color=shade, alpha=1., lw=line_width, label=name)
  plt.legend(loc='best', shadow=False, scatterpoints=1)
  plt.title(title)

## PCAAE-KMEANS.py
# Keep the label array y available under the name labels_true.
labels_true = y
titles = ['PCA', 'Linear AE', 'Sigmoid AE', 'Relu AE']
for n_clusters_ in (2, 3):
  # Pair each representation with its own freshly constructed KMeans model.
  embeddings = (('PCA', pca_transformed),
                ('AE linear', encoded_data),
                ('AE sigmoid', encoded_data2),
                ('AE relu', encoded_data3))
  estimators = [(label, KMeans(n_clusters=n_clusters_), data)
                for label, data in embeddings]

  print(type(y))
  print('Number of clusters: %d' % n_clusters_)

## PCAAE-graphs.py
# Plot the first two columns of each representation with the shared helper.
for _embedding, _caption, _vtitle in (
    (pca_transformed, 'PCA', 'PC'),
    (encoded_data, 'Linear AE', 'AE'),
    (encoded_data2, 'Non-Linear sigmoid-based AE', 'AE'),
    (encoded_data3, 'Non-Linear relu-based AE', 'AE'),
):
  plot3clusters(_embedding[:, :2], _caption, _vtitle)
	example: SortedStringTrie({'ate': 3, 'cat': 5, 'do': 1, 'dog': 2, 'gate': 4, 'the': 0})
	prefixes example: ['do', 'dog']
	starting..

	recoursively working on string: thedogatethecat
	progress: t
	progress: th
	progress: the
	found: ['the'] for: the
	possible sentences {}
	#https://pytrie.readthedocs.io/en/latest/#trie-methods
	from pytrie import SortedStringTrie as Trie

	s = Trie(the=0, do=1, dog=2, ate=3, gate=4, cat=5)
	print('example:',s)
	print('prefixes example:',s.keys(prefix='do'))

	sentence = "thedogatethecat"

	def generator_string(s):
	import numpy as np

	rolls = 10000
	n = 6
	E = [0] * rolls

	for i in range(0,rolls):
	flag = False
	ar = []
	while flag == False:
	import pandas as pd
	import numpy as np

	#nltk v1
	from nltk.tag.stanford import StanfordNERTagger
	from nltk.tokenize import word_tokenize

	path_to_data = './data/'

	cities = pd.read_csv(path_to_data + 'us_cities_states_counties.csv')
	import pandas as pd
	import numpy as np

	from nltk.parse import CoreNLPParser
	import datetime

	path_to_data = './data/'
	cities = pd.read_csv(path_to_data + 'us_cities_states_counties.csv')
	cities['City alias'] = cities['City alias'].apply(lambda x: str(x))
	ner_tagger = CoreNLPParser(url='http://localhost:9000', tagtype='ner')
	import pandas as pd
	import numpy as np

	import spacy
	import en_core_web_sm

	path_to_data = './data/'

	spacy.prefer_gpu()
	nlp = en_core_web_sm.load()
	def plot3clusters(X, title, vtitle):
	plt.figure()
	colors = ['navy', 'turquoise', 'darkorange']
	lw = 2

	for color, i, target_name in zip(colors, [0, 1, 2], target_names):
	plt.scatter(X[y == i, 0], X[y == i, 1], color=color, alpha=1., lw=lw,
	label=target_name)
	plt.legend(loc='best', shadow=False, scatterpoints=1)
	plt.title(title)
	labels_true = y
	titles = ['PCA', 'Linear AE', 'Sigmoid AE', 'Relu AE']
	for n_clusters_ in [2,3]:
	estimators = [('PCA' , KMeans(n_clusters=n_clusters_), pca_transformed),
	('AE linear' , KMeans(n_clusters=n_clusters_), encoded_data),
	('AE sigmoid' , KMeans(n_clusters=n_clusters_), encoded_data2),
	('AE relu', KMeans(n_clusters=n_clusters_), encoded_data3)]

	print(type(y))
	print('Number of clusters: %d' % n_clusters_)
	plot3clusters(pca_transformed[:,:2], 'PCA', 'PC')
	plot3clusters(encoded_data[:,:2], 'Linear AE', 'AE')
	plot3clusters(encoded_data2[:,:2], 'Non-Linear sigmoid-based AE', 'AE')
	plot3clusters(encoded_data3[:,:2], 'Non-Linear relu-based AE', 'AE')