tbatsuur tbatsuur

## word_stats.py
# For every Gutenberg file, print characters per word, words per sentence,
# and words per distinct (case-folded) vocabulary item, followed by the file id.
for fileid in gutenberg.fileids():
    # Fetch the word sequence once; it feeds both the count and the vocabulary.
    words = gutenberg.words(fileid)
    num_chars = len(gutenberg.raw(fileid))
    num_words = len(words)
    num_sents = len(gutenberg.sents(fileid))
    num_vocab = len({w.lower() for w in words})
    print(int(num_chars/num_words), int(num_words/num_sents), int(num_words/num_vocab), fileid)

## generateCharLSTMText.py
def sample(preds, temperature=1.0):
    """Draw one index from ``preds`` after reweighting it by ``temperature``.

    The probabilities are moved to log space, divided by ``temperature`` and
    renormalised with a softmax; one multinomial draw is then taken from the
    reweighted distribution.

    Args:
        preds: 1-D array-like of probabilities.
        temperature: Positive scaling factor. Values below 1 sharpen the
            distribution, values above 1 flatten it.

    Returns:
        The index of the sampled entry, as returned by ``np.argmax``.
    """
    preds = np.asarray(preds).astype('float64')
    # Zero-probability entries map to -inf here, which is the intended result,
    # so the divide-by-zero warning from log(0) is suppressed.
    with np.errstate(divide='ignore'):
        logits = np.log(preds) / temperature
    # Shift by the maximum before exponentiating. The softmax is unchanged
    # mathematically, but without the shift a small temperature makes every
    # exp() underflow to 0, and the 0/0 normalisation produces NaNs.
    exp_preds = np.exp(logits - np.max(logits))
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

# Step through the epochs numbered 1 to 59, announcing each one.
first_epoch, last_epoch = 1, 59
for epoch in range(first_epoch, last_epoch + 1):
    print('epoch', epoch)

## createModel.py
from keras import layers

# One 128-unit LSTM over windows of shape (maxlen, len(chars)), followed by a
# softmax layer with one output per entry of `chars`.
model = keras.models.Sequential([
    layers.LSTM(128, input_shape=(maxlen, len(chars))),
    layers.Dense(len(chars), activation='softmax'),
])

# RMSprop with a 0.01 learning rate, trained against categorical cross-entropy.
optimizer = keras.optimizers.RMSprop(lr=0.01)
model.compile(optimizer=optimizer, loss='categorical_crossentropy')

## vectorizeCorpus.py
# Slide a fixed-width window over `text`: every `step` characters, keep the
# `maxlen`-character slice and the single character that follows it.
maxlen = 60
step = 3

window_starts = range(0, len(text) - maxlen, step)
sentences = [text[start: start + maxlen] for start in window_starts]
next_chars = [text[start + maxlen] for start in window_starts]

## importCorpus.py
import keras
import numpy as np

# Obtain a local path for the Nietzsche corpus via keras.utils.get_file.
path = keras.utils.get_file('nietzsche.txt', origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')

# Read inside a context manager so the file handle is closed as soon as the
# lower-cased text has been loaded.
with open(path) as corpus_file:
    text = corpus_file.read().lower()

print('Corpus length:', len(text))

## babelize2.py
# Start the Babelizer shell. The `Babel>` lines that follow appear to be a
# transcript of input typed at its prompt (a sentence, a language, then the
# `run` command); they are not Python statements.
babelize_shell()
Babel> The pig that John found looked happy
Babel> german
Babel> run

## babelize.py
# Start the Babelizer shell. Everything after the call is a transcript of the
# session: the banner the shell prints, then input typed at the `Babel>`
# prompt (a sentence, a language, then the `run` command). None of it is
# Python code.
babelize_shell()
NLTK Babelizer: type 'help' for a list of commands.
Babel> how long before the next flight to Alice Springs?
Babel> german
Babel> run

## loopingConditionals2.py
# Classify every token of sent1 by its casing. Anything that is neither
# all-lowercase nor titlecase is reported as punctuation.
for token in sent1:
    if token.islower():
        print(token, 'is a lowercase word')
    elif token.istitle():
        print(token, 'is a titlecase word')
    else:
        print(token, 'is punctuation')

## loopingConditionals.py
# Print only the tokens that end with the letter 'l'.
sent1 = ['Call', 'me', 'Ishmael', '.']
for xyzzy in sent1:
    if xyzzy.endswith('l'):
        print(xyzzy)

## removingNonAlphabetic.py
# Count the distinct case-folded alphabetic tokens in text1.
len({word.lower() for word in text1 if word.isalpha()})  # 16948
	# For every Gutenberg file, print characters per word, words per sentence,
	# and words per distinct (case-folded) vocabulary item, followed by the file id.
	for fileid in gutenberg.fileids():
		# Fetch the word sequence once; it feeds both the count and the vocabulary.
		words = gutenberg.words(fileid)
		num_chars = len(gutenberg.raw(fileid))
		num_words = len(words)
		num_sents = len(gutenberg.sents(fileid))
		num_vocab = len({w.lower() for w in words})
		print(int(num_chars/num_words), int(num_words/num_sents), int(num_words/num_vocab), fileid)
	def sample(preds, temperature=1.0):
		"""Draw one index from ``preds`` after reweighting it by ``temperature``.

		The probabilities are moved to log space, divided by ``temperature``
		and renormalised with a softmax; one multinomial draw is then taken
		from the reweighted distribution.

		Args:
			preds: 1-D array-like of probabilities.
			temperature: Positive scaling factor. Values below 1 sharpen the
				distribution, values above 1 flatten it.

		Returns:
			The index of the sampled entry, as returned by ``np.argmax``.
		"""
		preds = np.asarray(preds).astype('float64')
		# Zero-probability entries map to -inf here, which is the intended
		# result, so the divide-by-zero warning from log(0) is suppressed.
		with np.errstate(divide='ignore'):
			logits = np.log(preds) / temperature
		# Shift by the maximum before exponentiating. The softmax is unchanged
		# mathematically, but without the shift a small temperature makes
		# every exp() underflow to 0 and the normalisation produces NaNs.
		exp_preds = np.exp(logits - np.max(logits))
		preds = exp_preds / np.sum(exp_preds)
		probas = np.random.multinomial(1, preds, 1)
		return np.argmax(probas)

	# Step through the epochs numbered 1 to 59, announcing each one.
	for epoch in range(1, 60):
		print('epoch', epoch)
	from keras import layers

	# One 128-unit LSTM over windows of shape (maxlen, len(chars)), followed by
	# a softmax layer with one output per entry of `chars`.
	model = keras.models.Sequential([
		layers.LSTM(128, input_shape=(maxlen, len(chars))),
		layers.Dense(len(chars), activation='softmax'),
	])
	# RMSprop with a 0.01 learning rate, trained against categorical cross-entropy.
	optimizer = keras.optimizers.RMSprop(lr=0.01)
	model.compile(optimizer=optimizer, loss='categorical_crossentropy')
	# Slide a fixed-width window over `text`: every `step` characters, keep the
	# `maxlen`-character slice and the single character that follows it.
	maxlen = 60
	step = 3

	sentences = []
	next_chars = []

	for i in range(0, len(text) - maxlen, step):
		sentences.append(text[i: i + maxlen])
		next_chars.append(text[i + maxlen])
	import keras
	import numpy as np

	# Obtain a local path for the Nietzsche corpus via keras.utils.get_file.
	path = keras.utils.get_file('nietzsche.txt', origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')

	# Read inside a context manager so the file handle is closed as soon as
	# the lower-cased text has been loaded.
	with open(path) as corpus_file:
		text = corpus_file.read().lower()

	print('Corpus length:', len(text))
	# Start the Babelizer shell. The `Babel>` lines that follow appear to be a
	# transcript of input typed at its prompt; they are not Python statements.
	babelize_shell()
	Babel> The pig that John found looked happy
	Babel> german
	Babel> run
	# Start the Babelizer shell. Everything after the call is a transcript of
	# the session: the banner the shell prints, then input typed at the
	# `Babel>` prompt. None of it is Python code.
	babelize_shell()
	NLTK Babelizer: type 'help' for a list of commands.
	Babel> how long before the next flight to Alice Springs?
	Babel> german
	Babel> run
	# Classify every token of sent1 by its casing. Anything that is neither
	# all-lowercase nor titlecase is reported as punctuation.
	for token in sent1:
		if token.islower():
			print(token, 'is a lowercase word')
		elif token.istitle():
			print(token, 'is a titlecase word')
		else:
			print(token, 'is punctuation')
	# Print only the tokens that end with the letter 'l'.
	sent1 = ['Call', 'me', 'Ishmael', '.']
	for xyzzy in sent1:
		if xyzzy.endswith('l'):
			print(xyzzy)