Skip to content

Instantly share code, notes, and snippets.

for fileid in gutenberg.fileids():
num_chars = len(gutenberg.raw(fileid))
num_words = len(gutenberg.words(fileid))
num_sents = len(gutenberg.sents(fileid))
num_vocab = len(set([w.lower() for w in gutenberg.words(fileid)]))
print(int(num_chars/num_words), int(num_words/num_sents), int(num_words/num_vocab), fileid)
def sample(preds, temperature=1.0):
preds = np.asarray(preds).astype('float64')
preds = np.log(preds) / temperature
exp_preds = np.exp(preds)
preds = exp_preds / np.sum(exp_preds)
probas = np.random.multinomial(1, preds, 1)
return np.argmax(probas)
for epoch in range(1, 60):
print('epoch', epoch)
from keras import layers
model = keras.models.Sequential()
model.add(layers.LSTM(128, input_shape=(maxlen, len(chars))))
model.add(layers.Dense(len(chars), activation='softmax'))
optimizer = keras.optimizers.RMSprop(lr=0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)
maxlen = 60
step = 3
sentences = []
next_chars = []
for i in range(0, len(text) - maxlen, step):
sentences.append(text[i: i + maxlen])
next_chars.append(text[i + maxlen])
import keras
import numpy as np
path = keras.utils.get_file('nietzsche.txt', origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')
text = open(path).read().lower()
print('Corpus length:', len(text))
babelize_shell()
Babel> The pig that John found looked happy
Babel> german
Babel> run
babelize_shell()
NLTK Babelizer: type 'help' for a list of commands.
Babel> how long before the next flight to Alice Springs?
Babel> german
Babel> run
for token in sent1:
if token.islower():
print token, 'is a lowercase word'
elif token.istitle():
print token, 'is a titlecase word'
else:
print token, 'is punctuation'
sent1 = ['Call', 'me', 'Ishmael', '.']
for xyzzy in sent1:
if xyzzy.endswith('l'):
print xyzzy
len(set([word.lower() for word in text1 if word.isalpha()])) //16948