Skip to content

Instantly share code, notes, and snippets.

@evanmiltenburg
Last active May 9, 2019 18:45
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save evanmiltenburg/d7a5522148a0d0762a448eaf477c3aab to your computer and use it in GitHub Desktop.
import csv
import numpy as np
from gensim.models import Word2Vec
np.random.seed(1234)
from keras.models import Sequential
from keras.layers.core import Activation, Dense
from keras.callbacks import EarlyStopping
# Load the pretrained 300-dimensional GoogleNews word2vec embeddings.
print("Loading the GoogleNews model...")
# Download the GoogleNews vectors and change the path.
# NOTE(review): `Word2Vec.load_word2vec_format` is the pre-1.0 gensim API and
# was removed in gensim 1.0+; modern gensim uses
# `KeyedVectors.load_word2vec_format` — confirm the installed gensim version.
googlenews = Word2Vec.load_word2vec_format('/Users/Emiel/Downloads/GoogleNews-vectors-negative300.bin.gz', binary=True)
print("Loaded!")
# See:
# Lynott, D., & Connell, L. (2013).
# Modality exclusivity norms for 400 nouns:
# The relationship between perceptual experience and surface word form.
# Behavior Research Methods, 45, 516-526.
#
# Downloaded from: http://www.lancaster.ac.uk/people/connelll/papers.html
# Read the Lynott & Connell (2013) modality norms into a list of dicts,
# one per noun. Both statements must sit inside the `with` block: the
# paste lost its indentation, and csv.DictReader is lazy, so the rows have
# to be materialized with list() before the file is closed.
with open('./Lynott&Connell_NounModalityNorms_tabDelimited.txt') as f:
    reader = csv.DictReader(f, delimiter='\t')
    entries = list(reader)
def get_data(entries, googlenews):
    """Build the input and output arrays for the neural network.

    Parameters
    ----------
    entries : list of dict
        Rows from the Lynott & Connell norms file; each dict must contain
        a 'Noun' key plus the five '*_mean' modality-rating columns.
    googlenews : mapping
        Word -> embedding vector lookup (e.g. a word2vec model) that
        raises KeyError for out-of-vocabulary words.

    Returns
    -------
    tuple
        (nouns, inputs, outputs): the nouns that had an embedding, an
        array of their word vectors, and an array of their five modality
        ratings rescaled from the 0-5 scale to [0, 1] (to match the
        sigmoid output layer).
    """
    score_names = ['Auditory_mean', 'Gustatory_mean', 'Haptic_mean',
                   'Olfactory_mean', 'Visual_mean']
    data_points = []
    for entry in entries:
        noun = entry['Noun']
        # Skip nouns that are not in the embedding vocabulary.
        try:
            word_vector = googlenews[noun]
        except KeyError:
            continue
        scores = [float(entry[score]) / 5 for score in score_names]
        data_points.append((noun, word_vector, scores))
    # Guard the empty case: zip(*[]) would make the 3-way unpack fail.
    if not data_points:
        return (), np.array([]), np.array([])
    nouns, inputs, outputs = zip(*data_points)
    return nouns, np.array(inputs), np.array(outputs)
# Gather the (noun, embedding, ratings) triples and define the regressor:
# a 300-d word vector feeds 100 tanh hidden units, which feed 5 sigmoid
# outputs — one per perceptual modality, each in [0, 1].
nouns, inputs, outputs = get_data(entries, googlenews)
model = Sequential([
    Dense(100, input_dim=300),
    Activation('tanh'),
    Dense(5),
    Activation('sigmoid'),
])
model.compile(loss='mean_squared_error', optimizer='adagrad', metrics=['accuracy'])
# Fixed split of the ~400 usable nouns: first 300 train, next 50
# validation, remainder test.
train_end, val_end = 300, 350
train_X, val_X, test_X = inputs[:train_end], inputs[train_end:val_end], inputs[val_end:]
train_y, val_y, test_y = outputs[:train_end], outputs[train_end:val_end], outputs[val_end:]
# Stop training once validation loss has failed to improve for 5 epochs.
stopper = EarlyStopping(monitor='val_loss', patience=5, verbose=1, mode='auto')
# NOTE(review): `nb_epoch` is the Keras 1 spelling; Keras 2 renamed it to
# `epochs` — confirm the installed Keras version before running.
history = model.fit(train_X,train_y,
nb_epoch=500,
batch_size=10,
validation_data=(val_X,val_y),
callbacks=[stopper])
# NOTE(review): classification 'accuracy' is not a meaningful metric for
# continuous MSE-regression targets like these ratings; treat the printed
# value with caution (the MSE `score` is the informative number).
score, acc = model.evaluate(test_X, test_y, batch_size=10)
print("The accuracy of the model is: ", acc)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment