Skip to content

Instantly share code, notes, and snippets.

@evanmiltenburg
Last active May 9, 2019 18:45
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save evanmiltenburg/d7a5522148a0d0762a448eaf477c3aab to your computer and use it in GitHub Desktop.
import csv
import numpy as np
from gensim.models import Word2Vec
np.random.seed(1234)
from keras.models import Sequential
from keras.layers.core import Activation, Dense
from keras.callbacks import EarlyStopping
# Load the pretrained 300-dimensional GoogleNews word2vec embeddings.
print("Loading the GoogleNews model...")
# Download the GoogleNews vectors and change the path.
# NOTE(review): `Word2Vec.load_word2vec_format` is the pre-1.0 gensim API and
# was removed in gensim 1.0+; modern gensim uses
# `KeyedVectors.load_word2vec_format` — confirm the installed gensim version.
googlenews = Word2Vec.load_word2vec_format('/Users/Emiel/Downloads/GoogleNews-vectors-negative300.bin.gz', binary=True)
print("Loaded!")
# See:
# Lynott, D., & Connell, L. (2013).
# Modality exclusivity norms for 400 nouns:
# The relationship between perceptual experience and surface word form.
# Behavior Research Methods, 45, 516-526.
#
# Downloaded from: http://www.lancaster.ac.uk/people/connelll/papers.html
# Read the Lynott & Connell (2013) modality norms into a list of dicts,
# one per noun. Both statements must sit inside the `with` block: the
# paste lost its indentation, and csv.DictReader is lazy, so the rows have
# to be materialized with list() before the file is closed.
with open('./Lynott&Connell_NounModalityNorms_tabDelimited.txt') as f:
    reader = csv.DictReader(f, delimiter='\t')
    entries = list(reader)
def get_data(entries, googlenews):
    """Build the input and output arrays for the neural network.

    Parameters
    ----------
    entries : list of dict
        Rows from the Lynott & Connell norms file; each dict must contain
        a 'Noun' key plus the five '*_mean' modality-rating columns.
    googlenews : mapping
        Word -> embedding vector lookup (e.g. a word2vec model) that
        raises KeyError for out-of-vocabulary words.

    Returns
    -------
    tuple
        (nouns, inputs, outputs): the nouns that had an embedding, an
        array of their word vectors, and an array of their five modality
        ratings rescaled from the 0-5 scale to [0, 1] (to match the
        sigmoid output layer).
    """
    score_names = ['Auditory_mean', 'Gustatory_mean', 'Haptic_mean',
                   'Olfactory_mean', 'Visual_mean']
    data_points = []
    for entry in entries:
        noun = entry['Noun']
        # Skip nouns that are not in the embedding vocabulary.
        try:
            word_vector = googlenews[noun]
        except KeyError:
            continue
        scores = [float(entry[score]) / 5 for score in score_names]
        data_points.append((noun, word_vector, scores))
    # Guard the empty case: zip(*[]) would make the 3-way unpack fail.
    if not data_points:
        return (), np.array([]), np.array([])
    nouns, inputs, outputs = zip(*data_points)
    return nouns, np.array(inputs), np.array(outputs)
# Gather the (noun, embedding, ratings) triples and define the regressor:
# a 300-d word vector feeds 100 tanh hidden units, which feed 5 sigmoid
# outputs — one per perceptual modality, each in [0, 1].
nouns, inputs, outputs = get_data(entries, googlenews)
model = Sequential([
    Dense(100, input_dim=300),
    Activation('tanh'),
    Dense(5),
    Activation('sigmoid'),
])
model.compile(loss='mean_squared_error', optimizer='adagrad', metrics=['accuracy'])
# Fixed split of the ~400 usable nouns: first 300 train, next 50
# validation, remainder test.
train_end, val_end = 300, 350
train_X, val_X, test_X = inputs[:train_end], inputs[train_end:val_end], inputs[val_end:]
train_y, val_y, test_y = outputs[:train_end], outputs[train_end:val_end], outputs[val_end:]
# Stop training once validation loss has failed to improve for 5 epochs.
stopper = EarlyStopping(monitor='val_loss', patience=5, verbose=1, mode='auto')
# NOTE(review): `nb_epoch` is the Keras 1 spelling; Keras 2 renamed it to
# `epochs` — confirm the installed Keras version before running.
history = model.fit(train_X,train_y,
nb_epoch=500,
batch_size=10,
validation_data=(val_X,val_y),
callbacks=[stopper])
# NOTE(review): classification 'accuracy' is not a meaningful metric for
# continuous MSE-regression targets like these ratings; treat the printed
# value with caution (the MSE `score` is the informative number).
score, acc = model.evaluate(test_X, test_y, batch_size=10)
print("The accuracy of the model is: ", acc)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment