@gavinmh
Created December 4, 2012 02:08
substitution classifier harness
# Substitution (SUB) classifier training harness: read tab-separated SUB edit
# examples, featurize each aligned premise/hypothesis word pair, and pickle the
# resulting feature matrix and target labels.
import logging

import numpy as np

import Alignment_sub
import lexent_featurizer_sub

try:
    import cPickle as pickle
except ImportError:
    import pickle

# Each line of train_sub.txt is tab-separated: a leading field (unused here),
# the premise tokens, the hypothesis tokens, the entailment label, and the
# premise and hypothesis POS tags (';'-separated).
with open('training_data/train_sub.txt') as f:
    lines = f.readlines()

feature_vectors = []
targets = np.zeros(len(lines), dtype=int)

print 'number of SUB edits: %s' % len(lines)

for index, line in enumerate(lines):
    parts = line.strip().split('\t')
    print parts

    p_tokens = parts[1].split(' ')
    h_tokens = parts[2].split(' ')
    lexicalEntailment = parts[3]
    targets[index] = int(lexicalEntailment)  # cast the label before storing it in the int array
    p_pos_tag = parts[4].split(';')
    h_pos_tag = parts[5].split(';')

    # Align the first premise token with the first hypothesis token.
    alignment = Alignment_sub.Alignment_sub(p_tokens[0], p_pos_tag[0],
                                            h_tokens[0], h_pos_tag[0])
    print '\nAlignment:'
    print str(alignment)

    featurizer = lexent_featurizer_sub.Lexent_featurizer_sub()
    features = featurizer.getFeatures(alignment)
    feature_vectors.append(features)
    print features.tolist()

    logging.info('WNSyn: %s' % features[0])
    logging.info('WNAnt: %s' % features[1])
    logging.info('WNHyper: %s' % features[2])
    logging.info('WNHypo: %s' % features[3])
    logging.info('Jico: %s' % features[4])
    logging.info('DLin: %s' % features[5])
    logging.info('LemSubSeqF: %s' % features[6])
    logging.info('LemSubSeqR: %s' % features[7])
    logging.info('LemSubSeqE: %s' % features[8])
    logging.info('LemSubSeqN: %s' % features[9])
    logging.info('Light: %s' % features[10])
    logging.info('Preps: %s' % features[11])
    logging.info('Pronoun: %s' % features[12])
    logging.info('String edit: %s' % features[13])
    logging.info('NNNN: %s' % features[14])
    logging.info('NomB: %s' % features[15])

feature_vectors_matrix = np.vstack(feature_vectors)
print feature_vectors_matrix

# Write the SUB training feature matrix
with open('classifier_models/sub_model.p', 'w+b') as f:
    pickle.dump(feature_vectors_matrix, f)

# Write the SUB targets
with open('classifier_models/sub_targets.p', 'w+b') as targets_file:
    pickle.dump(targets, targets_file)

print targets
print 'SUB model trained'
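
The harness stops after pickling the feature matrix and targets, so below is a minimal sketch of how the two pickles might be loaded and fit by a downstream classifier. The choice of scikit-learn's LogisticRegression is an assumption; the gist does not show which model actually consumes sub_model.p and sub_targets.p.

import pickle

from sklearn.linear_model import LogisticRegression

# Load the feature matrix and targets written by the harness above.
with open('classifier_models/sub_model.p', 'rb') as f:
    feature_vectors_matrix = pickle.load(f)
with open('classifier_models/sub_targets.p', 'rb') as f:
    targets = pickle.load(f)

# Classifier choice is an assumption; any estimator with fit/score would do here.
classifier = LogisticRegression()
classifier.fit(feature_vectors_matrix, targets)
print 'training accuracy: %s' % classifier.score(feature_vectors_matrix, targets)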