Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save thmavri/49d3f86e0bd609cdc68836e3c8e8ffa0 to your computer and use it in GitHub Desktop.
Save thmavri/49d3f86e0bd609cdc68836e3c8e8ffa0 to your computer and use it in GitHub Desktop.
#create an index of all the words (words2idx)
...
#create an index of all the labels (labels2idx)
...
# Example query to classify.
q = "hotel amsterdam wifi"

# Index of every word of the query, in order (looked up in words2idx).
words = [
    words2idx["hotel"],
    words2idx["amsterdam"],
    words2idx["wifi"],
]

# Index of the label attached to each word, in the same order (looked up in
# labels2idx). The labels used are 'proptype', 'dest' and 'fac'.
labels = [
    labels2idx['proptype'],
    labels2idx['dest'],
    labels2idx['fac'],
]

# One training sentence is a list of index arrays: the words followed by the
# labels, with the labels list stored twice (the same list object).
# NOTE(review): presumably the duplicate fills a three-slot
# (words, entities, labels) layout expected downstream -- confirm.
sentence = [words, labels, labels]

# All the training data: one entry per sentence.
matrix = [sentence]
...
# Settings for the RNN.
s = {
    'fold': 3,                  # fold to use; 5 folds: 0,1,2,3,4
    'lr': 0.0627142536696559,
    'verbose': 1,               # truthy: print progress while training
    'decay': False,             # decay on the learning rate
    'win': 7,                   # number of words in the context window
    'bs': 9,                    # number of backprop through time steps
    'nhidden': 100,             # number of hidden units
    'seed': 345,                # seed for numpy.random and random (below)
    'emb_dimension': 100,       # dimension of word embedding
    'nepochs': 50,
}

# Seed both random number generators from the settings, then instantiate
# the model with the sizes configured above.
numpy.random.seed(s['seed'])
random.seed(s['seed'])
rnn = model(
    nh=s['nhidden'],
    nc=nclasses,
    ne=vocsize,
    de=s['emb_dimension'],
    cs=s['win'],
)
#separate in train, test, validation
...
# train
# One pass over the training sentences: for every sentence, build the context
# windows of its words, group them into mini-batches, and call rnn.train()
# followed by rnn.normalize() once per (batch, label) pair.
#
# NOTE(review): `e` and `tic` are read in the progress message below but are
# not assigned in this section -- presumably an epoch index and a start
# timestamp set by surrounding code; confirm.

# The settings dict in this file defines 'lr' but no 'clr'. Fall back to 'lr'
# when no current learning rate has been stored yet, instead of raising a
# KeyError on the first rnn.train() call. An already-set 'clr' is left alone.
s.setdefault('clr', s['lr'])

for i in range(nsentences):
    # Word indexes of sentence i as a plain list.
    # assumes train_lex[i] is 2-D with a single row -- TODO confirm
    train_lex_list = train_lex[i].tolist()[0]
    cwords = contextwin(train_lex_list, s['win'])
    words = [numpy.asarray(x).astype('int32')
             for x in minibatch(cwords, s['bs'])]
    labels = train_y[i]
    for word_batch, label_last_word in zip(words, labels):
        rnn.train(word_batch, label_last_word, s['clr'])
        rnn.normalize()
    if s['verbose']:
        # Written without a trailing newline and ending in '\r', so each
        # progress message overwrites the previous one on the same line.
        sys.stdout.write(
            '[learning] epoch %i >> %2.2f%% completed in %.2f (sec) <<\r'
            % (e, (i + 1) * 100. / nsentences, time.time() - tic)
        )
        sys.stdout.flush()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment