Skip to content

Instantly share code, notes, and snippets.

@MichaelPaulukonis
Created April 3, 2015 17:44
Show Gist options
  • Save MichaelPaulukonis/6c764dba5c00e51d7a85 to your computer and use it in GitHub Desktop.
Save MichaelPaulukonis/6c764dba5c00e51d7a85 to your computer and use it in GitHub Desktop.
Quick code to convert a `word: [pos-tag, pos-tag]` lexicon object into a `pos-tag: [word, word]` object.
// target was https://github.com/dariusk/spewer/blob/master/lib/lexicon.js
/**
 * Inverts the word-keyed lexicon exported by ./lexicon.old.js into a
 * tag-keyed one and writes the result to lexicon.txt as pretty-printed JSON.
 *
 * Input shape (as consumed below):  { word: ["TAG", "TAG|TAG", ...], ... }
 * Output shape:                     { TAG: ["word", "word", ...], ... }
 *
 * Each tag's word list is sorted with the default (string) sort order.
 * A write failure is rethrown from the writeFile callback instead of being
 * silently dropped.
 */
var rewriter = function() {
  var oldLex = require('./lexicon.old.js');
  var hasOwn = Object.prototype.hasOwnProperty;
  var tagbag = {};

  for (var word in oldLex) {
    // Ignore anything inherited through the prototype chain.
    if (!hasOwn.call(oldLex, word)) { continue; }

    var tags = oldLex[word];
    for (var i = 0; i < tags.length; i++) {
      // A single entry can bundle several tags, e.g. "VBN|VBP".
      var subtags = tags[i].split('|');
      for (var j = 0; j < subtags.length; j++) {
        var stag = subtags[j];
        // Own-property check so a tag named like an Object.prototype member
        // (e.g. "constructor") still gets its own array.
        if (!hasOwn.call(tagbag, stag)) { tagbag[stag] = []; }
        tagbag[stag].push(word);
      }
    }
  }

  // Array.prototype.sort works in place, so no reassignment is needed.
  Object.keys(tagbag).forEach(function(tag) {
    tagbag[tag].sort();
  });

  // fs.writeFile requires a callback; surface any write error.
  require('fs').writeFile('lexicon.txt', JSON.stringify(tagbag, null, 2), function(err) {
    if (err) { throw err; }
  });
};

rewriter();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment