Created
April 3, 2015 17:44
-
-
Save MichaelPaulukonis/6c764dba5c00e51d7a85 to your computer and use it in GitHub Desktop.
Quick code to translate a { word: [pos-tag, pos-tag] } object into a { pos-tag: [word, word] } object.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// target was https://github.com/dariusk/spewer/blob/master/lib/lexicon.js
/**
 * Invert a word -> tags lexicon into a tag -> words index.
 *
 * A single tag entry may bundle several alternatives separated by '|'
 * (e.g. "VBN|VBP"); each alternative is indexed on its own.
 *
 * @param {Object} oldLex - maps each word to an array of tag strings.
 * @returns {Object} prototype-less object mapping each tag to a sorted,
 *   duplicate-free array of the words that carry it.
 */
function invertLexicon(oldLex) {
  // No prototype: a tag such as "constructor" or "toString" must not be
  // mistaken for an already-existing (inherited) entry.
  var tagbag = Object.create(null);

  // Object.keys only visits the lexicon's own entries.
  Object.keys(oldLex).forEach(function(word) {
    oldLex[word].forEach(function(tag) {
      // hoo-hah! the tag can be further split as "VBN|VBP"
      tag.split('|').forEach(function(stag) {
        if (!tagbag[stag]) { tagbag[stag] = []; }
        tagbag[stag].push(word);
      });
    });
  });

  Object.keys(tagbag).forEach(function(tag) {
    // A word listed as both "VBN|VBP" and "VBN" would otherwise appear twice
    // under VBN; after sorting, repeats are adjacent and easy to drop.
    tagbag[tag] = tagbag[tag].sort().filter(function(word, idx, sorted) {
      return idx === 0 || word !== sorted[idx - 1];
    });
  });

  return tagbag;
}

/**
 * Load './lexicon.old.js', invert it with invertLexicon, and write the
 * result as pretty-printed JSON to 'lexicon.txt'.
 *
 * The write is synchronous so that an I/O failure throws here instead of
 * being lost (the callback-less fs.writeFile form is rejected by current
 * Node.js releases).
 */
var rewriter = function() {
  var oldLex = require('./lexicon.old.js');
  var tagbag = invertLexicon(oldLex);
  require('fs').writeFileSync('lexicon.txt', JSON.stringify(tagbag, null, 2));
};
// Run the conversion as soon as this script is executed.
rewriter();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment