Last active
August 29, 2015 14:26
-
-
Save Hugo-ter-Doest/cb76b59a95ba2eb1ad27 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Usage:
//   var transformationRules = new BrillTransformationRules();
//   transformationRules.rules.forEach(function(ruleFunction) {
//     ruleFunction(taggedSentence, i);
//   });
// where taggedSentence is an array of [word, tag] pairs of the form
//   [["the", "DT"], ["red", "JJ"], ["book", "NN"]]
// and i is the position to be processed.

/**
 * Holds the ordered list of transformation rule functions.
 * Each rule has the signature (taggedSentence, index) and rewrites the tag
 * stored at taggedSentence[index][1] in place.
 * @constructor
 */
function BrillTransformationRules() {
  this.rules = [rule1, rule2, rule3, rule4, rule5, rule6, rule7, rule8];
}

/**
 * Returns the rule stored at the given position.
 * @param {number} index - Zero-based position in the rule list.
 * @returns {Function|undefined} The rule, or undefined if the slot is empty.
 */
BrillTransformationRules.prototype.getRule = function(index) {
  return this.rules[index];
};

/**
 * Replaces (or creates) the rule at the given position.
 * @param {number} index - Zero-based position in the rule list.
 * @param {Function} rule - Rule function (taggedSentence, index).
 */
BrillTransformationRules.prototype.setRule = function(index, rule) {
  this.rules[index] = rule;
};

/**
 * Adds a rule at the end of the rule list.
 * @param {Function} rule - Rule function (taggedSentence, index).
 */
BrillTransformationRules.prototype.appendRule = function(rule) {
  this.rules.push(rule);
};
/**
 * Indicates whether or not $this starts with the specified string.
 * An empty or missing prefix never matches (returns false).
 * @param {string} $this - The string to inspect.
 * @param {string} string - The prefix to look for.
 * @returns {boolean} True if $this begins with the given prefix.
 */
function startsWith($this, string) {
  if (!string) {
    return false;
  }
  // Compare only the leading characters instead of scanning the whole string.
  return $this.slice(0, string.length) === string;
}
/**
 * Indicates whether or not $this ends with the specified string.
 * An empty or missing suffix never matches (returns false).
 * @param {string} $this - The string to inspect.
 * @param {string} string - The suffix to look for.
 * @returns {boolean} True if $this ends with the given suffix.
 */
function endsWith($this, string) {
  if (!string || string.length > $this.length) {
    return false;
  }
  // Compare the tail directly: searching with indexOf finds the FIRST
  // occurrence, which wrongly rejects words such as "needed" for "ed".
  return $this.slice($this.length - string.length) === string;
}
/**
 * Rule 1: DT, {VBD | VBP | VB} --> DT, NN
 * Retags a verb as a noun when the preceding token is tagged as a
 * determiner (DT). The first token of a sentence is never changed.
 * @param {Array} taggedSentence - Array of [word, tag] pairs; modified in place.
 * @param {number} index - Position of the token to process.
 */
function rule1(taggedSentence, index) {
  if (index <= 0 || taggedSentence[index - 1][1] !== "DT") {
    return;
  }
  var tag = taggedSentence[index][1];
  if (tag === "VBD" || tag === "VBP" || tag === "VB") {
    taggedSentence[index][1] = "NN";
  }
}
/**
 * Rule 2: convert a noun to a number (CD) if "." appears in the word.
 * A dotted word containing two contiguous letters is tagged URL instead,
 * unless it also parses as a number, in which case CD wins.
 * @param {Array} taggedSentence - Array of [word, tag] pairs; modified in place.
 * @param {number} index - Position of the token to process.
 */
function rule2(taggedSentence, index) {
  var word = taggedSentence[index][0];
  if (!startsWith(taggedSentence[index][1], "N")) {
    return;
  }
  if (word.indexOf(".") === -1) {
    return;
  }
  // url if there are two contiguous alpha characters
  if (/[a-zA-Z]{2}/.test(word)) {
    taggedSentence[index][1] = "URL";
  } else {
    taggedSentence[index][1] = "CD";
  }
  // Attempt to convert into a number; parseFloat accepts a numeric prefix,
  // so something like "3.5kg" ends up as CD rather than URL.
  if (!isNaN(parseFloat(word))) {
    taggedSentence[index][1] = "CD";
  }
}
/**
 * Rule 3: convert a noun to a past participle (VBN) if the word ends
 * with "ed".
 * @param {Array} taggedSentence - Array of [word, tag] pairs; modified in place.
 * @param {number} index - Position of the token to process.
 */
function rule3(taggedSentence, index) {
  var token = taggedSentence[index];
  if (startsWith(token[1], "N") && endsWith(token[0], "ed")) {
    token[1] = "VBN";
  }
}
/**
 * Rule 4: convert any type to adverb (RB) if the word ends in "ly".
 * @param {Array} taggedSentence - Array of [word, tag] pairs; modified in place.
 * @param {number} index - Position of the token to process.
 */
function rule4(taggedSentence, index) {
  if (endsWith(taggedSentence[index][0], "ly")) {
    taggedSentence[index][1] = "RB";
  }
}
/**
 * Rule 5: convert a common noun (NN or NNS) to an adjective (JJ) if the
 * word ends with "al". Any tag beginning with "NN" qualifies.
 * @param {Array} taggedSentence - Array of [word, tag] pairs; modified in place.
 * @param {number} index - Position of the token to process.
 */
function rule5(taggedSentence, index) {
  var token = taggedSentence[index];
  if (startsWith(token[1], "NN") && endsWith(token[0], "al")) {
    token[1] = "JJ";
  }
}
/**
 * Rule 6: convert a noun to a verb (VB) if the preceding word is "would"
 * (compared case-insensitively). The first token of a sentence has no
 * preceding word and is never changed.
 * @param {Array} taggedSentence - Array of [word, tag] pairs; modified in place.
 * @param {number} index - Position of the token to process.
 */
function rule6(taggedSentence, index) {
  if (index <= 0 || !startsWith(taggedSentence[index][1], "NN")) {
    return;
  }
  // Look at the PRECEDING word, as the rule states, not the current one.
  if (taggedSentence[index - 1][0].toLowerCase() === "would") {
    taggedSentence[index][1] = "VB";
  }
}
/**
 * Rule 7: if a word has been categorized as a common noun (exactly NN) and
 * it ends with "s", then set its type to plural common noun (NNS).
 * @param {Array} taggedSentence - Array of [word, tag] pairs; modified in place.
 * @param {number} index - Position of the token to process.
 */
function rule7(taggedSentence, index) {
  var token = taggedSentence[index];
  if (token[1] === "NN" && endsWith(token[0], "s")) {
    token[1] = "NNS";
  }
}
/**
 * Rule 8: convert a common noun to a present participle verb (i.e., a
 * gerund, VBG) if the word ends with "ing". Any tag beginning with "NN"
 * qualifies.
 * @param {Array} taggedSentence - Array of [word, tag] pairs; modified in place.
 * @param {number} index - Position of the token to process.
 */
function rule8(taggedSentence, index) {
  var token = taggedSentence[index];
  if (startsWith(token[1], "NN") && endsWith(token[0], "ing")) {
    token[1] = "VBG";
  }
}
// Expose the rule-set constructor as this CommonJS module's export.
module.exports = BrillTransformationRules;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment