Skip to content

Instantly share code, notes, and snippets.

@Hugo-ter-Doest
Created April 26, 2018 20:18
Show Gist options
  • Save Hugo-ter-Doest/2f14fe14c4d7319e9de396d77dff3cf6 to your computer and use it in GitHub Desktop.
Save Hugo-ter-Doest/2f14fe14c4d7319e9de396d77dff3cf6 to your computer and use it in GitHub Desktop.
Named-entity recognition (NER) based on regular expressions
// Entity patterns keyed by category name. Every pattern carries the `g` flag
// so that repeated `exec` calls walk through all occurrences in the input.
var regExs = {
  // E-mail addresses. The top-level domain is `\w{2,}` (not `\w{2,3}`) so that
  // longer TLDs such as ".info" are captured whole instead of cut to ".inf".
  "e-mail": /(\w[-._\w]*\w@\w[-._\w]*\w\.\w{2,})/ig,
  // Matches time of the form 19:20
  "time": /[0-9]{1,2}:[0-9][0-9]/g,
  // This regular expression matches dates of the form XX/XX/YYYY
  // where XX can be 1 or 2 digits long and YYYY is always 4 digits long.
  // Only the shape is checked: 31/2/2018 matches although it is not a real date.
  "date": /\d{1,2}\/\d{1,2}\/\d{4}/g,
  // One to four digits followed by two upper-case letters, e.g. 7559AH.
  "zipcode": /[0-9]{1,4}[A-Z]{2}/g,
  // Matches http://210.50.2.215/sd_new/WebBuilder.cgi?RegID=7449046&First=Ok&Upt=Ok&EditPage=3&S
  // NOTE(review): `[\d\d\d|\d\d]` is a character class, so it matches a single
  // digit or `|`, not a 2-3 digit group - confirm whether `\d{2,3}` was intended.
  "uri": /\b([\d\w\.\/\+\-\?\:]*)((ht|f)tp(s|)\:\/\/|[\d\d\d|\d\d]\.[\d\d\d|\d\d]\.|www\.|\.tv|\.ac|\.com|\.edu|\.gov|\.int|\.mil|\.net|\.org|\.biz|\.info|\.name|\.pro|\.museum|\.co)([\d\w\.\/\%\+\-\=\&\?\:\\\"\'\,\|\~\;]*)\b/g
};

// Sample input containing at least one instance of every category above.
var str = "this is a hz@hotmai.com string as you hwl@terdoest.info can read 19:20 9:10 31/2/2018 7559AH https://kennisbank.dimpact.nl/jira";

// Accumulates one record per match found in `str`.
var edges = [];
/**
 * Finds every occurrence of `regex` in the shared input string `str` and
 * appends one record per occurrence to `matches`.
 *
 * Each record has the shape
 * `{matchedString, category, start, end}` where `start` is the index of the
 * first matched character and `end` is the index just past the last one.
 * Every match is also logged to the console, as before.
 *
 * @param {string} cat - Category label stored on each record.
 * @param {RegExp} regex - Pattern to search for; normally carries the `g` flag.
 * @param {Array<Object>} [matches] - Array that receives the records. Falls
 *     back to the file-level `edges` array when omitted.
 */
function matchRegEx(cat, regex, matches) {
  // Honour the caller-supplied array; previously the parameter was ignored
  // and results always went to the global `edges`.
  var target = matches || edges;
  // Declared locally: an undeclared `match` leaks a global and throws a
  // ReferenceError in strict mode / ES modules.
  var match;
  var end;

  // A reused /g regex may carry a stale position from an earlier scan.
  regex.lastIndex = 0;

  while ((match = regex.exec(str)) !== null) {
    // Equals regex.lastIndex for /g patterns, and stays correct without /g.
    end = match.index + match[0].length;
    console.log(match);
    console.log(match.index + ' ' + end);
    target.push({
      "matchedString": match[0],
      "category": cat,
      "start": match.index,
      "end": end
    });

    // Without /g (or /y) exec never advances, so a second call would return
    // the same match forever: record it once and stop.
    if (!regex.global && !regex.sticky) {
      break;
    }
    // A zero-length match leaves lastIndex in place; step past it manually
    // so the loop always terminates.
    if (match[0] === '') {
      regex.lastIndex += 1;
    }
  }
}
// Scan the input once per category, in the order the categories are declared,
// gathering all hits in `edges`.
Object.keys(regExs).forEach(function scanCategory(cat) {
  var pattern = regExs[cat];
  matchRegEx(cat, pattern, edges);
});

// Dump the collected records.
console.log(edges);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment