Last active
August 29, 2015 14:10
-
-
Save lusentis/5a26c03afe2d2db0bcba to your computer and use it in GitHub Desktop.
Fix up text typed by dumb Italian people
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*jshint node:true, esnext:true */
'use strict';

/**
 * Tidy up informally typed Italian text.
 *
 * Passes, in order:
 *  1. collapse every run of whitespace into a single space;
 *  2. turn quoted spans ('foo', "foo", “foo”) into «foo»;
 *  3. turn apostrophe-as-accent (e', o', a') into è, ò, à, then fix
 *     words ending in "chè" so they carry the acute accent ("ché");
 *  4. remove spaces before , . ; : and the ellipsis, and put exactly one after;
 *  5. trim, capitalise the first character and the first letter after each dot.
 *
 * NOTE(review): pass 3 also rewrites legitimate apocopes such as "po'" -> "pò",
 * and pass 4 inserts a space inside things like "3.14" or "10:30". Both
 * behaviours are inherited from the original implementation.
 *
 * @param {string} text - Raw user text. Any falsy value yields ''.
 * @returns {string} The cleaned-up text ('' for falsy or whitespace-only input).
 * @throws {TypeError} If `text` is truthy but not a string (no `.replace`).
 */
function police(text) {
  if (!text) { return ''; }

  // \s already covers non-breaking spaces, tabs and newlines, so after this
  // line the text is a single line with single spaces.
  let out = text.replace(/\s+/g, ' ');

  // Quotes are handled BEFORE accents so that a closing quote after a vowel
  // ("'come' va") is not mistaken for an accent apostrophe.
  // Single quotes count as quotation marks only when the opening one starts a
  // word and the closing one ends a word; this leaves elisions such as
  // "l'amico" and accent apostrophes such as "perche'" untouched.
  out = out.replace(/(^|\s)'([^']+)'(?=[\s,.;:!?]|$)/g, '$1«$2»');
  // Straight and typographic double quotes. Non-greedy by construction, so
  // '"a" e "b"' becomes two separate «…» spans instead of one.
  out = out.replace(/\s*["\u201C]([^"\u201C\u201D]*)["\u201D]\s*/g, ' «$1» ');

  // Apostrophe used as an accent at the end of a word. The lookahead accepts
  // whitespace, punctuation or end of text, so the last word is fixed too.
  out = out.replace(/e'(?=[\s,.;:!?]|$)/g, 'è');
  out = out.replace(/o'(?=[\s,.;:!?]|$)/g, 'ò');
  out = out.replace(/a'(?=[\s,.;:!?]|$)/g, 'à');
  // Words ending in "che" take the acute accent: perchè -> perché.
  out = out.replace(/chè(?=[\s,.;:!?]|$)/g, 'ché');

  // The ellipsis alternative must come first, otherwise "..." is consumed one
  // dot at a time and ends up as ". . . ".
  out = out.replace(/\s*(\.\.\.|[,.;:])\s*/g, '$1 ');

  out = out.trim();

  // Strings are immutable: build a new string instead of assigning to out[0]
  // (which throws in strict mode). charAt(0) is '' for an empty string.
  out = out.charAt(0).toUpperCase() + out.slice(1);

  // Capitalise after EVERY dot (global flag), accented lowercase included.
  out = out.replace(/\.\s*([a-zàèéìòù])/g, (match, letter) => '. ' + letter.toUpperCase());

  return out;
}

// Guarded so the file can also be evaluated where CommonJS `module` is absent.
if (typeof module !== 'undefined' && module.exports) {
  module.exports.police = police;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment