Skip to content

Instantly share code, notes, and snippets.

@audinue
Created June 2, 2024 17:37
Show Gist options
  • Save audinue/ae8c29d8e4b082e439b5215039f70aad to your computer and use it in GitHub Desktop.
Save audinue/ae8c29d8e4b082e439b5215039f70aad to your computer and use it in GitHub Desktop.
<?php
/**
 * Converts English words between their singular and plural forms.
 *
 * Lookup order for both directions (see replaceWord()):
 *   1. words already in the requested form (irregular table) are returned as given,
 *   2. irregular words are translated through a lookup table,
 *   3. uncountable words are returned as given,
 *   4. otherwise the first matching regular-expression rule is applied.
 *
 * Matching is done on a lower-cased copy of the word. Results produced by the
 * irregular tables or by a rule are lower-case; words returned unchanged keep
 * the caller's original casing.
 */
class Pluralize
{
    /**
     * Returns the plural form of a word.
     *
     * @param string $word Word to pluralize (singular or already plural).
     * @return string Plural form; the input itself when it is empty,
     *                uncountable, or already a known irregular plural.
     */
    public function pluralize($word)
    {
        return $this->replaceWord(
            $word,
            $this->irregularSingles,
            $this->irregularPlurals,
            $this->pluralRules
        );
    }

    /**
     * Returns the singular form of a word.
     *
     * @param string $word Word to singularize (plural or already singular).
     * @return string Singular form; the input itself when it is empty,
     *                uncountable, a known irregular singular, or when no
     *                rule matches.
     */
    public function singularize($word)
    {
        return $this->replaceWord(
            $word,
            $this->irregularPlurals,
            $this->irregularSingles,
            $this->singularRules
        );
    }

    /**
     * Shared implementation behind pluralize() and singularize().
     *
     * @param string $word       Word as supplied by the caller.
     * @param array  $replaceMap Irregular words in the source form, mapped to the target form.
     * @param array  $keepMap    Irregular words already in the target form (only keys are used).
     * @param array  $rules      Ordered map of PCRE pattern => replacement; first match wins.
     * @return string Converted word, or $word unchanged when nothing applies.
     */
    private function replaceWord($word, $replaceMap, $keepMap, $rules)
    {
        $token = strtolower($word);

        // Nothing to inflect; without this guard the catch-all plural rule
        // would turn '' into 's'.
        if ($token === '') {
            return $word;
        }

        // Already in the requested form.
        if (isset($keepMap[$token])) {
            return $word;
        }

        // Irregular word: the table value is the translated word.
        if (isset($replaceMap[$token])) {
            return $replaceMap[$token];
        }

        if (isset($this->uncountables[$token])) {
            return $word;
        }

        foreach ($rules as $pattern => $replacement) {
            // preg_match() yields 0 on no match and false on error (for
            // example invalid UTF-8 with a /u pattern); both skip the rule.
            if (preg_match($pattern, $token)) {
                // Limit to a single replacement: an unlimited preg_replace()
                // lets patterns such as /s?$/ match a second time on the
                // empty string at the end, producing "catss" from "cats".
                $result = preg_replace($pattern, $replacement, $token, 1);

                return $result === null ? $word : $result;
            }
        }

        return $word;
    }

    /**
     * Pluralization rules, most specific first. The final /s?$/ rule matches
     * every word, so it acts as the default "append s" rule.
     *
     * @var array<string, string> PCRE pattern => replacement
     */
    private $pluralRules = [
        '/sheep$/' => '$0',
        '/pox$/' => '$0',
        '/o[iu]s$/' => '$0',
        '/measles$/' => '$0',
        '/fish$/' => '$0',
        '/deer$/' => '$0',
        '/[^aeiou]ese$/' => '$0',
        // /u plus an escaped code point so that the two-byte "é" is treated
        // as one character regardless of this file's encoding.
        '/pok[e\x{00E9}]mon$/u' => '$0',
        '/^thou$/' => 'you',
        '/m[ae]n$/' => 'men',
        '/eaux$/' => '$0',
        '/(child)(?:ren)?$/' => '$1ren',
        '/(pe)(?:rson|ople)$/' => '$1ople',
        '/\b((?:tit)?m|l)(?:ice|ouse)$/' => '$1ice',
        '/(matr|cod|mur|sil|vert|ind|append)(?:ix|ex)$/' => '$1ices',
        '/(x|ch|ss|sh|zz)$/' => '$1es',
        '/([^ch][ieo][ln])ey$/' => '$1ies',
        '/([^aeiouy]|qu)y$/' => '$1ies',
        '/(?:(kni|wi|li)fe|(ar|l|ea|eo|oa|hoo)f)$/' => '$1$2ves',
        '/sis$/' => 'ses',
        '/(apheli|hyperbat|periheli|asyndet|noumen|phenomen|criteri|organ|prolegomen|hedr|automat)(?:a|on)$/' => '$1a',
        '/(agend|addend|millenni|dat|extrem|bacteri|desiderat|strat|candelabr|errat|ov|symposi|curricul|automat|quor)(?:a|um)$/' => '$1a',
        '/(her|at|gr)o$/' => '$1oes',
        '/(seraph|cherub)(?:im)?$/' => '$1im',
        '/(alumn|alg|vertebr)(?:a|ae)$/' => '$1ae',
        '/(alumn|syllab|vir|radi|nucle|fung|cact|stimul|termin|bacill|foc|uter|loc|strat)(?:us|i)$/' => '$1i',
        '/([^l]ias|[aeiou]las|[ejzr]as|[iu]am)$/' => '$1',
        '/(e[mn]u)s?$/' => '$1s',
        '/(alias|[^aou]us|t[lm]as|gas|ris)$/' => '$1es',
        '/(ax|test)is$/' => '$1es',
        '/([^aeiou]ese)$/' => '$1',
        '/[^\x{0000}-\x{007F}]$/u' => '$0',
        '/s?$/' => 's',
    ];

    /**
     * Singularization rules, most specific first. Unlike the plural rules
     * there is no rule that matches every word.
     *
     * @var array<string, string> PCRE pattern => replacement
     */
    private $singularRules = [
        '/sheep$/' => '$0',
        '/pox$/' => '$0',
        '/o[iu]s$/' => '$0',
        '/measles$/' => '$0',
        '/fish$/' => '$0',
        '/deer$/' => '$0',
        '/[^aeiou]ese$/' => '$0',
        // See the matching plural rule for why /u and \x{00E9} are used.
        '/pok[e\x{00E9}]mon$/u' => '$0',
        '/men$/' => 'man',
        '/(eau)x?$/' => '$1',
        '/(child)ren$/' => '$1',
        '/(pe)(rson|ople)$/' => '$1rson',
        '/(matr|append)ices$/' => '$1ix',
        '/(cod|mur|sil|vert|ind)ices$/' => '$1ex',
        '/(alumn|alg|vertebr)ae$/' => '$1a',
        '/(apheli|hyperbat|periheli|asyndet|noumen|phenomen|criteri|organ|prolegomen|hedr|automat)a$/' => '$1on',
        '/(agend|addend|millenni|dat|extrem|bacteri|desiderat|strat|candelabr|errat|ov|symposi|curricul|quor)a$/' => '$1um',
        '/(alumn|syllab|vir|radi|nucle|fung|cact|stimul|termin|bacill|foc|uter|loc|strat)(?:us|i)$/' => '$1us',
        '/(test)(?:is|es)$/' => '$1is',
        '/(movie|twelve|abuse|e[mn]u)s$/' => '$1',
        '/(analy|diagno|parenthe|progno|synop|the|empha|cri|ne)(?:sis|ses)$/' => '$1sis',
        '/(x|ch|ss|sh|zz|tto|go|cho|alias|[^aou]us|t[lm]as|gas|(?:her|at|gr)o|[aeiou]ris)(?:es)?$/' => '$1',
        '/(seraph|cherub)im$/' => '$1',
        '/\b((?:tit)?m|l)ice$/' => '$1ouse',
        '/\b(mon|smil)ies$/' => '$1ey',
        '/\b(l|(?:neck|cross|hog|aun)?t|coll|faer|food|gen|goon|group|hipp|junk|vegg|(?:pork)?p|charl|calor|cut)ies$/' => '$1ie',
        '/(dg|ss|ois|lk|ok|wn|mb|th|ch|ec|oal|is|ck|ix|sser|ts|wb)ies$/' => '$1ie',
        '/ies$/' => 'y',
        '/(ar|(?:wo|[ae])l|[eo][ao])ves$/' => '$1f',
        '/(wi|kni|(?:after|half|high|low|mid|non|night|[^\w]|^)li)ves$/' => '$1fe',
        '/(ss)$/' => '$1',
        '/s$/' => '',
    ];

    /**
     * Words returned unchanged in both directions. Only the keys are used.
     *
     * @var array<string, int>
     */
    private $uncountables = [
        'adulthood' => 1,
        'advice' => 1,
        'agenda' => 1,
        'aid' => 1,
        'aircraft' => 1,
        'alcohol' => 1,
        'ammo' => 1,
        'analytics' => 1,
        'anime' => 1,
        'athletics' => 1,
        'audio' => 1,
        'bison' => 1,
        'blood' => 1,
        'bream' => 1,
        'buffalo' => 1,
        'butter' => 1,
        'carp' => 1,
        'cash' => 1,
        'chassis' => 1,
        'chess' => 1,
        'clothing' => 1,
        'cod' => 1,
        'commerce' => 1,
        'cooperation' => 1,
        'corps' => 1,
        'debris' => 1,
        'diabetes' => 1,
        'digestion' => 1,
        'elk' => 1,
        'energy' => 1,
        'equipment' => 1,
        'excretion' => 1,
        'expertise' => 1,
        'firmware' => 1,
        'flounder' => 1,
        'fun' => 1,
        'gallows' => 1,
        'garbage' => 1,
        'graffiti' => 1,
        'hardware' => 1,
        'headquarters' => 1,
        'health' => 1,
        'herpes' => 1,
        'highjinks' => 1,
        'homework' => 1,
        'housework' => 1,
        'information' => 1,
        'jeans' => 1,
        'justice' => 1,
        'kudos' => 1,
        'labour' => 1,
        'literature' => 1,
        'machinery' => 1,
        'mackerel' => 1,
        'mail' => 1,
        'manga' => 1,
        'media' => 1,
        'mews' => 1,
        'moose' => 1,
        'mud' => 1,
        'music' => 1,
        'news' => 1,
        'only' => 1,
        'personnel' => 1,
        'pike' => 1,
        'plankton' => 1,
        'pliers' => 1,
        'police' => 1,
        'pollution' => 1,
        'premises' => 1,
        'rain' => 1,
        'research' => 1,
        'rice' => 1,
        'salmon' => 1,
        'scissors' => 1,
        'series' => 1,
        'sewage' => 1,
        'shambles' => 1,
        'shrimp' => 1,
        'software' => 1,
        'staff' => 1,
        'swine' => 1,
        'tennis' => 1,
        'traffic' => 1,
        'transportation' => 1,
        'trout' => 1,
        'tuna' => 1,
        'wealth' => 1,
        'welfare' => 1,
        'whiting' => 1,
        'wildebeest' => 1,
        'wildlife' => 1,
        'you' => 1,
    ];

    /**
     * Irregular plural => singular. Several singulars share one plural
     * (he/she => they); each such plural maps back to a single choice.
     *
     * @var array<string, string>
     */
    private $irregularPlurals = [
        'anathemata' => 'anathema',
        'are' => 'is',
        'axes' => 'axe',
        'canvases' => 'canvas',
        'carves' => 'carve',
        'dice' => 'die',
        'dingoes' => 'dingo',
        'dogmata' => 'dogma',
        'eaves' => 'eave',
        'echoes' => 'echo',
        'feet' => 'foot',
        'geese' => 'goose',
        'genera' => 'genus',
        'grooves' => 'groove',
        'have' => 'has',
        'humans' => 'human',
        'lemmata' => 'lemma',
        'looies' => 'looey',
        'our' => 'my',
        'ourselves' => 'myself',
        'oxen' => 'ox',
        'passersby' => 'passerby',
        'pickaxes' => 'pickaxe',
        'proofs' => 'proof',
        'quizzes' => 'quiz',
        'schemata' => 'schema',
        'stigmata' => 'stigma',
        'stomata' => 'stoma',
        'teeth' => 'tooth',
        'their' => 'her',
        'them' => 'them',
        'themselves' => 'themself',
        'these' => 'this',
        'they' => 'she',
        'thieves' => 'thief',
        'those' => 'that',
        'tornadoes' => 'tornado',
        'torpedoes' => 'torpedo',
        'us' => 'me',
        'valves' => 'valve',
        'viscera' => 'viscus',
        'volcanoes' => 'volcano',
        'we' => 'i',
        'were' => 'was',
        'yeses' => 'yes',
        'yourselves' => 'yourself',
    ];

    /**
     * Irregular singular => plural.
     *
     * @var array<string, string>
     */
    private $irregularSingles = [
        'anathema' => 'anathemata',
        'axe' => 'axes',
        'canvas' => 'canvases',
        'carve' => 'carves',
        'die' => 'dice',
        'dingo' => 'dingoes',
        'dogma' => 'dogmata',
        'eave' => 'eaves',
        'echo' => 'echoes',
        'foot' => 'feet',
        'genus' => 'genera',
        'goose' => 'geese',
        'groove' => 'grooves',
        'has' => 'have',
        'he' => 'they',
        'her' => 'their',
        'herself' => 'themselves',
        'himself' => 'themselves',
        'his' => 'their',
        'human' => 'humans',
        'i' => 'we',
        'is' => 'are',
        'its' => 'their',
        'itself' => 'themselves',
        'lemma' => 'lemmata',
        'looey' => 'looies',
        'me' => 'us',
        'my' => 'our',
        'myself' => 'ourselves',
        'ox' => 'oxen',
        'passerby' => 'passersby',
        'pickaxe' => 'pickaxes',
        'proof' => 'proofs',
        'quiz' => 'quizzes',
        'schema' => 'schemata',
        'she' => 'they',
        'stigma' => 'stigmata',
        'stoma' => 'stomata',
        'that' => 'those',
        'them' => 'them',
        'themself' => 'themselves',
        'thief' => 'thieves',
        'this' => 'these',
        'tooth' => 'teeth',
        'tornado' => 'tornadoes',
        'torpedo' => 'torpedoes',
        'valve' => 'valves',
        'viscus' => 'viscera',
        'volcano' => 'volcanoes',
        'was' => 'were',
        'yes' => 'yeses',
        'yourself' => 'yourselves',
    ];
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment