From 83a2babb3ff294faa201ca34b5d01a55cf5a591b Mon Sep 17 00:00:00 2001 From: Caleb Fidecaro Date: Mon, 9 Oct 2017 16:14:57 +1300 Subject: [PATCH 1/3] Add inflectors and pluralize/singularize methods Needs tests, but needs discussion first. --- src/Inflections/En.php | 169 ++++++++++++++++++++++++++++++++++ src/Inflections/Es.php | 76 +++++++++++++++ src/Inflections/Fr.php | 68 ++++++++++++++ src/Inflections/Inflector.php | 93 +++++++++++++++++++ src/Inflections/Nb.php | 63 +++++++++++++ src/Inflections/Pt.php | 131 ++++++++++++++++++++++++++ src/Inflections/Tr.php | 59 ++++++++++++ src/StaticStringy.php | 2 + src/Stringy.php | 43 +++++++++ 9 files changed, 704 insertions(+) create mode 100644 src/Inflections/En.php create mode 100644 src/Inflections/Es.php create mode 100644 src/Inflections/Fr.php create mode 100644 src/Inflections/Inflector.php create mode 100644 src/Inflections/Nb.php create mode 100644 src/Inflections/Pt.php create mode 100644 src/Inflections/Tr.php diff --git a/src/Inflections/En.php b/src/Inflections/En.php new file mode 100644 index 0000000..6364362 --- /dev/null +++ b/src/Inflections/En.php @@ -0,0 +1,169 @@ + $replacement + * + * @return array + */ + public function pluralRules() + { + return [ + '/(quiz)$/i' => '\1zes', + '/^(oxen)$/i' => '\1', + '/^(ox)$/i' => '\1en', + '/^(m|l)ice$/i' => '\1ice', + '/^(m|l)ouse$/i' => '\1ice', + '/(matr|vert|ind)(?:ix|ex)$/i' => '\1ices', + '/(x|ch|ss|sh)$/i' => '\1es', + '/([^aeiouy]|qu)y$/i' => '\1ies', + '/(hive)$/i' => '\1s', + '/(?:([^f])fe|([lr])f)$/i' => '\1\2ves', + '/sis$/i' => 'ses', + '/([ti])a$/i' => '\1a', + '/([ti])um$/i' => '\1a', + '/(buffal|tomat|potat|volcan|her)o$/i' => '\1oes', + '/(bu)s$/i' => '\1ses', + '/(alias|status)$/i' => '\1es', + '/^(ax|test)is$/i' => '\1es', + '/s$/i' => 's', + '/$/' => 's', + ]; + } + + /** + * Return an array of singularization rules, from most to least specific, in the form $rule => $replacement + * Order matters: Inflector::singularize() applies only the first rule whose pattern matches. + * + * @return array + */ + public function 
singularRules() + { + return [ + '/(database)s$/i' => '\1', + '/(quiz)zes$/i' => '\1', + '/(matr)ices$/i' => '\1ix', + '/(vert|ind)ices$/i' => '\1ex', + '/^(ox)en$/i' => '\1', + '/(alias|status)(es)?$/i' => '\1', + '/^(a)x[ie]s$/i' => '\1xis', + '/(cris|test)(is|es)$/i' => '\1is', + '/(shoe)s$/i' => '\1', + '/(o)es$/i' => '\1', + '/(bus)(es)?$/i' => '\1', + '/^(m|l)ice$/i' => '\1ouse', + '/(x|ch|ss|sh)es$/i' => '\1', + '/(m)ovies$/i' => '\1ovie', + '/(s)eries$/i' => '\1eries', + '/([^aeiouy]|qu)ies$/i' => '\1y', + '/([lr])ves$/i' => '\1f', + '/(tive)s$/i' => '\1', + '/(hive)s$/i' => '\1', + '/([^f])ves$/i' => '\1fe', + '/(^analy)(sis|ses)$/i' => '\1sis', + '/((a)naly|(b)a|(d)iagno|(p)arenthe|(p)rogno|(s)ynop|(t)he)(sis|ses)$/i' => '\1sis', + '/([ti])a$/i' => '\1um', + '/(n)ews$/i' => '\1ews', + '/(ss)$/i' => '\1', + '/s$/i' => '', + ]; + } + + /** + * Return an array of irregular replacements, in the form singular => plural ('goose' => 'geese') + * + * @return array + */ + public function irregularRules() + { + return [ + 'leaf' => 'leaves', + 'loaf' => 'loaves', + 'octopus' => 'octopuses', + 'virus' => 'viruses', + 'person' => 'people', + 'man' => 'men', + 'child' => 'children', + 'sex' => 'sexes', + 'move' => 'moves', + 'zombie' => 'zombies', + 'goose' => 'geese', + 'genus' => 'genera', + ]; + } + + /** + * Return an array of uncountable rules (sheep, police) + * + * @return array + */ + public function uncountableRules() + { + return [ + 'advice', + 'aircraft', + 'art', + 'baggage', + 'butter', + 'clothing', + 'coal', + 'cotton', + 'deer', + 'equipment', + 'experience', + 'feedback', + 'fish', + 'flour', + 'food', + 'furniture', + 'gas', + 'homework', + 'impatience', + 'information', + 'jeans', + 'knowledge', + 'leather', + 'love', + 'luggage', + 'management', + 'money', + 'moose', + 'music', + 'news', + 'oil', + 'patience', + 'police', + 'polish', + 'progress', + 'research', + 'rice', + 'salmon', + 'sand', + 'series', + 'sheep', + 'silk', + 'sms', + 'soap', + 
'spam', + 'species', + 'staff', + 'sugar', + 'swine', + 'talent', + 'toothpaste', + 'traffic', + 'travel', + 'vinegar', + 'weather', + 'wood', + 'wool', + 'work', + ]; + } +} diff --git a/src/Inflections/Es.php b/src/Inflections/Es.php new file mode 100644 index 0000000..75f62ae --- /dev/null +++ b/src/Inflections/Es.php @@ -0,0 +1,76 @@ + $replacement + * + * @return array + */ + public function pluralRules() + { + return [ + '/ú([sn])$/i' => 'u\1es', + '/ó([sn])$/i' => 'o\1es', + '/í([sn])$/i' => 'i\1es', + '/é([sn])$/i' => 'e\1es', + '/á([sn])$/i' => 'a\1es', + '/z$/i' => 'ces', + '/([aeiou]s)$/i' => '\1', + '/([^aeéiou])$/i' => '\1es', + '/$/' => 's', + ]; + } + + /** + * Return an array of singularization rules, from most to least specific, in the form $rule => $replacement + * Order matters: Inflector::singularize() applies only the first rule whose pattern matches. + * + * @return array + */ + public function singularRules() + { + return [ + '/ereses$/' => 'erés', + '/iones$/' => 'ión', + '/ces$/' => 'z', + '/es$/' => '', + '/s$/' => '', + ]; + } + + /** + * Return an array of irregular replacements, in the form singular => plural ('goose' => 'geese') + * + * @return array + */ + public function irregularRules() + { + return [ + 'el' => 'los', + 'lunes' => 'lunes', + 'rompecabezas' => 'rompecabezas', + 'crisis' => 'crisis', + 'papá' => 'papás', + 'mamá' => 'mamás', + 'sofá' => 'sofás', + // 'mes' ends in a vowel plus 's', which the plural rules above leave unchanged as if already plural + 'mes' => 'meses', + ]; + } + + /** + * Return an array of uncountable rules (sheep, police) + * + * @return array + */ + public function uncountableRules() + { + return []; + } +} diff --git a/src/Inflections/Fr.php b/src/Inflections/Fr.php new file mode 100644 index 0000000..df8fb3a --- /dev/null +++ b/src/Inflections/Fr.php @@ -0,0 +1,68 @@ + $replacement + * + * @return array + */ + public function pluralRules() + { + return [ + '/(s|x|z)$/' => '\1', + '/(b|cor|ém|gemm|soupir|trav|vant|vitr)ail$/' => '\1aux', + '/ail$/' => 'ails', + '/al$/' => 'aux', + '/(bleu|émeu|landau|lieu|pneu|sarrau)$/' => '\1s', + 
'/(bijou|caillou|chou|genou|hibou|joujou|pou|au|eu|eau)$/' => '\1x', + '/$/' => 's', + ]; + } + + /** + * Return an array of singularization rules, from most to least specific, in the form $rule => $replacement + * + * + * @return array + */ + public function singularRules() + { + return [ + '/(b|cor|ém|gemm|soupir|trav|vant|vitr)aux$/' => '\1ail', + '/ails$/' => 'ail', + '/(journ|chev)aux$/' => '\1al', + '/(bijou|caillou|chou|genou|hibou|joujou|pou|au|eu|eau)x$/' => '\1', + '/s$/' => '', + ]; + } + + /** + * Return an array of irregular replacements, in the form singular => plural ('goose' => 'geese') + * + * @return array + */ + public function irregularRules() + { + return [ + 'monsieur' => 'messieurs', + 'madame' => 'mesdames', + 'mademoiselle' => 'mesdemoiselles', + ]; + } + + /** + * Return an array of uncountable rules (sheep, police) + * + * @return array + */ + public function uncountableRules() + { + return []; + } +} diff --git a/src/Inflections/Inflector.php b/src/Inflections/Inflector.php new file mode 100644 index 0000000..1e00414 --- /dev/null +++ b/src/Inflections/Inflector.php @@ -0,0 +1,118 @@ +toLowerCase(), self::$pluralCache)) { + return self::$pluralCache[(string)$str->toLowerCase()]; + } + + if (!$this->isCountable($str)) { + return $str; + } + + if (array_key_exists((string)$str->toLowerCase(), $this->irregularRules())) { + return $this->irregularRules()[(string)$str->toLowerCase()]; + } + + foreach ($this->pluralRules() as $rule => $replacement) { + if (preg_match($rule, $str)) { + return self::$pluralCache[(string)$str->toLowerCase()] = preg_replace($rule, $replacement, $str); + } + } + + // No rule matched: hand the input back instead of falling off the end and returning null. + return $str; + } + + /** + * Converts $str from plural to singular using this language's rules. + * + * Lookup order: cache, uncountable words, irregular words, then the first + * matching singularization rule. Input that matches nothing is returned as is. + * + * @param Stringy $str + * + * @return Stringy|string + */ + public function singularize(Stringy $str) + { + // array_key_exists() needs a plain string key, not a Stringy object. + $word = (string)$str->toLowerCase(); + // Key the cache by class and exact input so languages and casings never share an entry. + $cacheKey = get_class($this) . ':' . (string)$str; + + if (array_key_exists($cacheKey, self::$singularCache)) { + return self::$singularCache[$cacheKey]; + } + + if (!$this->isCountable($str)) { + return $str; + } + + $singulars = array_flip($this->irregularRules()); + + if (array_key_exists($word, $singulars)) { + return $singulars[$word]; + } + + foreach ($this->singularRules() as $rule => $replacement) { + if (preg_match($rule, $str)) { + return self::$singularCache[$cacheKey] = preg_replace($rule, $replacement, $str); + } + } + + return $str; + } + + 
/** + * Reports whether $str is countable, i.e. not listed in uncountableRules(). + * + * @param Stringy $str + * + * @return bool + */ + public function isCountable(Stringy $str) + { + // uncountableRules() is a plain list of words, so compare against its values, not its keys. + return !in_array((string)$str->toLowerCase(), $this->uncountableRules(), true); + } + + /** + * Return an array of pluralization rules, from most to least specific, in the form $rule => $replacement + * + * @return array + */ + abstract public function pluralRules(); + + /** + * Return an array of singularization rules, from most to least specific, in the form $rule => $replacement + * + * + * @return array + */ + abstract public function singularRules(); + + /** + * Return an array of irregular replacements, in the form singular => plural ('goose' => 'geese') + * + * @return array + */ + abstract public function irregularRules(); + + /** + * Return an array of uncountable rules (sheep, police) + * + * @return array + */ + abstract public function uncountableRules(); +} \ No newline at end of file diff --git a/src/Inflections/Nb.php b/src/Inflections/Nb.php new file mode 100644 index 0000000..eb15f62 --- /dev/null +++ b/src/Inflections/Nb.php @@ -0,0 +1,63 @@ + $replacement + * + * @return array + */ + public function pluralRules() + { + return [ + '/e$/i' => 'er', + '/r$/i' => 're', + '/$/' => 'er', + ]; + } + + /** + * Return an array of singularization rules, from most to least specific, in the form $rule => $replacement + * + * + * @return array + */ + public function singularRules() + { + return [ + '/re$/i' => 'r', + '/er$/i' => '', + ]; + } + + /** + * Return an array of irregular replacements, in the form singular => plural ('goose' => 'geese') + * + * @return array + */ + public function irregularRules() + { + return [ + 'konto' => 'konti', + ]; + } + + /** + * Return an array of 
uncountable rules (sheep, police) + * + * @return array + */ + public function uncountableRules() + { + return [ + 'barn', + 'fjell', + 'hus', + ]; + } +} diff --git a/src/Inflections/Pt.php b/src/Inflections/Pt.php new file mode 100644 index 0000000..1288b32 --- /dev/null +++ b/src/Inflections/Pt.php @@ -0,0 +1,131 @@ + $replacement + * + * @return array + */ + public function pluralRules() + { + return [ + '/^(alem|c|p)ao$/i' => '\1aes', + '/^(irm|m)ao$/i' => '\1aos', + '/ao$/i' => 'oes', + '/^(alem|c|p)ão$/i' => '\1ães', + '/^(irm|m)ão$/i' => '\1ãos', + '/ão$/i' => 'ões', + '/^(|g)ás$/i' => '\1ases', + '/^(japon|escoc|ingl|dinamarqu|fregu|portugu)ês$/i' => '\1eses', + '/m$/i' => 'ns', + '/([^aeou])il$/i' => '\1is', + '/ul$/i' => 'uis', + '/ol$/i' => 'ois', + '/el$/i' => 'eis', + '/al$/i' => 'ais', + '/(z|r)$/i' => '\1es', + '/(s)$/i' => '\1', + '/$/' => 's', + ]; + } + + /** + * Return an array of singularization rules, from most to least specific, in the form $rule => $replacement + * Order matters: Inflector::singularize() applies only the first rule whose pattern matches. + * + * @return array + */ + public function singularRules() + { + return [ + '/^(g|)ases$/i' => '\1ás', + '/(japon|escoc|ingl|dinamarqu|fregu|portugu)eses$/i' => '\1ês', + '/(ae|ao|oe)s$/' => 'ao', + '/(ãe|ão|õe)s$/' => 'ão', + '/^(.*[^s]s)es$/i' => '\1', + '/sses$/i' => 'sse', + '/ns$/i' => 'm', + '/(r|t|f|v)is$/i' => '\1il', + '/uis$/i' => 'ul', + '/ois$/i' => 'ol', + '/eis$/i' => 'ei', + '/éis$/i' => 'el', + '/([^p])ais$/i' => '\1al', + '/(r|z)es$/i' => '\1', + '/^(á|gá)s$/i' => '\1s', + '/([^ê])s$/i' => '\1', + ]; + } + + /** + * Return an array of irregular replacements, in the form singular => plural ('goose' => 'geese') + * + * @return array + */ + public function irregularRules() + { + return [ + 'abdomen' => 'abdomens', + 'alemão' => 'alemães', + 'artesã' => 'artesãos', + 'álcool' => 'álcoois', + "árvore", + "árvores", + 'bencão' => 'bencãos', + 'cão' => 'cães', + 'campus' => 'campi', + "cadáver", + "cadáveres", + 'capelão' => 'capelães', + 'capitão' => 'capitães', + 
'chão' => 'chãos', + 'charlatão' => 'charlatães', + 'cidadão' => 'cidadãos', + 'consul' => 'consules', + 'cristão' => 'cristãos', + 'difícil' => 'difíceis', + 'email' => 'emails', + 'escrivão' => 'escrivães', + 'fóssil' => 'fósseis', + 'gás' => 'gases', + 'germen' => 'germens', + 'grão' => 'grãos', + 'hífen' => 'hífens', + 'irmão' => 'irmãos', + 'liquen' => 'liquens', + 'mal' => 'males', + 'mão' => 'mãos', + 'orfão' => 'orfãos', + 'país' => 'países', + 'pai' => 'pais', + 'pão' => 'pães', + 'projétil' => 'projéteis', + 'réptil' => 'répteis', + 'sacristão' => 'sacristães', + 'sotão' => 'sotãos', + 'tabelião' => 'tabeliães', + ]; + } + + /** + * Return an array of uncountable rules (sheep, police) + * + * @return array + */ + public function uncountableRules() + { + return [ + 'tórax', + 'tênis', + 'ônibus', + 'lápis', + 'fênix', + ]; + } +} diff --git a/src/Inflections/Tr.php b/src/Inflections/Tr.php new file mode 100644 index 0000000..712abad --- /dev/null +++ b/src/Inflections/Tr.php @@ -0,0 +1,59 @@ + $replacement + * + * @return array + */ + public function pluralRules() + { + return [ + '/([eöiü][^aoıueöiü]{0,6})$/u' => '\1ler', + '/([aoıu][^aoıueöiü]{0,6})$/u' => '\1lar', + ]; + } + + /** + * Return an array of singularization rules, from most to least specific, in the form $rule => $replacement + * + * + * @return array + */ + public function singularRules() + { + return [ + '/l[ae]r$/i' => '', + ]; + } + + /** + * Return an array of irregular replacements, in the form singular => plural ('goose' => 'geese') + * + * @return array + */ + public function irregularRules() + { + return [ + 'ben' => 'biz', + 'sen' => 'siz', + 'o' => 'onlar', + ]; + } + + /** + * Return an array of uncountable rules (sheep, police) + * + * @return array + */ + public function uncountableRules() + { + return []; + } +} diff --git a/src/StaticStringy.php b/src/StaticStringy.php index 11f9a06..548dfdd 100644 --- a/src/StaticStringy.php +++ b/src/StaticStringy.php @@ -55,6 +55,7 @@ * 
@method static string padBoth(string $str, int $length, string $padStr = ' ', string $encoding = null) * @method static string padLeft(string $str, int $length, string $padStr = ' ', string $encoding = null) * @method static string padRight(string $str, int $length, string $padStr = ' ', string $encoding = null) + * @method static string pluralize(string $str, string $language = 'en') * @method static string prepend(string $str, string $string, string $encoding = null) * @method static string regexReplace(string $str, string $pattern, string $replacement, string $options = 'msr', string $encoding = null) * @method static string removeLeft(string $str, string $substring, string $encoding = null) @@ -64,6 +65,7 @@ * @method static string reverse(string $str, string $encoding = null) * @method static string safeTruncate(string $str, int $length, string $substring = '', string $encoding = null) * @method static string shuffle(string $str, string $encoding = null) + * @method static string singularize(string $str, string $language = 'en') * @method static string slugify(string $str, string $replacement = '-', string $encoding = null) * @method static string slice(string $str, int $start, int $end = null, string $encoding = null) * @method static string split(string $str, string $pattern, int $limit = null, string $encoding = null) diff --git a/src/Stringy.php b/src/Stringy.php index ccb6f5a..2421a8f 100644 --- a/src/Stringy.php +++ b/src/Stringy.php @@ -9,6 +9,7 @@ use InvalidArgumentException; use IteratorAggregate; use OutOfBoundsException; +use Stringy\Inflections\Inflector; class Stringy implements Countable, IteratorAggregate, ArrayAccess { @@ -982,6 +983,18 @@ public function padRight($length, $padStr = ' ') return $this->applyPadding(0, $length - $this->length(), $padStr); } + /** + * Converts $str from singular to plural + * + * @param string $language Language code naming the inflector class to use, e.g. 'en' + * + * @return static Object with $str pluralized according to $language + */ + public function 
pluralize($language = 'en') + { + return static::create($this->getInflector($language)->pluralize($this), $this->encoding); + } + /** * Returns a new string starting with $string. * @@ -1152,6 +1165,18 @@ public function shuffle() return static::create($shuffledStr, $this->encoding); } + /** + * Converts $str from plural to singular + * + * @param string $language Language code naming the inflector class to use, e.g. 'en' + * + * @return static Object with $str singularized according to $language + */ + public function singularize($language = 'en') + { + return static::create($this->getInflector($language)->singularize($this), $this->encoding); + } + /** * Converts the string into an URL slug. This includes replacing non-ASCII * characters with their closest ASCII equivalents, removing remaining @@ -1673,6 +1698,24 @@ public function upperCaseFirst() return static::create($str, $this->encoding); } + /** + * Gets the Inflector for $language, if one exists. + * + * @param string $language + * + * @return Inflector + */ + protected function getInflector($language) + { + $inflector = 'Stringy\\Inflections\\' . static::create($language)->upperCamelize(); + + if (!class_exists($inflector)) { + throw new InvalidArgumentException($language.' is an unsupported language'); + } + + return new $inflector; + } + /** * Returns the replacements for the toAscii() method. * From 11c96a05dff8d74ea92139f14f8763ad22a85fcf Mon Sep 17 00:00:00 2001 From: Caleb Fidecaro Date: Mon, 9 Oct 2017 16:24:45 +1300 Subject: [PATCH 2/3] Make sure inflector extends Inflector class before returning. --- src/Stringy.php | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/Stringy.php b/src/Stringy.php index 2421a8f..80446b5 100644 --- a/src/Stringy.php +++ b/src/Stringy.php @@ -1713,7 +1713,13 @@ protected function getInflector($language) throw new InvalidArgumentException($language.' is an unsupported language'); } - return new $inflector; + $inflector = new $inflector; + + if (!$inflector instanceof Inflector) { + throw new InvalidArgumentException($language.' 
class found, but does not extend Stringy\\Inflections\\Inflector.'); + } + + return $inflector; } /** From e8380449ea03ec1fc0b90d94878bc5cc74116a9d Mon Sep 17 00:00:00 2001 From: Caleb Fidecaro Date: Mon, 9 Oct 2017 17:21:32 +1300 Subject: [PATCH 3/3] Fix Portuguese irregulars --- src/Inflections/Pt.php | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/Inflections/Pt.php b/src/Inflections/Pt.php index 1288b32..7d3f11f 100644 --- a/src/Inflections/Pt.php +++ b/src/Inflections/Pt.php @@ -75,13 +75,11 @@ public function irregularRules() 'alemão' => 'alemães', 'artesã' => 'artesãos', 'álcool' => 'álcoois', - "árvore", - "árvores", + "árvore" => "árvores", 'bencão' => 'bencãos', 'cão' => 'cães', 'campus' => 'campi', - "cadáver", - "cadáveres", + "cadáver" => "cadáveres", 'capelão' => 'capelães', 'capitão' => 'capitães', 'chão' => 'chãos',