From 8ef8d2be5a45e756cad1f0820c64c5e40aae85e9 Mon Sep 17 00:00:00 2001 From: Andre Wyrwa Date: Tue, 17 Apr 2018 10:26:39 +1000 Subject: [PATCH 1/7] add multi-language support --- src/Language/English.php | 81 ++++++++++++++++++++++++ src/LanguageInterface.php | 12 ++++ src/Mapper/AbstractMapper.php | 28 ++++----- src/Mapper/InitialMapper.php | 14 ++--- src/Mapper/LastnameMapper.php | 36 +++++++---- src/Mapper/SalutationMapper.php | 20 +++++- src/Mapper/SuffixMapper.php | 19 +++--- src/Parser.php | 90 ++++++++++++++++++++++++--- src/Part/AbstractPart.php | 2 +- src/Part/Lastname.php | 66 +------------------- src/Part/LastnamePrefix.php | 26 ++++++++ tests/Mapper/AbstractMapperTest.php | 7 ++- tests/Mapper/FirstnameMapperTest.php | 5 ++ tests/Mapper/InitialMapperTest.php | 10 ++- tests/Mapper/LastnameMapperTest.php | 18 ++++-- tests/Mapper/MiddlenameMapperTest.php | 5 ++ tests/Mapper/NicknameMapperTest.php | 5 ++ tests/Mapper/SalutationMapperTest.php | 8 +++ tests/Mapper/SuffixMapperTest.php | 8 +++ 19 files changed, 329 insertions(+), 131 deletions(-) create mode 100644 src/Language/English.php create mode 100644 src/LanguageInterface.php create mode 100644 src/Part/LastnamePrefix.php diff --git a/src/Language/English.php b/src/Language/English.php new file mode 100644 index 0000000..f78deab --- /dev/null +++ b/src/Language/English.php @@ -0,0 +1,81 @@ + '1st', + '2nd' => '2nd', + '3rd' => '3rd', + '4th' => '4th', + '5th' => '5th', + 'i' => 'I', + 'ii' => 'II', + 'iii' => 'III', + 'iv' => 'IV', + 'v' => 'V', + 'apr' => 'APR', + 'cme' => 'CME', + 'dmd' => 'DMD', + 'jr' => 'Jr', + 'junior' => 'Junior', + 'ma' => 'MA', + 'md' => 'MD', + 'pe' => 'PE', + 'phd' => 'PhD', + 'rph' => 'RPh', + 'senior' => 'Senior', + 'sr' => 'Sr', + ]; + + const SALUTATIONS = [ + 'dr' => 'Dr.', + 'fr' => 'Fr.', + 'madam' => 'Madam', + 'master' => 'Mr.', + 'miss' => 'Miss', + 'mister' => 'Mr.', + 'mr' => 'Mr.', + 'mrs' => 'Mrs.', + 'ms' => 'Ms.', + 'mx' => 'Mx.', + 'rev' => 'Rev.', + 'sir' => 
'Sir', + ]; + + const LASTNAME_PREFIXES = [ + 'da' => 'da', + 'de' => 'de', + 'del' => 'del', + 'della' => 'della', + 'der' => 'der', + 'di' => 'di', + 'du' => 'du', + 'la' => 'la', + 'pietro' => 'pietro', + 'st' => 'st.', + 'ter' => 'ter', + 'van' => 'van', + 'vanden' => 'vanden', + 'vere' => 'vere', + 'von' => 'von', + ]; + + public function getSuffixes(): array + { + return self::SUFFIXES; + } + + public function getSalutations(): array + { + return self::SALUTATIONS; + } + + public function getLastnamePrefixes(): array + { + return self::LASTNAME_PREFIXES; + } +} diff --git a/src/LanguageInterface.php b/src/LanguageInterface.php new file mode 100644 index 0000000..d4d8bd8 --- /dev/null +++ b/src/LanguageInterface.php @@ -0,0 +1,12 @@ +options = array_merge($this->options, $options); - } - } - /** * implements the mapping of parts * @@ -70,4 +53,15 @@ protected function findFirstMapped(string $type, array $parts) return false; } + + /** + * get the registry lookup key for the given word + * + * @param string $word the word + * @return string the key + */ + protected function getKey($word): string + { + return strtolower(str_replace('.', '', $word)); + } } diff --git a/src/Mapper/InitialMapper.php b/src/Mapper/InitialMapper.php index 5b8de23..5041fd9 100644 --- a/src/Mapper/InitialMapper.php +++ b/src/Mapper/InitialMapper.php @@ -10,12 +10,12 @@ */ class InitialMapper extends AbstractMapper { - /** - * @var array options - */ - protected $options = [ - 'match_last' => false, - ]; + protected $matchLastPart = false; + + public function __construct(bool $matchLastPart = false) + { + $this->matchLastPart = $matchLastPart; + } /** * map intials in parts array @@ -32,7 +32,7 @@ public function map(array $parts): array continue; } - if (!$this->options['match_last'] && $k === $last) { + if (!$this->matchLastPart && $k === $last) { continue; } diff --git a/src/Mapper/LastnameMapper.php b/src/Mapper/LastnameMapper.php index 16cc276..707efd3 100644 --- 
a/src/Mapper/LastnameMapper.php +++ b/src/Mapper/LastnameMapper.php @@ -2,18 +2,23 @@ namespace TheIconic\NameParser\Mapper; +use TheIconic\NameParser\LanguageInterface; use TheIconic\NameParser\Part\AbstractPart; use TheIconic\NameParser\Part\Lastname; +use TheIconic\NameParser\Part\LastnamePrefix; use TheIconic\NameParser\Part\Suffix; class LastnameMapper extends AbstractMapper { - /** - * @var array options - */ - protected $options = [ - 'match_single' => false, - ]; + protected $prefixes = []; + + protected $matchSinglePart = false; + + public function __construct(array $prefixes, bool $matchSinglePart = false) + { + $this->prefixes = $prefixes; + $this->matchSinglePart = $matchSinglePart; + } /** * map lastnames in the parts array @@ -23,7 +28,7 @@ class LastnameMapper extends AbstractMapper */ public function map(array $parts): array { - if (!$this->options['match_single'] && count($parts) < 2) { + if (!$this->matchSinglePart && count($parts) < 2) { return $parts; } @@ -56,9 +61,7 @@ protected function mapReversedParts(array $parts): array if ($this->isFollowedByLastnamePart($originalParts, $originalIndex)) { if ($this->isApplicablePrefix($originalParts, $originalIndex)) { - $lastname = new Lastname($part); - $lastname->setApplyPrefix(true); - $parts[$k] = $lastname; + $parts[$k] = new LastnamePrefix($part, $this->prefixes[$this->getKey($part)]); continue; } break; @@ -98,10 +101,21 @@ protected function isFollowedByLastnamePart(array $parts, int $index): bool */ protected function isApplicablePrefix(array $parts, int $index): bool { - if (!Lastname::isPrefix($parts[$index])) { + if (!$this->isPrefix($parts[$index])) { return false; } return $this->hasUnmappedPartsBefore($parts, $index); } + + /** + * check if the given word is a lastname prefix + * + * @param string $word the word to check + * @return bool + */ + protected function isPrefix($word): bool + { + return (array_key_exists($this->getKey($word), $this->prefixes)); + } } diff --git 
a/src/Mapper/SalutationMapper.php b/src/Mapper/SalutationMapper.php index 29a814b..8ca2966 100644 --- a/src/Mapper/SalutationMapper.php +++ b/src/Mapper/SalutationMapper.php @@ -7,6 +7,13 @@ class SalutationMapper extends AbstractMapper { + protected $salutations = []; + + public function __construct(array $salutations) + { + $this->salutations = $salutations; + } + /** * map salutations in the parts array * @@ -20,11 +27,22 @@ public function map(array $parts): array break; } - if (Salutation::isSalutation($part)) { + if ($this->isSalutation($part)) { $parts[$k] = new Salutation($part); } } return $parts; } + + /** + * check if the given word is a viable salutation + * + * @param string $word the word to check + * @return bool + */ + protected function isSalutation($word): bool + { + return (array_key_exists($this->getKey($word), $this->salutations)); + } } diff --git a/src/Mapper/SuffixMapper.php b/src/Mapper/SuffixMapper.php index 6e70b22..3625770 100644 --- a/src/Mapper/SuffixMapper.php +++ b/src/Mapper/SuffixMapper.php @@ -7,12 +7,15 @@ class SuffixMapper extends AbstractMapper { - /** - * @var array options - */ - protected $options = [ - 'match_single' => false, - ]; + protected $suffixes = []; + + protected $matchSinglePart = false; + + public function __construct(array $suffixes, bool $matchSinglePart = false) + { + $this->suffixes = $suffixes; + $this->matchSinglePart = $matchSinglePart; + } /** * map suffixes in the parts array @@ -48,7 +51,7 @@ public function map(array $parts): array */ protected function isMatchingSinglePart($parts): bool { - if (!$this->options['match_single']) { + if (!$this->matchSinglePart) { return false; } @@ -69,6 +72,6 @@ protected function isSuffix($part): bool return false; } - return (Suffix::isSuffix($part)); + return (array_key_exists($this->getKey($part), $this->suffixes)); } } diff --git a/src/Parser.php b/src/Parser.php index 7081876..151053e 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -2,6 +2,7 @@ namespace 
TheIconic\NameParser; +use TheIconic\NameParser\Language\English; use TheIconic\NameParser\Mapper\NicknameMapper; use TheIconic\NameParser\Mapper\SalutationMapper; use TheIconic\NameParser\Mapper\SuffixMapper; @@ -22,6 +23,20 @@ class Parser */ protected $mappers = []; + /** + * @var array + */ + protected $languages = []; + + public function __construct(array $languages = []) + { + if (empty($languages)) { + $languages = [new English()]; + } + + $this->languages = $languages; + } + /** * split full names into the following parts: * - prefix / salutation (Mr., Mrs., etc) @@ -78,9 +93,9 @@ protected function getFirstSegmentParser(): Parser { $parser = new Parser(); $parser->setMappers([ - new SalutationMapper(), - new SuffixMapper(), - new LastnameMapper(['match_single' => true]), + new SalutationMapper($this->getSalutations()), + new SuffixMapper($this->getSuffixes()), + new LastnameMapper($this->getPrefixes(), true), new FirstnameMapper(), new MiddlenameMapper(), ]); @@ -95,10 +110,10 @@ protected function getSecondSegmentParser(): Parser { $parser = new Parser(); $parser->setMappers([ - new SalutationMapper(), - new SuffixMapper(['match_single' => true]), + new SalutationMapper($this->getSalutations()), + new SuffixMapper($this->getSuffixes(), true), new NicknameMapper(), - new InitialMapper(['match_last' => true]), + new InitialMapper(true), new FirstnameMapper(), new MiddlenameMapper(), ]); @@ -110,7 +125,7 @@ protected function getThirdSegmentParser(): Parser { $parser = new Parser(); $parser->setMappers([ - new SuffixMapper(['match_single' => true]), + new SuffixMapper($this->getSuffixes(), true), ]); return $parser; @@ -126,10 +141,10 @@ public function getMappers(): array if (empty($this->mappers)) { $this->setMappers([ new NicknameMapper(), - new SalutationMapper(), - new SuffixMapper(), + new SalutationMapper($this->getSalutations()), + new SuffixMapper($this->getSuffixes()), new InitialMapper(), - new LastnameMapper(), + new 
LastnameMapper($this->getPrefixes()), new FirstnameMapper(), new MiddlenameMapper(), ]); @@ -188,4 +203,59 @@ public function setWhitespace($whitespace): Parser return $this; } + + /** + * + */ + public function addLanguages() + { + foreach (func_get_args() as $language) { + $this->languages[] = $language; + } + } + + /** + * @return array + */ + protected function getPrefixes() + { + $prefixes = []; + + /** @var LanguageInterface $language */ + foreach ($this->languages as $language) { + $prefixes += $language->getLastnamePrefixes(); + } + + return $prefixes; + } + + /** + * @return array + */ + protected function getSuffixes() + { + $suffixes = []; + + /** @var LanguageInterface $language */ + foreach ($this->languages as $language) { + $suffixes += $language->getSuffixes(); + } + + return $suffixes; + } + + /** + * @return array + */ + protected function getSalutations() + { + $salutations = []; + + /** @var LanguageInterface $language */ + foreach ($this->languages as $language) { + $salutations += $language->getSalutations(); + } + + return $salutations; + } } diff --git a/src/Part/AbstractPart.php b/src/Part/AbstractPart.php index c41796c..ba1179d 100644 --- a/src/Part/AbstractPart.php +++ b/src/Part/AbstractPart.php @@ -70,7 +70,7 @@ protected function camelcase($word): string return $word; } - return preg_replace_callback('/[a-z0-9]+/i', array($this, 'camelcaseReplace'), $word); + return preg_replace_callback('/[a-z0-9]+/i', [$this, 'camelcaseReplace'], $word); } /** diff --git a/src/Part/Lastname.php b/src/Part/Lastname.php index e22ca84..997b46c 100644 --- a/src/Part/Lastname.php +++ b/src/Part/Lastname.php @@ -5,76 +5,12 @@ class Lastname extends AbstractPart { /** - * @var array possible lastname prefixes - */ - protected static $prefixes = [ - 'da' => 'da', - 'de' => 'de', - 'del' => 'del', - 'della' => 'della', - 'der' => 'der', - 'di' => 'di', - 'du' => 'du', - 'la' => 'la', - 'pietro' => 'pietro', - 'st' => 'st.', - 'ter' => 'ter', - 'van' => 'van', 
- 'vanden' => 'vanden', - 'vere' => 'vere', - 'von' => 'von', - ]; - - /** @var bool */ - private $applyPrefix = false; - - /** - * check if the given word is a lastname prefix - * - * @param string $word the word to check - * @return bool - */ - public static function isPrefix($word): bool - { - return (array_key_exists(self::getKey($word), static::$prefixes)); - } - - /** - * get the prefix registry key for the given word - * - * @param string $word the word - * @return string the key - */ - protected static function getKey($word): string - { - return strtolower(str_replace('.', '', $word)); - } - - /** - * if this is a lastname prefix, look up normalized version from registry - * otherwise camelcase the lastname + * camelcase the lastname * * @return string */ public function normalize(): string { - $value = $this->getValue(); - - if ($this->applyPrefix && self::isPrefix($value)) { - return static::$prefixes[self::getKey($value)]; - } - return $this->camelcase($this->getValue()); } - - /** - * @param bool $applyPrefix - * @return Lastname - */ - public function setApplyPrefix(bool $applyPrefix): Lastname - { - $this->applyPrefix = $applyPrefix; - - return $this; - } } diff --git a/src/Part/LastnamePrefix.php b/src/Part/LastnamePrefix.php new file mode 100644 index 0000000..bcddc45 --- /dev/null +++ b/src/Part/LastnamePrefix.php @@ -0,0 +1,26 @@ +normalized = $normalized ?? 
$value; + + return parent::__construct($value); + } + + /** + * if this is a lastname prefix, look up normalized version from registry + * otherwise camelcase the lastname + * + * @return string + */ + public function normalize(): string + { + return $this->normalized; + } +} diff --git a/tests/Mapper/AbstractMapperTest.php b/tests/Mapper/AbstractMapperTest.php index c8f5e97..915beba 100644 --- a/tests/Mapper/AbstractMapperTest.php +++ b/tests/Mapper/AbstractMapperTest.php @@ -12,11 +12,12 @@ abstract class AbstractMapperTest extends TestCase * @param $input * @param $expectation */ - public function testMap($input, $expectation, $options = []) + public function testMap($input, $expectation, $arguments = []) { - $classname = substr(get_class($this), 0, -4); - $mapper = new $classname($options); + $mapper = call_user_func_array([$this, 'getMapper'], $arguments); $this->assertEquals($expectation, $mapper->map($input)); } + + abstract protected function getMapper(); } diff --git a/tests/Mapper/FirstnameMapperTest.php b/tests/Mapper/FirstnameMapperTest.php index 9c2fc55..21ff38b 100644 --- a/tests/Mapper/FirstnameMapperTest.php +++ b/tests/Mapper/FirstnameMapperTest.php @@ -50,4 +50,9 @@ public function provider() ], ]; } + + protected function getMapper() + { + return new FirstnameMapper(); + } } diff --git a/tests/Mapper/InitialMapperTest.php b/tests/Mapper/InitialMapperTest.php index d8796df..5f1dbb4 100644 --- a/tests/Mapper/InitialMapperTest.php +++ b/tests/Mapper/InitialMapperTest.php @@ -2,6 +2,7 @@ namespace TheIconic\NameParser\Mapper; +use TheIconic\NameParser\Language\English; use TheIconic\NameParser\Part\Initial; use TheIconic\NameParser\Part\Salutation; use TheIconic\NameParser\Part\Lastname; @@ -69,10 +70,15 @@ public function provider() 'James', new Initial('B'), ], - 'options' => [ - 'match_last' => true + 'arguments' => [ + true ], ] ]; } + + protected function getMapper($matchLastPart = false) + { + return new InitialMapper($matchLastPart); + } } 
diff --git a/tests/Mapper/LastnameMapperTest.php b/tests/Mapper/LastnameMapperTest.php index 9f410b9..4c9b746 100644 --- a/tests/Mapper/LastnameMapperTest.php +++ b/tests/Mapper/LastnameMapperTest.php @@ -2,9 +2,11 @@ namespace TheIconic\NameParser\Mapper; +use TheIconic\NameParser\Language\English; use TheIconic\NameParser\Part\Salutation; use TheIconic\NameParser\Part\Firstname; use TheIconic\NameParser\Part\Lastname; +use TheIconic\NameParser\Part\LastnamePrefix; class LastnameMapperTest extends AbstractMapperTest { @@ -13,9 +15,6 @@ class LastnameMapperTest extends AbstractMapperTest */ public function provider() { - $vanPrefix = new Lastname('van'); - $vanPrefix->setApplyPrefix(true); - return [ [ 'input' => [ @@ -61,7 +60,7 @@ public function provider() 'expectation' => [ new Salutation('Mr'), 'Lars', - $vanPrefix, + new LastnamePrefix('van'), new Lastname('Trier'), ], ], @@ -112,10 +111,17 @@ public function provider() 'expectation' => [ new Lastname('Kirk'), ], - 'options' => [ - 'match_single' => true + 'arguments' => [ + true ], ] ]; } + + protected function getMapper($matchSingle = false) + { + $english = new English(); + + return new LastnameMapper($english->getLastnamePrefixes(), $matchSingle); + } } diff --git a/tests/Mapper/MiddlenameMapperTest.php b/tests/Mapper/MiddlenameMapperTest.php index c0ad8b2..4dd4cec 100644 --- a/tests/Mapper/MiddlenameMapperTest.php +++ b/tests/Mapper/MiddlenameMapperTest.php @@ -79,4 +79,9 @@ public function provider() ], ]; } + + protected function getMapper() + { + return new MiddlenameMapper(); + } } diff --git a/tests/Mapper/NicknameMapperTest.php b/tests/Mapper/NicknameMapperTest.php index 9c18dae..ae55c98 100644 --- a/tests/Mapper/NicknameMapperTest.php +++ b/tests/Mapper/NicknameMapperTest.php @@ -47,4 +47,9 @@ public function provider() ], ]; } + + protected function getMapper() + { + return new NicknameMapper(); + } } diff --git a/tests/Mapper/SalutationMapperTest.php b/tests/Mapper/SalutationMapperTest.php index 
54bcbc4..7366fe0 100644 --- a/tests/Mapper/SalutationMapperTest.php +++ b/tests/Mapper/SalutationMapperTest.php @@ -2,6 +2,7 @@ namespace TheIconic\NameParser\Mapper; +use TheIconic\NameParser\Language\English; use TheIconic\NameParser\Part\Salutation; use TheIconic\NameParser\Part\Firstname; use TheIconic\NameParser\Part\Lastname; @@ -50,4 +51,11 @@ public function provider() ], ]; } + + protected function getMapper() + { + $english = new English(); + + return new SalutationMapper($english->getSalutations()); + } } diff --git a/tests/Mapper/SuffixMapperTest.php b/tests/Mapper/SuffixMapperTest.php index 167a129..c90b689 100644 --- a/tests/Mapper/SuffixMapperTest.php +++ b/tests/Mapper/SuffixMapperTest.php @@ -2,6 +2,7 @@ namespace TheIconic\NameParser\Mapper; +use TheIconic\NameParser\Language\English; use TheIconic\NameParser\Part\Lastname; use TheIconic\NameParser\Part\Firstname; use TheIconic\NameParser\Part\Suffix; @@ -78,4 +79,11 @@ public function provider() ], ]; } + + protected function getMapper() + { + $english = new English(); + + return new SuffixMapper($english->getSuffixes()); + } } From a5805e06244c4b05263e89dc841dac35ce4ba096 Mon Sep 17 00:00:00 2001 From: Andre Wyrwa Date: Tue, 17 Apr 2018 15:58:21 +1000 Subject: [PATCH 2/7] update composer.lock --- composer.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/composer.lock b/composer.lock index b7659f3..62c4d59 100644 --- a/composer.lock +++ b/composer.lock @@ -4,7 +4,7 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#composer-lock-the-lock-file", "This file is @generated automatically" ], - "content-hash": "8de6775d96345f4218a80e2754a020d2", + "content-hash": "e899c1151ea9a6df1777902b468b1a0c", "packages": [], "packages-dev": [ { @@ -2174,7 +2174,7 @@ "prefer-stable": false, "prefer-lowest": false, "platform": { - "php": ">=7.0" + "php": ">=7.1" }, "platform-dev": [] } From 9fa3554942a7358ceb57156da151a8789cfc1a76 Mon Sep 17 00:00:00 2001 From: Andre 
Wyrwa Date: Wed, 18 Apr 2018 00:38:32 +1000 Subject: [PATCH 3/7] add German language; improve NicknameParser; cleanup --- src/Language/German.php | 48 ++++++++++++++++++ src/Mapper/NicknameMapper.php | 44 ++++++++++++++-- src/Mapper/SalutationMapper.php | 2 +- src/Mapper/SuffixMapper.php | 4 +- src/Parser.php | 41 ++++++++++----- src/Part/Salutation.php | 44 +++------------- src/Part/Suffix.php | 54 +++----------------- tests/GermanParserTest.php | 66 ++++++++++++++++++++++++ tests/Mapper/NicknameMapperTest.php | 72 ++++++++++++++++++++++++++- tests/Mapper/SalutationMapperTest.php | 6 +-- tests/NameTest.php | 4 +- tests/ParserTest.php | 10 ++++ 12 files changed, 286 insertions(+), 109 deletions(-) create mode 100644 src/Language/German.php create mode 100644 tests/GermanParserTest.php diff --git a/src/Language/German.php b/src/Language/German.php new file mode 100644 index 0000000..491bb4c --- /dev/null +++ b/src/Language/German.php @@ -0,0 +1,48 @@ + '1.', + '2.' => '2.', + '3.' => '3.', + '4.' => '4.', + '5.' 
=> '5.', + 'i' => 'I', + 'ii' => 'II', + 'iii' => 'III', + 'iv' => 'IV', + 'v' => 'V', + ]; + + const SALUTATIONS = [ + 'herr' => 'Herr', + 'hr' => 'Herr', + 'frau' => 'Frau', + 'fr' => 'Frau' + ]; + + const LASTNAME_PREFIXES = [ + 'der' => 'der', + 'von' => 'von', + ]; + + public function getSuffixes(): array + { + return self::SUFFIXES; + } + + public function getSalutations(): array + { + return self::SALUTATIONS; + } + + public function getLastnamePrefixes(): array + { + return self::LASTNAME_PREFIXES; + } +} diff --git a/src/Mapper/NicknameMapper.php b/src/Mapper/NicknameMapper.php index 66e1c25..a8fb500 100644 --- a/src/Mapper/NicknameMapper.php +++ b/src/Mapper/NicknameMapper.php @@ -7,6 +7,25 @@ class NicknameMapper extends AbstractMapper { + /** + * @var array + */ + protected $delimiters = [ + '[' => ']', + '{' => '}', + '(' => ')', + '<' => '>', + '"' => '"', + '\'' => '\'' + ]; + + public function __construct(array $delimiters = []) + { + if (!empty($delimiters)) { + $this->delimiters = $delimiters; + } + } + /** * map nicknames in the parts array * @@ -17,28 +36,47 @@ public function map(array $parts): array { $isEncapsulated = false; + $regexp = $this->buildRegexp(); + foreach ($parts as $k => $part) { if ($part instanceof AbstractPart) { continue; } - if (preg_match('/^[\(\[\<\{]/', $part)) { + if (preg_match($regexp, $part, $matches)) { $isEncapsulated = true; $part = substr($part, 1); + $closing = $this->delimiters[$matches[1]]; } if (!$isEncapsulated) { continue; } - if (preg_match('/[\)\]\>\}]$/', $part)) { + if ($closing === substr($part, -1)) { $isEncapsulated = false; $part = substr($part, 0, -1); } - $parts[$k] = new Nickname($part); + $parts[$k] = new Nickname(str_replace(['"', '\''], '', $part)); } return $parts; } + + /** + * @return string + */ + protected function buildRegexp() + { + $regexp = '/^(['; + + foreach ($this->delimiters as $opening => $closing) { + $regexp .= sprintf('\\%s', $opening); + } + + $regexp .= '])/'; + + return 
$regexp; + } } diff --git a/src/Mapper/SalutationMapper.php b/src/Mapper/SalutationMapper.php index 8ca2966..239ba46 100644 --- a/src/Mapper/SalutationMapper.php +++ b/src/Mapper/SalutationMapper.php @@ -28,7 +28,7 @@ public function map(array $parts): array } if ($this->isSalutation($part)) { - $parts[$k] = new Salutation($part); + $parts[$k] = new Salutation($part, $this->salutations[$this->getKey($part)]); } } diff --git a/src/Mapper/SuffixMapper.php b/src/Mapper/SuffixMapper.php index 3625770..74a732a 100644 --- a/src/Mapper/SuffixMapper.php +++ b/src/Mapper/SuffixMapper.php @@ -26,7 +26,7 @@ public function __construct(array $suffixes, bool $matchSinglePart = false) public function map(array $parts): array { if ($this->isMatchingSinglePart($parts)) { - $parts[0] = new Suffix($parts[0]); + $parts[0] = new Suffix($parts[0], $this->suffixes[$this->getKey($parts[0])]); return $parts; } @@ -39,7 +39,7 @@ public function map(array $parts): array break; } - $parts[$k] = new Suffix($part); + $parts[$k] = new Suffix($part, $this->suffixes[$this->getKey($part)]); } return $parts; diff --git a/src/Parser.php b/src/Parser.php index 151053e..e019f99 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -28,6 +28,11 @@ class Parser */ protected $languages = []; + /** + * @var array + */ + protected $nicknameDelimiters = []; + public function __construct(array $languages = []) { if (empty($languages)) { @@ -92,6 +97,7 @@ protected function parseSplitName($first, $second, $third): Name protected function getFirstSegmentParser(): Parser { $parser = new Parser(); + $parser->setMappers([ new SalutationMapper($this->getSalutations()), new SuffixMapper($this->getSuffixes()), @@ -109,10 +115,11 @@ protected function getFirstSegmentParser(): Parser protected function getSecondSegmentParser(): Parser { $parser = new Parser(); + $parser->setMappers([ new SalutationMapper($this->getSalutations()), new SuffixMapper($this->getSuffixes(), true), - new NicknameMapper(), + new 
NicknameMapper($this->getNicknameDelimiters()), new InitialMapper(true), new FirstnameMapper(), new MiddlenameMapper(), @@ -124,6 +131,7 @@ protected function getSecondSegmentParser(): Parser protected function getThirdSegmentParser(): Parser { $parser = new Parser(); + $parser->setMappers([ new SuffixMapper($this->getSuffixes(), true), ]); @@ -140,7 +148,7 @@ public function getMappers(): array { if (empty($this->mappers)) { $this->setMappers([ - new NicknameMapper(), + new NicknameMapper($this->getNicknameDelimiters()), new SalutationMapper($this->getSalutations()), new SuffixMapper($this->getSuffixes()), new InitialMapper(), @@ -204,16 +212,6 @@ public function setWhitespace($whitespace): Parser return $this; } - /** - * - */ - public function addLanguages() - { - foreach (func_get_args() as $language) { - $this->languages[] = $language; - } - } - /** * @return array */ @@ -258,4 +256,23 @@ protected function getSalutations() return $salutations; } + + /** + * @return array + */ + public function getNicknameDelimiters(): array + { + return $this->nicknameDelimiters; + } + + /** + * @param array $nicknameDelimiters + * @return Parser + */ + public function setNicknameDelimiters(array $nicknameDelimiters): Parser + { + $this->nicknameDelimiters = $nicknameDelimiters; + + return $this; + } } diff --git a/src/Part/Salutation.php b/src/Part/Salutation.php index afc1337..50f4820 100644 --- a/src/Part/Salutation.php +++ b/src/Part/Salutation.php @@ -4,53 +4,23 @@ class Salutation extends AbstractPart { - /** - * @var array possible salutations - */ - protected static $salutations = [ - 'dr' => 'Dr.', - 'fr' => 'Fr.', - 'madam' => 'Madam', - 'master' => 'Mr.', - 'miss' => 'Miss', - 'mister' => 'Mr.', - 'mr' => 'Mr.', - 'mrs' => 'Mrs.', - 'ms' => 'Ms.', - 'mx' => 'Mx.', - 'rev' => 'Rev.', - 'sir' => 'Sir', - ]; + protected $normalized = ''; - /** - * check if the given word is a viable salutation - * - * @param string $word the word to check - * @return bool - */ - 
public static function isSalutation($word): bool + public function __construct(string $value, string $normalized = null) { - return (array_key_exists(self::getKey($word), static::$salutations)); - } + $this->normalized = $normalized ?? $value; - /** - * get the registry lookup key for the given word - * - * @param string $word the word - * @return string the key - */ - protected static function getKey($word): string - { - return strtolower(str_replace('.', '', $word)); + return parent::__construct($value); } /** - * normalize by looking up the wrapped value against the registry + * if this is a lastname prefix, look up normalized version from registry + * otherwise camelcase the lastname * * @return string */ public function normalize(): string { - return static::$salutations[self::getKey($this->getValue())]; + return $this->normalized; } } diff --git a/src/Part/Suffix.php b/src/Part/Suffix.php index 7c7edd2..bb5c24c 100644 --- a/src/Part/Suffix.php +++ b/src/Part/Suffix.php @@ -4,63 +4,23 @@ class Suffix extends AbstractPart { - /** - * @var array possible suffixes - */ - protected static $suffixes = [ - '1st' => '1st', - '2nd' => '2nd', - '3rd' => '3rd', - '4th' => '4th', - '5th' => '5th', - 'i' => 'I', - 'ii' => 'II', - 'iii' => 'III', - 'iv' => 'IV', - 'v' => 'V', - 'apr' => 'APR', - 'cme' => 'CME', - 'dmd' => 'DMD', - 'jr' => 'Jr', - 'junior' => 'Junior', - 'ma' => 'MA', - 'md' => 'MD', - 'pe' => 'PE', - 'phd' => 'PhD', - 'rph' => 'RPh', - 'senior' => 'Senior', - 'sr' => 'Sr', - ]; + protected $normalized = ''; - /** - * check if the given word is a viable suffix - * - * @param string $word the word to check - * @return bool - */ - public static function isSuffix($word): bool + public function __construct(string $value, string $normalized = null) { - return (array_key_exists(self::getKey($word), static::$suffixes)); - } + $this->normalized = $normalized ?? 
$value; - /** - * get the registry lookup key for the given word - * - * @param string $word the word - * @return string the key - */ - protected static function getKey($word): string - { - return strtolower(str_replace('.', '', $word)); + return parent::__construct($value); } /** - * lookup the normalized suffix from the registry + * if this is a lastname prefix, look up normalized version from registry + * otherwise camelcase the lastname * * @return string */ public function normalize(): string { - return static::$suffixes[self::getKey($this->getValue())]; + return $this->normalized; } } diff --git a/tests/GermanParserTest.php b/tests/GermanParserTest.php new file mode 100644 index 0000000..8e6641d --- /dev/null +++ b/tests/GermanParserTest.php @@ -0,0 +1,66 @@ + 'Herr', + 'lastname' => 'Schmidt', + ] + ], + [ + 'Frau Maria Lange', + [ + 'salutation' => 'Frau', + 'firstname' => 'Maria', + 'lastname' => 'Lange', + ] + ], + [ + 'Hr. Juergen von der Lippe', + [ + 'salutation' => 'Herr', + 'firstname' => 'Juergen', + 'lastname' => 'von der Lippe', + ] + ], + [ + 'Fr. 
Charlotte von Stein', + [ + 'salutation' => 'Frau', + 'firstname' => 'Charlotte', + 'lastname' => 'von Stein', + ] + ], + ]; + } + + /** + * @dataProvider provider + * + * @param $input + * @param $expectation + */ + public function testParse($input, $expectation) + { + $parser = new Parser([ + new German() + ]); + $name = $parser->parse($input); + + $this->assertInstanceOf(Name::class, $name); + $this->assertEquals($expectation, $name->getAll()); + } +} diff --git a/tests/Mapper/NicknameMapperTest.php b/tests/Mapper/NicknameMapperTest.php index ae55c98..592cf27 100644 --- a/tests/Mapper/NicknameMapperTest.php +++ b/tests/Mapper/NicknameMapperTest.php @@ -24,10 +24,35 @@ public function provider() 'James', new Nickname('Jim'), 'T.', - 'Kirk' + 'Kirk', ], ], [ + 'input' => [ + 'James', + '(\'Jim\')', + 'T.', + 'Kirk', + ], + 'expectation' => [ + 'James', + new Nickname('Jim'), + 'T.', + 'Kirk', + ], + ], + [ + 'input' => [ + 'William', + '"Will"', + 'Shatner', + ], + 'expectation' => [ + 'William', + new Nickname('Will'), + 'Shatner', + ], + ], [ 'input' => [ new Salutation('Mr'), 'Andre', @@ -45,11 +70,54 @@ public function provider() 'Roussimoff', ], ], + [ + 'input' => [ + new Salutation('Mr'), + 'Andre', + '["The', + 'Giant"]', + 'Rene', + 'Roussimoff', + ], + 'expectation' => [ + new Salutation('Mr'), + 'Andre', + new Nickname('The'), + new Nickname('Giant'), + 'Rene', + 'Roussimoff', + ], + ], + [ + 'input' => [ + new Salutation('Mr'), + 'Andre', + '"The', + 'Giant"', + 'Rene', + 'Roussimoff', + ], + 'expectation' => [ + new Salutation('Mr'), + 'Andre', + new Nickname('The'), + new Nickname('Giant'), + 'Rene', + 'Roussimoff', + ], + ], ]; } protected function getMapper() { - return new NicknameMapper(); + return new NicknameMapper([ + '[' => ']', + '{' => '}', + '(' => ')', + '<' => '>', + '"' => '"', + '\'' => '\'' + ]); } } diff --git a/tests/Mapper/SalutationMapperTest.php b/tests/Mapper/SalutationMapperTest.php index 7366fe0..fd35c84 100644 --- 
a/tests/Mapper/SalutationMapperTest.php +++ b/tests/Mapper/SalutationMapperTest.php @@ -21,7 +21,7 @@ public function provider() 'Pan', ], 'expectation' => [ - new Salutation('Mr.'), + new Salutation('Mr.', 'Mr.'), 'Pan', ], ], @@ -32,7 +32,7 @@ public function provider() 'Pan', ], 'expectation' => [ - new Salutation('Mr'), + new Salutation('Mr', 'Mr.'), 'Peter', 'Pan', ], @@ -44,7 +44,7 @@ public function provider() 'Miss', ], 'expectation' => [ - new Salutation('Mr'), + new Salutation('Mr', 'Mr.'), new Firstname('James'), 'Miss', ], diff --git a/tests/NameTest.php b/tests/NameTest.php index 0ef462b..6830f57 100644 --- a/tests/NameTest.php +++ b/tests/NameTest.php @@ -16,13 +16,13 @@ class NameTest extends TestCase public function testToString() { $parts = [ - new Salutation('Mr'), + new Salutation('Mr', 'Mr.'), new Firstname('James'), new Middlename('Morgan'), new Nickname('Jim'), new Initial('T.'), new Lastname('Smith'), - new Suffix('I'), + new Suffix('I', 'I'), ]; $name = new Name($parts); diff --git a/tests/ParserTest.php b/tests/ParserTest.php index 26c7a2f..d79a655 100644 --- a/tests/ParserTest.php +++ b/tests/ParserTest.php @@ -3,6 +3,7 @@ namespace TheIconic\NameParser; use PHPUnit\Framework\TestCase; +use TheIconic\NameParser\Language\German; class ParserTest extends TestCase { @@ -445,4 +446,13 @@ public function testSetGetWhitespace() $parser->setWhitespace(' _'); $this->assertSame(' _', $parser->getWhitespace()); } + + public function testSetGetNicknameDelimiters() + { + $parser = new Parser(); + $parser->setNicknameDelimiters(['[' => ']']); + $this->assertSame(['[' => ']'], $parser->getNicknameDelimiters()); + $this->assertSame('Jim', $parser->parse('[Jim]')->getNickname()); + $this->assertNotSame('Jim', $parser->parse('(Jim)')->getNickname()); + } } From 129db2c56f732b51033b59fe941113525904e62b Mon Sep 17 00:00:00 2001 From: Andre Wyrwa Date: Wed, 18 Apr 2018 00:50:51 +1000 Subject: [PATCH 4/7] generate coverage reports during phpunit runs --- 
.gitignore | 1 + phpunit.xml.dist | 13 ++++++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 71037ec..4bb9daf 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ /vendor/ +/tests/coverage phpunit.xml diff --git a/phpunit.xml.dist b/phpunit.xml.dist index 0874a48..3a89b9a 100644 --- a/phpunit.xml.dist +++ b/phpunit.xml.dist @@ -2,7 +2,18 @@ + > + + + + + + + + + + + src From 9329083977e4ae5ddf15e90a70a7801e194b8e7c Mon Sep 17 00:00:00 2001 From: Andre Wyrwa Date: Wed, 18 Apr 2018 01:30:26 +1000 Subject: [PATCH 5/7] improve readme --- README.md | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 81d4dfb..3bbab33 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,9 @@ E.g. **Mr Anthony R Von Fange III** is parsed to - lastname: **von Fange** - suffix: **III** -## Parseable patterns +## Features + +### Supported patterns This parser is able to handle name patterns with and without comma: ``` ... [firstname] ... [lastname] ... @@ -36,7 +38,8 @@ This parser is able to handle name patterns with and without comma: ``` ... [lastname] ..., ... [firstname] ..., [suffix] ``` -It supports + +### Supported parts - salutations (e.g. Mr, Mrs, Dr, etc.) - first name - middle names @@ -45,6 +48,13 @@ It supports - last names (also supports prefixes like von, de etc.) - suffixes (Jr, Senior, 3rd, PhD, etc.) 
+### Other features +- multi-language support for salutations, suffixes and lastname prefixes +- customizable nickname delimiters +- customizable normalisation of all output strings + (original values remain accessible) +- customizable whitespace + ## Examples More than 80 different successfully parsed name patterns can be found in the @@ -57,6 +67,7 @@ composer require theiconic/name-parser ## Usage +### Basic usage ```php setNicknameDelimiters(['(' => ')']); +``` + +### Setting whitespace characters +```php +$parser = new TheIconic\NameParser\Parser(); +$parser->setWhitespace("\t _."); +``` + ## License THE ICONIC Name Parser library for PHP is released under the MIT License. From a2b34a1a3371d1f7c756019e03f605051915a227 Mon Sep 17 00:00:00 2001 From: Andre Wyrwa Date: Wed, 18 Apr 2018 01:38:57 +1000 Subject: [PATCH 6/7] scrutinize --- src/Mapper/NicknameMapper.php | 6 ++++-- src/Part/LastnamePrefix.php | 2 +- src/Part/Salutation.php | 2 +- src/Part/Suffix.php | 2 +- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/Mapper/NicknameMapper.php b/src/Mapper/NicknameMapper.php index a8fb500..f12867b 100644 --- a/src/Mapper/NicknameMapper.php +++ b/src/Mapper/NicknameMapper.php @@ -38,6 +38,8 @@ public function map(array $parts): array $regexp = $this->buildRegexp(); + $closingDelimiter = ''; + foreach ($parts as $k => $part) { if ($part instanceof AbstractPart) { continue; @@ -46,14 +48,14 @@ public function map(array $parts): array if (preg_match($regexp, $part, $matches)) { $isEncapsulated = true; $part = substr($part, 1); - $closing = $this->delimiters[$matches[1]]; + $closingDelimiter = $this->delimiters[$matches[1]]; } if (!$isEncapsulated) { continue; } - if ($closing === substr($part, -1)) { + if ($closingDelimiter === substr($part, -1)) { $isEncapsulated = false; $part = substr($part, 0, -1); } diff --git a/src/Part/LastnamePrefix.php b/src/Part/LastnamePrefix.php index bcddc45..ec44ae6 100644 --- a/src/Part/LastnamePrefix.php +++ 
b/src/Part/LastnamePrefix.php @@ -10,7 +10,7 @@ public function __construct(string $value, string $normalized = null) { $this->normalized = $normalized ?? $value; - return parent::__construct($value); + parent::__construct($value); } /** diff --git a/src/Part/Salutation.php b/src/Part/Salutation.php index 50f4820..3908671 100644 --- a/src/Part/Salutation.php +++ b/src/Part/Salutation.php @@ -10,7 +10,7 @@ public function __construct(string $value, string $normalized = null) { $this->normalized = $normalized ?? $value; - return parent::__construct($value); + parent::__construct($value); } /** diff --git a/src/Part/Suffix.php b/src/Part/Suffix.php index bb5c24c..36bb51a 100644 --- a/src/Part/Suffix.php +++ b/src/Part/Suffix.php @@ -10,7 +10,7 @@ public function __construct(string $value, string $normalized = null) { $this->normalized = $normalized ?? $value; - return parent::__construct($value); + parent::__construct($value); } /** From 11fdcf5a9882374bbd11d4c215afca70976bc7ba Mon Sep 17 00:00:00 2001 From: Andre Wyrwa Date: Wed, 18 Apr 2018 01:43:50 +1000 Subject: [PATCH 7/7] introduce PreNormalizedPart abstract --- src/Part/PreNormalizedPart.php | 26 ++++++++++++++++++++++++++ src/Part/Salutation.php | 20 +------------------- src/Part/Suffix.php | 20 +------------------- 3 files changed, 28 insertions(+), 38 deletions(-) create mode 100644 src/Part/PreNormalizedPart.php diff --git a/src/Part/PreNormalizedPart.php b/src/Part/PreNormalizedPart.php new file mode 100644 index 0000000..cc5128c --- /dev/null +++ b/src/Part/PreNormalizedPart.php @@ -0,0 +1,26 @@ +<?php + +namespace TheIconic\NameParser\Part; + +abstract class PreNormalizedPart extends AbstractPart +{ + protected $normalized = ''; + + public function __construct(string $value, string $normalized = null) + { + $this->normalized = $normalized ??
$value; + + parent::__construct($value); + } + + /** + * return the normalized value that was passed to the constructor, + * or the original value if no normalized value was given + * + * @return string + */ + public function normalize(): string + { + return $this->normalized; + } +} diff --git a/src/Part/Salutation.php b/src/Part/Salutation.php index 3908671..f277ff3 100644 --- a/src/Part/Salutation.php +++ b/src/Part/Salutation.php @@ -2,25 +2,7 @@ namespace TheIconic\NameParser\Part; -class Salutation extends AbstractPart +class Salutation extends PreNormalizedPart { - protected $normalized = ''; - public function __construct(string $value, string $normalized = null) - { - $this->normalized = $normalized ?? $value; - - parent::__construct($value); - } - - /** - * if this is a lastname prefix, look up normalized version from registry - * otherwise camelcase the lastname - * - * @return string - */ - public function normalize(): string - { - return $this->normalized; - } } diff --git a/src/Part/Suffix.php b/src/Part/Suffix.php index 36bb51a..e87e87c 100644 --- a/src/Part/Suffix.php +++ b/src/Part/Suffix.php @@ -2,25 +2,7 @@ namespace TheIconic\NameParser\Part; -class Suffix extends AbstractPart +class Suffix extends PreNormalizedPart { - protected $normalized = ''; - public function __construct(string $value, string $normalized = null) - { - $this->normalized = $normalized ?? $value; - - parent::__construct($value); - } - - /** - * if this is a lastname prefix, look up normalized version from registry - * otherwise camelcase the lastname - * - * @return string - */ - public function normalize(): string - { - return $this->normalized; - } }