Skip to content

Commit

Permalink
Merge pull request #3 from theiconic/feature/languages
Browse files Browse the repository at this point in the history
add multi-language support
  • Loading branch information
wyrfel authored Apr 17, 2018
2 parents 17cd229 + 11fdcf5 commit faaa310
Show file tree
Hide file tree
Showing 31 changed files with 668 additions and 258 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
/vendor/
/tests/coverage
phpunit.xml
35 changes: 33 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@ E.g. **Mr Anthony R Von Fange III** is parsed to
- lastname: **von Fange**
- suffix: **III**

## Parseable patterns
## Features

### Supported patterns
This parser is able to handle name patterns with and without comma:
```
... [firstname] ... [lastname] ...
Expand All @@ -36,7 +38,8 @@ This parser is able to handle name patterns with and without comma:
```
... [lastname] ..., ... [firstname] ..., [suffix]
```
It supports

### Supported parts
- salutations (e.g. Mr, Mrs, Dr, etc.)
- first name
- middle names
Expand All @@ -45,6 +48,13 @@ It supports
- last names (also supports prefixes like von, de etc.)
- suffixes (Jr, Senior, 3rd, PhD, etc.)

### Other features
- multi-language support for salutations, suffixes and lastname prefixes
- customizable nickname delimiters
- customizable normalisation of all output strings
(original values remain accessible)
- customizable whitespace

## Examples

More than 80 different successfully parsed name patterns can be found in the
Expand All @@ -57,6 +67,7 @@ composer require theiconic/name-parser

## Usage

### Basic usage
```php
<?php

Expand All @@ -78,6 +89,26 @@ echo $name; // re-prints the full normalised name
```
An empty string is returned for missing parts.

### Setting Languages
```php
$parser = new TheIconic\NameParser\Parser([
    new TheIconic\NameParser\Language\English(), //default
    new TheIconic\NameParser\Language\German(),
]);
```

### Setting nickname delimiters
```php
$parser = new TheIconic\NameParser\Parser();
$parser->setNicknameDelimiters(['(' => ')']);
```

### Setting whitespace characters
```php
$parser = new TheIconic\NameParser\Parser();
$parser->setWhitespace("\t _.");
```

## License

THE ICONIC Name Parser library for PHP is released under the MIT License.
4 changes: 2 additions & 2 deletions composer.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 12 additions & 1 deletion phpunit.xml.dist
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,18 @@

<phpunit bootstrap="vendor/autoload.php"
colors="true"
>
>
<logging>
<log type="coverage-html" target="./tests/coverage" lowUpperBound="35" highLowerBound="70"/>
<log type="coverage-clover" target="./tests/coverage/coverage.xml"/>
<log type="coverage-php" target="./tests/coverage/coverage.serialized"/>
<log type="coverage-text" target="php://stdout" showUncoveredFiles="false"/>
<log type="json" target="./tests/coverage/logfile.json"/>
<log type="tap" target="./tests/coverage/logfile.tap"/>
<log type="junit" target="./tests/coverage/logfile.xml" logIncompleteSkipped="false"/>
<log type="testdox-html" target="./tests/coverage/testdox.html"/>
<log type="testdox-text" target="./tests/coverage/testdox.txt"/>
</logging>
<filter>
<whitelist processUncoveredFilesFromWhitelist="true">
<directory suffix=".php">src</directory>
Expand Down
81 changes: 81 additions & 0 deletions src/Language/English.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
<?php

namespace TheIconic\NameParser\Language;

use TheIconic\NameParser\LanguageInterface;

/**
 * English language definition for the name parser.
 *
 * Each constant is a map whose keys are lower-case, dot-free spellings and
 * whose values are the spelling associated with that key.
 */
class English implements LanguageInterface
{
    /**
     * Name suffixes: ordinals, roman numerals, generational and academic/professional markers.
     */
    const SUFFIXES = [
        '1st' => '1st',
        '2nd' => '2nd',
        '3rd' => '3rd',
        '4th' => '4th',
        '5th' => '5th',
        'i' => 'I',
        'ii' => 'II',
        'iii' => 'III',
        'iv' => 'IV',
        'v' => 'V',
        'apr' => 'APR',
        'cme' => 'CME',
        'dmd' => 'DMD',
        'jr' => 'Jr',
        'junior' => 'Junior',
        'ma' => 'MA',
        'md' => 'MD',
        'pe' => 'PE',
        'phd' => 'PhD',
        'rph' => 'RPh',
        'senior' => 'Senior',
        'sr' => 'Sr',
    ];

    /**
     * Salutations; several keys may share one value (e.g. 'mister' and 'mr' both give 'Mr.').
     */
    const SALUTATIONS = [
        'dr' => 'Dr.',
        'fr' => 'Fr.',
        'madam' => 'Madam',
        'master' => 'Mr.',
        'miss' => 'Miss',
        'mister' => 'Mr.',
        'mr' => 'Mr.',
        'mrs' => 'Mrs.',
        'ms' => 'Ms.',
        'mx' => 'Mx.',
        'rev' => 'Rev.',
        'sir' => 'Sir',
    ];

    /**
     * Lastname prefixes (particles such as "von" or "de").
     */
    const LASTNAME_PREFIXES = [
        'da' => 'da',
        'de' => 'de',
        'del' => 'del',
        'della' => 'della',
        'der' => 'der',
        'di' => 'di',
        'du' => 'du',
        'la' => 'la',
        'pietro' => 'pietro',
        'st' => 'st.',
        'ter' => 'ter',
        'van' => 'van',
        'vanden' => 'vanden',
        'vere' => 'vere',
        'von' => 'von',
    ];

    /**
     * @return array the SUFFIXES map
     */
    public function getSuffixes(): array
    {
        return self::SUFFIXES;
    }

    /**
     * @return array the SALUTATIONS map
     */
    public function getSalutations(): array
    {
        return self::SALUTATIONS;
    }

    /**
     * @return array the LASTNAME_PREFIXES map
     */
    public function getLastnamePrefixes(): array
    {
        return self::LASTNAME_PREFIXES;
    }
}
48 changes: 48 additions & 0 deletions src/Language/German.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
<?php

namespace TheIconic\NameParser\Language;

use TheIconic\NameParser\LanguageInterface;

/**
 * German language definition for the name parser.
 *
 * Each constant is a map from a lower-case spelling to the spelling
 * associated with that key.
 */
class German implements LanguageInterface
{
    /**
     * Name suffixes: dotted ordinals and roman numerals.
     *
     * NOTE(review): the ordinal keys keep their trailing dot, whereas
     * AbstractMapper::getKey() removes dots before a lookup - confirm that
     * the mapper consuming this map can actually reach these entries.
     */
    const SUFFIXES = [
        '1.' => '1.',
        '2.' => '2.',
        '3.' => '3.',
        '4.' => '4.',
        '5.' => '5.',
        'i' => 'I',
        'ii' => 'II',
        'iii' => 'III',
        'iv' => 'IV',
        'v' => 'V',
    ];

    /**
     * Salutations; the abbreviations 'hr' and 'fr' share the value of their long form.
     */
    const SALUTATIONS = [
        'herr' => 'Herr',
        'hr' => 'Herr',
        'frau' => 'Frau',
        'fr' => 'Frau',
    ];

    /**
     * Lastname prefixes (particles such as "von").
     */
    const LASTNAME_PREFIXES = [
        'der' => 'der',
        'von' => 'von',
    ];

    /**
     * @return array the SUFFIXES map
     */
    public function getSuffixes(): array
    {
        return self::SUFFIXES;
    }

    /**
     * @return array the SALUTATIONS map
     */
    public function getSalutations(): array
    {
        return self::SALUTATIONS;
    }

    /**
     * @return array the LASTNAME_PREFIXES map
     */
    public function getLastnamePrefixes(): array
    {
        return self::LASTNAME_PREFIXES;
    }
}
12 changes: 12 additions & 0 deletions src/LanguageInterface.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
<?php

namespace TheIconic\NameParser;

/**
 * Contract for a language definition: three arrays describing the
 * suffixes, salutations and lastname prefixes of that language.
 */
interface LanguageInterface
{
    /**
     * @return array the suffixes of this language
     */
    public function getSuffixes(): array;

    /**
     * @return array the salutations of this language
     */
    public function getSalutations(): array;

    /**
     * @return array the lastname prefixes of this language
     */
    public function getLastnamePrefixes(): array;
}
28 changes: 11 additions & 17 deletions src/Mapper/AbstractMapper.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,23 +6,6 @@

abstract class AbstractMapper
{
/**
* @var array
*/
protected $options = [];

/**
* constructor allows passing of options
*
* @param array $options
*/
public function __construct(array $options = null)
{
if (null !== $options) {
$this->options = array_merge($this->options, $options);
}
}

/**
* implements the mapping of parts
*
Expand Down Expand Up @@ -70,4 +53,15 @@ protected function findFirstMapped(string $type, array $parts)

return false;
}

/**
 * Build the registry lookup key for a word: every '.' is removed and the
 * remainder is passed through strtolower().
 *
 * @param string $word the word to derive the key from
 * @return string the lookup key
 */
protected function getKey($word): string
{
    $withoutDots = str_replace('.', '', $word);

    return strtolower($withoutDots);
}
}
14 changes: 7 additions & 7 deletions src/Mapper/InitialMapper.php
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,12 @@
*/
class InitialMapper extends AbstractMapper
{
/**
* @var array options
*/
protected $options = [
'match_last' => false,
];
protected $matchLastPart = false;

/**
 * Create the mapper.
 *
 * @param bool $matchLastPart stored on the instance; while it is false,
 *                            map() skips the part whose key equals $last
 *                            (presumably the final part - confirm)
 */
public function __construct(bool $matchLastPart = false)
{
$this->matchLastPart = $matchLastPart;
}

/**
* map intials in parts array
Expand All @@ -32,7 +32,7 @@ public function map(array $parts): array
continue;
}

if (!$this->options['match_last'] && $k === $last) {
if (!$this->matchLastPart && $k === $last) {
continue;
}

Expand Down
36 changes: 25 additions & 11 deletions src/Mapper/LastnameMapper.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,23 @@

namespace TheIconic\NameParser\Mapper;

use TheIconic\NameParser\LanguageInterface;
use TheIconic\NameParser\Part\AbstractPart;
use TheIconic\NameParser\Part\Lastname;
use TheIconic\NameParser\Part\LastnamePrefix;
use TheIconic\NameParser\Part\Suffix;

class LastnameMapper extends AbstractMapper
{
/**
* @var array options
*/
protected $options = [
'match_single' => false,
];
protected $prefixes = [];

protected $matchSinglePart = false;

/**
 * Create the mapper.
 *
 * @param array $prefixes        lastname prefixes indexed by their getKey() lookup key;
 *                               the value found for a key is passed on to LastnamePrefix
 * @param bool  $matchSinglePart stored on the instance; while it is false,
 *                               map() returns fewer than two parts unchanged
 */
public function __construct(array $prefixes, bool $matchSinglePart = false)
{
$this->prefixes = $prefixes;
$this->matchSinglePart = $matchSinglePart;
}

/**
* map lastnames in the parts array
Expand All @@ -23,7 +28,7 @@ class LastnameMapper extends AbstractMapper
*/
public function map(array $parts): array
{
if (!$this->options['match_single'] && count($parts) < 2) {
if (!$this->matchSinglePart && count($parts) < 2) {
return $parts;
}

Expand Down Expand Up @@ -56,9 +61,7 @@ protected function mapReversedParts(array $parts): array

if ($this->isFollowedByLastnamePart($originalParts, $originalIndex)) {
if ($this->isApplicablePrefix($originalParts, $originalIndex)) {
$lastname = new Lastname($part);
$lastname->setApplyPrefix(true);
$parts[$k] = $lastname;
$parts[$k] = new LastnamePrefix($part, $this->prefixes[$this->getKey($part)]);
continue;
}
break;
Expand Down Expand Up @@ -98,10 +101,21 @@ protected function isFollowedByLastnamePart(array $parts, int $index): bool
*/
protected function isApplicablePrefix(array $parts, int $index): bool
{
if (!Lastname::isPrefix($parts[$index])) {
if (!$this->isPrefix($parts[$index])) {
return false;
}

return $this->hasUnmappedPartsBefore($parts, $index);
}

/**
 * Tell whether a word is one of the configured lastname prefixes.
 *
 * The word is converted with getKey() and the result is looked up among
 * the keys of $this->prefixes.
 *
 * @param string $word the word to check
 * @return bool true when the derived key exists in the prefix map
 */
protected function isPrefix($word): bool
{
    $lookupKey = $this->getKey($word);

    return array_key_exists($lookupKey, $this->prefixes);
}
}
Loading

0 comments on commit faaa310

Please sign in to comment.