-
Notifications
You must be signed in to change notification settings - Fork 16
#21 Added support for embedded directions #23
base: master
Are you sure you want to change the base?
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -68,6 +68,13 @@ class String implements \ArrayAccess, \Countable, \IteratorAggregate { | |
*/ | ||
const RTL = 1; | ||
|
||
/** | ||
* Bi-Directional. | ||
* | ||
* @const int | ||
*/ | ||
const BIDI = 2; | ||
|
||
/** | ||
* ZERO WIDTH NON-BREAKING SPACE (ZWNPBSP, aka byte-order mark, BOM). | ||
* | ||
|
@@ -89,6 +96,13 @@ class String implements \ArrayAccess, \Countable, \IteratorAggregate { | |
*/ | ||
const RLM = 0x200f; | ||
|
||
/** | ||
* ARABIC LETTER MARK. | ||
* | ||
* @const int | ||
*/ | ||
const ARM = 0x061c; | ||
|
||
/** | ||
* LEFT-TO-RIGHT EMBEDDING. | ||
* | ||
|
@@ -110,6 +124,27 @@ class String implements \ArrayAccess, \Countable, \IteratorAggregate { | |
*/ | ||
const PDF = 0x202c; | ||
|
||
/** | ||
* LEFT-TO-RIGHT ISOLATE. | ||
* | ||
* @const int | ||
*/ | ||
const LRI = 0x2066; | ||
|
||
/** | ||
* RIGHT-TO-LEFT ISOLATE. | ||
* | ||
* @const int | ||
*/ | ||
const RLI = 0x2067; | ||
|
||
/** | ||
* POP DIRECTIONAL ISOLATE. | ||
* | ||
* @const int | ||
*/ | ||
const PDI = 0x2069; | ||
|
||
/** | ||
* LEFT-TO-RIGHT OVERRIDE. | ||
* | ||
|
@@ -746,7 +781,7 @@ public function getWidth ( ) { | |
|
||
/** | ||
* Get direction of the current string. | ||
* Please, see the self::LTR and self::RTL constants. | ||
* Please, see the self::LTR, self::RTL and self::BIDI constants. | ||
* It does not yet support embedding directions. | ||
* | ||
* @access public | ||
|
@@ -755,32 +790,59 @@ public function getWidth ( ) { | |
public function getDirection ( ) { | ||
|
||
if(null === $this->_direction) { | ||
|
||
if(null === $this->_string) | ||
$this->_direction = static::LTR; | ||
else | ||
$this->_direction = static::getCharDirection( | ||
mb_substr($this->_string, 0, 1) | ||
); | ||
// Default | ||
$this->_direction = static::LTR; | ||
|
||
// Check for LRM or RLM/ARM | ||
$hasLRM = 0 !== preg_match('#\x{200e}#u', $this->_string); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I prefer to use constant for LRM & co. in the regular expression :-). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The problem here is, that the already defined static constant (LRM, RLM, ARM, etc) cannot be easily included in the regular expression, as they are only integers and regular expression use a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why not :) ill change it accordingly |
||
$hasRLMOrARM = 0 !== preg_match('#\x{200f}|\x{061c}#u', $this->_string); | ||
|
||
if($hasLRM || $hasRLMOrARM) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you explain me (in details) the following algorithm please :-)? It will ease the review. Thanks! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Check for LRM or RLM/ARM codepoints, as the "force" a direction. If we find one, we can immediately tell the direction of the string (either LTR, RTL or BIDI when both kind of codepoints are found) |
||
if($hasLRM && $hasRLMOrARM) | ||
$this->_direction = static::BIDI; | ||
elseif($hasLRM) | ||
$this->_direction = static::LTR; | ||
else | ||
$this->_direction = static::RTL; | ||
} | ||
else { | ||
$this->_direction = static::getCharDirection(mb_substr($this->_string, 0, 1)); | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We "guess" the direction by looking at the first character and set it. |
||
// Check for RLE, RLO or RLI in LTR context -> BIDI | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If we "guessed" LTR, there cannot be any of the RLE, RLO or RLI codepoints, as they would "force" a direction change - not mater what the actual characters are. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why? Do you have references for that please? |
||
if( static::LTR === $this->_direction | ||
&& 0 !== preg_match('#\x{202b}|\x{202e}|\x{2067}#u', $this->_string)) | ||
$this->_direction = static::BIDI; | ||
// Check for LRE, LRO or LRI in RTL context -> BIDI | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same if we "guessed" RTL. |
||
elseif(static::RTL === $this->_direction | ||
&& 0 !== preg_match('#\x{202a}|\x{202d}|\x{2066}#u', $this->_string)) | ||
$this->_direction = static::BIDI; | ||
// Check every other character | ||
else { | ||
foreach ($this as $char) { | ||
if ($this->_direction !== $this->getCharDirection($char)) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The only thing left could be, that we "guessed" wrong (so the first character was either LTR or RTL, but any of the following character has another direction. If we find such a character, we return BIDI and can immediately break out of the loop - else we keep the "guess" we have set in line 809. Note that I am not sure, if PDF and PDI should be excluded here, as they end a directional change and are not really a directional change. But this actually counts for a lot of characters (punctuation, whitespace, ...) - so may this algorithm is "wrong"? So we have to check for way more things (e.g. "السلام عليكم" is RTL, but the whitespace has no real direction - which falls back to LTR. So this algorithm returns BIDI. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh, this is critical in this case :-/. It's not the expected behavior. The algorithm is good but it needs more references I think. Maybe Intl has some ref' about that? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The solution here is to distinguish between weak and hard directions: some I'll do some research on that over the weekend and will post some On Fri, 30 Jan 2015 10:24 Ivan Enderlin [email protected] wrote:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Excellent :-). |
||
$this->_direction = static::BIDI; | ||
break; | ||
} | ||
} | ||
} | ||
} | ||
} | ||
|
||
return $this->_direction; | ||
} | ||
|
||
/** | ||
* Get character of a specific character. | ||
* Please, see the self::LTR and self::RTL constants. | ||
* Returns true if a specific character is RTL. | ||
* | ||
* @access public | ||
* @param string $char Character. | ||
* @return int | ||
* @return bool | ||
*/ | ||
public static function getCharDirection ( $char ) { | ||
|
||
public static function isRTL ( $char ) { | ||
$c = static::toCode($char); | ||
|
||
if(!(0x5be <= $c && 0x10b7f >= $c)) | ||
return static::LTR; | ||
return false; | ||
|
||
if(0x85e >= $c) { | ||
|
||
|
@@ -814,10 +876,14 @@ public static function getCharDirection ( $char ) { | |
|| (0x830 <= $c && 0x83e >= $c) | ||
|| (0x840 <= $c && 0x858 >= $c) | ||
|| 0x85e === $c) | ||
return static::RTL; | ||
return true; | ||
} | ||
elseif(0x200f === $c) | ||
return static::RTL; | ||
elseif( static::RLM === $c | ||
|| static::ARM === $c | ||
|| static::RLO === $c | ||
|| static::RLE === $c | ||
|| static::RLI === $c) | ||
return true; | ||
elseif(0xfb1d <= $c) { | ||
|
||
if( 0xfb1d === $c | ||
|
@@ -855,10 +921,33 @@ public static function getCharDirection ( $char ) { | |
|| (0x10b40 <= $c && 0x10b55 >= $c) | ||
|| (0x10b58 <= $c && 0x10b72 >= $c) | ||
|| (0x10b78 <= $c && 0x10b7f >= $c)) | ||
return static::RTL; | ||
return true; | ||
} | ||
|
||
return static::LTR; | ||
return false; | ||
} | ||
|
||
/** | ||
* Returns true if a specific character is LTR. | ||
* | ||
* @access public | ||
* @param string $char Character. | ||
* @return bool | ||
*/ | ||
public static function isLTR ( $char ) { | ||
return !self::isRTL($char); | ||
} | ||
|
||
/** | ||
* Get direction of a specific character. | ||
* Please, see the self::LTR and self::RTL constants. | ||
* | ||
* @access public | ||
* @param string $char Character. | ||
* @return int | ||
*/ | ||
public static function getCharDirection ( $char ) { | ||
return self::isRTL($char) ? static::RTL : static::LTR; | ||
} | ||
|
||
/** | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Set Default Direction to LTR