Skip to content

Commit

Permalink
Performance optimizations.
Browse files: browse the repository at this point in the history.
  • Loading branch information
ivopetkov committed Jan 22, 2017
1 parent 51886f6 commit 1e1eeb4
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 33 deletions.
31 changes: 7 additions & 24 deletions src/HTML5DOMDocument.php
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ public function loadHTML($source, $options = 0)
$source = '<body>' . $source . '</body>';
}

if (stripos($source, '<!DOCTYPE') !== 0) {
if (strtoupper(substr($source, 0, 9)) !== '<!DOCTYPE') {
$source = '<!DOCTYPE html>' . $source;
}

Expand Down Expand Up @@ -89,16 +89,8 @@ public function loadHTML($source, $options = 0)
}

// Preserve html entities
$matches = [];
preg_match_all('/&[a-zA-Z]*;/', $source, $matches);
foreach ($matches[0] as $match) {
$source = str_replace($match, 'html5-dom-document-internal-entity1-' . trim($match, '&;') . '-end', $source);
}
$matches = [];
preg_match_all('/&#[0-9]*;/', $source, $matches);
foreach ($matches[0] as $match) {
$source = str_replace($match, 'html5-dom-document-internal-entity2-' . trim($match, '&#;') . '-end', $source);
}
$source = preg_replace('/&([a-zA-Z]*);/', 'html5-dom-document-internal-entity1-$1-end', $source);
$source = preg_replace('/&#([0-9]*);/', 'html5-dom-document-internal-entity2-$1-end', $source);

$result = parent::loadHTML('<?xml encoding="utf-8" ?>' . $source, $options);
if ($internalErrorsOptionValue === false) {
Expand Down Expand Up @@ -295,24 +287,15 @@ public function saveHTML(\DOMNode $node = NULL)
$html = str_replace('<head></head>', '', $html);
}
$html = str_replace('html5-dom-document-internal-content', '', $html);
$matches = [];
preg_match_all('/html5-dom-document-internal-entity1-(.*?)-end/', $html, $matches);
foreach ($matches[0] as $i => $match) {
$html = str_replace($match, '&' . $matches[1][$i] . ';', $html);
}
$matches = [];
preg_match_all('/html5-dom-document-internal-entity2-(.*?)-end/', $html, $matches);
foreach ($matches[0] as $i => $match) {
$html = str_replace($match, '&#' . $matches[1][$i] . ';', $html);
if (strpos($html, 'html5-dom-document-internal-entity') !== false) {
$html = preg_replace('/html5-dom-document-internal-entity1-(.*?)-end/', '&$1;', $html);
$html = preg_replace('/html5-dom-document-internal-entity2-(.*?)-end/', '&#$1;', $html);
}
if ($removeHtmlElement) {
$html = str_replace('<html></html>', '', $html);
}

$voidElementsList = ['area', 'base', 'br', 'col', 'command', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr'];
foreach ($voidElementsList as $elementName) {
$html = str_replace('</' . $elementName . '>', '', $html);
}
$html = str_replace(['</area>', '</base>', '</br>', '</col>', '</command>', '</embed>', '</hr>', '</img>', '</input>', '</keygen>', '</link>', '</meta>', '</param>', '</source>', '</track>', '</wbr>'], '', $html);
// Remove the whitespace between the doctype and html tag
$html = preg_replace('/\>\s\<html/', '><html', $html, 1);
return trim($html);
Expand Down
20 changes: 11 additions & 9 deletions src/HTML5DOMElement.php
Original file line number Diff line number Diff line change
Expand Up @@ -82,15 +82,17 @@ public function __set($name, $value)
*/
private function updateResult($value)
{
$matches = [];
preg_match_all('/html5-dom-document-internal-entity1-(.*?)-end/', $value, $matches);
foreach ($matches[0] as $i => $match) {
$value = str_replace($match, html_entity_decode('&' . $matches[1][$i] . ';'), $value);
}
$matches = [];
preg_match_all('/html5-dom-document-internal-entity2-(.*?)-end/', $value, $matches);
foreach ($matches[0] as $i => $match) {
$value = str_replace($match, html_entity_decode('&#' . $matches[1][$i] . ';'), $value);
if (strpos($value, 'html5-dom-document-internal-entity') !== false) {
$matches = [];
preg_match_all('/html5-dom-document-internal-entity1-(.*?)-end/', $value, $matches);
foreach ($matches[0] as $i => $match) {
$value = str_replace($match, html_entity_decode('&' . $matches[1][$i] . ';'), $value);
}
$matches = [];
preg_match_all('/html5-dom-document-internal-entity2-(.*?)-end/', $value, $matches);
foreach ($matches[0] as $i => $match) {
$value = str_replace($match, html_entity_decode('&#' . $matches[1][$i] . ';'), $value);
}
}
return $value;
}
Expand Down

0 comments on commit 1e1eeb4

Please sign in to comment.