diff --git a/src/HTML5DOMDocument.php b/src/HTML5DOMDocument.php index bf4f310..08ceb65 100644 --- a/src/HTML5DOMDocument.php +++ b/src/HTML5DOMDocument.php @@ -57,7 +57,7 @@ public function loadHTML($source, $options = 0) $source = '
' . $source . ''; } - if (stripos($source, '' . $source; } @@ -89,16 +89,8 @@ public function loadHTML($source, $options = 0) } // Preserve html entities - $matches = []; - preg_match_all('/&[a-zA-Z]*;/', $source, $matches); - foreach ($matches[0] as $match) { - $source = str_replace($match, 'html5-dom-document-internal-entity1-' . trim($match, '&;') . '-end', $source); - } - $matches = []; - preg_match_all('/[0-9]*;/', $source, $matches); - foreach ($matches[0] as $match) { - $source = str_replace($match, 'html5-dom-document-internal-entity2-' . trim($match, '') . '-end', $source); - } + $source = preg_replace('/&([a-zA-Z]*);/', 'html5-dom-document-internal-entity1-$1-end', $source); + $source = preg_replace('/([0-9]*);/', 'html5-dom-document-internal-entity2-$1-end', $source); $result = parent::loadHTML('' . $source, $options); if ($internalErrorsOptionValue === false) { @@ -295,24 +287,15 @@ public function saveHTML(\DOMNode $node = NULL) $html = str_replace('', '', $html); } $html = str_replace('html5-dom-document-internal-content', '', $html); - $matches = []; - preg_match_all('/html5-dom-document-internal-entity1-(.*?)-end/', $html, $matches); - foreach ($matches[0] as $i => $match) { - $html = str_replace($match, '&' . $matches[1][$i] . ';', $html); - } - $matches = []; - preg_match_all('/html5-dom-document-internal-entity2-(.*?)-end/', $html, $matches); - foreach ($matches[0] as $i => $match) { - $html = str_replace($match, '' . $matches[1][$i] . ';', $html); + if (strpos($html, 'html5-dom-document-internal-entity') !== false) { + $html = preg_replace('/html5-dom-document-internal-entity1-(.*?)-end/', '&$1;', $html); + $html = preg_replace('/html5-dom-document-internal-entity2-(.*?)-end/', '$1;', $html); } if ($removeHtmlElement) { $html = str_replace('', '', $html); } - $voidElementsList = ['area', 'base', 'br', 'col', 'command', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr']; - foreach ($voidElementsList as $elementName) { - $html = str_replace('' . $elementName . '>', '', $html); - } + $html = str_replace(['', '', '', '', '', '', '', '', '', '', '', '', '', '', '', ''], '', $html); // Remove the whitespace between the doctype and html tag $html = preg_replace('/\>\s\ $match) { - $value = str_replace($match, html_entity_decode('&' . $matches[1][$i] . ';'), $value); - } - $matches = []; - preg_match_all('/html5-dom-document-internal-entity2-(.*?)-end/', $value, $matches); - foreach ($matches[0] as $i => $match) { - $value = str_replace($match, html_entity_decode('' . $matches[1][$i] . ';'), $value); + if (strpos($value, 'html5-dom-document-internal-entity') !== false) { + $matches = []; + preg_match_all('/html5-dom-document-internal-entity1-(.*?)-end/', $value, $matches); + foreach ($matches[0] as $i => $match) { + $value = str_replace($match, html_entity_decode('&' . $matches[1][$i] . ';'), $value); + } + $matches = []; + preg_match_all('/html5-dom-document-internal-entity2-(.*?)-end/', $value, $matches); + foreach ($matches[0] as $i => $match) { + $value = str_replace($match, html_entity_decode('' . $matches[1][$i] . ';'), $value); + } } return $value; }