From f38f27cec58ca39ae74179c29a59ed690e2125b3 Mon Sep 17 00:00:00 2001 From: Ivo Petkov Date: Fri, 8 Dec 2017 09:03:50 +0200 Subject: [PATCH] Performance optimizations. --- .../Internal/QuerySelectors.php | 67 +++++++++++-------- 1 file changed, 40 insertions(+), 27 deletions(-) diff --git a/src/HTML5DOMDocument/Internal/QuerySelectors.php b/src/HTML5DOMDocument/Internal/QuerySelectors.php index 87c633a..daa6fe9 100644 --- a/src/HTML5DOMDocument/Internal/QuerySelectors.php +++ b/src/HTML5DOMDocument/Internal/QuerySelectors.php @@ -7,7 +7,7 @@ trait QuerySelectors /** * Returns the first element matching the selector - * + * * @param string $selector CSS query selector * @return \DOMElement|null The result DOMElement or null if not found */ @@ -19,33 +19,45 @@ private function internalQuerySelector(string $selector) /** * Returns a list of document elements matching the selector - * + * * @param string $selector CSS query selector * @param int|null $preferredLimit Preferred maximum number of elements to return - * @return \DOMNodeList Returns a list of DOMElements matching the criteria + * @return DOMNodeList Returns a list of DOMElements matching the criteria * @throws \InvalidArgumentException */ private function internalQuerySelectorAll(string $selector, $preferredLimit = null) { - $walkChildren = function($element, $callback) use (&$walkChildren) { // $walkChildren is a lot faster than $this->getElementsByTagName('*') for 300+ elements - foreach ($element->childNodes as $child) { - if ($child instanceof \DOMElement) { + $walkChildren = function($element, $tagName, $callback) use (&$walkChildren) { // $walkChildren is a lot faster than $this->getElementsByTagName('*') for 300+ elements + if ($tagName !== null) { + $children = $element->getElementsByTagName($tagName); + foreach ($children as $child) { if ($callback($child) === true) { return true; } - if ($walkChildren($child, $callback) === true) { - return true; + } + } else { + foreach ($element->childNodes as $child) { + if ($child instanceof \DOMElement) { + if ($callback($child) === true) { + return true; + } + if ($walkChildren($child, $tagName, $callback) === true) { + return true; + } } } } }; - $getElementById = function($id) use (&$walkChildren) { + $getElementById = function($id, $tagName) use (&$walkChildren) { if ($this instanceof \DOMDocument) { - return $this->getElementById($id); + $element = $this->getElementById($id); + if ($element && ($tagName === null || $element->tagName === $tagName)) { + return $element; + } } else { $foundElement = null; - $walkChildren($this, function($element) use ($id, &$foundElement) { + $walkChildren($this, $tagName, function($element) use ($id, &$foundElement) { if ($element->attributes->length > 0 && $element->getAttribute('id') === $id) { $foundElement = $element; return true; @@ -59,28 +71,30 @@ private function internalQuerySelectorAll(string $selector, $preferredLimit = nu $matches = null; if ($selector === '*') { // all $result = []; - $walkChildren($this, function($element) use (&$result) { + $walkChildren($this, null, function($element) use (&$result, $preferredLimit) { $result[] = $element; + if ($preferredLimit !== null && sizeof($result) >= $preferredLimit) { + return true; + } }); return new \IvoPetkov\HTML5DOMNodeList($result); } elseif (preg_match('/^[a-z0-9]+$/', $selector) === 1) { // tagname $result = []; - $walkChildren($this, function($element) use (&$result, $selector) { - if ($element->tagName === $selector) { - $result[] = $element; + $walkChildren($this, $selector, function($element) use (&$result, $preferredLimit) { + $result[] = $element; + if ($preferredLimit !== null && sizeof($result) >= $preferredLimit) { + return true; } }); return new \IvoPetkov\HTML5DOMNodeList($result); } elseif (preg_match('/^([a-z0-9]*)\[(.+)\=\"(.+)\"\]$/', $selector, $matches) === 1) { // tagname[attribute="value"] or [attribute="value"] $result = []; $tagName = strlen($matches[1]) > 0 ? $matches[1] : null; - $walkChildren($this, function($element) use (&$result, $tagName, $preferredLimit, $matches) { - if ($tagName === null || $element->tagName === $tagName) { - if ($element->attributes->length > 0 && $element->getAttribute($matches[2]) === $matches[3]) { - $result[] = $element; - if ($preferredLimit !== null && sizeof($result) >= $preferredLimit) { - return true; - } + $walkChildren($this, $tagName, function($element) use (&$result, $preferredLimit, $matches) { + if ($element->attributes->length > 0 && $element->getAttribute($matches[2]) === $matches[3]) { + $result[] = $element; + if ($preferredLimit !== null && sizeof($result) >= $preferredLimit) { + return true; } } }); @@ -88,18 +102,17 @@ private function internalQuerySelectorAll(string $selector, $preferredLimit = nu } elseif (preg_match('/^([a-z0-9]*)#(.+)$/', $selector, $matches) === 1) { // tagname#id or #id $tagName = strlen($matches[1]) > 0 ? $matches[1] : null; $idSelector = $matches[2]; - $element = $getElementById($idSelector); - if ($element && ($tagName === null || $element->tagName === $tagName)) { + $element = $getElementById($idSelector, $tagName); + if ($element) { return new \IvoPetkov\HTML5DOMNodeList([$element]); } return new \IvoPetkov\HTML5DOMNodeList(); } elseif (preg_match('/^([a-z0-9]*)\.(.+)$/', $selector, $matches) === 1) { // tagname.classname or .classname - $parts = explode('.', $selector, 2); $tagName = strlen($matches[1]) > 0 ? $matches[1] : null; $classSelector = $matches[2]; $result = []; - $walkChildren($this, function($element) use (&$result, $tagName, $classSelector, $preferredLimit) { - if (($tagName === null || $element->tagName === $tagName) && $element->attributes->length > 0) { + $walkChildren($this, $tagName, function($element) use (&$result, $classSelector, $preferredLimit) { + if ($element->attributes->length > 0) { $classAttribute = $element->getAttribute('class'); if ($classAttribute === $classSelector || strpos($classAttribute, $classSelector . ' ') === 0 || substr($classAttribute, -(strlen($classSelector) + 1)) === ' ' . $classSelector || strpos($classAttribute, ' ' . $classSelector . ' ') !== false) { $result[] = $element;