Skip to content

Commit

Permalink
Merge pull request #66 from niklak/feature/minor-adjustments
Browse files Browse the repository at this point in the history
Code minor adjustments
  • Loading branch information
niklak authored Feb 10, 2025
2 parents 8665e9d + 8b3677b commit f1cc245
Show file tree
Hide file tree
Showing 9 changed files with 39 additions and 32 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ tendril = "0.4.3"
foldhash = "0.1.4"
hashbrown = {version = "0.15.2", default-features = false, features = ["allocator-api2", "inline-more", "default-hasher"], optional = true}
precomputed-hash = "0.1.1"
bit-set = "0.8.0"

[dev-dependencies]
wasm-bindgen-test = "0.3"
Expand Down
4 changes: 2 additions & 2 deletions src/document.rs
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,7 @@ impl TreeSink for Document {
};

self.tree.create_node(NodeData::Element(Element::new(
name.clone(),
name,
attrs,
template_contents,
flags.mathml_annotation_xml_integration_point,
Expand Down Expand Up @@ -459,7 +459,7 @@ impl TreeSink for Document {
}
}

fn append_to_existing_text(prev: &mut TreeNode, text: &str) -> bool {
fn append_to_existing_text(prev: &mut TreeNode, text: &StrTendril) -> bool {
match prev.data {
NodeData::Text { ref mut contents } => {
contents.push_slice(text);
Expand Down
15 changes: 8 additions & 7 deletions src/matcher.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
use std::{fmt, iter};

use bit_set::BitSet;
use cssparser::{CowRcStr, ParseError, SourceLocation, ToCss};
use html5ever::Namespace;
use selectors::context::SelectorCaches;
use selectors::parser::{self, SelectorList, SelectorParseErrorKind};
use selectors::{context, matching, visitor, Element};

use crate::css::{CssLocalName, CssString};
use crate::entities::InnerHashSet;
use crate::node::NodeRef;

/// CSS selector.
#[derive(Clone, Debug)]
pub struct Matcher {
Expand Down Expand Up @@ -52,7 +51,7 @@ impl Matcher {
pub struct Matches<'a, 'b> {
nodes: Vec<NodeRef<'a>>,
matcher: &'b Matcher,
set: InnerHashSet<usize>,
seen: BitSet,
caches: SelectorCaches,
}

Expand All @@ -77,10 +76,11 @@ impl<'a, 'b> Matches<'a, 'b> {
}
pub fn from_one(root_node: NodeRef<'a>, matcher: &'b Matcher, match_scope: MatchScope) -> Self {
let nodes = Self::nodes_from_root(iter::once(root_node), match_scope);
let set = BitSet::new();
Self {
nodes,
matcher,
set: Default::default(),
seen: set,
caches: Default::default(),
}
}
Expand All @@ -92,10 +92,11 @@ impl<'a, 'b> Matches<'a, 'b> {
) -> Self {
let nodes = Self::nodes_from_root(root_nodes, match_scope);

let set = BitSet::new();
Self {
nodes,
matcher,
set: Default::default(),
seen: set,
caches: Default::default(),
}
}
Expand All @@ -106,7 +107,7 @@ impl<'a> Iterator for Matches<'a, '_> {

fn next(&mut self) -> Option<Self::Item> {
while let Some(node) = self.nodes.pop() {
if self.set.contains(&node.id.value) {
if self.seen.contains(node.id.value) {
continue;
}
self.nodes
Expand All @@ -116,7 +117,7 @@ impl<'a> Iterator for Matches<'a, '_> {
.matcher
.match_element_with_caches(&node, &mut self.caches)
{
self.set.insert(node.id.value);
self.seen.insert(node.id.value);
return Some(node);
}
}
Expand Down
9 changes: 9 additions & 0 deletions src/node/inner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,15 @@ impl TreeNode {
element.remove_class(class);
}
}

/// Reports whether this node is a link.
///
/// Returns `false` when the node is not an element; otherwise the answer
/// is delegated to the element's own `is_link` check.
pub fn is_link(&self) -> bool {
    match self.as_element() {
        Some(element) => element.is_link(),
        None => false,
    }
}
}

impl Clone for TreeNode {
Expand Down
17 changes: 14 additions & 3 deletions src/node/node_data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ impl Element {
let attr = self
.attrs
.iter_mut()
.find(|attr| &attr.name.local == "class");
.find(|attr| attr.name.local == local_name!("class"));

match attr {
Some(attr) => {
Expand Down Expand Up @@ -154,7 +154,7 @@ impl Element {
if let Some(attr) = self
.attrs
.iter_mut()
.find(|attr| &attr.name.local == "class")
.find(|attr| attr.name.local == local_name!("class"))
{
let mut set: InnerHashSet<&str> = attr
.value
Expand Down Expand Up @@ -230,7 +230,7 @@ impl Element {
.attrs
.iter()
.map(|e| e.name.clone())
.collect::<InnerHashSet<_>>();
.collect::<Vec<_>>();

self.attrs.extend(
attrs
Expand All @@ -244,4 +244,15 @@ impl Element {
let new_name = QualName::new(None, ns!(), LocalName::from(name));
self.name = new_name;
}

/// Reports whether this element is a link.
///
/// An element counts as a link when its local name is `a`, `area` or
/// `link` **and** it carries an `href` attribute.
pub fn is_link(&self) -> bool {
    // Only these three tag names can be links; bail out before scanning attributes.
    if !matches!(
        self.name.local,
        local_name!("a") | local_name!("area") | local_name!("link")
    ) {
        return false;
    }

    self.attrs
        .iter()
        .any(|attr| attr.name.local == local_name!("href"))
}
}
17 changes: 2 additions & 15 deletions src/node/selector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -141,12 +141,7 @@ impl selectors::Element for NodeRef<'_> {
Active | Focus | Hover | Enabled | Disabled | Checked | Indeterminate | Visited => {
false
}
AnyLink | Link => match self.node_name() {
Some(node_name) => {
matches!(node_name.deref(), "a" | "area" | "link") && self.has_attr("href")
}
None => false,
},
AnyLink | Link => self.query_or(false, |n| n.is_link()),
OnlyText => self.has_only_text(),
HasText(s) => self.has_text(s.as_str()),
Contains(s) => self.text().contains(s.as_str()),
Expand All @@ -166,15 +161,7 @@ impl selectors::Element for NodeRef<'_> {
// TODO: This function adds some overhead.
// Its purpose in dom_query is unclear.
// Returning `false` works just fine.
self.query_or(false, |node| {
if let NodeData::Element(ref e) = node.data {
return matches!(
e.name.local,
local_name!("a") | local_name!("area") | local_name!("link")
) && e.attrs.iter().any(|a| a.name.local == local_name!("href"));
}
false
})
self.query_or(false, |node| node.is_link())
}

/// Whether the element is an HTML element.
Expand Down
2 changes: 1 addition & 1 deletion src/node/serializing.rs
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ fn push_normalized_text(text: &mut StrTendril, new_text: &str) {
if result.is_empty() && follows_newline {
return;
}

text.push_tendril(&result);

if push_end_whitespace && !text.ends_with(char::is_whitespace) {
Expand Down
3 changes: 1 addition & 2 deletions tests/data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,6 @@ pub static DMC_CONTENTS: &str = r#"<!DOCTYPE html>
</body>
</html>"#;


pub static MINI_TABLE_CONTENTS: &str = r#"<!DOCTYPE html>
<html>
<head></head>
Expand All @@ -148,4 +147,4 @@ pub static MINI_TABLE_CONTENTS: &str = r#"<!DOCTYPE html>
</tr>
</table>
</body>
</html>"#;
</html>"#;
3 changes: 1 addition & 2 deletions tests/node-traversal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -352,12 +352,11 @@ That's how we keep our code development!
assert_eq!(text.as_ref(), expected);
}


// Checks the formatted text produced for the `MINI_TABLE_CONTENTS` fixture.
#[cfg_attr(not(target_arch = "wasm32"), test)]
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
fn test_doc_table_formatted_text() {
    // Expected output: two lines, with the values on each line separated by single spaces.
    let expected = "1 2 3\n4 5 6";

    let doc = Document::from(MINI_TABLE_CONTENTS);
    let text = doc.formatted_text();

    assert_eq!(text.as_ref(), expected);
}
}

0 comments on commit f1cc245

Please sign in to comment.