From 0a19308c32f1d08b3ddf57204de959259d659274 Mon Sep 17 00:00:00 2001 From: Mykola Humanov Date: Tue, 7 Jan 2025 13:39:13 +0200 Subject: [PATCH 1/9] README.md: update --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 7b9f604..4638a7c 100644 --- a/README.md +++ b/README.md @@ -680,7 +680,7 @@ assert_eq!(doc.select("div.content > p").length(), 4); ## Crate features - `hashbrown` — optional, standard hashmaps and hashsets will be replaced `hashbrown` hashmaps and hashsets; -- `atomic` - options, switches `NodeData` from using `StrTendril` to `Tendril`. +- `atomic` — optional, switches `NodeData` from using `StrTendril` to `Tendril`. This allows `NodeData` and all ascending structures, including `Document`, to implement the `Send` trait; ## Possible issues From e9c99835da25ce59e0fffc08eccc1d17214d96b2 Mon Sep 17 00:00:00 2001 From: Mykola Humanov Date: Thu, 9 Jan 2025 13:35:57 +0200 Subject: [PATCH 2/9] src/dom_tree/tree.rs: started `Tree::base_uri()` --- src/dom_tree/tree.rs | 17 ++++++++++++++++- src/node/node_ref.rs | 3 +++ tests/node-traversal.rs | 24 ++++++++++++++++++++++++ 3 files changed, 43 insertions(+), 1 deletion(-) diff --git a/src/dom_tree/tree.rs b/src/dom_tree/tree.rs index ad9d797..fe17235 100644 --- a/src/dom_tree/tree.rs +++ b/src/dom_tree/tree.rs @@ -1,4 +1,4 @@ -use std::cell::{Ref, RefCell}; +use std::cell::{Ref, RefCell, OnceCell}; use std::fmt::{self, Debug}; use std::ops::{Deref, DerefMut}; @@ -11,12 +11,14 @@ use crate::node::{ ancestor_nodes, child_nodes, descendant_nodes, AncestorNodes, ChildNodes, DescendantNodes, }; use crate::node::{Element, NodeData, NodeId, NodeRef, TreeNode}; +use crate::Selection; use super::ops::TreeNodeOps; /// An implementation of arena-tree. 
pub struct Tree { pub(crate) nodes: RefCell>, + base_uri_cache: OnceCell>, } impl Debug for Tree { @@ -30,6 +32,7 @@ impl Clone for Tree { let nodes = self.nodes.borrow(); Self { nodes: RefCell::new(nodes.clone()), + base_uri_cache: self.base_uri_cache.clone() } } } @@ -66,6 +69,17 @@ impl Tree { }) .ok() } + + pub fn base_uri(&self) -> Option { + self.base_uri_cache + .get_or_init(|| { + let root = self.root(); + let root_sel = Selection::from(root); + let base_uri_sel = root_sel.select_single("head > base"); + base_uri_sel.attr("href") + }) + .clone() + } } impl Tree { @@ -79,6 +93,7 @@ impl Tree { let root_id = NodeId::new(0); Self { nodes: RefCell::new(vec![TreeNode::new(root_id, root)]), + base_uri_cache: OnceCell::new(), } } /// Creates a new node with the given data. diff --git a/src/node/node_ref.rs b/src/node/node_ref.rs index 5ee3d08..574dd94 100644 --- a/src/node/node_ref.rs +++ b/src/node/node_ref.rs @@ -652,4 +652,7 @@ impl NodeRef<'_> { pub fn is(&self, sel: &str) -> bool { Matcher::new(sel).map_or(false, |matcher| self.is_match(&matcher)) } + pub fn base_uri(&self) -> Option { + self.tree.base_uri() + } } diff --git a/tests/node-traversal.rs b/tests/node-traversal.rs index 67326d5..c4261c8 100644 --- a/tests/node-traversal.rs +++ b/tests/node-traversal.rs @@ -245,4 +245,28 @@ fn test_text_node_is() { assert!(first_child.is_text()); assert!(!first_child.is("#text")); +} + + +#[cfg_attr(not(target_arch = "wasm32"), test)] +#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] +fn test_node_base_uri() { + + let contents: &str = r#" + + + + Test + + +
+ + + "#; + let doc = Document::from(contents); + + let sel = doc.select_single("#main"); + let node = sel.nodes().first().unwrap(); + let base_uri = node.base_uri().unwrap(); + assert_eq!(base_uri.as_ref(), "https://www.example.com/"); } \ No newline at end of file From 42ba10d6e42ea692898e992d1061f86eea44d7c2 Mon Sep 17 00:00:00 2001 From: Mykola Humanov Date: Thu, 9 Jan 2025 15:50:54 +0200 Subject: [PATCH 3/9] src/dom_tree/tree.rs: revise `Tree::base_uri` --- src/dom_tree/ops.rs | 41 +++++++++++++++++++++++++++++++++++++ src/dom_tree/tree.rs | 19 +++++++++++------ src/node/node_ref.rs | 2 +- tests/node-traversal.rs | 6 +----- tests/selection-property.rs | 9 ++++---- 5 files changed, 60 insertions(+), 17 deletions(-) diff --git a/src/dom_tree/ops.rs b/src/dom_tree/ops.rs index ac1788c..32d1985 100644 --- a/src/dom_tree/ops.rs +++ b/src/dom_tree/ops.rs @@ -111,6 +111,47 @@ impl TreeNodeOps { } None } + + pub fn find_child_element(nodes: Ref>, id: NodeId, f: F) -> Option + where + F: Fn(&TreeNode) -> bool, + { + child_nodes(Ref::clone(&nodes), &id, false) + .filter_map(|node_id| nodes.get(node_id.value)) + .filter(|tree_node| tree_node.is_element()) + .find(|tree_node| f(&tree_node)) + .map(|tree_node| tree_node.id) + } + + pub fn find_child_element_by_name( + nodes: Ref>, + id: NodeId, + name: &str, + ) -> Option { + Self::find_child_element(nodes, id, |tree_node| { + if let Some(node_name) = tree_node.as_element().map(|el| el.node_name()) { + if node_name.as_ref() == name { + return true; + } + } + false + }) + } + + pub fn find_descendant_element(nodes: Ref>, id: NodeId, names: &[&str]) -> Option { + if names.is_empty() { + return None; + } + let mut current_id = id; + + for name in names { + let Some(node_id) = Self::find_child_element_by_name(Ref::clone(&nodes), current_id, name) else { + return None; + }; + current_id = node_id; + } + Some(current_id) + } } // manipulation diff --git a/src/dom_tree/tree.rs b/src/dom_tree/tree.rs index fe17235..1ff5d84 100644 
--- a/src/dom_tree/tree.rs +++ b/src/dom_tree/tree.rs @@ -1,4 +1,4 @@ -use std::cell::{Ref, RefCell, OnceCell}; +use std::cell::{OnceCell, Ref, RefCell}; use std::fmt::{self, Debug}; use std::ops::{Deref, DerefMut}; @@ -11,7 +11,6 @@ use crate::node::{ ancestor_nodes, child_nodes, descendant_nodes, AncestorNodes, ChildNodes, DescendantNodes, }; use crate::node::{Element, NodeData, NodeId, NodeRef, TreeNode}; -use crate::Selection; use super::ops::TreeNodeOps; @@ -32,7 +31,7 @@ impl Clone for Tree { let nodes = self.nodes.borrow(); Self { nodes: RefCell::new(nodes.clone()), - base_uri_cache: self.base_uri_cache.clone() + base_uri_cache: self.base_uri_cache.clone(), } } } @@ -74,9 +73,17 @@ impl Tree { self.base_uri_cache .get_or_init(|| { let root = self.root(); - let root_sel = Selection::from(root); - let base_uri_sel = root_sel.select_single("head > base"); - base_uri_sel.attr("href") + let nodes = self.nodes.borrow(); + let Some(base_node_id) = + TreeNodeOps::find_descendant_element(Ref::clone(&nodes), root.id, &["html", "head", "base"]) + else { + return None; + }; + + let Some(base_node) = nodes.get(base_node_id.value) else { + return None; + }; + base_node.as_element().and_then(|el| el.attr("href")) }) .clone() } diff --git a/src/node/node_ref.rs b/src/node/node_ref.rs index 574dd94..1a595ff 100644 --- a/src/node/node_ref.rs +++ b/src/node/node_ref.rs @@ -12,9 +12,9 @@ use tendril::StrTendril; use crate::entities::copy_attrs; use crate::Document; +use crate::Matcher; use crate::Tree; use crate::TreeNodeOps; -use crate::Matcher; use super::child_nodes; use super::id_provider::NodeIdProver; diff --git a/tests/node-traversal.rs b/tests/node-traversal.rs index c4261c8..567e9c4 100644 --- a/tests/node-traversal.rs +++ b/tests/node-traversal.rs @@ -221,7 +221,6 @@ fn test_node_prev_sibling() { assert!(prev_element_sibling_sel.is("#first-child")); } - #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn 
test_node_is() { @@ -233,7 +232,6 @@ fn test_node_is() { assert!(parent_node.is(":has(#first-child)")); } - #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn test_text_node_is() { @@ -247,11 +245,9 @@ fn test_text_node_is() { assert!(!first_child.is("#text")); } - #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn test_node_base_uri() { - let contents: &str = r#" @@ -269,4 +265,4 @@ fn test_node_base_uri() { let node = sel.nodes().first().unwrap(); let base_uri = node.base_uri().unwrap(); assert_eq!(base_uri.as_ref(), "https://www.example.com/"); -} \ No newline at end of file +} diff --git a/tests/selection-property.rs b/tests/selection-property.rs index 92dfe6c..d6e1854 100644 --- a/tests/selection-property.rs +++ b/tests/selection-property.rs @@ -290,17 +290,16 @@ fn test_selection_query() { let mut font_faces = vec![]; for node in sel.nodes() { - if let Some(face) = node.query(|tree_node| { - tree_node.as_element().and_then(|el| el.attr("face")) - }).flatten() { + if let Some(face) = node + .query(|tree_node| tree_node.as_element().and_then(|el| el.attr("face"))) + .flatten() + { font_faces.push(face.to_string()); } } assert_eq!(font_faces, vec!["Times", "Arial", "Courier"]); } - - #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn test_doc_try_serialize_html() { From c98206aa984a2bfb9508740afb0ecbcb1bad3c2d Mon Sep 17 00:00:00 2001 From: Mykola Humanov Date: Fri, 10 Jan 2025 11:08:45 +0200 Subject: [PATCH 4/9] src/dom_tree/: cleanup code --- src/dom_tree/ops.rs | 57 ++++++++++++++++++++++++++++++-------------- src/dom_tree/tree.rs | 19 ++++++++------- src/node/node_ref.rs | 4 ++++ 3 files changed, 53 insertions(+), 27 deletions(-) diff --git a/src/dom_tree/ops.rs b/src/dom_tree/ops.rs index 32d1985..3642ab6 100644 --- a/src/dom_tree/ops.rs +++ b/src/dom_tree/ops.rs @@ -112,6 +112,17 @@ impl 
TreeNodeOps { None } + /// Finds the first child element of a node that satisfies the given predicate. + /// + /// # Arguments + /// + /// * `nodes` - The nodes of the tree. + /// * `id` - The id of the parent node. + /// * `f` - The predicate to apply to each child element. + /// + /// # Returns + /// + /// The id of the first element that satisfies the predicate, if any. pub fn find_child_element(nodes: Ref>, id: NodeId, f: F) -> Option where F: Fn(&TreeNode) -> bool, @@ -123,34 +134,44 @@ impl TreeNodeOps { .map(|tree_node| tree_node.id) } + /// Finds the first child element of a node that has the given name. + /// + /// # Arguments + /// + /// * `nodes` - The nodes of the tree. + /// * `id` - The id of the parent node. + /// * `name` - The name of the element to search for. + /// + /// # Returns + /// + /// The id of the first element that has the given name, if any. pub fn find_child_element_by_name( nodes: Ref>, id: NodeId, name: &str, ) -> Option { Self::find_child_element(nodes, id, |tree_node| { - if let Some(node_name) = tree_node.as_element().map(|el| el.node_name()) { - if node_name.as_ref() == name { - return true; - } - } - false + tree_node + .as_element() + .map_or(false, |el| el.node_name().as_ref() == name) }) } + /// Finds the first descendant element of a node that has the given names. + /// + /// # Arguments + /// + /// * `nodes` - The nodes of the tree. + /// * `id` - The id of the starting node. + /// * `names` - The names of the elements to search for. + /// + /// # Returns + /// + /// The id of the first descendant element that has the given names, if any. 
pub fn find_descendant_element(nodes: Ref>, id: NodeId, names: &[&str]) -> Option { - if names.is_empty() { - return None; - } - let mut current_id = id; - - for name in names { - let Some(node_id) = Self::find_child_element_by_name(Ref::clone(&nodes), current_id, name) else { - return None; - }; - current_id = node_id; - } - Some(current_id) + names.iter().try_fold(id, |current_id, name| { + Self::find_child_element_by_name(Ref::clone(&nodes), current_id, name) + }) } } diff --git a/src/dom_tree/tree.rs b/src/dom_tree/tree.rs index 1ff5d84..c2bf505 100644 --- a/src/dom_tree/tree.rs +++ b/src/dom_tree/tree.rs @@ -69,21 +69,22 @@ impl Tree { .ok() } + /// Finds the base URI of the tree by looking for `` tags in document's head. + /// + /// The base URI is the value of the `href` attribute of the first + /// `` tag in the document's head. If no such tag is found, + /// the method returns `None`. + /// + /// The result is cached after the first call. pub fn base_uri(&self) -> Option { self.base_uri_cache .get_or_init(|| { let root = self.root(); let nodes = self.nodes.borrow(); - let Some(base_node_id) = - TreeNodeOps::find_descendant_element(Ref::clone(&nodes), root.id, &["html", "head", "base"]) - else { - return None; - }; - let Some(base_node) = nodes.get(base_node_id.value) else { - return None; - }; - base_node.as_element().and_then(|el| el.attr("href")) + TreeNodeOps::find_descendant_element(Ref::clone(&nodes), root.id, &["html", "head", "base"]) + .and_then(|base_node_id| nodes.get(base_node_id.value)) + .and_then(|base_node| base_node.as_element()?.attr("href")) }) .clone() } diff --git a/src/node/node_ref.rs b/src/node/node_ref.rs index 1a595ff..c60c1a7 100644 --- a/src/node/node_ref.rs +++ b/src/node/node_ref.rs @@ -652,6 +652,10 @@ impl NodeRef<'_> { pub fn is(&self, sel: &str) -> bool { Matcher::new(sel).map_or(false, |matcher| self.is_match(&matcher)) } + + /// Returns the base URI of the document. 
+ /// + /// This is the value of the `` element in the document's head, or `None` if the document does not have a `` element. pub fn base_uri(&self) -> Option { self.tree.base_uri() } From 06a8fa957ab753063fbb4d9162cb9b6e544f2038 Mon Sep 17 00:00:00 2001 From: Mykola Humanov Date: Fri, 10 Jan 2025 11:47:54 +0200 Subject: [PATCH 5/9] src/document.rs: added `Document::base_uri` --- src/document.rs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/document.rs b/src/document.rs index 937015e..ec4cb5d 100644 --- a/src/document.rs +++ b/src/document.rs @@ -117,6 +117,17 @@ impl Document { self.root().text() } + /// Finds the base URI of the tree by looking for `` tags in document's head. + /// + /// The base URI is the value of the `href` attribute of the first + /// `` tag in the document's head. If no such tag is found, + /// the method returns `None`. + /// + /// The result is cached after the first call. + pub fn base_uri(&self) -> Option { + self.tree.base_uri() + } + /// Merges adjacent text nodes and removes empty text nodes. /// /// Normalization is necessary to ensure that adjacent text nodes are merged into one text node. From 862e192b8acdd1f104685eb5b75d1cd748278777 Mon Sep 17 00:00:00 2001 From: Mykola Humanov Date: Fri, 10 Jan 2025 12:00:14 +0200 Subject: [PATCH 6/9] CHANGELOG.md: update --- CHANGELOG.md | 2 +- tests/node-traversal.rs | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2e9634c..dbb642a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,7 @@ All notable changes to the `dom_query` crate will be documented in this file. - Implemented `NodeRef::is_match` and `NodeRef::is` methods, which allow checking if a node matches a given matcher (`&Matcher`) or selector (`&str`) without creating a `Selection` object. - +- Implemented `Tree::base_uri`, a quick method that returns the base URI of the document based on the `href` attribute of the `` element. 
`Document::base_uri` and `NodeRef::base_uri` provide the same functionality. Inspired by [Node: baseURI property]( https://developer.mozilla.org/en-US/docs/Web/API/Node/baseURI). ### Changed diff --git a/tests/node-traversal.rs b/tests/node-traversal.rs index 567e9c4..3a9d17a 100644 --- a/tests/node-traversal.rs +++ b/tests/node-traversal.rs @@ -261,8 +261,21 @@ fn test_node_base_uri() { "#; let doc = Document::from(contents); + // during first call of .base_uri, the result will be cached with OnceCell + let base_uri = doc.base_uri().unwrap(); + assert_eq!(base_uri.as_ref(), "https://www.example.com/"); + let sel = doc.select_single("#main"); let node = sel.nodes().first().unwrap(); + // using cached result. Access at any node of the tree. let base_uri = node.base_uri().unwrap(); assert_eq!(base_uri.as_ref(), "https://www.example.com/"); } + + +#[cfg_attr(not(target_arch = "wasm32"), test)] +#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] +fn test_node_base_uri_none() { + let doc = Document::from(ANCESTORS_CONTENTS); + assert!(doc.base_uri().is_none()); +} \ No newline at end of file From cb0b389517b17ad84d2714f40f1cf30a66d24ae7 Mon Sep 17 00:00:00 2001 From: Mykola Humanov Date: Fri, 10 Jan 2025 12:10:50 +0200 Subject: [PATCH 7/9] .github/workflows/wasm.yml: update --- .github/workflows/wasm.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/wasm.yml b/.github/workflows/wasm.yml index 94242a2..19622a4 100644 --- a/.github/workflows/wasm.yml +++ b/.github/workflows/wasm.yml @@ -20,9 +20,9 @@ jobs: steps: - uses: actions/checkout@v4 - name: Install stable rust - uses: dtolnay/rust-toolchain@stable + uses: dtolnay/rust-toolchain@master with: - toolchain: stable + toolchain: 1.83.0 targets: wasm32-unknown-unknown - name: Install wasm-bindgen-cli uses: taiki-e/install-action@v2 From ddbd65b68f4c116634ce93c1890eb38f0d6c2221 Mon Sep 17 00:00:00 2001 From: Mykola Humanov Date: Fri, 10 Jan 2025 13:33:14 +0200 Subject: [PATCH 
8/9] src/dom_tree/tree.rs: switch to `once_cell::sync::OnceCell` --- Cargo.toml | 5 +++-- src/dom_tree/ops.rs | 2 +- src/dom_tree/tree.rs | 4 +++- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 3bbe848..8ef35ed 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,14 +18,15 @@ html5ever = "0.29.0" selectors = "0.26.0" cssparser = "0.34.0" tendril = "0.4.3" -foldhash = "0.1.3" +foldhash = "0.1.4" hashbrown = {version = "0.15.2", default-features = false, features = ["allocator-api2", "inline-more", "default-hasher"], optional = true} precomputed-hash = "0.1.1" +once_cell = {version = "1.20.2"} [dev-dependencies] ureq = {version = "2.12.1", default-features = false} wasm-bindgen-test = "0.3" -mini-alloc = "0.6.0" +mini-alloc = "0.7.0" [features] hashbrown = ["dep:hashbrown"] diff --git a/src/dom_tree/ops.rs b/src/dom_tree/ops.rs index 3642ab6..4d2802b 100644 --- a/src/dom_tree/ops.rs +++ b/src/dom_tree/ops.rs @@ -130,7 +130,7 @@ impl TreeNodeOps { child_nodes(Ref::clone(&nodes), &id, false) .filter_map(|node_id| nodes.get(node_id.value)) .filter(|tree_node| tree_node.is_element()) - .find(|tree_node| f(&tree_node)) + .find(|tree_node| f(tree_node)) .map(|tree_node| tree_node.id) } diff --git a/src/dom_tree/tree.rs b/src/dom_tree/tree.rs index c2bf505..071d14c 100644 --- a/src/dom_tree/tree.rs +++ b/src/dom_tree/tree.rs @@ -1,4 +1,4 @@ -use std::cell::{OnceCell, Ref, RefCell}; +use std::cell::{Ref, RefCell}; use std::fmt::{self, Debug}; use std::ops::{Deref, DerefMut}; @@ -6,6 +6,8 @@ use html5ever::LocalName; use html5ever::{namespace_url, ns, QualName}; use tendril::StrTendril; +use once_cell::sync::OnceCell; + use crate::entities::{wrap_tendril, InnerHashMap}; use crate::node::{ ancestor_nodes, child_nodes, descendant_nodes, AncestorNodes, ChildNodes, DescendantNodes, From 226213c315be0656e07e397a6a1e773dadd6e65f Mon Sep 17 00:00:00 2001 From: Mykola Humanov Date: Fri, 10 Jan 2025 14:02:43 +0200 Subject: [PATCH 9/9] decline 
OnceCell, since it breaks atomicity --- Cargo.toml | 1 - src/document.rs | 1 - src/dom_tree/ops.rs | 6 +++++- src/dom_tree/tree.rs | 25 +++++++++---------------- src/node/node_ref.rs | 2 +- tests/node-traversal.rs | 7 +++---- 6 files changed, 18 insertions(+), 24 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 8ef35ed..298cb0a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,7 +21,6 @@ tendril = "0.4.3" foldhash = "0.1.4" hashbrown = {version = "0.15.2", default-features = false, features = ["allocator-api2", "inline-more", "default-hasher"], optional = true} precomputed-hash = "0.1.1" -once_cell = {version = "1.20.2"} [dev-dependencies] ureq = {version = "2.12.1", default-features = false} diff --git a/src/document.rs b/src/document.rs index ec4cb5d..b840ce1 100644 --- a/src/document.rs +++ b/src/document.rs @@ -123,7 +123,6 @@ impl Document { /// `` tag in the document's head. If no such tag is found, /// the method returns `None`. /// - /// The result is cached after the first call. pub fn base_uri(&self) -> Option { self.tree.base_uri() } diff --git a/src/dom_tree/ops.rs b/src/dom_tree/ops.rs index 4d2802b..4dc1741 100644 --- a/src/dom_tree/ops.rs +++ b/src/dom_tree/ops.rs @@ -168,7 +168,11 @@ impl TreeNodeOps { /// # Returns /// /// The id of the first descendant element that has the given names, if any. 
- pub fn find_descendant_element(nodes: Ref>, id: NodeId, names: &[&str]) -> Option { + pub fn find_descendant_element( + nodes: Ref>, + id: NodeId, + names: &[&str], + ) -> Option { names.iter().try_fold(id, |current_id, name| { Self::find_child_element_by_name(Ref::clone(&nodes), current_id, name) }) diff --git a/src/dom_tree/tree.rs b/src/dom_tree/tree.rs index 071d14c..cd0389a 100644 --- a/src/dom_tree/tree.rs +++ b/src/dom_tree/tree.rs @@ -6,8 +6,6 @@ use html5ever::LocalName; use html5ever::{namespace_url, ns, QualName}; use tendril::StrTendril; -use once_cell::sync::OnceCell; - use crate::entities::{wrap_tendril, InnerHashMap}; use crate::node::{ ancestor_nodes, child_nodes, descendant_nodes, AncestorNodes, ChildNodes, DescendantNodes, @@ -19,7 +17,6 @@ use super::ops::TreeNodeOps; /// An implementation of arena-tree. pub struct Tree { pub(crate) nodes: RefCell>, - base_uri_cache: OnceCell>, } impl Debug for Tree { @@ -33,7 +30,6 @@ impl Clone for Tree { let nodes = self.nodes.borrow(); Self { nodes: RefCell::new(nodes.clone()), - base_uri_cache: self.base_uri_cache.clone(), } } } @@ -77,18 +73,16 @@ impl Tree { /// `` tag in the document's head. If no such tag is found, /// the method returns `None`. /// - /// The result is cached after the first call. + /// This is a very fast method compared to [`crate::Document::select`]. pub fn base_uri(&self) -> Option { - self.base_uri_cache - .get_or_init(|| { - let root = self.root(); - let nodes = self.nodes.borrow(); - - TreeNodeOps::find_descendant_element(Ref::clone(&nodes), root.id, &["html", "head", "base"]) - .and_then(|base_node_id| nodes.get(base_node_id.value)) - .and_then(|base_node| base_node.as_element()?.attr("href")) - }) - .clone() + // TODO: It is possible to wrap the result of this function with `OnceCell`, + // but then a problem appears with atomicity and the `Send` trait for the Tree. 
+ let root = self.root(); + let nodes = self.nodes.borrow(); + + TreeNodeOps::find_descendant_element(Ref::clone(&nodes), root.id, &["html", "head", "base"]) + .and_then(|base_node_id| nodes.get(base_node_id.value)) + .and_then(|base_node| base_node.as_element()?.attr("href")) } } @@ -103,7 +97,6 @@ impl Tree { let root_id = NodeId::new(0); Self { nodes: RefCell::new(vec![TreeNode::new(root_id, root)]), - base_uri_cache: OnceCell::new(), } } /// Creates a new node with the given data. diff --git a/src/node/node_ref.rs b/src/node/node_ref.rs index c60c1a7..24e6050 100644 --- a/src/node/node_ref.rs +++ b/src/node/node_ref.rs @@ -652,7 +652,7 @@ impl NodeRef<'_> { pub fn is(&self, sel: &str) -> bool { Matcher::new(sel).map_or(false, |matcher| self.is_match(&matcher)) } - + /// Returns the base URI of the document. /// /// This is the value of the `` element in the document's head, or `None` if the document does not have a `` element. diff --git a/tests/node-traversal.rs b/tests/node-traversal.rs index 3a9d17a..3ef0566 100644 --- a/tests/node-traversal.rs +++ b/tests/node-traversal.rs @@ -261,21 +261,20 @@ fn test_node_base_uri() { "#; let doc = Document::from(contents); - // during first call of .base_uri, the result will be cached with OnceCell + // It may be called from document level. let base_uri = doc.base_uri().unwrap(); assert_eq!(base_uri.as_ref(), "https://www.example.com/"); let sel = doc.select_single("#main"); let node = sel.nodes().first().unwrap(); - // using cached result. Access at any node of the tree. + // Access at any node of the tree. let base_uri = node.base_uri().unwrap(); assert_eq!(base_uri.as_ref(), "https://www.example.com/"); } - #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn test_node_base_uri_none() { let doc = Document::from(ANCESTORS_CONTENTS); assert!(doc.base_uri().is_none()); -} \ No newline at end of file +}