Skip to content

Commit

Permalink
src/mini_selector/: revise; add support for multiple attr selectors i…
Browse files Browse the repository at this point in the history
…n one mini selector
  • Loading branch information
niklak committed Feb 26, 2025
1 parent 3f0c793 commit 080bc4a
Show file tree
Hide file tree
Showing 6 changed files with 69 additions and 56 deletions.
3 changes: 0 additions & 3 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,3 @@ pub use matcher::Matcher;
pub use node::SerializableNodeRef;
pub use node::{Element, Node, NodeData, NodeId, NodeIdProver, NodeRef};
pub use selection::Selection;



2 changes: 1 addition & 1 deletion src/mini_selector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@ mod extension;
mod parser;
mod selector;

pub use parser::{parse_selector_list, parse_single_selector};
pub use parser::{parse_selector_list, parse_mini_selector};
pub use selector::MiniSelector;
4 changes: 3 additions & 1 deletion src/mini_selector/extension.rs
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ impl NodeRef<'_> {
///
/// `true` if this node matches the given CSS selector, `false` otherwise.
pub fn snap_is(&self, css_sel: &str) -> bool {
// Removed line of the diff: `MiniSelector::new` used to yield a
// `(remaining input, selector)` pair, hence the tuple pattern.
MiniSelector::new(css_sel).map_or(false, |(_, sel)| self.snap_match(&sel))
// Added line: `MiniSelector::new` now yields the selector itself. A selector that
// fails to parse makes this method return `false` instead of surfacing the error.
MiniSelector::new(css_sel).map_or(false, |sel| self.snap_match(&sel))
}

/// Checks if this node matches the given CSS selector.
Expand Down Expand Up @@ -244,6 +244,8 @@ mod tests {
assert!(!link_node.snap_is(r#"#link"#));
assert!(!link_node.snap_is(r#"a[target="_blank"]"#));
assert!(link_node.snap_is(r#"a[target]"#));
// One attribute selector in the chain cannot match (`examplxe`), so the whole
// selector must be rejected.
assert!(!link_node.snap_is(r#"a[href^="https://"][href*="examplxe"]"#));
// Three well-formed attribute selectors in one mini selector. Every `[` needs its own
// closing `]`; with a bracket missing the selector is malformed and the later
// conditions are not reliably exercised.
// NOTE(review): assumes the fixture's `href` ends with `/` — confirm against the test HTML.
assert!(link_node.snap_is(r#"a[href^="https://"][href*="example"][href$="/"]"#));

let another_sel = doc.select_single(r#"a.other-link"#);
let another_link_node = another_sel.nodes().first().unwrap();
Expand Down
71 changes: 39 additions & 32 deletions src/mini_selector/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use nom::{
branch::alt,
bytes::complete::{is_not, tag, take_while1},
character::complete::{char, multispace0},
combinator::{cut, map, opt},
combinator::{cut, map, not, opt, peek},
multi::{many0, many1},
sequence::{delimited, preceded, terminated},
IResult, Parser,
Expand Down Expand Up @@ -38,7 +38,6 @@ fn parse_classes(input: &str) -> IResult<&str, Vec<&str>> {
),
))
.parse(input)
.map(|(input, classes)| (input, classes.into_iter().collect()))
}

fn parse_attr_operator(input: &str) -> IResult<&str, AttrOperator> {
Expand Down Expand Up @@ -78,6 +77,10 @@ fn parse_attr(input: &str) -> IResult<&str, Attribute> {
))
}

/// Parses one or more consecutive attribute selectors (`[...]`) from the start of `input`.
///
/// Each attribute selector is parsed by `parse_attr` and is accepted only when the
/// character that follows it is not a `]`; that look-ahead consumes nothing.
/// Returns the remaining input together with the collected attributes, or an error
/// when not even one attribute selector could be parsed.
fn parse_attrs(input: &str) -> IResult<&str, Vec<Attribute>> {
    let mut attrs_parser = many1(terminated(parse_attr, peek(not(char(']')))));
    attrs_parser.parse(input)
}

fn parse_combinator(input: &str) -> IResult<&str, Combinator> {
delimited(
multispace0,
Expand All @@ -91,14 +94,18 @@ fn parse_combinator(input: &str) -> IResult<&str, Combinator> {
.parse(input)
}

pub fn parse_single_selector(input: &str) -> IResult<&str, MiniSelector> {
pub fn parse_mini_selector(input: &str) -> IResult<&str, MiniSelector> {
let (input, combinator) = opt(parse_combinator).parse(input)?;
let (input, name) = opt(parse_name).parse(input)?;
let (input, id) = opt(parse_id).parse(input)?;
let (input, classes) = opt(parse_classes).parse(input)?;
let (input, attr) = opt(parse_attr).parse(input)?;
let (input, attrs) = opt(parse_attrs).parse(input)?;

if name.is_none() && id.is_none() && classes.is_none() && attr.is_none() && combinator.is_none()
if name.is_none()
&& id.is_none()
&& classes.is_none()
&& attrs.is_none()
&& combinator.is_none()
{
return Err(nom::Err::Error(nom::error::Error::new(
input,
Expand All @@ -112,14 +119,14 @@ pub fn parse_single_selector(input: &str) -> IResult<&str, MiniSelector> {
name,
id,
classes,
attr,
attrs,
combinator,
};
Ok((input, sel))
}

/// Parses `input` as a sequence of zero or more mini selectors, each of which may be
/// surrounded by whitespace.
///
/// Returns the unparsed remainder of `input` together with the selectors that were parsed.
pub fn parse_selector_list(input: &str) -> IResult<&str, Vec<MiniSelector>> {
// Removed line of the diff: the per-selector parser was named `parse_single_selector`.
let mut parser = many0(delimited(multispace0, parse_single_selector, multispace0));
// Added line: same combinator chain, using the renamed `parse_mini_selector`.
let mut parser = many0(delimited(multispace0, parse_mini_selector, multispace0));
let (input, selectors) = parser.parse(input)?;
Ok((input, selectors))
}
Expand All @@ -137,25 +144,25 @@ mod tests {
name: Some("div"),
id: None,
classes: None,
attr: None,
attrs: None,
combinator: Combinator::Descendant,
},
MiniSelector {
name: Some("a"),
id: None,
classes: None,
attr: Some(Attribute {
attrs: Some(vec![Attribute {
key: "href",
op: Some(AttrOperator::Equals),
value: Some("example"),
}),
}]),
combinator: Combinator::Child,
},
MiniSelector {
name: Some("span"),
id: None,
classes: Some(vec!["class-1", "class-2"]),
attr: None,
attrs: None,
combinator: Combinator::Adjacent,
},
];
Expand All @@ -173,78 +180,78 @@ mod tests {
let test_cases = vec![
(
"span[title]",
Some(Attribute {
Some(vec![Attribute {
key: "title",
op: None,
value: None,
}),
}]),
),
(
r##"span[title="Title"]"##,
Some(Attribute {
Some(vec![Attribute {
key: "title",
op: Some(AttrOperator::Equals),
value: Some("Title"),
}),
}]),
),
(
r##"span[title~="Title"]"##,
Some(Attribute {
Some(vec![Attribute {
key: "title",
op: Some(AttrOperator::Includes),
value: Some("Title"),
}),
}]),
),
(
r##"span[title|="Title"]"##,
Some(Attribute {
Some(vec![Attribute {
key: "title",
op: Some(AttrOperator::DashMatch),
value: Some("Title"),
}),
}]),
),
(
r##"span[title^="Title"]"##,
Some(Attribute {
Some(vec![Attribute {
key: "title",
op: Some(AttrOperator::Prefix),
value: Some("Title"),
}),
}]),
),
(
r##"span[title$="Title"]"##,
Some(Attribute {
Some(vec![Attribute {
key: "title",
op: Some(AttrOperator::Suffix),
value: Some("Title"),
}),
}]),
),
(
r##"span[title*="Title"]"##,
Some(Attribute {
Some(vec![Attribute {
key: "title",
op: Some(AttrOperator::Substring),
value: Some("Title"),
}),
}]),
),
(
r##"span[title ="Title"]"##,
Some(Attribute {
Some(vec![Attribute {
key: "title",
op: Some(AttrOperator::Equals),
value: Some("Title"),
}),
}]),
),
(r##"span[title**"Title"]"##, None),
];

for test in test_cases {
let parsed = parse_single_selector(test.0).unwrap();
let parsed = parse_mini_selector(test.0).unwrap();
let expected = MiniSelector {
name: Some("span"),
id: None,
classes: None,
attr: test.1,
attrs: test.1,
combinator: Combinator::Descendant,
};
assert_eq!(parsed.1, expected);
Expand All @@ -254,16 +261,16 @@ mod tests {
#[test]
fn test_mini_selector() {
let sel = r#"a#main-link.main-class.extra-class[href="https://example.com"]"#;
let parsed = parse_single_selector(sel).unwrap();
let parsed = parse_mini_selector(sel).unwrap();
let expected = MiniSelector {
name: Some("a"),
id: Some("main-link"),
classes: Some(vec!["main-class", "extra-class"]),
attr: Some(Attribute {
attrs: Some(vec![Attribute {
key: "href",
op: Some(AttrOperator::Equals),
value: Some("https://example.com"),
}),
}]),
combinator: Combinator::Descendant,
};
assert_eq!(parsed.1, expected);
Expand Down
44 changes: 26 additions & 18 deletions src/mini_selector/selector.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
use nom::IResult;

use crate::{node::TreeNode, Element, NodeRef};

use super::parser::parse_single_selector;
use super::parser::parse_mini_selector;

static SELECTOR_WHITESPACE: &[char] = &[' ', '\t', '\n', '\r', '\x0C'];

Expand Down Expand Up @@ -57,17 +55,16 @@ pub(crate) struct Attribute<'a> {

/// Current support of CSS is limited: it supports only the `child` (`>`) and `descendant` (` `) combinators.
/// It does not support the `selector list` combinator (`,`) or any pseudo-classes.
/// Each selector in the chain may contain at most one attribute selector.
#[derive(Debug, PartialEq)]
pub struct MiniSelector<'a> {
/// Element name part of the selector, e.g. `a` in `a#main-link`; `None` when absent.
pub(crate) name: Option<&'a str>,
/// `#id` part of the selector, stored without the leading `#`.
pub(crate) id: Option<&'a str>,
/// `.class` parts of the selector; an element must have every listed class to match.
pub(crate) classes: Option<Vec<&'a str>>,
// Removed line of the diff: the single attribute selector the struct used to hold.
pub(crate) attr: Option<Attribute<'a>>,
/// Added line of the diff: all attribute selectors (`[key]`, `[key="value"]`, ...)
/// of this mini selector; `None` when the selector has none.
pub(crate) attrs: Option<Vec<Attribute<'a>>>,
/// Combinator associated with this selector part (see `Combinator`).
pub(crate) combinator: Combinator,
}

impl MiniSelector<'_> {
impl<'a> MiniSelector<'a> {
/// Parses a single CSS selector string and returns a `MiniSelector` representing the parsed selector.
///
/// # Arguments
Expand All @@ -76,17 +73,20 @@ impl MiniSelector<'_> {
///
/// # Returns
///
/// A nom `IResult` containing the parsed `MiniSelector` if the CSS selector string is valid, or an error if it is not.
pub fn new(css_sel: &str) -> IResult<&str, MiniSelector> {
parse_single_selector(css_sel)
/// A `Result` containing the parsed `MiniSelector` if the CSS selector string is valid, or a [`nom::Err`] if it is not.
///
/// Only the parsed selector is returned; any input the parser left unconsumed is discarded.
pub fn new(css_sel: &'a str) -> Result<Self, nom::Err<nom::error::Error<&'a str>>> {
// The parser yields `(remaining input, selector)`; the remainder is dropped here.
let (_, sel) = parse_mini_selector(css_sel)?;
Ok(sel)
}
}

impl MiniSelector<'_> {
pub(crate) fn match_tree_node(&self, t: &TreeNode) -> bool {
if let Some(el) = t.as_element() {
self.match_name(el)
&& self.match_id_attr(el)
&& self.match_classes(el)
&& self.match_attr(el)
&& self.match_attrs(el)
} else {
false
}
Expand Down Expand Up @@ -128,16 +128,24 @@ impl MiniSelector<'_> {
classes.iter().all(|class| el.has_class(class))
}

// Removed lines of the diff: header of the previous single-attribute matcher.
fn match_attr(&self, el: &Element) -> bool {
let Some(Attribute { key, ref op, value }) = self.attr else {
// Added lines of the diff: `match_attrs` returns `true` when `el` satisfies every
// attribute selector in `self.attrs`, or when the selector has no attribute selectors.
fn match_attrs(&self, el: &Element) -> bool {
let Some(ref attrs) = self.attrs else {
return true;
};
// Removed lines of the diff: the previous body checked a single attribute selector.
match (op, value) {
(Some(op), Some(v)) => el
.attrs
.iter()
.any(|a| &a.name.local == key && op.match_attr(&a.value, v)),
_ => el.has_attr(key),
// Added lines of the diff: check the attribute selectors one by one.
let mut is_ok = true;
for attr in attrs {
let key = attr.key;
is_ok = match (&attr.op, attr.value) {
// Operator and value are both present: some attribute named `key` must carry
// a value that the operator accepts.
(Some(op), Some(v)) => el
.attrs
.iter()
.any(|a| &a.name.local == key && op.match_attr(&a.value, v)),
// Otherwise only the presence of the attribute is required.
_ => el.has_attr(key),
};
// Stop at the first attribute selector that does not match.
if !is_ok {
break;
}
}
is_ok
}
}
1 change: 0 additions & 1 deletion src/serializing.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

#[cfg(feature = "markdown")]
mod md;

Expand Down

0 comments on commit 080bc4a

Please sign in to comment.