Skip to content

Commit

Permalink
Merge pull request #71 from niklak/feature/md
Browse files Browse the repository at this point in the history
- Implemented the `markdown` feature, which allows serializing a `Document` or `NodeRef` into Markdown text using the `md()` method.
  • Loading branch information
niklak authored Feb 22, 2025
2 parents ca689ae + ee18fdc commit 1fef123
Show file tree
Hide file tree
Showing 16 changed files with 1,245 additions and 8 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/audit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:
steps:
- uses: actions/checkout@v4
- name: Run clippy
run: cargo clippy --verbose --all-targets -- -D warnings
run: cargo clippy --verbose --all-targets --all-features -- -D warnings
- name: Install cargo audit
uses: taiki-e/install-action@cargo-audit
- name: Run audit
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/raspberrypi5.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,4 @@ jobs:
- name: Run tests
run: cargo test --all-targets
- name: Run tests with all features
run: cargo test --all-targets --features "hashbrown"
run: cargo test --all-targets --all-features
2 changes: 1 addition & 1 deletion .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,4 @@ jobs:
- name: Run tests
run: cargo test --verbose --all-targets
- name: Run tests with all features
run: cargo test --verbose --all-targets --features "hashbrown"
run: cargo test --verbose --all-targets --all-features
2 changes: 1 addition & 1 deletion .github/workflows/wasm.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,4 @@ jobs:
- name: Run tests
run: cargo test --target wasm32-unknown-unknown --all
- name: Run tests with all features
run: cargo test --target wasm32-unknown-unknown --features "hashbrown"
run: cargo test --target wasm32-unknown-unknown --all-features
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@
All notable changes to the `dom_query` crate will be documented in this file.


## [Unreleased]

### Added
- Implemented the `markdown` feature, which allows serializing a `Document` or `NodeRef` into Markdown text using the `md()` method.

## [0.14.0] - 2025-02-16

### Added
Expand Down
8 changes: 6 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,13 @@ alloc_cat = "1.0.0"
[features]
hashbrown = ["dep:hashbrown"]
atomic = []

markdown = []


[[example]]
name = "send_document"
required-features = ["atomic"]
required-features = ["atomic"]

[[example]]
name = "markdown"
required-features = ["markdown"]
48 changes: 48 additions & 0 deletions Examples.md
Original file line number Diff line number Diff line change
Expand Up @@ -755,4 +755,52 @@ assert_eq!(selected_count, total_links);
let found_count = main_node.find(&["div", "a"]).len();
assert_eq!(found_count, total_links);
```
</details>


<details>
<summary><b>Serializing a document to Markdown</b></summary>


```rust
#[cfg(feature = "markdown")]
{
use dom_query::Document;

let contents = "
<style>p {color: blue;}</style>
<p>I really like using <b>Markdown</b>.</p>
<p>I think I'll use it to format all of my documents from now on.</p>";

let expected = "I really like using **Markdown**\\.\n\n\
I think I'll use it to format all of my documents from now on\\.";

let doc = Document::from(contents);
// Passing `None` to `md` uses the default skip tags, which are:
// `["script", "style", "meta", "head"]`.
let got = doc.md(None);
assert_eq!(got.as_ref(), expected);

// If you need the full text content of the elements, pass `Some(&[])` to `md`.
// If you pass content like the example below to `Document::from`,
// `html5ever` will create a `<head>` element and place your `<style>` element inside it.
// To preserve the original order, use `Document::fragment`.

let contents = "<style>p {color: blue;}</style>\
<div><h1>Content Heading</h1></div>\
<p>I really like using Markdown.</p>\
<p>I think I'll use it to format all of my documents from now on.</p>";

let expected = "p \\{color: blue;\\}\n\
I really like using Markdown\\.\n\n\
I think I'll use it to format all of my documents from now on\\.";

let doc = Document::fragment(contents);
let got = doc.md(Some(&["div"]));
assert_eq!(got.as_ref(), expected);
}

```

</details>
49 changes: 48 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -777,6 +777,52 @@ assert_eq!(found_count, total_links);
```
</details>


<details>
<summary><b>Serializing a document to Markdown</b></summary>

*This example requires the `markdown` feature.*

```rust
use dom_query::Document;

let contents = "
<style>p {color: blue;}</style>
<p>I really like using <b>Markdown</b>.</p>
<p>I think I'll use it to format all of my documents from now on.</p>";

let expected = "I really like using **Markdown**\\.\n\n\
I think I'll use it to format all of my documents from now on\\.";

let doc = Document::from(contents);
// Passing `None` to `md` uses the default skip tags, which are:
// `["script", "style", "meta", "head"]`.
let got = doc.md(None);
assert_eq!(got.as_ref(), expected);

// If you need the full text content of the elements, pass `Some(&[])` to `md`.
// If you pass content like the example below to `Document::from`,
// `html5ever` will create a `<head>` element and place your `<style>` element inside it.
// To preserve the original order, use `Document::fragment`.

let contents = "<style>p {color: blue;}</style>\
<div><h1>Content Heading</h1></div>\
<p>I really like using Markdown.</p>\
<p>I think I'll use it to format all of my documents from now on.</p>";

let expected = "p \\{color: blue;\\}\n\
I really like using Markdown\\.\n\n\
I think I'll use it to format all of my documents from now on\\.";

let doc = Document::fragment(contents);
let got = doc.md(Some(&["div"]));
assert_eq!(got.as_ref(), expected);

```

</details>

- **[more examples](./examples/)**
- **[dom_query by example](https://niklak.github.io/dom_query_by_example/)**

Expand All @@ -794,8 +840,9 @@ assert_eq!(found_count, total_links);
## Crate features

- `hashbrown` — optional, standard hashmaps and hashsets will be replaced with `hashbrown` hashmaps and hashsets;
- `atomic`options, switches `NodeData` from using `StrTendril` to `Tendril<tendril::fmt::UTF8, tendril::Atomic>`.
- `atomic` — optional, switches `NodeData` from using `StrTendril` to `Tendril<tendril::fmt::UTF8, tendril::Atomic>`.
This allows `NodeData` and all ascending structures, including `Document`, to implement the `Send` trait;
- `markdown` — optional, enables the `Document::md` and `NodeRef::md` methods, allowing serialization of a document or node to `Markdown` text.

## Possible issues
* [wasm32 compilation](https://niklak.github.io/dom_query_by_example/WASM32-compilation.html)
Expand Down
8 changes: 8 additions & 0 deletions examples/markdown.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
use dom_query::Document;

/// Renders the bundled Hacker News test page as Markdown and prints it to stdout.
fn main() {
    // The page is embedded into the binary at compile time.
    let page = include_str!("../test-pages/hacker_news.html");
    // `None` is passed as the skip-tag list.
    let markdown = Document::from(page).md(None);
    println!("{}", markdown);
}
12 changes: 12 additions & 0 deletions src/document.rs
Original file line number Diff line number Diff line change
Expand Up @@ -468,3 +468,15 @@ fn append_to_existing_text(prev: &mut TreeNode, text: &StrTendril) -> bool {
_ => false,
}
}

#[cfg(feature = "markdown")]
impl Document {
    /// Serializes the whole [`Document`] to *Markdown* text.
    ///
    /// Elements whose tag name appears in `skip_tags` are omitted together with
    /// all of their descendants.
    ///
    /// - `None` selects the default list: `["script", "style", "meta", "head"]`.
    /// - `Some(&[])` disables skipping, so every element is processed.
    ///
    /// This delegates to the `md` method of the document's root node.
    pub fn md(&self, skip_tags: Option<&[&str]>) -> StrTendril {
        let root_node = self.root();
        root_node.md(skip_tags)
    }
}
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ mod entities;
mod matcher;
mod node;
mod selection;
mod serializing;

pub use document::Document;
pub use dom_tree::Tree;
Expand Down
12 changes: 12 additions & 0 deletions src/node/node_ref.rs
Original file line number Diff line number Diff line change
Expand Up @@ -717,3 +717,15 @@ impl NodeRef<'_> {
TreeNodeOps::normalized_char_count(nodes, self.id)
}
}

#[cfg(feature = "markdown")]
impl NodeRef<'_> {
    /// Serializes this node and its descendants to *Markdown* text.
    ///
    /// Elements whose tag name appears in `skip_tags` are omitted together with
    /// all of their descendants.
    ///
    /// - `None` selects the default list: `["script", "style", "meta", "head"]`.
    /// - `Some(&[])` disables skipping, so every element is processed.
    pub fn md(&self, skip_tags: Option<&[&str]>) -> StrTendril {
        use crate::serializing::serialize_md;

        // NOTE(review): the meaning of the `false` flag is defined by `serialize_md`,
        // which is not visible here; confirm against its signature.
        serialize_md(self, false, skip_tags)
    }
}
4 changes: 4 additions & 0 deletions src/serializing.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
// Submodule that provides `serialize_md` (re-exported below).
// NOTE(review): this declaration is not gated on the `markdown` feature while the
// re-export below is; confirm that `md` gates itself, otherwise it is compiled
// (and unused) when the feature is disabled.
mod md;

// Expose the Markdown serializer to the rest of the crate only when the
// `markdown` feature is enabled.
#[cfg(feature = "markdown")]
pub(crate) use md::serialize_md;
Loading

0 comments on commit 1fef123

Please sign in to comment.