From 86182724c0937c6567dc155aaf23b07f957136f2 Mon Sep 17 00:00:00 2001
From: Ben Brown <ben.brown@softiron.com>
Date: Fri, 1 Mar 2024 09:56:24 +0000
Subject: [PATCH 1/2] Stream response so as not to read the entire contents
 into memory

---
 htmlproofer/plugin.py     | 7 ++++++-
 tests/unit/test_plugin.py | 4 +++-
 2 files changed, 9 insertions(+), 2 deletions(-)
diff --git a/htmlproofer/plugin.py b/htmlproofer/plugin.py
index 8550389..0dfe40a 100644
--- a/htmlproofer/plugin.py
+++ b/htmlproofer/plugin.py
@@ -146,7 +146,12 @@ def get_external_url(self, url, scheme, src_path):
     @lru_cache(maxsize=1000)
     def resolve_web_scheme(self, url: str) -> int:
         try:
-            response = self._session.get(url, timeout=URL_TIMEOUT)
+            response = self._session.get(url, timeout=URL_TIMEOUT, stream=True)
+
+            # Download the entire contents as to not break previous behaviour.
+            for _ in response.iter_content(chunk_size=1024):
+                pass
+
             return response.status_code
         except requests.exceptions.Timeout:
             return 504
diff --git a/tests/unit/test_plugin.py b/tests/unit/test_plugin.py
index 2645779..8f0cd11 100644
--- a/tests/unit/test_plugin.py
+++ b/tests/unit/test_plugin.py
@@ -78,8 +78,10 @@ def test_on_post_page(
     })
 
     # Always raise a 500 error
-    mock_requests.side_effect = [Mock(spec=Response, status_code=500)]
     link_to_500 = '<a href="https://google.com"><a/>'
+    iter_content = Mock()
+    iter_content.side_effect = link_to_500
+    mock_requests.side_effect = [Mock(spec=Response, status_code=500, iter_content=iter_content)]
 
     plugin.files = empty_files
     page = Mock(

From 08f01526824cfa01d3e283e90e85f9fa1b523792 Mon Sep 17 00:00:00 2001
From: Ben Brown <ben.brown@softiron.com>
Date: Fri, 1 Mar 2024 12:17:20 +0000
Subject: [PATCH 2/2] Add config option to skip downloading the body of a
 response

Closes #76.
---
 README.md                    | 11 +++++++++++
 htmlproofer/plugin.py        |  8 +++++---
 tests/integration/mkdocs.yml |  1 +
 3 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 3ee32b6..c2c2d6d 100644
--- a/README.md
+++ b/README.md
@@ -139,6 +139,17 @@ plugins:
       validate_rendered_template: True
 ```
 
+### `skip_downloads`
+
+Optionally skip downloading a remote URL's content via GET request. This can
+considerably reduce the time taken to validate URLs.
+
+```yaml
+plugins:
+  - htmlproofer:
+      skip_downloads: True
+```
+
 ## Compatibility with `attr_list` extension
 
 If you need to manually specify anchors make use of the `attr_list` [extension](https://python-markdown.github.io/extensions/attr_list) in the markdown.
diff --git a/htmlproofer/plugin.py b/htmlproofer/plugin.py
index 0dfe40a..d1ea3f5 100644
--- a/htmlproofer/plugin.py
+++ b/htmlproofer/plugin.py
@@ -65,6 +65,7 @@ class HtmlProoferPlugin(BasePlugin):
         ('raise_error', config_options.Type(bool, default=False)),
         ('raise_error_after_finish', config_options.Type(bool, default=False)),
         ('raise_error_excludes', config_options.Type(dict, default={})),
+        ('skip_downloads', config_options.Type(bool, default=False)),
         ('validate_external_urls', config_options.Type(bool, default=True)),
         ('validate_rendered_template', config_options.Type(bool, default=False)),
         ('ignore_urls', config_options.Type(list, default=[])),
@@ -148,9 +149,10 @@ def resolve_web_scheme(self, url: str) -> int:
         try:
             response = self._session.get(url, timeout=URL_TIMEOUT, stream=True)
 
-            # Download the entire contents as to not break previous behaviour.
-            for _ in response.iter_content(chunk_size=1024):
-                pass
+            if self.config['skip_downloads'] is False:
+                # Download the entire contents as to not break previous behaviour.
+                for _ in response.iter_content(chunk_size=1024):
+                    pass
 
             return response.status_code
         except requests.exceptions.Timeout:
diff --git a/tests/integration/mkdocs.yml b/tests/integration/mkdocs.yml
index 2ca79d4..abe26a5 100644
--- a/tests/integration/mkdocs.yml
+++ b/tests/integration/mkdocs.yml
@@ -16,3 +16,4 @@ plugins:
             'page2.html#BAD_ANCHOR',
             '../../../tests',
           ]
+        skip_downloads: True