From ed60f7cfdca2b7ea52efe3d49fa935ca9bcc23a5 Mon Sep 17 00:00:00 2001 From: Sean T Allen Date: Sun, 3 Mar 2024 05:16:03 -0500 Subject: [PATCH] Don't treat some HTTP status error codes as not errors (#78) Previously 401, 403, 503, 504, and 999 HTTP status codes were treated as "not an error" and therefore not reported. At the time this made sense as there was no easy way to say "do not treat 504 codes as an error, I will assume it is transient". However, with a recent change you can do: ```yml raise_error_excludes: 504: ['*'] ``` And treat all 504s as not an error. This commit makes all "special HTTP status codes" an error and allows for users to configure their `raise_error_excludes` if they want to get the old behavior back. I found this functionality when trying to figure out why some links that we had that were bad weren't being reported by htmlproofer. It turned out that the broken links were coming back with "probably good" error codes and thus we never saw them. Closes #77 --- htmlproofer/plugin.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/htmlproofer/plugin.py b/htmlproofer/plugin.py index 59d62fa..8550389 100644 --- a/htmlproofer/plugin.py +++ b/htmlproofer/plugin.py @@ -285,17 +285,10 @@ def contains_anchor(markdown: str, anchor: str) -> bool: def bad_url(url_status: int) -> bool: if url_status == -1: return True - elif url_status == 401 or url_status == 403: - return False - elif url_status in (503, 504): - # Usually transient - return False - elif url_status == 999: - # Returned by some websites (e.g. LinkedIn) that think you're crawling them. - return False elif url_status >= 400: return True - return False + else: + return False @staticmethod def is_error(config: Config, url: str, url_status: int) -> bool: