Skip to content

Commit

Permalink
refactor: break down into smaller functions
Browse files Browse the repository at this point in the history
  • Loading branch information
NagariaHussain committed Dec 12, 2024
1 parent cfa8b46 commit 4550132
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 18 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
BROKEN_INTERNAL_URL = "/api/method/ring"


def internal_to_external_urls(internal_url: str):
def internal_to_external_urls(internal_url: str) -> str:
if internal_url == WORKING_INTERNAL_URL:
return WORKING_EXTERNAL_URL
else:
Expand Down
45 changes: 28 additions & 17 deletions wiki/wiki/report/wiki_broken_links/wiki_broken_links.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,14 +64,16 @@ def get_data(filters: dict | None = None) -> list[list]:
check_internal_links = filters and bool(filters.get("check_internal_links"))

for page in wiki_pages:
broken_links_for_page = get_broken_links(page.content, include_images, not check_internal_links)
broken_links_for_page = get_broken_links(page.content, include_images, check_internal_links)
rows = [{"broken_link": link, "wiki_page": page["name"]} for link in broken_links_for_page]
data.extend(rows)

return data


def get_broken_links(md_content: str, include_images: bool = True, ignore_relative_urls: bool = True):
def get_broken_links(
md_content: str, include_images: bool = True, include_relative_urls: bool = False
) -> list[str]:
html = frappe.utils.md_to_html(md_content)
soup = BeautifulSoup(html, "html.parser")

Expand All @@ -85,29 +87,38 @@ def get_broken_links(md_content: str, include_images: bool = True, ignore_relati
is_relative = is_relative_url(url)
relative_url = None

if is_relative and ignore_relative_urls:
if is_relative and not include_relative_urls:
continue
elif is_relative:
from frappe.utils.data import get_url

if is_relative:
relative_url = url
url = get_url(url) # absolute URL

try:
response = requests.head(url, verify=False, timeout=5)
if response.status_code >= 400:
if is_relative:
broken_links.append(relative_url)
else:
broken_links.append(url)
except Exception:
url = frappe.utils.data.get_url(url) # absolute URL

is_broken = is_broken_link(url)
if is_broken:
if is_relative:
broken_links.append(relative_url)
broken_links.append(relative_url) # original URL
else:
broken_links.append(url)

return broken_links


def is_relative_url(url: str):
def is_relative_url(url: str) -> bool:
    """Return True when ``url`` begins with a forward slash.

    Only the leading character is inspected, so any string starting with
    "/" is reported as relative; every other string (including the empty
    string) is not.
    """
    leading_slash = "/"
    return url.startswith(leading_slash)


def is_broken_link(url: str) -> bool:
    """Report whether requesting ``url`` indicates a broken link.

    A link counts as broken when ``get_request_status_code`` yields a status
    code of 400 or above, or when obtaining the status raises any
    ``Exception``. Otherwise the link is considered healthy.
    """
    try:
        return get_request_status_code(url) >= 400
    except Exception:
        # Any failure while fetching the status is treated as a broken link.
        return True


def get_request_status_code(url: str) -> int:
    """Issue a HEAD request to ``url`` and return the response status code.

    The request is sent with certificate verification turned off
    (``verify=False``) and ``timeout=5``. Exceptions raised by
    ``requests.head`` are not handled here and propagate to the caller.
    """
    return requests.head(url, verify=False, timeout=5).status_code

0 comments on commit 4550132

Please sign in to comment.