-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1272 from DDMAL/broken-link-checker
GitHub Actions Broken Link Checker for Production
- Loading branch information
Showing
2 changed files
with
108 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
# Weekly broken-link check for the production CantusDB site.
#
# Job 1 (get-all-links) fetches the space-separated URL lists published by the
# site and turns them into a JSON matrix. Job 2 (link-Checker) runs lychee once
# per URL and feeds its JSON report to scripts/parse_link_checker_output.py,
# whose stdout becomes the job summary.
name: Link checker for articles and flatpages on CantusDB

on:
  schedule:
    - cron: "8 8 * * 0" # Cron job will run at 08h08 UTC time every Sunday

jobs:
  get-all-links:
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    steps:
      - id: set-matrix
        run: |
          # The default shell is `bash -e`, which ignores failures on the left
          # side of a pipe. With pipefail plus `curl -f`, an unreachable site or
          # an HTTP error page aborts the job instead of becoming the matrix.
          set -o pipefail
          BASE_URL="https://cantusdatabase.org"
          # Each endpoint is expected to return URLs separated by single spaces;
          # awk rewrites every space to `","` so the value can be spliced into a
          # JSON array of strings below.
          flatpages=$(curl -fsS "$BASE_URL/flatpages-list/" | awk '{ gsub (" ", "\",\"", $0); print}')
          articles=$(curl -fsS "$BASE_URL/articles-list/" | awk '{ gsub (" ", "\",\"", $0); print}')
          list="{\"links\": [\"${flatpages}\",\"${articles}\"]}"
          echo "$list"
          echo "matrix=$list" >> "$GITHUB_OUTPUT"
  link-Checker:
    runs-on: ubuntu-latest
    needs: get-all-links
    strategy:
      fail-fast: false
      max-parallel: 4
      matrix: ${{fromJson(needs.get-all-links.outputs.matrix)}}
    steps:
      - uses: actions/checkout@v3
      - name: Link Checker
        id: lychee
        # NOTE(review): version pin reconstructed as v1.8.0 — confirm against the repository.
        uses: lycheeverse/lychee-action@v1.8.0
        with:
          args: --exclude http:\/\/cantus\.sk.* ${{ matrix.links }}
          format: json
          output: /tmp/link-checker-output.txt
      - name: Curating Link Checker Output
        run: |
          echo "***Python Version***"
          python --version
          echo "***Invoking Parsing Script***"
          # The parser exits non-zero when it finds broken links. Capture that
          # status instead of letting `bash -e` abort here, so the summary is
          # still echoed to the log, then fail the step with the same status.
          status=0
          python "$GITHUB_WORKSPACE/scripts/parse_link_checker_output.py" >> "$GITHUB_STEP_SUMMARY" || status=$?
          echo "***Printing Summary***"
          cat "$GITHUB_STEP_SUMMARY"
          exit "$status"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
"""Modules""" | ||
import json | ||
import sys | ||
from pathlib import Path | ||
import itertools | ||
|
||
FILE_LOCATION = "/tmp/link-checker-output.txt"


def _status_of(failure):
    """Return ``(code, text)`` for one failure entry, tolerating missing fields.

    Either element is ``None`` when the entry does not provide it.
    """
    status = failure.get("status")
    if isinstance(status, dict):
        return status.get("code"), status.get("text")
    # A status that is not an object carries no HTTP code, only a description.
    return None, (None if status is None else str(status))


def classify_failures(fail_map):
    """Split the failures in *fail_map* into real and skippable errors.

    Args:
        fail_map: Mapping whose values are lists of failure entries; each
            entry is a dict with a ``url`` and a ``status`` object.

    Returns:
        A ``(real_errors, skippable_errors)`` tuple of lists. An entry is a
        real error when its status code is a 4xx; everything else (no code,
        e.g. timeouts or client-side issues, and non-4xx codes) is skippable.
    """
    real_errors = []
    skippable_errors = []
    # Flatten the per-key lists into one stream of entries.
    all_failures = itertools.chain.from_iterable(
        entries or [] for entries in (fail_map or {}).values()
    )
    for failure in all_failures:
        code, _ = _status_of(failure)
        if isinstance(code, int) and 400 <= code < 500:
            real_errors.append(failure)
        else:
            skippable_errors.append(failure)
    return real_errors, skippable_errors


def format_report(real_errors, skippable_errors):
    """Build the report as a list of strings, one per ``print`` call.

    Each section ends with a ``"\\n"`` element so that printing the list
    line by line leaves two blank lines after the section.
    """
    lines = []
    if real_errors:
        lines.append("❌ Broken Links:")
        for failure in real_errors:
            code, _ = _status_of(failure)
            lines.append(f"* {failure.get('url', '<unknown url>')}: {code}")
        lines.append("\n")
    if skippable_errors:
        lines.append("🆗 Skippable Errors:")
        for failure in skippable_errors:
            code, text = _status_of(failure)
            # Fall back to the code when an entry has no status text, rather
            # than failing with KeyError halfway through the report.
            if text is not None:
                detail = text
            elif code is not None:
                detail = code
            else:
                detail = "unknown error"
            lines.append(f"* {failure.get('url', '<unknown url>')}: {detail}")
        lines.append("\n")
    return lines


def main(path=FILE_LOCATION):
    """Parse the link checker output at *path* and print a summary to stdout.

    Progress messages go to stderr so that stdout holds only the report.

    Returns:
        The process exit status: ``1`` when at least one 4xx failure was
        found or the output could not be interpreted, ``0`` otherwise.
    """
    print(f"Running: {sys.argv[0]}", file=sys.stderr)

    try:
        link_checker_output_file = open(path, encoding="utf-8")
    except FileNotFoundError:
        # If the link checker produced no output file, exit gracefully.
        print("✅ No Broken Links Found.")
        return 0

    with link_checker_output_file:
        print("❌ Broken Links Found. Proceeding to Parsing Step.", file=sys.stderr)
        print(f"Parsing the json data in {path}", file=sys.stderr)
        try:
            link_checker_results = json.load(link_checker_output_file)
        except json.JSONDecodeError as err:
            print(f"⚠️ Could not parse link checker output: {err}")
            return 1

    # A missing section must not be mistaken for "no failures".
    if not isinstance(link_checker_results, dict) or "fail_map" not in link_checker_results:
        print("⚠️ Link checker output has no 'fail_map' section.")
        return 1

    real_errors, skippable_errors = classify_failures(link_checker_results["fail_map"])
    if not real_errors and not skippable_errors:
        print("✅ No Broken Links")
        return 0

    for line in format_report(real_errors, skippable_errors):
        print(line)

    return 1 if real_errors else 0


if __name__ == "__main__":
    sys.exit(main())