Merge pull request #17 from NHSDigital/websitecheckerupdate

updated github action for ease of use
NHSDigital · Oct 5, 2023 · 359a56a · 359a56a
2 parents 4070b1c + 975753d
commit 359a56a
Show file tree

Hide file tree

Showing 7 changed files with 75 additions and 110 deletions.
diff --git a/.github/workflows/errorChecker.yml b/.github/workflows/errorChecker.yml
diff --git a/.github/workflows/linkchecker.yml b/.github/workflows/linkchecker.yml
diff --git a/.github/workflows/spellChecker.yml b/.github/workflows/spellChecker.yml
diff --git a/.github/workflows/websiteChecker.yml b/.github/workflows/websiteChecker.yml
@@ -0,0 +1,65 @@
+---
+# Runs the three Simplifier IG page content checks (HTML errors, links,
+# spelling) against a single guide URL supplied when the workflow is started.
+name: Simplifier IG Website Checking
+on:
+  # Allows you to run this workflow manually from the Actions tab
+  workflow_dispatch:
+    inputs:
+      websiteurl:
+        # Shown as the label of the input box in the "Run workflow" form.
+        description: URL of the Simplifier guide to check
+        type: string
+        # Used when the box is left unchanged.
+        default: "https://simplifier.net/guide/uk-core-implementation-guidance-directory?version=current"
+jobs:
+  # Checks the guide's pages for HTML errors using errorChecker.py.
+  job1:
+    name: html error checker
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repo content
+        uses: actions/checkout@v3
+      - name: Set up python
+        uses: actions/setup-python@v4
+        with:
+          python-version: 3.x
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r ./IGPageContentValidator/requirements.txt
+      - name: Execute HTML Error Check
+        # The URL is handed over through the step environment instead of being
+        # expanded into the command line, so characters in the user-supplied
+        # value are never interpreted by the shell.
+        env:
+          INPUT_STORE: ${{ github.event.inputs.websiteurl }}
+        run: python ./IGPageContentValidator/errorChecker.py
+  # Verifies the supplied URL is reachable, then runs linkchecker against it.
+  job2:
+    name: url link checker
+    runs-on: ubuntu-latest
+    steps:
+      - name: checkout repo content
+        uses: actions/checkout@v3
+      - name: Install dependencies
+        # Refresh the package index first and pass -y so the install never
+        # waits for a confirmation prompt on the non-interactive runner.
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y python3-bs4 python3-dnspython python3-requests
+          pip3 install linkchecker
+      - name: Check input link is valid
+        # The URL goes through an environment variable and is quoted, so the
+        # shell neither globs the "?" in it nor executes injected text.
+        env:
+          WEBSITE_URL: ${{ github.event.inputs.websiteurl }}
+        run: |
+          echo 'exit codes can be found at https://everything.curl.dev/usingcurl/returns'
+          curl -s -f -o /dev/null "$WEBSITE_URL"
+      - name: Execute Link Check
+        env:
+          WEBSITE_URL: ${{ github.event.inputs.websiteurl }}
+        # "|| test $? = 1" turns an exit status of 1 from linkchecker into
+        # success; any other non-zero status still fails the step.
+        run: >
+          linkchecker -r 2 --check-extern --no-status -f
+          ./IGPageContentValidator/linkcheckerrc "$WEBSITE_URL" || test $? = 1;
+  # Spell-checks every page listed in OutputLinks.txt with aspell (en_GB).
+  job3:
+    name: spell checker
+    runs-on: ubuntu-latest
+    steps:
+      - name: checkout repo content
+        uses: actions/checkout@v3
+      - name: Set up python
+        uses: actions/setup-python@v4
+        with:
+          python-version: 3.x
+      - name: Install dependencies
+        # Refresh the package index first and pass -y so the install never
+        # waits for a confirmation prompt on the non-interactive runner.
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y aspell
+          python -m pip install --upgrade pip
+          pip install -r ./IGPageContentValidator/requirements.txt
+      - name: execute relToAbsLinks.py
+        # The URL is handed over through the step environment instead of being
+        # expanded into the command line, so characters in the user-supplied
+        # value are never interpreted by the shell.
+        # NOTE(review): presumably this script writes OutputLinks.txt, which
+        # the next step reads - confirm against relToAbsLinks.py.
+        env:
+          INPUT_STORE: ${{ github.event.inputs.websiteurl }}
+        run: python ./IGPageContentValidator/relToAbsLinks.py
+      - name: Execute Spell Check
+        # For each URL in OutputLinks.txt: download the page, list the words
+        # aspell does not recognise, and print each distinct word with its
+        # count, followed by blank lines as a separator between pages.
+        # "read -r" and the quoted variable keep URLs containing "?", "*" or
+        # backslashes intact. Reading via redirection makes a missing
+        # OutputLinks.txt fail the step instead of passing silently.
+        run: |
+          while IFS= read -r page_url; do
+            wget -nv -O - "$page_url" \
+              | aspell list -H --camel-case --lang en_GB \
+                  --add-html-skip=nocheck \
+                  -p ./IGPageContentValidator/.aspell.en.pws \
+              | sort | uniq -c
+            echo -e '\n'
+          done < OutputLinks.txt
diff --git a/IGPageContentValidator/README.md b/IGPageContentValidator/README.md
@@ -1,6 +1,6 @@
 # Simplifier Implementation Guide Page Content Validation
 
-The validator works by scraping the webpage within website.txt for any internal webpage links within the Simplifier Guide. These webpages are then validated individually. 
+The validator works by scraping the webpage for any internal webpage links within the Simplifier Guide. These webpages are then validated individually. 
 
 The website validation is in three parts:
 - HTML Error Checking - This checks each page for any HTML errors. This captures any errors caused by using Simplifier relative links, e.g. `{{pagelink: }}`, amongst the usual coding errors.
@@ -9,13 +9,13 @@ The website validation is in three parts:
 
 ## Instructions
 
-1. Edit the file `website.txt` ensuring the website you want scraped is entered on the first line. Note: Only Simplifier.net guides will work with this checker.  
-2. Click the `Actions` button. the top 3 actions will be the individual checkers needed. Wait until there is a green tick next to each. 
-3. Within each Action click the `Build` button
-4. Within the Build click the following for the results:
-- HTML Error Check
-- Link Check
-- Spell Check
+1. Go to [Actions..websiteChecker](https://github.com/NHSDigital/IOPS-FHIR-Test-Scripts/actions/workflows/websiteChecker.yml)  
+2. Click `Run workflow`. 
+3. Enter the website url into the `websiteurl` box and click `Run workflow`.
+4. Click on the action and then click on the following for the results:
+- html error checker
+- url link checker
+- spell checker
 
 ## HTML Error Checking
 Uses the errorChecker.py script. Checks for any html errors on a website using BeautifulSoup's `find_all('div',{'class':"error"})`. This returns the errors for each individual page.

diff --git a/IGPageContentValidator/linkScraper.py b/IGPageContentValidator/linkScraper.py
@@ -5,9 +5,9 @@
 
 from bs4 import BeautifulSoup # this module helps in web scrapping.
 import requests  # this module helps us to download a web page
+import os
 
-with open('./IGPageContentValidator/website.txt', 'r') as file:
-    data = file.readline().strip('\n')
+data = os.environ['INPUT_STORE']
 
 '''returns html page of link within website.txt'''    
 def RequestData(url):

diff --git a/IGPageContentValidator/website.txt b/IGPageContentValidator/website.txt