From aa414748453c709bc055abceca568efe151a8d61 Mon Sep 17 00:00:00 2001 From: Ryan May Date: Wed, 4 Oct 2023 16:03:25 +0100 Subject: [PATCH 1/2] updated github action for ease of use --- .github/workflows/spellChecker.yml | 36 --------------- .github/workflows/websiteChecker.yml | 65 +++++++++++++++++++++++++++ IGPageContentValidator/README.md | 2 +- IGPageContentValidator/linkScraper.py | 4 +- IGPageContentValidator/website.txt | 7 --- 5 files changed, 68 insertions(+), 46 deletions(-) delete mode 100644 .github/workflows/spellChecker.yml create mode 100644 .github/workflows/websiteChecker.yml delete mode 100644 IGPageContentValidator/website.txt diff --git a/.github/workflows/spellChecker.yml b/.github/workflows/spellChecker.yml deleted file mode 100644 index 2936579..0000000 --- a/.github/workflows/spellChecker.yml +++ /dev/null @@ -1,36 +0,0 @@ -# Action for the IGPageContentValidator to check any spelling mistakes within the IG webpages -name: run Spell Checker - -on: - push: - paths: - - IGPageContentValidator/** - -jobs: - build: - runs-on: ubuntu-latest - steps: - - - name: checkout repo content - uses: actions/checkout@v3 - - - name: Set up python - uses: actions/setup-python@v4 - with: - python-version: '3.x' - - - name: Install dependencies - run: | - sudo apt install aspell - python -m pip install --upgrade pip - pip install -r ./IGPageContentValidator/requirements.txt - - - name: execute relToAbsLinks.py - run: | - python ./IGPageContentValidator/relToAbsLinks.py - - - name: Execute Spell Check - run: | - #aspell dump dicts - cat OutputLinks.txt | while read p; do wget -nv -O - $p | aspell list -H --camel-case --lang en_GB --add-html-skip=nocheck -p ./IGPageContentValidator/.aspell.en.pws |sort| uniq -c; echo -e '\n'; done; - diff --git a/.github/workflows/websiteChecker.yml b/.github/workflows/websiteChecker.yml new file mode 100644 index 0000000..11cef2d --- /dev/null +++ b/.github/workflows/websiteChecker.yml @@ -0,0 +1,65 @@ +--- +name: Simplifier IG Website Checking +on: + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + inputs: + websiteurl: + default: "https://simplifier.net/guide/uk-core-implementation-guidance-directory?version=current" +jobs: + job1: + name: html Error Checking + runs-on: ubuntu-latest + steps: + - name: Checkout repo content + uses: actions/checkout@v3 + - name: Set up python + uses: actions/setup-python@v4 + with: + python-version: 3.x + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r ./IGPageContentValidator/requirements.txt + - name: Execute HTML Error Check + run: INPUT_STORE=${{ github.event.inputs.websiteurl }} python ./IGPageContentValidator/errorChecker.py + job2: + name: url link checking + runs-on: ubuntu-latest + steps: + - name: checkout repo content + uses: actions/checkout@v3 + - name: Install dependencies + run: | + sudo apt install python3-bs4 python3-dnspython python3-requests + pip3 install linkchecker + - name: Check input link is valid + run: > + echo 'exit codes can be found at + https://everything.curl.dev/usingcurl/returns' + + curl ${{ github.event.inputs.websiteurl }} -s -f -o /dev/null + - name: Execute Link Check + run: > + linkchecker -r 2 --check-extern --no-status -f + ./IGPageContentValidator/linkcheckerrc ${{ github.event.inputs.websiteurl }} || test $? = 1; + job3: + name: spell checking + runs-on: ubuntu-latest + steps: + - name: checkout repo content + uses: actions/checkout@v3 + - name: Set up python + uses: actions/setup-python@v4 + with: + python-version: 3.x + - name: Install dependencies + run: | + sudo apt install aspell + python -m pip install --upgrade pip + pip install -r ./IGPageContentValidator/requirements.txt + - name: execute relToAbsLinks.py + run: INPUT_STORE=${{ github.event.inputs.websiteurl }} python ./IGPageContentValidator/relToAbsLinks.py + + - name: Execute Spell Check + run: cat OutputLinks.txt | while read p; do wget -nv -O - $p | aspell list -H --camel-case --lang en_GB --add-html-skip=nocheck -p ./IGPageContentValidator/.aspell.en.pws |sort| uniq -c; echo -e '\n'; done; diff --git a/IGPageContentValidator/README.md b/IGPageContentValidator/README.md index 2e5762f..62303f2 100644 --- a/IGPageContentValidator/README.md +++ b/IGPageContentValidator/README.md @@ -1,6 +1,6 @@ # Simplifier Implementation Guide Page Content Validation -The validator works by scraping the webpage within website.txt for any internal webpage links within the Simplifier Guide. These webpages are then validated individually. +The validator works by scraping the webpage for any internal webpage links within the Simplifier Guide. These webpages are then validated individually. The website validation is in three parts: - HTML Error Checking - This checks each page for any html errors. This captures any errors caused by using Simplifier relative links, e.g `{{pagelink: }}`, amongst the usual coding errors. diff --git a/IGPageContentValidator/linkScraper.py b/IGPageContentValidator/linkScraper.py index f0695e2..a797602 100644 --- a/IGPageContentValidator/linkScraper.py +++ b/IGPageContentValidator/linkScraper.py @@ -5,9 +5,9 @@ from bs4 import BeautifulSoup # this module helps in web scrapping. import requests # this module helps us to download a web page +import os -with open('./IGPageContentValidator/website.txt', 'r') as file: - data = file.readline().strip('\n') +data = os.environ['INPUT_STORE'] '''returns html page of link within website.txt''' def RequestData(url): diff --git a/IGPageContentValidator/website.txt b/IGPageContentValidator/website.txt deleted file mode 100644 index f15fa85..0000000 --- a/IGPageContentValidator/website.txt +++ /dev/null @@ -1,7 +0,0 @@ -https://simplifier.net/guide/uk-core-implementation-guide-stu3-sequence?version=current - -### IMPORTANT INFO ### -# Add website link on the first line. Single Link only, no multiple. -# Needs to be on the https://simplifier.net/ domain. -# IG must be "public" -# All relevant pages to be scanned need to have a link from the website link above, e.g. within the banner. From 975753db493442217a74603b4c65c10ab8fc8de9 Mon Sep 17 00:00:00 2001 From: Ryan May Date: Wed, 4 Oct 2023 16:11:40 +0100 Subject: [PATCH 2/2] fixed readme --- .github/workflows/errorChecker.yml | 29 ---------------------------- .github/workflows/linkchecker.yml | 28 --------------------------- .github/workflows/websiteChecker.yml | 6 +++--- IGPageContentValidator/README.md | 14 +++++++------- 4 files changed, 10 insertions(+), 67 deletions(-) delete mode 100644 .github/workflows/errorChecker.yml delete mode 100644 .github/workflows/linkchecker.yml diff --git a/.github/workflows/errorChecker.yml b/.github/workflows/errorChecker.yml deleted file mode 100644 index a7658a7..0000000 --- a/.github/workflows/errorChecker.yml +++ /dev/null @@ -1,29 +0,0 @@ -# Action for the IGPageContentValidator to check any HTML errors within the IG webpages -name: run HTML Error Checker - -on: - push: - paths: - - IGPageContentValidator/** - -jobs: - build: - runs-on: ubuntu-latest - steps: - - - name: Checkout repo content - uses: actions/checkout@v3 - - - name: Set up python - uses: actions/setup-python@v4 - with: - python-version: '3.x' - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install -r ./IGPageContentValidator/requirements.txt - - - name: Execute HTML Error Check - run: python ./IGPageContentValidator/errorChecker.py - diff --git a/.github/workflows/linkchecker.yml b/.github/workflows/linkchecker.yml deleted file mode 100644 index 0ba1e95..0000000 --- a/.github/workflows/linkchecker.yml +++ /dev/null @@ -1,28 +0,0 @@ -# Action for the IGPageContentValidator to check any bad hyperlinks within the IG webpages -name: run Link Checker - -on: - push: - paths: - - IGPageContentValidator/** - -jobs: - build: - runs-on: ubuntu-latest - steps: - - - name: checkout repo content - uses: actions/checkout@v3 - - - name: Install dependencies - run: | - sudo apt install python3-bs4 python3-dnspython python3-requests - pip3 install linkchecker - - name: Check input link is valid #Checks link is valid. Used as exit code 1 muted in 'execute linkchecker' - run: | - echo 'exit codes can be found at https://everything.curl.dev/usingcurl/returns' - curl $(head -n 1 ./IGPageContentValidator/website.txt) -s -f -o /dev/null - - - name: Execute Link Check # exit code 1 muted as linkchecker sets it if a link is not valid. This causes the GitHub action to fail. - run: | - linkchecker -r 2 --check-extern --no-status -f ./IGPageContentValidator/linkcheckerrc $(head -n 1 ./IGPageContentValidator/website.txt) || test $? = 1; diff --git a/.github/workflows/websiteChecker.yml b/.github/workflows/websiteChecker.yml index 11cef2d..11d9ad0 100644 --- a/.github/workflows/websiteChecker.yml +++ b/.github/workflows/websiteChecker.yml @@ -8,7 +8,7 @@ on: default: "https://simplifier.net/guide/uk-core-implementation-guidance-directory?version=current" jobs: job1: - name: html Error Checking + name: html error checker runs-on: ubuntu-latest steps: - name: Checkout repo content @@ -24,7 +24,7 @@ jobs: - name: Execute HTML Error Check run: INPUT_STORE=${{ github.event.inputs.websiteurl }} python ./IGPageContentValidator/errorChecker.py job2: - name: url link checking + name: url link checker runs-on: ubuntu-latest steps: - name: checkout repo content @@ -44,7 +44,7 @@ jobs: linkchecker -r 2 --check-extern --no-status -f ./IGPageContentValidator/linkcheckerrc ${{ github.event.inputs.websiteurl }} || test $? = 1; job3: - name: spell checking + name: spell checker runs-on: ubuntu-latest steps: - name: checkout repo content diff --git a/IGPageContentValidator/README.md b/IGPageContentValidator/README.md index 62303f2..cc7eb35 100644 --- a/IGPageContentValidator/README.md +++ b/IGPageContentValidator/README.md @@ -9,13 +9,13 @@ The website validation is in three parts: ## Instructions -1. Edit the file `website.txt` ensuring the website you want scraped is entered on the first line. Note: Only Simplifier.net guides will work with this checker. -2. Click the `Actions` button. the top 3 actions will be the individual checkers needed. Wait until there is a green tick next to each. -3. Within each Action click the `Build` button -4. Within the Build click the following for the results: -- HTML Error Check -- Link Check -- Spell Check +1. Go to [Actions..websiteChecker](https://github.com/NHSDigital/IOPS-FHIR-Test-Scripts/actions/workflows/websiteChecker.yml) +2. Click `Run workflow`. +3. Enter the website url into the `websiteurl` box and click `Run workflow`. +4. Click on the action and then click on the following for the results: +- html error checker +- link checker +- spell checker ## HTML Error Checking Uses the errorChecker.py script. Checks for any html errors on a website using BeautifulSoup's `find_all('div',{'class':"error"})`. This returns the errors for each individual page.