Skip to content

Commit

Permalink
Merge pull request #102 from NHSDigital/ryma2fhir-patch-7
Browse files Browse the repository at this point in the history
Update errorChecker.py
  • Loading branch information
ryma2fhir authored Jul 25, 2024
2 parents d5f287e + 90aa1a2 commit b96581e
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 20 deletions.
65 changes: 48 additions & 17 deletions IGPageContentValidator/errorChecker.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,56 @@
# -*- coding: utf-8 -*-
"""
This script checks webpages for any error messages
This script checks webpages for any error messages and console logs (not including stack trace)
"""

from linkScraper import *
import re

def classErrors(warnings, soup):
    """Collect every <div class="error"> element found on the page.

    Args:
        warnings: list accumulating warning items; extended in place.
        soup: parsed page (BeautifulSoup-like; only ``find_all`` is used).

    Returns:
        The same ``warnings`` list with any error divs appended.
    """
    # find_all returns an empty list when nothing matches, so no
    # truthiness guard is needed before consuming the result.
    warnings.extend(soup.find_all('div', {'class': "error"}))
    return warnings

''' Iterates over ListOfLinks, printing any pages that contain error divs '''
def FindErrors(url):
    """Fetch each page linked from *url* and print its error-div contents."""
    for suffix in ListOfLinks(url):
        page_url = 'https://simplifier.net' + suffix
        page_soup = BeautifulSoup(requests.get(page_url).text, "html.parser")
        errors = page_soup.find_all('div', {'class': "error"})
        if errors:
            # Only pages with at least one error are reported.
            print(page_url)
            for item in errors:
                print(item)
            print()
    print("Check Complete")

def consoleLog(warnings, soup):
    """Extract console.log warnings embedded in the page's <script> tags.

    Expects scripts of the form ``console.log('<label>', <var>)`` where
    ``<var> = JSON.stringify(`<text>`)``.  For each match, appends
    "<label> <text>" to *warnings*, turning " At" into a newline-indented
    "At" for readability.  Messages whose second argument mentions
    'Stacktrace' are skipped.

    Args:
        warnings: list accumulating warning strings; extended in place.
        soup: parsed page (only ``find_all('script')`` is used).

    Returns:
        The same ``warnings`` list.
    """
    for script in soup.find_all('script'):
        script_text = script.get_text()
        lines = script_text.splitlines()  # invariant per script: hoisted out of the message loop
        for msg in re.findall(r'console\.log\((.*?)\)', script_text):
            parts = msg.replace("'", "").replace(' ', '').split(',')
            for line in lines:
                try:
                    # Skip stack traces, declarations and the log call itself.
                    # IndexError means the line has no backtick payload (or
                    # the log call had a single argument) -- ignore it.
                    if ('Stacktrace' not in parts[1] and 'var' not in line
                            and 'console.log' not in line):
                        warnings.append(parts[0] + " "
                                        + line.split("`")[1].replace(' At', '\n\tAt'))
                except IndexError:
                    pass
    return warnings

# Legacy entry point: scan every page linked from *data* (imported via
# linkScraper) and print any error divs found.
FindErrors(data)
def printWarnings(warnings, url):
    """Report a page's warnings: the URL, then one indented warning apiece."""
    print(url)
    for item in warnings:
        print("\t", item, "\n")

def getSoup(url):
    """Download *url* and return the parsed HTML as a BeautifulSoup object."""
    return BeautifulSoup(requests.get(url).text, "html.parser")


# Iterate over every page linked from the index and report any pages that
# contain error markup or console.log warnings.
for suffix in ListOfLinks(data):
    page_url = 'https://simplifier.net' + suffix
    page_soup = getSoup(page_url)
    page_warnings = classErrors([], page_soup)
    page_warnings = consoleLog(page_warnings, page_soup)
    if page_warnings:
        printWarnings(page_warnings, page_url)

print("Check Complete")
3 changes: 0 additions & 3 deletions IGPageContentValidator/linkScraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,10 @@ def RequestData(url):
def ListOfLinks(url):
    """Return the unique '/guide' links found on *url*.

    Args:
        url: page whose anchors are scanned (fetched via RequestData).

    Returns:
        list[str]: de-duplicated hrefs beginning with '/guide', in
        first-seen order.
    """
    soup = RequestData(url)
    websites = []
    for link in soup.find_all('a'):
        site = link.get('href')
        # link.get may return None (or a list); only keep guide paths.
        if isinstance(site, str) and site.startswith('/guide'):
            websites.append(site)
    # dict.fromkeys dedupes while keeping first-seen order; a plain set()
    # would return the links in arbitrary order between runs.
    return list(dict.fromkeys(websites))
Expand Down

0 comments on commit b96581e

Please sign in to comment.