From 324de14c4cd8fac06428be96f2de3d414a0e9d3a Mon Sep 17 00:00:00 2001 From: Ryan May <112563297+ryma2fhir@users.noreply.github.com> Date: Tue, 23 Jul 2024 16:22:44 +0100 Subject: [PATCH 01/10] Update errorChecker.py --- IGPageContentValidator/errorChecker.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/IGPageContentValidator/errorChecker.py b/IGPageContentValidator/errorChecker.py index 60c3deb..296d3fa 100644 --- a/IGPageContentValidator/errorChecker.py +++ b/IGPageContentValidator/errorChecker.py @@ -18,7 +18,21 @@ def FindErrors(url): print(url_check) for err in error: print(err) - print() + print("\n") + script_tags = soup_check.find_all('script') + ''' finds all console.log items, then finds the text associated with it. expect 'console.log(,)' & ' = JSON.stringify(``). Retuns ''' + for script in script_tags: + script_text = script.get_text() + log_messages = re.findall(r'console\.log\((.*?)\)', script_text) + for msg in log_messages: + msg = (msg.replace("'", "").replace(' ', '').split(',')) + try: + lines = script_text.splitlines() + for line in lines: + if msg[1] in line and 'var' not in line and 'console.log' not in line: + print(msg[0],line.split("`")[1].rsplit("`", 0)[0]) + except IndexError: + pass print("Check Complete") From 22d88edc88138b09a5233c88341b53e1e4439b39 Mon Sep 17 00:00:00 2001 From: Ryan May <112563297+ryma2fhir@users.noreply.github.com> Date: Tue, 23 Jul 2024 16:30:06 +0100 Subject: [PATCH 02/10] Update errorChecker.py --- IGPageContentValidator/errorChecker.py | 1 + 1 file changed, 1 insertion(+) diff --git a/IGPageContentValidator/errorChecker.py b/IGPageContentValidator/errorChecker.py index 296d3fa..77b3043 100644 --- a/IGPageContentValidator/errorChecker.py +++ b/IGPageContentValidator/errorChecker.py @@ -4,6 +4,7 @@ """ from linkScraper import * +import re ''' Interates over ListOfLinks returning any pages that have errors ''' From 9f6b4f0a9feefe601274736fe11a852b527f8ba1 Mon Sep 17 00:00:00 2001 From: Ryan May <112563297+ryma2fhir@users.noreply.github.com> Date: Wed, 24 Jul 2024 10:20:06 +0100 Subject: [PATCH 03/10] Update errorChecker.py --- IGPageContentValidator/errorChecker.py | 72 ++++++++++++++++---------- 1 file changed, 44 insertions(+), 28 deletions(-) diff --git a/IGPageContentValidator/errorChecker.py b/IGPageContentValidator/errorChecker.py index 77b3043..922700e 100644 --- a/IGPageContentValidator/errorChecker.py +++ b/IGPageContentValidator/errorChecker.py @@ -1,40 +1,56 @@ # -*- coding: utf-8 -*- """ -This script checks webpages for any error messages +This script checks webpages for any error messages and console logs (not including stack trace) """ from linkScraper import * import re +def classErrors(warnings, soup): + '''returns all class errors within the webpage''' + class_errors = soup.find_all('div',{'class':"error"}) + if class_errors: + for err in class_errors: + warnings.append(err) + return warnings -''' Interates over ListOfLinks returning any pages that have errors ''' -def FindErrors(url): - websites = ListOfLinks(url) - for e in websites: - url_check = 'https://simplifier.net'+ e - data_check = requests.get(url_check).text - soup_check = BeautifulSoup(data_check,"html.parser") - error = soup_check.find_all('div',{'class':"error"}) - if error: - print(url_check) - for err in error: - print(err) - print("\n") - script_tags = soup_check.find_all('script') - ''' finds all console.log items, then finds the text associated with it. expect 'console.log(,)' & ' = JSON.stringify(``). Retuns ''' - for script in script_tags: - script_text = script.get_text() - log_messages = re.findall(r'console\.log\((.*?)\)', script_text) - for msg in log_messages: - msg = (msg.replace("'", "").replace(' ', '').split(',')) +def consoleLog(warnings,soup): + ''' finds all console.log items, then finds the text associated with it. expect 'console.log(,)' & ' = JSON.stringify(``). Retuns wanings as ''' + script_tags = soup.find_all('script') + for script in script_tags: + script_text = script.get_text() + log_messages = re.findall(r'console\.log\((.*?)\)', script_text) + for msg in log_messages: + msg = (msg.replace("'", "").replace(' ', '').split(',')) + lines = script_text.splitlines() + for line in lines: try: - lines = script_text.splitlines() - for line in lines: - if msg[1] in line and 'var' not in line and 'console.log' not in line: - print(msg[0],line.split("`")[1].rsplit("`", 0)[0]) + if 'Stacktrace' not in msg[1] and 'var' not in line and 'console.log' not in line: + warnings.append(msg[0]+" "+msg[1].replace(' At','\nAt')) #+" "+line.split("`")[1].rsplit("`", 0)[0] except IndexError: pass - print("Check Complete") - + return(warnings) -FindErrors(data) +def printWarnings(warnings, url): + '''prints all warnings''' + print(url) + for x in warnings: + print(x,"\n") + +''' Interates over ListOfLinks returning any pages that have errors ''' +def getSoup(url): + data = requests.get(url).text + soup = BeautifulSoup(data,"html.parser") + return soup + + +websites = ListOfLinks(url) +for suffix in websites: + warnings = [] + soup = getSoup('https://simplifier.net'+ suffix) + warnings = classErrors(warnings, soup) + warnings = consoleLog(warnings,soup) + if warnings: + printWarnings(warnings, 'https://simplifier.net'+ suffix) + +print("Check Complete") From d636b2e15fa1bec4035d6535de0a84cc9eb3eed6 Mon Sep 17 00:00:00 2001 From: Ryan May <112563297+ryma2fhir@users.noreply.github.com> Date: Wed, 24 Jul 2024 10:29:13 +0100 Subject: [PATCH 04/10] Update errorChecker.py --- IGPageContentValidator/errorChecker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/IGPageContentValidator/errorChecker.py b/IGPageContentValidator/errorChecker.py index 922700e..7914e86 100644 --- a/IGPageContentValidator/errorChecker.py +++ b/IGPageContentValidator/errorChecker.py @@ -44,7 +44,7 @@ def getSoup(url): return soup -websites = ListOfLinks(url) +websites = ListOfLinks(data) for suffix in websites: warnings = [] soup = getSoup('https://simplifier.net'+ suffix) From 50d56fa216d19db11117d0f3ef0bd72b52afa93b Mon Sep 17 00:00:00 2001 From: Ryan May <112563297+ryma2fhir@users.noreply.github.com> Date: Wed, 24 Jul 2024 10:39:41 +0100 Subject: [PATCH 05/10] Update linkScraper.py --- IGPageContentValidator/linkScraper.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/IGPageContentValidator/linkScraper.py b/IGPageContentValidator/linkScraper.py index a797602..a912b7c 100644 --- a/IGPageContentValidator/linkScraper.py +++ b/IGPageContentValidator/linkScraper.py @@ -19,13 +19,10 @@ def RequestData(url): def ListOfLinks(url): soup = RequestData(url) websites = [] - print("webpages to check") for link in soup.find_all('a'): site = link.get('href') if isinstance(site, str) and site[0:6]=='/guide': - print(site) websites.append(site) - print('\n\n') list_set = set(websites) unique_websites = list(list_set) return unique_websites From 0a296d7b94b657cc4d2578b2d7423619b2202901 Mon Sep 17 00:00:00 2001 From: Ryan May <112563297+ryma2fhir@users.noreply.github.com> Date: Wed, 24 Jul 2024 10:42:28 +0100 Subject: [PATCH 06/10] Update errorChecker.py --- IGPageContentValidator/errorChecker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/IGPageContentValidator/errorChecker.py b/IGPageContentValidator/errorChecker.py index 7914e86..b0cc13d 100644 --- a/IGPageContentValidator/errorChecker.py +++ b/IGPageContentValidator/errorChecker.py @@ -26,7 +26,7 @@ def consoleLog(warnings,soup): for line in lines: try: if 'Stacktrace' not in msg[1] and 'var' not in line and 'console.log' not in line: - warnings.append(msg[0]+" "+msg[1].replace(' At','\nAt')) #+" "+line.split("`")[1].rsplit("`", 0)[0] + warnings.append(msg[0]+" "+line.split("`")[1].rsplit("`", 0)[0].replace(' At','\nAt')) except IndexError: pass return(warnings) From 16bc7bb66bb7f4fd9fd7c8b1cd611d3e716361a6 Mon Sep 17 00:00:00 2001 From: Ryan May <112563297+ryma2fhir@users.noreply.github.com> Date: Thu, 25 Jul 2024 08:44:18 +0100 Subject: [PATCH 07/10] Update errorChecker.py --- IGPageContentValidator/errorChecker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/IGPageContentValidator/errorChecker.py b/IGPageContentValidator/errorChecker.py index b0cc13d..4a13a5a 100644 --- a/IGPageContentValidator/errorChecker.py +++ b/IGPageContentValidator/errorChecker.py @@ -35,7 +35,7 @@ def printWarnings(warnings, url): '''prints all warnings''' print(url) for x in warnings: - print(x,"\n") + print("\t",x) ''' Interates over ListOfLinks returning any pages that have errors ''' def getSoup(url): From 759a1edb563ab0686d1a6e0edee3715efa3eb21a Mon Sep 17 00:00:00 2001 From: Ryan May <112563297+ryma2fhir@users.noreply.github.com> Date: Thu, 25 Jul 2024 09:02:56 +0100 Subject: [PATCH 08/10] Update errorChecker.py --- IGPageContentValidator/errorChecker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/IGPageContentValidator/errorChecker.py b/IGPageContentValidator/errorChecker.py index 4a13a5a..1703099 100644 --- a/IGPageContentValidator/errorChecker.py +++ b/IGPageContentValidator/errorChecker.py @@ -26,7 +26,7 @@ def consoleLog(warnings,soup): for line in lines: try: if 'Stacktrace' not in msg[1] and 'var' not in line and 'console.log' not in line: - warnings.append(msg[0]+" "+line.split("`")[1].rsplit("`", 0)[0].replace(' At','\nAt')) + warnings.append(msg[0]+" "+line.split("`")[1].rsplit("`", 0)[0].replace(' At','\n\tAt')) except IndexError: pass return(warnings) From fb7fa0027ae86c8faebce52aaefe42c36906a03a Mon Sep 17 00:00:00 2001 From: Ryan May <112563297+ryma2fhir@users.noreply.github.com> Date: Thu, 25 Jul 2024 09:04:52 +0100 Subject: [PATCH 09/10] Update errorChecker.py --- IGPageContentValidator/errorChecker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/IGPageContentValidator/errorChecker.py b/IGPageContentValidator/errorChecker.py index 1703099..4abfd0e 100644 --- a/IGPageContentValidator/errorChecker.py +++ b/IGPageContentValidator/errorChecker.py @@ -35,7 +35,7 @@ def printWarnings(warnings, url): '''prints all warnings''' print(url) for x in warnings: - print("\t",x) + print("\t",x,"\n") ''' Interates over ListOfLinks returning any pages that have errors ''' def getSoup(url): From 90aa1a2a8b4bea3d6f00ec23e327556dddb16bb6 Mon Sep 17 00:00:00 2001 From: Ryan May <112563297+ryma2fhir@users.noreply.github.com> Date: Thu, 25 Jul 2024 11:36:39 +0100 Subject: [PATCH 10/10] Update errorChecker.py --- IGPageContentValidator/errorChecker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/IGPageContentValidator/errorChecker.py b/IGPageContentValidator/errorChecker.py index 4abfd0e..10b9ee4 100644 --- a/IGPageContentValidator/errorChecker.py +++ b/IGPageContentValidator/errorChecker.py @@ -37,7 +37,7 @@ def printWarnings(warnings, url): for x in warnings: print("\t",x,"\n") -''' Interates over ListOfLinks returning any pages that have errors ''' +''' Iterates over ListOfLinks returning any pages that have errors ''' def getSoup(url): data = requests.get(url).text soup = BeautifulSoup(data,"html.parser")