From 2fbe877e7f004db2356be40697bfb7ffff7fca98 Mon Sep 17 00:00:00 2001
From: OSINT-TECHNOLOGIES <77023667+OSINT-TECHNOLOGIES@users.noreply.github.com>
Date: Thu, 22 Aug 2024 02:25:29 +0300
Subject: [PATCH] Reworked logs into solid journal file for all scans

---
 datagather_modules/networking_processor.py | 89 +++++++++++-----------
 1 file changed, 46 insertions(+), 43 deletions(-)

diff --git a/datagather_modules/networking_processor.py b/datagather_modules/networking_processor.py
index ca76213..21ce20e 100644
--- a/datagather_modules/networking_processor.py
+++ b/datagather_modules/networking_processor.py
@@ -1,4 +1,6 @@
 import sys
+sys.path.append('service')
+from logs_processing import logging
 
 try:
     import dns.resolver
@@ -14,7 +16,7 @@
 
 def get_dns_info(short_domain, report_file_extension):
     try:
-        get_dns_info_status = 'DNS INFO GATHERING: OK'
+        logging.info('DNS INFO GATHERING: OK')
         mx_list = []
         mx_records = dns.resolver.resolve(short_domain, 'MX')
         for record in mx_records:
@@ -22,21 +24,21 @@
         if not mx_list:
             mx_list.append('MX records were not gathered')
         if report_file_extension == 'xlsx':
-            return ', '.join(map(str, mx_list)), get_dns_info_status
+            return ', '.join(map(str, mx_list))
         elif report_file_extension == 'pdf':
-            return ', '.join(map(str, mx_list)), get_dns_info_status
+            return ', '.join(map(str, mx_list))
     except dns.resolver.NoAnswer as error_noans:
-        get_dns_info_status = f'DNS INFO GATHERING: NOT OK. REASON: {error_noans}'
-        print(Fore.RED + "No answer from domain about MX records. See logs for details")
-        return 'No information about MX records was gathered', get_dns_info_status
+        print(Fore.RED + "No answer from domain about MX records. See journal for details")
+        logging.error(f'DNS INFO GATHERING: ERROR. REASON: {error_noans}')
+        return 'No information about MX records was gathered'
     except dns.resolver.Timeout as error_timeout:
-        get_dns_info_status = f'DNS INFO GATHERING: NOT OK. REASON: {error_timeout}'
-        print(Fore.RED + "Timeout while getting MX records. See logs for details")
-        return 'No information about MX records was gathered', get_dns_info_status
+        print(Fore.RED + "Timeout while getting MX records. See journal for details")
+        logging.error(f'DNS INFO GATHERING: ERROR. REASON: {error_timeout}')
+        return 'No information about MX records was gathered'
 
 def get_ssl_certificate(short_domain, port=443):
     try:
-        get_ssl_certificate_status = 'SSL CERTIFICATE GATHERING: OK'
+        logging.info('SSL CERTIFICATE GATHERING: OK')
         context = ssl.create_default_context()
         conn = socket.create_connection((short_domain, port))
         sock = context.wrap_socket(conn, server_hostname=short_domain)
@@ -47,16 +49,16 @@
         notAfter = cert['notAfter']
         commonName = str(cert['issuer'][2][0][1]) + ', version: ' + str(cert['version'])
         serialNumber = cert['serialNumber']
-        return issuer, subject, notBefore, notAfter, commonName, serialNumber, get_ssl_certificate_status
+        return issuer, subject, notBefore, notAfter, commonName, serialNumber
     except (ssl.CertificateError, ssl.SSLError, socket.gaierror, ConnectionRefusedError) as e:
-        get_ssl_certificate_status = f'SSL CERTIFICATE GATHERING: NOT OK. REASON: {e}'
-        print(Fore.RED + "Error while gathering info about SSL certificate. See logs for details")
+        print(Fore.RED + "Error while gathering info about SSL certificate. See journal for details")
+        logging.error(f'SSL CERTIFICATE GATHERING: ERROR. REASON: {e}')
         issuer = subject = notBefore = notAfter = commonName = serialNumber = ["No information about SSL certificate was gathered"]
-        return issuer, subject, notBefore, notAfter, commonName, serialNumber, get_ssl_certificate_status
+        return issuer, subject, notBefore, notAfter, commonName, serialNumber
 
 def query_internetdb(ip, report_file_extension):
     try:
-        query_internetdb_status = 'GATHERING INTERNETDB DATA: OK'
+        logging.info('INTERNETDB DATA GATHERING: OK')
         url = f"https://internetdb.shodan.io/{ip}"
         response = requests.get(url)
         if response.status_code == 200:
@@ -77,23 +79,23 @@
         if not vulns:
             vulns = ['Vulnerabilities were not found']
         if report_file_extension == 'pdf':
-            return ports, hostnames, cpes, tags, vulns, query_internetdb_status
+            return ports, hostnames, cpes, tags, vulns
         elif report_file_extension == 'xlsx':
-            return ports, hostnames, cpes, tags, vulns, query_internetdb_status
+            return ports, hostnames, cpes, tags, vulns
         else:
            print(Fore.RED + "No information was found on InternetDB" + Style.RESET_ALL)
            ports = hostnames = cpes = tags = vulns = ["No info about this web resource on InternetDB"]
-            return ports, hostnames, cpes, tags, vulns, query_internetdb_status
+            return ports, hostnames, cpes, tags, vulns
     except Exception as e:
-        query_internetdb_status = f'GATHERING INTERNETDB DATA: NOT OK. REASON: {e}'
-        print(Fore.RED + "No information was found on InternetDB due to some error. See logs for details" + Style.RESET_ALL)
+        print(Fore.RED + "No information was found on InternetDB due to some error. See journal for details" + Style.RESET_ALL)
         ports = hostnames = cpes = tags = vulns = ["No info about this web resource on InternetDB"]
+        logging.error(f'INTERNETDB DATA GATHERING: ERROR. REASON: {e}')
+        return ports, hostnames, cpes, tags, vulns
 
 def get_robots_txt(url, robots_path):
     try:
-        get_robots_txt_status = 'ROBOTS.TXT EXTRACTION: OK'
+        logging.info('ROBOTS.TXT EXTRACTION: OK')
         if not url.startswith('http'):
             url = 'http://' + url
         robots_url = url + '/robots.txt'
         response = requests.get(robots_url)
@@ -101,17 +103,17 @@
         if response.status_code == 200:
             with open(robots_path, 'w') as f:
                 f.write(response.text)
-            return 'File "robots.txt" was extracted to text file in report folder', get_robots_txt_status
+            return 'File "robots.txt" was extracted to text file in report folder'
         else:
-            return 'File "robots.txt" was not found', get_robots_txt_status
+            return 'File "robots.txt" was not found'
     except Exception as e:
-        get_robots_txt_status = f'ROBOTS.TXT EXTRACTION: NOT OK. REASON: {e}'
-        print(Fore.RED + 'robots.txt file was not extracted due to some error. See logs for details')
-        return 'File "robots.txt" was not found', get_robots_txt_status
+        print(Fore.RED + 'robots.txt file was not extracted due to some error. See journal for details')
+        logging.error(f'ROBOTS.TXT EXTRACTION: ERROR. REASON: {e}')
+        return 'File "robots.txt" was not found'
 
 def get_sitemap_xml(url, sitemap_path):
     try:
-        get_sitemap_xml_status = 'SITEMAP.XML EXTRACTION: OK'
+        logging.info('SITEMAP.XML EXTRACTION: OK')
         if not url.startswith('http'):
             url = 'http://' + url
         sitemap_url = url + '/sitemap.xml'
@@ -120,37 +122,37 @@
             if response.status_code == 200:
                 with open(sitemap_path, 'w') as f:
                     f.write(response.text)
-                return 'File "sitemap.xml" was extracted to text file in report folder', get_sitemap_xml_status
+                return 'File "sitemap.xml" was extracted to text file in report folder'
             else:
-                return 'File "sitemap.xml" was not found', get_sitemap_xml_status
+                return 'File "sitemap.xml" was not found'
         else:
             with open(sitemap_path, 'w') as f:
                 f.write('0')
             print(Fore.RED + "Error while gathering sitemap.xml. Probably it's unreachable")
-            return 'File "sitemap.xml" was not found', get_sitemap_xml_status
+            return 'File "sitemap.xml" was not found'
     except Exception as e:
-        get_sitemap_xml_status = f'SITEMAP.XML EXTRACTION: NOT OK. REASON: {e}'
-        print(Fore.RED + "Error while gathering sitemap.xml. See logs for details")
+        print(Fore.RED + "Error while gathering sitemap.xml. See journal for details")
+        logging.error(f'SITEMAP.XML EXTRACTION: ERROR. REASON: {e}')
-        return 'Error occured during sitemap.xml gathering', get_sitemap_xml_status
+        return 'Error occurred during sitemap.xml gathering'
 
 def extract_links_from_sitemap(sitemap_links_path, sitemap_path):
     try:
-        extract_links_from_sitemap_status = 'LINKS EXTRACTION FROM SITEMAP: OK'
+        logging.info('SITEMAP.XML LINKS EXTRACTION: OK')
         tree = ET.parse(sitemap_path)
         root = tree.getroot()
         links = [elem.text for elem in root.iter('{http://www.sitemaps.org/schemas/sitemap/0.9}loc')]
         with open(sitemap_links_path, 'w') as f:
             for link in links:
                 f.write(f"{link}\n")
-        return 'Links from "sitemap.txt" were successfully parsed', extract_links_from_sitemap_status
+        return 'Links from "sitemap.txt" were successfully parsed'
     except (ET.ParseError, FileNotFoundError) as e:
-        extract_links_from_sitemap_status = f'LINKS EXTRACTION FROM SITEMAP: NOT OK. REASON: {e}'
-        print(Fore.RED + "Links from sitemap.txt were not parsed. See logs for details")
+        print(Fore.RED + "Links from sitemap.txt were not parsed. See journal for details")
+        logging.error(f'SITEMAP.XML LINKS EXTRACTION: ERROR. REASON: {e}')
-        return 'Links from "sitemap.txt" were not parsed', extract_links_from_sitemap_status
+        return 'Links from "sitemap.txt" were not parsed'
 
 def get_technologies(url):
     try:
-        get_technologies_status = 'GATHERING WEB-TECHNOLOGIES: OK'
+        logging.info('WEB-TECHNOLOGIES GATHERING: OK')
         tech = builtwith.parse(url)
         web_servers = tech.get('web-servers', [])
         cms = tech.get('cms', [])
@@ -170,8 +172,9 @@
         analytics = ['Used analytics services were not determined']
         if not javascript_frameworks:
             javascript_frameworks = ['Used JS frameworks were not determined']
-        return web_servers, cms, programming_languages, web_frameworks, analytics, javascript_frameworks, get_technologies_status
+        return web_servers, cms, programming_languages, web_frameworks, analytics, javascript_frameworks
     except Exception as e:
-        get_technologies_status = f'GATHERING WEB-TECHNOLOGIES: NOT OK. REASON: {e}'
         web_servers = cms = programming_languages = web_frameworks = analytics = javascript_frameworks = ['Found nothing related to web-technologies due to some error']
-        return web_servers, cms, programming_languages, web_frameworks, analytics, javascript_frameworks, get_technologies_status
+        print(Fore.RED + "Error when gathering info about web technologies. See journal for details")
+        logging.error(f'WEB-TECHNOLOGIES GATHERING: ERROR. REASON: {e}')
+        return web_servers, cms, programming_languages, web_frameworks, analytics, javascript_frameworks
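
Note on the imported helper: the first hunk adds sys.path.append('service') and from logs_processing import logging, but service/logs_processing.py itself is not part of this patch. Below is a minimal sketch of what such a module could contain, assuming it simply configures the stdlib logging package to append every record to a single journal file and re-exports the module under the same name; the file name and record format are illustrative assumptions, not taken from the repository:

import logging

# Assumed setup: one append-mode journal file shared by every scan module.
# 'journal.log' and the format string are illustrative guesses only.
logging.basicConfig(
    filename='journal.log',
    filemode='a',
    level=logging.INFO,
    format='%(asctime)s | %(levelname)s | %(message)s'
)

Under that assumption, from logs_processing import logging hands each datagather module the already-configured stdlib logging module, so every logging.info() and logging.error() call in the hunks above lands in the same journal file, which matches the subject line of this commit.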
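Call-site impact: every modified return statement drops the trailing status value (e.g. return ', '.join(map(str, mx_list)), get_dns_info_status becomes return ', '.join(map(str, mx_list))), so each function now returns one element fewer. Callers that still unpack the old status element will fail at unpacking time; a before/after sketch with a hypothetical call site:

# Before this patch, callers received the status string alongside the data:
robots_message, get_robots_txt_status = get_robots_txt(url, robots_path)

# After this patch, failures are recorded in the journal and only the data is returned:
robots_message = get_robots_txt(url, robots_path)

The report-building modules that consume these functions would need the matching one-line adjustment, presumably handled in the other commits of this rework.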