From fd5b792c4af8732bf6828e05f5c6beb560d6bda9 Mon Sep 17 00:00:00 2001 From: Sean Whalen <44679+seanthegeek@users.noreply.github.com> Date: Sun, 31 Mar 2024 20:00:47 -0400 Subject: [PATCH] Close issue #500 Add the following general configuration options: - `always_use_local_files` - Disables the download of the reverse DNS map - `local_reverse_dns_map_path` - Overrides the default local file path to use for the reverse DNS map --- parsedmarc/__init__.py | 148 +++++++++++++++++++++++++++++------------ parsedmarc/cli.py | 37 +++++++++-- parsedmarc/utils.py | 28 ++++++-- 3 files changed, 160 insertions(+), 53 deletions(-) diff --git a/parsedmarc/__init__.py b/parsedmarc/__init__.py index dec79c98..4380bb92 100644 --- a/parsedmarc/__init__.py +++ b/parsedmarc/__init__.py @@ -72,7 +72,11 @@ class InvalidForensicReport(InvalidDMARCReport): """Raised when an invalid DMARC forensic report is encountered""" -def _parse_report_record(record, ip_db_path=None, offline=False, +def _parse_report_record(record, ip_db_path=None, + always_use_local_files=False, + reverse_dns_map_path=None, + reverse_dns_map_url=None, + offline=False, nameservers=None, dns_timeout=2.0): """ Converts a record from a DMARC aggregate report into a more consistent @@ -80,6 +84,9 @@ def _parse_report_record(record, ip_db_path=None, offline=False, Args: record (OrderedDict): The record to convert + always_use_local_files (bool): Do not download files + reverse_dns_map_path (str): Path to a reverse DNS map file + reverse_dns_map_url (str): URL to a reverse DNS map file ip_db_path (str): Path to a MMDB file from MaxMind or DBIP offline (bool): Do not query online for geolocation or DNS nameservers (list): A list of one or more nameservers to use @@ -91,12 +98,16 @@ def _parse_report_record(record, ip_db_path=None, offline=False, """ record = record.copy() new_record = OrderedDict() - new_record_source = get_ip_address_info(record["row"]["source_ip"], - cache=IP_ADDRESS_CACHE, - ip_db_path=ip_db_path, - 
offline=offline, - nameservers=nameservers, - timeout=dns_timeout) + new_record_source = get_ip_address_info( + record["row"]["source_ip"], + cache=IP_ADDRESS_CACHE, + ip_db_path=ip_db_path, + always_use_local_files=always_use_local_files, + reverse_dns_map_path=reverse_dns_map_path, + reverse_dns_map_url=reverse_dns_map_url, + offline=offline, + nameservers=nameservers, + timeout=dns_timeout) new_record["source"] = new_record_source new_record["count"] = int(record["row"]["count"]) policy_evaluated = record["row"]["policy_evaluated"].copy() @@ -387,14 +398,24 @@ def parsed_smtp_tls_reports_to_csv(reports): return csv_file_object.getvalue() -def parse_aggregate_report_xml(xml, ip_db_path=None, offline=False, - nameservers=None, timeout=2.0, - keep_alive=None): +def parse_aggregate_report_xml( + xml, + ip_db_path=None, + always_use_local_files=False, + reverse_dns_map_path=None, + reverse_dns_map_url=None, + offline=False, + nameservers=None, + timeout=2.0, + keep_alive=None): """Parses a DMARC XML report string and returns a consistent OrderedDict Args: xml (str): A string of DMARC aggregate report XML ip_db_path (str): Path to a MMDB file from MaxMind or DBIP + always_use_local_files (bool): Do not download files + reverse_dns_map_path (str): Path to a reverse DNS map file + reverse_dns_map_url (str): URL to a reverse DNS map file offline (bool): Do not query online for geolocation or DNS nameservers (list): A list of one or more nameservers to use (Cloudflare's public DNS resolvers by default) @@ -516,19 +537,27 @@ def parse_aggregate_report_xml(xml, ip_db_path=None, offline=False, keep_alive() logger.debug("Processed {0}/{1}".format( i, len(report["record"]))) - report_record = _parse_report_record(report["record"][i], - ip_db_path=ip_db_path, - offline=offline, - nameservers=nameservers, - dns_timeout=timeout) + report_record = _parse_report_record( + report["record"][i], + ip_db_path=ip_db_path, + offline=offline, + 
always_use_local_files=always_use_local_files, + reverse_dns_map_path=reverse_dns_map_path, + reverse_dns_map_url=reverse_dns_map_url, + nameservers=nameservers, + dns_timeout=timeout) records.append(report_record) else: - report_record = _parse_report_record(report["record"], - ip_db_path=ip_db_path, - offline=offline, - nameservers=nameservers, - dns_timeout=timeout) + report_record = _parse_report_record( + report["record"], + ip_db_path=ip_db_path, + always_use_local_files=always_use_local_files, + reverse_dns_map_path=reverse_dns_map_path, + reverse_dns_map_url=reverse_dns_map_url, + offline=offline, + nameservers=nameservers, + dns_timeout=timeout) records.append(report_record) new_report["records"] = records @@ -607,16 +636,25 @@ def extract_report(input_): return report -def parse_aggregate_report_file(_input, offline=False, ip_db_path=None, - nameservers=None, - dns_timeout=2.0, - keep_alive=None): +def parse_aggregate_report_file( + _input, + offline=False, + always_use_local_files=None, + reverse_dns_map_path=None, + reverse_dns_map_url=None, + ip_db_path=None, + nameservers=None, + dns_timeout=2.0, + keep_alive=None): """Parses a file at the given path, a file-like object. 
or bytes as an aggregate DMARC report Args: _input: A path to a file, a file like object, or bytes offline (bool): Do not query online for geolocation or DNS + always_use_local_files (bool): Do not download files + reverse_dns_map_path (str): Path to a reverse DNS map file + reverse_dns_map_url (str): URL to a reverse DNS map file ip_db_path (str): Path to a MMDB file from MaxMind or DBIP nameservers (list): A list of one or more nameservers to use (Cloudflare's public DNS resolvers by default) @@ -632,12 +670,16 @@ def parse_aggregate_report_file(_input, offline=False, ip_db_path=None, except Exception as e: raise InvalidAggregateReport(e) - return parse_aggregate_report_xml(xml, - ip_db_path=ip_db_path, - offline=offline, - nameservers=nameservers, - timeout=dns_timeout, - keep_alive=keep_alive) + return parse_aggregate_report_xml( + xml, + always_use_local_files=always_use_local_files, + reverse_dns_map_path=reverse_dns_map_path, + reverse_dns_map_url=reverse_dns_map_url, + ip_db_path=ip_db_path, + offline=offline, + nameservers=nameservers, + timeout=dns_timeout, + keep_alive=keep_alive) def parsed_aggregate_reports_to_csv_rows(reports): @@ -781,18 +823,28 @@ def parsed_aggregate_reports_to_csv(reports): return csv_file_object.getvalue() -def parse_forensic_report(feedback_report, sample, msg_date, - offline=False, ip_db_path=None, - nameservers=None, dns_timeout=2.0, +def parse_forensic_report(feedback_report, + sample, + msg_date, + always_use_local_files=False, + reverse_dns_map_path=None, + reverse_dns_map_url=None, + offline=False, + ip_db_path=None, + nameservers=None, + dns_timeout=2.0, strip_attachment_payloads=False): """ Converts a DMARC forensic report and sample to a ``OrderedDict`` Args: feedback_report (str): A message's feedback report as a string + sample (str): The RFC 822 headers or RFC 822 message sample ip_db_path (str): Path to a MMDB file from MaxMind or DBIP + always_use_local_files (bool): Do not download files + reverse_dns_map_path 
(str): Path to a reverse DNS map file + reverse_dns_map_url (str): URL to a reverse DNS map file offline (bool): Do not query online for geolocation or DNS - sample (str): The RFC 822 headers or RFC 822 message sample msg_date (str): The message's date header nameservers (list): A list of one or more nameservers to use (Cloudflare's public DNS resolvers by default) @@ -840,12 +892,16 @@ def parse_forensic_report(feedback_report, sample, msg_date, parsed_report["arrival_date_utc"] = arrival_utc ip_address = re.split(r'\s', parsed_report["source_ip"]).pop(0) - parsed_report_source = get_ip_address_info(ip_address, - cache=IP_ADDRESS_CACHE, - ip_db_path=ip_db_path, - offline=offline, - nameservers=nameservers, - timeout=dns_timeout) + parsed_report_source = get_ip_address_info( + ip_address, + cache=IP_ADDRESS_CACHE, + ip_db_path=ip_db_path, + always_use_local_files=always_use_local_files, + reverse_dns_map_path=reverse_dns_map_path, + reverse_dns_map_url=reverse_dns_map_url, + offline=offline, + nameservers=nameservers, + timeout=dns_timeout) parsed_report["source"] = parsed_report_source del parsed_report["source_ip"] @@ -1144,6 +1200,9 @@ def parse_report_email(input_, offline=False, ip_db_path=None, def parse_report_file(input_, nameservers=None, dns_timeout=2.0, strip_attachment_payloads=False, ip_db_path=None, + always_use_local_files=False, + reverse_dns_map_path=None, + reverse_dns_map_url=None, offline=False, keep_alive=None): """Parses a DMARC aggregate or forensic file at the given path, a file-like object. 
or bytes @@ -1156,6 +1215,9 @@ def parse_report_file(input_, nameservers=None, dns_timeout=2.0, strip_attachment_payloads (bool): Remove attachment payloads from forensic report results ip_db_path (str): Path to a MMDB file from MaxMind or DBIP + always_use_local_files (bool): Do not download files + reverse_dns_map_path (str): Path to a reverse DNS map + reverse_dns_map_url (str): URL to a reverse DNS map offline (bool): Do not make online queries for geolocation or DNS keep_alive (callable): Keep alive function @@ -1173,8 +1235,12 @@ def parse_report_file(input_, nameservers=None, dns_timeout=2.0, content = file_object.read() file_object.close() try: - report = parse_aggregate_report_file(content, - ip_db_path=ip_db_path, + report = parse_aggregate_report_file( + content, + ip_db_path=ip_db_path, + always_use_local_files=always_use_local_files, + reverse_dns_map_path=reverse_dns_map_path, + reverse_dns_map_url=reverse_dns_map_url, offline=offline, nameservers=nameservers, dns_timeout=dns_timeout, diff --git a/parsedmarc/cli.py b/parsedmarc/cli.py index 60d92063..efc07376 100644 --- a/parsedmarc/cli.py +++ b/parsedmarc/cli.py @@ -41,15 +41,23 @@ def _str_to_list(s): def cli_parse(file_path, sa, nameservers, dns_timeout, - ip_db_path, offline, conn): + ip_db_path, offline, + always_use_local_files, + reverse_dns_map_path, + reverse_dns_map_url, + conn): """Separated this function for multiprocessing""" try: - file_results = parse_report_file(file_path, - ip_db_path=ip_db_path, - offline=offline, - nameservers=nameservers, - dns_timeout=dns_timeout, - strip_attachment_payloads=sa) + file_results = parse_report_file( + file_path, + ip_db_path=ip_db_path, + offline=offline, + always_use_local_files=always_use_local_files, + reverse_dns_map_path=reverse_dns_map_path, + reverse_dns_map_url=reverse_dns_map_url, + nameservers=nameservers, + dns_timeout=dns_timeout, + strip_attachment_payloads=sa) conn.send([file_results, file_path]) except ParserError as error: conn.send([error, file_path]) @@ -473,6 +481,9 @@ def process_reports(reports_):
log_file=args.log_file, n_procs=1, ip_db_path=None, + always_use_local_files=False, + reverse_dns_map_path=None, + reverse_dns_map_url=None, la_client_id=None, la_client_secret=None, la_tenant_id=None, @@ -545,6 +556,15 @@ def process_reports(reports_): opts.ip_db_path = general_config["ip_db_path"] else: opts.ip_db_path = None + if "always_use_local_files" in general_config: + opts.always_use_local_files = general_config.getboolean( + "always_use_local_files") + if "reverse_dns_map_path" in general_config: + opts.reverse_dns_map_path = general_config[ + "reverse_dns_map_path"] + if "reverse_dns_map_url" in general_config: + opts.reverse_dns_map_url = general_config[ + "reverse_dns_map_url"] if "mailbox" in config.sections(): mailbox_config = config["mailbox"] @@ -1164,6 +1184,9 @@ def process_reports(reports_): opts.dns_timeout, opts.ip_db_path, opts.offline, + opts.always_use_local_files, + opts.reverse_dns_map_path, + opts.reverse_dns_map_url, child_conn, )) processes.append(process) diff --git a/parsedmarc/utils.py b/parsedmarc/utils.py index d38f755e..9c38bc73 100644 --- a/parsedmarc/utils.py +++ b/parsedmarc/utils.py @@ -299,6 +299,9 @@ def get_ip_address_country(ip_address, db_path=None): def get_service_from_reverse_dns_base_domain(base_domain, + always_use_local_file=False, + local_file_path=None, + url=None, offline=False, reverse_dns_map=None): """ @@ -306,6 +309,9 @@ def get_service_from_reverse_dns_base_domain(base_domain, Args: base_domain (str): The base domain of the reverse DNS lookup + always_use_local_file (bool): Always use a local map file + local_file_path (str): Path to a local map file + url (str): URL to a reverse DNS map offline (bool): Use the built-in copy of the reverse DNS map reverse_dns_map (dict): A reverse DNS map Returns: @@ -322,13 +328,15 @@ def load_csv(_csv_file): type=row["type"]) base_domain = base_domain.lower().strip() - url = ("https://raw.githubusercontent.com/domainaware/parsedmarc/master/" - 
"parsedmarc/resources/maps/base_reverse_dns_map.csv") + if url is None: + url = ("https://raw.githubusercontent.com/domainaware/parsedmarc/master/" + "parsedmarc/resources/maps/base_reverse_dns_map.csv") if reverse_dns_map is None: reverse_dns_map = dict() csv_file = io.StringIO() - if not offline and len(reverse_dns_map) == 0: + if (not (offline or always_use_local_file) + and len(reverse_dns_map) == 0): try: logger.debug(f"Trying to fetch " f"reverse DNS map from {url}...") @@ -341,6 +349,8 @@ def load_csv(_csv_file): logger.info("Loading included reverse DNS map...") with pkg_resources.path(parsedmarc.resources.maps, "base_reverse_dns_map.csv") as path: + if local_file_path is not None: + path = local_file_path with open(path) as csv_file: load_csv(csv_file) try: @@ -351,7 +361,11 @@ def load_csv(_csv_file): return service -def get_ip_address_info(ip_address, ip_db_path=None, +def get_ip_address_info(ip_address, + ip_db_path=None, + reverse_dns_map_path=None, + always_use_local_files=False, + reverse_dns_map_url=None, cache=None, reverse_dns_map=None, offline=False, @@ -362,6 +376,9 @@ def get_ip_address_info(ip_address, ip_db_path=None, Args: ip_address (str): The IP address to check ip_db_path (str): path to a MMDB file from MaxMind or DBIP + reverse_dns_map_path (str): Path to a reverse DNS map file + reverse_dns_map_url (str): URL to the reverse DNS map file + always_use_local_files (bool): Do not download files cache (ExpiringDict): Cache storage reverse_dns_map (dict): A reverse DNS map offline (bool): Do not make online queries for geolocation or DNS @@ -402,6 +419,9 @@ def get_ip_address_info(ip_address, ip_db_path=None, service = get_service_from_reverse_dns_base_domain( base_domain, offline=offline, + local_file_path=reverse_dns_map_path, + url=reverse_dns_map_url, + always_use_local_file=always_use_local_files, reverse_dns_map=reverse_dns_map) info["base_domain"] = base_domain info["type"] = service["type"]