From eea0ad812a9448df077568660f2bdb73e8f37d43 Mon Sep 17 00:00:00 2001 From: Joe Clarke Date: Fri, 8 Nov 2024 05:34:30 -0500 Subject: [PATCH] Add support for extracting RFCs from XML. Fall back to .txt if the XML file cannot be found. --- extractors/rfc_extractor.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/extractors/rfc_extractor.py b/extractors/rfc_extractor.py index a959f19..d38bc27 100755 --- a/extractors/rfc_extractor.py +++ b/extractors/rfc_extractor.py @@ -47,11 +47,21 @@ def __init__(self, rfc_extractor_paths: RFCExtractorPaths, debug_level: int): self.__create_ietf_rfcs_list() def __create_ietf_rfcs_list(self): - self.ietf_rfcs = [ - f - for f in os.listdir(self.rfc_extractor_paths.rfc_path) - if os.path.isfile(os.path.join(self.rfc_extractor_paths.rfc_path, f)) - ] + for f in os.listdir(self.rfc_extractor_paths.rfc_path): + if not f.endswith(".txt"): + continue + + (base, _) = os.path.splitext(f) + full_path = os.path.join(self.rfc_extractor_paths.rfc_path, f) + fname = f + xml_file = os.path.join(self.rfc_extractor_paths.rfc_path, base + ".xml") + if os.path.isfile(xml_file): + full_path = xml_file + fname = base + ".xml" + + if os.path.isfile(full_path): + self.ietf_rfcs.append(fname) + self.ietf_rfcs.sort() print('IETF RFCs list created') @@ -95,6 +105,7 @@ def extract_from_rfc_file(self, rfc_file: str) -> list[str]: force_revision_pyang=False, force_revision_regexp=True, extract_code_snippets=True, + rfcxml=(rfc_file.endswith(".xml")), code_snippets_dir=os.path.join(self.code_snippets_directory, os.path.splitext(rfc_file)[0]), )