From b8340545975f8c371aa2fc48580db5054d99037c Mon Sep 17 00:00:00 2001 From: Barbara Hui Date: Tue, 22 Oct 2024 16:15:04 -0700 Subject: [PATCH] Merge _process_record() into parse() --- .../mappers/marc/ucb_tind_mapper.py | 83 ++++++------------- 1 file changed, 24 insertions(+), 59 deletions(-) diff --git a/metadata_mapper/mappers/marc/ucb_tind_mapper.py b/metadata_mapper/mappers/marc/ucb_tind_mapper.py index 71e1837e..3eb044ba 100644 --- a/metadata_mapper/mappers/marc/ucb_tind_mapper.py +++ b/metadata_mapper/mappers/marc/ucb_tind_mapper.py @@ -369,64 +369,29 @@ def parse(self, api_response): ) records = [] - for re in record_elements: - record = self._process_record(re, request_url) - # sickle_rec = models.Record(re) - # sickle_header = sickle_rec.header - # if sickle_header.deleted: - # continue - - # record = self.strip_metadata(sickle_rec.metadata) - # record["datestamp"] = sickle_header.datestamp - # record["id"] = sickle_header.identifier - # record["request_url"] = request_url - records.append(record) + for record_element in record_elements: + sickle_rec = models.Record(record_element) + sickle_header = sickle_rec.header + if not sickle_header.deleted: + marc_record_element = record_element.find( + ".//marc:record", + namespaces={"marc": "http://www.loc.gov/MARC21/slim"} + ) + marc_record_string = etree.tostring( + marc_record_element,encoding="utf-8").decode("utf-8") + + # Wrap the record in collection so pymarc can read it + marc_collection_xml_full = \ + ('' + f'{marc_record_string}' + '') + + record = { + "datestamp": sickle_header.datestamp, + "id": sickle_header.identifier, + "request_url": request_url, + "marc": parse_xml_to_array(StringIO(marc_collection_xml_full))[0] + } + records.append(record) return self.get_records(records) - - def strip_metadata(self, record_metadata): - stripped = {} - for key, value in record_metadata.items(): - if isinstance(value, str): - value = value.strip() - elif isinstance(value, list): - value = [v.strip() if isinstance(v, str) else v for v in value] - stripped[key] = value - - return stripped - - def _process_record(self, - record_element: etree.ElementBase, - request_url: Optional[str]) -> Optional[dict]: - """ - Process a record element and extract relevant information. - - :param record_element: Element representing a single record. - :param request_url: The URL of the request. - :return: A dictionary containing the extracted information from the record. - """ - sickle_rec = models.Record(record_element) - sickle_header = sickle_rec.header - if sickle_header.deleted: - return None - - marc_record_element = record_element.find(".//marc:record", namespaces={ - "marc": "http://www.loc.gov/MARC21/slim"}) - marc_record_string = etree.tostring(marc_record_element, - encoding="utf-8").decode("utf-8") - - # Wrap the record in collection so pymarc can read it - marc_collection_xml_full = \ - ('' - f'{marc_record_string}' - '') - - - record = { - "datestamp": sickle_header.datestamp, - "id": sickle_header.identifier, - "request_url": request_url, - "marc": parse_xml_to_array(StringIO(marc_collection_xml_full))[0] - } - - return record \ No newline at end of file