Skip to content

Commit

Permalink
Merge _process_record() into parse()
Browse files Browse the repository at this point in the history
  • Loading branch information
barbarahui committed Oct 22, 2024
1 parent 8dc9adf commit b834054
Showing 1 changed file with 24 additions and 59 deletions.
83 changes: 24 additions & 59 deletions metadata_mapper/mappers/marc/ucb_tind_mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -369,64 +369,29 @@ def parse(self, api_response):
)

records = []
for re in record_elements:
record = self._process_record(re, request_url)
# sickle_rec = models.Record(re)
# sickle_header = sickle_rec.header
# if sickle_header.deleted:
# continue

# record = self.strip_metadata(sickle_rec.metadata)
# record["datestamp"] = sickle_header.datestamp
# record["id"] = sickle_header.identifier
# record["request_url"] = request_url
records.append(record)
for record_element in record_elements:
sickle_rec = models.Record(record_element)
sickle_header = sickle_rec.header
if not sickle_header.deleted:
marc_record_element = record_element.find(
".//marc:record",
namespaces={"marc": "http://www.loc.gov/MARC21/slim"}
)
marc_record_string = etree.tostring(
marc_record_element,encoding="utf-8").decode("utf-8")

# Wrap the record in collection so pymarc can read it
marc_collection_xml_full = \
('<collection xmlns="http://www.loc.gov/MARC21/slim">'
f'{marc_record_string}'
'</collection>')

record = {
"datestamp": sickle_header.datestamp,
"id": sickle_header.identifier,
"request_url": request_url,
"marc": parse_xml_to_array(StringIO(marc_collection_xml_full))[0]
}
records.append(record)

return self.get_records(records)

def strip_metadata(self, record_metadata):
    """Return a copy of ``record_metadata`` with whitespace trimmed from values.

    String values are stripped directly. For list values, each string item
    is stripped and non-string items are kept as they are. Values of any
    other type are copied over unchanged. The input mapping is not modified.

    :param record_metadata: Mapping whose values should be cleaned.
    :return: A new dict with the same keys and the cleaned values.
    """

    def _clean(value):
        # Only str values and str items inside lists are touched.
        if isinstance(value, str):
            return value.strip()
        if isinstance(value, list):
            return [
                item.strip() if isinstance(item, str) else item
                for item in value
            ]
        return value

    return {key: _clean(value) for key, value in record_metadata.items()}

def _process_record(self,
                    record_element: etree.ElementBase,
                    request_url: Optional[str]) -> Optional[dict]:
    """
    Build a record dictionary from a single harvested record element.

    :param record_element: Element representing a single record.
    :param request_url: The URL of the request; copied into the result as-is.
    :return: ``None`` when the record header is flagged as deleted, otherwise
        a dictionary with the keys ``datestamp``, ``id``, ``request_url``
        and ``marc`` (the first record parsed from the wrapped MARC XML).
    """
    header = models.Record(record_element).header
    if header.deleted:
        return None

    marc_namespaces = {"marc": "http://www.loc.gov/MARC21/slim"}
    marc_element = record_element.find(
        ".//marc:record", namespaces=marc_namespaces)
    # NOTE(review): find() gives None when no marc:record descendant exists;
    # presumably every non-deleted record carries one -- confirm upstream.
    marc_xml = etree.tostring(marc_element, encoding="utf-8").decode("utf-8")

    # Wrap the record in collection so pymarc can read it
    collection_xml = "".join((
        '<collection xmlns="http://www.loc.gov/MARC21/slim">',
        marc_xml,
        '</collection>',
    ))

    return {
        "datestamp": header.datestamp,
        "id": header.identifier,
        "request_url": request_url,
        "marc": parse_xml_to_array(StringIO(collection_xml))[0],
    }

0 comments on commit b834054

Please sign in to comment.