Skip to content

Commit

Permalink
Update to current lobid transformation #225
Browse files Browse the repository at this point in the history
  • Loading branch information
TobiasNx committed Jan 17, 2025
1 parent eaa1d0e commit 5fdfe75
Show file tree
Hide file tree
Showing 27 changed files with 799 additions and 399 deletions.
310 changes: 160 additions & 150 deletions conf/lobid-transformation/fix/contribution.fix

Large diffs are not rendered by default.

55 changes: 32 additions & 23 deletions conf/lobid-transformation/fix/describedBy.fix
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,14 @@ prepend("describedBy.id", "http://lobid.org/resources/")
copy_field("almaMmsId", "describedBy.label")
prepend("describedBy.label", "Webseite der hbz-Ressource ")

set_array("describedBy.type[]", "BibliographicDescription")
add_array("describedBy.type[]", "BibliographicDescription")


add_field("describedBy.inDataset.id","http://lobid.org/resources/dataset#!")

add_field("describedBy.inDataset.label","lobid-resources – Der hbz-Verbundkatalog als Linked Open Data")

set_array("describedBy.resultOf.type[]", "CreateAction")
add_array("describedBy.resultOf.type[]", "CreateAction")

add_field("@createTime","$[createEndTime]")
if all_match("@createTime","0")
Expand All @@ -24,35 +24,50 @@ end

add_field("describedBy.resultOf.instrument.id","https://github.com/hbz/lobid-resources")

set_array("describedBy.resultOf.instrument.type[]", "SoftwareApplication")
add_array("describedBy.resultOf.instrument.type[]", "SoftwareApplication")

add_field("describedBy.resultOf.instrument.label","Software lobid-resources")

copy_field("almaMmsId","describedBy.resultOf.object.id")
prepend("describedBy.resultOf.object.id","https://lobid.org/marcxml/")

# MNG is an ALMA-specific element
# 008/00-05 holds the initial cataloguing date. We strictly test that 008 starts with exactly 6 digits, because some records carry 8 digits there, which is not valid.
# We use the MNG info as a fallback.
# MNG is an ALMA-specific element (MNG .b only states the date of indexing into ALMA.)

if any_match("008", "^\\d{6}\\D.*") # 008/00-05 is the correct form for the cataloguing date in MARC.
copy_field("008","@initialCataloguingDate")
substring("@initialCataloguingDate","0","6")
end

if any_match("@initialCataloguingDate","^[0-4]\\d*") # Complete dates after 2000
prepend("@initialCataloguingDate","20")
elsif any_match("@initialCataloguingDate","\\d*") # Complete dates before 2000
prepend("@initialCataloguingDate","19")
else
copy_field("MNG .b","@initialCataloguingDate")
end
copy_field("@initialCataloguingDate","describedBy.resultOf.object.dateCreated")

copy_field("MNG .b","describedBy.resultOf.object.dateCreated")
copy_field("MNG .d","describedBy.resultOf.object.dateModified")
# Normalise dateCreated and dateModified to bare digits before they are
# reformatted as YYYY-MM-DD further down:
#   1. drop hyphens,
#   2. cut everything from the first space onwards (presumably a time part - TODO confirm),
#   3. strip stray "c"/"©" marks, whitespace and punctuation.
# Step 3 uses a character class on purpose: in the former alternation the
# always-matching "\\s?" shadowed every alternative after it, so punctuation
# was never removed, and the unescaped "." would have matched any character
# had it ever become reachable.
replace_all("describedBy.resultOf.object.dateCreated","-","")
replace_all("describedBy.resultOf.object.dateCreated"," .*","")
replace_all("describedBy.resultOf.object.dateCreated","[c©\\s,.:;/=]","")
replace_all("describedBy.resultOf.object.dateModified","-","")
replace_all("describedBy.resultOf.object.dateModified"," .*","")
replace_all("describedBy.resultOf.object.dateModified","[c©\\s,.:;/=]","")
unless any_match("describedBy.resultOf.object.dateCreated","\\d{8}|\\d{4}")
remove_field("describedBy.resultOf.object.dateCreated")
end
unless any_match("describedBy.resultOf.object.dateModified","\\d{8}|\\d{4}")
remove_field("describedBy.resultOf.object.dateModified")
end
#unless any_match("describedBy.resultOf.object.dateCreated","\\d{8}|\\d{4}")
# remove_field("describedBy.resultOf.object.dateCreated")
#end
#unless any_match("describedBy.resultOf.object.dateModified","\\d{8}|\\d{4}")
# remove_field("describedBy.resultOf.object.dateModified")
#end
replace_all("describedBy.resultOf.object.dateCreated","^(\\d{4})(\\d{2})(\\d{2})$","$1-$2-$3")
replace_all("describedBy.resultOf.object.dateModified","^(\\d{4})(\\d{2})(\\d{2})$","$1-$2-$3")
replace_all("describedBy.resultOf.object.dateCreated","^(\\d{4})$","$1-01-01")
replace_all("describedBy.resultOf.object.dateModified","^(\\d{4})$","$1-01-01")

set_array("describedBy.resultOf.object.type[]", "DataFeedItem")
add_array("describedBy.resultOf.object.type[]", "DataFeedItem")

copy_field("almaMmsId","describedBy.resultOf.object.label")
prepend("describedBy.resultOf.object.label","hbz-Ressource ")
Expand All @@ -62,7 +77,7 @@ add_field("describedBy.resultOf.object.inDataset.id", "https://datahub.io/datase

add_field("describedBy.resultOf.object.inDataset.label", "hbz_unioncatalog")

set_array("describedBy.license[]")
add_array("describedBy.license[]")
add_field("describedBy.license[].$append.id","http://creativecommons.org/publicdomain/zero/1.0" )
add_field("describedBy.license[].$last.label","Creative Commons-Lizenz CC0 1.0 Universal" )

Expand All @@ -85,23 +100,17 @@ do list(path: "040 ", "var":"$i")
end
end

set_array("describedBy.resultOf.object.modifiedBy[]")
add_array("describedBy.resultOf.object.modifiedBy[]")
do list(path:"$i.d", "var":"$j")
copy_field("$j", "describedBy.resultOf.object.modifiedBy[].$append.id")
end

end

call_macro("provenanceLinks",field: "describedBy.resultOf.object.sourceOrganization.id")
copy_field("describedBy.resultOf.object.sourceOrganization.id","describedBy.resultOf.object.sourceOrganization.label")
lookup("describedBy.resultOf.object.sourceOrganization.label","lobidOrgLabels",delete:"true")
call_macro("provenanceLinks",field: "describedBy.resultOf.object.provider.id")
copy_field("describedBy.resultOf.object.provider.id","describedBy.resultOf.object.provider.label")
lookup("describedBy.resultOf.object.provider.label","lobidOrgLabels",delete:"true")
call_macro("provenanceLinks",field: "describedBy.resultOf.object.sourceOrganization.id",label: "describedBy.resultOf.object.sourceOrganization.label")
call_macro("provenanceLinks",field: "describedBy.resultOf.object.provider.id",label: "describedBy.resultOf.object.provider.label")
do list(path:"describedBy.resultOf.object.modifiedBy[]","var":"$i")
call_macro("provenanceLinks",field: "$i.id")
copy_field("$i.id","$i.label")
call_macro("provenanceLinks",field: "$i.id",label:"$i.label")
end
lookup("describedBy.resultOf.object.modifiedBy[].*.label","lobidOrgLabels",delete:"true")

uniq("describedBy.resultOf.object.modifiedBy[]")
24 changes: 13 additions & 11 deletions conf/lobid-transformation/fix/identifiers.fix
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ paste("id", "~http://lobid.org/resources/", "001", "~#!", join_char: "")
# 024 - Other Standard Identifier (R) Subfield: $a (NR) $2 (NR)
# urn

set_array("urn[]")
add_array("urn[]")

do list(path: "0247?", "var": "$i")
if any_equal("$i.2","urn")
Expand All @@ -17,7 +17,7 @@ end
# Sometimes URNs are not set in 024; in that case we can pick up the missing ones from 856.
# 856 - Electronic Location and Access (R) - Subfield: $u (R) $3 (NR)
# 1. Indicator: 4 = HTTP
set_array("@urnLinks")
add_array("@urnLinks")

do list(path:"856??", "var":"$i")
if all_match("$i.u", "^http.*(urn=|\\.(org|de)/)urn:.+$") # This should ignore repository links like: https://sammlungen.ulb.uni-muenster.de/urn/urn:nbn:de:hbz:6-85659520092
Expand Down Expand Up @@ -50,8 +50,8 @@ end
# 020 - International Standard Book Number (R) - $a (NR)
# source data sometimes provides repeated subfield $a even if this is not valid marc

set_array("@isbn[]")
set_array("isbn[]")
add_array("@isbn[]")
add_array("isbn[]")

do list(path:"020 ", "var": "$i")
do list(path:"$i.a", "var": "$j")
Expand All @@ -74,23 +74,23 @@ uniq("isbn[]")


# 022 - International Standard Serial Number (R) - Subfield $a (NR)
set_array("issn[]")
add_array("issn[]")
do list(path:"022? ", "var":"$i")
copy_field("$i.a", "issn[].$append")
end
replace_all("issn[].*", "-","")
uniq("issn[]")

# 024 - Other Standard Identifier (R) - Subfield a (NR) 1. Indicator 2 = ISMN
set_array("ismn[]")
add_array("ismn[]")
do list(path:"0242?", "var":"$i")
copy_field("$i.a", "ismn[].$append")
end
replace_all("ismn[].*", "-","")


# 024 - Other Standard Identifier (R) - Subfield a (NR) 1. Indicator 7 = Source specified in subfield $2
set_array("doi[]")
add_array("doi[]")
do list(path:"0247?", "var":"$i")
if all_equal("$i.2","doi")
copy_field("$i.a", "doi[].$append")
Expand All @@ -109,7 +109,7 @@ replace_all("doi[].*", ".*doi.org.*(10\\.(\\d)+/(\\S)+).*", "$1")
uniq("doi[]")

# 035 - System Control Number (R) - Subfield: $a (NR)
set_array("oclcNumber[]")
add_array("oclcNumber[]")

do list(path:"035 ", "var":"$i")
if all_match("$i.a", "\\(OCoLC\\)(.*)")
Expand Down Expand Up @@ -137,8 +137,6 @@ do list(path:"035 ", "var":"$i")
unless exists("zdbId")
if all_match("$i.a", "\\(DE-600\\)(.*)")
copy_field("$i.a", "zdbId")
elsif all_match("$i.a", "\\(DE-599\\)(ZDB.*)")
copy_field("$i.a", "zdbId")
end
end
end
Expand All @@ -153,7 +151,7 @@ copy_field("almaMmsId","rpbId")
lookup("rpbId","almaMmsId2rpbId",delete:"true")
replace_all("rpbId", "^RPB","")

set_array("stockNumber[]")
add_array("stockNumber[]")
do list(path:"028??", "var":"$i")
copy_field("$i.a", "stockNumber[].$append")
end
Expand All @@ -168,3 +166,7 @@ unless exists("hbzId")
copy_field("@hbzId","hbzId")
end
end

unless exists("@inNZ")
remove_field("hbzId")
end
30 changes: 15 additions & 15 deletions conf/lobid-transformation/fix/item.fix
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
set_array("hasItem[]")
set_array("@ITM-H") # Helper element for creating Holding from HOL ("PhysikalischerTitel")
add_array("hasItem[]")
add_array("@ITM-H") # Helper element for creating Holding from HOL ("PhysikalischerTitel")
do list(path:"ITM ", "var": "$i")
call_macro("suppressedLocation", targetField: "$i", libraryCodeSubfield: "w", locationCodeSubfield: "x")
unless exists("$i.suppressedLocation") # Test if location is suppressed with mapping provided by the libraries.
set_hash( "hasItem[].$append")
add_hash( "hasItem[].$append")
add_field("hasItem[].$last.label", "lobid Bestandsressource")
set_array("hasItem[].$last.type[]", "Item","PhysicalObject")
add_array("hasItem[].$last.type[]", "Item","PhysicalObject")
if exists("$i.z") # Temporary call number subfield
copy_field("$i.z", "hasItem[].$last.callNumber")
elsif exists("$i.n") # Item call number subfield
Expand All @@ -21,7 +21,7 @@ do list(path:"ITM ", "var": "$i")
end
end

set_array("@HOL-M_POR-M") # Helper element for creating Holding from MBD ("NurTitel")
add_array("@HOL-M_POR-M") # Helper element for creating Holding from MBD ("NurTitel")
do list(path: "HOL ", "var": "$i")
copy_field("$i.M","@HOL-M_POR-M.$append")
unless in("$i.8", "@ITM-H") # Checks if there is no corresponding ITM-Field
Expand All @@ -30,9 +30,9 @@ do list(path: "HOL ", "var": "$i")
if exists("$H52.b")
call_macro("suppressedLocation", targetField: "$H52", libraryCodeSubfield: "b", locationCodeSubfield: "c")
unless exists("$H52.suppressedLocation") # Test if location is suppressed with mapping provided by the libraries.
set_hash( "hasItem[].$append")
add_hash( "hasItem[].$append")
add_field("hasItem[].$last.label", "lobid Bestandsressource")
set_array("hasItem[].$last.type[]", "Item","PhysikalischerTitel")
add_array("hasItem[].$last.type[]", "Item","PhysikalischerTitel")
if exists("$H52.b.1")
copy_field("$H52.b.1","hasItem[].$last.currentLibrary")
else
Expand All @@ -56,8 +56,8 @@ do list(path:"POR ", "var": "$i")
copy_field("$i.M","@HOL-M_POR-M.$append")
# entity for every POR .a without POR .A
unless any_match("$i.a",".*6441$") # filter out hbz
set_hash( "hasItem[].$append")
set_array("hasItem[].$last.type[]", "Item", "DigitalDocument")
add_hash( "hasItem[].$append")
add_array("hasItem[].$last.type[]", "Item", "DigitalDocument")
add_field("hasItem[].$last.label", "Electronic Portfolio")
copy_field("$i.D", "$i.@electronicLocator")
replace_all("$i.@electronicLocator","https://eu04.alma.exlibrisgroup.com/view/uresolver/49HBZ_NETWORK","")
Expand All @@ -74,8 +74,8 @@ do list(path:"POR ", "var": "$i")
copy_field("$i.d", "$i.@sublocation")
replace_all("$i.@sublocation","https://hbz-network.userservices.exlibrisgroup.com/view/uresolver/49HBZ_NETWORK","")
do list(path:"$i.A", "var": "$j")
set_hash( "hasItem[].$append")
set_array("hasItem[].$last.type[]", "Item", "DigitalDocument")
add_hash( "hasItem[].$append")
add_array("hasItem[].$last.type[]", "Item", "DigitalDocument")
add_field("hasItem[].$last.label", "Electronic Portfolio")
paste("hasItem[].$last.electronicLocator", "~https://eu04.alma.exlibrisgroup.com/view/uresolver/","$j","$i.@electronicLocator", join_char: "")
paste("hasItem[].$last.sublocation", "~https://hbz-network.userservices.exlibrisgroup.com/view/uresolver/","$j","$i.@sublocation", join_char: "")
Expand All @@ -85,7 +85,7 @@ do list(path:"POR ", "var": "$i")
copy_field("$i.@iz","hasItem[].$last.heldBy.isil")
paste("hasItem[].$last.heldBy.id", "~http://lobid.org/organisations/", "hasItem[].$last.heldBy.isil", "~#!", join_char:"")
copy_field("hasItem[].$last.heldBy.id", "hasItem[].$last.heldBy.label")
set_array("hasItem[].$last.inCollection[]")
add_array("hasItem[].$last.inCollection[]")
paste("hasItem[].$last.inCollection[].$append.id", "~http://lobid.org/organisations/", "$i.@iz", "~#!", join_char:"")
copy_field("hasItem[].$last.inCollection[].$last.id", "hasItem[].$last.inCollection[].$last.label")
# item id is constructed "http://lobid.org/items/[almaMmsId of the record]:[isil of the Owner]:[almaMmsId of the holding]#!"
Expand All @@ -97,17 +97,17 @@ end
do list(path: "MBD ", "var": "$i")
unless any_match("$i.M","49HBZ_NETWORK")
unless in("$i.M", "@HOL-M_POR-M") # Checks if there is no corresponding HOL or POR-Field
set_hash( "hasItem[].$append")
add_hash( "hasItem[].$append")
add_field("hasItem[].$last.label", "lobid Bestandsressource")
set_array("hasItem[].$last.type[]", "Item","NurTitel")
add_array("hasItem[].$last.type[]", "Item","NurTitel")
copy_field("$i.i", "$i.@iz")
replace_all("$i.@iz",".*(\\d{4})$","$1")
lookup("$i.@iz", "alma-institution-code-to-isil")
call_macro("opacLink", field:"@iz")
copy_field("$i.@iz","hasItem[].$last.heldBy.isil")
paste("hasItem[].$last.heldBy.id", "~http://lobid.org/organisations/", "hasItem[].$last.heldBy.isil", "~#!", join_char:"")
copy_field("hasItem[].$last.heldBy.id", "hasItem[].$last.heldBy.label")
set_array("hasItem[].$last.inCollection[]")
add_array("hasItem[].$last.inCollection[]")
paste("hasItem[].$last.inCollection[].$append.id", "~http://lobid.org/organisations/", "$i.@iz", "~#!", join_char:"")
copy_field("hasItem[].$last.inCollection[].$last.id", "hasItem[].$last.inCollection[].$last.label")
# item id is constructed "http://lobid.org/items/[almaMmsId of the record]:[isil of the Owner]:[almaMmsId of the holding]#!"
Expand Down
Loading

0 comments on commit 5fdfe75

Please sign in to comment.