-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
DI 51 - implement OAI-ETDMS endpoint (#41)
* DI-51: add ETDMS as OAI metadata format. Very draft. Search builder does not filter by Etd work type or presence of CU/LAC agreements. * DI-51: include file URLs in <identifier> for ETDMS only (not DC) * DI-51 add check on degree_level: Etds only * DI-51 don't assume Etd has file(s) * DI-51 Element order matters in ETDMS * DI-51: add empty tags for ETDMS-required elements * DI-51: Add Solr field for OAI sets, based on Collection membership, with OAI-valid set spec * DI-51 Support LAC harvester: add file extensions to download URLs * DI-51: LAC harvester - fix file extension for unknown mimetypes * DI-51: ETDMS for LAC - include only public files * DI-51 check FileSet visibility before adding download URL * DI-51 Add licence check for LAC dissemination * DI-51 move oai_etdms constraints to override lib/blacklight_oai_provider/solr_document_wrapper.rb * DI-51 Fix Solr query to limit OAI_ETDMS to licenced theses only * DI-51 update licenced_fq comments * DI-51 oai_etdms spec & Etd factory updates * DI-51 .. with oai_etdms spec this time * DI-51 rename oai-pmh spec & update for consistency w/ oai-etdms spec * DI-51 update oai-dc, oai-etdms specs for consistency * DI-51 Only theses should be exportable as oai_etdms * DI-51 Move Etd search limits to config & fix ETDMS registration * DI-51 Move metadata format filtering to Blacklight OAI config. Override SolrDocumentWrapper to apply format filters. Add to specs.
- Loading branch information
Showing
13 changed files
with
861 additions
and
301 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -26,12 +26,22 @@ def self.modified_field | |
repository_name: 'Carleton University Institutional Repository', | ||
repository_url: ENV.fetch('REPOSITORY_URL', 'https://repository.library.carleton.ca/catalog/oai'), | ||
admin_email: ENV.fetch('CONTACT_EMAIL', '[email protected]'), | ||
record_prefix: 'oai:repository.library.carleton.ca', | ||
record_prefix: 'oai:repository.library.carleton.ca' | ||
}, | ||
document: { | ||
set_fields: [ | ||
{ label: 'Collection', solr_field: solr_name('member_of_collections', :symbol) } | ||
] | ||
{ label: 'Collection', solr_field: 'member_of_oai_sets_ssim' } | ||
], | ||
format_filters: { | ||
'oai_etdms': [ | ||
# Only Etds are available over oai_etdms | ||
'has_model_ssim:Etd', | ||
# Filter OUT any Etds that are not licenced to LAC. | ||
# Filter OUT any Etds that have (any Carleton licence) AND none of the LAC licences. | ||
# See config/authorities/agreements.yml for terms | ||
'-agreement_tesim:(+(pc289j04q OR ng451h485) -tt44pm84n -6h440t871)' | ||
] | ||
} | ||
} | ||
} | ||
|
||
|
@@ -316,4 +326,9 @@ def self.modified_field | |
def render_bookmarks_control? | ||
false | ||
end | ||
|
||
# Register oai_etdms metadata format | ||
BlacklightOaiProvider::SolrDocumentProvider.register_format( | ||
OAI::Provider::Metadata::Etdms.instance | ||
) | ||
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,25 +1,12 @@ | ||
# frozen_string_literal: true | ||
|
||
# Generated via | ||
# `rails generate hyrax:work Etd` | ||
class EtdIndexer < Hyrax::WorkIndexer | ||
# This indexes the default metadata. You can remove it if you want to | ||
# provide your own metadata and indexing. | ||
include Hyrax::IndexesBasicMetadata | ||
|
||
# Fetch remote labels for based_near. You can remove this if you don't want | ||
# this behavior | ||
include Hyrax::IndexesLinkedMetadata | ||
|
||
# Index an object's top-level parent collection(s) | ||
include ParentCollectionBehavior | ||
|
||
# Use date parsing helper in HyraxHelper | ||
include HyraxHelper | ||
|
||
# Uncomment this block if you want to add custom indexing behavior: | ||
def generate_solr_document | ||
super.tap do |solr_doc| | ||
solr_doc['date_created_year_ssim'] = object.date_created.map { |value| date_created_year(value) } | ||
solr_doc = index_parent_collections(solr_doc) | ||
end | ||
end | ||
class EtdIndexer < SharedWorkIndexer | ||
# Uncomment to add indexing behaviour specific to Etd Works | ||
# def generate_solr_document | ||
# super.tap do |solr_doc| | ||
# solr_doc['my_custom_field_ssim'] = object.my_custom_property | ||
# end | ||
# end | ||
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,25 +1,12 @@ | ||
# frozen_string_literal: true | ||
|
||
# Generated via | ||
# `rails generate hyrax:work ResearchWork` | ||
class ResearchWorkIndexer < Hyrax::WorkIndexer | ||
# This indexes the default metadata. You can remove it if you want to | ||
# provide your own metadata and indexing. | ||
include Hyrax::IndexesBasicMetadata | ||
|
||
# Fetch remote labels for based_near. You can remove this if you don't want | ||
# this behavior | ||
include Hyrax::IndexesLinkedMetadata | ||
|
||
# Index an object's top-level parent collection(s) | ||
include ParentCollectionBehavior | ||
|
||
# Use date parsing helper in HyraxHelper | ||
include HyraxHelper | ||
|
||
# Uncomment this block if you want to add custom indexing behavior: | ||
def generate_solr_document | ||
super.tap do |solr_doc| | ||
solr_doc['date_created_year_ssim'] = object.date_created.map { |value| date_created_year(value) } | ||
solr_doc = index_parent_collections(solr_doc) | ||
end | ||
end | ||
class ResearchWorkIndexer < SharedWorkIndexer | ||
# Uncomment to add indexing behaviour specific to Research Works | ||
# def generate_solr_document | ||
# super.tap do |solr_doc| | ||
# solr_doc['my_custom_field_ssim'] = object.my_custom_property | ||
# end | ||
# end | ||
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
# frozen_string_literal: true | ||
|
||
# Custom indexing behaviour shared by all work types | ||
class SharedWorkIndexer < Hyrax::WorkIndexer
  # Indexes the default metadata. Remove it to provide your own metadata and indexing.
  include Hyrax::IndexesBasicMetadata

  # Fetches remote labels for based_near. Remove it if that behaviour is not wanted.
  include Hyrax::IndexesLinkedMetadata

  # Indexes an object's top-level parent collection(s)
  include ParentCollectionBehavior

  # Date parsing helper (date_created_year)
  include HyraxHelper

  # Extends the Solr document built by the parent indexer with three additions:
  # the created year(s), the OAI set membership, and the parent collections.
  #
  # @return the Solr document returned by +super+, with the extra fields set on it
  def generate_solr_document
    super.tap do |doc|
      # YYYY-formatted year for every date_created value
      created_years = object.date_created.map { |created| date_created_year(created) }
      doc['date_created_year_ssim'] = created_years

      # One OAI set per collection this work belongs to. The collection's titles are
      # joined into a single name and every whitespace run becomes an underscore,
      # because spaces aren't valid in a set spec.
      oai_sets = object.member_of_collections.map { |parent| parent.title.join(' ').strip.gsub(/\s+/, '_') }
      doc['member_of_oai_sets_ssim'] = oai_sets

      # Index all parent collections for nesting
      index_parent_collections(doc)
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,27 +1,12 @@ | ||
# frozen_string_literal: true | ||
|
||
# Generated via | ||
# `rails generate hyrax:work Work` | ||
class WorkIndexer < Hyrax::WorkIndexer | ||
# This indexes the default metadata. You can remove it if you want to | ||
# provide your own metadata and indexing. | ||
include Hyrax::IndexesBasicMetadata | ||
|
||
# Fetch remote labels for based_near. You can remove this if you don't want | ||
# this behavior | ||
include Hyrax::IndexesLinkedMetadata | ||
|
||
# Index an object's top-level parent collection(s) | ||
include ParentCollectionBehavior | ||
|
||
# Use date parsing helper in HyraxHelper | ||
include HyraxHelper | ||
|
||
# Uncomment this block if you want to add custom indexing behavior: | ||
def generate_solr_document | ||
super.tap do |solr_doc| | ||
solr_doc['date_created_year_ssim'] = object.date_created.map { |value| date_created_year(value) } | ||
solr_doc = index_parent_collections(solr_doc) | ||
end | ||
end | ||
class WorkIndexer < SharedWorkIndexer | ||
# Uncomment to add indexing behaviour specific to generic Works | ||
# def generate_solr_document | ||
# super.tap do |solr_doc| | ||
# solr_doc['my_custom_field_ssim'] = object.my_custom_property | ||
# end | ||
# end | ||
end | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,106 @@ | ||
# frozen_string_literal: true | ||
|
||
require 'builder' | ||
|
||
# See Blacklight: app/models/concerns/blacklight/document/export.rb | ||
module Blacklight
  module Document
    # Document extension that exports a document as OAI-ETDMS XML.
    # Registers the :xml, :etdms_xml and :oai_etdms_xml export formats on any
    # document it is extended onto; all three are served by export_as_oai_etdms_xml.
    module Etdms
      # Hook run when a document is extended with this module.
      def self.extended(document)
        Blacklight::Document::Etdms.register_export_formats(document)
      end

      # Declares the export formats this extension provides on +document+.
      def self.register_export_formats(document)
        document.will_export_as(:xml)
        document.will_export_as(:etdms_xml, 'text/xml')
        document.will_export_as(:oai_etdms_xml, 'text/xml')
      end

      # Semantic fields emitted as direct children of the thesis element.
      #
      # @return [Array<Symbol>]
      def etdms_field_names
        # order matters!
        # additional oai_etdms_identifier required by LAC: see app/models/solr_document.rb
        %i[
          title
          creator
          subject
          description
          publisher
          contributor
          date
          type
          identifier
          oai_etdms_identifier
          language
          rights
        ]
      end

      # Semantic fields emitted inside the <thesis:degree> element.
      #
      # @return [Array<Symbol>]
      def etdms_degree_field_names
        # elements nested under <degree>, in order:
        %i[
          name
          level
          discipline
          grantor
        ]
      end

      # For valid ETDMS-XML:
      # 1. Order matters.
      # 2. The following elements must be present but can be empty. If no values provided, output empty tags.
      #   - title
      #   - creator
      #   - subject
      #   - type
      #   - identifier
      # 3. The following elements must be present and CAN'T be empty. But if no value provided,
      #    it's a metadata error that needs to be fixed. Provide empty tag & fix on harvesting error.
      #   - date
      #
      # @return [Array<Symbol>]
      def etdms_required_field_names
        %i[
          title
          creator
          subject
          type
          identifier
          date
        ]
      end

      # Serialises the document's semantic values as an <oai_etdms:thesis> element.
      #
      # @return [String] the XML markup accumulated by the builder
      def export_as_oai_etdms_xml
        xml = Builder::XmlMarkup.new
        xml.tag!('oai_etdms:thesis',
                 'xmlns:oai_etdms' => "http://www.ndltd.org/standards/metadata/etdms/1.0/",
                 'xmlns:thesis' => "http://www.ndltd.org/standards/metadata/etdms/1.0/",
                 'xmlns:xsi' => "http://www.w3.org/2001/XMLSchema-instance",
                 'xsi:schemaLocation' => %(http://www.ndltd.org/standards/metadata/etdms/1.0/ http://www.ndltd.org/standards/metadata/etdms/1.0/etdms.xsd)) do
          # fetch semantic values hash
          semantic_values = to_semantic_values
          # Build the required-field list once instead of once per field
          required_fields = etdms_required_field_names

          # Output DC-ish elements
          etdms_field_names.each do |field|
            # oai_etdms_identifier is emitted as a second kind of <thesis:identifier>
            element = "thesis:#{field.to_s.gsub('oai_etdms_', '')}"
            etdms_values_for(semantic_values, field, required_fields).each do |v|
              xml.tag! element, v
            end
          end

          # Add degree-specific field names under <thesis:degree> parent element
          xml.tag! 'thesis:degree' do
            etdms_degree_field_names.each do |field|
              Array.wrap(semantic_values[field]).each do |v|
                xml.tag! "thesis:#{field}", v
              end
            end
          end
        end
        xml.target!
      end

      alias export_as_xml export_as_oai_etdms_xml
      alias export_as_etdms_xml export_as_oai_etdms_xml

      private

      # Values to emit for +field+, read from +semantic_values+ without modifying it.
      #
      # A required field with no values yields a single empty string, so the caller
      # outputs the empty element that the OAI ETDMS schema requires. The hash is
      # deliberately left untouched: it comes from to_semantic_values, which may hand
      # back a shared/memoized object (NOTE(review): Blacklight appears to memoize it -
      # confirm), and writing the placeholder into it would leak into other exports.
      # A nil value is treated the same as "no values" instead of raising.
      #
      # @param semantic_values [Hash] semantic field name => value(s)
      # @param field [Symbol] semantic field name
      # @param required [Array<Symbol>] fields that must always produce an element
      # @return [Array] the values to emit, possibly empty for optional fields
      def etdms_values_for(semantic_values, field, required = etdms_required_field_names)
        values = Array.wrap(semantic_values[field])
        return values unless values.empty? && required.include?(field)

        ['']
      end
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
# frozen_string_literal: true | ||
|
||
module OAI | ||
module Provider | ||
module Metadata | ||
# OAI-ETDMS metadata format | ||
class Etdms < Format | ||
def initialize | ||
@prefix = 'oai_etdms' | ||
@schema = 'http://www.ndltd.org/standards/metadata/etdms/1.0/etdms.xsd' | ||
@namespace = 'http://www.ndltd.org/standards/metadata/etdms/1.0/' | ||
@element_namespace = 'thesis' | ||
end | ||
|
||
def header_specification | ||
{ | ||
'xmlns:oai_etdms' => 'http://www.ndltd.org/standards/metadata/etdms/1.0/', | ||
'xmlns:thesis' => 'http://www.ndltd.org/standards/metadata/etdms/1.0/', | ||
'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance', | ||
'xsi:schemaLocation' => 'http://www.ndltd.org/standards/metadata/etdms/1.0/ ' / | ||
'http://www.ndltd.org/standards/metadata/etdms/1.0/etdms.xsd' | ||
} | ||
end | ||
end | ||
end | ||
end | ||
end |
Oops, something went wrong.