diff --git a/app/controllers/catalog_controller.rb b/app/controllers/catalog_controller.rb index ea2b1d5..aadac2e 100644 --- a/app/controllers/catalog_controller.rb +++ b/app/controllers/catalog_controller.rb @@ -26,12 +26,22 @@ def self.modified_field repository_name: 'Carleton University Institutional Repository', repository_url: ENV.fetch('REPOSITORY_URL', 'https://repository.library.carleton.ca/catalog/oai'), admin_email: ENV.fetch('CONTACT_EMAIL', 'repository.support@carleton.ca'), - record_prefix: 'oai:repository.library.carleton.ca', + record_prefix: 'oai:repository.library.carleton.ca' }, document: { set_fields: [ - { label: 'Collection', solr_field: solr_name('member_of_collections', :symbol) } - ] + { label: 'Collection', solr_field: 'member_of_oai_sets_ssim' } + ], + format_filters: { + 'oai_etdms': [ + # Only Etds are available over oai_etdms + 'has_model_ssim:Etd', + # Filter OUT any Etds that are not licenced to LAC. + # Filter OUT any Etds that have (any Carleton licence) AND none of the LAC licences. + # See config/authorities/agreements.yml for terms + '-agreement_tesim:(+(pc289j04q OR ng451h485) -tt44pm84n -6h440t871)' + ] + } } } @@ -316,4 +326,9 @@ def self.modified_field def render_bookmarks_control? false end + + # Register oai_etdms metadata format + BlacklightOaiProvider::SolrDocumentProvider.register_format( + OAI::Provider::Metadata::Etdms.instance + ) end diff --git a/app/indexers/etd_indexer.rb b/app/indexers/etd_indexer.rb index 194b073..fc63e9d 100644 --- a/app/indexers/etd_indexer.rb +++ b/app/indexers/etd_indexer.rb @@ -1,25 +1,12 @@ +# frozen_string_literal: true + # Generated via # `rails generate hyrax:work Etd` -class EtdIndexer < Hyrax::WorkIndexer - # This indexes the default metadata. You can remove it if you want to - # provide your own metadata and indexing. - include Hyrax::IndexesBasicMetadata - - # Fetch remote labels for based_near. 
You can remove this if you don't want - # this behavior - include Hyrax::IndexesLinkedMetadata - - # Index an object's top-level parent collection(s) - include ParentCollectionBehavior - - # Use date parsing helper in HyraxHelper - include HyraxHelper - - # Uncomment this block if you want to add custom indexing behavior: - def generate_solr_document - super.tap do |solr_doc| - solr_doc['date_created_year_ssim'] = object.date_created.map { |value| date_created_year(value) } - solr_doc = index_parent_collections(solr_doc) - end - end +class EtdIndexer < SharedWorkIndexer + # Uncomment to add indexing behaviour specific to Etd Works + # def generate_solr_document + # super.tap do |solr_doc| + # solr_doc['my_custom_field_ssim'] = object.my_custom_property + # end + # end end diff --git a/app/indexers/research_work_indexer.rb b/app/indexers/research_work_indexer.rb index f0a1d18..e630155 100644 --- a/app/indexers/research_work_indexer.rb +++ b/app/indexers/research_work_indexer.rb @@ -1,25 +1,12 @@ +# frozen_string_literal: true + # Generated via # `rails generate hyrax:work ResearchWork` -class ResearchWorkIndexer < Hyrax::WorkIndexer - # This indexes the default metadata. You can remove it if you want to - # provide your own metadata and indexing. - include Hyrax::IndexesBasicMetadata - - # Fetch remote labels for based_near. 
You can remove this if you don't want - # this behavior - include Hyrax::IndexesLinkedMetadata - - # Index an object's top-level parent collection(s) - include ParentCollectionBehavior - - # Use date parsing helper in HyraxHelper - include HyraxHelper - - # Uncomment this block if you want to add custom indexing behavior: - def generate_solr_document - super.tap do |solr_doc| - solr_doc['date_created_year_ssim'] = object.date_created.map { |value| date_created_year(value) } - solr_doc = index_parent_collections(solr_doc) - end - end +class ResearchWorkIndexer < SharedWorkIndexer + # Uncomment to add indexing behaviour specific to Research Works + # def generate_solr_document + # super.tap do |solr_doc| + # solr_doc['my_custom_field_ssim'] = object.my_custom_property + # end + # end end diff --git a/app/indexers/shared_work_indexer.rb b/app/indexers/shared_work_indexer.rb new file mode 100644 index 0000000..308a514 --- /dev/null +++ b/app/indexers/shared_work_indexer.rb @@ -0,0 +1,35 @@ +# frozen_string_literal: true + +# Custom indexing behaviour shared by all work types +class SharedWorkIndexer < Hyrax::WorkIndexer + # This indexes the default metadata. You can remove it if you want to + # provide your own metadata and indexing. + include Hyrax::IndexesBasicMetadata + + # Fetch remote labels for based_near. 
You can remove this if you don't want + # this behavior + include Hyrax::IndexesLinkedMetadata + + # Index an object's top-level parent collection(s) + include ParentCollectionBehavior + + # Use date parsing helper in HyraxHelper + include HyraxHelper + + # Uncomment this block if you want to add custom indexing behavior: + def generate_solr_document + super.tap do |solr_doc| + # store YYYY-formatted year + solr_doc['date_created_year_ssim'] = object.date_created.map { |value| date_created_year(value) } + + # Index OAI set membership based on collection names & valid as set specs (spaces aren't valid) + solr_doc['member_of_oai_sets_ssim'] = object.member_of_collections.map do |collection| + # Join titles into a workable set spec / name. + collection.title.join(' ').strip.gsub(/\s+/, '_') + end + + # Index all parent collections for nesting + index_parent_collections(solr_doc) + end + end +end diff --git a/app/indexers/work_indexer.rb b/app/indexers/work_indexer.rb index ea935e7..4f6ecd3 100644 --- a/app/indexers/work_indexer.rb +++ b/app/indexers/work_indexer.rb @@ -1,27 +1,12 @@ +# frozen_string_literal: true + # Generated via # `rails generate hyrax:work Work` -class WorkIndexer < Hyrax::WorkIndexer - # This indexes the default metadata. You can remove it if you want to - # provide your own metadata and indexing. - include Hyrax::IndexesBasicMetadata - - # Fetch remote labels for based_near. 
You can remove this if you don't want - # this behavior - include Hyrax::IndexesLinkedMetadata - - # Index an object's top-level parent collection(s) - include ParentCollectionBehavior - - # Use date parsing helper in HyraxHelper - include HyraxHelper - - # Uncomment this block if you want to add custom indexing behavior: - def generate_solr_document - super.tap do |solr_doc| - solr_doc['date_created_year_ssim'] = object.date_created.map { |value| date_created_year(value) } - solr_doc = index_parent_collections(solr_doc) - end - end +class WorkIndexer < SharedWorkIndexer + # Uncomment to add indexing behaviour specific to generic Works + # def generate_solr_document + # super.tap do |solr_doc| + # solr_doc['my_custom_field_ssim'] = object.my_custom_property + # end + # end end - - diff --git a/app/models/concerns/blacklight/document/etdms.rb b/app/models/concerns/blacklight/document/etdms.rb new file mode 100644 index 0000000..bb0ecca --- /dev/null +++ b/app/models/concerns/blacklight/document/etdms.rb @@ -0,0 +1,106 @@ +# frozen_string_literal: true + +require 'builder' + +# See Blacklight: app/models/concerns/blacklight/document/export.rb +module Blacklight + module Document + module Etdms + def self.extended(document) + Blacklight::Document::Etdms.register_export_formats(document) + end + + def self.register_export_formats(document) + document.will_export_as(:xml) + document.will_export_as(:etdms_xml, 'text/xml') + document.will_export_as(:oai_etdms_xml, 'text/xml') + end + + def etdms_field_names + # order matters! + # additional oai_etdms_identifier required by LAC: see app/models/solr_document.rb + %i[ + title + creator + subject + description + publisher + contributor + date + type + identifier + oai_etdms_identifier + language + rights + ] + end + + def etdms_degree_field_names + # elements nested under thesis:degree, in order: + %i[ + name + level + discipline + grantor + ] + end + + # For valid ETDMS-XML: + # 1. Order matters. + # 2. 
The following elements must be present but can be empty. If no values provided, output empty tags. + # - title + # - creator + # - subject + # - type + # - identifier + # 3. The following elements must be present and CAN'T be empty. But if no value provided, + # it's a metadata error that needs to be fixed. Provide empty tag & fix on harvesting error. + # - date + def etdms_required_field_names + %i[ + title + creator + subject + type + identifier + date + ] + end + + def export_as_oai_etdms_xml + xml = Builder::XmlMarkup.new + xml.tag!('oai_etdms:thesis', + 'xmlns:oai_etdms' => "http://www.ndltd.org/standards/metadata/etdms/1.0/", + 'xmlns:thesis' => "http://www.ndltd.org/standards/metadata/etdms/1.0/", + 'xmlns:xsi' => "http://www.w3.org/2001/XMLSchema-instance", + 'xsi:schemaLocation' => %(http://www.ndltd.org/standards/metadata/etdms/1.0/ http://www.ndltd.org/standards/metadata/etdms/1.0/etdms.xsd)) do + # fetch semantic values hash + semantic_values = to_semantic_values + + etdms_field_names.each do |field| + # If element is required but no value is available, OAI ETDMS schema requires an empty element + semantic_values[field] = '' if field.in?(etdms_required_field_names) && semantic_values[field].empty? + + # Output DC-ish elements + Array.wrap(semantic_values[field]).each do |v| + xml.tag! "thesis:#{field.to_s.gsub('oai_etdms_', '')}", v + end + end + + # Add degree-specific field names under parent element + xml.tag! 'thesis:degree' do + etdms_degree_field_names.each do |field| + Array.wrap(semantic_values[field]).each do |v| + xml.tag! "thesis:#{field}", v + end + end + end + end + xml.target! 
+ end + + alias export_as_xml export_as_oai_etdms_xml + alias export_as_etdms_xml export_as_oai_etdms_xml + end + end +end diff --git a/app/models/concerns/oai/provider/metadata/etdms.rb b/app/models/concerns/oai/provider/metadata/etdms.rb new file mode 100644 index 0000000..7f2e11e --- /dev/null +++ b/app/models/concerns/oai/provider/metadata/etdms.rb @@ -0,0 +1,27 @@ +# frozen_string_literal: true + +module OAI + module Provider + module Metadata + # OAI-ETDMS metadata format + class Etdms < Format + def initialize + @prefix = 'oai_etdms' + @schema = 'http://www.ndltd.org/standards/metadata/etdms/1.0/etdms.xsd' + @namespace = 'http://www.ndltd.org/standards/metadata/etdms/1.0/' + @element_namespace = 'thesis' + end + + def header_specification + { + 'xmlns:oai_etdms' => 'http://www.ndltd.org/standards/metadata/etdms/1.0/', + 'xmlns:thesis' => 'http://www.ndltd.org/standards/metadata/etdms/1.0/', + 'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance', + 'xsi:schemaLocation' => 'http://www.ndltd.org/standards/metadata/etdms/1.0/ ' / + 'http://www.ndltd.org/standards/metadata/etdms/1.0/etdms.xsd' + } + end + end + end + end +end diff --git a/app/models/solr_document.rb b/app/models/solr_document.rb index 218a216..c869628 100644 --- a/app/models/solr_document.rb +++ b/app/models/solr_document.rb @@ -1,4 +1,5 @@ # frozen_string_literal: true + class SolrDocument include Blacklight::Solr::Document include BlacklightOaiProvider::SolrDocument @@ -8,7 +9,6 @@ class SolrDocument # Adds Hyrax behaviors to the SolrDocument. include Hyrax::SolrDocumentBehavior - # self.unique_key = 'id' # Email uses the semantic field mappings below to generate the body of an email. @@ -17,16 +17,15 @@ class SolrDocument # SMS uses the semantic field mappings below to generate the body of an SMS email. 
SolrDocument.use_extension(Blacklight::Document::Sms) - # DublinCore uses the semantic field mappings below to assemble an OAI-compliant Dublin Core document - # Semantic mappings of solr stored fields. Fields may be multi or - # single valued. See Blacklight::Document::SemanticFields#field_semantics + # Semantic field mappings below are used to assemble OAI-compliant DC or ETDMS + # documents, as requested. Fields may be multi or single valued. + # See Blacklight::Document::SemanticFields#field_semantics # and Blacklight::Document::SemanticFields#to_semantic_values - # Recommendation: Use field names from Dublin Core use_extension(Blacklight::Document::DublinCore) + use_extension(Blacklight::Document::Etdms) - # Do content negotiation for AF models. - - use_extension( Hydra::ContentNegotiation ) + # Do content negotiation for AF models. + use_extension(Hydra::ContentNegotiation) # Get YYYY date created for all work types def date_created_year @@ -59,41 +58,100 @@ def agreement self['agreement_tesim'] end - # OAI Metadata fields (DC only) + # OAI Metadata fields + # Element names & mappings can be shared by Dublin Core and ETDMS formats + # See Blacklight::Document::DublinCore and Blacklight::Document::Etdms for XML exporters field_semantics.merge!( title: 'title_tesim', creator: 'creator_tesim', contributor: 'contributor_tesim', - subject: [ 'subject_tesim', 'keyword_tesim' ], - description: [ 'description_tesim', 'abstract_tesim' ], + subject: %w[subject_tesim keyword_tesim], + description: %w[description_tesim abstract_tesim], publisher: 'publisher_tesim', type: 'resource_type_tesim', language: 'language_tesim', - rights: [ 'license_tesim', 'rights_notes_tesim', 'rights_statement_tesim' ], + rights: %w[license_tesim rights_notes_tesim rights_statement_tesim], relation: 'related_url_tesim', - # overridden hash keys + # ETDMS-specific elements + name: 'degree_tesim', + discipline: 'degree_discipline_tesim', + # ... 
degree grantor is available from publisher + grantor: 'publisher_tesim', + # ... and override hash key for degree level + level: 'oai_etdms_level', + # Override hash keys for shared elements date and identifier date: 'oai_date', - identifier: [ 'identifier_tesim', 'oai_identifier' ] + identifier: %w[identifier_tesim oai_identifier], + # ... and create a *special* element to hold file URLs as identifiers in ETDMS records but not DC + oai_etdms_identifier: 'oai_etdms_identifier' ) # Override SolrDocument hash access to provide custom values in OAI fields def [](key) - return send(key) if ['oai_date', 'oai_identifier'].include?(key) + return send(key) if %w[oai_etdms_level oai_date oai_identifier oai_etdms_identifier].include?(key) + super end + # Provide label for degree level authority + def oai_etdms_level + return unless self['has_model_ssim'].first == 'Etd' + + # Degree Level is required & allows a single value. + ::DegreeLevelsService.label(self['degree_level_tesim'].first) + end + + # Provide correct date format for different work types def oai_date # if ETD, use YYYY date self['has_model_ssim'].first == 'Etd' ? 
self['date_created_year_ssim'] : self['date_created_tesim'] end + # Include collection & work URLs in dc:identifier def oai_identifier - # Include collection & work URLs in dc:identifier - if self['has_model_ssim'].first.to_s == 'Collection' - Hyrax::Engine.routes.url_helpers.url_for(only_path: false, action: 'show', host: CatalogController.blacklight_config.oai[:provider][:repository_url].gsub('/catalog/oai', ''), controller: 'hyrax/collections', id: id) + url_vars = { only_path: false, action: 'show', host: hyrax_host, + controller: "hyrax/#{self['has_model_ssim'].first.underscore.pluralize}", + id: id } + + if self['has_model_ssim'].first == 'Collection' + # Return collection URL + Hyrax::Engine.routes.url_helpers.url_for(url_vars) else - Rails.application.routes.url_helpers.url_for(only_path: false, action: 'show', host: CatalogController.blacklight_config.oai[:provider][:repository_url].gsub('/catalog/oai', ''), controller: "hyrax/#{self['has_model_ssim'].first.to_s.underscore.pluralize}", id: id) + # Return work URL + Rails.application.routes.url_helpers.url_for(url_vars) + end + end + + # LAC requires OAI-ETDMS records to include file download URLs in the identifier element. + # LAC requires download URLs with a file extension. Add file extension based + # on mimetype. 
+ def oai_etdms_identifier + return unless self['has_model_ssim'].first == 'Etd' + + # Support PDFs & ZIPs file formats expected in transfer and warn about anything else + mime_types = { 'application/pdf' => 'pdf', 'application/zip' => 'zip' } + + self['file_set_ids_ssim']&.map do |fs_id| + # Fetch FileSet metadata from Solr + fs = Hyrax::SolrService.search_by_id(fs_id) + next unless + fs['visibility_ssi'] == Hydra::AccessControls::AccessRight::VISIBILITY_TEXT_VALUE_PUBLIC && + mime_types.keys.include?(fs['mime_type_ssi']) + + # append extension to download URL + Hyrax::Engine.routes.url_helpers.download_url(fs_id, host: hyrax_host) + ".#{mime_types[fs['mime_type_ssi']]}" end end + # Return an ETDMS representation of the document. Required by ruby-oai. + # See lib/oai/provider/response/list_metadata_formats.rb:record_supports + def to_oai_etdms + export_as(:etdms_xml) + end + + private + + def hyrax_host + CatalogController.blacklight_config.oai[:provider][:repository_url].gsub('/catalog/oai', '') + end end diff --git a/app/overrides/lib/blacklight_oai_provider/solr_document_wrapper_override.rb b/app/overrides/lib/blacklight_oai_provider/solr_document_wrapper_override.rb new file mode 100644 index 0000000..2164aba --- /dev/null +++ b/app/overrides/lib/blacklight_oai_provider/solr_document_wrapper_override.rb @@ -0,0 +1,67 @@ +# frozen_string_literal: true + +# Override blacklight_oai_provider/lib/blacklight_oai_provider/solr_document_wrapper.rb +# Add metadata format filters defined in OAI config to queries to filter search results +BlacklightOaiProvider::SolrDocumentWrapper.class_eval do + # Override BlacklightOaiProvider::SolrDocumentWrapper.find to apply format filters to single-record queries + def find(selector, options = {}) + return next_set(options[:resumption_token]) if options[:resumption_token] + + if selector == :all + response = search_service.repository.search(conditions(options)) + + if limit && response.total > limit + return 
select_partial(BlacklightOaiProvider::ResumptionToken.new(options.merge(last: 0), nil, response.total)) + end + + response.documents + else + query = search_service.search_builder.where(id: selector).query + response = search_service.repository.search(query).documents + + # If no documents in response, id is invalid + raise OAI::IdException if response.empty? + + # append format filters defined in OAI config as filter queries + format_filters(options).each { |fq| query.append_filter_query(fq) } + + # Search again. If still no search result, record can't be disseminated in selected format + response = search_service.repository.search(query).documents + raise OAI::FormatException if response.empty? + + # Return best match, or nil & let the exception handler deal with it + response.first + end + end + + # Override BlacklightOaiProvider::SolrDocumentWrapper.conditions to include format filters in conditions + def conditions(constraints) + query = search_service.search_builder.merge(sort: "#{solr_timestamp} asc", rows: limit).query + + if constraints[:from].present? || constraints[:until].present? + from_val = solr_date(constraints[:from]) + to_val = solr_date(constraints[:until], true) + if from_val == to_val + query.append_filter_query("#{solr_timestamp}:\"#{from_val}\"") + else + query.append_filter_query("#{solr_timestamp}:[#{from_val} TO #{to_val}]") + end + end + + # append format filters defined in OAI config as filter queries + format_filters(constraints).each { |fq| query.append_filter_query(fq) } + + # append set filter if present + query.append_filter_query(@set.from_spec(constraints[:set])) if constraints[:set].present? + query + end + + private + + def format_filters(options) + return [] unless options[:metadata_prefix].present? 
+ + # get format filters defined in OAI config, if any, or return an empty list + @controller.blacklight_config.oai.dig(:document, :format_filters, options[:metadata_prefix].to_sym) || [] + end +end diff --git a/spec/factories/etds.rb b/spec/factories/etds.rb index d2b1c6f..2cd81ff 100644 --- a/spec/factories/etds.rb +++ b/spec/factories/etds.rb @@ -1,19 +1,36 @@ # frozen_string_literal: true FactoryBot.define do - factory :etd do # Required metadata - title { ['Thesis or dissertation title ' + Time.new.strftime("%Y-%m-%d %H:%M:%S")] } + title { ["Thesis or dissertation title #{Time.new.strftime('%Y-%m-%d %H:%M:%S')}"] } creator { ['Surname, Given Name'] } resource_type { ['Thesis'] } degree_level { '1' } # Master's degree degree { 'Master of Science (M.Sc.)' } degree_discipline { 'Engineering' } + # Optional metadata commonly provided for Etds + contributor { ['Person, First Name (Thesis advisor)'] } + subject { ['Subject Area', 'Subject Area -- Ontario', 'Subject Area -- 20th century'] } + abstract { ['A summary of the work.'] } + publisher { ['Carleton University'] } + date_created { ['2022-04-10'] } + identifier { ['https://doi.org/10.22215/2023-12345'] } + language { ['eng'] } + rights_notes { ['Copyright (c) 2022 the author.'] } + # Optional, limited-access metadata (admin, Library staff only) internal_note { ['This is an internal note added by the Metadata team.'] } - agreement { ['https://repository.library.carleton.ca/concern/works/pc289j04q'] } # Carleton University Thesis Licence Agreement + + # Carleton University Thesis Licence Agreement and LAC licence + # See config/authorities.yml + agreement do + [ + 'https://repository.library.carleton.ca/concern/works/pc289j04q', + 'https://repository.library.carleton.ca/concern/works/6h440t871' + ] + end transient do depositing_user { User.find_by(email: 'staff_user@example.com') } @@ -29,10 +46,32 @@ visibility { Hydra::AccessControls::AccessRight::VISIBILITY_TEXT_VALUE_PUBLIC } end - trait :private do + trait 
:private do # default visibility is private end + # Public ETD with public file, licenced to Carleton and LAC + factory :public_etd_with_public_file, traits: [:public] do + before(:create) do |etd, context| + etd.ordered_members << create(:public_file) + end + end + + # Public ETD with private file, licenced to Carleton and LAC + factory :public_etd_with_private_file, traits: [:public] do + before(:create) do |etd, context| + etd.ordered_members << create(:private_file) + end + end + + # Private ETD with private file, licenced to Carleton and LAC + factory :private_etd_with_private_file, traits: [:private] do + before(:create) do |etd, context| + etd.ordered_members << create(:private_file) + end + end + + # Public ETD with CU & LAC licences, but no files factory :public_etd, traits: [:public] end -end \ No newline at end of file +end diff --git a/spec/requests/oai_dc_endpoint_spec.rb b/spec/requests/oai_dc_endpoint_spec.rb new file mode 100644 index 0000000..0577c5e --- /dev/null +++ b/spec/requests/oai_dc_endpoint_spec.rb @@ -0,0 +1,217 @@ +# frozen_string_literal: true + +require 'rails_helper' + +RSpec.describe 'OAI-DC endpoint' do + # Set up config and create works + before(:all) do + @repository_name = 'Carleton University Institutional Repository' + @hyrax_host = CatalogController.blacklight_config.oai[:provider][:repository_url].sub('/catalog/oai', '') + @repository_id = 'oai:repository.library.carleton.ca' + + # Create a public work with extra attributes + @public_work = + FactoryBot.create(:public_work_with_public_file, { + title: ["Public work #{Time.new.strftime('%Y-%m-%d %H:%M:%S')}"], + creator: ['Lastname, Given'], + contributor: ['Surname, First'], + date_created: ['2023-04-02'], + rights_statement: ['http://rightsstatements.org/vocab/InC/1.0/'], + rights_notes: ['Copyright 2023'], + license: ['https://creativecommons.org/licenses/by/4.0/'], + publisher: ['Large Publishing Co., Ltd.'], + identifier: ['DOI: https://doi.org/10.22215/2023-12345'], + 
language: ['eng'], + resource_type: ['Article'], + keyword: ['Term', 'Descriptive phrase', 'Key topic'], + subject: ['Subject Area', 'Subject Area -- Ontario', 'Subject Area -- 20th century'], + abstract: ['A summary of the work.'], + description: ['A description of the work.'], + related_url: ['https://www.example.com/related/work'] + }) + + # Public work with private file: file URL should not be included in oai_dc + @public_work_private_file = + FactoryBot.create(:public_work_with_private_file, { + title: ["Public Work with private file #{Time.new.strftime('%Y-%m-%d %H:%M:%S')}"] + }) + + # Create a private work with default attributes + @private_work = + FactoryBot.create(:work, :private, { + title: ["Private work #{Time.new.strftime('%Y-%m-%d %H:%M:%S')}"] + }) + + # Create a public Etd with default attributes + @public_etd = + FactoryBot.create(:etd, :public, { title: ["Public Etd #{Time.new.strftime('%Y-%m-%d %H:%M:%S')}"] }) + end + + # the endpoint is available + describe 'root page' do + it 'displays an error message about missing verb' do + get oai_catalog_path + expect(response.body).to include 'not a legal OAI-PMH verb' + end + end + + # the Identify verb returns repository information + describe 'Identify verb' do + it 'displays repository information' do + get oai_catalog_path(verb: 'Identify') + expect(response.body).to include @repository_name + end + end + + # the endpoint supports OAI-DC as a metadata format + describe 'ListMetadataFormats verb' do + it 'lists oai_dc as a supported format' do + get oai_catalog_path(verb: 'ListMetadataFormats') + expect(response.body).to include('oai_dc') + end + end + + describe 'GetRecord verb' do + context 'a PUBLIC Work with metadataFormat=oai_dc' do + before(:all) do + get oai_catalog_path(verb: 'GetRecord', metadataPrefix: 'oai_dc', + identifier: "#{@repository_id}:#{@public_work.id}") + end + it 'includes Work URL in dc:identifier' do + expect(response.body).to 
include("#{@hyrax_host}/concern/works/#{@public_work.id}") + end + it 'includes Date Created in dc:date with YYYY-MM-DD format' do + expect(response.body).to include("#{@public_work.date_created.first}") + end + it 'includes Title in dc:title' do + expect(response.body).to include("#{@public_work.title.first}") + end + it 'includes Creator in dc:creator' do + expect(response.body).to include("#{@public_work.creator.first}") + end + it 'includes Contributor in dc:contributor' do + expect(response.body).to include("#{@public_work.contributor.first}") + end + it 'includes Rights Statement, Rights Notes, and Creative Commons license in dc:rights' do + expect(response.body).to include("#{@public_work.rights_statement.first}") + expect(response.body).to include("#{@public_work.rights_notes.first}") + expect(response.body).to include("#{@public_work.license.first}") + end + it 'includes Publisher in dc:publisher' do + expect(response.body).to include("#{@public_work.publisher.first}") + end + it 'includes Identifier in dc:identifier' do + expect(response.body).to include("#{@public_work.identifier.first}") + end + it 'includes Language in dc:language' do + expect(response.body).to include("#{@public_work.language.first}") + end + it 'includes Resource Type in dc:type' do + expect(response.body).to include("#{@public_work.resource_type.first}") + end + it 'includes Keyword and Subject in dc:subject' do + @public_work.keyword.each { |keyword| expect(response.body).to include("#{keyword}")} + @public_work.subject.each { |subject| expect(response.body).to include("#{subject}")} + end + it 'includes Abstract and Description in dc:description' do + expect(response.body).to include("#{@public_work.abstract.first}") + expect(response.body).to include("#{@public_work.description.first}") + end + it 'includes Related URL in dc:relation' do + expect(response.body).to include("#{@public_work.related_url.first}") + end + + context 'with a PRIVATE file' do + it 'does NOT have file URL 
in dc:identifier' do + get oai_catalog_path(verb: 'GetRecord', metadataPrefix: 'oai_dc', + identifier: "#{@repository_id}:#{@public_work_private_file.id}") + # Confirm response + expect(response.body).to include("#{@repository_id}:#{@public_work_private_file.id}") + # Confirm file URL not present + # e.g., https://repository.library.carleton.ca/downloads/f1881k888.pdf + expect(response.body).not_to include( + '' \ + "#{@hyrax_host}/downloads/#{@public_work_private_file.file_set_ids.first}.pdf" + ) + end + end + end + + context 'a PUBLIC Etd with metadataFormat=oai_dc' do + before(:all) do + get oai_catalog_path(verb: 'GetRecord', metadataPrefix: 'oai_dc', + identifier: "#{@repository_id}:#{@public_etd.id}") + end + it 'includes Etd URL in dc:identifier' do + expect(response.body).to include( + "#{@hyrax_host}/concern/etds/#{@public_etd.id}" + ) + end + it 'includes Date Created in dc:date with YYYY format' do + expect(response.body).to include("#{Date.parse(@public_etd.date_created.first).year}") + end + it 'includes Title in dc:title' do + expect(response.body).to include("#{@public_etd.title.first}") + end + it 'includes Creator in dc:creator' do + expect(response.body).to include("#{@public_etd.creator.first}") + end + it 'includes Contributor in dc:contributor' do + expect(response.body).to include("#{@public_etd.contributor.first}") + end + it 'includes Rights Notes in dc:rights' do + expect(response.body).to include("#{@public_etd.rights_notes.first}") + end + it 'includes Publisher in dc:publisher' do + expect(response.body).to include("#{@public_etd.publisher.first}") + end + it 'includes Identifier in dc:identifier' do + expect(response.body).to include("#{@public_etd.identifier.first}") + end + it 'includes Language in dc:language' do + expect(response.body).to include("#{@public_etd.language.first}") + end + it 'includes Resource Type in dc:type' do + expect(response.body).to include("#{@public_etd.resource_type.first}") + end + it 'includes Subject in 
dc:subject' do + @public_etd.subject.each { |subject| expect(response.body).to include("#{subject}") } + end + it 'includes Abstract in dc:description' do + expect(response.body).to include("#{@public_etd.abstract.first}") + end + end + + context 'a PRIVATE Work with metadataFormat=oai_dc' do + it 'displays an idDoesNotExist error' do + get oai_catalog_path(verb: 'GetRecord', metadataPrefix: 'oai_dc', + identifier: "#{@repository_id}:#{@private_work.id}") + expect(response.body).to include('') + end + end + + context 'with an identifier that does NOT exist' do + it 'displays an idDoesNotExist error' do + get oai_catalog_path(verb: 'GetRecord', metadataPrefix: 'oai_dc', + identifier: "#{@repository_id}:not-a-real-id") + + expect(response.body).to include('') + end + end + end + + # describe 'ListSets verb' do + # it 'displays Collections as sets' + # ... etc + # Factory / factories to create collection type, collection & add works to confirm OAI ListSets config, etc., + # is a lot of overhead that would duplicate efforts in Hyrax. See https://github.com/samvera/hyrax/pull/6664 + # and e.g. spec/factories/collections.rb. Being able to use upstream factories will be great when we upgrade + # to Hyrax v5! For now, fetch OAI config & confirm Sets are configured based on collection membership. 
+ describe 'ListSets verb' do + it 'is configured to show Collections as Sets' do + expect(CatalogController.blacklight_config.oai[:document][:set_fields].count).to eq(1) + expect(CatalogController.blacklight_config.oai[:document][:set_fields][0][:label]).to eq('Collection') + expect(CatalogController.blacklight_config.oai[:document][:set_fields][0][:solr_field]).to eq('member_of_oai_sets_ssim') + end + end +end diff --git a/spec/requests/oai_etdms_endpoint_spec.rb b/spec/requests/oai_etdms_endpoint_spec.rb new file mode 100644 index 0000000..2f100be --- /dev/null +++ b/spec/requests/oai_etdms_endpoint_spec.rb @@ -0,0 +1,243 @@ +# frozen_string_literal: true + +require 'rails_helper' + +RSpec.describe 'OAI-ETDMS endpoint' do + # Set up config & create works + before(:all) do + @repository_id = 'oai:repository.library.carleton.ca' + @hyrax_host = CatalogController.blacklight_config.oai[:provider][:repository_url].sub('/catalog/oai', '') + + # Public Etd, with public PDF, licenced to CU & LAC + @public_etd = FactoryBot.create(:public_etd_with_public_file, { + title: ["Public Etd licenced to LAC #{Time.new.strftime('%Y-%m-%d %H:%M:%S')}"] + }) + + # Private Etd, private file. Licenced to CU & LAC, and shouldn't be included due to access restrictions + @private_etd = FactoryBot.create(:private_etd_with_private_file, { + title: ["Private Etd licenced to LAC #{Time.new.strftime('%Y-%m-%d %H:%M:%S')}"] + }) + + # Public Etd does not have explicit agreements. LAC licence is in PDF. 
+ @public_etd_no_agreements = + FactoryBot.create(:public_etd_with_public_file, { + title: ["Public Etd has LAC licence in PDF #{Time.new.strftime('%Y-%m-%d %H:%M:%S')}"], + agreement: [] + }) + + # Public Etd, has CU Thesis Licence Agreement but no LAC licence + @public_etd_not_licenced = + FactoryBot.create(:public_etd_with_public_file, { + title: ["Public Etd does not have an LAC licence #{Time.new.strftime('%Y-%m-%d %H:%M:%S')}"], + agreement: ['https://repository.library.carleton.ca/concern/works/pc289j04q'] + }) + + # Public Etd with a private file. Private file URL should NOT be included. + @public_etd_private_file = + FactoryBot.create(:public_etd_with_private_file, { + title: ["Public Etd with private file #{Time.new.strftime('%Y-%m-%d %H:%M:%S')}"] + }) + + # Public Work, shouldn't be included in requests when metadataFormat=oai_etdms + @public_work = FactoryBot.create(:public_work_with_public_file, + { title: ["Public work #{Time.new.strftime('%Y-%m-%d %H:%M:%S')}"] }) + end + + describe 'ListMetadataFormats verb' do + it 'oai_etdms is a supported format' do + get oai_catalog_path(verb: 'ListMetadataFormats') + expect(response.body).to include('oai_etdms') + end + + it 'oai_etdms is a supported format for Etds' do + get oai_catalog_path(verb: 'ListMetadataFormats', identifier: "#{@repository_id}:#{@public_etd.id}") + expect(response.body).to include('oai_etdms') + end + + # this fails + # it 'oai_etdms is NOT a supported format for Works' do + # get oai_catalog_path(verb: 'ListMetadataFormats', identifier: "#{repository_id}:#{work.id}") + # expect(response.body).not_to include('oai_etdms') + # end + end + + describe 'ListIdentifiers verb' do + # metadataPrefix is a required argument for ListIdentifiers verb + context 'with metadataFormat=oai_dc' do + it 'lists identifiers for all PUBLIC items' do + get oai_catalog_path(verb: 'ListIdentifiers', metadataPrefix: 'oai_dc') + expect(response.body).to include("#{@repository_id}:#{@public_work.id}") + 
expect(response.body).to include("#{@repository_id}:#{@public_etd.id}") + expect(response.body).to include("#{@repository_id}:#{@public_etd_no_agreements.id}") + expect(response.body).to include("#{@repository_id}:#{@public_etd_not_licenced.id}") + expect(response.body).not_to include("#{@repository_id}:#{@private_etd.id}") + end + end + + context 'with metadataFormat=oai_etdms' do + it 'lists identifiers for PUBLIC Etds that can be harvested by LAC' do + get oai_catalog_path(verb: 'ListIdentifiers', metadataPrefix: 'oai_etdms') + expect(response.body).not_to include("#{@repository_id}:#{@public_work.id}") + expect(response.body).to include("#{@repository_id}:#{@public_etd.id}") + expect(response.body).to include("#{@repository_id}:#{@public_etd_no_agreements.id}") + expect(response.body).not_to include("#{@repository_id}:#{@public_etd_not_licenced.id}") + expect(response.body).not_to include("#{@repository_id}:#{@private_etd.id}") + end + end + end + + describe 'ListRecords verb' do + # metadataPrefix is a required argument for ListRecords verb + context 'with metadataFormat=oai_dc' do + it 'lists records for all PUBLIC items' do + get oai_catalog_path(verb: 'ListRecords', metadataPrefix: 'oai_dc') + expect(response.body).to include("#{@repository_id}:#{@public_work.id}") + expect(response.body).to include("#{@repository_id}:#{@public_etd.id}") + expect(response.body).to include("#{@repository_id}:#{@public_etd_no_agreements.id}") + expect(response.body).to include("#{@repository_id}:#{@public_etd_not_licenced.id}") + expect(response.body).not_to include("#{@repository_id}:#{@private_etd.id}") + end + end + + context 'with metadataFormat=oai_etdms' do + it 'lists records for PUBLIC Etds that can be harvested by LAC' do + get oai_catalog_path(verb: 'ListRecords', metadataPrefix: 'oai_etdms') + expect(response.body).not_to include("#{@repository_id}:#{@public_work.id}") + expect(response.body).to include("#{@repository_id}:#{@public_etd.id}") + 
expect(response.body).to include("#{@repository_id}:#{@public_etd_no_agreements.id}") + expect(response.body).not_to include("#{@repository_id}:#{@public_etd_not_licenced.id}") + expect(response.body).not_to include("#{@repository_id}:#{@private_etd.id}") + end + end + end + + describe 'GetRecord verb' do + context 'a PUBLIC Etd with metadataFormat=oai_etdms' do + before(:all) do + get oai_catalog_path(verb: 'GetRecord', metadataPrefix: 'oai_etdms', + identifier: "#{@repository_id}:#{@public_etd.id}") + end + + it 'has a title in thesis:title' do + expect(response.body).to include("#{@public_etd.title.first}") + end + it 'has a creator in thesis:creator' do + expect(response.body).to include("#{@public_etd.creator.first}") + end + it 'has type Thesis in thesis:type' do + expect(response.body).to include("#{@public_etd.resource_type.first}") + end + it 'has a degree level in thesis:level' do + # See config/authorities/degree_levels.yml + # HyraxHelper::degree_term_level should be moved to ApplicationHelper & used here + expect(response.body).to include("#{@public_etd.degree_level == '1' ? 
"Master's" : 'Doctoral'}") + end + it 'has a degree name in thesis:name' do + expect(response.body).to include("#{@public_etd.degree}") + end + it 'has a degree discipline in thesis:discipline' do + expect(response.body).to include("#{@public_etd.degree_discipline}") + end + it 'has a degree grantor in thesis:grantor' do + # Publisher (Carleton University) is mapped to degree grantor + expect(response.body).to include("#{@public_etd.publisher.first}") + end + it 'has a contributor in thesis:contributor' do + expect(response.body).to include("#{@public_etd.contributor.first}") + end + it 'has subjects in thesis:subject' do + # Theses usually have multiple subjects -- confirm all + @public_etd.subject.each do |sub| + expect(response.body).to include("#{sub}") + end + end + it 'has an abstract in thesis:description' do + # abstract is mapped to + expect(response.body).to include("#{@public_etd.abstract.first}") + end + it 'has a publisher in thesis:publisher' do + # Publisher is always 'Carleton University' + expect(response.body).to include("#{@public_etd.publisher.first}") + end + it 'has a YYYY date in thesis:date' do + # Date provided is YYYY format + expect(response.body).to include("#{Date.parse(@public_etd.date_created.first).year}") + end + it 'has an ISO 639-3 language code in thesis:language' do + expect(response.body).to include("#{@public_etd.language.first}") + end + it 'has a copyright statement in thesis:rights' do + expect(response.body).to include("#{@public_etd.rights_notes.first}") + end + it 'has a DOI in thesis:identifier' do + # Factory attribute doesn't include 'DOI: ' as a prefix + expect(response.body).to include("#{@public_etd.identifier.first}") + end + it 'has Etd landing page URL in thesis:identifier' do + # Link to Etd landing page is included as an identifier + # e.g. 
https://repository.library.carleton.ca/concern/etds/r207tp32d + expect(response.body).to include("#{@hyrax_host}/concern/etds/#{@public_etd.id}") + end + it 'has PUBLIC file URL in thesis:identifier' do + # @public_etd has one PDF file. Expect Hyrax download URL with '.pdf' appended, e.g., + # https://repository.library.carleton.ca/downloads/f1881k888.pdf + expect(response.body).to include( + '' \ + "#{@hyrax_host}/downloads/#{@public_etd.file_set_ids.first}.pdf" + ) + end + + context 'that is NOT licensed to LAC' do + it 'displays a cannotDisseminateFormat error' do + get oai_catalog_path(verb: 'GetRecord', metadataPrefix: 'oai_etdms', + identifier: "#{@repository_id}:#{@public_etd_not_licenced.id}") + + expect(response.body).not_to include( + "#{@repository_id}:#{@public_etd_not_licenced.id}" + ) + expect(response.body).to include('') + end + end + + context 'with an identifier that does NOT exist' do + it 'displays an idDoesNotExist error' do + get oai_catalog_path(verb: 'GetRecord', metadataPrefix: 'oai_etdms', + identifier: "#{@repository_id}:not-a-real-id") + + expect(response.body).to include('') + end + end + + context 'with a PRIVATE file' do + it 'does NOT have file URL' do + get oai_catalog_path(verb: 'GetRecord', metadataPrefix: 'oai_etdms', + identifier: "#{@repository_id}:#{@public_etd_private_file.id}") + # Confirm response + expect(response.body).to include("#{@repository_id}:#{@public_etd_private_file.id}") + + # Confirm file URL not present, e.g., https://repository.library.carleton.ca/downloads/f1881k888.pdf + expect(response.body).not_to include( + '' \ + "#{@hyrax_host}/downloads/#{@public_etd_private_file.file_set_ids.first}.pdf" + ) + end + end + end + + context 'a PRIVATE Etd with metadataFormat=oai_etdms' do + it 'displays an idDoesNotExist error' do + get oai_catalog_path(verb: 'GetRecord', metadataPrefix: 'oai_etdms', + identifier: "#{@repository_id}:#{@private_etd.id}") + expect(response.body).to include('') + end + end + + context 'a 
PUBLIC Work with metadataFormat=oai_etdms' do + it 'displays a cannotDisseminateFormat error' do + get oai_catalog_path(verb: 'GetRecord', metadataPrefix: 'oai_etdms', + identifier: "#{@repository_id}:#{@public_work.id}") + expect(response.body).to include('') + end + end + end +end diff --git a/spec/requests/oai_pmh_endpoint_spec.rb b/spec/requests/oai_pmh_endpoint_spec.rb deleted file mode 100644 index dea391b..0000000 --- a/spec/requests/oai_pmh_endpoint_spec.rb +++ /dev/null @@ -1,206 +0,0 @@ -# frozen_string_literal: true -require 'rails_helper' - -RSpec.describe 'OAI-PMH endpoint' do - let(:repository_name) { 'Carleton University Institutional Repository' } - let(:repository_url) { CatalogController.blacklight_config.oai[:provider][:repository_url] } - let(:repository_id) { 'oai:repository.library.carleton.ca' } - - # the endpoint is available - describe 'root page' do - it 'displays an error message about missing verb' do - get oai_catalog_path - expect(response.body).to include 'not a legal OAI-PMH verb' - end - end - - # the Identify verb returns repository information - describe 'Identify verb' do - it 'displays repository information' do - get oai_catalog_path(verb: 'Identify') - expect(response.body).to include repository_name - end - end - - # the endpoint supports OAI-DC as a metadata format - describe 'ListMetadataFormats verb' do - it 'lists oai_dc as a supported format' do - get oai_catalog_path(verb: 'ListMetadataFormats') - expect(response.body).to include('oai_dc') - end - end - - describe 'GetRecord verb' do - - context 'for a Work' do - let(:work_attributes) do - { - title: ['Example work in OAI-DC record' + Time.new.strftime('%Y-%m-%d %H:%M:%S')], - creator: ['Lastname, Given'], - contributor: ['Surname, First'], - date_created: ['2023-04-02'], - rights_statement: ['http://rightsstatements.org/vocab/InC/1.0/'], - rights_notes: ['Copyright 2023'], - license: ['https://creativecommons.org/licenses/by/4.0/'], - publisher: ['Large Publishing Co., 
Ltd.'], - identifier: ['DOI: https://doi.org/10.22215/2023-12345'], - language: ['eng'], - resource_type: ['Article'], - keyword: ['Term', 'Descriptive phrase', 'Key topic'], - subject: ['Subject Area', 'Subject Area -- Ontario', 'Subject Area -- 20th century'], - abstract: ['A summary of the work.'], - description: ['A description of the work.'], - related_url: ['https://www.example.com/related/work'] - } - end - let(:work) { FactoryBot.create(:work, :public, work_attributes) } - - before do - get oai_catalog_path(verb: 'GetRecord', metadataPrefix: 'oai_dc', identifier: "#{repository_id}:#{work.id}") - end - - it 'includes work URL in dc:identifier' do - expect(response.body).to include('' + repository_url.gsub('/catalog/oai', '/concern/works/' + work.id) + '') - end - - it 'includes Date Created in dc:date with YYYY-MM-DD format' do - expect(response.body).to include('' + work.date_created.first + '') - end - - it 'includes Title in dc:title' do - expect(response.body).to include('' + work.title.first + '') - end - - it 'includes Creator in dc:creator' do - expect(response.body).to include('' + work.creator.first + '') - end - - it 'includes Contributor in dc:contributor' do - expect(response.body).to include('' + work.contributor.first + '') - end - - it 'includes Rights Statement, Rights Notes, and Creative Commons license in dc:rights' do - expect(response.body).to include('' + work.rights_statement.first + '') - expect(response.body).to include('' + work.rights_notes.first + '') - expect(response.body).to include('' + work.license.first + '') - end - - it 'includes Publisher in dc:publisher' do - expect(response.body).to include('' + work.publisher.first + '') - end - - it 'includes Identifier in dc:identifier' do - expect(response.body).to include('' + work.identifier.first + '') - end - - it 'includes Language in dc:language' do - expect(response.body).to include('' + work.language.first + '') - end - - it 'includes Resource Type in dc:type' do - 
expect(response.body).to include('' + work.resource_type.first + '') - end - - it 'includes Keyword and Subject in dc:subject' do - work.keyword.each { |keyword| expect(response.body).to include('' + keyword + '')} - work.subject.each { |subject| expect(response.body).to include('' + subject + '')} - end - - it 'includes Abstract and Description in dc:description' do - expect(response.body).to include('' + work.abstract.first + '') - expect(response.body).to include('' + work.description.first + '') - end - - it 'includes Related URL in dc:relation' do - expect(response.body).to include('' + work.related_url.first + '') - end - end - - context 'for an Etd' do - let(:etd_attributes) do - { - title: ['Example Etd in OAI-DC record' + Time.new.strftime('%Y-%m-%d %H:%M:%S')], - creator: ['Lastname, Given'], - contributor: ['Surname, First (Supervisor)'], - date_created: ['2023-04-02'], - rights_notes: ['Copyright 2023'], - publisher: ['Carleton University'], - identifier: ['DOI: https://doi.org/10.22215/2023-12345'], - language: ['eng'], - resource_type: ['Thesis'], - subject: ['Subject Area', 'Subject Area -- Ontario', 'Subject Area -- 20th century'], - abstract: ['A summary of the work.'] - } - end - let(:etd) { FactoryBot.create(:etd, :public, etd_attributes) } - - before do - get oai_catalog_path(verb: 'GetRecord', metadataPrefix: 'oai_dc', identifier: "#{repository_id}:#{etd.id}") - end - - it 'includes Etd URL in dc:identifier' do - expect(response.body).to include('' + repository_url.gsub('/catalog/oai', '/concern/etds/' + etd.id) + '') - end - - it 'includes Date Created in dc:date with YYYY format' do - expect(response.body).to include('' + Date.parse(etd.date_created.first).year.to_s + '') - end - - it 'includes Title in dc:title' do - expect(response.body).to include('' + etd.title.first + '') - end - - it 'includes Creator in dc:creator' do - expect(response.body).to include('' + etd.creator.first + '') - end - - it 'includes Contributor in dc:contributor' 
do - expect(response.body).to include('' + etd.contributor.first + '') - end - - it 'includes Rights Notes in dc:rights' do - expect(response.body).to include('' + etd.rights_notes.first + '') - end - - it 'includes Publisher in dc:publisher' do - expect(response.body).to include('' + etd.publisher.first + '') - end - - it 'includes Identifier in dc:identifier' do - expect(response.body).to include('' + etd.identifier.first + '') - end - - it 'includes Language in dc:language' do - expect(response.body).to include('' + etd.language.first + '') - end - - it 'includes Resource Type in dc:type' do - expect(response.body).to include('' + etd.resource_type.first + '') - end - - it 'includes Subject in dc:subject' do - etd.subject.each { |subject| expect(response.body).to include('' + subject + '')} - end - - it 'includes Abstract in dc:description' do - expect(response.body).to include('' + etd.abstract.first + '') - end - end - end - - # describe 'ListSets verb' do - # it 'displays Collections as sets' - # ... etc - # Factory / factories to create collection type, collection & add works to confirm OAI ListSets config, etc., - # is a lot of overhead that would duplicate efforts in Hyrax. See https://github.com/samvera/hyrax/pull/6664 - # and e.g. spec/factories/collections.rb. Being able to use upstream factories will be great when we upgrade - # to Hyrax v5! For now, fetch OAI config & confirm Sets are configured based on collection membership. - describe 'ListSets verb' do - it 'is configured to show Collections as Sets' do - expect(CatalogController.blacklight_config.oai[:document][:set_fields].count).to eq(1) - expect(CatalogController.blacklight_config.oai[:document][:set_fields][0][:label]).to eq('Collection') - expect(CatalogController.blacklight_config.oai[:document][:set_fields][0][:solr_field]).to eq('member_of_collections_ssim') - end - end - -end \ No newline at end of file