diff --git a/app/models/bulkrax/csv_entry.rb b/app/models/bulkrax/csv_entry.rb index c5f3d93a..b7a8110b 100644 --- a/app/models/bulkrax/csv_entry.rb +++ b/app/models/bulkrax/csv_entry.rb @@ -46,6 +46,7 @@ def build_metadata add_visibility add_metadata_for_model add_rights_statement + sanitize_controlled_uri_values! add_local self.parsed_metadata diff --git a/app/models/concerns/bulkrax/import_behavior.rb b/app/models/concerns/bulkrax/import_behavior.rb index c30779bd..699690d8 100644 --- a/app/models/concerns/bulkrax/import_behavior.rb +++ b/app/models/concerns/bulkrax/import_behavior.rb @@ -2,7 +2,7 @@ module Bulkrax # Import Behavior for Entry classes - module ImportBehavior + module ImportBehavior # rubocop:disable Metrics/ModuleLength extend ActiveSupport::Concern def build_for_importer @@ -105,6 +105,65 @@ def add_collections end end + # Attempt to sanitize Questioning Authority URI values for configured controlled fields of common + # data entry mistakes. Controlled URI values are only valid if they are an exact match. + # Example: + # Valid value: http://rightsstatements.org/vocab/InC/1.0/ + # Provided value: https://rightsstatements.org/vocab/InC/1.0 + # Sanitized value: http://rightsstatements.org/vocab/InC/1.0/ ("s" from "https" removed, trailing "/" added) + # + # @return [Boolean] true if all controlled URI values are sanitized successfully + def sanitize_controlled_uri_values! + Bulkrax.qa_controlled_properties.each do |field| + next if parsed_metadata[field].blank? + + parsed_metadata[field].each_with_index do |value, i| + next if value.blank? + + if (validated_uri_value = validate_value(value, field)) + parsed_metadata[field][i] = validated_uri_value + else + debug_msg = %(Unable to locate active authority ID "#{value}" in config/authorities/#{field.pluralize}.yml) + Rails.logger.debug(debug_msg) + error_msg = %("#{value}" is not a valid and/or active authority ID for the :#{field} field) + raise ::StandardError, error_msg + end + end + end + + true + end + + # @param value [String] value to validate + # @param field [String] name of the controlled property + # @return [String, nil] validated URI value or nil + def validate_value(value, field) + if value.match?(::URI::DEFAULT_PARSER.make_regexp) + value = value.strip.chomp + # add trailing forward slash unless one is already present + value << '/' unless value.match?(%r{/$}) + end + + valid = if active_id_for_authority?(value, field) + true + else + value.include?('https') ? value.sub!('https', 'http') : value.sub!('http', 'https') + active_id_for_authority?(value, field) + end + + valid ? value : nil + end + + # @param value [String] value to check + # @param field [String] name of the controlled property + # @return [Boolean] provided value is a present, active authority ID for the provided field + def active_id_for_authority?(value, field) + field_service = ('Hyrax::' + "#{field}_service".camelcase).constantize + active_authority_ids = field_service.new.active_elements.map { |ae| ae['id'] } + + active_authority_ids.include?(value) + end + def factory @factory ||= Bulkrax::ObjectFactory.new(attributes: self.parsed_metadata, source_identifier_value: identifier, diff --git a/lib/bulkrax.rb b/lib/bulkrax.rb index c02aa4e4..c72b90d0 100644 --- a/lib/bulkrax.rb +++ b/lib/bulkrax.rb @@ -12,13 +12,13 @@ class << self :related_children_field_mapping, :related_parents_field_mapping, :reserved_properties, + :qa_controlled_properties, :field_mappings, :import_path, :export_path, :removed_image_path, :server_name, - :api_definition, - :removed_image_path + :api_definition self.parsers = [ { name: "OAI - Dublin Core", class_name: "Bulkrax::OaiDcParser", partial: "oai_fields" }, @@ -119,6 +119,11 @@ class << self original_url relative_path ] + + # List of Questioning Authority properties that are controlled via YAML files in + # the config/authorities/ directory. For example, the :rights_statement property + # is controlled by the active terms in config/authorities/rights_statements.yml + self.qa_controlled_properties = %w[rights_statement license] end def self.api_definition @@ -131,8 +136,6 @@ def self.api_definition ) end - self.removed_image_path = 'app/assets/images/bulkrax/removed.png' - # this function maps the vars from your app into your engine def self.setup yield self diff --git a/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb b/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb index 33692fae..5901187a 100644 --- a/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +++ b/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb @@ -61,6 +61,12 @@ # Properties that should not be used in imports/exports. They are reserved for use by Hyrax. # config.reserved_properties += ['my_field'] + + # List of Questioning Authority properties that are controlled via YAML files in + # the config/authorities/ directory. For example, the :rights_statement property + # is controlled by the active terms in config/authorities/rights_statements.yml + # Defaults: 'rights_statement' and 'license' + # config.qa_controlled_properties += ['my_field'] end # Sidebar for hyrax 3+ support diff --git a/spec/models/bulkrax/csv_entry_spec.rb b/spec/models/bulkrax/csv_entry_spec.rb index 8381f04e..d1318df6 100644 --- a/spec/models/bulkrax/csv_entry_spec.rb +++ b/spec/models/bulkrax/csv_entry_spec.rb @@ -63,6 +63,128 @@ module Bulkrax end end + describe 'importing controlled questioning authority fields' do + before do + allow(subject).to receive(:raw_metadata).and_return('source_identifier' => 'qa_1', 'title' => 'some title') + end + + it 'calls #sanitize_controlled_uri_values!' do + expect(subject).to receive(:sanitize_controlled_uri_values!).once + + subject.build_metadata + end + + describe 'importing :rights_statement' do + context 'when the http/https does not match' do + context 'when the authority ID has https' do + let(:auth_id) { 'https://rightsstatements.org/vocab/fake/1.0/' } + + before do + allow(subject).to receive(:raw_metadata).and_return('rights_statement' => 'http://rightsstatements.org/vocab/fake/1.0/', 'source_identifier' => 'qa_1', 'title' => 'some title') + end + + it 'replaces http with https' do + subject.build_metadata + + expect(subject.parsed_metadata['rights_statement']).to eq([auth_id]) + end + end + + context 'when the authority ID has http' do + let(:auth_id) { 'http://rightsstatements.org/vocab/InC/1.0/' } + + before do + allow(subject).to receive(:raw_metadata).and_return('rights_statement' => 'https://rightsstatements.org/vocab/InC/1.0/', 'source_identifier' => 'qa_1', 'title' => 'some title') + end + + it 'replaces https with http' do + subject.build_metadata + + expect(subject.parsed_metadata['rights_statement']).to eq([auth_id]) + end + end + end + + context 'when the value does not have a forward slash at the end' do + before do + allow(subject).to receive(:raw_metadata).and_return('rights_statement' => 'http://rightsstatements.org/vocab/InC/1.0', 'source_identifier' => 'qa_1', 'title' => 'some title') + end + + it 'adds a trailing forward slash' do + subject.build_metadata + + expect(subject.parsed_metadata['rights_statement']).to eq(['http://rightsstatements.org/vocab/InC/1.0/']) + end + end + + context 'when the value does not match an existing, active authority ID' do + before do + allow(subject).to receive(:raw_metadata).and_return('rights_statement' => 'hello world', 'source_identifier' => 'qa_1', 'title' => 'some title') + end + + it 'raises a StandardError' do + expect { subject.build_metadata } + .to raise_error(StandardError, %("hello world" is not a valid and/or active authority ID for the :rights_statement field)) + end + end + end + + describe 'importing :license' do + context 'when the http/https does not match' do + context 'when the authority ID has https' do + let(:auth_id) { 'https://creativecommons.org/licenses/by/4.0/' } + + before do + allow(subject).to receive(:raw_metadata).and_return('license' => 'http://creativecommons.org/licenses/by/4.0/', 'source_identifier' => 'qa_1', 'title' => 'some title') + end + + it 'replaces http with https' do + subject.build_metadata + + expect(subject.parsed_metadata['license']).to eq([auth_id]) + end + end + + context 'when the authority ID has http' do + let(:auth_id) { 'http://creativecommons.org/publicdomain/mark/1.0/' } + + before do + allow(subject).to receive(:raw_metadata).and_return('license' => 'https://creativecommons.org/publicdomain/mark/1.0/', 'source_identifier' => 'qa_1', 'title' => 'some title') + end + + it 'replaces https with http' do + subject.build_metadata + + expect(subject.parsed_metadata['license']).to eq([auth_id]) + end + end + end + + context 'when the value does not have a forward slash at the end' do + before do + allow(subject).to receive(:raw_metadata).and_return('license' => 'https://creativecommons.org/licenses/by/4.0', 'source_identifier' => 'qa_1', 'title' => 'some title') + end + + it 'adds a trailing forward slash' do + subject.build_metadata + + expect(subject.parsed_metadata['license']).to eq(['https://creativecommons.org/licenses/by/4.0/']) + end + end + + context 'when the value does not match an existing, active authority ID' do + before do + allow(subject).to receive(:raw_metadata).and_return('license' => 'hello world', 'source_identifier' => 'qa_1', 'title' => 'some title') + end + + it 'raises a StandardError' do + expect { subject.build_metadata } + .to raise_error(StandardError, %("hello world" is not a valid and/or active authority ID for the :license field)) + end + end + end + end + context 'with parent-child relationships' do let(:importer) { FactoryBot.create(:bulkrax_importer_csv, :with_relationships_mappings) } let(:required_data) do diff --git a/spec/test_app/app/models/work.rb b/spec/test_app/app/models/work.rb index ca755712..033ed928 100644 --- a/spec/test_app/app/models/work.rb +++ b/spec/test_app/app/models/work.rb @@ -5,4 +5,7 @@ class Work < ActiveFedora::Base include ::Hyrax::WorkBehavior property :single_object, predicate: ::RDF::Vocab::DC.creator, multiple: false property :multiple_objects, predicate: ::RDF::Vocab::DC.creator + + property :license, predicate: ::RDF::Vocab::DC.rights + property :rights_statement, predicate: ::RDF::Vocab::EDM.rights end diff --git a/spec/test_app/config/authorities/rights_statements.yml b/spec/test_app/config/authorities/rights_statements.yml index 719e7576..d44e8967 100644 --- a/spec/test_app/config/authorities/rights_statements.yml +++ b/spec/test_app/config/authorities/rights_statements.yml @@ -35,3 +35,6 @@ terms: - id: http://rightsstatements.org/vocab/NKC/1.0/ term: "No Known Copyright" active: true +- id: https://rightsstatements.org/vocab/fake/1.0/ + term: "Fake Term for Testing" + active: true