diff --git a/.rubocop_todo.yml b/.rubocop_todo.yml index 49ee25a7..b6e2e15b 100644 --- a/.rubocop_todo.yml +++ b/.rubocop_todo.yml @@ -1,19 +1,19 @@ # This configuration was generated by # `rubocop --auto-gen-config` -# on 2021-12-14 15:16:54 -0800 using RuboCop version 0.85.1. +# on 2022-01-26 14:39:48 -0800 using RuboCop version 0.85.1. # The point is for the user to remove these configuration records # one by one as the offenses are removed from the code base. # Note that changes in the inspected code, or installation of new # versions of RuboCop, may require this file to be generated again. -# Offense count: 6 +# Offense count: 7 # Cop supports --auto-correct. # Configuration parameters: AutoCorrect, AllowHeredoc, AllowURI, URISchemes, IgnoreCopDirectives, IgnoredPatterns. # URISchemes: http, https Layout/LineLength: Max: 301 -# Offense count: 17 +# Offense count: 20 # Configuration parameters: IgnoredMethods. Metrics/AbcSize: Max: 42 @@ -21,14 +21,14 @@ Metrics/AbcSize: # Offense count: 6 # Configuration parameters: CountComments. Metrics/ClassLength: - Max: 197 + Max: 202 -# Offense count: 10 +# Offense count: 11 # Configuration parameters: IgnoredMethods. Metrics/CyclomaticComplexity: Max: 12 -# Offense count: 26 +# Offense count: 30 # Configuration parameters: CountComments, ExcludedMethods. Metrics/MethodLength: Max: 26 @@ -38,7 +38,7 @@ Metrics/MethodLength: Metrics/ModuleLength: Max: 131 -# Offense count: 10 +# Offense count: 11 # Configuration parameters: IgnoredMethods. Metrics/PerceivedComplexity: Max: 13 @@ -66,7 +66,7 @@ RSpec/MessageChain: - 'spec/parsers/bulkrax/csv_parser_spec.rb' - 'spec/parsers/bulkrax/xml_parser_spec.rb' -# Offense count: 80 +# Offense count: 78 # Configuration parameters: . # SupportedStyles: have_received, receive RSpec/MessageSpies: diff --git a/Gemfile.lock b/Gemfile.lock index 0e413f1f..66c2fee1 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -928,4 +928,4 @@ DEPENDENCIES willow_sword! 
BUNDLED WITH - 1.17.3 + 2.1.4 diff --git a/app/controllers/bulkrax/importers_controller.rb b/app/controllers/bulkrax/importers_controller.rb index 52849694..72dc737a 100644 --- a/app/controllers/bulkrax/importers_controller.rb +++ b/app/controllers/bulkrax/importers_controller.rb @@ -37,6 +37,7 @@ def show @work_entries = @importer.entries.where(type: @importer.parser.entry_class.to_s).page(params[:work_entries_page]).per(30) @collection_entries = @importer.entries.where(type: @importer.parser.collection_entry_class.to_s).page(params[:collections_entries_page]).per(30) + @file_set_entries = @importer.entries.where(type: @importer.parser.file_set_entry_class.to_s).page(params[:file_set_entries_page]).per(30) end end diff --git a/app/factories/bulkrax/object_factory.rb b/app/factories/bulkrax/object_factory.rb index c58fa175..03b3884b 100644 --- a/app/factories/bulkrax/object_factory.rb +++ b/app/factories/bulkrax/object_factory.rb @@ -4,11 +4,13 @@ module Bulkrax class ObjectFactory extend ActiveModel::Callbacks include Bulkrax::FileFactory + include DynamicRecordLookup + define_model_callbacks :save, :create - attr_reader :attributes, :object, :source_identifier_value, :klass, :replace_files, :update_files, :work_identifier, :collection_field_mapping + attr_reader :attributes, :object, :source_identifier_value, :klass, :replace_files, :update_files, :work_identifier, :collection_field_mapping, :related_parents_parsed_mapping # rubocop:disable Metrics/ParameterLists - def initialize(attributes:, source_identifier_value:, work_identifier:, collection_field_mapping:, replace_files: false, user: nil, klass: nil, update_files: false) + def initialize(attributes:, source_identifier_value:, work_identifier:, collection_field_mapping:, related_parents_parsed_mapping: nil, replace_files: false, user: nil, klass: nil, update_files: false) ActiveSupport::Deprecation.warn( 'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax 
version 3.0.' \ ' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.' @@ -19,6 +21,7 @@ def initialize(attributes:, source_identifier_value:, work_identifier:, collecti @user = user || User.batch_user @work_identifier = work_identifier @collection_field_mapping = collection_field_mapping + @related_parents_parsed_mapping = related_parents_parsed_mapping @source_identifier_value = source_identifier_value @klass = klass || Bulkrax.default_work_type.constantize end @@ -33,7 +36,7 @@ def run arg_hash = { id: attributes[:id], name: 'UPDATE', klass: klass } @object = find if object - object.reindex_extent = Hyrax::Adapters::NestingIndexAdapter::LIMITED_REINDEX + object.reindex_extent = Hyrax::Adapters::NestingIndexAdapter::LIMITED_REINDEX if object.respond_to?(:reindex_extent) ActiveSupport::Notifications.instrument('import.importer', arg_hash) { update } else ActiveSupport::Notifications.instrument('import.importer', arg_hash.merge(name: 'CREATE')) { create } @@ -51,10 +54,16 @@ def run! def update raise "Object doesn't exist" unless object - destroy_existing_files if @replace_files && klass != Collection + destroy_existing_files if @replace_files && ![Collection, FileSet].include?(klass) attrs = attribute_update run_callbacks :save do - klass == Collection ? update_collection(attrs) : work_actor.update(environment(attrs)) + if klass == Collection + update_collection(attrs) + elsif klass == FileSet + update_file_set(attrs) + else + work_actor.update(environment(attrs)) + end end log_updated(object) end @@ -90,10 +99,16 @@ def search_by_identifier def create attrs = create_attributes @object = klass.new - object.reindex_extent = Hyrax::Adapters::NestingIndexAdapter::LIMITED_REINDEX + object.reindex_extent = Hyrax::Adapters::NestingIndexAdapter::LIMITED_REINDEX if object.respond_to?(:reindex_extent) run_callbacks :save do run_callbacks :create do - klass == Collection ? 
create_collection(attrs) : work_actor.create(environment(attrs)) + if klass == Collection + create_collection(attrs) + elsif klass == FileSet + create_file_set(attrs) + else + work_actor.create(environment(attrs)) + end end end log_created(object) @@ -150,6 +165,35 @@ def update_collection(attrs) object.save! end + # This method is heavily inspired by Hyrax's AttachFilesToWorkJob + def create_file_set(attrs) + work = find_record(attributes[related_parents_parsed_mapping].first) + work_permissions = work.permissions.map(&:to_hash) + file_set_attrs = attrs.slice(*object.attributes.keys) + object.assign_attributes(file_set_attrs) + + attrs['uploaded_files'].each do |uploaded_file_id| + uploaded_file = ::Hyrax::UploadedFile.find(uploaded_file_id) + next if uploaded_file.file_set_uri.present? + + actor = ::Hyrax::Actors::FileSetActor.new(object, @user) + uploaded_file.update(file_set_uri: actor.file_set.uri) + actor.file_set.permissions_attributes = work_permissions + actor.create_metadata + actor.create_content(uploaded_file) + actor.attach_to_work(work) + end + + object.save! + end + + def update_file_set(attrs) + file_set_attrs = attrs.slice(*object.attributes.keys) + actor = ::Hyrax::Actors::FileSetActor.new(object, @user) + + actor.update_metadata(file_set_attrs) + end + # Add child to parent's #member_collections # Add parent to child's #member_of_collections def persist_collection_memberships(parent:, child:) diff --git a/app/jobs/bulkrax/create_relationships_job.rb b/app/jobs/bulkrax/create_relationships_job.rb index c3409128..96241eb6 100644 --- a/app/jobs/bulkrax/create_relationships_job.rb +++ b/app/jobs/bulkrax/create_relationships_job.rb @@ -17,6 +17,8 @@ module Bulkrax # NOTE: In the context of this job, "identifier" is used to generically refer # to either a record's ID or an Bulkrax::Entry's source_identifier. 
class CreateRelationshipsJob < ApplicationJob + include DynamicRecordLookup + queue_as :import attr_accessor :base_entry, :child_record, :parent_record, :importer_run @@ -74,39 +76,6 @@ def create_relationship end end - # This method allows us to create relationships with preexisting records (by their ID) OR - # with records that are concurrently being imported (by their Bulkrax::Entry source_identifier). - # - # @param identifier [String] Work/Collection ID or Bulkrax::Entry source_identifier - # @return [Work, Collection, nil] Work or Collection if found, otherwise nil - def find_record(identifier) - record = Entry.find_by(identifier: identifier) - record ||= ::Collection.where(id: identifier).first - if record.blank? - available_work_types.each do |work_type| - record ||= work_type.where(id: identifier).first - end - end - record = record.factory.find if record.is_a?(Entry) - - record - end - - # Check if the record is a Work - def curation_concern?(record) - available_work_types.include?(record.class) - end - - # @return [Array] list of work type classes - def available_work_types - # If running in a Hyku app, do not reference disabled work types - @available_work_types ||= if defined?(::Hyku) - ::Site.instance.available_works.map(&:constantize) - else - ::Hyrax.config.curation_concerns - end - end - def user @user ||= importer_run.importer.user end diff --git a/app/jobs/bulkrax/import_collection_job.rb b/app/jobs/bulkrax/import_collection_job.rb index b80c8cbb..2a279cea 100644 --- a/app/jobs/bulkrax/import_collection_job.rb +++ b/app/jobs/bulkrax/import_collection_job.rb @@ -11,9 +11,11 @@ def perform(*args) entry.build entry.save add_user_to_permission_template!(entry) + ImporterRun.find(args[1]).increment!(:processed_records) ImporterRun.find(args[1]).increment!(:processed_collections) ImporterRun.find(args[1]).decrement!(:enqueued_records) rescue => e + ImporterRun.find(args[1]).increment!(:failed_records) 
ImporterRun.find(args[1]).increment!(:failed_collections) ImporterRun.find(args[1]).decrement!(:enqueued_records) raise e diff --git a/app/jobs/bulkrax/import_file_set_job.rb b/app/jobs/bulkrax/import_file_set_job.rb new file mode 100644 index 00000000..da12d738 --- /dev/null +++ b/app/jobs/bulkrax/import_file_set_job.rb @@ -0,0 +1,69 @@ +# frozen_string_literal: true + +module Bulkrax + class MissingParentError < ::StandardError; end + class ImportFileSetJob < ApplicationJob + include DynamicRecordLookup + + queue_as :import + + def perform(entry_id, importer_run_id) + entry = Entry.find(entry_id) + parent_identifier = entry.raw_metadata[entry.related_parents_raw_mapping]&.strip + + validate_parent!(parent_identifier) + + entry.build + if entry.succeeded? + # rubocop:disable Rails/SkipsModelValidations + ImporterRun.find(importer_run_id).increment!(:processed_records) + ImporterRun.find(importer_run_id).increment!(:processed_file_sets) + else + ImporterRun.find(importer_run_id).increment!(:failed_records) + ImporterRun.find(importer_run_id).increment!(:failed_file_sets) + # rubocop:enable Rails/SkipsModelValidations + end + ImporterRun.find(importer_run_id).decrement!(:enqueued_records) # rubocop:disable Rails/SkipsModelValidations + entry.save! + + rescue MissingParentError => e + # try waiting for the parent record to be created + entry.import_attempts += 1 + entry.save! + if entry.import_attempts < 5 + ImportFileSetJob + .set(wait: (entry.import_attempts + 1).minutes) + .perform_later(entry_id, importer_run_id) + else + ImporterRun.find(importer_run_id).decrement!(:enqueued_records) # rubocop:disable Rails/SkipsModelValidations + entry.status_info(e) + end + end + + private + + attr_reader :parent_record + + def validate_parent!(parent_identifier) + # if parent_identifier is missing, it will be caught by #validate_presence_of_parent! + return if parent_identifier.blank? 
+ + find_parent_record(parent_identifier) + check_parent_exists!(parent_identifier) + check_parent_is_a_work!(parent_identifier) + end + + def check_parent_exists!(parent_identifier) + raise MissingParentError, %(Unable to find a record with the identifier "#{parent_identifier}") if parent_record.blank? + end + + def check_parent_is_a_work!(parent_identifier) + error_msg = %(A record with the ID "#{parent_identifier}" was found, but it was a #{parent_record.class}, which is not a valid/available work type) + raise ::StandardError, error_msg unless curation_concern?(parent_record) + end + + def find_parent_record(parent_identifier) + @parent_record ||= find_record(parent_identifier) + end + end +end diff --git a/app/jobs/bulkrax/import_work_job.rb b/app/jobs/bulkrax/import_work_job.rb index 3b085940..b826135e 100644 --- a/app/jobs/bulkrax/import_work_job.rb +++ b/app/jobs/bulkrax/import_work_job.rb @@ -10,11 +10,13 @@ def perform(*args) entry.build if entry.status == "Complete" ImporterRun.find(args[1]).increment!(:processed_records) + ImporterRun.find(args[1]).increment!(:processed_works) ImporterRun.find(args[1]).decrement!(:enqueued_records) unless ImporterRun.find(args[1]).enqueued_records <= 0 # rubocop:disable Style/IdenticalConditionalBranches else # do not retry here because whatever parse error kept you from creating a work will likely # keep preventing you from doing so. ImporterRun.find(args[1]).increment!(:failed_records) + ImporterRun.find(args[1]).increment!(:failed_works) ImporterRun.find(args[1]).decrement!(:enqueued_records) unless ImporterRun.find(args[1]).enqueued_records <= 0 # rubocop:disable Style/IdenticalConditionalBranches end entry.save! 
diff --git a/app/jobs/bulkrax/importer_job.rb b/app/jobs/bulkrax/importer_job.rb index 761dabbf..432f8df1 100644 --- a/app/jobs/bulkrax/importer_job.rb +++ b/app/jobs/bulkrax/importer_job.rb @@ -20,6 +20,7 @@ def import(importer, only_updates_since_last_import) importer.import_collections importer.import_works + importer.import_file_sets end def unzip_imported_file(parser) @@ -31,6 +32,7 @@ def unzip_imported_file(parser) def update_current_run_counters(importer) importer.current_run.total_work_entries = importer.limit || importer.parser.works_total importer.current_run.total_collection_entries = importer.parser.collections_total + importer.current_run.total_file_set_entries = importer.parser.file_sets_total importer.current_run.save! end diff --git a/app/models/bulkrax/csv_entry.rb b/app/models/bulkrax/csv_entry.rb index a5ed82be..409d85ae 100644 --- a/app/models/bulkrax/csv_entry.rb +++ b/app/models/bulkrax/csv_entry.rb @@ -40,9 +40,9 @@ def build_metadata self.parsed_metadata = {} add_identifier - add_metadata_for_model add_visibility add_ingested_metadata + add_metadata_for_model add_rights_statement add_collections add_local @@ -57,6 +57,9 @@ def add_identifier def add_metadata_for_model if factory_class == Collection add_collection_type_gid + elsif factory_class == FileSet + add_path_to_file + validate_presence_of_parent! else add_file unless importerexporter.metadata_only? add_admin_set_id @@ -85,7 +88,11 @@ def add_file elsif record['file'].is_a?(Array) self.parsed_metadata['file'] = record['file'] end - self.parsed_metadata['file'] = self.parsed_metadata['file'].map { |f| path_to_file(f.tr(' ', '_')) } + self.parsed_metadata['file'] = self.parsed_metadata['file'].map do |f| + next if f.blank? 
+ + path_to_file(f.tr(' ', '_')) + end.compact end def build_export_metadata diff --git a/app/models/bulkrax/csv_file_set_entry.rb b/app/models/bulkrax/csv_file_set_entry.rb new file mode 100644 index 00000000..7ad48874 --- /dev/null +++ b/app/models/bulkrax/csv_file_set_entry.rb @@ -0,0 +1,26 @@ +# frozen_string_literal: true + +module Bulkrax + class CsvFileSetEntry < CsvEntry + def factory_class + ::FileSet + end + + def add_path_to_file + parsed_metadata['file'].each_with_index do |filename, i| + path_to_file = ::File.join(parser.path_to_files, filename) + + parsed_metadata['file'][i] = path_to_file + end + raise ::StandardError, 'one or more file paths are invalid' unless parsed_metadata['file'].map { |file_path| ::File.file?(file_path) }.all? + + parsed_metadata['file'] + end + + def validate_presence_of_parent! + return if parsed_metadata[related_parents_parsed_mapping]&.map(&:present?)&.any? + + raise StandardError, 'File set must be related to at least one work' + end + end +end diff --git a/app/models/bulkrax/importer.rb b/app/models/bulkrax/importer.rb index 51c81d11..a711a342 100644 --- a/app/models/bulkrax/importer.rb +++ b/app/models/bulkrax/importer.rb @@ -99,7 +99,12 @@ def current_run @current_run ||= if file? && zip? self.importer_runs.create! else - self.importer_runs.create!(total_work_entries: self.limit || parser.works_total, total_collection_entries: parser.collections_total) + entry_counts = { + total_work_entries: self.limit || parser.works_total, + total_collection_entries: parser.collections_total, + total_file_set_entries: parser.file_sets_total + } + self.importer_runs.create!(entry_counts) end end @@ -134,6 +139,13 @@ def import_collections status_info(e) end + def import_file_sets + self.save if self.new_record? 
# Object needs to be saved for statuses + parser.create_file_sets + rescue StandardError => e + status_info(e) + end + # Prepend the base_url to ensure unique set identifiers # @todo - move to parser, as this is OAI specific def unique_collection_identifier(id) diff --git a/app/models/concerns/bulkrax/dynamic_record_lookup.rb b/app/models/concerns/bulkrax/dynamic_record_lookup.rb new file mode 100644 index 00000000..0332414c --- /dev/null +++ b/app/models/concerns/bulkrax/dynamic_record_lookup.rb @@ -0,0 +1,39 @@ +# frozen_string_literal: true + +module Bulkrax + module DynamicRecordLookup + # Search entries, collections, and every available work type for a record that + # has the provided identifier. + # + # @param identifier [String] Work/Collection ID or Bulkrax::Entry source_identifier + # @return [Work, Collection, nil] Work or Collection if found, otherwise nil + def find_record(identifier) + record = Entry.find_by(identifier: identifier) + record ||= ::Collection.where(id: identifier).first # rubocop:disable Rails/FindBy + if record.blank? + available_work_types.each do |work_type| + record ||= work_type.where(id: identifier).first # rubocop:disable Rails/FindBy + end + end + + record.is_a?(Entry) ? 
record.factory.find : record + end + + # Check if the record is a Work + def curation_concern?(record) + available_work_types.include?(record.class) + end + + private + + # @return [Array] list of work type classes + def available_work_types + # If running in a Hyku app, do not include disabled work types + @available_work_types ||= if defined?(::Hyku) + ::Site.instance.available_works.map(&:constantize) + else + ::Hyrax.config.curation_concerns + end + end + end +end diff --git a/app/models/concerns/bulkrax/import_behavior.rb b/app/models/concerns/bulkrax/import_behavior.rb index c7703f40..9070e648 100644 --- a/app/models/concerns/bulkrax/import_behavior.rb +++ b/app/models/concerns/bulkrax/import_behavior.rb @@ -96,6 +96,7 @@ def factory source_identifier_value: identifier, work_identifier: parser.work_identifier, collection_field_mapping: parser.collection_field_mapping, + related_parents_parsed_mapping: related_parents_parsed_mapping, replace_files: replace_files, user: user, klass: factory_class, diff --git a/app/models/concerns/bulkrax/importer_exporter_behavior.rb b/app/models/concerns/bulkrax/importer_exporter_behavior.rb index 9de1340c..7efdfc89 100644 --- a/app/models/concerns/bulkrax/importer_exporter_behavior.rb +++ b/app/models/concerns/bulkrax/importer_exporter_behavior.rb @@ -20,15 +20,17 @@ def next_import_at (last_imported_at || Time.current) + frequency.to_seconds if schedulable? && last_imported_at.present? end - def increment_counters(index, collection = false) + def increment_counters(index, collection: false, file_set: false) # Only set the totals if they were not set on initialization if collection current_run.total_collection_entries = index + 1 unless parser.collections_total.positive? + elsif file_set + current_run.total_file_set_entries = index + 1 unless parser.file_sets_total.positive? else # TODO: differentiate between work and collection counts for exporters current_run.total_work_entries = index + 1 unless limit.to_i.positive? 
|| parser.total.positive? end - current_run.enqueued_records = index + 1 + current_run.enqueued_records += 1 current_run.save! end diff --git a/app/parsers/bulkrax/application_parser.rb b/app/parsers/bulkrax/application_parser.rb index 9050c1b8..a306014f 100644 --- a/app/parsers/bulkrax/application_parser.rb +++ b/app/parsers/bulkrax/application_parser.rb @@ -114,6 +114,10 @@ def create_works raise StandardError, 'must be defined' if importer? end + def create_file_sets + raise StandardError, 'must be defined' if importer? + end + # Optional, define if using browse everything for file upload def retrieve_cloud_files(files); end @@ -234,6 +238,10 @@ def collections_total 0 end + def file_sets_total + 0 + end + def write write_files zip diff --git a/app/parsers/bulkrax/bagit_parser.rb b/app/parsers/bulkrax/bagit_parser.rb index 817f27af..bfb7d874 100644 --- a/app/parsers/bulkrax/bagit_parser.rb +++ b/app/parsers/bulkrax/bagit_parser.rb @@ -59,7 +59,7 @@ def create_collections } new_entry = find_or_create_entry(collection_entry_class, collection, 'Bulkrax::Importer', metadata) ImportCollectionJob.perform_now(new_entry.id, current_run.id) - increment_counters(index, true) + increment_counters(index, collection: true) end end diff --git a/app/parsers/bulkrax/csv_parser.rb b/app/parsers/bulkrax/csv_parser.rb index a111229b..f04bc3e6 100644 --- a/app/parsers/bulkrax/csv_parser.rb +++ b/app/parsers/bulkrax/csv_parser.rb @@ -38,13 +38,27 @@ def collections_total end def works - records - collections + records - collections - file_sets end def works_total works.size end + def file_sets + records.map do |r| + file_sets = [] + model_field_mappings.each do |model_mapping| + file_sets << r if r[model_mapping.to_sym]&.downcase == 'fileset' + end + file_sets + end.flatten.compact.uniq + end + + def file_sets_total + file_sets.size + end + # We could use CsvEntry#fields_from_data(data) but that would mean re-reading the data def import_fields @import_fields ||= 
records.inject(:merge).keys.compact.uniq @@ -98,7 +112,7 @@ def create_collections new_entry = find_or_create_entry(collection_entry_class, collection_hash[source_identifier], 'Bulkrax::Importer', collection_hash) # TODO: add support for :delete option ImportCollectionJob.perform_now(new_entry.id, current_run.id) - increment_counters(index, true) + increment_counters(index, collection: true) end importer.record_status rescue StandardError => e @@ -124,6 +138,20 @@ def create_works status_info(e) end + def create_file_sets + file_sets.each_with_index do |file_set, index| + next unless record_has_source_identifier(file_set, records.find_index(file_set)) + break if limit_reached?(limit, records.find_index(file_set)) + + new_entry = find_or_create_entry(file_set_entry_class, file_set[source_identifier], 'Bulkrax::Importer', file_set.to_h) + ImportFileSetJob.perform_later(new_entry.id, current_run.id) + increment_counters(index, file_set: true) + end + importer.record_status + rescue StandardError => e + status_info(e) + end + def write_partial_import_file(file) import_filename = import_file_path.split('/').last partial_import_filename = "#{File.basename(import_filename, '.csv')}_corrected_entries.csv" @@ -199,6 +227,10 @@ def collection_entry_class CsvCollectionEntry end + def file_set_entry_class + CsvFileSetEntry + end + # See https://stackoverflow.com/questions/2650517/count-the-number-of-lines-in-a-file-without-reading-entire-file-into-memory # Changed to grep as wc -l counts blank lines, and ignores the final unescaped line (which may or may not contain data) def total diff --git a/app/parsers/bulkrax/oai_dc_parser.rb b/app/parsers/bulkrax/oai_dc_parser.rb index 598c2931..80eb8443 100644 --- a/app/parsers/bulkrax/oai_dc_parser.rb +++ b/app/parsers/bulkrax/oai_dc_parser.rb @@ -76,7 +76,7 @@ def create_collections new_entry = collection_entry_class.where(importerexporter: importerexporter, identifier: unique_collection_identifier, raw_metadata: 
metadata).first_or_create! # perform now to ensure this gets created before work imports start ImportCollectionJob.perform_now(new_entry.id, importerexporter.current_run.id) - increment_counters(index, true) + increment_counters(index, collection: true) end end diff --git a/app/views/bulkrax/importers/index.html.erb b/app/views/bulkrax/importers/index.html.erb index 9b3fdcb0..b6fbe386 100644 --- a/app/views/bulkrax/importers/index.html.erb +++ b/app/views/bulkrax/importers/index.html.erb @@ -24,6 +24,7 @@ Entries Deleted Upstream Total Collection Entries Total Work Entries + Total File Set Entries @@ -36,12 +37,13 @@ <%= importer.status %> <%= importer.last_imported_at.strftime("%b %d, %Y") if importer.last_imported_at %> <%= importer.next_import_at.strftime("%b %d, %Y") if importer.next_import_at %> - <%= importer.importer_runs.last&.enqueued_records %> - <%= (importer.importer_runs.last&.processed_collections || 0) + (importer.importer_runs.last&.processed_records || 0) %> - <%= (importer.importer_runs.last&.failed_collections || 0) + (importer.importer_runs.last&.failed_records || 0) %> - <%= importer.importer_runs.last&.deleted_records %> - <%= importer.importer_runs.last&.total_collection_entries %> - <%= importer.importer_runs.last&.total_work_entries %> + <%= importer.last_run&.enqueued_records %> + <%= (importer.last_run&.processed_records || 0) %> + <%= (importer.last_run&.failed_records || 0) %> + <%= importer.last_run&.deleted_records %> + <%= importer.last_run&.total_collection_entries %> + <%= importer.last_run&.total_work_entries %> + <%= importer.last_run&.total_file_set_entries %> <%= link_to raw(''), importer_path(importer) %> <%= link_to raw(''), edit_importer_path(importer) %> <%= link_to raw(''), importer, method: :delete, data: { confirm: 'Are you sure?' 
} %> diff --git a/app/views/bulkrax/importers/show.html.erb b/app/views/bulkrax/importers/show.html.erb index f205b16d..e8ff914c 100644 --- a/app/views/bulkrax/importers/show.html.erb +++ b/app/views/bulkrax/importers/show.html.erb @@ -56,14 +56,19 @@ <%= render partial: 'bulkrax/shared/bulkrax_field_mapping', locals: {item: @importer} %> -

+

Total Works: - <%= @importer.importer_runs.last&.total_work_entries %> + <%= @importer.last_run&.total_work_entries %>

-

+

Total Collections: - <%= @importer.importer_runs.last&.total_collection_entries %> + <%= @importer.last_run&.total_collection_entries %> +

+ +

+ Total File Sets: + <%= @importer.last_run&.total_file_set_entries %>

@@ -71,6 +76,7 @@
@@ -158,6 +164,42 @@ <%= page_entries_info(@collection_entries) %>
<%= paginate(@collection_entries, theme: 'blacklight', param_name: :collections_entries_page, params: {anchor: 'collection-entries'}) %>
+
+ + + + + + + + + + + + + <% @file_set_entries.each do |e| %> + + + + <% if e.status == "Complete" %> + + <% else %> + + <% end %> + <% if e.last_error.present? %> + + <% else %> + + <% end %> + + + + <% end %> + +
IdentifierEntry IDStatusErrorsStatus Set AtActions
<%= link_to e.identifier, bulkrax.importer_entry_path(@importer.id, e.id) %><%= e.id %> <%= e.status %> <%= e.status %><%= link_to e.last_error.dig("error_class"), bulkrax.importer_entry_path(@importer.id, e.id) %><%= e.status_at %><%= link_to raw(''), bulkrax.importer_entry_path(@importer.id, e.id) %>
+ <%= page_entries_info(@file_set_entries) %>
+ <%= paginate(@file_set_entries, theme: 'blacklight', param_name: :file_set_entries_page, params: {anchor: 'file-set-entries'}) %> +
diff --git a/db/migrate/20211220195027_add_file_set_counters_to_importer_runs.rb b/db/migrate/20211220195027_add_file_set_counters_to_importer_runs.rb new file mode 100644 index 00000000..9fe66c88 --- /dev/null +++ b/db/migrate/20211220195027_add_file_set_counters_to_importer_runs.rb @@ -0,0 +1,7 @@ +class AddFileSetCountersToImporterRuns < ActiveRecord::Migration[5.2] + def change + add_column :bulkrax_importer_runs, :processed_file_sets, :integer, default: 0 unless column_exists?(:bulkrax_importer_runs, :processed_file_sets) + add_column :bulkrax_importer_runs, :failed_file_sets, :integer, default: 0 unless column_exists?(:bulkrax_importer_runs, :failed_file_sets) + add_column :bulkrax_importer_runs, :total_file_set_entries, :integer, default: 0 unless column_exists?(:bulkrax_importer_runs, :total_file_set_entries) + end +end diff --git a/db/migrate/20220118001339_add_import_attempts_to_entries.rb b/db/migrate/20220118001339_add_import_attempts_to_entries.rb new file mode 100644 index 00000000..94465544 --- /dev/null +++ b/db/migrate/20220118001339_add_import_attempts_to_entries.rb @@ -0,0 +1,5 @@ +class AddImportAttemptsToEntries < ActiveRecord::Migration[5.2] + def change + add_column :bulkrax_entries, :import_attempts, :integer, default: 0 unless column_exists?(:bulkrax_entries, :import_attempts) + end +end diff --git a/db/migrate/20220119213325_add_work_counters_to_importer_runs.rb b/db/migrate/20220119213325_add_work_counters_to_importer_runs.rb new file mode 100644 index 00000000..1c610852 --- /dev/null +++ b/db/migrate/20220119213325_add_work_counters_to_importer_runs.rb @@ -0,0 +1,6 @@ +class AddWorkCountersToImporterRuns < ActiveRecord::Migration[5.2] + def change + add_column :bulkrax_importer_runs, :processed_works, :integer, default: 0 unless column_exists?(:bulkrax_importer_runs, :processed_works) + add_column :bulkrax_importer_runs, :failed_works, :integer, default: 0 unless column_exists?(:bulkrax_importer_runs, :failed_works) + end +end diff --git 
a/spec/factories/bulkrax/object_factory_spec.rb b/spec/factories/bulkrax/object_factory_spec.rb new file mode 100644 index 00000000..907df758 --- /dev/null +++ b/spec/factories/bulkrax/object_factory_spec.rb @@ -0,0 +1,14 @@ +# frozen_string_literal: true + +require 'rails_helper' +require Rails.root.parent.parent.join('spec', 'models', 'concerns', 'bulkrax', 'dynamic_record_lookup_spec').to_s + +module Bulkrax + RSpec.describe ObjectFactory do + subject(:object_factory) { build(:object_factory) } + + describe 'is capable of looking up records dynamically' do + include_examples 'dynamic record lookup' + end + end +end diff --git a/spec/factories/bulkrax_entries.rb b/spec/factories/bulkrax_entries.rb index d8c476b6..7cab064d 100644 --- a/spec/factories/bulkrax_entries.rb +++ b/spec/factories/bulkrax_entries.rb @@ -49,4 +49,22 @@ raw_metadata { {} } parsed_metadata { {} } end + + factory :bulkrax_csv_entry_file_set, class: 'Bulkrax::CsvFileSetEntry' do + identifier { 'file_set_entry_1' } + type { 'Bulkrax::CsvFileSetEntry' } + importerexporter { FactoryBot.build(:bulkrax_importer) } + raw_metadata { {} } + parsed_metadata { {} } + end + + trait :with_file_set_metadata do + raw_metadata do + { + 'title' => 'FileSet Entry', + 'source_identifier' => 'file_set_entry_1', + 'file' => 'removed.png' + } + end + end end diff --git a/spec/factories/bulkrax_object_factories.rb b/spec/factories/bulkrax_object_factories.rb new file mode 100644 index 00000000..ef619b23 --- /dev/null +++ b/spec/factories/bulkrax_object_factories.rb @@ -0,0 +1,14 @@ +# frozen_string_literal: true + +FactoryBot.define do + factory :object_factory, class: 'Bulkrax::ObjectFactory' do + initialize_with do + new( + attributes: {}, + source_identifier_value: :source_identifier, + work_identifier: :source, + collection_field_mapping: :collection + ) + end + end +end diff --git a/spec/fixtures/csv/work_with_file_sets.csv b/spec/fixtures/csv/work_with_file_sets.csv new file mode 100644 index 
00000000..9af2ce86 --- /dev/null +++ b/spec/fixtures/csv/work_with_file_sets.csv @@ -0,0 +1,4 @@ +source_identifier,model,parent,title,description +w1,Work,,Work with two files,Lorem ipsum +fs1,FileSet,w1,FileSet 1,First +fs2,FileSet,w1,FileSet 2,Second diff --git a/spec/jobs/bulkrax/create_relationships_job_spec.rb b/spec/jobs/bulkrax/create_relationships_job_spec.rb index 9a6198da..3076dcb6 100644 --- a/spec/jobs/bulkrax/create_relationships_job_spec.rb +++ b/spec/jobs/bulkrax/create_relationships_job_spec.rb @@ -1,6 +1,7 @@ # frozen_string_literal: true require 'rails_helper' +require Rails.root.parent.parent.join('spec', 'models', 'concerns', 'bulkrax', 'dynamic_record_lookup_spec').to_s module Bulkrax RSpec.describe CreateRelationshipsJob, type: :job do @@ -30,6 +31,10 @@ module Bulkrax allow(child_entry).to receive(:factory).and_return(child_factory) end + describe 'is capable of looking up records dynamically' do + include_examples 'dynamic record lookup' + end + describe '#perform' do context 'when adding a child work to a parent collection' do let(:factory_attrs) do diff --git a/spec/jobs/bulkrax/import_file_set_job_spec.rb b/spec/jobs/bulkrax/import_file_set_job_spec.rb new file mode 100644 index 00000000..989e6f60 --- /dev/null +++ b/spec/jobs/bulkrax/import_file_set_job_spec.rb @@ -0,0 +1,182 @@ +# frozen_string_literal: true + +require 'rails_helper' +require Rails.root.parent.parent.join('spec', 'models', 'concerns', 'bulkrax', 'dynamic_record_lookup_spec').to_s + +module Bulkrax + RSpec.describe ImportFileSetJob, type: :job do + subject(:import_file_set_job) { described_class.new } + let(:importer) { FactoryBot.create(:bulkrax_importer_csv_complex) } + let(:importer_run) { importer.current_run } + let(:entry) { create(:bulkrax_csv_entry_file_set, :with_file_set_metadata, importerexporter: importer) } + let(:factory) { instance_double(ObjectFactory) } + + describe 'is capable of looking up records dynamically' do + include_examples 'dynamic record 
lookup' + end + + before do + allow(Entry).to receive(:find).with(entry.id).and_return(entry) + allow(ImporterRun).to receive(:find).with(importer_run.id).and_return(importer_run) + allow(::Hyrax.config).to receive(:curation_concerns).and_return([Work]) + allow(::Work).to receive(:where).and_return([]) + allow(importer.parser).to receive(:path_to_files).and_return('spec/fixtures/') + end + + describe '#perform' do + context 'when the entry has a parent identifier' do + before do + allow(::Collection).to receive(:where).and_return([]) + allow(entry).to receive(:related_parents_raw_mapping).and_return('parents') + allow(entry).to receive(:related_parents_parsed_mapping).and_return('parents') + allow(entry).to receive(:factory).and_return(factory) + entry.raw_metadata['parents'] = 'work_1' + allow(factory).to receive(:run!) + end + + context 'when the parent work has been created' do + before do + allow(import_file_set_job).to receive(:validate_parent!).and_return(nil) + end + + it 'builds the entry successfully' do + expect(entry).to receive(:build) + expect(entry).to receive(:save!) + + import_file_set_job.perform(entry.id, importer_run.id) + end + + it "runs the entry's factory" do + expect(factory).to receive(:run!) 
+ + import_file_set_job.perform(entry.id, importer_run.id) + end + + it "updates the importer run's counters" do + expect(importer_run).to receive(:increment!).with(:processed_records).once + expect(importer_run).to receive(:increment!).with(:processed_file_sets).once + expect(importer_run).to receive(:decrement!).with(:enqueued_records).once + + expect(importer_run).not_to receive(:increment!).with(:failed_records) + expect(importer_run).not_to receive(:increment!).with(:failed_file_sets) + + import_file_set_job.perform(entry.id, importer_run.id) + end + end + + context 'when the parent work is not found or has not been created yet' do + before do + allow(import_file_set_job).to receive(:find_record).and_return(nil) + end + + context "when the entry's :import_attempts are less than 5" do + it "increments the entry's :import_attempts" do + expect { import_file_set_job.perform(entry.id, importer_run.id) } + .to change(entry, :import_attempts).by(1) + end + + it 'does not throw any errors' do + expect { import_file_set_job.perform(entry.id, importer_run.id) }.not_to raise_error + end + + it "does not update any of importer run's counters" do + expect(importer_run).not_to receive(:increment!) + expect(importer_run).not_to receive(:decrement!) + end + + it 'reschedules the job to try again after a couple minutes' do + configured_job = instance_double(::ActiveJob::ConfiguredJob) + # #set initializes an ActiveJob::ConfiguredJob + expect(described_class).to receive(:set).with(wait: 2.minutes).and_return(configured_job) + expect(configured_job).to receive(:perform_later).with(entry.id, importer_run.id).once + + import_file_set_job.perform(entry.id, importer_run.id) + end + end + + context "when the entry's :import_attempts are 5 or greater" do + before do + entry.import_attempts = 4 # before failed attempt + entry.save! 
+ end + + it "increments the entry's :import_attempts" do + expect { import_file_set_job.perform(entry.id, importer_run.id) } + .to change(entry, :import_attempts).by(1) + end + + it 'logs a MissingParentError on the entry' do + expect(entry).to receive(:status_info).with(instance_of(MissingParentError)) + + import_file_set_job.perform(entry.id, importer_run.id) + end + + it "only decrements the importer run's :enqueued_records counter" do + expect(importer_run).not_to receive(:increment!) + expect(importer_run).to receive(:decrement!).with(:enqueued_records).once + + import_file_set_job.perform(entry.id, importer_run.id) + end + + it 'does not reschedule the job' do + expect(described_class).not_to receive(:set) + + import_file_set_job.perform(entry.id, importer_run.id) + end + end + end + + context 'when the parent identifier does not reference a work' do + before do + allow(import_file_set_job).to receive(:find_record).and_return(non_work) + end + + context 'when it references a collection' do + let(:non_work) { build(:collection) } + + it 'raises an error' do + expect { import_file_set_job.perform(entry.id, importer_run.id) } + .to raise_error(::StandardError, /not an valid\/available work type/) + end + end + + context 'when it references a file set' do + let(:non_work) { instance_double(::FileSet) } + + it 'raises an error' do + expect { import_file_set_job.perform(entry.id, importer_run.id) } + .to raise_error(::StandardError, /not an valid\/available work type/) + end + end + end + end + + context 'when the entry does not have a parent identifier' do + it 'builds the entry unsuccessfully' do + expect(entry).to receive(:build) + + import_file_set_job.perform(entry.id, importer_run.id) + + expect(entry.reload.succeeded?).to eq(nil) + end + + it "does not run the entry's factory" do + expect(factory).not_to receive(:run!) 
+ + import_file_set_job.perform(entry.id, importer_run.id) + end + + it "updates the importer run's counters" do + expect(importer_run).to receive(:increment!).with(:failed_records).once + expect(importer_run).to receive(:increment!).with(:failed_file_sets).once + expect(importer_run).to receive(:decrement!).with(:enqueued_records).once + + expect(importer_run).not_to receive(:increment!).with(:processed_records) + expect(importer_run).not_to receive(:increment!).with(:processed_file_sets) + + import_file_set_job.perform(entry.id, importer_run.id) + end + end + end + end +end diff --git a/spec/jobs/bulkrax/import_work_job_spec.rb b/spec/jobs/bulkrax/import_work_job_spec.rb index b3cc7be1..14652c4c 100644 --- a/spec/jobs/bulkrax/import_work_job_spec.rb +++ b/spec/jobs/bulkrax/import_work_job_spec.rb @@ -22,6 +22,7 @@ module Bulkrax end it 'increments :processed_records' do expect(importer_run).to receive(:increment!).with(:processed_records) + expect(importer_run).to receive(:increment!).with(:processed_works) expect(importer_run).to receive(:decrement!).with(:enqueued_records) import_work_job.perform(1, importer_run_id) end @@ -48,6 +49,7 @@ module Bulkrax end it 'increments :failed_records' do expect(importer_run).to receive(:increment!).with(:failed_records) + expect(importer_run).to receive(:increment!).with(:failed_works) expect(importer_run).to receive(:decrement!).with(:enqueued_records) import_work_job.perform(1, importer_run_id) end diff --git a/spec/models/concerns/bulkrax/dynamic_record_lookup_spec.rb b/spec/models/concerns/bulkrax/dynamic_record_lookup_spec.rb new file mode 100644 index 00000000..ded8998c --- /dev/null +++ b/spec/models/concerns/bulkrax/dynamic_record_lookup_spec.rb @@ -0,0 +1,129 @@ +# frozen_string_literal: true + +require 'rails_helper' + +module Bulkrax + RSpec.shared_examples 'dynamic record lookup' do + let(:importer) { FactoryBot.create(:bulkrax_importer_csv_complex) } + + before do + allow(::Hyrax.config).to 
receive(:curation_concerns).and_return([Work]) + # DRY spec setup -- by default, assume #find_record doesn't find anything + allow(Entry).to receive(:find_by).and_return(nil) + allow(::Collection).to receive(:where).and_return([]) + allow(::Work).to receive(:where).and_return([]) + end + + describe '#find_record' do + context 'when passed a Bulkrax source_identifier' do + let(:source_identifier) { 'bulkrax_identifier_1' } + + it 'looks through entries, collections, and all work types' do + expect(Entry).to receive(:find_by).with(identifier: source_identifier).once + expect(::Collection).to receive(:where).with(id: source_identifier).once.and_return([]) + expect(::Work).to receive(:where).with(id: source_identifier).once.and_return([]) + + subject.find_record(source_identifier) + end + + context 'when an entry is found' do + let(:entry) { create(:bulkrax_csv_entry_work, importerexporter: importer) } + let(:factory) { instance_double(ObjectFactory, find: record) } + let(:record) { instance_double(::Work, title: ["Found through Entry's factory"]) } + + before do + allow(Entry).to receive(:find_by).with(identifier: source_identifier).and_return(entry) + allow(entry).to receive(:factory).and_return(factory) + end + + it "returns the entry's record" do + expect(subject.find_record(source_identifier)).to eq(record) + end + + it "uses the entry's factory to find its record" do + expect(entry).to receive(:factory) + expect(factory).to receive(:find) + + found_record = subject.find_record(source_identifier) + + expect(found_record.title).to eq(record.title) + end + end + + context 'when nothing is found' do + it 'returns nil' do + expect(subject.find_record(source_identifier)).to be_nil + end + end + end + + context 'when passed an ID' do + let(:id) { 'xyz6789' } + + it 'looks through entries, collections, and all work types' do + expect(Entry).to receive(:find_by).with(identifier: id).once + expect(::Collection).to receive(:where).with(id: id).once.and_return([]) + 
expect(::Work).to receive(:where).with(id: id).once.and_return([]) + + subject.find_record(id) + end + + context 'when a collection is found' do + let(:collection) { instance_double(::Collection) } + + before do + allow(::Collection).to receive(:where).with(id: id).and_return([collection]) + end + + it 'returns the collection' do + expect(subject.find_record(id)).to eq(collection) + end + end + + context 'when a work is found' do + let(:work) { instance_double(::Work) } + + before do + allow(::Work).to receive(:where).with(id: id).and_return([work]) + end + + it 'returns the work' do + expect(subject.find_record(id)).to eq(work) + end + end + + context 'when nothing is found' do + it 'returns nil' do + expect(subject.find_record(id)).to be_nil + end + end + end + end + + describe '#curation_concern?' do + context 'when record is a work' do + let(:record) { build(:work) } + + it 'returns true' do + expect(subject.curation_concern?(record)).to eq(true) + end + end + + context 'when record is a collection' do + let(:record) { build(:collection) } + + it 'returns false' do + expect(subject.curation_concern?(record)).to eq(false) + end + end + + context 'when record is an Entry' do + let(:record) { build(:bulkrax_entry) } + + it 'returns false' do + expect(subject.curation_concern?(record)).to eq(false) + end + end + end + end +end diff --git a/spec/parsers/bulkrax/csv_parser_spec.rb b/spec/parsers/bulkrax/csv_parser_spec.rb index 3ebef718..0d1580d0 100644 --- a/spec/parsers/bulkrax/csv_parser_spec.rb +++ b/spec/parsers/bulkrax/csv_parser_spec.rb @@ -110,6 +110,17 @@ module Bulkrax end end + describe '#file_sets' do + before do + importer.parser_fields = { import_file_path: './spec/fixtures/csv/work_with_file_sets.csv' } + end + + it 'returns all file set records' do + expect(subject.file_sets.collect { |fs| fs[:source_identifier] }) + .to contain_exactly('fs1', 'fs2') + end + end + describe '#create_collections' do context 'when importing collections by title through 
works' do before do @@ -271,6 +282,22 @@ module Bulkrax end end + describe '#create_file_sets' do + before do + importer.parser_fields = { import_file_path: './spec/fixtures/csv/work_with_file_sets.csv' } + end + + it 'creates CSV file set entries for each file set' do + expect { subject.create_file_sets }.to change(CsvFileSetEntry, :count).by(2) + end + + it 'runs an ImportFileSetJob for each entry' do + expect(ImportFileSetJob).to receive(:perform_later).twice + + subject.create_file_sets + end + end + describe '#write_partial_import_file', clean_downloads: true do let(:importer) { FactoryBot.create(:bulkrax_importer_csv_failed) } let(:file) { fixture_file_upload('./spec/fixtures/csv/ok.csv') } diff --git a/spec/test_app/db/schema.rb b/spec/test_app/db/schema.rb index a1c5a7a5..5abb5b35 100644 --- a/spec/test_app/db/schema.rb +++ b/spec/test_app/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema.define(version: 2021_12_03_195233) do +ActiveRecord::Schema.define(version: 2022_01_19_213325) do create_table "bookmarks", force: :cascade do |t| t.integer "user_id", null: false @@ -36,7 +36,7 @@ t.datetime "last_error_at" t.datetime "last_succeeded_at" t.string "importerexporter_type", default: "Bulkrax::Importer" - t.index ["importerexporter_id"], name: "index_bulkrax_entries_on_importerexporter_id" + t.integer "import_attempts", default: 0 end create_table "bulkrax_exporter_runs", force: :cascade do |t| @@ -85,8 +85,11 @@ t.integer "processed_relationships", default: 0 t.integer "failed_relationships", default: 0 t.text "invalid_records", limit: 16777215 - t.integer "processed_children", default: 0 - t.integer "failed_children", default: 0 + t.integer "processed_file_sets", default: 0 + t.integer "failed_file_sets", default: 0 + t.integer "total_file_set_entries", default: 0 + t.integer "processed_works", default: 0 + t.integer "failed_works", default: 0 t.index 
["importer_id"], name: "index_bulkrax_importer_runs_on_importer_id" end