From 166e212024470522eb1e8b778fa51853cb6ed412 Mon Sep 17 00:00:00 2001 From: Rob Kaufman Date: Thu, 8 Feb 2024 22:10:22 -0800 Subject: [PATCH 01/19] rails 5 does not have pick, fixes latest status check --- app/models/bulkrax/status.rb | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/app/models/bulkrax/status.rb b/app/models/bulkrax/status.rb index 534f176f..1cf21d55 100644 --- a/app/models/bulkrax/status.rb +++ b/app/models/bulkrax/status.rb @@ -23,7 +23,12 @@ def self.latest_by_statusable_subtable end def latest? - self.id == self.class.where(statusable_id: self.statusable_id, statusable_type: self.statusable_type).order('id desc').pick(:id) + # TODO: remove if statment when we stop supporting Hyrax < 4 + self.id == if Gem::Version.new(Rails::VERSION) >= Gem::Version.new('6.0.0') + self.class.where(statusable_id: self.statusable_id, statusable_type: self.statusable_type).order('id desc').pick(:id) + else + self.class.where(statusable_id: self.statusable_id, statusable_type: self.statusable_type).order('id desc').pluck(:id).first + end end end end From 83b3237dd624cbf6ae852d29cf1d4e7369617131 Mon Sep 17 00:00:00 2001 From: Rob Kaufman Date: Thu, 8 Feb 2024 22:13:37 -0800 Subject: [PATCH 02/19] call the cops --- app/models/bulkrax/status.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/models/bulkrax/status.rb b/app/models/bulkrax/status.rb index 1cf21d55..e9411f0c 100644 --- a/app/models/bulkrax/status.rb +++ b/app/models/bulkrax/status.rb @@ -27,7 +27,7 @@ def latest? 
self.id == if Gem::Version.new(Rails::VERSION) >= Gem::Version.new('6.0.0') self.class.where(statusable_id: self.statusable_id, statusable_type: self.statusable_type).order('id desc').pick(:id) else - self.class.where(statusable_id: self.statusable_id, statusable_type: self.statusable_type).order('id desc').pluck(:id).first + self.class.where(statusable_id: self.statusable_id, statusable_type: self.statusable_type).order('id desc').pluck(:id).first # rubocop:disable Rails/Pic end end end From 2e2b899e5618c037b24706a48d9d826326639d4a Mon Sep 17 00:00:00 2001 From: Rob Kaufman Date: Thu, 8 Feb 2024 23:33:44 -0800 Subject: [PATCH 03/19] typo --- app/models/bulkrax/status.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/models/bulkrax/status.rb b/app/models/bulkrax/status.rb index e9411f0c..a0e385a5 100644 --- a/app/models/bulkrax/status.rb +++ b/app/models/bulkrax/status.rb @@ -27,7 +27,7 @@ def latest? self.id == if Gem::Version.new(Rails::VERSION) >= Gem::Version.new('6.0.0') self.class.where(statusable_id: self.statusable_id, statusable_type: self.statusable_type).order('id desc').pick(:id) else - self.class.where(statusable_id: self.statusable_id, statusable_type: self.statusable_type).order('id desc').pluck(:id).first # rubocop:disable Rails/Pic + self.class.where(statusable_id: self.statusable_id, statusable_type: self.statusable_type).order('id desc').pluck(:id).first # rubocop:disable Rails/Pick end end end From 3c406428caf768cd369b8c8f04c97a0d37f77683 Mon Sep 17 00:00:00 2001 From: Rob Kaufman Date: Thu, 8 Feb 2024 23:36:48 -0800 Subject: [PATCH 04/19] mark entries as skipped if they have not been seen during an importer run --- app/models/bulkrax/importer.rb | 9 +++++++++ app/models/concerns/bulkrax/status_info.rb | 5 +++++ 2 files changed, 14 insertions(+) diff --git a/app/models/bulkrax/importer.rb b/app/models/bulkrax/importer.rb index 94c400f3..efa43295 100644 --- a/app/models/bulkrax/importer.rb +++ 
b/app/models/bulkrax/importer.rb @@ -195,10 +195,19 @@ def import_objects(types_array = nil) end end parser.create_objects(types) + mark_unseen_as_skipped rescue StandardError => e set_status_info(e) end + # After an import any entries we did not touch are skipped. + # They are not really pending, complete for the last run, or failed + def mark_unseen_as_skipped + entries.where.not(id. seen).find_each do |entry| + entry.set_status_info('Skipped') + end + end + # Prepend the base_url to ensure unique set identifiers # @todo - move to parser, as this is OAI specific def unique_collection_identifier(id) diff --git a/app/models/concerns/bulkrax/status_info.rb b/app/models/concerns/bulkrax/status_info.rb index 02ce1883..c48dc58e 100644 --- a/app/models/concerns/bulkrax/status_info.rb +++ b/app/models/concerns/bulkrax/status_info.rb @@ -13,6 +13,7 @@ module StatusInfo scope :failed, -> { where(status_message: 'Failed') } scope :complete, -> { where(status_message: 'Complete') } scope :pending, -> { where(status_message: 'Pending') } + scope :skipped, -> { where(status_message: 'Skipped') } end def current_status @@ -28,6 +29,10 @@ def succeeded? current_status&.status_message&.match(/^Complete$/) end + def skipped? + current_status&.status_message&.match('Skipped') + end + def status current_status&.status_message || 'Pending' end From d82541cc39d4de5b66d36262abc503d0f459f397 Mon Sep 17 00:00:00 2001 From: Rob Kaufman Date: Fri, 9 Feb 2024 00:08:56 -0800 Subject: [PATCH 05/19] fix version string --- app/models/bulkrax/status.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/models/bulkrax/status.rb b/app/models/bulkrax/status.rb index a0e385a5..86c850b0 100644 --- a/app/models/bulkrax/status.rb +++ b/app/models/bulkrax/status.rb @@ -24,7 +24,7 @@ def self.latest_by_statusable_subtable def latest? 
# TODO: remove if statment when we stop supporting Hyrax < 4 - self.id == if Gem::Version.new(Rails::VERSION) >= Gem::Version.new('6.0.0') + self.id == if Gem::Version.new(Rails::VERSION::STRING) >= Gem::Version.new('6.0.0') self.class.where(statusable_id: self.statusable_id, statusable_type: self.statusable_type).order('id desc').pick(:id) else self.class.where(statusable_id: self.statusable_id, statusable_type: self.statusable_type).order('id desc').pluck(:id).first # rubocop:disable Rails/Pick From 610e6ea3bd2096700f7703c1cd7842b1d1ba58f9 Mon Sep 17 00:00:00 2001 From: Rob Kaufman Date: Fri, 9 Feb 2024 00:43:46 -0800 Subject: [PATCH 06/19] Add ability to set a remove_and_rerun property. setting this will delete the record before recreating it --- .../delete_and_import_collection_job.rb | 8 ++ .../bulkrax/delete_and_import_file_set_job.rb | 8 ++ app/jobs/bulkrax/delete_and_import_job.rb | 16 ++++ .../bulkrax/delete_and_import_work_job.rb | 8 ++ app/parsers/bulkrax/application_parser.rb | 78 +++++++++++++------ app/parsers/bulkrax/bagit_parser.rb | 23 ------ app/parsers/bulkrax/csv_parser.rb | 52 ------------- app/parsers/bulkrax/oai_dc_parser.rb | 42 ++++++---- app/parsers/bulkrax/xml_parser.rb | 39 +++++----- .../bulkrax/application_parser_spec.rb | 23 ++++-- 10 files changed, 156 insertions(+), 141 deletions(-) create mode 100644 app/jobs/bulkrax/delete_and_import_collection_job.rb create mode 100644 app/jobs/bulkrax/delete_and_import_file_set_job.rb create mode 100644 app/jobs/bulkrax/delete_and_import_job.rb create mode 100644 app/jobs/bulkrax/delete_and_import_work_job.rb diff --git a/app/jobs/bulkrax/delete_and_import_collection_job.rb b/app/jobs/bulkrax/delete_and_import_collection_job.rb new file mode 100644 index 00000000..b076f72a --- /dev/null +++ b/app/jobs/bulkrax/delete_and_import_collection_job.rb @@ -0,0 +1,8 @@ +# frozen_string_literal: true + +module Bulkrax + class DeleteAndImportCollectionJob < DeleteAndImportJob + self.delete_job = 
Bulkrax::DeleteCollectionJob + self.import_job = Bulkrax::ImportCollectionJob + end +end diff --git a/app/jobs/bulkrax/delete_and_import_file_set_job.rb b/app/jobs/bulkrax/delete_and_import_file_set_job.rb new file mode 100644 index 00000000..e52e1cdf --- /dev/null +++ b/app/jobs/bulkrax/delete_and_import_file_set_job.rb @@ -0,0 +1,8 @@ +# frozen_string_literal: true + +module Bulkrax + class DeleteAndImportFileSetJob < DeleteAndImportJob + self.delete_job = Bulkrax::DeleteFileSetJob + self.import_job = Bulkrax::ImportFileSetJob + end +end diff --git a/app/jobs/bulkrax/delete_and_import_job.rb b/app/jobs/bulkrax/delete_and_import_job.rb new file mode 100644 index 00000000..3af7edf4 --- /dev/null +++ b/app/jobs/bulkrax/delete_and_import_job.rb @@ -0,0 +1,16 @@ +# frozen_string_literal: true + +module Bulkrax + class DeleteAndImportJob < ApplicationJob + queue_as :import + + cattr_accessor :delete_class, :import_class + self.delete_class = Bulkrax::DeleteJob + self.import_class = Bulkrax::ImportJob + + def perform(entry, importer_run) + self.delete_class.perform_now(entry, importer_run) + self.import_class.perform_now(entry, importer_run) + end + end +end diff --git a/app/jobs/bulkrax/delete_and_import_work_job.rb b/app/jobs/bulkrax/delete_and_import_work_job.rb new file mode 100644 index 00000000..bfa57fb7 --- /dev/null +++ b/app/jobs/bulkrax/delete_and_import_work_job.rb @@ -0,0 +1,8 @@ +# frozen_string_literal: true + +module Bulkrax + class DeleteAndImportWorkJob < DeleteAndImportJob + self.delete_job = Bulkrax::DeleteWorkJob + self.import_job = Bulkrax::ImportWorkJob + end +end diff --git a/app/parsers/bulkrax/application_parser.rb b/app/parsers/bulkrax/application_parser.rb index 2a46d124..49c65fe3 100644 --- a/app/parsers/bulkrax/application_parser.rb +++ b/app/parsers/bulkrax/application_parser.rb @@ -47,6 +47,10 @@ def entry_class raise NotImplementedError, 'must be defined' end + def work_entry_class + entry_class + end + # @api public # @abstract Subclass 
and override {#collection_entry_class} to implement behavior for the parser. def collection_entry_class @@ -157,39 +161,67 @@ def visibility @visibility ||= self.parser_fields['visibility'] || 'open' end - # @api public - # - # @param types [Array] the types of objects that we'll create. - # - # @see Bulkrax::Importer::DEFAULT_OBJECT_TYPES - # @see #create_collections - # @see #create_works - # @see #create_file_sets - # @see #create_relationships - def create_objects(types = []) - types.each do |object_type| - send("create_#{object_type.pluralize}") - end - end - - # @abstract Subclass and override {#create_collections} to implement behavior for the parser. def create_collections - raise NotImplementedError, 'must be defined' if importer? + create_objects(['collection']) end - # @abstract Subclass and override {#create_works} to implement behavior for the parser. def create_works - raise NotImplementedError, 'must be defined' if importer? + create_objects(['work']) end - # @abstract Subclass and override {#create_file_sets} to implement behavior for the parser. def create_file_sets - raise NotImplementedError, 'must be defined' if importer? + create_objects(['file_set']) end - # @abstract Subclass and override {#create_relationships} to implement behavior for the parser. def create_relationships - raise NotImplementedError, 'must be defined' if importer? + create_objects(['relationship']) + end + + # @api public + # + # @param types [Array] the types of objects that we'll create. 
+ # + # @see Bulkrax::Importer::DEFAULT_OBJECT_TYPES + # @see #create_collections + # @see #create_works + # @see #create_file_sets + # @see #create_relationships + def create_objects(types_array = nil) + index = 0 + (types_array || %w[collection work file_set relationship]).each do |type| + if type.eql?('relationship') + ScheduleRelationshipsJob.set(wait: 5.minutes).perform_later(importer_id: importerexporter.id) + next + end + send(type.pluralize).each do |current_record| + next unless record_has_source_identifier(current_record, index) + break if limit_reached?(limit, index) + + seen[current_record[source_identifier]] = true + create_entry_and_job(current_record, type) + increment_counters(index, "#{type}": true) + index += 1 + end + importer.record_status + end + true + rescue StandardError => e + set_status_info(e) + end + + def create_entry_and_job(current_record, type, identifier = nil) + identifier ||= current_record[source_identifier] + new_entry = find_or_create_entry(send("#{type}_entry_class"), + identifier, + 'Bulkrax::Importer', + current_record.to_h) + if current_record[:delete].present? + "Bulkrax::Delete#{type.camelize}Job".constantize.send(perform_method, new_entry, current_run) + elsif current_record[:remove_and_rerun].present? + "Bulkrax::DeleteAndImport#{type.camelize}Job".constantize.send(perform_method, new_entry, current_run) + else + "Bulkrax::Import#{type.camelize}Job".constantize.send(perform_method, new_entry.id, current_run.id) + end end # Optional, define if using browse everything for file upload diff --git a/app/parsers/bulkrax/bagit_parser.rb b/app/parsers/bulkrax/bagit_parser.rb index eccbee16..8d93a1b5 100644 --- a/app/parsers/bulkrax/bagit_parser.rb +++ b/app/parsers/bulkrax/bagit_parser.rb @@ -63,29 +63,6 @@ def get_data(bag, data) data end - def create_works - entry_class == CsvEntry ? 
super : create_rdf_works - end - - def create_rdf_works - records.each_with_index do |record, index| - next unless record_has_source_identifier(record, index) - break if limit_reached?(limit, index) - - seen[record[source_identifier]] = true - new_entry = find_or_create_entry(entry_class, record[source_identifier], 'Bulkrax::Importer', record) - if record[:delete].present? - DeleteWorkJob.send(perform_method, new_entry, current_run) - else - ImportWorkJob.send(perform_method, new_entry.id, current_run.id) - end - increment_counters(index, work: true) - end - importer.record_status - rescue StandardError => e - set_status_info(e) - end - # export methods # rubocop:disable Metrics/MethodLength, Metrics/AbcSize diff --git a/app/parsers/bulkrax/csv_parser.rb b/app/parsers/bulkrax/csv_parser.rb index f7e34dda..653c15b9 100644 --- a/app/parsers/bulkrax/csv_parser.rb +++ b/app/parsers/bulkrax/csv_parser.rb @@ -113,57 +113,6 @@ def valid_import? false end - def create_collections - create_objects(['collection']) - end - - def create_works - create_objects(['work']) - end - - def create_file_sets - create_objects(['file_set']) - end - - def create_relationships - create_objects(['relationship']) - end - - def create_objects(types_array = nil) - index = 0 - (types_array || %w[collection work file_set relationship]).each do |type| - if type.eql?('relationship') - ScheduleRelationshipsJob.set(wait: 5.minutes).perform_later(importer_id: importerexporter.id) - next - end - send(type.pluralize).each do |current_record| - next unless record_has_source_identifier(current_record, index) - break if limit_reached?(limit, index) - - seen[current_record[source_identifier]] = true - create_entry_and_job(current_record, type) - increment_counters(index, "#{type}": true) - index += 1 - end - importer.record_status - end - true - rescue StandardError => e - set_status_info(e) - end - - def create_entry_and_job(current_record, type) - new_entry = 
find_or_create_entry(send("#{type}_entry_class"), - current_record[source_identifier], - 'Bulkrax::Importer', - current_record.to_h) - if current_record[:delete].present? - "Bulkrax::Delete#{type.camelize}Job".constantize.send(perform_method, new_entry, current_run) - else - "Bulkrax::Import#{type.camelize}Job".constantize.send(perform_method, new_entry.id, current_run.id) - end - end - def write_partial_import_file(file) import_filename = import_file_path.split('/').last partial_import_filename = "#{File.basename(import_filename, '.csv')}_corrected_entries.csv" @@ -204,7 +153,6 @@ def create_new_entries def entry_class CsvEntry end - alias work_entry_class entry_class def collection_entry_class CsvCollectionEntry diff --git a/app/parsers/bulkrax/oai_dc_parser.rb b/app/parsers/bulkrax/oai_dc_parser.rb index 4319ab3f..03a3a663 100644 --- a/app/parsers/bulkrax/oai_dc_parser.rb +++ b/app/parsers/bulkrax/oai_dc_parser.rb @@ -63,6 +63,12 @@ def import_fields delegate :list_sets, to: :client + def create_objects(types = []) + types.each do |object_type| + send("create_#{object_type.pluralize}") + end + end + def create_collections metadata = { visibility: 'open' @@ -86,27 +92,31 @@ def create_works results = self.records(quick: true) return if results.blank? results.full.each_with_index do |record, index| - identifier = record.send(source_identifier) - if identifier.blank? - if Bulkrax.fill_in_blank_source_identifiers.present? - identifier = Bulkrax.fill_in_blank_source_identifiers.call(self, index) - else - invalid_record("Missing #{source_identifier} for #{record.to_h}\n") - next - end - end - + identifier = record_has_source_identifier(record, index) + next unless identifier break if limit_reached?(limit, index) + seen[identifier] = true - new_entry = entry_class.where(importerexporter: self.importerexporter, identifier: identifier).first_or_create! - if record.deleted? 
- DeleteWorkJob.send(perform_method, new_entry, importerexporter.current_run) - else - ImportWorkJob.send(perform_method, new_entry.id, importerexporter.current_run.id) - end + create_entry_and_job(record, 'work', identifier) increment_counters(index, work: true) end importer.record_status + rescue StandardError => e + set_status_info(e) + end + + # oai records so not let us set the source identifier easily + def record_has_source_identifier(record, index) + identifier = record.send(source_identifier) + if identifier.blank? + if Bulkrax.fill_in_blank_source_identifiers.present? + identifier = Bulkrax.fill_in_blank_source_identifiers.call(self, index) + else + invalid_record("Missing #{source_identifier} for #{record.to_h}\n") + return false + end + end + identifier end def collections diff --git a/app/parsers/bulkrax/xml_parser.rb b/app/parsers/bulkrax/xml_parser.rb index c76d9a48..a5854e3c 100644 --- a/app/parsers/bulkrax/xml_parser.rb +++ b/app/parsers/bulkrax/xml_parser.rb @@ -11,13 +11,29 @@ def entry_class def collection_entry_class; end # @todo not yet supported - def create_collections; end + def create_collections + raise NotImplementedError + end # @todo not yet supported def file_set_entry_class; end # @todo not yet supported - def create_file_sets; end + def create_file_sets + raise NotImplementedError + end + + def file_sets + raise NotImplementedError + end + + def collections + raise NotImplementedError + end + + def works + records + end # TODO: change to differentiate between collection and work records when adding ability to import collection metadata def works_total @@ -92,25 +108,6 @@ def good_file_type?(path) %w[.xml .xls .xsd].include?(File.extname(path)) || ::Marcel::MimeType.for(path).include?('application/xml') end - def create_works - records.each_with_index do |record, index| - next unless record_has_source_identifier(record, index) - break if !limit.nil? 
&& index >= limit - - seen[record[source_identifier]] = true - new_entry = find_or_create_entry(entry_class, record[source_identifier], 'Bulkrax::Importer', record) - if record[:delete].present? - DeleteWorkJob.send(perform_method, new_entry, current_run) - else - ImportWorkJob.send(perform_method, new_entry.id, current_run.id) - end - increment_counters(index, work: true) - end - importer.record_status - rescue StandardError => e - set_status_info(e) - end - def total records.size end diff --git a/spec/parsers/bulkrax/application_parser_spec.rb b/spec/parsers/bulkrax/application_parser_spec.rb index f8cc8af9..2eefed7c 100644 --- a/spec/parsers/bulkrax/application_parser_spec.rb +++ b/spec/parsers/bulkrax/application_parser_spec.rb @@ -17,13 +17,24 @@ module Bulkrax describe '#create_objects' do subject(:application_parser) { described_class.new(importer) } - it 'sends the create_* methods based on given types' do - expect(application_parser).to receive(:create_works) - expect(application_parser).to receive(:create_collections) - expect(application_parser).to receive(:create_file_sets) - expect(application_parser).to receive(:create_relationships) + it 'create_works calls create_objects' do + expect(application_parser).to receive(:create_objects).with(['work']) + application_parser.create_works + end + + it 'create_collections calls create_objects' do + expect(application_parser).to receive(:create_objects).with(['collection']) + application_parser.create_collections + end + + it 'create_file_sets calls create_objects' do + expect(application_parser).to receive(:create_objects).with(['file_set']) + application_parser.create_file_sets + end - application_parser.create_objects(%w[collection work file_set relationship]) + it 'create_relationships calls create_objects' do + expect(application_parser).to receive(:create_objects).with(['relationship']) + application_parser.create_relationships end end From c19cd6840a811de730eee18999fd1b271ffb65ff Mon Sep 17 00:00:00 2001 
From: Rob Kaufman Date: Fri, 9 Feb 2024 00:48:42 -0800 Subject: [PATCH 07/19] use the delete and import job to make remove and rerun work better --- app/models/bulkrax/exporter.rb | 4 ++++ app/models/bulkrax/importer.rb | 6 ------ app/parsers/bulkrax/application_parser.rb | 4 ++-- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/app/models/bulkrax/exporter.rb b/app/models/bulkrax/exporter.rb index 42b62c13..e4b5f702 100644 --- a/app/models/bulkrax/exporter.rb +++ b/app/models/bulkrax/exporter.rb @@ -23,6 +23,10 @@ def export set_status_info(e) end + def remove_and_rerun + self.parser_fields['remove_and_rerun'] + end + # #export_source accessors # Used in form to prevent it from getting confused as to which value to populate #export_source with. # Also, used to display the correct selected value when rendering edit form. diff --git a/app/models/bulkrax/importer.rb b/app/models/bulkrax/importer.rb index efa43295..1e1778a9 100644 --- a/app/models/bulkrax/importer.rb +++ b/app/models/bulkrax/importer.rb @@ -188,12 +188,6 @@ def import_objects(types_array = nil) self.only_updates ||= false self.save if self.new_record? # Object needs to be saved for statuses types = types_array || DEFAULT_OBJECT_TYPES - if remove_and_rerun - self.entries.find_each do |e| - e.factory.find&.destroy! - e.destroy! 
- end - end parser.create_objects(types) mark_unseen_as_skipped rescue StandardError => e diff --git a/app/parsers/bulkrax/application_parser.rb b/app/parsers/bulkrax/application_parser.rb index 49c65fe3..a095b129 100644 --- a/app/parsers/bulkrax/application_parser.rb +++ b/app/parsers/bulkrax/application_parser.rb @@ -14,7 +14,7 @@ class ApplicationParser # rubocop:disable Metrics/ClassLength :seen, :increment_counters, :parser_fields, :user, :keys_without_numbers, :key_without_numbers, :status, :set_status_info, :status_info, :status_at, :exporter_export_path, :exporter_export_zip_path, :importer_unzip_path, :validate_only, - :zip?, :file?, + :zip?, :file?, :remove_and_rerun, to: :importerexporter # @todo Convert to `class_attribute :parser_fiels, default: {}` @@ -217,7 +217,7 @@ def create_entry_and_job(current_record, type, identifier = nil) current_record.to_h) if current_record[:delete].present? "Bulkrax::Delete#{type.camelize}Job".constantize.send(perform_method, new_entry, current_run) - elsif current_record[:remove_and_rerun].present? + elsif current_record[:remove_and_rerun].present? 
|| remove_and_rerun "Bulkrax::DeleteAndImport#{type.camelize}Job".constantize.send(perform_method, new_entry, current_run) else "Bulkrax::Import#{type.camelize}Job".constantize.send(perform_method, new_entry.id, current_run.id) From b5e7dc07f0808a550a896029f5b803353d939424 Mon Sep 17 00:00:00 2001 From: Rob Kaufman Date: Fri, 9 Feb 2024 01:06:52 -0800 Subject: [PATCH 08/19] individual entry update should happen in the background --- app/controllers/bulkrax/entries_controller.rb | 22 +++++++++++++++---- app/models/bulkrax/exporter.rb | 7 ++++-- app/models/bulkrax/importer.rb | 3 ++- 3 files changed, 25 insertions(+), 7 deletions(-) diff --git a/app/controllers/bulkrax/entries_controller.rb b/app/controllers/bulkrax/entries_controller.rb index 4328ed0b..d3d4431c 100644 --- a/app/controllers/bulkrax/entries_controller.rb +++ b/app/controllers/bulkrax/entries_controller.rb @@ -17,13 +17,27 @@ def show def update @entry = Entry.find(params[:id]) - @entry.factory&.find&.destroy if params[:destroy_first] - @entry.build - @entry.save + type = case @entry.type.downcase + when /fileset/ + 'file_set' + when /collection/ + 'collection' + else + 'work' + end item = @entry.importerexporter + # do not run counters as it loads the whole parser + current_run = item.current_run(skip_counts: true) + @entry.set_status_info('Pending', current_run) + if params[:destroy_first] + "Bulkrax::DeleteAndImport#{type.camelize}Job".constantize.perform_later(@entry, current_run) + else + "Bulkrax::Import#{type.camelize}Job".constantize.perform_later(@entry.id, current_run.id) + end + entry_path = item.class.to_s.include?('Importer') ? 
bulkrax.importer_entry_path(item.id, @entry.id) : bulkrax.exporter_entry_path(item.id, @entry.id) - redirect_back fallback_location: entry_path, notice: "Entry update ran, new status is #{@entry.status}" + redirect_back fallback_location: entry_path, notice: "Entry #{@entry.id} update has been queued" end def destroy diff --git a/app/models/bulkrax/exporter.rb b/app/models/bulkrax/exporter.rb index e4b5f702..de054a59 100644 --- a/app/models/bulkrax/exporter.rb +++ b/app/models/bulkrax/exporter.rb @@ -106,9 +106,12 @@ def importers_list Importer.all.map { |i| [i.name, i.id] } end - def current_run + def current_run(skip_counts: false) + @current_run ||= self.exporter_runs.create! if skip_counts + return @current_run if @current_run + total = self.limit || parser.total - @current_run ||= self.exporter_runs.create!(total_work_entries: total, enqueued_records: total) + @current_run = self.exporter_runs.create!(total_work_entries: total, enqueued_records: total) end def last_run diff --git a/app/models/bulkrax/importer.rb b/app/models/bulkrax/importer.rb index 1e1778a9..01f0a325 100644 --- a/app/models/bulkrax/importer.rb +++ b/app/models/bulkrax/importer.rb @@ -103,11 +103,12 @@ def schedulable? frequency.to_seconds != 0 end - def current_run + def current_run(skip_counts: false) return @current_run if @current_run.present? @current_run = self.importer_runs.create! return @current_run if file? && zip? 
+ return @current_run if skip_counts entry_counts = { total_work_entries: self.limit || parser.works_total, From 077c4db7bdaff60008c26f8d765cdf5e77f5f719 Mon Sep 17 00:00:00 2001 From: Rob Kaufman Date: Fri, 9 Feb 2024 02:09:46 -0800 Subject: [PATCH 09/19] call the cops --- app/controllers/bulkrax/entries_controller.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/controllers/bulkrax/entries_controller.rb b/app/controllers/bulkrax/entries_controller.rb index d3d4431c..f7a4a492 100644 --- a/app/controllers/bulkrax/entries_controller.rb +++ b/app/controllers/bulkrax/entries_controller.rb @@ -30,7 +30,7 @@ def update current_run = item.current_run(skip_counts: true) @entry.set_status_info('Pending', current_run) if params[:destroy_first] - "Bulkrax::DeleteAndImport#{type.camelize}Job".constantize.perform_later(@entry, current_run) + "Bulkrax::DeleteAndImport#{type.camelize}Job".constantize.perform_later(@entry, current_run) else "Bulkrax::Import#{type.camelize}Job".constantize.perform_later(@entry.id, current_run.id) end From 623fc909d72f7dce64e6e49511ffd0ecf3940f45 Mon Sep 17 00:00:00 2001 From: Rob Kaufman Date: Fri, 9 Feb 2024 03:23:38 -0800 Subject: [PATCH 10/19] missing job --- app/jobs/bulkrax/import_job.rb | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 app/jobs/bulkrax/import_job.rb diff --git a/app/jobs/bulkrax/import_job.rb b/app/jobs/bulkrax/import_job.rb new file mode 100644 index 00000000..3f9138bb --- /dev/null +++ b/app/jobs/bulkrax/import_job.rb @@ -0,0 +1,8 @@ +# frozen_string_literal: true + +module Bulkrax + class ImportJob < ApplicationJob + queue_as :import + + end +end From 20fec68dc21dd8000fcb155975cc6399bf19a92e Mon Sep 17 00:00:00 2001 From: Rob Kaufman Date: Fri, 9 Feb 2024 03:36:32 -0800 Subject: [PATCH 11/19] fixing job classes --- app/jobs/bulkrax/delete_and_import_collection_job.rb | 4 ++-- app/jobs/bulkrax/delete_and_import_file_set_job.rb | 4 ++-- app/jobs/bulkrax/delete_and_import_work_job.rb | 4 
++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/app/jobs/bulkrax/delete_and_import_collection_job.rb b/app/jobs/bulkrax/delete_and_import_collection_job.rb index b076f72a..09f0beed 100644 --- a/app/jobs/bulkrax/delete_and_import_collection_job.rb +++ b/app/jobs/bulkrax/delete_and_import_collection_job.rb @@ -2,7 +2,7 @@ module Bulkrax class DeleteAndImportCollectionJob < DeleteAndImportJob - self.delete_job = Bulkrax::DeleteCollectionJob - self.import_job = Bulkrax::ImportCollectionJob + self.delete_class = Bulkrax::DeleteCollectionJob + self.import_class = Bulkrax::ImportCollectionJob end end diff --git a/app/jobs/bulkrax/delete_and_import_file_set_job.rb b/app/jobs/bulkrax/delete_and_import_file_set_job.rb index e52e1cdf..11e0a677 100644 --- a/app/jobs/bulkrax/delete_and_import_file_set_job.rb +++ b/app/jobs/bulkrax/delete_and_import_file_set_job.rb @@ -2,7 +2,7 @@ module Bulkrax class DeleteAndImportFileSetJob < DeleteAndImportJob - self.delete_job = Bulkrax::DeleteFileSetJob - self.import_job = Bulkrax::ImportFileSetJob + self.delete_class = Bulkrax::DeleteFileSetJob + self.import_class = Bulkrax::ImportFileSetJob end end diff --git a/app/jobs/bulkrax/delete_and_import_work_job.rb b/app/jobs/bulkrax/delete_and_import_work_job.rb index bfa57fb7..686c9f3a 100644 --- a/app/jobs/bulkrax/delete_and_import_work_job.rb +++ b/app/jobs/bulkrax/delete_and_import_work_job.rb @@ -2,7 +2,7 @@ module Bulkrax class DeleteAndImportWorkJob < DeleteAndImportJob - self.delete_job = Bulkrax::DeleteWorkJob - self.import_job = Bulkrax::ImportWorkJob + self.delete_class = Bulkrax::DeleteWorkJob + self.import_class = Bulkrax::ImportWorkJob end end From cdfd59ae6c2a5c75e8ec42576b4b8886346d6947 Mon Sep 17 00:00:00 2001 From: Rob Kaufman Date: Fri, 9 Feb 2024 03:53:55 -0800 Subject: [PATCH 12/19] missing dependency --- lib/bulkrax.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/bulkrax.rb b/lib/bulkrax.rb index f485a592..07068879 100644 --- 
a/lib/bulkrax.rb +++ b/lib/bulkrax.rb @@ -3,8 +3,8 @@ require "bulkrax/version" require "bulkrax/engine" require 'active_support/all' +require 'coderay' require 'denormalize_fields' - # rubocop:disable Metrics/ModuleLength module Bulkrax extend self # rubocop:disable Style/ModuleFunction From b9f8fcd58575a2aaea7847a084238c6e6ad691d8 Mon Sep 17 00:00:00 2001 From: Rob Kaufman Date: Fri, 9 Feb 2024 11:14:13 -0800 Subject: [PATCH 13/19] rubocop --- app/jobs/bulkrax/import_job.rb | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/app/jobs/bulkrax/import_job.rb b/app/jobs/bulkrax/import_job.rb index 3f9138bb..b8ff2d5d 100644 --- a/app/jobs/bulkrax/import_job.rb +++ b/app/jobs/bulkrax/import_job.rb @@ -3,6 +3,5 @@ module Bulkrax class ImportJob < ApplicationJob queue_as :import - - end + end end From 8f5c0e4df77dff3156210ad12765eebd9f4345ec Mon Sep 17 00:00:00 2001 From: Rob Kaufman Date: Fri, 9 Feb 2024 12:40:51 -0800 Subject: [PATCH 14/19] Select existing entries for update (#924) * We may want to rebuild entries without getting a new file, this allows for that * call the cops --- app/assets/javascripts/bulkrax/importers.js.erb | 16 +++++++++++++++- .../stylesheets/bulkrax/import_export.scss | 7 ++++++- app/controllers/bulkrax/importers_controller.rb | 2 +- app/models/bulkrax/importer.rb | 6 +++++- app/parsers/bulkrax/application_parser.rb | 14 ++++++++++++++ app/views/bulkrax/importers/_csv_fields.html.erb | 6 +++++- 6 files changed, 46 insertions(+), 5 deletions(-) diff --git a/app/assets/javascripts/bulkrax/importers.js.erb b/app/assets/javascripts/bulkrax/importers.js.erb index 87c0f7b1..e254c60e 100644 --- a/app/assets/javascripts/bulkrax/importers.js.erb +++ b/app/assets/javascripts/bulkrax/importers.js.erb @@ -74,12 +74,14 @@ function handleFileToggle(file_path) { $('#file_path').hide() $('#file_upload').hide() $('#cloud').hide() + $('#existing_options').hide() $('#file_path input').attr('required', null) $('#file_upload input').attr('required', 
null) } else { $('#file_path').show() $('#file_upload').hide() $('#cloud').hide() + $('#existing_options').hide() $('#file_path input').attr('required', 'required') $('#file_upload input').attr('required', null) $('#importer_parser_fields_file_style_specify_a_path_on_the_server').attr('checked', true) @@ -89,6 +91,7 @@ function handleFileToggle(file_path) { $('#file_path').hide() $('#file_upload').show() $('#cloud').hide() + $('#existing_options').hide() $('#file_path input').attr('required', null) $('#file_upload input').attr('required', 'required') }) @@ -96,6 +99,7 @@ function handleFileToggle(file_path) { $('#file_path').show() $('#file_upload').hide() $('#cloud').hide() + $('#existing_options').hide() $('#file_path input').attr('required', 'required') $('#file_upload input').attr('required', null) }) @@ -103,9 +107,19 @@ function handleFileToggle(file_path) { $('#file_path').hide() $('#file_upload').hide() $('#cloud').show() + $('#existing_options').hide() $('#file_path input').attr('required', null) $('#file_upload input').attr('required', null) }) + $('#importer_parser_fields_file_style_existing_entries').click(function(e){ + $('#file_path').hide() + $('#file_upload').hide() + $('#cloud').hide() + $('#existing_options').show() + $('#file_path input').attr('required', null) + $('#file_upload input').attr('required', null) + }) + } function handleParserKlass() { @@ -189,4 +203,4 @@ function setError(selector, error) { selector.attr('disabled', true) } -$(document).on({'ready': prepBulkrax, 'turbolinks:load': prepBulkrax}) \ No newline at end of file +$(document).on({'ready': prepBulkrax, 'turbolinks:load': prepBulkrax}) diff --git a/app/assets/stylesheets/bulkrax/import_export.scss b/app/assets/stylesheets/bulkrax/import_export.scss index 1834840a..0e182842 100644 --- a/app/assets/stylesheets/bulkrax/import_export.scss +++ b/app/assets/stylesheets/bulkrax/import_export.scss @@ -34,4 +34,9 @@ div#s2id_exporter_export_source_collection { .bulkrax-clear-toggles { 
clear: both; -} \ No newline at end of file +} + +#existing_options .collection_check_boxes { + margin-left: 10px; + margin-right: 10px; +} diff --git a/app/controllers/bulkrax/importers_controller.rb b/app/controllers/bulkrax/importers_controller.rb index 8248975c..dd58d897 100644 --- a/app/controllers/bulkrax/importers_controller.rb +++ b/app/controllers/bulkrax/importers_controller.rb @@ -218,7 +218,7 @@ def importable_params end def importable_parser_fields - params&.[](:importer)&.[](:parser_fields)&.except(:file)&.keys + params&.[](:importer)&.[](:parser_fields)&.except(:file, :entry_statuses)&.keys&. + [{ "entry_statuses" => [] }] end # Only allow a trusted parameters through. diff --git a/app/models/bulkrax/importer.rb b/app/models/bulkrax/importer.rb index 01f0a325..7618de90 100644 --- a/app/models/bulkrax/importer.rb +++ b/app/models/bulkrax/importer.rb @@ -167,6 +167,10 @@ def metadata_only? parser.parser_fields['metadata_only'] == true end + def existing_entries? + parser.parser_fields['file_style']&.match(/Existing Entries/) + end + def import_works import_objects(['work']) end @@ -189,7 +193,7 @@ def import_objects(types_array = nil) self.only_updates ||= false self.save if self.new_record? # Object needs to be saved for statuses types = types_array || DEFAULT_OBJECT_TYPES - parser.create_objects(types) + existing_entries? ? 
parser.rebuild_entries(types) : parser.create_objects(types) mark_unseen_as_skipped rescue StandardError => e set_status_info(e) diff --git a/app/parsers/bulkrax/application_parser.rb b/app/parsers/bulkrax/application_parser.rb index a095b129..ad42ca61 100644 --- a/app/parsers/bulkrax/application_parser.rb +++ b/app/parsers/bulkrax/application_parser.rb @@ -209,6 +209,20 @@ def create_objects(types_array = nil) set_status_info(e) end + def rebuild_entries(_types_array = nil) + index = 0 + importer.entries.where(status_message: parser_fields['entry_statuses']).find_each do |e| + seen[e.identifier] = true + if remove_and_rerun + "Bulkrax::DeleteAndImport#{type.camelize}Job".constantize.send(perform_method, e, current_run) + else + "Bulkrax::Import#{type.camelize}Job".constantize.send(perform_method, e.id, current_run.id) + end + increment_counters(index) + index += 1 + end + end + def create_entry_and_job(current_record, type, identifier = nil) identifier ||= current_record[source_identifier] new_entry = find_or_create_entry(send("#{type}_entry_class"), diff --git a/app/views/bulkrax/importers/_csv_fields.html.erb b/app/views/bulkrax/importers/_csv_fields.html.erb index 758766e7..88dcd61b 100644 --- a/app/views/bulkrax/importers/_csv_fields.html.erb +++ b/app/views/bulkrax/importers/_csv_fields.html.erb @@ -25,13 +25,17 @@

Add CSV File to Import:

<%# accept a single file upload; data files and bags will need to be added another way %> - <%= fi.input :file_style, collection: ['Upload a File', 'Specify a Path on the Server'], as: :radio_buttons, label: false %> + <%= fi.input :file_style, collection: ['Upload a File', 'Specify a Path on the Server', 'Existing Entries'], as: :radio_buttons, label: false %>
<%= fi.input 'file', as: :file, input_html: { accept: 'text/csv,application/zip' } %>
<%= fi.input :import_file_path, as: :string, input_html: { value: importer.parser_fields['import_file_path'] } %>
+
+ <%= fi.collection_check_boxes :entry_statuses, [['Failed'], ['Pending'], ['Skipped'], ['Deleted'], ['Complete']], :first, :first %> +
+ <% if defined?(::Hyrax) && Hyrax.config.browse_everything? %>

Add Files to Import:

Choose files to upload. The filenames must be unique, and the filenames must be referenced in a column called 'file' in the accompanying CSV file.

From 44cb7d77009528740910f2c07f718fc04e77eb71 Mon Sep 17 00:00:00 2001 From: Rob Kaufman Date: Fri, 9 Feb 2024 14:36:41 -0800 Subject: [PATCH 15/19] make deleteandimport jobs thread safe --- app/controllers/bulkrax/entries_controller.rb | 2 + .../delete_and_import_collection_job.rb | 4 +- .../bulkrax/delete_and_import_file_set_job.rb | 4 +- app/jobs/bulkrax/delete_and_import_job.rb | 16 ++++--- .../bulkrax/delete_and_import_job.rb.orig | 25 +++++++++++ .../bulkrax/delete_and_import_work_job.rb | 4 +- app/jobs/bulkrax/delete_job.rb | 4 ++ app/parsers/bulkrax/application_parser.rb | 42 ++++++++++++++----- spec/test_app/db/schema.rb | 5 ++- 9 files changed, 82 insertions(+), 24 deletions(-) create mode 100644 app/jobs/bulkrax/delete_and_import_job.rb.orig diff --git a/app/controllers/bulkrax/entries_controller.rb b/app/controllers/bulkrax/entries_controller.rb index f7a4a492..ea1e4fee 100644 --- a/app/controllers/bulkrax/entries_controller.rb +++ b/app/controllers/bulkrax/entries_controller.rb @@ -29,6 +29,8 @@ def update # do not run counters as it loads the whole parser current_run = item.current_run(skip_counts: true) @entry.set_status_info('Pending', current_run) + ScheduleRelationshipsJob.set(wait: 5.minutes).perform_later(importer_id: @entry.importer.id) + if params[:destroy_first] "Bulkrax::DeleteAndImport#{type.camelize}Job".constantize.perform_later(@entry, current_run) else diff --git a/app/jobs/bulkrax/delete_and_import_collection_job.rb b/app/jobs/bulkrax/delete_and_import_collection_job.rb index 09f0beed..2e434fb6 100644 --- a/app/jobs/bulkrax/delete_and_import_collection_job.rb +++ b/app/jobs/bulkrax/delete_and_import_collection_job.rb @@ -2,7 +2,7 @@ module Bulkrax class DeleteAndImportCollectionJob < DeleteAndImportJob - self.delete_class = Bulkrax::DeleteCollectionJob - self.import_class = Bulkrax::ImportCollectionJob + DELETE_CLASS = Bulkrax::DeleteCollectionJob + IMPORT_CLASS = Bulkrax::ImportCollectionJob end end diff --git 
a/app/jobs/bulkrax/delete_and_import_file_set_job.rb b/app/jobs/bulkrax/delete_and_import_file_set_job.rb index 11e0a677..8660a082 100644 --- a/app/jobs/bulkrax/delete_and_import_file_set_job.rb +++ b/app/jobs/bulkrax/delete_and_import_file_set_job.rb @@ -2,7 +2,7 @@ module Bulkrax class DeleteAndImportFileSetJob < DeleteAndImportJob - self.delete_class = Bulkrax::DeleteFileSetJob - self.import_class = Bulkrax::ImportFileSetJob + DELETE_CLASS = Bulkrax::DeleteFileSetJob + IMPORT_CLASS = Bulkrax::ImportFileSetJob end end diff --git a/app/jobs/bulkrax/delete_and_import_job.rb b/app/jobs/bulkrax/delete_and_import_job.rb index 3af7edf4..03be3f14 100644 --- a/app/jobs/bulkrax/delete_and_import_job.rb +++ b/app/jobs/bulkrax/delete_and_import_job.rb @@ -4,13 +4,17 @@ module Bulkrax class DeleteAndImportJob < ApplicationJob queue_as :import - cattr_accessor :delete_class, :import_class - self.delete_class = Bulkrax::DeleteJob - self.import_class = Bulkrax::ImportJob - def perform(entry, importer_run) - self.delete_class.perform_now(entry, importer_run) - self.import_class.perform_now(entry, importer_run) + status = self.class::DELETE_CLASS.perform_now(entry, importer_run) + if status.status_message == "Deleted" + entry = Bulkrax::Entry.find(entry.id) # maximum reload + self.class::IMPORT_CLASS.perform_now(entry.id, importer_run.id) + end + + rescue => e + entry.set_status_info(e) + # this causes caught exception to be reraised + raise end end end diff --git a/app/jobs/bulkrax/delete_and_import_job.rb.orig b/app/jobs/bulkrax/delete_and_import_job.rb.orig new file mode 100644 index 00000000..0fb24062 --- /dev/null +++ b/app/jobs/bulkrax/delete_and_import_job.rb.orig @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +module Bulkrax + class DeleteAndImportJob < ApplicationJob + queue_as :import + + def perform(entry, importer_run) +<<<<<<< Updated upstream + self.delete_class.perform_now(entry, importer_run) + self.import_class.perform_now(entry, importer_run) +======= + 
status = self.class::DELETE_CLASS.perform_now(entry, importer_run) + if status.status_message == "Deleted" + entry = Bulkrax::Entry.find(entry.id) # maximum reload + self.class::IMPORT_CLASS.perform_now(entry.id, importer_run.id) + end + + rescue => e + entry.set_status_info(e) + # this causes caught exception to be reraised + raise +>>>>>>> Stashed changes + end + end +end diff --git a/app/jobs/bulkrax/delete_and_import_work_job.rb b/app/jobs/bulkrax/delete_and_import_work_job.rb index 686c9f3a..318982cf 100644 --- a/app/jobs/bulkrax/delete_and_import_work_job.rb +++ b/app/jobs/bulkrax/delete_and_import_work_job.rb @@ -2,7 +2,7 @@ module Bulkrax class DeleteAndImportWorkJob < DeleteAndImportJob - self.delete_class = Bulkrax::DeleteWorkJob - self.import_class = Bulkrax::ImportWorkJob + DELETE_CLASS = Bulkrax::DeleteWorkJob + IMPORT_CLASS = Bulkrax::ImportWorkJob end end diff --git a/app/jobs/bulkrax/delete_job.rb b/app/jobs/bulkrax/delete_job.rb index 1fcd04cc..0b337261 100644 --- a/app/jobs/bulkrax/delete_job.rb +++ b/app/jobs/bulkrax/delete_job.rb @@ -15,6 +15,10 @@ def perform(entry, importer_run) entry.importer.current_run = ImporterRun.find(importer_run.id) entry.importer.record_status entry.set_status_info("Deleted", ImporterRun.find(importer_run.id)) + rescue => e + entry.set_status_info(e) + # this causes caught exception to be reraised + raise end end end diff --git a/app/parsers/bulkrax/application_parser.rb b/app/parsers/bulkrax/application_parser.rb index ad42ca61..34f6697a 100644 --- a/app/parsers/bulkrax/application_parser.rb +++ b/app/parsers/bulkrax/application_parser.rb @@ -209,30 +209,52 @@ def create_objects(types_array = nil) set_status_info(e) end - def rebuild_entries(_types_array = nil) + def rebuild_entries(types_array = nil) index = 0 - importer.entries.where(status_message: parser_fields['entry_statuses']).find_each do |e| - seen[e.identifier] = true - if remove_and_rerun - 
"Bulkrax::DeleteAndImport#{type.camelize}Job".constantize.send(perform_method, e, current_run) - else - "Bulkrax::Import#{type.camelize}Job".constantize.send(perform_method, e.id, current_run.id) + (types_array || %w[collection work file_set relationship]).each do |type| + # works are not guaranteed to have Work in the type + + importer.entries.where(rebuild_entry_query(type, parser_fields['entry_statuses'])).find_each do |e| + seen[e.identifier] = true + e.status_info('Pending') + if remove_and_rerun + delay = calculate_type_delay(type) + "Bulkrax::DeleteAndImport#{type.camelize}Job".constantize.set(wait: delay).send(perform_method, e, current_run) + else + "Bulkrax::Import#{type.camelize}Job".constantize.send(perform_method, e.id, current_run.id) + end + increment_counters(index) + index += 1 end - increment_counters(index) - index += 1 end end + def rebuild_entry_query(type, statuses) + type_col = Bulkrax::Entry.arel_table['type'] + status_col = Bulkrax::Entry.arel_table['status_message'] + + query = (type == 'work' ? type_col.not.matches(%w[collection file_set]) : type_col.matches(type.camelize)) + query.and(status_col.in(statuses)) + end + + def calculate_type_delay(type) + return 2.minutes if type == 'file_set' + return 1.minute if type == 'work' + return 0 + end + def create_entry_and_job(current_record, type, identifier = nil) identifier ||= current_record[source_identifier] new_entry = find_or_create_entry(send("#{type}_entry_class"), identifier, 'Bulkrax::Importer', current_record.to_h) + new_entry.status_info('Pending') if current_record[:delete].present? "Bulkrax::Delete#{type.camelize}Job".constantize.send(perform_method, new_entry, current_run) elsif current_record[:remove_and_rerun].present?
|| remove_and_rerun - "Bulkrax::DeleteAndImport#{type.camelize}Job".constantize.send(perform_method, new_entry, current_run) + delay = calculate_type_delay(type) + "Bulkrax::DeleteAndImport#{type.camelize}Job".constantize.set(wait: delay).send(perform_method, new_entry, current_run) else "Bulkrax::Import#{type.camelize}Job".constantize.send(perform_method, new_entry.id, current_run.id) end diff --git a/spec/test_app/db/schema.rb b/spec/test_app/db/schema.rb index 70580d6e..05672a92 100644 --- a/spec/test_app/db/schema.rb +++ b/spec/test_app/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema.define(version: 2023_06_08_153601) do +ActiveRecord::Schema.define(version: 2024_02_09_070952) do create_table "accounts", force: :cascade do |t| t.string "name" @@ -41,7 +41,8 @@ t.datetime "last_succeeded_at" t.string "importerexporter_type", default: "Bulkrax::Importer" t.integer "import_attempts", default: 0 - t.index ["identifier"], name: "index_bulkrax_entries_on_identifier" + t.string "status_message", default: "Pending" + t.index ["identifier", "importerexporter_id", "importerexporter_type"], name: "bulkrax_identifier_idx" t.index ["importerexporter_id", "importerexporter_type"], name: "bulkrax_entries_importerexporter_idx" t.index ["type"], name: "index_bulkrax_entries_on_type" end From f5cd0fdcbbfc8527d8ff4ec2ba5abe70b8006841 Mon Sep 17 00:00:00 2001 From: Rob Kaufman Date: Fri, 9 Feb 2024 14:41:08 -0800 Subject: [PATCH 16/19] fixes skip and do not show existing entry option on new importer --- Rakefile | 14 +++++++------- app/models/bulkrax/importer.rb | 2 +- app/views/bulkrax/importers/_csv_fields.html.erb | 4 +++- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/Rakefile b/Rakefile index 41f4f9cd..6dc616de 100644 --- a/Rakefile +++ b/Rakefile @@ -25,18 +25,18 @@ require 'bundler/gem_tasks' require 'solr_wrapper/rake_task' unless Rails.env.production? 
+require 'rubocop/rake_task' + +RuboCop::RakeTask.new(:rubocop) do |t| + t.options = ['--display-cop-names', '--ignore-parent-exclusion', '-a'] +end + begin require 'rspec/core/rake_task' RSpec::Core::RakeTask.new(:spec) - task default: :spec + task default: [:rubocop, :spec] rescue LoadError # rubocop:disable Lint/HandleExceptions # no rspec available end - -require 'rubocop/rake_task' - -RuboCop::RakeTask.new(:rubocop) do |t| - t.options = ['--display-cop-names'] -end diff --git a/app/models/bulkrax/importer.rb b/app/models/bulkrax/importer.rb index 7618de90..e2432038 100644 --- a/app/models/bulkrax/importer.rb +++ b/app/models/bulkrax/importer.rb @@ -202,7 +202,7 @@ def import_objects(types_array = nil) # After an import any entries we did not touch are skipped. # They are not really pending, complete for the last run, or failed def mark_unseen_as_skipped - entries.where.not(id. seen).find_each do |entry| + entries.where.not(identifier: seen.keys).find_each do |entry| entry.set_status_info('Skipped') end end diff --git a/app/views/bulkrax/importers/_csv_fields.html.erb b/app/views/bulkrax/importers/_csv_fields.html.erb index 88dcd61b..faf96d4b 100644 --- a/app/views/bulkrax/importers/_csv_fields.html.erb +++ b/app/views/bulkrax/importers/_csv_fields.html.erb @@ -25,7 +25,9 @@

Add CSV File to Import:

<%# accept a single file upload; data files and bags will need to be added another way %> - <%= fi.input :file_style, collection: ['Upload a File', 'Specify a Path on the Server', 'Existing Entries'], as: :radio_buttons, label: false %> + <% file_style_list = ['Upload a File', 'Specify a Path on the Server'] %> + <% file_style_list << 'Existing Entries' unless importer.new_record? %> + <%= fi.input :file_style, collection: file_style_list, as: :radio_buttons, label: false %>
<%= fi.input 'file', as: :file, input_html: { accept: 'text/csv,application/zip' } %>
From c7f880ab8c740d5ec2dc632b524d042e94a19d0a Mon Sep 17 00:00:00 2001 From: Rob Kaufman Date: Fri, 9 Feb 2024 17:40:39 -0800 Subject: [PATCH 17/19] remove orig file --- .../bulkrax/delete_and_import_job.rb.orig | 25 ------------------- 1 file changed, 25 deletions(-) delete mode 100644 app/jobs/bulkrax/delete_and_import_job.rb.orig diff --git a/app/jobs/bulkrax/delete_and_import_job.rb.orig b/app/jobs/bulkrax/delete_and_import_job.rb.orig deleted file mode 100644 index 0fb24062..00000000 --- a/app/jobs/bulkrax/delete_and_import_job.rb.orig +++ /dev/null @@ -1,25 +0,0 @@ -# frozen_string_literal: true - -module Bulkrax - class DeleteAndImportJob < ApplicationJob - queue_as :import - - def perform(entry, importer_run) -<<<<<<< Updated upstream - self.delete_class.perform_now(entry, importer_run) - self.import_class.perform_now(entry, importer_run) -======= - status = self.class::DELETE_CLASS.perform_now(entry, importer_run) - if status.status_message == "Deleted" - entry = Bulkrax::Entry.find(entry.id) # maximum reload - self.class::IMPORT_CLASS.perform_now(entry.id, importer_run.id) - end - - rescue => e - entry.set_status_info(e) - # this causes caught exception to be reraised - raise ->>>>>>> Stashed changes - end - end -end From 3ee01e319226ac9c7403733d166fb924e1928f4d Mon Sep 17 00:00:00 2001 From: Rob Kaufman Date: Fri, 9 Feb 2024 17:54:12 -0800 Subject: [PATCH 18/19] spec fix --- app/parsers/bulkrax/application_parser.rb | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/app/parsers/bulkrax/application_parser.rb b/app/parsers/bulkrax/application_parser.rb index e9f0d76f..d2b603ee 100644 --- a/app/parsers/bulkrax/application_parser.rb +++ b/app/parsers/bulkrax/application_parser.rb @@ -196,7 +196,6 @@ def create_objects(types_array = nil) send(type.pluralize).each do |current_record| next unless record_has_source_identifier(current_record, index) break if limit_reached?(limit, index) - seen[current_record[source_identifier]] = true 
create_entry_and_job(current_record, type) increment_counters(index, "#{type}": true) @@ -216,7 +215,7 @@ def rebuild_entries(types_array = nil) importer.entries.where(rebuild_entry_query(type, parser_fields['entry_statuses'])).find_each do |e| seen[e.identifier] = true - e.status_info('Pending') + e.status_info('Pending', importer.current_run) if remove_and_rerun delay = calculate_type_delay(type) "Bulkrax::DeleteAndImport#{type.camelize}Job".constantize.set(wait: delay).send(perform_method, e, current_run) @@ -249,7 +248,7 @@ def create_entry_and_job(current_record, type, identifier = nil) identifier, 'Bulkrax::Importer', current_record.to_h) - new_entry.status_info('Pending') + new_entry.status_info('Pending', importer.current_run) if current_record[:delete].present? "Bulkrax::Delete#{type.camelize}Job".constantize.send(perform_method, new_entry, current_run) elsif current_record[:remove_and_rerun].present? || remove_and_rerun From aea34778432ce8fafdf4423d4eb7e4335fd206e6 Mon Sep 17 00:00:00 2001 From: Rob Kaufman Date: Fri, 9 Feb 2024 17:58:09 -0800 Subject: [PATCH 19/19] do not call rubocop in ci only --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 38896fc2..1eacd7b5 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -40,7 +40,7 @@ jobs: run: bundle exec rake db:migrate db:test:prepare - name: Run rspec - run: bundle exec rake + run: bundle exec rake spec - name: Upload coverage results uses: actions/upload-artifact@v2