Skip to content

Commit

Permalink
417 Do not stop all collections from importing when one fails (#419)
Browse files Browse the repository at this point in the history
* use #perform_later for collections that are not imported using the collection_field_mapping

* #possible_collection_ids now handles collections imported using the collection field mapping that have their source_identifier automatically generated

* update spec, resolve rubocop offenses

* make deprecations conditional

* remove colon as split character, do not include nils in #possible_collection_ids

* add specs for #add_required_collection_metadata, expand specs for #create_collections
  • Loading branch information
bkiahstroud authored Mar 1, 2022
1 parent bdd4d4d commit 95bd0f4
Show file tree
Hide file tree
Showing 4 changed files with 104 additions and 24 deletions.
22 changes: 17 additions & 5 deletions app/models/bulkrax/csv_entry.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@
require 'csv'

module Bulkrax
class CsvEntry < Entry
# TODO: We need to rework this class some to address the Metrics/ClassLength rubocop offense.
# We do too much in these entry classes. We need to extract the common logic from the various
# entry models into a module that can be shared between them.
class CsvEntry < Entry # rubocop:disable Metrics/ClassLength
serialize :raw_metadata, JSON

def self.fields_from_data(data)
Expand Down Expand Up @@ -229,10 +232,19 @@ def possible_collection_ids
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
)
@possible_collection_ids ||= record.inject([]) do |memo, (key, value)|
memo += value.split(/\s*[:;|]\s*/) if self.class.collection_field.to_s == key_without_numbers(key) && value.present?
memo
end || []
return @possible_collection_ids if @possible_collection_ids.present?

collection_field_mapping = self.class.collection_field
return [] unless collection_field_mapping.present? && record[collection_field_mapping].present?

identifiers = []
split_titles = record[collection_field_mapping].split(/\s*[;|]\s*/)
split_titles.each do |c_title|
matching_collection_entries = importerexporter.entries.select { |e| e.raw_metadata['title'] == c_title }
raise ::StandardError, 'Only expected to find one matching entry' if matching_collection_entries.count > 1
identifiers << matching_collection_entries.first&.identifier
end
@possible_collection_ids = identifiers.compact.presence || []
end

def collections_created?
Expand Down
48 changes: 33 additions & 15 deletions app/parsers/bulkrax/csv_parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -88,31 +88,30 @@ def create_collections
collections.each_with_index do |collection, index|
next if collection.blank?
break if records.find_index(collection).present? && limit_reached?(limit, records.find_index(collection))
ActiveSupport::Deprecation.warn(
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
)

## BEGIN
# Add required metadata to collections being imported using the collection_field_mapping, which only have a :title
# TODO: Remove once collection_field_mapping is removed
metadata = if collection.delete(:from_collection_field_mapping)
uci = unique_collection_identifier(collection)
{
title: collection[:title],
work_identifier => uci,
source_identifier => uci,
visibility: 'open',
collection_type_gid: ::Hyrax::CollectionType.find_or_create_default_collection_type.gid
}
end
metadata = add_required_collection_metadata(collection)
collection_hash = metadata.presence || collection
## END

new_entry = find_or_create_entry(collection_entry_class, collection_hash[source_identifier], 'Bulkrax::Importer', collection_hash)
increment_counters(index, collection: true)
# TODO: add support for :delete option
ImportCollectionJob.perform_now(new_entry.id, current_run.id)
if collection.key?(:from_collection_field_mapping)
ActiveSupport::Deprecation.warn(
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
)
# When importing collections using the deprecated collection_field_mapping, the collection MUST be created
# before the work, so we use #perform_now to make sure that happens. The downside is, if a collection fails
# to import, it will stop the rest of the collections from importing successfully.
# TODO: Remove once collection_field_mapping is removed
ImportCollectionJob.perform_now(new_entry.id, current_run.id)
else
ImportCollectionJob.perform_later(new_entry.id, current_run.id)
end
end
importer.record_status
rescue StandardError => e
Expand Down Expand Up @@ -152,6 +151,25 @@ def create_file_sets
status_info(e)
end

# Add required metadata to collections being imported using the collection_field_mapping, which only have a :title
# TODO: Remove once collection_field_mapping is removed
def add_required_collection_metadata(raw_collection_data)
return unless raw_collection_data.key?(:from_collection_field_mapping)
ActiveSupport::Deprecation.warn(
'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
)

uci = unique_collection_identifier(raw_collection_data)
{
title: raw_collection_data[:title],
work_identifier => uci,
source_identifier => uci,
visibility: 'open',
collection_type_gid: ::Hyrax::CollectionType.find_or_create_default_collection_type.gid
}
end

def write_partial_import_file(file)
import_filename = import_file_path.split('/').last
partial_import_filename = "#{File.basename(import_filename, '.csv')}_corrected_entries.csv"
Expand Down
3 changes: 3 additions & 0 deletions spec/fixtures/csv/failed_collection.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
source_identifier,model,title,description
fc1,Collection,,Failed collection's description
fc2,Collection,Valid Collection,Processed after failed collection
55 changes: 51 additions & 4 deletions spec/parsers/bulkrax/csv_parser_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ module Bulkrax
expect { subject.create_collections }.to change(CsvCollectionEntry, :count).by(2)
end

it 'runs an ImportCollectionJob for each entry' do
it 'runs an ImportCollectionJob in memory for each entry' do
expect(ImportCollectionJob).to receive(:perform_now).twice

subject.create_collections
Expand All @@ -142,20 +142,29 @@ module Bulkrax
context 'when importing collections with metadata' do
before do
importer.parser_fields = { import_file_path: './spec/fixtures/csv/collections.csv' }
allow(ImportCollectionJob).to receive(:perform_now)
end

it 'creates CSV collection entries for each collection' do
expect { subject.create_collections }.to change(CsvCollectionEntry, :count).by(2)
end

it 'runs an ImportCollectionJob for each entry' do
expect(ImportCollectionJob).to receive(:perform_now).twice
it 'runs an ImportCollectionJob in the background for each entry' do
expect(ImportCollectionJob).to receive(:perform_later).twice

subject.create_collections
end
end

context 'when a collection entry fails during creation' do
before do
importer.parser_fields = { import_file_path: './spec/fixtures/csv/failed_collection.csv' }
end

it 'does not stop the remaining collection entries from being processed' do
expect { subject.create_collections }.to change(CsvCollectionEntry, :count).by(2)
end
end

describe 'setting collection entry identifiers' do
before do
allow(subject)
Expand Down Expand Up @@ -722,5 +731,43 @@ module Bulkrax
end
end
end

describe '#add_required_collection_metadata' do
context 'when importing collections using the collection_field_mapping' do
let(:collection_data) do
{
title: 'Collection from collection_field_mapping',
from_collection_field_mapping: true
}
end

it 'adds metadata required for collections' do
expect(subject.add_required_collection_metadata(collection_data)).to eq(
{
title: collection_data[:title],
source: collection_data[:title],
source_identifier: collection_data[:title],
visibility: 'open',
collection_type_gid: ::Hyrax::CollectionType.find_or_create_default_collection_type.gid
}
)
end
end

context 'when importing collections without using the collection_field_mapping' do
let(:collection_data) do
{
source_identifier: 'cwm1',
title: 'Collection with Metadata',
model: 'Collection',
description: 'Lorem ipsum'
}
end

it 'does not alter the data' do
expect(subject.add_required_collection_metadata(collection_data)).to be_nil
end
end
end
end
end

0 comments on commit 95bd0f4

Please sign in to comment.