From b2a0dace048f99103c74dfe0fe18c3aaacd207c3 Mon Sep 17 00:00:00 2001
From: Alisha Evans
Date: Fri, 5 Aug 2022 08:13:22 -0500
Subject: [PATCH] 591 export from one tenant into another (#623)

* export file sets from all export sources

* only export thumbnails on the work type row, not files

* update the csv entry and parser specs

* remove bagit parser specs for methods that are no longer in the bagit parser file

* move the #find_child_file_sets spec into the csv parser spec file since that is where the method is now
---
 app/models/bulkrax/csv_entry.rb           |  14 +-
 app/parsers/bulkrax/csv_parser.rb         |   1 +
 spec/models/bulkrax/csv_entry_spec.rb     |   7 +-
 spec/parsers/bulkrax/bagit_parser_spec.rb | 152 +---------------------
 spec/parsers/bulkrax/csv_parser_spec.rb   |  75 +++++++----
 5 files changed, 66 insertions(+), 183 deletions(-)

diff --git a/app/models/bulkrax/csv_entry.rb b/app/models/bulkrax/csv_entry.rb
index e3ca4dbb..2584ee30 100644
--- a/app/models/bulkrax/csv_entry.rb
+++ b/app/models/bulkrax/csv_entry.rb
@@ -112,12 +112,16 @@ def build_system_metadata
     end
 
     def build_files_metadata
-      file_mapping = key_for_export('file')
-      file_sets = hyrax_record.file_set? ? Array.wrap(hyrax_record) : hyrax_record.file_sets
-      filenames = map_file_sets(file_sets)
+      # attaching files to the FileSet row only so we don't have duplicates when importing to a new tenant
+      if hyrax_record.work?
+        build_thumbnail_files
+      else
+        file_mapping = key_for_export('file')
+        file_sets = hyrax_record.file_set? ? Array.wrap(hyrax_record) : hyrax_record.file_sets
+        filenames = map_file_sets(file_sets)
 
-      handle_join_on_export(file_mapping, filenames, mapping['file']&.[]('join')&.present?)
-      build_thumbnail_files if hyrax_record.work?
+        handle_join_on_export(file_mapping, filenames, mapping['file']&.[]('join')&.present?)
+      end
     end
 
     def build_relationship_metadata
diff --git a/app/parsers/bulkrax/csv_parser.rb b/app/parsers/bulkrax/csv_parser.rb
index a068c4d9..858eb6fc 100644
--- a/app/parsers/bulkrax/csv_parser.rb
+++ b/app/parsers/bulkrax/csv_parser.rb
@@ -203,6 +203,7 @@ def current_record_ids
         find_child_file_sets(@work_ids)
       when 'worktype'
         @work_ids = ActiveFedora::SolrService.query("has_model_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
+        find_child_file_sets(@work_ids)
       when 'importer'
         set_ids_for_exporting_from_importer
       end
diff --git a/spec/models/bulkrax/csv_entry_spec.rb b/spec/models/bulkrax/csv_entry_spec.rb
index 26512729..be36d6bc 100644
--- a/spec/models/bulkrax/csv_entry_spec.rb
+++ b/spec/models/bulkrax/csv_entry_spec.rb
@@ -1009,10 +1009,10 @@ module Bulkrax
       context 'with join set to true' do
         let(:exporter) { create(:bulkrax_exporter, field_mapping: { 'file' => { from: ['filename'], join: true } }) }
 
-        it "adds the work's file set's filenames to the file mapping in parsed_metadata" do
+        it "does not add the work's file set's filenames to the file mapping in parsed_metadata" do
          entry.build_files_metadata
 
-          expect(entry.parsed_metadata['filename']).to eq('hello.png | world.jpg')
+          expect(entry.parsed_metadata).to eq({})
        end
      end
    end
@@ -1021,8 +1021,7 @@ module Bulkrax
      it "adds the work's file set's filenames to the 'file' key in parsed_metadata" do
        entry.build_files_metadata
 
-        expect(entry.parsed_metadata['file_1']).to eq('hello.png')
-        expect(entry.parsed_metadata['file_2']).to eq('world.jpg')
+        expect(entry.parsed_metadata).to eq({})
      end
    end
  end
diff --git a/spec/parsers/bulkrax/bagit_parser_spec.rb b/spec/parsers/bulkrax/bagit_parser_spec.rb
index b52c9471..5e44b484 100644
--- a/spec/parsers/bulkrax/bagit_parser_spec.rb
+++ b/spec/parsers/bulkrax/bagit_parser_spec.rb
@@ -274,117 +274,12 @@ module Bulkrax
     end
 
     context 'when exporting a bagit file' do
+      # Use OpenStructs to simulate the behavior of ActiveFedora::SolrHit instances.
       subject { described_class.new(exporter) }
       let(:exporter) { FactoryBot.create(:bulkrax_exporter_worktype_bagit) }
       let(:work_ids_solr) { [OpenStruct.new(id: SecureRandom.alphanumeric(9)), OpenStruct.new(id: SecureRandom.alphanumeric(9))] }
       let(:collection_ids_solr) { [OpenStruct.new(id: SecureRandom.alphanumeric(9))] }
       let(:file_set_ids_solr) { [OpenStruct.new(id: SecureRandom.alphanumeric(9)), OpenStruct.new(id: SecureRandom.alphanumeric(9)), OpenStruct.new(id: SecureRandom.alphanumeric(9))] }
-      let(:parent_record_1) { build(:work) }
-      let(:parent_record_2) { build(:another_work) }
-
-      before do
-        allow(parent_record_1).to receive(:file_set_ids).and_return([file_set_ids_solr.pluck(:id).first])
-        allow(parent_record_1).to receive(:member_of_collection_ids).and_return([collection_ids_solr.first.id])
-        allow(parent_record_2).to receive(:file_set_ids).and_return([])
-        allow(ActiveFedora::Base).to receive(:find).with(work_ids_solr.first.id).and_return(parent_record_1)
-        allow(ActiveFedora::Base).to receive(:find).with(work_ids_solr.last.id).and_return(parent_record_2)
-      end
-
-      describe '#find_child_file_sets' do
-        before do
-          subject.instance_variable_set(:@file_set_ids, [])
-        end
-
-        it 'returns the ids when child file sets are present' do
-          subject.find_child_file_sets(work_ids_solr.pluck(:id))
-          expect(subject.instance_variable_get(:@file_set_ids)).to eq([file_set_ids_solr.pluck(:id).first])
-        end
-      end
-
-      describe '#create_new_entries' do
-        # Use OpenStructs to simulate the behavior of ActiveFedora::SolrHit instances.
-
-        it 'invokes Bulkrax::ExportWorkJob once per Entry' do
-          expect(ActiveFedora::SolrService).to receive(:query).and_return(work_ids_solr)
-          expect(Bulkrax::ExportWorkJob).to receive(:perform_now).exactly(2).times
-          subject.create_new_entries
-        end
-
-        context 'with an export limit of 1' do
-          let(:exporter) { FactoryBot.create(:bulkrax_exporter_worktype_bagit, limit: 1) }
-
-          it 'invokes Bulkrax::ExportWorkJob once' do
-            expect(ActiveFedora::SolrService).to receive(:query).and_return(work_ids_solr)
-            expect(Bulkrax::ExportWorkJob).to receive(:perform_now).exactly(1).times
-            subject.create_new_entries
-          end
-        end
-
-        context 'with an export limit of 0' do
-          let(:exporter) { FactoryBot.create(:bulkrax_exporter_worktype_bagit, limit: 0) }
-
-          it 'invokes Bulkrax::ExportWorkJob once per Entry' do
-            expect(ActiveFedora::SolrService).to receive(:query).and_return(work_ids_solr)
-            expect(Bulkrax::ExportWorkJob).to receive(:perform_now).exactly(2).times
-            subject.create_new_entries
-          end
-        end
-
-        context 'when exporting all' do
-          let(:exporter) { FactoryBot.create(:bulkrax_exporter, :all) }
-
-          before do
-            allow(ActiveFedora::SolrService).to receive(:query).and_return(work_ids_solr, collection_ids_solr, file_set_ids_solr)
-            allow(ActiveFedora::Base).to receive(:find).and_return(parent_record_1)
-          end
-
-          it 'creates entries for all works, collections and file sets' do
-            expect(ExportWorkJob).to receive(:perform_now).exactly(6).times
-
-            subject.create_new_entries
-          end
-
-          it 'creates entries for all works' do
-            work_entry_ids = Entry.where(identifier: work_ids_solr.map(&:id)).map(&:id)
-            work_entry_ids.each do |id|
-              expect(ExportWorkJob).to receive(:perform_now).with(id, exporter.last_run.id).once
-            end
-
-            subject.create_new_entries
-          end
-
-          it 'creates entries for all collections' do
-            collection_entry_ids = Entry.where(identifier: collection_ids_solr.map(&:id)).map(&:id)
-            collection_entry_ids.each do |id|
-              expect(ExportWorkJob).to receive(:perform_now).with(id, exporter.last_run.id).once
-            end
-
-            subject.create_new_entries
-          end
-
-          it 'creates entries for all file sets' do
-            file_set_entry_ids = Entry.where(identifier: file_set_ids_solr.map(&:id)).map(&:id)
-            file_set_entry_ids.each do |id|
-              expect(ExportWorkJob).to receive(:perform_now).with(id, exporter.last_run.id).once
-            end
-
-            subject.create_new_entries
-          end
-
-          it 'exported entries are given the correct class' do
-            # Bulkrax::CsvFileSetEntry == Bulkrax::CsvEntry (false)
-            # Bulkrax::CsvFileSetEntry.is_a? Bulkrax::CsvEntry (true)
-            # because of the above, although we only have 2 work id's, the 3 file set id's also increase the Bulkrax::CsvEntry count
-            expect { subject.create_new_entries }
-              .to change(CsvEntry, :count)
-              .by(6)
-              .and change(CsvCollectionEntry, :count)
-              .by(1)
-              .and change(CsvFileSetEntry, :count)
-              .by(3)
-          end
-        end
-      end
 
       describe '#write_files' do
         let(:work_entry_1) { FactoryBot.create(:bulkrax_csv_entry, importerexporter: exporter) }
@@ -452,51 +347,6 @@ module Bulkrax
           end
         end
       end
-
-      describe '#export_headers' do
-        let(:work_id) { SecureRandom.alphanumeric(9) }
-        let(:exporter) do
-          FactoryBot.create(:bulkrax_exporter_worktype_bagit, field_mapping: {
-                              'id' => { from: ['id'], source_identifier: true },
-                              'title' => { from: ['display_title'] },
-                              'first_name' => { from: ['multiple_objects_first_name'], object: 'multiple_objects' },
-                              'last_name' => { from: ['multiple_objects_last_name'], object: 'multiple_objects' },
-                              'position' => { from: ['multiple_objects_position'], object: 'multiple_objects', nested_type: 'Array' }
-                            })
-        end
-
-        let(:entry) do
-          FactoryBot.create(:bulkrax_csv_entry, importerexporter: exporter, parsed_metadata: {
-                              'id' => work_id,
-                              'display_title' => 'First',
-                              'multiple_objects_first_name_1' => 'Judge',
-                              'multiple_objects_last_name_1' => 'Hines',
-                              'multiple_objects_position_1_1' => 'King',
-                              'multiple_objects_position_1_2' => 'Lord',
-                              'multiple_objects_first_name_2' => 'Aaliyah'
-                            })
-        end
-
-        before do
-          allow(ActiveFedora::SolrService).to receive(:query).and_return(OpenStruct.new(id: work_id))
-          allow(exporter.entries).to receive(:where).and_return([entry])
-          allow(subject).to receive(:headers).and_return(entry.parsed_metadata.keys)
-        end
-
-        # rubocop:disable RSpec/ExampleLength
-        it 'returns an array of single, numerated and double numerated header values' do
-          headers = subject.export_headers
-          expect(headers).to include('id')
-          expect(headers).to include('model')
-          expect(headers).to include('display_title')
-          expect(headers).to include('multiple_objects_first_name_1')
-          expect(headers).to include('multiple_objects_last_name_1')
-          expect(headers).to include('multiple_objects_position_1_1')
-          expect(headers).to include('multiple_objects_position_1_2')
-          expect(headers).to include('multiple_objects_first_name_2')
-        end
-        # rubocop:enable RSpec/ExampleLength
-      end
     end
   end
 end
diff --git a/spec/parsers/bulkrax/csv_parser_spec.rb b/spec/parsers/bulkrax/csv_parser_spec.rb
index 45c00fb8..b462f9fa 100644
--- a/spec/parsers/bulkrax/csv_parser_spec.rb
+++ b/spec/parsers/bulkrax/csv_parser_spec.rb
@@ -389,47 +389,56 @@ module Bulkrax
       end
     end
 
+    describe '#find_child_file_sets' do
+      subject(:parser) { described_class.new(exporter) }
+      let(:exporter) { FactoryBot.create(:bulkrax_exporter_worktype) }
+      let(:work_ids_solr) { [OpenStruct.new(id: SecureRandom.alphanumeric(9))] }
+      let(:file_set_ids_solr) { [OpenStruct.new(id: SecureRandom.alphanumeric(9))] }
+      let(:parent_record_1) { build(:work) }
+
+      before do
+        parser.instance_variable_set(:@file_set_ids, [])
+        allow(ActiveFedora::SolrService).to receive(:query).and_return(work_ids_solr)
+        allow(ActiveFedora::Base).to receive(:find).with(work_ids_solr.first.id).and_return(parent_record_1)
+        allow(parent_record_1).to receive(:file_set_ids).and_return(file_set_ids_solr.pluck(:id))
+      end
+
+      it 'returns the ids when child file sets are present' do
+        parser.find_child_file_sets(work_ids_solr.pluck(:id))
+        expect(parser.instance_variable_get(:@file_set_ids)).to eq(file_set_ids_solr.pluck(:id))
+      end
+    end
+
     describe '#create_new_entries' do
       subject(:parser) { described_class.new(exporter) }
-      let(:exporter) { FactoryBot.create(:bulkrax_exporter_worktype) }
+      let(:exporter) { FactoryBot.create(:bulkrax_exporter, :all) }
       # Use OpenStructs to simulate the behavior of ActiveFedora::SolrHit instances.
       let(:work_ids_solr) { [OpenStruct.new(id: SecureRandom.alphanumeric(9)), OpenStruct.new(id: SecureRandom.alphanumeric(9))] }
       let(:collection_ids_solr) { [OpenStruct.new(id: SecureRandom.alphanumeric(9)), OpenStruct.new(id: SecureRandom.alphanumeric(9))] }
       let(:file_set_ids_solr) { [OpenStruct.new(id: SecureRandom.alphanumeric(9)), OpenStruct.new(id: SecureRandom.alphanumeric(9)), OpenStruct.new(id: SecureRandom.alphanumeric(9))] }
 
-      it 'invokes Bulkrax::ExportWorkJob once per Entry' do
-        expect(ActiveFedora::SolrService).to receive(:query).and_return(work_ids_solr)
-        expect(Bulkrax::ExportWorkJob).to receive(:perform_now).exactly(2).times
-        parser.create_new_entries
+      before do
+        allow(ActiveFedora::SolrService).to receive(:query).and_return(work_ids_solr, collection_ids_solr, file_set_ids_solr)
       end
 
-      context 'with an export limit of 1' do
-        let(:exporter) { FactoryBot.create(:bulkrax_exporter_worktype, limit: 1) }
-
-        it 'invokes Bulkrax::ExportWorkJob once' do
-          expect(ActiveFedora::SolrService).to receive(:query).and_return(work_ids_solr)
-          expect(Bulkrax::ExportWorkJob).to receive(:perform_now).exactly(1).times
+      context 'with an export limit of 0' do
+        it 'invokes Bulkrax::ExportWorkJob once per Entry' do
+          expect(Bulkrax::ExportWorkJob).to receive(:perform_now).exactly(7).times
           parser.create_new_entries
         end
       end
 
-      context 'with an export limit of 0' do
-        let(:exporter) { FactoryBot.create(:bulkrax_exporter_worktype, limit: 0) }
+      context 'with an export limit of 1' do
+        it 'invokes Bulkrax::ExportWorkJob once' do
+          exporter.limit = 1
 
-        it 'invokes Bulkrax::ExportWorkJob once per Entry' do
-          expect(ActiveFedora::SolrService).to receive(:query).and_return(work_ids_solr)
-          expect(Bulkrax::ExportWorkJob).to receive(:perform_now).exactly(2).times
+          # although the work has a file attached, the limit means the file set is not exported
+          expect(Bulkrax::ExportWorkJob).to receive(:perform_now).exactly(1).times
           parser.create_new_entries
         end
       end
 
       context 'when exporting all' do
-        let(:exporter) { FactoryBot.create(:bulkrax_exporter, :all) }
-
-        before do
-          allow(ActiveFedora::SolrService).to receive(:query).and_return(work_ids_solr, collection_ids_solr, file_set_ids_solr)
-        end
-
         it 'exports works, collections, and file sets' do
           expect(ExportWorkJob).to receive(:perform_now).exactly(7).times
 
@@ -470,7 +479,7 @@ module Bulkrax
             .and change(CsvCollectionEntry, :count)
             .by(2)
             .and change(CsvEntry, :count)
-            .by(7) # 6 csv entries minus 3 file set entries minus 2 collection entries equals 2 work entries
+            .by(7) # 7 csv entries minus 3 file set entries minus 2 collection entries equals 2 work entries
         end
       end
 
@@ -490,6 +499,26 @@ module Bulkrax
           parser.create_new_entries
         end
       end
+
+      context 'when exporting by work type' do
+        let(:exporter) { FactoryBot.create(:bulkrax_exporter_worktype) }
+        let(:parent_record_1) { build(:work, id: work_ids_solr.first.id) }
+        let(:parent_record_2) { build(:work, id: work_ids_solr.last.id) }
+
+        before do
+          allow(parent_record_1).to receive(:file_set_ids).and_return([file_set_ids_solr.pluck(:id).first])
+          allow(parent_record_2).to receive(:file_set_ids).and_return(file_set_ids_solr.pluck(:id).from(1))
+          allow(ActiveFedora::SolrService).to receive(:query).and_return(work_ids_solr)
+          allow(ActiveFedora::Base).to receive(:find).with(work_ids_solr.first.id).and_return(parent_record_1)
+          allow(ActiveFedora::Base).to receive(:find).with(work_ids_solr.last.id).and_return(parent_record_2)
+        end
+
+        it 'exports the works and file sets related to the works' do
+          expect(ExportWorkJob).to receive(:perform_now).exactly(5).times
+
+          parser.create_new_entries
+        end
+      end
     end
 
     describe '#setup_export_file' do