Skip to content

Commit

Permalink
591: export from one tenant into another (#623)
Browse files Browse the repository at this point in the history
* export file sets from all export sources

* only export thumbnails on the work type row, not files

* update the csv entry and parser specs

* remove bagit parser specs for methods that are no longer in the bagit parser file

* move the #find_child_file_sets spec into the csv parser spec file since that is where the method is now
  • Loading branch information
alishaevn authored Aug 5, 2022
1 parent e1851e9 commit b2a0dac
Show file tree
Hide file tree
Showing 5 changed files with 66 additions and 183 deletions.
14 changes: 9 additions & 5 deletions app/models/bulkrax/csv_entry.rb
Original file line number Diff line number Diff line change
Expand Up @@ -112,12 +112,16 @@ def build_system_metadata
end

def build_files_metadata
file_mapping = key_for_export('file')
file_sets = hyrax_record.file_set? ? Array.wrap(hyrax_record) : hyrax_record.file_sets
filenames = map_file_sets(file_sets)
# attaching files to the FileSet row only so we don't have duplicates when importing to a new tenant
if hyrax_record.work?
build_thumbnail_files
else
file_mapping = key_for_export('file')
file_sets = hyrax_record.file_set? ? Array.wrap(hyrax_record) : hyrax_record.file_sets
filenames = map_file_sets(file_sets)

handle_join_on_export(file_mapping, filenames, mapping['file']&.[]('join')&.present?)
build_thumbnail_files if hyrax_record.work?
handle_join_on_export(file_mapping, filenames, mapping['file']&.[]('join')&.present?)
end
end

def build_relationship_metadata
Expand Down
1 change: 1 addition & 0 deletions app/parsers/bulkrax/csv_parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,7 @@ def current_record_ids
find_child_file_sets(@work_ids)
when 'worktype'
@work_ids = ActiveFedora::SolrService.query("has_model_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
find_child_file_sets(@work_ids)
when 'importer'
set_ids_for_exporting_from_importer
end
Expand Down
7 changes: 3 additions & 4 deletions spec/models/bulkrax/csv_entry_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -1009,10 +1009,10 @@ module Bulkrax
context 'with join set to true' do
let(:exporter) { create(:bulkrax_exporter, field_mapping: { 'file' => { from: ['filename'], join: true } }) }

it "adds the work's file set's filenames to the file mapping in parsed_metadata" do
it "does not add the work's file set's filenames to the file mapping in parsed_metadata" do
entry.build_files_metadata

expect(entry.parsed_metadata['filename']).to eq('hello.png | world.jpg')
expect(entry.parsed_metadata).to eq({})
end
end
end
Expand All @@ -1021,8 +1021,7 @@ module Bulkrax
it "adds the work's file set's filenames to the 'file' key in parsed_metadata" do
entry.build_files_metadata

expect(entry.parsed_metadata['file_1']).to eq('hello.png')
expect(entry.parsed_metadata['file_2']).to eq('world.jpg')
expect(entry.parsed_metadata).to eq({})
end
end
end
Expand Down
152 changes: 1 addition & 151 deletions spec/parsers/bulkrax/bagit_parser_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -274,117 +274,12 @@ module Bulkrax
end

context 'when exporting a bagit file' do
# Use OpenStructs to simulate the behavior of ActiveFedora::SolrHit instances.
subject { described_class.new(exporter) }
let(:exporter) { FactoryBot.create(:bulkrax_exporter_worktype_bagit) }
let(:work_ids_solr) { [OpenStruct.new(id: SecureRandom.alphanumeric(9)), OpenStruct.new(id: SecureRandom.alphanumeric(9))] }
let(:collection_ids_solr) { [OpenStruct.new(id: SecureRandom.alphanumeric(9))] }
let(:file_set_ids_solr) { [OpenStruct.new(id: SecureRandom.alphanumeric(9)), OpenStruct.new(id: SecureRandom.alphanumeric(9)), OpenStruct.new(id: SecureRandom.alphanumeric(9))] }
let(:parent_record_1) { build(:work) }
let(:parent_record_2) { build(:another_work) }

before do
allow(parent_record_1).to receive(:file_set_ids).and_return([file_set_ids_solr.pluck(:id).first])
allow(parent_record_1).to receive(:member_of_collection_ids).and_return([collection_ids_solr.first.id])
allow(parent_record_2).to receive(:file_set_ids).and_return([])
allow(ActiveFedora::Base).to receive(:find).with(work_ids_solr.first.id).and_return(parent_record_1)
allow(ActiveFedora::Base).to receive(:find).with(work_ids_solr.last.id).and_return(parent_record_2)
end

describe '#find_child_file_sets' do
before do
subject.instance_variable_set(:@file_set_ids, [])
end

it 'returns the ids when child file sets are present' do
subject.find_child_file_sets(work_ids_solr.pluck(:id))
expect(subject.instance_variable_get(:@file_set_ids)).to eq([file_set_ids_solr.pluck(:id).first])
end
end

describe '#create_new_entries' do
# Use OpenStructs to simulate the behavior of ActiveFedora::SolrHit instances.

it 'invokes Bulkrax::ExportWorkJob once per Entry' do
expect(ActiveFedora::SolrService).to receive(:query).and_return(work_ids_solr)
expect(Bulkrax::ExportWorkJob).to receive(:perform_now).exactly(2).times
subject.create_new_entries
end

context 'with an export limit of 1' do
let(:exporter) { FactoryBot.create(:bulkrax_exporter_worktype_bagit, limit: 1) }

it 'invokes Bulkrax::ExportWorkJob once' do
expect(ActiveFedora::SolrService).to receive(:query).and_return(work_ids_solr)
expect(Bulkrax::ExportWorkJob).to receive(:perform_now).exactly(1).times
subject.create_new_entries
end
end

context 'with an export limit of 0' do
let(:exporter) { FactoryBot.create(:bulkrax_exporter_worktype_bagit, limit: 0) }

it 'invokes Bulkrax::ExportWorkJob once per Entry' do
expect(ActiveFedora::SolrService).to receive(:query).and_return(work_ids_solr)
expect(Bulkrax::ExportWorkJob).to receive(:perform_now).exactly(2).times
subject.create_new_entries
end
end

context 'when exporting all' do
let(:exporter) { FactoryBot.create(:bulkrax_exporter, :all) }

before do
allow(ActiveFedora::SolrService).to receive(:query).and_return(work_ids_solr, collection_ids_solr, file_set_ids_solr)
allow(ActiveFedora::Base).to receive(:find).and_return(parent_record_1)
end

it 'creates entries for all works, collections and file sets' do
expect(ExportWorkJob).to receive(:perform_now).exactly(6).times

subject.create_new_entries
end

it 'creates entries for all works' do
work_entry_ids = Entry.where(identifier: work_ids_solr.map(&:id)).map(&:id)
work_entry_ids.each do |id|
expect(ExportWorkJob).to receive(:perform_now).with(id, exporter.last_run.id).once
end

subject.create_new_entries
end

it 'creates entries for all collections' do
collection_entry_ids = Entry.where(identifier: collection_ids_solr.map(&:id)).map(&:id)
collection_entry_ids.each do |id|
expect(ExportWorkJob).to receive(:perform_now).with(id, exporter.last_run.id).once
end

subject.create_new_entries
end

it 'creates entries for all file sets' do
file_set_entry_ids = Entry.where(identifier: file_set_ids_solr.map(&:id)).map(&:id)
file_set_entry_ids.each do |id|
expect(ExportWorkJob).to receive(:perform_now).with(id, exporter.last_run.id).once
end

subject.create_new_entries
end

it 'exported entries are given the correct class' do
# Bulkrax::CsvFileSetEntry == Bulkrax::CsvEntry (false)
# Bulkrax::CsvFileSetEntry.is_a? Bulkrax::CsvEntry (true)
# because of the above, although we only have 2 work ids, the 3 file set ids also increase the Bulkrax::CsvEntry count
expect { subject.create_new_entries }
.to change(CsvEntry, :count)
.by(6)
.and change(CsvCollectionEntry, :count)
.by(1)
.and change(CsvFileSetEntry, :count)
.by(3)
end
end
end

describe '#write_files' do
let(:work_entry_1) { FactoryBot.create(:bulkrax_csv_entry, importerexporter: exporter) }
Expand Down Expand Up @@ -452,51 +347,6 @@ module Bulkrax
end
end
end

describe '#export_headers' do
let(:work_id) { SecureRandom.alphanumeric(9) }
let(:exporter) do
FactoryBot.create(:bulkrax_exporter_worktype_bagit, field_mapping: {
'id' => { from: ['id'], source_identifier: true },
'title' => { from: ['display_title'] },
'first_name' => { from: ['multiple_objects_first_name'], object: 'multiple_objects' },
'last_name' => { from: ['multiple_objects_last_name'], object: 'multiple_objects' },
'position' => { from: ['multiple_objects_position'], object: 'multiple_objects', nested_type: 'Array' }
})
end

let(:entry) do
FactoryBot.create(:bulkrax_csv_entry, importerexporter: exporter, parsed_metadata: {
'id' => work_id,
'display_title' => 'First',
'multiple_objects_first_name_1' => 'Judge',
'multiple_objects_last_name_1' => 'Hines',
'multiple_objects_position_1_1' => 'King',
'multiple_objects_position_1_2' => 'Lord',
'multiple_objects_first_name_2' => 'Aaliyah'
})
end

before do
allow(ActiveFedora::SolrService).to receive(:query).and_return(OpenStruct.new(id: work_id))
allow(exporter.entries).to receive(:where).and_return([entry])
allow(subject).to receive(:headers).and_return(entry.parsed_metadata.keys)
end

# rubocop:disable RSpec/ExampleLength
it 'returns an array of single, numerated and double numerated header values' do
headers = subject.export_headers
expect(headers).to include('id')
expect(headers).to include('model')
expect(headers).to include('display_title')
expect(headers).to include('multiple_objects_first_name_1')
expect(headers).to include('multiple_objects_last_name_1')
expect(headers).to include('multiple_objects_position_1_1')
expect(headers).to include('multiple_objects_position_1_2')
expect(headers).to include('multiple_objects_first_name_2')
end
# rubocop:enable RSpec/ExampleLength
end
end
end
end
75 changes: 52 additions & 23 deletions spec/parsers/bulkrax/csv_parser_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -389,47 +389,56 @@ module Bulkrax
end
end

describe '#find_child_file_sets' do
subject(:parser) { described_class.new(exporter) }
let(:exporter) { FactoryBot.create(:bulkrax_exporter_worktype) }
let(:work_ids_solr) { [OpenStruct.new(id: SecureRandom.alphanumeric(9))] }
let(:file_set_ids_solr) { [OpenStruct.new(id: SecureRandom.alphanumeric(9))] }
let(:parent_record_1) { build(:work) }

before do
parser.instance_variable_set(:@file_set_ids, [])
allow(ActiveFedora::SolrService).to receive(:query).and_return(work_ids_solr)
allow(ActiveFedora::Base).to receive(:find).with(work_ids_solr.first.id).and_return(parent_record_1)
allow(parent_record_1).to receive(:file_set_ids).and_return(file_set_ids_solr.pluck(:id))
end

it 'returns the ids when child file sets are present' do
parser.find_child_file_sets(work_ids_solr.pluck(:id))
expect(parser.instance_variable_get(:@file_set_ids)).to eq(file_set_ids_solr.pluck(:id))
end
end

describe '#create_new_entries' do
subject(:parser) { described_class.new(exporter) }
let(:exporter) { FactoryBot.create(:bulkrax_exporter_worktype) }
let(:exporter) { FactoryBot.create(:bulkrax_exporter, :all) }
# Use OpenStructs to simulate the behavior of ActiveFedora::SolrHit instances.
let(:work_ids_solr) { [OpenStruct.new(id: SecureRandom.alphanumeric(9)), OpenStruct.new(id: SecureRandom.alphanumeric(9))] }
let(:collection_ids_solr) { [OpenStruct.new(id: SecureRandom.alphanumeric(9)), OpenStruct.new(id: SecureRandom.alphanumeric(9))] }
let(:file_set_ids_solr) { [OpenStruct.new(id: SecureRandom.alphanumeric(9)), OpenStruct.new(id: SecureRandom.alphanumeric(9)), OpenStruct.new(id: SecureRandom.alphanumeric(9))] }

it 'invokes Bulkrax::ExportWorkJob once per Entry' do
expect(ActiveFedora::SolrService).to receive(:query).and_return(work_ids_solr)
expect(Bulkrax::ExportWorkJob).to receive(:perform_now).exactly(2).times
parser.create_new_entries
before do
allow(ActiveFedora::SolrService).to receive(:query).and_return(work_ids_solr, collection_ids_solr, file_set_ids_solr)
end

context 'with an export limit of 1' do
let(:exporter) { FactoryBot.create(:bulkrax_exporter_worktype, limit: 1) }

it 'invokes Bulkrax::ExportWorkJob once' do
expect(ActiveFedora::SolrService).to receive(:query).and_return(work_ids_solr)
expect(Bulkrax::ExportWorkJob).to receive(:perform_now).exactly(1).times
context 'with an export limit of 0' do
it 'invokes Bulkrax::ExportWorkJob once per Entry' do
expect(Bulkrax::ExportWorkJob).to receive(:perform_now).exactly(7).times
parser.create_new_entries
end
end

context 'with an export limit of 0' do
let(:exporter) { FactoryBot.create(:bulkrax_exporter_worktype, limit: 0) }
context 'with an export limit of 1' do
it 'invokes Bulkrax::ExportWorkJob once' do
exporter.limit = 1

it 'invokes Bulkrax::ExportWorkJob once per Entry' do
expect(ActiveFedora::SolrService).to receive(:query).and_return(work_ids_solr)
expect(Bulkrax::ExportWorkJob).to receive(:perform_now).exactly(2).times
# although the work has a file attached, the limit means the file set is not exported
expect(Bulkrax::ExportWorkJob).to receive(:perform_now).exactly(1).times
parser.create_new_entries
end
end

context 'when exporting all' do
let(:exporter) { FactoryBot.create(:bulkrax_exporter, :all) }

before do
allow(ActiveFedora::SolrService).to receive(:query).and_return(work_ids_solr, collection_ids_solr, file_set_ids_solr)
end

it 'exports works, collections, and file sets' do
expect(ExportWorkJob).to receive(:perform_now).exactly(7).times

Expand Down Expand Up @@ -470,7 +479,7 @@ module Bulkrax
.and change(CsvCollectionEntry, :count)
.by(2)
.and change(CsvEntry, :count)
.by(7) # 6 csv entries minus 3 file set entries minus 2 collection entries equals 2 work entries
.by(7) # 7 csv entries minus 3 file set entries minus 2 collection entries equals 2 work entries
end
end

Expand All @@ -490,6 +499,26 @@ module Bulkrax
parser.create_new_entries
end
end

context 'when exporting by work type' do
let(:exporter) { FactoryBot.create(:bulkrax_exporter_worktype) }
let(:parent_record_1) { build(:work, id: work_ids_solr.first.id) }
let(:parent_record_2) { build(:work, id: work_ids_solr.last.id) }

before do
allow(parent_record_1).to receive(:file_set_ids).and_return([file_set_ids_solr.pluck(:id).first])
allow(parent_record_2).to receive(:file_set_ids).and_return(file_set_ids_solr.pluck(:id).from(1))
allow(ActiveFedora::SolrService).to receive(:query).and_return(work_ids_solr)
allow(ActiveFedora::Base).to receive(:find).with(work_ids_solr.first.id).and_return(parent_record_1)
allow(ActiveFedora::Base).to receive(:find).with(work_ids_solr.last.id).and_return(parent_record_2)
end

it 'exports the works and file sets related to the works' do
expect(ExportWorkJob).to receive(:perform_now).exactly(5).times

parser.create_new_entries
end
end
end

describe '#setup_export_file' do
Expand Down

0 comments on commit b2a0dac

Please sign in to comment.