From a88af47d1830b831caaf6e76a01b46a37b489c6b Mon Sep 17 00:00:00 2001 From: LaRita Robinson Date: Wed, 17 May 2023 14:23:34 -0400 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20Refactor=20create=20relationshps?= =?UTF-8?q?=20job?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Bring into alignment with Bulkrax relationship job - Adjust for appropriate reindexing needed for UV - Add relationship job specs --- .rubocop.yml | 1 + lib/iiif_print.rb | 1 - lib/iiif_print/engine.rb | 1 + .../jobs/create_relationships_job.rb | 104 ++++++++++++------ .../jobs/create_relationships_job_spec.rb | 100 +++++++++++++++-- 5 files changed, 166 insertions(+), 41 deletions(-) diff --git a/.rubocop.yml b/.rubocop.yml index 4e0d6376..3303f2d0 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -72,6 +72,7 @@ Metrics/MethodLength: - 'lib/generators/iiif_print/catalog_controller_generator.rb' - 'lib/iiif_print/ingest/ndnp/ndnp_mets_helper.rb' - 'lib/iiif_print/ingest/pdf_issue_ingester.rb' + - 'lib/iiif_print/jobs/create_relationships_job.rb' - 'spec/model_shared.rb' Naming/PredicateName: diff --git a/lib/iiif_print.rb b/lib/iiif_print.rb index 218b1dd1..e554af67 100644 --- a/lib/iiif_print.rb +++ b/lib/iiif_print.rb @@ -17,7 +17,6 @@ require "iiif_print/jobs/application_job" require "iiif_print/blacklight_iiif_search/annotation_decorator" require "iiif_print/jobs/child_works_from_pdf_job" -require "iiif_print/jobs/create_relationships_job" require "iiif_print/split_pdfs/base_splitter" require "iiif_print/split_pdfs/child_work_creation_from_pdf_service" diff --git a/lib/iiif_print/engine.rb b/lib/iiif_print/engine.rb index da5fa4bd..b8273cf2 100644 --- a/lib/iiif_print/engine.rb +++ b/lib/iiif_print/engine.rb @@ -12,6 +12,7 @@ class Engine < ::Rails::Engine # rubocop:disable Metrics/BlockLength config.to_prepare do + require "iiif_print/jobs/create_relationships_job" # We don't have a hard requirement of Bullkrax but in our experience, lingering on earlier # versions can introduce bugs of both Bulkrax and some of the assumptions that we've resolved. # Very early versions of Bulkrax do not have VERSION defined diff --git a/lib/iiif_print/jobs/create_relationships_job.rb b/lib/iiif_print/jobs/create_relationships_job.rb index 5b48ee0a..56ce7e6c 100644 --- a/lib/iiif_print/jobs/create_relationships_job.rb +++ b/lib/iiif_print/jobs/create_relationships_job.rb @@ -1,41 +1,60 @@ module IiifPrint module Jobs - # Break a pdf into individual pages + # Link newly created child works to the parent class CreateRelationshipsJob < IiifPrint::Jobs::ApplicationJob - # Link newly created child works to the parent - # @param user: [User] user + include Hyrax::Lockable + + RETRY_MAX = 10 + # @param parent_id: [] parent work id # @param parent_model: [] parent model # @param child_model: [] child model - def perform(user:, parent_id:, parent_model:, child_model:) - if completed_child_data_for(parent_id, child_model) + # @param retries: [] count used during rescheduling to prevent infinite retries + def perform(parent_id:, parent_model:, child_model:, retries: 0, **) + @parent_id = parent_id + @parent_model = parent_model + @child_model = child_model + @retries = retries + 1 + + @number_of_successes = 0 + @number_of_failures = 0 + @parent_record_members_added = false + @errors = [] + + # Because we need our children in the correct order, we can't create any + # relationships until all child works have been created. + if completed_child_data # add the members - parent_work = parent_model.constantize.find(parent_id) - create_relationships(user: user, parent: parent_work, ordered_children: @child_works) - @pending_children.each(&:destroy) + add_children_to_parent + if @number_of_failures.zero? && @number_of_successes == @pending_children.count + # remove pending relationships upon valid completion + @pending_children.each(&:destroy) + else + raise "CreateRelationshipsJob failed for parent id: #{@parent_id} " \ + "had #{@number_of_successes} successes & #{@number_of_failures} failures, " \ + "with errors: #{@errors}" + end else - # reschedule the job and end this one normally - # - # TODO: Depending on how things shake out, we could be infinitely rescheduling this job. - # Consider a time to live parameter. - reschedule(user: user, parent_id: parent_id, parent_model: parent_model, child_model: child_model) + # if we aren't ready yet, reschedule the job and end this one normally + reschedule_job end end private - # load @child_works, and return true or false - def completed_child_data_for(parent_id, child_model) + # load @child_works and @pending children, and + # return boolean indicating whether all chilren are present + def completed_child_data @child_works = [] found_all_children = true # find and sequence all pending children - @pending_children = IiifPrint::PendingRelationship.where(parent_id: parent_id).order('child_order asc') + @pending_children = IiifPrint::PendingRelationship.where(parent_id: @parent_id).order('child_order asc') # find child works (skip out if any haven't yet been created) @pending_children.each do |child| # find by title... if any aren't found, the child works are not yet ready - found_children = find_children_by_title_for(child.child_title, child_model) + found_children = find_children_by_title_for(child.child_title, @child_model) found_all_children = false if found_children.empty? break unless found_all_children == true @child_works += found_children @@ -49,30 +68,53 @@ def find_children_by_title_for(title, model) model.constantize.where(title: title) end - def reschedule(user:, parent_id:, parent_model:, child_model:) + def add_children_to_parent + parent_work = @parent_model.constantize.find(@parent_id) + create_relationships(parent: parent_work, ordered_children: @child_works) + end + + def reschedule_job + return if @retries > RETRY_MAX CreateRelationshipsJob.set(wait: 10.minutes).perform_later( - user: user, - parent_id: parent_id, - parent_model: parent_model, - child_model: child_model + parent_id: @parent_id, + parent_model: @parent_model, + child_model: @child_model, + retries: @retries ) end - def create_relationships(user:, parent:, ordered_children:) - records_hash = {} - ordered_children.map(&:id).each_with_index do |child_id, i| - records_hash[i.to_s] = { id: child_id } + def create_relationships(parent:, ordered_children:) + acquire_lock_for(parent.id) do + # Not sure uncached is needed here, but kept + # for consistency with Bulkrax's relationships logic + ActiveRecord::Base.uncached do + ordered_children.each do |child| + add_to_work(child_record: child, parent_record: parent) + @number_of_successes += 1 + rescue => e + @number_of_failures += 1 + @errors << e + end + end + parent.save! if @parent_record_members_added && @number_of_failures.zero? end - attrs = { work_members_attributes: records_hash } - parent.try(:reindex_extent=, Hyrax::Adapters::NestingIndexAdapter::LIMITED_REINDEX) - env = Hyrax::Actors::Environment.new(parent, Ability.new(user), attrs) - raise env unless Hyrax::CurationConcern.actor.update(env) - # need to reindex all file_sets to make all ancestors are indexed + # Bulkrax no longer reindexes file_sets, but IiifPrint needs both to add is_page_of_ssim for universal viewer. + # This is where child works need to be indexed (AFTER the parent save), as opposed to how Bulkrax does it. ordered_children.each do |child_work| + child_work.update_index child_work.file_sets.each(&:update_index) if child_work.respond_to?(:file_sets) end end + + def add_to_work(child_record:, parent_record:) + return true if parent_record.ordered_members.to_a.include?(child_record) + + parent_record.ordered_members << child_record + @parent_record_members_added = true + # Bulkrax does child_record.save! here, but it makes no sense + # as there is nothing to save or index at this point. + end end end end diff --git a/spec/iiif_print/jobs/create_relationships_job_spec.rb b/spec/iiif_print/jobs/create_relationships_job_spec.rb index 5eb14c78..707fd39b 100644 --- a/spec/iiif_print/jobs/create_relationships_job_spec.rb +++ b/spec/iiif_print/jobs/create_relationships_job_spec.rb @@ -1,17 +1,99 @@ +# frozen_string_literal: true + require 'spec_helper' require 'misc_shared' -RSpec.describe IiifPrint::Jobs::CreateRelationshipsJob do - # TODO: add specs - let(:parent) { WorkWithIiifPrintConfig.new(title: ['required title']) } - let(:my_user) { build(:user) } - let(:parent_model) { WorkWithIiifPrintConfig } - let(:child_model) { WorkWithIiifPrintConfig } +module IiifPrint::Jobs + RSpec.describe CreateRelationshipsJob, type: :job do + let(:create_relationships_job) { described_class.new } + + let(:parent_model) { WorkWithIiifPrintConfig.to_s } + let(:child_model) { WorkWithIiifPrintConfig.to_s } + let(:parent_record) { WorkWithIiifPrintConfig.new(title: ['required title']) } + let(:child_record1) { WorkWithIiifPrintConfig.new(title: ["Child of #{parent_record.id} page 01"]) } + let(:child_record2) { WorkWithIiifPrintConfig.new(title: ["Child of #{parent_record.id} page 02"]) } + let(:pending_rel1) { IiifPrint::PendingRelationship.new(parent_id: parent_record.id, child_title: "Child of #{parent_record.id} page 01", child_order: "Child of #{parent_record.id} page 01") } + let(:pending_rel2) { IiifPrint::PendingRelationship.new(parent_id: parent_record.id, child_title: "Child of #{parent_record.id} page 02", child_order: "Child of #{parent_record.id} page 02") } + + describe '#perform' do + before do + allow(create_relationships_job).to receive(:acquire_lock_for).and_yield + allow(create_relationships_job).to receive(:reschedule_job) + allow(parent_record).to receive(:save!) + + parent_record.save + pending_rel1.save + pending_rel2.save + end + + subject(:perform) do + create_relationships_job.perform( + parent_id: parent_record.id, + parent_model: parent_model, + child_model: child_model, + retries: 0 + ) + end + + context 'when adding a child work to a parent work' do + before do + child_record1.save + child_record2.save + end + + it 'assigns the child to the parent\'s #ordered_members' do + perform + expect(parent_record.reload.ordered_member_ids).to eq([child_record1.id, child_record2.id]) + end + + it 'deletes the pending relationships' do + expect { perform }.to change(IiifPrint::PendingRelationship, :count).by(-2) + end + + it 'does not reschedule the job' do + perform + expect(create_relationships_job).not_to have_received(:reschedule_job) + end + end + + context 'when a relationship fails' do + before do + child_record1.save + child_record2.save + end + + before do + expect_any_instance_of(CreateRelationshipsJob).to receive(:add_to_work).and_raise('error') + end + + it 'does not save the parent' do + expect { perform }.to raise_error(RuntimeError) + expect(parent_record).not_to have_received(:save!) + end + + it 'does not delete the pending relationships' do + expect { perform }.to raise_error(RuntimeError) + expect(IiifPrint::PendingRelationship.where(parent_id: parent_record.id).count).to eq(2) + end + end + + context 'when any child record is not found' do + let(:child_record2) { nil } + + before do + child_record1.save + end - let(:subject) { described_class.perform(user: my_user, parent_id: parent.id, parent_model: parent_model, child_model: child_model) } + it 'does not save the parent' do + perform + expect(parent_record).not_to have_received(:save!) + end - describe '#perform' do - xit 'loads all child work ids into ordered_members' do + it 'reschedules the job' do + perform + expect(create_relationships_job).to have_received(:reschedule_job) + end + end end end end