Skip to content

Commit

Permalink
download & attach submission_pdf to StateFileArchivedIntake when arch…
Browse files Browse the repository at this point in the history
…iving a batch
  • Loading branch information
mpidcock committed Jan 8, 2025
1 parent 77e7a2a commit 30c22fa
Show file tree
Hide file tree
Showing 4 changed files with 104 additions and 42 deletions.
38 changes: 24 additions & 14 deletions app/services/state_file/ty23_archiver_service.rb
Original file line number Diff line number Diff line change
@@ -1,42 +1,52 @@
module StateFile
class Ty23ArchiverService
# Maps a state code to the intake model whose records this service archives.
# Keys are symbols only: #initialize converts the incoming state code with
# `to_sym` before the lookup, so string keys would never be the ones that
# decide the data source.
INTAKE_MAP = {
  az: StateFileAzIntake,
  ny: StateFileNyIntake,
}.freeze

attr_reader :state_code, :batch_size, :data_source, :tax_year, :current_batch, :cutoff

# Builds an archiver for a single state's intakes.
#
# @param state_code [String, Symbol] state to archive; must resolve to a key of INTAKE_MAP
# @param batch_size [Integer] stored on the instance (default 100)
# @param cutoff [String] stored on the instance (default '2024-06-01')
# @raise [ArgumentError] when state_code has no entry in INTAKE_MAP
def initialize(state_code:, batch_size: 100, cutoff: '2024-06-01')
  @state_code = state_code
  @batch_size = batch_size
  # `to_s` first so that nil (or any object without #to_sym) reaches the
  # ArgumentError below instead of failing with NoMethodError.
  @data_source = INTAKE_MAP[state_code.to_s.to_sym]
  @tax_year = 2023
  @cutoff = cutoff
  @current_batch = nil
  raise ArgumentError, "#{state_code} isn't an archiveable state. Expected one of #{INTAKE_MAP.keys.join(', ')}" unless data_source
end

# Executes the SQL produced by #query_archiveable and stores the result set
# as the current batch (readable through #current_batch), then logs how many
# rows were found. Callers should read the batch via #current_batch rather
# than rely on this method's return value, which is whatever the logger returns.
def find_archiveables
  @current_batch = ActiveRecord::Base.connection.exec_query(query_archiveable)
  Rails.logger.info("Found #{current_batch.count} #{data_source.name.pluralize} to archive.")
end

# Archives every record in the current batch.
#
# For each record the source intake is loaded, the attributes it shares with
# StateFileArchivedIntake are copied onto a new archived intake, the intake's
# submission PDF (when attached) is downloaded and re-attached to the archived
# intake, and the archived intake is saved. A failure on one record is logged
# and does not stop the rest of the batch.
#
# @return [Array] ids of the source intakes that were archived successfully
def archive_batch
  archived_ids = []
  # Loop-invariant: the archive table's columns decide which intake attributes get copied.
  archive_attributes = StateFileArchivedIntake.column_names

  # A nil batch (nothing loaded yet, or already reset below) is treated as empty
  # instead of raising NoMethodError.
  (@current_batch || []).each do |record|
    intake = data_source.find(record['data_source_id'])
    # NOTE(review): column_names presumably includes the primary key and the
    # timestamps, so the archived row would reuse the source intake's id —
    # confirm ids cannot collide between the different intake tables.
    archived_intake = StateFileArchivedIntake.new(intake.attributes.slice(*archive_attributes))
    # TODO: pull mailing address details off the intake; populate relevant fields on the archived intake record
    if intake.submission_pdf.attached?
      # Download and re-attach so the archived intake gets its own copy of the file.
      archived_intake.submission_pdf.attach(
        io: StringIO.new(intake.submission_pdf.download),
        filename: intake.submission_pdf.filename.to_s,
        content_type: intake.submission_pdf.content_type,
      )
    else
      Rails.logger.error("No submission pdf attached for record #{record}. Continuing with batch.")
    end
    archived_intake.save!
    archived_ids << intake.id
  rescue StandardError => e
    # Best effort: log and move on to the next record.
    Rails.logger.warn("Caught exception #{e} for record #{record}. Continuing with batch.")
  end

  Rails.logger.info("Archived #{archived_ids.count} #{data_source.name.pluralize}: [#{archived_ids.join(', ')}]")
  @current_batch = nil # reset the batch
  archived_ids
end

def query_archiveable
Expand Down
4 changes: 2 additions & 2 deletions lib/tasks/state_file_archive_intakes.rake
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ namespace :state_file do
batch_size = 10 # we batch these since archiving involves copying the submission pdf to a new location in s3
archiver = StateFile::Ty23ArchiverService.new(state_code: 'az', batch_size: batch_size)
archiver.find_archiveables # sets `current_batch` on the archiver instance
while archiver.current_batch.count > 0 # keep archiving, in batches, until the archiver doesn't find anything else
while archiver.current_batch.count.positive? # keep archiving, in batches, until the archiver doesn't find anything else
archiver.archive_batch # process the batch
archiver.find_archiveables # set the next batch
end
Expand All @@ -18,7 +18,7 @@ namespace :state_file do
batch_size = 10 # we batch these since archiving involves copying the submission pdf to a new location in s3
archiver = StateFile::Ty23ArchiverService.new(state_code: 'ny', batch_size: batch_size)
archiver.find_archiveables # sets `current_batch` on the archiver instance
while archiver.current_batch.count > 0 # keep archiving, in batches, until the archiver doesn't find anything else
while archiver.current_batch.count.positive? # keep archiving, in batches, until the archiver doesn't find anything else
archiver.archive_batch # process the batch
archiver.find_archiveables # set the next batch
end
Expand Down
26 changes: 0 additions & 26 deletions spec/services/state_file/state_file_spec.rb

This file was deleted.

78 changes: 78 additions & 0 deletions spec/services/state_file/ty23_archiver_service_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# frozen_string_literal: true

require "rails_helper"

RSpec.describe StateFile::Ty23ArchiverService do
  # Timestamp applied to every intake, submission and transition created below.
  # NOTE(review): Date.parse reads slash-separated two-digit fields ambiguously
  # (this string likely resolves to 2001-05-23 rather than a 2023 date) —
  # confirm the intended date; an ISO 8601 literal would be unambiguous.
  backdated = Date.parse("1/5/23")

  describe '#find_archiveables' do
    %w[az ny].each do |state_code|
      # One context per state so a failure names the state it belongs to.
      context "for #{state_code} intakes" do
        let(:archiver) { described_class.new(state_code: state_code) }
        let!(:intake) { create(:"state_file_#{state_code}_intake", created_at: backdated, hashed_ssn: "fake hashed ssn") }

        context 'when there are accepted intakes to archive' do
          let!(:submission) { create(:efile_submission, :for_state, :accepted, data_source: intake, created_at: backdated) }

          before do
            submission.efile_submission_transitions.last.update(created_at: backdated)
          end

          it 'finds them and sets them as the current batch' do
            archiver.find_archiveables
            expect(archiver.current_batch.count).to eq 1
            expect(archiver.current_batch.last["hashed_ssn"]).to eq intake.hashed_ssn
          end
        end

        context 'when there are only non-accepted submissions' do
          # One submission per non-accepted state, all tied to the same intake.
          let!(:non_accepted_submissions) do
            %i[rejected resubmitted cancelled waiting].map do |status|
              create(:efile_submission, :for_state, status, data_source: intake, created_at: backdated)
            end
          end

          before do
            non_accepted_submissions.each do |submission|
              submission.efile_submission_transitions.last.update(created_at: backdated)
            end
          end

          it 'makes an empty current batch' do
            archiver.find_archiveables
            expect(archiver.current_batch.count).to eq 0
          end
        end
      end
    end
  end

  describe '#archive_batch' do
    %w[az ny].each do |state_code|
      context "when there is a current batch of #{state_code} intakes to archive" do
        let(:archiver) { described_class.new(state_code: state_code) }
        let!(:intake) { create(:"state_file_#{state_code}_intake", created_at: backdated, hashed_ssn: "fake hashed ssn") }
        let!(:submission) { create(:efile_submission, :for_state, :accepted, data_source: intake, created_at: backdated) }
        # The submission stands in for a query row: it is indexed with 'data_source_id' by the service.
        let(:mock_batch) { [submission] }
        let(:test_pdf) { Rails.root.join("spec", "fixtures", "files", "document_bundle.pdf") }

        before do
          # Block form closes the fixture file handle once the attachment is made.
          File.open(test_pdf) do |pdf|
            intake.submission_pdf.attach(
              io: pdf,
              filename: "test.pdf",
              content_type: 'application/pdf'
            )
          end
          archiver.instance_variable_set(:@current_batch, mock_batch)
        end

        it 'creates an archived intake for each intake in the batch with an attached pdf' do
          archived_ids = archiver.archive_batch
          expect(archived_ids.count).to eq 1
          # NOTE(review): the ids returned are source intake ids; this lookup assumes
          # the archived intake is stored under the same id — confirm that is intended.
          archived_ids.each do |id|
            expect(StateFileArchivedIntake.find(id).submission_pdf.attached?).to be true
          end
        end
      end
    end
  end
end

0 comments on commit 30c22fa

Please sign in to comment.