From 3b894898fadd634fd42cb6a59a3787be4ad6c192 Mon Sep 17 00:00:00 2001 From: Kirk Wang Date: Tue, 28 Jan 2025 09:22:39 -0800 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20Fix=20rerun=20for=20entries=20th?= =?UTF-8?q?at=20came=20from=20zips?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, when rerunning an entry that came from a zip file, the rerun would fail because it would look for the CSV in an assumed location which is based on its last importer ID. Since this was a rerun, it does not do an unzip into the assumed location so the directory does not exist. This commit will first check if the assumed location exists, and if not, it will look for the location of the last unzipped files and use that for the rerun. This does cause an interesting behavior where if the entry is a work with a file attached, it will add the file again resulting in duplicate files. I feel this is such an edge case though because typically if the entry is successful, the user will not rerun it. I added a hint text to the importer to let the user know this is a possibility. Ref: - https://github.com/notch8/palni_palci_knapsack/issues/210 --- app/parsers/bulkrax/csv_parser.rb | 17 ++++++++++++++++- .../importers/_edit_item_buttons.html.erb | 1 + 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/app/parsers/bulkrax/csv_parser.rb b/app/parsers/bulkrax/csv_parser.rb index e1a7c2fb..0555c49c 100644 --- a/app/parsers/bulkrax/csv_parser.rb +++ b/app/parsers/bulkrax/csv_parser.rb @@ -360,6 +360,8 @@ def path_to_files(**args) @path_to_files = File.join( zip? ? importer_unzip_path : File.dirname(import_file_path), 'files', filename ) + + Dir.exist?(@path_to_files) ? 
@path_to_files : File.join(real_importer_unzip_path, 'files', filename) end private @@ -379,7 +381,7 @@ def unique_collection_identifier(collection_hash) # We expect a single CSV at the top level of the zip in the CSVParser # but we are willing to go look for it if need be def real_import_file_path - return Dir["#{importer_unzip_path}/**/*.csv"].reject { |path| in_files_dir?(path) }.first if file? && zip? + return Dir["#{real_importer_unzip_path}/**/*.csv"].reject { |path| in_files_dir?(path) }.first if file? && zip? parser_fields['import_file_path'] end @@ -389,5 +391,18 @@ def real_import_file_path def in_files_dir?(path) File.dirname(path).ends_with?('files') end + + # If we don't have an existing unzip path, we'll try and find it. + # Just in case there are multiple paths, we sort by the number at the end of the path and get the last one + def real_importer_unzip_path + return importer_unzip_path if Dir.exist?(importer_unzip_path) + + Dir.glob(base_importer_unzip_path + '*').sort_by { |path| path.split(base_importer_unzip_path).last[1..-1].to_i }.last + end + + def base_importer_unzip_path + # turns "tmp/imports/tenant/import_1_20250122035229_1" to "tmp/imports/tenant/import_1_20250122035229" + importer_unzip_path.split('_')[0...-1].join('_') + end end end diff --git a/app/views/bulkrax/importers/_edit_item_buttons.html.erb b/app/views/bulkrax/importers/_edit_item_buttons.html.erb index 4a7987ab..3c16bd39 100644 --- a/app/views/bulkrax/importers/_edit_item_buttons.html.erb +++ b/app/views/bulkrax/importers/_edit_item_buttons.html.erb @@ -5,6 +5,7 @@
Options for Updating an Entry

Rebuild metadata and files.

+

Files may be duplicated if this option is used on a successful entry. Consider using Remove and then Build instead.

<%= link_to 'Build', item_entry_path(item, e), method: :patch, class: 'btn btn-primary' %>

Remove existing work and then recreate the works metadata and files.