From 9659bbe578fbb7e95d15dba6de76c34db2285c8f Mon Sep 17 00:00:00 2001
From: Fabian Steeg
Date: Tue, 17 Dec 2024 11:05:40 +0100
Subject: [PATCH] Add index deletions to transformAndIndex.sh script (RPB-230)

---
Review notes (this section is not part of the commit message):
- Purpose of the change: only records carrying a `data` member are fed to the
  ETL, and every record carrying a `delete` member triggers a DELETE request
  for its rpbId against the freshly built $INDEX before the alias is moved.
- Records are selected with jq itself (`.data // empty`,
  `.delete.rpbId // empty`) rather than by grepping for a quoted key name, so
  a record that merely contains the text "data" or "delete" somewhere else no
  longer produces a `null` entry (which would have become a DELETE request
  for ...resources%2Fnull).
- Ids are percent-encoded with jq's @uri before being appended to the already
  encoded request path, and the loop reads them with `IFS= read -r` so that
  backslashes and surrounding whitespace are kept verbatim.
- conf/delete.ndjson holds one raw id per line (not JSON), despite its name.
- NOTE(review): leading whitespace of the context lines is assumed (tabs in
  the loop body, 4 spaces in the JSON body) - TODO confirm against
  transformAndIndex.sh, or apply with whitespace-insensitive matching.
- NOTE(review): the commit id above and the blob ids on the `index` line
  still describe the originally committed version of the added lines.

 transformAndIndex.sh | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/transformAndIndex.sh b/transformAndIndex.sh
index 7dffe2f..60a343b 100644
--- a/transformAndIndex.sh
+++ b/transformAndIndex.sh
@@ -15,8 +15,8 @@ sbt "runMain rpb.ETL conf/rpb-sw.flux" # creates TSV lookup file for to-lobid tr
 # Strapi title data export is incomplete, see https://jira.hbz-nrw.de/browse/RPB-202, so we don't use the approach above (rpb-authority, same for RPPD / person):
 ## zgrep -a -E '"type":"api::article.article"|"type":"api::independent-work.independent-work"' conf/strapi-export.tar.gz > conf/output/output-strapi.ndjson
 # Instead, we use the backup exports created in Strapi lifecycle afterCreate and afterUpdate hooks (copy from backup/ in Strapi instance):
-cat conf/articles.ndjson | jq -c .data > conf/output/output-strapi.ndjson
-cat conf/independent_works.ndjson | jq -c .data >> conf/output/output-strapi.ndjson
+jq -c '.data // empty' conf/articles.ndjson > conf/output/output-strapi.ndjson
+jq -c '.data // empty' conf/independent_works.ndjson >> conf/output/output-strapi.ndjson
 # Remove old index data:
 rm conf/output/bulk/bulk-*.ndjson
 sbt "runMain rpb.ETL conf/rpb-titel-to-lobid.flux index=$INDEX"
@@ -30,6 +30,15 @@ do
 	echo "$filename"
 	curl -XPOST --silent --show-error --fail --header 'Content-Type: application/x-ndjson' --data-binary @"$filename" 'weywot3:9200/_bulk' >> conf/output/es-curl-post.log
 done
+
+# Delete in Elasticsearch:
+jq --raw-output '.delete.rpbId // empty | @uri' conf/articles.ndjson > conf/delete.ndjson
+jq --raw-output '.delete.rpbId // empty | @uri' conf/independent_works.ndjson >> conf/delete.ndjson
+while IFS= read -r rpbId; do
+	curl -X DELETE "weywot3:9200/$INDEX/resource/https%3A%2F%2Flobid.org%2Fresources%2F$rpbId"
+done < conf/delete.ndjson
+
+# Move alias to new index:
 curl -X POST "weywot3:9200/_aliases?pretty" -H 'Content-Type: application/json' -d'
 {
     "actions" : [