From f389ec4799e699229c815991bf371a7bcf4b2d09 Mon Sep 17 00:00:00 2001
From: Fabian Steeg
Date: Fri, 13 Dec 2024 10:25:47 +0100
Subject: [PATCH 1/2] Add DELETE route for index deletions from Strapi (RPB-230)

---
Changes relative to the submitted version (this section is ignored by git am):
* put(): answer 400 when the request body is not JSON instead of failing with a
  NullPointerException on jsonBody.toString() in transformAndIndex().
* Added one-line Javadoc to the new methods.
* Not changed: transformAndIndex() writes and reads fixed files below conf/output/,
  so overlapping PUT requests could pick up each other's data. Please confirm
  whether PUT requests can overlap in this deployment.
* The post-image blob id in the "index" line predates these edits; regenerate the
  patch from a real commit before sending it.

 app/controllers/nwbib/Application.java | 53 +++++++++++++++++++++-----
 conf/nwbib.routes                      |  3 +-
 2 files changed, 44 insertions(+), 12 deletions(-)

diff --git a/app/controllers/nwbib/Application.java b/app/controllers/nwbib/Application.java
index e8cf5fd..ff78281 100644
--- a/app/controllers/nwbib/Application.java
+++ b/app/controllers/nwbib/Application.java
@@ -951,20 +951,51 @@ private static List starredIds() {
 	}
 
 	public static Promise put(String id, String secret) throws FileNotFoundException, RecognitionException, IOException {
-		File input = new File("conf/output/test-output-strapi.json");
-		File output = new File("conf/output/test-output-0.json");
-		Files.write(Paths.get(input.getAbsolutePath()), request().body().asJson().toString().getBytes(Charsets.UTF_8));
-		ETL.main(new String[] {"conf/rpb-test-titel-to-lobid.flux"});
-		String result = Files.readAllLines(Paths.get(output.getAbsolutePath())).stream().collect(Collectors.joining("\n"));
 		boolean authorized = !secret.trim().isEmpty() && secret.equals(CONFIG.getString("secret"));
 		if (authorized) {
-			Cache.remove(String.format("/%s", id));
-			String url = "http://weywot3:9200/resources-rpb-test/resource/"
-					+ URLEncoder.encode("https://lobid.org/resources/" + id, "UTF-8");
-			WSRequest request = WS.url(url).setHeader("Content-Type", "application/json");
-			return request.put(result).map(response -> status(response.getStatus(), response.getBody()));
+			JsonNode json = request().body().asJson();
+			if (json == null) { // asJson() yields null when the body was not parsed as JSON
+				return Promise.pure(badRequest("Expected a JSON request body"));
+			}
+			return transformAndIndex(id, json);
+		} else {
+			return Promise.pure(unauthorized(secret));
+		}
+	}
+
+	/** Deletes the resource with the given id from the index if the secret matches the configured one, else answers 401. */
+	public static Promise delete(String id, String secret) throws FileNotFoundException, RecognitionException, IOException {
+		boolean authorized = !secret.trim().isEmpty() && secret.equals(CONFIG.getString("secret"));
+		if (authorized) {
+			return deleteFromIndex(id);
 		} else {
-			return Promise.pure(unauthorized());
+			return Promise.pure(unauthorized(secret));
 		}
 	}
+
+	/** Removes the cache entry for "/id", sends the DELETE request and relays the status and body of its response. */
+	private static Promise deleteFromIndex(String id) throws UnsupportedEncodingException {
+		Cache.remove(String.format("/%s", id));
+		WSRequest request = WS.url(elasticsearchUrl(id)).setHeader("Content-Type", "application/json");
+		return request.delete().map(response -> status(response.getStatus(), response.getBody()));
+	}
+
+	/** Writes the JSON to the ETL input file, runs the rpb-test flux and PUTs the ETL output file's content to the index. */
+	private static Promise transformAndIndex(String id, JsonNode jsonBody)
+			throws IOException, FileNotFoundException, RecognitionException, UnsupportedEncodingException {
+		File input = new File("conf/output/test-output-strapi.json");
+		File output = new File("conf/output/test-output-0.json");
+		Files.write(Paths.get(input.getAbsolutePath()), jsonBody.toString().getBytes(Charsets.UTF_8));
+		ETL.main(new String[] {"conf/rpb-test-titel-to-lobid.flux"});
+		String result = Files.readAllLines(Paths.get(output.getAbsolutePath())).stream().collect(Collectors.joining("\n"));
+		Cache.remove(String.format("/%s", id));
+		WSRequest request = WS.url(elasticsearchUrl(id)).setHeader("Content-Type", "application/json");
+		return request.put(result).map(response -> status(response.getStatus(), response.getBody()));
+	}
+
+	/** Builds the document URL in the resources-rpb-test index; the document id is the URL-encoded lobid.org resource URI. */
+	private static String elasticsearchUrl(String id) throws UnsupportedEncodingException {
+		return "http://weywot3:9200/resources-rpb-test/resource/"
+				+ URLEncoder.encode("https://lobid.org/resources/" + id, "UTF-8");
+	}
 }
diff --git a/conf/nwbib.routes b/conf/nwbib.routes
index b445db6..65a9e1e 100644
--- a/conf/nwbib.routes
+++ b/conf/nwbib.routes
@@ -34,4 +34,5 @@ GET /cgi-bin/wwwalleg/:name.pl controllers.nwbib.Application.showPl(name, d
 GET /sw/:rpbId controllers.nwbib.Application.showSw(rpbId)
 GET /o:id controllers.nwbib.Application.searchSpatial(id, from:Int?=0, size:Int?=25, format?="html")
 GET /:id controllers.nwbib.Application.show(id, format ?= "")
-PUT /:id controllers.nwbib.Application.put(id, secret ?= "")
\ No newline at end of file
+PUT /:id controllers.nwbib.Application.put(id, secret ?= "")
+DELETE /:id controllers.nwbib.Application.delete(id, secret ?= "")
\ No newline at end of file
From 9659bbe578fbb7e95d15dba6de76c34db2285c8f Mon Sep 17 00:00:00 2001
From: Fabian Steeg
Date: Tue, 17 Dec 2024 11:05:40 +0100
Subject: [PATCH 2/2] Add index deletions to transformAndIndex.sh script (RPB-230)

---
Changes relative to the submitted version (this section is ignored by git am):
* jq filters use "// empty": grep only checks that the string "data" / "delete"
  occurs somewhere in the line, so a matching line without a top-level .data or
  without .delete.rpbId made jq print the literal "null" (which ended up in the
  ETL input or as DELETE .../resources%2Fnull).
* The delete loop reads ids with "IFS= read -r", so backslashes and surrounding
  whitespace in a line are taken literally.
* The post-image blob id in the "index" line predates these edits; regenerate the
  patch from a real commit before sending it.

 transformAndIndex.sh | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/transformAndIndex.sh b/transformAndIndex.sh
index 7dffe2f..60a343b 100644
--- a/transformAndIndex.sh
+++ b/transformAndIndex.sh
@@ -15,8 +15,8 @@ sbt "runMain rpb.ETL conf/rpb-sw.flux" # creates TSV lookup file for to-lobid tr
 # Strapi title data export is incomplete, see https://jira.hbz-nrw.de/browse/RPB-202, so we don't use the approach above (rpb-authority, same for RPPD / person):
 ## zgrep -a -E '"type":"api::article.article"|"type":"api::independent-work.independent-work"' conf/strapi-export.tar.gz > conf/output/output-strapi.ndjson
 # Instead, we use the backup exports created in Strapi lifecycle afterCreate and afterUpdate hooks (copy from backup/ in Strapi instance):
-cat conf/articles.ndjson | jq -c .data > conf/output/output-strapi.ndjson
-cat conf/independent_works.ndjson | jq -c .data >> conf/output/output-strapi.ndjson
+cat conf/articles.ndjson | grep '"data"' | jq -c '.data // empty' > conf/output/output-strapi.ndjson
+cat conf/independent_works.ndjson | grep '"data"' | jq -c '.data // empty' >> conf/output/output-strapi.ndjson
 # Remove old index data:
 rm conf/output/bulk/bulk-*.ndjson
 sbt "runMain rpb.ETL conf/rpb-titel-to-lobid.flux index=$INDEX"
@@ -30,6 +30,15 @@ do
 	echo "$filename"
 	curl -XPOST --silent --show-error --fail --header 'Content-Type: application/x-ndjson' --data-binary @"$filename" 'weywot3:9200/_bulk' >> conf/output/es-curl-post.log
 done
+
+# Delete in Elasticsearch (one rpbId per line; events without an rpbId are skipped):
+cat conf/articles.ndjson | grep '"delete"' | jq --raw-output '.delete.rpbId // empty' > conf/delete.ndjson
+cat conf/independent_works.ndjson | grep '"delete"' | jq --raw-output '.delete.rpbId // empty' >> conf/delete.ndjson
+while IFS= read -r rpbId; do
+	curl -X DELETE "weywot3:9200/$INDEX/resource/https%3A%2F%2Flobid.org%2Fresources%2F$rpbId"
+done < conf/delete.ndjson
+
+# Move alias to new index:
 curl -X POST "weywot3:9200/_aliases?pretty" -H 'Content-Type: application/json' -d'
 {
 	"actions" : [