From ba18a6bddd6f205e4af0bcee4b8ea5851c7c92b9 Mon Sep 17 00:00:00 2001
From: PascalEgn
Date: Mon, 16 Sep 2024 11:31:23 +0200
Subject: [PATCH] add classifier keywords remove script

---
Review notes: line breaks restored; check() now returns the any() result
directly, do() uses pop() so a record without keywords is a no-op, and
docstrings were added. Hunk size and diffstat are updated accordingly; the
script.py index hash below still names the originally submitted blob.

 .../kustomization.yml                        | 11 ++++++
 scripts/remove-classifier-keywords/script.py | 34 +++++++++++++++++++
 2 files changed, 45 insertions(+)
 create mode 100644 scripts/remove-classifier-keywords/kustomization.yml
 create mode 100644 scripts/remove-classifier-keywords/script.py

diff --git a/scripts/remove-classifier-keywords/kustomization.yml b/scripts/remove-classifier-keywords/kustomization.yml
new file mode 100644
index 0000000..d78fac6
--- /dev/null
+++ b/scripts/remove-classifier-keywords/kustomization.yml
@@ -0,0 +1,11 @@
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+
+resources:
+  - ../../base/script-job
+
+configMapGenerator:
+  - name: hep-script
+    files:
+      - script.py
+nameSuffix: -remove-classifier-keywords
diff --git a/scripts/remove-classifier-keywords/script.py b/scripts/remove-classifier-keywords/script.py
new file mode 100644
index 0000000..2a4278f
--- /dev/null
+++ b/scripts/remove-classifier-keywords/script.py
@@ -0,0 +1,34 @@
+from inspire_utils.record import get_value
+from inspirehep.curation.search_check_do import SearchCheckDo
+
+
+class RemoveClassifierKeywords(SearchCheckDo):
+    """Remove keywords whose source is "classifier" from matching records."""
+
+    query = "keywords.source:classifier"
+
+    @staticmethod
+    def check(record, logger, state):
+        """Return True if the record has at least one classifier keyword."""
+        return any(
+            keyword.get("source", "") == "classifier"
+            for keyword in get_value(record, "keywords", [])
+        )
+
+    @staticmethod
+    def do(record, logger, state):
+        """Strip classifier keywords; drop the field if none remain."""
+        new_keywords = [
+            keyword
+            for keyword in record.get("keywords", [])
+            if keyword.get("source", "") != "classifier"
+        ]
+        if new_keywords:
+            record["keywords"] = new_keywords
+        else:
+            # Nothing left: remove the field instead of storing an empty
+            # list; pop() is a no-op if the field is already absent.
+            record.pop("keywords", None)
+
+
+RemoveClassifierKeywords()