From 468411bc9b60d50de7518068c6c4230f49f1c163 Mon Sep 17 00:00:00 2001 From: Juho Inkinen <34240031+juhoinkinen@users.noreply.github.com> Date: Fri, 14 Jun 2024 08:22:14 +0000 Subject: [PATCH 01/17] Add functionality to automatically add modelcard on upload --- annif/cli.py | 15 +++++++++-- annif/hfh_util.py | 46 +++++++++++++++++++++++++++++++ tests/test_cli.py | 26 ++++++++++++++++-- tests/test_hfh_util.py | 61 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 144 insertions(+), 4 deletions(-) diff --git a/annif/cli.py b/annif/cli.py index 673dc42fb..2b07d3147 100644 --- a/annif/cli.py +++ b/annif/cli.py @@ -616,8 +616,15 @@ def run_hyperopt(project_id, paths, docs_limit, trials, jobs, metric, results_fi "--commit-message", help="""The summary / title / first line of the generated commit.""", ) +@click.option( + "--modelcard/--no-modelcard", + default=True, + help="Update or create a ModelCard with upload.", +) @cli_util.common_options -def run_upload(project_ids_pattern, repo_id, token, revision, commit_message): +def run_upload( + project_ids_pattern, repo_id, token, revision, commit_message, modelcard +): """ Upload selected projects and their vocabularies to a Hugging Face Hub repository. \f @@ -655,6 +662,8 @@ def run_upload(project_ids_pattern, repo_id, token, revision, commit_message): finally: for fobj in fobjs: fobj.close() + if modelcard: + hfh_util.upsert_modelcard(repo_id, projects, token, revision) @cli.command("download") @@ -690,7 +699,9 @@ def run_download(project_ids_pattern, repo_id, token, revision, force): `project_ids_pattern` from the specified Hugging Face Hub repository and unzips the archives to `data/` directory and places the configuration files to `projects.d/` directory. An authentication token and revision can - be given with options. + be given with options. 
If the README.md does not exist in the repository it is + created with default contents and metadata of the uploaded projects; if it exists, + its metadata are updated as necessary. """ project_ids = hfh_util.get_matching_project_ids_from_hf_hub( diff --git a/annif/hfh_util.py b/annif/hfh_util.py index 045e4710f..c4a3aa438 100644 --- a/annif/hfh_util.py +++ b/annif/hfh_util.py @@ -238,3 +238,49 @@ def get_vocab_id_from_config(config_path: str) -> str: config.read(config_path) section = config.sections()[0] return config[section]["vocab"] + + +def upsert_modelcard(repo_id, projects, token, revision): + """This function creates or updates a Model Card in a Hugging Face Hub repository + with some metadata in it.""" + from huggingface_hub import ModelCard + + card_exists = "README.md" in _list_files_in_hf_hub(repo_id, token, revision) + if card_exists: + card = ModelCard.load(repo_id) + commit_message = "Update README.md with Annif" + else: + card = _create_modelcard(repo_id) + commit_message = "Create README.md with Annif" + + langs_existing = set(card.data.language) if card.data.language else set() + langs_to_add = {proj.vocab_lang for proj in projects} + card.data.language = list(langs_existing.union(langs_to_add)) + + card.push_to_hub( + repo_id=repo_id, token=token, revision=revision, commit_message=commit_message + ) + + +def _create_modelcard(repo_id): + from huggingface_hub import ModelCard + + content = f""" +--- + +--- + +# {repo_id.split("/")[1]} + +## Usage + +Use the `annif download` command to download selected projects with Annif; +for example, to download all projects in this repository run + + annif download "*" {repo_id} + +""" + card = ModelCard(content) + card.data.pipeline_tag = "text-classification" + card.data.tags = ["annif"] + return card diff --git a/tests/test_cli.py b/tests/test_cli.py index d4c7f17d7..6cd0f58a0 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1069,10 +1069,13 @@ def test_run_help(): assert "Run Annif in server 
mode for development." in result.output +@mock.patch("annif.hfh_util.upsert_modelcard") @mock.patch("huggingface_hub.HfApi.preupload_lfs_files") @mock.patch("huggingface_hub.CommitOperationAdd") @mock.patch("huggingface_hub.HfApi.create_commit") -def test_upload(create_commit, CommitOperationAdd, preupload_lfs_files): +def test_upload( + create_commit, CommitOperationAdd, preupload_lfs_files, upsert_modelcard +): result = runner.invoke(annif.cli.cli, ["upload", "dummy-fi", "dummy-repo"]) assert not result.exception assert create_commit.call_count == 1 @@ -1108,16 +1111,35 @@ def test_upload(create_commit, CommitOperationAdd, preupload_lfs_files): ) in create_commit.call_args_list ) + assert upsert_modelcard.call_count == 1 +@mock.patch("annif.hfh_util.upsert_modelcard") @mock.patch("huggingface_hub.HfApi.preupload_lfs_files") @mock.patch("huggingface_hub.CommitOperationAdd") @mock.patch("huggingface_hub.HfApi.create_commit") -def test_upload_many(create_commit, CommitOperationAdd, preupload_lfs_files): +def test_upload_many( + create_commit, CommitOperationAdd, preupload_lfs_files, upsert_modelcard +): result = runner.invoke(annif.cli.cli, ["upload", "dummy-*", "dummy-repo"]) assert not result.exception assert create_commit.call_count == 1 assert CommitOperationAdd.call_count == 11 + assert upsert_modelcard.call_count == 1 + + +@mock.patch("huggingface_hub.HfApi.preupload_lfs_files") +@mock.patch("huggingface_hub.CommitOperationAdd") +@mock.patch("huggingface_hub.HfApi.create_commit") +@mock.patch("annif.hfh_util.upsert_modelcard") +def test_upload_no_modelcard_upsert( + upsert_modelcard, create_commit, CommitOperationAdd, preupload_lfs_files +): + result = runner.invoke( + annif.cli.cli, ["upload", "dummy-fi", "dummy-repo", "--no-modelcard"] + ) + assert not result.exception + assert upsert_modelcard.call_count == 0 def test_upload_nonexistent_repo(): diff --git a/tests/test_hfh_util.py b/tests/test_hfh_util.py index ce3d6aac9..9d206fe06 100644 --- 
a/tests/test_hfh_util.py +++ b/tests/test_hfh_util.py @@ -101,3 +101,64 @@ def test_copy_project_config_overwrite(copy, exists): assert copy.call_args == mock.call( "tests/huggingface-cache/dummy-fi.cfg", "projects.d/dummy-fi.cfg" ) + + +@mock.patch("annif.hfh_util._list_files_in_hf_hub", return_value=["README.md"]) +@mock.patch( + "huggingface_hub.ModelCard.load", +) +@mock.patch( + "huggingface_hub.ModelCard", +) +def test_upsert_modelcard_existing_card( + modelcard, load, _list_files_in_hf_hub, project +): + repo_id = "user/repo" + projects = [project] + token = "mytoken" + revision = "main" + + annif.hfh_util.upsert_modelcard(repo_id, projects, token, revision) + + assert not modelcard.called # Do not create new card + assert load.called_once_with(repo_id) + assert load.return_value.push_to_hub.called_once_with( + repo_id=repo_id, + token=token, + revision=revision, + commit_message="Update README.md with Annif", + ) + + +@mock.patch("annif.hfh_util._list_files_in_hf_hub", return_value=[]) +@mock.patch( + "huggingface_hub.ModelCard", +) +def test_upsert_modelcard_new_card(modelcard, _list_files_in_hf_hub, project): + repo_id = "annif-user/annif-hfh-repo" + projects = [project] + token = "mytoken" + revision = "main" + + annif.hfh_util.upsert_modelcard(repo_id, projects, token, revision) + + assert modelcard.called_once() + assert "# annif-hfh-repo" in modelcard.call_args[0][0] # README heading + assert modelcard.return_value.push_to_hub.called_once_with( + repo_id=repo_id, + token=token, + revision=revision, + commit_message="Create README.md with Annif", + ) + + +@mock.patch( + "huggingface_hub.ModelCard", +) +def test_create_modelcard(modelcard): + repo_id = "user/repo" + + result = annif.hfh_util._create_modelcard(repo_id) + + assert result.data.pipeline_tag == "text-classification" + assert result.data.tags == ["annif"] From 125565efc53bc61516679922411cef2c5cbd68cd Mon Sep 17 00:00:00 2001 From: Juho Inkinen <34240031+juhoinkinen@users.noreply.github.com> 
Date: Tue, 18 Jun 2024 10:10:59 +0300 Subject: [PATCH 02/17] Make tests pass in Py 3.12; test metadata languages; other test tweaks --- tests/test_hfh_util.py | 43 ++++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/tests/test_hfh_util.py b/tests/test_hfh_util.py index 9d206fe06..a9a668339 100644 --- a/tests/test_hfh_util.py +++ b/tests/test_hfh_util.py @@ -104,61 +104,64 @@ def test_copy_project_config_overwrite(copy, exists): @mock.patch("annif.hfh_util._list_files_in_hf_hub", return_value=["README.md"]) -@mock.patch( - "huggingface_hub.ModelCard.load", -) @mock.patch( "huggingface_hub.ModelCard", ) -def test_upsert_modelcard_existing_card( - modelcard, load, _list_files_in_hf_hub, project -): - repo_id = "user/repo" +def test_upsert_modelcard_existing_card(ModelCard, _list_files_in_hf_hub, project): + repo_id = "annif-user/Annif-HFH-repo" + project.vocab_lang = "fi" projects = [project] token = "mytoken" revision = "main" + ModelCard.load.return_value.data.language = ["en"] # Mock language in card annif.hfh_util.upsert_modelcard(repo_id, projects, token, revision) - assert not modelcard.called # Do not create new card - assert load.called_once_with(repo_id) - assert load.return_value.push_to_hub.called_once_with( + ModelCard.assert_not_called() # Do not create a new card + + ModelCard.load.assert_called_once_with(repo_id) + card = ModelCard.load.return_value + card.push_to_hub.assert_called_once_with( repo_id=repo_id, token=token, revision=revision, commit_message="Update README.md with Annif", ) + assert sorted(card.data.language) == ["en", "fi"] @mock.patch("annif.hfh_util._list_files_in_hf_hub", return_value=[]) @mock.patch( "huggingface_hub.ModelCard", ) -def test_upsert_modelcard_new_card(modelcard, _list_files_in_hf_hub, project): - repo_id = "annif-user/annif-hfh-repo" +def test_upsert_modelcard_new_card(ModelCard, _list_files_in_hf_hub, project): + repo_id = "annif-user/Annif-HFH-repo" + 
project.vocab_lang = "fi" projects = [project] token = "mytoken" revision = "main" annif.hfh_util.upsert_modelcard(repo_id, projects, token, revision) - assert modelcard.called_once() - assert "# annif-hfh-repo" in modelcard.call_args[0][0] # README heading - assert modelcard.return_value.push_to_hub.called_once_with( + ModelCard.assert_called_once() + card = ModelCard.return_value + card.push_to_hub.assert_called_once_with( repo_id=repo_id, token=token, revision=revision, commit_message="Create README.md with Annif", ) + assert card.data.language == ["fi"] @mock.patch( "huggingface_hub.ModelCard", ) -def test_create_modelcard(modelcard): - repo_id = "user/repo" +def test_create_modelcard(ModelCard): + repo_id = "annif-user/Annif-HFH-repo" - result = annif.hfh_util._create_modelcard(repo_id) + card = annif.hfh_util._create_modelcard(repo_id) - assert result.data.pipeline_tag == "text-classification" - assert result.data.tags == ["annif"] + assert "# Annif-HFH-repo" in ModelCard.call_args[0][0] # README heading + assert card.data.pipeline_tag == "text-classification" + assert card.data.tags == ["annif"] From 10c522783138007279e34add3659e60059537688 Mon Sep 17 00:00:00 2001 From: Juho Inkinen <34240031+juhoinkinen@users.noreply.github.com> Date: Tue, 17 Sep 2024 13:33:00 +0300 Subject: [PATCH 03/17] Update modelcard only when no errors in committing --- annif/cli.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/annif/cli.py b/annif/cli.py index 2b07d3147..7374937db 100644 --- a/annif/cli.py +++ b/annif/cli.py @@ -659,11 +659,12 @@ def run_upload( ) except (HfHubHTTPError, HFValidationError) as err: raise OperationFailedException(str(err)) + else: + if modelcard: + hfh_util.upsert_modelcard(repo_id, projects, token, revision) finally: for fobj in fobjs: fobj.close() - if modelcard: - hfh_util.upsert_modelcard(repo_id, projects, token, revision) @cli.command("download") From 6ba969799b71219b1ad0e2affa6b532e3ddb9e26 Mon Sep 17 00:00:00 2001 
From: Juho Inkinen <34240031+juhoinkinen@users.noreply.github.com> Date: Wed, 18 Sep 2024 14:02:14 +0300 Subject: [PATCH 04/17] Add Projects section in modelcard & populate it with project configs in repo --- annif/config.py | 29 +++++++++++++++++++---------- annif/hfh_util.py | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 10 deletions(-) diff --git a/annif/config.py b/annif/config.py index 8cdc7d04f..02acecdf0 100644 --- a/annif/config.py +++ b/annif/config.py @@ -21,18 +21,27 @@ class AnnifConfigCFG: """Class for reading configuration in CFG/INI format""" - def __init__(self, filename: str) -> None: + def __init__(self, filename: str = None, projstr: str = None) -> None: self._config = configparser.ConfigParser() self._config.optionxform = annif.util.identity - with open(filename, encoding="utf-8-sig") as projf: - try: - logger.debug(f"Reading configuration file {filename} in CFG format") - self._config.read_file(projf) - except ( - configparser.DuplicateOptionError, - configparser.DuplicateSectionError, - ) as err: - raise ConfigurationException(err.message) + if filename is not None: + logger.debug(f"Reading configuration file {filename} in CFG format") + read_method = self._config.read + source = filename + elif projstr is not None: + logger.debug("Reading configuration from a string in CFG format") + read_method = self._config.read_string + source = projstr + self._read_config(read_method, source) + + def _read_config(self, read_method, source): + try: + read_method(source) + except ( + configparser.DuplicateOptionError, + configparser.DuplicateSectionError, + ) as err: + raise ConfigurationException(err.message) @property def project_ids(self) -> list[str]: diff --git a/annif/hfh_util.py b/annif/hfh_util.py index c4a3aa438..b2476fac0 100644 --- a/annif/hfh_util.py +++ b/annif/hfh_util.py @@ -17,6 +17,7 @@ from flask import current_app import annif +from annif.config import AnnifConfigCFG from annif.exception import 
OperationFailedException from annif.project import Access, AnnifProject @@ -257,6 +258,10 @@ def upsert_modelcard(repo_id, projects, token, revision): langs_to_add = {proj.vocab_lang for proj in projects} card.data.language = list(langs_existing.union(langs_to_add)) + configs = _get_existing_configs(repo_id, token, revision) + card.text = _update_modelcard_projects(card.text, configs) + print(card.text) + card.push_to_hub( repo_id=repo_id, token=token, revision=revision, commit_message=commit_message ) @@ -279,8 +284,40 @@ def _create_modelcard(repo_id): annif download "*" {repo_id} +## Projects + + Project ID Project Name Vocabulary ID Language + -------------------------------------------------------------------- """ card = ModelCard(content) card.data.pipeline_tag = "text-classification" card.data.tags = ["annif"] return card + + +def _update_modelcard_projects(text, configs): + table = [ + ( + proj_id, + configs[proj_id]["name"], + configs[proj_id]["vocab"], + configs[proj_id]["language"], + ) + for proj_id in configs.project_ids + ] + template = " {0: <18} {1: <23} {2: <13} {3: <8}\n" + for row in table: + text += template.format(*row) + return text + + +def _get_existing_configs(repo_id, token, revision): + from huggingface_hub import HfFileSystem + + fs = HfFileSystem() + cfg_locations = fs.glob(f"{repo_id}/*.cfg") + + projstr = "" + for cfg_file in cfg_locations: + projstr += fs.read_text(cfg_file, token=token, revision=revision) + return AnnifConfigCFG(projstr=projstr) From 845f53d74fee07c94b7f97be5dbd73550eb4ef58 Mon Sep 17 00:00:00 2001 From: Juho Inkinen <34240031+juhoinkinen@users.noreply.github.com> Date: Wed, 18 Sep 2024 16:00:34 +0300 Subject: [PATCH 05/17] Update Projects section properly (not just append projects to it) --- annif/hfh_util.py | 60 ++++++++++++++++++++++++++--------------------- 1 file changed, 33 insertions(+), 27 deletions(-) diff --git a/annif/hfh_util.py b/annif/hfh_util.py index b2476fac0..8f70fedad 100644 --- 
a/annif/hfh_util.py +++ b/annif/hfh_util.py @@ -259,14 +259,25 @@ def upsert_modelcard(repo_id, projects, token, revision): card.data.language = list(langs_existing.union(langs_to_add)) configs = _get_existing_configs(repo_id, token, revision) - card.text = _update_modelcard_projects(card.text, configs) - print(card.text) + card.text = _update_projects_section(card.text, configs) card.push_to_hub( repo_id=repo_id, token=token, revision=revision, commit_message=commit_message ) +def _get_existing_configs(repo_id, token, revision): + from huggingface_hub import HfFileSystem + + fs = HfFileSystem() + cfg_locations = fs.glob(f"{repo_id}/*.cfg") + + projstr = "" + for cfg_file in cfg_locations: + projstr += fs.read_text(cfg_file, token=token, revision=revision) + return AnnifConfigCFG(projstr=projstr) + + def _create_modelcard(repo_id): from huggingface_hub import ModelCard @@ -284,10 +295,6 @@ def _create_modelcard(repo_id): annif download "*" {repo_id} -## Projects - - Project ID Project Name Vocabulary ID Language - -------------------------------------------------------------------- """ card = ModelCard(content) card.data.pipeline_tag = "text-classification" @@ -295,29 +302,28 @@ def _create_modelcard(repo_id): return card -def _update_modelcard_projects(text, configs): - table = [ - ( +def _update_projects_section(text, configs): + section_startind = text.find("## Projects\n") + section_endind = text.rfind("```") + 3 # end of code formatted block + + projects_section = _create_projects_section(configs) + if section_startind == -1: # no existing projects section, append it now + return text + projects_section + else: + return text[:section_startind] + projects_section + text[section_endind:] + + +def _create_projects_section(configs): + content = "## Projects\n" + template = "{0:<19} {1:<23} {2:<15} {3:<11}\n" + header = template.format("Project ID", "Project Name", "Vocabulary ID", "Language") + content += "```\n" + header + "-" * len(header.strip()) + "\n" + + for 
proj_id in configs.project_ids: + content += template.format( proj_id, configs[proj_id]["name"], configs[proj_id]["vocab"], configs[proj_id]["language"], ) - for proj_id in configs.project_ids - ] - template = " {0: <18} {1: <23} {2: <13} {3: <8}\n" - for row in table: - text += template.format(*row) - return text - - -def _get_existing_configs(repo_id, token, revision): - from huggingface_hub import HfFileSystem - - fs = HfFileSystem() - cfg_locations = fs.glob(f"{repo_id}/*.cfg") - - projstr = "" - for cfg_file in cfg_locations: - projstr += fs.read_text(cfg_file, token=token, revision=revision) - return AnnifConfigCFG(projstr=projstr) + return content + "```" From 604fb05bae9a84d7b00d3c44c4fce1df48e31f76 Mon Sep 17 00:00:00 2001 From: Juho Inkinen <34240031+juhoinkinen@users.noreply.github.com> Date: Wed, 18 Sep 2024 16:23:13 +0300 Subject: [PATCH 06/17] Restore using utf-8-sig encoding when reading project configs --- annif/config.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/annif/config.py b/annif/config.py index 02acecdf0..dbe7ae191 100644 --- a/annif/config.py +++ b/annif/config.py @@ -35,8 +35,9 @@ def __init__(self, filename: str = None, projstr: str = None) -> None: self._read_config(read_method, source) def _read_config(self, read_method, source): + encoding = "utf-8-sig" try: - read_method(source) + read_method(source, encoding) except ( configparser.DuplicateOptionError, configparser.DuplicateSectionError, From 04e7473473be5c7d6710287a82525e425ea7822b Mon Sep 17 00:00:00 2001 From: Juho Inkinen <34240031+juhoinkinen@users.noreply.github.com> Date: Wed, 18 Sep 2024 17:17:48 +0300 Subject: [PATCH 07/17] More mocks to make HFH tests pass --- tests/test_hfh_util.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/test_hfh_util.py b/tests/test_hfh_util.py index a9a668339..4f40c96e2 100644 --- a/tests/test_hfh_util.py +++ b/tests/test_hfh_util.py @@ -107,7 +107,10 @@ def 
test_copy_project_config_overwrite(copy, exists): @mock.patch( "huggingface_hub.ModelCard", ) -def test_upsert_modelcard_existing_card(ModelCard, _list_files_in_hf_hub, project): +@mock.patch("huggingface_hub.HfFileSystem.glob", return_value=[]) +def test_upsert_modelcard_existing_card( + glob, ModelCard, _list_files_in_hf_hub, project +): repo_id = "annif-user/Annif-HFH-repo" project.vocab_lang = "fi" projects = [project] @@ -134,7 +137,8 @@ def test_upsert_modelcard_existing_card(ModelCard, _list_files_in_hf_hub, projec @mock.patch( "huggingface_hub.ModelCard", ) -def test_upsert_modelcard_new_card(ModelCard, _list_files_in_hf_hub, project): +@mock.patch("huggingface_hub.HfFileSystem.glob", return_value=[]) +def test_upsert_modelcard_new_card(glob, ModelCard, _list_files_in_hf_hub, project): repo_id = "annif-user/Annif-HFH-repo" project.vocab_lang = "fi" projects = [project] From afe4e3dee85acd9ebb87c608b58dcad03b0bd200 Mon Sep 17 00:00:00 2001 From: Juho Inkinen <34240031+juhoinkinen@users.noreply.github.com> Date: Wed, 18 Sep 2024 17:23:25 +0300 Subject: [PATCH 08/17] Ensure authenticated access when using HfFileSystem --- annif/hfh_util.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/annif/hfh_util.py b/annif/hfh_util.py index 8f70fedad..626fd0f37 100644 --- a/annif/hfh_util.py +++ b/annif/hfh_util.py @@ -269,8 +269,8 @@ def upsert_modelcard(repo_id, projects, token, revision): def _get_existing_configs(repo_id, token, revision): from huggingface_hub import HfFileSystem - fs = HfFileSystem() - cfg_locations = fs.glob(f"{repo_id}/*.cfg") + fs = HfFileSystem(token=token) + cfg_locations = fs.glob(f"{repo_id}/*.cfg", revision=revision) projstr = "" for cfg_file in cfg_locations: From 8df3bffd0967428c49432037d764fdc2201b6601 Mon Sep 17 00:00:00 2001 From: Juho Inkinen <34240031+juhoinkinen@users.noreply.github.com> Date: Thu, 19 Sep 2024 16:55:43 +0300 Subject: [PATCH 09/17] Use markdown comments to detect part to update --- 
annif/hfh_util.py | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/annif/hfh_util.py b/annif/hfh_util.py index 626fd0f37..8060b6565 100644 --- a/annif/hfh_util.py +++ b/annif/hfh_util.py @@ -302,28 +302,35 @@ def _create_modelcard(repo_id): return card +AUTOGEN_START = "" +AUTOGEN_END = "" + + def _update_projects_section(text, configs): - section_startind = text.find("## Projects\n") - section_endind = text.rfind("```") + 3 # end of code formatted block + section_start_ind = text.find(AUTOGEN_START) + section_end_ind = text.rfind(AUTOGEN_END) + len(AUTOGEN_END) projects_section = _create_projects_section(configs) - if section_startind == -1: # no existing projects section, append it now + + if section_start_ind == -1: # no existing projects section, append it now return text + projects_section else: - return text[:section_startind] + projects_section + text[section_endind:] + return text[:section_start_ind] + projects_section + text[section_end_ind:] def _create_projects_section(configs): - content = "## Projects\n" + content = f"{AUTOGEN_START}\n## Projects\n" + template = "{0:<19} {1:<23} {2:<15} {3:<11}\n" header = template.format("Project ID", "Project Name", "Vocabulary ID", "Language") content += "```\n" + header + "-" * len(header.strip()) + "\n" for proj_id in configs.project_ids: + project = configs[proj_id] content += template.format( proj_id, - configs[proj_id]["name"], - configs[proj_id]["vocab"], - configs[proj_id]["language"], + project["name"], + project["vocab"], + project["language"], ) - return content + "```" + return content + "```\n" + AUTOGEN_END From 55b0ffcac02ef07dcad8473b40ff98df1c022126 Mon Sep 17 00:00:00 2001 From: Juho Inkinen <34240031+juhoinkinen@users.noreply.github.com> Date: Thu, 19 Sep 2024 17:04:42 +0300 Subject: [PATCH 10/17] Better variable name: AUTOUPDATING_{START,END} --- annif/hfh_util.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git 
a/annif/hfh_util.py b/annif/hfh_util.py index 8060b6565..9b1e3029b 100644 --- a/annif/hfh_util.py +++ b/annif/hfh_util.py @@ -302,13 +302,13 @@ def _create_modelcard(repo_id): return card -AUTOGEN_START = "" -AUTOGEN_END = "" +AUTOUPDATING_START = "" +AUTOUPDATING_END = "" def _update_projects_section(text, configs): - section_start_ind = text.find(AUTOGEN_START) - section_end_ind = text.rfind(AUTOGEN_END) + len(AUTOGEN_END) + section_start_ind = text.find(AUTOUPDATING_START) + section_end_ind = text.rfind(AUTOUPDATING_END) + len(AUTOUPDATING_END) projects_section = _create_projects_section(configs) @@ -319,7 +319,7 @@ def _update_projects_section(text, configs): def _create_projects_section(configs): - content = f"{AUTOGEN_START}\n## Projects\n" + content = f"{AUTOUPDATING_START}\n## Projects\n" template = "{0:<19} {1:<23} {2:<15} {3:<11}\n" header = template.format("Project ID", "Project Name", "Vocabulary ID", "Language") @@ -333,4 +333,4 @@ def _create_projects_section(configs): project["vocab"], project["language"], ) - return content + "```\n" + AUTOGEN_END + return content + "```\n" + AUTOUPDATING_END From 40b9c48d6426763ff30aacc0022f0aa6a32fe319 Mon Sep 17 00:00:00 2001 From: Juho Inkinen <34240031+juhoinkinen@users.noreply.github.com> Date: Mon, 23 Sep 2024 11:29:05 +0300 Subject: [PATCH 11/17] Add test for updating project list --- annif/hfh_util.py | 3 +- tests/test_hfh_util.py | 68 +++++++++++++++++++++++++++++++----------- 2 files changed, 52 insertions(+), 19 deletions(-) diff --git a/annif/hfh_util.py b/annif/hfh_util.py index 9b1e3029b..144952069 100644 --- a/annif/hfh_util.py +++ b/annif/hfh_util.py @@ -311,7 +311,6 @@ def _update_projects_section(text, configs): section_end_ind = text.rfind(AUTOUPDATING_END) + len(AUTOUPDATING_END) projects_section = _create_projects_section(configs) - if section_start_ind == -1: # no existing projects section, append it now return text + projects_section else: @@ -321,7 +320,7 @@ def 
_update_projects_section(text, configs): def _create_projects_section(configs): content = f"{AUTOUPDATING_START}\n## Projects\n" - template = "{0:<19} {1:<23} {2:<15} {3:<11}\n" + template = "{0:<19} {1:<23} {2:<15} {3:<8}\n" header = template.format("Project ID", "Project Name", "Vocabulary ID", "Language") content += "```\n" + header + "-" * len(header.strip()) + "\n" diff --git a/tests/test_hfh_util.py b/tests/test_hfh_util.py index 4f40c96e2..68e331929 100644 --- a/tests/test_hfh_util.py +++ b/tests/test_hfh_util.py @@ -103,59 +103,59 @@ def test_copy_project_config_overwrite(copy, exists): ) -@mock.patch("annif.hfh_util._list_files_in_hf_hub", return_value=["README.md"]) +@mock.patch("annif.hfh_util._list_files_in_hf_hub", return_value=[]) @mock.patch( "huggingface_hub.ModelCard", ) @mock.patch("huggingface_hub.HfFileSystem.glob", return_value=[]) -def test_upsert_modelcard_existing_card( - glob, ModelCard, _list_files_in_hf_hub, project -): +def test_upsert_modelcard_insert_new(glob, ModelCard, _list_files_in_hf_hub, project): repo_id = "annif-user/Annif-HFH-repo" project.vocab_lang = "fi" projects = [project] token = "mytoken" revision = "main" - ModelCard.load.return_value.data.language = ["en"] # Mock language in card annif.hfh_util.upsert_modelcard(repo_id, projects, token, revision) - ModelCard.assert_not_called() # Do not create a new card - - ModelCard.load.assert_called_once_with(repo_id) - card = ModelCard.load.return_value + ModelCard.assert_called_once() + card = ModelCard.return_value card.push_to_hub.assert_called_once_with( repo_id=repo_id, token=token, revision=revision, - commit_message="Update README.md with Annif", + commit_message="Create README.md with Annif", ) - assert sorted(card.data.language) == ["en", "fi"] + assert card.data.language == ["fi"] -@mock.patch("annif.hfh_util._list_files_in_hf_hub", return_value=[]) +@mock.patch("annif.hfh_util._list_files_in_hf_hub", return_value=["README.md"]) @mock.patch( "huggingface_hub.ModelCard", 
) @mock.patch("huggingface_hub.HfFileSystem.glob", return_value=[]) -def test_upsert_modelcard_new_card(glob, ModelCard, _list_files_in_hf_hub, project): +def test_upsert_modelcard_update_existing( + glob, ModelCard, _list_files_in_hf_hub, project +): repo_id = "annif-user/Annif-HFH-repo" project.vocab_lang = "fi" projects = [project] token = "mytoken" revision = "main" + ModelCard.load.return_value.data.language = ["en"] # Mock language in card annif.hfh_util.upsert_modelcard(repo_id, projects, token, revision) - ModelCard.assert_called_once() - card = ModelCard.return_value + ModelCard.assert_not_called() # Do not create a new card + + ModelCard.load.assert_called_once_with(repo_id) + card = ModelCard.load.return_value card.push_to_hub.assert_called_once_with( repo_id=repo_id, token=token, revision=revision, - commit_message="Create README.md with Annif", + commit_message="Update README.md with Annif", ) - assert card.data.language == ["fi"] + assert sorted(card.data.language) == ["en", "fi"] @mock.patch( @@ -169,3 +169,37 @@ def test_create_modelcard(ModelCard): assert "# Annif-HFH-repo" in ModelCard.call_args[0][0] # README heading assert card.data.pipeline_tag == "text-classification" assert card.data.tags == ["annif"] + + +def test_update_modelcard_projects_section(): + text_head = """ +--- +language: +- en +--- +# annif-user/Annif-HFH-repo +This is some text before Projects section. +## Usage + annif download "*" annif-user/Annif-HFH-repo + +## Projects +``` +Project ID Project Name Vocabulary ID Language +-------------------------------------------------------------------- +""" + + text_tail = """ +``` + +This is some text after Projects section, which should remain in place after updates. 
+""" + + text = text_head + text_tail + cfg = annif.config.parse_config("tests/projects.toml") + + updated_text = annif.hfh_util._update_projects_section(text, cfg) + expected_inserted_projects = ( + "dummy-fi-toml Dummy Finnish dummy fi \n" + "dummy-en-toml Dummy English dummy en " + ) + assert updated_text == text_head + expected_inserted_projects + text_tail From 2b2d3eaf96a543052c9bbbe4f977094ef559651f Mon Sep 17 00:00:00 2001 From: Juho Inkinen <34240031+juhoinkinen@users.noreply.github.com> Date: Mon, 23 Sep 2024 12:03:51 +0300 Subject: [PATCH 12/17] Improve tests --- tests/test_hfh_util.py | 114 +++++++++++++++++++++++++---------------- 1 file changed, 71 insertions(+), 43 deletions(-) diff --git a/tests/test_hfh_util.py b/tests/test_hfh_util.py index 68e331929..7ac7d3d6c 100644 --- a/tests/test_hfh_util.py +++ b/tests/test_hfh_util.py @@ -6,7 +6,10 @@ from datetime import datetime, timezone from unittest import mock +import huggingface_hub + import annif.hfh_util +from annif.config import AnnifConfigCFG def test_archive_dir(testdatadir): @@ -109,7 +112,7 @@ def test_copy_project_config_overwrite(copy, exists): ) @mock.patch("huggingface_hub.HfFileSystem.glob", return_value=[]) def test_upsert_modelcard_insert_new(glob, ModelCard, _list_files_in_hf_hub, project): - repo_id = "annif-user/Annif-HFH-repo" + repo_id = "annif-user/annif-repo" project.vocab_lang = "fi" projects = [project] token = "mytoken" @@ -118,88 +121,113 @@ def test_upsert_modelcard_insert_new(glob, ModelCard, _list_files_in_hf_hub, pro annif.hfh_util.upsert_modelcard(repo_id, projects, token, revision) ModelCard.assert_called_once() + card = ModelCard.return_value + assert "# annif-repo" in ModelCard.call_args[0][0] # README heading + assert card.data.language == ["fi"] + assert card.data.pipeline_tag == "text-classification" + assert card.data.tags == ["annif"] card.push_to_hub.assert_called_once_with( repo_id=repo_id, token=token, revision=revision, commit_message="Create README.md with 
Annif", ) - assert card.data.language == ["fi"] +@mock.patch("huggingface_hub.ModelCard.push_to_hub") @mock.patch("annif.hfh_util._list_files_in_hf_hub", return_value=["README.md"]) @mock.patch( - "huggingface_hub.ModelCard", + "huggingface_hub.ModelCard.load", return_value=huggingface_hub.ModelCard("foobar") +) +@mock.patch("huggingface_hub.HfFileSystem.glob", return_value=["dummy.cfg"]) +@mock.patch( + "huggingface_hub.HfFileSystem.read_text", + return_value=""" + [dummy-fi] + name=Dummy Finnish + language=fi + vocab=dummy +""", ) -@mock.patch("huggingface_hub.HfFileSystem.glob", return_value=[]) def test_upsert_modelcard_update_existing( - glob, ModelCard, _list_files_in_hf_hub, project + read_text, glob, load, _list_files_in_hf_hub, push_to_hub, project ): - repo_id = "annif-user/Annif-HFH-repo" + repo_id = "annif-user/annif-repo" project.vocab_lang = "fi" projects = [project] token = "mytoken" revision = "main" - ModelCard.load.return_value.data.language = ["en"] # Mock language in card + load.return_value.data.language = ["en"] # Mock language in existing card annif.hfh_util.upsert_modelcard(repo_id, projects, token, revision) - ModelCard.assert_not_called() # Do not create a new card + load.assert_called_once_with(repo_id) - ModelCard.load.assert_called_once_with(repo_id) - card = ModelCard.load.return_value + card = load.return_value + + expected_project_list_content = ( + "dummy-fi Dummy Finnish dummy fi" + ) + assert expected_project_list_content in card.text + assert sorted(card.data.language) == ["en", "fi"] card.push_to_hub.assert_called_once_with( repo_id=repo_id, token=token, revision=revision, commit_message="Update README.md with Annif", ) - assert sorted(card.data.language) == ["en", "fi"] -@mock.patch( - "huggingface_hub.ModelCard", -) -def test_create_modelcard(ModelCard): - repo_id = "annif-user/Annif-HFH-repo" +def test_update_modelcard_projects_section_append_new(): + empty_cfg = AnnifConfigCFG(projstr="") - card = 
annif.hfh_util._create_modelcard(repo_id) + text = """This is some existing text in the card.""" + updated_text = annif.hfh_util._update_projects_section(text, empty_cfg) - assert "# Annif-HFH-repo" in ModelCard.call_args[0][0] # README heading - assert card.data.pipeline_tag == "text-classification" - assert card.data.tags == ["annif"] + expected_tail = """\ + +## Projects +``` +Project ID Project Name Vocabulary ID Language +-------------------------------------------------------------------- +``` +""" + assert updated_text == text + expected_tail -def test_update_modelcard_projects_section(): - text_head = """ ---- -language: -- en ---- -# annif-user/Annif-HFH-repo -This is some text before Projects section. -## Usage - annif download "*" annif-user/Annif-HFH-repo +def test_update_modelcard_projects_section_update_existing(): + cfg = AnnifConfigCFG( + projstr="""\ + [dummy-fi] + name=Dummy Finnish + language=fi + vocab=dummy""" + ) + + text_head = """This is some existing text in the card.\n""" + text_initial_projects = """\ ## Projects ``` Project ID Project Name Vocabulary ID Language -------------------------------------------------------------------- -""" +``` +\n""" + text_tail = ( + "This is text after the Projects section; it should remain in after updates." + ) - text_tail = """ + text = text_head + text_initial_projects + text_tail + updated_text = annif.hfh_util._update_projects_section(text, cfg) + + expected_updated_projects = """\ + +## Projects ``` +Project ID Project Name Vocabulary ID Language +-------------------------------------------------------------------- +dummy-fi Dummy Finnish dummy fi \n``` -This is some text after Projects section, which should remain in place after updates. 
""" - - text = text_head + text_tail - cfg = annif.config.parse_config("tests/projects.toml") - - updated_text = annif.hfh_util._update_projects_section(text, cfg) - expected_inserted_projects = ( - "dummy-fi-toml Dummy Finnish dummy fi \n" - "dummy-en-toml Dummy English dummy en " - ) - assert updated_text == text_head + expected_inserted_projects + text_tail + assert updated_text == text_head + expected_updated_projects + text_tail From d237aead2116e703b4f3c724f30b84bdab692d4c Mon Sep 17 00:00:00 2001 From: Juho Inkinen <34240031+juhoinkinen@users.noreply.github.com> Date: Mon, 23 Sep 2024 17:56:12 +0300 Subject: [PATCH 13/17] Detect nonexistence of README.md by EntryNotFoundError instead of listing files --- annif/hfh_util.py | 6 +++--- tests/test_hfh_util.py | 15 ++++++++------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/annif/hfh_util.py b/annif/hfh_util.py index 144952069..a99050be6 100644 --- a/annif/hfh_util.py +++ b/annif/hfh_util.py @@ -245,12 +245,12 @@ def upsert_modelcard(repo_id, projects, token, revision): """This function creates or updates a Model Card in a Hugging Face Hub repository with some metadata in it.""" from huggingface_hub import ModelCard + from huggingface_hub.utils import EntryNotFoundError - card_exists = "README.md" in _list_files_in_hf_hub(repo_id, token, revision) - if card_exists: + try: card = ModelCard.load(repo_id) commit_message = "Update README.md with Annif" - else: + except EntryNotFoundError: card = _create_modelcard(repo_id) commit_message = "Create README.md with Annif" diff --git a/tests/test_hfh_util.py b/tests/test_hfh_util.py index 7ac7d3d6c..7cae1e9df 100644 --- a/tests/test_hfh_util.py +++ b/tests/test_hfh_util.py @@ -7,6 +7,7 @@ from unittest import mock import huggingface_hub +from huggingface_hub.utils import EntryNotFoundError import annif.hfh_util from annif.config import AnnifConfigCFG @@ -106,12 +107,15 @@ def test_copy_project_config_overwrite(copy, exists): ) 
-@mock.patch("annif.hfh_util._list_files_in_hf_hub", return_value=[]) @mock.patch( - "huggingface_hub.ModelCard", + "huggingface_hub.ModelCard.load", + side_effect=EntryNotFoundError("dummymessage"), ) @mock.patch("huggingface_hub.HfFileSystem.glob", return_value=[]) -def test_upsert_modelcard_insert_new(glob, ModelCard, _list_files_in_hf_hub, project): +@mock.patch( + "huggingface_hub.ModelCard", +) +def test_upsert_modelcard_insert_new(ModelCard, glob, load, project): repo_id = "annif-user/annif-repo" project.vocab_lang = "fi" projects = [project] @@ -136,7 +140,6 @@ def test_upsert_modelcard_insert_new(glob, ModelCard, _list_files_in_hf_hub, pro @mock.patch("huggingface_hub.ModelCard.push_to_hub") -@mock.patch("annif.hfh_util._list_files_in_hf_hub", return_value=["README.md"]) @mock.patch( "huggingface_hub.ModelCard.load", return_value=huggingface_hub.ModelCard("foobar") ) @@ -150,9 +153,7 @@ def test_upsert_modelcard_insert_new(glob, ModelCard, _list_files_in_hf_hub, pro vocab=dummy """, ) -def test_upsert_modelcard_update_existing( - read_text, glob, load, _list_files_in_hf_hub, push_to_hub, project -): +def test_upsert_modelcard_update_existing(read_text, glob, load, push_to_hub, project): repo_id = "annif-user/annif-repo" project.vocab_lang = "fi" projects = [project] From 2b75cf197ee6256608e67cfabe26556b4c1f2da6 Mon Sep 17 00:00:00 2001 From: juhoinkinen <34240031+juhoinkinen@users.noreply.github.com> Date: Tue, 24 Sep 2024 20:01:05 +0300 Subject: [PATCH 14/17] Again improve & simplify tests --- tests/conftest.py | 1 + tests/test_hfh_util.py | 48 ++++++++++++++---------------------------- 2 files changed, 17 insertions(+), 32 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 7d7a851ee..9f2015a31 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -139,6 +139,7 @@ def project(subject_index, datadir, registry, vocabulary): proj.analyzer = annif.analyzer.get_analyzer("snowball(finnish)") proj.language = "fi" proj.vocab = vocabulary + 
proj.vocab_lang = "fi" proj.subjects = subject_index proj.datadir = str(datadir) proj.registry = registry diff --git a/tests/test_hfh_util.py b/tests/test_hfh_util.py index 7cae1e9df..5918fd495 100644 --- a/tests/test_hfh_util.py +++ b/tests/test_hfh_util.py @@ -109,25 +109,21 @@ def test_copy_project_config_overwrite(copy, exists): @mock.patch( "huggingface_hub.ModelCard.load", - side_effect=EntryNotFoundError("dummymessage"), + side_effect=EntryNotFoundError("mymessage"), ) @mock.patch("huggingface_hub.HfFileSystem.glob", return_value=[]) -@mock.patch( - "huggingface_hub.ModelCard", -) +@mock.patch("huggingface_hub.ModelCard") def test_upsert_modelcard_insert_new(ModelCard, glob, load, project): repo_id = "annif-user/annif-repo" - project.vocab_lang = "fi" - projects = [project] token = "mytoken" - revision = "main" + revision = "mybranch" - annif.hfh_util.upsert_modelcard(repo_id, projects, token, revision) + annif.hfh_util.upsert_modelcard(repo_id, [project], token, revision) ModelCard.assert_called_once() + assert "# annif-repo" in ModelCard.call_args[0][0] # README heading card = ModelCard.return_value - assert "# annif-repo" in ModelCard.call_args[0][0] # README heading assert card.data.language == ["fi"] assert card.data.pipeline_tag == "text-classification" assert card.data.tags == ["annif"] @@ -141,36 +137,20 @@ def test_upsert_modelcard_insert_new(ModelCard, glob, load, project): @mock.patch("huggingface_hub.ModelCard.push_to_hub") @mock.patch( - "huggingface_hub.ModelCard.load", return_value=huggingface_hub.ModelCard("foobar") + "huggingface_hub.ModelCard.load", # Mock language in existing card + return_value=huggingface_hub.ModelCard("---\nlanguage:\n- en\n---"), ) -@mock.patch("huggingface_hub.HfFileSystem.glob", return_value=["dummy.cfg"]) -@mock.patch( - "huggingface_hub.HfFileSystem.read_text", - return_value=""" - [dummy-fi] - name=Dummy Finnish - language=fi - vocab=dummy -""", -) -def test_upsert_modelcard_update_existing(read_text, glob, load, 
push_to_hub, project): +@mock.patch("huggingface_hub.HfFileSystem") +def test_upsert_modelcard_update_existing(HfFileSystem, load, push_to_hub, project): repo_id = "annif-user/annif-repo" - project.vocab_lang = "fi" - projects = [project] token = "mytoken" - revision = "main" - load.return_value.data.language = ["en"] # Mock language in existing card + revision = "mybranch" - annif.hfh_util.upsert_modelcard(repo_id, projects, token, revision) + annif.hfh_util.upsert_modelcard(repo_id, [project], token, revision) load.assert_called_once_with(repo_id) card = load.return_value - - expected_project_list_content = ( - "dummy-fi Dummy Finnish dummy fi" - ) - assert expected_project_list_content in card.text assert sorted(card.data.language) == ["en", "fi"] card.push_to_hub.assert_called_once_with( repo_id=repo_id, @@ -194,6 +174,7 @@ def test_update_modelcard_projects_section_append_new(): -------------------------------------------------------------------- ``` """ + assert updated_text == text + expected_tail @@ -207,6 +188,7 @@ def test_update_modelcard_projects_section_update_existing(): ) text_head = """This is some existing text in the card.\n""" + text_initial_projects = """\ ## Projects @@ -215,8 +197,9 @@ def test_update_modelcard_projects_section_update_existing(): -------------------------------------------------------------------- ``` \n""" + text_tail = ( - "This is text after the Projects section; it should remain in after updates." + "This is text after the Projects section; it should remain after updates." 
) text = text_head + text_initial_projects + text_tail @@ -231,4 +214,5 @@ def test_update_modelcard_projects_section_update_existing(): dummy-fi Dummy Finnish dummy fi \n``` """ + assert updated_text == text_head + expected_updated_projects + text_tail From ed5f0efb2cf22e1ed8334a7a830840fc77b966cc Mon Sep 17 00:00:00 2001 From: juhoinkinen <34240031+juhoinkinen@users.noreply.github.com> Date: Tue, 24 Sep 2024 20:43:03 +0300 Subject: [PATCH 15/17] Resolve CodeQL complain Potentially uninitialized local variable --- annif/config.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/annif/config.py b/annif/config.py index dbe7ae191..6ccedc695 100644 --- a/annif/config.py +++ b/annif/config.py @@ -26,13 +26,10 @@ def __init__(self, filename: str = None, projstr: str = None) -> None: self._config.optionxform = annif.util.identity if filename is not None: logger.debug(f"Reading configuration file {filename} in CFG format") - read_method = self._config.read - source = filename + self._read_config(self._config.read, filename) elif projstr is not None: logger.debug("Reading configuration from a string in CFG format") - read_method = self._config.read_string - source = projstr - self._read_config(read_method, source) + self._read_config(self._config.read_string, projstr) def _read_config(self, read_method, source): encoding = "utf-8-sig" From 42c2b731a62722a8f14e230465775dddd13f5c37 Mon Sep 17 00:00:00 2001 From: juhoinkinen <34240031+juhoinkinen@users.noreply.github.com> Date: Tue, 24 Sep 2024 21:11:44 +0300 Subject: [PATCH 16/17] Increase test coverage by mocking cfg file in repo --- tests/test_hfh_util.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/tests/test_hfh_util.py b/tests/test_hfh_util.py index 5918fd495..6b5f37745 100644 --- a/tests/test_hfh_util.py +++ b/tests/test_hfh_util.py @@ -140,8 +140,17 @@ def test_upsert_modelcard_insert_new(ModelCard, glob, load, project): "huggingface_hub.ModelCard.load", 
# Mock language in existing card return_value=huggingface_hub.ModelCard("---\nlanguage:\n- en\n---"), ) -@mock.patch("huggingface_hub.HfFileSystem") -def test_upsert_modelcard_update_existing(HfFileSystem, load, push_to_hub, project): +@mock.patch("huggingface_hub.HfFileSystem.glob", return_value=["dummy-en.cfg"]) +@mock.patch( + "huggingface_hub.HfFileSystem.read_text", + return_value=""" + [dummy-en] + name=Dummy English + language=en + vocab=dummy +""", +) +def test_upsert_modelcard_update_existing(read_text, glob, load, push_to_hub, project): repo_id = "annif-user/annif-repo" token = "mytoken" revision = "mybranch" @@ -151,6 +160,10 @@ def test_upsert_modelcard_update_existing(HfFileSystem, load, push_to_hub, proje load.assert_called_once_with(repo_id) card = load.return_value + retained_project_list_content = ( + "dummy-en Dummy English dummy en" + ) + assert retained_project_list_content in card.text assert sorted(card.data.language) == ["en", "fi"] card.push_to_hub.assert_called_once_with( repo_id=repo_id, From e4febabfa855db6c502951c10b895166828a4fe5 Mon Sep 17 00:00:00 2001 From: Juho Inkinen <34240031+juhoinkinen@users.noreply.github.com> Date: Fri, 27 Sep 2024 09:46:56 +0300 Subject: [PATCH 17/17] Fix typos in docstring and command helpstring --- annif/cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/annif/cli.py b/annif/cli.py index 7374937db..9bfbfc554 100644 --- a/annif/cli.py +++ b/annif/cli.py @@ -619,7 +619,7 @@ def run_hyperopt(project_id, paths, docs_limit, trials, jobs, metric, results_fi @click.option( "--modelcard/--no-modelcard", default=True, - help="Update or create a ModelCard with upload.", + help="Update or create a Model Card with upload.", ) @cli_util.common_options def run_upload( @@ -701,7 +701,7 @@ def run_download(project_ids_pattern, repo_id, token, revision, force): unzips the archives to `data/` directory and places the configuration files to `projects.d/` directory. 
An authentication token and revision can be given with options. If the README.md does not exist in the repository it is - created with default contents and metadata of the uploaded projects, if it exisits, + created with default contents and metadata of the uploaded projects, if it exists, its metadata are updated as necessary. """