diff --git a/prepare/cards/open_australian_legal_qa.py b/prepare/cards/open_australian_legal_qa.py index 4593763f71..6c4e822731 100644 --- a/prepare/cards/open_australian_legal_qa.py +++ b/prepare/cards/open_australian_legal_qa.py @@ -13,6 +13,7 @@ card = TaskCard( loader=LoadHF( path="umarbutler/open-australian-legal-qa", + name="default", ), preprocess_steps=[ SplitRandomMix( @@ -51,6 +52,7 @@ card = TaskCard( loader=LoadHF( path="umarbutler/open-australian-legal-qa", + name="default", ), preprocess_steps=[ SplitRandomMix( diff --git a/prepare/recipes/tables_benchmark.py b/prepare/recipes/tables_benchmark.py index d87bd8c3b4..0b4d1a58e4 100644 --- a/prepare/recipes/tables_benchmark.py +++ b/prepare/recipes/tables_benchmark.py @@ -22,7 +22,6 @@ serializers = ",".join(list(SERIALIZERS)) max_augmentors = 10 max_pred_tokens = 100 -num_demos = 5 recipes_only = False # Process parameters @@ -35,6 +34,7 @@ for card in cards_parsed: for augment in all_augment: for serializer in serializers_parsed: + num_demos = 1 if card == "wikitq" else 5 kwargs = { "card": "cards." + card, "serializer": f"serializers.table.{serializer}" diff --git a/src/unitxt/assistant/app.py b/src/unitxt/assistant/app.py index f04c3896ae..f2e070d1bd 100644 --- a/src/unitxt/assistant/app.py +++ b/src/unitxt/assistant/app.py @@ -1,5 +1,7 @@ import datetime +import importlib import json +import logging import os import uuid @@ -8,8 +10,11 @@ import pandas as pd import streamlit as st import torch +from litellm import AuthenticationError from transformers import AutoTokenizer +logger = logging.getLogger("unitxt-assistance") + @st.cache_resource def load_data(): @@ -20,9 +25,36 @@ def load_data(): return metadata_df, embeddings +def get_embedding_with_retry(model, input, max_retries=3): + """This function calls the litellm.embedding method and handles token expiration. + + It will retry the call up to `max_retries` times if an AuthenticationError is raised. + """ + retries = 0 + actual_exception = None + while retries < max_retries: + try: + return litellm.embedding(model=model, input=input) + + except AuthenticationError as e: + actual_exception = e + retries += 1 + logger.info( + f"Authentication error: {e}. Retrying... ({retries}/{max_retries})" + ) + importlib.reload( + litellm + ) # Reload the litellm module to clear any cached state + + # If all retries fail, raise an error + raise Exception( + f"Failed to get embedding after {max_retries} attempts. Exception: {actual_exception}" + ) + + def search(query, metadata_df, embeddings, max_tokens=5000, min_text_length=50): # Generate embedding for the query using litellm - response = litellm.embedding( + response = get_embedding_with_retry( model="watsonx/intfloat/multilingual-e5-large", input=[query], ) diff --git a/src/unitxt/catalog/benchmarks/tables_benchmark.json b/src/unitxt/catalog/benchmarks/tables_benchmark.json index e92648e43f..38c6753b7e 100644 --- a/src/unitxt/catalog/benchmarks/tables_benchmark.json +++ b/src/unitxt/catalog/benchmarks/tables_benchmark.json @@ -683,71 +683,71 @@ "concat": { "__type__": "benchmark", "subsets": { - "insert_empty_rows_augmentation_5_demos": "recipes.tables_benchmark.wikitq.concat.insert_empty_rows_augmentation_5_demos", - "no_augmentation_5_demos": "recipes.tables_benchmark.wikitq.concat.no_augmentation_5_demos", - "shuffle_cols_augmentation_5_demos": "recipes.tables_benchmark.wikitq.concat.shuffle_cols_augmentation_5_demos", - "shuffle_rows_augmentation_5_demos": "recipes.tables_benchmark.wikitq.concat.shuffle_rows_augmentation_5_demos", - "transpose_augmentation_5_demos": "recipes.tables_benchmark.wikitq.concat.transpose_augmentation_5_demos" + "insert_empty_rows_augmentation_1_demos": "recipes.tables_benchmark.wikitq.concat.insert_empty_rows_augmentation_1_demos", + "no_augmentation_1_demos": "recipes.tables_benchmark.wikitq.concat.no_augmentation_1_demos", + "shuffle_cols_augmentation_1_demos": "recipes.tables_benchmark.wikitq.concat.shuffle_cols_augmentation_1_demos", + "shuffle_rows_augmentation_1_demos": "recipes.tables_benchmark.wikitq.concat.shuffle_rows_augmentation_1_demos", + "transpose_augmentation_1_demos": "recipes.tables_benchmark.wikitq.concat.transpose_augmentation_1_demos" } }, "csv": { "__type__": "benchmark", "subsets": { - "insert_empty_rows_augmentation_5_demos": "recipes.tables_benchmark.wikitq.csv.insert_empty_rows_augmentation_5_demos", - "no_augmentation_5_demos": "recipes.tables_benchmark.wikitq.csv.no_augmentation_5_demos", - "shuffle_cols_augmentation_5_demos": "recipes.tables_benchmark.wikitq.csv.shuffle_cols_augmentation_5_demos", - "shuffle_rows_augmentation_5_demos": "recipes.tables_benchmark.wikitq.csv.shuffle_rows_augmentation_5_demos", - "transpose_augmentation_5_demos": "recipes.tables_benchmark.wikitq.csv.transpose_augmentation_5_demos" + "insert_empty_rows_augmentation_1_demos": "recipes.tables_benchmark.wikitq.csv.insert_empty_rows_augmentation_1_demos", + "no_augmentation_1_demos": "recipes.tables_benchmark.wikitq.csv.no_augmentation_1_demos", + "shuffle_cols_augmentation_1_demos": "recipes.tables_benchmark.wikitq.csv.shuffle_cols_augmentation_1_demos", + "shuffle_rows_augmentation_1_demos": "recipes.tables_benchmark.wikitq.csv.shuffle_rows_augmentation_1_demos", + "transpose_augmentation_1_demos": "recipes.tables_benchmark.wikitq.csv.transpose_augmentation_1_demos" } }, "df": { "__type__": "benchmark", "subsets": { - "insert_empty_rows_augmentation_5_demos": "recipes.tables_benchmark.wikitq.df.insert_empty_rows_augmentation_5_demos", - "no_augmentation_5_demos": "recipes.tables_benchmark.wikitq.df.no_augmentation_5_demos", - "shuffle_cols_augmentation_5_demos": "recipes.tables_benchmark.wikitq.df.shuffle_cols_augmentation_5_demos", - "shuffle_rows_augmentation_5_demos": "recipes.tables_benchmark.wikitq.df.shuffle_rows_augmentation_5_demos", - "transpose_augmentation_5_demos": "recipes.tables_benchmark.wikitq.df.transpose_augmentation_5_demos" + "insert_empty_rows_augmentation_1_demos": "recipes.tables_benchmark.wikitq.df.insert_empty_rows_augmentation_1_demos", + "no_augmentation_1_demos": "recipes.tables_benchmark.wikitq.df.no_augmentation_1_demos", + "shuffle_cols_augmentation_1_demos": "recipes.tables_benchmark.wikitq.df.shuffle_cols_augmentation_1_demos", + "shuffle_rows_augmentation_1_demos": "recipes.tables_benchmark.wikitq.df.shuffle_rows_augmentation_1_demos", + "transpose_augmentation_1_demos": "recipes.tables_benchmark.wikitq.df.transpose_augmentation_1_demos" } }, "html": { "__type__": "benchmark", "subsets": { - "insert_empty_rows_augmentation_5_demos": "recipes.tables_benchmark.wikitq.html.insert_empty_rows_augmentation_5_demos", - "no_augmentation_5_demos": "recipes.tables_benchmark.wikitq.html.no_augmentation_5_demos", - "shuffle_cols_augmentation_5_demos": "recipes.tables_benchmark.wikitq.html.shuffle_cols_augmentation_5_demos", - "shuffle_rows_augmentation_5_demos": "recipes.tables_benchmark.wikitq.html.shuffle_rows_augmentation_5_demos", - "transpose_augmentation_5_demos": "recipes.tables_benchmark.wikitq.html.transpose_augmentation_5_demos" + "insert_empty_rows_augmentation_1_demos": "recipes.tables_benchmark.wikitq.html.insert_empty_rows_augmentation_1_demos", + "no_augmentation_1_demos": "recipes.tables_benchmark.wikitq.html.no_augmentation_1_demos", + "shuffle_cols_augmentation_1_demos": "recipes.tables_benchmark.wikitq.html.shuffle_cols_augmentation_1_demos", + "shuffle_rows_augmentation_1_demos": "recipes.tables_benchmark.wikitq.html.shuffle_rows_augmentation_1_demos", + "transpose_augmentation_1_demos": "recipes.tables_benchmark.wikitq.html.transpose_augmentation_1_demos" } }, "indexed_row_major": { "__type__": "benchmark", "subsets": { - "insert_empty_rows_augmentation_5_demos": "recipes.tables_benchmark.wikitq.indexed_row_major.insert_empty_rows_augmentation_5_demos", - "no_augmentation_5_demos": "recipes.tables_benchmark.wikitq.indexed_row_major.no_augmentation_5_demos", - "shuffle_cols_augmentation_5_demos": "recipes.tables_benchmark.wikitq.indexed_row_major.shuffle_cols_augmentation_5_demos", - "shuffle_rows_augmentation_5_demos": "recipes.tables_benchmark.wikitq.indexed_row_major.shuffle_rows_augmentation_5_demos", - "transpose_augmentation_5_demos": "recipes.tables_benchmark.wikitq.indexed_row_major.transpose_augmentation_5_demos" + "insert_empty_rows_augmentation_1_demos": "recipes.tables_benchmark.wikitq.indexed_row_major.insert_empty_rows_augmentation_1_demos", + "no_augmentation_1_demos": "recipes.tables_benchmark.wikitq.indexed_row_major.no_augmentation_1_demos", + "shuffle_cols_augmentation_1_demos": "recipes.tables_benchmark.wikitq.indexed_row_major.shuffle_cols_augmentation_1_demos", + "shuffle_rows_augmentation_1_demos": "recipes.tables_benchmark.wikitq.indexed_row_major.shuffle_rows_augmentation_1_demos", + "transpose_augmentation_1_demos": "recipes.tables_benchmark.wikitq.indexed_row_major.transpose_augmentation_1_demos" } }, "json": { "__type__": "benchmark", "subsets": { - "insert_empty_rows_augmentation_5_demos": "recipes.tables_benchmark.wikitq.json.insert_empty_rows_augmentation_5_demos", - "no_augmentation_5_demos": "recipes.tables_benchmark.wikitq.json.no_augmentation_5_demos", - "shuffle_cols_augmentation_5_demos": "recipes.tables_benchmark.wikitq.json.shuffle_cols_augmentation_5_demos", - "shuffle_rows_augmentation_5_demos": "recipes.tables_benchmark.wikitq.json.shuffle_rows_augmentation_5_demos", - "transpose_augmentation_5_demos": "recipes.tables_benchmark.wikitq.json.transpose_augmentation_5_demos" + "insert_empty_rows_augmentation_1_demos": "recipes.tables_benchmark.wikitq.json.insert_empty_rows_augmentation_1_demos", + "no_augmentation_1_demos": "recipes.tables_benchmark.wikitq.json.no_augmentation_1_demos", + "shuffle_cols_augmentation_1_demos": "recipes.tables_benchmark.wikitq.json.shuffle_cols_augmentation_1_demos", + "shuffle_rows_augmentation_1_demos": "recipes.tables_benchmark.wikitq.json.shuffle_rows_augmentation_1_demos", + "transpose_augmentation_1_demos": "recipes.tables_benchmark.wikitq.json.transpose_augmentation_1_demos" } }, "markdown": { "__type__": "benchmark", "subsets": { - "insert_empty_rows_augmentation_5_demos": "recipes.tables_benchmark.wikitq.markdown.insert_empty_rows_augmentation_5_demos", - "no_augmentation_5_demos": "recipes.tables_benchmark.wikitq.markdown.no_augmentation_5_demos", - "shuffle_cols_augmentation_5_demos": "recipes.tables_benchmark.wikitq.markdown.shuffle_cols_augmentation_5_demos", - "shuffle_rows_augmentation_5_demos": "recipes.tables_benchmark.wikitq.markdown.shuffle_rows_augmentation_5_demos", - "transpose_augmentation_5_demos": "recipes.tables_benchmark.wikitq.markdown.transpose_augmentation_5_demos" + "insert_empty_rows_augmentation_1_demos": "recipes.tables_benchmark.wikitq.markdown.insert_empty_rows_augmentation_1_demos", + "no_augmentation_1_demos": "recipes.tables_benchmark.wikitq.markdown.no_augmentation_1_demos", + "shuffle_cols_augmentation_1_demos": "recipes.tables_benchmark.wikitq.markdown.shuffle_cols_augmentation_1_demos", + "shuffle_rows_augmentation_1_demos": "recipes.tables_benchmark.wikitq.markdown.shuffle_rows_augmentation_1_demos", + "transpose_augmentation_1_demos": "recipes.tables_benchmark.wikitq.markdown.transpose_augmentation_1_demos" } } } diff --git a/src/unitxt/catalog/cards/open_australian_legal_qa.json b/src/unitxt/catalog/cards/open_australian_legal_qa.json index 59719951ac..5f08fdcf49 100644 --- a/src/unitxt/catalog/cards/open_australian_legal_qa.json +++ b/src/unitxt/catalog/cards/open_australian_legal_qa.json @@ -2,7 +2,8 @@ "__type__": "task_card", "loader": { "__type__": "load_hf", - "path": "umarbutler/open-australian-legal-qa" + "path": "umarbutler/open-australian-legal-qa", + "name": "default" }, "preprocess_steps": [ { diff --git a/src/unitxt/catalog/cards/rag/response_generation/train/open_australian_legal_qa.json b/src/unitxt/catalog/cards/rag/response_generation/train/open_australian_legal_qa.json index b75d55d687..a22adba07b 100644 --- a/src/unitxt/catalog/cards/rag/response_generation/train/open_australian_legal_qa.json +++ b/src/unitxt/catalog/cards/rag/response_generation/train/open_australian_legal_qa.json @@ -2,7 +2,8 @@ "__type__": "task_card", "loader": { "__type__": "load_hf", - "path": "umarbutler/open-australian-legal-qa" + "path": "umarbutler/open-australian-legal-qa", + "name": "default" }, "preprocess_steps": [ { diff --git a/src/unitxt/catalog/metrics/llm_as_judge/direct/rits/granite3_0_8b.json b/src/unitxt/catalog/metrics/llm_as_judge/direct/rits/granite3_0_8b.json new file mode 100644 index 0000000000..2b814c0108 --- /dev/null +++ b/src/unitxt/catalog/metrics/llm_as_judge/direct/rits/granite3_0_8b.json @@ -0,0 +1,11 @@ +{ + "__type__": "llm_judge_direct", + "inference_engine": { + "__type__": "rits_inference_engine", + "model_name": "ibm-granite/granite-3.0-8b-instruct", + "max_tokens": 1024, + "seed": 42 + }, + "evaluator_name": "GRANITE3_8B", + "generate_summaries": false +} diff --git a/src/unitxt/catalog/metrics/llm_as_judge/direct/rits/granite3_1_8b.json b/src/unitxt/catalog/metrics/llm_as_judge/direct/rits/granite3_1_8b.json new file mode 100644 index 0000000000..3025d6f571 --- /dev/null +++ b/src/unitxt/catalog/metrics/llm_as_judge/direct/rits/granite3_1_8b.json @@ -0,0 +1,11 @@ +{ + "__type__": "llm_judge_direct", + "inference_engine": { + "__type__": "rits_inference_engine", + "model_name": "ibm-granite/granite-3.1-8b-instruct", + "max_tokens": 1024, + "seed": 42 + }, + "evaluator_name": "GRANITE3_1_8B", + "generate_summaries": false +} diff --git a/src/unitxt/catalog/metrics/llm_as_judge/direct/watsonx/granite3_8b.json b/src/unitxt/catalog/metrics/llm_as_judge/direct/watsonx/granite3_0_8b.json similarity index 100% rename from src/unitxt/catalog/metrics/llm_as_judge/direct/watsonx/granite3_8b.json rename to src/unitxt/catalog/metrics/llm_as_judge/direct/watsonx/granite3_0_8b.json diff --git a/src/unitxt/catalog/metrics/llm_as_judge/pairwise/rits/granite3_0_8b.json b/src/unitxt/catalog/metrics/llm_as_judge/pairwise/rits/granite3_0_8b.json new file mode 100644 index 0000000000..c9096fde67 --- /dev/null +++ b/src/unitxt/catalog/metrics/llm_as_judge/pairwise/rits/granite3_0_8b.json @@ -0,0 +1,11 @@ +{ + "__type__": "llm_judge_pairwise", + "inference_engine": { + "__type__": "rits_inference_engine", + "model_name": "ibm-granite/granite-3.0-8b-instruct", + "max_tokens": 1024, + "seed": 42 + }, + "evaluator_name": "GRANITE3_8B", + "generate_summaries": false +} diff --git a/src/unitxt/catalog/metrics/llm_as_judge/pairwise/rits/granite3_1_8b.json b/src/unitxt/catalog/metrics/llm_as_judge/pairwise/rits/granite3_1_8b.json new file mode 100644 index 0000000000..13004f414d --- /dev/null +++ b/src/unitxt/catalog/metrics/llm_as_judge/pairwise/rits/granite3_1_8b.json @@ -0,0 +1,11 @@ +{ + "__type__": "llm_judge_pairwise", + "inference_engine": { + "__type__": "rits_inference_engine", + "model_name": "ibm-granite/granite-3.1-8b-instruct", + "max_tokens": 1024, + "seed": 42 + }, + "evaluator_name": "GRANITE3_1_8B", + "generate_summaries": false +} diff --git a/src/unitxt/catalog/metrics/llm_as_judge/pairwise/watsonx/granite3_8b.json b/src/unitxt/catalog/metrics/llm_as_judge/pairwise/watsonx/granite3_0_8b.json similarity index 100% rename from src/unitxt/catalog/metrics/llm_as_judge/pairwise/watsonx/granite3_8b.json rename to src/unitxt/catalog/metrics/llm_as_judge/pairwise/watsonx/granite3_0_8b.json diff --git a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/concat/insert_empty_rows_augmentation_5_demos.json b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/concat/insert_empty_rows_augmentation_1_demos.json similarity index 92% rename from src/unitxt/catalog/recipes/tables_benchmark/wikitq/concat/insert_empty_rows_augmentation_5_demos.json rename to src/unitxt/catalog/recipes/tables_benchmark/wikitq/concat/insert_empty_rows_augmentation_1_demos.json index eb8ca747f8..cef6747a87 100644 --- a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/concat/insert_empty_rows_augmentation_5_demos.json +++ b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/concat/insert_empty_rows_augmentation_1_demos.json @@ -2,7 +2,7 @@ "__type__": "dataset_recipe", "card": "cards.wikitq", "serializer": "serializers.table.concat", - "num_demos": 5, + "num_demos": 1, "demos_pool_size": -1, "loader_limit": 10000, "augmentor": [ diff --git a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/concat/no_augmentation_5_demos.json b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/concat/no_augmentation_1_demos.json similarity index 90% rename from src/unitxt/catalog/recipes/tables_benchmark/wikitq/concat/no_augmentation_5_demos.json rename to src/unitxt/catalog/recipes/tables_benchmark/wikitq/concat/no_augmentation_1_demos.json index 4033762dbb..8087f1f339 100644 --- a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/concat/no_augmentation_5_demos.json +++ b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/concat/no_augmentation_1_demos.json @@ -2,7 +2,7 @@ "__type__": "dataset_recipe", "card": "cards.wikitq", "serializer": "serializers.table.concat", - "num_demos": 5, + "num_demos": 1, "demos_pool_size": -1, "loader_limit": 10000, "augmentor": null diff --git a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/concat/shuffle_cols_augmentation_5_demos.json b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/concat/shuffle_cols_augmentation_1_demos.json similarity index 92% rename from src/unitxt/catalog/recipes/tables_benchmark/wikitq/concat/shuffle_cols_augmentation_5_demos.json rename to src/unitxt/catalog/recipes/tables_benchmark/wikitq/concat/shuffle_cols_augmentation_1_demos.json index e0d1bb37d2..f435031f41 100644 --- a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/concat/shuffle_cols_augmentation_5_demos.json +++ b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/concat/shuffle_cols_augmentation_1_demos.json @@ -2,7 +2,7 @@ "__type__": "dataset_recipe", "card": "cards.wikitq", "serializer": "serializers.table.concat", - "num_demos": 5, + "num_demos": 1, "demos_pool_size": -1, "loader_limit": 10000, "augmentor": [ diff --git a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/concat/shuffle_rows_augmentation_5_demos.json b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/concat/shuffle_rows_augmentation_1_demos.json similarity index 92% rename from src/unitxt/catalog/recipes/tables_benchmark/wikitq/concat/shuffle_rows_augmentation_5_demos.json rename to src/unitxt/catalog/recipes/tables_benchmark/wikitq/concat/shuffle_rows_augmentation_1_demos.json index 3845117001..b1b23fe36b 100644 --- a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/concat/shuffle_rows_augmentation_5_demos.json +++ b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/concat/shuffle_rows_augmentation_1_demos.json @@ -2,7 +2,7 @@ "__type__": "dataset_recipe", "card": "cards.wikitq", "serializer": "serializers.table.concat", - "num_demos": 5, + "num_demos": 1, "demos_pool_size": -1, "loader_limit": 10000, "augmentor": [ diff --git a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/concat/transpose_augmentation_5_demos.json b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/concat/transpose_augmentation_1_demos.json similarity index 91% rename from src/unitxt/catalog/recipes/tables_benchmark/wikitq/concat/transpose_augmentation_5_demos.json rename to src/unitxt/catalog/recipes/tables_benchmark/wikitq/concat/transpose_augmentation_1_demos.json index 0a1ce8487b..8ff402f5d5 100644 --- a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/concat/transpose_augmentation_5_demos.json +++ b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/concat/transpose_augmentation_1_demos.json @@ -2,7 +2,7 @@ "__type__": "dataset_recipe", "card": "cards.wikitq", "serializer": "serializers.table.concat", - "num_demos": 5, + "num_demos": 1, "demos_pool_size": -1, "loader_limit": 10000, "augmentor": [ diff --git a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/csv/insert_empty_rows_augmentation_5_demos.json b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/csv/insert_empty_rows_augmentation_1_demos.json similarity index 91% rename from src/unitxt/catalog/recipes/tables_benchmark/wikitq/csv/insert_empty_rows_augmentation_5_demos.json rename to src/unitxt/catalog/recipes/tables_benchmark/wikitq/csv/insert_empty_rows_augmentation_1_demos.json index bbe11925dd..f652b94dc6 100644 --- a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/csv/insert_empty_rows_augmentation_5_demos.json +++ b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/csv/insert_empty_rows_augmentation_1_demos.json @@ -2,7 +2,7 @@ "__type__": "dataset_recipe", "card": "cards.wikitq", "serializer": null, - "num_demos": 5, + "num_demos": 1, "demos_pool_size": -1, "loader_limit": 10000, "augmentor": [ diff --git a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/csv/no_augmentation_5_demos.json b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/csv/no_augmentation_1_demos.json similarity index 89% rename from src/unitxt/catalog/recipes/tables_benchmark/wikitq/csv/no_augmentation_5_demos.json rename to src/unitxt/catalog/recipes/tables_benchmark/wikitq/csv/no_augmentation_1_demos.json index 7548b3cae6..080bfd2034 100644 --- a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/csv/no_augmentation_5_demos.json +++ b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/csv/no_augmentation_1_demos.json @@ -2,7 +2,7 @@ "__type__": "dataset_recipe", "card": "cards.wikitq", "serializer": null, - "num_demos": 5, + "num_demos": 1, "demos_pool_size": -1, "loader_limit": 10000, "augmentor": null diff --git a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/csv/shuffle_cols_augmentation_5_demos.json b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/csv/shuffle_cols_augmentation_1_demos.json similarity index 91% rename from src/unitxt/catalog/recipes/tables_benchmark/wikitq/csv/shuffle_cols_augmentation_5_demos.json rename to src/unitxt/catalog/recipes/tables_benchmark/wikitq/csv/shuffle_cols_augmentation_1_demos.json index 8d3740151a..bf1b92ac1c 100644 --- a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/csv/shuffle_cols_augmentation_5_demos.json +++ b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/csv/shuffle_cols_augmentation_1_demos.json @@ -2,7 +2,7 @@ "__type__": "dataset_recipe", "card": "cards.wikitq", "serializer": null, - "num_demos": 5, + "num_demos": 1, "demos_pool_size": -1, "loader_limit": 10000, "augmentor": [ diff --git a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/csv/shuffle_rows_augmentation_5_demos.json b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/csv/shuffle_rows_augmentation_1_demos.json similarity index 91% rename from src/unitxt/catalog/recipes/tables_benchmark/wikitq/csv/shuffle_rows_augmentation_5_demos.json rename to src/unitxt/catalog/recipes/tables_benchmark/wikitq/csv/shuffle_rows_augmentation_1_demos.json index d9f18fa6df..6483f7d088 100644 --- a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/csv/shuffle_rows_augmentation_5_demos.json +++ b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/csv/shuffle_rows_augmentation_1_demos.json @@ -2,7 +2,7 @@ "__type__": "dataset_recipe", "card": "cards.wikitq", "serializer": null, - "num_demos": 5, + "num_demos": 1, "demos_pool_size": -1, "loader_limit": 10000, "augmentor": [ diff --git a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/csv/transpose_augmentation_5_demos.json b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/csv/transpose_augmentation_1_demos.json similarity index 91% rename from src/unitxt/catalog/recipes/tables_benchmark/wikitq/csv/transpose_augmentation_5_demos.json rename to src/unitxt/catalog/recipes/tables_benchmark/wikitq/csv/transpose_augmentation_1_demos.json index 3a5622046d..5dfa589cf8 100644 --- a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/csv/transpose_augmentation_5_demos.json +++ b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/csv/transpose_augmentation_1_demos.json @@ -2,7 +2,7 @@ "__type__": "dataset_recipe", "card": "cards.wikitq", "serializer": null, - "num_demos": 5, + "num_demos": 1, "demos_pool_size": -1, "loader_limit": 10000, "augmentor": [ diff --git a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/df/insert_empty_rows_augmentation_5_demos.json b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/df/insert_empty_rows_augmentation_1_demos.json similarity index 92% rename from src/unitxt/catalog/recipes/tables_benchmark/wikitq/df/insert_empty_rows_augmentation_5_demos.json rename to src/unitxt/catalog/recipes/tables_benchmark/wikitq/df/insert_empty_rows_augmentation_1_demos.json index 5aa6864389..35ee9d4dce 100644 --- a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/df/insert_empty_rows_augmentation_5_demos.json +++ b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/df/insert_empty_rows_augmentation_1_demos.json @@ -2,7 +2,7 @@ "__type__": "dataset_recipe", "card": "cards.wikitq", "serializer": "serializers.table.df", - "num_demos": 5, + "num_demos": 1, "demos_pool_size": -1, "loader_limit": 10000, "augmentor": [ diff --git a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/df/no_augmentation_5_demos.json b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/df/no_augmentation_1_demos.json similarity index 90% rename from src/unitxt/catalog/recipes/tables_benchmark/wikitq/df/no_augmentation_5_demos.json rename to src/unitxt/catalog/recipes/tables_benchmark/wikitq/df/no_augmentation_1_demos.json index 19df1f390b..8caf7381e8 100644 --- a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/df/no_augmentation_5_demos.json +++ b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/df/no_augmentation_1_demos.json @@ -2,7 +2,7 @@ "__type__": "dataset_recipe", "card": "cards.wikitq", "serializer": "serializers.table.df", - "num_demos": 5, + "num_demos": 1, "demos_pool_size": -1, "loader_limit": 10000, "augmentor": null diff --git a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/df/shuffle_cols_augmentation_5_demos.json b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/df/shuffle_cols_augmentation_1_demos.json similarity index 91% rename from src/unitxt/catalog/recipes/tables_benchmark/wikitq/df/shuffle_cols_augmentation_5_demos.json rename to src/unitxt/catalog/recipes/tables_benchmark/wikitq/df/shuffle_cols_augmentation_1_demos.json index 06ec0593cd..5f43f96b37 100644 --- a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/df/shuffle_cols_augmentation_5_demos.json +++ b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/df/shuffle_cols_augmentation_1_demos.json @@ -2,7 +2,7 @@ "__type__": "dataset_recipe", "card": "cards.wikitq", "serializer": "serializers.table.df", - "num_demos": 5, + "num_demos": 1, "demos_pool_size": -1, "loader_limit": 10000, "augmentor": [ diff --git a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/df/shuffle_rows_augmentation_5_demos.json b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/df/shuffle_rows_augmentation_1_demos.json similarity index 91% rename from src/unitxt/catalog/recipes/tables_benchmark/wikitq/df/shuffle_rows_augmentation_5_demos.json rename to src/unitxt/catalog/recipes/tables_benchmark/wikitq/df/shuffle_rows_augmentation_1_demos.json index 411a96d5d9..ebe3e2a3a9 100644 --- a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/df/shuffle_rows_augmentation_5_demos.json +++ b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/df/shuffle_rows_augmentation_1_demos.json @@ -2,7 +2,7 @@ "__type__": "dataset_recipe", "card": "cards.wikitq", "serializer": "serializers.table.df", - "num_demos": 5, + "num_demos": 1, "demos_pool_size": -1, "loader_limit": 10000, "augmentor": [ diff --git a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/df/transpose_augmentation_5_demos.json b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/df/transpose_augmentation_1_demos.json similarity index 91% rename from src/unitxt/catalog/recipes/tables_benchmark/wikitq/df/transpose_augmentation_5_demos.json rename to src/unitxt/catalog/recipes/tables_benchmark/wikitq/df/transpose_augmentation_1_demos.json index a0a8634306..d40d98d296 100644 --- a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/df/transpose_augmentation_5_demos.json +++ b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/df/transpose_augmentation_1_demos.json @@ -2,7 +2,7 @@ "__type__": "dataset_recipe", "card": "cards.wikitq", "serializer": "serializers.table.df", - "num_demos": 5, + "num_demos": 1, "demos_pool_size": -1, "loader_limit": 10000, "augmentor": [ diff --git a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/html/insert_empty_rows_augmentation_5_demos.json b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/html/insert_empty_rows_augmentation_1_demos.json similarity index 92% rename from src/unitxt/catalog/recipes/tables_benchmark/wikitq/html/insert_empty_rows_augmentation_5_demos.json rename to src/unitxt/catalog/recipes/tables_benchmark/wikitq/html/insert_empty_rows_augmentation_1_demos.json index fafd3abe11..8c36297292 100644 --- a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/html/insert_empty_rows_augmentation_5_demos.json +++ b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/html/insert_empty_rows_augmentation_1_demos.json @@ -2,7 +2,7 @@ "__type__": "dataset_recipe", "card": "cards.wikitq", "serializer": "serializers.table.html", - "num_demos": 5, + "num_demos": 1, "demos_pool_size": -1, "loader_limit": 10000, "augmentor": [ diff --git a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/html/no_augmentation_5_demos.json b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/html/no_augmentation_1_demos.json similarity index 90% rename from src/unitxt/catalog/recipes/tables_benchmark/wikitq/html/no_augmentation_5_demos.json rename to src/unitxt/catalog/recipes/tables_benchmark/wikitq/html/no_augmentation_1_demos.json index a10569ef3f..915c7d3b6f 100644 --- a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/html/no_augmentation_5_demos.json +++ b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/html/no_augmentation_1_demos.json @@ -2,7 +2,7 @@ "__type__": "dataset_recipe", "card": "cards.wikitq", "serializer": "serializers.table.html", - "num_demos": 5, + "num_demos": 1, "demos_pool_size": -1, "loader_limit": 10000, "augmentor": null diff --git a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/html/shuffle_cols_augmentation_5_demos.json b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/html/shuffle_cols_augmentation_1_demos.json similarity index 91% rename from src/unitxt/catalog/recipes/tables_benchmark/wikitq/html/shuffle_cols_augmentation_5_demos.json rename to src/unitxt/catalog/recipes/tables_benchmark/wikitq/html/shuffle_cols_augmentation_1_demos.json index e5634e278d..9c67d37ea5 100644 --- a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/html/shuffle_cols_augmentation_5_demos.json +++ b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/html/shuffle_cols_augmentation_1_demos.json @@ -2,7 +2,7 @@ "__type__": "dataset_recipe", "card": "cards.wikitq", "serializer": "serializers.table.html", - "num_demos": 5, + "num_demos": 1, "demos_pool_size": -1, "loader_limit": 10000, "augmentor": [ diff --git a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/html/shuffle_rows_augmentation_5_demos.json b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/html/shuffle_rows_augmentation_1_demos.json similarity index 91% rename from src/unitxt/catalog/recipes/tables_benchmark/wikitq/html/shuffle_rows_augmentation_5_demos.json rename to src/unitxt/catalog/recipes/tables_benchmark/wikitq/html/shuffle_rows_augmentation_1_demos.json index 40d7fecca9..196fe58d66 100644 --- a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/html/shuffle_rows_augmentation_5_demos.json +++ b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/html/shuffle_rows_augmentation_1_demos.json @@ -2,7 +2,7 @@ "__type__": "dataset_recipe", "card": "cards.wikitq", "serializer": "serializers.table.html", - "num_demos": 5, + "num_demos": 1, "demos_pool_size": -1, "loader_limit": 10000, "augmentor": [ diff --git a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/html/transpose_augmentation_5_demos.json b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/html/transpose_augmentation_1_demos.json similarity index 91% rename from src/unitxt/catalog/recipes/tables_benchmark/wikitq/html/transpose_augmentation_5_demos.json rename to src/unitxt/catalog/recipes/tables_benchmark/wikitq/html/transpose_augmentation_1_demos.json index 8b1fca07ca..b079f9a8bb 100644 --- a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/html/transpose_augmentation_5_demos.json +++ b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/html/transpose_augmentation_1_demos.json @@ -2,7 +2,7 @@ "__type__": "dataset_recipe", "card": "cards.wikitq", "serializer": "serializers.table.html", - "num_demos": 5, + "num_demos": 1, "demos_pool_size": -1, "loader_limit": 10000, "augmentor": [ diff --git a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/indexed_row_major/insert_empty_rows_augmentation_5_demos.json b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/indexed_row_major/insert_empty_rows_augmentation_1_demos.json similarity index 92% rename from src/unitxt/catalog/recipes/tables_benchmark/wikitq/indexed_row_major/insert_empty_rows_augmentation_5_demos.json rename to src/unitxt/catalog/recipes/tables_benchmark/wikitq/indexed_row_major/insert_empty_rows_augmentation_1_demos.json index 7022994f81..926ec3b3cd 100644 --- a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/indexed_row_major/insert_empty_rows_augmentation_5_demos.json +++ b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/indexed_row_major/insert_empty_rows_augmentation_1_demos.json @@ -2,7 +2,7 @@ "__type__": "dataset_recipe", "card": "cards.wikitq", "serializer": "serializers.table.indexed_row_major", - "num_demos": 5, + "num_demos": 1, "demos_pool_size": -1, "loader_limit": 10000, "augmentor": [ diff --git a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/indexed_row_major/no_augmentation_5_demos.json b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/indexed_row_major/no_augmentation_1_demos.json similarity index 90% rename from src/unitxt/catalog/recipes/tables_benchmark/wikitq/indexed_row_major/no_augmentation_5_demos.json rename to src/unitxt/catalog/recipes/tables_benchmark/wikitq/indexed_row_major/no_augmentation_1_demos.json index 71405fe6bb..ff9f4b77f0 100644 --- a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/indexed_row_major/no_augmentation_5_demos.json +++ b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/indexed_row_major/no_augmentation_1_demos.json @@ -2,7 +2,7 @@ "__type__": "dataset_recipe", "card": "cards.wikitq", "serializer": "serializers.table.indexed_row_major", - "num_demos": 5, + "num_demos": 1, "demos_pool_size": -1, "loader_limit": 10000, "augmentor": null diff --git a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/indexed_row_major/shuffle_cols_augmentation_5_demos.json b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/indexed_row_major/shuffle_cols_augmentation_1_demos.json similarity index 92% rename from src/unitxt/catalog/recipes/tables_benchmark/wikitq/indexed_row_major/shuffle_cols_augmentation_5_demos.json rename to src/unitxt/catalog/recipes/tables_benchmark/wikitq/indexed_row_major/shuffle_cols_augmentation_1_demos.json index 69c70dcc6b..60b7790072 100644 --- a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/indexed_row_major/shuffle_cols_augmentation_5_demos.json +++ b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/indexed_row_major/shuffle_cols_augmentation_1_demos.json @@ -2,7 +2,7 @@ "__type__": "dataset_recipe", "card": "cards.wikitq", "serializer": "serializers.table.indexed_row_major", - "num_demos": 5, + "num_demos": 1, "demos_pool_size": -1, "loader_limit": 10000, "augmentor": [ diff --git a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/indexed_row_major/shuffle_rows_augmentation_5_demos.json b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/indexed_row_major/shuffle_rows_augmentation_1_demos.json similarity index 92% rename from src/unitxt/catalog/recipes/tables_benchmark/wikitq/indexed_row_major/shuffle_rows_augmentation_5_demos.json rename to src/unitxt/catalog/recipes/tables_benchmark/wikitq/indexed_row_major/shuffle_rows_augmentation_1_demos.json index 65104a6ef2..b9af744cbe 100644 --- a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/indexed_row_major/shuffle_rows_augmentation_5_demos.json +++ b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/indexed_row_major/shuffle_rows_augmentation_1_demos.json @@ -2,7 +2,7 @@ "__type__": "dataset_recipe", "card": "cards.wikitq", "serializer": "serializers.table.indexed_row_major", - "num_demos": 5, + "num_demos": 1, "demos_pool_size": -1, "loader_limit": 10000, "augmentor": [ diff --git a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/indexed_row_major/transpose_augmentation_5_demos.json b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/indexed_row_major/transpose_augmentation_1_demos.json similarity index 92% rename from src/unitxt/catalog/recipes/tables_benchmark/wikitq/indexed_row_major/transpose_augmentation_5_demos.json rename to src/unitxt/catalog/recipes/tables_benchmark/wikitq/indexed_row_major/transpose_augmentation_1_demos.json index 3c9dd27edc..0fd06cc91c 100644 --- a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/indexed_row_major/transpose_augmentation_5_demos.json +++ b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/indexed_row_major/transpose_augmentation_1_demos.json @@ -2,7 +2,7 @@ "__type__": "dataset_recipe", "card": "cards.wikitq", "serializer": "serializers.table.indexed_row_major", - "num_demos": 5, + "num_demos": 1, "demos_pool_size": -1, "loader_limit": 10000, "augmentor": [ diff --git a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/json/insert_empty_rows_augmentation_5_demos.json b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/json/insert_empty_rows_augmentation_1_demos.json similarity index 92% rename from src/unitxt/catalog/recipes/tables_benchmark/wikitq/json/insert_empty_rows_augmentation_5_demos.json rename to src/unitxt/catalog/recipes/tables_benchmark/wikitq/json/insert_empty_rows_augmentation_1_demos.json index 6ccfc3f8c9..0773610435 100644 --- a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/json/insert_empty_rows_augmentation_5_demos.json +++ b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/json/insert_empty_rows_augmentation_1_demos.json @@ -2,7 +2,7 @@ "__type__": "dataset_recipe", "card": "cards.wikitq", "serializer": "serializers.table.json", - "num_demos": 5, + "num_demos": 1, "demos_pool_size": -1, "loader_limit": 10000, "augmentor": [ diff --git a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/json/no_augmentation_5_demos.json b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/json/no_augmentation_1_demos.json similarity index 90% rename from src/unitxt/catalog/recipes/tables_benchmark/wikitq/json/no_augmentation_5_demos.json rename to src/unitxt/catalog/recipes/tables_benchmark/wikitq/json/no_augmentation_1_demos.json index a944c1bc4e..c2c324c575 100644 --- a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/json/no_augmentation_5_demos.json +++ b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/json/no_augmentation_1_demos.json @@ -2,7 +2,7 @@ "__type__": "dataset_recipe", "card": "cards.wikitq", "serializer": "serializers.table.json", - "num_demos": 5, + "num_demos": 1, "demos_pool_size": -1, "loader_limit": 10000, "augmentor": null diff --git a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/json/shuffle_cols_augmentation_5_demos.json b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/json/shuffle_cols_augmentation_1_demos.json similarity index 91% rename from src/unitxt/catalog/recipes/tables_benchmark/wikitq/json/shuffle_cols_augmentation_5_demos.json rename to src/unitxt/catalog/recipes/tables_benchmark/wikitq/json/shuffle_cols_augmentation_1_demos.json index 9bc6a84444..4d5b3ccd7e 100644 --- a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/json/shuffle_cols_augmentation_5_demos.json +++ b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/json/shuffle_cols_augmentation_1_demos.json @@ -2,7 +2,7 @@ "__type__": "dataset_recipe", "card": "cards.wikitq", "serializer": "serializers.table.json", - "num_demos": 5, + "num_demos": 1, "demos_pool_size": -1, "loader_limit": 10000, "augmentor": [ diff --git a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/json/shuffle_rows_augmentation_5_demos.json b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/json/shuffle_rows_augmentation_1_demos.json similarity index 91% rename from src/unitxt/catalog/recipes/tables_benchmark/wikitq/json/shuffle_rows_augmentation_5_demos.json rename to src/unitxt/catalog/recipes/tables_benchmark/wikitq/json/shuffle_rows_augmentation_1_demos.json index b52dbe27ae..7e41333fd6 100644 --- a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/json/shuffle_rows_augmentation_5_demos.json +++ b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/json/shuffle_rows_augmentation_1_demos.json @@ -2,7 +2,7 @@ "__type__": "dataset_recipe", "card": "cards.wikitq", "serializer": "serializers.table.json", - "num_demos": 5, + "num_demos": 1, "demos_pool_size": -1, "loader_limit": 10000, "augmentor": [ diff --git a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/json/transpose_augmentation_5_demos.json b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/json/transpose_augmentation_1_demos.json similarity index 91% rename from src/unitxt/catalog/recipes/tables_benchmark/wikitq/json/transpose_augmentation_5_demos.json rename to src/unitxt/catalog/recipes/tables_benchmark/wikitq/json/transpose_augmentation_1_demos.json index a808fe68aa..9e80bfc2be 100644 --- a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/json/transpose_augmentation_5_demos.json +++ b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/json/transpose_augmentation_1_demos.json @@ -2,7 +2,7 @@ "__type__": "dataset_recipe", "card": "cards.wikitq", "serializer": "serializers.table.json", - "num_demos": 5, + "num_demos": 1, "demos_pool_size": -1, "loader_limit": 10000, "augmentor": [ diff --git a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/markdown/insert_empty_rows_augmentation_5_demos.json b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/markdown/insert_empty_rows_augmentation_1_demos.json similarity index 92% rename from src/unitxt/catalog/recipes/tables_benchmark/wikitq/markdown/insert_empty_rows_augmentation_5_demos.json rename to src/unitxt/catalog/recipes/tables_benchmark/wikitq/markdown/insert_empty_rows_augmentation_1_demos.json index 58dca07d64..cf5b53fa70 100644 --- a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/markdown/insert_empty_rows_augmentation_5_demos.json +++ b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/markdown/insert_empty_rows_augmentation_1_demos.json @@ -2,7 +2,7 @@ "__type__": "dataset_recipe", "card": "cards.wikitq", "serializer": "serializers.table.markdown", - "num_demos": 5, + "num_demos": 1, "demos_pool_size": -1, "loader_limit": 10000, "augmentor": [ diff --git a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/markdown/no_augmentation_5_demos.json b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/markdown/no_augmentation_1_demos.json similarity index 90% rename from src/unitxt/catalog/recipes/tables_benchmark/wikitq/markdown/no_augmentation_5_demos.json rename to src/unitxt/catalog/recipes/tables_benchmark/wikitq/markdown/no_augmentation_1_demos.json index 9ac167ec98..a0db017db3 100644 --- a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/markdown/no_augmentation_5_demos.json +++ b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/markdown/no_augmentation_1_demos.json @@ -2,7 +2,7 @@ "__type__": "dataset_recipe", "card": "cards.wikitq", "serializer": "serializers.table.markdown", - "num_demos": 5, + "num_demos": 1, "demos_pool_size": -1, "loader_limit": 10000, "augmentor": null diff --git a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/markdown/shuffle_cols_augmentation_5_demos.json b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/markdown/shuffle_cols_augmentation_1_demos.json similarity index 92% rename from src/unitxt/catalog/recipes/tables_benchmark/wikitq/markdown/shuffle_cols_augmentation_5_demos.json rename to src/unitxt/catalog/recipes/tables_benchmark/wikitq/markdown/shuffle_cols_augmentation_1_demos.json index 421f6608e0..655c2811f8 100644 --- a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/markdown/shuffle_cols_augmentation_5_demos.json +++ b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/markdown/shuffle_cols_augmentation_1_demos.json @@ -2,7 +2,7 @@ "__type__": "dataset_recipe", "card": "cards.wikitq", "serializer": "serializers.table.markdown", - "num_demos": 5, + "num_demos": 1, "demos_pool_size": -1, "loader_limit": 10000, "augmentor": [ diff --git a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/markdown/shuffle_rows_augmentation_5_demos.json b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/markdown/shuffle_rows_augmentation_1_demos.json similarity index 92% rename from src/unitxt/catalog/recipes/tables_benchmark/wikitq/markdown/shuffle_rows_augmentation_5_demos.json rename to src/unitxt/catalog/recipes/tables_benchmark/wikitq/markdown/shuffle_rows_augmentation_1_demos.json index 09c7824b43..98f181c83f 100644 --- a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/markdown/shuffle_rows_augmentation_5_demos.json +++ b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/markdown/shuffle_rows_augmentation_1_demos.json @@ -2,7 +2,7 @@ "__type__": "dataset_recipe", "card": "cards.wikitq", "serializer": "serializers.table.markdown", - "num_demos": 5, + "num_demos": 1, "demos_pool_size": -1, "loader_limit": 10000, "augmentor": [ diff --git a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/markdown/transpose_augmentation_5_demos.json b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/markdown/transpose_augmentation_1_demos.json similarity index 92% rename from src/unitxt/catalog/recipes/tables_benchmark/wikitq/markdown/transpose_augmentation_5_demos.json rename to src/unitxt/catalog/recipes/tables_benchmark/wikitq/markdown/transpose_augmentation_1_demos.json index fccf3b4b17..a542b02393 100644 --- a/src/unitxt/catalog/recipes/tables_benchmark/wikitq/markdown/transpose_augmentation_5_demos.json +++ b/src/unitxt/catalog/recipes/tables_benchmark/wikitq/markdown/transpose_augmentation_1_demos.json @@ -2,7 +2,7 @@ "__type__": "dataset_recipe", "card": "cards.wikitq", "serializer": "serializers.table.markdown", - "num_demos": 5, + "num_demos": 1, "demos_pool_size": -1, "loader_limit": 10000, "augmentor": [ diff --git a/src/unitxt/llm_as_judge_constants.py b/src/unitxt/llm_as_judge_constants.py index 5a2e16ccfb..8f6dc6f43d 100644 --- a/src/unitxt/llm_as_judge_constants.py +++ b/src/unitxt/llm_as_judge_constants.py @@ -80,8 +80,10 @@ class EvaluatorNameEnum(str, Enum): O1_PREVIEW = "o1-Preview" O1_MINI = "o1-Mini" GRANITE_13B = "Granite-13b" - GRANITE3_2B = "Granite3-2b" - GRANITE3_8B = "Granite3-8b" + GRANITE3_2B = "Granite3.0-2b" + GRANITE3_8B = "Granite3.0-8b" + GRANITE3_1_2B = "Granite3.1-2b" + GRANITE3_1_8B = "Granite3.1-8b" GRANITE_GUARDIAN_2B = "Granite Guardian 3.0 2B" GRANITE_GUARDIAN_8B = "Granite Guardian 3.0 8B" @@ -108,6 +110,8 @@ class ModelProviderEnum(str, Enum): EvaluatorNameEnum.GRANITE_13B: "ibm/granite-13b-instruct-v2", EvaluatorNameEnum.GRANITE3_2B: "ibm/granite-3-2b-instruct", EvaluatorNameEnum.GRANITE3_8B: "ibm/granite-3-8b-instruct", + EvaluatorNameEnum.GRANITE3_1_2B: "ibm/granite-3.1-2b-instruct", + EvaluatorNameEnum.GRANITE3_1_8B: "ibm/granite-3.1-8b-instruct", EvaluatorNameEnum.GRANITE_GUARDIAN_2B: "ibm/granite-guardian-3-2b", EvaluatorNameEnum.GRANITE_GUARDIAN_8B: "ibm/granite-guardian-3-8b", } @@ -116,7 +120,8 @@ class ModelProviderEnum(str, Enum): ModelProviderEnum.RITS: { "meta-llama/llama-3-1-8b-instruct": "meta-llama/Llama-3.1-8B-Instruct", "mistralai/mixtral-8x7b-instruct-v01": "mistralai/mixtral-8x7B-instruct-v0.1", - "ibm/granite-guardian-3-2b": "ibm-granite/granite-3.0-8b-instruct", + "ibm/granite-3-8b-instruct": "ibm-granite/granite-3.0-8b-instruct", + "ibm/granite-3.1-8b-instruct": "ibm-granite/granite-3.1-8b-instruct", "meta-llama/llama-3-405b-instruct": "meta-llama/llama-3-1-405b-instruct-fp8", "mistralai/mistral-large": "mistralai/mistral-large-instruct-2407", }, @@ -154,7 +159,11 @@ def __init__(self, name, providers): ), EvaluatorMetadata( EvaluatorNameEnum.GRANITE3_8B, - [ModelProviderEnum.WATSONX], + [ModelProviderEnum.WATSONX, ModelProviderEnum.RITS], + ), + EvaluatorMetadata( + EvaluatorNameEnum.GRANITE3_1_8B, + [ModelProviderEnum.RITS], ), EvaluatorMetadata( EvaluatorNameEnum.GPT4, diff --git a/src/unitxt/struct_data_operators.py b/src/unitxt/struct_data_operators.py index 19413bd365..50c7ee809f 100644 --- a/src/unitxt/struct_data_operators.py +++ b/src/unitxt/struct_data_operators.py @@ -145,8 +145,7 @@ def process_row(self, row: List, row_index: int): row_cell_values = [ str(value) if isinstance(value, (int, float)) else value for value in row ] - - serialized_row_str += " | ".join(row_cell_values) + serialized_row_str += " | ".join([str(value) for value in row_cell_values]) return f"row {row_index} : {serialized_row_str}"