
Commit

Add global mmlu lite sensitivity cards (#1568)
* added cards

* feat: add Global-MMLU-Lite CS/CA task cards

Add two task cards for the Global-MMLU-Lite dataset:
- CS card for culturally sensitive questions
- CA card for culturally agnostic questions

Both cards include:
- Support for 14 languages
- Multiple choice QA format
- Topic mapping and preprocessing steps

* reformat files

* merged files

---------

Co-authored-by: Elron Bandel <[email protected]>
eliyahabba and elronbandel authored Feb 2, 2025
1 parent 7152be4 commit f9f9c5d
Showing 29 changed files with 3,856 additions and 0 deletions.
160 changes: 160 additions & 0 deletions prepare/cards/global_mmlu_lite_sensitivity.py
@@ -0,0 +1,160 @@
from unitxt.card import TaskCard
from unitxt.catalog import add_to_catalog
from unitxt.loaders import LoadHF
from unitxt.operators import (
Deduplicate,
ListFieldValues,
MapInstanceValues,
Rename,
)
from unitxt.settings_utils import get_settings
from unitxt.splitters import SplitRandomMix
from unitxt.test_utils.card import test_card

# The 14 target languages for which cards are generated
languages = [
"ar",
"bn",
"de",
"fr",
"hi",
"id",
"it",
"ja",
"ko",
"pt",
"es",
"sw",
"yo",
"zh",
]

# The 57 MMLU subject names, mapped below to human-readable topic names
subtasks = [
"abstract_algebra",
"anatomy",
"astronomy",
"business_ethics",
"clinical_knowledge",
"college_biology",
"college_chemistry",
"college_computer_science",
"college_mathematics",
"college_medicine",
"college_physics",
"computer_security",
"conceptual_physics",
"econometrics",
"electrical_engineering",
"elementary_mathematics",
"formal_logic",
"global_facts",
"high_school_biology",
"high_school_chemistry",
"high_school_computer_science",
"high_school_european_history",
"high_school_geography",
"high_school_government_and_politics",
"high_school_macroeconomics",
"high_school_mathematics",
"high_school_microeconomics",
"high_school_physics",
"high_school_psychology",
"high_school_statistics",
"high_school_us_history",
"high_school_world_history",
"human_aging",
"human_sexuality",
"international_law",
"jurisprudence",
"logical_fallacies",
"machine_learning",
"management",
"marketing",
"medical_genetics",
"miscellaneous",
"moral_disputes",
"moral_scenarios",
"nutrition",
"philosophy",
"prehistory",
"professional_accounting",
"professional_law",
"professional_medicine",
"professional_psychology",
"public_relations",
"security_studies",
"sociology",
"us_foreign_policy",
"virology",
"world_religions",
]
# e.g. "abstract_algebra" -> "abstract algebra"
subject_mapping = {subject: subject.replace("_", " ") for subject in subtasks}

# (catalog suffix, row filter): CS = culturally sensitive, CA = culturally agnostic
sensitivity_filters = [
("cs", "lambda x: x['cultural_sensitivity_label'] == 'CS'"),
("ca", "lambda x: x['cultural_sensitivity_label'] == 'CA'"),
]
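
# Illustration (not part of the original commit): LoadHF presumably evaluates each
# filter string into a callable and keeps only the rows for which it returns True:
#   f = eval("lambda x: x['cultural_sensitivity_label'] == 'CS'")
#   f({"cultural_sensitivity_label": "CS"})  # -> True
#   f({"cultural_sensitivity_label": "CA"})  # -> False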

is_first = True  # test only the first generated card, to keep preparation fast
settings = get_settings()
# The string lambdas in sensitivity_filters require permission to run unverified code.
with settings.context(allow_unverified_code=True):
for language in languages:
for sensitivity_type, filtering_lambda in sensitivity_filters:
card = TaskCard(
loader=LoadHF(
path="CohereForAI/Global-MMLU-Lite",
name=language,
filtering_lambda=filtering_lambda,
),
preprocess_steps=[
                    # Build splits from the dataset's single "test" split:
                    # all of it for evaluation, a 10% sample as a small train split
                    SplitRandomMix({"test": "test[100%]", "train": "test[10%]"}),
Deduplicate(by=["question", "subject", "answer"]),
MapInstanceValues(
mappers={
"answer": {
"A": 0,
"B": 1,
"C": 2,
"D": 3,
}
}
),
ListFieldValues(
fields=["option_a", "option_b", "option_c", "option_d"],
to_field="choices",
),
Rename(field_to_field={"subject": "topic"}),
MapInstanceValues(mappers={"topic": subject_mapping}),
],
task="tasks.qa.multiple_choice.with_topic",
templates="templates.qa.multiple_choice.with_topic.all",
__tags__={
"annotations_creators": "expert-generated",
"language": language,
"language_creators": "expert-generated",
"license": "apache-2.0",
"multilinguality": "multilingual",
"size_categories": "10K<n<100K",
"source_datasets": "original",
"task_categories": "question-answering",
"task_ids": "multiple-choice-qa",
"region": "global",
},
__description__=(
"Global-MMLU-Lite is a streamlined multilingual evaluation set covering 15 languages. The dataset "
"includes 200 Culturally Sensitive (CS) and 200 Culturally Agnostic (CA) questions per language. "
"The samples in Global-MMLU-Lite correspond to languages that were fully human-translated or "
"post-edited in the original dataset. This initiative was led by Cohere For AI in collaboration "
"with external contributors from industry and academia. The test spans subjects in humanities, "
"social sciences, hard sciences, and other areas. For more information, see: "
"https://huggingface.co/datasets/CohereForAI/Global-MMLU-Lite"
),
)

if is_first:
test_card(card, strict=False)
is_first = False
add_to_catalog(
card,
f"cards.global_mmlu_lite_{sensitivity_type}.{language}",
overwrite=True,
)
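
For reference, a card registered by this script can be consumed through unitxt's recipe syntax. A minimal sketch, assuming a unitxt installation that includes this catalog; the template name is illustrative, and any template registered under templates.qa.multiple_choice.with_topic would do:

from unitxt import load_dataset

# Load the culturally-sensitive Arabic card; the template below is an assumed
# example -- substitute any registered multiple-choice-with-topic template.
dataset = load_dataset(
    "card=cards.global_mmlu_lite_cs.ar,"
    "template=templates.qa.multiple_choice.with_topic.title"
)
print(dataset["test"][0]["source"])  # the rendered multiple-choice prompt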
132 changes: 132 additions & 0 deletions src/unitxt/catalog/cards/global_mmlu_lite_ca/ar.json
@@ -0,0 +1,132 @@
{
"__type__": "task_card",
"loader": {
"__type__": "load_hf",
"path": "CohereForAI/Global-MMLU-Lite",
"name": "ar",
"filtering_lambda": "lambda x: x['cultural_sensitivity_label'] == 'CA'"
},
"preprocess_steps": [
{
"__type__": "split_random_mix",
"mix": {
"test": "test[100%]",
"train": "test[10%]"
}
},
{
"__type__": "deduplicate",
"by": [
"question",
"subject",
"answer"
]
},
{
"__type__": "map_instance_values",
"mappers": {
"answer": {
"A": 0,
"B": 1,
"C": 2,
"D": 3
}
}
},
{
"__type__": "list_field_values",
"fields": [
"option_a",
"option_b",
"option_c",
"option_d"
],
"to_field": "choices"
},
{
"__type__": "rename",
"field_to_field": {
"subject": "topic"
}
},
{
"__type__": "map_instance_values",
"mappers": {
"topic": {
"abstract_algebra": "abstract algebra",
"anatomy": "anatomy",
"astronomy": "astronomy",
"business_ethics": "business ethics",
"clinical_knowledge": "clinical knowledge",
"college_biology": "college biology",
"college_chemistry": "college chemistry",
"college_computer_science": "college computer science",
"college_mathematics": "college mathematics",
"college_medicine": "college medicine",
"college_physics": "college physics",
"computer_security": "computer security",
"conceptual_physics": "conceptual physics",
"econometrics": "econometrics",
"electrical_engineering": "electrical engineering",
"elementary_mathematics": "elementary mathematics",
"formal_logic": "formal logic",
"global_facts": "global facts",
"high_school_biology": "high school biology",
"high_school_chemistry": "high school chemistry",
"high_school_computer_science": "high school computer science",
"high_school_european_history": "high school european history",
"high_school_geography": "high school geography",
"high_school_government_and_politics": "high school government and politics",
"high_school_macroeconomics": "high school macroeconomics",
"high_school_mathematics": "high school mathematics",
"high_school_microeconomics": "high school microeconomics",
"high_school_physics": "high school physics",
"high_school_psychology": "high school psychology",
"high_school_statistics": "high school statistics",
"high_school_us_history": "high school us history",
"high_school_world_history": "high school world history",
"human_aging": "human aging",
"human_sexuality": "human sexuality",
"international_law": "international law",
"jurisprudence": "jurisprudence",
"logical_fallacies": "logical fallacies",
"machine_learning": "machine learning",
"management": "management",
"marketing": "marketing",
"medical_genetics": "medical genetics",
"miscellaneous": "miscellaneous",
"moral_disputes": "moral disputes",
"moral_scenarios": "moral scenarios",
"nutrition": "nutrition",
"philosophy": "philosophy",
"prehistory": "prehistory",
"professional_accounting": "professional accounting",
"professional_law": "professional law",
"professional_medicine": "professional medicine",
"professional_psychology": "professional psychology",
"public_relations": "public relations",
"security_studies": "security studies",
"sociology": "sociology",
"us_foreign_policy": "us foreign policy",
"virology": "virology",
"world_religions": "world religions"
}
}
}
],
"task": "tasks.qa.multiple_choice.with_topic",
"templates": "templates.qa.multiple_choice.with_topic.all",
"__tags__": {
"annotations_creators": "expert-generated",
"language": "ar",
"language_creators": "expert-generated",
"license": "apache-2.0",
"multilinguality": "multilingual",
"size_categories": "10K<n<100K",
"source_datasets": "original",
"task_categories": "question-answering",
"task_ids": "multiple-choice-qa",
"region": "global"
},
"__description__": "Global-MMLU-Lite is a streamlined multilingual evaluation set covering 15 languages. The dataset includes 200 Culturally Sensitive (CS) and 200 Culturally Agnostic (CA) questions per language. The samples in Global-MMLU-Lite correspond to languages that were fully human-translated or post-edited in the original dataset. This initiative was led by Cohere For AI in collaboration with external contributors from industry and academia. The test spans subjects in humanities, social sciences, hard sciences, and other areas. For more information, see: https://huggingface.co/datasets/CohereForAI/Global-MMLU-Lite"
}
