
Commit

Add global mmlu lite sensitivity cards (#1568)
* added cards

* feat: add Global-MMLU-Lite CS/CA task cards

Add two task cards for the Global-MMLU-Lite dataset:
- CS card for culturally sensitive questions
- CA card for culturally agnostic questions

Both cards include:
- Support for 14 languages
- Multiple choice QA format
- Topic mapping and preprocessing steps

* reformat files

* merged files

---------

Co-authored-by: Elron Bandel <[email protected]>
eliyahabba and elronbandel authored Feb 2, 2025
1 parent 7152be4 commit f9f9c5d
Showing 29 changed files with 3,856 additions and 0 deletions.
160 changes: 160 additions & 0 deletions prepare/cards/global_mmlu_lite_sensitivity.py
@@ -0,0 +1,160 @@
from unitxt.card import TaskCard
from unitxt.catalog import add_to_catalog
from unitxt.loaders import LoadHF
from unitxt.operators import (
Deduplicate,
ListFieldValues,
MapInstanceValues,
Rename,
)
from unitxt.settings_utils import get_settings
from unitxt.splitters import SplitRandomMix
from unitxt.test_utils.card import test_card

# The 14 target languages for which cards are generated
languages = [
"ar",
"bn",
"de",
"fr",
"hi",
"id",
"it",
"ja",
"ko",
"pt",
"es",
"sw",
"yo",
"zh",
]

# The 57 MMLU subject names, mapped below to human-readable topic names
subtasks = [
"abstract_algebra",
"anatomy",
"astronomy",
"business_ethics",
"clinical_knowledge",
"college_biology",
"college_chemistry",
"college_computer_science",
"college_mathematics",
"college_medicine",
"college_physics",
"computer_security",
"conceptual_physics",
"econometrics",
"electrical_engineering",
"elementary_mathematics",
"formal_logic",
"global_facts",
"high_school_biology",
"high_school_chemistry",
"high_school_computer_science",
"high_school_european_history",
"high_school_geography",
"high_school_government_and_politics",
"high_school_macroeconomics",
"high_school_mathematics",
"high_school_microeconomics",
"high_school_physics",
"high_school_psychology",
"high_school_statistics",
"high_school_us_history",
"high_school_world_history",
"human_aging",
"human_sexuality",
"international_law",
"jurisprudence",
"logical_fallacies",
"machine_learning",
"management",
"marketing",
"medical_genetics",
"miscellaneous",
"moral_disputes",
"moral_scenarios",
"nutrition",
"philosophy",
"prehistory",
"professional_accounting",
"professional_law",
"professional_medicine",
"professional_psychology",
"public_relations",
"security_studies",
"sociology",
"us_foreign_policy",
"virology",
"world_religions",
]
# e.g. "abstract_algebra" -> "abstract algebra"
subject_mapping = {subject: subject.replace("_", " ") for subject in subtasks}

# (catalog suffix, row filter): CS = culturally sensitive, CA = culturally agnostic
sensitivity_filters = [
("cs", "lambda x: x['cultural_sensitivity_label'] == 'CS'"),
("ca", "lambda x: x['cultural_sensitivity_label'] == 'CA'"),
]
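
# Illustration (not part of the original commit): LoadHF presumably evaluates each
# filter string into a callable and keeps only the rows for which it returns True:
#   f = eval("lambda x: x['cultural_sensitivity_label'] == 'CS'")
#   f({"cultural_sensitivity_label": "CS"})  # -> True
#   f({"cultural_sensitivity_label": "CA"})  # -> False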

is_first = True  # test only the first generated card, to keep preparation fast
settings = get_settings()
# The string lambdas in sensitivity_filters require permission to run unverified code.
with settings.context(allow_unverified_code=True):
for language in languages:
for sensitivity_type, filtering_lambda in sensitivity_filters:
card = TaskCard(
loader=LoadHF(
path="CohereForAI/Global-MMLU-Lite",
name=language,
filtering_lambda=filtering_lambda,
),
preprocess_steps=[
                    # Build splits from the dataset's single "test" split:
                    # all of it for evaluation, a 10% sample as a small train split
                    SplitRandomMix({"test": "test[100%]", "train": "test[10%]"}),
Deduplicate(by=["question", "subject", "answer"]),
MapInstanceValues(
mappers={
"answer": {
"A": 0,
"B": 1,
"C": 2,
"D": 3,
}
}
),
ListFieldValues(
fields=["option_a", "option_b", "option_c", "option_d"],
to_field="choices",
),
Rename(field_to_field={"subject": "topic"}),
MapInstanceValues(mappers={"topic": subject_mapping}),
],
task="tasks.qa.multiple_choice.with_topic",
templates="templates.qa.multiple_choice.with_topic.all",
__tags__={
"annotations_creators": "expert-generated",
"language": language,
"language_creators": "expert-generated",
"license": "apache-2.0",
"multilinguality": "multilingual",
"size_categories": "10K<n<100K",
"source_datasets": "original",
"task_categories": "question-answering",
"task_ids": "multiple-choice-qa",
"region": "global",
},
__description__=(
"Global-MMLU-Lite is a streamlined multilingual evaluation set covering 15 languages. The dataset "
"includes 200 Culturally Sensitive (CS) and 200 Culturally Agnostic (CA) questions per language. "
"The samples in Global-MMLU-Lite correspond to languages that were fully human-translated or "
"post-edited in the original dataset. This initiative was led by Cohere For AI in collaboration "
"with external contributors from industry and academia. The test spans subjects in humanities, "
"social sciences, hard sciences, and other areas. For more information, see: "
"https://huggingface.co/datasets/CohereForAI/Global-MMLU-Lite"
),
)

if is_first:
test_card(card, strict=False)
is_first = False
add_to_catalog(
card,
f"cards.global_mmlu_lite_{sensitivity_type}.{language}",
overwrite=True,
)
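
For reference, a card registered by this script can be consumed through unitxt's recipe syntax. A minimal sketch, assuming a unitxt installation that includes this catalog; the template name is illustrative, and any template registered under templates.qa.multiple_choice.with_topic would do:

from unitxt import load_dataset

# Load the culturally-sensitive Arabic card; the template below is an assumed
# example -- substitute any registered multiple-choice-with-topic template.
dataset = load_dataset(
    "card=cards.global_mmlu_lite_cs.ar,"
    "template=templates.qa.multiple_choice.with_topic.title"
)
print(dataset["test"][0]["source"])  # the rendered multiple-choice prompt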
132 changes: 132 additions & 0 deletions src/unitxt/catalog/cards/global_mmlu_lite_ca/ar.json
@@ -0,0 +1,132 @@
{
"__type__": "task_card",
"loader": {
"__type__": "load_hf",
"path": "CohereForAI/Global-MMLU-Lite",
"name": "ar",
"filtering_lambda": "lambda x: x['cultural_sensitivity_label'] == 'CA'"
},
"preprocess_steps": [
{
"__type__": "split_random_mix",
"mix": {
"test": "test[100%]",
"train": "test[10%]"
}
},
{
"__type__": "deduplicate",
"by": [
"question",
"subject",
"answer"
]
},
{
"__type__": "map_instance_values",
"mappers": {
"answer": {
"A": 0,
"B": 1,
"C": 2,
"D": 3
}
}
},
{
"__type__": "list_field_values",
"fields": [
"option_a",
"option_b",
"option_c",
"option_d"
],
"to_field": "choices"
},
{
"__type__": "rename",
"field_to_field": {
"subject": "topic"
}
},
{
"__type__": "map_instance_values",
"mappers": {
"topic": {
"abstract_algebra": "abstract algebra",
"anatomy": "anatomy",
"astronomy": "astronomy",
"business_ethics": "business ethics",
"clinical_knowledge": "clinical knowledge",
"college_biology": "college biology",
"college_chemistry": "college chemistry",
"college_computer_science": "college computer science",
"college_mathematics": "college mathematics",
"college_medicine": "college medicine",
"college_physics": "college physics",
"computer_security": "computer security",
"conceptual_physics": "conceptual physics",
"econometrics": "econometrics",
"electrical_engineering": "electrical engineering",
"elementary_mathematics": "elementary mathematics",
"formal_logic": "formal logic",
"global_facts": "global facts",
"high_school_biology": "high school biology",
"high_school_chemistry": "high school chemistry",
"high_school_computer_science": "high school computer science",
"high_school_european_history": "high school european history",
"high_school_geography": "high school geography",
"high_school_government_and_politics": "high school government and politics",
"high_school_macroeconomics": "high school macroeconomics",
"high_school_mathematics": "high school mathematics",
"high_school_microeconomics": "high school microeconomics",
"high_school_physics": "high school physics",
"high_school_psychology": "high school psychology",
"high_school_statistics": "high school statistics",
"high_school_us_history": "high school us history",
"high_school_world_history": "high school world history",
"human_aging": "human aging",
"human_sexuality": "human sexuality",
"international_law": "international law",
"jurisprudence": "jurisprudence",
"logical_fallacies": "logical fallacies",
"machine_learning": "machine learning",
"management": "management",
"marketing": "marketing",
"medical_genetics": "medical genetics",
"miscellaneous": "miscellaneous",
"moral_disputes": "moral disputes",
"moral_scenarios": "moral scenarios",
"nutrition": "nutrition",
"philosophy": "philosophy",
"prehistory": "prehistory",
"professional_accounting": "professional accounting",
"professional_law": "professional law",
"professional_medicine": "professional medicine",
"professional_psychology": "professional psychology",
"public_relations": "public relations",
"security_studies": "security studies",
"sociology": "sociology",
"us_foreign_policy": "us foreign policy",
"virology": "virology",
"world_religions": "world religions"
}
}
}
],
"task": "tasks.qa.multiple_choice.with_topic",
"templates": "templates.qa.multiple_choice.with_topic.all",
"__tags__": {
"annotations_creators": "expert-generated",
"language": "ar",
"language_creators": "expert-generated",
"license": "apache-2.0",
"multilinguality": "multilingual",
"size_categories": "10K<n<100K",
"source_datasets": "original",
"task_categories": "question-answering",
"task_ids": "multiple-choice-qa",
"region": "global"
},
"__description__": "Global-MMLU-Lite is a streamlined multilingual evaluation set covering 15 languages. The dataset includes 200 Culturally Sensitive (CS) and 200 Culturally Agnostic (CA) questions per language. The samples in Global-MMLU-Lite correspond to languages that were fully human-translated or post-edited in the original dataset. This initiative was led by Cohere For AI in collaboration with external contributors from industry and academia. The test spans subjects in humanities, social sciences, hard sciences, and other areas. For more information, see: https://huggingface.co/datasets/CohereForAI/Global-MMLU-Lite"
}
