Add global mmlu lite sensitivity cards (#1568)
* added cards
* feat: add Global-MMLU-Lite CS/CA task cards

  Add two task cards for the Global-MMLU-Lite dataset:
  - CS card for culturally sensitive questions
  - CA card for culturally agnostic questions

  Both cards include:
  - Support for 14 languages
  - Multiple choice QA format
  - Topic mapping and preprocessing steps

* reformat files
* added cards
* reformat files
* merged files

---------

Co-authored-by: Elron Bandel <[email protected]>
1 parent 7152be4 · commit f9f9c5d

Showing 29 changed files with 3,856 additions and 0 deletions.
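The preparation script below registers each card under catalog names of the form cards.global_mmlu_lite_{cs|ca}.{language}. A minimal consumption sketch, assuming unitxt's load_dataset(card=..., template=...) entry point; the template name is chosen only for illustration and is not part of this commit:

# Minimal usage sketch. Assumptions: unitxt's top-level load_dataset API with
# keyword arguments, and an illustrative template from the with_topic family;
# neither name is added by this commit.
from unitxt import load_dataset

dataset = load_dataset(
    card="cards.global_mmlu_lite_cs.ar",  # culturally sensitive (CS) Arabic card registered below
    template="templates.qa.multiple_choice.with_topic.mmlu",
)
print(dataset["test"][0]["source"])  # rendered multiple-choice prompt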
@@ -0,0 +1,160 @@
from unitxt.card import TaskCard
from unitxt.catalog import add_to_catalog
from unitxt.loaders import LoadHF
from unitxt.operators import (
    Deduplicate,
    ListFieldValues,
    MapInstanceValues,
    Rename,
)
from unitxt.settings_utils import get_settings
from unitxt.splitters import SplitRandomMix
from unitxt.test_utils.card import test_card

languages = [
    "ar",
    "bn",
    "de",
    "fr",
    "hi",
    "id",
    "it",
    "ja",
    "ko",
    "pt",
    "es",
    "sw",
    "yo",
    "zh",
]

subtasks = [
    "abstract_algebra",
    "anatomy",
    "astronomy",
    "business_ethics",
    "clinical_knowledge",
    "college_biology",
    "college_chemistry",
    "college_computer_science",
    "college_mathematics",
    "college_medicine",
    "college_physics",
    "computer_security",
    "conceptual_physics",
    "econometrics",
    "electrical_engineering",
    "elementary_mathematics",
    "formal_logic",
    "global_facts",
    "high_school_biology",
    "high_school_chemistry",
    "high_school_computer_science",
    "high_school_european_history",
    "high_school_geography",
    "high_school_government_and_politics",
    "high_school_macroeconomics",
    "high_school_mathematics",
    "high_school_microeconomics",
    "high_school_physics",
    "high_school_psychology",
    "high_school_statistics",
    "high_school_us_history",
    "high_school_world_history",
    "human_aging",
    "human_sexuality",
    "international_law",
    "jurisprudence",
    "logical_fallacies",
    "machine_learning",
    "management",
    "marketing",
    "medical_genetics",
    "miscellaneous",
    "moral_disputes",
    "moral_scenarios",
    "nutrition",
    "philosophy",
    "prehistory",
    "professional_accounting",
    "professional_law",
    "professional_medicine",
    "professional_psychology",
    "public_relations",
    "security_studies",
    "sociology",
    "us_foreign_policy",
    "virology",
    "world_religions",
]
subject_mapping = {subject: subject.replace("_", " ") for subject in subtasks}

sensitivity_filters = [
    ("cs", "lambda x: x['cultural_sensitivity_label'] == 'CS'"),
    ("ca", "lambda x: x['cultural_sensitivity_label'] == 'CA'"),
]

is_first = True
settings = get_settings()
with settings.context(allow_unverified_code=True):
    for language in languages:
        for sensitivity_type, filtering_lambda in sensitivity_filters:
            card = TaskCard(
                loader=LoadHF(
                    path="CohereForAI/Global-MMLU-Lite",
                    name=language,
                    filtering_lambda=filtering_lambda,
                ),
                preprocess_steps=[
                    SplitRandomMix({"test": "test[100%]", "train": "test[10%]"}),
                    Deduplicate(by=["question", "subject", "answer"]),
                    MapInstanceValues(
                        mappers={
                            "answer": {
                                "A": 0,
                                "B": 1,
                                "C": 2,
                                "D": 3,
                            }
                        }
                    ),
                    ListFieldValues(
                        fields=["option_a", "option_b", "option_c", "option_d"],
                        to_field="choices",
                    ),
                    Rename(field_to_field={"subject": "topic"}),
                    MapInstanceValues(mappers={"topic": subject_mapping}),
                ],
                task="tasks.qa.multiple_choice.with_topic",
                templates="templates.qa.multiple_choice.with_topic.all",
                __tags__={
                    "annotations_creators": "expert-generated",
                    "language": language,
                    "language_creators": "expert-generated",
                    "license": "apache-2.0",
                    "multilinguality": "multilingual",
                    "size_categories": "10K<n<100K",
                    "source_datasets": "original",
                    "task_categories": "question-answering",
                    "task_ids": "multiple-choice-qa",
                    "region": "global",
                },
                __description__=(
                    "Global-MMLU-Lite is a streamlined multilingual evaluation set covering 15 languages. The dataset "
                    "includes 200 Culturally Sensitive (CS) and 200 Culturally Agnostic (CA) questions per language. "
                    "The samples in Global-MMLU-Lite correspond to languages that were fully human-translated or "
                    "post-edited in the original dataset. This initiative was led by Cohere For AI in collaboration "
                    "with external contributors from industry and academia. The test spans subjects in humanities, "
                    "social sciences, hard sciences, and other areas. For more information, see: "
                    "https://huggingface.co/datasets/CohereForAI/Global-MMLU-Lite"
                ),
            )

            if is_first:
                test_card(card, strict=False)
                is_first = False
            add_to_catalog(
                card,
                f"cards.global_mmlu_lite_{sensitivity_type}.{language}",
                overwrite=True,
            )
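For orientation, a sketch of what the preprocessing pipeline above does to a single row. The question and options are invented for illustration; the field names and mappings come from the operators in the card:

# Hypothetical raw row (values invented); field names match the card's operators.
raw_instance = {
    "question": "Which quantity is conserved in a perfectly elastic collision?",
    "option_a": "Only momentum",
    "option_b": "Only kinetic energy",
    "option_c": "Both momentum and kinetic energy",
    "option_d": "Neither momentum nor kinetic energy",
    "answer": "C",
    "subject": "conceptual_physics",
    "cultural_sensitivity_label": "CA",  # consumed only by the loader's filtering_lambda
}

# After MapInstanceValues, ListFieldValues, Rename and the topic mapping:
processed_instance = {
    "question": "Which quantity is conserved in a perfectly elastic collision?",
    "choices": [
        "Only momentum",
        "Only kinetic energy",
        "Both momentum and kinetic energy",
        "Neither momentum nor kinetic energy",
    ],                              # ListFieldValues over option_a..option_d
    "answer": 2,                    # "C" -> 2
    "topic": "conceptual physics",  # subject renamed to topic, underscores mapped to spaces
}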
@@ -0,0 +1,132 @@
{
    "__type__": "task_card",
    "loader": {
        "__type__": "load_hf",
        "path": "CohereForAI/Global-MMLU-Lite",
        "name": "ar",
        "filtering_lambda": "lambda x: x['cultural_sensitivity_label'] == 'CA'"
    },
    "preprocess_steps": [
        {
            "__type__": "split_random_mix",
            "mix": {
                "test": "test[100%]",
                "train": "test[10%]"
            }
        },
        {
            "__type__": "deduplicate",
            "by": [
                "question",
                "subject",
                "answer"
            ]
        },
        {
            "__type__": "map_instance_values",
            "mappers": {
                "answer": {
                    "A": 0,
                    "B": 1,
                    "C": 2,
                    "D": 3
                }
            }
        },
        {
            "__type__": "list_field_values",
            "fields": [
                "option_a",
                "option_b",
                "option_c",
                "option_d"
            ],
            "to_field": "choices"
        },
        {
            "__type__": "rename",
            "field_to_field": {
                "subject": "topic"
            }
        },
        {
            "__type__": "map_instance_values",
            "mappers": {
                "topic": {
                    "abstract_algebra": "abstract algebra",
                    "anatomy": "anatomy",
                    "astronomy": "astronomy",
                    "business_ethics": "business ethics",
                    "clinical_knowledge": "clinical knowledge",
                    "college_biology": "college biology",
                    "college_chemistry": "college chemistry",
                    "college_computer_science": "college computer science",
                    "college_mathematics": "college mathematics",
                    "college_medicine": "college medicine",
                    "college_physics": "college physics",
                    "computer_security": "computer security",
                    "conceptual_physics": "conceptual physics",
                    "econometrics": "econometrics",
                    "electrical_engineering": "electrical engineering",
                    "elementary_mathematics": "elementary mathematics",
                    "formal_logic": "formal logic",
                    "global_facts": "global facts",
                    "high_school_biology": "high school biology",
                    "high_school_chemistry": "high school chemistry",
                    "high_school_computer_science": "high school computer science",
                    "high_school_european_history": "high school european history",
                    "high_school_geography": "high school geography",
                    "high_school_government_and_politics": "high school government and politics",
                    "high_school_macroeconomics": "high school macroeconomics",
                    "high_school_mathematics": "high school mathematics",
                    "high_school_microeconomics": "high school microeconomics",
                    "high_school_physics": "high school physics",
                    "high_school_psychology": "high school psychology",
                    "high_school_statistics": "high school statistics",
                    "high_school_us_history": "high school us history",
                    "high_school_world_history": "high school world history",
                    "human_aging": "human aging",
                    "human_sexuality": "human sexuality",
                    "international_law": "international law",
                    "jurisprudence": "jurisprudence",
                    "logical_fallacies": "logical fallacies",
                    "machine_learning": "machine learning",
                    "management": "management",
                    "marketing": "marketing",
                    "medical_genetics": "medical genetics",
                    "miscellaneous": "miscellaneous",
                    "moral_disputes": "moral disputes",
                    "moral_scenarios": "moral scenarios",
                    "nutrition": "nutrition",
                    "philosophy": "philosophy",
                    "prehistory": "prehistory",
                    "professional_accounting": "professional accounting",
                    "professional_law": "professional law",
                    "professional_medicine": "professional medicine",
                    "professional_psychology": "professional psychology",
                    "public_relations": "public relations",
                    "security_studies": "security studies",
                    "sociology": "sociology",
                    "us_foreign_policy": "us foreign policy",
                    "virology": "virology",
                    "world_religions": "world religions"
                }
            }
        }
    ],
    "task": "tasks.qa.multiple_choice.with_topic",
    "templates": "templates.qa.multiple_choice.with_topic.all",
    "__tags__": {
        "annotations_creators": "expert-generated",
        "language": "ar",
        "language_creators": "expert-generated",
        "license": "apache-2.0",
        "multilinguality": "multilingual",
        "size_categories": "10K<n<100K",
        "source_datasets": "original",
        "task_categories": "question-answering",
        "task_ids": "multiple-choice-qa",
        "region": "global"
    },
    "__description__": "Global-MMLU-Lite is a streamlined multilingual evaluation set covering 15 languages. The dataset includes 200 Culturally Sensitive (CS) and 200 Culturally Agnostic (CA) questions per language. The samples in Global-MMLU-Lite correspond to languages that were fully human-translated or post-edited in the original dataset. This initiative was led by Cohere For AI in collaboration with external contributors from industry and academia. The test spans subjects in humanities, social sciences, hard sciences, and other areas. For more information, see: https://huggingface.co/datasets/CohereForAI/Global-MMLU-Lite"
}
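The JSON above is the serialized catalog entry that add_to_catalog writes for the ar / CA combination (cards.global_mmlu_lite_ca.ar). By the loop in the preparation script, the remaining changed files in this commit are the analogous entries for the other language and sensitivity-type combinations: 14 languages × 2 types = 28 catalog files, plus the preparation script itself.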