Merge pull request #10 from alan-turing-institute/8-replicate-demetr-results-for-bleu

8 replicate demetr results for bleu
klh5 authored Jan 10, 2025
2 parents 713f2fb + f9ba2d0 commit c8d60ef
Showing 5 changed files with 393 additions and 4 deletions.
20 changes: 16 additions & 4 deletions pyproject.toml
@@ -10,7 +10,7 @@ authors = [
]
description = "Evaluation of Metrics for Speech Translation (M4ST)"
readme = "README.md"
-requires-python = ">=3.10"
+requires-python = "==3.11.*"
classifiers = [
"Development Status :: 1 - Planning",
"Intended Audience :: Science/Research",
@@ -27,15 +27,27 @@ classifiers = [
"Typing :: Typed",
]
dependencies = [
"tqdm",
"requests"
"tqdm",
"requests",
"evaluate>=0.4.3",
"fairseq2>=0.2.0",
"filelock>=3.0.12",
"ipykernel>=6.29.5",
"nltk>=3.9.1",
"pandas>=2.2.3",
"sacrebleu>=2.4.3",
"seaborn>=0.13.2",
"sonar-space>=0.2.0",
"torch==2.0.1",
"torchvision>=0.15.2",
"unbabel-comet==2.2.3",
]

[project.optional-dependencies]
dev = [
"pytest >=6",
"pytest-cov >=3",
"pre-commit",
"pre-commit>=3.2.0",
]

[project.urls]
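With these pins in place, the whole stack installs in one step. As a minimal sketch, assuming a Python 3.11 environment and the repository root as the working directory, an editable install including the dev extras would be:

pip install -e ".[dev]"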
63 changes: 63 additions & 0 deletions scripts/demetr/process_demetr.py
@@ -0,0 +1,63 @@
import argparse
import os

from m4st.process_demetr import ProcessDEMETR


def main(args: dict) -> None:
    output_dir = args["output_dir"]
    output_file = args["output_file"]

    os.makedirs(output_dir, exist_ok=True)

    demetr = ProcessDEMETR(
        metrics_to_use=args["metrics"],
        output_filepath=os.path.join(output_dir, output_file),
        demetr_root=args["dataset_dir"],
    )

    print(args["cats"])
    demetr.process_demetr(cats_to_process=args["cats"])


if __name__ == "__main__":
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "--dataset-dir",
        type=str,
        default="../../datasets/demetr",
        help="Root dataset directory for DEMETR containing JSON files.",
    )
    parser.add_argument(
        "--output-dir",
        type=str,
        default="../../outputs/demetr",
        help="Path to output directory. Will be created by the script.",
    )
    parser.add_argument(
        "--output-file",
        type=str,
        default="demetr_results.csv",
        help="Name for output CSV file.",
    )
    parser.add_argument(
        "--metrics",
        nargs="+",
        type=str,
        default=["COMET_ref", "COMET_qe", "BLASER_ref", "BLASER_qe", "SacreBLEU"],
        help="Metrics to use. Must be one or more of COMET_ref, COMET_qe, "
        "BLASER_ref, BLASER_qe, SacreBLEU. Defaults to all.",
    )
    parser.add_argument(
        "--cats",
        nargs="+",
        type=int,
        required=False,
        help="Specific DEMETR disfluency categories to process. "
        "By default all will be processed.",
    )

    args = parser.parse_args()
    main(vars(args))
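For reference, a minimal usage sketch of the new script (the category IDs are illustrative, and the relative default paths assume it is run from scripts/demetr/), replicating only the SacreBLEU results for a few DEMETR categories:

python process_demetr.py --metrics SacreBLEU --cats 1 2 3

Omitting --cats processes every category; omitting --metrics runs all five metrics.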
145 changes: 145 additions & 0 deletions src/m4st/metrics.py
@@ -0,0 +1,145 @@
import evaluate
import numpy as np
from pandas import Series
from sonar.inference_pipelines.text import TextToEmbeddingModelPipeline
from sonar.models.blaser.loader import load_blaser_model


class SacreBLEUScore:
    """Applies SacreBLEU from the evaluate library."""

    def __init__(self) -> None:
        self.bleu = evaluate.load("sacrebleu")

    def get_scores(self, references: Series, predictions: Series) -> list:
        results = []

        # The evaluate SacreBLEU wrapper computes a single corpus-level score,
        # so each sentence must be run through separately to get
        # sentence-level scores
        for index, ref_txt in references.items():
            mt_txt = predictions[index]
            score = self.bleu.compute(predictions=[mt_txt], references=[[ref_txt]])
            results.append(score["score"])

        return results


class BLASERRefScore:
    """Initialises and applies the BLASER 2.0 reference-based metric from the
    SONAR library."""

    def __init__(self, ref_lang_code: str = "eng_Latn") -> None:
        self.blaser_ref = load_blaser_model("blaser_2_0_ref").eval()
        self.text_embedder = TextToEmbeddingModelPipeline(
            encoder="text_sonar_basic_encoder", tokenizer="text_sonar_basic_encoder"
        )
        # Code for the language of the reference (target) text
        # Defaults to English
        self.ref_lang_code = ref_lang_code

    def get_scores(
        self,
        references: Series,
        predictions: Series,
        sources: Series,
        source_lang_codes: Series,
    ) -> list:
        langs = np.unique(source_lang_codes)

        # Store results for all languages so they can be returned together
        results = []

        # BLASER requires a single source language per call (source_lang must
        # be a string), so the best we can do is batch by language
        for language in langs:
            mask = source_lang_codes == language
            sources_lang = np.array(sources[mask])
            refs_lang = np.array(references[mask])
            preds_lang = np.array(predictions[mask])

            src_embs = self.text_embedder.predict(sources_lang, source_lang=language)
            ref_embs = self.text_embedder.predict(
                refs_lang, source_lang=self.ref_lang_code
            )
            mt_embs = self.text_embedder.predict(
                preds_lang, source_lang=self.ref_lang_code
            )

            for i in range(len(src_embs)):
                result = self.blaser_ref(
                    src=src_embs[[i]], ref=ref_embs[[i]], mt=mt_embs[[i]]
                ).item()
                results.append(result)

        return results


class BLASERQEScore:
    """Initialises and applies the BLASER 2.0 QE (reference-free) metric from
    the SONAR library."""

    def __init__(self, ref_lang_code: str = "eng_Latn") -> None:
        self.blaser_qe = load_blaser_model("blaser_2_0_qe").eval()
        self.text_embedder = TextToEmbeddingModelPipeline(
            encoder="text_sonar_basic_encoder", tokenizer="text_sonar_basic_encoder"
        )
        # Code for the language of the machine-translated (target) text
        # Defaults to English
        self.ref_lang_code = ref_lang_code

    def get_scores(
        self, predictions: Series, sources: Series, source_lang_codes: Series
    ) -> list:
        langs = np.unique(source_lang_codes)

        # Store results for all languages so they can be returned together
        results = []

        # BLASER requires a single source language per call (source_lang must
        # be a string), so the best we can do is batch by language
        for language in langs:
            mask = source_lang_codes == language
            sources_lang = np.array(sources[mask])
            preds_lang = np.array(predictions[mask])

            src_embs = self.text_embedder.predict(sources_lang, source_lang=language)
            mt_embs = self.text_embedder.predict(
                preds_lang, source_lang=self.ref_lang_code
            )

            for i in range(len(src_embs)):
                result = self.blaser_qe(src=src_embs[[i]], mt=mt_embs[[i]]).item()
                results.append(result)

        return results


class COMETRefScore:
    """Applies the COMET reference-based metric from the evaluate library."""

    def __init__(self) -> None:
        self.comet = evaluate.load("comet", model="wmt21-comet-mqm")

    def get_scores(
        self, references: Series, predictions: Series, sources: Series
    ) -> list:
        scores = self.comet.compute(
            predictions=predictions,
            references=references,
            sources=sources,
        )
        return scores["scores"]


class COMETQEScore:
    """Applies the COMET QE metric from the evaluate library."""

    def __init__(self) -> None:
        self.comet = evaluate.load("comet", model="wmt21-comet-qe-mqm")

    def get_scores(
        self, references: Series, predictions: Series, sources: Series
    ) -> list:
        # references are passed through because the evaluate COMET wrapper
        # expects them, even though the QE model does not use them
        scores = self.comet.compute(
            predictions=predictions, references=references, sources=sources
        )
        return scores["scores"]
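To show how the new classes compose, a minimal sketch with hypothetical sentences (the SONAR/BLASER models are downloaded on first use, so this needs network access):

from pandas import Series

from m4st.metrics import BLASERQEScore, SacreBLEUScore

references = Series(["The cat sat on the mat."])
predictions = Series(["The cat is sitting on the mat."])
sources = Series(["Le chat est assis sur le tapis."])
source_langs = Series(["fra_Latn"])

bleu = SacreBLEUScore()
print(bleu.get_scores(references, predictions))  # one BLEU score per sentence

blaser_qe = BLASERQEScore()  # reference-free: needs only source and MT output
print(blaser_qe.get_scores(predictions, sources, source_langs))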
