From 4e5c7de6d8208f055bdb5693c91e1c54f593ad23 Mon Sep 17 00:00:00 2001 From: Shahules786 Date: Mon, 23 Sep 2024 19:10:41 +0530 Subject: [PATCH 1/9] remve BS --- src/ragas/metrics/_context_entities_recall.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/ragas/metrics/_context_entities_recall.py b/src/ragas/metrics/_context_entities_recall.py index a17b75494..c76000bf3 100644 --- a/src/ragas/metrics/_context_entities_recall.py +++ b/src/ragas/metrics/_context_entities_recall.py @@ -139,7 +139,6 @@ class ContextEntityRecall(MetricWithLLM, SingleTurnMetric): context_entity_recall_prompt: Prompt = field( default_factory=lambda: TEXT_ENTITY_EXTRACTION ) - batch_size: int = 15 max_retries: int = 1 def _compute_score( @@ -195,4 +194,4 @@ def save(self, cache_dir: str | None = None) -> None: return self.context_entity_recall_prompt.save(cache_dir) -context_entity_recall = ContextEntityRecall(batch_size=15) +context_entity_recall = ContextEntityRecall() From 9160daa88405e7692080316ee8a7d1f06e160e7a Mon Sep 17 00:00:00 2001 From: Shahules786 Date: Mon, 23 Sep 2024 19:10:52 +0530 Subject: [PATCH 2/9] rename metric --- src/ragas/metrics/_answer_relevance.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/src/ragas/metrics/_answer_relevance.py b/src/ragas/metrics/_answer_relevance.py index 3a66372e1..bf13db578 100644 --- a/src/ragas/metrics/_answer_relevance.py +++ b/src/ragas/metrics/_answer_relevance.py @@ -25,15 +25,15 @@ from ragas.llms.prompt import PromptValue -class AnswerRelevanceClassification(BaseModel): +class ResponseRelevanceClassification(BaseModel): question: str noncommittal: int _output_instructions = get_json_format_instructions( - pydantic_object=AnswerRelevanceClassification + pydantic_object=ResponseRelevanceClassification ) -_output_parser = RagasoutputParser(pydantic_object=AnswerRelevanceClassification) +_output_parser = RagasoutputParser(pydantic_object=ResponseRelevanceClassification) QUESTION_GEN = Prompt( @@ -44,7 +44,7 @@ class AnswerRelevanceClassification(BaseModel): { "answer": """Albert Einstein was born in Germany.""", "context": """Albert Einstein was a German-born theoretical physicist who is widely held to be one of the greatest and most influential scientists of all time""", - "output": AnswerRelevanceClassification.parse_obj( + "output": ResponseRelevanceClassification.parse_obj( { "question": "Where was Albert Einstein born?", "noncommittal": 0, @@ -54,7 +54,7 @@ class AnswerRelevanceClassification(BaseModel): { "answer": """It can change its skin color based on the temperature of its environment.""", "context": """A recent scientific study has discovered a new species of frog in the Amazon rainforest that has the unique ability to change its skin color based on the temperature of its environment.""", - "output": AnswerRelevanceClassification.parse_obj( + "output": ResponseRelevanceClassification.parse_obj( { "question": "What unique ability does the newly discovered species of frog have?", "noncommittal": 0, @@ -64,7 +64,7 @@ class AnswerRelevanceClassification(BaseModel): { "answer": """Everest""", "context": """The tallest mountain on Earth, measured from sea level, is a renowned peak located in the Himalayas.""", - "output": AnswerRelevanceClassification.parse_obj( + "output": ResponseRelevanceClassification.parse_obj( { "question": "What is the tallest mountain on Earth?", "noncommittal": 0, @@ -74,7 +74,7 @@ class AnswerRelevanceClassification(BaseModel): { "answer": """I don't know about 
the groundbreaking feature of the smartphone invented in 2023 as am unaware of information beyond 2022. """, "context": """In 2023, a groundbreaking invention was announced: a smartphone with a battery life of one month, revolutionizing the way people use mobile technology.""", - "output": AnswerRelevanceClassification.parse_obj( + "output": ResponseRelevanceClassification.parse_obj( { "question": "What was the groundbreaking feature of the smartphone invented in 2023?", "noncommittal": 1, @@ -89,7 +89,7 @@ class AnswerRelevanceClassification(BaseModel): @dataclass -class AnswerRelevancy(MetricWithLLM, MetricWithEmbeddings, SingleTurnMetric): +class ResponseRelevancy(MetricWithLLM, MetricWithEmbeddings, SingleTurnMetric): """ Scores the relevancy of the answer according to the given question. Answers with incomplete, redundant or unnecessary information is penalized. @@ -139,7 +139,7 @@ def calculate_similarity( ) def _calculate_score( - self, answers: t.Sequence[AnswerRelevanceClassification], row: t.Dict + self, answers: t.Sequence[ResponseRelevanceClassification], row: t.Dict ) -> float: question = row["user_input"] gen_questions = [answer.question for answer in answers] @@ -197,4 +197,9 @@ def save(self, cache_dir: str | None = None) -> None: self.question_generation.save(cache_dir) +class AnswerRelevancy(ResponseRelevancy): + async def _ascore(self, row: t.Dict, callbacks: Callbacks) -> float: + return await super()._ascore(row, callbacks) + + answer_relevancy = AnswerRelevancy() From 4f63b289c015f3f4a2e635a2abcfa4d6df383600 Mon Sep 17 00:00:00 2001 From: Shahules786 Date: Mon, 23 Sep 2024 19:11:02 +0530 Subject: [PATCH 3/9] rename metric --- src/ragas/metrics/_answer_similarity.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/ragas/metrics/_answer_similarity.py b/src/ragas/metrics/_answer_similarity.py index 027e35a0a..b3e65736b 100644 --- a/src/ragas/metrics/_answer_similarity.py +++ b/src/ragas/metrics/_answer_similarity.py @@ -23,7 +23,7 @@ @dataclass -class AnswerSimilarity(MetricWithLLM, MetricWithEmbeddings, SingleTurnMetric): +class SemanticSimilarity(MetricWithLLM, MetricWithEmbeddings, SingleTurnMetric): """ Scores the semantic similarity of ground truth with generated answer. cross encoder score is used to quantify semantic similarity. 
@@ -91,4 +91,9 @@ async def _ascore(self: t.Self, row: t.Dict, callbacks: Callbacks) -> float: return score.tolist()[0] +class AnswerSimilarity(SemanticSimilarity): + async def _ascore(self: t.Self, row: t.Dict, callbacks: Callbacks) -> float: + return await super()._ascore(row, callbacks) + + answer_similarity = AnswerSimilarity() From 41772dbf44b35578a91b3284631b23712905a3d9 Mon Sep 17 00:00:00 2001 From: Shahules786 Date: Mon, 23 Sep 2024 19:11:21 +0530 Subject: [PATCH 4/9] remove instances --- src/ragas/metrics/__init__.py | 14 +------------- src/ragas/metrics/_domain_specific_rubrics.py | 4 ---- src/ragas/metrics/_factual_correctness.py | 2 +- src/ragas/metrics/_noise_sensitivity.py | 8 +------- 4 files changed, 3 insertions(+), 25 deletions(-) diff --git a/src/ragas/metrics/__init__.py b/src/ragas/metrics/__init__.py index 6b476675d..244c055b2 100644 --- a/src/ragas/metrics/__init__.py +++ b/src/ragas/metrics/__init__.py @@ -13,21 +13,14 @@ ContextPrecision, ContextUtilization, context_precision, - context_utilization, ) from ragas.metrics._context_recall import ContextRecall, context_recall from ragas.metrics._domain_specific_rubrics import ( RubricsScoreWithoutReference, RubricsScoreWithReference, - rubrics_score_with_reference, - rubrics_score_without_reference, ) from ragas.metrics._faithfulness import Faithfulness, FaithulnesswithHHEM, faithfulness -from ragas.metrics._noise_sensitivity import ( - NoiseSensitivity, - noise_sensitivity_irrelevant, - noise_sensitivity_relevant, -) +from ragas.metrics._noise_sensitivity import NoiseSensitivity from ragas.metrics._summarization import SummarizationScore, summarization_score __all__ = [ @@ -41,7 +34,6 @@ "ContextPrecision", "context_precision", "ContextUtilization", - "context_utilization", "ContextRecall", "context_recall", "AspectCritic", @@ -52,10 +44,6 @@ "SummarizationScore", "summarization_score", "NoiseSensitivity", - "noise_sensitivity_irrelevant", - "noise_sensitivity_relevant", - "rubrics_score_with_reference", - "rubrics_score_without_reference", "RubricsScoreWithoutReference", "RubricsScoreWithReference", ] diff --git a/src/ragas/metrics/_domain_specific_rubrics.py b/src/ragas/metrics/_domain_specific_rubrics.py index 09509132a..959306b36 100644 --- a/src/ragas/metrics/_domain_specific_rubrics.py +++ b/src/ragas/metrics/_domain_specific_rubrics.py @@ -307,7 +307,3 @@ def _create_single_turn_prompt(self, row: t.Dict) -> SingleTurnWithReferenceInpu reference=ground_truth, rubrics=self.rubrics, ) - - -rubrics_score_with_reference = RubricsScoreWithReference() -rubrics_score_without_reference = RubricsScoreWithoutReference() diff --git a/src/ragas/metrics/_factual_correctness.py b/src/ragas/metrics/_factual_correctness.py index 15f574a94..42fabaa0e 100644 --- a/src/ragas/metrics/_factual_correctness.py +++ b/src/ragas/metrics/_factual_correctness.py @@ -9,11 +9,11 @@ from numpy.typing import NDArray from pydantic import BaseModel, Field -from ragas.experimental.prompt import PydanticPrompt from ragas.experimental.metrics._faithfulness import ( NLIStatementInput, NLIStatementPrompt, ) +from ragas.experimental.prompt import PydanticPrompt from ragas.metrics.base import ( MetricType, MetricWithLLM, diff --git a/src/ragas/metrics/_noise_sensitivity.py b/src/ragas/metrics/_noise_sensitivity.py index 122ddf03f..6633d9610 100644 --- a/src/ragas/metrics/_noise_sensitivity.py +++ b/src/ragas/metrics/_noise_sensitivity.py @@ -38,7 +38,7 @@ @dataclass class NoiseSensitivity(MetricWithLLM, SingleTurnMetric): name: str = 
"noise_sensitivity" # type: ignore - focus: str = "relevant" + focus: t.Literal["relevant", "irrelevant"] = "relevant" _required_columns: t.Dict[MetricType, t.Set[str]] = field( default_factory=lambda: { MetricType.SINGLE_TURN: { @@ -266,8 +266,6 @@ async def _ascore(self: t.Self, row: t.Dict, callbacks: Callbacks) -> float: def adapt(self, language: str, cache_dir: t.Optional[str] = None) -> None: assert self.llm is not None, "LLM is not set" - logger.info(f"Adapting Faithfulness metric to {language}") - self.nli_statements_message = self.nli_statements_message.adapt( language, self.llm, cache_dir ) @@ -280,7 +278,3 @@ def adapt(self, language: str, cache_dir: t.Optional[str] = None) -> None: def save(self, cache_dir: t.Optional[str] = None) -> None: self.nli_statements_message.save(cache_dir) self.statement_prompt.save(cache_dir) - - -noise_sensitivity_relevant = NoiseSensitivity() -noise_sensitivity_irrelevant = NoiseSensitivity(focus="irrelevant") From 1401ae37e2037eac91381f4068b5497302514a3c Mon Sep 17 00:00:00 2001 From: Shahules786 Date: Mon, 23 Sep 2024 19:26:41 +0530 Subject: [PATCH 5/9] remove and rename --- src/ragas/metrics/_rogue_score.py | 3 --- src/ragas/metrics/_sql_semantic_equivalence.py | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/src/ragas/metrics/_rogue_score.py b/src/ragas/metrics/_rogue_score.py index 78fd60548..9786b76ad 100644 --- a/src/ragas/metrics/_rogue_score.py +++ b/src/ragas/metrics/_rogue_score.py @@ -32,6 +32,3 @@ async def _single_turn_ascore( async def _ascore(self, row: t.Dict, callbacks: Callbacks) -> float: return await self._single_turn_ascore(SingleTurnSample(**row), callbacks) - - -rouge_score = RougeScore() diff --git a/src/ragas/metrics/_sql_semantic_equivalence.py b/src/ragas/metrics/_sql_semantic_equivalence.py index 821c3f96e..63cc8e834 100644 --- a/src/ragas/metrics/_sql_semantic_equivalence.py +++ b/src/ragas/metrics/_sql_semantic_equivalence.py @@ -63,7 +63,7 @@ class EquivalencePrompt(PydanticPrompt[EquivalenceInput, EquivalenceOutput]): @dataclass -class LLMSqlEquivalenceWithReference(MetricWithLLM, SingleTurnMetric): +class LLMSQLEquivalence(MetricWithLLM, SingleTurnMetric): name: str = "llm_sql_equivalence_with_reference" # type: ignore _required_columns: t.Dict[MetricType, t.Set[str]] = field( default_factory=lambda: { From 85720871caeb296b61b577c5b7895ebf4a192f0f Mon Sep 17 00:00:00 2001 From: Shahules786 Date: Mon, 23 Sep 2024 19:26:49 +0530 Subject: [PATCH 6/9] add metrics to init --- src/ragas/metrics/__init__.py | 56 +++++++++++++++++++++++++++++++++-- 1 file changed, 53 insertions(+), 3 deletions(-) diff --git a/src/ragas/metrics/__init__.py b/src/ragas/metrics/__init__.py index 244c055b2..ebb8f9fb4 100644 --- a/src/ragas/metrics/__init__.py +++ b/src/ragas/metrics/__init__.py @@ -2,9 +2,18 @@ import sys from ragas.metrics._answer_correctness import AnswerCorrectness, answer_correctness -from ragas.metrics._answer_relevance import AnswerRelevancy, answer_relevancy -from ragas.metrics._answer_similarity import AnswerSimilarity, answer_similarity +from ragas.metrics._answer_relevance import ( + AnswerRelevancy, + ResponseRelevancy, + answer_relevancy, +) +from ragas.metrics._answer_similarity import ( + AnswerSimilarity, + SemanticSimilarity, + answer_similarity, +) from ragas.metrics._aspect_critic import AspectCritic +from ragas.metrics._bleu_score import BleuScore from ragas.metrics._context_entities_recall import ( ContextEntityRecall, context_entity_recall, @@ -12,16 +21,37 @@ from 
ragas.metrics._context_precision import ( ContextPrecision, ContextUtilization, + LLMContextPrecisionWithoutReference, + NonLLMContextPrecisionWithReference, context_precision, ) -from ragas.metrics._context_recall import ContextRecall, context_recall +from ragas.metrics._context_recall import ( + ContextRecall, + LLMContextRecall, + NonLLMContextRecall, + context_recall, +) +from ragas.metrics._datacompy_score import DataCompyScore from ragas.metrics._domain_specific_rubrics import ( RubricsScoreWithoutReference, RubricsScoreWithReference, ) +from ragas.metrics._factual_correctness import FactualCorrectness from ragas.metrics._faithfulness import Faithfulness, FaithulnesswithHHEM, faithfulness +from ragas.metrics._goal_accuracy import ( + AgentGoalAccuracyWithoutReference, + AgentGoalAccuracyWithReference, +) +from ragas.metrics._instance_specific_rubrics import ( + InstanceRubricsScoreWithoutReference, + InstanceRubricsWithReference, +) from ragas.metrics._noise_sensitivity import NoiseSensitivity +from ragas.metrics._rogue_score import RougeScore +from ragas.metrics._sql_semantic_equivalence import LLMSQLEquivalence +from ragas.metrics._string import ExactMatch, NonLLMStringSimilarity, StringPresence from ragas.metrics._summarization import SummarizationScore, summarization_score +from ragas.metrics._tool_call_accuracy import ToolCallAccuracy __all__ = [ "AnswerCorrectness", @@ -46,6 +76,26 @@ "NoiseSensitivity", "RubricsScoreWithoutReference", "RubricsScoreWithReference", + "LLMContextPrecisionWithoutReference", + "NonLLMContextPrecisionWithReference", + "LLMContextPrecisionWithoutReference", + "LLMContextRecall", + "NonLLMContextRecall", + "FactualCorrectness", + "InstanceRubricsScoreWithoutReference", + "InstanceRubricsWithReference", + "NonLLMStringSimilarity", + "ExactMatch", + "StringPresence", + "BleuScore", + "RougeScore", + "DataCompyScore", + "LLMSQLEquivalence", + "AgentGoalAccuracyWithoutReference", + "AgentGoalAccuracyWithReference", + "ToolCallAccuracy", + "ResponseRelevancy", + "SemanticSimilarity", ] current_module = sys.modules[__name__] From ab428a4ef5360e4c0dc273a0cf12945e606feba8 Mon Sep 17 00:00:00 2001 From: Shahules786 Date: Mon, 23 Sep 2024 19:55:53 +0530 Subject: [PATCH 7/9] optional import --- src/ragas/metrics/_rogue_score.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/ragas/metrics/_rogue_score.py b/src/ragas/metrics/_rogue_score.py index 9786b76ad..1a8a72fa8 100644 --- a/src/ragas/metrics/_rogue_score.py +++ b/src/ragas/metrics/_rogue_score.py @@ -2,7 +2,6 @@ from dataclasses import dataclass, field from langchain_core.callbacks import Callbacks -from rouge_score import rouge_scorer from ragas.dataset_schema import SingleTurnSample from ragas.metrics.base import MetricType, SingleTurnMetric @@ -18,6 +17,15 @@ class RougeScore(SingleTurnMetric): rogue_type: t.Literal["rouge1", "rougeL"] = "rougeL" measure_type: t.Literal["fmeasure", "precision", "recall"] = "fmeasure" + def __post_init__(self): + try: + from rouge_score import rouge_scorer + except ImportError as e: + raise ImportError( + f"{e.name} is required for rouge score. 
Please install it using `pip install {e.name}" + ) + self.rouge_scorer = rouge_scorer + def init(self, run_config: RunConfig): pass @@ -26,7 +34,7 @@ async def _single_turn_ascore( ) -> float: assert isinstance(sample.reference, str), "Sample reference must be a string" assert isinstance(sample.response, str), "Sample response must be a string" - scorer = rouge_scorer.RougeScorer([self.rogue_type], use_stemmer=True) + scorer = self.rouge_scorer.RougeScorer([self.rogue_type], use_stemmer=True) scores = scorer.score(sample.reference, sample.response) return getattr(scores[self.rogue_type], self.measure_type) From 7a1cdd73df9ec2fe6c1f4f985a2be84bf9339a8e Mon Sep 17 00:00:00 2001 From: Shahules786 Date: Mon, 23 Sep 2024 19:59:01 +0530 Subject: [PATCH 8/9] add distance measure --- src/ragas/metrics/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/ragas/metrics/__init__.py b/src/ragas/metrics/__init__.py index ebb8f9fb4..35f3e261f 100644 --- a/src/ragas/metrics/__init__.py +++ b/src/ragas/metrics/__init__.py @@ -49,7 +49,7 @@ from ragas.metrics._noise_sensitivity import NoiseSensitivity from ragas.metrics._rogue_score import RougeScore from ragas.metrics._sql_semantic_equivalence import LLMSQLEquivalence -from ragas.metrics._string import ExactMatch, NonLLMStringSimilarity, StringPresence +from ragas.metrics._string import ExactMatch, NonLLMStringSimilarity, StringPresence, DistanceMeasure from ragas.metrics._summarization import SummarizationScore, summarization_score from ragas.metrics._tool_call_accuracy import ToolCallAccuracy @@ -96,6 +96,7 @@ "ToolCallAccuracy", "ResponseRelevancy", "SemanticSimilarity", + "DistanceMeasure", ] current_module = sys.modules[__name__] From 13a33ee20e6b301d8b3f50046481dbc893a1789b Mon Sep 17 00:00:00 2001 From: Shahules786 Date: Tue, 24 Sep 2024 09:44:54 +0530 Subject: [PATCH 9/9] add default factory --- src/ragas/metrics/_tool_call_accuracy.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/ragas/metrics/_tool_call_accuracy.py b/src/ragas/metrics/_tool_call_accuracy.py index cc7c365c8..b56569219 100644 --- a/src/ragas/metrics/_tool_call_accuracy.py +++ b/src/ragas/metrics/_tool_call_accuracy.py @@ -27,7 +27,9 @@ class ToolCallAccuracy(MultiTurnMetric): } ) - arg_comparison_metric: SingleTurnMetric = ExactMatch() + arg_comparison_metric: SingleTurnMetric = field( + default_factory=lambda: ExactMatch() + ) def init(self, run_config): pass
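
Taken together, the series renames several metric classes (AnswerRelevancy -> ResponseRelevancy, AnswerSimilarity -> SemanticSimilarity, LLMSqlEquivalenceWithReference -> LLMSQLEquivalence), keeps the old relevancy/similarity names as thin compatibility subclasses, removes the pre-built module-level instances (rouge_score, noise_sensitivity_relevant, rubrics_score_with_reference, ...), and widens the ragas.metrics export list. The snippet below is a minimal sketch of what calling code looks like after these patches; it only uses names the patches themselves export, and it assumes the LLM/embeddings configuration is supplied later (the removed singletons were constructed the same way).

    from ragas.metrics import (
        NoiseSensitivity,
        ResponseRelevancy,   # new name; AnswerRelevancy remains as a compatibility subclass
        RougeScore,          # needs the optional rouge_score package at construction time
        SemanticSimilarity,  # new name; AnswerSimilarity remains as a compatibility subclass
    )

    # The removed singletons (noise_sensitivity_relevant, rouge_score, ...) are
    # replaced by explicit construction at the call site.
    metrics = [
        ResponseRelevancy(),
        SemanticSimilarity(),
        NoiseSensitivity(focus="irrelevant"),  # focus is now a Literal["relevant", "irrelevant"]
        RougeScore(rogue_type="rougeL", measure_type="fmeasure"),
    ]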
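Patch 7 moves the rouge_score import from module scope into RougeScore.__post_init__, so the package becomes an optional dependency that only users of this metric need to install; importing ragas.metrics no longer requires it. A small sketch of how that surfaces at the call site (the error wording comes from the patch, the handling around it is illustrative):

    from ragas.metrics import RougeScore

    try:
        rouge = RougeScore()  # the lazy import runs in __post_init__
    except ImportError as exc:
        # Raised only when the optional rouge_score package is missing.
        print(f"Optional dependency missing: {exc}")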
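The final patch swaps ToolCallAccuracy's class-level ExactMatch() default for field(default_factory=...). Presumably this avoids a single ExactMatch object being shared by every ToolCallAccuracy instance (and, on recent Python versions, dataclasses rejecting such a mutable default outright). A generic illustration of the pattern, independent of ragas:

    from dataclasses import dataclass, field

    @dataclass
    class Comparator:
        threshold: float = 0.5

    @dataclass
    class Scorer:
        # default_factory builds a fresh Comparator per Scorer instance;
        # `comparator: Comparator = Comparator()` would alias one shared object
        # and newer dataclasses versions reject it as a mutable default.
        comparator: Comparator = field(default_factory=Comparator)

    a, b = Scorer(), Scorer()
    assert a.comparator is not b.comparator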