From 2c4f02679295228bde0455398370372608777f4c Mon Sep 17 00:00:00 2001
From: sahusiddharth
Date: Wed, 22 Jan 2025 02:03:29 +0530
Subject: [PATCH 1/6] docs: corrected the NonLLMContextPrecisionWithReference metric docs

---
 docs/concepts/metrics/available_metrics/context_precision.md | 4 ++--
 src/ragas/testset/synthesizers/multi_hop/base.py             | 2 +-
 src/ragas/testset/synthesizers/single_hop/base.py            | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/docs/concepts/metrics/available_metrics/context_precision.md b/docs/concepts/metrics/available_metrics/context_precision.md
index 5d0dc35ac..449dadcea 100644
--- a/docs/concepts/metrics/available_metrics/context_precision.md
+++ b/docs/concepts/metrics/available_metrics/context_precision.md
@@ -68,11 +68,11 @@ Output
 
 ## Non LLM Based Context Precision
 
-The following metrics uses traditional methods to identify if a retrieved context is relevant or not. You can use any non LLM based metrics as distance measure to identify if a retrieved context is relevant or not.
+This metric uses traditional methods to determine whether a retrieved context is relevant. It relies on non-LLM-based metrics as a distance measure to evaluate the relevance of retrieved contexts.
 
 ### Context Precision with reference contexts
 
-`NonLLMContextPrecisionWithReference` metric is can be used when you have both retrieved contexts and also reference contexts associated with a `user_input`. To estimate if a retrieved contexts is relevant or not this method uses the LLM to compare each of the retrieved context or chunk present in `retrieved_contexts` with each ones present in `reference_contexts`.
+The `NonLLMContextPrecisionWithReference` metric is designed for scenarios where both retrieved contexts and reference contexts are available for a `user_input`. To determine if a retrieved context is relevant, this method compares each retrieved context or chunk in `retrieved_contexts` with every context in `reference_contexts` using a non-LLM-based similarity measure.
 
 #### Example
diff --git a/src/ragas/testset/synthesizers/multi_hop/base.py b/src/ragas/testset/synthesizers/multi_hop/base.py
index 48b72ba7c..0a7b5e7ff 100644
--- a/src/ragas/testset/synthesizers/multi_hop/base.py
+++ b/src/ragas/testset/synthesizers/multi_hop/base.py
@@ -161,7 +161,7 @@ async def _generate_sample(
         self, scenario: Scenario, callbacks: Callbacks
     ) -> SingleTurnSample:
         if not isinstance(scenario, MultiHopScenario):
-            raise TypeError('scenario type should be MultiHopScenario')
+            raise TypeError("scenario type should be MultiHopScenario")
         reference_context = self.make_contexts(scenario)
         prompt_input = QueryConditions(
             persona=scenario.persona,
diff --git a/src/ragas/testset/synthesizers/single_hop/base.py b/src/ragas/testset/synthesizers/single_hop/base.py
index 3a60a6dbc..2bc708a30 100644
--- a/src/ragas/testset/synthesizers/single_hop/base.py
+++ b/src/ragas/testset/synthesizers/single_hop/base.py
@@ -122,7 +122,7 @@ async def _generate_sample(
         self, scenario: Scenario, callbacks: Callbacks
     ) -> SingleTurnSample:
         if not isinstance(scenario, SingleHopScenario):
-            raise TypeError('scenario type should be SingleHopScenario')
+            raise TypeError("scenario type should be SingleHopScenario")
         reference_context = scenario.nodes[0].properties.get("page_content", "")
         prompt_input = QueryCondition(
             persona=scenario.persona,

From 535bfbf3c9bd3837bc6a646ff17488c6ba542b38 Mon Sep 17 00:00:00 2001
From: sahusiddharth
Date: Thu, 23 Jan 2025 11:28:30 +0530
Subject: [PATCH 2/6] docs: Improved the example in the migration docs

---
 docs/howtos/migrations/migrate_from_v01_to_v02.md | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/docs/howtos/migrations/migrate_from_v01_to_v02.md b/docs/howtos/migrations/migrate_from_v01_to_v02.md
index 79c209a13..4032029a5 100644
--- a/docs/howtos/migrations/migrate_from_v01_to_v02.md
+++ b/docs/howtos/migrations/migrate_from_v01_to_v02.md
@@ -48,17 +48,21 @@ Second is that [`metrics.ascore`][ragas.metrics.base.Metric.ascore] is now being
 ```python
 # create a Single Turn Sample
 from ragas import SingleTurnSample
+
 sample = SingleTurnSample(
-    user_input="user query",
-    response="response from your pipeline"
+    user_input="user query",
+    response="response from your pipeline",
+    retrieved_contexts=["retrieved", "contexts", "from your pipeline"]
 )
 
 # Init the metric
 from ragas.metrics import Faithfulness
 faithfulness_metric = Faithfulness(llm=your_evaluator_llm)
-score = faithfulness.single_turn_ascore(sample=sample)
-print(score)
-# 0.9
+await faithfulness_metric.single_turn_ascore(sample)
+```
+Output
+```
+1
 ```
 
 ## Testset Generation

From 69da5fdd9dd4b28ab84d650767c7fcd055ef4a8c Mon Sep 17 00:00:00 2001
From: Wenchen Li <9028430+neo@users.noreply.github.com>
Date: Thu, 23 Jan 2025 13:04:57 -0500
Subject: [PATCH 3/6] docs: match default `timeout` in `RunConfig` (#1872)

fix a minor mismatch of the default value in the docstring
---
 src/ragas/run_config.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/ragas/run_config.py b/src/ragas/run_config.py
index 0276c10d1..124e78e78 100644
--- a/src/ragas/run_config.py
+++ b/src/ragas/run_config.py
@@ -23,7 +23,7 @@ class RunConfig:
     Parameters
     ----------
     timeout : int, optional
-        Maximum time (in seconds) to wait for a single operation, by default 60.
+        Maximum time (in seconds) to wait for a single operation, by default 180.
     max_retries : int, optional
         Maximum number of retry attempts, by default 10.
     max_wait : int, optional

From d277700b0555a80bdc48b97439ce3dd12d96e32d Mon Sep 17 00:00:00 2001
From: tim-hilde <44113468+tim-hilde@users.noreply.github.com>
Date: Thu, 23 Jan 2025 19:28:40 +0100
Subject: [PATCH 4/6] fix(prompt/mixin): Add name property and add it to saving/loading path. (#1853)

This pull request includes several changes to the `PromptMixin` class in the
`src/ragas/prompt/mixin.py` file. The changes focus on adding a `name`
attribute to the class and using this attribute when saving and loading
prompts. This solves the error that occurred when saving and loading prompts
from several different Synthesizers (e.g. MultiHopAbstractQuerySynthesizer,
MultiHopSpecificQuerySynthesizer, SingleHopSpecificQuerySynthesizer, etc.),
since they previously resolved to the same file path:

```
themes_personas_matching_prompt_english -> single_hop_specifc_query_synthesizer_themes_personas_matching_prompt_english
query_answer_generation_prompt_english -> single_hop_specifc_query_synthesizer_query_answer_generation_prompt_english
```

### Changes to `PromptMixin` class:

* Added a `name` attribute to the `PromptMixin` class.
* Modified the `save_prompts` method to include the `name` attribute in the prompt file name.
* Modified the `load_prompts` method to include the `name` attribute in the prompt file name.

---------

Co-authored-by: jjmachan
Co-authored-by: Jithin James
---
 src/ragas/prompt/mixin.py | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/src/ragas/prompt/mixin.py b/src/ragas/prompt/mixin.py
index c354a8d9e..17db3b682 100644
--- a/src/ragas/prompt/mixin.py
+++ b/src/ragas/prompt/mixin.py
@@ -20,8 +20,9 @@ class PromptMixin:
     eg: [BaseSynthesizer][ragas.testset.synthesizers.base.BaseSynthesizer],
     [MetricWithLLM][ragas.metrics.base.MetricWithLLM]
     """
 
-    def _get_prompts(self) -> t.Dict[str, PydanticPrompt]:
+    name: str = ""
+    def _get_prompts(self) -> t.Dict[str, PydanticPrompt]:
         prompts = {}
         for key, value in inspect.getmembers(self):
             if isinstance(value, PydanticPrompt):
@@ -90,10 +91,13 @@ def save_prompts(self, path: str):
         prompts = self.get_prompts()
         for prompt_name, prompt in prompts.items():
             # hash_hex = f"0x{hash(prompt) & 0xFFFFFFFFFFFFFFFF:016x}"
-            prompt_file_name = os.path.join(
-                path, f"{prompt_name}_{prompt.language}.json"
-            )
-            prompt.save(prompt_file_name)
+            if self.name == "":
+                file_name = os.path.join(path, f"{prompt_name}_{prompt.language}.json")
+            else:
+                file_name = os.path.join(
+                    path, f"{self.name}_{prompt_name}_{prompt.language}.json"
+                )
+            prompt.save(file_name)
 
     def load_prompts(self, path: str, language: t.Optional[str] = None):
         """
@@ -113,7 +117,12 @@ def load_prompts(self, path: str, language: t.Optional[str] = None):
 
         loaded_prompts = {}
         for prompt_name, prompt in self.get_prompts().items():
-            prompt_file_name = os.path.join(path, f"{prompt_name}_{language}.json")
-            loaded_prompt = prompt.__class__.load(prompt_file_name)
+            if self.name == "":
+                file_name = os.path.join(path, f"{prompt_name}_{language}.json")
+            else:
+                file_name = os.path.join(
+                    path, f"{self.name}_{prompt_name}_{language}.json"
+                )
+            loaded_prompt = prompt.__class__.load(file_name)
             loaded_prompts[prompt_name] = loaded_prompt
         return loaded_prompts

From bce9563becac68bd3de46bc5d77116e932b0ff5c Mon Sep 17 00:00:00 2001
From: Carlos Souza
Date: Thu, 23 Jan 2025 15:06:24 -0500
Subject: [PATCH 5/6] Adding missing evaluate import to evals guide (#1876)

---
 docs/getstarted/evals.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/docs/getstarted/evals.md b/docs/getstarted/evals.md
index 01eadfa35..ed13493c9 100644
--- a/docs/getstarted/evals.md
+++ b/docs/getstarted/evals.md
@@ -137,6 +137,8 @@ Total samples in dataset: 50
 Evaluate using dataset
 
 ```python
+from ragas import evaluate
+
 results = evaluate(eval_dataset, metrics=[metric])
 results
 ```

From f5de9e51691a99d9ac55ebd2cffcf5f4397a0fb6 Mon Sep 17 00:00:00 2001
From: sahusiddharth
Date: Fri, 24 Jan 2025 10:26:12 +0530
Subject: [PATCH 6/6] Improve error message for missing embeddings in Answer Relevancy Metric

---
 src/ragas/metrics/_answer_relevance.py  | 4 +++-
 src/ragas/metrics/_answer_similarity.py | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/ragas/metrics/_answer_relevance.py b/src/ragas/metrics/_answer_relevance.py
index f4cc2b10e..03b6ea4f9 100644
--- a/src/ragas/metrics/_answer_relevance.py
+++ b/src/ragas/metrics/_answer_relevance.py
@@ -95,7 +95,9 @@ class ResponseRelevancy(MetricWithLLM, MetricWithEmbeddings, SingleTurnMetric):
     strictness: int = 3
 
     def calculate_similarity(self, question: str, generated_questions: list[str]):
-        assert self.embeddings is not None
+        assert (
+            self.embeddings is not None
+        ), f"Error: '{self.name}' requires embeddings to be set."
         question_vec = np.asarray(self.embeddings.embed_query(question)).reshape(1, -1)
         gen_question_vec = np.asarray(
             self.embeddings.embed_documents(generated_questions)
diff --git a/src/ragas/metrics/_answer_similarity.py b/src/ragas/metrics/_answer_similarity.py
index 67bd2c546..061c221ec 100644
--- a/src/ragas/metrics/_answer_similarity.py
+++ b/src/ragas/metrics/_answer_similarity.py
@@ -65,7 +65,9 @@ async def _single_turn_ascore(
         self, sample: SingleTurnSample, callbacks: Callbacks
     ) -> float:
         return await self._ascore(row, callbacks)
 
     async def _ascore(self, row: t.Dict, callbacks: Callbacks) -> float:
-        assert self.embeddings is not None, "embeddings must be set"
+        assert (
+            self.embeddings is not None
+        ), f"Error: '{self.name}' requires embeddings to be set."
         ground_truth = t.cast(str, row["reference"])
         answer = t.cast(str, row["response"])
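
As a quick reference for the file-naming scheme that PATCH 4/6 introduces in `PromptMixin.save_prompts`/`load_prompts`, here is a minimal standalone sketch. The `prompt_file_path` helper below is illustrative only (it is not part of the ragas API), and the owner names passed to it are example values; it simply mirrors the branching logic in the patch: when the owning synthesizer or metric defines a non-empty `name`, that name is prefixed so prompts with identical names from different owners no longer collide on disk.

```python
import os


def prompt_file_path(path: str, owner_name: str, prompt_name: str, language: str) -> str:
    """Build the on-disk file name for a saved prompt (illustrative helper).

    Mirrors the logic added in PATCH 4/6: an empty owner name keeps the old
    "<prompt>_<language>.json" layout, a non-empty one prefixes it with the owner.
    """
    if owner_name == "":
        return os.path.join(path, f"{prompt_name}_{language}.json")
    return os.path.join(path, f"{owner_name}_{prompt_name}_{language}.json")


# Two owners that expose a prompt with the same name now map to distinct files.
# The owner-name strings below are examples, not necessarily the exact values in ragas.
print(prompt_file_path("./prompts", "single_hop_specifc_query_synthesizer",
                       "query_answer_generation_prompt", "english"))
print(prompt_file_path("./prompts", "multi_hop_abstract_query_synthesizer",
                       "query_answer_generation_prompt", "english"))
```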