feat: adding an implementation of abstractQA (#1359)

Once you have a kg from #1352, you can run this like:

```py
from ragas.experimental.testset.generators.abstract import AbstractGenerator

abstract_qa = AbstractGenerator()
dist = await abstract_qa.generate_scenarios(n=10, knowledge_graph=kg)
q = await abstract_qa.generate_user_input(dist[0])
```

Merge after merging #1352.
explodinggradients · Sep 28, 2024 · 4b92fa9 · 4b92fa9
1 parent 6685710
commit 4b92fa9
Show file tree

Hide file tree

Showing 21 changed files with 590 additions and 56 deletions.
diff --git a/docs/howtos/integrations/helicone.ipynb b/docs/howtos/integrations/helicone.ipynb
@@ -60,9 +60,9 @@
     "helicone_config.api_key = (\n",
     "    \"your_helicone_api_key_here\"  # Replace with your actual Helicone API key\n",
     ")\n",
-    "os.environ[\n",
-    "    \"OPENAI_API_KEY\"\n",
-    "] = \"your_openai_api_key_here\"  # Replace with your actual OpenAI API key\n",
+    "os.environ[\"OPENAI_API_KEY\"] = (\n",
+    "    \"your_openai_api_key_here\"  # Replace with your actual OpenAI API key\n",
+    ")\n",
     "\n",
     "# Verify Helicone API key is set\n",
     "if HELICONE_API_KEY == \"your_helicone_api_key_here\":\n",

diff --git a/src/experimental/ragas_experimental/testset/generators/base.py b/src/experimental/ragas_experimental/testset/generators/base.py
@@ -48,8 +48,7 @@ def generate(
         docs: t.Sequence[Document],
         test_size: int,
         distribution: QADistribution,
-    ) -> TestDataset:
-        ...
+    ) -> TestDataset: ...
 
     def generate_with_langchain_docs(
         self,

diff --git a/src/experimental/ragas_experimental/testset/questions/base.py b/src/experimental/ragas_experimental/testset/questions/base.py
@@ -46,12 +46,12 @@ class QAC:
 
 @dataclass
 class StyleLengthDistribution:
-    style_length_distribution: t.Dict[
-        t.Tuple[QuestionStyle, QuestionLength], float
-    ] = field(
-        default_factory=lambda: {
-            (QuestionStyle.PERFECT_GRAMMAR, QuestionLength.MEDIUM): 1.0
-        }
+    style_length_distribution: t.Dict[t.Tuple[QuestionStyle, QuestionLength], float] = (
+        field(
+            default_factory=lambda: {
+                (QuestionStyle.PERFECT_GRAMMAR, QuestionLength.MEDIUM): 1.0
+            }
+        )
     )
 
     def __post_init__(self):

diff --git a/src/ragas/executor.py b/src/ragas/executor.py
@@ -118,3 +118,21 @@ async def _aresults() -> t.List[t.Any]:
         results = asyncio.run(_aresults())
         sorted_results = sorted(results, key=lambda x: x[0])
         return [r[1] for r in sorted_results]
+
+
+def run_async_batch(desc: str, func: t.Callable, kwargs_list: t.List[t.Dict]):
+    """
+    Run one async callable once per set of keyword arguments.
+
+    Every dict in ``kwargs_list`` is submitted as ``func(**kwargs)`` to a
+    freshly created ``Executor`` labelled ``desc`` and configured with a
+    default ``RunConfig``, ``keep_progress_bar=False`` and
+    ``raise_exceptions=True``. Returns whatever ``Executor.results()`` returns.
+    """
+    batch = Executor(
+        desc=desc,
+        keep_progress_bar=False,
+        raise_exceptions=True,
+        run_config=RunConfig(),
+    )
+    for call_kwargs in kwargs_list:
+        batch.submit(func, **call_kwargs)
+
+    outcome = batch.results()
+    return outcome
diff --git a/src/ragas/experimental/prompt.py b/src/ragas/experimental/prompt.py
@@ -78,6 +78,10 @@ class StringIO(BaseModel):
     text: str
 
 
+class BoolIO(BaseModel):  # single-field model carrying one boolean, analogous to StringIO's one string
+    value: bool  # the boolean payload
+
+
 class PydanticPrompt(BasePrompt, t.Generic[InputModel, OutputModel]):
     input_model: t.Type[InputModel]
     output_model: t.Type[OutputModel]

diff --git a/src/ragas/experimental/testset/generators/__init__.py b/src/ragas/experimental/testset/generators/__init__.py
@@ -0,0 +1,3 @@
+from .abstract import AbstractGenerator
+
+__all__ = ["AbstractGenerator"]
diff --git a/src/ragas/experimental/testset/generators/abstract.py b/src/ragas/experimental/testset/generators/abstract.py
@@ -0,0 +1,168 @@
+import logging
+import math
+import random
+import typing as t
+from dataclasses import dataclass, field
+
+from ragas.executor import run_async_batch
+from ragas.experimental.prompt import PydanticPrompt, StringIO
+from ragas.experimental.testset.generators.base import (
+    BaseSimulator,
+    BasicScenario,
+    UserInputLength,
+    UserInputStyle,
+)
+from ragas.experimental.testset.generators.prompts import (
+    AbstractQuestionFromTheme,
+    CommonThemeFromSummaries,
+    CriticUserInput,
+    GenerateReference,
+    ModifyUserInput,
+    Summaries,
+    ThemeAndContext,
+    Themes,
+    UserInputAndContext,
+    UserInputWithStyleAndLength,
+    extend_modify_input_prompt,
+)
+from ragas.experimental.testset.graph import KnowledgeGraph, Node
+
+logger = logging.getLogger(__name__)
+
+
+class AbstractQAScenario(BasicScenario):  # BasicScenario (nodes, style, length) extended with a theme
+    theme: str  # theme text; handed to ThemeAndContext when the user input is generated
+
+
+@dataclass
+class AbstractGenerator(BaseSimulator):
+    """
+    Generate theme-driven ("abstract") user inputs from clusters of chunks.
+
+    Scenarios are built by clustering the knowledge graph, asking the LLM for
+    common themes across the summaries of each cluster, and pairing every
+    returned theme with a randomly chosen style and length. Each prompt used
+    for generation, critique, modification and reference answers is a
+    dataclass field and can be replaced at construction time.
+    """
+
+    generate_user_input_prompt: PydanticPrompt = field(
+        default_factory=AbstractQuestionFromTheme
+    )
+    critic_user_input_prompt: PydanticPrompt = field(default_factory=CriticUserInput)
+    user_input_modification_prompt: PydanticPrompt = field(
+        default_factory=ModifyUserInput
+    )
+    generate_reference_prompt: PydanticPrompt = field(default_factory=GenerateReference)
+
+    def __post_init__(self):
+        # Plain attribute rather than a dataclass field, so it is always
+        # created here and is not a constructor argument.
+        self.common_theme_prompt = CommonThemeFromSummaries()
+
+    async def generate_scenarios(
+        self, n: int, knowledge_graph: KnowledgeGraph
+    ) -> t.List[AbstractQAScenario]:
+        """
+        Build scenarios from clusters of similar chunk nodes.
+
+        Args:
+            n: Number of scenarios wanted. Themes are requested per cluster
+                with rounding up, so the result may contain more than ``n``
+                items, or fewer if the LLM returns fewer themes than asked.
+            knowledge_graph: Graph whose relationships carry a
+                ``cosine_similarity`` property.
+
+        Returns:
+            One scenario per (cluster, theme) pair, each with a random style
+            and length. Empty when ``n`` is not positive or when no cluster
+            consisting solely of chunk nodes exists.
+        """
+        # NOTE: truthiness is used on purpose to mirror the original filter;
+        # a cosine_similarity of 0.0 is treated like a missing property.
+        node_clusters = knowledge_graph.find_clusters(
+            relationship_condition=lambda rel: bool(
+                rel.get_property("cosine_similarity")
+            )
+        )
+        logger.info("found %d clusters", len(node_clusters))
+
+        # keep only the clusters made up entirely of chunk nodes
+        node_clusters = [
+            cluster
+            for cluster in node_clusters
+            if all(node.type == "chunk" for node in cluster)
+        ]
+
+        num_clusters = len(node_clusters)
+        if n <= 0 or num_clusters == 0:
+            # Without this guard the division below raises ZeroDivisionError
+            # when there are no usable clusters; a non-positive n can never
+            # yield a scenario, so skip the LLM calls entirely.
+            logger.warning(
+                "no scenarios generated (n=%d, usable clusters=%d)", n, num_clusters
+            )
+            return []
+
+        # find the number of themes to generate per cluster for the given n;
+        # rounding up means slightly more themes than n may be produced
+        num_themes = math.ceil(n / num_clusters)
+        logger.info("generating %d themes", num_themes)
+
+        kw_list = []
+        for cluster in node_clusters:
+            cluster_summaries = []
+            for node in cluster:
+                summary = node.get_property("summary")
+                if summary is not None:
+                    cluster_summaries.append(summary)
+
+            prompt_input = Summaries(
+                summaries=cluster_summaries,
+                num_themes=num_themes,
+            )
+            kw_list.append({"data": prompt_input, "llm": self.llm})
+
+        # NOTE(review): run_async_batch is a synchronous call made from inside
+        # this coroutine; confirm the executor copes with an already running
+        # event loop.
+        themes: t.List[Themes] = run_async_batch(
+            desc="Generating common themes",
+            func=self.common_theme_prompt.generate,
+            kwargs_list=kw_list,
+        )
+
+        # flatten into two parallel lists: one (cluster, theme) pair per theme
+        clusters_sampled = []
+        themes_sampled = []
+        for cluster, cluster_themes in zip(node_clusters, themes):
+            for theme in cluster_themes.themes:
+                themes_sampled.append(theme)
+                clusters_sampled.append(cluster)
+
+        # sample question styles and question lengths; the zip below stops at
+        # the shortest input, so at most num_clusters * num_themes scenarios
+        # are produced
+        question_styles = random.choices(
+            list(UserInputStyle), k=num_clusters * num_themes
+        )
+        question_lengths = random.choices(
+            list(UserInputLength), k=num_clusters * num_themes
+        )
+
+        # create scenarios
+        scenarios = []
+        for cluster, theme, style, length in zip(
+            clusters_sampled, themes_sampled, question_styles, question_lengths
+        ):
+            scenarios.append(
+                AbstractQAScenario(
+                    theme=theme.theme,
+                    nodes=cluster,
+                    style=style,
+                    length=length,
+                )
+            )
+        return scenarios
+
+    async def generate_user_input(self, scenario: AbstractQAScenario) -> str:
+        """Run the user-input prompt on the scenario's theme and source text."""
+        question = await self.generate_user_input_prompt.generate(
+            data=ThemeAndContext(
+                theme=scenario.theme,
+                context=self.make_source_text(scenario),
+            ),
+            llm=self.llm,
+        )
+        return question.text
+
+    async def critic_user_input(self, user_input: str) -> bool:
+        """
+        Run the critic prompt on ``user_input``.
+
+        Returns True only when both the ``independence`` and ``clear_intent``
+        values of the critic output are greater than 1.
+        """
+        critic = await self.critic_user_input_prompt.generate(
+            data=StringIO(text=user_input), llm=self.llm
+        )
+        return critic.independence > 1 and critic.clear_intent > 1
+
+    async def modify_user_input(
+        self, user_input: str, scenario: AbstractQAScenario
+    ) -> str:
+        """
+        Run the modification prompt on ``user_input`` with the scenario's
+        style and length, and return the text of the result.
+        """
+        prompt = extend_modify_input_prompt(
+            question_modification_prompt=self.user_input_modification_prompt,
+            style=scenario.style,
+            length=scenario.length,
+        )
+        modified_question = await prompt.generate(
+            data=UserInputWithStyleAndLength(
+                user_input=user_input,
+                style=scenario.style,
+                length=scenario.length,
+            ),
+            llm=self.llm,
+        )
+        return modified_question.text
+
+    async def generate_reference(self, user_input: str, chunks: t.List[Node]) -> str:
+        """
+        Run the reference prompt on ``user_input`` using ``chunks`` as context.
+
+        The context is the ``page_content`` of every chunk joined by blank
+        lines; chunks without that property are left out.
+        """
+        # Built here instead of via make_source_text: that helper reads
+        # ``.nodes`` from a scenario, which a plain list of nodes lacks.
+        page_contents = []
+        for node in chunks:
+            content = node.get_property("page_content")
+            if content is not None:
+                page_contents.append(content)
+
+        reference = await self.generate_reference_prompt.generate(
+            data=UserInputAndContext(
+                user_input=user_input,
+                context="\n\n".join(page_contents),
+            ),
+            llm=self.llm,
+        )
+        return reference.text
diff --git a/src/ragas/experimental/testset/generators/base.py b/src/ragas/experimental/testset/generators/base.py
@@ -0,0 +1,68 @@
+import typing as t
+from abc import ABC, abstractmethod
+from dataclasses import dataclass, field
+from enum import Enum
+
+from pydantic import BaseModel
+
+from ragas.experimental.testset.graph import KnowledgeGraph, Node
+from ragas.llms import BaseRagasLLM, llm_factory
+
+
+class UserInputLength(str, Enum):  # str-valued enum of the length options a scenario can carry
+    LONG = "long"
+    MEDIUM = "medium"
+    SHORT = "short"
+
+
+class UserInputStyle(str, Enum):  # str-valued enum of the writing-style options a scenario can carry
+    MISSPELLED = "Misspelled queries"
+    PERFECT_GRAMMAR = "Perfect grammar"
+    POOR_GRAMMAR = "Poor grammar"
+    WEB_SEARCH_LIKE = "Web search like queries"
+
+
+class BasicScenario(BaseModel):  # fields shared by every scenario; bound of the Scenario TypeVar
+    nodes: t.List[Node]  # graph nodes whose "page_content" forms the source text
+    style: UserInputStyle  # writing style chosen for the user input
+    length: UserInputLength  # length chosen for the user input
+
+
+Scenario = t.TypeVar("Scenario", bound=BasicScenario)  # scenario type parameter for BaseSimulator; must derive from BasicScenario
+
+
+@dataclass
+class BaseSimulator(ABC, t.Generic[Scenario]):
+    """
+    Abstract base for generators that turn scenarios into user inputs and
+    reference answers.
+
+    Subclasses implement the five coroutine hooks below. ``llm`` defaults to
+    whatever ``llm_factory()`` returns.
+    """
+
+    llm: BaseRagasLLM = field(default_factory=llm_factory)
+
+    @abstractmethod
+    async def generate_user_input(
+        self,
+        scenario: Scenario,
+    ) -> str:
+        """Return a user input generated for ``scenario``."""
+
+    @abstractmethod
+    async def generate_reference(self, user_input: str, chunks: t.List[Node]) -> str:
+        """Return a reference answer to ``user_input`` based on ``chunks``."""
+
+    @abstractmethod
+    async def critic_user_input(self, user_input: str) -> bool:
+        """Return whether ``user_input`` passes the implementation's critique."""
+
+    @abstractmethod
+    async def modify_user_input(self, user_input: str, scenario: Scenario) -> str:
+        """Return ``user_input`` modified according to ``scenario``."""
+
+    @abstractmethod
+    async def generate_scenarios(
+        self, n: int, knowledge_graph: KnowledgeGraph
+    ) -> t.List[Scenario]:
+        """Return scenarios built from ``knowledge_graph`` for a target of ``n``."""
+
+    @staticmethod
+    def make_source_text(scenario: t.Union[Scenario, t.Sequence[Node]]) -> str:
+        """
+        Join the ``page_content`` of a set of nodes into one text.
+
+        Args:
+            scenario: Either a scenario (its ``nodes`` are used) or a plain
+                sequence of nodes, which is how implementations of
+                ``generate_reference`` receive their chunks.
+
+        Returns:
+            The page contents separated by blank lines. Nodes whose
+            ``page_content`` property is ``None`` are skipped, since joining
+            a ``None`` would raise ``TypeError``.
+        """
+        # A scenario exposes ``nodes``; anything else is treated as the node
+        # sequence itself.
+        nodes = getattr(scenario, "nodes", scenario)
+        page_contents = []
+        for node in nodes:
+            content = node.get_property("page_content")
+            if content is not None:
+                page_contents.append(content)
+        return "\n\n".join(page_contents)
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		from .abstract import AbstractGenerator

		__all__ = ["AbstractGenerator"]