Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: deeper helicon integration #1196

Merged
merged 3 commits into from
Aug 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions docs/howtos/customisations/run_config.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -53,15 +53,16 @@
"\n",
"# load the dataset\n",
"from datasets import load_dataset\n",
"\n",
"amnesty_qa = load_dataset(\"explodinggradients/amnesty_qa\", \"english_v2\")\n",
"\n",
"# configure RunConfig\n",
"from ragas.run_config import RunConfig\n",
"\n",
"_ = evaluate(\n",
" dataset=amnesty_qa[\"eval\"], \n",
" dataset=amnesty_qa[\"eval\"],\n",
" metrics=[faithfulness],\n",
" run_config=RunConfig(max_workers=64), # increasing max_workers from default 16\n",
" run_config=RunConfig(max_workers=64), # increasing max_workers from default 16\n",
")"
]
},
Expand Down Expand Up @@ -94,9 +95,9 @@
],
"source": [
"_ = evaluate(\n",
" dataset=amnesty_qa[\"eval\"], \n",
" dataset=amnesty_qa[\"eval\"],\n",
" metrics=[faithfulness],\n",
" run_config=RunConfig(max_workers=2), # increasing max_workers from default 16\n",
" run_config=RunConfig(max_workers=2), # increasing max_workers from default 16\n",
")"
]
},
Expand Down
37 changes: 27 additions & 10 deletions docs/howtos/integrations/helicone.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -53,15 +53,22 @@
"from datasets import Dataset\n",
"from ragas import evaluate\n",
"from ragas.metrics import faithfulness, answer_relevancy, context_precision\n",
"from ragas.integrations.helicone import helicone_config # import helicone_config\n",
"\n",
"\n",
"# Set up Helicone\n",
"HELICONE_API_KEY = \"your_helicone_api_key_here\" # Replace with your actual Helicone API key\n",
"os.environ[\"OPENAI_API_BASE\"] = f\"https://oai.helicone.ai/{HELICONE_API_KEY}/v1\"\n",
"os.environ[\"OPENAI_API_KEY\"] = \"your_openai_api_key_here\" # Replace with your actual OpenAI API key\n",
"helicone_config.api_key = (\n",
" \"your_helicone_api_key_here\" # Replace with your actual Helicone API key\n",
")\n",
"os.environ[\n",
" \"OPENAI_API_KEY\"\n",
"] = \"your_openai_api_key_here\" # Replace with your actual OpenAI API key\n",
"\n",
"# Verify Helicone API key is set\n",
"if HELICONE_API_KEY == \"your_helicone_api_key_here\":\n",
" raise ValueError(\"Please replace 'your_helicone_api_key_here' with your actual Helicone API key.\")"
" raise ValueError(\n",
" \"Please replace 'your_helicone_api_key_here' with your actual Helicone API key.\"\n",
" )"
]
},
{
Expand All @@ -80,13 +87,23 @@
"outputs": [],
"source": [
"data_samples = {\n",
" 'question': ['When was the first Super Bowl?', 'Who has won the most Super Bowls?'],\n",
" 'answer': ['The first Super Bowl was held on January 15, 1967.', 'The New England Patriots have won the most Super Bowls, with six championships.'],\n",
" 'contexts': [\n",
" ['The First AFL–NFL World Championship Game, later known as Super Bowl I, was played on January 15, 1967, at the Los Angeles Memorial Coliseum in Los Angeles, California.'],\n",
" ['As of 2021, the New England Patriots have won the most Super Bowls with six championships, all under the leadership of quarterback Tom Brady and head coach Bill Belichick.']\n",
" \"question\": [\"When was the first Super Bowl?\", \"Who has won the most Super Bowls?\"],\n",
" \"answer\": [\n",
" \"The first Super Bowl was held on January 15, 1967.\",\n",
" \"The New England Patriots have won the most Super Bowls, with six championships.\",\n",
" ],\n",
" \"contexts\": [\n",
" [\n",
" \"The First AFL–NFL World Championship Game, later known as Super Bowl I, was played on January 15, 1967, at the Los Angeles Memorial Coliseum in Los Angeles, California.\"\n",
" ],\n",
" [\n",
" \"As of 2021, the New England Patriots have won the most Super Bowls with six championships, all under the leadership of quarterback Tom Brady and head coach Bill Belichick.\"\n",
" ],\n",
" ],\n",
" \"ground_truth\": [\n",
" \"The first Super Bowl was held on January 15, 1967.\",\n",
" \"The New England Patriots have won the most Super Bowls, with six championships as of 2021.\",\n",
" ],\n",
" 'ground_truth': ['The first Super Bowl was held on January 15, 1967.', 'The New England Patriots have won the most Super Bowls, with six championships as of 2021.']\n",
"}\n",
"\n",
"dataset = Dataset.from_dict(data_samples)\n",
Expand Down
7 changes: 7 additions & 0 deletions src/ragas/evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
)
from ragas.exceptions import ExceptionInRunner
from ragas.executor import Executor
from ragas.integrations.helicone import helicone_config
from ragas.llms import llm_factory
from ragas.llms.base import BaseRagasLLM, LangchainLLMWrapper
from ragas.metrics._answer_correctness import AnswerCorrectness
Expand Down Expand Up @@ -136,6 +137,12 @@ def evaluate(
column_map = column_map or {}
callbacks = callbacks or []

if helicone_config.is_enabled:
import uuid

helicone_config.session_name = "ragas-evaluation"
helicone_config.session_id = str(uuid.uuid4())

if dataset is None:
raise ValueError("Provide dataset!")

Expand Down
101 changes: 101 additions & 0 deletions src/ragas/integrations/helicone.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
"""Helicone integration for ragas.

Enable the integration by setting ``helicone_config.api_key``;
``default_headers()`` then builds the ``Helicone-*`` HTTP headers used to
route LLM traffic through the Helicone proxy.
"""

from dataclasses import dataclass, field
from typing import Any, ClassVar, Dict, Optional


@dataclass
class CacheConfig:
    """Settings for Helicone response caching."""

    ttl: int = 60 * 60 * 24 * 30  # cache time-to-live in seconds (30 days)
    maxsize: int = 1000  # maximum number of cached responses


@dataclass
class HeliconeSingleton:
    """Process-wide Helicone configuration (singleton).

    Every instantiation returns the same object (see ``__new__``), so code
    anywhere in the process can import ``helicone_config`` and mutate it.
    Helicone is considered enabled once ``api_key`` is set.

    NOTE(review): the dataclass-generated ``__init__`` still runs on every
    instantiation and resets all fields to their defaults on the shared
    instance — construct this once at module level and only mutate
    attributes afterwards.
    """

    api_key: Optional[str] = None
    base_url: Optional[str] = "https://oai.helicone.ai"
    cache_config: Optional[CacheConfig] = None
    # Class-level slot backing the singleton; ClassVar keeps it out of
    # __init__ and prevents instances from shadowing it.
    _instance: ClassVar[Optional["HeliconeSingleton"]] = None

    # Optional per-request header values; a header is emitted only when the
    # corresponding field is set.
    target_url: Optional[str] = None
    openai_api_base: Optional[str] = None
    request_id: Optional[str] = None
    model_override: Optional[str] = None
    prompt_id: Optional[str] = None
    user_id: Optional[str] = None
    fallbacks: Optional[str] = None
    rate_limit_policy: Optional[str] = None
    session_id: Optional[str] = None
    session_path: Optional[str] = None
    session_name: Optional[str] = None
    posthog_key: Optional[str] = None
    posthog_host: Optional[str] = None
    omit_response: Optional[bool] = None
    omit_request: Optional[bool] = None
    cache_enabled: Optional[bool] = None
    retry_enabled: Optional[bool] = None
    moderations_enabled: Optional[bool] = None
    llm_security_enabled: Optional[bool] = None
    stream_force_format: Optional[bool] = None
    custom_properties: Dict[str, str] = field(default_factory=dict)

    def __new__(cls, *args: Any, **kwargs: Any) -> "HeliconeSingleton":
        # Accept (and ignore here) init arguments so dataclass-style
        # construction such as HeliconeSingleton(api_key=...) does not raise
        # TypeError; __init__ receives them afterwards as usual.
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    def default_headers(self) -> Dict[str, Any]:
        """Build the ``Helicone-*`` headers for outgoing LLM requests.

        Returns a dict containing ``Helicone-Auth`` plus one header per
        configured option; unset options are omitted entirely.
        """
        headers: Dict[str, Any] = {"Helicone-Auth": f"Bearer {self.api_key}"}

        # String-valued headers: emitted only when the field is truthy.
        string_headers = {
            "Helicone-Target-URL": self.target_url,
            "Helicone-OpenAI-Api-Base": self.openai_api_base,
            "Helicone-Request-Id": self.request_id,
            "Helicone-Model-Override": self.model_override,
            "Helicone-Prompt-Id": self.prompt_id,
            "Helicone-User-Id": self.user_id,
            "Helicone-Fallbacks": self.fallbacks,
            "Helicone-RateLimit-Policy": self.rate_limit_policy,
            "Helicone-Session-Id": self.session_id,
            "Helicone-Session-Path": self.session_path,
            "Helicone-Session-Name": self.session_name,
            "Helicone-Posthog-Key": self.posthog_key,
            "Helicone-Posthog-Host": self.posthog_host,
        }
        for header, value in string_headers.items():
            if value:
                headers[header] = value

        # Caching is on when explicitly enabled or when a CacheConfig is
        # provided.  (The previous expression dereferenced a None
        # cache_config — crashing with AttributeError by default — and its
        # precedence could emit values like "1000" instead of "true".)
        cache_header: Optional[bool] = (
            True
            if (self.cache_enabled or self.cache_config is not None)
            else None
        )

        # Boolean headers: emitted (lower-cased) whenever the field is not
        # None, so an explicit False is sent as "false".
        bool_headers = {
            "Helicone-Omit-Response": self.omit_response,
            "Helicone-Omit-Request": self.omit_request,
            "Helicone-Cache-Enabled": cache_header,
            "Helicone-Retry-Enabled": self.retry_enabled,
            "Helicone-Moderations-Enabled": self.moderations_enabled,
            "Helicone-LLM-Security-Enabled": self.llm_security_enabled,
            "Helicone-Stream-Force-Format": self.stream_force_format,
        }
        for header, value in bool_headers.items():
            if value is not None:
                headers[header] = str(value).lower()

        # User-defined custom properties become Helicone-Property-* headers.
        for key, value in self.custom_properties.items():
            headers[f"Helicone-Property-{key}"] = value

        return headers

    @property
    def is_enabled(self) -> bool:
        """True once an API key has been configured."""
        return self.api_key is not None


# Module-level singleton; import and mutate its fields (e.g. ``api_key``)
# to enable the Helicone integration.
helicone_config = HeliconeSingleton()
16 changes: 14 additions & 2 deletions src/ragas/llms/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from langchain_openai.llms import AzureOpenAI, OpenAI
from langchain_openai.llms.base import BaseOpenAI

from ragas.integrations.helicone import helicone_config
from ragas.run_config import RunConfig, add_async_retry, add_retry

if t.TYPE_CHECKING:
Expand Down Expand Up @@ -289,10 +290,21 @@ async def agenerate_text(


def llm_factory(
model: str = "gpt-4o-mini", run_config: t.Optional[RunConfig] = None
model: str = "gpt-4o-mini",
run_config: t.Optional[RunConfig] = None,
default_headers: t.Optional[t.Dict[str, str]] = None,
base_url: t.Optional[str] = None,
) -> BaseRagasLLM:
timeout = None
if run_config is not None:
timeout = run_config.timeout
openai_model = ChatOpenAI(model=model, timeout=timeout)

# if helicone is enabled, use the helicone
if helicone_config.is_enabled:
default_headers = helicone_config.default_headers()
base_url = helicone_config.base_url

openai_model = ChatOpenAI(
model=model, timeout=timeout, default_headers=default_headers, base_url=base_url
)
return LangchainLLMWrapper(openai_model, run_config)
9 changes: 6 additions & 3 deletions src/ragas/metrics/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,11 +63,13 @@ def get_required_columns(
class Metric(ABC):
@property
@abstractmethod
def name(self) -> str: ...
def name(self) -> str:
...

@property
@abstractmethod
def evaluation_mode(self) -> EvaluationMode: ...
def evaluation_mode(self) -> EvaluationMode:
...

@abstractmethod
def init(self, run_config: RunConfig):
Expand Down Expand Up @@ -130,7 +132,8 @@ async def ascore(
return score

@abstractmethod
async def _ascore(self, row: t.Dict, callbacks: Callbacks) -> float: ...
async def _ascore(self, row: t.Dict, callbacks: Callbacks) -> float:
...


@dataclass
Expand Down
4 changes: 2 additions & 2 deletions src/ragas/testset/prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -509,5 +509,5 @@ class EvolutionElimination(BaseModel):
question_rewrite_prompt,
context_scoring_prompt,
filter_question_prompt,
evolution_elimination_prompt
]
evolution_elimination_prompt,
]
4 changes: 3 additions & 1 deletion tests/unit/test_analytics.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from __future__ import annotations

import typing as t

import pytest


Expand Down Expand Up @@ -130,7 +132,7 @@ def test_testset_generation_tracking(monkeypatch):


def test_was_completed(monkeypatch):
from ragas._analytics import track_was_completed, IsCompleteEvent
from ragas._analytics import IsCompleteEvent, track_was_completed

event_properties_list: t.List[IsCompleteEvent] = []

Expand Down
6 changes: 4 additions & 2 deletions tests/unit/test_executor_in_jupyter.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
"\n",
"exec = Executor(raise_exceptions=True)\n",
"for i in range(10):\n",
" exec.submit(sleep, i/10)\n",
" exec.submit(sleep, i / 10)\n",
"\n",
"assert exec.results(), \"didn't get anything from results\""
]
Expand Down Expand Up @@ -140,16 +140,18 @@
"source": [
"from ragas.metrics.base import Metric, EvaluationMode\n",
"\n",
"\n",
"class FakeMetric(Metric):\n",
" name = \"fake_metric\"\n",
" evaluation_mode = EvaluationMode.qa\n",
"\n",
" def init(self):\n",
" pass\n",
"\n",
" async def _ascore(self, row, callbacks)->float:\n",
" async def _ascore(self, row, callbacks) -> float:\n",
" return 0\n",
"\n",
"\n",
"fm = FakeMetric()"
]
},
Expand Down
Loading
Loading