Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: deeper helicon integration #1196

Merged
merged 3 commits into from
Aug 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions docs/howtos/customisations/run_config.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -53,15 +53,16 @@
"\n",
"# load the dataset\n",
"from datasets import load_dataset\n",
"\n",
"amnesty_qa = load_dataset(\"explodinggradients/amnesty_qa\", \"english_v2\")\n",
"\n",
"# configure RunConfig\n",
"from ragas.run_config import RunConfig\n",
"\n",
"_ = evaluate(\n",
" dataset=amnesty_qa[\"eval\"], \n",
" dataset=amnesty_qa[\"eval\"],\n",
" metrics=[faithfulness],\n",
" run_config=RunConfig(max_workers=64), # increasing max_workers from default 16\n",
" run_config=RunConfig(max_workers=64), # increasing max_workers from default 16\n",
")"
]
},
Expand Down Expand Up @@ -94,9 +95,9 @@
],
"source": [
"_ = evaluate(\n",
" dataset=amnesty_qa[\"eval\"], \n",
" dataset=amnesty_qa[\"eval\"],\n",
" metrics=[faithfulness],\n",
" run_config=RunConfig(max_workers=2), # increasing max_workers from default 16\n",
" run_config=RunConfig(max_workers=2), # increasing max_workers from default 16\n",
")"
]
},
Expand Down
37 changes: 27 additions & 10 deletions docs/howtos/integrations/helicone.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -53,15 +53,22 @@
"from datasets import Dataset\n",
"from ragas import evaluate\n",
"from ragas.metrics import faithfulness, answer_relevancy, context_precision\n",
"from ragas.integrations.helicone import helicone_config # import helicone_config\n",
"\n",
"\n",
"# Set up Helicone\n",
"HELICONE_API_KEY = \"your_helicone_api_key_here\" # Replace with your actual Helicone API key\n",
"os.environ[\"OPENAI_API_BASE\"] = f\"https://oai.helicone.ai/{HELICONE_API_KEY}/v1\"\n",
"os.environ[\"OPENAI_API_KEY\"] = \"your_openai_api_key_here\" # Replace with your actual OpenAI API key\n",
"helicone_config.api_key = (\n",
" \"your_helicone_api_key_here\" # Replace with your actual Helicone API key\n",
")\n",
"os.environ[\n",
" \"OPENAI_API_KEY\"\n",
"] = \"your_openai_api_key_here\" # Replace with your actual OpenAI API key\n",
"\n",
"# Verify Helicone API key is set\n",
"if HELICONE_API_KEY == \"your_helicone_api_key_here\":\n",
" raise ValueError(\"Please replace 'your_helicone_api_key_here' with your actual Helicone API key.\")"
" raise ValueError(\n",
" \"Please replace 'your_helicone_api_key_here' with your actual Helicone API key.\"\n",
" )"
]
},
{
Expand All @@ -80,13 +87,23 @@
"outputs": [],
"source": [
"data_samples = {\n",
" 'question': ['When was the first Super Bowl?', 'Who has won the most Super Bowls?'],\n",
" 'answer': ['The first Super Bowl was held on January 15, 1967.', 'The New England Patriots have won the most Super Bowls, with six championships.'],\n",
" 'contexts': [\n",
" ['The First AFL–NFL World Championship Game, later known as Super Bowl I, was played on January 15, 1967, at the Los Angeles Memorial Coliseum in Los Angeles, California.'],\n",
" ['As of 2021, the New England Patriots have won the most Super Bowls with six championships, all under the leadership of quarterback Tom Brady and head coach Bill Belichick.']\n",
" \"question\": [\"When was the first Super Bowl?\", \"Who has won the most Super Bowls?\"],\n",
" \"answer\": [\n",
" \"The first Super Bowl was held on January 15, 1967.\",\n",
" \"The New England Patriots have won the most Super Bowls, with six championships.\",\n",
" ],\n",
" \"contexts\": [\n",
" [\n",
" \"The First AFL–NFL World Championship Game, later known as Super Bowl I, was played on January 15, 1967, at the Los Angeles Memorial Coliseum in Los Angeles, California.\"\n",
" ],\n",
" [\n",
" \"As of 2021, the New England Patriots have won the most Super Bowls with six championships, all under the leadership of quarterback Tom Brady and head coach Bill Belichick.\"\n",
" ],\n",
" ],\n",
" \"ground_truth\": [\n",
" \"The first Super Bowl was held on January 15, 1967.\",\n",
" \"The New England Patriots have won the most Super Bowls, with six championships as of 2021.\",\n",
" ],\n",
" 'ground_truth': ['The first Super Bowl was held on January 15, 1967.', 'The New England Patriots have won the most Super Bowls, with six championships as of 2021.']\n",
"}\n",
"\n",
"dataset = Dataset.from_dict(data_samples)\n",
Expand Down
7 changes: 7 additions & 0 deletions src/ragas/evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
)
from ragas.exceptions import ExceptionInRunner
from ragas.executor import Executor
from ragas.integrations.helicone import helicone_config
from ragas.llms import llm_factory
from ragas.llms.base import BaseRagasLLM, LangchainLLMWrapper
from ragas.metrics._answer_correctness import AnswerCorrectness
Expand Down Expand Up @@ -136,6 +137,12 @@ def evaluate(
column_map = column_map or {}
callbacks = callbacks or []

if helicone_config.is_enabled:
import uuid

helicone_config.session_name = "ragas-evaluation"
helicone_config.session_id = str(uuid.uuid4())

if dataset is None:
raise ValueError("Provide dataset!")

Expand Down
101 changes: 101 additions & 0 deletions src/ragas/integrations/helicone.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
"""Helicone integration for ragas.

Enable the integration by setting ``helicone_config.api_key``;
``default_headers()`` then builds the ``Helicone-*`` HTTP headers used to
route LLM traffic through the Helicone proxy.
"""

from dataclasses import dataclass, field
from typing import Any, ClassVar, Dict, Optional


@dataclass
class CacheConfig:
    """Settings for Helicone response caching."""

    ttl: int = 60 * 60 * 24 * 30  # cache time-to-live in seconds (30 days)
    maxsize: int = 1000  # maximum number of cached responses


@dataclass
class HeliconeSingleton:
    """Process-wide Helicone configuration (singleton).

    Every instantiation returns the same object (see ``__new__``), so code
    anywhere in the process can import ``helicone_config`` and mutate it.
    Helicone is considered enabled once ``api_key`` is set.

    NOTE(review): the dataclass-generated ``__init__`` still runs on every
    instantiation and resets all fields to their defaults on the shared
    instance — construct this once at module level and only mutate
    attributes afterwards.
    """

    api_key: Optional[str] = None
    base_url: Optional[str] = "https://oai.helicone.ai"
    cache_config: Optional[CacheConfig] = None
    # Class-level slot backing the singleton; ClassVar keeps it out of
    # __init__ and prevents instances from shadowing it.
    _instance: ClassVar[Optional["HeliconeSingleton"]] = None

    # Optional per-request header values; a header is emitted only when the
    # corresponding field is set.
    target_url: Optional[str] = None
    openai_api_base: Optional[str] = None
    request_id: Optional[str] = None
    model_override: Optional[str] = None
    prompt_id: Optional[str] = None
    user_id: Optional[str] = None
    fallbacks: Optional[str] = None
    rate_limit_policy: Optional[str] = None
    session_id: Optional[str] = None
    session_path: Optional[str] = None
    session_name: Optional[str] = None
    posthog_key: Optional[str] = None
    posthog_host: Optional[str] = None
    omit_response: Optional[bool] = None
    omit_request: Optional[bool] = None
    cache_enabled: Optional[bool] = None
    retry_enabled: Optional[bool] = None
    moderations_enabled: Optional[bool] = None
    llm_security_enabled: Optional[bool] = None
    stream_force_format: Optional[bool] = None
    custom_properties: Dict[str, str] = field(default_factory=dict)

    def __new__(cls, *args: Any, **kwargs: Any) -> "HeliconeSingleton":
        # Accept (and ignore here) init arguments so dataclass-style
        # construction such as HeliconeSingleton(api_key=...) does not raise
        # TypeError; __init__ receives them afterwards as usual.
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    def default_headers(self) -> Dict[str, Any]:
        """Build the ``Helicone-*`` headers for outgoing LLM requests.

        Returns a dict containing ``Helicone-Auth`` plus one header per
        configured option; unset options are omitted entirely.
        """
        headers: Dict[str, Any] = {"Helicone-Auth": f"Bearer {self.api_key}"}

        # String-valued headers: emitted only when the field is truthy.
        string_headers = {
            "Helicone-Target-URL": self.target_url,
            "Helicone-OpenAI-Api-Base": self.openai_api_base,
            "Helicone-Request-Id": self.request_id,
            "Helicone-Model-Override": self.model_override,
            "Helicone-Prompt-Id": self.prompt_id,
            "Helicone-User-Id": self.user_id,
            "Helicone-Fallbacks": self.fallbacks,
            "Helicone-RateLimit-Policy": self.rate_limit_policy,
            "Helicone-Session-Id": self.session_id,
            "Helicone-Session-Path": self.session_path,
            "Helicone-Session-Name": self.session_name,
            "Helicone-Posthog-Key": self.posthog_key,
            "Helicone-Posthog-Host": self.posthog_host,
        }
        for header, value in string_headers.items():
            if value:
                headers[header] = value

        # Caching is on when explicitly enabled or when a CacheConfig is
        # provided.  (The previous expression dereferenced a None
        # cache_config — crashing with AttributeError by default — and its
        # precedence could emit values like "1000" instead of "true".)
        cache_header: Optional[bool] = (
            True
            if (self.cache_enabled or self.cache_config is not None)
            else None
        )

        # Boolean headers: emitted (lower-cased) whenever the field is not
        # None, so an explicit False is sent as "false".
        bool_headers = {
            "Helicone-Omit-Response": self.omit_response,
            "Helicone-Omit-Request": self.omit_request,
            "Helicone-Cache-Enabled": cache_header,
            "Helicone-Retry-Enabled": self.retry_enabled,
            "Helicone-Moderations-Enabled": self.moderations_enabled,
            "Helicone-LLM-Security-Enabled": self.llm_security_enabled,
            "Helicone-Stream-Force-Format": self.stream_force_format,
        }
        for header, value in bool_headers.items():
            if value is not None:
                headers[header] = str(value).lower()

        # User-defined custom properties become Helicone-Property-* headers.
        for key, value in self.custom_properties.items():
            headers[f"Helicone-Property-{key}"] = value

        return headers

    @property
    def is_enabled(self) -> bool:
        """True once an API key has been configured."""
        return self.api_key is not None


# Module-level singleton; import and mutate its fields (e.g. ``api_key``)
# to enable the Helicone integration.
helicone_config = HeliconeSingleton()
16 changes: 14 additions & 2 deletions src/ragas/llms/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from langchain_openai.llms import AzureOpenAI, OpenAI
from langchain_openai.llms.base import BaseOpenAI

from ragas.integrations.helicone import helicone_config
from ragas.run_config import RunConfig, add_async_retry, add_retry

if t.TYPE_CHECKING:
Expand Down Expand Up @@ -289,10 +290,21 @@ async def agenerate_text(


def llm_factory(
model: str = "gpt-4o-mini", run_config: t.Optional[RunConfig] = None
model: str = "gpt-4o-mini",
run_config: t.Optional[RunConfig] = None,
default_headers: t.Optional[t.Dict[str, str]] = None,
base_url: t.Optional[str] = None,
) -> BaseRagasLLM:
timeout = None
if run_config is not None:
timeout = run_config.timeout
openai_model = ChatOpenAI(model=model, timeout=timeout)

# if helicone is enabled, use the helicone
if helicone_config.is_enabled:
default_headers = helicone_config.default_headers()
base_url = helicone_config.base_url

openai_model = ChatOpenAI(
model=model, timeout=timeout, default_headers=default_headers, base_url=base_url
)
return LangchainLLMWrapper(openai_model, run_config)
9 changes: 6 additions & 3 deletions src/ragas/metrics/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,11 +63,13 @@ def get_required_columns(
class Metric(ABC):
@property
@abstractmethod
def name(self) -> str: ...
def name(self) -> str:
...

@property
@abstractmethod
def evaluation_mode(self) -> EvaluationMode: ...
def evaluation_mode(self) -> EvaluationMode:
...

@abstractmethod
def init(self, run_config: RunConfig):
Expand Down Expand Up @@ -130,7 +132,8 @@ async def ascore(
return score

@abstractmethod
async def _ascore(self, row: t.Dict, callbacks: Callbacks) -> float: ...
async def _ascore(self, row: t.Dict, callbacks: Callbacks) -> float:
...


@dataclass
Expand Down
4 changes: 2 additions & 2 deletions src/ragas/testset/prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -509,5 +509,5 @@ class EvolutionElimination(BaseModel):
question_rewrite_prompt,
context_scoring_prompt,
filter_question_prompt,
evolution_elimination_prompt
]
evolution_elimination_prompt,
]
4 changes: 3 additions & 1 deletion tests/unit/test_analytics.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from __future__ import annotations

import typing as t

import pytest


Expand Down Expand Up @@ -130,7 +132,7 @@ def test_testset_generation_tracking(monkeypatch):


def test_was_completed(monkeypatch):
from ragas._analytics import track_was_completed, IsCompleteEvent
from ragas._analytics import IsCompleteEvent, track_was_completed

event_properties_list: t.List[IsCompleteEvent] = []

Expand Down
6 changes: 4 additions & 2 deletions tests/unit/test_executor_in_jupyter.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
"\n",
"exec = Executor(raise_exceptions=True)\n",
"for i in range(10):\n",
" exec.submit(sleep, i/10)\n",
" exec.submit(sleep, i / 10)\n",
"\n",
"assert exec.results(), \"didn't get anything from results\""
]
Expand Down Expand Up @@ -140,16 +140,18 @@
"source": [
"from ragas.metrics.base import Metric, EvaluationMode\n",
"\n",
"\n",
"class FakeMetric(Metric):\n",
" name = \"fake_metric\"\n",
" evaluation_mode = EvaluationMode.qa\n",
"\n",
" def init(self):\n",
" pass\n",
"\n",
" async def _ascore(self, row, callbacks)->float:\n",
" async def _ascore(self, row, callbacks) -> float:\n",
" return 0\n",
"\n",
"\n",
"fm = FakeMetric()"
]
},
Expand Down
Loading
Loading