Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

tests: e2e tests for testset generation #1563

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion docs/howtos/applications/cost.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,8 @@
"from ragas.testset import TestsetGenerator\n",
"from ragas.llms import llm_factory\n",
"\n",
"tg = TestsetGenerator(llm=llm_factory(), knowledge_graph=kg)\n",
"# generator_llm = llm_factory()\n",
"tg = TestsetGenerator(llm=generator_llm, knowledge_graph=kg)\n",
"# generating a testset\n",
"testset = tg.generate(testset_size=10, token_usage_parser=get_token_usage_for_openai)"
]
Expand Down
1,052,578 changes: 1,052,578 additions & 0 deletions tests/e2e/scratchpad_kg.json

Large diffs are not rendered by default.

17 changes: 11 additions & 6 deletions tests/e2e/test_fullflow.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,19 @@
from datasets import load_dataset

from ragas import evaluate
from ragas.metrics import answer_relevancy, context_precision, faithfulness
from ragas.metrics.critique import harmfulness
from ragas import EvaluationDataset, evaluate
from ragas.llms import llm_factory
from ragas.metrics import AnswerRelevancy, ContextPrecision, Faithfulness


def test_evaluate_e2e():
# NOTE(review): this body is a rendered diff hunk, not a single version of the
# function. The +/- markers and the indentation were lost in extraction, so old
# and new lines are interleaved. The old/new labels below are inferred from the
# "11 additions & 6 deletions" summary and the "@@ -1,14 +1,19 @@" hunk header;
# confirm against the real diff before relying on them.
# Presumably the pre-change line: loads the "baseline" split of the fiqa dataset.
ds = load_dataset("explodinggradients/fiqa", "ragas_eval")["baseline"]
# Presumably the post-change lines: load amnesty_qa and wrap its "eval" split
# in an EvaluationDataset.
dataset = load_dataset("explodinggradients/amnesty_qa", "english_v3")
eval_dataset = EvaluationDataset.from_hf_dataset(dataset["eval"]) # type: ignore
result = evaluate(
# Presumably pre-change arguments: first three rows and module-level metric objects.
ds.select(range(3)),
metrics=[answer_relevancy, context_precision, faithfulness, harmfulness],
# Presumably post-change arguments: the EvaluationDataset and metric instances,
# each built with its own llm_factory() result.
eval_dataset,
metrics=[
AnswerRelevancy(llm=llm_factory()),
ContextPrecision(llm=llm_factory()),
Faithfulness(llm=llm_factory()),
],
)
# Smoke check only: the test passes as long as evaluate() returned something.
assert result is not None
34 changes: 34 additions & 0 deletions tests/e2e/test_testset_generation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import os
import uuid

from ragas.testset import TestsetGenerator


def test_testset_generation():
    """Smoke-test testset generation from the knowledge graph stored next to this file.

    Loads ``scratchpad_kg.json`` from this test's directory, builds a
    ``TestsetGenerator`` around it with the LLM returned by ``llm_factory()``,
    requests a testset of size 10, and only checks that a result came back.
    """
    from ragas.llms import llm_factory
    from ragas.testset.graph import KnowledgeGraph

    # Resolve the fixture relative to this file so the test does not depend
    # on the current working directory.
    fixture_dir = os.path.dirname(__file__)
    kg_path = os.path.join(fixture_dir, "scratchpad_kg.json")
    knowledge_graph = KnowledgeGraph.load(kg_path)

    generator = TestsetGenerator(llm=llm_factory(), knowledge_graph=knowledge_graph)
    generated = generator.generate(testset_size=10)
    assert generated is not None


def test_transforms():
    """Smoke-test the default transforms on a knowledge graph with a single node.

    Builds the default transform pipeline from ``llm_factory()`` and
    ``embedding_factory()``, applies it to a graph holding one node, and
    checks that the graph still holds exactly one node afterwards.
    """
    from ragas.embeddings import embedding_factory
    from ragas.llms import llm_factory
    from ragas.testset.graph import KnowledgeGraph, Node
    from ragas.testset.transforms import apply_transforms, default_transforms

    pipeline = default_transforms(
        llm=llm_factory(),
        embedding_model=embedding_factory(),
    )

    graph = KnowledgeGraph()
    seed_node = Node(
        id=uuid.uuid4(),
        properties={"page_content": "Hello, world!"},
    )
    graph.nodes.append(seed_node)
    assert len(graph.nodes) == 1

    # The node count must be unchanged after the transforms run.
    apply_transforms(graph, pipeline)
    assert len(graph.nodes) == 1
Loading