Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: structure faithfulness output #333

Merged
merged 21 commits into from
Nov 24, 2023
Merged
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
8b8d1fe
add langchain loaders to docs
shahules786 Oct 19, 2023
cd7f411
Merge branch 'main' of https://github.com/explodinggradients/ragas
shahules786 Oct 20, 2023
5b18325
Merge branch 'main' of https://github.com/explodinggradients/ragas
shahules786 Oct 26, 2023
bb8d984
Merge branch 'main' of https://github.com/explodinggradients/ragas
shahules786 Oct 26, 2023
9cbb57d
Merge branch 'main' of https://github.com/explodinggradients/ragas
shahules786 Oct 29, 2023
479e636
Merge branch 'main' of https://github.com/explodinggradients/ragas
shahules786 Nov 7, 2023
3eeb7ea
Merge branch 'main' of https://github.com/explodinggradients/ragas
shahules786 Nov 12, 2023
b09003f
Merge branch 'main' of https://github.com/explodinggradients/ragas
shahules786 Nov 17, 2023
0d28d62
Merge branch 'main' of https://github.com/explodinggradients/ragas
shahules786 Nov 20, 2023
110cc02
reformat to json format
shahules786 Nov 20, 2023
c4036f2
add qcg to validate
shahules786 Nov 20, 2023
cf14e39
determinism experiments
shahules786 Nov 20, 2023
7ba5f46
json loader
shahules786 Nov 21, 2023
1df47bf
replace with nanmean
shahules786 Nov 21, 2023
d54ef72
move json loader
shahules786 Nov 21, 2023
3b1878d
move json loader
shahules786 Nov 21, 2023
cc128c9
fix type error
shahules786 Nov 22, 2023
24b9e25
Merge branch 'main' of https://github.com/explodinggradients/ragas in…
shahules786 Nov 22, 2023
16821c4
add error string
shahules786 Nov 22, 2023
14e7440
Merge branch 'main' of https://github.com/explodinggradients/ragas in…
shahules786 Nov 24, 2023
35fb0e6
structured output
shahules786 Nov 24, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
145 changes: 93 additions & 52 deletions src/ragas/metrics/_faithfulness.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,56 +3,107 @@
import typing as t
from dataclasses import dataclass

import numpy as np
from langchain.callbacks.manager import CallbackManager, trace_as_chain_group
from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate

from ragas.metrics.base import EvaluationMode, MetricWithLLM
from ragas.utils import load_as_json

if t.TYPE_CHECKING:
from datasets import Dataset

#################
# NLI Score
#################

LONG_FORM_ANSWER_PROMPT = HumanMessagePromptTemplate.from_template(
"""\
Given a question and answer, create one or more statements from each sentence in the given answer.
Create one or more statements from each sentence in the given answer.

question: Who was Albert Einstein and what is he best known for?
answer: He was a German-born theoretical physicist, widely acknowledged to be one of the greatest and most influential physicists of all time. He was best known for developing the theory of relativity, he also made important contributions to the development of the theory of quantum mechanics.
statements:\nAlbert Einstein was born in Germany.\nAlbert Einstein was best known for his theory of relativity.
statements in json:
{{
"statements": [
"Albert Einstein was born in Germany.",
"Albert Einstein was best known for his theory of relativity."
]
}}

question: Cadmium Chloride is slightly soluble in this chemical, it is also called what?
answer: alcohol
statements:\nCadmium Chloride is slightly soluble in alcohol.
statements in json:
{{
"statements": [
"Cadmium Chloride is slightly soluble in alcohol."
]
}}

question: Were Shahul and Jithin of the same nationality?
answer: They were from different countries.
statements:\nShahul and Jithin were from different countries.
statements in json:
{{
"statements": [
"Shahul and Jithin were from different countries."
]
}}

question:{question}
answer: {answer}
statements:\n""" # noqa: E501
statements in json:""" # noqa: E501
)


NLI_STATEMENTS_MESSAGE = HumanMessagePromptTemplate.from_template(
"""
Prompt: Natural language inference
Consider the given context and following statements, then determine whether they are supported by the information present in the context.Provide a brief explanation for each statement before arriving at the verdict (Yes/No). Provide a final verdict for each statement in order at the end in the given format. Do not deviate from the specified format.

Context:\nJohn is a student at XYZ University. He is pursuing a degree in Computer Science. He is enrolled in several courses this semester, including Data Structures, Algorithms, and Database Management. John is a diligent student and spends a significant amount of time studying and completing assignments. He often stays late in the library to work on his projects.
statements:\n1. John is majoring in Biology.\n2. John is taking a course on Artificial Intelligence.\n3. John is a dedicated student.\n4. John has a part-time job.\n5. John is interested in computer programming.\n
Natural language inference

Context:
John is a student at XYZ University. He is pursuing a degree in Computer Science. He is enrolled in several courses this semester, including Data Structures, Algorithms, and Database Management. John is a diligent student and spends a significant amount of time studying and completing assignments. He often stays late in the library to work on his projects.
statement_1: John is majoring in Biology.
statement_2: John is taking a course on Artificial Intelligence.
statement_3: John is a dedicated student.
statement_4: John has a part-time job.
Answer:
[
{{
"statement_1": "John is majoring in Biology.",
"reason": "John's major is explicitly mentioned as Computer Science. There is no information suggesting he is majoring in Biology.",
"verdict": "No"
}},
{{
"statement_2": "John is taking a course on Artificial Intelligence.",
"reason": "The context mentions the courses John is currently enrolled in, and Artificial Intelligence is not mentioned. Therefore, it cannot be deduced that John is taking a course on AI.",
"verdict": "No"
}},
{{
"statement_3": "John is a dedicated student.",
"reason": "The context states that he spends a significant amount of time studying and completing assignments. Additionally, it mentions that he often stays late in the library to work on his projects, which implies dedication.",
"verdict": "Yes"
}},
{{
"statement_4": "John has a part-time job.",
"reason": "There is no information given in the context about John having a part-time job.",
"verdict": "No"
}}
]

Context:
Photosynthesis is a process used by plants, algae, and certain bacteria to convert light energy into chemical energy.
statement_1: Answer not found in given context
Answer:
1. John is majoring in Biology.
Explanation: John's major is explicitly mentioned as Computer Science. There is no information suggesting he is majoring in Biology. Verdict: No.
2. John is taking a course on Artificial Intelligence.
Explanation: The context mentions the courses John is currently enrolled in, and Artificial Intelligence is not mentioned. Therefore, it cannot be deduced that John is taking a course on AI. Verdict: No.
3. John is a dedicated student.
Explanation: The prompt states that he spends a significant amount of time studying and completing assignments. Additionally, it mentions that he often stays late in the library to work on his projects, which implies dedication. Verdict: Yes.
4. John has a part-time job.
Explanation: There is no information given in the context about John having a part-time job. Therefore, it cannot be deduced that John has a part-time job. Verdict: No.
5. John is interested in computer programming.
Explanation: The context states that John is pursuing a degree in Computer Science, which implies an interest in computer programming. Verdict: Yes.
Final verdict for each statement in order: No. No. Yes. No. Yes.
context:\n{context}
statements:\n{statements}
[
{{
"statement_1": "Answer not found in given context",
"reason": "The context does not provide enough information to determine the validity of the statement.",
"verdict": "NULL"
}}
]



context:
{context}
statements:
{statements}
Answer:
""" # noqa: E501
)
Expand Down Expand Up @@ -84,18 +135,13 @@ def _score_batch(
human_prompt = LONG_FORM_ANSWER_PROMPT.format(question=q, answer=a)
prompts.append(ChatPromptTemplate.from_messages([human_prompt]))


result = self.llm.generate(prompts, callbacks=batch_group)
list_statements: list[list[str]] = []
for output in result.generations:
# use only the first generation for each prompt
statements = output[0].text.split("\n")
list_statements.append(statements)

prompts = []
for context, statements in zip(contexts, list_statements):
for context, output in zip(contexts, result.generations):
statements = load_as_json(output[0].text).get("statements", [])
statements_str: str = "\n".join(
[f"{i+1}.{st}" for i, st in enumerate(statements)]
[f"statement_{i+1}: {st}" for i, st in enumerate(statements)]
)
contexts_str: str = "\n".join(context)
human_prompt = NLI_STATEMENTS_MESSAGE.format(
Expand All @@ -105,26 +151,21 @@ def _score_batch(

result = self.llm.generate(prompts, callbacks=batch_group)
outputs = result.generations

verdict_score_map = {"yes": 1, "no": 0, "null": np.nan}
scores = []
final_answer = "Final verdict for each statement in order:"
final_answer = final_answer.lower()
for i, output in enumerate(outputs):
output = output[0].text.lower().strip()
if final_answer in output:
output = output[output.find(final_answer) + len(final_answer) :]
score = sum(
0 if "yes" in answer else 1
for answer in output.strip().split(".")
if answer != ""
)
score = score / len(list_statements[i])
for output in outputs:
output = load_as_json(output[0].text)
output = output if output else []
faithful_statements = sum(
verdict_score_map.get(dict.get("verdict", "").lower(), np.nan)
for dict in output
)
num_statements = len(output)
if num_statements:
score = faithful_statements / num_statements
else:
score = max(0, output.count("verdict: no")) / len(
list_statements[i]
)

scores.append(1 - score)
score = np.nan
scores.append(score)

return scores

Expand Down
Loading