From 16f221b902c8feb047e5b25294aa372afd0217cb Mon Sep 17 00:00:00 2001 From: Frederico Schuh Date: Sat, 7 Sep 2024 02:24:00 -0400 Subject: [PATCH 1/3] Improved JSON input rewrite prompt so it works with smaller LLMs --- src/ragas/llms/json_load.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ragas/llms/json_load.py b/src/ragas/llms/json_load.py index 509b31217..dcd82a6a0 100644 --- a/src/ragas/llms/json_load.py +++ b/src/ragas/llms/json_load.py @@ -31,7 +31,7 @@ def load_as_json(text) -> t.Dict: # not migrating to Prompt format to avoid circular imports JSON_PROMPT = """\ -Rewrite the input into valid json +Rewrite the input into valid json. Only output JSON and nothing else. Input: {{ From 0053ec92200a2d239ee12eead13314c969b581b9 Mon Sep 17 00:00:00 2001 From: Frederico Schuh Date: Sat, 7 Sep 2024 02:28:45 -0400 Subject: [PATCH 2/3] Improved testset generation question rewriting prompts so they work with smaller and chattier LLMs --- src/ragas/testset/prompts.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/ragas/testset/prompts.py b/src/ragas/testset/prompts.py index 0d2806dda..6dd5c2c7f 100644 --- a/src/ragas/testset/prompts.py +++ b/src/ragas/testset/prompts.py @@ -15,7 +15,7 @@ class AnswerFormat(BaseModel): reasoning_question_prompt = Prompt( name="reasoning_question", instruction="""Complicate the given question by rewriting question into a multi-hop reasoning question based on the provided context. - Answering the question should require the reader to make multiple logical connections or inferences using the information available in given context. + Answering the question should require the reader to make multiple logical connections or inferences using the information available in given context. Only output the question and nothing else. Rules to follow when rewriting question: 1. Ensure that the rewritten question can be answered entirely from the information present in the contexts. 2. 
Do not frame questions that contains more than 15 words. Use abbreviation wherever possible. @@ -43,7 +43,7 @@ class AnswerFormat(BaseModel): multi_context_question_prompt = Prompt( name="multi_context_question", instruction=""" - The task is to rewrite and complicate the given question in a way that answering it requires information derived from both context1 and context2. + The task is to rewrite and complicate the given question in a way that answering it requires information derived from both context1 and context2. Only output the question and nothing else. Follow the rules given below while rewriting the question. 1. The rewritten question should not be very long. Use abbreviation wherever possible. 2. The rewritten question must be reasonable and must be understood and responded by humans. @@ -73,7 +73,7 @@ class AnswerFormat(BaseModel): conditional_question_prompt = Prompt( name="conditional_question", instruction="""Rewrite the provided question to increase its complexity by introducing a conditional element. - The goal is to make the question more intricate by incorporating a scenario or condition that affects the context of the question. + The goal is to make the question more intricate by incorporating a scenario or condition that affects the context of the question. Only output the question and nothing else. Follow the rules given below while rewriting the question. 1. The rewritten question should not be longer than 25 words. Use abbreviation wherever possible. 2. The rewritten question must be reasonable and must be understood and responded by humans. @@ -100,7 +100,8 @@ class AnswerFormat(BaseModel): compress_question_prompt = Prompt( name="compress_question", instruction="""Rewrite the following question to make it more indirect and shorter while retaining the essence of the original question. - The goal is to create a question that conveys the same meaning but in a less direct manner. 
The rewritten question should shorter so use abbreviation wherever possible.""", + The goal is to create a question that conveys the same meaning but in a less direct manner. The rewritten question should shorter so use abbreviation wherever possible. + Only output the question and nothing else.""", examples=[ { "question": "What is the distance between the Earth and the Moon?", @@ -216,7 +217,7 @@ class AnswerFormat(BaseModel): seed_question_prompt = Prompt( name="seed_question", - instruction="Generate a question that can be fully answered from given context. The question should be formed using topic", + instruction="Generate a question that can be fully answered from given context. The question should be formed using topic. Only output the question and nothing else.", examples=[ { "context": "Photosynthesis in plants involves converting light energy into chemical energy, using chlorophyll and other pigments to absorb light. This process is crucial for plant growth and the production of oxygen.", @@ -303,7 +304,7 @@ class AnswerFormat(BaseModel): question_rewrite_prompt = Prompt( name="rewrite_question", - instruction="""Given a context, question and feedback, rewrite the question to improve its clarity and answerability based on the feedback provided.""", + instruction="""Given a context, question and feedback, rewrite the question to improve its clarity and answerability based on the feedback provided. Only output the question and nothing else.""", examples=[ { "context": "The Eiffel Tower was constructed using iron and was originally intended as a temporary exhibit for the 1889 World's Fair held in Paris. Despite its initial temporary purpose, the Eiffel Tower quickly became a symbol of Parisian ingenuity and an iconic landmark of the city, attracting millions of visitors each year. 
The tower's design, created by Gustave Eiffel, was initially met with criticism from some French artists and intellectuals, but it has since been celebrated as a masterpiece of structural engineering and architectural design.", From d98467a90a91dfbbc2a14b0d629df79b867165c4 Mon Sep 17 00:00:00 2001 From: Frederico Schuh Date: Sat, 7 Sep 2024 02:35:00 -0400 Subject: [PATCH 3/3] Updated testset generation keyphrase_extraction and find_relevant_context prompts to use JSON formatting instructions. Also fixed incorrect indices in the examples of the find_relevant_context prompt (this will make it more explicit that the index is 1-based). --- src/ragas/testset/prompts.py | 64 +++++++++++++++++++++++++----------- 1 file changed, 44 insertions(+), 20 deletions(-) diff --git a/src/ragas/testset/prompts.py b/src/ragas/testset/prompts.py index 6dd5c2c7f..8e93f6b7b 100644 --- a/src/ragas/testset/prompts.py +++ b/src/ragas/testset/prompts.py @@ -3,13 +3,25 @@ from ragas.llms.output_parser import RagasoutputParser, get_json_format_instructions from ragas.llms.prompt import Prompt +from typing import List + class AnswerFormat(BaseModel): answer: str verdict: int +class KeyphraseFormat(BaseModel): + keyphrases: List[str] + + +class RelevantContextFormat(BaseModel): + relevant_contexts: List[int] + + question_answer_parser = RagasoutputParser(pydantic_object=AnswerFormat) +keyphrase_parser = RagasoutputParser(pydantic_object=KeyphraseFormat) +relevant_context_parser = RagasoutputParser(pydantic_object=RelevantContextFormat) reasoning_question_prompt = Prompt( @@ -186,28 +198,33 @@ class AnswerFormat(BaseModel): keyphrase_extraction_prompt = Prompt( name="keyphrase_extraction", instruction="Extract the top 3 to 5 keyphrases from the provided text, focusing on the most significant and distinctive aspects. 
", + output_format_instruction=get_json_format_instructions(KeyphraseFormat), examples=[ { "text": "A black hole is a region of spacetime where gravity is so strong that nothing, including light and other electromagnetic waves, has enough energy to escape it. The theory of general relativity predicts that a sufficiently compact mass can deform spacetime to form a black hole.", - "output": { - "keyphrases": [ - "Black hole", - "Region of spacetime", - "Strong gravity", - "Light and electromagnetic waves", - "Theory of general relativity", - ] - }, + "output": KeyphraseFormat.parse_obj( + { + "keyphrases": [ + "Black hole", + "Region of spacetime", + "Strong gravity", + "Light and electromagnetic waves", + "Theory of general relativity", + ] + } + ).dict(), }, { "text": "The Great Wall of China is an ancient series of walls and fortifications located in northern China, built around 500 years ago. This immense wall stretches over 13,000 miles and is a testament to the skill and persistence of ancient Chinese engineers.", - "output": { - "keyphrases": [ - "Great Wall of China", - "Ancient fortifications", - "Northern China", - ] - }, + "output": KeyphraseFormat.parse_obj( + { + "keyphrases": [ + "Great Wall of China", + "Ancient fortifications", + "Northern China", + ] + } + ).dict(), }, ], input_keys=["text"], @@ -273,6 +290,7 @@ class AnswerFormat(BaseModel): find_relevant_context_prompt = Prompt( name="find_relevant_context", instruction="Given a question and set of contexts, find the most relevant contexts to answer the question.", + output_format_instruction=get_json_format_instructions(RelevantContextFormat), examples=[ { "question": "What is the capital of France?", @@ -281,9 +299,11 @@ class AnswerFormat(BaseModel): "2. The capital of France is Paris. It is also the most populous city in France, with a population of over 2 million people. Paris is known for its cultural landmarks like the Eiffel Tower and the Louvre Museum.", "3. Paris is the capital of France. 
It is also the most populous city in France, with a population of over 2 million people. Paris is known for its cultural landmarks like the Eiffel Tower and the Louvre Museum.", ], - "output": { - "relevant_contexts": [1, 2], - }, + "output": RelevantContextFormat.parse_obj( + { + "relevant_contexts": [2, 3], + } + ).dict(), }, { "question": "How does caffeine affect the body and what are its common sources?", @@ -292,7 +312,11 @@ class AnswerFormat(BaseModel): "2. Regular physical activity is essential for maintaining good health. It can help control weight, combat health conditions, boost energy, and promote better sleep.", "3. Common sources of caffeine include coffee, tea, cola, and energy drinks. These beverages are consumed worldwide and are known for providing a quick boost of energy.", ], - "output": {"relevant_contexts": [1, 2]}, + "output": RelevantContextFormat.parse_obj( + { + "relevant_contexts": [1, 3], + } + ).dict(), }, ], input_keys=["question", "contexts"],