forked from geekan/MetaGPT
Commit 7ec14de (1 parent: e2cdcfb)
Showing 10 changed files with 650 additions and 3 deletions.
@@ -0,0 +1,143 @@
from typing import List

from pydantic import BaseModel, Field

from metagpt.configs.models_config import ModelsConfig
from metagpt.ext.aflow.benchmark.humaneval import HumanEvalBenchmark
from metagpt.ext.aflow.scripts.operator import Operator
from metagpt.ext.aflow.scripts.workflow import Workflow
from metagpt.llm import LLM

HUMANEVAL_PROMPT_IO = """
{question}\nGenerate an answer to this question, without any additional test cases.
"""

WITHOUT_PROMPT = """
{question}
"""

SC_ENSEMBLE_PROMPT = """
Given the question described as follows: {question}
Several solutions have been generated to address the given question. They are as follows:
{solutions}
Carefully evaluate these solutions and identify the answer that appears most frequently across them. This consistency in answers is crucial for determining the most reliable solution.
In the "thought" field, provide a detailed explanation of your thought process. In the "solution_letter" field, output only the single letter ID (A, B, C, etc.) corresponding to the most consistent solution. Do not include any additional text or explanation in the "solution_letter" field.
"""


class GenerateOp(BaseModel):
    solution: str = Field(default="", description="Python solution for this question.")


class ScEnsembleOp(BaseModel):
    thought: str = Field(default="", description="The reasoning behind the most consistent solution.")
    solution_letter: str = Field(default="", description="The letter of the most consistent solution.")


class Generate(Operator):
    def __init__(self, llm: LLM, name: str = "Generate"):
        super().__init__(llm, name)

    async def __call__(self, question: str, entry_point: str) -> dict:
        # _fill_node returns a plain dict (via model_dump), not a tuple.
        prompt = WITHOUT_PROMPT.format(question=question)
        response = await self._fill_node(GenerateOp, prompt, mode="code_fill", function_name=entry_point)
        return response


class JSONGenerate(Operator):
    def __init__(self, llm: LLM, name: str = "JSONGenerate"):
        super().__init__(llm, name)

    async def __call__(self, question: str, entry_point: str) -> dict:
        prompt = WITHOUT_PROMPT.format(question=question)
        response = await self._fill_node(GenerateOp, mode="single_fill", prompt=prompt)
        return response


class ScEnsemble(Operator):
    def __init__(self, llm: LLM, name: str = "ScEnsemble"):
        super().__init__(llm, name)

    async def __call__(self, solutions: List[str], problem: str):
        # Label each candidate A, B, C, ... and remember which index each letter maps to.
        answer_mapping = {}
        solution_text = ""
        for index, solution in enumerate(solutions):
            answer_mapping[chr(65 + index)] = index
            solution_text += f"{chr(65 + index)}: \n{str(solution)}\n\n\n"

        prompt = SC_ENSEMBLE_PROMPT.format(question=problem, solutions=solution_text)
        response = await self._fill_node(ScEnsembleOp, prompt, mode="xml_fill")

        answer = response.get("solution_letter", "")
        answer = answer.strip().upper()

        return {"response": solutions[answer_mapping[answer]]}
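
# A quick sketch of the letter-voting bookkeeping above (illustrative values):
# with three candidates, answer_mapping becomes {"A": 0, "B": 1, "C": 2} and the
# prompt lists each solution under its letter; a model reply such as " b " is
# normalized by strip().upper() to "B" before the index lookup.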


class IOSolveGraph(Workflow):
    def __init__(self, name: str, llm_config, dataset: str):
        super().__init__(name, llm_config, dataset)
        self.generate = Generate(self.llm)

    async def __call__(self, question, entry_point):
        solution = await self.generate(question, entry_point)
        return solution["solution"], self.llm.cost_manager.total_cost


class JSONSolveGraph(Workflow):
    def __init__(self, name: str, llm_config, dataset: str):
        super().__init__(name, llm_config, dataset)
        # JSON variant: use the single_fill JSONGenerate operator, so this
        # graph actually differs from IOSolveGraph.
        self.generate = JSONGenerate(self.llm)

    async def __call__(self, question, entry_point):
        solution = await self.generate(question, entry_point)
        return solution["solution"], self.llm.cost_manager.total_cost


class SelfConsistencyGraph(Workflow):
    def __init__(self, name: str, llm_config, dataset: str):
        super().__init__(name, llm_config, dataset)
        self.generate = Generate(llm=self.llm)
        self.sc_ensemble = ScEnsemble(llm=self.llm)

    async def __call__(self, problem, function_name):
        # Sample five independent solutions, then pick the most consistent one.
        solutions = []
        for _ in range(5):
            solution = await self.generate(problem, function_name)
            solutions.append(solution["solution"])
        solution = await self.sc_ensemble(solutions, problem)
        return solution["response"], self.llm.cost_manager.total_cost
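
# The five Generate calls above run one at a time. A minimal parallel variant
# (a sketch; assumes asyncio is imported at module level and the LLM client
# tolerates concurrent requests, as the gather-based workflow elsewhere in
# this commit already does):
#
#     responses = await asyncio.gather(*(self.generate(problem, function_name) for _ in range(5)))
#     solutions = [r["solution"] for r in responses]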


if __name__ == "__main__":

    async def main():
        # llm_config = ModelsConfig.default().get("llama-3.2-90b-vision-instruct")
        llm_config = ModelsConfig.default().get("meta-llama/Meta-Llama-3.1-70B-Instruct")
        # llm_config = ModelsConfig.default().get("gpt-4o-mini")
        # llm_config = ModelsConfig.default().get("gpt-4o")
        # llm_config = ModelsConfig.default().get("deepseek-chat")
        # llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")
        # graph = IOSolveGraph(name="IO", llm_config=llm_config, dataset="HumanEval")
        # graph = JSONSolveGraph(name="JSON", llm_config=llm_config, dataset="HumanEval")
        graph = SelfConsistencyGraph(name="SelfConsistency", llm_config=llm_config, dataset="HumanEval")
        benchmark = HumanEvalBenchmark(
            name="HumanEval",
            file_path="/Users/trl/Github_project/MetaGPT-MathAI/metagpt/ext/aflow/data/humaneval_test.jsonl",
            log_path="",
        )
        avg_score = await benchmark.baseline_evaluation(graph, max_concurrent_tasks=5)
        return avg_score

    import asyncio

    asyncio.run(main())


# HumanEval scores observed so far:
# llama + markdown + prompt: 0.75573
# gpt-4o-mini + markdown + prompt: 0.871
# gpt-4o-mini + json + prompt: 0.81679
# gpt-4o-mini
@@ -0,0 +1,49 @@
import asyncio

from metagpt.ext.aflow.benchmark.humaneval import HumanEvalBenchmark
from metagpt.ext.eflow.src.abstract import Workflow
from metagpt.ext.eflow.src.operators import CodeGenerate, Custom, ScEnsemble

llm_name_list = ["claude-3-5-sonnet-20240620", "gpt-4o-mini", "gpt-4o", "deepseek-chat"]


class MultiLLMWorkflow(Workflow):
    def __init__(self, name: str, llm_names: list, dataset: str):
        super().__init__(name, llm_names, dataset)
        self.custom = Custom(self.llm_dict["gpt-4o-mini"])
        self.code_generate = CodeGenerate(self.llm_dict["gpt-4o-mini"])
        self.sc_ensemble = ScEnsemble(self.llm_dict["claude-3-5-sonnet-20240620"])

    async def __call__(self, problem, function_name):
        # Fan the problem out to four different LLMs concurrently, then let
        # ScEnsemble vote for the most consistent of the four solutions.
        tasks = [
            self.code_generate(problem, function_name, self.llm_dict["claude-3-5-sonnet-20240620"]),
            self.code_generate(problem, function_name, self.llm_dict["gpt-4o-mini"]),
            self.code_generate(problem, function_name, self.llm_dict["gpt-4o"]),
            self.code_generate(problem, function_name, self.llm_dict["deepseek-chat"]),
        ]
        claude_solution, four_o_mini_solution, four_o_solution, deepseek_solution = await asyncio.gather(*tasks)

        solutions = [claude_solution, four_o_mini_solution, four_o_solution, deepseek_solution]

        solution = await self.sc_ensemble(solutions, problem)

        return solution["response"], self.get_cost()


if __name__ == "__main__":

    async def main():
        graph = MultiLLMWorkflow(name="SelfConsistency", llm_names=llm_name_list, dataset="HumanEval")
        benchmark = HumanEvalBenchmark(
            name="HumanEval", file_path="metagpt/ext/aflow/data/humaneval_validate.jsonl", log_path=""
        )
        avg_score = await benchmark.baseline_evaluation(graph, max_concurrent_tasks=5)
        return avg_score

    asyncio.run(main())
@@ -0,0 +1,74 @@
from typing import Dict, List

from pydantic import BaseModel, Field, create_model

from metagpt.actions.action_node import ActionNode
from metagpt.configs.models_config import ModelsConfig
from metagpt.ext.aflow.scripts.evaluator import DatasetType
from metagpt.llm import LLM
from metagpt.provider.llm_provider_registry import create_llm_instance
from metagpt.utils.cost_manager import CostManager


class Schema:
    """Builds a pydantic model from an attribute spec:
    List[{"name": str, "type": str, "description": str}]."""

    def __init__(self, attributes: List[Dict]):
        self.model = self._create_schema(attributes)

    @staticmethod
    def _create_schema(attributes: List[Dict]) -> type[BaseModel]:
        # create_model produces a real BaseModel subclass; mutating
        # model_fields on an instance would not register any fields.
        # The spec's "type" entry is informational here: every field is
        # currently filled as a string.
        fields = {
            attribute["name"]: (str, Field(default="", description=attribute["description"]))
            for attribute in attributes
        }
        return create_model("DynamicSchema", **fields)
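
# Usage sketch for the dynamic schema (field values are illustrative):
#
#     op_model = Schema([{"name": "response", "type": "str", "description": "Your solution"}]).model
#     instance = op_model(response="def add(a, b):\n    return a + b")
#     instance.model_dump()  # -> {"response": "def add(a, b):\n    return a + b"}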


class Operator:
    def __init__(self, model: LLM, name: str):
        self.default_model = model
        self.name = name

    def __call__(self, *args, **kwargs):
        raise NotImplementedError

    async def _fill_node(self, op_schema, prompt, format=None, model: LLM = None, **extra_kwargs):
        # Resolve the model before building the fill kwargs; otherwise the
        # default model would never reach ActionNode when no override is given.
        if model is None:
            model = self.default_model
        fill_kwargs = {"context": prompt, "llm": model}
        if format:
            fill_kwargs["mode"] = format

        fill_kwargs.update(extra_kwargs)
        node = await ActionNode.from_pydantic(op_schema).fill(**fill_kwargs)
        return node.instruct_content.model_dump()
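
# Call pattern (names assumed from the operators in this commit):
#
#     response = await self._fill_node(GenerateOp, prompt, format="xml_fill", model=override_llm)
#     response["response"]  # a plain dict produced by instruct_content.model_dump()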


class Workflow:
    def __init__(
        self,
        name: str,
        llm_names: list,
        dataset: DatasetType,
    ) -> None:
        self.name = name
        self.dataset = dataset
        self.llm_dict = self.create_llms(llm_names)

    def get_cost(self):
        # Accumulate the cost across all LLMs.
        total_cost = 0
        for llm in self.llm_dict.values():
            total_cost += llm.cost_manager.total_cost
        return total_cost

    def create_llms(self, llm_names: list):
        # Each LLM gets its own CostManager so per-model spend stays separable.
        llm_dict = {}
        for llm_name in llm_names:
            llm_config = ModelsConfig.default().get(llm_name)
            llm_dict[llm_name] = create_llm_instance(llm_config)
            llm_dict[llm_name].cost_manager = CostManager()
        return llm_dict

    async def __call__(self, problem: str):
        """Implementation of the workflow."""
        raise NotImplementedError("This method should be implemented by the subclass")
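
# Minimal subclass sketch (illustrative wiring; mirrors the gather-based
# workflow defined elsewhere in this commit):
#
#     class OneShotWorkflow(Workflow):
#         def __init__(self, name, llm_names, dataset):
#             super().__init__(name, llm_names, dataset)
#             self.generate = CodeGenerate(self.llm_dict[llm_names[0]])
#
#         async def __call__(self, problem, function_name):
#             solution = await self.generate(problem, function_name)
#             return solution["response"], self.get_cost()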
@@ -0,0 +1,67 @@
from typing import List

from pydantic import BaseModel, Field

from metagpt.ext.eflow.src.abstract import Operator, Schema
from metagpt.ext.eflow.src.prompt import GENERATE_PROMPT, SC_ENSEMBLE_PROMPT
from metagpt.llm import LLM

# Schema spec: List[{name: str, type: str, description: str}]
# It looks like Schema needs a dedicated class to make generation easier.
# The abstraction here is still too messy.
# Multiple LLMs should come into play at call time, not when the Workflow is created.
# Should the flexibility be opened up at the Node level, or at the Operator level?


class GenerateOp(BaseModel):
    response: str = Field(default="", description="Your solution for this problem")


class Custom(Operator):
    def __init__(self, model: LLM):
        super().__init__(model, "Custom")
        self.schema = [{"name": "response", "type": "str", "description": "Your solution for this problem"}]

    async def __call__(self, input: str, prompt: str, op_schema: type[BaseModel] = None, model: LLM = None, **extra_kwargs):
        if op_schema is None:
            # _fill_node expects a BaseModel subclass, so build one from the
            # default attribute spec.
            op_schema = Schema(self.schema).model
        context = prompt + input
        response = await self._fill_node(op_schema=op_schema, prompt=context, model=model, **extra_kwargs)
        return response


class CodeGenerate(Operator):
    def __init__(self, model: LLM):
        super().__init__(model, "CodeGenerate")
        self.schema = [{"name": "response", "type": "str", "description": "Your solution for this problem"}]

    async def __call__(self, problem: str, function_name: str, model: LLM = None):
        prompt = GENERATE_PROMPT.format(problem=problem)
        response = await self._fill_node(
            op_schema=GenerateOp, prompt=prompt, format="xml_fill", model=model, function_name=function_name
        )
        return response


class ScEnsemble(Operator):
    def __init__(self, model: LLM):
        super().__init__(model, "ScEnsemble")
        self.schema = [
            {"name": "thought", "type": "str", "description": "The reasoning behind the most consistent solution."},
            {"name": "solution_letter", "type": "str", "description": "The letter of the most consistent solution."},
        ]

    async def __call__(self, solutions: List[str], problem: str, model: LLM = None):
        answer_mapping = {}
        solution_text = ""
        for index, solution in enumerate(solutions):
            answer_mapping[chr(65 + index)] = index
            solution_text += f"{chr(65 + index)}: \n{str(solution)}\n\n\n"

        prompt = SC_ENSEMBLE_PROMPT.format(question=problem, solutions=solution_text)
        # Build a pydantic model from the attribute spec; _fill_node cannot
        # consume the raw list of field dicts.
        response = await self._fill_node(
            op_schema=Schema(self.schema).model, prompt=prompt, format="xml_fill", model=model
        )

        answer = response.get("solution_letter", "")
        answer = answer.strip().upper()

        return {"response": solutions[answer_mapping[answer]]}
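
# Per-call override sketch, following the design note at the top of this file
# (pick the LLM when the operator is invoked rather than when it is built).
# `default_llm` and `stronger_llm` are hypothetical, pre-configured instances:
#
#     code_generate = CodeGenerate(default_llm)
#     await code_generate(problem, "entry_point", model=stronger_llm)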