Update for resolving the latest review.
didiforgithub committed Oct 29, 2024
1 parent f0a3a3f commit d01051a
Showing 19 changed files with 334 additions and 332 deletions.
3 files renamed without changes.
8 changes: 4 additions & 4 deletions examples/aflow/README.md
@@ -5,7 +5,7 @@ AFlow is a framework for automatically generating and optimizing Agentic Workflo
[Read our paper on arXiv](https://arxiv.org/abs/2410.10762)

<p align="center">
- <a href=""><img src="../../docs/resources/aflow/AFLOW-performance.jpg" alt="Performance Of AFLOW" title="Performance of AFlow<sub>1</sub>" width="80%"></a>
+ <a href=""><img src="../../docs/resources/aflow/AFLOW-performance.jpg" alt="Performance Of AFlow" title="Performance of AFlow<sub>1</sub>" width="80%"></a>
</p>

## Framework Components
@@ -17,7 +17,7 @@ AFlow is a framework for automatically generating and optimizing Agentic Workflo
- **Evaluator**: Assesses workflow performance on given tasks. Provides feedback to guide the optimization process towards more effective workflows. See `metagpt/ext/aflow/scripts/evaluator.py` for details.

<p align="center">
- <a href=""><img src="../../docs/resources/aflow/AFLOW-method.jpg" alt="Performance Of AFLOW" title="Framework of AFlow <sub>1</sub>" width="80%"></a>
+ <a href=""><img src="../../docs/resources/aflow/AFLOW-method.jpg" alt="Framework of AFlow" title="Framework of AFlow <sub>1</sub>" width="80%"></a>
</p>

## Datasets
@@ -26,7 +26,7 @@ AFlow is a framework for automatically generating and optimizing Agentic Workflo
We conducted experiments on six datasets (HumanEval, MBPP, GSM8K, MATH, HotpotQA, DROP) and provide their evaluation code. The data can be found in this [datasets](https://drive.google.com/uc?export=download&id=1DNoegtZiUhWtvkd2xoIuElmIi4ah7k8e) link, or you can download them using `metagpt/ext/aflow/data/download_data.py`

<p align="center">
- <a href=""><img src="../../docs/resources/aflow/AFLOW-experiment.jpg" alt="Performance Of AFLOW" title="Comparison between AFlow and other methods <sub>1</sub>" width="80%"></a>
+ <a href=""><img src="../../docs/resources/aflow/AFLOW-experiment.jpg" alt="Performance Of AFlow" title="Performance Of AFlow <sub>1</sub>" width="80%"></a>
</p>

### Custom Datasets
@@ -68,7 +68,7 @@ For custom tasks, you can reference the code in the `metagpt/ext/aflow/benchmark
```

## Reproduce the Results in the Paper
- 1. We provide the raw data obtained from our experiments ([download link](https://drive.google.com/uc?export=download&id=1Sr5wjgKf3bN8OC7G6cO3ynzJqD4w6_Dv)), including the workflows and prompts generated in each iteration, as well as their trajectories on the validation dataset. We also provide the optimal workflow for each dataset and the corresponding data on the test dataset. You can download these data using `metagpt/ext/aflow/data/download_data.py`.
+ 1. We provide the raw data obtained from our experiments in this [link](https://drive.google.com/uc?export=download&id=1Sr5wjgKf3bN8OC7G6cO3ynzJqD4w6_Dv), including the workflows and prompts generated in each iteration, as well as their trajectories on the validation dataset. We also provide the optimal workflow for each dataset and the corresponding data on the test dataset. You can download these data using `metagpt/ext/aflow/data/download_data.py`.
2. You can directly reproduce our experimental results by running the scripts in `examples/aflow/experiments`.
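The reproduction entry points above are plain Python scripts. A minimal sketch of assembling such an invocation (the script path and flag names come from the experiment scripts in this commit; the flag values are illustrative, not prescribed by the repository):

```python
import sys

# Hypothetical reproduction command; pass `cmd` to
# subprocess.run(cmd, check=True) to actually launch the run.
cmd = [
    sys.executable,
    "examples/aflow/experiments/optimize_gsm8k.py",
    "--max_rounds", "20",
    "--validation_rounds", "5",
]
print(" ".join(cmd))
```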


87 changes: 44 additions & 43 deletions examples/aflow/experiments/optimize_drop.py
@@ -3,50 +3,51 @@
# @Author : didi
# @Desc : Entrance of AFlow.

import argparse

from metagpt.configs.models_config import ModelsConfig
from metagpt.ext.aflow.scripts.optimizer import DatasetType, Optimizer, QuestionType

# Crucial Parameters
dataset: DatasetType = "DROP" # Ensure the type is consistent with DatasetType
sample: int = 4 # Sample Count, which means how many workflows will be resampled from generated workflows
question_type: QuestionType = "qa" # Ensure the type is consistent with QuestionType
optimized_path: str = "metagpt/ext/aflow/scripts/optimized" # Optimized Result Save Path
initial_round: int = 1 # The starting round of optimization
max_rounds: int = 20 # The max iteration of AFLOW.
check_convergence: bool = True # Whether Early Stop
validation_rounds: int = 5 # The validation rounds of AFLOW.

# Config llm model, you can modify `config/config2.yaml` to use more llms.
mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")
claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")

# Config operators.
operators = [
"Custom", # It's basic unit of a fixed node. optimizer can modify its prompt to get vairous nodes.
"AnswerGenerate", # It's for qa
# "CustomCodeGenerate", # It's for code
"ScEnsemble", # It's for code, math and qa
# "Test", # It's for code
# "Programmer", # It's for math
]

# Create an optimizer instance
optimizer = Optimizer(
dataset=dataset, # Config dataset
question_type=question_type, # Config Question Type
opt_llm_config=claude_llm_config, # Config Optimizer LLM
exec_llm_config=mini_llm_config, # Config Execution LLM
check_convergence=check_convergence, # Whether Early Stop
operators=operators, # Config Operators you want to use
optimized_path=optimized_path, # Config Optimized workflow's file path
sample=sample, # Only Top(sample) rounds will be selected.
initial_round=initial_round, # Optimize from initial round
max_rounds=max_rounds, # The max iteration of AFLOW.
validation_rounds=validation_rounds, # The validation rounds of AFLOW.
)
from metagpt.ext.aflow.scripts.optimizer import Optimizer


def parse_args():
parser = argparse.ArgumentParser(description="AFlow Optimizer for DROP")
parser.add_argument("--dataset", type=str, default="DROP", help="Dataset type")
parser.add_argument("--sample", type=int, default=4, help="Sample count")
parser.add_argument("--question_type", type=str, default="qa", help="Question type")
parser.add_argument(
"--optimized_path", type=str, default="metagpt/ext/aflow/scripts/optimized", help="Optimized result save path"
)
parser.add_argument("--initial_round", type=int, default=1, help="Initial round")
parser.add_argument("--max_rounds", type=int, default=20, help="Max iteration rounds")
parser.add_argument("--check_convergence", type=lambda s: s.lower() in ("true", "1"), default=True, help="Whether to enable early stop")
parser.add_argument("--validation_rounds", type=int, default=5, help="Validation rounds")
return parser.parse_args()


if __name__ == "__main__":
# Optimize workflow via setting the optimizer's mode to 'Graph'
args = parse_args()

mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")
claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")

operators = [
"Custom",
"AnswerGenerate",
"ScEnsemble",
]

optimizer = Optimizer(
dataset=args.dataset,
question_type=args.question_type,
opt_llm_config=claude_llm_config,
exec_llm_config=mini_llm_config,
check_convergence=args.check_convergence,
operators=operators,
optimized_path=args.optimized_path,
sample=args.sample,
initial_round=args.initial_round,
max_rounds=args.max_rounds,
validation_rounds=args.validation_rounds,
)

optimizer.optimize("Graph")
# Test workflow via setting the optimizer's mode to 'Test'
# optimizer.optimize("Test")
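One caveat with the `parse_args` pattern above: argparse's `type=bool` calls `bool()` on the raw string, so `--check_convergence False` still yields `True`. A small self-contained sketch of an explicit parser (the `str2bool` helper is an editorial suggestion, not part of the repository):

```python
import argparse

def str2bool(value: str) -> bool:
    # bool("False") is True because any non-empty string is truthy;
    # interpret the text explicitly instead.
    return value.strip().lower() in ("true", "1", "yes")

parser = argparse.ArgumentParser(description="boolean-flag sketch")
parser.add_argument("--check_convergence", type=str2bool, default=True)

args = parser.parse_args(["--check_convergence", "False"])
print(args.check_convergence)  # False, whereas type=bool would yield True
```

Parsing the flag this way avoids silently leaving early stop enabled when a user passes `False` on the command line.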
87 changes: 44 additions & 43 deletions examples/aflow/experiments/optimize_gsm8k.py
@@ -3,50 +3,51 @@
# @Author : didi
# @Desc : Entrance of AFlow.

import argparse

from metagpt.configs.models_config import ModelsConfig
from metagpt.ext.aflow.scripts.optimizer import DatasetType, Optimizer, QuestionType

# Crucial Parameters
dataset: DatasetType = "GSM8K" # Ensure the type is consistent with DatasetType
sample: int = 4 # Sample Count, which means how many workflows will be resampled from generated workflows
question_type: QuestionType = "math" # Ensure the type is consistent with QuestionType
optimized_path: str = "metagpt/ext/aflow/scripts/optimized" # Optimized Result Save Path
initial_round: int = 1 # The starting round of optimization
max_rounds: int = 20 # The max iteration of AFLOW.
check_convergence: bool = True # Whether Early Stop
validation_rounds: int = 5 # The validation rounds of AFLOW.

# Config llm model, you can modify `config/config2.yaml` to use more llms.
mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")
claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")

# Config operators.
operators = [
"Custom", # It's basic unit of a fixed node. optimizer can modify its prompt to get vairous nodes.
# "AnswerGenerate", # It's for qa
# "CustomCodeGenerate", # It's for code
"ScEnsemble", # It's for code, math and qa
# "Test", # It's for code
"Programmer", # It's for math
]

# Create an optimizer instance
optimizer = Optimizer(
dataset=dataset, # Config dataset
question_type=question_type, # Config Question Type
opt_llm_config=claude_llm_config, # Config Optimizer LLM
exec_llm_config=mini_llm_config, # Config Execution LLM
check_convergence=check_convergence, # Whether Early Stop
operators=operators, # Config Operators you want to use
optimized_path=optimized_path, # Config Optimized workflow's file path
sample=sample, # Only Top(sample) rounds will be selected.
initial_round=initial_round, # Optimize from initial round
max_rounds=max_rounds, # The max iteration of AFLOW.
validation_rounds=validation_rounds, # The validation rounds of AFLOW.
)
from metagpt.ext.aflow.scripts.optimizer import Optimizer


def parse_args():
parser = argparse.ArgumentParser(description="AFlow Optimizer for GSM8K")
parser.add_argument("--dataset", type=str, default="GSM8K", help="Dataset type")
parser.add_argument("--sample", type=int, default=4, help="Sample count")
parser.add_argument("--question_type", type=str, default="math", help="Question type")
parser.add_argument(
"--optimized_path", type=str, default="metagpt/ext/aflow/scripts/optimized", help="Optimized result save path"
)
parser.add_argument("--initial_round", type=int, default=1, help="Initial round")
parser.add_argument("--max_rounds", type=int, default=20, help="Max iteration rounds")
parser.add_argument("--check_convergence", type=lambda s: s.lower() in ("true", "1"), default=True, help="Whether to enable early stop")
parser.add_argument("--validation_rounds", type=int, default=5, help="Validation rounds")
return parser.parse_args()


if __name__ == "__main__":
# Optimize workflow via setting the optimizer's mode to 'Graph'
args = parse_args()

mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")
claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")

operators = [
"Custom",
"ScEnsemble",
"Programmer",
]

optimizer = Optimizer(
dataset=args.dataset,
question_type=args.question_type,
opt_llm_config=claude_llm_config,
exec_llm_config=mini_llm_config,
check_convergence=args.check_convergence,
operators=operators,
optimized_path=args.optimized_path,
sample=args.sample,
initial_round=args.initial_round,
max_rounds=args.max_rounds,
validation_rounds=args.validation_rounds,
)

optimizer.optimize("Graph")
# Test workflow via setting the optimizer's mode to 'Test'
# optimizer.optimize("Test")
87 changes: 44 additions & 43 deletions examples/aflow/experiments/optimize_hotpotqa.py
@@ -3,50 +3,51 @@
# @Author : didi
# @Desc : Entrance of AFlow.

import argparse

from metagpt.configs.models_config import ModelsConfig
from metagpt.ext.aflow.scripts.optimizer import DatasetType, Optimizer, QuestionType

# Crucial Parameters
dataset: DatasetType = "HotpotQA" # Ensure the type is consistent with DatasetType
sample: int = 4 # Sample Count, which means how many workflows will be resampled from generated workflows
question_type: QuestionType = "qa" # Ensure the type is consistent with QuestionType
optimized_path: str = "metagpt/ext/aflow/scripts/optimized" # Optimized Result Save Path
initial_round: int = 1 # The starting round of optimization
max_rounds: int = 20 # The max iteration of AFLOW.
check_convergence: bool = True # Whether Early Stop
validation_rounds: int = 5 # The validation rounds of AFLOW.

# Config llm model, you can modify `config/config2.yaml` to use more llms.
mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")
claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")

# Config operators.
operators = [
"Custom", # It's basic unit of a fixed node. optimizer can modify its prompt to get vairous nodes.
"AnswerGenerate", # It's for qa
# "CustomCodeGenerate", # It's for code
"ScEnsemble", # It's for code, math and qa
# "Test", # It's for code
# "Programmer", # It's for math
]

# Create an optimizer instance
optimizer = Optimizer(
dataset=dataset, # Config dataset
question_type=question_type, # Config Question Type
opt_llm_config=claude_llm_config, # Config Optimizer LLM
exec_llm_config=mini_llm_config, # Config Execution LLM
check_convergence=check_convergence, # Whether Early Stop
operators=operators, # Config Operators you want to use
optimized_path=optimized_path, # Config Optimized workflow's file path
sample=sample, # Only Top(sample) rounds will be selected.
initial_round=initial_round, # Optimize from initial round
max_rounds=max_rounds, # The max iteration of AFLOW.
validation_rounds=validation_rounds, # The validation rounds of AFLOW.
)
from metagpt.ext.aflow.scripts.optimizer import Optimizer


def parse_args():
parser = argparse.ArgumentParser(description="AFlow Optimizer for HotpotQA")
parser.add_argument("--dataset", type=str, default="HotpotQA", help="Dataset type")
parser.add_argument("--sample", type=int, default=4, help="Sample count")
parser.add_argument("--question_type", type=str, default="qa", help="Question type")
parser.add_argument(
"--optimized_path", type=str, default="metagpt/ext/aflow/scripts/optimized", help="Optimized result save path"
)
parser.add_argument("--initial_round", type=int, default=1, help="Initial round")
parser.add_argument("--max_rounds", type=int, default=20, help="Max iteration rounds")
parser.add_argument("--check_convergence", type=lambda s: s.lower() in ("true", "1"), default=True, help="Whether to enable early stop")
parser.add_argument("--validation_rounds", type=int, default=5, help="Validation rounds")
return parser.parse_args()


if __name__ == "__main__":
# Optimize workflow via setting the optimizer's mode to 'Graph'
args = parse_args()

mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")
claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")

operators = [
"Custom",
"AnswerGenerate",
"ScEnsemble",
]

optimizer = Optimizer(
dataset=args.dataset,
question_type=args.question_type,
opt_llm_config=claude_llm_config,
exec_llm_config=mini_llm_config,
check_convergence=args.check_convergence,
operators=operators,
optimized_path=args.optimized_path,
sample=args.sample,
initial_round=args.initial_round,
max_rounds=args.max_rounds,
validation_rounds=args.validation_rounds,
)

optimizer.optimize("Graph")
# Test workflow via setting the optimizer's mode to 'Test'
# optimizer.optimize("Test")
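The three experiment scripts differ only in their defaults: dataset, question type, and operator list. A self-contained sketch of that mapping, collected from the diffs above (the table itself is an editorial illustration, not repository code):

```python
# Per-dataset settings as configured in the three experiment scripts.
EXPERIMENTS = {
    "DROP":     {"question_type": "qa",   "operators": ["Custom", "AnswerGenerate", "ScEnsemble"]},
    "GSM8K":    {"question_type": "math", "operators": ["Custom", "ScEnsemble", "Programmer"]},
    "HotpotQA": {"question_type": "qa",   "operators": ["Custom", "AnswerGenerate", "ScEnsemble"]},
}

def settings_for(dataset: str) -> dict:
    # Look up the question type and operator list for one benchmark.
    return EXPERIMENTS[dataset]

print(settings_for("GSM8K")["question_type"])  # prints: math
```

Factoring the per-dataset choices into one table like this would make it clear that everything else in the scripts is shared boilerplate.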