Init AutoDV's Operator

didiforgithub · Nov 8, 2024 · 10295d6 · 10295d6
1 parent a01156d
commit 10295d6
Show file tree

Hide file tree

Showing 6 changed files with 201 additions and 6 deletions.
diff --git a/examples/autodv/operator_an.py b/examples/autodv/operator_an.py
@@ -0,0 +1,22 @@
+from pydantic import BaseModel, Field
+
+
+class ImageDataOp(BaseModel):
+    """Output schema for the image data extraction step: a single text field."""
+
+    # Text field described to the model as the data extracted from the image.
+    data: str = Field(
+        default="",
+        description="The data extracted from the image",
+    )
+
+
+class KeywordOp(BaseModel):
+    """Output schema for the keyword extraction step: a single text field."""
+
+    # Text field described to the model as the keywords extracted from the data.
+    keywords: str = Field(
+        default="",
+        description="The keywords extracted from the data",
+    )
+
+
+class TextGenerateOp(BaseModel):
+    """Output schema for the text generation step: a single text field."""
+
+    # Text field holding the generated plot description. The description string
+    # is schema metadata, so its spelling is kept correct ("description").
+    plot: str = Field(
+        default="",
+        description="The plot generated from the prompt, description of the prompt.",
+    )
+
+
+class TypeAnalyzeOp(BaseModel):
+    """Output schema for the visualization type analysis step: a single text field."""
+
+    # The field is named `type` (same spelling as the builtin); the name is part
+    # of the schema and therefore kept as is.
+    type: str = Field(
+        default="",
+        description="The type of visualization, e.g. bar chart, line chart, pie chart, etc.",
+    )
+
+
+class VisualizeOp(BaseModel):
+    """Output schema for the visualization mapping step: data plus chart type."""
+
+    # Text field described to the model as the data used for visualization.
+    data: str = Field(
+        default="",
+        description="The data use for visualization.",
+    )
+    # Text field described to the model as the kind of chart to draw.
+    type: str = Field(
+        default="",
+        description="The type of visualization, e.g. bar chart, line chart, pie chart, etc.",
+    )
diff --git a/examples/autodv/operators.py b/examples/autodv/operators.py
@@ -0,0 +1,71 @@
+from examples.autodv.operator_an import (
+    ImageDataOp,
+    KeywordOp,
+    TextGenerateOp,
+    TypeAnalyzeOp,
+    VisualizeOp,
+)
+from examples.autodv.prompts import (
+    IMAGE_DATA_EXTRACT_PROMPT,
+    KEYWORD_EXTRACT_PROMPT,
+    TEXT_GENERATE_PROMPT,
+    TYPE_ANALYZE_PROMPT,
+    VISUALIZE_MAPPING_PROMPT,
+)
+from metagpt.ext.aflow.scripts.operator import Operator
+from metagpt.llm import LLM
+
+
+# Stage 1: Image Data Extract
+class ImageDataExtract(Operator):
+    """Operator that asks the LLM to describe images and extract their data.
+
+    The answer is collected through the ``ImageDataOp`` schema using the
+    ``single_fill`` mode.
+    """
+
+    def __init__(self, llm: LLM, name: str = "ImageDataExtract"):
+        super().__init__(llm, name)
+
+    async def __call__(self, images: list[str]):
+        """Run the image extraction prompt.
+
+        Args:
+            images: Images forwarded to ``_fill_node`` through its ``images``
+                keyword. Presumably encoded image strings -- TODO confirm
+                against the caller.
+
+        Returns:
+            Whatever ``_fill_node`` returns for ``ImageDataOp``; the exact
+            type is defined by the ``Operator`` base class.
+        """
+        # ``str.format`` ignores keyword arguments the template does not use,
+        # so this call is harmless when the prompt has no ``{image_path}``.
+        extraction_prompt = IMAGE_DATA_EXTRACT_PROMPT.format(image_path=images)
+        return await self._fill_node(
+            ImageDataOp,
+            extraction_prompt,
+            mode="single_fill",
+            images=images,
+        )
+
+
+# Stage 2: Keyword Extract
+class Keyword(Operator):
+    """Operator that asks the LLM to extract keywords from a piece of data.
+
+    The answer is collected through the ``KeywordOp`` schema using the
+    ``single_fill`` mode.
+    """
+
+    def __init__(self, llm: LLM, name: str = "Keyword"):
+        super().__init__(llm, name)
+
+    async def __call__(self, data: str):
+        """Run the keyword extraction prompt.
+
+        Args:
+            data: Value substituted into the ``{data}`` placeholder of
+                ``KEYWORD_EXTRACT_PROMPT``.
+
+        Returns:
+            Whatever ``_fill_node`` returns for ``KeywordOp``; the exact type
+            is defined by the ``Operator`` base class.
+        """
+        keyword_prompt = KEYWORD_EXTRACT_PROMPT.format(data=data)
+        return await self._fill_node(KeywordOp, keyword_prompt, mode="single_fill")
+
+
+# Stage 3: Text Generate
+class TextGenerate(Operator):
+    """Operator that asks the LLM to write a plot description from keywords.
+
+    The answer is collected through the ``TextGenerateOp`` schema using the
+    ``single_fill`` mode.
+    """
+
+    def __init__(self, llm: LLM, name: str = "TextGenerate"):
+        super().__init__(llm, name)
+
+    async def __call__(self, keywords: str):
+        """Run the text generation prompt.
+
+        Args:
+            keywords: Value substituted into the ``{keywords}`` placeholder of
+                ``TEXT_GENERATE_PROMPT``.
+
+        Returns:
+            Whatever ``_fill_node`` returns for ``TextGenerateOp``; the exact
+            type is defined by the ``Operator`` base class.
+        """
+        generation_prompt = TEXT_GENERATE_PROMPT.format(keywords=keywords)
+        return await self._fill_node(TextGenerateOp, generation_prompt, mode="single_fill")
+
+
+# Stage 4: Visualization, step 1: type analyze
+class TypeAnalyze(Operator):
+    """Operator that asks the LLM which visualization type suits the keywords.
+
+    The answer is collected through the ``TypeAnalyzeOp`` schema using the
+    ``single_fill`` mode.
+    """
+
+    def __init__(self, llm: LLM, name: str = "TypeAnalyze"):
+        super().__init__(llm, name)
+
+    async def __call__(self, keywords: str):
+        """Run the visualization type analysis prompt.
+
+        Args:
+            keywords: Value substituted into the ``{keywords}`` placeholder of
+                ``TYPE_ANALYZE_PROMPT``.
+
+        Returns:
+            Whatever ``_fill_node`` returns for ``TypeAnalyzeOp``; the exact
+            type is defined by the ``Operator`` base class.
+        """
+        analysis_prompt = TYPE_ANALYZE_PROMPT.format(keywords=keywords)
+        return await self._fill_node(TypeAnalyzeOp, analysis_prompt, mode="single_fill")
+
+
+# step 2: visualize mapping
+class VisualizeMapping(Operator):
+    """Operator that asks the LLM to map data onto a visualization type.
+
+    Unlike the other operators in this module, the answer is collected
+    through the ``VisualizeOp`` schema using the ``xml_fill`` mode.
+    """
+
+    def __init__(self, llm: LLM, name: str = "VisualizeMapping"):
+        super().__init__(llm, name)
+
+    async def __call__(self, data: str, visualize_type: str):
+        """Run the visualization mapping prompt.
+
+        Args:
+            data: Value substituted into the ``{data}`` placeholder of
+                ``VISUALIZE_MAPPING_PROMPT``.
+            visualize_type: Value substituted into the ``{type}`` placeholder.
+
+        Returns:
+            Whatever ``_fill_node`` returns for ``VisualizeOp``; the exact
+            type is defined by the ``Operator`` base class.
+        """
+        mapping_prompt = VISUALIZE_MAPPING_PROMPT.format(
+            data=data,
+            type=visualize_type,
+        )
+        return await self._fill_node(VisualizeOp, mapping_prompt, mode="xml_fill")
diff --git a/examples/autodv/prompts.py b/examples/autodv/prompts.py
@@ -0,0 +1,24 @@
+# Prompt for the image data extraction step. The text is written in Chinese and
+# asks the model to explain the content of the image and extract the data in it.
+# It contains no format placeholders.
+IMAGE_DATA_EXTRACT_PROMPT = """
+讲解这个图片的内容，并抽取出图片中的数据
+"""
+
+# Template for the keyword extraction step; expects a `data` format argument.
+KEYWORD_EXTRACT_PROMPT = """
+Extract the keywords from the data: 
+data: {data}
+"""
+
+# Template for the text generation step; expects a `keywords` format argument.
+TEXT_GENERATE_PROMPT = """
+Generate a text from the keywords, a fluent and concise text, can be used as a plot description: 
+keywords: {keywords}
+"""
+
+# Template for the visualization type analysis step; expects a `keywords`
+# format argument.
+TYPE_ANALYZE_PROMPT = """
+Analyze the visualization type of the keywords, visualization type can be like bar chart, line chart, pie chart, etc.
+keywords: {keywords}
+"""
+
+# Template for the visualization mapping step; expects `data` and `type`
+# format arguments.
+VISUALIZE_MAPPING_PROMPT = """
+Generate the keywords use for visualization, the type of the keywords is visualization type, the plot is visualization data: 
+visualization data: {data}
+visualization type: {type}
+"""
diff --git a/examples/autodv/workflow.py b/examples/autodv/workflow.py
@@ -0,0 +1,58 @@
+from pathlib import Path
+
+from examples.autodv.operators import (
+    ImageDataExtract,
+    Keyword,
+    TextGenerate,
+    TypeAnalyze,
+    VisualizeMapping,
+)
+from metagpt.ext.aflow.scripts.workflow import Workflow
+from metagpt.utils.common import encode_image
+
+
+class AutoDVWorkflow(Workflow):
+    """Workflow that chains the AutoDV operators from input images to a visualization result."""
+
+    def __init__(
+        self,
+        name: str = "AutoDV",
+        llm_config=None,
+        dataset=None,
+    ) -> None:
+        super().__init__(name, llm_config, dataset)
+
+        # Create one operator per pipeline step; all of them receive self.llm.
+        self.image_data_extractor = ImageDataExtract(self.llm)
+        self.keyword_extractor = Keyword(self.llm)
+        self.text_generator = TextGenerate(self.llm)
+        self.type_analyzer = TypeAnalyze(self.llm)
+        self.visualizer = VisualizeMapping(self.llm)
+
+    async def __call__(self, images: list[str]):
+        """Run the complete automatic data-visualization workflow.
+
+        Args:
+            images: List of input image paths.
+
+        Returns:
+            tuple: ``(plot, visualization_result)`` -- the generated
+            description text and the visualization result, each being
+            whatever the corresponding operator returned.
+        """
+        # Each path is passed through encode_image before the LLM call
+        # (presumably base64 encoding -- confirm in metagpt.utils.common).
+        encoded_images = [encode_image(Path(image_path)) for image_path in images]
+
+        # Stage 1: extract data from the images.
+        extracted_data = await self.image_data_extractor(encoded_images)
+
+        # Stage 2: extract keywords.
+        # NOTE(review): the operator parameters are annotated as str, but the
+        # operator return type is not visible here -- confirm they match.
+        keywords = await self.keyword_extractor(extracted_data)
+
+        # Stage 3: generate the description text.
+        plot = await self.text_generator(keywords)
+
+        # Stage 4: visualization analysis.
+        # Step 1: analyze the visualization type.
+        chart_type = await self.type_analyzer(keywords)
+
+        # Step 2: generate the visualization mapping.
+        # NOTE(review): the keywords (not the Stage 1 output) are passed as the
+        # visualizer's `data` argument -- confirm this is intended.
+        visualization_result = await self.visualizer(keywords, chart_type)
+
+        return plot, visualization_result
diff --git a/metagpt/actions/action_node.py b/metagpt/actions/action_node.py
@@ -541,22 +541,22 @@ async def code_fill(
         result = {field_name: extracted_code}
         return result
 
-    async def single_fill(self, context: str) -> Dict[str, str]:
+    async def single_fill(self, context: str, images: Optional[Union[str, list[str]]] = None) -> Dict[str, str]:
         field_name = self.get_field_name()
         prompt = context
-        content = await self.llm.aask(prompt)
+        content = await self.llm.aask(prompt, images=images)
         result = {field_name: content}
         return result
 
-    async def xml_fill(self, context: str) -> Dict[str, Any]:
+    async def xml_fill(self, context: str, images: Optional[Union[str, list[str]]] = None) -> Dict[str, Any]:
         """
         Fill context with XML tags and convert according to field types, including string, integer, boolean, list and dict types
         """
         field_names = self.get_field_names()
         field_types = self.get_field_types()
 
         extracted_data: Dict[str, Any] = {}
-        content = await self.llm.aask(context)
+        content = await self.llm.aask(context, images=images)
 
         for field_name in field_names:
             pattern = rf"<{field_name}>(.*?)</{field_name}>"
@@ -635,12 +635,12 @@ async def fill(
 
         elif mode == FillMode.XML_FILL.value:
             context = self.xml_compile(context=self.context)
-            result = await self.xml_fill(context)
+            result = await self.xml_fill(context, images=images)
             self.instruct_content = self.create_class()(**result)
             return self
 
         elif mode == FillMode.SINGLE_FILL.value:
-            result = await self.single_fill(context)
+            result = await self.single_fill(context, images=images)
             self.instruct_content = self.create_class()(**result)
             return self
 

diff --git a/run_autodv.py b/run_autodv.py
@@ -0,0 +1,20 @@
+import asyncio
+
+from examples.autodv.workflow import AutoDVWorkflow
+from metagpt.configs.models_config import ModelsConfig
+
+
+async def main():
+    """Run the AutoDV workflow on ``image.png`` and print both of its results."""
+    # Look up the "gpt-4o-mini" entry of the default models configuration.
+    llm_config = ModelsConfig.default().get("gpt-4o-mini")
+    autodv = AutoDVWorkflow(llm_config=llm_config)
+
+    # Execute the complete workflow.
+    outcome = await autodv(["image.png"])
+    plot, visualization_result = outcome
+
+    print(plot)
+    print("another line\n")
+    print(visualization_result)
+
+
+# Only start the event loop when this file is executed as a script.
+if __name__ == "__main__":
+    asyncio.run(main())