Skip to content

Commit

Permalink
Init AutoDV's Operator
Browse files Browse the repository at this point in the history
  • Loading branch information
didiforgithub committed Nov 8, 2024
1 parent a01156d commit 10295d6
Show file tree
Hide file tree
Showing 6 changed files with 201 additions and 6 deletions.
22 changes: 22 additions & 0 deletions examples/autodv/operator_an.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from pydantic import BaseModel, Field


class ImageDataOp(BaseModel):
    # Output schema with a single text field, `data`, defaulting to "".
    data: str = Field(
        default="",
        description="The data extracted from the image",
    )


class KeywordOp(BaseModel):
    # Output schema with a single text field, `keywords`, defaulting to "".
    keywords: str = Field(
        default="",
        description="The keywords extracted from the data",
    )


class TextGenerateOp(BaseModel):
    # Output schema with a single text field, `plot`, defaulting to "".
    plot: str = Field(
        default="",
        description="The plot generated from the prompt, discription of the prompt.",
    )


class TypeAnalyzeOp(BaseModel):
    # Output schema with a single text field, `type`, defaulting to "".
    # The field name shadows the builtin only inside this class body.
    type: str = Field(
        default="",
        description="The type of visualization, e.g. bar chart, line chart, pie chart, etc.",
    )


class VisualizeOp(BaseModel):
    # Output schema with two text fields, `data` and `type`, both defaulting to "".
    data: str = Field(
        default="",
        description="The data use for visualization.",
    )
    type: str = Field(
        default="",
        description="The type of visualization, e.g. bar chart, line chart, pie chart, etc.",
    )
71 changes: 71 additions & 0 deletions examples/autodv/operators.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
from examples.autodv.operator_an import (
ImageDataOp,
KeywordOp,
TextGenerateOp,
TypeAnalyzeOp,
VisualizeOp,
)
from examples.autodv.prompts import (
IMAGE_DATA_EXTRACT_PROMPT,
KEYWORD_EXTRACT_PROMPT,
TEXT_GENERATE_PROMPT,
TYPE_ANALYZE_PROMPT,
VISUALIZE_MAPPING_PROMPT,
)
from metagpt.ext.aflow.scripts.operator import Operator
from metagpt.llm import LLM


# Stage 1: Image Data Extract
class ImageDataExtract(Operator):
    """Operator that sends the image-data extraction prompt together with images."""

    def __init__(self, llm: LLM, name: str = "ImageDataExtract"):
        """Initialise the base operator with ``llm`` and ``name``."""
        super().__init__(llm, name)

    async def __call__(self, images: list[str]):
        """Fill an ``ImageDataOp`` node in ``single_fill`` mode.

        Args:
            images: Passed both to the prompt template (as ``image_path``)
                and to ``_fill_node`` through its ``images`` keyword.

        Returns:
            The value produced by ``_fill_node``.
        """
        rendered_prompt = IMAGE_DATA_EXTRACT_PROMPT.format(image_path=images)
        return await self._fill_node(
            ImageDataOp,
            rendered_prompt,
            mode="single_fill",
            images=images,
        )


# Stage 2: Keyword Extract
class Keyword(Operator):
    """Operator that sends the keyword-extraction prompt for a piece of data."""

    def __init__(self, llm: LLM, name: str = "Keyword"):
        """Initialise the base operator with ``llm`` and ``name``."""
        super().__init__(llm, name)

    async def __call__(self, data: str):
        """Fill a ``KeywordOp`` node in ``single_fill`` mode.

        Args:
            data: Text substituted into the ``{data}`` slot of the prompt.

        Returns:
            The value produced by ``_fill_node``.
        """
        rendered_prompt = KEYWORD_EXTRACT_PROMPT.format(data=data)
        return await self._fill_node(
            KeywordOp,
            rendered_prompt,
            mode="single_fill",
        )


# Stage 3: Text Generate
class TextGenerate(Operator):
    """Operator that sends the text-generation prompt for a set of keywords."""

    def __init__(self, llm: LLM, name: str = "TextGenerate"):
        """Initialise the base operator with ``llm`` and ``name``."""
        super().__init__(llm, name)

    async def __call__(self, keywords: str):
        """Fill a ``TextGenerateOp`` node in ``single_fill`` mode.

        Args:
            keywords: Text substituted into the ``{keywords}`` slot of the prompt.

        Returns:
            The value produced by ``_fill_node``.
        """
        rendered_prompt = TEXT_GENERATE_PROMPT.format(keywords=keywords)
        return await self._fill_node(
            TextGenerateOp,
            rendered_prompt,
            mode="single_fill",
        )


# Stage 4: Visualization, step 1: type analyze
class TypeAnalyze(Operator):
    """Operator that sends the visualization-type analysis prompt for keywords."""

    def __init__(self, llm: LLM, name: str = "TypeAnalyze"):
        """Initialise the base operator with ``llm`` and ``name``."""
        super().__init__(llm, name)

    async def __call__(self, keywords: str):
        """Fill a ``TypeAnalyzeOp`` node in ``single_fill`` mode.

        Args:
            keywords: Text substituted into the ``{keywords}`` slot of the prompt.

        Returns:
            The value produced by ``_fill_node``.
        """
        rendered_prompt = TYPE_ANALYZE_PROMPT.format(keywords=keywords)
        return await self._fill_node(
            TypeAnalyzeOp,
            rendered_prompt,
            mode="single_fill",
        )


# step 2: visualize mapping
class VisualizeMapping(Operator):
    """Operator that sends the visualization-mapping prompt for data and a chart type."""

    def __init__(self, llm: LLM, name: str = "VisualizeMapping"):
        """Initialise the base operator with ``llm`` and ``name``."""
        super().__init__(llm, name)

    async def __call__(self, data: str, visualize_type: str):
        """Fill a ``VisualizeOp`` node in ``xml_fill`` mode.

        Args:
            data: Text substituted into the ``{data}`` slot of the prompt.
            visualize_type: Text substituted into the ``{type}`` slot of the prompt.

        Returns:
            The value produced by ``_fill_node``.
        """
        rendered_prompt = VISUALIZE_MAPPING_PROMPT.format(
            data=data,
            type=visualize_type,
        )
        return await self._fill_node(
            VisualizeOp,
            rendered_prompt,
            mode="xml_fill",
        )
24 changes: 24 additions & 0 deletions examples/autodv/prompts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Instruction (written in Chinese) asking the model to explain the image's
# content and extract the data shown in it. Contains no `{...}` placeholders.
IMAGE_DATA_EXTRACT_PROMPT = """
讲解这个图片的内容,并抽取出图片中的数据
"""

# Keyword-extraction template; expects a `data` value for `str.format`.
KEYWORD_EXTRACT_PROMPT = """
Extract the keywords from the data:
data: {data}
"""

# Text-generation template; expects a `keywords` value for `str.format`.
TEXT_GENERATE_PROMPT = """
Generate a text from the keywords, a fluent and concise text, can be used as a plot description:
keywords: {keywords}
"""

# Visualization-type analysis template; expects a `keywords` value for `str.format`.
TYPE_ANALYZE_PROMPT = """
Analyze the visualization type of the keywords, visualization type can be like bar chart, line chart, pie chart, etc.
keywords: {keywords}
"""

# Visualization-mapping template; expects `data` and `type` values for `str.format`.
VISUALIZE_MAPPING_PROMPT = """
Generate the keywords use for visualization, the type of the keywords is visualization type, the plot is visualization data:
visualization data: {data}
visualization type: {type}
"""
58 changes: 58 additions & 0 deletions examples/autodv/workflow.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
from pathlib import Path

from examples.autodv.operators import (
ImageDataExtract,
Keyword,
TextGenerate,
TypeAnalyze,
VisualizeMapping,
)
from metagpt.ext.aflow.scripts.workflow import Workflow
from metagpt.utils.common import encode_image


class AutoDVWorkflow(Workflow):
    """Automatic data-visualization workflow.

    Chains five operators: image data extraction, keyword extraction, text
    generation, visualization-type analysis and visualization mapping.
    """

    def __init__(
        self,
        name: str = "AutoDV",
        llm_config=None,
        dataset=None,
    ) -> None:
        """Initialise the base workflow and create one operator per stage.

        Args:
            name: Workflow name forwarded to the base class.
            llm_config: LLM configuration forwarded to the base class.
            dataset: Dataset forwarded to the base class.
        """
        super().__init__(name, llm_config, dataset)

        # Every operator shares the workflow's LLM instance.
        self.image_data_extractor = ImageDataExtract(self.llm)
        self.keyword_extractor = Keyword(self.llm)
        self.text_generator = TextGenerate(self.llm)
        self.type_analyzer = TypeAnalyze(self.llm)
        self.visualizer = VisualizeMapping(self.llm)

    @staticmethod
    def _field_text(result, field: str):
        """Return ``result[field]`` when ``result`` is a dict holding it, else ``result``.

        The downstream operators format their argument into a prompt with
        ``str.format``. NOTE(review): if ``Operator._fill_node`` hands back the
        filled model as a dict keyed by field name (confirm against the base
        class), passing that dict along unchanged would embed its ``repr`` in
        the next prompt. Plain strings pass through untouched.
        """
        if isinstance(result, dict) and field in result:
            return result[field]
        return result

    async def __call__(self, images: list[str]):
        """Run the complete automatic data-visualization workflow.

        Args:
            images: List of input image paths.

        Returns:
            tuple: ``(plot, visualization_result)`` - the text-generation
            operator's result and the visualization-mapping operator's
            result, exactly as those operators returned them.
        """
        images_b64 = [encode_image(Path(image)) for image in images]

        # Stage 1: extract data from the images.
        image_data = await self.image_data_extractor(images_b64)

        # Stage 2: extract keywords from the extracted data.
        keywords = await self.keyword_extractor(self._field_text(image_data, "data"))
        keyword_text = self._field_text(keywords, "keywords")

        # Stage 3: generate the descriptive text.
        plot = await self.text_generator(keyword_text)

        # Stage 4, step 1: analyse which visualization type fits.
        visualize_type = await self.type_analyzer(keyword_text)

        # Stage 4, step 2: build the visualization mapping.
        visualization_result = await self.visualizer(
            keyword_text,
            self._field_text(visualize_type, "type"),
        )

        return plot, visualization_result
12 changes: 6 additions & 6 deletions metagpt/actions/action_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -541,22 +541,22 @@ async def code_fill(
result = {field_name: extracted_code}
return result

async def single_fill(self, context: str) -> Dict[str, str]:
async def single_fill(self, context: str, images: Optional[Union[str, list[str]]] = None) -> Dict[str, str]:
field_name = self.get_field_name()
prompt = context
content = await self.llm.aask(prompt)
content = await self.llm.aask(prompt, images=images)
result = {field_name: content}
return result

async def xml_fill(self, context: str) -> Dict[str, Any]:
async def xml_fill(self, context: str, images: Optional[Union[str, list[str]]] = None) -> Dict[str, Any]:
"""
Fill context with XML tags and convert according to field types, including string, integer, boolean, list and dict types
"""
field_names = self.get_field_names()
field_types = self.get_field_types()

extracted_data: Dict[str, Any] = {}
content = await self.llm.aask(context)
content = await self.llm.aask(context, images=images)

for field_name in field_names:
pattern = rf"<{field_name}>(.*?)</{field_name}>"
Expand Down Expand Up @@ -635,12 +635,12 @@ async def fill(

elif mode == FillMode.XML_FILL.value:
context = self.xml_compile(context=self.context)
result = await self.xml_fill(context)
result = await self.xml_fill(context, images=images)
self.instruct_content = self.create_class()(**result)
return self

elif mode == FillMode.SINGLE_FILL.value:
result = await self.single_fill(context)
result = await self.single_fill(context, images=images)
self.instruct_content = self.create_class()(**result)
return self

Expand Down
20 changes: 20 additions & 0 deletions run_autodv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import asyncio

from examples.autodv.workflow import AutoDVWorkflow
from metagpt.configs.models_config import ModelsConfig


async def main():
    """Run the AutoDV workflow once on ``image.png`` and print both results."""
    llm_config = ModelsConfig.default().get("gpt-4o-mini")
    pipeline = AutoDVWorkflow(llm_config=llm_config)

    # Execute the complete workflow.
    outputs = await pipeline(["image.png"])
    plot, visualization_result = outputs

    for item in (plot, "another line\n", visualization_result):
        print(item)


# Script entry point: run the async main() to completion when executed directly.
if __name__ == "__main__":
    asyncio.run(main())

0 comments on commit 10295d6

Please sign in to comment.