From a5f31e2b80565fccdf5c47e9430425c64d4d871a Mon Sep 17 00:00:00 2001
From: cmgzn <zdongs@outlook.com>
Date: Wed, 11 Sep 2024 11:19:48 +0800
Subject: [PATCH 01/16] add stablediffusion_model.py

---
 src/agentscope/models/__init__.py             |   4 +
 .../models/stablediffusion_model.py           | 215 ++++++++++++++++++
 2 files changed, 219 insertions(+)
 create mode 100644 src/agentscope/models/stablediffusion_model.py

diff --git a/src/agentscope/models/__init__.py b/src/agentscope/models/__init__.py
index 0a6894b35..d32731f91 100644
--- a/src/agentscope/models/__init__.py
+++ b/src/agentscope/models/__init__.py
@@ -41,6 +41,9 @@
 from .yi_model import (
     YiChatWrapper,
 )
+from .stablediffusion_model import(
+    StableDiffusionTxt2imgWrapper
+)
 
 __all__ = [
     "ModelWrapperBase",
@@ -64,6 +67,7 @@
     "ZhipuAIEmbeddingWrapper",
     "LiteLLMChatWrapper",
     "YiChatWrapper",
+    "StableDiffusionTxt2imgWrapper",
 ]
 
 
diff --git a/src/agentscope/models/stablediffusion_model.py b/src/agentscope/models/stablediffusion_model.py
new file mode 100644
index 000000000..a01b1e8f2
--- /dev/null
+++ b/src/agentscope/models/stablediffusion_model.py
@@ -0,0 +1,215 @@
+# -*- coding: utf-8 -*-
+"""Model wrapper for stable diffusion models."""
+from abc import ABC
+import base64
+from typing import Any, Optional, Union, List, Sequence
+
+from . import ModelWrapperBase, ModelResponse
+from ..message import Msg
+from ..manager import FileManager
+import requests
+from ..utils.common import _convert_to_str
+
+
+class StableDiffusionWrapperBase(ModelWrapperBase, ABC):
+    """The base class for stable-diffusion model wrappers.
+
+    To use SD API, please
+    1. First download stable-diffusion-webui from https://github.com/AUTOMATIC1111/stable-diffusion-webui and
+    install it with 'webui-user.bat'
+    2. Move your checkpoint to 'models/Stable-diffusion' folder
+    3. Start launch.py with the '--api' parameter to start the server
+    After that, you can use the SD-webui API and
+    query the available parameters on the http://localhost:7860/docs page
+    """
+
+    model_type: str
+    """The type of the model wrapper, which is to identify the model wrapper
+    class in model configuration."""
+
+    options: dict
+    """A dict contains the options for stable-diffusion option API.
+    Modifications made through this parameter are persistent, meaning they will 
+    remain in effect for subsequent generation requests until explicitly changed or reset.
+    e.g. {"sd_model_checkpoint": "Anything-V3.0-pruned", "CLIP_stop_at_last_layers": 2}"""
+
+    def __init__(
+        self,
+        config_name: str,
+        options: dict = None,
+        generate_args: dict = None,
+        url: Optional[Union[str, None]] = None,
+        **kwargs: Any,
+    ) -> None:
+        """Initialize the model wrapper for SD-webui API.
+
+        Args:
+            options (`dict`, default `None`):
+                The keyword arguments to change the webui settings
+                such as model or CLIP skip, this changes will persist across sessions.
+                e.g. `{"sd_model_checkpoint": "Anything-V3.0-pruned", "CLIP_stop_at_last_layers": 2}`.
+            generate_args (`dict`, default `None`):
+                The extra keyword arguments used in SD-webui api generation,
+                e.g. `steps`, `seed`.
+            url (`str`, default `None`):
+                The url of the SD-webui server.
+                Defaults to `None`, which is http://127.0.0.1:7860.
+        """
+        if url is None:
+            url = "http://127.0.0.1:7860"
+
+        self.url = url
+        self.generate_args = generate_args or {}
+
+        options_url = f"{self.url}/sdapi/v1/options"
+        # Get the current default model
+        default_model_name = (
+            requests.get(options_url)
+            .json()["sd_model_checkpoint"]
+            .split("[")[0]
+            .strip()
+        )
+
+        if options is not None:
+            # Update webui options if needed
+            requests.post(options_url, json=options)
+            model_name = options.get("sd_model_checkpoint", default_model_name)
+        else:
+            model_name = default_model_name
+
+        super().__init__(config_name=config_name, model_name=model_name)
+
+    def format(
+        self,
+        *args: Union[Msg, Sequence[Msg]],
+    ) -> Union[List[dict], str]:
+        raise RuntimeError(
+            f"Model Wrapper [{type(self).__name__}] doesn't "
+            f"need to format the input. Please try to use the "
+            f"model wrapper directly.",
+        )
+
+
+class StableDiffusionTxt2imgWrapper(StableDiffusionWrapperBase):
+
+    model_type: str = "sd_txt2img"
+
+    def __call__(
+        self,
+        prompt: str,
+        **kwargs: Any,
+    ) -> ModelResponse:
+        """
+        Args:
+            prompt (`str`):
+                The prompt string to generate images from.
+            **kwargs (`Any`):
+                The keyword arguments to SD-webui txt2img API, e.g.
+                `n_iter`, `steps`, `seed`, `width`, etc. Please refer to
+                https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/API#api-guide-by-kilvoctu
+                or http://localhost:7860/docs
+                for more detailed arguments.
+
+        Returns:
+            `ModelResponse`:
+                A list of image local urls in image_urls field and the
+                raw response in raw field.
+        """
+
+        # step1: prepare keyword arguments
+        payload = {
+            "prompt": prompt,
+            **kwargs,
+            **self.generate_args,
+        }
+
+        # step2: forward to generate response
+        txt2img_url = f"{self.url}/sdapi/v1/txt2img"
+        response = requests.post(url=txt2img_url, json=payload)
+
+        if response.status_code != requests.codes.ok:
+            error_msg = f" Status code: {response.status_code},"
+            raise RuntimeError(error_msg)
+
+        # step3: record the model api invocation if needed
+        output = response.json()
+        self._save_model_invocation(
+            arguments={
+                "model": self.model_name,
+                **payload,
+            },
+            response=output,
+        )
+
+        # step4: update monitor accordingly
+        session_parameters = output["parameters"]
+        size = f"{session_parameters['width']}*{session_parameters['height']}"
+        image_count = session_parameters["batch_size"] * session_parameters["n_iter"]
+
+        self.monitor.update_image_tokens(
+            model_name=self.model_name,
+            image_count=image_count,
+            resolution=size,
+        )
+
+        # step5: return response
+        # Get image base64code as a list
+        images = output["images"]
+        b64_images = [base64.b64decode(image) for image in images]
+
+        file_manager = FileManager.get_instance()
+        # Return local url
+        urls = [file_manager.save_image(_) for _ in b64_images]
+        text = "Image saved to " + "\n".join(urls)
+        return ModelResponse(text=text, image_urls=urls, raw=response)
+
+    def format(self, *args: Msg | Sequence[Msg]) -> List[dict] | str:
+        # This is a temporary implementation to focus on the prompt 
+        # on single-turn image generation by preserving only the system prompt and 
+        # the last user message. This logic might change in the future to support 
+        # more complex conversational scenarios
+        if len(args) == 0:
+            raise ValueError(
+                "At least one message should be provided. An empty message "
+                "list is not allowed.",
+            )
+
+        # Parse all information into a list of messages
+        input_msgs = []
+        for _ in args:
+            if _ is None:
+                continue
+            if isinstance(_, Msg):
+                input_msgs.append(_)
+            elif isinstance(_, list) and all(isinstance(__, Msg) for __ in _):
+                input_msgs.extend(_)
+            else:
+                raise TypeError(
+                    f"The input should be a Msg object or a list "
+                    f"of Msg objects, got {type(_)}.",
+                )
+
+        # record user message history as a list of strings
+        user_messages = []
+        sys_prompt = None
+        for i, unit in enumerate(input_msgs):
+            if i == 0 and unit.role == "system":
+                # if system prompt is available, place it at the beginning
+                sys_prompt = _convert_to_str(unit.content)
+            elif unit.role == "user":
+                # Merge user messages into a conversation history prompt
+                user_messages.append(_convert_to_str(unit.content))
+            else:
+                continue
+
+        content_components = []
+        # Add system prompt at the beginning if provided
+        if sys_prompt is not None:
+            content_components.append(sys_prompt)
+        # Add the last user message if the user messages is not empty
+        if len(user_messages) > 0:
+            content_components.append(user_messages[-1])
+
+        prompt = ",".join(content_components)
+
+        return prompt

From 992695db3a6f80d4b11e200cd41a5bd61fe31ae3 Mon Sep 17 00:00:00 2001
From: cmgzn <zdongs@outlook.com>
Date: Wed, 11 Sep 2024 11:19:48 +0800
Subject: [PATCH 02/16] update stablediffusion_model.py

---
 .../models/stablediffusion_model.py           | 247 +++++++++++-------
 1 file changed, 158 insertions(+), 89 deletions(-)

diff --git a/src/agentscope/models/stablediffusion_model.py b/src/agentscope/models/stablediffusion_model.py
index a01b1e8f2..3b0a166a1 100644
--- a/src/agentscope/models/stablediffusion_model.py
+++ b/src/agentscope/models/stablediffusion_model.py
@@ -2,19 +2,25 @@
 """Model wrapper for stable diffusion models."""
 from abc import ABC
 import base64
+import json
+import time
 from typing import Any, Optional, Union, List, Sequence
 
+import requests
+from loguru import logger
+
 from . import ModelWrapperBase, ModelResponse
+from ..constants import _DEFAULT_MAX_RETRIES
+from ..constants import _DEFAULT_RETRY_INTERVAL
 from ..message import Msg
 from ..manager import FileManager
-import requests
 from ..utils.common import _convert_to_str
 
 
 class StableDiffusionWrapperBase(ModelWrapperBase, ABC):
     """The base class for stable-diffusion model wrappers.
 
-    To use SD API, please
+    To use SD-webui API, please
     1. First download stable-diffusion-webui from https://github.com/AUTOMATIC1111/stable-diffusion-webui and
     install it with 'webui-user.bat'
     2. Move your checkpoint to 'models/Stable-diffusion' folder
@@ -23,77 +29,176 @@ class StableDiffusionWrapperBase(ModelWrapperBase, ABC):
     query the available parameters on the http://localhost:7860/docs page
     """
 
-    model_type: str
-    """The type of the model wrapper, which is to identify the model wrapper
-    class in model configuration."""
-
-    options: dict
-    """A dict contains the options for stable-diffusion option API.
-    Modifications made through this parameter are persistent, meaning they will 
-    remain in effect for subsequent generation requests until explicitly changed or reset.
-    e.g. {"sd_model_checkpoint": "Anything-V3.0-pruned", "CLIP_stop_at_last_layers": 2}"""
+    model_type: str = "stable_diffusion"
 
     def __init__(
         self,
         config_name: str,
-        options: dict = None,
+        host: str = "127.0.0.1:7860",
+        base_url: Optional[Union[str, None]] = None,
+        use_https: bool = False,
         generate_args: dict = None,
-        url: Optional[Union[str, None]] = None,
+        headers: dict = None,
+        options: dict = None,
+        timeout: int = 30,
+        max_retries: int = _DEFAULT_MAX_RETRIES,
+        retry_interval: int = _DEFAULT_RETRY_INTERVAL,
         **kwargs: Any,
     ) -> None:
-        """Initialize the model wrapper for SD-webui API.
+        """
+        Initializes the SD-webui API client.
 
         Args:
-            options (`dict`, default `None`):
+            config_name (`str`):
+                The name of the model config.
+            host (`str`, default `"127.0.0.1:7860"`):
+                The host port of the stable-diffusion webui server.
+            base_url (`str`, default `None`):
+                Base URL for the stable-diffusion webui services. If not provided, it will be generated based on `host` and `use_https`.
+            use_https (`bool`, default `False`):
+                Whether to generate the base URL with HTTPS protocol or HTTP.
+            generate_args (`dict`, default `None`): 
+                The extra keyword arguments used in SD api generation,
+                e.g. `{"steps": 50}`.
+            headers (`dict`, default `None`): 
+                HTTP request headers. 
+            options (`dict`, default `None`): 
                 The keyword arguments to change the webui settings
                 such as model or CLIP skip, this changes will persist across sessions.
                 e.g. `{"sd_model_checkpoint": "Anything-V3.0-pruned", "CLIP_stop_at_last_layers": 2}`.
-            generate_args (`dict`, default `None`):
-                The extra keyword arguments used in SD-webui api generation,
-                e.g. `steps`, `seed`.
-            url (`str`, default `None`):
-                The url of the SD-webui server.
-                Defaults to `None`, which is http://127.0.0.1:7860.
         """
-        if url is None:
-            url = "http://127.0.0.1:7860"
+        # If base_url is not provided, construct it based on whether HTTPS is used
+        if base_url is None:
+            if use_https:
+                base_url = f"https://{host}"
+            else:
+                base_url = f"http://{host}"
 
-        self.url = url
+        self.base_url = base_url
+        self.options_url = f"{base_url}/sdapi/v1/options"
         self.generate_args = generate_args or {}
 
-        options_url = f"{self.url}/sdapi/v1/options"
-        # Get the current default model
-        default_model_name = (
-            requests.get(options_url)
-            .json()["sd_model_checkpoint"]
-            .split("[")[0]
-            .strip()
+        # Initialize the HTTP session and update the request headers
+        self.session = requests.Session()
+        if headers:
+            self.session.headers.update(headers)
+
+        # Set options if provided
+        if options:
+            self._set_options(options)
+
+        # Get the default model name from the web-options
+        model_name = self._get_options()["sd_model_checkpoint"].split("[")[0].strip()
+        # Update the model name if override_settings is provided in generate_args
+        if self.generate_args.get("override_settings"):
+            model_name = generate_args["override_settings"].get(
+                "sd_model_checkpoint", model_name
+            )
+
+        super().__init__(config_name=config_name, model_name=model_name)
+
+        self.timeout = timeout
+        self.max_retries = max_retries
+        self.retry_interval = retry_interval
+    
+    @property
+    def url(self):
+        """SD-webui API endpoint URL"""
+        raise NotImplementedError()
+
+    def _get_options(self) -> dict:
+        response = self.session.get(url=self.options_url)
+        if response.status_code != 200:
+            logger.error(f"Failed to get options with {response.json()}")
+            raise RuntimeError(f"Failed to get options with {response.json()}")
+        return response.json()
+
+    def _set_options(self, options) -> None:
+        response = self.session.post(url=self.options_url, json=options)
+        if response.status_code != 200:
+            logger.error(json.dumps(options, indent=4))
+            raise RuntimeError(f"Failed to set options with {response.json()}")
+        else:
+            logger.info("Optionsset successfully")
+
+    def _invoke_model(self, payload: dict) -> dict:
+        """Invoke SD webui API and record the invocation if needed"""
+        # step1: prepare post requests
+        for i in range(1, self.max_retries + 1):
+            response = self.session.post(url=self.url, json=payload)
+
+            if response.status_code == requests.codes.ok:
+                break
+
+            if i < self.max_retries:
+                logger.warning(
+                    f"Failed to call the model with "
+                    f"requests.codes == {response.status_code}, retry "
+                    f"{i + 1}/{self.max_retries} times",
+                )
+                time.sleep(i * self.retry_interval)
+
+        # step2: record model invocation
+        # record the model api invocation, which will be skipped if
+        # `FileManager.save_api_invocation` is `False`
+        self._save_model_invocation(
+            arguments=payload,
+            response=response.json(),
         )
 
-        if options is not None:
-            # Update webui options if needed
-            requests.post(options_url, json=options)
-            model_name = options.get("sd_model_checkpoint", default_model_name)
+        # step3: return the response json
+        if response.status_code == requests.codes.ok:
+            return response.json()
         else:
-            model_name = default_model_name
+            logger.error(json.dumps({"url": self.url, "json": payload}, indent=4))
+            raise RuntimeError(
+                f"Failed to call the model with {response.json()}",
+            )
 
-        super().__init__(config_name=config_name, model_name=model_name)
+    def _parse_response(self, response: dict) -> ModelResponse:
+        """Parse the response json data into ModelResponse"""
+        return ModelResponse(raw=response)
+
+    def __call__(self, **kwargs: Any) -> ModelResponse:
+        payload = {
+            **self.generate_args,
+            **kwargs,
+        }
+        response = self._invoke_model(payload)
+        return self._parse_response(response)
 
-    def format(
-        self,
-        *args: Union[Msg, Sequence[Msg]],
-    ) -> Union[List[dict], str]:
-        raise RuntimeError(
-            f"Model Wrapper [{type(self).__name__}] doesn't "
-            f"need to format the input. Please try to use the "
-            f"model wrapper directly.",
-        )
 
 
 class StableDiffusionTxt2imgWrapper(StableDiffusionWrapperBase):
+    """Stable Diffusion txt2img API wrapper"""
 
     model_type: str = "sd_txt2img"
 
+    @property
+    def url(self):
+        return f"{self.base_url}/sdapi/v1/txt2img"
+    
+    def _parse_response(self, response: dict) -> ModelResponse:
+        session_parameters = response["parameters"]
+        size = f"{session_parameters['width']}*{session_parameters['height']}"
+        image_count = session_parameters["batch_size"] * session_parameters["n_iter"]
+
+        self.monitor.update_image_tokens(
+            model_name=self.model_name,
+            image_count=image_count,
+            resolution=size,
+        )
+
+        # Get image base64code as a list
+        images = response["images"]
+        b64_images = [base64.b64decode(image) for image in images]
+
+        file_manager = FileManager.get_instance()
+        # Return local url
+        image_urls = [file_manager.save_image(_) for _ in b64_images]
+        text = "Image saved to " + "\n".join(image_urls)
+        return ModelResponse(text=text, image_urls=image_urls, raw=response)
+
     def __call__(
         self,
         prompt: str,
@@ -109,13 +214,11 @@ def __call__(
                 https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/API#api-guide-by-kilvoctu
                 or http://localhost:7860/docs
                 for more detailed arguments.
-
         Returns:
             `ModelResponse`:
                 A list of image local urls in image_urls field and the
                 raw response in raw field.
         """
-
         # step1: prepare keyword arguments
         payload = {
             "prompt": prompt,
@@ -124,49 +227,15 @@ def __call__(
         }
 
         # step2: forward to generate response
-        txt2img_url = f"{self.url}/sdapi/v1/txt2img"
-        response = requests.post(url=txt2img_url, json=payload)
-
-        if response.status_code != requests.codes.ok:
-            error_msg = f" Status code: {response.status_code},"
-            raise RuntimeError(error_msg)
-
-        # step3: record the model api invocation if needed
-        output = response.json()
-        self._save_model_invocation(
-            arguments={
-                "model": self.model_name,
-                **payload,
-            },
-            response=output,
-        )
-
-        # step4: update monitor accordingly
-        session_parameters = output["parameters"]
-        size = f"{session_parameters['width']}*{session_parameters['height']}"
-        image_count = session_parameters["batch_size"] * session_parameters["n_iter"]
+        response = self._invoke_model(payload)
 
-        self.monitor.update_image_tokens(
-            model_name=self.model_name,
-            image_count=image_count,
-            resolution=size,
-        )
-
-        # step5: return response
-        # Get image base64code as a list
-        images = output["images"]
-        b64_images = [base64.b64decode(image) for image in images]
-
-        file_manager = FileManager.get_instance()
-        # Return local url
-        urls = [file_manager.save_image(_) for _ in b64_images]
-        text = "Image saved to " + "\n".join(urls)
-        return ModelResponse(text=text, image_urls=urls, raw=response)
+        # step3: parse the response
+        return self._parse_response(response)
 
     def format(self, *args: Msg | Sequence[Msg]) -> List[dict] | str:
-        # This is a temporary implementation to focus on the prompt 
-        # on single-turn image generation by preserving only the system prompt and 
-        # the last user message. This logic might change in the future to support 
+        # This is a temporary implementation to focus on the prompt
+        # on single-turn image generation by preserving only the system prompt and
+        # the last user message. This logic might change in the future to support
         # more complex conversational scenarios
         if len(args) == 0:
             raise ValueError(
@@ -204,7 +273,7 @@ def format(self, *args: Msg | Sequence[Msg]) -> List[dict] | str:
 
         content_components = []
         # Add system prompt at the beginning if provided
-        if sys_prompt is not None:
+        if sys_prompt:
             content_components.append(sys_prompt)
         # Add the last user message if the user messages is not empty
         if len(user_messages) > 0:

From fb269e11d6347718822cc8505f29e9a7de5a307e Mon Sep 17 00:00:00 2001
From: cmgzn <zdongs@outlook.com>
Date: Wed, 18 Sep 2024 14:02:51 +0800
Subject: [PATCH 03/16] add stable-diffusion conversation example

---
 .../README.md                                 | 27 ++++++++++
 ...conversation_with_stablediffusion_model.py | 54 +++++++++++++++++++
 2 files changed, 81 insertions(+)
 create mode 100644 examples/conversation_with_stablediffusion_model/README.md
 create mode 100644 examples/conversation_with_stablediffusion_model/conversation_with_stablediffusion_model.py

diff --git a/examples/conversation_with_stablediffusion_model/README.md b/examples/conversation_with_stablediffusion_model/README.md
new file mode 100644
index 000000000..79819817b
--- /dev/null
+++ b/examples/conversation_with_stablediffusion_model/README.md
@@ -0,0 +1,27 @@
+# Conversation with Stable-diffusion model
+
+This example will show
+- How to use Stable Diffusion models in AgentScope.
+
+In this example, you can interact in a conversational format to generate images.
+Once the image is generated, the agent will respond with the local file path where the image is saved.
+
+## Prerequisites
+
+You need to satisfy the following requirements to run this example:
+
+- Install Stable Diffusion Web UI by following the instructions at [AUTOMATIC1111/stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui).
+- Launching the Stable Diffusion Web UI with arguments: --api
+- Ensure that your host can successfully access `http://127.0.0.1:7860/`(default) any other specified host and port you choose.
+- Install the latest version of AgentScope by
+    ```bash
+    git clone https://github.com/modelscope/agentscope.git
+    cd agentscope
+    pip install -e .
+    ```
+
+## Running the Example
+Run the example and enter your image prompts; type `exit` to end the conversation.
+```bash
+python conversation_with_stablediffusion_model.py
+```
\ No newline at end of file
diff --git a/examples/conversation_with_stablediffusion_model/conversation_with_stablediffusion_model.py b/examples/conversation_with_stablediffusion_model/conversation_with_stablediffusion_model.py
new file mode 100644
index 000000000..ae96ce1ac
--- /dev/null
+++ b/examples/conversation_with_stablediffusion_model/conversation_with_stablediffusion_model.py
@@ -0,0 +1,54 @@
+# -*- coding: utf-8 -*-
+"""A simple example for conversation between user and stable-diffusion agent."""
+import agentscope
+from agentscope.agents import DialogAgent
+from agentscope.agents.user_agent import UserAgent
+
+
+def main() -> None:
+    """A basic conversation demo"""
+
+    agentscope.init(
+        model_configs=[
+            {
+                "model_type": "sd_txt2img",
+                "config_name": "sd",
+                "options": {
+                    "sd_model_checkpoint": "xxxxxx",
+                    "CLIP_stop_at_last_layers": 2,
+                },  # global settings, for detailed parameters
+                # please refer to 127.0.0.1:7860/docs#/default/get_config_sdapi_v1_options_get
+                "generate_args": {
+                    "steps": 50,
+                    "n_iter": 1,
+                    "override_settings": {
+                        "CLIP_stop_at_last_layers": 3,
+                        # settings effective only for this conversation
+                        # The parameters are consistent with the global settings.
+                    },
+                },
+            },
+        ],
+        project="txt2img-Agent Conversation",
+        save_api_invoke=True,
+    )
+
+    # Init two agents
+    dialog_agent = DialogAgent(
+        name="Assistant",
+        sys_prompt="high definition,dreamy",  # replace by your desired image style prompts
+        model_config_name="sd",  # replace by your model config name
+    )
+    user_agent = UserAgent()
+
+    # start the conversation between user and assistant
+    msg = None
+    while True:
+        msg = user_agent(msg)
+        if msg.content == "exit":
+            break
+        msg = dialog_agent(msg)
+
+
+if __name__ == "__main__":
+    main()

From b40f41f8ceee292a7332cb7900aea467ea3fd4f3 Mon Sep 17 00:00:00 2001
From: cmgzn <zdongs@outlook.com>
Date: Wed, 18 Sep 2024 14:04:53 +0800
Subject: [PATCH 04/16] fix readme

---
 examples/conversation_with_stablediffusion_model/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/conversation_with_stablediffusion_model/README.md b/examples/conversation_with_stablediffusion_model/README.md
index 79819817b..6c3f65df2 100644
--- a/examples/conversation_with_stablediffusion_model/README.md
+++ b/examples/conversation_with_stablediffusion_model/README.md
@@ -12,7 +12,7 @@ You need to satisfy the following requirements to run this example:
 
 - Install Stable Diffusion Web UI by following the instructions at [AUTOMATIC1111/stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui).
 - Launching the Stable Diffusion Web UI with arguments: --api
-- Ensure that your host can successfully access `http://127.0.0.1:7860/`(default) any other specified host and port you choose.
+- Ensure that your host can successfully access `http://127.0.0.1:7860/` (the default) or whichever host and port you configured.
 - Install the latest version of AgentScope by
     ```bash
     git clone https://github.com/modelscope/agentscope.git

From 30fe6f58f1f0f9d4629fc7f77a2104d51bcb01d7 Mon Sep 17 00:00:00 2001
From: cmgzn <zdongs@outlook.com>
Date: Thu, 19 Sep 2024 17:35:57 +0800
Subject: [PATCH 05/16] fix stablediffusion_model.py

---
 src/agentscope/models/__init__.py             |  6 +-
 .../models/stablediffusion_model.py           | 77 +++++++++----------
 2 files changed, 41 insertions(+), 42 deletions(-)

diff --git a/src/agentscope/models/__init__.py b/src/agentscope/models/__init__.py
index d32731f91..9cde1cd85 100644
--- a/src/agentscope/models/__init__.py
+++ b/src/agentscope/models/__init__.py
@@ -41,8 +41,8 @@
 from .yi_model import (
     YiChatWrapper,
 )
-from .stablediffusion_model import(
-    StableDiffusionTxt2imgWrapper
+from .stablediffusion_model import (
+    StableDiffusionImageSynthesisWrapper,
 )
 
 __all__ = [
@@ -67,7 +67,7 @@
     "ZhipuAIEmbeddingWrapper",
     "LiteLLMChatWrapper",
     "YiChatWrapper",
-    "StableDiffusionTxt2imgWrapper",
+    "StableDiffusionImageSynthesisWrapper",
 ]
 
 
diff --git a/src/agentscope/models/stablediffusion_model.py b/src/agentscope/models/stablediffusion_model.py
index 3b0a166a1..13c3b0870 100644
--- a/src/agentscope/models/stablediffusion_model.py
+++ b/src/agentscope/models/stablediffusion_model.py
@@ -21,12 +21,13 @@ class StableDiffusionWrapperBase(ModelWrapperBase, ABC):
     """The base class for stable-diffusion model wrappers.
 
     To use SD-webui API, please
-    1. First download stable-diffusion-webui from https://github.com/AUTOMATIC1111/stable-diffusion-webui and
+    1. First download stable-diffusion-webui from
+    https://github.com/AUTOMATIC1111/stable-diffusion-webui and
     install it with 'webui-user.bat'
     2. Move your checkpoint to 'models/Stable-diffusion' folder
     3. Start launch.py with the '--api' parameter to start the server
     After that, you can use the SD-webui API and
-    query the available parameters on the http://localhost:7860/docs page
+    query the available parameters on the http://localhost:7862/docs page
     """
 
     model_type: str = "stable_diffusion"
@@ -34,7 +35,7 @@ class StableDiffusionWrapperBase(ModelWrapperBase, ABC):
     def __init__(
         self,
         config_name: str,
-        host: str = "127.0.0.1:7860",
+        host: str = "127.0.0.1:7862",
         base_url: Optional[Union[str, None]] = None,
         use_https: bool = False,
         generate_args: dict = None,
@@ -51,23 +52,24 @@ def __init__(
         Args:
             config_name (`str`):
                 The name of the model config.
-            host (`str`, default `"127.0.0.1:7860"`):
+            host (`str`, default `"127.0.0.1:7862"`):
                 The host port of the stable-diffusion webui server.
             base_url (`str`, default `None`):
-                Base URL for the stable-diffusion webui services. If not provided, it will be generated based on `host` and `use_https`.
+                Base URL for the stable-diffusion webui services.
+                Generated from host and use_https if not provided.
             use_https (`bool`, default `False`):
                 Whether to generate the base URL with HTTPS protocol or HTTP.
-            generate_args (`dict`, default `None`): 
+            generate_args (`dict`, default `None`):
                 The extra keyword arguments used in SD api generation,
                 e.g. `{"steps": 50}`.
-            headers (`dict`, default `None`): 
-                HTTP request headers. 
-            options (`dict`, default `None`): 
+            headers (`dict`, default `None`):
+                HTTP request headers.
+            options (`dict`, default `None`):
                 The keyword arguments to change the webui settings
-                such as model or CLIP skip, this changes will persist across sessions.
-                e.g. `{"sd_model_checkpoint": "Anything-V3.0-pruned", "CLIP_stop_at_last_layers": 2}`.
+                such as model or CLIP skip; these changes will persist.
+                e.g. `{"sd_model_checkpoint": "Anything-V3.0-pruned"}`.
         """
-        # If base_url is not provided, construct it based on whether HTTPS is used
+        # Construct base_url based on HTTPS usage if not provided
         if base_url is None:
             if use_https:
                 base_url = f"https://{host}"
@@ -88,11 +90,14 @@ def __init__(
             self._set_options(options)
 
         # Get the default model name from the web-options
-        model_name = self._get_options()["sd_model_checkpoint"].split("[")[0].strip()
-        # Update the model name if override_settings is provided in generate_args
+        model_name = (
+            self._get_options()["sd_model_checkpoint"].split("[")[0].strip()
+        )
+        # Update the model name
         if self.generate_args.get("override_settings"):
             model_name = generate_args["override_settings"].get(
-                "sd_model_checkpoint", model_name
+                "sd_model_checkpoint",
+                model_name,
             )
 
         super().__init__(config_name=config_name, model_name=model_name)
@@ -100,9 +105,9 @@ def __init__(
         self.timeout = timeout
         self.max_retries = max_retries
         self.retry_interval = retry_interval
-    
+
     @property
-    def url(self):
+    def url(self) -> str:
         """SD-webui API endpoint URL"""
         raise NotImplementedError()
 
@@ -113,13 +118,12 @@ def _get_options(self) -> dict:
             raise RuntimeError(f"Failed to get options with {response.json()}")
         return response.json()
 
-    def _set_options(self, options) -> None:
+    def _set_options(self, options: dict) -> None:
         response = self.session.post(url=self.options_url, json=options)
         if response.status_code != 200:
             logger.error(json.dumps(options, indent=4))
             raise RuntimeError(f"Failed to set options with {response.json()}")
-        else:
-            logger.info("Optionsset successfully")
+        logger.info("Optionsset successfully")
 
     def _invoke_model(self, payload: dict) -> dict:
         """Invoke SD webui API and record the invocation if needed"""
@@ -150,7 +154,9 @@ def _invoke_model(self, payload: dict) -> dict:
         if response.status_code == requests.codes.ok:
             return response.json()
         else:
-            logger.error(json.dumps({"url": self.url, "json": payload}, indent=4))
+            logger.error(
+                json.dumps({"url": self.url, "json": payload}, indent=4),
+            )
             raise RuntimeError(
                 f"Failed to call the model with {response.json()}",
             )
@@ -159,29 +165,22 @@ def _parse_response(self, response: dict) -> ModelResponse:
         """Parse the response json data into ModelResponse"""
         return ModelResponse(raw=response)
 
-    def __call__(self, **kwargs: Any) -> ModelResponse:
-        payload = {
-            **self.generate_args,
-            **kwargs,
-        }
-        response = self._invoke_model(payload)
-        return self._parse_response(response)
-
 
-
-class StableDiffusionTxt2imgWrapper(StableDiffusionWrapperBase):
-    """Stable Diffusion txt2img API wrapper"""
+class StableDiffusionImageSynthesisWrapper(StableDiffusionWrapperBase):
+    """Stable Diffusion Text-to-Image (txt2img) API Wrapper"""
 
     model_type: str = "sd_txt2img"
 
     @property
-    def url(self):
+    def url(self) -> str:
         return f"{self.base_url}/sdapi/v1/txt2img"
-    
+
     def _parse_response(self, response: dict) -> ModelResponse:
         session_parameters = response["parameters"]
         size = f"{session_parameters['width']}*{session_parameters['height']}"
-        image_count = session_parameters["batch_size"] * session_parameters["n_iter"]
+        image_count = (
+            session_parameters["batch_size"] * session_parameters["n_iter"]
+        )
 
         self.monitor.update_image_tokens(
             model_name=self.model_name,
@@ -211,7 +210,7 @@ def __call__(
             **kwargs (`Any`):
                 The keyword arguments to SD-webui txt2img API, e.g.
                 `n_iter`, `steps`, `seed`, `width`, etc. Please refer to
-                https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/API#api-guide-by-kilvoctu
+                https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/API
                 or http://localhost:7860/docs
                 for more detailed arguments.
         Returns:
@@ -234,9 +233,9 @@ def __call__(
 
     def format(self, *args: Msg | Sequence[Msg]) -> List[dict] | str:
         # This is a temporary implementation to focus on the prompt
-        # on single-turn image generation by preserving only the system prompt and
-        # the last user message. This logic might change in the future to support
-        # more complex conversational scenarios
+        # on single-turn image generation by preserving only the system prompt
+        # and the last user message. This logic might change in the future
+        # to support more complex conversational scenarios
         if len(args) == 0:
             raise ValueError(
                 "At least one message should be provided. An empty message "

From dbb8c31904a4b5efeee6321a853b6024885594f7 Mon Sep 17 00:00:00 2001
From: cmgzn <zdongs@outlook.com>
Date: Thu, 19 Sep 2024 17:35:26 +0800
Subject: [PATCH 06/16] add sd_setup.sh

---
 .../stable_diffusion_webui/model_config.json  | 14 ++++++++
 scripts/stable_diffusion_webui/sd_setup.sh    | 34 +++++++++++++++++++
 2 files changed, 48 insertions(+)
 create mode 100644 scripts/stable_diffusion_webui/model_config.json
 create mode 100644 scripts/stable_diffusion_webui/sd_setup.sh

diff --git a/scripts/stable_diffusion_webui/model_config.json b/scripts/stable_diffusion_webui/model_config.json
new file mode 100644
index 000000000..823ea406e
--- /dev/null
+++ b/scripts/stable_diffusion_webui/model_config.json
@@ -0,0 +1,14 @@
+{
+    "model_type": "sd_txt2img",
+    "config_name": "stable_diffusion_txt2img",
+    "host": "127.0.0.1:7862",
+    "options": {
+      "sd_model_checkpoint": "Anything-V3.0-pruned",
+      "sd_lora": "add_detail",
+      "CLIP_stop_at_last_layers": 2
+    },
+    "generate_args": {
+      "steps": 50,
+      "n_iter": 1
+    }
+  }
\ No newline at end of file
diff --git a/scripts/stable_diffusion_webui/sd_setup.sh b/scripts/stable_diffusion_webui/sd_setup.sh
new file mode 100644
index 000000000..ce71baba8
--- /dev/null
+++ b/scripts/stable_diffusion_webui/sd_setup.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+# set VENV_DIR=%~dp0%venv
+# call "%VENV_DIR%\Scripts\activate.bat"
+
+# stable_diffusion_webui_path="YOUR_PATH_TO_STABLE_DIFFUSION_WEBUI"
+
+port=7862
+
+while getopts ":p:s:" opt
+do
+    # shellcheck disable=SC2220
+    case $opt in
+        p) port="$OPTARG";;
+        s) stable_diffusion_webui_path="$OPTARG"
+        ;;
+    esac
+done
+
+stable_diffusion_webui_path=${stable_diffusion_webui_path%/}
+launch_py_path="$stable_diffusion_webui_path/launch.py"
+
+# Check if the launch.py script exists
+if [[ ! -f "$launch_py_path" ]]; then
+    echo "The launch.py script was not found at $launch_py_path."
+    echo "Please ensure you have specified the correct path to your Stable Diffusion WebUI using the -s option."
+    echo "Example: ./sd_setup.sh -s /path/to/your/stable-diffusion-webui"
+    echo "Alternatively, you can set the path directly in the script."
+    exit 1
+fi
+
+cd $stable_diffusion_webui_path
+
+python ./launch.py --api --port=$port

From a9c94d675f48370b19409a35a96cda57a5ac8289 Mon Sep 17 00:00:00 2001
From: cmgzn <zdongs@outlook.com>
Date: Wed, 18 Sep 2024 14:02:51 +0800
Subject: [PATCH 07/16] fix conversation_with_stablediffusion_model.py

---
 .../conversation_with_stablediffusion_model.py       | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/examples/conversation_with_stablediffusion_model/conversation_with_stablediffusion_model.py b/examples/conversation_with_stablediffusion_model/conversation_with_stablediffusion_model.py
index ae96ce1ac..9a185f9c8 100644
--- a/examples/conversation_with_stablediffusion_model/conversation_with_stablediffusion_model.py
+++ b/examples/conversation_with_stablediffusion_model/conversation_with_stablediffusion_model.py
@@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
-"""A simple example for conversation between user and stable-diffusion agent."""
+"""conversation between user and stable-diffusion agent."""
 import agentscope
 from agentscope.agents import DialogAgent
 from agentscope.agents.user_agent import UserAgent
@@ -16,16 +16,10 @@ def main() -> None:
                 "options": {
                     "sd_model_checkpoint": "xxxxxx",
                     "CLIP_stop_at_last_layers": 2,
-                },  # global settings, for detailed parameters
-                # please refer to 127.0.0.1:7860/docs#/default/get_config_sdapi_v1_options_get
+                },
                 "generate_args": {
                     "steps": 50,
                     "n_iter": 1,
-                    "override_settings": {
-                        "CLIP_stop_at_last_layers": 3,
-                        # settings effective only for this conversation
-                        # The parameters are consistent with the global settings.
-                    },
                 },
             },
         ],
@@ -36,7 +30,7 @@ def main() -> None:
     # Init two agents
     dialog_agent = DialogAgent(
         name="Assistant",
-        sys_prompt="high definition,dreamy",  # replace by your desired image style prompts
+        sys_prompt="dreamy",  # replace by your image style prompts
         model_config_name="sd",  # replace by your model config name
     )
     user_agent = UserAgent()

From 6269fc70f9521bd11bd27b3d4571d60362b7f9c6 Mon Sep 17 00:00:00 2001
From: cmgzn <zdongs@outlook.com>
Date: Thu, 19 Sep 2024 16:57:11 +0800
Subject: [PATCH 08/16] fix readme

---
 .../README.md                                 | 104 ++++++++++++++++--
 1 file changed, 92 insertions(+), 12 deletions(-)

diff --git a/examples/conversation_with_stablediffusion_model/README.md b/examples/conversation_with_stablediffusion_model/README.md
index 6c3f65df2..a983cb3de 100644
--- a/examples/conversation_with_stablediffusion_model/README.md
+++ b/examples/conversation_with_stablediffusion_model/README.md
@@ -1,27 +1,107 @@
 # Conversation with Stable-diffusion model
 
 This example will show
+
 - How to use Stable Diffusion models in AgentScope.
 
 In this example, you can interact in a conversational format to generate images.
 Once the image is generated, the agent will respond with the local file path where the image is saved.
 
-## Prerequisites
+## How to Run
 
 You need to satisfy the following requirements to run this example:
 
+### Step 0: Install Stable Diffusion Web UI and AgentScope
+
 - Install Stable Diffusion Web UI by following the instructions at [AUTOMATIC1111/stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui).
-- Launching the Stable Diffusion Web UI with arguments: --api
-- Ensure that your host can successfully access `http://127.0.0.1:7860/`(default) or any other specified host and port you choose.
 - Install the latest version of AgentScope by
-    ```bash
-    git clone https://github.com/modelscope/agentscope.git
-    cd agentscope
-    pip install -e .
-    ```
-
-## Running the Example
-Run the example and input your questions.
+  ```bash
+  git clone https://github.com/modelscope/agentscope.git
+  cd agentscope
+  pip install -e .
+  ```
+
+### Step 1: Download the required checkpoints
+
+Before starting the Stable Diffusion Web UI, you need to download at least one model to ensure normal operation.
+Download the model to `stable-diffusion-webui/models/Stable-diffusion` directory.
+
+### Step 2: Launch the Stable Diffusion Web UI
+
+We've provided a convenient shell script to quickly start the Stable Diffusion Web UI:
+`scripts/stable_diffusion_webui/sd_setup.sh`
+
+Activate the virtual environment first. Then run the following command in your terminal, replacing `YOUR-SD-WEBUI-PATH` with the actual path to your Stable Diffusion Web UI directory:
+
+```bash
+bash scripts/stable_diffusion_webui/sd_setup.sh -s YOUR-SD-WEBUI-PATH
+```
+
+If you choose to start it on your own, you need to launch the Stable Diffusion Web UI with the following arguments: --api --port=7862. For more detailed instructions on starting the WebUI, refer to the [AUTOMATIC1111/stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui).
+
+### Step 3: Running the Example
+
+Run the example and input your prompt.
+
 ```bash
 python conversation_with_stablediffusion_model.py
-```
\ No newline at end of file
+```
+
+## Customization Options
+
+### `model_config` Example:
+
+```json
+{
+  "model_type": "sd_txt2img",
+  "config_name": "sd",
+  "options": {
+    "sd_model_checkpoint": "Anything-V3.0-pruned",
+    "sd_lora": "add_detail",
+    "CLIP_stop_at_last_layers": 2
+  },
+  "generate_args": {
+    "steps": 50,
+    "n_iter": 1,
+    "override_settings": {
+      "CLIP_stop_at_last_layers": 3
+    }
+  }
+}
+```
+
+### Parameter Explanation:
+
+- `options`: Global configuration that directly affects the WebUI settings.
+- `generate_args`: Controls parameters for individual image generation requests, such as `steps` (number of sampling steps) and `n_iter` (number of iterations).
+  - `override_settings`: Overrides WebUI settings for a single request, taking precedence over `options`.
+
+Notes:
+
+- `override_settings` only affects the current request, while changes made to `options` persist.
+- Both parameters can set the same options, but `override_settings` has a higher priority.
+
+As shown in the example, the final image will be generated with the following settings:
+
+steps: 50
+n_iter: 1
+sd_model_checkpoint: Anything-V3.0-pruned
+sd_lora: add_detail
+CLIP_stop_at_last_layers: 3
+
+However, the web UI will always display the following settings:
+
+sd_model_checkpoint: Anything-V3.0-pruned
+sd_lora: add_detail
+CLIP_stop_at_last_layers: 2
+
+### Available Parameter Lists:
+
+If you've successfully enabled the Stable Diffusion Web UI API, you should be able to access its documentation at http://127.0.0.1:7862/docs (or whatever URL you're using + /docs).
+
+- `generate_args`: {url}/docs#/default/text2imgapi_sdapi_v1_txt2img_post
+- `options` and `override_settings`: {url}/docs#/default/get_config_sdapi_v1_options_get
+
+For this project, the "options" parameter will be posted to the /sdapi/v1/options API endpoint,
+and the "generate_args" parameter will be posted to the /sdapi/v1/txt2img API endpoint.
+You can refer to https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/API for a more parameter reference guide.

From a54a45340f86478fe805f4d629e595e6b8c30419 Mon Sep 17 00:00:00 2001
From: cmgzn <zdongs@outlook.com>
Date: Thu, 19 Sep 2024 18:00:40 +0800
Subject: [PATCH 09/16] fix stablediffusion_model.py

---
 src/agentscope/models/stablediffusion_model.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/agentscope/models/stablediffusion_model.py b/src/agentscope/models/stablediffusion_model.py
index 13c3b0870..f287a00d7 100644
--- a/src/agentscope/models/stablediffusion_model.py
+++ b/src/agentscope/models/stablediffusion_model.py
@@ -231,7 +231,7 @@ def __call__(
         # step3: parse the response
         return self._parse_response(response)
 
-    def format(self, *args: Msg | Sequence[Msg]) -> List[dict] | str:
+    def format(self, *args: Union[Msg, Sequence[Msg]]) -> List[dict] | str:
         # This is a temporary implementation to focus on the prompt
         # on single-turn image generation by preserving only the system prompt
         # and the last user message. This logic might change in the future

From 9f29e8ec12612d91c6191eccd3db65aa5ee3f105 Mon Sep 17 00:00:00 2001
From: cmgzn <zdongs@outlook.com>
Date: Fri, 20 Sep 2024 11:12:25 +0800
Subject: [PATCH 10/16] fix stablediffusion_model.py

---
 src/agentscope/models/stablediffusion_model.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/agentscope/models/stablediffusion_model.py b/src/agentscope/models/stablediffusion_model.py
index f287a00d7..c8f2d9548 100644
--- a/src/agentscope/models/stablediffusion_model.py
+++ b/src/agentscope/models/stablediffusion_model.py
@@ -4,7 +4,7 @@
 import base64
 import json
 import time
-from typing import Any, Optional, Union, List, Sequence
+from typing import Any, Optional, Union, Sequence
 
 import requests
 from loguru import logger
@@ -231,7 +231,7 @@ def __call__(
         # step3: parse the response
         return self._parse_response(response)
 
-    def format(self, *args: Union[Msg, Sequence[Msg]]) -> List[dict] | str:
+    def format(self, *args: Union[Msg, Sequence[Msg]]) -> str:
         # This is a temporary implementation to focus on the prompt
         # on single-turn image generation by preserving only the system prompt
         # and the last user message. This logic might change in the future

From f16bb651acee21ec3bc41d54253abcf832e1524a Mon Sep 17 00:00:00 2001
From: cmgzn <zdongs@outlook.com>
Date: Fri, 20 Sep 2024 14:55:48 +0800
Subject: [PATCH 11/16] docs: add running example

---
 .../README.md                                     | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/examples/conversation_with_stablediffusion_model/README.md b/examples/conversation_with_stablediffusion_model/README.md
index a983cb3de..016e11cd8 100644
--- a/examples/conversation_with_stablediffusion_model/README.md
+++ b/examples/conversation_with_stablediffusion_model/README.md
@@ -105,3 +105,18 @@ If you've successfully enabled the Stable Diffusion Web UI API, you should be ab
 For this project, the "options" parameter will be posted to the /sdapi/v1/options API endpoint,
 and the "generate_args" parameter will be posted to the /sdapi/v1/txt2img API endpoint.
 You can refer to https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/API for a more parameter reference guide.
+
+## A Running Example
+
+- Conversation history with Stable Diffusion Web UI.
+  ```bash
+  User input:Horses on Mars
+  User: Horses on Mars
+  Assistant: Image saved to path\agentscope\runs\run_20240920-142208_rqsvhh\file\image_20240920-142522_HTF38X.png
+  User input: boy eating ice-cream
+  User: boy eating ice-cream
+  Assistant: Image saved to path\agentscope\runs\run_20240920-142208_rqsvhh\file\image_20240920-142559_2xGtUs.png
+  ```
+- Image
+<img src="https://img.alicdn.com/imgextra/i3/O1CN01YoMRQP26ClOHM7Kh0_!!6000000007626-0-tps-512-512.jpg" alt="Horses on Mars" width="300" />
+<img src="https://img.alicdn.com/imgextra/i1/O1CN01QTO8AU1HVxaQ2rFPx_!!6000000000764-0-tps-512-512.jpg" alt="boy eating ice-cream" width="300" />
\ No newline at end of file

From 609611bb805472945e7255dbe0d424d49f276827 Mon Sep 17 00:00:00 2001
From: cmgzn <zdongs@outlook.com>
Date: Fri, 27 Sep 2024 17:31:29 +0800
Subject: [PATCH 12/16] refactor: replace custom request with third-party API
 package (webuiapi) for SD-model invocation feat: add stablediffusion model
 services

---
 setup.py                                      |   3 +
 .../models/stablediffusion_model.py           | 214 +++++++-----------
 src/agentscope/service/__init__.py            |   2 +
 .../stablediffusion_services.py               | 122 ++++++++++
 4 files changed, 204 insertions(+), 137 deletions(-)
 create mode 100644 src/agentscope/service/multi_modality/stablediffusion_services.py

diff --git a/setup.py b/setup.py
index cd577d5b8..e29fdd8f9 100644
--- a/setup.py
+++ b/setup.py
@@ -90,6 +90,7 @@
 extra_litellm_requires = ["litellm"]
 extra_zhipuai_requires = ["zhipuai"]
 extra_ollama_requires = ["ollama>=0.1.7"]
+extra_sd_webuiapi_requires = ["webuiapi"]
 
 # Full requires
 extra_full_requires = (
@@ -102,6 +103,7 @@
     + extra_litellm_requires
     + extra_zhipuai_requires
     + extra_ollama_requires
+    + extra_sd_webuiapi_requires
 )
 
 # For online workstation
@@ -140,6 +142,7 @@
         "litellm": extra_litellm_requires,
         "zhipuai": extra_zhipuai_requires,
         "gemini": extra_gemini_requires,
+        "stablediffusion": extra_sd_webuiapi_requires,
         # For service functions
         "service": extra_service_requires,
         # For distribution mode
diff --git a/src/agentscope/models/stablediffusion_model.py b/src/agentscope/models/stablediffusion_model.py
index c8f2d9548..1d5d20f54 100644
--- a/src/agentscope/models/stablediffusion_model.py
+++ b/src/agentscope/models/stablediffusion_model.py
@@ -1,17 +1,14 @@
 # -*- coding: utf-8 -*-
 """Model wrapper for stable diffusion models."""
 from abc import ABC
-import base64
-import json
-import time
-from typing import Any, Optional, Union, Sequence
+from typing import Any, Union, Sequence
 
-import requests
-from loguru import logger
+try:
+    import webuiapi
+except ImportError:
+    webuiapi = None
 
 from . import ModelWrapperBase, ModelResponse
-from ..constants import _DEFAULT_MAX_RETRIES
-from ..constants import _DEFAULT_RETRY_INTERVAL
 from ..message import Msg
 from ..manager import FileManager
 from ..utils.common import _convert_to_str
@@ -23,9 +20,10 @@ class StableDiffusionWrapperBase(ModelWrapperBase, ABC):
     To use SD-webui API, please
     1. First download stable-diffusion-webui from
     https://github.com/AUTOMATIC1111/stable-diffusion-webui and
-    install it with 'webui-user.bat'
+    install it
     2. Move your checkpoint to 'models/Stable-diffusion' folder
-    3. Start launch.py with the '--api' parameter to start the server
+    3. Start launch.py with the '--api --port=7862' parameter
+    4. Install the 'webuiapi' package by 'pip install webuiapi'
     After that, you can use the SD-webui API and
     query the available parameters on the http://localhost:7862/docs page
     """
@@ -35,15 +33,10 @@ class StableDiffusionWrapperBase(ModelWrapperBase, ABC):
     def __init__(
         self,
         config_name: str,
-        host: str = "127.0.0.1:7862",
-        base_url: Optional[Union[str, None]] = None,
-        use_https: bool = False,
         generate_args: dict = None,
-        headers: dict = None,
         options: dict = None,
-        timeout: int = 30,
-        max_retries: int = _DEFAULT_MAX_RETRIES,
-        retry_interval: int = _DEFAULT_RETRY_INTERVAL,
+        host: str = "127.0.0.1",
+        port: int = 7862,
         **kwargs: Any,
     ) -> None:
         """
@@ -52,46 +45,29 @@ def __init__(
         Args:
             config_name (`str`):
                 The name of the model config.
-            host (`str`, default `"127.0.0.1:7862"`):
-                The host port of the stable-diffusion webui server.
-            base_url (`str`, default `None`):
-                Base URL for the stable-diffusion webui services.
-                Generated from host and use_https if not provided.
-            use_https (`bool`, default `False`):
-                Whether to generate the base URL with HTTPS protocol or HTTP.
             generate_args (`dict`, default `None`):
                 The extra keyword arguments used in SD api generation,
                 e.g. `{"steps": 50}`.
-            headers (`dict`, default `None`):
-                HTTP request headers.
             options (`dict`, default `None`):
-                The keyword arguments to change the webui settings
+                The keyword arguments to change the sd-webui settings
                 such as model or CLIP skip, this changes will persist.
                 e.g. `{"sd_model_checkpoint": "Anything-V3.0-pruned"}`.
+            host (`str`, default `"127.0.0.1"`):
+                The host of the stable-diffusion webui server.
+            port (`int`, default `7862`):
+                The port of the stable-diffusion webui server.
         """
-        # Construct base_url based on HTTPS usage if not provided
-        if base_url is None:
-            if use_https:
-                base_url = f"https://{host}"
-            else:
-                base_url = f"http://{host}"
-
-        self.base_url = base_url
-        self.options_url = f"{base_url}/sdapi/v1/options"
+        # Initialize the SD-webui API
+        self.api = webuiapi.WebUIApi(host=host, port=port, **kwargs)
         self.generate_args = generate_args or {}
 
-        # Initialize the HTTP session and update the request headers
-        self.session = requests.Session()
-        if headers:
-            self.session.headers.update(headers)
-
         # Set options if provided
         if options:
-            self._set_options(options)
+            self.api.set_options(options)
 
         # Get the default model name from the web-options
         model_name = (
-            self._get_options()["sd_model_checkpoint"].split("[")[0].strip()
+            self.api.get_options()["sd_model_checkpoint"].split("[")[0].strip()
         )
         # Update the model name
         if self.generate_args.get("override_settings"):
@@ -102,116 +78,29 @@ def __init__(
 
         super().__init__(config_name=config_name, model_name=model_name)
 
-        self.timeout = timeout
-        self.max_retries = max_retries
-        self.retry_interval = retry_interval
-
-    @property
-    def url(self) -> str:
-        """SD-webui API endpoint URL"""
-        raise NotImplementedError()
-
-    def _get_options(self) -> dict:
-        response = self.session.get(url=self.options_url)
-        if response.status_code != 200:
-            logger.error(f"Failed to get options with {response.json()}")
-            raise RuntimeError(f"Failed to get options with {response.json()}")
-        return response.json()
-
-    def _set_options(self, options: dict) -> None:
-        response = self.session.post(url=self.options_url, json=options)
-        if response.status_code != 200:
-            logger.error(json.dumps(options, indent=4))
-            raise RuntimeError(f"Failed to set options with {response.json()}")
-        logger.info("Optionsset successfully")
-
-    def _invoke_model(self, payload: dict) -> dict:
-        """Invoke SD webui API and record the invocation if needed"""
-        # step1: prepare post requests
-        for i in range(1, self.max_retries + 1):
-            response = self.session.post(url=self.url, json=payload)
-
-            if response.status_code == requests.codes.ok:
-                break
-
-            if i < self.max_retries:
-                logger.warning(
-                    f"Failed to call the model with "
-                    f"requests.codes == {response.status_code}, retry "
-                    f"{i + 1}/{self.max_retries} times",
-                )
-                time.sleep(i * self.retry_interval)
-
-        # step2: record model invocation
-        # record the model api invocation, which will be skipped if
-        # `FileManager.save_api_invocation` is `False`
-        self._save_model_invocation(
-            arguments=payload,
-            response=response.json(),
-        )
-
-        # step3: return the response json
-        if response.status_code == requests.codes.ok:
-            return response.json()
-        else:
-            logger.error(
-                json.dumps({"url": self.url, "json": payload}, indent=4),
-            )
-            raise RuntimeError(
-                f"Failed to call the model with {response.json()}",
-            )
-
-    def _parse_response(self, response: dict) -> ModelResponse:
-        """Parse the response json data into ModelResponse"""
-        return ModelResponse(raw=response)
-
 
 class StableDiffusionImageSynthesisWrapper(StableDiffusionWrapperBase):
     """Stable Diffusion Text-to-Image (txt2img) API Wrapper"""
 
     model_type: str = "sd_txt2img"
 
-    @property
-    def url(self) -> str:
-        return f"{self.base_url}/sdapi/v1/txt2img"
-
-    def _parse_response(self, response: dict) -> ModelResponse:
-        session_parameters = response["parameters"]
-        size = f"{session_parameters['width']}*{session_parameters['height']}"
-        image_count = (
-            session_parameters["batch_size"] * session_parameters["n_iter"]
-        )
-
-        self.monitor.update_image_tokens(
-            model_name=self.model_name,
-            image_count=image_count,
-            resolution=size,
-        )
-
-        # Get image base64code as a list
-        images = response["images"]
-        b64_images = [base64.b64decode(image) for image in images]
-
-        file_manager = FileManager.get_instance()
-        # Return local url
-        image_urls = [file_manager.save_image(_) for _ in b64_images]
-        text = "Image saved to " + "\n".join(image_urls)
-        return ModelResponse(text=text, image_urls=image_urls, raw=response)
-
     def __call__(
         self,
         prompt: str,
+        save_local: bool = True,
         **kwargs: Any,
     ) -> ModelResponse:
         """
         Args:
             prompt (`str`):
                 The prompt string to generate images from.
+            save_local (`bool`, default `True`):
+                Whether to save the generated images locally.
             **kwargs (`Any`):
                 The keyword arguments to SD-webui txt2img API, e.g.
                 `n_iter`, `steps`, `seed`, `width`, etc. Please refer to
                 https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/API
-                or http://localhost:7860/docs
+                or http://localhost:7862/docs
                 for more detailed arguments.
         Returns:
             `ModelResponse`:
@@ -226,10 +115,61 @@ def __call__(
         }
 
         # step2: forward to generate response
-        response = self._invoke_model(payload)
+        response = self.api.txt2img(**payload)
+
+        # step3: save model invocation and update monitor
+        self._save_model_invocation_and_update_monitor(
+            payload=payload,
+            response=response.json,
+        )
+
+        # step4: parse the response
+        PIL_images = response.images
+
+        file_manager = FileManager.get_instance()
+        if save_local:
+            # Save images
+            image_urls = [file_manager.save_image(_) for _ in PIL_images]
+            text = "Image saved to " + "\n".join(image_urls)
+        else:
+            image_urls = PIL_images
+            text = None
+
+        return ModelResponse(
+            text=text,
+            image_urls=image_urls,
+            raw=response.json,
+        )
+
+    def _save_model_invocation_and_update_monitor(
+        self,
+        payload: dict,
+        response: dict,
+    ) -> None:
+        """Save the model invocation and update the monitor accordingly.
+
+        Args:
+            payload (`dict`):
+                The keyword arguments sent to the SD-webui txt2img API.
+            response (`dict`):
+                The response dict returned by the SD-webui txt2img API.
+        """
+        self._save_model_invocation(
+            arguments=payload,
+            response=response,
+        )
+
+        session_parameters = response["parameters"]
+        size = f"{session_parameters['width']}*{session_parameters['height']}"
+        image_count = (
+            session_parameters["batch_size"] * session_parameters["n_iter"]
+        )
 
-        # step3: parse the response
-        return self._parse_response(response)
+        self.monitor.update_image_tokens(
+            model_name=self.model_name,
+            image_count=image_count,
+            resolution=size,
+        )
 
     def format(self, *args: Union[Msg, Sequence[Msg]]) -> str:
         # This is a temporary implementation to focus on the prompt
diff --git a/src/agentscope/service/__init__.py b/src/agentscope/service/__init__.py
index 7d33e6501..20c1af051 100644
--- a/src/agentscope/service/__init__.py
+++ b/src/agentscope/service/__init__.py
@@ -45,6 +45,7 @@
     openai_edit_image,
     openai_create_image_variation,
 )
+from .multi_modality.stablediffusion_services import sd_text_to_image
 
 from .service_response import ServiceResponse
 from .service_toolkit import ServiceToolkit
@@ -117,6 +118,7 @@ def get_help() -> None:
     "openai_image_to_text",
     "openai_edit_image",
     "openai_create_image_variation",
+    "sd_text_to_image",
     "tripadvisor_search",
     "tripadvisor_search_location_photos",
     "tripadvisor_search_location_details",
diff --git a/src/agentscope/service/multi_modality/stablediffusion_services.py b/src/agentscope/service/multi_modality/stablediffusion_services.py
new file mode 100644
index 000000000..4547aa115
--- /dev/null
+++ b/src/agentscope/service/multi_modality/stablediffusion_services.py
@@ -0,0 +1,122 @@
+# -*- coding: utf-8 -*-
+"""Use StableDiffusion-webui API to generate images
+"""
+import os
+from typing import Optional
+
+from ...models import StableDiffusionImageSynthesisWrapper
+
+from ...manager import FileManager
+from ..service_response import (
+    ServiceResponse,
+    ServiceExecStatus,
+)
+from ...utils.common import (
+    _get_timestamp,
+    _generate_random_code,
+)
+from ...constants import _DEFAULT_IMAGE_NAME
+
+
+def sd_text_to_image(
+    prompt: str,
+    n_iter: int = 1,
+    width: int = 1024,
+    height: int = 1024,
+    options: dict = None,
+    baseurl: str = None,
+    save_dir: Optional[str] = None,
+) -> ServiceResponse:
+    """Generate image(s) based on the given prompt, and return image url(s).
+
+    Args:
+        prompt (`str`):
+            The text prompt to generate image.
+        n_iter (`int`, defaults to `1`):
+            The number of generation iterations, forwarded as `n_iter`.
+        width (`int`, defaults to `1024`):
+            Width of the image.
+        height (`int`, defaults to `1024`):
+            Height of the image.
+        options (`dict`, defaults to `None`):
+            Settings forwarded to the wrapper as `override_settings`.
+            Omitted from the call when empty or `None`.
+        baseurl (`str`, defaults to `None`):
+            The base url of the sd-webui.
+        save_dir (`Optional[str]`, defaults to `None`):
+            The directory to save the generated images. If not specified,
+            images are saved via `FileManager.save_image` instead.
+
+    Returns:
+        ServiceResponse:
+        A dictionary with two variables: `status` and `content`.
+        On ServiceExecStatus.SUCCESS, `content` is a dict whose key
+        'image_urls' maps to a list of saved image locations.
+        On ServiceExecStatus.ERROR, `content` is an error message string.
+
+    Example:
+
+        .. code-block:: python
+
+            prompt = "A beautiful sunset in the mountains"
+            print(sd_text_to_image(prompt, 2))
+
+    > {
+    >     'status': 'SUCCESS',
+    >     'content': {'image_urls': ['IMAGE_URL1', 'IMAGE_URL2']}
+    > }
+
+    """
+    text2img = StableDiffusionImageSynthesisWrapper(
+        config_name="sd-text-to-image-service",  # Just a placeholder
+        baseurl=baseurl,
+    )
+    try:
+        kwargs = {"n_iter": n_iter, "width": width, "height": height}
+        if options:
+            kwargs["override_settings"] = options
+
+        res = text2img(prompt=prompt, save_local=False, **kwargs)
+        images = res.image_urls
+
+        # save images to save_dir
+        if images is not None:
+            if save_dir:
+                os.makedirs(save_dir, exist_ok=True)
+                urls_local = []
+                # Build a file name from a timestamp and a random code
+                for image in images:
+                    image_name = _DEFAULT_IMAGE_NAME.format(
+                        _get_timestamp(
+                            "%Y%m%d-%H%M%S",
+                        ),
+                        _generate_random_code(),
+                    )
+                    image_path = os.path.abspath(
+                        os.path.join(save_dir, image_name),
+                    )
+                    # Write the image object to image_path
+                    image.save(image_path)
+                    urls_local.append(image_path)
+                return ServiceResponse(
+                    ServiceExecStatus.SUCCESS,
+                    {"image_urls": urls_local},
+                )
+            else:
+                # No save_dir: store each image through the file manager
+                file_manager = FileManager.get_instance()
+                urls = [file_manager.save_image(_) for _ in images]
+                return ServiceResponse(
+                    ServiceExecStatus.SUCCESS,
+                    {"image_urls": urls},
+                )
+        else:
+            return ServiceResponse(
+                ServiceExecStatus.ERROR,
+                "Error: Failed to generate images",
+            )
+    except Exception as e:
+        return ServiceResponse(
+            ServiceExecStatus.ERROR,
+            str(e),
+        )

From 62d80cfa46a0b8db90941c839577e658939a8d51 Mon Sep 17 00:00:00 2001
From: cmgzn <zdongs@outlook.com>
Date: Fri, 27 Sep 2024 17:42:17 +0800
Subject: [PATCH 13/16] fix: correct text assignment in ModelResponse

---
 src/agentscope/models/stablediffusion_model.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/agentscope/models/stablediffusion_model.py b/src/agentscope/models/stablediffusion_model.py
index 1d5d20f54..e1948e38a 100644
--- a/src/agentscope/models/stablediffusion_model.py
+++ b/src/agentscope/models/stablediffusion_model.py
@@ -133,7 +133,7 @@ def __call__(
             text = "Image saved to " + "\n".join(image_urls)
         else:
             image_urls = PIL_images
-            text = None
+            text = ""  # Just a placeholder
 
         return ModelResponse(
             text=text,

From 07f70f68e4d91b99f2dcebbd3a8194e9c1209f3f Mon Sep 17 00:00:00 2001
From: cmgzn <zdongs@outlook.com>
Date: Sun, 29 Sep 2024 15:06:53 +0800
Subject: [PATCH 14/16] docs: add minimum hardware requirements

---
 .../conversation_with_stablediffusion_model/README.md    | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/examples/conversation_with_stablediffusion_model/README.md b/examples/conversation_with_stablediffusion_model/README.md
index 016e11cd8..59e3a7270 100644
--- a/examples/conversation_with_stablediffusion_model/README.md
+++ b/examples/conversation_with_stablediffusion_model/README.md
@@ -7,6 +7,13 @@ This example will show
 In this example, you can interact in a conversational format to generate images.
 Once the image is generated, the agent will respond with the local file path where the image is saved.
 
+## Minimum Hardware Requirements
+
+- **GPU**: NVIDIA GPU with at least 6.9GB of VRAM
+- **CPU**: Modern multi-core CPU (e.g., Intel i5 or AMD Ryzen 5)
+- **RAM**: Minimum 8GB
+- **Storage**: At least 10GB of available hard drive space
+
 ## How to Run
 
 You need to satisfy the following requirements to run this example:
@@ -37,7 +44,7 @@ Activate the virtual environment first, Then, run the following command in your
 bash scripts/stable_diffusion_webui/sd_setup.sh -s YOUR-SD-WEBUI-PATH
 ```
 
-If you choose to start it on your own, you need to launch the Stable Diffusion Web UI with the following arguments: --api --port=7862. For more detailed instructions on starting the WebUI, refer to the [AUTOMATIC1111/stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui).
+If you choose to start it on your own, you need to launch the Stable Diffusion Web UI with the following arguments: `--api --port=7862`. For more detailed instructions on starting the WebUI, refer to the [AUTOMATIC1111/stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui).
 
 ### Step 3: Running the Example
 

From 612b1b6458bab3aaac0018f556f1846e845d76ee Mon Sep 17 00:00:00 2001
From: cmgzn <zdongs@outlook.com>
Date: Thu, 9 Jan 2025 16:00:25 +0800
Subject: [PATCH 15/16] include StableDiffusionImageSynthesisWrapper in models
 init

---
 src/agentscope/models/__init__.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/agentscope/models/__init__.py b/src/agentscope/models/__init__.py
index 7b26c6b51..fafa0a746 100644
--- a/src/agentscope/models/__init__.py
+++ b/src/agentscope/models/__init__.py
@@ -62,6 +62,7 @@
     "LiteLLMChatWrapper",
     "YiChatWrapper",
     "AnthropicChatWrapper",
+    "StableDiffusionImageSynthesisWrapper",
 ]
 
 __all__ = [

From 6b9d416d8c061dfa659f95c9ba57fb591dbfc226 Mon Sep 17 00:00:00 2001
From: cmgzn <zdongs@outlook.com>
Date: Thu, 9 Jan 2025 17:25:40 +0800
Subject: [PATCH 16/16] add StableDiffusionImageSynthesisWrapper test case

---
 tests/model_test.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/model_test.py b/tests/model_test.py
index dafc02c79..37682cc88 100644
--- a/tests/model_test.py
+++ b/tests/model_test.py
@@ -28,6 +28,7 @@
     OpenAIChatWrapper,
     PostAPIChatWrapper,
     AnthropicChatWrapper,
+    StableDiffusionImageSynthesisWrapper,
 )
 
 
@@ -79,6 +80,7 @@ def test_build_in_model_wrapper_classes(self) -> None:
                 "litellm_chat": LiteLLMChatWrapper,
                 "yi_chat": YiChatWrapper,
                 "anthropic_chat": AnthropicChatWrapper,
+                "sd_txt2img": StableDiffusionImageSynthesisWrapper,
             },
         )