From a5f31e2b80565fccdf5c47e9430425c64d4d871a Mon Sep 17 00:00:00 2001 From: cmgzn Date: Wed, 11 Sep 2024 11:19:48 +0800 Subject: [PATCH 01/16] add stablediffusion_model.py --- src/agentscope/models/__init__.py | 4 + .../models/stablediffusion_model.py | 215 ++++++++++++++++++ 2 files changed, 219 insertions(+) create mode 100644 src/agentscope/models/stablediffusion_model.py diff --git a/src/agentscope/models/__init__.py b/src/agentscope/models/__init__.py index 0a6894b35..d32731f91 100644 --- a/src/agentscope/models/__init__.py +++ b/src/agentscope/models/__init__.py @@ -41,6 +41,9 @@ from .yi_model import ( YiChatWrapper, ) +from .stablediffusion_model import( + StableDiffusionTxt2imgWrapper +) __all__ = [ "ModelWrapperBase", @@ -64,6 +67,7 @@ "ZhipuAIEmbeddingWrapper", "LiteLLMChatWrapper", "YiChatWrapper", + "StableDiffusionTxt2imgWrapper", ] diff --git a/src/agentscope/models/stablediffusion_model.py b/src/agentscope/models/stablediffusion_model.py new file mode 100644 index 000000000..a01b1e8f2 --- /dev/null +++ b/src/agentscope/models/stablediffusion_model.py @@ -0,0 +1,215 @@ +# -*- coding: utf-8 -*- +"""Model wrapper for stable diffusion models.""" +from abc import ABC +import base64 +from typing import Any, Optional, Union, List, Sequence + +from . import ModelWrapperBase, ModelResponse +from ..message import Msg +from ..manager import FileManager +import requests +from ..utils.common import _convert_to_str + + +class StableDiffusionWrapperBase(ModelWrapperBase, ABC): + """The base class for stable-diffusion model wrappers. + + To use SD API, please + 1. First download stable-diffusion-webui from https://github.com/AUTOMATIC1111/stable-diffusion-webui and + install it with 'webui-user.bat' + 2. Move your checkpoint to 'models/Stable-diffusion' folder + 3. Start launch.py with the '--api' parameter to start the server + After that, you can use the SD-webui API and + query the available parameters on the http://localhost:7860/docs page + """ + + model_type: str + """The type of the model wrapper, which is to identify the model wrapper + class in model configuration.""" + + options: dict + """A dict contains the options for stable-diffusion option API. + Modifications made through this parameter are persistent, meaning they will + remain in effect for subsequent generation requests until explicitly changed or reset. + e.g. {"sd_model_checkpoint": "Anything-V3.0-pruned", "CLIP_stop_at_last_layers": 2}""" + + def __init__( + self, + config_name: str, + options: dict = None, + generate_args: dict = None, + url: Optional[Union[str, None]] = None, + **kwargs: Any, + ) -> None: + """Initialize the model wrapper for SD-webui API. + + Args: + options (`dict`, default `None`): + The keyword arguments to change the webui settings + such as model or CLIP skip, this changes will persist across sessions. + e.g. `{"sd_model_checkpoint": "Anything-V3.0-pruned", "CLIP_stop_at_last_layers": 2}`. + generate_args (`dict`, default `None`): + The extra keyword arguments used in SD-webui api generation, + e.g. `steps`, `seed`. + url (`str`, default `None`): + The url of the SD-webui server. + Defaults to `None`, which is http://127.0.0.1:7860. + """ + if url is None: + url = "http://127.0.0.1:7860" + + self.url = url + self.generate_args = generate_args or {} + + options_url = f"{self.url}/sdapi/v1/options" + # Get the current default model + default_model_name = ( + requests.get(options_url) + .json()["sd_model_checkpoint"] + .split("[")[0] + .strip() + ) + + if options is not None: + # Update webui options if needed + requests.post(options_url, json=options) + model_name = options.get("sd_model_checkpoint", default_model_name) + else: + model_name = default_model_name + + super().__init__(config_name=config_name, model_name=model_name) + + def format( + self, + *args: Union[Msg, Sequence[Msg]], + ) -> Union[List[dict], str]: + raise RuntimeError( + f"Model Wrapper [{type(self).__name__}] doesn't " + f"need to format the input. Please try to use the " + f"model wrapper directly.", + ) + + +class StableDiffusionTxt2imgWrapper(StableDiffusionWrapperBase): + + model_type: str = "sd_txt2img" + + def __call__( + self, + prompt: str, + **kwargs: Any, + ) -> ModelResponse: + """ + Args: + prompt (`str`): + The prompt string to generate images from. + **kwargs (`Any`): + The keyword arguments to SD-webui txt2img API, e.g. + `n_iter`, `steps`, `seed`, `width`, etc. Please refer to + https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/API#api-guide-by-kilvoctu + or http://localhost:7860/docs + for more detailed arguments. + + Returns: + `ModelResponse`: + A list of image local urls in image_urls field and the + raw response in raw field. + """ + + # step1: prepare keyword arguments + payload = { + "prompt": prompt, + **kwargs, + **self.generate_args, + } + + # step2: forward to generate response + txt2img_url = f"{self.url}/sdapi/v1/txt2img" + response = requests.post(url=txt2img_url, json=payload) + + if response.status_code != requests.codes.ok: + error_msg = f" Status code: {response.status_code}," + raise RuntimeError(error_msg) + + # step3: record the model api invocation if needed + output = response.json() + self._save_model_invocation( + arguments={ + "model": self.model_name, + **payload, + }, + response=output, + ) + + # step4: update monitor accordingly + session_parameters = output["parameters"] + size = f"{session_parameters['width']}*{session_parameters['height']}" + image_count = session_parameters["batch_size"] * session_parameters["n_iter"] + + self.monitor.update_image_tokens( + model_name=self.model_name, + image_count=image_count, + resolution=size, + ) + + # step5: return response + # Get image base64code as a list + images = output["images"] + b64_images = [base64.b64decode(image) for image in images] + + file_manager = FileManager.get_instance() + # Return local url + urls = [file_manager.save_image(_) for _ in b64_images] + text = "Image saved to " + "\n".join(urls) + return ModelResponse(text=text, image_urls=urls, raw=response) + + def format(self, *args: Msg | Sequence[Msg]) -> List[dict] | str: + # This is a temporary implementation to focus on the prompt + # on single-turn image generation by preserving only the system prompt and + # the last user message. This logic might change in the future to support + # more complex conversational scenarios + if len(args) == 0: + raise ValueError( + "At least one message should be provided. An empty message " + "list is not allowed.", + ) + + # Parse all information into a list of messages + input_msgs = [] + for _ in args: + if _ is None: + continue + if isinstance(_, Msg): + input_msgs.append(_) + elif isinstance(_, list) and all(isinstance(__, Msg) for __ in _): + input_msgs.extend(_) + else: + raise TypeError( + f"The input should be a Msg object or a list " + f"of Msg objects, got {type(_)}.", + ) + + # record user message history as a list of strings + user_messages = [] + sys_prompt = None + for i, unit in enumerate(input_msgs): + if i == 0 and unit.role == "system": + # if system prompt is available, place it at the beginning + sys_prompt = _convert_to_str(unit.content) + elif unit.role == "user": + # Merge user messages into a conversation history prompt + user_messages.append(_convert_to_str(unit.content)) + else: + continue + + content_components = [] + # Add system prompt at the beginning if provided + if sys_prompt is not None: + content_components.append(sys_prompt) + # Add the last user message if the user messages is not empty + if len(user_messages) > 0: + content_components.append(user_messages[-1]) + + prompt = ",".join(content_components) + + return prompt From 992695db3a6f80d4b11e200cd41a5bd61fe31ae3 Mon Sep 17 00:00:00 2001 From: cmgzn Date: Wed, 11 Sep 2024 11:19:48 +0800 Subject: [PATCH 02/16] update stablediffusion_model.py --- .../models/stablediffusion_model.py | 247 +++++++++++------- 1 file changed, 158 insertions(+), 89 deletions(-) diff --git a/src/agentscope/models/stablediffusion_model.py b/src/agentscope/models/stablediffusion_model.py index a01b1e8f2..3b0a166a1 100644 --- a/src/agentscope/models/stablediffusion_model.py +++ b/src/agentscope/models/stablediffusion_model.py @@ -2,19 +2,25 @@ """Model wrapper for stable diffusion models.""" from abc import ABC import base64 +import json +import time from typing import Any, Optional, Union, List, Sequence +import requests +from loguru import logger + from . import ModelWrapperBase, ModelResponse +from ..constants import _DEFAULT_MAX_RETRIES +from ..constants import _DEFAULT_RETRY_INTERVAL from ..message import Msg from ..manager import FileManager -import requests from ..utils.common import _convert_to_str class StableDiffusionWrapperBase(ModelWrapperBase, ABC): """The base class for stable-diffusion model wrappers. - To use SD API, please + To use SD-webui API, please 1. First download stable-diffusion-webui from https://github.com/AUTOMATIC1111/stable-diffusion-webui and install it with 'webui-user.bat' 2. Move your checkpoint to 'models/Stable-diffusion' folder @@ -23,77 +29,176 @@ class StableDiffusionWrapperBase(ModelWrapperBase, ABC): query the available parameters on the http://localhost:7860/docs page """ - model_type: str - """The type of the model wrapper, which is to identify the model wrapper - class in model configuration.""" - - options: dict - """A dict contains the options for stable-diffusion option API. - Modifications made through this parameter are persistent, meaning they will - remain in effect for subsequent generation requests until explicitly changed or reset. - e.g. {"sd_model_checkpoint": "Anything-V3.0-pruned", "CLIP_stop_at_last_layers": 2}""" + model_type: str = "stable_diffusion" def __init__( self, config_name: str, - options: dict = None, + host: str = "127.0.0.1:7860", + base_url: Optional[Union[str, None]] = None, + use_https: bool = False, generate_args: dict = None, - url: Optional[Union[str, None]] = None, + headers: dict = None, + options: dict = None, + timeout: int = 30, + max_retries: int = _DEFAULT_MAX_RETRIES, + retry_interval: int = _DEFAULT_RETRY_INTERVAL, **kwargs: Any, ) -> None: - """Initialize the model wrapper for SD-webui API. + """ + Initializes the SD-webui API client. Args: - options (`dict`, default `None`): + config_name (`str`): + The name of the model config. + host (`str`, default `"127.0.0.1:7860"`): + The host port of the stable-diffusion webui server. + base_url (`str`, default `None`): + Base URL for the stable-diffusion webui services. If not provided, it will be generated based on `host` and `use_https`. + use_https (`bool`, default `False`): + Whether to generate the base URL with HTTPS protocol or HTTP. + generate_args (`dict`, default `None`): + The extra keyword arguments used in SD api generation, + e.g. `{"steps": 50}`. + headers (`dict`, default `None`): + HTTP request headers. + options (`dict`, default `None`): The keyword arguments to change the webui settings such as model or CLIP skip, this changes will persist across sessions. e.g. `{"sd_model_checkpoint": "Anything-V3.0-pruned", "CLIP_stop_at_last_layers": 2}`. - generate_args (`dict`, default `None`): - The extra keyword arguments used in SD-webui api generation, - e.g. `steps`, `seed`. - url (`str`, default `None`): - The url of the SD-webui server. - Defaults to `None`, which is http://127.0.0.1:7860. """ - if url is None: - url = "http://127.0.0.1:7860" + # If base_url is not provided, construct it based on whether HTTPS is used + if base_url is None: + if use_https: + base_url = f"https://{host}" + else: + base_url = f"http://{host}" - self.url = url + self.base_url = base_url + self.options_url = f"{base_url}/sdapi/v1/options" self.generate_args = generate_args or {} - options_url = f"{self.url}/sdapi/v1/options" - # Get the current default model - default_model_name = ( - requests.get(options_url) - .json()["sd_model_checkpoint"] - .split("[")[0] - .strip() + # Initialize the HTTP session and update the request headers + self.session = requests.Session() + if headers: + self.session.headers.update(headers) + + # Set options if provided + if options: + self._set_options(options) + + # Get the default model name from the web-options + model_name = self._get_options()["sd_model_checkpoint"].split("[")[0].strip() + # Update the model name if override_settings is provided in generate_args + if self.generate_args.get("override_settings"): + model_name = generate_args["override_settings"].get( + "sd_model_checkpoint", model_name + ) + + super().__init__(config_name=config_name, model_name=model_name) + + self.timeout = timeout + self.max_retries = max_retries + self.retry_interval = retry_interval + + @property + def url(self): + """SD-webui API endpoint URL""" + raise NotImplementedError() + + def _get_options(self) -> dict: + response = self.session.get(url=self.options_url) + if response.status_code != 200: + logger.error(f"Failed to get options with {response.json()}") + raise RuntimeError(f"Failed to get options with {response.json()}") + return response.json() + + def _set_options(self, options) -> None: + response = self.session.post(url=self.options_url, json=options) + if response.status_code != 200: + logger.error(json.dumps(options, indent=4)) + raise RuntimeError(f"Failed to set options with {response.json()}") + else: + logger.info("Optionsset successfully") + + def _invoke_model(self, payload: dict) -> dict: + """Invoke SD webui API and record the invocation if needed""" + # step1: prepare post requests + for i in range(1, self.max_retries + 1): + response = self.session.post(url=self.url, json=payload) + + if response.status_code == requests.codes.ok: + break + + if i < self.max_retries: + logger.warning( + f"Failed to call the model with " + f"requests.codes == {response.status_code}, retry " + f"{i + 1}/{self.max_retries} times", + ) + time.sleep(i * self.retry_interval) + + # step2: record model invocation + # record the model api invocation, which will be skipped if + # `FileManager.save_api_invocation` is `False` + self._save_model_invocation( + arguments=payload, + response=response.json(), ) - if options is not None: - # Update webui options if needed - requests.post(options_url, json=options) - model_name = options.get("sd_model_checkpoint", default_model_name) + # step3: return the response json + if response.status_code == requests.codes.ok: + return response.json() else: - model_name = default_model_name + logger.error(json.dumps({"url": self.url, "json": payload}, indent=4)) + raise RuntimeError( + f"Failed to call the model with {response.json()}", + ) - super().__init__(config_name=config_name, model_name=model_name) + def _parse_response(self, response: dict) -> ModelResponse: + """Parse the response json data into ModelResponse""" + return ModelResponse(raw=response) + + def __call__(self, **kwargs: Any) -> ModelResponse: + payload = { + **self.generate_args, + **kwargs, + } + response = self._invoke_model(payload) + return self._parse_response(response) - def format( - self, - *args: Union[Msg, Sequence[Msg]], - ) -> Union[List[dict], str]: - raise RuntimeError( - f"Model Wrapper [{type(self).__name__}] doesn't " - f"need to format the input. Please try to use the " - f"model wrapper directly.", - ) class StableDiffusionTxt2imgWrapper(StableDiffusionWrapperBase): + """Stable Diffusion txt2img API wrapper""" model_type: str = "sd_txt2img" + @property + def url(self): + return f"{self.base_url}/sdapi/v1/txt2img" + + def _parse_response(self, response: dict) -> ModelResponse: + session_parameters = response["parameters"] + size = f"{session_parameters['width']}*{session_parameters['height']}" + image_count = session_parameters["batch_size"] * session_parameters["n_iter"] + + self.monitor.update_image_tokens( + model_name=self.model_name, + image_count=image_count, + resolution=size, + ) + + # Get image base64code as a list + images = response["images"] + b64_images = [base64.b64decode(image) for image in images] + + file_manager = FileManager.get_instance() + # Return local url + image_urls = [file_manager.save_image(_) for _ in b64_images] + text = "Image saved to " + "\n".join(image_urls) + return ModelResponse(text=text, image_urls=image_urls, raw=response) + def __call__( self, prompt: str, @@ -109,13 +214,11 @@ def __call__( https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/API#api-guide-by-kilvoctu or http://localhost:7860/docs for more detailed arguments. - Returns: `ModelResponse`: A list of image local urls in image_urls field and the raw response in raw field. """ - # step1: prepare keyword arguments payload = { "prompt": prompt, @@ -124,49 +227,15 @@ def __call__( } # step2: forward to generate response - txt2img_url = f"{self.url}/sdapi/v1/txt2img" - response = requests.post(url=txt2img_url, json=payload) - - if response.status_code != requests.codes.ok: - error_msg = f" Status code: {response.status_code}," - raise RuntimeError(error_msg) - - # step3: record the model api invocation if needed - output = response.json() - self._save_model_invocation( - arguments={ - "model": self.model_name, - **payload, - }, - response=output, - ) - - # step4: update monitor accordingly - session_parameters = output["parameters"] - size = f"{session_parameters['width']}*{session_parameters['height']}" - image_count = session_parameters["batch_size"] * session_parameters["n_iter"] + response = self._invoke_model(payload) - self.monitor.update_image_tokens( - model_name=self.model_name, - image_count=image_count, - resolution=size, - ) - - # step5: return response - # Get image base64code as a list - images = output["images"] - b64_images = [base64.b64decode(image) for image in images] - - file_manager = FileManager.get_instance() - # Return local url - urls = [file_manager.save_image(_) for _ in b64_images] - text = "Image saved to " + "\n".join(urls) - return ModelResponse(text=text, image_urls=urls, raw=response) + # step3: parse the response + return self._parse_response(response) def format(self, *args: Msg | Sequence[Msg]) -> List[dict] | str: - # This is a temporary implementation to focus on the prompt - # on single-turn image generation by preserving only the system prompt and - # the last user message. This logic might change in the future to support + # This is a temporary implementation to focus on the prompt + # on single-turn image generation by preserving only the system prompt and + # the last user message. This logic might change in the future to support # more complex conversational scenarios if len(args) == 0: raise ValueError( @@ -204,7 +273,7 @@ def format(self, *args: Msg | Sequence[Msg]) -> List[dict] | str: content_components = [] # Add system prompt at the beginning if provided - if sys_prompt is not None: + if sys_prompt: content_components.append(sys_prompt) # Add the last user message if the user messages is not empty if len(user_messages) > 0: From fb269e11d6347718822cc8505f29e9a7de5a307e Mon Sep 17 00:00:00 2001 From: cmgzn Date: Wed, 18 Sep 2024 14:02:51 +0800 Subject: [PATCH 03/16] add stable-diffusion conversation example --- .../README.md | 27 ++++++++++ ...conversation_with_stablediffusion_model.py | 54 +++++++++++++++++++ 2 files changed, 81 insertions(+) create mode 100644 examples/conversation_with_stablediffusion_model/README.md create mode 100644 examples/conversation_with_stablediffusion_model/conversation_with_stablediffusion_model.py diff --git a/examples/conversation_with_stablediffusion_model/README.md b/examples/conversation_with_stablediffusion_model/README.md new file mode 100644 index 000000000..79819817b --- /dev/null +++ b/examples/conversation_with_stablediffusion_model/README.md @@ -0,0 +1,27 @@ +# Conversation with Stable-diffusion model + +This example will show +- How to use Stable Diffusion models in AgentScope. + +In this example, you can interact in a conversational format to generate images. +Once the image is generated, the agent will respond with the local file path where the image is saved. + +## Prerequisites + +You need to satisfy the following requirements to run this example: + +- Install Stable Diffusion Web UI by following the instructions at [AUTOMATIC1111/stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui). +- Launching the Stable Diffusion Web UI with arguments: --api +- Ensure that your host can successfully access `http://127.0.0.1:7860/`(default) any other specified host and port you choose. +- Install the latest version of AgentScope by + ```bash + git clone https://github.com/modelscope/agentscope.git + cd agentscope + pip install -e . + ``` + +## Running the Example +Run the example and input your questions. +```bash +python conversation_with_stablediffusion_model.py +``` \ No newline at end of file diff --git a/examples/conversation_with_stablediffusion_model/conversation_with_stablediffusion_model.py b/examples/conversation_with_stablediffusion_model/conversation_with_stablediffusion_model.py new file mode 100644 index 000000000..ae96ce1ac --- /dev/null +++ b/examples/conversation_with_stablediffusion_model/conversation_with_stablediffusion_model.py @@ -0,0 +1,54 @@ +# -*- coding: utf-8 -*- +"""A simple example for conversation between user and stable-diffusion agent.""" +import agentscope +from agentscope.agents import DialogAgent +from agentscope.agents.user_agent import UserAgent + + +def main() -> None: + """A basic conversation demo""" + + agentscope.init( + model_configs=[ + { + "model_type": "sd_txt2img", + "config_name": "sd", + "options": { + "sd_model_checkpoint": "xxxxxx", + "CLIP_stop_at_last_layers": 2, + }, # global settings, for detailed parameters + # please refer to 127.0.0.1:7860/docs#/default/get_config_sdapi_v1_options_get + "generate_args": { + "steps": 50, + "n_iter": 1, + "override_settings": { + "CLIP_stop_at_last_layers": 3, + # settings effective only for this conversation + # The parameters are consistent with the global settings. + }, + }, + }, + ], + project="txt2img-Agent Conversation", + save_api_invoke=True, + ) + + # Init two agents + dialog_agent = DialogAgent( + name="Assistant", + sys_prompt="high definition,dreamy", # replace by your desired image style prompts + model_config_name="sd", # replace by your model config name + ) + user_agent = UserAgent() + + # start the conversation between user and assistant + msg = None + while True: + msg = user_agent(msg) + if msg.content == "exit": + break + msg = dialog_agent(msg) + + +if __name__ == "__main__": + main() From b40f41f8ceee292a7332cb7900aea467ea3fd4f3 Mon Sep 17 00:00:00 2001 From: cmgzn Date: Wed, 18 Sep 2024 14:04:53 +0800 Subject: [PATCH 04/16] fix readme --- examples/conversation_with_stablediffusion_model/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/conversation_with_stablediffusion_model/README.md b/examples/conversation_with_stablediffusion_model/README.md index 79819817b..6c3f65df2 100644 --- a/examples/conversation_with_stablediffusion_model/README.md +++ b/examples/conversation_with_stablediffusion_model/README.md @@ -12,7 +12,7 @@ You need to satisfy the following requirements to run this example: - Install Stable Diffusion Web UI by following the instructions at [AUTOMATIC1111/stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui). - Launching the Stable Diffusion Web UI with arguments: --api -- Ensure that your host can successfully access `http://127.0.0.1:7860/`(default) any other specified host and port you choose. +- Ensure that your host can successfully access `http://127.0.0.1:7860/`(default) or any other specified host and port you choose. - Install the latest version of AgentScope by ```bash git clone https://github.com/modelscope/agentscope.git From 30fe6f58f1f0f9d4629fc7f77a2104d51bcb01d7 Mon Sep 17 00:00:00 2001 From: cmgzn Date: Thu, 19 Sep 2024 17:35:57 +0800 Subject: [PATCH 05/16] fix stablediffusion_model.py --- src/agentscope/models/__init__.py | 6 +- .../models/stablediffusion_model.py | 77 +++++++++---------- 2 files changed, 41 insertions(+), 42 deletions(-) diff --git a/src/agentscope/models/__init__.py b/src/agentscope/models/__init__.py index d32731f91..9cde1cd85 100644 --- a/src/agentscope/models/__init__.py +++ b/src/agentscope/models/__init__.py @@ -41,8 +41,8 @@ from .yi_model import ( YiChatWrapper, ) -from .stablediffusion_model import( - StableDiffusionTxt2imgWrapper +from .stablediffusion_model import ( + StableDiffusionImageSynthesisWrapper, ) __all__ = [ @@ -67,7 +67,7 @@ "ZhipuAIEmbeddingWrapper", "LiteLLMChatWrapper", "YiChatWrapper", - "StableDiffusionTxt2imgWrapper", + "StableDiffusionImageSynthesisWrapper", ] diff --git a/src/agentscope/models/stablediffusion_model.py b/src/agentscope/models/stablediffusion_model.py index 3b0a166a1..13c3b0870 100644 --- a/src/agentscope/models/stablediffusion_model.py +++ b/src/agentscope/models/stablediffusion_model.py @@ -21,12 +21,13 @@ class StableDiffusionWrapperBase(ModelWrapperBase, ABC): """The base class for stable-diffusion model wrappers. To use SD-webui API, please - 1. First download stable-diffusion-webui from https://github.com/AUTOMATIC1111/stable-diffusion-webui and + 1. First download stable-diffusion-webui from + https://github.com/AUTOMATIC1111/stable-diffusion-webui and install it with 'webui-user.bat' 2. Move your checkpoint to 'models/Stable-diffusion' folder 3. Start launch.py with the '--api' parameter to start the server After that, you can use the SD-webui API and - query the available parameters on the http://localhost:7860/docs page + query the available parameters on the http://localhost:7862/docs page """ model_type: str = "stable_diffusion" @@ -34,7 +35,7 @@ class StableDiffusionWrapperBase(ModelWrapperBase, ABC): def __init__( self, config_name: str, - host: str = "127.0.0.1:7860", + host: str = "127.0.0.1:7862", base_url: Optional[Union[str, None]] = None, use_https: bool = False, generate_args: dict = None, @@ -51,23 +52,24 @@ def __init__( Args: config_name (`str`): The name of the model config. - host (`str`, default `"127.0.0.1:7860"`): + host (`str`, default `"127.0.0.1:7862"`): The host port of the stable-diffusion webui server. base_url (`str`, default `None`): - Base URL for the stable-diffusion webui services. If not provided, it will be generated based on `host` and `use_https`. + Base URL for the stable-diffusion webui services. + Generated from host and use_https if not provided. use_https (`bool`, default `False`): Whether to generate the base URL with HTTPS protocol or HTTP. - generate_args (`dict`, default `None`): + generate_args (`dict`, default `None`): The extra keyword arguments used in SD api generation, e.g. `{"steps": 50}`. - headers (`dict`, default `None`): - HTTP request headers. - options (`dict`, default `None`): + headers (`dict`, default `None`): + HTTP request headers. + options (`dict`, default `None`): The keyword arguments to change the webui settings - such as model or CLIP skip, this changes will persist across sessions. - e.g. `{"sd_model_checkpoint": "Anything-V3.0-pruned", "CLIP_stop_at_last_layers": 2}`. + such as model or CLIP skip, this changes will persist. + e.g. `{"sd_model_checkpoint": "Anything-V3.0-pruned"}`. """ - # If base_url is not provided, construct it based on whether HTTPS is used + # Construct base_url based on HTTPS usage if not provided if base_url is None: if use_https: base_url = f"https://{host}" @@ -88,11 +90,14 @@ def __init__( self._set_options(options) # Get the default model name from the web-options - model_name = self._get_options()["sd_model_checkpoint"].split("[")[0].strip() - # Update the model name if override_settings is provided in generate_args + model_name = ( + self._get_options()["sd_model_checkpoint"].split("[")[0].strip() + ) + # Update the model name if self.generate_args.get("override_settings"): model_name = generate_args["override_settings"].get( - "sd_model_checkpoint", model_name + "sd_model_checkpoint", + model_name, ) super().__init__(config_name=config_name, model_name=model_name) @@ -100,9 +105,9 @@ def __init__( self.timeout = timeout self.max_retries = max_retries self.retry_interval = retry_interval - + @property - def url(self): + def url(self) -> str: """SD-webui API endpoint URL""" raise NotImplementedError() @@ -113,13 +118,12 @@ def _get_options(self) -> dict: raise RuntimeError(f"Failed to get options with {response.json()}") return response.json() - def _set_options(self, options) -> None: + def _set_options(self, options: dict) -> None: response = self.session.post(url=self.options_url, json=options) if response.status_code != 200: logger.error(json.dumps(options, indent=4)) raise RuntimeError(f"Failed to set options with {response.json()}") - else: - logger.info("Optionsset successfully") + logger.info("Optionsset successfully") def _invoke_model(self, payload: dict) -> dict: """Invoke SD webui API and record the invocation if needed""" @@ -150,7 +154,9 @@ def _invoke_model(self, payload: dict) -> dict: if response.status_code == requests.codes.ok: return response.json() else: - logger.error(json.dumps({"url": self.url, "json": payload}, indent=4)) + logger.error( + json.dumps({"url": self.url, "json": payload}, indent=4), + ) raise RuntimeError( f"Failed to call the model with {response.json()}", ) @@ -159,29 +165,22 @@ def _parse_response(self, response: dict) -> ModelResponse: """Parse the response json data into ModelResponse""" return ModelResponse(raw=response) - def __call__(self, **kwargs: Any) -> ModelResponse: - payload = { - **self.generate_args, - **kwargs, - } - response = self._invoke_model(payload) - return self._parse_response(response) - - -class StableDiffusionTxt2imgWrapper(StableDiffusionWrapperBase): - """Stable Diffusion txt2img API wrapper""" +class StableDiffusionImageSynthesisWrapper(StableDiffusionWrapperBase): + """Stable Diffusion Text-to-Image (txt2img) API Wrapper""" model_type: str = "sd_txt2img" @property - def url(self): + def url(self) -> str: return f"{self.base_url}/sdapi/v1/txt2img" - + def _parse_response(self, response: dict) -> ModelResponse: session_parameters = response["parameters"] size = f"{session_parameters['width']}*{session_parameters['height']}" - image_count = session_parameters["batch_size"] * session_parameters["n_iter"] + image_count = ( + session_parameters["batch_size"] * session_parameters["n_iter"] + ) self.monitor.update_image_tokens( model_name=self.model_name, @@ -211,7 +210,7 @@ def __call__( **kwargs (`Any`): The keyword arguments to SD-webui txt2img API, e.g. `n_iter`, `steps`, `seed`, `width`, etc. Please refer to - https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/API#api-guide-by-kilvoctu + https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/API or http://localhost:7860/docs for more detailed arguments. Returns: @@ -234,9 +233,9 @@ def __call__( def format(self, *args: Msg | Sequence[Msg]) -> List[dict] | str: # This is a temporary implementation to focus on the prompt - # on single-turn image generation by preserving only the system prompt and - # the last user message. This logic might change in the future to support - # more complex conversational scenarios + # on single-turn image generation by preserving only the system prompt + # and the last user message. This logic might change in the future + # to support more complex conversational scenarios if len(args) == 0: raise ValueError( "At least one message should be provided. An empty message " From dbb8c31904a4b5efeee6321a853b6024885594f7 Mon Sep 17 00:00:00 2001 From: cmgzn Date: Thu, 19 Sep 2024 17:35:26 +0800 Subject: [PATCH 06/16] add sd_setup.sh --- .../stable_diffusion_webui/model_config.json | 14 ++++++++ scripts/stable_diffusion_webui/sd_setup.sh | 34 +++++++++++++++++++ 2 files changed, 48 insertions(+) create mode 100644 scripts/stable_diffusion_webui/model_config.json create mode 100644 scripts/stable_diffusion_webui/sd_setup.sh diff --git a/scripts/stable_diffusion_webui/model_config.json b/scripts/stable_diffusion_webui/model_config.json new file mode 100644 index 000000000..823ea406e --- /dev/null +++ b/scripts/stable_diffusion_webui/model_config.json @@ -0,0 +1,14 @@ +{ + "model_type": "sd_txt2img", + "config_name": "stable_diffusion_txt2img", + "host": "127.0.0.1:7862", + "options": { + "sd_model_checkpoint": "Anything-V3.0-pruned", + "sd_lora": "add_detail", + "CLIP_stop_at_last_layers": 2 + }, + "generate_args": { + "steps": 50, + "n_iter": 1 + } + } \ No newline at end of file diff --git a/scripts/stable_diffusion_webui/sd_setup.sh b/scripts/stable_diffusion_webui/sd_setup.sh new file mode 100644 index 000000000..ce71baba8 --- /dev/null +++ b/scripts/stable_diffusion_webui/sd_setup.sh @@ -0,0 +1,34 @@ +#!/bin/bash + +# set VENV_DIR=%~dp0%venv +# call "%VENV_DIR%\Scripts\activate.bat" + +# stable_diffusion_webui_path="YOUR_PATH_TO_STABLE_DIFFUSION_WEBUI" + +port=7862 + +while getopts ":p:s:" opt +do + # shellcheck disable=SC2220 + case $opt in + p) port="$OPTARG";; + s) stable_diffusion_webui_path="$OPTARG" + ;; + esac +done + +stable_diffusion_webui_path=${stable_diffusion_webui_path%/} +launch_py_path="$stable_diffusion_webui_path/launch.py" + +# Check if the launch.py script exists +if [[ ! -f "$launch_py_path" ]]; then + echo "The launch.py script was not found at $launch_py_path." + echo "Please ensure you have specified the correct path to your Stable Diffusion WebUI using the -s option." + echo "Example: ./sd_setup.sh -s /path/to/your/stable-diffusion-webui" + echo "Alternatively, you can set the path directly in the script." + exit 1 +fi + +cd $stable_diffusion_webui_path + +python ./launch.py --api --port=$port From a9c94d675f48370b19409a35a96cda57a5ac8289 Mon Sep 17 00:00:00 2001 From: cmgzn Date: Wed, 18 Sep 2024 14:02:51 +0800 Subject: [PATCH 07/16] fix conversation_with_stablediffusion_model.py --- .../conversation_with_stablediffusion_model.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/examples/conversation_with_stablediffusion_model/conversation_with_stablediffusion_model.py b/examples/conversation_with_stablediffusion_model/conversation_with_stablediffusion_model.py index ae96ce1ac..9a185f9c8 100644 --- a/examples/conversation_with_stablediffusion_model/conversation_with_stablediffusion_model.py +++ b/examples/conversation_with_stablediffusion_model/conversation_with_stablediffusion_model.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -"""A simple example for conversation between user and stable-diffusion agent.""" +"""conversation between user and stable-diffusion agent.""" import agentscope from agentscope.agents import DialogAgent from agentscope.agents.user_agent import UserAgent @@ -16,16 +16,10 @@ def main() -> None: "options": { "sd_model_checkpoint": "xxxxxx", "CLIP_stop_at_last_layers": 2, - }, # global settings, for detailed parameters - # please refer to 127.0.0.1:7860/docs#/default/get_config_sdapi_v1_options_get + }, "generate_args": { "steps": 50, "n_iter": 1, - "override_settings": { - "CLIP_stop_at_last_layers": 3, - # settings effective only for this conversation - # The parameters are consistent with the global settings. - }, }, }, ], @@ -36,7 +30,7 @@ def main() -> None: # Init two agents dialog_agent = DialogAgent( name="Assistant", - sys_prompt="high definition,dreamy", # replace by your desired image style prompts + sys_prompt="dreamy", # replace by your image style prompts model_config_name="sd", # replace by your model config name ) user_agent = UserAgent() From 6269fc70f9521bd11bd27b3d4571d60362b7f9c6 Mon Sep 17 00:00:00 2001 From: cmgzn Date: Thu, 19 Sep 2024 16:57:11 +0800 Subject: [PATCH 08/16] fix readme --- .../README.md | 104 ++++++++++++++++-- 1 file changed, 92 insertions(+), 12 deletions(-) diff --git a/examples/conversation_with_stablediffusion_model/README.md b/examples/conversation_with_stablediffusion_model/README.md index 6c3f65df2..a983cb3de 100644 --- a/examples/conversation_with_stablediffusion_model/README.md +++ b/examples/conversation_with_stablediffusion_model/README.md @@ -1,27 +1,107 @@ # Conversation with Stable-diffusion model This example will show + - How to use Stable Diffusion models in AgentScope. In this example, you can interact in a conversational format to generate images. Once the image is generated, the agent will respond with the local file path where the image is saved. -## Prerequisites +## How to Run You need to satisfy the following requirements to run this example: +### Step 0: Install Stable Diffusion Web UI and AgentScope + - Install Stable Diffusion Web UI by following the instructions at [AUTOMATIC1111/stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui). -- Launching the Stable Diffusion Web UI with arguments: --api -- Ensure that your host can successfully access `http://127.0.0.1:7860/`(default) or any other specified host and port you choose. - Install the latest version of AgentScope by - ```bash - git clone https://github.com/modelscope/agentscope.git - cd agentscope - pip install -e . - ``` - -## Running the Example -Run the example and input your questions. + ```bash + git clone https://github.com/modelscope/agentscope.git + cd agentscope + pip install -e . + ``` + +### Step 1: Download the required checkpoints + +Before starting the Stable Diffusion Web UI, you need to download at least one model to ensure normal operation. +Download the model to `stable-diffusion-webui/models/Stable-diffusion` directory. + +### Step 2: Launch the Stable Diffusion Web UI + +We've provided a convenient shell script to quickly start the Stable Diffusion Web UI: +`scripts/stable_diffusion_webui/sd_setup.sh` + +Activate the virtual environment first, Then, run the following command in your terminal, replacing YOUR-SD-WEBUI-PATH with the actual path to your Stable Diffusion Web UI directory: + +```bash +bash scripts/stable_diffusion_webui/sd_setup.sh -s YOUR-SD-WEBUI-PATH +``` + +If you choose to start it on your own, you need to launch the Stable Diffusion Web UI with the following arguments: --api --port=7862. For more detailed instructions on starting the WebUI, refer to the [AUTOMATIC1111/stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui). + +### Step 3: Running the Example + +Run the example and input your prompt. + ```bash python conversation_with_stablediffusion_model.py -``` \ No newline at end of file +``` + +## Customization Options + +### `model_config` Example: + +```json +{ + "model_type": "sd_txt2img", + "config_name": "sd", + "options": { + "sd_model_checkpoint": "Anything-V3.0-pruned", + "sd_lora": "add_detail", + "CLIP_stop_at_last_layers": 2 + }, + "generate_args": { + "steps": 50, + "n_iter": 1, + "override_settings": { + "CLIP_stop_at_last_layers": 3 + } + } +} +``` + +### Parameter Explanation: + +- `options`: Global configuration that directly affects the WebUI settings. +- `generate_args`: Controls parameters for individual image generation requests, such as `steps` (number of sampling steps) and `n_iter` (number of iterations). + - `override_settings`: Overrides WebUI settings for a single request, taking precedence over `options`. + +Notes: + +- `override_settings` only affects the current request, while changes made to `options` persist. +- Both parameters can set the same options, but `override_settings` has a higher priority. + +As shown in the example, the final image will be generated with the following settings: + +steps: 50 +n_iter: 1 +sd_model_checkpoint: Anything-V3.0-pruned +sd_lora: add_detail +CLIP_stop_at_last_layers: 3 + +However, the web UI will always display the following settings: + +sd_model_checkpoint: Anything-V3.0-pruned +sd_lora: add_detail +CLIP_stop_at_last_layers: 2 + +### Available Parameter Lists: + +If you've successfully enabled the Stable Diffusion Web UI API, you should be able to access its documentation at http://127.0.0.1:7862/docs (or whatever URL you're using + /docs). + +- `generate_args`: {url}/docs#/default/text2imgapi_sdapi_v1_txt2img_post +- `options` and `override_settings`: {url}/docs#/default/get_config_sdapi_v1_options_get + +For this project, the "options" parameter will be posted to the /sdapi/v1/options API endpoint, +and the "generate_args" parameter will be posted to the /sdapi/v1/txt2img API endpoint. +You can refer to https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/API for a more parameter reference guide. From a54a45340f86478fe805f4d629e595e6b8c30419 Mon Sep 17 00:00:00 2001 From: cmgzn Date: Thu, 19 Sep 2024 18:00:40 +0800 Subject: [PATCH 09/16] fix stablediffusion_model.py --- src/agentscope/models/stablediffusion_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/agentscope/models/stablediffusion_model.py b/src/agentscope/models/stablediffusion_model.py index 13c3b0870..f287a00d7 100644 --- a/src/agentscope/models/stablediffusion_model.py +++ b/src/agentscope/models/stablediffusion_model.py @@ -231,7 +231,7 @@ def __call__( # step3: parse the response return self._parse_response(response) - def format(self, *args: Msg | Sequence[Msg]) -> List[dict] | str: + def format(self, *args: Union[Msg, Sequence[Msg]]) -> List[dict] | str: # This is a temporary implementation to focus on the prompt # on single-turn image generation by preserving only the system prompt # and the last user message. This logic might change in the future From 9f29e8ec12612d91c6191eccd3db65aa5ee3f105 Mon Sep 17 00:00:00 2001 From: cmgzn Date: Fri, 20 Sep 2024 11:12:25 +0800 Subject: [PATCH 10/16] fix stablediffusion_model.py --- src/agentscope/models/stablediffusion_model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/agentscope/models/stablediffusion_model.py b/src/agentscope/models/stablediffusion_model.py index f287a00d7..c8f2d9548 100644 --- a/src/agentscope/models/stablediffusion_model.py +++ b/src/agentscope/models/stablediffusion_model.py @@ -4,7 +4,7 @@ import base64 import json import time -from typing import Any, Optional, Union, List, Sequence +from typing import Any, Optional, Union, Sequence import requests from loguru import logger @@ -231,7 +231,7 @@ def __call__( # step3: parse the response return self._parse_response(response) - def format(self, *args: Union[Msg, Sequence[Msg]]) -> List[dict] | str: + def format(self, *args: Union[Msg, Sequence[Msg]]) -> str: # This is a temporary implementation to focus on the prompt # on single-turn image generation by preserving only the system prompt # and the last user message. This logic might change in the future From f16bb651acee21ec3bc41d54253abcf832e1524a Mon Sep 17 00:00:00 2001 From: cmgzn Date: Fri, 20 Sep 2024 14:55:48 +0800 Subject: [PATCH 11/16] docs: add running example --- .../README.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/examples/conversation_with_stablediffusion_model/README.md b/examples/conversation_with_stablediffusion_model/README.md index a983cb3de..016e11cd8 100644 --- a/examples/conversation_with_stablediffusion_model/README.md +++ b/examples/conversation_with_stablediffusion_model/README.md @@ -105,3 +105,18 @@ If you've successfully enabled the Stable Diffusion Web UI API, you should be ab For this project, the "options" parameter will be posted to the /sdapi/v1/options API endpoint, and the "generate_args" parameter will be posted to the /sdapi/v1/txt2img API endpoint. You can refer to https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/API for a more parameter reference guide. + +## A Running Example + +- Conversation history with Stable Diffusion Web UI. + ```bash + User input:Horses on Mars + User: Horses on Mars + Assistant: Image saved to path\agentscope\runs\run_20240920-142208_rqsvhh\file\image_20240920-142522_HTF38X.png + User input: boy eating ice-cream + User: boy eating ice-cream + Assistant: Image saved to path\agentscope\runs\run_20240920-142208_rqsvhh\file\image_20240920-142559_2xGtUs.png + ``` +- Image +Horses on Mars +boy eating ice-cream \ No newline at end of file From 609611bb805472945e7255dbe0d424d49f276827 Mon Sep 17 00:00:00 2001 From: cmgzn Date: Fri, 27 Sep 2024 17:31:29 +0800 Subject: [PATCH 12/16] redactor: replace custom request with third-party API package (webuiapi) for SD-model invocation feat: add stablediffusion model services --- setup.py | 3 + .../models/stablediffusion_model.py | 214 +++++++----------- src/agentscope/service/__init__.py | 2 + .../stablediffusion_services.py | 122 ++++++++++ 4 files changed, 204 insertions(+), 137 deletions(-) create mode 100644 src/agentscope/service/multi_modality/stablediffusion_services.py diff --git a/setup.py b/setup.py index cd577d5b8..e29fdd8f9 100644 --- a/setup.py +++ b/setup.py @@ -90,6 +90,7 @@ extra_litellm_requires = ["litellm"] extra_zhipuai_requires = ["zhipuai"] extra_ollama_requires = ["ollama>=0.1.7"] +extra_sd_webuiapi_requires = ["webuiapi"] # Full requires extra_full_requires = ( @@ -102,6 +103,7 @@ + extra_litellm_requires + extra_zhipuai_requires + extra_ollama_requires + + extra_sd_webuiapi_requires ) # For online workstation @@ -140,6 +142,7 @@ "litellm": extra_litellm_requires, "zhipuai": extra_zhipuai_requires, "gemini": extra_gemini_requires, + "stablediffusion": extra_sd_webuiapi_requires, # For service functions "service": extra_service_requires, # For distribution mode diff --git a/src/agentscope/models/stablediffusion_model.py b/src/agentscope/models/stablediffusion_model.py index c8f2d9548..1d5d20f54 100644 --- a/src/agentscope/models/stablediffusion_model.py +++ b/src/agentscope/models/stablediffusion_model.py @@ -1,17 +1,14 @@ # -*- coding: utf-8 -*- """Model wrapper for stable diffusion models.""" from abc import ABC -import base64 -import json -import time -from typing import Any, Optional, Union, Sequence +from typing import Any, Union, Sequence -import requests -from loguru import logger +try: + import webuiapi +except ImportError: + webuiapi = None from . import ModelWrapperBase, ModelResponse -from ..constants import _DEFAULT_MAX_RETRIES -from ..constants import _DEFAULT_RETRY_INTERVAL from ..message import Msg from ..manager import FileManager from ..utils.common import _convert_to_str @@ -23,9 +20,10 @@ class StableDiffusionWrapperBase(ModelWrapperBase, ABC): To use SD-webui API, please 1. First download stable-diffusion-webui from https://github.com/AUTOMATIC1111/stable-diffusion-webui and - install it with 'webui-user.bat' + install it 2. Move your checkpoint to 'models/Stable-diffusion' folder - 3. Start launch.py with the '--api' parameter to start the server + 3. Start launch.py with the '--api --port=7862' parameter + 4. Install the 'webuiapi' package by 'pip install webuiapi' After that, you can use the SD-webui API and query the available parameters on the http://localhost:7862/docs page """ @@ -35,15 +33,10 @@ class StableDiffusionWrapperBase(ModelWrapperBase, ABC): def __init__( self, config_name: str, - host: str = "127.0.0.1:7862", - base_url: Optional[Union[str, None]] = None, - use_https: bool = False, generate_args: dict = None, - headers: dict = None, options: dict = None, - timeout: int = 30, - max_retries: int = _DEFAULT_MAX_RETRIES, - retry_interval: int = _DEFAULT_RETRY_INTERVAL, + host: str = "127.0.0.1", + port: int = 7862, **kwargs: Any, ) -> None: """ @@ -52,46 +45,29 @@ def __init__( Args: config_name (`str`): The name of the model config. - host (`str`, default `"127.0.0.1:7862"`): - The host port of the stable-diffusion webui server. - base_url (`str`, default `None`): - Base URL for the stable-diffusion webui services. - Generated from host and use_https if not provided. - use_https (`bool`, default `False`): - Whether to generate the base URL with HTTPS protocol or HTTP. generate_args (`dict`, default `None`): The extra keyword arguments used in SD api generation, e.g. `{"steps": 50}`. - headers (`dict`, default `None`): - HTTP request headers. options (`dict`, default `None`): - The keyword arguments to change the webui settings + The keyword arguments to change the sd-webui settings such as model or CLIP skip, this changes will persist. e.g. `{"sd_model_checkpoint": "Anything-V3.0-pruned"}`. + host (`str`, default `"127.0.0.1"`): + The host of the stable-diffusion webui server. + port (`int`, default `7862`): + The port of the stable-diffusion webui server. """ - # Construct base_url based on HTTPS usage if not provided - if base_url is None: - if use_https: - base_url = f"https://{host}" - else: - base_url = f"http://{host}" - - self.base_url = base_url - self.options_url = f"{base_url}/sdapi/v1/options" + # Initialize the SD-webui API + self.api = webuiapi.WebUIApi(host=host, port=port, **kwargs) self.generate_args = generate_args or {} - # Initialize the HTTP session and update the request headers - self.session = requests.Session() - if headers: - self.session.headers.update(headers) - # Set options if provided if options: - self._set_options(options) + self.api.set_options(options) # Get the default model name from the web-options model_name = ( - self._get_options()["sd_model_checkpoint"].split("[")[0].strip() + self.api.get_options()["sd_model_checkpoint"].split("[")[0].strip() ) # Update the model name if self.generate_args.get("override_settings"): @@ -102,116 +78,29 @@ def __init__( super().__init__(config_name=config_name, model_name=model_name) - self.timeout = timeout - self.max_retries = max_retries - self.retry_interval = retry_interval - - @property - def url(self) -> str: - """SD-webui API endpoint URL""" - raise NotImplementedError() - - def _get_options(self) -> dict: - response = self.session.get(url=self.options_url) - if response.status_code != 200: - logger.error(f"Failed to get options with {response.json()}") - raise RuntimeError(f"Failed to get options with {response.json()}") - return response.json() - - def _set_options(self, options: dict) -> None: - response = self.session.post(url=self.options_url, json=options) - if response.status_code != 200: - logger.error(json.dumps(options, indent=4)) - raise RuntimeError(f"Failed to set options with {response.json()}") - logger.info("Optionsset successfully") - - def _invoke_model(self, payload: dict) -> dict: - """Invoke SD webui API and record the invocation if needed""" - # step1: prepare post requests - for i in range(1, self.max_retries + 1): - response = self.session.post(url=self.url, json=payload) - - if response.status_code == requests.codes.ok: - break - - if i < self.max_retries: - logger.warning( - f"Failed to call the model with " - f"requests.codes == {response.status_code}, retry " - f"{i + 1}/{self.max_retries} times", - ) - time.sleep(i * self.retry_interval) - - # step2: record model invocation - # record the model api invocation, which will be skipped if - # `FileManager.save_api_invocation` is `False` - self._save_model_invocation( - arguments=payload, - response=response.json(), - ) - - # step3: return the response json - if response.status_code == requests.codes.ok: - return response.json() - else: - logger.error( - json.dumps({"url": self.url, "json": payload}, indent=4), - ) - raise RuntimeError( - f"Failed to call the model with {response.json()}", - ) - - def _parse_response(self, response: dict) -> ModelResponse: - """Parse the response json data into ModelResponse""" - return ModelResponse(raw=response) - class StableDiffusionImageSynthesisWrapper(StableDiffusionWrapperBase): """Stable Diffusion Text-to-Image (txt2img) API Wrapper""" model_type: str = "sd_txt2img" - @property - def url(self) -> str: - return f"{self.base_url}/sdapi/v1/txt2img" - - def _parse_response(self, response: dict) -> ModelResponse: - session_parameters = response["parameters"] - size = f"{session_parameters['width']}*{session_parameters['height']}" - image_count = ( - session_parameters["batch_size"] * session_parameters["n_iter"] - ) - - self.monitor.update_image_tokens( - model_name=self.model_name, - image_count=image_count, - resolution=size, - ) - - # Get image base64code as a list - images = response["images"] - b64_images = [base64.b64decode(image) for image in images] - - file_manager = FileManager.get_instance() - # Return local url - image_urls = [file_manager.save_image(_) for _ in b64_images] - text = "Image saved to " + "\n".join(image_urls) - return ModelResponse(text=text, image_urls=image_urls, raw=response) - def __call__( self, prompt: str, + save_local: bool = True, **kwargs: Any, ) -> ModelResponse: """ Args: prompt (`str`): The prompt string to generate images from. + save_local (`bool`, default `True`): + Whether to save the generated images locally. **kwargs (`Any`): The keyword arguments to SD-webui txt2img API, e.g. `n_iter`, `steps`, `seed`, `width`, etc. Please refer to https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/API - or http://localhost:7860/docs + or http://localhost:7862/docs for more detailed arguments. Returns: `ModelResponse`: @@ -226,10 +115,61 @@ def __call__( } # step2: forward to generate response - response = self._invoke_model(payload) + response = self.api.txt2img(**payload) + + # step3: save model invocation and update monitor + self._save_model_invocation_and_update_monitor( + payload=payload, + response=response.json, + ) + + # step4: parse the response + PIL_images = response.images + + file_manager = FileManager.get_instance() + if save_local: + # Save images + image_urls = [file_manager.save_image(_) for _ in PIL_images] + text = "Image saved to " + "\n".join(image_urls) + else: + image_urls = PIL_images + text = None + + return ModelResponse( + text=text, + image_urls=image_urls, + raw=response.json, + ) + + def _save_model_invocation_and_update_monitor( + self, + payload: dict, + response: dict, + ) -> None: + """Save the model invocation and update the monitor accordingly. + + Args: + kwargs (`dict`): + The keyword arguments to the DashScope chat API. + response (`dict`): + The response object returned by the DashScope chat API. + """ + self._save_model_invocation( + arguments=payload, + response=response, + ) + + session_parameters = response["parameters"] + size = f"{session_parameters['width']}*{session_parameters['height']}" + image_count = ( + session_parameters["batch_size"] * session_parameters["n_iter"] + ) - # step3: parse the response - return self._parse_response(response) + self.monitor.update_image_tokens( + model_name=self.model_name, + image_count=image_count, + resolution=size, + ) def format(self, *args: Union[Msg, Sequence[Msg]]) -> str: # This is a temporary implementation to focus on the prompt diff --git a/src/agentscope/service/__init__.py b/src/agentscope/service/__init__.py index 7d33e6501..20c1af051 100644 --- a/src/agentscope/service/__init__.py +++ b/src/agentscope/service/__init__.py @@ -45,6 +45,7 @@ openai_edit_image, openai_create_image_variation, ) +from .multi_modality.stablediffusion_services import sd_text_to_image from .service_response import ServiceResponse from .service_toolkit import ServiceToolkit @@ -117,6 +118,7 @@ def get_help() -> None: "openai_image_to_text", "openai_edit_image", "openai_create_image_variation", + "sd_text_to_image", "tripadvisor_search", "tripadvisor_search_location_photos", "tripadvisor_search_location_details", diff --git a/src/agentscope/service/multi_modality/stablediffusion_services.py b/src/agentscope/service/multi_modality/stablediffusion_services.py new file mode 100644 index 000000000..4547aa115 --- /dev/null +++ b/src/agentscope/service/multi_modality/stablediffusion_services.py @@ -0,0 +1,122 @@ +# -*- coding: utf-8 -*- +"""Use StableDiffusion-webui API to generate images +""" +import os +from typing import Optional + +from ...models import StableDiffusionImageSynthesisWrapper + +from ...manager import FileManager +from ..service_response import ( + ServiceResponse, + ServiceExecStatus, +) +from ...utils.common import ( + _get_timestamp, + _generate_random_code, +) +from ...constants import _DEFAULT_IMAGE_NAME + + +def sd_text_to_image( + prompt: str, + n_iter: int = 1, + width: int = 1024, + height: int = 1024, + options: dict = None, + baseurl: str = None, + save_dir: Optional[str] = None, +) -> ServiceResponse: + """Generate image(s) based on the given prompt, and return image url(s). + + Args: + prompt (`str`): + The text prompt to generate image. + n (`int`, defaults to `1`): + The number of images to generate. + width (`int`, defaults to `1024`): + Width of the image. + height (`int`, defaults to `1024`): + Height of the image. + options (`dict`, defaults to `None`): + The options to override the sd-webui default settings. + If not specified, will use the default settings. + baseurl (`str`, defaults to `None`): + The base url of the sd-webui. + save_dir (`Optional[str]`, defaults to 'None'): + The directory to save the generated images. If not specified, + will return the web urls. + + Returns: + ServiceResponse: + A dictionary with two variables: `status` and`content`. + If `status` is ServiceExecStatus.SUCCESS, + the `content` is a dict with key 'fig_paths" and + value is a list of the paths to the generated images. + + Example: + + .. code-block:: python + + prompt = "A beautiful sunset in the mountains" + print(sd_text_to_image(prompt, 2)) + + > { + > 'status': 'SUCCESS', + > 'content': {'image_urls': ['IMAGE_URL1', 'IMAGE_URL2']} + > } + + """ + text2img = StableDiffusionImageSynthesisWrapper( + config_name="sd-text-to-image-service", # Just a placeholder + baseurl=baseurl, + ) + try: + kwargs = {"n_iter": n_iter, "width": width, "height": height} + if options: + kwargs["override_settings"] = options + + res = text2img(prompt=prompt, save_local=False, **kwargs) + images = res.image_urls + + # save images to save_dir + if images is not None: + if save_dir: + os.makedirs(save_dir, exist_ok=True) + urls_local = [] + # Obtain the image file names in the url + for image in images: + image_name = _DEFAULT_IMAGE_NAME.format( + _get_timestamp( + "%Y%m%d-%H%M%S", + ), + _generate_random_code(), + ) + image_path = os.path.abspath( + os.path.join(save_dir, image_name), + ) + # Download the image + image.save(image_path) + urls_local.append(image_path) + return ServiceResponse( + ServiceExecStatus.SUCCESS, + {"image_urls": urls_local}, + ) + else: + # Return the default urls + file_manager = FileManager.get_instance() + urls = [file_manager.save_image(_) for _ in images] + return ServiceResponse( + ServiceExecStatus.SUCCESS, + {"image_urls": urls}, + ) + else: + return ServiceResponse( + ServiceExecStatus.ERROR, + "Error: Failed to generate images", + ) + except Exception as e: + return ServiceResponse( + ServiceExecStatus.ERROR, + str(e), + ) From 62d80cfa46a0b8db90941c839577e658939a8d51 Mon Sep 17 00:00:00 2001 From: cmgzn Date: Fri, 27 Sep 2024 17:42:17 +0800 Subject: [PATCH 13/16] fix: correct text assignment in ModelResponse --- src/agentscope/models/stablediffusion_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/agentscope/models/stablediffusion_model.py b/src/agentscope/models/stablediffusion_model.py index 1d5d20f54..e1948e38a 100644 --- a/src/agentscope/models/stablediffusion_model.py +++ b/src/agentscope/models/stablediffusion_model.py @@ -133,7 +133,7 @@ def __call__( text = "Image saved to " + "\n".join(image_urls) else: image_urls = PIL_images - text = None + text = "" # Just a placeholder return ModelResponse( text=text, From 07f70f68e4d91b99f2dcebbd3a8194e9c1209f3f Mon Sep 17 00:00:00 2001 From: cmgzn Date: Sun, 29 Sep 2024 15:06:53 +0800 Subject: [PATCH 14/16] docs: add minimum hardware requirements --- .../conversation_with_stablediffusion_model/README.md | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/examples/conversation_with_stablediffusion_model/README.md b/examples/conversation_with_stablediffusion_model/README.md index 016e11cd8..59e3a7270 100644 --- a/examples/conversation_with_stablediffusion_model/README.md +++ b/examples/conversation_with_stablediffusion_model/README.md @@ -7,6 +7,13 @@ This example will show In this example, you can interact in a conversational format to generate images. Once the image is generated, the agent will respond with the local file path where the image is saved. +## Minimum Hardware Requirements + +- **GPU**: NVIDIA GPU with at least 6.9GB of VRAM +- **CPU**: Modern multi-core CPU (e.g., Intel i5 or AMD Ryzen 5) +- **RAM**: Minimum 8GB +- **Storage**: At least 10GB of available hard drive space + ## How to Run You need to satisfy the following requirements to run this example: @@ -37,7 +44,7 @@ Activate the virtual environment first, Then, run the following command in your bash scripts/stable_diffusion_webui/sd_setup.sh -s YOUR-SD-WEBUI-PATH ``` -If you choose to start it on your own, you need to launch the Stable Diffusion Web UI with the following arguments: --api --port=7862. For more detailed instructions on starting the WebUI, refer to the [AUTOMATIC1111/stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui). +If you choose to start it on your own, you need to launch the Stable Diffusion Web UI with the following arguments: `--api --port=7862`. For more detailed instructions on starting the WebUI, refer to the [AUTOMATIC1111/stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui). ### Step 3: Running the Example From 612b1b6458bab3aaac0018f556f1846e845d76ee Mon Sep 17 00:00:00 2001 From: cmgzn Date: Thu, 9 Jan 2025 16:00:25 +0800 Subject: [PATCH 15/16] include StableDiffusionImageSynthesisWrapper in models init --- src/agentscope/models/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/agentscope/models/__init__.py b/src/agentscope/models/__init__.py index 7b26c6b51..fafa0a746 100644 --- a/src/agentscope/models/__init__.py +++ b/src/agentscope/models/__init__.py @@ -62,6 +62,7 @@ "LiteLLMChatWrapper", "YiChatWrapper", "AnthropicChatWrapper", + "StableDiffusionImageSynthesisWrapper", ] __all__ = [ From 6b9d416d8c061dfa659f95c9ba57fb591dbfc226 Mon Sep 17 00:00:00 2001 From: cmgzn Date: Thu, 9 Jan 2025 17:25:40 +0800 Subject: [PATCH 16/16] add StableDiffusionImageSynthesisWrapper test case --- tests/model_test.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/model_test.py b/tests/model_test.py index dafc02c79..37682cc88 100644 --- a/tests/model_test.py +++ b/tests/model_test.py @@ -28,6 +28,7 @@ OpenAIChatWrapper, PostAPIChatWrapper, AnthropicChatWrapper, + StableDiffusionImageSynthesisWrapper, ) @@ -79,6 +80,7 @@ def test_build_in_model_wrapper_classes(self) -> None: "litellm_chat": LiteLLMChatWrapper, "yi_chat": YiChatWrapper, "anthropic_chat": AnthropicChatWrapper, + "sd_txt2img": StableDiffusionImageSynthesisWrapper, }, )