From 882df659a07849d5c9008cf6b05cc1cc0d239103 Mon Sep 17 00:00:00 2001 From: sean-bailey Date: Mon, 12 Feb 2024 09:46:24 -0500 Subject: [PATCH 1/3] Add in upscale repeater logic --- examples/cli/main.cpp | 50 ++++++++++++++++++++++++++++++------------- 1 file changed, 35 insertions(+), 15 deletions(-) diff --git a/examples/cli/main.cpp b/examples/cli/main.cpp index 7acc4449..77cab5e8 100644 --- a/examples/cli/main.cpp +++ b/examples/cli/main.cpp @@ -88,6 +88,7 @@ struct SDParams { bool vae_tiling = false; bool control_net_cpu = false; bool canny_preprocess = false; + int upscale_repeats = 1; }; void print_params(SDParams params) { @@ -120,6 +121,7 @@ void print_params(SDParams params) { printf(" seed: %ld\n", params.seed); printf(" batch_count: %d\n", params.batch_count); printf(" vae_tiling: %s\n", params.vae_tiling ? "true" : "false"); + printf(" upscale_repeats: %d\n", params.upscale_repeats); } void print_usage(int argc, const char* argv[]) { @@ -136,6 +138,7 @@ void print_usage(int argc, const char* argv[]) { printf(" --control-net [CONTROL_PATH] path to control net model\n"); printf(" --embd-dir [EMBEDDING_PATH] path to embeddings.\n"); printf(" --upscale-model [ESRGAN_PATH] path to esrgan model. Upscale images after generate, just RealESRGAN_x4plus_anime_6B supported by now.\n"); + printf(" --upscale-repeats Run the ESRGAN upscaler this many times (default 1)\n"); printf(" --type [TYPE] weight type (f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0)\n"); printf(" If not specified, the default is the type of the weight file.\n"); printf(" --lora-model-dir [DIR] lora model directory\n"); @@ -162,7 +165,7 @@ void print_usage(int argc, const char* argv[]) { printf(" --vae-tiling process vae in tiles to reduce memory usage\n"); printf(" --control-net-cpu keep controlnet in cpu (for low vram)\n"); printf(" --canny apply canny preprocessor (edge detection)\n"); - printf(" -v, --verbose print extra info\n"); + printf(" -v, --verbose print extra info\n"); } void parse_args(int argc, const char** argv, SDParams& params) { @@ -286,6 +289,16 @@ void parse_args(int argc, const char** argv, SDParams& params) { break; } params.prompt = argv[i]; + } else if (arg == "--upscale-repeats") { + if (++i >= argc) { + invalid_arg = true; + break; + } + params.upscale_repeats = std::stoi(argv[i]); + if (params.upscale_repeats < 1) { + fprintf(stderr, "error: upscale multiplier must be at least 1\n"); + exit(1); + } } else if (arg == "-n" || arg == "--negative-prompt") { if (++i >= argc) { invalid_arg = true; @@ -647,28 +660,35 @@ int main(int argc, const char* argv[]) { } int upscale_factor = 4; // unused for RealESRGAN_x4plus_anime_6B.pth - if (params.esrgan_path.size() > 0) { - upscaler_ctx_t* upscaler_ctx = new_upscaler_ctx(params.esrgan_path.c_str(), - params.n_threads, - params.wtype); + if (params.esrgan_path.size() > 0 && params.upscale_repeats > 0) { + upscaler_ctx_t* upscaler_ctx = new_upscaler_ctx(params.esrgan_path.c_str(), + params.n_threads, + params.wtype); - if (upscaler_ctx == NULL) { - printf("new_upscaler_ctx failed\n"); - } else { - for (int i = 0; i < params.batch_count; i++) { - if (results[i].data == NULL) { - continue; + if (upscaler_ctx == NULL) { + printf("new_upscaler_ctx failed\n"); + } else { + for (int i = 0; i < params.batch_count; i++) { + if (results[i].data == NULL) { + continue; + } + sd_image_t current_image = results[i]; + for (int u = 0; u < params.upscale_repeats; ++u) { + sd_image_t upscaled_image = upscale(upscaler_ctx, current_image, upscale_factor); + if (u > 0) { // Free the previous iteration's image data if not the first upscale + free(current_image.data); } - sd_image_t upscaled_image = upscale(upscaler_ctx, results[i], upscale_factor); if (upscaled_image.data == NULL) { printf("upscale failed\n"); - continue; + break; } - free(results[i].data); - results[i] = upscaled_image; + current_image = upscaled_image; } + results[i] = current_image; // Set the final upscaled image as the result } } +} + size_t last = params.output_path.find_last_of("."); std::string dummy_name = last != std::string::npos ? params.output_path.substr(0, last) : params.output_path; From 5a194f8824ed555b73e2b761698ebdbc363e72f5 Mon Sep 17 00:00:00 2001 From: sean-bailey Date: Mon, 12 Feb 2024 09:48:26 -0500 Subject: [PATCH 2/3] Update readme with the changes --- README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index c1675aa1..244a5d03 100644 --- a/README.md +++ b/README.md @@ -94,6 +94,8 @@ git submodule update #### Build from scratch ```shell +git submodule init +git submodule update mkdir build cd build cmake .. @@ -148,7 +150,7 @@ cmake --build . --config Release ### Run ``` -usage: ./bin/sd [arguments] +usage: ./build/bin/sd [arguments] arguments: -h, --help show this help message and exit @@ -161,6 +163,7 @@ arguments: --control-net [CONTROL_PATH] path to control net model --embd-dir [EMBEDDING_PATH] path to embeddings. --upscale-model [ESRGAN_PATH] path to esrgan model. Upscale images after generate, just RealESRGAN_x4plus_anime_6B supported by now. + --upscale-repeats Run the ESRGAN upscaler this many times (default 1) --type [TYPE] weight type (f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0) If not specified, the default is the type of the weight file. --lora-model-dir [DIR] lora model directory @@ -186,6 +189,7 @@ arguments: <= 0 represents unspecified, will be 1 for SD1.x, 2 for SD2.x --vae-tiling process vae in tiles to reduce memory usage --control-net-cpu keep controlnet in cpu (for low vram) + --canny apply canny preprocessor (edge detection) -v, --verbose print extra info ``` From 38ccc05c57b29445edb4e779fe7b5e8f3cacff36 Mon Sep 17 00:00:00 2001 From: leejet Date: Sat, 24 Feb 2024 21:26:19 +0800 Subject: [PATCH 3/3] some fixes --- README.md | 2 -- examples/cli/main.cpp | 45 ++++++++++++++++++++----------------------- stable-diffusion.cpp | 9 +++++---- 3 files changed, 26 insertions(+), 30 deletions(-) diff --git a/README.md b/README.md index a0822b0a..2e07fded 100644 --- a/README.md +++ b/README.md @@ -94,8 +94,6 @@ git submodule update #### Build from scratch ```shell -git submodule init -git submodule update mkdir build cd build cmake .. diff --git a/examples/cli/main.cpp b/examples/cli/main.cpp index 805cd0be..5d324845 100644 --- a/examples/cli/main.cpp +++ b/examples/cli/main.cpp @@ -96,7 +96,7 @@ struct SDParams { bool vae_tiling = false; bool control_net_cpu = false; bool canny_preprocess = false; - int upscale_repeats = 1; + int upscale_repeats = 1; }; void print_params(SDParams params) { @@ -174,7 +174,7 @@ void print_usage(int argc, const char* argv[]) { printf(" --vae-tiling process vae in tiles to reduce memory usage\n"); printf(" --control-net-cpu keep controlnet in cpu (for low vram)\n"); printf(" --canny apply canny preprocessor (edge detection)\n"); - printf(" -v, --verbose print extra info\n"); + printf(" -v, --verbose print extra info\n"); } void parse_args(int argc, const char** argv, SDParams& params) { @@ -714,34 +714,31 @@ int main(int argc, const char* argv[]) { int upscale_factor = 4; // unused for RealESRGAN_x4plus_anime_6B.pth if (params.esrgan_path.size() > 0 && params.upscale_repeats > 0) { - upscaler_ctx_t* upscaler_ctx = new_upscaler_ctx(params.esrgan_path.c_str(), - params.n_threads, - params.wtype); + upscaler_ctx_t* upscaler_ctx = new_upscaler_ctx(params.esrgan_path.c_str(), + params.n_threads, + params.wtype); - if (upscaler_ctx == NULL) { - printf("new_upscaler_ctx failed\n"); - } else { - for (int i = 0; i < params.batch_count; i++) { - if (results[i].data == NULL) { - continue; - } - sd_image_t current_image = results[i]; - for (int u = 0; u < params.upscale_repeats; ++u) { - sd_image_t upscaled_image = upscale(upscaler_ctx, current_image, upscale_factor); - if (u > 0) { // Free the previous iteration's image data if not the first upscale - free(current_image.data); + if (upscaler_ctx == NULL) { + printf("new_upscaler_ctx failed\n"); + } else { + for (int i = 0; i < params.batch_count; i++) { + if (results[i].data == NULL) { + continue; } - if (upscaled_image.data == NULL) { - printf("upscale failed\n"); - break; + sd_image_t current_image = results[i]; + for (int u = 0; u < params.upscale_repeats; ++u) { + sd_image_t upscaled_image = upscale(upscaler_ctx, current_image, upscale_factor); + if (upscaled_image.data == NULL) { + printf("upscale failed\n"); + break; + } + free(current_image.data); + current_image = upscaled_image; } - current_image = upscaled_image; + results[i] = current_image; // Set the final upscaled image as the result } - results[i] = current_image; // Set the final upscaled image as the result } } -} - size_t last = params.output_path.find_last_of("."); std::string dummy_name = last != std::string::npos ? params.output_path.substr(0, last) : params.output_path; diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp index be32f7f6..8f123fc1 100644 --- a/stable-diffusion.cpp +++ b/stable-diffusion.cpp @@ -173,10 +173,11 @@ class StableDiffusionGGML { if (version == VERSION_XL) { scale_factor = 0.13025f; if (vae_path.size() == 0 && taesd_path.size() == 0) { - LOG_WARN("!!!It looks like you are using SDXL model. " - "If you find that the generated images are completely black, " - "try specifying SDXL VAE FP16 Fix with the --vae parameter. " - "You can find it here: https://huggingface.co/madebyollin/sdxl-vae-fp16-fix/blob/main/sdxl_vae.safetensors"); + LOG_WARN( + "!!!It looks like you are using SDXL model. " + "If you find that the generated images are completely black, " + "try specifying SDXL VAE FP16 Fix with the --vae parameter. " + "You can find it here: https://huggingface.co/madebyollin/sdxl-vae-fp16-fix/blob/main/sdxl_vae.safetensors"); } }