leejet · rmatif · May 27, 2025 · May 27, 2025 · May 27, 2025 · idostyle
diff --git a/README.md b/README.md
@@ -253,6 +253,7 @@ arguments:
   --sampling-method {euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm}
                                      sampling method (default: "euler_a")
   --steps  STEPS                     number of sample steps (default: 20)
+  --sigmas SIGMAS                    custom sigma values for the sampler, comma-separated (e.g., "14.61,7.8,3.5,0.0"); overrides --schedule
   --rng {std_default, cuda}          RNG (default: cuda)
   -s SEED, --seed SEED               RNG seed (default: 42, use random seed for < 0)
   -b, --batch-count COUNT            number of images to generate

diff --git a/denoiser.hpp b/denoiser.hpp
@@ -485,6 +485,10 @@ static void sample_k_diffusion(sample_method_t method,
 
             for (int i = 0; i < steps; i++) {
                 float sigma = sigmas[i];
+                float sigma_next = sigmas[i+1]; // For logging
+
+                // Log the sigma values for the current step
+                LOG_INFO("Step %d/%zu: sigma_current = %.4f, sigma_next = %.4f", i + 1, steps, sigma, sigma_next);
 
                 // denoise
                 ggml_tensor* denoised = model(x, sigma, i + 1);

diff --git a/examples/cli/main.cpp b/examples/cli/main.cpp
@@ -5,7 +5,8 @@
 #include <random>
 #include <string>
 #include <vector>
-
+#include <sstream>
+#include <iomanip>
 // #include "preprocessing.hpp"
 #include "flux.hpp"
 #include "stable-diffusion.h"
@@ -129,6 +130,7 @@ struct SDParams {
     float slg_scale              = 0.f;
     float skip_layer_start       = 0.01f;
     float skip_layer_end         = 0.2f;
+    std::vector<float> custom_sigmas;
 };
 
 void print_params(SDParams params) {
@@ -175,6 +177,13 @@ void print_params(SDParams params) {
     printf("    strength(img2img): %.2f\n", params.strength);
     printf("    rng:               %s\n", rng_type_to_str[params.rng_type]);
     printf("    seed:              %ld\n", params.seed);
+    if (!params.custom_sigmas.empty()) {
+        printf("    custom_sigmas:     [");
+        for (size_t i = 0; i < params.custom_sigmas.size(); ++i) {
+            printf("%.4f%s", params.custom_sigmas[i], i == params.custom_sigmas.size() - 1 ? "" : ", ");
+        }
+        printf("]\n");
+    }
     printf("    batch_count:       %d\n", params.batch_count);
     printf("    vae_tiling:        %s\n", params.vae_tiling ? "true" : "false");
     printf("    upscale_repeats:   %d\n", params.upscale_repeats);
@@ -231,8 +240,12 @@ void print_usage(int argc, const char* argv[]) {
     printf("  --steps  STEPS                     number of sample steps (default: 20)\n");
     printf("  --rng {std_default, cuda}          RNG (default: cuda)\n");
     printf("  -s SEED, --seed SEED               RNG seed (default: 42, use random seed for < 0)\n");
+    printf("  --sigmas SIGMAS                    Custom sigma values for the sampler, comma-separated (e.g., \"14.61,7.8,3.5,0.0\").\n");
+    printf("                                     Overrides --schedule. The list is zero-padded or truncated to steps + 1 values,\n");
+    printf("                                     and the last value is always forced to 0.\n");
     printf("  -b, --batch-count COUNT            number of images to generate\n");
-    printf("  --schedule {discrete, karras, exponential, ays, gits} Denoiser sigma schedule (default: discrete)\n");
+    printf("  --schedule {discrete, karras, exponential, ays, gits} Denoiser sigma schedule (default: discrete).\n");
+    printf("                                     Ignored if --sigmas is used.\n");
     printf("  --clip-skip N                      ignore last layers of CLIP network; 1 ignores none, 2 ignores one layer (default: -1)\n");
     printf("                                     <= 0 represents unspecified, will be 1 for SD1.x, 2 for SD2.x\n");
     printf("  --vae-tiling                       process vae in tiles to reduce memory usage\n");
@@ -629,6 +642,44 @@ void parse_args(int argc, const char** argv, SDParams& params) {
                 break;
             }
             params.skip_layer_end = std::stof(argv[i]);
+        } else if (arg == "--sigmas") {
+            if (++i >= argc) {
+                invalid_arg = true;
+                break;
+            }
+            // Accepts "a,b,c" or "[a,b,c]". Blank items (stray commas) are skipped; any item that
+            // is not entirely a float is an error. A repeated --sigmas replaces the earlier list.
+            std::string sigmas_str = argv[i];
+            if (!sigmas_str.empty() && sigmas_str.front() == '[') sigmas_str.erase(0, 1);
+            if (!sigmas_str.empty() && sigmas_str.back() == ']') sigmas_str.pop_back();
+            params.custom_sigmas.clear();
+            std::stringstream ss(sigmas_str);
+            std::string item;
+            while (!invalid_arg && std::getline(ss, item, ',')) {
+                item.erase(0, item.find_first_not_of(" \t\n\r\f\v"));
+                item.erase(item.find_last_not_of(" \t\n\r\f\v") + 1);
+                if (item.empty()) continue;
+                try {
+                    // std::stof stops at the first character it cannot use, so "14.6;7.8" would
+                    // silently become 14.6; insist that the whole item was consumed.
+                    size_t consumed = 0;
+                    const float value = std::stof(item, &consumed);
+                    if (consumed != item.size()) throw std::invalid_argument(item);
+                    params.custom_sigmas.push_back(value);
+                } catch (const std::invalid_argument&) {
+                    fprintf(stderr, "error: invalid float value '%s' in --sigmas\n", item.c_str());
+                    invalid_arg = true;
+                } catch (const std::out_of_range&) {
+                    fprintf(stderr, "error: float value '%s' out of range in --sigmas\n", item.c_str());
+                    invalid_arg = true;
+                }
+            }
+            if (invalid_arg) break;
+            if (params.custom_sigmas.empty() && !sigmas_str.empty()) {
+                 fprintf(stderr, "error: could not parse any sigma values from '%s'\n", argv[i]);
+                 invalid_arg = true;
+                 break;
+            }
         } else {
             fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
             print_usage(argc, argv);
@@ -736,8 +787,16 @@ std::string get_image_params(SDParams params, int64_t seed) {
     parameter_string += "Model: " + sd_basename(params.model_path) + ", ";
     parameter_string += "RNG: " + std::string(rng_type_to_str[params.rng_type]) + ", ";
     parameter_string += "Sampler: " + std::string(sample_method_str[params.sample_method]);
-    if (params.schedule == KARRAS) {
-        parameter_string += " karras";
+    if (!params.custom_sigmas.empty()) {
+        parameter_string += ", Custom Sigmas: [";
+        for (size_t i = 0; i < params.custom_sigmas.size(); ++i) {
+            std::ostringstream oss;
+            oss << std::fixed << std::setprecision(4) << params.custom_sigmas[i];
+            parameter_string += oss.str() + (i == params.custom_sigmas.size() - 1 ? "" : ", ");
+        }
+        parameter_string += "]";
+    } else if (params.schedule != DEFAULT) { // Only show schedule if not using custom sigmas
+        parameter_string += " " + std::string(schedule_str[params.schedule]);
     }
     parameter_string += ", ";
     parameter_string += "Version: stable-diffusion.cpp";
@@ -963,6 +1022,8 @@ int main(int argc, const char* argv[]) {
                           params.style_ratio,
                           params.normalize_input,
                           params.input_id_images_path.c_str(),
+                          params.custom_sigmas.empty() ? nullptr : params.custom_sigmas.data(),
+                          (int)params.custom_sigmas.size(),
                           params.skip_layers.data(),
                           params.skip_layers.size(),
                           params.slg_scale,
@@ -988,7 +1049,9 @@ int main(int argc, const char* argv[]) {
                               params.sample_method,
                               params.sample_steps,
                               params.strength,
-                              params.seed);
+                              params.seed,
+                              params.custom_sigmas.empty() ? nullptr : params.custom_sigmas.data(),
+                              (int)params.custom_sigmas.size());
             if (results == NULL) {
                 printf("generate failed\n");
                 free_sd_ctx(sd_ctx);
@@ -1032,6 +1095,8 @@ int main(int argc, const char* argv[]) {
                               params.style_ratio,
                               params.normalize_input,
                               params.input_id_images_path.c_str(),
+                              params.custom_sigmas.empty() ? nullptr : params.custom_sigmas.data(),
+                              (int)params.custom_sigmas.size(),
                               params.skip_layers.data(),
                               params.skip_layers.size(),
                               params.slg_scale,

diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp
@@ -1213,6 +1213,7 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx,
                            float slg_scale              = 0,
                            float skip_layer_start       = 0.01,
                            float skip_layer_end         = 0.2,
+                           const std::vector<float>& sigmas_override = {}, 
                            ggml_tensor* masked_image    = NULL) {
     if (seed < 0) {
         // Generally, when using the provided command line, the seed is always >0.
@@ -1227,7 +1228,12 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx,
     // }
     // std::cout << std::endl;
 
-    int sample_steps = sigmas.size() - 1;
+    // Fall back to `sigmas` when no override is given, so the defaulted (empty) sigmas_override cannot mean 0 steps.
+    const std::vector<float>& sigmas_to_use = sigmas_override.empty() ? sigmas : sigmas_override;
+    int sample_steps = sigmas_to_use.size() > 1 ? static_cast<int>(sigmas_to_use.size() - 1) : 0;
+    if (sample_steps == 0 && !sigmas_to_use.empty()) {  // exactly one sigma: no interval to step over
+        LOG_WARN("Received a sigma schedule with %zu element(s), implying 0 steps. This might not be intended.", sigmas_to_use.size());
+    }
 
     // Apply lora
     auto result_pair                                = extract_and_remove_lora(prompt);
@@ -1463,7 +1469,7 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx,
                                                      guidance,
                                                      eta,
                                                      sample_method,
-                                                     sigmas,
+                                                     sigmas_to_use,
                                                      start_merge_step,
                                                      id_cond,
                                                      skip_layers,
@@ -1539,6 +1545,8 @@ sd_image_t* txt2img(sd_ctx_t* sd_ctx,
                     float style_ratio,
                     bool normalize_input,
                     const char* input_id_images_path_c_str,
+                    const float* custom_sigmas,
+                    int custom_sigmas_count,
                     int* skip_layers         = NULL,
                     size_t skip_layers_count = 0,
                     float slg_scale          = 0,
@@ -1575,7 +1583,26 @@ sd_image_t* txt2img(sd_ctx_t* sd_ctx,
 
     size_t t0 = ggml_time_ms();
 
-    std::vector<float> sigmas = sd_ctx->sd->denoiser->get_sigmas(sample_steps);
+    std::vector<float> sigmas_for_generation;
+    if (custom_sigmas_count > 0 && custom_sigmas != nullptr) {
+        LOG_INFO("Using custom sigmas provided by user.");
+        sigmas_for_generation.assign(custom_sigmas, custom_sigmas + custom_sigmas_count);
+        size_t target_len = static_cast<size_t>(sample_steps) + 1;
+        if (sigmas_for_generation.size() < target_len) {
+            sigmas_for_generation.resize(target_len, 0.0f);
+        } else if (sigmas_for_generation.size() > target_len) {
+            sigmas_for_generation.resize(target_len);
+        }
+        if (!sigmas_for_generation.empty()) {
+            sigmas_for_generation.back() = 0.0f; // Ensure the last sigma is 0
+        }
+        // Custom sigmas always supersede the configured schedule, whichever one was selected, so this
+        // notice is unconditional; this branch is only reached when custom_sigmas_count > 0.
+        LOG_INFO("Custom sigmas are used, --schedule option is ignored.");
+    } else {
+        sigmas_for_generation = sd_ctx->sd->denoiser->get_sigmas(sample_steps);
+    }
+
 
     int C = 4;
     if (sd_version_is_sd3(sd_ctx->sd->version)) {
@@ -1610,7 +1637,7 @@ sd_image_t* txt2img(sd_ctx_t* sd_ctx,
                                                width,
                                                height,
                                                sample_method,
-                                               sigmas,
+                                               sigmas_for_generation,
                                                seed,
                                                batch_count,
                                                control_cond,
@@ -1621,7 +1648,9 @@ sd_image_t* txt2img(sd_ctx_t* sd_ctx,
                                                skip_layers_vec,
                                                slg_scale,
                                                skip_layer_start,
-                                               skip_layer_end);
+                                               skip_layer_end,
+                                               sigmas_for_generation,
+                                               nullptr /* masked_image for txt2img is null */);
 
     size_t t1 = ggml_time_ms();
 
@@ -1651,6 +1680,8 @@ sd_image_t* img2img(sd_ctx_t* sd_ctx,
                     float style_ratio,
                     bool normalize_input,
                     const char* input_id_images_path_c_str,
+                    const float* custom_sigmas,
+                    int custom_sigmas_count,
                     int* skip_layers         = NULL,
                     size_t skip_layers_count = 0,
                     float slg_scale          = 0,
@@ -1770,13 +1801,35 @@ sd_image_t* img2img(sd_ctx_t* sd_ctx,
     size_t t1 = ggml_time_ms();
     LOG_INFO("encode_first_stage completed, taking %.2fs", (t1 - t0) * 1.0f / 1000);
 
-    std::vector<float> sigmas = sd_ctx->sd->denoiser->get_sigmas(sample_steps);
+    std::vector<float> base_sigmas;
+    if (custom_sigmas_count > 0 && custom_sigmas != nullptr) {
+        LOG_INFO("Using custom sigmas provided by user for img2img base schedule.");
+        base_sigmas.assign(custom_sigmas, custom_sigmas + custom_sigmas_count);
+        size_t target_len = static_cast<size_t>(sample_steps) + 1;
+        if (base_sigmas.size() < target_len) {
+            base_sigmas.resize(target_len, 0.0f);
+        } else if (base_sigmas.size() > target_len) {
+            base_sigmas.resize(target_len);
+        }
+        if (!base_sigmas.empty()) {
+            base_sigmas.back() = 0.0f; // Ensure the last sigma is 0
+        }
+    } else {
+        base_sigmas = sd_ctx->sd->denoiser->get_sigmas(sample_steps);
+    }
+
     size_t t_enc              = static_cast<size_t>(sample_steps * strength);
     if (t_enc == sample_steps)
         t_enc--;
     LOG_INFO("target t_enc is %zu steps", t_enc);
-    std::vector<float> sigma_sched;
-    sigma_sched.assign(sigmas.begin() + sample_steps - t_enc - 1, sigmas.end());
+    std::vector<float> sigmas_for_generation;
+    if (sample_steps - t_enc -1 < base_sigmas.size()) { // Check bounds
+        sigmas_for_generation.assign(base_sigmas.begin() + sample_steps - t_enc - 1, base_sigmas.end());
+    } else {
+        LOG_WARN("Cannot create sub-schedule for img2img due to strength/steps/custom_sigmas combination. Using full base_sigmas.");
+        sigmas_for_generation = base_sigmas;
+    }
+
 
     sd_image_t* result_images = generate_image(sd_ctx,
                                                work_ctx,
@@ -1790,7 +1843,7 @@ sd_image_t* img2img(sd_ctx_t* sd_ctx,
                                                width,
                                                height,
                                                sample_method,
-                                               sigma_sched,
+                                               sigmas_for_generation,
                                                seed,
                                                batch_count,
                                                control_cond,
@@ -1802,6 +1855,7 @@ sd_image_t* img2img(sd_ctx_t* sd_ctx,
                                                slg_scale,
                                                skip_layer_start,
                                                skip_layer_end,
+                                               sigmas_for_generation,
                                                masked_image);
 
     size_t t2 = ggml_time_ms();
@@ -1824,14 +1878,31 @@ SD_API sd_image_t* img2vid(sd_ctx_t* sd_ctx,
                            enum sample_method_t sample_method,
                            int sample_steps,
                            float strength,
-                           int64_t seed) {
+                           int64_t seed,
+                           const float* custom_sigmas,
+                           int custom_sigmas_count) {
     if (sd_ctx == NULL) {
         return NULL;
     }
 
     LOG_INFO("img2vid %dx%d", width, height);
 
-    std::vector<float> sigmas = sd_ctx->sd->denoiser->get_sigmas(sample_steps);
+    std::vector<float> sigmas_for_generation;
+    if (custom_sigmas_count > 0 && custom_sigmas != nullptr) {
+        LOG_INFO("Using custom sigmas provided by user for img2vid.");
+        sigmas_for_generation.assign(custom_sigmas, custom_sigmas + custom_sigmas_count);
+        size_t target_len = static_cast<size_t>(sample_steps) + 1;
+        if (sigmas_for_generation.size() < target_len) {
+            sigmas_for_generation.resize(target_len, 0.0f);
+        } else if (sigmas_for_generation.size() > target_len) {
+            sigmas_for_generation.resize(target_len);
+        }
+        if (!sigmas_for_generation.empty()) {
+            sigmas_for_generation.back() = 0.0f; // Ensure the last sigma is 0
+        }
+    } else {
+        sigmas_for_generation = sd_ctx->sd->denoiser->get_sigmas(sample_steps);
+    }
 
     struct ggml_init_params params;
     params.mem_size = static_cast<size_t>(10 * 1024) * 1024;  // 10 MB
@@ -1902,7 +1973,7 @@ SD_API sd_image_t* img2vid(sd_ctx_t* sd_ctx,
                                                  0.f,
                                                  0.f,
                                                  sample_method,
-                                                 sigmas,
+                                                 sigmas_for_generation,
                                                  -1,
                                                  SDCondition(NULL, NULL, NULL));
 

diff --git a/stable-diffusion.h b/stable-diffusion.h
@@ -172,6 +172,8 @@ SD_API sd_image_t* txt2img(sd_ctx_t* sd_ctx,
                            float style_strength,
                            bool normalize_input,
                            const char* input_id_images_path,
+                           const float* custom_sigmas,
+                           int custom_sigmas_count,
                            int* skip_layers,
                            size_t skip_layers_count,
                            float slg_scale,
@@ -199,6 +201,8 @@ SD_API sd_image_t* img2img(sd_ctx_t* sd_ctx,
                            float style_strength,
                            bool normalize_input,
                            const char* input_id_images_path,
+                           const float* custom_sigmas,
+                           int custom_sigmas_count,
                            int* skip_layers,
                            size_t skip_layers_count,
                            float slg_scale,
@@ -218,7 +222,9 @@ SD_API sd_image_t* img2vid(sd_ctx_t* sd_ctx,
                            enum sample_method_t sample_method,
                            int sample_steps,
                            float strength,
-                           int64_t seed);
+                           int64_t seed,
+                           const float* custom_sigmas,
+                           int custom_sigmas_count);
 
 typedef struct upscaler_ctx_t upscaler_ctx_t;