diff --git a/denoiser.hpp b/denoiser.hpp index d4bcec59..c231ea6a 100644 --- a/denoiser.hpp +++ b/denoiser.hpp @@ -497,6 +497,7 @@ static void sample_k_diffusion(sample_method_t method, ggml_context* work_ctx, ggml_tensor* x, std::vector sigmas, + int initial_step, std::shared_ptr rng, float eta) { size_t steps = sigmas.size() - 1; @@ -1083,12 +1084,13 @@ static void sample_k_diffusion(sample_method_t method, // - pred_sample_direction -> "direction pointing to // x_t" // - pred_prev_sample -> "x_t-1" - int timestep = - roundf(TIMESTEPS - - i * ((float)TIMESTEPS / steps)) - - 1; + int timestep = TIMESTEPS - 1 - + (int)roundf((initial_step + i) * + (TIMESTEPS / float(initial_step + steps))); // 1. get previous step value (=t-1) - int prev_timestep = timestep - TIMESTEPS / steps; + int prev_timestep = TIMESTEPS - 1 - + (int)roundf((initial_step + i + 1) * + (TIMESTEPS / float(initial_step + steps))); // The sigma here is chosen to cause the // CompVisDenoiser to produce t = timestep float sigma = compvis_sigmas[timestep]; @@ -1260,9 +1262,14 @@ static void sample_k_diffusion(sample_method_t method, // Analytic form for TCD timesteps int timestep = TIMESTEPS - 1 - (TIMESTEPS / original_steps) * - (int)floor(i * ((float)original_steps / steps)); + (int)floor((initial_step + i) * + ((float)original_steps / (initial_step + steps))); // 1. get previous step value - int prev_timestep = i >= steps - 1 ? 0 : TIMESTEPS - 1 - (TIMESTEPS / original_steps) * (int)floor((i + 1) * ((float)original_steps / steps)); + int prev_timestep = i >= steps - 1 ? 0 : + TIMESTEPS - 1 - + (TIMESTEPS / original_steps) * + (int)floor((initial_step + i + 1) * + ((float)original_steps / (initial_step + steps))); // Here timestep_s is tau_n' in Algorithm 4. The _s // notation appears to be that from C. Lu, // "DPM-Solver: A Fast ODE Solver for Diffusion diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp index 402585f1..b4defffd 100644 --- a/stable-diffusion.cpp +++ b/stable-diffusion.cpp @@ -845,6 +845,7 @@ class StableDiffusionGGML { float eta, sample_method_t method, const std::vector& sigmas, + int initial_step, int start_merge_step, SDCondition id_cond, std::vector ref_latents = {}, @@ -1083,7 +1084,7 @@ class StableDiffusionGGML { return denoised; }; - sample_k_diffusion(method, denoise, work_ctx, x, sigmas, rng, eta); + sample_k_diffusion(method, denoise, work_ctx, x, sigmas, initial_step, rng, eta); x = denoiser->inverse_noise_scaling(sigmas[sigmas.size() - 1], x); @@ -1520,6 +1521,7 @@ sd_image_t* generate_image_internal(sd_ctx_t* sd_ctx, int height, enum sample_method_t sample_method, const std::vector& sigmas, + int initial_step, int64_t seed, int batch_count, const sd_image_t* control_cond, @@ -1530,6 +1532,7 @@ sd_image_t* generate_image_internal(sd_ctx_t* sd_ctx, std::vector ref_latents, ggml_tensor* concat_latent = NULL, ggml_tensor* denoise_mask = NULL) { + if (seed < 0) { // Generally, when using the provided command line, the seed is always >0. // However, to prevent potential issues if 'stable-diffusion.cpp' is invoked as a library @@ -1795,6 +1798,7 @@ sd_image_t* generate_image_internal(sd_ctx_t* sd_ctx, eta, sample_method, sigmas, + initial_step, start_merge_step, id_cond, ref_latents, @@ -1917,6 +1921,7 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_g ggml_tensor* concat_latent = NULL; ggml_tensor* denoise_mask = NULL; std::vector sigmas = sd_ctx->sd->denoiser->get_sigmas(sd_img_gen_params->sample_steps); + int initial_step = 0; if (sd_img_gen_params->init_image.data) { LOG_INFO("IMG2IMG"); @@ -1926,7 +1931,8 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_g t_enc--; LOG_INFO("target t_enc is %zu steps", t_enc); std::vector sigma_sched; - sigma_sched.assign(sigmas.begin() + sd_img_gen_params->sample_steps - t_enc - 1, sigmas.end()); + initial_step = sd_img_gen_params->sample_steps - t_enc - 1; + sigma_sched.assign(sigmas.begin() + initial_step, sigmas.end()); sigmas = sigma_sched; ggml_tensor* init_img = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, width, height, 3, 1); @@ -2063,6 +2069,7 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_g height, sd_img_gen_params->sample_method, sigmas, + initial_step, seed, sd_img_gen_params->batch_count, sd_img_gen_params->control_cond, @@ -2162,6 +2169,7 @@ SD_API sd_image_t* generate_video(sd_ctx_t* sd_ctx, const sd_vid_gen_params_t* s 0.f, sd_vid_gen_params->sample_method, sigmas, + 0, -1, SDCondition(NULL, NULL, NULL));