leejet
diff --git a/‎conditioner.hpp
Lines changed: 4 additions & 3 deletions b/‎conditioner.hpp
Lines changed: 4 additions & 3 deletions
diff --git a/‎denoiser.hpp
Lines changed: 34 additions & 38 deletions b/‎denoiser.hpp
Lines changed: 34 additions & 38 deletions
diff --git a/‎diffusion_model.hpp
Lines changed: 4 additions & 4 deletions b/‎diffusion_model.hpp
Lines changed: 4 additions & 4 deletions
diff --git a/‎examples/cli/main.cpp
Lines changed: 10 additions & 10 deletions b/‎examples/cli/main.cpp
Lines changed: 10 additions & 10 deletions
@@ -1224,14 +1224,15 @@ struct PixArtCLIPEmbedder : public Conditioner {
     T5UniGramTokenizer t5_tokenizer;
     std::shared_ptr<T5Runner> t5;
     size_t chunk_len = 512;
-    bool use_mask = false;
-    int  mask_pad = 1;
+    bool use_mask    = false;
+    int mask_pad     = 1;
 
     PixArtCLIPEmbedder(ggml_backend_t backend,
                        std::map<std::string, enum ggml_type>& tensor_types,
                        int clip_skip = -1,
                        bool use_mask = false,
-                       int mask_pad = 1) : use_mask(use_mask), mask_pad(mask_pad) {
+                       int mask_pad  = 1)
+        : use_mask(use_mask), mask_pad(mask_pad) {
         t5 = std::make_shared<T5Runner>(backend, tensor_types, "text_encoders.t5xxl.transformer");
     }
 
 
@@ -1019,7 +1019,7 @@ static void sample_k_diffusion(sample_method_t method,
             // also needed to invert the behavior of CompVisDenoiser
             // (k-diffusion's LMSDiscreteScheduler)
             float beta_start = 0.00085f;
-            float beta_end = 0.0120f;
+            float beta_end   = 0.0120f;
             std::vector<double> alphas_cumprod;
             std::vector<double> compvis_sigmas;
 
@@ -1030,8 +1030,9 @@ static void sample_k_diffusion(sample_method_t method,
                     (i == 0 ? 1.0f : alphas_cumprod[i - 1]) *
                     (1.0f -
                      std::pow(sqrtf(beta_start) +
-                              (sqrtf(beta_end) - sqrtf(beta_start)) *
-                              ((float)i / (TIMESTEPS - 1)), 2));
+                                  (sqrtf(beta_end) - sqrtf(beta_start)) *
+                                      ((float)i / (TIMESTEPS - 1)),
+                              2));
                 compvis_sigmas[i] =
                     std::sqrt((1 - alphas_cumprod[i]) /
                               alphas_cumprod[i]);
@@ -1061,7 +1062,8 @@ static void sample_k_diffusion(sample_method_t method,
                 // - pred_prev_sample -> "x_t-1"
                 int timestep =
                     roundf(TIMESTEPS -
-                           i * ((float)TIMESTEPS / steps)) - 1;
+                           i * ((float)TIMESTEPS / steps)) -
+                    1;
                 // 1. get previous step value (=t-1)
                 int prev_timestep = timestep - TIMESTEPS / steps;
                 // The sigma here is chosen to cause the
@@ -1086,10 +1088,9 @@ static void sample_k_diffusion(sample_method_t method,
                     float* vec_x = (float*)x->data;
                     for (int j = 0; j < ggml_nelements(x); j++) {
                         vec_x[j] *= std::sqrt(sigma * sigma + 1) /
-                            sigma;
+                                    sigma;
                     }
-                }
-                else {
+                } else {
                     // For the subsequent steps after the first one,
                     // at this point x = latents or x = sample, and
                     // needs to be prescaled with x <- sample / c_in
@@ -1127,9 +1128,8 @@ static void sample_k_diffusion(sample_method_t method,
                 float alpha_prod_t = alphas_cumprod[timestep];
                 // Note final_alpha_cumprod = alphas_cumprod[0] due to
                 // trailing timestep spacing
-                float alpha_prod_t_prev = prev_timestep >= 0 ?
-                    alphas_cumprod[prev_timestep] : alphas_cumprod[0];
-                float beta_prod_t = 1 - alpha_prod_t;
+                float alpha_prod_t_prev = prev_timestep >= 0 ? alphas_cumprod[prev_timestep] : alphas_cumprod[0];
+                float beta_prod_t       = 1 - alpha_prod_t;
                 // 3. compute predicted original sample from predicted
                 // noise also called "predicted x_0" of formula (12)
                 // from https://arxiv.org/pdf/2010.02502.pdf
@@ -1145,7 +1145,7 @@ static void sample_k_diffusion(sample_method_t method,
                         vec_pred_original_sample[j] =
                             (vec_x[j] / std::sqrt(sigma * sigma + 1) -
                              std::sqrt(beta_prod_t) *
-                             vec_model_output[j]) *
+                                 vec_model_output[j]) *
                             (1 / std::sqrt(alpha_prod_t));
                     }
                 }
@@ -1159,8 +1159,8 @@ static void sample_k_diffusion(sample_method_t method,
                 // sigma_t = sqrt((1 - alpha_t-1)/(1 - alpha_t)) *
                 // sqrt(1 - alpha_t/alpha_t-1)
                 float beta_prod_t_prev = 1 - alpha_prod_t_prev;
-                float variance = (beta_prod_t_prev / beta_prod_t) *
-                    (1 - alpha_prod_t / alpha_prod_t_prev);
+                float variance         = (beta_prod_t_prev / beta_prod_t) *
+                                 (1 - alpha_prod_t / alpha_prod_t_prev);
                 float std_dev_t = eta * std::sqrt(variance);
                 // 6. compute "direction pointing to x_t" of formula
                 // (12) from https://arxiv.org/pdf/2010.02502.pdf
@@ -1179,8 +1179,8 @@ static void sample_k_diffusion(sample_method_t method,
                                       std::pow(std_dev_t, 2)) *
                             vec_model_output[j];
                         vec_x[j] = std::sqrt(alpha_prod_t_prev) *
-                            vec_pred_original_sample[j] +
-                            pred_sample_direction;
+                                       vec_pred_original_sample[j] +
+                                   pred_sample_direction;
                     }
                 }
                 if (eta > 0) {
@@ -1208,7 +1208,7 @@ static void sample_k_diffusion(sample_method_t method,
             // by Semi-Linear Consistency Function with Trajectory
             // Mapping", arXiv:2402.19159 [cs.CV]
             float beta_start = 0.00085f;
-            float beta_end = 0.0120f;
+            float beta_end   = 0.0120f;
             std::vector<double> alphas_cumprod;
             std::vector<double> compvis_sigmas;
 
@@ -1219,8 +1219,9 @@ static void sample_k_diffusion(sample_method_t method,
                     (i == 0 ? 1.0f : alphas_cumprod[i - 1]) *
                     (1.0f -
                      std::pow(sqrtf(beta_start) +
-                              (sqrtf(beta_end) - sqrtf(beta_start)) *
-                              ((float)i / (TIMESTEPS - 1)), 2));
+                                  (sqrtf(beta_end) - sqrtf(beta_start)) *
+                                      ((float)i / (TIMESTEPS - 1)),
+                              2));
                 compvis_sigmas[i] =
                     std::sqrt((1 - alphas_cumprod[i]) /
                               alphas_cumprod[i]);
@@ -1235,13 +1236,10 @@ static void sample_k_diffusion(sample_method_t method,
             for (int i = 0; i < steps; i++) {
                 // Analytic form for TCD timesteps
                 int timestep = TIMESTEPS - 1 -
-                    (TIMESTEPS / original_steps) *
-                    (int)floor(i * ((float)original_steps / steps));
+                               (TIMESTEPS / original_steps) *
+                                   (int)floor(i * ((float)original_steps / steps));
                 // 1. get previous step value
-                int prev_timestep = i >= steps - 1 ? 0 :
-                    TIMESTEPS - 1 - (TIMESTEPS / original_steps) *
-                    (int)floor((i + 1) *
-                               ((float)original_steps / steps));
+                int prev_timestep = i >= steps - 1 ? 0 : TIMESTEPS - 1 - (TIMESTEPS / original_steps) * (int)floor((i + 1) * ((float)original_steps / steps));
                 // Here timestep_s is tau_n' in Algorithm 4. The _s
                 // notation appears to be that from C. Lu,
                 // "DPM-Solver: A Fast ODE Solver for Diffusion
@@ -1258,10 +1256,9 @@ static void sample_k_diffusion(sample_method_t method,
                     float* vec_x = (float*)x->data;
                     for (int j = 0; j < ggml_nelements(x); j++) {
                         vec_x[j] *= std::sqrt(sigma * sigma + 1) /
-                            sigma;
+                                    sigma;
                     }
-                }
-                else {
+                } else {
                     float* vec_x = (float*)x->data;
                     for (int j = 0; j < ggml_nelements(x); j++) {
                         vec_x[j] *= std::sqrt(sigma * sigma + 1);
@@ -1294,15 +1291,14 @@ static void sample_k_diffusion(sample_method_t method,
                 // DPM-Solver. In fact, we have alpha_{t_n} =
                 // \sqrt{\hat{alpha_n}}, [...]"
                 float alpha_prod_t = alphas_cumprod[timestep];
-                float beta_prod_t = 1 - alpha_prod_t;
+                float beta_prod_t  = 1 - alpha_prod_t;
                 // Note final_alpha_cumprod = alphas_cumprod[0] since
                 // TCD is always "trailing"
-                float alpha_prod_t_prev = prev_timestep >= 0 ?
-                    alphas_cumprod[prev_timestep] : alphas_cumprod[0];
+                float alpha_prod_t_prev = prev_timestep >= 0 ? alphas_cumprod[prev_timestep] : alphas_cumprod[0];
                 // The subscript _s are the only portion in this
                 // section (2) unique to TCD
                 float alpha_prod_s = alphas_cumprod[timestep_s];
-                float beta_prod_s = 1 - alpha_prod_s;
+                float beta_prod_s  = 1 - alpha_prod_s;
                 // 3. Compute the predicted noised sample x_s based on
                 // the model parameterization
                 //
@@ -1317,7 +1313,7 @@ static void sample_k_diffusion(sample_method_t method,
                         vec_pred_original_sample[j] =
                             (vec_x[j] / std::sqrt(sigma * sigma + 1) -
                              std::sqrt(beta_prod_t) *
-                             vec_model_output[j]) *
+                                 vec_model_output[j]) *
                             (1 / std::sqrt(alpha_prod_t));
                     }
                 }
@@ -1339,9 +1335,9 @@ static void sample_k_diffusion(sample_method_t method,
                         // pred_epsilon = model_output
                         vec_x[j] =
                             std::sqrt(alpha_prod_s) *
-                            vec_pred_original_sample[j] +
+                                vec_pred_original_sample[j] +
                             std::sqrt(beta_prod_s) *
-                            vec_model_output[j];
+                                vec_model_output[j];
                     }
                 }
                 // 4. Sample and inject noise z ~ N(0, I) for
@@ -1357,7 +1353,7 @@ static void sample_k_diffusion(sample_method_t method,
                     // In this case, x is still pred_noised_sample,
                     // continue in-place
                     ggml_tensor_set_f32_randn(noise, rng);
-                    float* vec_x = (float*)x->data;
+                    float* vec_x     = (float*)x->data;
                     float* vec_noise = (float*)noise->data;
                     for (int j = 0; j < ggml_nelements(x); j++) {
                         // Corresponding to (35) in Zheng et
@@ -1366,10 +1362,10 @@ static void sample_k_diffusion(sample_method_t method,
                         vec_x[j] =
                             std::sqrt(alpha_prod_t_prev /
                                       alpha_prod_s) *
-                            vec_x[j] +
+                                vec_x[j] +
                             std::sqrt(1 - alpha_prod_t_prev /
-                                      alpha_prod_s) *
-                            vec_noise[j];
+                                              alpha_prod_s) *
+                                vec_noise[j];
                     }
                 }
             }
 
@@ -13,7 +13,7 @@ struct DiffusionModel {
                          struct ggml_tensor* c_concat,
                          struct ggml_tensor* y,
                          struct ggml_tensor* guidance,
-                         std::vector<ggml_tensor*> ref_latents = {},
+                         std::vector<ggml_tensor*> ref_latents     = {},
                          int num_video_frames                      = -1,
                          std::vector<struct ggml_tensor*> controls = {},
                          float control_strength                    = 0.f,
@@ -69,7 +69,7 @@ struct UNetModel : public DiffusionModel {
                  struct ggml_tensor* c_concat,
                  struct ggml_tensor* y,
                  struct ggml_tensor* guidance,
-                 std::vector<ggml_tensor*> ref_latents = {},
+                 std::vector<ggml_tensor*> ref_latents     = {},
                  int num_video_frames                      = -1,
                  std::vector<struct ggml_tensor*> controls = {},
                  float control_strength                    = 0.f,
@@ -120,7 +120,7 @@ struct MMDiTModel : public DiffusionModel {
                  struct ggml_tensor* c_concat,
                  struct ggml_tensor* y,
                  struct ggml_tensor* guidance,
-                 std::vector<ggml_tensor*> ref_latents = {},
+                 std::vector<ggml_tensor*> ref_latents     = {},
                  int num_video_frames                      = -1,
                  std::vector<struct ggml_tensor*> controls = {},
                  float control_strength                    = 0.f,
@@ -173,7 +173,7 @@ struct FluxModel : public DiffusionModel {
                  struct ggml_tensor* c_concat,
                  struct ggml_tensor* y,
                  struct ggml_tensor* guidance,
-                 std::vector<ggml_tensor*> ref_latents = {},
+                 std::vector<ggml_tensor*> ref_latents     = {},
                  int num_video_frames                      = -1,
                  std::vector<struct ggml_tensor*> controls = {},
                  float control_strength                    = 0.f,
 
@@ -133,9 +133,9 @@ struct SDParams {
     float skip_layer_start       = 0.01f;
     float skip_layer_end         = 0.2f;
 
-    bool chroma_use_dit_mask     = true;
-    bool chroma_use_t5_mask      = false;
-    int  chroma_t5_mask_pad      = 1;
+    bool chroma_use_dit_mask = true;
+    bool chroma_use_t5_mask  = false;
+    int chroma_t5_mask_pad   = 1;
 };
 
 void print_params(SDParams params) {
@@ -919,7 +919,7 @@ int main(int argc, const char* argv[]) {
             input_image_buffer = resized_image_buffer;
         }
     } else if (params.mode == EDIT) {
-        vae_decode_only       = false;
+        vae_decode_only = false;
         for (auto& path : params.ref_image_paths) {
             int c                 = 0;
             int width             = 0;
@@ -1113,7 +1113,7 @@ int main(int argc, const char* argv[]) {
                               params.skip_layer_start,
                               params.skip_layer_end);
         }
-    } else { // EDIT
+    } else {  // EDIT
         results = edit(sd_ctx,
                        ref_images.data(),
                        ref_images.size(),
@@ -1176,19 +1176,19 @@ int main(int argc, const char* argv[]) {
 
     std::string dummy_name, ext, lc_ext;
     bool is_jpg;
-    size_t last = params.output_path.find_last_of(".");
+    size_t last      = params.output_path.find_last_of(".");
     size_t last_path = std::min(params.output_path.find_last_of("/"),
                                 params.output_path.find_last_of("\\"));
-    if (last != std::string::npos // filename has extension
-    && (last_path == std::string::npos || last > last_path)) {
+    if (last != std::string::npos  // filename has extension
+        && (last_path == std::string::npos || last > last_path)) {
         dummy_name = params.output_path.substr(0, last);
         ext = lc_ext = params.output_path.substr(last);
         std::transform(ext.begin(), ext.end(), lc_ext.begin(), ::tolower);
         is_jpg = lc_ext == ".jpg" || lc_ext == ".jpeg" || lc_ext == ".jpe";
     } else {
         dummy_name = params.output_path;
         ext = lc_ext = "";
-        is_jpg = false;
+        is_jpg       = false;
     }
     // appending ".png" to absent or unknown extension
     if (!is_jpg && lc_ext != ".png") {
@@ -1200,7 +1200,7 @@ int main(int argc, const char* argv[]) {
             continue;
         }
         std::string final_image_path = i > 0 ? dummy_name + "_" + std::to_string(i + 1) + ext : dummy_name + ext;
-        if(is_jpg) {
+        if (is_jpg) {
             stbi_write_jpg(final_image_path.c_str(), results[i].width, results[i].height, results[i].channel,
                            results[i].data, 90, get_image_params(params, params.seed + i).c_str());
             printf("save result JPEG image to '%s'\n", final_image_path.c_str());