Skip to content

Commit 5635b0e

Browse files
phil2sat authored and Green-Sky committed
Beta Scheduler
1 parent abb115c commit 5635b0e

File tree

6 files changed

+271
-2
lines changed

6 files changed

+271
-2
lines changed

denoiser.hpp

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,119 @@ struct KarrasSchedule : SigmaSchedule {
251251
}
252252
};
253253

254+
struct BetaSchedule : SigmaSchedule {
255+
static constexpr double alpha = 0.6;
256+
static constexpr double beta = 0.6;
257+
258+
// Log Beta function
259+
static double log_beta(double a, double b) {
260+
return std::lgamma(a) + std::lgamma(b) - std::lgamma(a + b);
261+
}
262+
263+
// Regularized incomplete beta function using continued fraction
264+
static double incbeta(double x, double a, double b) {
265+
if (x <= 0.0) return 0.0;
266+
if (x >= 1.0) return 1.0;
267+
268+
// Use the continued fraction approximation (Lentz’s method)
269+
const int MAX_ITER = 200;
270+
const double EPSILON = 3.0e-7;
271+
272+
double aa, c, d, del, h;
273+
double qab = a + b;
274+
double qap = a + 1.0;
275+
double qam = a - 1.0;
276+
277+
c = 1.0;
278+
d = 1.0 - qab * x / qap;
279+
if (std::abs(d) < 1e-30) d = 1e-30;
280+
d = 1.0 / d;
281+
h = d;
282+
283+
for (int m = 1; m <= MAX_ITER; m++) {
284+
int m2 = 2 * m;
285+
286+
// Even term
287+
aa = m * (b - m) * x / ((qam + m2) * (a + m2));
288+
d = 1.0 + aa * d;
289+
if (std::abs(d) < 1e-30) d = 1e-30;
290+
c = 1.0 + aa / c;
291+
if (std::abs(c) < 1e-30) c = 1e-30;
292+
d = 1.0 / d;
293+
h *= d * c;
294+
295+
// Odd term
296+
aa = -(a + m) * (qab + m) * x / ((a + m2) * (qap + m2));
297+
d = 1.0 + aa * d;
298+
if (std::abs(d) < 1e-30) d = 1e-30;
299+
c = 1.0 + aa / c;
300+
if (std::abs(c) < 1e-30) c = 1e-30;
301+
d = 1.0 / d;
302+
del = d * c;
303+
h *= del;
304+
305+
if (std::abs(del - 1.0) < EPSILON) break;
306+
}
307+
308+
return std::exp(a * std::log(x) + b * std::log(1.0 - x) - log_beta(a, b)) / a * h;
309+
}
310+
311+
// Beta CDF using symmetry for better convergence
312+
static double beta_cdf(double x, double a, double b) {
313+
if (x == 0.0) return 0.0;
314+
if (x == 1.0) return 1.0;
315+
if (x < (a + 1.0) / (a + b + 2.0)) {
316+
return incbeta(x, a, b);
317+
} else {
318+
return 1.0 - incbeta(1.0 - x, b, a);
319+
}
320+
}
321+
322+
// Inverse Beta CDF (PPF) using Newton-Raphson
323+
static double beta_ppf(double u, double a, double b, int max_iter = 30) {
324+
double x = 0.5; // initial guess
325+
for (int i = 0; i < max_iter; i++) {
326+
double f = beta_cdf(x, a, b) - u;
327+
if (std::abs(f) < 1e-10) break;
328+
// derivative = x^(a-1) * (1-x)^(b-1) / B(a,b)
329+
double df = std::exp((a-1.0)*std::log(x) + (b-1.0)*std::log(1.0-x) - log_beta(a,b));
330+
x -= f / df;
331+
if (x <= 0.0) x = 1e-10;
332+
if (x >= 1.0) x = 1.0 - 1e-10;
333+
}
334+
return x;
335+
}
336+
337+
std::vector<float> get_sigmas(uint32_t n, float /*sigma_min*/, float /*sigma_max*/, t_to_sigma_t t_to_sigma) override {
338+
std::vector<float> result;
339+
result.reserve(n + 1);
340+
341+
int t_max = TIMESTEPS - 1;
342+
if (n == 0) {
343+
return result;
344+
} else if (n == 1) {
345+
result.push_back(t_to_sigma((float)t_max));
346+
result.push_back(0.f);
347+
return result;
348+
}
349+
350+
int last_t = -1;
351+
for (uint32_t i = 0; i < n; i++) {
352+
double u = 1.0 - double(i)/double(n); // reversed linspace
353+
double t_cont = beta_ppf(u, alpha, beta) * t_max;
354+
int t = (int)std::lround(t_cont);
355+
356+
if (t != last_t) {
357+
result.push_back(t_to_sigma((float)t));
358+
last_t = t;
359+
}
360+
}
361+
362+
result.push_back(0.f);
363+
return result;
364+
}
365+
};
366+
254367
struct Denoiser {
255368
std::shared_ptr<SigmaSchedule> scheduler = std::make_shared<DiscreteSchedule>();
256369
virtual float sigma_min() = 0;

examples/cli/main.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -238,7 +238,7 @@ void print_usage(int argc, const char* argv[]) {
238238
printf(" --skip-layers LAYERS Layers to skip for SLG steps: (default: [7,8,9])\n");
239239
printf(" --skip-layer-start START SLG enabling point: (default: 0.01)\n");
240240
printf(" --skip-layer-end END SLG disabling point: (default: 0.2)\n");
241-
printf(" --scheduler {discrete, karras, exponential, ays, gits} Denoiser sigma scheduler (default: discrete)\n");
241+
printf(" --scheduler {discrete, karras, beta, exponential, ays, gits} Denoiser sigma scheduler (default: discrete)\n");
242242
printf(" --sampling-method {euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd}\n");
243243
printf(" sampling method (default: \"euler_a\")\n");
244244
printf(" --steps STEPS number of sample steps (default: 20)\n");
@@ -251,7 +251,7 @@ void print_usage(int argc, const char* argv[]) {
251251
printf(" --high-noise-skip-layers LAYERS (high noise) Layers to skip for SLG steps: (default: [7,8,9])\n");
252252
printf(" --high-noise-skip-layer-start (high noise) SLG enabling point: (default: 0.01)\n");
253253
printf(" --high-noise-skip-layer-end END (high noise) SLG disabling point: (default: 0.2)\n");
254-
printf(" --high-noise-scheduler {discrete, karras, exponential, ays, gits} Denoiser sigma scheduler (default: discrete)\n");
254+
printf(" --high-noise-scheduler {discrete, karras, beta, exponential, ays, gits} Denoiser sigma scheduler (default: discrete)\n");
255255
printf(" --high-noise-sampling-method {euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd}\n");
256256
printf(" (high noise) sampling method (default: \"euler_a\")\n");
257257
printf(" --high-noise-steps STEPS (high noise) number of sample steps (default: -1 = auto)\n");

flake.lock

Lines changed: 61 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

flake.nix

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
# Nix flake for stable-diffusion.cpp.
# Exposes a default package, a CUDA package, and an `sd` app for every
# default system enumerated by flake-utils.
{
  inputs = {
    nixpkgs.url = "github:NixOS/nixpkgs/nixos-25.05";
    flake-utils.url = "github:numtide/flake-utils";
  };
  outputs = { self, nixpkgs, flake-utils }:
    flake-utils.lib.eachDefaultSystem (system:
      let
        pkgs = import nixpkgs { inherit system; };
        name = "stable-diffusion.cpp";
        src = ./.;
        meta.mainProgram = "sd";
        # Standard environment wrapped with the keepDebugInfo adapter.
        stdenv = (pkgs.stdenvAdapters.keepDebugInfo pkgs.stdenv);
        inherit (pkgs.stdenv) isAarch64 isDarwin;
        buildInputs = with pkgs; [ ];
        # Platform-specific libraries: Apple frameworks on Darwin, OpenBLAS elsewhere.
        osSpecific = with pkgs; buildInputs ++ (
          if isAarch64 && isDarwin then
            with pkgs.darwin.apple_sdk_11_0.frameworks; [
              Accelerate
              MetalKit
            ]
          else if isDarwin then
            # Every other Darwin target (this previously also had an identical,
            # separate aarch32 branch).
            with pkgs.darwin.apple_sdk.frameworks; [
              Accelerate
              CoreGraphics
              CoreVideo
            ]
          else
            with pkgs; [ openblas ]
        );
        nativeBuildInputs = with pkgs; [ cmake ninja pkg-config git ];
        # NOTE(review): these flags are only consumed by packages.cuda below;
        # packages.default does not inherit them — confirm that is intended.
        cmakeFlags = [
          "-DCMAKE_BUILD_TYPE=RelWithDebInfo"
          #"-DCMAKE_C_FLAGS:STRING=-Og"
          #"-DCMAKE_CXX_FLAGS:STRING=-Og"
          #"-DCMAKE_C_FLAGS:STRING=-fsanitize=address,undefined"
          #"-DCMAKE_CXX_FLAGS:STRING=-fsanitize=address,undefined"
          #"-DCMAKE_EXE_LINKER_FLAGS:STRING=-fsanitize=address,undefined"


          # does not work
          "-DGGML_NATIVE=OFF"
          "-DGGML_AVX=ON"
          "-DGGML_AVX2=ON"
          "-DGGML_FMA=ON"
          "-DGGML_F16C=ON"

          #"-DBUILD_SHARED_LIBS=ON"
          "-DCMAKE_SKIP_BUILD_RPATH=ON"
        ];
      in
      {
        packages.default = stdenv.mkDerivation {
          inherit name src meta nativeBuildInputs;
          buildInputs = osSpecific;
        };
        packages.cuda = stdenv.mkDerivation {
          inherit name src meta;
          # NOTE(review): extends the (empty) let-bound buildInputs, not
          # osSpecific, so the platform libraries above are not included here.
          buildInputs = with pkgs; buildInputs ++ [
            #cudaPackages.cudatoolkit
            cudaPackages.cuda_cccl # <nv/target>

            # A temporary hack for reducing the closure size, remove once cudaPackages
            # have stopped using lndir: https://github.com/NixOS/nixpkgs/issues/271792
            cudaPackages.cuda_cudart
            cudaPackages.libcublas
          ];
          nativeBuildInputs = with pkgs; nativeBuildInputs ++ [
            cudaPackages.cuda_nvcc
            autoAddDriverRunpath
          ];
          cmakeFlags = cmakeFlags ++ [
            "-DSD_CUDA=ON"
            "-DCMAKE_CUDA_ARCHITECTURES=75"
          ];
        };
        apps.sd = {
          type = "app";
          program = "${self.packages.${system}.default}/bin/sd";
        };
        apps.default = self.apps.${system}.sd;
      });
}

stable-diffusion.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -730,6 +730,10 @@ class StableDiffusionGGML {
730730
LOG_INFO("running with Karras scheduler");
731731
denoiser->scheduler = std::make_shared<KarrasSchedule>();
732732
break;
733+
case BETA:
734+
LOG_INFO("running with Beta scheduler");
735+
denoiser->scheduler = std::make_shared<BetaSchedule>();
736+
break;
733737
case EXPONENTIAL:
734738
LOG_INFO("running exponential scheduler");
735739
denoiser->scheduler = std::make_shared<ExponentialSchedule>();
@@ -1524,6 +1528,7 @@ const char* schedule_to_str[] = {
15241528
"default",
15251529
"discrete",
15261530
"karras",
1531+
"beta",
15271532
"exponential",
15281533
"ays",
15291534
"gits",

stable-diffusion.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ enum scheduler_t {
5454
DEFAULT,
5555
DISCRETE,
5656
KARRAS,
57+
BETA,
5758
EXPONENTIAL,
5859
AYS,
5960
GITS,

0 commit comments

Comments
 (0)