
Commit 05f99c7

Remove model-specific sampling
1 parent 6bb0093 commit 05f99c7

File tree: 9 files changed, +360 −586 lines changed

common/arg.cpp

Lines changed: 10 additions & 9 deletions
```diff
@@ -3451,37 +3451,38 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
     ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
 
     add_opt(common_arg(
-        { "--diffusion--dream-eps" }, "F",
+        { "--diffusion-eps" }, "F",
         string_format("epsilon for timesteps (default: %.6f)", (double) params.diffusion.eps),
         [](common_params & params, const std::string & value) { params.diffusion.eps = std::stof(value); }
     ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
     add_opt(common_arg(
-        { "--diffusion-dream-algorithm" }, "N",
-        string_format("diffusion algorithm: 0=ORIGIN, 1=MASKGIT_PLUS, 2=TOPK_MARGIN, 3=ENTROPY (default: %d)",
+        { "--diffusion-algorithm" }, "N",
+        string_format("diffusion algorithm: 0=ORIGIN, 1=ENTROPY_BASED, 2=MARGIN_BASED, 3=RANDOM, 4=LOW_CONFIDENCE (default: %d)",
                       params.diffusion.algorithm),
         [](common_params & params, int value) { params.diffusion.algorithm = value; }
     ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
     add_opt(common_arg(
-        { "--diffusion-dream-alg-temp" }, "F",
+        { "--diffusion-alg-temp" }, "F",
         string_format("dream algorithm temperature (default: %.3f)", (double) params.diffusion.alg_temp),
         [](common_params & params, const std::string & value) { params.diffusion.alg_temp = std::stof(value); }
     ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
 
     add_opt(common_arg(
-        { "--diffusion-llada-block-length" }, "N",
+        { "--diffusion-block-length" }, "N",
         string_format("llada block length for generation (default: %d)", params.diffusion.block_length),
         [](common_params & params, int value) { params.diffusion.block_length = value; }
     ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
     add_opt(common_arg(
-        { "--diffusion-llada-cfg-scale" }, "F",
+        { "--diffusion-cfg-scale" }, "F",
         string_format("llada classifier-free guidance scale (default: %.3f)", (double) params.diffusion.cfg_scale),
         [](common_params & params, const std::string & value) { params.diffusion.cfg_scale = std::stof(value); }
     ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
     add_opt(common_arg(
-        { "--diffusion-llada-algorithm" }, "N",
-        string_format("llada remasking algorithm: 0=LOW_CONFIDENCE, 1=RANDOM (default: %d)", params.diffusion.remasking),
-        [](common_params & params, int value) { params.diffusion.remasking = value; }
+        { "--diffusion-add-gumbel-noise" }, "F",
+        string_format("add gumbel noise to the logits if temp > 0.0 (default: %s)", params.diffusion.add_gumbel_noise ? "true" : "false"),
+        [](common_params & params, const std::string & value) { params.diffusion.add_gumbel_noise = std::stof(value); }
     ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
 
+
     return ctx_arg;
 }
```
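
For context on the new `--diffusion-add-gumbel-noise` flag: adding Gumbel noise to temperature-scaled logits and taking the argmax is the standard trick for drawing a sample from the softmax distribution. The sketch below illustrates only that general technique; the function name `gumbel_argmax` is made up for this illustration, and the exact formulation used by the diffusion example may differ.

```cpp
#include <cmath>
#include <cstddef>
#include <random>
#include <vector>

// Gumbel-max trick: argmax(logits/temp + g) with g ~ Gumbel(0, 1) is
// equivalent to sampling from softmax(logits/temp). Sketch only; assumes
// temp > 0, and is not the actual llama.cpp diffusion sampling code.
static size_t gumbel_argmax(const std::vector<float> & logits, float temp, std::mt19937 & rng) {
    std::uniform_real_distribution<float> uniform(1e-20f, 1.0f);
    size_t best     = 0;
    float  best_val = -INFINITY;
    for (size_t i = 0; i < logits.size(); ++i) {
        const float g   = -std::log(-std::log(uniform(rng))); // Gumbel(0, 1) noise
        const float val = logits[i] / temp + g;
        if (val > best_val) {
            best_val = val;
            best     = i;
        }
    }
    return best;
}
```

With the flag enabled and a sampling temperature above zero, token selection at each unmasking step becomes stochastic rather than greedy.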

common/common.h

Lines changed: 11 additions & 13 deletions
```diff
@@ -220,19 +220,17 @@ struct common_params_vocoder {
 };
 
 struct common_params_diffusion {
-    // Common parameters
-    int32_t steps        = 128;   // number of diffusion steps
-    bool    visual_mode  = false; // show progressive diffusion on screen
-
-    // Dream-specific parameters
-    float   eps          = 1e-3f; // epsilon for timesteps
-    int32_t algorithm    = 3;     // diffusion algorithm (0=ORIGIN, 1=MASKGIT_PLUS, 2=TOPK_MARGIN, 3=ENTROPY)
-    float   alg_temp     = 0.0f;  // algorithm temperature
-
-    // LLaDA-specific parameters
-    int32_t block_length = 32;    // block length for generation
-    float   cfg_scale    = 0.2f;  // classifier-free guidance scale
-    int32_t remasking    = 1;     // remasking algorithm: 0=LOW_CONFIDENCE, 1=RANDOM
+    int32_t steps        = 128;
+    bool    visual_mode  = false;
+
+    float   eps          = 0;     // epsilon for timesteps
+    int32_t block_length = 32;    // block length for generation
+
+    int32_t algorithm    = 4;     // default algorithm: low-confidence
+    float   alg_temp     = 0.0f;  // algorithm temperature
+
+    float   cfg_scale    = 0;     // classifier-free guidance scale
+    bool    add_gumbel_noise = false; // add gumbel noise to the logits if temp > 0.0
 };
 
 enum common_reasoning_format {
```
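
The unified `algorithm` field replaces the separate Dream and LLaDA settings. As a rough illustration of how such strategies usually score a still-masked position (higher score = unmask earlier), under the common definitions of entropy-, margin-, random-, and low-confidence-based selection — the helper name `token_confidence` and its signature are hypothetical, not the actual llama.cpp code:

```cpp
#include <algorithm>
#include <cmath>
#include <functional>
#include <random>
#include <vector>

// Illustrative confidence scores for the unified strategies
// (1=ENTROPY_BASED, 2=MARGIN_BASED, 3=RANDOM, 4=LOW_CONFIDENCE).
// Sketch of the usual definitions only, not the llama.cpp implementation.
static float token_confidence(const std::vector<float> & probs, int sampled_tok,
                              int algorithm, std::mt19937 & rng) {
    switch (algorithm) {
        case 1: { // ENTROPY_BASED: negative entropy of the token distribution
            float entropy = 0.0f;
            for (float p : probs) {
                if (p > 0.0f) {
                    entropy -= p * std::log(p);
                }
            }
            return -entropy;
        }
        case 2: { // MARGIN_BASED: gap between the top-1 and top-2 probabilities
            std::vector<float> sorted = probs;
            std::partial_sort(sorted.begin(), sorted.begin() + 2, sorted.end(), std::greater<float>());
            return sorted[0] - sorted[1];
        }
        case 3: { // RANDOM: uniform score, i.e. unmask in random order
            return std::uniform_real_distribution<float>(0.0f, 1.0f)(rng);
        }
        default: // LOW_CONFIDENCE (4): probability assigned to the sampled token
            return probs[sampled_tok];
    }
}
```

ORIGIN (0), which transfers probability mass based on timestep ratios, is omitted from the sketch.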

convert_hf_to_gguf.py

Lines changed: 6 additions & 0 deletions
```diff
@@ -2988,9 +2988,15 @@ def set_gguf_parameters(self):
 
         # Add LLaDA-specific parameters
         mask_token_id = self.hparams.get("mask_token_id")
+
         if mask_token_id is not None:
             self.gguf_writer.add_mask_token_id(mask_token_id)
 
+        self.gguf_writer.add_add_bos_token(True)
+
+        logging.info("Adding diffusion shift logits to False")
+        self.gguf_writer.add_diffusion_shift_logits(False)
+
     @staticmethod
     def permute(weights: Tensor, n_head: int, n_head_kv: int | None):
         if n_head_kv is not None and n_head != n_head_kv:
```
examples/diffusion/README.md

Lines changed: 5 additions & 39 deletions
```diff
@@ -1,41 +1,7 @@
-# Diffusion Text Generation Examples
+# Diffusion Text Generation
 
-This directory contains implementations for diffusion-based text generation using two different model architectures: **Dream** and **LLaDA-8B**. Both models use iterative denoising processes to generate text, but employ different sampling strategies and algorithms.
+This directory contains implementations for Diffusion LLMs (DLLMs)
 
-## Supported Architechtures
-
-### 1. Dream
-
-Example models:
-- https://huggingface.co/Dream-org/Dream-v0-Base-7B
-- PR - https://github.com/ggml-org/llama.cpp/pull/14644
-
-The Dream model supports four different sampling algorithms controlled by the `--diffusion-dream-algorithm` parameter:
-
-1. **ORIGIN (0)** - Original diffusion algorithm
-   - Uses probability transfer based on timestep ratios
-
-2. **MASKGIT_PLUS (1)** - Enhanced MaskGIT sampling
-   - Improved version of the MaskGIT algorithm
-
-3. **TOPK_MARGIN (2)** - Top-K margin-based sampling
-   - Confidence calculated as the margin between top-1 and top-2 probabilities
-
-4. **ENTROPY (3)** - Entropy-based sampling (default, recommended)
-   - Uses entropy calculation for confidence estimation
-
-### 2. LLaDA
-
-Example models:
-- https://huggingface.co/GSAI-ML/LLaDA-8B-Instruct
-- PR: https://github.com/ggml-org/llama.cpp/pull/14771
-
-### LLaDA Model Remasking Strategies
-
-The LLaDA model uses two remasking approaches controlled by the `--diffusion-llada-algorithm` parameter:
-
-1. **REMASKING_LOW_CONFIDENCE (0)** - Default strategy
-   - Remasks tokens with lowest confidence scores
-   - Uses softmax probabilities to determine confidence
-
-2. **REMASKING_RANDOM (1)** - Random remasking
+More Info:
+- https://github.com/ggml-org/llama.cpp/pull/14644
+- https://github.com/ggml-org/llama.cpp/pull/14771
```
