
Commit 6f791ca

Add diffusion-specific gguf params in set_vocab; remove setting of rope_theta and rms_norm_eps
1 parent: e864a49

File tree

1 file changed: 3 additions, 12 deletions

convert_hf_to_gguf.py

Lines changed: 3 additions & 12 deletions
@@ -2949,6 +2949,9 @@ def get_vocab_base(self) -> tuple[list[str], list[int], str]:
     def set_vocab(self):
         self._set_vocab_gpt2()
 
+        self.gguf_writer.add_add_bos_token(True)
+        self.gguf_writer.add_diffusion_shift_logits(False)
+
     def set_gguf_parameters(self):
         super().set_gguf_parameters()
         self._try_set_pooling_type()
@@ -2974,14 +2977,6 @@ def set_gguf_parameters(self):
         feed_forward_length = self.hparams.get("mlp_hidden_size", 12288)
         self.gguf_writer.add_feed_forward_length(feed_forward_length)
 
-        # Set RoPE parameters
-        if "rope_theta" in self.hparams:
-            self.gguf_writer.add_rope_freq_base(self.hparams["rope_theta"])
-
-        # Set RMS norm epsilon
-        if "rms_norm_eps" in self.hparams:
-            self.gguf_writer.add_layer_norm_rms_eps(self.hparams["rms_norm_eps"])
-
         # LLaDA models use non-causal attention for diffusion, similar to Dream
         self.gguf_writer.add_causal_attention(False)
         # Handle RoPE scaling similar to LlamaModel and Dream
@@ -2992,10 +2987,6 @@ def set_gguf_parameters(self):
         if mask_token_id is not None:
             self.gguf_writer.add_mask_token_id(mask_token_id)
 
-        self.gguf_writer.add_add_bos_token(True)
-
-        logging.info("Adding diffusion shift logits to False")
-        self.gguf_writer.add_diffusion_shift_logits(False)
 
     @staticmethod
     def permute(weights: Tensor, n_head: int, n_head_kv: int | None):
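
For readers skimming the diff, here is a minimal, self-contained sketch of the net effect, reconstructed from the diff alone. _StubWriter, LLaDASketch, and the bare hparams dict are hypothetical stand-ins so the example runs on its own; only the add_* calls and the mlp_hidden_size default mirror the actual code. Dropping the explicit rope_theta / rms_norm_eps writes is presumably safe because super().set_gguf_parameters() already derives those keys from hparams.

class _StubWriter:
    """Hypothetical stand-in: records gguf key writes instead of emitting a file."""
    def __getattr__(self, name):
        def record(*args):
            print(f"{name}{args}")
        return record

class LLaDASketch:
    """Hypothetical stand-in for the converter class touched by this commit."""
    def __init__(self, hparams=None):
        self.gguf_writer = _StubWriter()
        self.hparams = hparams or {}

    def set_vocab(self):
        # tokenizer setup (self._set_vocab_gpt2()) elided; this commit adds
        # the two diffusion-specific writes right after it
        self.gguf_writer.add_add_bos_token(True)
        self.gguf_writer.add_diffusion_shift_logits(False)

    def set_gguf_parameters(self):
        # super().set_gguf_parameters() and _try_set_pooling_type() elided
        feed_forward_length = self.hparams.get("mlp_hidden_size", 12288)
        self.gguf_writer.add_feed_forward_length(feed_forward_length)
        # rope_theta / rms_norm_eps are intentionally no longer written here;
        # presumably the base class's set_gguf_parameters() covers them
        self.gguf_writer.add_causal_attention(False)

model = LLaDASketch()
model.set_vocab()
model.set_gguf_parameters()

Moving add_add_bos_token and add_diffusion_shift_logits into set_vocab keeps the token-related metadata next to the tokenizer setup, and the now-redundant logging.info call is dropped along the way.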
