
Commit d4aa468

support seed
1 parent 6ead7a3 commit d4aa468

7 files changed: +30 −15 lines

fastdeploy/config.py

Lines changed: 1 addition & 0 deletions

@@ -117,6 +117,7 @@ def __init__(
         self.enable_mm = False
         self.enable_redundant_experts = False
         self.redundant_experts_num = 0
+        self.seed = 0
         self.quantization = None
         for key, value in args.items():
             if hasattr(self, key):
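
Together with the assignment loop above, the new attribute gives every config a deterministic default: seed stays 0 unless the caller's args dict overrides it. A minimal standalone sketch of the same pattern (MiniConfig is a hypothetical stand-in, not the FastDeploy class):

    class MiniConfig:
        def __init__(self, args: dict):
            self.seed = 0  # new default, mirroring the diff above
            self.quantization = None
            for key, value in args.items():
                if hasattr(self, key):  # only known attributes are overridden
                    setattr(self, key, value)

    print(MiniConfig({}).seed)            # 0
    print(MiniConfig({"seed": 42}).seed)  # 42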

fastdeploy/engine/args_utils.py

Lines changed: 11 additions & 0 deletions

@@ -316,6 +316,11 @@ class EngineArgs:
     Must be explicitly enabled via the `--enable-logprob` startup parameter to output logprob values.
     """

+    seed: Optional[int] = None
+    """
+    Random seed to use for initialization. If not set, a random seed is used.
+    """
+
     enable_early_stop: bool = False
     """
     Flag to enable early stop. Default is False (disabled).

@@ -484,6 +489,12 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
             default=EngineArgs.enable_logprob,
             help="Enable output of token-level log probabilities.",
         )
+        model_group.add_argument(
+            "--seed",
+            type=int,
+            default=None,
+            help="Random seed for initialization. If not specified, a random seed will be used.",
+        )
         model_group.add_argument(
             "--enable-early-stop",
             action="store_true",

fastdeploy/engine/sampling_params.py

Lines changed: 1 addition & 5 deletions

@@ -16,7 +16,6 @@

 from __future__ import annotations

-import random
 from dataclasses import dataclass, fields
 from typing import Any, List, Optional, Union

@@ -155,7 +154,7 @@ def from_optional(

     def __post_init__(self):
         if self.seed is None:
-            self.seed = random.randint(0, 922337203685477580)
+            self.seed = 0
         if self.max_tokens is not None and self.reasoning_max_tokens is None:
             self.reasoning_max_tokens = max(int(self.max_tokens * 0.8), 1)
         self._verify_args()

@@ -200,9 +199,6 @@ def _verify_args(self) -> None:
         if self.logprobs is not None and self.logprobs > 20:
             raise ValueError("Invalid value for 'top_logprobs': must be less than or equal to 20.")

-        if not 0 <= self.seed <= 922337203685477580:
-            raise ValueError("seed must be in [0, 922337203685477580], got " f"{self.seed}.")
-
     def update_from_tokenizer(self, tokenizer):
         """Support bad words"""
         if self.bad_words is None:
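
With `import random` gone, an unset seed now falls back to the fixed value 0 instead of a random draw, and the old range check in _verify_args is dropped outright. A trimmed stand-in dataclass (not the full SamplingParams) showing the new __post_init__ behavior:

    from dataclasses import dataclass
    from typing import Optional

    @dataclass
    class MiniSamplingParams:
        seed: Optional[int] = None
        max_tokens: Optional[int] = None
        reasoning_max_tokens: Optional[int] = None

        def __post_init__(self):
            if self.seed is None:
                self.seed = 0  # was random.randint(0, 922337203685477580) before this commit
            if self.max_tokens is not None and self.reasoning_max_tokens is None:
                self.reasoning_max_tokens = max(int(self.max_tokens * 0.8), 1)

    p = MiniSamplingParams(max_tokens=100)
    print(p.seed, p.reasoning_max_tokens)  # 0 80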

fastdeploy/model_executor/layers/sample/meta_data.py

Lines changed: 1 addition & 0 deletions

@@ -43,6 +43,7 @@ class SamplingMetadata:
     top_p: paddle.Tensor
     top_k: Optional[paddle.Tensor] = None
     min_p: Optional[paddle.Tensor] = None
+    seed: Optional[paddle.Tensor] = None
     max_num_logprobs: Optional[int] = None
     enable_early_stop: Optional[int] = False
     stop_flags: Optional[paddle.Tensor] = None
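
A hedged sketch of how the new field could be populated, assuming one int64 seed per request in a [batch, 1] tensor (which matches the seed[0, 0] indexing in sampler.py below), with zeros standing for "no user-provided seed":

    import paddle

    batch = 4
    # One seed slot per request; zeros mean the request did not pin a seed.
    seed = paddle.full(shape=[batch, 1], fill_value=0, dtype="int64")
    seed[0, 0] = 42  # first request asked for a fixed seed
    print(seed.shape, int(seed[0, 0]))  # [4, 1] 42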

fastdeploy/model_executor/layers/sample/sampler.py

Lines changed: 5 additions & 1 deletion

@@ -282,7 +282,11 @@ def forward_cuda(

         probs = min_p_sampling(probs, sampling_metadata.min_p)

-        _, next_tokens = top_k_top_p_sampling(probs, sampling_metadata.top_p, sampling_metadata.top_k)
+        if paddle.count_nonzero(sampling_metadata.seed) == 0:
+            seed_value = -1
+        else:
+            seed_value = int(sampling_metadata.seed[0, 0])
+        _, next_tokens = top_k_top_p_sampling(probs, sampling_metadata.top_p, sampling_metadata.top_k, seed=seed_value)

         logprobs_tensors = (
             None if num_logprobs is None else self.gather_logprobs(raw_logprobs, num_logprobs, token_ids=next_tokens)
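
The new branch collapses the per-request seed tensor to a single scalar: if every entry is zero, -1 is passed (evidently the "unseeded" sentinel for the sampling kernel), otherwise the first request's seed is used for the whole batch. A standalone restatement of just that reduction:

    import paddle

    def collapse_seed(seed: paddle.Tensor) -> int:
        # Mirrors the forward_cuda branch: an all-zero tensor means unseeded.
        if paddle.count_nonzero(seed) == 0:
            return -1
        return int(seed[0, 0])

    print(collapse_seed(paddle.zeros([4, 1], dtype="int64")))     # -1
    print(collapse_seed(paddle.full([4, 1], 42, dtype="int64")))  # 42

Note that only seed[0, 0] is consulted once any entry is nonzero, so a batch mixing seeded and unseeded requests samples with the first entry's seed.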

fastdeploy/utils.py

Lines changed: 9 additions & 0 deletions

@@ -29,6 +29,8 @@
 from pathlib import Path
 from typing import Literal, TypeVar, Union

+import numpy as np
+import paddle
 import requests
 import yaml
 from aistudio_sdk.snapshot_download import snapshot_download as aistudio_download

@@ -291,6 +293,13 @@ def extract_tar(tar_path, output_dir):
         raise RuntimeError(f"Extraction failed: {e!s}")


+def set_random_seed(seed: int) -> None:
+    if seed is not None:
+        random.seed(seed)
+        np.random.seed(seed)
+        paddle.seed(seed)
+
+
 def download_model(url, output_dir, temp_tar):
     """
     Download the model and extract it to the specified directory.
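
The helper seeds all three RNG sources (Python, NumPy, Paddle) in one call. A self-contained copy with a quick reproducibility check, assuming the usual single invocation at startup:

    import random

    import numpy as np
    import paddle

    def set_random_seed(seed: int) -> None:
        if seed is not None:
            random.seed(seed)
            np.random.seed(seed)
            paddle.seed(seed)

    set_random_seed(42)
    a = (random.random(), float(np.random.rand()), float(paddle.rand([1])))
    set_random_seed(42)
    b = (random.random(), float(np.random.rand()), float(paddle.rand([1])))
    assert a == b  # identical draws after re-seeding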

fastdeploy/worker/gpu_model_runner.py

Lines changed: 2 additions & 9 deletions

@@ -126,11 +126,7 @@ def __init__(

         # Initialize share inputs
         self._init_share_inputs(self.parallel_config.max_num_seqs)
-        self.infer_seed_increment = paddle.full(
-            shape=[self.parallel_config.max_num_seqs, 1],
-            fill_value=4,
-            dtype="int64",
-        )
+
         self.restore_chunked_prefill_request = dict()

         # Initialize attention Backend

@@ -795,6 +791,7 @@ def _prepare_inputs(self) -> None:
             top_p=self.share_inputs["top_p"],
             top_k=self.share_inputs["top_k"],
             min_p=self.share_inputs["min_p"],
+            seed=self.share_inputs["infer_seed"],
             step_idx=self.share_inputs["step_idx"],
             pre_token_ids=self.share_inputs["pre_ids"],
             prompt_ids=self.share_inputs["prompt_ids"],

@@ -1096,8 +1093,6 @@ def _dummy_run(
             self.proposer.run(share_inputs=self.share_inputs)

             # 7. Update 'infer_seed' and step_cuda()
-            self.share_inputs["infer_seed"].add_(self.infer_seed_increment)
-            self.share_inputs["infer_seed"][:] %= self.MAX_INFER_SEED
             step_cuda(
                 self.share_inputs,
                 self.cache_config.block_size,

@@ -1368,8 +1363,6 @@ class at the server level, which is too granular for ModelRunner.
             self.proposer.run(share_inputs=self.share_inputs)

             # 7. Update 'infer_seed' and step_cuda()
-            self.share_inputs["infer_seed"].add_(self.infer_seed_increment)
-            self.share_inputs["infer_seed"][:] %= self.MAX_INFER_SEED
             if not envs.ENABLE_V1_KVCACHE_SCHEDULER:
                 step_cuda(
                     self.share_inputs,
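
For reference, the deleted lines advanced every request's infer_seed by 4 each step, modulo MAX_INFER_SEED; with them gone, the seed stays at whatever the request supplied, which is what makes seeded sampling repeatable across steps. A standalone restatement of the removed arithmetic (the MAX_INFER_SEED value here is an assumed stand-in, not taken from this diff):

    import paddle

    MAX_INFER_SEED = 9223372036854775806  # assumed cap; the real constant lives on the runner

    infer_seed = paddle.full([4, 1], 42, dtype="int64")
    increment = paddle.full([4, 1], 4, dtype="int64")

    # The two lines this commit removes from _dummy_run and the decode loop:
    infer_seed.add_(increment)
    infer_seed[:] %= MAX_INFER_SEED
    print(int(infer_seed[0, 0]))  # 46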
