1 change: 1 addition & 0 deletions fastdeploy/config.py
@@ -117,6 +117,7 @@ def __init__(
         self.enable_mm = False
         self.enable_redundant_experts = False
         self.redundant_experts_num = 0
+        self.seed = 0
         self.quantization = None
         for key, value in args.items():
             if hasattr(self, key):
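Reviewer's note: this default participates in `ModelConfig`'s generic override loop, so a user-supplied value wins over the new default of 0. A minimal sketch of that pattern (class body trimmed to the relevant lines; not the full FastDeploy `ModelConfig`):

```python
# Minimal sketch of the override pattern this hunk extends (trimmed).
class ModelConfig:
    def __init__(self, args: dict):
        self.seed = 0            # new default introduced by this PR
        self.quantization = None
        for key, value in args.items():
            if hasattr(self, key):   # only pre-declared attributes are overridden
                setattr(self, key, value)

cfg = ModelConfig({"seed": 42, "unknown_key": "ignored"})
assert cfg.seed == 42
```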
11 changes: 11 additions & 0 deletions fastdeploy/engine/args_utils.py
@@ -316,6 +316,11 @@ class EngineArgs:
     Must be explicitly enabled via the `--enable-logprob` startup parameter to output logprob values.
     """
 
+    seed: Optional[int] = None
+    """
+    Random seed to use for initialization. If not set, a random seed is used.
+    """
+
     enable_early_stop: bool = False
     """
     Flag to enable early stop. Default is False (disabled).
@@ -484,6 +489,12 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
         default=EngineArgs.enable_logprob,
         help="Enable output of token-level log probabilities.",
     )
+    model_group.add_argument(
+        "--seed",
+        type=int,
+        default=None,
+        help="Random seed for initialization. If not specified, a random seed will be used.",
+    )
     model_group.add_argument(
         "--enable-early-stop",
         action="store_true",
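To illustrate the flag's behavior, a small sketch using stdlib argparse (`FlexibleArgumentParser` is FastDeploy's own parser; plain argparse stands in here as an assumption about its interface):

```python
# Sketch: the new --seed flag, modeled with stdlib argparse.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    "--seed",
    type=int,
    default=None,  # None signals "no seed requested"; a random seed is used downstream
    help="Random seed for initialization.",
)

assert parser.parse_args(["--seed", "123"]).seed == 123
assert parser.parse_args([]).seed is None
```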
6 changes: 1 addition & 5 deletions fastdeploy/engine/sampling_params.py
@@ -16,7 +16,6 @@
 
 from __future__ import annotations
 
-import random
 from dataclasses import dataclass, fields
 from typing import Any, List, Optional, Union
 
@@ -155,7 +154,7 @@ def from_optional(
 
     def __post_init__(self):
         if self.seed is None:
-            self.seed = random.randint(0, 922337203685477580)
+            self.seed = 0
         if self.max_tokens is not None and self.reasoning_max_tokens is None:
            self.reasoning_max_tokens = max(int(self.max_tokens * 0.8), 1)
         self._verify_args()
@@ -200,9 +199,6 @@ def _verify_args(self) -> None:
         if self.logprobs is not None and self.logprobs > 20:
             raise ValueError("Invalid value for 'top_logprobs': must be less than or equal to 20.")
 
-        if not 0 <= self.seed <= 922337203685477580:
-            raise ValueError("seed must be in [0, 922337203685477580], got " f"{self.seed}.")
-
     def update_from_tokenizer(self, tokenizer):
         """Support bad words"""
         if self.bad_words is None:
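The behavioral change here: an unset per-request seed used to be drawn at random per request; it now collapses to 0, which the sampler later treats as "no fixed seed", so the old range check becomes dead code. A condensed sketch of the new `__post_init__` path (fields trimmed):

```python
# Condensed sketch of the revised default (fields trimmed).
from dataclasses import dataclass
from typing import Optional

@dataclass
class SamplingParams:
    seed: Optional[int] = None

    def __post_init__(self):
        if self.seed is None:
            self.seed = 0  # 0 == "not user-specified"; sampler falls back to a random seed

assert SamplingParams().seed == 0
assert SamplingParams(seed=7).seed == 7
```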
1 change: 1 addition & 0 deletions fastdeploy/model_executor/layers/sample/meta_data.py
@@ -43,6 +43,7 @@ class SamplingMetadata:
     top_p: paddle.Tensor
     top_k: Optional[paddle.Tensor] = None
     min_p: Optional[paddle.Tensor] = None
+    seed: Optional[paddle.Tensor] = None
     max_num_logprobs: Optional[int] = None
     enable_early_stop: Optional[int] = False
     stop_flags: Optional[paddle.Tensor] = None
6 changes: 5 additions & 1 deletion fastdeploy/model_executor/layers/sample/sampler.py
@@ -282,7 +282,11 @@ def forward_cuda(
 
         probs = min_p_sampling(probs, sampling_metadata.min_p)
 
-        _, next_tokens = top_k_top_p_sampling(probs, sampling_metadata.top_p, sampling_metadata.top_k)
+        if paddle.count_nonzero(sampling_metadata.seed) == 0:
+            seed_value = -1
+        else:
+            seed_value = int(sampling_metadata.seed[0, 0])
+        _, next_tokens = top_k_top_p_sampling(probs, sampling_metadata.top_p, sampling_metadata.top_k, seed=seed_value)
 
         logprobs_tensors = (
             None if num_logprobs is None else self.gather_logprobs(raw_logprobs, num_logprobs, token_ids=next_tokens)
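The gate above reduces a batch of per-request seeds to a single scalar. A standalone sketch of that reduction (paddle required; `select_seed` is a name introduced here for illustration, not a function in the PR):

```python
# Sketch of the seed-selection gate: an all-zero seed tensor (no request
# asked for determinism) maps to -1, i.e. "sample with a random seed";
# otherwise the first request's seed is used for the whole batch.
import paddle

def select_seed(seed_tensor: paddle.Tensor) -> int:
    if paddle.count_nonzero(seed_tensor) == 0:
        return -1
    return int(seed_tensor[0, 0])

assert select_seed(paddle.zeros([4, 1], dtype="int64")) == -1
assert select_seed(paddle.full([4, 1], 42, dtype="int64")) == 42
```

Worth flagging in review: because only `seed[0, 0]` is consulted, a mixed batch where different requests set different seeds samples entirely under the first request's seed.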
9 changes: 9 additions & 0 deletions fastdeploy/utils.py
@@ -29,6 +29,8 @@
 from pathlib import Path
 from typing import Literal, TypeVar, Union
 
+import numpy as np
+import paddle
 import requests
 import yaml
 from aistudio_sdk.snapshot_download import snapshot_download as aistudio_download
@@ -291,6 +293,13 @@ def extract_tar(tar_path, output_dir):
         raise RuntimeError(f"Extraction failed: {e!s}")
 
 
+def set_random_seed(seed: int) -> None:
+    if seed is not None:
+        random.seed(seed)
+        np.random.seed(seed)
+        paddle.seed(seed)
+
+
 def download_model(url, output_dir, temp_tar):
     """
     Download the model and extract it to the specified directory.
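Usage sketch for the new helper, seeding Python's, NumPy's, and Paddle's RNGs in one call (assumes paddle is installed alongside fastdeploy):

```python
from fastdeploy.utils import set_random_seed
import numpy as np

set_random_seed(42)
a = np.random.rand(3)
set_random_seed(42)
b = np.random.rand(3)
assert np.allclose(a, b)  # identical seeds -> identical draws
```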
11 changes: 2 additions & 9 deletions fastdeploy/worker/gpu_model_runner.py
@@ -126,11 +126,7 @@ def __init__(
 
         # Initialize share inputs
         self._init_share_inputs(self.parallel_config.max_num_seqs)
-        self.infer_seed_increment = paddle.full(
-            shape=[self.parallel_config.max_num_seqs, 1],
-            fill_value=4,
-            dtype="int64",
-        )
+
         self.restore_chunked_prefill_request = dict()
 
         # Initialize attention Backend
@@ -795,6 +791,7 @@ def _prepare_inputs(self) -> None:
             top_p=self.share_inputs["top_p"],
             top_k=self.share_inputs["top_k"],
             min_p=self.share_inputs["min_p"],
+            seed=self.share_inputs["infer_seed"],
             step_idx=self.share_inputs["step_idx"],
             pre_token_ids=self.share_inputs["pre_ids"],
             prompt_ids=self.share_inputs["prompt_ids"],
@@ -1096,8 +1093,6 @@ def _dummy_run(
             self.proposer.run(share_inputs=self.share_inputs)
 
         # 7. Update 'infer_seed' and step_cuda()
-        self.share_inputs["infer_seed"].add_(self.infer_seed_increment)
-        self.share_inputs["infer_seed"][:] %= self.MAX_INFER_SEED
         step_cuda(
             self.share_inputs,
             self.cache_config.block_size,
@@ -1368,8 +1363,6 @@ class at the server level, which is too granular for ModelRunner.
             self.proposer.run(share_inputs=self.share_inputs)
 
         # 7. Update 'infer_seed' and step_cuda()
-        self.share_inputs["infer_seed"].add_(self.infer_seed_increment)
-        self.share_inputs["infer_seed"][:] %= self.MAX_INFER_SEED
         if not envs.ENABLE_V1_KVCACHE_SCHEDULER:
             step_cuda(
                 self.share_inputs,
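For context, the deleted logic advanced every request's `infer_seed` by 4 modulo `MAX_INFER_SEED` after each decode step, so seeds churned step to step; this PR leaves the tensor untouched, keeping the seed the sampler sees stable across steps. A sketch of the removed behavior, reconstructed from the deleted lines (`MAX_INFER_SEED`'s real value lives elsewhere in the runner; the one below is a placeholder assumption):

```python
import paddle

MAX_INFER_SEED = 9223372036854775806  # placeholder; the real cap is defined in GPUModelRunner

# Pre-PR per-step seed churn (reconstructed from the deleted diff lines):
infer_seed = paddle.zeros([8, 1], dtype="int64")
increment = paddle.full([8, 1], 4, dtype="int64")
infer_seed.add_(increment)          # advance every request's seed by 4
infer_seed[:] %= MAX_INFER_SEED     # wrap to stay in range
```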