Skip to content

Commit 17903a8

Browse files
committed
fix
1 parent d4aa468 commit 17903a8

File tree

2 files changed

+2
-9
lines changed

2 files changed

+2
-9
lines changed

fastdeploy/worker/gpu_model_runner.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -554,7 +554,6 @@ def _init_share_inputs(self, max_num_seqs: int):
554554
"""
555555
Initialize all share buffers for model inputs.
556556
"""
557-
self.MAX_INFER_SEED = 9223372036854775806
558557
self.share_inputs = {}
559558

560559
self.share_inputs["pre_ids"] = paddle.full(

fastdeploy/worker/xpu_model_runner.py

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -357,11 +357,6 @@ def __init__(self, fd_config: FDConfig, device: str, rank: int, local_rank: int)
357357

358358
# Initialize share inputs
359359
self._init_share_inputs(self.fd_config.parallel_config.max_num_seqs)
360-
self.infer_seed_increment = paddle.full(
361-
shape=[self.parallel_config.max_num_seqs, 1],
362-
fill_value=4,
363-
dtype="int64",
364-
)
365360

366361
# Initialize attention Backend
367362
# Note(gonshaotian): Currently, all attention layers share one attention backend instance.
@@ -529,7 +524,6 @@ def _init_share_inputs(self, max_num_seqs: int):
529524
"""Initialize all share buffers for model inputs.
530525
Note: In the future, we may abandon share buffers.
531526
"""
532-
self.MAX_INFER_SEED = 9223372036854775806
533527
self.share_inputs = {}
534528

535529
self.share_inputs["pre_ids"] = paddle.full(
@@ -673,6 +667,7 @@ def _prepare_inputs(self, is_dummy_run=False) -> None:
673667
top_p=self.share_inputs["top_p"],
674668
top_k=self.share_inputs["top_k"],
675669
min_p=self.share_inputs["min_p"],
670+
seed=self.share_inputs["infer_seed"],
676671
step_idx=self.share_inputs["step_idx"],
677672
pre_token_ids=self.share_inputs["pre_ids"],
678673
frequency_penalties=self.share_inputs["frequency_score"],
@@ -911,8 +906,7 @@ class at the server level, which is too granular for ModelRunner.
911906
)
912907

913908
# 7. Update 'infer_seed' and step_paddle()
914-
self.share_inputs["infer_seed"].add_(self.infer_seed_increment)
915-
self.share_inputs["infer_seed"][:] %= self.MAX_INFER_SEED
909+
916910
step_paddle(
917911
self.share_inputs,
918912
self.cache_config.block_size,

0 commit comments

Comments
 (0)