diff --git a/fastdeploy/model_executor/layers/sample/sampler.py b/fastdeploy/model_executor/layers/sample/sampler.py index bf6b191c12..6b08e02e9e 100644 --- a/fastdeploy/model_executor/layers/sample/sampler.py +++ b/fastdeploy/model_executor/layers/sample/sampler.py @@ -447,8 +447,8 @@ def forward_cuda( sampling_metadata.min_dec_lens, sampling_metadata.eos_token_ids, share_inputs["seq_lens_this_time"], - share_inputs["seq_lens_encoder"], - share_inputs["seq_lens_decoder"], + share_inputs["output_padding_offset"], + share_inputs["output_cum_offsets"], max_model_len, ) probs = F.softmax(logits)