From 4880c16be3982ba442de8ca848911c1102fc7f08 Mon Sep 17 00:00:00 2001 From: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com> Date: Thu, 31 Jul 2025 20:30:24 +0800 Subject: [PATCH 1/4] Update setup.py --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 87099104b7..e13e70d07e 100644 --- a/setup.py +++ b/setup.py @@ -181,7 +181,7 @@ def get_name(): cmdclass_dict = {"bdist_wheel": CustomBdistWheel} cmdclass_dict["build_ext"] = CMakeBuild -FASTDEPLOY_VERSION = os.environ.get("FASTDEPLOY_VERSION", "2.1.0-dev") +FASTDEPLOY_VERSION = os.environ.get("FASTDEPLOY_VERSION", "2.1.0") cmdclass_dict["build_optl"] = PostInstallCommand setup( From c8dd5976ae5db01d0cc67a2629268bd2bce9a80a Mon Sep 17 00:00:00 2001 From: chen <103103266+ckl117@users.noreply.github.com> Date: Fri, 1 Aug 2025 22:34:33 +0800 Subject: [PATCH 2/4] fix request_output sampling_params (#3154) --- fastdeploy/engine/engine.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fastdeploy/engine/engine.py b/fastdeploy/engine/engine.py index e7443bc1db..c76a6432dd 100644 --- a/fastdeploy/engine/engine.py +++ b/fastdeploy/engine/engine.py @@ -749,10 +749,6 @@ def insert_tasks(self, tasks, current_id=-1, allocated=False): """ Insert tasks to engine. """ - for task in tasks: - start_span_request("DEQUEUE", task, trace.SpanKind.CONSUMER) - if task.sampling_params.bad_words is not None: - task.sampling_params.update_from_tokenizer(self.data_processor.tokenizer) # TODO 返回至 scheduler if allocated: current_tasks = [] @@ -779,6 +775,11 @@ def insert_tasks(self, tasks, current_id=-1, allocated=False): self.engine_worker_queue.put_tasks((current_tasks, self.resource_manager.real_bsz)) return True + for task in tasks: + start_span_request("DEQUEUE", task, trace.SpanKind.CONSUMER) + if task.sampling_params.bad_words is not None: + task.sampling_params.update_from_tokenizer(self.data_processor.tokenizer) + self.resource_manager.check_and_free_block_tables() if not isinstance(tasks, list): From d4059cabf0c217b32555989244447449f3d8cc73 Mon Sep 17 00:00:00 2001 From: RAM Date: Fri, 1 Aug 2025 22:34:59 +0800 Subject: [PATCH 3/4] fix typo (#3153) --- .../model_executor/layers/attention/flash_attn_backend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fastdeploy/model_executor/layers/attention/flash_attn_backend.py b/fastdeploy/model_executor/layers/attention/flash_attn_backend.py index 306164635b..199a26db81 100644 --- a/fastdeploy/model_executor/layers/attention/flash_attn_backend.py +++ b/fastdeploy/model_executor/layers/attention/flash_attn_backend.py @@ -208,7 +208,7 @@ def init_attention_metadata(self, forward_meta: ForwardMeta): ) = pre_cache_len_concat( forward_meta.seq_lens_decoder, forward_meta.seq_lens_this_time, - metadata.set_max_lengths[2], + forward_meta.max_len_tensor_cpu[2], self.block_size, ) From 87987b4e63ccb4c6e1924e7428e25dc2760aa552 Mon Sep 17 00:00:00 2001 From: bukejiyu <52310069+bukejiyu@users.noreply.github.com> Date: Mon, 4 Aug 2025 10:44:20 +0800 Subject: [PATCH 4/4] fix load_pre_sharded_checkpoint (#3152) Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com> --- fastdeploy/model_executor/load_weight_utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fastdeploy/model_executor/load_weight_utils.py b/fastdeploy/model_executor/load_weight_utils.py index 2856737ff5..01f81ac13d 100644 --- a/fastdeploy/model_executor/load_weight_utils.py +++ b/fastdeploy/model_executor/load_weight_utils.py @@ -215,11 +215,13 @@ def load_pre_sharded_checkpoint(model_path: str, local_rank: int, use_fastsafete """ load_pre_sharded_checkpoint """ + from fastdeploy.model_executor.layers.utils import get_tensor + state_dict = {} _, safetensor_files = get_all_safetensors(os.path.join(model_path, f"rank{local_rank}")) weights_iterator = safetensors_weights_iterator(safetensor_files) for name, weight in weights_iterator: - state_dict[name] = weight + state_dict[name] = get_tensor(weight) return state_dict