diff --git a/fastdeploy/engine/engine.py b/fastdeploy/engine/engine.py
index e7443bc1db..c76a6432dd 100644
--- a/fastdeploy/engine/engine.py
+++ b/fastdeploy/engine/engine.py
@@ -749,10 +749,6 @@ def insert_tasks(self, tasks, current_id=-1, allocated=False):
         """
         Insert tasks to engine.
         """
-        for task in tasks:
-            start_span_request("DEQUEUE", task, trace.SpanKind.CONSUMER)
-            if task.sampling_params.bad_words is not None:
-                task.sampling_params.update_from_tokenizer(self.data_processor.tokenizer)
         # TODO 返回至 scheduler
         if allocated:
             current_tasks = []
@@ -779,6 +775,11 @@ def insert_tasks(self, tasks, current_id=-1, allocated=False):
             self.engine_worker_queue.put_tasks((current_tasks, self.resource_manager.real_bsz))
             return True
 
+        for task in tasks:
+            start_span_request("DEQUEUE", task, trace.SpanKind.CONSUMER)
+            if task.sampling_params.bad_words is not None:
+                task.sampling_params.update_from_tokenizer(self.data_processor.tokenizer)
+
         self.resource_manager.check_and_free_block_tables()
 
         if not isinstance(tasks, list):
diff --git a/fastdeploy/model_executor/layers/attention/flash_attn_backend.py b/fastdeploy/model_executor/layers/attention/flash_attn_backend.py
index 306164635b..199a26db81 100644
--- a/fastdeploy/model_executor/layers/attention/flash_attn_backend.py
+++ b/fastdeploy/model_executor/layers/attention/flash_attn_backend.py
@@ -208,7 +208,7 @@ def init_attention_metadata(self, forward_meta: ForwardMeta):
         ) = pre_cache_len_concat(
             forward_meta.seq_lens_decoder,
             forward_meta.seq_lens_this_time,
-            metadata.set_max_lengths[2],
+            forward_meta.max_len_tensor_cpu[2],
             self.block_size,
         )
 
diff --git a/fastdeploy/model_executor/load_weight_utils.py b/fastdeploy/model_executor/load_weight_utils.py
index 2856737ff5..01f81ac13d 100644
--- a/fastdeploy/model_executor/load_weight_utils.py
+++ b/fastdeploy/model_executor/load_weight_utils.py
@@ -215,11 +215,13 @@ def load_pre_sharded_checkpoint(model_path: str, local_rank: int, use_fastsafete
     """
     load_pre_sharded_checkpoint
     """
+    from fastdeploy.model_executor.layers.utils import get_tensor
+
     state_dict = {}
     _, safetensor_files = get_all_safetensors(os.path.join(model_path, f"rank{local_rank}"))
     weights_iterator = safetensors_weights_iterator(safetensor_files)
     for name, weight in weights_iterator:
-        state_dict[name] = weight
+        state_dict[name] = get_tensor(weight)
     return state_dict
 
 
diff --git a/setup.py b/setup.py
index 87099104b7..e13e70d07e 100644
--- a/setup.py
+++ b/setup.py
@@ -181,7 +181,7 @@ def get_name():
 
 cmdclass_dict = {"bdist_wheel": CustomBdistWheel}
 cmdclass_dict["build_ext"] = CMakeBuild
-FASTDEPLOY_VERSION = os.environ.get("FASTDEPLOY_VERSION", "2.1.0-dev")
+FASTDEPLOY_VERSION = os.environ.get("FASTDEPLOY_VERSION", "2.1.0")
 cmdclass_dict["build_optl"] = PostInstallCommand
 
 setup(