Skip to content

Commit 4367c09

Browse files
authored
Fix out-of-memory issue during single-XPU deployment (#3131)
1 parent 8e789dc commit 4367c09

File tree

1 file changed

+9
-4
lines changed

1 file changed

+9
-4
lines changed

fastdeploy/worker/xpu_worker.py

Lines changed: 9 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -94,9 +94,14 @@ def determine_available_memory(self) -> int:
9494
xpu_get_used_global_memory,
9595
)
9696

97-
total_memory = xpu_get_total_global_memory(self.local_rank)
98-
used_memory = xpu_get_used_global_memory(self.local_rank)
99-
free_memory = xpu_get_free_global_memory(self.local_rank)
97+
assert self.device_ids[self.local_rank] is not None, f"device_id is none for rank {self.local_rank}"
98+
assert (
99+
len(self.device_ids) > self.local_rank
100+
), f"device number must be greater than local rank, but get device number is {len(self.device_ids)}, rank is {self.local_rank}"
101+
102+
total_memory = xpu_get_total_global_memory(int(self.device_ids[self.local_rank]))
103+
used_memory = xpu_get_used_global_memory(int(self.device_ids[self.local_rank]))
104+
free_memory = xpu_get_free_global_memory(int(self.device_ids[self.local_rank]))
100105

101106
logger.info(
102107
f"Before warm up, total_memory: {total_memory}, \
@@ -107,7 +112,7 @@ def determine_available_memory(self) -> int:
107112
self.model_runner.profile_run()
108113

109114
total_available_memory = int(total_memory * self.cache_config.gpu_memory_utilization)
110-
used_memory = xpu_get_used_global_memory(self.local_rank)
115+
used_memory = xpu_get_used_global_memory(int(self.device_ids[self.local_rank]))
111116
available_kv_cache_memory = total_available_memory - used_memory
112117
model_block_memory_used = self.cal_theortical_kvcache()
113118
available_kv_cache_memory += model_block_memory_used * self.parallel_config.total_block_num

0 commit comments

Comments (0)