USE_RERANKER = False  # If True, enable the reranker; otherwise fall back to the original logic

async def adapt_query(cache_data_convert, *args, **kwargs):
+   # Extract query parameters
    chat_cache = kwargs.pop("cache_obj")
    scope = kwargs.pop("scope")
    model = scope['model']
    context = kwargs.pop("cache_context", {})
    cache_factor = kwargs.pop("cache_factor", 1.0)
+
+   # Preprocess query for embedding generation
    pre_embedding_data = chat_cache.query_pre_embedding_func(
        kwargs,
        extra_param=context.get("pre_embedding_func", None),
        prompts=chat_cache.prompts,
    )
+
+   # Generate embedding with performance monitoring
    embedding_data = await time_cal(
        chat_cache.embedding_func,
        func_name="embedding",
@@ -39,24 +44,29 @@ async def adapt_query(cache_data_convert, *args, **kwargs):
        model=model
    )

+   # Initialize result containers
    cache_answers = []
    cache_questions = []
    cache_ids = []
    cosine_similarity = None

+   # Similarity evaluation based on metric type
    if chat_cache.similarity_metric_type == MetricType.COSINE:
        cosine_similarity = cache_data_list[0][0]
        # This code uses the built-in cosine similarity evaluation in milvus
        if cosine_similarity < chat_cache.similarity_threshold:
-           return None
+           return None  # No suitable match found
+
    elif chat_cache.similarity_metric_type == MetricType.L2:
-       ## this is the code that uses L2 for similarity evaluation
+       # this is the code that uses L2 for similarity evaluation
        similarity_threshold = chat_cache.similarity_threshold
        similarity_threshold_long = chat_cache.similarity_threshold_long

        min_rank, max_rank = chat_cache.similarity_evaluation.range()
        rank_threshold = (max_rank - min_rank) * similarity_threshold * cache_factor
        rank_threshold_long = (max_rank - min_rank) * similarity_threshold_long * cache_factor
+
+       # Clamp thresholds to valid range
        rank_threshold = (
            max_rank
            if rank_threshold > max_rank
@@ -71,6 +81,8 @@ async def adapt_query(cache_data_convert, *args, **kwargs):
            if rank_threshold_long < min_rank
            else rank_threshold_long
        )
+
+       # Evaluate similarity score
        if cache_data_list is None or len(cache_data_list) == 0:
            rank_pre = -1.0
        else:
@@ -81,12 +93,13 @@ async def adapt_query(cache_data_convert, *args, **kwargs):
            extra_param=context.get("evaluation_func", None),
        )
        if rank_pre < rank_threshold:
-           return None
+           return None  # Similarity too low
    else:
        raise ValueError(
            f"Unsupported similarity metric type: {chat_cache.similarity_metric_type}"
        )

+   # Process search results with optional reranking
    if USE_RERANKER:
        reranker = FlagReranker('BAAI/bge-reranker-v2-m3', use_fp16=False)
        for cache_data in cache_data_list:
@@ -116,7 +129,6 @@ async def adapt_query(cache_data_convert, *args, **kwargs):
                "question": pre_embedding_data,
                "embedding": embedding_data,
            }
-
            eval_cache_data = {
                "question": ret[0],
                "answer": ret[1],
@@ -135,9 +147,10 @@ async def adapt_query(cache_data_convert, *args, **kwargs):
            cache_questions.append((rank, ret[1]))
            cache_ids.append((rank, primary_id))
    else:
-       # When the reranker is not used, follow the original logic
+       # Original logic without reranking
        for cache_data in cache_data_list:
            primary_id = cache_data[1]
+           # Retrieve full cache entry data
            ret = await asyncio.to_thread(
                chat_cache.data_manager.get_scalar_data,
                cache_data, extra_param=context.get("get_scalar_data", None), model=model
@@ -150,6 +163,7 @@ async def adapt_query(cache_data_convert, *args, **kwargs):
                cache_answers.append((cosine_similarity, ret[0]))
                cache_questions.append((cosine_similarity, ret[1]))
                cache_ids.append((cosine_similarity, primary_id))
+
            elif chat_cache.similarity_metric_type == MetricType.L2:
                if "deps" in context and hasattr(ret.question, "deps"):
                    eval_query_data = {
@@ -167,13 +181,14 @@ async def adapt_query(cache_data_convert, *args, **kwargs):
                    "question": pre_embedding_data,
                    "embedding": embedding_data,
                }
-
                eval_cache_data = {
                    "question": ret[0],
                    "answer": ret[1],
                    "search_result": cache_data,
                    "embedding": None
                }
+
+               # Evaluate similarity for this specific result
                rank = chat_cache.similarity_evaluation.evaluation(
                    eval_query_data,
                    eval_cache_data,
@@ -195,6 +210,7 @@ async def adapt_query(cache_data_convert, *args, **kwargs):
                f"Unsupported similarity metric type: {chat_cache.similarity_metric_type}"
            )

+   # Sort results by similarity score (highest first)
    cache_answers = sorted(cache_answers, key=lambda x: x[0], reverse=True)
    cache_questions = sorted(cache_questions, key=lambda x: x[0], reverse=True)
    cache_ids = sorted(cache_ids, key=lambda x: x[0], reverse=True)
@@ -208,12 +224,14 @@ async def adapt_query(cache_data_convert, *args, **kwargs):
        return_id = chat_cache.post_process_messages_func(
            [t[1] for t in cache_ids]
        )
-       # Update hit count
+
+       # Update hit count for analytics (async to avoid blocking)
        try:
            asyncio.create_task(asyncio.to_thread(chat_cache.data_manager.update_hit_count, return_id))
        except Exception:
            logging.info('update_hit_count except, please check!')

+       # Record cache hit for reporting
        chat_cache.report.hint_cache()
        return cache_data_convert(return_message, return_query)
    return None
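
For reference, a minimal sketch of how the FlagEmbedding reranker used above is typically applied to rescore retrieved candidates. The actual rescoring code falls inside an elided hunk, so the query, candidate list, and threshold below are illustrative assumptions, not the code from this change.

# Sketch only: candidate texts and RERANK_THRESHOLD are hypothetical values.
from FlagEmbedding import FlagReranker

reranker = FlagReranker('BAAI/bge-reranker-v2-m3', use_fp16=False)

query = "how do I reset my password?"
candidates = [
    "How can I reset my account password?",
    "What is the refund policy?",
]

# compute_score takes [query, passage] pairs and returns one relevance score per pair;
# higher scores mean a better semantic match (raw scores can be negative).
scores = reranker.compute_score([[query, c] for c in candidates])

RERANK_THRESHOLD = 0.0  # illustrative cutoff
ranked = sorted(zip(scores, candidates), reverse=True)
kept = [(s, c) for s, c in ranked if s >= RERANK_THRESHOLD]
print(kept)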
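
Likewise, a small worked example of the L2 rank-threshold computation shown in this change, assuming a similarity evaluation whose range() is (0.0, 1.0); all numbers are illustrative.

# Worked example (illustrative numbers, not values from the cache).
min_rank, max_rank = 0.0, 1.0  # assumed similarity_evaluation.range()
similarity_threshold = 0.8
cache_factor = 1.0

rank_threshold = (max_rank - min_rank) * similarity_threshold * cache_factor  # 0.8
# Clamp into [min_rank, max_rank], mirroring the conditional expressions above.
rank_threshold = max(min_rank, min(max_rank, rank_threshold))

rank_pre = 0.85  # hypothetical evaluation score for the top search result
print(rank_pre >= rank_threshold)  # True: the cached answer would be returned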