We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 15de811 commit d1c120aCopy full SHA for d1c120a
flashinfer/cudnn/decode.py
@@ -22,7 +22,7 @@ def _create_cudnn_handle(stream: torch.cuda.Stream):
22
global _cudnn_handle
23
if _cudnn_handle is None:
24
_cudnn_handle = cudnn.create_handle()
25
- cudnn.set_stream(_cudnn_handle, stream.cuda_stream)
+ # cudnn.set_stream(_cudnn_handle, stream.cuda_stream) # TODO: will fix this in the future
26
return _cudnn_handle
27
28
@@ -89,6 +89,10 @@ def _build_decode_graph(
89
):
90
handle = _create_cudnn_handle(torch.cuda.current_stream())
91
92
+ # WAR: override batch offsets for now, as they lead to poor performance
93
+ batch_offsets_q = None
94
+ batch_offsets_o = None
95
+
96
with cudnn.graph(handle) as (g, _):
97
98
if q.dim() == 3:
0 commit comments