Merge pull request #47 from speechmatics/fix/wait-for-eot-message

TudorCRL · web-flow · commit cb653970f3a7 · 2025-10-15T15:17:07.000+01:00
Wait for eot on close
diff --git a/examples/rt/async/file/main.py b/examples/rt/async/file/main.py
@@ -0,0 +1,19 @@
+
+import asyncio
+from speechmatics.rt import AsyncClient, ServerMessageType
+
+
+async def main():
+    # AsyncClient() is given no arguments; the API key comes from the SPEECHMATICS_API_KEY environment variable
+    async with AsyncClient() as client:
+        # Print the transcript text of every AddTranscript (final transcript) message
+        @client.on(ServerMessageType.ADD_TRANSCRIPT)
+        def handle_final_transcript(msg):
+            print(f"Final: {msg['metadata']['transcript']}")
+
+        # Transcribe the audio file (the path is relative to the current working directory)
+        with open("./examples/example.wav", "rb") as audio_file:
+            await client.transcribe(audio_file)
+
+if __name__ == "__main__":  # run the example only when executed as a script, not on import
+    asyncio.run(main())
diff --git a/sdk/rt/speechmatics/rt/_async_client.py b/sdk/rt/speechmatics/rt/_async_client.py
@@ -81,7 +81,6 @@ def __init__(
             self._recognition_started_evt,
             self._session_done_evt,
         ) = self._init_session_info()
-        self._eos_sent = False
 
         transport = self._create_transport_from_config(
             auth=auth,
@@ -96,6 +95,7 @@ def __init__(
         self.on(ServerMessageType.END_OF_TRANSCRIPT, self._on_eot)
         self.on(ServerMessageType.ERROR, self._on_error)
         self.on(ServerMessageType.WARNING, self._on_warning)
+        self.on(ServerMessageType.AUDIO_ADDED, self._on_audio_added)
 
         self._logger.debug("AsyncClient initialized (request_id=%s)", self._session.request_id)
 
@@ -141,6 +141,26 @@ async def start_session(
             ws_headers=ws_headers,
         )
 
+    async def stop_session(self) -> None:
+        """
+        Send the end-of-stream message, wait for the session to be marked done, then close the connection.
+
+        Raises:
+            ConnectionError: If the WebSocket connection fails.
+            TranscriptionError: If the server reports an error during teardown.
+            TimeoutError: If the connection or teardown times out (from callees, if at all; the wait here is unbounded).
+
+        Examples:
+            Basic streaming:
+                >>> async with AsyncClient() as client:
+                ...     await client.start_session()
+                ...     await client.send_audio(frame)
+                ...     await client.stop_session()
+        """
+        await self._send_eos(self._seq_no)
+        await self._session_done_evt.wait()  # Unbounded wait: returns once the session-done event is set (end of transcript, or an error path)
+        await self.close()
+
     async def transcribe(
         self,
         source: BinaryIO,
@@ -233,7 +253,6 @@ async def _audio_producer(self, source: BinaryIO, chunk_size: int) -> None:
             chunk_size: Chunk size for audio data
         """
         src = FileSource(source, chunk_size=chunk_size)
-        seq_no = 0
 
         try:
             async for frame in src:
@@ -242,13 +261,12 @@ async def _audio_producer(self, source: BinaryIO, chunk_size: int) -> None:
 
                 try:
                     await self.send_audio(frame)
-                    seq_no += 1
                 except Exception as e:
                     self._logger.error("Failed to send audio frame: %s", e)
                     self._session_done_evt.set()
                     break
 
-            await self._send_eos(seq_no)
+            await self.stop_session()
         except asyncio.CancelledError:
             raise
         except Exception as e:
@@ -286,13 +304,19 @@ def _on_error(self, msg: dict[str, Any]) -> None:
         self._session_done_evt.set()
         raise TranscriptionError(error)
 
+    def _on_audio_added(self, msg: dict[str, Any]) -> None:
+        """Handle AudioAdded; keep the larger of the local and acked seq_no so a late ack cannot rewind the count."""
+        self._seq_no = max(self._seq_no, msg.get("seq_no", 0))
+
     def _on_warning(self, msg: dict[str, Any]) -> None:
         """Handle Warning message from server."""
         self._logger.warning("Server warning: %s", msg.get("reason", "unknown"))
 
     async def close(self) -> None:
         """
         Close the client and clean up resources.
+        WARNING: this closes the client without waiting for remaining messages to be processed.
+        It is recommended to use stop_session() instead.
 
         Ensures the session is marked as complete and delegates to the base
         class for full cleanup including WebSocket connection termination.
diff --git a/sdk/rt/speechmatics/rt/_base_client.py b/sdk/rt/speechmatics/rt/_base_client.py
@@ -41,6 +41,8 @@ def __init__(self, transport: Transport) -> None:
         self._transport = transport
         self._recv_task: Optional[asyncio.Task[None]] = None
         self._closed_evt = asyncio.Event()
+        self._eos_sent = False
+        self._seq_no = 0
 
         self._logger = get_logger("speechmatics.rt.base_client")
 
@@ -112,14 +114,15 @@ async def send_audio(self, payload: bytes) -> None:
             >>> audio_chunk = b""
             >>> await client.send_audio(audio_chunk)
         """
-        if self._closed_evt.is_set():
+        if self._closed_evt.is_set() or self._eos_sent:
             raise TransportError("Client is closed")
 
         if not isinstance(payload, bytes):
             raise ValueError("Payload must be bytes")
 
         try:
             await self._transport.send_message(payload)
+            self._seq_no += 1
         except Exception:
             self._closed_evt.set()
             raise
@@ -133,7 +136,7 @@ async def send_message(self, message: dict[str, Any]) -> None:
             >>> msg = json.dumps({"message": "StartRecognition", ...})
             >>> await client.send_message(msg)
         """
-        if self._closed_evt.is_set():
+        if self._closed_evt.is_set() or self._eos_sent:
             raise TransportError("Client is closed")
 
         if not isinstance(message, dict):