Skip to content

Commit 42943b1

Browse files
authored
Merge pull request #46 from speechmatics/feature/add-speaker-id-to-rt
Add speaker id to rt
2 parents dba39fa + 4221d2c commit 42943b1

File tree

4 files changed

+50
-6
lines changed

4 files changed

+50
-6
lines changed

sdk/batch/speechmatics/batch/_transport.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -297,9 +297,9 @@ async def _prepare_headers(self) -> dict[str, str]:
297297
Headers dictionary with authentication and tracking info
298298
"""
299299
auth_headers = await self._auth.get_auth_headers()
300-
auth_headers[
301-
"User-Agent"
302-
] = f"speechmatics-batch-v{get_version()} python/{sys.version_info.major}.{sys.version_info.minor}"
300+
auth_headers["User-Agent"] = (
301+
f"speechmatics-batch-v{get_version()} python/{sys.version_info.major}.{sys.version_info.minor}"
302+
)
303303

304304
if self._request_id:
305305
auth_headers["X-Request-Id"] = self._request_id

sdk/rt/speechmatics/rt/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
from ._models import ServerMessageType
2626
from ._models import SessionInfo
2727
from ._models import SpeakerDiarizationConfig
28+
from ._models import SpeakerIdentifier
2829
from ._models import TranscriptionConfig
2930
from ._models import TranscriptResult
3031
from ._models import TranslationConfig
@@ -53,6 +54,7 @@
5354
"SessionError",
5455
"SessionInfo",
5556
"SpeakerDiarizationConfig",
57+
"SpeakerIdentifier",
5658
"StaticKeyAuth",
5759
"TimeoutError",
5860
"TranscriptResult",

sdk/rt/speechmatics/rt/_base_client.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
from typing import Any
99
from typing import Optional
1010

11+
from typing_extensions import Self
12+
1113
from ._auth import AuthBase
1214
from ._auth import StaticKeyAuth
1315
from ._events import EventEmitter
@@ -96,7 +98,7 @@ async def _ws_connect(self, ws_headers: Optional[dict] = None) -> None:
9698
await self._transport.connect(ws_headers)
9799
self._recv_task = asyncio.create_task(self._recv_loop())
98100

99-
async def __aenter__(self) -> _BaseClient:
101+
async def __aenter__(self) -> Self:
100102
return self
101103

102104
async def __aexit__(self, *args: Any) -> None:

sdk/rt/speechmatics/rt/_models.py

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
from dataclasses import asdict
44
from dataclasses import dataclass
5+
from dataclasses import field
56
from enum import Enum
67
from typing import Any
78
from typing import Optional
@@ -61,7 +62,7 @@ class ClientMessageType(str, Enum):
6162
EndOfStream: Signals that no more audio data will be sent.
6263
SetRecognitionConfig: Updates transcription configuration during
6364
an active session (advanced use).
64-
GetSpeakers: Internal, Speechmatics only message. Allows the client to request speaker data.
65+
GetSpeakers: Allows the client to request speaker data.
6566
6667
Examples:
6768
>>> # Starting a recognition session
@@ -110,7 +111,7 @@ class ServerMessageType(str, Enum):
110111
change for the given audio segment.
111112
AddPartialTranslation: Provides interim translation results that
112113
may change as more context becomes available.
113-
SpeakerResult: Internal, Speechmatics only message containing the speakers data.
114+
SpeakerResult: Provides the speaker identification data.
114115
Info: Informational messages from the server.
115116
Warning: Warning messages that don't stop transcription.
116117
Error: Error messages indicating transcription failure.
@@ -245,19 +246,58 @@ class SpeakerDiarizationConfig:
245246
is a close enough match, even if other speakers may be closer. This is useful
246247
for cases where we can flip incorrectly between similar speakers during a single
247248
speaker section.
249+
speakers: (Optional) Add speaker identifiers to your session to identify specific speakers.
250+
This is a list of SpeakerIdentifier objects generated in previous transcription sessions.
251+
You can provide multiple identifiers for a single speaker to help the engine identify
252+
the speaker more accurately.
248253
249254
Examples:
250255
>>> config = SpeakerDiarizationConfig(
251256
... max_speakers=2,
252257
... speaker_sensitivity=0.8,
253258
... prefer_current_speaker=True,
259+
... speakers=[
260+
... SpeakerIdentifier(label="Agent", speaker_identifiers=["agent_1"]),
261+
... SpeakerIdentifier(label="Customer", speaker_identifiers=["cust_1"]),
262+
... ],
254263
... )
255264
256265
"""
257266

258267
max_speakers: Optional[int] = None
259268
speaker_sensitivity: Optional[float] = None
260269
prefer_current_speaker: Optional[bool] = None
270+
speakers: Optional[list[SpeakerIdentifier]] = None
271+
272+
273+
@dataclass
274+
class SpeakerIdentifier:
275+
"""Labeled speaker identifier for guided speaker diarization.
276+
277+
Use this to map one or more known speaker identifiers to a human-readable
278+
label. When provided in `SpeakerDiarizationConfig.speakers`, the engine can
279+
use these identifiers as hints to consistently assign the specified label.
280+
281+
Attributes:
282+
label: Human-readable label to assign to this speaker or group
283+
(e.g., "Agent", "Customer", "Alice").
284+
speaker_identifiers: A list of string identifiers associated with this
285+
speaker. These are the identifiers generated for this speaker in
286+
previous transcription sessions; supply several identifiers for one
287+
speaker to help the engine identify that speaker more accurately.
288+
289+
Examples:
290+
>>> config = SpeakerDiarizationConfig(
291+
... max_speakers=2,
292+
... speakers=[
293+
... SpeakerIdentifier(label="Agent", speaker_identifiers=["agent_1"]),
294+
... SpeakerIdentifier(label="Customer", speaker_identifiers=["cust_1"]),
295+
... ],
296+
... )
297+
"""
298+
299+
label: str = ""
300+
speaker_identifiers: list[str] = field(default_factory=list)
261301

262302

263303
@dataclass

0 commit comments

Comments
 (0)