|
2 | 2 |
|
3 | 3 | from dataclasses import asdict |
4 | 4 | from dataclasses import dataclass |
| 5 | +from dataclasses import field |
5 | 6 | from enum import Enum |
6 | 7 | from typing import Any |
7 | 8 | from typing import Optional |
@@ -61,7 +62,7 @@ class ClientMessageType(str, Enum): |
61 | 62 | EndOfStream: Signals that no more audio data will be sent. |
62 | 63 | SetRecognitionConfig: Updates transcription configuration during |
63 | 64 | an active session (advanced use). |
64 | | - GetSpeakers: Internal, Speechmatics only message. Allows the client to request speaker data. |
| 65 | + GetSpeakers: Allows the client to request speaker data. |
65 | 66 |
|
66 | 67 | Examples: |
67 | 68 | >>> # Starting a recognition session |
@@ -110,7 +111,7 @@ class ServerMessageType(str, Enum): |
110 | 111 | change for the given audio segment. |
111 | 112 | AddPartialTranslation: Provides interim translation results that |
112 | 113 | may change as more context becomes available. |
113 | | - SpeakerResult: Internal, Speechmatics only message containing the speakers data. |
| 114 | + SpeakerResult: Provides the speaker identification data. |
114 | 115 | Info: Informational messages from the server. |
115 | 116 | Warning: Warning messages that don't stop transcription. |
116 | 117 | Error: Error messages indicating transcription failure. |
@@ -245,19 +246,58 @@ class SpeakerDiarizationConfig: |
245 | 246 | is a close enough match, even if other speakers may be closer. This is useful |
246 | 247 | for cases where we can flip incorrectly between similar speakers during a single |
247 | 248 | speaker section. |
| 249 | + speakers: (Optional) Add speaker identifiers to your session to identify specific speakers. |
| 250 | + This is a list of SpeakerIdentifier objects generated in previous transcription sessions. |
| 251 | + You can provide multiple identifiers for a single speaker to help the engine identify |
| 252 | + the speaker more accurately. |
248 | 253 |
|
249 | 254 | Examples: |
250 | 255 | >>> config = SpeakerDiarizationConfig( |
251 | 256 | max_speakers=2, |
252 | 257 | speaker_sensitivity=0.8, |
253 | 258 | prefer_current_speaker=True, |
| 259 | + speakers=[ |
| 260 | + SpeakerIdentifier(label="Agent", speaker_identifiers=["agent_1"]), |
| 261 | + SpeakerIdentifier(label="Customer", speaker_identifiers=["cust_1"]), |
| 262 | + ], |
254 | 263 | ) |
255 | 264 |
|
256 | 265 | """ |
257 | 266 |
|
258 | 267 | max_speakers: Optional[int] = None |
259 | 268 | speaker_sensitivity: Optional[float] = None |
260 | 269 | prefer_current_speaker: Optional[bool] = None |
| 270 | + speakers: Optional[list[SpeakerIdentifier]] = None |
| 271 | + |
| 272 | + |
| 273 | +@dataclass |
| 274 | +class SpeakerIdentifier: |
| 275 | + """Labeled speaker identifier for guided speaker diarization. |
| 276 | +
|
| 277 | + Use this to map one or more known speaker identifiers to a human-readable |
| 278 | + label. When provided in `SpeakerDiarizationConfig.speakers`, the engine can |
| 279 | + use these identifiers as hints to consistently assign the specified label. |
| 280 | +
|
| 281 | + Attributes: |
| 282 | + label: Human-readable label to assign to this speaker or group |
| 283 | + (e.g., "Agent", "Customer", "Alice"). |
| 284 | + speaker_identifiers: A list of string identifiers associated with this |
| 285 | + speaker. These are identifiers generated in previous transcription |
| 286 | + sessions; you can provide several for a single speaker to help the |
| 287 | + engine identify that speaker more accurately. |
| 288 | +
|
| 289 | + Examples: |
| 290 | + >>> config = SpeakerDiarizationConfig( |
| 291 | + ... max_speakers=2, |
| 292 | + ... speakers=[ |
| 293 | + ... SpeakerIdentifier(label="Agent", speaker_identifiers=["agent_1"]), |
| 294 | + ... SpeakerIdentifier(label="Customer", speaker_identifiers=["cust_1"]), |
| 295 | + ... ], |
| 296 | + ... ) |
| 297 | + """ |
| 298 | + |
| 299 | + label: str = "" |
| 300 | + speaker_identifiers: list[str] = field(default_factory=list) |
261 | 301 |
|
262 | 302 |
|
263 | 303 | @dataclass |
|
0 commit comments