Skip to content

Commit b5ae6c5

Browse files
committed
Merge branch 'release/0.2.0'
2 parents b74987a + da50214 commit b5ae6c5

File tree

3 files changed

+70
-29
lines changed

3 files changed

+70
-29
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
# CHANGELOG
22

3+
## Version 0.2.0 (2025-09-09)
4+
5+
- Add support for `Precision-2` model
6+
37
## Version 0.1.1 (2025-06-04)
48

59
- Allow arbitrary kwargs in `diarize`, `identify` and `voiceprint`

README.md

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,15 @@
66
<h1>Official pyannoteAI Python SDK </h1>
77
</div>
88

9-
109
## Installation
1110

1211
```bash
13-
$ pip install pyannoteai.sdk
12+
$ pip install pyannoteai-sdk
1413
```
1514

1615
Then head over to [`dashboard.pyannote.ai`](https://dashboard.pyannote.ai) and create an API key.
1716

18-
## Speaker diarization
17+
## Speaker diarization
1918

2019
```python
2120
# instantiate client

src/pyannoteai/sdk/client.py

Lines changed: 64 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ class Client:
9494
9595
Usage
9696
-----
97-
97+
9898
# instantiate client for pyannoteAI web API
9999
>>> from pyannoteai.sdk import Client
100100
>>> client = Client(token="{PYANNOTEAI_API_KEY}")
@@ -251,16 +251,17 @@ def _hash_md5(self, file: Union[str, Path]) -> str:
251251

252252
def upload(
253253
self,
254-
audio: str | Path,
254+
audio: str | Path | dict[str, str|Path],
255255
media_url: Optional[str] = None,
256256
callback: Optional[Callable] = None,
257257
) -> str:
258258
"""Upload audio file to pyannoteAI platform
259259
260260
Parameters
261261
----------
262-
audio : str or Path
263-
Audio file to be uploaded. Can be a "str" or "Path" instance: "audio.wav" or Path("audio.wav")
262+
audio : str or Path or dict
263+
Path to audio file to be uploaded. Can be a "str" or "Path" instance, or a dict with an
264+
"audio" key (e.g. {"audio": "/path/to/audio.wav"}).
264265
media_url : str, optional
265266
Unique identifier used to retrieve the uploaded audio file on the pyannoteAI platform.
266267
Any combination of letters {a-z, A-Z}, digits {0-9}, and {-./} characters prefixed
@@ -278,6 +279,13 @@ def upload(
278279
or "media://{md5-hash-of-audio-file}" otherwise.
279280
"""
280281

282+
if isinstance(audio, dict):
283+
if "audio" not in audio:
284+
raise ValueError(
285+
"When `audio` is a dict, it must provide the path to the audio file in 'audio' key."
286+
)
287+
audio = audio["audio"]
288+
281289
# get the total size of the file to upload
282290
# to provide progress information to the hook
283291
total_size = os.path.getsize(audio)
@@ -324,10 +332,13 @@ def upload(
324332
def diarize(
325333
self,
326334
media_url: str,
327-
num_speakers: Optional[int] = None,
328-
min_speakers: Optional[int] = None,
329-
max_speakers: Optional[int] = None,
335+
num_speakers: int | None = None,
336+
min_speakers: int | None = None,
337+
max_speakers: int | None = None,
330338
confidence: bool = False,
339+
turn_level_confidence: bool = False,
340+
exclusive: bool = False,
341+
model: str = "precision-2",
331342
**kwargs,
332343
) -> str:
333344
"""Initiate a diarization job on the pyannoteAI web API
@@ -341,11 +352,17 @@ def diarize(
341352
Force number of speakers to diarize. If not provided, the
342353
number of speakers will be determined automatically.
343354
min_speakers : int, optional
344-
Not supported yet. Minimum number of speakers. Has no effect when `num_speakers` is provided.
355+
Minimum number of speakers.
345356
max_speakers : int, optional
346-
Not supported yet. Maximum number of speakers. Has no effect when `num_speakers` is provided.
357+
Maximum number of speakers.
347358
confidence : bool, optional
348-
Defaults to False
359+
Enable confidence scores.
360+
turn_level_confidence : bool, optional
361+
Enable turn-based confidence scores.
362+
exclusive : bool, optional
363+
Enable exclusive speaker diarization.
364+
model : str, optional
365+
Name of the model to use. Defaults to "precision-2".
349366
**kwargs : optional
350367
Extra arguments to send in the body of the request.
351368
@@ -359,10 +376,16 @@ def diarize(
359376
If something else went wrong
360377
"""
361378

362-
assert min_speakers is None, "`min_speakers` is not supported yet"
363-
assert max_speakers is None, "`max_speakers` is not supported yet"
364-
365-
json = {"url": media_url, "numSpeakers": num_speakers, "confidence": confidence}
379+
json = {
380+
"url": media_url,
381+
"model": model,
382+
"numSpeakers": num_speakers,
383+
"minSpeakers": min_speakers,
384+
"maxSpeakers": max_speakers,
385+
"confidence": confidence,
386+
"turnLevelConfidence": turn_level_confidence,
387+
"exclusive": exclusive,
388+
}
366389
# add extra arguments to the request body
367390
json.update(kwargs)
368391

@@ -373,6 +396,7 @@ def diarize(
373396
def voiceprint(
374397
self,
375398
media_url: str,
399+
model: str = "precision-2",
376400
**kwargs,
377401
) -> str:
378402
"""Initiate a voiceprint job on the pyannoteAI web API
@@ -382,6 +406,8 @@ def voiceprint(
382406
media_url : str
383407
media://{...} URL created with the `upload` method or
384408
any other public URL pointing to an audio file.
409+
model : str, optional
410+
Defaults to "precision-2".
385411
**kwargs : optional
386412
Extra arguments to send in the body of the request.
387413
@@ -395,7 +421,7 @@ def voiceprint(
395421
If something else went wrong
396422
"""
397423

398-
json = {"url": media_url}
424+
json = {"url": media_url, "model": model}
399425
# add extra arguments to the request body
400426
json.update(kwargs)
401427

@@ -409,10 +435,13 @@ def identify(
409435
voiceprints: dict[str, str],
410436
exclusive_matching: bool = True,
411437
matching_threshold: float = 0.0,
412-
num_speakers: Optional[int] = None,
413-
min_speakers: Optional[int] = None,
414-
max_speakers: Optional[int] = None,
438+
num_speakers: int | None = None,
439+
min_speakers: int | None = None,
440+
max_speakers: int | None = None,
415441
confidence: bool = False,
442+
turn_level_confidence: bool = False,
443+
exclusive: bool = False,
444+
model: str = "precision-2",
416445
**kwargs,
417446
) -> str:
418447
"""Initiate an identification job on the pyannoteAI web API
@@ -423,6 +452,7 @@ def identify(
423452
media://{...} URL created with the `upload` method or
424453
any other public URL pointing to an audio file.
425454
voiceprints : dict
455+
Voiceprints, as a {speaker label: voiceprint} dictionary.
426456
exclusive_matching : bool, optional
427457
Prevent multiple speakers from being matched to the same voiceprint.
428458
Defaults to True.
@@ -433,11 +463,17 @@ def identify(
433463
Force number of speakers to diarize. If not provided, the
434464
number of speakers will be determined automatically.
435465
min_speakers : int, optional
436-
Not supported yet. Minimum number of speakers. Has no effect when `num_speakers` is provided.
466+
Minimum number of speakers.
437467
max_speakers : int, optional
438-
Not supported yet. Maximum number of speakers. Has no effect when `num_speakers` is provided.
468+
Maximum number of speakers.
439469
confidence : bool, optional
440-
Defaults to False
470+
Enable confidence scores.
471+
turn_level_confidence : bool, optional
472+
Enable turn-based confidence scores.
473+
exclusive : bool, optional
474+
Enable exclusive speaker diarization.
475+
model : str, optional
476+
Name of the model to use. Defaults to "precision-2".
441477
**kwargs : optional
442478
Extra arguments to send in the body of the request.
443479
@@ -451,17 +487,19 @@ def identify(
451487
If something else went wrong
452488
"""
453489

454-
assert min_speakers is None, "`min_speakers` is not supported yet"
455-
assert max_speakers is None, "`max_speakers` is not supported yet"
456-
457490
json = {
458491
"url": media_url,
492+
"model": model,
493+
"numSpeakers": num_speakers,
494+
"minSpeakers": min_speakers,
495+
"maxSpeakers": max_speakers,
496+
"confidence": confidence,
497+
"turnLevelConfidence": turn_level_confidence,
498+
"exclusive": exclusive,
459499
"voiceprints": [
460500
{"label": speaker, "voiceprint": voiceprint}
461501
for speaker, voiceprint in voiceprints.items()
462502
],
463-
"numSpeakers": num_speakers,
464-
# "confidence": confidence,
465503
"matching": {
466504
"exclusive": exclusive_matching,
467505
"threshold": matching_threshold,

0 commit comments

Comments
 (0)