@@ -94,7 +94,7 @@ class Client:
94
94
95
95
Usage
96
96
-----
97
-
97
+
98
98
# instantiate client for pyannoteAI web API
99
99
>>> from pyannoteai.sdk import Client
100
100
>>> client = Client(token="{PYANNOTEAI_API_KEY}")
@@ -251,16 +251,17 @@ def _hash_md5(self, file: Union[str, Path]) -> str:
251
251
252
252
def upload (
253
253
self ,
254
- audio : str | Path ,
254
+ audio : str | Path | dict [ str , str | Path ] ,
255
255
media_url : Optional [str ] = None ,
256
256
callback : Optional [Callable ] = None ,
257
257
) -> str :
258
258
"""Upload audio file to pyannoteAI platform
259
259
260
260
Parameters
261
261
----------
262
- audio : str or Path
263
- Audio file to be uploaded. Can be a "str" or "Path" instance: "audio.wav" or Path("audio.wav")
262
+ audio : str or Path or dict
263
+ Path to audio file to be uploaded. Can be a "str" or "Path" instance, or a dict with an
264
+ "audio" key (e.g. {"audio": "/path/to/audio.wav"}).
264
265
media_url : str, optional
265
266
Unique identifier used to retrieve the uploaded audio file on the pyannoteAI platform.
266
267
Any combination of letters {a-z, A-Z}, digits {0-9}, and {-./} characters prefixed
@@ -278,6 +279,13 @@ def upload(
278
279
or "media://{md5-hash-of-audio-file}" otherwise.
279
280
"""
280
281
282
+ if isinstance (audio , dict ):
283
+ if "audio" not in audio :
284
+ raise ValueError (
285
+ "When `audio` is a dict, it must provide the path to the audio file in 'audio' key."
286
+ )
287
+ audio = audio ["audio" ]
288
+
281
289
# get the total size of the file to upload
282
290
# to provide progress information to the hook
283
291
total_size = os .path .getsize (audio )
@@ -324,10 +332,13 @@ def upload(
324
332
def diarize (
325
333
self ,
326
334
media_url : str ,
327
- num_speakers : Optional [ int ] = None ,
328
- min_speakers : Optional [ int ] = None ,
329
- max_speakers : Optional [ int ] = None ,
335
+ num_speakers : int | None = None ,
336
+ min_speakers : int | None = None ,
337
+ max_speakers : int | None = None ,
330
338
confidence : bool = False ,
339
+ turn_level_confidence : bool = False ,
340
+ exclusive : bool = False ,
341
+ model : str = "precision-2" ,
331
342
** kwargs ,
332
343
) -> str :
333
344
"""Initiate a diarization job on the pyannoteAI web API
@@ -341,11 +352,17 @@ def diarize(
341
352
Force number of speakers to diarize. If not provided, the
342
353
number of speakers will be determined automatically.
343
354
min_speakers : int, optional
344
- Not supported yet. Minimum number of speakers. Has no effect when `num_speakers` is provided .
355
+ Minimum number of speakers.
345
356
max_speakers : int, optional
346
- Not supported yet. Maximum number of speakers. Has no effect when `num_speakers` is provided .
357
+ Maximum number of speakers.
347
358
confidence : bool, optional
348
- Defaults to False
359
+ Enable confidence scores. Defaults to False.
360
+ turn_level_confidence : bool, optional
361
+ Enable turn-level confidence scores. Defaults to False.
362
+ exclusive : bool, optional
363
+ Enable exclusive speaker diarization. Defaults to False.
364
+ model : str, optional
365
+ Name of the model, sent as "model" in the request body. Defaults to "precision-2".
349
366
**kwargs : optional
350
367
Extra arguments to send in the body of the request.
351
368
@@ -359,10 +376,16 @@ def diarize(
359
376
If something else went wrong
360
377
"""
361
378
362
- assert min_speakers is None , "`min_speakers` is not supported yet"
363
- assert max_speakers is None , "`max_speakers` is not supported yet"
364
-
365
- json = {"url" : media_url , "numSpeakers" : num_speakers , "confidence" : confidence }
379
+ json = {
380
+ "url" : media_url ,
381
+ "model" : model ,
382
+ "numSpeakers" : num_speakers ,
383
+ "minSpeakers" : min_speakers ,
384
+ "maxSpeakers" : max_speakers ,
385
+ "confidence" : confidence ,
386
+ "turnLevelConfidence" : turn_level_confidence ,
387
+ "exclusive" : exclusive ,
388
+ }
366
389
# add extra arguments to the request body
367
390
json .update (kwargs )
368
391
@@ -373,6 +396,7 @@ def diarize(
373
396
def voiceprint (
374
397
self ,
375
398
media_url : str ,
399
+ model : str = "precision-2" ,
376
400
** kwargs ,
377
401
) -> str :
378
402
"""Initiate a voiceprint job on the pyannoteAI web API
@@ -382,6 +406,8 @@ def voiceprint(
382
406
media_url : str
383
407
media://{...} URL created with the `upload` method or
384
408
any other public URL pointing to an audio file.
409
+ model : str, optional
410
+ Name of the model, sent as "model" in the request body. Defaults to "precision-2".
385
411
**kwargs : optional
386
412
Extra arguments to send in the body of the request.
387
413
@@ -395,7 +421,7 @@ def voiceprint(
395
421
If something else went wrong
396
422
"""
397
423
398
- json = {"url" : media_url }
424
+ json = {"url" : media_url , "model" : model }
399
425
# add extra arguments to the request body
400
426
json .update (kwargs )
401
427
@@ -409,10 +435,13 @@ def identify(
409
435
voiceprints : dict [str , str ],
410
436
exclusive_matching : bool = True ,
411
437
matching_threshold : float = 0.0 ,
412
- num_speakers : Optional [ int ] = None ,
413
- min_speakers : Optional [ int ] = None ,
414
- max_speakers : Optional [ int ] = None ,
438
+ num_speakers : int | None = None ,
439
+ min_speakers : int | None = None ,
440
+ max_speakers : int | None = None ,
415
441
confidence : bool = False ,
442
+ turn_level_confidence : bool = False ,
443
+ exclusive : bool = False ,
444
+ model : str = "precision-2" ,
416
445
** kwargs ,
417
446
) -> str :
418
447
"""Initiate an identification job on the pyannoteAI web API
@@ -423,6 +452,7 @@ def identify(
423
452
media://{...} URL created with the `upload` method or
424
453
any other public URL pointing to an audio file.
425
454
voiceprints : dict
455
+ Mapping from speaker label to the voiceprint of that speaker.
426
456
exclusive_matching : bool, optional
427
457
Prevent multiple speakers from being matched to the same voiceprint.
428
458
Defaults to True.
@@ -433,11 +463,17 @@ def identify(
433
463
Force number of speakers to diarize. If not provided, the
434
464
number of speakers will be determined automatically.
435
465
min_speakers : int, optional
436
- Not supported yet. Minimum number of speakers. Has no effect when `num_speakers` is provided .
466
+ Minimum number of speakers.
437
467
max_speakers : int, optional
438
- Not supported yet. Maximum number of speakers. Has no effect when `num_speakers` is provided .
468
+ Maximum number of speakers.
439
469
confidence : bool, optional
440
- Defaults to False
470
+ Enable confidence scores. Defaults to False.
471
+ turn_level_confidence : bool, optional
472
+ Enable turn-level confidence scores. Defaults to False.
473
+ exclusive : bool, optional
474
+ Enable exclusive speaker diarization (separate from `exclusive_matching`). Defaults to False.
475
+ model : str, optional
476
+ Name of the model, sent as "model" in the request body. Defaults to "precision-2".
441
477
**kwargs : optional
442
478
Extra arguments to send in the body of the request.
443
479
@@ -451,17 +487,19 @@ def identify(
451
487
If something else went wrong
452
488
"""
453
489
454
- assert min_speakers is None , "`min_speakers` is not supported yet"
455
- assert max_speakers is None , "`max_speakers` is not supported yet"
456
-
457
490
json = {
458
491
"url" : media_url ,
492
+ "model" : model ,
493
+ "numSpeakers" : num_speakers ,
494
+ "minSpeakers" : min_speakers ,
495
+ "maxSpeakers" : max_speakers ,
496
+ "confidence" : confidence ,
497
+ "turnLevelConfidence" : turn_level_confidence ,
498
+ "exclusive" : exclusive ,
459
499
"voiceprints" : [
460
500
{"label" : speaker , "voiceprint" : voiceprint }
461
501
for speaker , voiceprint in voiceprints .items ()
462
502
],
463
- "numSpeakers" : num_speakers ,
464
- # "confidence": confidence,
465
503
"matching" : {
466
504
"exclusive" : exclusive_matching ,
467
505
"threshold" : matching_threshold ,
0 commit comments