Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# CHANGELOG

## develop

- Add support for `Precision-2` model

## Version 0.1.1 (2025-06-04)

- Allow arbitrary kwargs in `diarize`, `identify` and `voiceprint`
Expand Down
90 changes: 64 additions & 26 deletions src/pyannoteai/sdk/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ class Client:

Usage
-----

# instantiate client for pyannoteAI web API
>>> from pyannoteai.sdk import Client
>>> client = Client(token="{PYANNOTEAI_API_KEY}")
Expand Down Expand Up @@ -251,16 +251,17 @@ def _hash_md5(self, file: Union[str, Path]) -> str:

def upload(
self,
audio: str | Path,
audio: str | Path | dict[str, str|Path],
media_url: Optional[str] = None,
callback: Optional[Callable] = None,
) -> str:
"""Upload audio file to pyannoteAI platform

Parameters
----------
audio : str or Path
Audio file to be uploaded. Can be a "str" or "Path" instance: "audio.wav" or Path("audio.wav")
audio : str or Path or dict
Path to audio file to be uploaded. Can be a "str" or "Path" instance, or a dict with an
"audio" key (e.g. {"audio": "/path/to/audio.wav"}).
media_url : str, optional
Unique identifier used to retrieve the uploaded audio file on the pyannoteAI platform.
Any combination of letters {a-z, A-Z}, digits {0-9}, and {-./} characters prefixed
Expand All @@ -278,6 +279,13 @@ def upload(
or "media://{md5-hash-of-audio-file}" otherwise.
"""

if isinstance(audio, dict):
if "audio" not in audio:
raise ValueError(
"When `audio` is a dict, it must provide the path to the audio file in 'audio' key."
)
audio = audio["audio"]

# get the total size of the file to upload
# to provide progress information to the hook
total_size = os.path.getsize(audio)
Expand Down Expand Up @@ -324,10 +332,13 @@ def upload(
def diarize(
self,
media_url: str,
num_speakers: Optional[int] = None,
min_speakers: Optional[int] = None,
max_speakers: Optional[int] = None,
num_speakers: int | None = None,
min_speakers: int | None = None,
max_speakers: int | None = None,
confidence: bool = False,
turn_level_confidence: bool = False,
exclusive: bool = False,
model: str = "precision-2",
**kwargs,
) -> str:
"""Initiate a diarization job on the pyannoteAI web API
Expand All @@ -341,11 +352,17 @@ def diarize(
Force number of speakers to diarize. If not provided, the
number of speakers will be determined automatically.
min_speakers : int, optional
Not supported yet. Minimum number of speakers. Has no effect when `num_speakers` is provided.
Minimum number of speakers.
max_speakers : int, optional
Not supported yet. Maximum number of speakers. Has no effect when `num_speakers` is provided.
Maximum number of speakers.
confidence : bool, optional
Defaults to False
Enable confidence scores.
turn_level_confidence : bool, optional
Enable turn-level confidence scores.
exclusive : bool, optional
Enable exclusive speaker diarization.
model : str, optional
Defaults to "precision-2"
**kwargs : optional
Extra arguments to send in the body of the request.

Expand All @@ -359,10 +376,16 @@ def diarize(
If something else went wrong
"""

assert min_speakers is None, "`min_speakers` is not supported yet"
assert max_speakers is None, "`max_speakers` is not supported yet"

json = {"url": media_url, "numSpeakers": num_speakers, "confidence": confidence}
json = {
"url": media_url,
"model": model,
"numSpeakers": num_speakers,
"minSpeakers": min_speakers,
"maxSpeakers": max_speakers,
"confidence": confidence,
"turnLevelConfidence": turn_level_confidence,
"exclusive": exclusive,
}
# add extra arguments to the request body
json.update(kwargs)

Expand All @@ -373,6 +396,7 @@ def diarize(
def voiceprint(
self,
media_url: str,
model: str = "precision-2",
**kwargs,
) -> str:
"""Initiate a voiceprint job on the pyannoteAI web API
Expand All @@ -382,6 +406,8 @@ def voiceprint(
media_url : str
media://{...} URL created with the `upload` method or
any other public URL pointing to an audio file.
model : str, optional
Defaults to "precision-2".
**kwargs : optional
Extra arguments to send in the body of the request.

Expand All @@ -395,7 +421,7 @@ def voiceprint(
If something else went wrong
"""

json = {"url": media_url}
json = {"url": media_url, "model": model}
# add extra arguments to the request body
json.update(kwargs)

Expand All @@ -409,10 +435,13 @@ def identify(
voiceprints: dict[str, str],
exclusive_matching: bool = True,
matching_threshold: float = 0.0,
num_speakers: Optional[int] = None,
min_speakers: Optional[int] = None,
max_speakers: Optional[int] = None,
num_speakers: int | None = None,
min_speakers: int | None = None,
max_speakers: int | None = None,
confidence: bool = False,
turn_level_confidence: bool = False,
exclusive: bool = False,
model: str = "precision-2",
**kwargs,
) -> str:
"""Initiate an identification job on the pyannoteAI web API
Expand All @@ -423,6 +452,7 @@ def identify(
media://{...} URL created with the `upload` method or
any other public URL pointing to an audio file.
voiceprints : dict
Mapping from speaker label to the corresponding voiceprint.
exclusive_matching : bool, optional
Prevent multiple speakers from being matched to the same voiceprint.
Defaults to True.
Expand All @@ -433,11 +463,17 @@ def identify(
Force number of speakers to diarize. If not provided, the
number of speakers will be determined automatically.
min_speakers : int, optional
Not supported yet. Minimum number of speakers. Has no effect when `num_speakers` is provided.
Minimum number of speakers.
max_speakers : int, optional
Not supported yet. Maximum number of speakers. Has no effect when `num_speakers` is provided.
Maximum number of speakers.
confidence : bool, optional
Defaults to False
Enable confidence scores.
turn_level_confidence : bool, optional
Enable turn-level confidence scores.
exclusive : bool, optional
Enable exclusive speaker diarization.
model : str, optional
Defaults to "precision-2"
**kwargs : optional
Extra arguments to send in the body of the request.

Expand All @@ -451,17 +487,19 @@ def identify(
If something else went wrong
"""

assert min_speakers is None, "`min_speakers` is not supported yet"
assert max_speakers is None, "`max_speakers` is not supported yet"

json = {
"url": media_url,
"model": model,
"numSpeakers": num_speakers,
"minSpeakers": min_speakers,
"maxSpeakers": max_speakers,
"confidence": confidence,
"turnLevelConfidence": turn_level_confidence,
"exclusive": exclusive,
"voiceprints": [
{"label": speaker, "voiceprint": voiceprint}
for speaker, voiceprint in voiceprints.items()
],
"numSpeakers": num_speakers,
# "confidence": confidence,
"matching": {
"exclusive": exclusive_matching,
"threshold": matching_threshold,
Expand Down