Skip to content
Open
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions plugins/decart/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Decart Plugin

Decart plugin for Vision Agents.

Empty file.
58 changes: 58 additions & 0 deletions plugins/decart/example/decart_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import logging

from dotenv import load_dotenv

from vision_agents.core import User, Agent, cli
from vision_agents.core.agents import AgentLauncher
from vision_agents.plugins import decart, getstream, openai, elevenlabs, deepgram

logger = logging.getLogger(__name__)

load_dotenv()


async def create_agent(**kwargs) -> Agent:
    """Assemble the story-teller agent used by this example.

    The agent combines a Decart restyling processor, an OpenAI LLM,
    ElevenLabs TTS, Deepgram STT and a Stream edge. A ``change_prompt``
    function is registered on the LLM so the model can update the prompt
    of the restyling processor.

    Args:
        **kwargs: Accepted but not used by this function.

    Returns:
        The configured ``Agent`` instance.
    """
    restyler = decart.RestylingProcessor(
        initial_prompt="A cute animated movie with vibrant colours",
        model="mirage_v2",
    )
    story_llm = openai.LLM(model="gpt-4o-mini")

    story_instructions = "You are a story teller. You will tell a short story to the user. You will use the Decart processor to change the style of the video and user's background. You can embed audio tags in your responses for added effect Emotional tone: [EXCITED], [NERVOUS], [FRUSTRATED], [TIRED] Reactions: [GASP], [SIGH], [LAUGHS], [GULPS] Volume & energy: [WHISPERING], [SHOUTING], [QUIETLY], [LOUDLY] Pacing & rhythm: [PAUSES], [STAMMERS], [RUSHED]"

    story_agent = Agent(
        edge=getstream.Edge(),
        agent_user=User(name="Story teller", id="agent"),
        instructions=story_instructions,
        llm=story_llm,
        tts=elevenlabs.TTS(voice_id="N2lVS1w4EtoT3dr4eOWO"),
        stt=deepgram.STT(),
        processors=[restyler],
    )

    tool_description = "This function changes the prompt of the Decart processor which in turn changes the style of the video and user's background"

    # Expose prompt switching to the LLM as a callable function.
    @story_llm.register_function(description=tool_description)
    async def change_prompt(prompt: str) -> str:
        await restyler.set_prompt(prompt)
        return f"Prompt changed to {prompt}"

    return story_agent


async def join_call(agent: Agent, call_type: str, call_id: str, **kwargs) -> None:
    """Create the agent's user and call, join the call, and run until it ends.

    Args:
        agent: The agent to connect.
        call_type: Call type forwarded to ``agent.create_call``.
        call_id: Call identifier forwarded to ``agent.create_call``.
        **kwargs: Accepted but not used by this function.
    """
    # Make sure the agent user is created, then create the call itself.
    await agent.create_user()
    call = await agent.create_call(call_type, call_id)

    logger.info("🤖 Starting Agent...")

    # Join the call/room; everything below runs inside the joined session.
    session = await agent.join(call)
    with session:
        for message in ("Joining call", "LLM ready"):
            logger.info(message)

        # Keep running until the call ends.
        await agent.finish()


if __name__ == "__main__":
    # Give the launcher both callbacks, then hand it to the CLI entry point.
    launcher = AgentLauncher(create_agent=create_agent, join_call=join_call)
    cli(launcher)
23 changes: 23 additions & 0 deletions plugins/decart/example/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
[project]
name = "decart-example"
version = "0.0.0"
requires-python = ">=3.10"

dependencies = [
"vision-agents",
"python-dotenv",
"vision-agents-plugins-openai",
"vision-agents-plugins-decart",
"vision-agents-plugins-elevenlabs",
"vision-agents-plugins-getstream",
"vision-agents-plugins-deepgram",

]

[tool.uv.sources]
vision-agents = { workspace = true }
vision-agents-plugins-getstream = { editable=true }
vision-agents-plugins-openai = { editable=true }
vision-agents-plugins-elevenlabs = { editable=true }
vision-agents-plugins-deepgram = { editable=true }
vision-agents-plugins-decart = { editable=true }
Empty file added plugins/decart/py.typed
Empty file.
41 changes: 41 additions & 0 deletions plugins/decart/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
[build-system]
requires = ["hatchling", "hatch-vcs"]
build-backend = "hatchling.build"

[project]
name = "vision-agents-plugins-decart"
dynamic = ["version"]
description = "Decart plugin for Vision Agents"
readme = "README.md"
keywords = ["decart", "AI", "voice agents", "agents"]
requires-python = ">=3.10"
license = "MIT"
dependencies = [
"vision-agents",
"decart",
]

[project.urls]
Documentation = "https://visionagents.ai/"
Website = "https://visionagents.ai/"
Source = "https://github.com/GetStream/Vision-Agents"

[tool.hatch.version]
source = "vcs"
raw-options = { root = "..", search_parent_directories = true, fallback_version = "0.0.0" }

[tool.hatch.build.targets.wheel]
packages = ["."]

[tool.hatch.build.targets.sdist]
include = ["/vision_agents"]

[tool.uv.sources]
vision-agents = { workspace = true }

[dependency-groups]
dev = [
"pytest>=8.4.1",
"pytest-asyncio>=1.0.0",
]

16 changes: 16 additions & 0 deletions plugins/decart/tests/test_decart_plugin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import pytest
from dotenv import load_dotenv


load_dotenv()


class TestDecartPlugin:
    """Placeholder tests for the Decart plugin package."""

    def test_regular(self):
        """Trivial unit test that always passes."""
        assert True

    @pytest.mark.integration
    async def test_simple(self):
        """Trivial test carrying the ``integration`` marker.

        Serves as an example integration test (run daily on CI).
        """
        assert True

35 changes: 35 additions & 0 deletions plugins/decart/tests/test_decart_restyling.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import pytest
from dotenv import load_dotenv

from vision_agents.plugins.decart import RestylingProcessor


load_dotenv()


class TestDecartRestyling:
    """Tests around constructing ``RestylingProcessor``."""

    def test_regular(self):
        """Placeholder check that always passes.

        It does not construct a processor, so it does not depend on
        DECART_API_KEY. A real unit test would mock the Decart client.
        """
        assert True

    @pytest.mark.integration
    async def test_processor_initialization(self):
        """Build a ``RestylingProcessor`` and verify its basic attributes.

        The test is skipped when a ``ValueError`` whose message mentions
        "API key" is raised; any other ``ValueError`` propagates.
        """
        prompt, model = "test style", "mirage_v2"
        try:
            restyler = RestylingProcessor(initial_prompt=prompt, model=model)
            assert restyler.name == "decart_restyling"
            assert restyler.initial_prompt == prompt
            assert restyler.model_name == model
        except ValueError as err:
            if "API key" not in str(err):
                raise
            # Presumably raised when DECART_API_KEY is missing -- confirm
            # against RestylingProcessor's constructor.
            pytest.skip("DECART_API_KEY not set, skipping integration test")

4 changes: 4 additions & 0 deletions plugins/decart/vision_agents/plugins/decart/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from .restyling import RestylingProcessor

__all__ = ["RestylingProcessor"]

104 changes: 104 additions & 0 deletions plugins/decart/vision_agents/plugins/decart/decart_video_track.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
import asyncio
import logging
from typing import Optional

import av
from PIL import Image
from aiortc import MediaStreamTrack, VideoStreamTrack

from vision_agents.core.utils.video_queue import VideoLatestNQueue

logger = logging.getLogger(__name__)


class DecartVideoTrack(VideoStreamTrack):
    """Video track that forwards Decart restyled video frames.

    Receives video frames from Decart's Realtime API and provides
    them through the standard VideoStreamTrack interface for publishing
    to the call.
    """

    def __init__(self, width: int = 1280, height: int = 720):
        """Initialize the Decart video track.

        Args:
            width: Video frame width.
            height: Video frame height.
        """
        super().__init__()

        self.width = width
        self.height = height

        # Small bounded queue of incoming frames (capacity 2).
        self.frame_queue: VideoLatestNQueue[av.VideoFrame] = VideoLatestNQueue(maxlen=2)

        # Solid-colour frame that is emitted until the first real frame arrives.
        placeholder = Image.new("RGB", (self.width, self.height), color=(30, 30, 40))
        self.placeholder_frame = av.VideoFrame.from_image(placeholder)
        self.last_frame: av.VideoFrame = self.placeholder_frame

        self._stopped = False
        self._source_track: Optional[MediaStreamTrack] = None

        logger.debug("DecartVideoTrack initialized (%sx%s)", width, height)

    async def add_frame(self, frame: av.VideoFrame) -> None:
        """Queue a frame for publishing.

        Frames are queued as-is; no resizing to ``width`` x ``height`` is
        applied here. Frames offered after ``stop()`` are silently dropped.

        Args:
            frame: The frame to enqueue.
        """
        if self._stopped:
            return
        self.frame_queue.put_latest_nowait(frame)

    # TODO: move this to a utils file
    def _resize_frame(self, frame: av.VideoFrame) -> av.VideoFrame:
        """Letterbox ``frame`` into the track's target resolution.

        The image is scaled to fit inside ``width`` x ``height`` while keeping
        its aspect ratio, then centred on a black background of exactly the
        target size.

        Args:
            frame: The frame to resize.

        Returns:
            A new frame of size ``width`` x ``height``.
        """
        logger.debug(
            "Resizing frame from %sx%s to %sx%s",
            frame.width,
            frame.height,
            self.width,
            self.height,
        )
        img = frame.to_image()

        src_width, src_height = img.size
        target_width, target_height = self.width, self.height

        # Scale factor that fits the source within the target dimensions.
        scale = min(target_width / src_width, target_height / src_height)
        # Clamp to one pixel so extreme aspect ratios never request a
        # zero-sized resize.
        new_width = max(1, int(src_width * scale))
        new_height = max(1, int(src_height * scale))

        resized = img.resize((new_width, new_height), Image.Resampling.LANCZOS)

        # Black canvas at the target resolution with the resized image centred.
        result = Image.new("RGB", (target_width, target_height), (0, 0, 0))
        x_offset = (target_width - new_width) // 2
        y_offset = (target_height - new_height) // 2
        result.paste(resized, (x_offset, y_offset))

        return av.VideoFrame.from_image(result)

    async def recv(self) -> av.VideoFrame:
        """Return the next frame to publish.

        Waits briefly for a newly queued frame. If none arrives in time, the
        most recently delivered frame (initially the placeholder) is sent
        again with a fresh timestamp.

        Returns:
            The frame to publish, stamped with the next ``pts``/``time_base``.

        Raises:
            MediaStreamError: If the track has been stopped.
        """
        if self._stopped:
            # Local import keeps this block self-contained. MediaStreamError
            # is the exception aiortc itself uses to signal an ended track,
            # and it subclasses Exception, so existing handlers still match.
            from aiortc.mediastreams import MediaStreamError

            raise MediaStreamError("Track stopped")

        try:
            frame = await asyncio.wait_for(
                self.frame_queue.get(),
                timeout=0.033,  # ~30 FPS
            )
            if frame:
                self.last_frame = frame
        except asyncio.TimeoutError:
            # No new frame in time: fall through and repeat the last one.
            pass

        pts, time_base = await self.next_timestamp()

        output_frame = self.last_frame
        output_frame.pts = pts
        output_frame.time_base = time_base

        return output_frame

    def stop(self) -> None:
        """Mark the track as stopped and stop the underlying track."""
        self._stopped = True
        super().stop()
Loading
Loading