roboflow · timmermansjoy · Aug 9, 2025 · Aug 9, 2025 · Aug 9, 2025 · Aug 13, 2025
@@ -83,6 +83,9 @@ build = [
     "wheel>=0.40,<0.46",
     "build>=0.10,<1.3"
 ]
+video = [
+    "av>=15.0.0"
+]
 
 [tool.bandit]
 target = ["test", "supervision"]

@@ -131,11 +131,11 @@
 from supervision.utils.notebook import plot_image, plot_images_grid
 from supervision.utils.video import (
     FPSMonitor,
-    VideoInfo,
     VideoSink,
     get_video_frames_generator,
     process_video,
 )
+from supervision.video.core import Video, VideoInfo
 
 __all__ = [
     "LMM",
@@ -193,6 +193,7 @@
     "TriangleAnnotator",
     "VertexAnnotator",
     "VertexLabelAnnotator",
+    "Video",
     "VideoInfo",
     "VideoSink",
     "approximate_polygon",

@@ -9,6 +9,8 @@
 import numpy as np
 from tqdm.auto import tqdm
 
+from supervision.utils.internal import deprecated
+
 
 @dataclass
 class VideoInfo:
@@ -60,6 +62,7 @@ def resolution_wh(self) -> tuple[int, int]:
         return self.width, self.height
 
 
+@deprecated
 class VideoSink:
     """
     Context manager that saves video frames to a file using OpenCV.
@@ -117,6 +120,7 @@ def __exit__(self, exc_type, exc_value, exc_traceback):
         self.__writer.release()
 
 
+@deprecated
 def _validate_and_setup_video(
     source_path: str, start: int, end: int | None, iterative_seek: bool = False
 ):
@@ -141,6 +145,7 @@ def _validate_and_setup_video(
     return video, start, end
 
 
+@deprecated
 def get_video_frames_generator(
     source_path: str,
     stride: int = 1,
@@ -192,6 +197,7 @@ def get_video_frames_generator(
     video.release()
 
 
+@deprecated
 def process_video(
     source_path: str,
     target_path: str,

@@ -0,0 +1,4 @@
+from .core import Video
+from .utils import VideoInfo
+
+__all__ = ["Video", "VideoInfo"]
@@ -0,0 +1,7 @@
+# Registry of video backends, keyed by the backend's short name.  Each value
+# is the dotted path of a module under ``supervision.video.backends``.
+BACKENDS = {
+    "opencv": "supervision.video.backends.opencv",
+    "pyav": "supervision.video.backends.pyav",
+}
+
+
+# Only the registry itself is part of this module's public API.
+__all__ = ["BACKENDS"]
@@ -0,0 +1,118 @@
+from __future__ import annotations
+
+from collections.abc import Callable, Iterator
+from typing import Protocol, runtime_checkable
+
+import numpy as np
+
+from ..utils import VideoInfo
+
+
+@runtime_checkable
+class Backend(Protocol):
+    """Structural interface that every video backend has to provide.
+
+    The high-level :py:class:`~supervision.video.Video` adapter instantiates
+    a backend - selected by name - and afterwards only calls the methods
+    declared here.  Everything else a backend defines is a private
+    implementation detail.
+    """
+
+    def __init__(self, source: str | int):
+        """Create a new backend for ``source``.
+
+        Parameters
+        ----------
+        source:
+            Either a ``str`` (file path, RTSP/HTTP URL …) or an ``int``
+            (webcam index, OpenCV-style).
+        """
+        ...
+
+    # Decoding ---------------------------------------------------------------
+
+    def info(self) -> VideoInfo:
+        """Return static information (width / height / fps / total_frames)."""
+        ...
+
+    def read(self) -> tuple[bool, np.ndarray]:
+        """Decode the next frame.
+
+        Returns
+        -------
+        tuple[bool, np.ndarray]
+            ``(success, frame)`` where ``frame`` is a ``np.ndarray`` (HxWx3).
+        """
+        ...
+
+    def grab(self) -> bool:
+        """Advance past the next frame without decoding its pixels.
+
+        Equivalent to OpenCV's ``VideoCapture.grab``.  Useful when frames
+        only need to be skipped quickly (for example ``stride > 1``).
+        """
+        ...
+
+    def seek(self, frame_idx: int) -> None:
+        """Seek to ``frame_idx`` so the next :py:meth:`read` returns it."""
+        ...
+
+    # Encoding ---------------------------------------------------------------
+
+    def writer(
+        self,
+        path: str,
+        info: VideoInfo,
+        codec: str | None = None,
+    ) -> Writer:
+        """Return a writer that encodes frames to ``path``.
+
+        Parameters
+        ----------
+        path:
+            Target file path.
+        info:
+            Expected output resolution / fps (copied from source by default).
+        codec:
+            FourCC / codec name to override the backend default.
+        """
+        ...
+
+    # Iterator convenience ---------------------------------------------------
+
+    def __iter__(self) -> Iterator[np.ndarray]:
+        """Yield successive frames until the source is exhausted."""
+        ...
+
+
+@runtime_checkable
+class Writer(Protocol):
+    """Protocol for an encoded video writer returned by :py:meth:`Backend.writer`.
+
+    Classes that subclass this protocol explicitly also inherit the context
+    manager methods below, which close the writer when the ``with`` block
+    exits.
+    """
+
+    def write(
+        self,
+        frame: np.ndarray,
+        frame_number: int,
+        callback: Callable[[np.ndarray, int], np.ndarray] | None = None,
+    ) -> None:
+        """Write a single BGR / RGB frame to the output stream.
+
+        Parameters
+        ----------
+        frame:
+            Image to encode.
+        frame_number:
+            Index of ``frame`` in the output sequence.
+        callback:
+            Optional per-frame hook, called as ``callback(frame, frame_number)``
+            and expected to return the frame to encode.  It defaults to
+            ``None`` so callers can simply ``write(frame, frame_number)``.
+        """
+
+    def close(self) -> None:
+        """Flush and close the underlying container / file descriptor."""
+
+    def __enter__(self):
+        """Enter the ``with`` block and hand back the writer itself."""
+        return self
+
+    def __exit__(self, exc_type, exc, tb):
+        """Close the writer on exit from the ``with`` block."""
+        self.close()
+        return False  # propagate exception (if any)
+
+
+# ---------------------------------------------------------------------------
+# Utility - a dummy writer that does nothing.  Useful for testing.
+# ---------------------------------------------------------------------------
+
+
+class _NullWriter:
+    """Fallback Writer that silently drops every frame."""
+
+    def write(
+        self,
+        frame: np.ndarray,
+        frame_number: int,
+        callback: Callable[[np.ndarray, int], np.ndarray] | None = None,
+    ) -> None:
+        """Accept a frame and discard it; ``callback`` is never invoked.
+
+        ``callback`` is optional so the signature matches the OpenCV writer,
+        which also allows ``write(frame, frame_number)``.
+        """
+
+    def close(self) -> None:
+        """Do nothing - there is no underlying resource to release."""
+
+    def __enter__(self):
+        """Enter the ``with`` block and hand back the writer itself."""
+        return self
+
+    def __exit__(self, exc_type, exc, tb):
+        """Leave the ``with`` block without suppressing any exception."""
+        return False
+
+
+__all__ = [
+    "Backend",
+    "Writer",
+]
@@ -0,0 +1,195 @@
+from collections.abc import Callable, Iterator
+from typing import Any
+
+import cv2
+import numpy as np
+
+from ..utils import VideoInfo
+from .base import Writer
+
+
+class OpenCVWriter:
+    """Writer that encodes frames through a ``cv2.VideoWriter``.
+
+    Parameters
+    ----------
+    vw:
+        An already constructed ``cv2.VideoWriter``; released by :py:meth:`close`.
+    info:
+        Target video description.  Its ``width`` and ``height`` define the
+        size every written frame is resized to when it differs.
+    """
+
+    def __init__(self, vw: cv2.VideoWriter, info: VideoInfo):
+        self._vw = vw
+        self.info = info
+
+    def write(
+        self,
+        frame: np.ndarray,
+        frame_number: int,
+        callback: Callable[[np.ndarray, int], np.ndarray] | None = None,
+    ) -> None:
+        """Optionally transform ``frame`` and append it to the output video.
+
+        Parameters
+        ----------
+        frame:
+            Image to encode.
+        frame_number:
+            Index of ``frame``; only forwarded to ``callback``.
+        callback:
+            Optional hook called as ``callback(frame, frame_number)``.  The
+            frame it returns is the one that gets written.
+        """
+        if callback is not None:
+            frame = callback(frame, frame_number)
+        # The underlying writer was created for a fixed frame size, so bring
+        # any differently sized frame to (info.width, info.height) first.
+        if frame.shape[:2] != (self.info.height, self.info.width):
+            frame = cv2.resize(frame, (self.info.width, self.info.height))
+        self._vw.write(frame)
+
+    def close(self) -> None:
+        """Release the underlying ``cv2.VideoWriter``."""
+        self._vw.release()
+
+    def __enter__(self) -> "OpenCVWriter":
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
+        # Returns None, so an exception raised inside the block propagates.
+        self.close()
+
+
+class Backend:
+    """OpenCV implementation of the video backend interface.
+
+    Wraps a ``cv2.VideoCapture`` for decoding and creates ``cv2.VideoWriter``
+    based writers for encoding.  Instances can be used as context managers;
+    leaving the ``with`` block releases the capture.
+    """
+
+    def __init__(self, source_path: str | int):
+        """Open ``source_path`` with ``cv2.VideoCapture``.
+
+        ``source_path`` can be
+        * ``str`` - file path, RTSP/HTTP URL …
+        * ``int`` - webcam index (OpenCV-style)
+
+        Raises
+        ------
+        ValueError
+            If OpenCV reports that the source could not be opened.
+        """
+        self.source_path = source_path
+        self.cap = cv2.VideoCapture(self.source_path)
+        if not self.cap.isOpened():
+            raise ValueError(f"Could not open video source {self.source_path}")
+
+    def info(self) -> VideoInfo:
+        """Return static information (width / height / fps / total_frames).
+
+        The values are read from the capture's properties and passed to
+        ``VideoInfo`` positionally as ``(width, height, rounded fps, raw fps,
+        frame count)``.
+        """
+        # NOTE(review): this local import shadows the module-level
+        # ``VideoInfo`` from ``..utils``.  It looks redundant, but ``..core``
+        # is not visible here - confirm both names are the same class before
+        # removing it.
+        from ..core import VideoInfo
+
+        w = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+        h = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+        precise_fps = self.cap.get(cv2.CAP_PROP_FPS)
+        fps = int(round(precise_fps, 0))
+        n = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
+        return VideoInfo(w, h, fps, precise_fps, n)
+
+    def read(self) -> tuple[bool, np.ndarray]:
+        """Decode the next frame and return ``(success, frame)``."""
+        return self.cap.read()
+
+    def grab(self) -> bool:
+        """Grab the next frame without decoding pixels."""
+        return self.cap.grab()
+
+    def seek(self, frame_idx: int) -> None:
+        """Seek to frame_idx so that the next :py:meth:`read` returns it."""
+        self.cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
+
+    # TODO(review): do we want to mix and match different writers to
+    # different backends?
+    def writer(self, path: str, info: VideoInfo, codec: str | None = None) -> Writer:
+        """Return a writer that encodes frames to path.
+
+        Parameters
+        ----------
+        path:
+            Target file path.
+        info:
+            Expected output resolution / fps (copied from source by default).
+        codec:
+            FourCC / codec name to override the backend default (``"mp4v"``).
+        """
+        fourcc = cv2.VideoWriter_fourcc(*(codec if codec else "mp4v"))
+        vw = cv2.VideoWriter(path, fourcc, info.fps, (info.width, info.height))
+        return OpenCVWriter(vw, info)
+
+    def frames(
+        self,
+        stride: int = 1,
+        start: int = 0,
+        end: int | None = None,
+        resolution_wh: tuple[int, int] | None = None,
+        interpolation=cv2.INTER_LINEAR,
+    ) -> Iterator[np.ndarray]:
+        """Yield frames lazily, with optional skipping and resizing.
+
+        Parameters
+        ----------
+        stride:
+            Step between yielded frames (``1`` yields every frame, ``2`` every
+            other frame, ...).
+        start:
+            First frame index (0-based) to yield.
+        end:
+            Index after the last frame to yield. ``None`` means until
+            exhaustion.
+        resolution_wh:
+            Optional ``(width, height)`` to resize each yielded frame to.
+        interpolation:
+            OpenCV interpolation flag used when resizing.
+
+        Yields
+        ------
+        np.ndarray
+            The next decoded (and optionally resized) video frame.
+
+        Raises
+        ------
+        ValueError
+            If ``stride`` is smaller than 1.
+        """
+        if stride < 1:
+            raise ValueError("stride must be >= 1")
+
+        total = self.info().total_frames
+        if end is None and total and total > 0:
+            end = total
+        # ``end`` is still None when no positive frame count is reported.  In
+        # that case there is no upper bound, so every bound check below has to
+        # tolerate None instead of comparing an int against it.
+        if start < 0 or (end is not None and start >= end):
+            return
+
+        # Position capture at the start frame
+        self.seek(start)
+        current_idx = start
+
+        while end is None or current_idx < end:
+            success, frame = self.read()
+            if not success:
+                break
+
+            if resolution_wh is not None and (
+                frame.shape[1] != resolution_wh[0] or frame.shape[0] != resolution_wh[1]
+            ):
+                frame = cv2.resize(frame, resolution_wh, interpolation=interpolation)
+
+            yield frame
+            current_idx += 1
+
+            # Skip the stride-1 frames in between with grab(), which avoids
+            # decoding pixels that would be thrown away anyway.
+            for _ in range(stride - 1):
+                if end is not None and current_idx >= end:
+                    break
+                if not self.grab():
+                    return
+                current_idx += 1
+
+    def __iter__(self) -> Iterator[np.ndarray]:
+        """Yield successive frames from the current position until exhaustion."""
+        while True:
+            success, frame = self.read()
+            if not success:
+                break
+            yield frame
+
+    def release(self):
+        """Release the underlying ``cv2.VideoCapture``."""
+        self.cap.release()
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
+        # Returns None, so an exception raised inside the block propagates.
+        self.release()
+
+    def __len__(self) -> int:
+        """Return the frame count reported by :py:meth:`info`.
+
+        Raises
+        ------
+        TypeError
+            If the frame count is ``None`` or negative.
+        """
+        n = self.info().total_frames
+        if n is None or n < 0:
+            raise TypeError("length is unknown for this stream")
+        return n
+
+    def __getitem__(self, index: int) -> np.ndarray:
+        """Return the frame at ``index`` without moving the read position.
+
+        Negative indices count from the end and therefore need a positive
+        reported frame count.
+
+        Raises
+        ------
+        IndexError
+            If the frame cannot be read, or a negative index cannot be
+            resolved against the reported frame count.
+        """
+        position = index
+        if position < 0:
+            total = self.info().total_frames
+            if total is None or total <= 0 or position < -total:
+                raise IndexError(f"Failed to read frame {index}")
+            position += total
+
+        current = int(self.cap.get(cv2.CAP_PROP_POS_FRAMES))
+        self.cap.set(cv2.CAP_PROP_POS_FRAMES, position)
+        success, frame = self.read()
+        # Restore the previous position so random access does not disturb a
+        # sequential read that is in progress.
+        # TODO(review): do we want to restore the video to the original
+        # position?
+        self.cap.set(cv2.CAP_PROP_POS_FRAMES, current)
+        if not success:
+            raise IndexError(f"Failed to read frame {index}")
+        return frame