Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,9 @@ build = [
"wheel>=0.40,<0.46",
"build>=0.10,<1.3"
]
video = [
"av>=15.0.0"
]

[tool.bandit]
target = ["test", "supervision"]
Expand Down
3 changes: 2 additions & 1 deletion supervision/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,11 +131,11 @@
from supervision.utils.notebook import plot_image, plot_images_grid
from supervision.utils.video import (
FPSMonitor,
VideoInfo,
VideoSink,
get_video_frames_generator,
process_video,
)
from supervision.video.core import Video, VideoInfo

__all__ = [
"LMM",
Expand Down Expand Up @@ -193,6 +193,7 @@
"TriangleAnnotator",
"VertexAnnotator",
"VertexLabelAnnotator",
"Video",
"VideoInfo",
"VideoSink",
"approximate_polygon",
Expand Down
6 changes: 6 additions & 0 deletions supervision/utils/video.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
import numpy as np
from tqdm.auto import tqdm

from supervision.utils.internal import deprecated


@dataclass
class VideoInfo:
Expand Down Expand Up @@ -60,6 +62,7 @@ def resolution_wh(self) -> tuple[int, int]:
return self.width, self.height


@deprecated
class VideoSink:
"""
Context manager that saves video frames to a file using OpenCV.
Expand Down Expand Up @@ -117,6 +120,7 @@ def __exit__(self, exc_type, exc_value, exc_traceback):
self.__writer.release()


@deprecated
def _validate_and_setup_video(
source_path: str, start: int, end: int | None, iterative_seek: bool = False
):
Expand All @@ -141,6 +145,7 @@ def _validate_and_setup_video(
return video, start, end


@deprecated
def get_video_frames_generator(
source_path: str,
stride: int = 1,
Expand Down Expand Up @@ -192,6 +197,7 @@ def get_video_frames_generator(
video.release()


@deprecated
def process_video(
source_path: str,
target_path: str,
Expand Down
4 changes: 4 additions & 0 deletions supervision/video/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Re-export the names listed in ``__all__`` so they can be imported straight
# from this package.
from .core import Video
from .utils import VideoInfo

__all__ = ["Video", "VideoInfo"]
7 changes: 7 additions & 0 deletions supervision/video/backends/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Registry of the available video backends: short backend name -> dotted path
# of the module that implements it.
# NOTE(review): presumably the module is imported on demand from this path
# when a backend is selected by name - confirm against the code that reads
# BACKENDS.
BACKENDS = {
    "opencv": "supervision.video.backends.opencv",
    "pyav": "supervision.video.backends.pyav",
}


__all__ = ["BACKENDS"]
118 changes: 118 additions & 0 deletions supervision/video/backends/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
from __future__ import annotations

from collections.abc import Iterator
from typing import Protocol, runtime_checkable

import numpy as np

from ..utils import VideoInfo


@runtime_checkable
class Backend(Protocol):
    """Structural interface that every video backend has to provide.

    The high-level :py:class:`~supervision.video.Video` adapter instantiates a
    backend - selected by name - and then only calls the methods declared
    here. Anything else is considered a private implementation detail.
    """

    def __init__(self, source: str | int):
        """Create a new backend for ``source``.

        Parameters
        ----------
        source:
            Either a ``str`` (file path, RTSP/HTTP URL, ...) or an ``int``
            (webcam index, OpenCV-style).
        """
        ...

    # Decoding ---------------------------------------------------------------

    def info(self) -> VideoInfo:
        """Return static information (width / height / fps / total_frames)."""
        ...

    def read(self) -> tuple[bool, np.ndarray]:
        """Decode the next frame.

        Returns
        -------
        tuple[bool, np.ndarray]
            ``(success, frame)`` where ``frame`` is a ``np.ndarray`` (HxWx3).
        """
        ...

    def grab(self) -> bool:
        """Advance to the next frame without decoding its pixels.

        Equivalent to OpenCV's ``VideoCapture.grab``. Useful when frames only
        need to be skipped quickly (``stride > 1`` for example).
        """
        ...

    def seek(self, frame_idx: int) -> None:
        """Seek to ``frame_idx`` so that the next :py:meth:`read` returns it."""
        ...

    # Encoding ---------------------------------------------------------------

    def writer(
        self,
        path: str,
        info: VideoInfo,
        codec: str | None = None,
    ) -> Writer:
        """Return a writer that encodes frames to ``path``.

        Parameters
        ----------
        path:
            Target file path.
        info:
            Expected output resolution / fps (copied from source by default).
        codec:
            FourCC / codec name to override the backend default.
        """
        ...

    # Iterator convenience ---------------------------------------------------

    def __iter__(self) -> Iterator[np.ndarray]:
        """Yield successive frames until exhaustion.

        This is convenience behaviour. The protocol itself ships no default
        implementation, so every backend has to define it.
        """
        ...


@runtime_checkable
class Writer(Protocol):
    """Protocol for an encoded video writer returned by :py:meth:`Backend.writer`.

    A writer doubles as a context manager: entering yields the writer itself
    and leaving the ``with`` block calls :py:meth:`close`.
    """

    def write(self, frame: np.ndarray, frame_number: int, callback=None) -> None:
        """Write a single BGR / RGB frame to the output stream.

        Parameters
        ----------
        frame:
            Image to encode.
        frame_number:
            Index of ``frame``; passed along to ``callback`` when one is given.
        callback:
            Optional per-frame hook. It defaults to ``None`` so the protocol
            matches the OpenCV writer, which treats it as optional, calls it
            as ``callback(frame, frame_number)`` and encodes the frame it
            returns.
        """

    def close(self) -> None:
        """Flush and close the underlying container / file descriptor."""

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc, tb):
        self.close()
        return False  # propagate exception (if any)


# ---------------------------------------------------------------------------
# Utility - a dummy writer that does nothing. Useful for testing.
# ---------------------------------------------------------------------------


class _NullWriter:
    """Fallback Writer that silently drops every frame."""

    def write(self, frame: np.ndarray, frame_number: int, callback=None) -> None:
        """Discard ``frame``.

        ``callback`` is accepted (and optional, like in the OpenCV writer) but
        never invoked, because nothing is encoded.
        """

    def close(self) -> None:
        """Do nothing; there is no underlying resource to release."""

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc, tb):
        return False  # propagate exception (if any)


# Names exported by ``from ... import *``; the private ``_NullWriter`` helper
# is not part of this list.
__all__ = [
    "Backend",
    "Writer",
]
195 changes: 195 additions & 0 deletions supervision/video/backends/opencv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
from collections.abc import Callable, Iterator
from typing import Any

import cv2
import numpy as np

from ..utils import VideoInfo
from .base import Writer


class OpenCVWriter:
    """Writer that encodes frames through a ``cv2.VideoWriter``.

    Frames whose height / width differ from ``info`` are resized before they
    are handed to OpenCV. The object is also a context manager that releases
    the underlying writer on exit.
    """

    def __init__(self, vw: cv2.VideoWriter, info: VideoInfo):
        """Wrap an already constructed OpenCV writer.

        Parameters
        ----------
        vw:
            OpenCV writer that receives the encoded frames.
        info:
            Output description; its ``width`` / ``height`` define the size
            every written frame is brought to.
        """
        self._vw = vw
        self.info = info

    def write(
        self,
        frame: np.ndarray,
        frame_number: int,
        callback: Callable[[np.ndarray, int], np.ndarray] | None = None,
    ) -> None:
        """Optionally transform ``frame`` and append it to the output.

        Parameters
        ----------
        frame:
            Image to encode.
        frame_number:
            Index of ``frame``; only forwarded to ``callback``.
        callback:
            Optional hook called as ``callback(frame, frame_number)``. The
            frame it returns is the one that gets written.
        """
        # Compare against None so a callable object that happens to be falsy
        # is still honoured.
        if callback is not None:
            frame = callback(frame, frame_number)

        target_h, target_w = self.info.height, self.info.width
        if frame.shape[0] != target_h or frame.shape[1] != target_w:
            # cv2.resize takes the destination size as (width, height).
            frame = cv2.resize(frame, (target_w, target_h))
        self._vw.write(frame)

    def close(self) -> None:
        """Release the underlying ``cv2.VideoWriter``."""
        self._vw.release()

    def __enter__(self) -> Writer:
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        # Returns None, so an exception raised inside the ``with`` body
        # propagates.
        self.close()


class Backend:
    """Video backend built on top of ``cv2.VideoCapture``."""

    def __init__(self, source_path: str | int):
        """Create a new backend for source.

        ``source_path`` can be

        * ``str`` - file path, RTSP/HTTP URL ...
        * ``int`` - webcam index (OpenCV-style)

        Raises
        ------
        ValueError
            If OpenCV reports that the source could not be opened.
        """
        self.source_path = source_path
        self.cap = cv2.VideoCapture(self.source_path)
        if not self.cap.isOpened():
            raise ValueError(f"Could not open video source {self.source_path}")

    def info(self) -> VideoInfo:
        """Return static information (width / height / fps / total_frames)."""
        # NOTE(review): this local import shadows the module-level
        # ``VideoInfo`` imported from ``..utils``. Presumably both names refer
        # to the same class - confirm and keep only one of the two imports.
        from ..core import VideoInfo

        width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        precise_fps = self.cap.get(cv2.CAP_PROP_FPS)
        fps = int(round(precise_fps, 0))
        total_frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
        return VideoInfo(width, height, fps, precise_fps, total_frames)

    def read(self) -> tuple[bool, np.ndarray]:
        """Decode the next frame and return ``(success, frame)``."""
        return self.cap.read()

    def grab(self) -> bool:
        """Grab the next frame without decoding pixels."""
        return self.cap.grab()

    def seek(self, frame_idx: int) -> None:
        """Seek to frame_idx so that the next :py:meth:`read` returns it."""
        self.cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)

    # ? Do we want to mix and match different writers to different backends?
    def writer(self, path: str, info: VideoInfo, codec: str | None = None) -> Writer:
        """Return a writer that encodes frames to path.

        Parameters
        ----------
        path:
            Target file path.
        info:
            Expected output resolution / fps (copied from source by default).
        codec:
            FourCC / codec name to override the backend default (``"mp4v"``).
        """
        fourcc = cv2.VideoWriter_fourcc(*(codec or "mp4v"))
        vw = cv2.VideoWriter(path, fourcc, info.fps, (info.width, info.height))
        return OpenCVWriter(vw, info)

    def frames(
        self,
        stride: int = 1,
        start: int = 0,
        end: int | None = None,
        resolution_wh: tuple[int, int] | None = None,
        interpolation=cv2.INTER_LINEAR,
    ) -> Iterator[np.ndarray]:
        """Yield frames lazily, with optional skipping and resizing.

        This is a generator, so argument validation runs when iteration
        starts, not when the method is called.

        Parameters
        ----------
        stride:
            Step between yielded frames (``1`` yields every frame).
        start:
            First frame index (0-based) to yield.
        end:
            Index after the last frame to yield. ``None`` means the reported
            frame count, or until exhaustion when that count is unknown.
        resolution_wh:
            Optional ``(width, height)`` to resize each yielded frame to.
        interpolation:
            OpenCV interpolation flag used when resizing.

        Yields
        ------
        np.ndarray
            The next decoded (and optionally resized) video frame.

        Raises
        ------
        ValueError
            If ``stride`` is smaller than 1.
        """
        if stride < 1:
            raise ValueError("stride must be >= 1")

        if end is None:
            total = self.info().total_frames
            # A missing or non-positive frame count is treated as "length
            # unknown"; ``end`` then stays None and reading continues until
            # the source stops delivering frames.
            if total and total > 0:
                end = total

        # ``end`` may still be None here, so it must not be compared blindly.
        if start < 0 or (end is not None and start >= end):
            return

        # Position capture at the start frame
        self.seek(start)
        current_idx = start

        while end is None or current_idx < end:
            success, frame = self.read()
            if not success:
                break

            if resolution_wh is not None and (
                frame.shape[1] != resolution_wh[0] or frame.shape[0] != resolution_wh[1]
            ):
                frame = cv2.resize(frame, resolution_wh, interpolation=interpolation)

            yield frame
            current_idx += 1

            # Efficiently skip stride-1 frames with grab()
            for _ in range(stride - 1):
                if end is not None and current_idx >= end:
                    break
                if not self.grab():
                    return
                current_idx += 1

    def __iter__(self) -> Iterator[np.ndarray]:
        """Yield successive frames from the current position until exhaustion."""
        while True:
            success, frame = self.read()
            if not success:
                break
            yield frame

    def release(self):
        """Release the video file."""
        self.cap.release()

    def __enter__(self):
        return self

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        self.release()

    def __len__(self) -> int:
        """Return the reported frame count.

        Raises
        ------
        TypeError
            If the frame count is missing or negative.
        """
        n = self.info().total_frames
        if n is None or n < 0:
            raise TypeError("length is unknown for this stream")
        return n

    def __getitem__(self, index: int) -> np.ndarray:
        """Return frame ``index`` without moving the current read position.

        Negative indices count from the end when the frame count is known.

        Raises
        ------
        IndexError
            If the frame cannot be read, or if a negative index is used on a
            source whose length is unknown or shorter than ``-index``.
        """
        requested = index
        if index < 0:
            total = self.info().total_frames
            if total is None or total <= 0 or index < -total:
                raise IndexError(f"Failed to read frame {requested}")
            index += total

        current = int(self.cap.get(cv2.CAP_PROP_POS_FRAMES))
        self.cap.set(cv2.CAP_PROP_POS_FRAMES, index)
        success, frame = self.read()
        self.cap.set(
            cv2.CAP_PROP_POS_FRAMES, current
        )  # ? Do we want to restore the video to the original position?
        if not success:
            raise IndexError(f"Failed to read frame {requested}")
        return frame
Loading