-
Notifications
You must be signed in to change notification settings - Fork 0
Open
Labels
enhancement: New feature or request
Description
Problem
Currently the frame_sampling
package implements its own Dataset
class, which it subclasses to create the VideoDataset
class. This class has value outside of the frame_sampling
package.
Code
The code being discussed:
frame-sampling/src/frame_sampling/dataset.py
Lines 1 to 75 in da2671e
"""Implements classes for dealing with video data.""" | |
from abc import ABC | |
from abc import abstractmethod | |
from dataclasses import InitVar | |
from dataclasses import dataclass | |
from pathlib import Path | |
from typing import Generator | |
from typing import Iterator | |
from typing import List | |
from typing import Union | |
class DatasetNotFoundError(FileNotFoundError, AssertionError):
    """Raised when a dataset's data directory does not exist.

    Also derives from ``AssertionError`` so that callers written against the
    earlier ``assert``-based existence check keep catching the failure.
    """


@dataclass
class Dataset(ABC):
    """Define the abstract base class for all datasets.

    A dataset is an index of file paths found (recursively) under a data
    directory. Concrete subclasses supply ``type`` and ``file_extensions``;
    the latter is a list of glob patterns used to build the index.

    Args:
        data_dir: Directory to scan. It is an init-only argument; the
            resolved ``Path`` is kept on ``_data_path``.

    Raises:
        DatasetNotFoundError: If ``data_dir`` does not exist.
    """

    data_dir: InitVar[Union[str, Path]]

    def __post_init__(self, data_dir: Union[str, Path]) -> None:
        """Validate the data directory and build the file index."""
        self._data_path: Path = Path(data_dir)

        # Fail before scanning so a bad path never yields an empty index.
        self._dataset_exists()

        # Materialize once; iteration, len() and indexing all use this list.
        self.index: List[Path] = list(self._get_filepaths())

    def __iter__(self) -> Iterator[Path]:
        """Iterate over the indexed file paths."""
        return iter(self.index)

    def __len__(self) -> int:
        """Return the number of indexed file paths."""
        return len(self.index)

    def __getitem__(self, idx: int) -> Path:
        """Return the indexed file path at position ``idx``."""
        return self.index[idx]

    def _dataset_exists(self) -> None:
        """Make sure the path to the data dir exists.

        Raises:
            DatasetNotFoundError: If the data path does not exist.
        """
        # An explicit raise is used instead of ``assert`` because assert
        # statements are removed when Python runs with ``-O``.
        if not self._data_path.exists():
            raise DatasetNotFoundError(
                f"Dataset directory does not exist: {self._data_path}"
            )

    def _get_filepaths(self) -> Generator[Path, None, None]:
        """Yield paths under the data dir matching each accepted pattern.

        Patterns are processed in the order given by ``file_extensions``, and
        each one is searched recursively (``**/<pattern>``).
        """
        for ext in self.file_extensions:
            yield from self._data_path.glob(f"**/{ext}")

    @property
    @abstractmethod
    def type(self) -> str:
        """Define the type of the data found in the dataset."""
        pass

    @property
    @abstractmethod
    def file_extensions(self) -> List[str]:
        """Define the glob patterns accepted for the given data type."""
        pass

    @property
    def path(self) -> str:
        """Return the data path as a string."""
        return str(self._data_path)
class VideoDataset(Dataset):
    """Concrete dataset that indexes video files."""

    # Fills in the abstract ``type`` property declared on ``Dataset``.
    type = "video"

    # Glob patterns searched recursively under the data directory.
    file_extensions = [
        "*.mp4",
        "*.avi",
        "*.mkv",
        "*.mov",
        "*.webm",
    ]
Solution
Move code into the DiogenesAnalytics/dataset
repo.
Metadata
Metadata
Assignees
Labels
enhancement: New feature or request