# Source code for poseinterface.clips

"""Functions to extract clips from ``poseinterface`` videos."""

import argparse
import json
import logging
import sys
from pathlib import Path

import sleap_io as sio



# [docs]
def extract_clip(
    video_path: str | Path,
    start_frame: int,
    duration: int,
) -> tuple[Path, Path | None]:
    """Extract a video clip (and its clip labels if available).

    Reads the source video and saves a ``.mp4`` clip to a ``Clips/``
    subdirectory next to the source video. If a sibling
    ``*_videolabels.json`` file exists (holding labels for the entire
    session video, using the same schema as ``cliplabels.json``), a
    matching ``_cliplabels.json`` containing only the annotations within
    the requested frame range is also written.


    Parameters
    ----------
    video_path
        Path to the input ``.mp4`` video. The filename should follow
        the convention ``sub-<subjectID>_ses-<sessionID>_cam-<camID>.mp4``, and
        if a sibling labels file exists, its filename should be
        ``sub-<subjectID>_ses-<sessionID>_cam-<camID>_videolabels.json``.
    start_frame
        Index of the first frame to include in the clip (0-based).
    duration
        Number of frames to include in the clip.  If ``start_frame +
        duration`` exceeds the video length, the duration is clamped to the
        remaining frames and a warning is logged.

    Returns
    -------
    clip_path : Path
        Path to the output clip file.
    clip_json : Path | None
        Path to the ``_cliplabels.json`` file for the clip if extracted,
        None otherwise.

    Raises
    ------
    ValueError
        If ``start_frame`` is negative or ``duration`` is not positive.

    Notes
    -----
    This function optionally consumes a ``*_videolabels.json`` file, sibling
    to the input video file and holding labels for the entire video. This
    file is an intermediate cache useful for data contributors: it follows
    the same schema as ``cliplabels.json`` but it refers to the full video,
    rather than to a clip of it. The ``*_videolabels.json`` file is not part
    of the published benchmark dataset. For further details, see the
    "Intermediate file: `videolabels.json`" section of the benchmark
    dataset specification.

    This function assumes that the ``id`` field in the ``images`` list of the
    source ``*_videolabels.json`` corresponds to 0-based global frame indices
    of the full video.
    """
    # Check input values
    if start_frame < 0:
        raise ValueError(
            f"start_frame must be non-negative, got {start_frame}"
        )
    if duration <= 0:
        raise ValueError(f"duration must be positive, got {duration}")

    # Create "Clips" directory if it doesn't exist
    video_path = Path(video_path)
    clips_dir = video_path.parent / "Clips"
    clips_dir.mkdir(parents=True, exist_ok=True)

    # Read video as array
    video = sio.load_video(video_path)
    logging.info(
        f"filename: {video_path.name}, fps: {video.fps}, shape: {video.shape}"
    )

    # Clamp duration if it exceeds the video length
    if start_frame + duration > video.shape[0]:
        duration = video.shape[0] - start_frame
        logging.warning(
            "Clip exceeds video length. "
            f"Clamping duration to {duration} frames."
        )

    # Slice clip and save as mp4
    clip = video[start_frame : start_frame + duration]
    clip_path = (
        clips_dir / f"{video_path.stem}_start-{start_frame}_dur-{duration}.mp4"
    )
    sio.save_video(clip, clip_path, fps=video.fps)

    # Generate cliplabels.json only if a companion videolabels.json file exists
    video_json = video_path.parent / f"{video_path.stem}_videolabels.json"
    if video_json.exists():
        clip_json = _extract_cliplabels(
            video_path, clips_dir, start_frame, duration
        )
        logging.info(
            f"Extracted clip {clip_path.name} with labels {clip_json.name} "
            f"({duration} frames from start_frame={start_frame})."
        )
    else:
        clip_json = None
        logging.info(
            f"Extracted clip {clip_path.name} "
            f"({duration} frames from start_frame={start_frame}). "
            "No companion *_videolabels.json found; skipping label extraction."
        )

    return clip_path, clip_json



def _extract_cliplabels(
    video_path: Path, clips_dir: Path, start_frame: int, duration: int
) -> Path:
    """Extract clip labels from the sibling *_videolabels.json file."""
    # Read file with labels for the whole video
    video_json = video_path.parent / f"{video_path.stem}_videolabels.json"
    with open(video_json) as f:
        video_labels = json.load(f)

    # Compute clip end frame
    end_frame = start_frame + duration

    # Keep only data from the images in the clip, re-indexing ids to be
    # 0-based within the clip. file_name is left untouched to retain in it
    # the global (video-based) frame index
    clip_labels = {}
    clip_labels["images"] = [
        {
            **img,
            "id": img["id"] - start_frame,  # overwrite id
        }
        for img in video_labels["images"]
        if start_frame <= img["id"] < end_frame
    ]

    # Keep only annotations within the clip, remapping image_id to the local
    # (clip-based) frame index, and renumbering annotation ids to be 1-based
    # within the clip.
    clip_labels["annotations"] = [
        {
            **annot,
            "image_id": annot["image_id"] - start_frame,  # overwrite image_id
            "id": new_id,
        }
        for new_id, annot in enumerate(
            (
                ant
                for ant in video_labels["annotations"]
                if start_frame <= ant["image_id"] < end_frame
            ),  # generator lazily yields only annotations within the clip
            start=1,  # annotation ids are 1-based within clip
        )
    ]
    # pass categories unchanged
    clip_labels["categories"] = video_labels["categories"]

    # Save json with filtered data to clips directory
    clip_json = (
        clips_dir / f"{video_path.stem}_"
        f"start-{start_frame}_dur-{duration}_cliplabels.json"
    )
    with open(clip_json, "w") as f:
        json.dump(clip_labels, f)

    return clip_json


def main(args: argparse.Namespace) -> None:
    """Extract a clip using already-parsed command-line arguments.

    Parameters
    ----------
    args
        Namespace exposing the attributes ``video_path``, ``start_frame``
        and ``duration``, which are forwarded to ``extract_clip``.
    """
    # Pull the three settings out of the namespace, then hand them over
    video_path = args.video_path
    start_frame = args.start_frame
    duration = args.duration
    extract_clip(video_path, start_frame, duration)


def parse_args(args: list[str]) -> argparse.Namespace:
    """Build the clip-extraction argument parser and parse ``args``.

    Parameters
    ----------
    args
        Command-line argument strings to parse (e.g. ``sys.argv[1:]``).

    Returns
    -------
    argparse.Namespace
        Namespace with the attributes ``video_path`` (str),
        ``start_frame`` (int) and ``duration`` (int). All three options
        are required.
    """
    video_path_help = (
        "Path to video file to clip. The filename should follow "
        "the convention ``sub-<subjectID>_ses-<sessionID>_cam-<camID>.mp4``, "
        "and if a sibling labels file exists, its filename should be "
        "``sub-<subjectID>_ses-<sessionID>_cam-<camID>_videolabels.json``."
    )
    # The two frame-based options only differ in flag name and help text
    frame_options = (
        ("--start_frame", "Start frame of the clip as a 0-based index."),
        ("--duration", "Total length of the output clip in frames"),
    )

    cli = argparse.ArgumentParser(
        description=(
            "Extract clips from video (and corresponding "
            "clip labels if available)."
        )
    )
    cli.add_argument(
        "--video_path", type=str, required=True, help=video_path_help
    )
    for flag, help_text in frame_options:
        cli.add_argument(flag, type=int, required=True, help=help_text)

    return cli.parse_args(args)


def wrapper() -> None:
    """Console-script entry point for ``extract-clip``.

    Parses the arguments found in ``sys.argv`` (excluding the program
    name) and runs ``main`` with them.
    """
    main(parse_args(sys.argv[1:]))


# Run the command-line entry point when this file is executed as a script.
if __name__ == "__main__":
    wrapper()