Skip to content

Utilities API

This page documents helper scripts and repair utilities under vbc/utils/.

Fix VBC Tags

Dry-run capable helper for adding missing VBC metadata tags to existing MP4 files.

fix_vbc_tags

Move Error Files

Move source MP4 files and matching .err markers into a quarantine directory.

move_err_files

Move source MP4 files that have corresponding .err files in the output directory to /tmp/err.

Input directory is provided by the user; output directory is derived by appending _out to the input directory name. Relative structure is preserved under /tmp/err. If more than 20 .err files are found, the script asks for confirmation before moving anything.

safe_move

safe_move(src: Path, dest: Path) -> Tuple[bool, str]

Move src to dest, creating parents; avoid overwriting existing dest.

Source code in vbc/utils/move_err_files.py
def safe_move(src: Path, dest: Path) -> Tuple[bool, str]:
    """Move ``src`` to ``dest`` without overwriting an existing destination.

    Missing parent directories of ``dest`` are created before the move.

    Args:
        src: File to move.
        dest: Target path; nothing is moved if it already exists.

    Returns:
        ``(True, "")`` on success, otherwise ``(False, reason)`` where
        ``reason`` is a human-readable explanation of why nothing was moved.
    """
    try:
        # Creating the parents can fail as well (a path component that is a
        # regular file, missing permissions, ...). Report that through the
        # return value like any other failure instead of letting it escape.
        dest.parent.mkdir(parents=True, exist_ok=True)
        if dest.exists():
            return False, f"destination exists, skipping: {dest}"
        shutil.move(str(src), str(dest))
        return True, ""
    except Exception as e:
        # Deliberately broad: callers rely on the (ok, message) result and
        # never expect this helper to raise for a single bad file.
        return False, f"failed to move {src} -> {dest}: {e}"

Copy Failed Videos

Copy source videos that correspond to .err markers while preserving relative paths.

copy_failed_videos

Copy source videos for failed compressions based on .err files.

This script finds all .err files in the output directory and copies the corresponding source .mp4 files to a new directory, preserving the date subdirectory structure.

copy_failed_videos

copy_failed_videos(source_dir: str, error_dir: str, destination_dir: str)

Find .err files and copy corresponding source videos.

Parameters:

Name Type Description Default
source_dir str

Directory with original .mp4 files (e.g., SR)

required
error_dir str

Directory with .err files (e.g., SR_out)

required
destination_dir str

Destination directory (e.g., SR_new)

required
Source code in vbc/utils/copy_failed_videos.py
def copy_failed_videos(source_dir: str, error_dir: str, destination_dir: str):
    """
    Copy the source video behind every ``.err`` marker into a separate tree.

    Each ``<name>.err`` found (recursively) under ``error_dir`` is mapped to
    ``<name>.mp4`` at the same relative location under ``source_dir``; when
    that file exists it is copied to the same relative location under
    ``destination_dir``. Progress and a final summary are printed to stdout.
    The process exits with status 1 if either input directory is missing.

    Args:
        source_dir: Directory with original .mp4 files (e.g., SR)
        error_dir: Directory with .err files (e.g., SR_out)
        destination_dir: Destination directory (e.g., SR_new)
    """
    src_root = Path(source_dir)
    err_root = Path(error_dir)
    dst_root = Path(destination_dir)

    if not src_root.exists():
        print(f"Error: Source directory does not exist: {source_dir}")
        sys.exit(1)
    if not err_root.exists():
        print(f"Error: Error directory does not exist: {error_dir}")
        sys.exit(1)

    # Collect every marker up front so the total can be reported.
    markers = list(err_root.rglob("*.err"))
    if len(markers) == 0:
        print(f"No .err files found in {error_dir}")
        return

    print(f"Found {len(markers)} .err files")
    print(f"Source directory: {source_dir}")
    print(f"Destination directory: {destination_dir}")
    print()

    copied = 0
    not_found = 0
    for marker in markers:
        # Same relative location as the marker, with the .err suffix
        # swapped for .mp4.
        rel_video = marker.parent.relative_to(err_root) / (marker.stem + ".mp4")
        origin = src_root / rel_video

        if not origin.exists():
            print(f"WARNING: Source file not found: {origin}")
            not_found += 1
            continue

        target = dst_root / rel_video
        target.parent.mkdir(parents=True, exist_ok=True)
        print(f"Copying: {rel_video}")
        shutil.copy2(origin, target)
        copied += 1

    print()
    print("Summary:")
    print(f"  Copied: {copied}")
    print(f"  Not found: {not_found}")
    print(f"  Total .err files: {len(markers)}")

FLV Repair

High-level FLV repair wrapper for corrupted inputs.

flv_repair

repair_flv_file

repair_flv_file(input_path: Path, output_path: Path, keep_intermediate=False) -> bool

Repairs an FLV/MP4 file by removing the text error prefix and saving as a clean .flv.

Parameters:

Name Type Description Default
input_path Path

Path to the corrupted file.

required
output_path Path

Path where the repaired .flv file should be saved.

required
keep_intermediate

Ignored in this version as we produce only one file.

False

Returns:

Type Description
bool

True if repair was successful, False otherwise.

Source code in vbc/utils/flv_repair.py
def repair_flv_file(input_path: Path, output_path: Path, keep_intermediate=False) -> bool:
    """
    Strip the leading non-FLV bytes from a corrupted file and save the rest as .flv.

    The FLV marker offset is located in ``input_path``, everything from that
    offset onward is copied to ``output_path`` (whose suffix is forced to
    ``.flv``), and the result is checked with ffprobe. On any failure the
    output file is removed again.

    Args:
        input_path: Path to the corrupted file.
        output_path: Path where the repaired .flv file should be saved.
        keep_intermediate: Ignored in this version as we produce only one file.

    Returns:
        True if repair was successful, False otherwise.
    """
    # The result is a raw byte cut, so it is always written with a .flv suffix.
    if output_path.suffix.lower() != ".flv":
        output_path = output_path.with_suffix(".flv")

    def _discard_output() -> None:
        # Remove whatever has been written so far so no partial file is left.
        if output_path.exists():
            output_path.unlink()

    # Step 1: locate the FLV marker; give up if it is absent or unreadable.
    try:
        offset = find_flv_header_offset(input_path)
    except Exception:
        return False
    if offset is None:
        return False

    # Step 2: cut from the marker onward and validate the result.
    try:
        written = copy_from_offset(input_path, output_path, offset)

        # Anything of 1000 bytes or fewer is treated as a failed extraction.
        if not (written > 1000 and output_path.exists()):
            _discard_output()
            return False

        # Ask ffprobe for the first video stream. This prevents "repair loops"
        # where a restored file would be rejected by VBC again.
        probe = subprocess.run(
            [
                "ffprobe",
                "-v", "error",
                "-select_streams", "v:0",
                "-show_entries", "stream=codec_type",
                "-of", "json",
                str(output_path),
            ],
            capture_output=True,
            text=True,
        )
        if probe.returncode != 0:
            _discard_output()
            return False

        import json
        streams = json.loads(probe.stdout).get("streams")
        if not streams:
            # No video stream found - VBC will reject this file anyway.
            _discard_output()
            return False

        return True
    except Exception:
        _discard_output()
        return False

FLV Repair Core

Byte-level helpers for finding FLV headers and copying repaired payloads.

flv_repair_core

find_flv_header_offset

find_flv_header_offset(input_path: Path, chunk_size: int = 1024 * 1024) -> Optional[int]

Return byte offset of the first FLV header marker in file.

Prefers exact FLV header prefix b"FLV\x01" and falls back to b"FLV". Returns None when no marker is found.

Source code in vbc/utils/flv_repair_core.py
def find_flv_header_offset(input_path: Path, chunk_size: int = 1024 * 1024) -> Optional[int]:
    """Return byte offset of the first FLV header marker in file.

    Prefers exact FLV header prefix ``b"FLV\\x01"`` and falls back to ``b"FLV"``.
    Returns ``None`` when no marker is found.

    The file is streamed in ``chunk_size`` pieces. The result does not depend
    on where chunk boundaries fall: the first ``b"FLV\\x01"`` anywhere in the
    file wins, and the first bare ``b"FLV"`` is only used when the exact
    header prefix never occurs.

    Args:
        input_path: File to scan.
        chunk_size: Number of bytes read per iteration; must be positive.

    Returns:
        Offset of the chosen marker, or ``None`` if the file contains neither.

    Raises:
        ValueError: If ``chunk_size`` is not positive.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be > 0")

    primary_magic = b"FLV\x01"
    fallback_magic = b"FLV"
    # Longest marker minus one byte: enough to catch a marker that straddles
    # two consecutive reads without ever matching the same marker twice.
    keep = len(primary_magic) - 1

    fallback_offset: Optional[int] = None
    overlap = b""
    consumed = 0

    with open(input_path, "rb") as src:
        while True:
            chunk = src.read(chunk_size)
            if not chunk:
                break

            merged = overlap + chunk
            start_offset = consumed - len(overlap)

            primary_idx = merged.find(primary_magic)
            if primary_idx != -1:
                return start_offset + primary_idx

            # Remember only the first bare "FLV" and keep scanning: text in the
            # corrupt prefix may mention "FLV" before the real header, which
            # can sit in a later chunk.
            if fallback_offset is None:
                fallback_idx = merged.find(fallback_magic)
                if fallback_idx != -1:
                    fallback_offset = start_offset + fallback_idx

            overlap = merged[-keep:]
            consumed += len(chunk)

    return fallback_offset

copy_from_offset

copy_from_offset(input_path: Path, output_path: Path, offset: int, chunk_size: int = 1024 * 1024) -> int

Copy file data starting from offset into output_path.

Returns number of bytes written.

Source code in vbc/utils/flv_repair_core.py
def copy_from_offset(input_path: Path, output_path: Path, offset: int, chunk_size: int = 1024 * 1024) -> int:
    """Write everything in ``input_path`` from ``offset`` onward to ``output_path``.

    The data is streamed in pieces of at most ``chunk_size`` bytes, and
    ``output_path`` is opened for writing in binary mode.

    Args:
        input_path: File to read from.
        output_path: File to write to.
        offset: Non-negative byte position at which copying starts.
        chunk_size: Positive number of bytes read per iteration.

    Returns:
        Number of bytes written.

    Raises:
        ValueError: If ``offset`` is negative or ``chunk_size`` is not positive.
    """
    if offset < 0:
        raise ValueError("offset must be >= 0")
    if chunk_size <= 0:
        raise ValueError("chunk_size must be > 0")

    total = 0
    with open(input_path, "rb") as reader, open(output_path, "wb") as writer:
        reader.seek(offset)
        # iter() with a sentinel stops at the first empty read, i.e. at EOF.
        for piece in iter(lambda: reader.read(chunk_size), b""):
            writer.write(piece)
            total += len(piece)
    return total

Re-encode Repair

Fallback repair helper that re-encodes damaged files through ffmpeg.

reencode_repair

repair_via_reencode

repair_via_reencode(input_path: Path, output_path: Path, progress_callback: Optional[Callable[[int], None]] = None) -> bool

Repairs a corrupted video file by performing a fast re-encode to MKV with fixed framerate. Useful for files with broken indices, variable framerate issues, or 'ffmpeg code 234' errors.

Command: ffmpeg -i input -c:v libx264 -preset ultrafast -crf 20 -r 30 -c:a copy output.mkv

Parameters:

Name Type Description Default
input_path Path

Path to the corrupted file.

required
output_path Path

Path where the repaired .mkv file should be saved.

required
progress_callback Optional[Callable[[int], None]]

Optional callback receiving the current output size in bytes.

None

Returns:

Type Description
bool

True if repair was successful and validated, False otherwise.

Source code in vbc/utils/reencode_repair.py
def repair_via_reencode(
    input_path: Path,
    output_path: Path,
    progress_callback: Optional[Callable[[int], None]] = None,
) -> bool:
    """
    Repairs a corrupted video file by performing a fast re-encode to MKV with fixed framerate.
    Useful for files with broken indices, variable framerate issues, or 'ffmpeg code 234' errors.

    Command: ffmpeg -i input -c:v libx264 -preset ultrafast -crf 20 -r 30 -c:a copy output.mkv

    While ffmpeg runs, ``progress_callback`` is invoked about once per second
    with the current size of the output file, and once more after ffmpeg
    exits. ffmpeg is never left running when this function returns or raises.
    If the callback fails, the child is killed before the partial output is
    removed.

    Args:
        input_path: Path to the corrupted file.
        output_path: Path where the repaired .mkv file should be saved.
        progress_callback: Optional callback receiving the current output size in bytes.

    Returns:
        True if repair was successful and validated, False otherwise.
    """
    # Force .mkv extension for safety/compatibility
    if output_path.suffix.lower() != ".mkv":
        output_path = output_path.with_suffix(".mkv")

    cmd = [
        "ffmpeg", "-y",
        "-v", "error",
        "-err_detect", "ignore_err",
        "-i", str(input_path),
        "-c:v", "libx264",
        "-preset", "ultrafast",
        "-crf", "20",
        "-r", "30",          # Force fixed 30 fps to fix timestamp issues
        "-c:a", "copy",      # Copy audio to preserve quality/speed
        "-ignore_unknown",
        str(output_path)
    ]

    def _report_progress() -> None:
        # Report the current output size, if there is a listener and a file.
        if progress_callback is not None and output_path.exists():
            progress_callback(output_path.stat().st_size)

    try:
        process = subprocess.Popen(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
        try:
            while True:
                try:
                    # Returns as soon as ffmpeg exits instead of sleeping out
                    # a full polling interval.
                    process.wait(timeout=1)
                    break
                except subprocess.TimeoutExpired:
                    _report_progress()
            _report_progress()
        finally:
            # If anything above raised (callback error, interrupt) ffmpeg may
            # still be writing; stop and reap it before the output is touched,
            # otherwise it would recreate the file removed below.
            if process.poll() is None:
                process.kill()
                process.wait()

        if process.returncode != 0:
            raise subprocess.CalledProcessError(process.returncode, cmd)

        # Validation: a tiny or missing output means the re-encode failed.
        if not output_path.exists() or output_path.stat().st_size <= 1000:
            if output_path.exists():
                output_path.unlink()
            return False

        # Quick ffprobe check that the result exposes a video stream.
        probe_cmd = [
            "ffprobe", "-v", "error",
            "-select_streams", "v:0",
            "-show_entries", "stream=codec_type",
            "-of", "json",
            str(output_path)
        ]
        res = subprocess.run(probe_cmd, capture_output=True, text=True)
        if res.returncode == 0 and '"codec_type": "video"' in res.stdout:
            return True

        if output_path.exists():
            output_path.unlink()
        return False

    except Exception:
        # Best-effort repair: every failure (ffmpeg missing, non-zero exit,
        # callback error) is reported as False with no partial output left.
        if output_path.exists():
            output_path.unlink()
        return False

Audio Consistency Check

Command-line verifier for input/output audio codec handling.

check_audio_consistency

Check audio handling between input and output directories.

Compares input audio codecs to expected output behavior based on VBC rules:

- Lossless (pcm_*, flac, alac, truehd, mlp, wavpack, ape, tta) -> AAC 256k
- AAC/MP3 -> stream copy
- Other/unknown -> AAC 192k
- No audio -> no audio