Spaces:

evalstate
/

diffusers-pr-api

Sleeping

File size: 60,890 Bytes

dbf7313

from __future__ import annotations

import argparse
import json
import sys
from collections.abc import Callable
from pathlib import Path
from typing import Any

from slop_farmer.app.duplicate_prs import DEFAULT_FILE_POLICY, FILE_POLICY_CHOICES
from slop_farmer.app_config import command_defaults, extract_cli_config_path
from slop_farmer.config import (
    AnalysisOptions,
    CheckpointImportOptions,
    DashboardDataOptions,
    DatasetRefreshOptions,
    DatasetStatusOptions,
    DeployDashboardOptions,
    MarkdownReportOptions,
    NewContributorReportOptions,
    PipelineOptions,
    PrScopeOptions,
    PrSearchRefreshOptions,
    PublishAnalysisArtifactsOptions,
    RepoRef,
    SaveCacheOptions,
    SnapshotAdoptOptions,
)
from slop_farmer.reports.duplicate_prs import DEFAULT_DUPLICATE_PR_MODEL

CommandHandler = Callable[[argparse.Namespace, Path | None], None]


def _int_at_least(minimum: int) -> Callable[[str], int]:
    def parse(raw: str) -> int:
        value = int(raw)
        if value < minimum:
            raise argparse.ArgumentTypeError(f"expected integer >= {minimum}")
        return value

    return parse


def build_parser(*, config_path: Path | None = None) -> argparse.ArgumentParser:
    defaults = _load_parser_defaults(config_path)

    parser = argparse.ArgumentParser(prog="slop-farmer")
    parser.add_argument(
        "--config",
        type=Path,
        help="YAML config file with shared repo/workspace/dashboard defaults.",
    )
    subparsers = parser.add_subparsers(dest="command", required=True)

    _add_scrape_parser(subparsers, defaults["scrape"])
    _add_refresh_dataset_parser(subparsers, defaults["refresh-dataset"])
    _add_analyze_parser(subparsers, defaults["analyze"])
    _add_pr_scope_parser(subparsers, defaults["pr-scope"])
    _add_checkpoint_import_parser(subparsers, defaults["import-hf-checkpoint"])
    _add_adopt_snapshot_parser(subparsers, defaults["adopt-snapshot"])
    _add_markdown_report_parser(subparsers)
    _add_duplicate_prs_parser(subparsers)
    _add_pr_search_parser(subparsers, defaults["pr-search"])
    _add_new_contributor_report_parser(subparsers, defaults["new-contributor-report"])
    _add_dashboard_data_parser(subparsers, defaults["dashboard-data"])
    _add_publish_analysis_artifacts_parser(subparsers, defaults["publish-analysis-artifacts"])
    _add_save_cache_parser(subparsers, defaults["save-cache"])
    _add_deploy_dashboard_parser(subparsers, defaults["deploy-dashboard"])
    _add_dataset_status_parser(subparsers, defaults["dataset-status"])
    return parser


def _load_parser_defaults(config_path: Path | None) -> dict[str, dict[str, Any]]:
    commands = (
        "scrape",
        "refresh-dataset",
        "analyze",
        "import-hf-checkpoint",
        "pr-scope",
        "pr-search",
        "adopt-snapshot",
        "new-contributor-report",
        "dashboard-data",
        "publish-analysis-artifacts",
        "save-cache",
        "deploy-dashboard",
        "dataset-status",
    )
    return {command: command_defaults(command, config_path=config_path) for command in commands}


# Parser builders


def _add_scrape_parser(subparsers: Any, defaults: dict[str, Any]) -> None:
    scrape = subparsers.add_parser("scrape", help="Scrape GitHub and write a snapshot dataset.")
    scrape.add_argument(
        "--repo",
        default=defaults.get("repo", "huggingface/transformers"),
        help="GitHub repository in owner/name form.",
    )
    scrape.add_argument("--output-dir", type=Path, default=Path(defaults.get("output-dir", "data")))
    scrape.add_argument("--since", help="Incremental sync lower bound in ISO 8601 format.")
    scrape.add_argument(
        "--resume",
        dest="resume",
        action="store_true",
        default=True,
        help="Resume from the last successful local watermark when --since is not provided.",
    )
    scrape.add_argument(
        "--no-resume",
        dest="resume",
        action="store_false",
        help="Ignore local watermark state and run from scratch unless --since is set.",
    )
    scrape.add_argument(
        "--http-timeout", type=int, default=180, help="Per-request timeout in seconds."
    )
    scrape.add_argument(
        "--http-max-retries", type=int, default=5, help="Retries for transient network failures."
    )
    scrape.add_argument(
        "--max-issues", type=int, default=None, help="Limit total issue endpoint items read."
    )
    scrape.add_argument(
        "--max-prs", type=int, default=None, help="Limit pull requests to hydrate in detail."
    )
    scrape.add_argument(
        "--issue-max-age-days",
        type=int,
        default=defaults.get("issue-max-age-days"),
        help="Optional created_at age cap for issues included in the snapshot.",
    )
    scrape.add_argument(
        "--pr-max-age-days",
        type=int,
        default=defaults.get("pr-max-age-days"),
        help="Optional created_at age cap for pull requests included in the snapshot.",
    )
    scrape.add_argument(
        "--max-issue-comments", type=int, default=None, help="Limit issue comment rows."
    )
    scrape.add_argument(
        "--max-reviews-per-pr", type=int, default=None, help="Limit review rows per PR."
    )
    scrape.add_argument(
        "--max-review-comments-per-pr",
        type=int,
        default=None,
        help="Limit inline review comment rows per PR.",
    )
    scrape.add_argument(
        "--fetch-timeline",
        action="store_true",
        default=bool(defaults.get("fetch-timeline", False)),
        help="Fetch issue timeline events for linkage rows.",
    )
    scrape.add_argument(
        "--new-contributor-report",
        dest="new_contributor_report",
        action="store_true",
        default=defaults.get("new-contributor-report"),
        help="Generate new contributor dataset/report artifacts for the local snapshot.",
    )
    scrape.add_argument(
        "--no-new-contributor-report",
        dest="new_contributor_report",
        action="store_false",
        help="Skip new contributor dataset/report generation.",
    )
    scrape.add_argument(
        "--new-contributor-window-days",
        type=int,
        default=int(defaults.get("new-contributor-window-days", 42)),
        help="Recent public activity window for contributor enrichment.",
    )
    scrape.add_argument(
        "--new-contributor-max-authors",
        type=int,
        default=int(defaults.get("new-contributor-max-authors", 25)),
        help="Maximum number of contributors to include in the new contributor report. Use 0 for no cap.",
    )


def _add_refresh_dataset_parser(subparsers: Any, defaults: dict[str, Any]) -> None:
    refresh = subparsers.add_parser(
        "refresh-dataset",
        help="Refresh the canonical Hugging Face dataset repo from remote watermark state.",
    )
    refresh.add_argument(
        "--repo",
        default=defaults.get("repo", "huggingface/transformers"),
        help="GitHub repository in owner/name form.",
    )
    refresh.add_argument(
        "--hf-repo-id",
        default=defaults.get("hf-repo-id"),
        required=defaults.get("hf-repo-id") is None,
        help="Canonical Hugging Face dataset repo id to refresh.",
    )
    refresh.add_argument("--max-issues", type=int, default=defaults.get("max-issues"))
    refresh.add_argument("--max-prs", type=int, default=defaults.get("max-prs"))
    refresh.add_argument(
        "--max-issue-comments", type=int, default=defaults.get("max-issue-comments")
    )
    refresh.add_argument(
        "--max-reviews-per-pr", type=int, default=defaults.get("max-reviews-per-pr")
    )
    refresh.add_argument(
        "--max-review-comments-per-pr",
        type=int,
        default=defaults.get("max-review-comments-per-pr"),
    )
    refresh.add_argument(
        "--fetch-timeline",
        action="store_true",
        default=bool(defaults.get("fetch-timeline", False)),
    )
    refresh.add_argument(
        "--new-contributor-report",
        dest="new_contributor_report",
        action="store_true",
        default=bool(defaults.get("new-contributor-report", True)),
    )
    refresh.add_argument(
        "--no-new-contributor-report",
        dest="new_contributor_report",
        action="store_false",
    )
    refresh.add_argument(
        "--new-contributor-window-days",
        type=int,
        default=int(defaults.get("new-contributor-window-days", 42)),
    )
    refresh.add_argument(
        "--new-contributor-max-authors",
        type=int,
        default=int(defaults.get("new-contributor-max-authors", 25)),
    )
    refresh.add_argument("--http-timeout", type=int, default=300)
    refresh.add_argument("--http-max-retries", type=int, default=8)
    refresh.add_argument("--checkpoint-every-comments", type=int, default=1000)
    refresh.add_argument("--checkpoint-every-prs", type=int, default=25)
    refresh.add_argument(
        "--private-hf-repo",
        dest="private_hf_repo",
        action="store_true",
        default=bool(defaults.get("private-hf-repo", False)),
        help="Create the target dataset repo as private if needed.",
    )
    refresh.add_argument(
        "--private",
        dest="private_hf_repo",
        action="store_true",
        help=argparse.SUPPRESS,
    )


def _add_analyze_parser(subparsers: Any, defaults: dict[str, Any]) -> None:
    analyze = subparsers.add_parser(
        "analyze",
        help="Analyze a snapshot and write a local JSON report. Canonical publication is separate.",
    )
    analyze.add_argument(
        "--snapshot-dir",
        type=Path,
        help="Snapshot directory to analyze. Defaults to the latest local snapshot.",
    )
    analyze.add_argument(
        "--output-dir", type=Path, default=Path(defaults.get("output-dir", "data"))
    )
    analyze.add_argument("--output", type=Path, help="Output path for the analysis JSON.")
    analyze.add_argument(
        "--hf-repo-id",
        default=defaults.get("hf-repo-id"),
        help="Analyze a canonical Hugging Face dataset repo by materializing a self-consistent published snapshot locally.",
    )
    analyze.add_argument(
        "--hf-revision",
        default=defaults.get("hf-revision"),
        help="Optional Hub revision for metadata and README download.",
    )
    analyze.add_argument(
        "--hf-materialize-dir",
        type=Path,
        default=Path(defaults["hf-materialize-dir"])
        if defaults.get("hf-materialize-dir")
        else None,
        help="Optional local directory used when materializing an HF dataset snapshot.",
    )
    analyze.add_argument(
        "--ranking-backend",
        choices=("hybrid", "deterministic"),
        default=defaults.get("ranking-backend", "hybrid"),
        help="Whether to use deterministic-only ranking or optional fast-agent enrichment.",
    )
    analyze.add_argument(
        "--model",
        default=defaults.get("model", "gpt-5.4-mini?service_tier=flex"),
        help="Model string used by fast-agent when enabled.",
    )
    analyze.add_argument(
        "--max-clusters",
        type=int,
        default=int(defaults.get("max-clusters", 10)),
        help="Maximum number of meta clusters to include in the report.",
    )
    analyze.add_argument(
        "--hybrid-llm-concurrency",
        type=_int_at_least(1),
        default=int(defaults.get("hybrid-llm-concurrency", 1)),
        help=(
            "Maximum number of hybrid LLM review units to run at once. "
            "Use 1 to minimize provider pressure."
        ),
    )
    analyze.add_argument(
        "--open-prs-only",
        action="store_true",
        default=bool(defaults.get("open-prs-only", False)),
        help="Restrict PR analysis/clustering to open PRs only. Draft PRs are still included.",
    )


def _add_pr_scope_parser(subparsers: Any, defaults: dict[str, Any]) -> None:
    pr_scope = subparsers.add_parser(
        "pr-scope", help="Cluster open PRs by holistic file/scope overlap."
    )
    pr_scope.add_argument(
        "--snapshot-dir",
        type=Path,
        help="Snapshot directory to analyze. Defaults to the latest local snapshot.",
    )
    pr_scope.add_argument(
        "--output-dir", type=Path, default=Path(defaults.get("output-dir", "data"))
    )
    pr_scope.add_argument(
        "--output",
        type=Path,
        help="Output path for the PR scope JSON. Defaults next to the snapshot.",
    )
    pr_scope.add_argument(
        "--hf-repo-id",
        default=defaults.get("hf-repo-id"),
        help="Analyze a Hugging Face dataset repo by materializing its parquet export locally.",
    )
    pr_scope.add_argument(
        "--hf-revision",
        default=defaults.get("hf-revision"),
        help="Optional Hub revision for metadata and README download.",
    )
    pr_scope.add_argument(
        "--hf-materialize-dir",
        type=Path,
        default=Path(defaults["hf-materialize-dir"])
        if defaults.get("hf-materialize-dir")
        else None,
        help="Optional local directory used when materializing an HF dataset snapshot.",
    )


def _add_checkpoint_import_parser(subparsers: Any, defaults: dict[str, Any]) -> None:
    checkpoint_import = subparsers.add_parser(
        "import-hf-checkpoint",
        help="Import a checkpoint snapshot from an HF dataset repo into a clean local snapshot.",
    )
    checkpoint_import.add_argument(
        "--source-repo-id",
        default=defaults.get("source-repo-id", "burtenshaw/transformers-pr-slop-dataset"),
        help="Source Hugging Face dataset repo id containing checkpoint folders.",
    )
    checkpoint_import.add_argument(
        "--output-dir",
        type=Path,
        default=Path(defaults.get("output-dir", "eval_data")),
        help="Local root directory where the imported snapshot should be written.",
    )
    checkpoint_import.add_argument(
        "--checkpoint-id",
        help="Optional checkpoint snapshot id. Defaults to the latest viable checkpoint.",
    )
    checkpoint_import.add_argument(
        "--checkpoint-root",
        choices=("checkpoints", "_checkpoints"),
        help="Optional checkpoint root directory. Defaults to auto-detect.",
    )
    checkpoint_import.add_argument(
        "--publish-repo-id",
        help="Optional HF dataset repo id to publish the imported clean snapshot to.",
    )
    checkpoint_import.add_argument(
        "--private-hf-repo",
        action="store_true",
        help="Create the publish target as private when --publish-repo-id is used.",
    )
    checkpoint_import.add_argument(
        "--force",
        action="store_true",
        help="Overwrite an existing imported snapshot directory if present.",
    )


def _add_adopt_snapshot_parser(subparsers: Any, defaults: dict[str, Any]) -> None:
    adopt_snapshot = subparsers.add_parser(
        "adopt-snapshot",
        help="Mark an existing snapshot as the current pipeline base so the next scrape resumes from it.",
    )
    adopt_snapshot.add_argument(
        "--snapshot-dir", type=Path, required=True, help="Existing local snapshot directory."
    )
    adopt_snapshot.add_argument(
        "--output-dir",
        type=Path,
        default=Path(defaults.get("output-dir", "data")),
        help="Pipeline workspace root where state/ and snapshots/latest.json should be written.",
    )
    adopt_snapshot.add_argument(
        "--next-since",
        help="Optional explicit watermark timestamp. Defaults to snapshot watermark.next_since, crawl_started_at, or extracted_at.",
    )


def _add_markdown_report_parser(subparsers: Any) -> None:
    markdown = subparsers.add_parser(
        "markdown-report", help="Render a markdown report from an analysis JSON file."
    )
    markdown.add_argument(
        "--input", type=Path, required=True, help="Path to an existing analysis JSON report."
    )
    markdown.add_argument(
        "--output",
        type=Path,
        help="Output path for the markdown report. Defaults next to the input JSON.",
    )
    markdown.add_argument(
        "--snapshot-dir",
        type=Path,
        help="Optional snapshot directory containing issues.parquet and pull_requests.parquet. Defaults to the input JSON parent directory.",
    )


def _add_duplicate_prs_parser(subparsers: Any) -> None:
    duplicate_prs = subparsers.add_parser(
        "duplicate-prs",
        help="List or merge mergeable duplicate PR clusters from hybrid-enriched analysis.",
    )
    duplicate_prs_subparsers = duplicate_prs.add_subparsers(
        dest="duplicate_prs_command", required=True
    )

    duplicate_list = duplicate_prs_subparsers.add_parser(
        "list",
        help="List mergeable duplicate PR clusters from a hybrid-enriched analysis report.",
    )
    duplicate_list_source = duplicate_list.add_mutually_exclusive_group(required=True)
    duplicate_list_source.add_argument(
        "--report", type=Path, help="Path to an analysis JSON report."
    )
    duplicate_list_source.add_argument(
        "--snapshot-dir", type=Path, help="Snapshot directory to analyze."
    )
    duplicate_list.add_argument(
        "--limit", type=int, default=10, help="Maximum number of mergeable clusters to print."
    )
    duplicate_list.add_argument(
        "--model",
        default=DEFAULT_DUPLICATE_PR_MODEL,
        help="Model string used for hybrid analysis and duplicate-PR mergeability gating.",
    )

    duplicate_merge = duplicate_prs_subparsers.add_parser(
        "merge",
        help="Use Codex to synthesize and publish a minimal upstream PR for a mergeable duplicate cluster.",
    )
    duplicate_merge_source = duplicate_merge.add_mutually_exclusive_group(required=True)
    duplicate_merge_source.add_argument(
        "--report", type=Path, help="Path to an analysis JSON report."
    )
    duplicate_merge_source.add_argument(
        "--snapshot-dir", type=Path, help="Snapshot directory to analyze."
    )
    duplicate_merge.add_argument(
        "--repo-dir",
        type=Path,
        required=True,
        help="Local upstream repository checkout used for the synthesis worktree.",
    )
    duplicate_merge.add_argument(
        "--upstream-repo",
        help="Optional owner/name override for the upstream target repository.",
    )
    duplicate_merge.add_argument(
        "--upstream-remote",
        default="origin",
        help="Remote in --repo-dir that points at the upstream repository. Defaults to origin.",
    )
    duplicate_merge.add_argument(
        "--fork-remote",
        default="fork",
        help="Remote in the synthesis worktree used for pushing the branch. Defaults to fork.",
    )
    duplicate_merge.add_argument("--cluster-id", help="Optional cluster override.")
    duplicate_merge.add_argument(
        "--fork-repo",
        help="Optional owner/name override for the fork push target. Overrides --fork-owner when both are set.",
    )
    duplicate_merge.add_argument(
        "--fork-owner",
        help="Optional GitHub fork owner override. Defaults to the authenticated user.",
    )
    duplicate_merge.add_argument(
        "--file-policy",
        choices=FILE_POLICY_CHOICES,
        default=DEFAULT_FILE_POLICY,
        help="Changed-file policy enforced on the synthesized branch.",
    )
    duplicate_merge.add_argument(
        "--model",
        default=DEFAULT_DUPLICATE_PR_MODEL,
        help="Model string used for hybrid analysis, mergeability gating, and Codex synthesis.",
    )


def _add_pr_search_parser(subparsers: Any, defaults: dict[str, Any]) -> None:
    pr_search = subparsers.add_parser(
        "pr-search",
        help="Refresh and query the DuckDB-backed PR code-similarity index.",
    )
    pr_search_subparsers = pr_search.add_subparsers(dest="pr_search_command", required=True)

    refresh = pr_search_subparsers.add_parser(
        "refresh",
        help="Refresh the PR code-similarity index from a local snapshot or HF dataset repo.",
    )
    refresh_source = refresh.add_mutually_exclusive_group()
    refresh_source.add_argument(
        "--snapshot-dir",
        type=Path,
        help="Snapshot directory to index. Defaults to the latest local snapshot.",
    )
    refresh_source.add_argument(
        "--hf-repo-id",
        default=defaults.get("hf-repo-id"),
        help="Hugging Face dataset repo id to materialize before indexing.",
    )
    refresh.add_argument(
        "--hf-revision",
        default=defaults.get("hf-revision"),
        help="Optional Hub revision for metadata and README download.",
    )
    refresh.add_argument(
        "--hf-materialize-dir",
        type=Path,
        default=Path(defaults["hf-materialize-dir"])
        if defaults.get("hf-materialize-dir")
        else None,
        help="Optional local directory used when materializing an HF dataset snapshot.",
    )
    refresh.add_argument(
        "--output-dir",
        type=Path,
        default=Path(defaults.get("output-dir", "data")),
        help="Workspace root used for latest snapshot resolution and default DB placement.",
    )
    refresh.add_argument(
        "--db",
        type=Path,
        default=Path(defaults["db"]) if defaults.get("db") else None,
        help="DuckDB file path. Defaults to <output-dir>/state/pr-search.duckdb.",
    )
    refresh.add_argument("--limit-prs", type=int, help="Optional cap on indexed PRs.")
    refresh.add_argument(
        "--include-drafts",
        action="store_true",
        default=bool(defaults.get("include-drafts", False)),
        help="Include draft PRs in the indexed universe.",
    )
    refresh.add_argument(
        "--include-closed",
        action="store_true",
        default=bool(defaults.get("include-closed", False)),
        help="Include closed PRs in the indexed universe.",
    )
    refresh.add_argument(
        "--replace-active",
        dest="replace_active",
        action="store_true",
        default=True,
        help="Activate the new run on success. Enabled by default.",
    )
    refresh.add_argument(
        "--no-replace-active",
        dest="replace_active",
        action="store_false",
        help="Write the new run without switching the active run pointer.",
    )

    similar = pr_search_subparsers.add_parser(
        "similar", help="Show similar PRs for one indexed pull request."
    )
    similar.add_argument("pr_number", type=int, help="Pull request number to query.")
    similar.add_argument(
        "--db",
        type=Path,
        default=Path(defaults["db"]) if defaults.get("db") else None,
        help="DuckDB file path. Defaults to <output-dir>/state/pr-search.duckdb.",
    )
    similar.add_argument(
        "--output-dir",
        type=Path,
        default=Path(defaults.get("output-dir", "data")),
    )
    similar.add_argument("--repo", help="Optional repo override when the DB holds multiple repos.")
    similar.add_argument("--limit", type=int, default=10, help="Maximum number of rows to show.")
    similar.add_argument("--json", action="store_true", help="Emit machine-readable JSON.")

    probe_github = pr_search_subparsers.add_parser(
        "probe-github",
        help="Fetch one live GitHub PR and compare it against the active indexed scope features.",
    )
    probe_github.add_argument("pr_number", type=int, help="Pull request number to probe.")
    probe_github.add_argument(
        "--repo",
        help="GitHub repository in owner/name form. Defaults to the active repo in the DB.",
    )
    probe_github.add_argument(
        "--db",
        type=Path,
        default=Path(defaults["db"]) if defaults.get("db") else None,
        help="DuckDB file path. Defaults to <output-dir>/state/pr-search.duckdb.",
    )
    probe_github.add_argument(
        "--output-dir",
        type=Path,
        default=Path(defaults.get("output-dir", "data")),
    )
    probe_github.add_argument(
        "--limit",
        type=int,
        default=10,
        help="Maximum number of similar PR rows to show.",
    )
    probe_github.add_argument("--json", action="store_true", help="Emit machine-readable JSON.")

    candidate_clusters = pr_search_subparsers.add_parser(
        "candidate-clusters",
        help="Show candidate scope clusters for one indexed pull request.",
    )
    candidate_clusters.add_argument("pr_number", type=int, help="Pull request number to query.")
    candidate_clusters.add_argument(
        "--db",
        type=Path,
        default=Path(defaults["db"]) if defaults.get("db") else None,
        help="DuckDB file path. Defaults to <output-dir>/state/pr-search.duckdb.",
    )
    candidate_clusters.add_argument(
        "--output-dir",
        type=Path,
        default=Path(defaults.get("output-dir", "data")),
    )
    candidate_clusters.add_argument(
        "--repo", help="Optional repo override when the DB holds multiple repos."
    )
    candidate_clusters.add_argument(
        "--limit", type=int, default=5, help="Maximum number of rows to show."
    )
    candidate_clusters.add_argument("--json", action="store_true", help="Emit JSON.")

    cluster = pr_search_subparsers.add_parser("cluster", help="Inspect one scope cluster.")
    cluster_subparsers = cluster.add_subparsers(dest="pr_search_cluster_command", required=True)
    cluster_show = cluster_subparsers.add_parser("show", help="Show cluster details.")
    cluster_show.add_argument("cluster_id", help="Cluster identifier.")
    cluster_show.add_argument(
        "--db",
        type=Path,
        default=Path(defaults["db"]) if defaults.get("db") else None,
        help="DuckDB file path. Defaults to <output-dir>/state/pr-search.duckdb.",
    )
    cluster_show.add_argument(
        "--output-dir",
        type=Path,
        default=Path(defaults.get("output-dir", "data")),
    )
    cluster_show.add_argument("--repo", help="Optional repo override.")
    cluster_show.add_argument("--json", action="store_true", help="Emit JSON.")

    explain_pair = pr_search_subparsers.add_parser(
        "explain-pair",
        help="Explain one PR pair, falling back to on-demand scoring when needed.",
    )
    explain_pair.add_argument("left_pr_number", type=int)
    explain_pair.add_argument("right_pr_number", type=int)
    explain_pair.add_argument(
        "--db",
        type=Path,
        default=Path(defaults["db"]) if defaults.get("db") else None,
        help="DuckDB file path. Defaults to <output-dir>/state/pr-search.duckdb.",
    )
    explain_pair.add_argument(
        "--output-dir",
        type=Path,
        default=Path(defaults.get("output-dir", "data")),
    )
    explain_pair.add_argument("--repo", help="Optional repo override.")
    explain_pair.add_argument("--json", action="store_true", help="Emit JSON.")

    status = pr_search_subparsers.add_parser("status", help="Show the active PR search run.")
    status.add_argument(
        "--db",
        type=Path,
        default=Path(defaults["db"]) if defaults.get("db") else None,
        help="DuckDB file path. Defaults to <output-dir>/state/pr-search.duckdb.",
    )
    status.add_argument(
        "--output-dir",
        type=Path,
        default=Path(defaults.get("output-dir", "data")),
    )
    status.add_argument("--repo", help="Optional repo override.")
    status.add_argument("--json", action="store_true", help="Emit JSON.")

    contributor = pr_search_subparsers.add_parser(
        "contributor", help="Show indexed contributor summary for one author login."
    )
    contributor.add_argument("login", help="GitHub author login to query.")
    contributor.add_argument(
        "--db",
        type=Path,
        default=Path(defaults["db"]) if defaults.get("db") else None,
        help="DuckDB file path. Defaults to <output-dir>/state/pr-search.duckdb.",
    )
    contributor.add_argument(
        "--output-dir",
        type=Path,
        default=Path(defaults.get("output-dir", "data")),
    )
    contributor.add_argument("--repo", help="Optional repo override.")
    contributor.add_argument("--json", action="store_true", help="Emit JSON.")

    contributor_prs = pr_search_subparsers.add_parser(
        "contributor-prs", help="List indexed PRs for one contributor login."
    )
    contributor_prs.add_argument("login", help="GitHub author login to query.")
    contributor_prs.add_argument(
        "--db",
        type=Path,
        default=Path(defaults["db"]) if defaults.get("db") else None,
        help="DuckDB file path. Defaults to <output-dir>/state/pr-search.duckdb.",
    )
    contributor_prs.add_argument(
        "--output-dir",
        type=Path,
        default=Path(defaults.get("output-dir", "data")),
    )
    contributor_prs.add_argument("--repo", help="Optional repo override.")
    contributor_prs.add_argument("--limit", type=int, default=20, help="Maximum rows to show.")
    contributor_prs.add_argument("--json", action="store_true", help="Emit JSON.")

    pr_contributor = pr_search_subparsers.add_parser(
        "pr-contributor", help="Show contributor summary for the author of one indexed PR."
    )
    pr_contributor.add_argument("pr_number", type=int, help="Pull request number to query.")
    pr_contributor.add_argument(
        "--db",
        type=Path,
        default=Path(defaults["db"]) if defaults.get("db") else None,
        help="DuckDB file path. Defaults to <output-dir>/state/pr-search.duckdb.",
    )
    pr_contributor.add_argument(
        "--output-dir",
        type=Path,
        default=Path(defaults.get("output-dir", "data")),
    )
    pr_contributor.add_argument("--repo", help="Optional repo override.")
    pr_contributor.add_argument("--json", action="store_true", help="Emit JSON.")


def _add_new_contributor_report_parser(subparsers: Any, defaults: dict[str, Any]) -> None:
    new_contributor = subparsers.add_parser(
        "new-contributor-report",
        help="Render a markdown report for newly observed contributors in a snapshot.",
    )
    new_contributor.add_argument(
        "--snapshot-dir",
        type=Path,
        help="Snapshot directory to inspect. Defaults to the latest local snapshot.",
    )
    new_contributor.add_argument(
        "--output-dir", type=Path, default=Path(defaults.get("output-dir", "data"))
    )
    new_contributor.add_argument(
        "--output",
        type=Path,
        help="Output path for the markdown report. Defaults next to the snapshot.",
    )
    new_contributor.add_argument(
        "--json-output", type=Path, help="Optional JSON output path. Defaults next to the snapshot."
    )
    new_contributor.add_argument(
        "--hf-repo-id",
        default=defaults.get("hf-repo-id"),
        help="Analyze a Hugging Face dataset repo by materializing its parquet export locally.",
    )
    new_contributor.add_argument(
        "--hf-revision",
        default=defaults.get("hf-revision"),
        help="Optional Hub revision for metadata and README download.",
    )
    new_contributor.add_argument(
        "--hf-materialize-dir",
        type=Path,
        default=Path(defaults["hf-materialize-dir"])
        if defaults.get("hf-materialize-dir")
        else None,
        help="Optional local directory used when materializing an HF dataset snapshot.",
    )
    new_contributor.add_argument(
        "--window-days",
        type=int,
        default=int(defaults.get("window-days", 42)),
        help="Recent public activity window for contributor enrichment.",
    )
    new_contributor.add_argument(
        "--max-authors",
        type=int,
        default=int(defaults.get("max-authors", 25)),
        help="Maximum number of contributors to include. Use 0 for no cap.",
    )


def _add_dashboard_data_parser(subparsers: Any, defaults: dict[str, Any]) -> None:
    dashboard = subparsers.add_parser(
        "dashboard-data", help="Export frontend-ready JSON for the static dashboard."
    )
    dashboard.add_argument(
        "--snapshot-dir",
        type=Path,
        help="Snapshot directory to export. Defaults to the latest local snapshot.",
    )
    dashboard.add_argument(
        "--output-dir",
        type=Path,
        default=Path(defaults.get("output-dir", "web/public/data")),
    )
    dashboard.add_argument(
        "--analysis-input",
        type=Path,
        help="Optional analysis report JSON override. Defaults to canonical published current analysis when available, otherwise falls back to snapshot-local analysis files.",
    )
    dashboard.add_argument(
        "--contributors-input",
        type=Path,
        help="Optional contributor report JSON override. Defaults to the materialized snapshot's new-contributors-report.json.",
    )
    dashboard.add_argument(
        "--pr-scope-input",
        type=Path,
        help="Optional PR scope cluster JSON override. Defaults to the materialized snapshot's pr-scope-clusters.json.",
    )
    dashboard.add_argument(
        "--hf-repo-id",
        default=defaults.get("hf-repo-id"),
        help="Materialize the canonical Hugging Face dataset repo instead of using the latest local snapshot.",
    )
    dashboard.add_argument(
        "--hf-revision",
        default=defaults.get("hf-revision"),
        help="Optional Hub revision for metadata and README download.",
    )
    dashboard.add_argument(
        "--hf-materialize-dir",
        type=Path,
        default=Path(defaults["hf-materialize-dir"])
        if defaults.get("hf-materialize-dir")
        else None,
        help="Optional local directory used when materializing an HF dataset snapshot.",
    )
    dashboard.add_argument(
        "--window-days",
        type=int,
        default=int(defaults.get("window-days", 14)),
        help="Recent PR window to expose in the dashboard.",
    )


def _add_publish_analysis_artifacts_parser(subparsers: Any, defaults: dict[str, Any]) -> None:
    publish_analysis = subparsers.add_parser(
        "publish-analysis-artifacts",
        help="Publish archived and optional canonical hybrid analysis artifacts to a dataset repo.",
    )
    publish_analysis.add_argument(
        "--output-dir",
        type=Path,
        default=Path(defaults.get("output-dir", "data")),
        help="Pipeline workspace root containing snapshots/latest.json.",
    )
    publish_analysis.add_argument(
        "--snapshot-dir",
        type=Path,
        help="Optional explicit snapshot directory containing analysis-report-hybrid.json.",
    )
    publish_analysis.add_argument(
        "--analysis-input",
        type=Path,
        help="Optional explicit hybrid analysis report JSON to publish instead of snapshot-dir discovery.",
    )
    publish_analysis.add_argument(
        "--hf-repo-id",
        default=defaults.get("hf-repo-id"),
        required=defaults.get("hf-repo-id") is None,
        help="Target Hugging Face dataset repo id.",
    )
    publish_analysis.add_argument("--analysis-id", required=True, help="Immutable analysis run id.")
    publish_analysis.add_argument(
        "--canonical",
        action="store_true",
        default=bool(defaults.get("canonical", False)),
        help="Also update the stable analysis/current canonical alias.",
    )
    publish_analysis.add_argument(
        "--save-cache",
        action="store_true",
        default=bool(defaults.get("save-cache", False)),
        help="Also upload snapshot-local analysis-state/ as mutable operational cache at repo-root analysis-state/.",
    )
    publish_analysis.add_argument(
        "--private-hf-repo",
        action="store_true",
        default=bool(defaults.get("private-hf-repo", False)),
        help="Create the target dataset repo as private if needed.",
    )


def _add_save_cache_parser(subparsers: Any, defaults: dict[str, Any]) -> None:
    save_cache = subparsers.add_parser(
        "save-cache",
        help="Upload snapshot-local analysis-state/ as mutable operational cache to a dataset repo.",
    )
    save_cache.add_argument(
        "--output-dir",
        type=Path,
        default=Path(defaults.get("output-dir", "data")),
        help="Pipeline workspace root containing snapshots/latest.json.",
    )
    save_cache.add_argument(
        "--snapshot-dir",
        type=Path,
        help="Optional explicit snapshot directory containing analysis-state/.",
    )
    save_cache.add_argument(
        "--hf-repo-id",
        default=defaults.get("hf-repo-id"),
        required=defaults.get("hf-repo-id") is None,
        help="Target Hugging Face dataset repo id.",
    )
    save_cache.add_argument(
        "--private-hf-repo",
        action="store_true",
        default=bool(defaults.get("private-hf-repo", False)),
        help="Create the target dataset repo as private if needed.",
    )


def _add_deploy_dashboard_parser(subparsers: Any, defaults: dict[str, Any]) -> None:
    deploy_dashboard = subparsers.add_parser(
        "deploy-dashboard",
        help="Build and publish the static dashboard to a Hugging Face Space from a materialized dataset view.",
    )
    deploy_dashboard.add_argument(
        "--pipeline-data-dir",
        type=Path,
        default=Path(defaults.get("pipeline-data-dir", "data")),
    )
    deploy_dashboard.add_argument(
        "--web-dir", type=Path, default=Path(defaults.get("web-dir", "web"))
    )
    deploy_dashboard.add_argument(
        "--snapshot-dir",
        type=Path,
        help="Optional snapshot directory to publish. Defaults to the latest snapshot in --pipeline-data-dir.",
    )
    deploy_dashboard.add_argument(
        "--analysis-input",
        type=Path,
        help="Optional analysis report JSON override. Omit to prefer canonical published current analysis when available.",
    )
    deploy_dashboard.add_argument(
        "--contributors-input",
        type=Path,
        help="Optional contributor report JSON override.",
    )
    deploy_dashboard.add_argument(
        "--pr-scope-input",
        type=Path,
        help="Optional PR scope cluster JSON override.",
    )
    deploy_dashboard.add_argument(
        "--hf-repo-id",
        default=defaults.get("hf-repo-id"),
        help="Materialize the canonical Hugging Face dataset repo instead of using the latest local snapshot.",
    )
    deploy_dashboard.add_argument(
        "--hf-revision",
        default=defaults.get("hf-revision"),
        help="Optional Hub revision for metadata and README download.",
    )
    deploy_dashboard.add_argument(
        "--hf-materialize-dir",
        type=Path,
        default=Path(defaults["hf-materialize-dir"])
        if defaults.get("hf-materialize-dir")
        else None,
        help="Optional local directory used when materializing an HF dataset snapshot.",
    )
    deploy_dashboard.add_argument(
        "--refresh-contributors",
        action="store_true",
        default=bool(defaults.get("refresh-contributors", False)),
    )
    deploy_dashboard.add_argument(
        "--dashboard-window-days",
        type=int,
        default=int(defaults.get("dashboard-window-days", 14)),
    )
    deploy_dashboard.add_argument(
        "--contributor-window-days",
        type=int,
        default=int(
            defaults.get("contributor-window-days", defaults.get("dashboard-window-days", 14))
        ),
    )
    deploy_dashboard.add_argument(
        "--contributor-max-authors",
        type=int,
        default=int(defaults.get("contributor-max-authors", 0)),
    )
    deploy_dashboard.add_argument(
        "--private-space",
        action="store_true",
        default=bool(defaults.get("private-space", False)),
    )
    deploy_dashboard.add_argument(
        "--commit-message",
        default=defaults.get("commit-message", "Deploy dashboard"),
    )
    deploy_dashboard.add_argument(
        "--space-id",
        default=defaults.get("space-id"),
        help="Hugging Face Space repo id.",
    )
    deploy_dashboard.add_argument("--space-title", default=defaults.get("space-title"))
    deploy_dashboard.add_argument("--space-emoji", default=defaults.get("space-emoji", "📊"))
    deploy_dashboard.add_argument(
        "--space-color-from", default=defaults.get("space-color-from", "indigo")
    )
    deploy_dashboard.add_argument(
        "--space-color-to", default=defaults.get("space-color-to", "blue")
    )
    deploy_dashboard.add_argument(
        "--space-short-description",
        default=defaults.get(
            "space-short-description", "Static dashboard for the slop-farmer PR analysis pipeline."
        ),
    )
    deploy_dashboard.add_argument("--dataset-id", default=defaults.get("dataset-id"))
    deploy_dashboard.add_argument(
        "--space-tags", default=defaults.get("space-tags", "dashboard,static")
    )


def _add_dataset_status_parser(subparsers: Any, defaults: dict[str, Any]) -> None:
    dataset_status = subparsers.add_parser(
        "dataset-status",
        help="Inspect canonical dataset freshness and the local latest pointer.",
    )
    dataset_status.add_argument("--repo", default=defaults.get("repo"))
    dataset_status.add_argument(
        "--output-dir",
        type=Path,
        default=Path(defaults.get("output-dir", "data")),
        help="Local workspace root containing snapshots/latest.json.",
    )
    dataset_status.add_argument(
        "--hf-repo-id",
        default=defaults.get("hf-repo-id"),
        help="Canonical Hugging Face dataset repo id to inspect.",
    )
    dataset_status.add_argument(
        "--hf-revision",
        default=defaults.get("hf-revision"),
        help="Optional Hub revision for metadata and README download.",
    )
    dataset_status.add_argument("--json", action="store_true", help="Emit machine-readable JSON.")


# Dispatch helpers


def _explicit_flag_present(flag: str) -> bool:
    return any(arg == flag or arg.startswith(f"{flag}=") for arg in sys.argv[1:])


def _resolve_hf_inputs(args: argparse.Namespace) -> tuple[str | None, str | None, Path | None]:
    hf_repo_id = args.hf_repo_id
    hf_revision = args.hf_revision
    hf_materialize_dir = args.hf_materialize_dir
    if args.snapshot_dir is not None and not _explicit_flag_present("--hf-repo-id"):
        hf_repo_id = None
        hf_revision = None
        hf_materialize_dir = None
    return hf_repo_id, hf_revision, hf_materialize_dir


def _run_scrape(args: argparse.Namespace, config_path: Path | None) -> None:
    from slop_farmer.app.pipeline import run_pipeline

    new_contributor_report = bool(args.new_contributor_report)
    options = PipelineOptions(
        repo=RepoRef.parse(args.repo),
        output_dir=args.output_dir,
        since=args.since,
        resume=args.resume,
        http_timeout=args.http_timeout,
        http_max_retries=args.http_max_retries,
        max_issues=args.max_issues,
        max_prs=args.max_prs,
        max_issue_comments=args.max_issue_comments,
        max_reviews_per_pr=args.max_reviews_per_pr,
        max_review_comments_per_pr=args.max_review_comments_per_pr,
        fetch_timeline=args.fetch_timeline,
        new_contributor_report=new_contributor_report,
        new_contributor_window_days=args.new_contributor_window_days,
        new_contributor_max_authors=args.new_contributor_max_authors,
        issue_max_age_days=args.issue_max_age_days,
        pr_max_age_days=args.pr_max_age_days,
    )
    print(run_pipeline(options))


def _run_refresh_dataset(args: argparse.Namespace, config_path: Path | None) -> None:
    from slop_farmer.app.dataset_refresh import run_dataset_refresh

    refresh_defaults = command_defaults("refresh-dataset", config_path=config_path)
    result = run_dataset_refresh(
        DatasetRefreshOptions(
            repo=RepoRef.parse(args.repo),
            hf_repo_id=args.hf_repo_id,
            private_hf_repo=args.private_hf_repo,
            max_issues=args.max_issues,
            max_prs=args.max_prs,
            max_issue_comments=args.max_issue_comments,
            max_reviews_per_pr=args.max_reviews_per_pr,
            max_review_comments_per_pr=args.max_review_comments_per_pr,
            fetch_timeline=args.fetch_timeline,
            new_contributor_report=args.new_contributor_report,
            new_contributor_window_days=args.new_contributor_window_days,
            new_contributor_max_authors=args.new_contributor_max_authors,
            http_timeout=args.http_timeout,
            http_max_retries=args.http_max_retries,
            checkpoint_every_comments=args.checkpoint_every_comments,
            checkpoint_every_prs=args.checkpoint_every_prs,
            cluster_suppression_rules=tuple(refresh_defaults.get("cluster-suppression-rules", ())),
        )
    )
    print(json.dumps(result, indent=2))


def _run_analyze(args: argparse.Namespace, config_path: Path | None) -> None:
    from slop_farmer.reports.analysis import run_analysis

    analyze_defaults = command_defaults("analyze", config_path=config_path)
    hf_repo_id, hf_revision, hf_materialize_dir = _resolve_hf_inputs(args)
    options = AnalysisOptions(
        snapshot_dir=args.snapshot_dir,
        output_dir=args.output_dir,
        output=args.output,
        hf_repo_id=hf_repo_id,
        hf_revision=hf_revision,
        hf_materialize_dir=hf_materialize_dir,
        ranking_backend=args.ranking_backend,
        model=args.model,
        max_clusters=args.max_clusters,
        hybrid_llm_concurrency=args.hybrid_llm_concurrency,
        open_prs_only=args.open_prs_only,
        cached_analysis=bool(analyze_defaults.get("cached_analysis", False)),
        pr_template_cleanup_mode=str(
            analyze_defaults.get("pr-template-cleanup-mode", "merge_defaults")
        ),
        pr_template_strip_html_comments=bool(
            analyze_defaults.get("pr-template-strip-html-comments", True)
        ),
        pr_template_trim_closing_reference_prefix=bool(
            analyze_defaults.get("pr-template-trim-closing-reference-prefix", True)
        ),
        pr_template_section_patterns=tuple(
            analyze_defaults.get("pr-template-section-patterns", ())
        ),
        pr_template_line_patterns=tuple(analyze_defaults.get("pr-template-line-patterns", ())),
        cluster_suppression_rules=tuple(analyze_defaults.get("cluster-suppression-rules", ())),
    )
    print(run_analysis(options))


def _run_markdown_report(args: argparse.Namespace, config_path: Path | None) -> None:
    del config_path
    from slop_farmer.reports.analysis import render_markdown_report

    print(
        render_markdown_report(
            MarkdownReportOptions(
                input=args.input,
                output=args.output,
                snapshot_dir=args.snapshot_dir,
            )
        )
    )


def _run_duplicate_prs(args: argparse.Namespace, config_path: Path | None) -> None:
    del config_path
    from slop_farmer.app.duplicate_prs import run_duplicate_pr_merge
    from slop_farmer.reports.duplicate_prs import list_mergeable_duplicate_pr_clusters

    if args.duplicate_prs_command == "list":
        clusters = list_mergeable_duplicate_pr_clusters(
            report_path=args.report,
            snapshot_dir=args.snapshot_dir,
            limit=args.limit,
            model=args.model,
        )
        print(json.dumps(clusters, indent=2))
        return

    result = run_duplicate_pr_merge(
        report_path=args.report,
        snapshot_dir=args.snapshot_dir,
        repo_dir=args.repo_dir,
        upstream_repo=args.upstream_repo,
        upstream_remote=args.upstream_remote,
        fork_remote=args.fork_remote,
        cluster_id=args.cluster_id,
        fork_repo=args.fork_repo,
        fork_owner=args.fork_owner,
        file_policy=args.file_policy,
        model=args.model,
    )
    print(json.dumps(result, indent=2))


def _run_pr_scope(args: argparse.Namespace, config_path: Path | None) -> None:
    from slop_farmer.reports.pr_scope import run_pr_scope_report

    pr_scope_defaults = command_defaults("pr-scope", config_path=config_path)
    hf_repo_id, hf_revision, hf_materialize_dir = _resolve_hf_inputs(args)
    print(
        run_pr_scope_report(
            PrScopeOptions(
                snapshot_dir=args.snapshot_dir,
                output_dir=args.output_dir,
                output=args.output,
                hf_repo_id=hf_repo_id,
                hf_revision=hf_revision,
                hf_materialize_dir=hf_materialize_dir,
                cluster_suppression_rules=tuple(
                    pr_scope_defaults.get("cluster-suppression-rules", ())
                ),
            )
        )
    )


def _run_pr_search(args: argparse.Namespace, config_path: Path | None) -> None:
    from slop_farmer.app.pr_search import (
        explain_pr_search_pair,
        format_pr_search_candidate_clusters,
        format_pr_search_cluster,
        format_pr_search_contributor,
        format_pr_search_contributor_pulls,
        format_pr_search_pair,
        format_pr_search_probe,
        format_pr_search_pull_contributor,
        format_pr_search_similar,
        format_pr_search_status,
        get_pr_search_candidate_clusters,
        get_pr_search_cluster,
        get_pr_search_contributor,
        get_pr_search_contributor_pulls,
        get_pr_search_pull_contributor,
        get_pr_search_similar,
        get_pr_search_status,
        probe_pr_search_github,
        resolve_pr_search_db_path,
        run_pr_search_refresh,
    )

    pr_search_defaults = command_defaults("pr-search", config_path=config_path)
    if args.pr_search_command == "refresh":
        hf_repo_id, hf_revision, hf_materialize_dir = _resolve_hf_inputs(args)
        result = run_pr_search_refresh(
            PrSearchRefreshOptions(
                snapshot_dir=args.snapshot_dir,
                output_dir=args.output_dir,
                db=args.db,
                hf_repo_id=hf_repo_id,
                hf_revision=hf_revision,
                hf_materialize_dir=hf_materialize_dir,
                include_drafts=args.include_drafts,
                include_closed=args.include_closed,
                limit_prs=args.limit_prs,
                replace_active=args.replace_active,
                cluster_suppression_rules=tuple(
                    pr_search_defaults.get("cluster-suppression-rules", ())
                ),
            )
        )
        print(json.dumps(result, indent=2))
        return

    db_path = resolve_pr_search_db_path(args.db, output_dir=args.output_dir)
    if args.pr_search_command == "similar":
        result = get_pr_search_similar(
            db_path,
            pr_number=args.pr_number,
            repo=args.repo,
            limit=args.limit,
        )
        print(json.dumps(result, indent=2) if args.json else format_pr_search_similar(result))
        return

    if args.pr_search_command == "probe-github":
        result = probe_pr_search_github(
            db_path,
            pr_number=args.pr_number,
            repo=args.repo,
            limit=args.limit,
        )
        print(json.dumps(result, indent=2) if args.json else format_pr_search_probe(result))
        return

    if args.pr_search_command == "candidate-clusters":
        result = get_pr_search_candidate_clusters(
            db_path,
            pr_number=args.pr_number,
            repo=args.repo,
            limit=args.limit,
        )
        print(
            json.dumps(result, indent=2)
            if args.json
            else format_pr_search_candidate_clusters(result)
        )
        return

    if args.pr_search_command == "cluster":
        if args.pr_search_cluster_command != "show":
            raise ValueError(
                f"Unsupported pr-search cluster command: {args.pr_search_cluster_command}"
            )
        result = get_pr_search_cluster(
            db_path,
            cluster_id=args.cluster_id,
            repo=args.repo,
        )
        print(json.dumps(result, indent=2) if args.json else format_pr_search_cluster(result))
        return

    if args.pr_search_command == "explain-pair":
        result = explain_pr_search_pair(
            db_path,
            left_pr_number=args.left_pr_number,
            right_pr_number=args.right_pr_number,
            repo=args.repo,
        )
        print(json.dumps(result, indent=2) if args.json else format_pr_search_pair(result))
        return

    if args.pr_search_command == "status":
        result = get_pr_search_status(db_path, repo=args.repo)
        print(json.dumps(result, indent=2) if args.json else format_pr_search_status(result))
        return

    if args.pr_search_command == "contributor":
        result = get_pr_search_contributor(db_path, author_login=args.login, repo=args.repo)
        print(json.dumps(result, indent=2) if args.json else format_pr_search_contributor(result))
        return

    if args.pr_search_command == "contributor-prs":
        result = get_pr_search_contributor_pulls(
            db_path,
            author_login=args.login,
            repo=args.repo,
            limit=args.limit,
        )
        print(
            json.dumps(result, indent=2)
            if args.json
            else format_pr_search_contributor_pulls(result)
        )
        return

    if args.pr_search_command == "pr-contributor":
        result = get_pr_search_pull_contributor(
            db_path,
            pr_number=args.pr_number,
            repo=args.repo,
        )
        print(
            json.dumps(result, indent=2) if args.json else format_pr_search_pull_contributor(result)
        )
        return

    raise ValueError(f"Unsupported pr-search command: {args.pr_search_command}")


def _run_import_hf_checkpoint(args: argparse.Namespace, config_path: Path | None) -> None:
    del config_path
    from slop_farmer.app.hf_checkpoint_import import import_hf_checkpoint

    print(
        import_hf_checkpoint(
            CheckpointImportOptions(
                source_repo_id=args.source_repo_id,
                output_dir=args.output_dir,
                checkpoint_id=args.checkpoint_id,
                checkpoint_root=args.checkpoint_root,
                publish_repo_id=args.publish_repo_id,
                private_hf_repo=args.private_hf_repo,
                force=args.force,
            )
        )
    )


def _run_adopt_snapshot(args: argparse.Namespace, config_path: Path | None) -> None:
    del config_path
    from slop_farmer.app.snapshot_state import adopt_snapshot_for_pipeline

    print(
        adopt_snapshot_for_pipeline(
            SnapshotAdoptOptions(
                snapshot_dir=args.snapshot_dir,
                output_dir=args.output_dir,
                next_since=args.next_since,
            )
        )
    )


def _run_new_contributor_report(args: argparse.Namespace, config_path: Path | None) -> None:
    del config_path
    from slop_farmer.reports.new_contributor_report import run_new_contributor_report

    hf_repo_id, hf_revision, hf_materialize_dir = _resolve_hf_inputs(args)
    print(
        run_new_contributor_report(
            NewContributorReportOptions(
                snapshot_dir=args.snapshot_dir,
                output_dir=args.output_dir,
                output=args.output,
                json_output=args.json_output,
                hf_repo_id=hf_repo_id,
                hf_revision=hf_revision,
                hf_materialize_dir=hf_materialize_dir,
                window_days=args.window_days,
                max_authors=args.max_authors,
            )
        )
    )


def _run_dashboard_data(args: argparse.Namespace, config_path: Path | None) -> None:
    from slop_farmer.reports.dashboard import run_dashboard_data

    dashboard_defaults = command_defaults("dashboard-data", config_path=config_path)
    hf_repo_id, hf_revision, hf_materialize_dir = _resolve_hf_inputs(args)
    print(
        run_dashboard_data(
            DashboardDataOptions(
                snapshot_dir=args.snapshot_dir,
                output_dir=args.output_dir,
                analysis_input=args.analysis_input,
                contributors_input=args.contributors_input,
                pr_scope_input=args.pr_scope_input,
                hf_repo_id=hf_repo_id,
                hf_revision=hf_revision,
                hf_materialize_dir=hf_materialize_dir,
                window_days=args.window_days,
                snapshot_root=(
                    Path(dashboard_defaults["snapshot-root"])
                    if dashboard_defaults.get("snapshot-root")
                    else None
                ),
            )
        )
    )


def _run_deploy_dashboard(args: argparse.Namespace, config_path: Path | None) -> None:
    del config_path
    from slop_farmer.app.deploy import run_deploy_dashboard

    hf_repo_id, hf_revision, hf_materialize_dir = _resolve_hf_inputs(args)
    run_deploy_dashboard(
        DeployDashboardOptions(
            pipeline_data_dir=args.pipeline_data_dir,
            web_dir=args.web_dir,
            snapshot_dir=args.snapshot_dir,
            analysis_input=args.analysis_input,
            contributors_input=args.contributors_input,
            pr_scope_input=args.pr_scope_input,
            hf_repo_id=hf_repo_id,
            hf_revision=hf_revision,
            hf_materialize_dir=hf_materialize_dir,
            refresh_contributors=args.refresh_contributors,
            dashboard_window_days=args.dashboard_window_days,
            contributor_window_days=args.contributor_window_days,
            contributor_max_authors=args.contributor_max_authors,
            private_space=args.private_space,
            commit_message=args.commit_message,
            space_id=args.space_id,
            space_title=args.space_title,
            space_emoji=args.space_emoji,
            space_color_from=args.space_color_from,
            space_color_to=args.space_color_to,
            space_short_description=args.space_short_description,
            dataset_id=args.dataset_id,
            space_tags=args.space_tags,
        )
    )


def _run_dataset_status(args: argparse.Namespace, config_path: Path | None) -> None:
    del config_path
    from slop_farmer.app.dataset_status import format_dataset_status, get_dataset_status

    result = get_dataset_status(
        DatasetStatusOptions(
            repo=args.repo,
            output_dir=args.output_dir,
            hf_repo_id=args.hf_repo_id,
            hf_revision=args.hf_revision,
            json_output=args.json,
        )
    )
    print(json.dumps(result, indent=2) if args.json else format_dataset_status(result))


def _run_publish_analysis_artifacts(args: argparse.Namespace, config_path: Path | None) -> None:
    del config_path
    from slop_farmer.app.publish_analysis import run_publish_analysis_artifacts

    print(
        json.dumps(
            run_publish_analysis_artifacts(
                PublishAnalysisArtifactsOptions(
                    output_dir=args.output_dir,
                    snapshot_dir=args.snapshot_dir,
                    analysis_input=args.analysis_input,
                    hf_repo_id=args.hf_repo_id,
                    analysis_id=args.analysis_id,
                    canonical=args.canonical,
                    save_cache=args.save_cache,
                    private_hf_repo=args.private_hf_repo,
                )
            ),
            indent=2,
        )
    )


def _run_save_cache(args: argparse.Namespace, config_path: Path | None) -> None:
    del config_path
    from slop_farmer.app.save_cache import run_save_cache

    print(
        json.dumps(
            run_save_cache(
                SaveCacheOptions(
                    output_dir=args.output_dir,
                    snapshot_dir=args.snapshot_dir,
                    hf_repo_id=args.hf_repo_id,
                    private_hf_repo=args.private_hf_repo,
                )
            ),
            indent=2,
        )
    )


def main() -> None:
    config_path = extract_cli_config_path()
    parser = build_parser(config_path=config_path)
    args = parser.parse_args()

    handlers: dict[str, CommandHandler] = {
        "scrape": _run_scrape,
        "refresh-dataset": _run_refresh_dataset,
        "analyze": _run_analyze,
        "markdown-report": _run_markdown_report,
        "duplicate-prs": _run_duplicate_prs,
        "pr-scope": _run_pr_scope,
        "pr-search": _run_pr_search,
        "import-hf-checkpoint": _run_import_hf_checkpoint,
        "adopt-snapshot": _run_adopt_snapshot,
        "new-contributor-report": _run_new_contributor_report,
        "dashboard-data": _run_dashboard_data,
        "deploy-dashboard": _run_deploy_dashboard,
        "dataset-status": _run_dataset_status,
        "publish-analysis-artifacts": _run_publish_analysis_artifacts,
        "save-cache": _run_save_cache,
    }
    handler = handlers.get(args.command)
    if handler is None:
        parser.error(f"Unknown command: {args.command}")
    handler(args, config_path)


if __name__ == "__main__":
    main()