Spaces:

ProteinDesignLab
/

caliby

Running on Zero

App Files Files Community

Justine Yuan committed on Feb 23

Commit

3beba17

1 Parent(s): e732716

Caliby HuggingFace example

Browse files

Files changed (17) hide show

.gitignore +220 -0
README.md +2 -1
app.py +434 -0
app_config.py +12 -0
caliby_transparent.png +0 -0
constraints.py +46 -0
design.py +244 -0
ensemble.py +36 -0
file_utils.py +73 -0
models.py +23 -0
pyproject.toml +30 -0
requirements.txt +8 -0
self_consistency.py +35 -0
tests/conftest.py +44 -0
tests/test_design_sequences.py +475 -0
tests/test_helpers.py +295 -0
viewers.py +79 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,220 @@

+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[codz]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#   Usually these files are written by a python script from a template
+#   before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py.cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+# Pipfile.lock
+# UV
+#   Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+uv.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+# poetry.lock
+# poetry.toml
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#   pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
+#   https://pdm-project.org/en/latest/usage/project/#working-with-version-control
+# pdm.lock
+# pdm.toml
+.pdm-python
+.pdm-build/
+# pixi
+#   Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
+# pixi.lock
+#   Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
+#   in the .venv directory. It is recommended not to include this directory in version control.
+.pixi
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# Redis
+*.rdb
+*.aof
+*.pid
+# RabbitMQ
+mnesia/
+rabbitmq/
+rabbitmq-data/
+# ActiveMQ
+activemq-data/
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.envrc
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+# PyCharm
+#   JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#   be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#   and can be added to the global gitignore or merged into this file.  For a more nuclear
+#   option (not recommended) you can uncomment the following to ignore the entire idea folder.
+# .idea/
+# Abstra
+#   Abstra is an AI-powered process automation framework.
+#   Ignore directories containing user credentials, local state, and settings.
+#   Learn more at https://abstra.io/docs
+.abstra/
+# Visual Studio Code
+#   Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
+#   that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
+#   and can be added to the global gitignore or merged into this file. However, if you prefer,
+#   you could uncomment the following to ignore the entire vscode folder
+# .vscode/
+# Ruff stuff:
+.ruff_cache/
+# PyPI configuration file
+.pypirc
+# Marimo
+marimo/_static/
+marimo/_lsp/
+__marimo__/
+# Streamlit
+.streamlit/secrets.toml
+envs
+CLAUDE.md

README.md CHANGED Viewed

@@ -4,7 +4,8 @@ emoji: 🐢
 colorFrom: yellow
 colorTo: yellow
 sdk: gradio
-sdk_version: 5.49.1
 app_file: app.py
 pinned: false
 license: apache-2.0

 colorFrom: yellow
 colorTo: yellow
 sdk: gradio
+sdk_version: "6.6.0"
+python_version: "3.12"
 app_file: app.py
 pinned: false
 license: apache-2.0

app.py ADDED Viewed

	@@ -0,0 +1,434 @@

+"""Gradio app for Caliby sequence design."""
+import base64
+from pathlib import Path
+import gradio as gr
+# Eagerly import so the wandb/pydantic init runs in the main thread
+# (where sys.modules['__main__'] exists), not in a Gradio worker thread.
+import caliby.data.preprocessing.atomworks.clean_pdbs  # noqa: F401
+from design import design_sequences
+from file_utils import _get_file_path, _write_zip_from_paths
+from viewers import (
+    _csv_download_output,
+    _file_output,
+    _format_results_display,
+    _get_best_sc_sample,
+    _render_af2_viewer,
+    _update_viewers,
+)
+def _get_upload_instructions(mode: str) -> str:
+    if mode == "none":
+        return "Upload a single PDB or CIF file."
+    elif mode == "synthetic":
+        return "Upload a single PDB or CIF file. Conformers will be generated automatically."
+    else:
+        return "Upload all PDB files — primary conformer first, then additional conformers."
+def _clean_uploaded_pdbs(pdb_files: list | None):
+    if not pdb_files:
+        return None, gr.update(visible=False), gr.update(visible=False), gr.update(interactive=False)
+    from caliby import clean_pdbs
+    pdb_paths = [str(_get_file_path(f)) for f in pdb_files]
+    cleaned_paths = clean_pdbs(pdb_paths)
+    zip_path = _write_zip_from_paths(cleaned_paths, "cleaned_pdbs", ".zip")
+    return (
+        cleaned_paths,
+        gr.update(
+            value="**Note:** Your files have been cleaned and standardized to mmCIF format "
+            "to avoid downstream parsing and alignment issues. "
+            "If you plan to use positional constraints, please download the cleaned files and double "
+            "check the new residue indices.",
+            visible=True,
+        ),
+        gr.update(value=zip_path, visible=True),
+        gr.update(interactive=True),
+    )
+def _reset_cleaned_state():
+    return None, gr.update(visible=False), gr.update(visible=False), gr.update(interactive=False)
+def submit_design_sequences(
+    cleaned_files: list[str] | None,
+    ensemble_mode: str,
+    model_variant: str,
+    num_seqs: int,
+    omit_aas: list[str] | None,
+    temperature: float,
+    fixed_pos_seq: str,
+    fixed_pos_scn: str,
+    fixed_pos_override_seq: str,
+    pos_restrict_aatype: str,
+    symmetry_pos: str,
+    num_protpardelle_conformers: int,
+    run_af2_eval: bool = False,
+):
+    """Run ``design_sequences`` and map its results onto the Gradio output components.
+    All arguments are forwarded unchanged to ``design_sequences`` (``cleaned_files`` is
+    passed as ``pdb_files``).
+    Returns a 14-tuple whose order must match the ``outputs=`` list of the
+    ``submit_btn.click(...).then(...)`` wiring; each slot is labelled inline below.
+    The AF2 viewer section is only shown when ``design_sequences`` returned a non-empty
+    ``af2_pdb_data``.
+    """
+    df, fasta_text, out_zip_path, sc_zip_path, af2_pdb_data, input_pdb_data = design_sequences(
+        pdb_files=cleaned_files,
+        ensemble_mode=ensemble_mode,
+        model_variant=model_variant,
+        num_seqs=num_seqs,
+        omit_aas=omit_aas,
+        temperature=temperature,
+        fixed_pos_seq=fixed_pos_seq,
+        fixed_pos_scn=fixed_pos_scn,
+        fixed_pos_override_seq=fixed_pos_override_seq,
+        pos_restrict_aatype=pos_restrict_aatype,
+        symmetry_pos=symmetry_pos,
+        num_protpardelle_conformers=num_protpardelle_conformers,
+        run_af2_eval=run_af2_eval,
+    )
+    # An empty/missing AF2 payload means there is nothing to render in the viewer.
+    has_af2 = bool(af2_pdb_data)
+    best_sample = _get_best_sc_sample(df) if has_af2 else ""
+    af2_html = _render_af2_viewer(best_sample, af2_pdb_data) if has_af2 else ""
+    return (
+        gr.update(visible=True),  # results_header
+        gr.update(value=_format_results_display(df), visible=True),  # results_df (display copy)
+        df,  # raw_results_df state
+        gr.update(value=fasta_text, visible=True),  # fasta_output
+        _file_output(out_zip_path),  # output_files
+        _file_output(sc_zip_path),  # sc_output_files
+        af2_pdb_data,  # af2_pdb_state
+        input_pdb_data,  # input_pdb_state
+        best_sample,  # best_sample_state
+        gr.update(visible=has_af2),  # viewer_section
+        af2_html,  # af2_viewer
+        gr.update(value="", visible=False),  # reference_viewer: cleared and hidden
+        gr.update(visible=False),  # ref_section
+        gr.update(visible=False),  # results_placeholder: hide the loading message
+    )
+theme = gr.themes.Base(
+    primary_hue="amber",
+    secondary_hue="orange",
+    radius_size="lg",
+    font=[gr.themes.GoogleFont('Instrument Sans'), 'ui-sans-serif', 'system-ui', 'sans-serif'],
+).set(
+    body_text_color='*neutral_700',
+    body_text_color_dark='*neutral_300',
+    body_text_color_subdued='*neutral_500',
+    block_title_text_color='*neutral_700',
+    block_info_text_color='*neutral_500',
+    block_border_width_dark='0px',
+    block_padding='*spacing_xl calc(*spacing_xl + 3px)',
+    block_label_border_width_dark='0px',
+    block_label_padding='*spacing_md *spacing_lg',
+    button_secondary_background_fill_dark='*neutral_600',
+    checkbox_label_text_color_dark='*neutral_100',
+)
+css = """
+.loading-pulse { animation: pulse 2.5s ease-in-out infinite; }
+@keyframes pulse { 0%, 100% { opacity: 1; } 50% { opacity: 0.3; } }
+.omit-aa-dropdown ul { max-height: 200px !important; overflow-y: auto; }
+.compact-file .large { min-height: 50px !important; }
+#results-table th:nth-child(2),
+#results-table td:nth-child(2) {
+    max-width: 28rem;
+    width: 28rem;
+}
+#results-table td:nth-child(2) {
+    overflow: hidden;
+}
+#results-table td:nth-child(2) > div {
+    display: block;
+    max-width: 100%;
+    overflow-x: auto;
+    overflow-y: hidden;
+    white-space: nowrap !important;
+    scrollbar-width: thin;
+}
+#af2-viewer, #ref-viewer {
+    display: flex;
+    justify-content: center;
+}
+#af2-viewer iframe, #ref-viewer iframe {
+    max-width: 100%;
+}
+"""
+_LOGO_B64 = base64.b64encode(Path(__file__).with_name("caliby_transparent.png").read_bytes()).decode()
+with gr.Blocks(title="Caliby - Protein Sequence Design") as demo:
+    gr.HTML(
+        '<div style="display: flex; align-items: center; gap: 16px;">'
+        f'<img src="data:image/png;base64,{_LOGO_B64}" alt="Caliby logo" style="height: 80px;">'
+        '<h1 style="margin: 0;">Caliby - Protein Sequence Design</h1>'
+        '</div>'
+    )
+    with gr.Row():
+        with gr.Column(scale=1):
+            model_variant = gr.Radio(
+                choices=[
+                    ("Caliby", "caliby"),
+                    ("SolubleCaliby v1", "soluble_caliby_v1"),
+                ],
+                value="caliby",
+                label="Model",
+            )
+            ensemble_mode = gr.Radio(
+                choices=[
+                    ("Fixed backbone", "none"),
+                    ("Synthetic ensemble", "synthetic"),
+                    ("Upload your own ensemble", "user"),
+                ],
+                value="synthetic",
+                label="Ensemble mode",
+            )
+            run_af2_eval = gr.Checkbox(
+                label="Run AF2 self-consistency evaluation",
+                value=False,
+                info="Refold designed sequences with AlphaFold2 and compute scRMSD, pLDDT, and TM-score",
+            )
+            upload_instructions = gr.Markdown(
+                _get_upload_instructions("synthetic"),
+            )
+            pdb_input = gr.File(
+                file_count="multiple",
+                label="PDB/CIF file(s)",
+                file_types=[".pdb", ".cif"],
+            )
+            finish_upload_btn = gr.Button("Upload", variant="secondary")
+            cleaned_files_state = gr.State(None)
+            clean_notification = gr.Markdown(visible=False)
+            clean_download = gr.File(
+                label="Download cleaned files", visible=False, elem_classes=["compact-file"]
+            )
+            num_seqs = gr.Slider(
+                minimum=1,
+                maximum=4,
+                value=1,
+                step=1,
+                label="Number of sequences",
+            )
+            omit_aas = gr.Dropdown(
+                choices=[
+                    "A",
+                    "C",
+                    "D",
+                    "E",
+                    "F",
+                    "G",
+                    "H",
+                    "I",
+                    "K",
+                    "L",
+                    "M",
+                    "N",
+                    "P",
+                    "Q",
+                    "R",
+                    "S",
+                    "T",
+                    "V",
+                    "W",
+                    "Y",
+                ],
+                multiselect=True,
+                label="Amino acids to omit",
+                elem_classes=["omit-aa-dropdown"],
+            )
+            temperature = gr.Slider(
+                minimum=0.01,
+                maximum=1,
+                value=0.01,
+                step=0.01,
+                label="Sampling temperature",
+            )
+            submit_btn = gr.Button("Design sequences", variant="primary", interactive=False)
+            with gr.Accordion("Advanced constraints", open=False):
+                fixed_pos_seq = gr.Textbox(
+                    label="Fixed positions",
+                    info="Format: A1-100,B1-100 \nSequence positions in the input PDB to condition on so that they"
+                    " remain fixed during design. For ensemble-conditioned design, fixed_pos_seq is applied using"
+                    " the primary conformer's sequence.",
+                    placeholder="e.g. A1-100,B1-100",
+                )
+                fixed_pos_scn = gr.Textbox(
+                    label="Fixed sidechain positions",
+                    info="Format: A1-10,A12,A15-20 \nSidechain positions in the input PDB to condition on so that they"
+                    " remain fixed during design. Note that fixed sidechain positions must be a subset of fixed"
+                    " sequence positions, since it does not make sense to condition on a sidechain without also"
+                    " conditioning on its sequence identity.",
+                    placeholder="e.g. A1-10,A12,A15-20",
+                )
+                fixed_pos_override_seq = gr.Textbox(
+                    label="Override sequence at positions",
+                    info="Format: A26:A,A27:L \nSequence positions in the input PDB to first override the sequence at,"
+                    " and then condition on. The colon separates the position and the desired amino acid.",
+                    placeholder="e.g. A26:A,A27:L",
+                )
+                pos_restrict_aatype = gr.Textbox(
+                    label="Position restrictions",
+                    info="Format: A26:AVG,A27:VG \nAllowed amino acids for certain positions in the input PDB. The"
+                    " colon separates the position and the allowed amino acids.",
+                    placeholder="e.g. A26:AVG,A27:VG",
+                )
+                symmetry_pos = gr.Textbox(
+                    label="Symmetry positions",
+                    info="Format: A10,B10,C10|A11,B11,C11 \nSymmetry positions for tying sampling across residue"
+                    " positions. The pipe separates groups of positions to sample symmetrically. In the example,"
+                    " A10, B10, and C10 are tied together, and A11, B11, and C11 are tied together.",
+                    placeholder="e.g. A10,B10,C10|A11,B11,C11",
+                )
+                num_protpardelle_conformers = gr.Slider(
+                    minimum=1,
+                    maximum=15,
+                    value=15,
+                    step=1,
+                    label="Number of conformers to generate",
+                    visible=True,
+                )
+        with gr.Column(scale=2):
+            raw_results_df = gr.State(None)
+            af2_pdb_state = gr.State({})
+            input_pdb_state = gr.State({})
+            best_sample_state = gr.State("")
+            results_placeholder = gr.Markdown(
+                "Results will appear here after designing sequences.",
+            )
+            results_header = gr.Markdown("### Results", visible=False)
+            results_df = gr.Dataframe(
+                show_label=False,
+                interactive=False,
+                wrap=False,
+                column_widths=[160, 448],
+                elem_id="results-table",
+                visible=False,
+            )
+            fasta_output = gr.Textbox(
+                label="Sequences (FASTA)",
+                lines=10,
+                visible=False,
+            )
+            with gr.Row():
+                csv_download = gr.File(label="Download results CSV", elem_classes=["compact-file"], visible=False)
+                output_files = gr.File(label="Download CIF files", elem_classes=["compact-file"], visible=False)
+                sc_output_files = gr.File(
+                    label="Download AF2 self-consistency outputs",
+                    elem_classes=["compact-file"],
+                    visible=False,
+                )
+            with gr.Column(visible=False) as viewer_section:
+                gr.Markdown("---")
+                with gr.Row():
+                    gr.Markdown("### AF2 Prediction")
+                    af2_color_mode = gr.Dropdown(
+                        choices=[
+                            ("pLDDT", "plddt"),
+                            ("Chain", "chain"),
+                            ("Rainbow", "rainbow"),
+                            ("Secondary structure", "secondary"),
+                        ],
+                        value="plddt",
+                        label="Color by",
+                        scale=0,
+                    )
+                af2_viewer = gr.HTML(elem_id="af2-viewer")
+                show_overlay = gr.Checkbox(label="Show reference structure", value=False)
+                with gr.Column(visible=False) as ref_section:
+                    with gr.Row():
+                        gr.Markdown("### Reference Structure")
+                        ref_color_mode = gr.Dropdown(
+                            choices=[
+                                ("Chain", "chain"),
+                                ("pLDDT", "plddt"),
+                                ("Rainbow", "rainbow"),
+                                ("Secondary structure", "secondary"),
+                            ],
+                            value="chain",
+                            label="Color by",
+                            scale=0,
+                        )
+                    reference_viewer = gr.HTML(elem_id="ref-viewer")
+    # Design button: first swap the placeholder for a pulsing "running" message, then run
+    # the pipeline. The outputs list below must stay in the same order as the tuple
+    # returned by submit_design_sequences.
+    submit_btn.click(
+        fn=lambda: gr.update(value='<div class="loading-pulse">Running design pipeline\u2026</div>', visible=True),
+        outputs=[results_placeholder],
+    ).then(
+        fn=submit_design_sequences,
+        inputs=[
+            cleaned_files_state,
+            ensemble_mode,
+            model_variant,
+            num_seqs,
+            omit_aas,
+            temperature,
+            fixed_pos_seq,
+            fixed_pos_scn,
+            fixed_pos_override_seq,
+            pos_restrict_aatype,
+            symmetry_pos,
+            num_protpardelle_conformers,
+            run_af2_eval,
+        ],
+        outputs=[
+            results_header,
+            results_df,
+            raw_results_df,
+            fasta_output,
+            output_files,
+            sc_output_files,
+            af2_pdb_state,
+            input_pdb_state,
+            best_sample_state,
+            viewer_section,
+            af2_viewer,
+            reference_viewer,
+            ref_section,
+            results_placeholder,
+        ],
+    )
+    # The CSV download is derived from the raw results state whenever that state changes.
+    raw_results_df.change(fn=_csv_download_output, inputs=[raw_results_df], outputs=[csv_download])
+    # Upload button: disable it and relabel while cleaning runs, clean the files, then
+    # restore the original label and interactivity.
+    finish_upload_btn.click(
+        fn=lambda: gr.update(value="Processing\u2026", interactive=False),
+        outputs=[finish_upload_btn],
+    ).then(
+        fn=_clean_uploaded_pdbs,
+        inputs=[pdb_input],
+        outputs=[cleaned_files_state, clean_notification, clean_download, submit_btn],
+    ).then(
+        fn=lambda: gr.update(value="Upload", interactive=True),
+        outputs=[finish_upload_btn],
+    )
+    # Any change to the selected files invalidates previously cleaned files, so the
+    # design button is locked again until "Upload" is pressed.
+    pdb_input.change(
+        fn=_reset_cleaned_state,
+        outputs=[cleaned_files_state, clean_notification, clean_download, submit_btn],
+    )
+    # The conformer-count slider is only visible in synthetic mode; the upload help text
+    # follows the selected mode.
+    ensemble_mode.change(
+        fn=lambda mode: (gr.update(visible=(mode == "synthetic")), _get_upload_instructions(mode)),
+        inputs=[ensemble_mode],
+        outputs=[num_protpardelle_conformers, upload_instructions],
+    )
+    # All three viewer controls re-render through the same callback with the same
+    # inputs and outputs.
+    viewer_inputs = [best_sample_state, af2_pdb_state, input_pdb_state, show_overlay, af2_color_mode, ref_color_mode]
+    viewer_outputs = [af2_viewer, reference_viewer, ref_section]
+    show_overlay.change(fn=_update_viewers, inputs=viewer_inputs, outputs=viewer_outputs)
+    af2_color_mode.change(fn=_update_viewers, inputs=viewer_inputs, outputs=viewer_outputs)
+    ref_color_mode.change(fn=_update_viewers, inputs=viewer_inputs, outputs=viewer_outputs)
+if __name__ == "__main__":
+    demo.launch(theme=theme, css=css, ssr_mode=False)

app_config.py ADDED Viewed

	@@ -0,0 +1,12 @@

+"""Configuration constants and module-level side effects (weight downloads)."""
+import os
+from huggingface_hub import snapshot_download
+# Import-time side effect: fetch the ProteinDesignLab/caliby-weights model repo.
+# The token comes from the HF_TOKEN environment variable and is None when unset.
+# NOTE(review): WEIGHTS_DIR is used below as a directory path, so snapshot_download is
+# presumably returning the local snapshot folder — confirm against huggingface_hub docs.
+WEIGHTS_DIR = snapshot_download(
+    repo_id="ProteinDesignLab/caliby-weights", repo_type="model", token=os.environ.get("HF_TOKEN")
+)
+# Set MODEL_PARAMS_DIR so caliby's weight utilities can find/download files.
+# setdefault keeps any MODEL_PARAMS_DIR value that is already set in the environment.
+os.environ.setdefault("MODEL_PARAMS_DIR", WEIGHTS_DIR)

caliby_transparent.png ADDED Viewed

constraints.py ADDED Viewed

	@@ -0,0 +1,46 @@

+"""Input validation and position constraint building."""
+import pandas as pd
+def _validate_design_inputs(pdb_files: list | None, ensemble_mode: str) -> str | None:
+    if not pdb_files:
+        return "Upload at least one PDB or CIF file."
+    if ensemble_mode == "user" and len(pdb_files) < 2:
+        return "User ensemble mode requires at least two files."
+    single_file_mode_messages = {
+        "none": "Single structure mode requires exactly one file.",
+        "synthetic": "Protpardelle mode requires exactly one file.",
+    }
+    message = single_file_mode_messages.get(ensemble_mode)
+    if message and len(pdb_files) != 1:
+        return message
+    return None
+def _build_pos_constraint_df(
+    pdb_key: str,
+    fixed_pos_seq: str,
+    fixed_pos_scn: str,
+    fixed_pos_override_seq: str,
+    pos_restrict_aatype: str,
+    symmetry_pos: str,
+) -> pd.DataFrame | None:
+    row = {}
+    if fixed_pos_seq and fixed_pos_seq.strip():
+        row["fixed_pos_seq"] = fixed_pos_seq.strip()
+    if fixed_pos_scn and fixed_pos_scn.strip():
+        row["fixed_pos_scn"] = fixed_pos_scn.strip()
+    if fixed_pos_override_seq and fixed_pos_override_seq.strip():
+        row["fixed_pos_override_seq"] = fixed_pos_override_seq.strip()
+    if pos_restrict_aatype and pos_restrict_aatype.strip():
+        row["pos_restrict_aatype"] = pos_restrict_aatype.strip()
+    if symmetry_pos and symmetry_pos.strip():
+        row["symmetry_pos"] = symmetry_pos.strip()
+    if not row:
+        return None
+    row["pdb_key"] = pdb_key
+    return pd.DataFrame([row])

design.py ADDED Viewed

	@@ -0,0 +1,244 @@

+"""Core design pipeline: context building, execution, output formatting."""
+import re
+import tempfile
+from pathlib import Path
+import gradio as gr
+import pandas as pd
+import spaces
+import torch
+from app_config import WEIGHTS_DIR
+from constraints import _build_pos_constraint_df, _validate_design_inputs
+from ensemble import _generate_protpardelle_ensemble, _setup_user_ensemble_dir
+from file_utils import _copy_uploaded_files, _get_file_path, _sanitize_download_stem, _write_zip_from_paths
+from models import get_model
+from self_consistency import _run_self_consistency
+# ZeroGPU quota-aware retry: request the max duration first, and if the
+# scheduler returns a quota error (which is free — no GPU time consumed),
+# parse the remaining seconds and retry with that exact amount.
+_MAX_GPU_DURATION = 120  # Per-call max; daily quota is 210s but per-call cap is lower
+_gpu_duration_override: int | None = None
+def _dynamic_gpu_duration(*args, **kwargs) -> int:
+    """Return the current GPU duration for @spaces.GPU scheduling."""
+    return _gpu_duration_override if _gpu_duration_override is not None else _MAX_GPU_DURATION
+def _parse_quota_left(error: Exception) -> int | None:
+    """Pull the remaining GPU seconds out of a ZeroGPU quota error.
+    Only the error's ``message`` attribute is inspected, and only when it is a string;
+    the first ``<digits>s left)`` occurrence in it is returned as an int. Anything else
+    yields None, meaning the error is not a recoverable quota error.
+    """
+    text = getattr(error, 'message', None)
+    if isinstance(text, str) and (found := re.search(r'(\d+)s left\)', text)):
+        return int(found.group(1))
+    return None
+def _build_design_context(
+    pdb_paths: list[str],
+    ensemble_mode: str,
+    tmpdir: Path,
+    num_protpardelle_conformers: int,
+    fixed_pos_seq: str,
+    fixed_pos_scn: str,
+    fixed_pos_override_seq: str,
+    pos_restrict_aatype: str,
+    symmetry_pos: str,
+) -> tuple[list[str] | dict[str, list[str]], pd.DataFrame | None]:
+    pdb_key = Path(pdb_paths[0]).stem
+    pos_constraint_df = _build_pos_constraint_df(
+        pdb_key=pdb_key,
+        fixed_pos_seq=fixed_pos_seq,
+        fixed_pos_scn=fixed_pos_scn,
+        fixed_pos_override_seq=fixed_pos_override_seq,
+        pos_restrict_aatype=pos_restrict_aatype,
+        symmetry_pos=symmetry_pos,
+    )
+    if ensemble_mode == "none":
+        return pdb_paths, pos_constraint_df
+    if ensemble_mode == "synthetic":
+        design_inputs = _generate_protpardelle_ensemble(
+            pdb_path=pdb_paths[0],
+            num_conformers=num_protpardelle_conformers,
+            out_dir=tmpdir,
+            weights_dir=WEIGHTS_DIR,
+        )
+    else:
+        design_inputs = _setup_user_ensemble_dir(pdb_paths=pdb_paths)
+    if pos_constraint_df is not None:
+        from caliby import make_ensemble_constraints
+        row = pos_constraint_df.iloc[0]
+        cols = {col: row[col] for col in pos_constraint_df.columns if col != "pdb_key"}
+        pos_constraint_df = make_ensemble_constraints({pdb_key: cols}, design_inputs)
+    return design_inputs, pos_constraint_df
+def _format_outputs(outputs: dict) -> tuple[pd.DataFrame, str, list[str]]:
+    out_pdb_list = outputs["out_pdb"]
+    df = pd.DataFrame(
+        {
+            "Sample": [Path(out_pdb).stem for out_pdb in out_pdb_list],
+            "Sequence": outputs["seq"],
+            "Energy (U)": outputs["U"],
+        }
+    )
+    fasta_lines = []
+    for i, (eid, seq) in enumerate(zip(outputs["example_id"], outputs["seq"])):
+        fasta_lines.append(f">{eid}_sample{i}")
+        fasta_lines.append(seq)
+    fasta_text = "\n".join(fasta_lines)
+    return df, fasta_text, out_pdb_list
+@spaces.GPU(duration=_dynamic_gpu_duration)
+def _design_sequences_gpu(
+    pdb_files: list | None,
+    ensemble_mode: str,
+    model_variant: str,
+    num_seqs: int,
+    omit_aas: list[str] | None,
+    temperature: float,
+    fixed_pos_seq: str,
+    fixed_pos_scn: str,
+    fixed_pos_override_seq: str,
+    pos_restrict_aatype: str,
+    symmetry_pos: str,
+    num_protpardelle_conformers: int,
+    run_af2_eval: bool = False,
+):
+    """Run the full design pipeline inside a ``@spaces.GPU`` allocation.
+    Steps: validate inputs, load the model, copy the inputs into a temporary directory,
+    build the design inputs and constraints, sample sequences, optionally run the AF2
+    self-consistency evaluation, and zip the designed structures.
+    Returns a 6-tuple ``(df, fasta_text, out_zip_path, sc_zip_path, af2_pdb_data,
+    input_pdb_data)``. When validation fails, no work is done and the tuple is
+    ``(empty DataFrame, validation message, None, None, {}, {})`` — the message travels
+    in the FASTA-text slot.
+    """
+    validation_error = _validate_design_inputs(pdb_files, ensemble_mode)
+    if validation_error:
+        return pd.DataFrame(), validation_error, None, None, {}, {}
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    # Inference only. Note this switches autograd off globally, not just for this call.
+    torch.set_grad_enabled(False)
+    # Download file names are derived from the first uploaded file's stem.
+    download_stem = _sanitize_download_stem(_get_file_path(pdb_files[0]).stem)
+    gr.Info("Loading model...")
+    model = get_model(model_variant, device)
+    with tempfile.TemporaryDirectory() as tmpdir:
+        tmpdir = Path(tmpdir)
+        pdb_paths = _copy_uploaded_files(pdb_files, tmpdir)
+        # Keep the input structure text in memory, keyed by file stem, so it is still
+        # available to the caller after the temporary directory is removed.
+        input_pdb_data = {Path(p).stem: Path(p).read_text() for p in pdb_paths}
+        out_dir = tmpdir / "outputs"
+        out_dir.mkdir(parents=True, exist_ok=True)
+        if ensemble_mode == "synthetic":
+            gr.Info("Generating conformer ensemble...")
+        elif ensemble_mode == "user":
+            gr.Info("Preparing uploaded ensemble...")
+        design_inputs, pos_constraint_df = _build_design_context(
+            pdb_paths=pdb_paths,
+            ensemble_mode=ensemble_mode,
+            tmpdir=tmpdir,
+            num_protpardelle_conformers=num_protpardelle_conformers,
+            fixed_pos_seq=fixed_pos_seq,
+            fixed_pos_scn=fixed_pos_scn,
+            fixed_pos_override_seq=fixed_pos_override_seq,
+            pos_restrict_aatype=pos_restrict_aatype,
+            symmetry_pos=symmetry_pos,
+        )
+        gr.Info("Designing sequences...")
+        sample_kwargs = dict(
+            out_dir=str(out_dir),
+            num_seqs_per_pdb=num_seqs,
+            # An empty selection from the UI is normalized to None.
+            omit_aas=omit_aas if omit_aas else None,
+            temperature=temperature,
+            num_workers=0,
+            pos_constraint_df=pos_constraint_df,
+        )
+        # Single-structure mode and ensemble modes use different sampler entry points.
+        if ensemble_mode == "none":
+            outputs = model.sample(design_inputs, **sample_kwargs)
+        else:
+            outputs = model.ensemble_sample(design_inputs, **sample_kwargs)
+        df, fasta_text, out_pdb_list = _format_outputs(outputs)
+        sc_zip_path = None
+        af2_pdb_data = {}
+        if run_af2_eval:
+            gr.Info("Running AF2 self-consistency evaluation...")
+            sc_zip_path, af2_pdb_data = _run_self_consistency(model, df, out_pdb_list, out_dir, download_stem)
+        # NOTE(review): the returned zip paths are used after tmpdir is cleaned up, so
+        # _write_zip_from_paths presumably writes outside tmpdir — confirm in file_utils.
+        out_zip_path = _write_zip_from_paths(out_pdb_list, download_stem, "_designs.zip")
+        return df, fasta_text, out_zip_path, sc_zip_path, af2_pdb_data, input_pdb_data
+def design_sequences(
+    pdb_files: list | None,
+    ensemble_mode: str,
+    model_variant: str,
+    num_seqs: int,
+    omit_aas: list[str] | None,
+    temperature: float,
+    fixed_pos_seq: str,
+    fixed_pos_scn: str,
+    fixed_pos_override_seq: str,
+    pos_restrict_aatype: str,
+    symmetry_pos: str,
+    num_protpardelle_conformers: int,
+    run_af2_eval: bool = False,
+):
+    """Design sequences, retrying once with the remaining ZeroGPU quota.
+    The first attempt runs with ``_gpu_duration_override`` cleared. If it raises a
+    ``gr.Error`` from which ``_parse_quota_left`` extracts a positive number of
+    seconds, the same call is repeated once with the override set to one second
+    less than that amount. A ``gr.Error`` with no parsable or non-positive
+    remainder is re-raised unchanged; other exception types are not caught here.
+    The override is always reset to ``None`` once the retry finishes.
+    """
+    global _gpu_duration_override
+    # Both attempts receive exactly the same arguments, so collect them once.
+    call_kwargs = dict(
+        pdb_files=pdb_files,
+        ensemble_mode=ensemble_mode,
+        model_variant=model_variant,
+        num_seqs=num_seqs,
+        omit_aas=omit_aas,
+        temperature=temperature,
+        fixed_pos_seq=fixed_pos_seq,
+        fixed_pos_scn=fixed_pos_scn,
+        fixed_pos_override_seq=fixed_pos_override_seq,
+        pos_restrict_aatype=pos_restrict_aatype,
+        symmetry_pos=symmetry_pos,
+        num_protpardelle_conformers=num_protpardelle_conformers,
+        run_af2_eval=run_af2_eval,
+    )
+    _gpu_duration_override = None
+    try:
+        return _design_sequences_gpu(**call_kwargs)
+    except gr.Error as err:
+        seconds_left = _parse_quota_left(err)
+        print(
+            f"[ZeroGPU retry] Caught gr.Error, parsed remaining={seconds_left}, "
+            f"message={getattr(err, 'message', str(err))}"
+        )
+        # Not a quota error, or nothing left to spend: surface the original error.
+        if seconds_left is None or seconds_left <= 0:
+            raise
+        gr.Info(f"GPU quota: {seconds_left}s remaining, retrying with exact quota")
+        # Ask for one second less than the reported remainder.
+        _gpu_duration_override = seconds_left - 1
+        try:
+            return _design_sequences_gpu(**call_kwargs)
+        finally:
+            # Never leave a stale override behind for the next request.
+            _gpu_duration_override = None

ensemble.py ADDED Viewed

	@@ -0,0 +1,36 @@

+"""Protpardelle and user ensemble generation."""
+from pathlib import Path
+def _generate_protpardelle_ensemble(
+    pdb_path: str,
+    num_conformers: int,
+    out_dir: Path,
+    weights_dir: str,
+) -> dict[str, list[str]]:
+    """Sample conformers for one structure and return the pdb_to_conformers mapping.
+    Calls ``caliby.generate_ensembles`` on ``pdb_path`` with output directed to
+    ``out_dir / "protpardelle_ensemble"``, then places ``pdb_path`` itself at the
+    front of the list stored under the input file's stem.
+    """
+    from caliby import generate_ensembles
+    ensemble_dir = out_dir / "protpardelle_ensemble"
+    conformers_by_stem = generate_ensembles(
+        [pdb_path],
+        out_dir=str(ensemble_dir),
+        num_samples_per_pdb=num_conformers,
+        model_params_path=weights_dir,
+    )
+    # The returned lists hold only the generated conformers, so the primary
+    # structure is inserted ahead of them (an absent key counts as "none generated").
+    stem = Path(pdb_path).stem
+    generated = conformers_by_stem.get(stem, [])
+    conformers_by_stem[stem] = [pdb_path] + generated
+    return conformers_by_stem
+def _setup_user_ensemble_dir(
+    pdb_paths: list[str],
+    **_ignored,
+) -> dict[str, list[str]]:
+    """Map the primary file's stem to a copy of every uploaded path.
+    The first entry of ``pdb_paths`` is the primary conformer and supplies the key;
+    all paths (primary included) form the value, in upload order. Extra keyword
+    arguments are accepted and ignored.
+    """
+    primary_path = pdb_paths[0]
+    conformers = [*pdb_paths]
+    return {Path(primary_path).stem: conformers}

file_utils.py ADDED Viewed

	@@ -0,0 +1,73 @@

+"""File path helpers, ZIP operations, and CSV export."""
+import re
+import tempfile
+import zipfile
+from pathlib import Path
+import pandas as pd
+def _get_file_path(f):
+    """Normalize an uploaded-file value to a ``Path``.
+    Accepts a plain string, an object exposing a ``path`` attribute, or a dict with
+    a ``"path"`` key; anything else is converted with ``str()`` first.
+    """
+    if isinstance(f, str):
+        raw = f
+    elif hasattr(f, "path"):
+        raw = f.path
+    elif isinstance(f, dict) and "path" in f:
+        raw = f["path"]
+    else:
+        raw = str(f)
+    return Path(raw)
+def _sanitize_download_stem(stem: str) -> str:
+    """Return ``stem`` reduced to a safe file-name stem.
+    Every run of characters outside ``A-Z a-z 0-9 . _ -`` becomes a single ``_``,
+    then leading/trailing ``.``, ``_`` and ``-`` are stripped. If nothing is left,
+    ``"caliby"`` is returned.
+    """
+    collapsed = re.sub(r"[^A-Za-z0-9._-]+", "_", stem)
+    trimmed = collapsed.strip("._-")
+    if trimmed:
+        return trimmed
+    return "caliby"
+def _make_named_download_path(stem: str, suffix: str) -> str:
+    """Return ``<sanitized stem><suffix>`` inside a newly created temp directory.
+    The directory is made with ``tempfile.mkdtemp`` (prefix ``caliby_download_``);
+    the file itself is not created here.
+    """
+    fresh_dir = tempfile.mkdtemp(prefix="caliby_download_")
+    file_name = f"{_sanitize_download_stem(stem)}{suffix}"
+    return str(Path(fresh_dir) / file_name)
+def _get_results_stem(df: pd.DataFrame) -> str:
+    """Derive a download-friendly stem from the first row's ``Sample`` value.
+    A trailing ``_sample<digits>`` is removed from the value and the remainder is
+    passed through ``_sanitize_download_stem``. Returns ``"caliby"`` when the frame
+    has no ``Sample`` column or no rows.
+    """
+    # An empty frame has no first row; df.iloc[0] would raise IndexError.
+    if "Sample" not in df.columns or df.empty:
+        return "caliby"
+    sample_name = str(df.iloc[0]["Sample"])
+    return _sanitize_download_stem(re.sub(r"_sample\d+$", "", sample_name))
+def _copy_uploaded_files(pdb_files: list, tmpdir: Path) -> list[str]:
+    """Copy every uploaded file into ``tmpdir`` and return the new paths as strings.
+    Each file keeps its base name; results are in the same order as ``pdb_files``.
+    """
+    # NOTE(review): two uploads sharing a base name map to the same destination,
+    # so the later one overwrites the earlier — confirm callers never allow that.
+    def _copy_one(upload) -> str:
+        source = _get_file_path(upload)
+        destination = tmpdir / source.name
+        destination.write_bytes(source.read_bytes())
+        return str(destination)
+    return [_copy_one(upload) for upload in pdb_files]
+def _write_zip_from_paths(paths: list[str], download_stem: str, suffix: str) -> str | None:
+    """Bundle ``paths`` into a deflate-compressed ZIP and return the archive path.
+    Members are stored flat under their base names. Returns ``None`` without
+    creating anything when ``paths`` is empty.
+    """
+    if paths:
+        archive_path = _make_named_download_path(download_stem, suffix)
+        with zipfile.ZipFile(archive_path, "w", zipfile.ZIP_DEFLATED) as archive:
+            for member in paths:
+                archive.write(member, Path(member).name)
+        return archive_path
+    return None
+def _write_zip_from_dir(directory: Path, download_stem: str, suffix: str) -> str:
+    """Zip every regular file under ``directory`` (recursively) and return the archive path.
+    Member names are the files' paths relative to ``directory``.
+    """
+    archive_path = _make_named_download_path(download_stem, suffix)
+    with zipfile.ZipFile(archive_path, "w", zipfile.ZIP_DEFLATED) as archive:
+        for entry in directory.rglob("*"):
+            # Directories are skipped; only the files they contain are written.
+            if not entry.is_file():
+                continue
+            archive.write(entry, entry.relative_to(directory))
+    return archive_path
+def _df_to_csv(df: pd.DataFrame | None) -> str | None:
+    """Write ``df`` to ``<results stem>_results.csv`` and return the file path.
+    The index is not written. Returns ``None`` when ``df`` is ``None`` or empty.
+    """
+    if df is not None and not df.empty:
+        csv_path = _make_named_download_path(_get_results_stem(df), "_results.csv")
+        df.to_csv(csv_path, index=False)
+        return csv_path
+    return None

models.py ADDED Viewed

	@@ -0,0 +1,23 @@

+"""Model loading and caching."""
+import sys
+import types
+from caliby import CalibyModel
+MODELS: dict[str, CalibyModel] = {}
+def get_model(variant: str, device: str) -> CalibyModel:
+    """Return the cached model for ``variant``, loading it on first use.
+    ``MODELS`` is keyed by ``variant`` only: ``device`` is forwarded to
+    ``caliby.load_model`` the first time a variant is requested and is not
+    consulted on later cache hits.
+    """
+    if variant in MODELS:
+        return MODELS[variant]
+    # ZeroGPU's @spaces.GPU decorator may remove sys.modules["__main__"], and
+    # Lightning's load_from_checkpoint calls inspect.stack(), which needs it —
+    # so install a placeholder module before loading.
+    if "__main__" not in sys.modules:
+        sys.modules["__main__"] = types.ModuleType("__main__")
+    from caliby import load_model
+    model = load_model(variant, device=device)
+    MODELS[variant] = model
+    return model

pyproject.toml ADDED Viewed

	@@ -0,0 +1,30 @@

+[project]
+name = "caliby-hf"
+version = "0.1.0"
+dependencies = [
+  "caliby[af2] @ git+https://github.com/ProteinDesignLab/caliby@20d6757aaaba1662e71234ba25dde0f64b199683",
+  "gradio",
+  "huggingface_hub",
+  "molview>=0.1.0",
+  "omegaconf",
+  "pandas",
+  "pytest",
+  "spaces",
+  "torch",
+]
+[tool.pytest.ini_options]
+testpaths = ["tests"]
+pythonpath = ["."]
+[tool.ruff]
+line-length = 120
+exclude = ["chroma"]
+[tool.ruff.lint]
+select = ["E", "F", "I"]
+ignore = ["E731"]
+[tool.ruff.format]
+quote-style = "preserve"   # avoid churning quotes
+indent-style = "space"

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+caliby[af2] @ git+https://github.com/ProteinDesignLab/caliby@20d6757aaaba1662e71234ba25dde0f64b199683
+gradio
+huggingface_hub
+molview>=0.1.0
+omegaconf
+pandas
+spaces
+torch

self_consistency.py ADDED Viewed

	@@ -0,0 +1,35 @@

+"""AF2 self-consistency evaluation."""
+from pathlib import Path
+import pandas as pd
+from caliby import CalibyModel
+from file_utils import _write_zip_from_dir
+def _run_self_consistency(
+    model: CalibyModel,
+    df: pd.DataFrame,
+    out_pdb_list: list[str],
+    out_dir: Path,
+    download_stem: str,
+) -> tuple[str, dict[str, str]]:
+    """Run the model's self-consistency evaluation and collect its results.
+    Adds ``sc_ca_rmsd``, ``avg_ca_plddt`` and ``tmalign_score`` columns to ``df``
+    in place (NaN where a design or metric is missing from the results), reads any
+    ``struct_preds/af2_<stem>.pdb`` files that exist, and zips the whole
+    ``self_consistency`` output directory.
+    Returns:
+        ``(zip_path, af2_pdb_data)`` where ``af2_pdb_data`` maps a design's file
+        stem to the text of its predicted structure file.
+    """
+    from caliby.eval.eval_utils.folding_utils import clear_mem_torch
+    clear_mem_torch()
+    sc_out_dir = out_dir / "self_consistency"
+    id_to_metrics = model.self_consistency_eval(out_pdb_list, out_dir=str(sc_out_dir))
+    # Results are looked up by each design file's stem.
+    stems = [Path(design_path).stem for design_path in out_pdb_list]
+    for metric in ("sc_ca_rmsd", "avg_ca_plddt", "tmalign_score"):
+        df[metric] = [id_to_metrics.get(stem, {}).get(metric, float("nan")) for stem in stems]
+    pred_dir = sc_out_dir / "struct_preds"
+    af2_pdb_data = {}
+    for stem in stems:
+        predicted = pred_dir / f"af2_{stem}.pdb"
+        if predicted.exists():
+            af2_pdb_data[stem] = predicted.read_text()
+    sc_zip_path = _write_zip_from_dir(sc_out_dir, download_stem, "_self_consistency.zip")
+    return sc_zip_path, af2_pdb_data

tests/conftest.py ADDED Viewed

	@@ -0,0 +1,44 @@

+"""Module-level mocking for app.py import-time side effects."""
+import tempfile
+from unittest.mock import patch
+import pytest
+# ---------------------------------------------------------------------------
+# Module-level patches — applied BEFORE any test file can `import app`.
+#
+# app_config.py executes on import:
+#   1. snapshot_download(...)           → needs HF_TOKEN + network
+#   2. os.environ.setdefault(...)       → safe, no mock needed
+# app.py executes on import:
+#   1. base64-encode caliby_transparent.png  → file exists in repo, no mock
+#   2. @spaces.GPU decorator           → no-op when SPACES_ZERO_GPU unset
+# ---------------------------------------------------------------------------
+_FAKE_WEIGHTS_DIR = tempfile.mkdtemp(prefix="caliby_test_weights_")
+patch("huggingface_hub.snapshot_download", return_value=_FAKE_WEIGHTS_DIR).start()
+# ---------------------------------------------------------------------------
+# Shared fixtures
+# ---------------------------------------------------------------------------
+@pytest.fixture
+def sample_outputs():
+    """Two-sample output dict shaped like a CalibyModel.sample() result for 1YCR."""
+    return dict(
+        example_id=["1YCR", "1YCR"],
+        out_pdb=["/tmp/out/1YCR_sample0.cif", "/tmp/out/1YCR_sample1.cif"],
+        U=[-142.38, -139.92],
+        input_seq=["NATIVE_SEQ", "NATIVE_SEQ"],
+        seq=["MTEEQWAQ", "VSEQQWAQ"],
+    )
+@pytest.fixture
+def sample_outputs_with_out_pdbs(sample_outputs):
+    """Copy of ``sample_outputs`` with an extra 'out_pdbs' key mirroring 'out_pdb'."""
+    extended = dict(sample_outputs)
+    extended["out_pdbs"] = sample_outputs["out_pdb"]
+    return extended

tests/test_design_sequences.py ADDED Viewed

	@@ -0,0 +1,475 @@

+"""Integration tests for get_model, _setup_user_ensemble_dir, and design_sequences."""
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+import gradio as gr
+import pandas as pd
+import pytest
+import design
+import ensemble
+import models
+# ---------------------------------------------------------------------------
+# get_model
+# ---------------------------------------------------------------------------
+class TestGetModel:
+    """get_model loads through caliby.load_model once per variant and reuses the result."""
+    @pytest.fixture(autouse=True)
+    def _clear_model_cache(self):
+        # Empty the cache before and after each test so no model leaks between tests.
+        models.MODELS.clear()
+        yield
+        models.MODELS.clear()
+    def test_calls_load_model_with_variant_and_device(self):
+        fake_model = MagicMock()
+        with patch("caliby.load_model", return_value=fake_model) as load_model:
+            loaded = models.get_model("caliby", "cpu")
+            load_model.assert_called_once_with("caliby", device="cpu")
+            assert loaded is fake_model
+    def test_caches_model_on_repeat_call(self):
+        with patch("caliby.load_model", return_value=MagicMock()) as load_model:
+            results = [models.get_model("caliby", "cpu") for _ in range(2)]
+            load_model.assert_called_once()
+            assert results[0] is results[1]
+    def test_different_variants_cached_separately(self):
+        fake_a, fake_b = MagicMock(), MagicMock()
+        with patch("caliby.load_model", side_effect=[fake_a, fake_b]):
+            got_a = models.get_model("caliby", "cpu")
+            got_b = models.get_model("soluble_caliby_v1", "cpu")
+            assert got_a is fake_a
+            assert got_b is fake_b
+# ---------------------------------------------------------------------------
+# _setup_user_ensemble_dir
+# ---------------------------------------------------------------------------
+class TestSetupUserEnsembleDir:
+    """_setup_user_ensemble_dir keys all uploads under the first file's stem."""
+    def test_returns_dict_with_primary_key(self):
+        uploads = ["/tmp/primary.pdb", "/tmp/conf1.pdb", "/tmp/conf2.pdb"]
+        mapping = ensemble._setup_user_ensemble_dir(uploads)
+        assert "primary" in mapping
+        assert mapping["primary"] == ["/tmp/primary.pdb", "/tmp/conf1.pdb", "/tmp/conf2.pdb"]
+    def test_first_file_is_primary(self):
+        mapping = ensemble._setup_user_ensemble_dir(["/tmp/myprotein.cif", "/tmp/alt.pdb"])
+        primary_entry = mapping["myprotein"][0]
+        assert primary_entry == "/tmp/myprotein.cif"
+    def test_uses_stem_as_key(self):
+        mapping = ensemble._setup_user_ensemble_dir(["/path/to/foo.pdb"])
+        assert "foo" in mapping
+# ---------------------------------------------------------------------------
+# design_sequences — validation
+# ---------------------------------------------------------------------------
+class TestDesignSequencesValidation:
+    """Upload-count validation is reported through the returned message, not an exception."""
+    @staticmethod
+    def _design(files, mode):
+        """Call design_sequences with fixed default settings; return (df, message)."""
+        frame, message, _, _, _, _ = design.design_sequences(
+            files, mode, "caliby", 4, None, 0.1, "", "", "", "", "", 31
+        )
+        return frame, message
+    def test_no_files(self):
+        frame, message = self._design(None, "none")
+        assert frame.empty
+        assert "Upload at least one" in message
+    def test_empty_file_list(self):
+        frame, message = self._design([], "none")
+        assert frame.empty
+        assert "Upload at least one" in message
+    def test_single_mode_multiple_files(self):
+        _, message = self._design(["a.pdb", "b.pdb"], "none")
+        assert "exactly one file" in message
+    def test_synthetic_mode_multiple_files(self):
+        _, message = self._design(["a.pdb", "b.pdb"], "synthetic")
+        assert "exactly one file" in message
+    def test_user_mode_too_few_files(self):
+        _, message = self._design(["a.pdb"], "user")
+        assert "at least two" in message
+# ---------------------------------------------------------------------------
+# design_sequences — single structure mode
+# ---------------------------------------------------------------------------
+class TestDesignSequencesSingleMode:
+    """Tests ensemble_mode='none' — verifies correct args to CalibyModel.sample()."""
+    def _make_mock_outputs(self):
+        """Return a one-sample output dict used as the mocked sample() result."""
+        return {
+            "example_id": ["test"],
+            "out_pdb": ["/tmp/test_sample0.cif"],
+            "U": [-100.0],
+            "input_seq": ["NATIVE"],
+            "seq": ["ACDEF"],
+        }
+    def test_sample_called_with_correct_args(self, tmp_path):
+        """UI arguments are forwarded to sample() and constraints arrive as a DataFrame."""
+        pdb_file = tmp_path / "test.pdb"
+        pdb_file.write_text("FAKE PDB")
+        mock_model = MagicMock()
+        mock_model.sample.return_value = self._make_mock_outputs()
+        # Model loading, ZIP writing and CUDA detection are all stubbed out.
+        with (
+            patch.object(design, "get_model", return_value=mock_model),
+            patch.object(design, "_write_zip_from_paths", return_value=None),
+            patch("torch.cuda.is_available", return_value=False),
+        ):
+            design.design_sequences(
+                [str(pdb_file)],
+                "none",
+                "caliby",
+                4,
+                ["C"],
+                0.5,
+                "A1-100",
+                "A1-10",
+                "A26:A",
+                "A26:AVG",
+                "A10,B10",
+                31,
+            )
+            mock_model.sample.assert_called_once()
+            args, kwargs = mock_model.sample.call_args
+            # First positional arg is pdb_paths
+            assert isinstance(args[0], list)
+            assert len(args[0]) == 1
+            assert args[0][0].endswith("test.pdb")
+            assert kwargs["num_seqs_per_pdb"] == 4
+            assert kwargs["omit_aas"] == ["C"]
+            assert kwargs["temperature"] == 0.5
+            assert kwargs["num_workers"] == 0
+            assert isinstance(kwargs["out_dir"], str)
+            assert isinstance(kwargs["pos_constraint_df"], pd.DataFrame)
+            # The constraint row is keyed by the uploaded file's stem.
+            assert kwargs["pos_constraint_df"].iloc[0]["pdb_key"] == "test"
+    def test_no_constraints_passes_none(self, tmp_path):
+        """With every constraint field blank, pos_constraint_df is passed as None."""
+        pdb_file = tmp_path / "test.pdb"
+        pdb_file.write_text("FAKE")
+        mock_model = MagicMock()
+        mock_model.sample.return_value = self._make_mock_outputs()
+        with (
+            patch.object(design, "get_model", return_value=mock_model),
+            patch.object(design, "_write_zip_from_paths", return_value=None),
+            patch("torch.cuda.is_available", return_value=False),
+        ):
+            design.design_sequences(
+                [str(pdb_file)],
+                "none",
+                "caliby",
+                1,
+                None,
+                0.1,
+                "",
+                "",
+                "",
+                "",
+                "",
+                31,
+            )
+            assert mock_model.sample.call_args[1]["pos_constraint_df"] is None
+    def test_empty_omit_aas_becomes_none(self, tmp_path):
+        """An empty omit_aas list is forwarded to sample() as None."""
+        pdb_file = tmp_path / "test.pdb"
+        pdb_file.write_text("FAKE")
+        mock_model = MagicMock()
+        mock_model.sample.return_value = self._make_mock_outputs()
+        with (
+            patch.object(design, "get_model", return_value=mock_model),
+            patch.object(design, "_write_zip_from_paths", return_value=None),
+            patch("torch.cuda.is_available", return_value=False),
+        ):
+            design.design_sequences(
+                [str(pdb_file)],
+                "none",
+                "caliby",
+                1,
+                [],
+                0.1,
+                "",
+                "",
+                "",
+                "",
+                "",
+                31,
+            )
+            assert mock_model.sample.call_args[1]["omit_aas"] is None
+# ---------------------------------------------------------------------------
+# design_sequences — user ensemble mode
+# ---------------------------------------------------------------------------
+class TestDesignSequencesUserEnsembleMode:
+    """Tests ensemble_mode='user' — verifies correct args to CalibyModel.ensemble_sample()."""
+    def _make_mock_outputs(self):
+        """Return a one-sample output dict used as the mocked ensemble_sample() result."""
+        return {
+            "example_id": ["primary"],
+            "out_pdb": ["/tmp/primary_sample0.cif"],
+            "U": [-100.0],
+            "input_seq": ["NATIVE"],
+            "seq": ["AAA"],
+        }
+    def test_calls_ensemble_sample(self, tmp_path):
+        """The dict from _setup_user_ensemble_dir is handed to ensemble_sample() as-is."""
+        pdb1 = tmp_path / "primary.pdb"
+        pdb2 = tmp_path / "conf1.pdb"
+        pdb1.write_text("PDB1")
+        pdb2.write_text("PDB2")
+        mock_model = MagicMock()
+        mock_model.ensemble_sample.return_value = self._make_mock_outputs()
+        mock_pdb_to_conf = {"primary": ["/some/primary.pdb", "/some/conf1.pdb"]}
+        with (
+            patch.object(design, "get_model", return_value=mock_model),
+            patch.object(design, "_setup_user_ensemble_dir", return_value=mock_pdb_to_conf),
+            patch.object(design, "_write_zip_from_paths", return_value=None),
+            patch("torch.cuda.is_available", return_value=False),
+        ):
+            design.design_sequences([str(pdb1), str(pdb2)], "user", "caliby", 4, None, 0.1, "", "", "", "", "", 31)
+            mock_model.ensemble_sample.assert_called_once()
+            args, kwargs = mock_model.ensemble_sample.call_args
+            # Identity check: the very same mapping object must be forwarded.
+            assert args[0] is mock_pdb_to_conf
+            assert kwargs["pos_constraint_df"] is None
+    def test_constraints_expand_via_make_ensemble_constraints(self, tmp_path):
+        """A non-empty constraint is routed through caliby.make_ensemble_constraints."""
+        pdb1 = tmp_path / "primary.pdb"
+        pdb2 = tmp_path / "conf1.pdb"
+        pdb1.write_text("PDB1")
+        pdb2.write_text("PDB2")
+        mock_model = MagicMock()
+        mock_model.ensemble_sample.return_value = self._make_mock_outputs()
+        mock_pdb_to_conf = {"primary": ["a.pdb", "b.pdb"]}
+        expanded_df = pd.DataFrame({"pdb_key": ["a", "b"], "fixed_pos_seq": ["A1-10", "A1-10"]})
+        with (
+            patch.object(design, "get_model", return_value=mock_model),
+            patch.object(design, "_setup_user_ensemble_dir", return_value=mock_pdb_to_conf),
+            patch("caliby.make_ensemble_constraints", return_value=expanded_df) as mock_expand,
+            patch.object(design, "_write_zip_from_paths", return_value=None),
+            patch("torch.cuda.is_available", return_value=False),
+        ):
+            design.design_sequences([str(pdb1), str(pdb2)], "user", "caliby", 1, None, 0.1, "A1-10", "", "", "", "", 31)
+            mock_expand.assert_called_once()
+            # Both arguments are expected positionally: (constraints dict, pdb_to_conformers).
+            constraints_dict, pdb_to_conf_arg = mock_expand.call_args[0]
+            assert isinstance(constraints_dict, dict)
+            assert "primary" in constraints_dict
+            assert constraints_dict["primary"]["fixed_pos_seq"] == "A1-10"
+            assert pdb_to_conf_arg is mock_pdb_to_conf
+# ---------------------------------------------------------------------------
+# design_sequences — error handling
+# ---------------------------------------------------------------------------
+class TestDesignSequencesErrorHandling:
+    """Exceptions raised while loading the model propagate out of design_sequences."""
+    @staticmethod
+    def _expect_propagation(error, pattern, pdb_paths):
+        """Make get_model raise ``error`` and assert design_sequences re-raises it."""
+        with (
+            patch.object(design, "get_model", side_effect=error),
+            patch("torch.cuda.is_available", return_value=False),
+        ):
+            with pytest.raises(type(error), match=pattern):
+                design.design_sequences(pdb_paths, "none", "caliby", 1, None, 0.1, "", "", "", "", "", 31)
+    def test_value_error(self, tmp_path):
+        source = tmp_path / "test.pdb"
+        source.write_text("PDB")
+        self._expect_propagation(ValueError("bad config"), "bad config", [str(source)])
+    def test_file_not_found(self, tmp_path):
+        # The upload path is deliberately never created on disk.
+        ghost = tmp_path / "ghost.pdb"
+        self._expect_propagation(FileNotFoundError("missing.pdb"), "missing.pdb", [str(ghost)])
+    def test_unexpected_runtime_error(self, tmp_path):
+        source = tmp_path / "test.pdb"
+        source.write_text("PDB")
+        self._expect_propagation(RuntimeError("GPU OOM"), "GPU OOM", [str(source)])
+# ---------------------------------------------------------------------------
+# design_sequences — zip output
+# ---------------------------------------------------------------------------
+class TestDesignSequencesZipOutput:
+    """Tests ZIP file creation from output CIF files."""
+    def test_creates_zip_when_out_pdb_present(self, tmp_path):
+        """A real output file yields an on-disk archive named after the input stem."""
+        pdb_file = tmp_path / "test.pdb"
+        pdb_file.write_text("PDB")
+        out_cif = tmp_path / "test_sample0.cif"
+        out_cif.write_text("CIF CONTENT")
+        mock_model = MagicMock()
+        mock_model.sample.return_value = {
+            "example_id": ["test"],
+            "out_pdb": [str(out_cif)],
+            "U": [-100.0],
+            "input_seq": ["NATIVE"],
+            "seq": ["AAA"],
+        }
+        # _write_zip_from_paths is intentionally NOT patched here: the real archive is written.
+        with (
+            patch.object(design, "get_model", return_value=mock_model),
+            patch("torch.cuda.is_available", return_value=False),
+        ):
+            _, _, zip_path, _, _, _ = design.design_sequences(
+                [str(pdb_file)], "none", "caliby", 1, None, 0.1, "", "", "", "", "", 31
+            )
+            assert zip_path is not None
+            assert Path(zip_path).name == "test_designs.zip"
+            assert Path(zip_path).exists()
+    def test_empty_out_pdb_raises_for_invalid_caliby_output(self, tmp_path):
+        """An output dict whose lists differ in length surfaces as a ValueError."""
+        pdb_file = tmp_path / "test.pdb"
+        pdb_file.write_text("PDB")
+        mock_model = MagicMock()
+        mock_model.sample.return_value = {
+            "example_id": ["test"],
+            "out_pdb": [],
+            "U": [-100.0],
+            "input_seq": ["NATIVE"],
+            "seq": ["AAA"],
+        }
+        with (
+            patch.object(design, "get_model", return_value=mock_model),
+            patch("torch.cuda.is_available", return_value=False),
+        ):
+            # NOTE(review): the matched text looks like pandas' DataFrame-construction
+            # error for unequal column lengths — confirm where it is raised.
+            with pytest.raises(ValueError, match="All arrays must be of the same length"):
+                design.design_sequences([str(pdb_file)], "none", "caliby", 1, None, 0.1, "", "", "", "", "", 31)
+# ---------------------------------------------------------------------------
+# design_sequences — ZeroGPU quota-aware retry
+# ---------------------------------------------------------------------------
+class TestParseQuotaLeft:
+    """_parse_quota_left pulls the "<N>s left" figure out of a ZeroGPU quota error."""
+    def test_extracts_remaining_seconds(self):
+        message = "You have exceeded your free GPU quota (210s requested vs. 45s left). Try again in 0:02:45"
+        assert design._parse_quota_left(gr.Error(message)) == 45
+    def test_extracts_zero_remaining(self):
+        message = "(210s requested vs. 0s left). Try again in 0:03:30"
+        assert design._parse_quota_left(gr.Error(message)) == 0
+    def test_returns_none_for_non_quota_error(self):
+        unrelated = gr.Error("Some other error")
+        assert design._parse_quota_left(unrelated) is None
+    def test_returns_none_for_no_message_attr(self):
+        # A plain RuntimeError stands in for an exception lacking a ``message`` attribute.
+        plain = RuntimeError("no message attribute")
+        assert design._parse_quota_left(plain) is None
+class TestDesignSequencesQuotaRetry:
+    """Tests ZeroGPU quota-aware retry logic in design_sequences wrapper."""
+    # Positional arguments reused by tests where _design_sequences_gpu is fully mocked.
+    _DESIGN_ARGS = (None, "none", "caliby", 4, None, 0.1, "", "", "", "", "", 31)
+    def test_retry_on_quota_exceeded(self, tmp_path):
+        """A quota error with time left triggers exactly one retry and resets the override."""
+        pdb_file = tmp_path / "test.pdb"
+        pdb_file.write_text("PDB")
+        mock_model = MagicMock()
+        mock_model.sample.return_value = {
+            "example_id": ["test"],
+            "out_pdb": ["/tmp/t.cif"],
+            "U": [-100.0],
+            "input_seq": ["N"],
+            "seq": ["A"],
+        }
+        quota_error = gr.Error("(210s requested vs. 45s left). Try again in 0:02:45")
+        call_count = 0
+        # Capture the real implementation before it is patched so the retry can run it.
+        original_fn = design._design_sequences_gpu
+        def side_effect(*args, **kwargs):
+            # First call fails with the quota error; later calls delegate to the real function.
+            nonlocal call_count
+            call_count += 1
+            if call_count == 1:
+                raise quota_error
+            return original_fn(*args, **kwargs)
+        with (
+            patch.object(design, "_design_sequences_gpu", side_effect=side_effect),
+            patch.object(design, "get_model", return_value=mock_model),
+            patch.object(design, "_write_zip_from_paths", return_value=None),
+            patch("torch.cuda.is_available", return_value=False),
+        ):
+            design.design_sequences([str(pdb_file)], "none", "caliby", 1, None, 0.1, "", "", "", "", "", 31)
+            assert call_count == 2
+            assert design._gpu_duration_override is None  # Reset after retry
+    def test_no_retry_when_remaining_zero(self):
+        """A quota error reporting 0s left is re-raised instead of retried."""
+        quota_error = gr.Error("(210s requested vs. 0s left). Try again in 0:03:30")
+        with patch.object(design, "_design_sequences_gpu", side_effect=quota_error):
+            with pytest.raises(gr.Error):
+                design.design_sequences(*self._DESIGN_ARGS)
+    def test_no_retry_for_non_quota_gr_error(self):
+        """A gr.Error without a parsable "s left" figure is re-raised unchanged."""
+        other_error = gr.Error("The requested GPU duration (210s) is larger than the maximum allowed")
+        with patch.object(design, "_design_sequences_gpu", side_effect=other_error):
+            with pytest.raises(gr.Error, match="larger than the maximum allowed"):
+                design.design_sequences(*self._DESIGN_ARGS)
+    def test_non_gradio_errors_propagate(self):
+        """ValueError, RuntimeError etc. are not caught by the retry logic."""
+        with patch.object(design, "_design_sequences_gpu", side_effect=ValueError("bad")):
+            with pytest.raises(ValueError, match="bad"):
+                design.design_sequences(*self._DESIGN_ARGS)

tests/test_helpers.py ADDED Viewed

	@@ -0,0 +1,295 @@

+"""Unit tests for helper functions."""
+import types
+from pathlib import Path
+import pandas as pd
+import constraints
+import design
+import file_utils
+import viewers
+# ---------------------------------------------------------------------------
+# _get_file_path
+# ---------------------------------------------------------------------------
+class TestGetFilePath:
+    """_get_file_path turns each supported upload representation into a Path."""
+    def test_string_input(self):
+        resolved = file_utils._get_file_path("/some/path.pdb")
+        assert resolved == Path("/some/path.pdb")
+    def test_object_with_path_attr(self):
+        upload = types.SimpleNamespace(path="/uploads/file.pdb")
+        resolved = file_utils._get_file_path(upload)
+        assert resolved == Path("/uploads/file.pdb")
+    def test_dict_with_path_key(self):
+        upload = {"path": "/uploads/file.pdb", "name": "file.pdb"}
+        assert file_utils._get_file_path(upload) == Path("/uploads/file.pdb")
+    def test_fallback_to_str(self):
+        # Unsupported values are stringified rather than rejected.
+        resolved = file_utils._get_file_path(42)
+        assert resolved == Path("42")
+# ---------------------------------------------------------------------------
+# _build_pos_constraint_df
+# ---------------------------------------------------------------------------
+class TestBuildPosConstraintDf:
+    """_build_pos_constraint_df emits one row holding only the populated constraints."""
+    # Column names expected when every constraint field is filled in.
+    _ALL_COLUMNS = frozenset(
+        {
+            "pdb_key",
+            "fixed_pos_seq",
+            "fixed_pos_scn",
+            "fixed_pos_override_seq",
+            "pos_restrict_aatype",
+            "symmetry_pos",
+        }
+    )
+    def test_all_empty_returns_none(self):
+        built = constraints._build_pos_constraint_df("1YCR", "", "", "", "", "")
+        assert built is None
+    def test_all_whitespace_returns_none(self):
+        built = constraints._build_pos_constraint_df("1YCR", "  ", "  ", "  ", "  ", "  ")
+        assert built is None
+    def test_single_field_populated(self):
+        built = constraints._build_pos_constraint_df("1YCR", "A1-100", "", "", "", "")
+        assert built is not None
+        assert len(built) == 1
+        row = built.iloc[0]
+        assert row["pdb_key"] == "1YCR"
+        assert row["fixed_pos_seq"] == "A1-100"
+        # Blank fields must not produce columns.
+        assert "fixed_pos_scn" not in built.columns
+    def test_all_fields_populated(self):
+        built = constraints._build_pos_constraint_df("X", "A1", "B2", "A3:G", "A4:V", "A5,B5")
+        assert set(built.columns) == self._ALL_COLUMNS
+    def test_columns_match_caliby_valid_columns(self):
+        """All columns must be in caliby's _VALID_POS_CONSTRAINT_COLUMNS."""
+        built = constraints._build_pos_constraint_df("X", "A1", "B2", "A3:G", "A4:V", "A5,B5")
+        assert set(built.columns).issubset(self._ALL_COLUMNS)
+# ---------------------------------------------------------------------------
+# _df_to_csv
+# ---------------------------------------------------------------------------
+class TestDfToCsv:
+    """_df_to_csv writes non-empty frames to a named CSV and skips empty input."""
+    def test_none_returns_none(self):
+        written = file_utils._df_to_csv(None)
+        assert written is None
+    def test_empty_dataframe_returns_none(self):
+        written = file_utils._df_to_csv(pd.DataFrame())
+        assert written is None
+    def test_valid_dataframe_roundtrips(self):
+        original = pd.DataFrame({"pdb_key": ["1YCR"], "fixed_pos_seq": ["A1-100"]})
+        csv_path = file_utils._df_to_csv(original)
+        assert csv_path is not None
+        assert Path(csv_path).exists()
+        assert csv_path.endswith(".csv")
+        # Reading the file back must reproduce the frame exactly.
+        pd.testing.assert_frame_equal(original, pd.read_csv(csv_path))
+    def test_uses_sample_name_for_csv_basename(self):
+        results = pd.DataFrame(
+            {
+                "Sample": ["1YCR_sample0"],
+                "Sequence": ["ACDE"],
+                "Energy (U)": [-1.0],
+            }
+        )
+        csv_path = file_utils._df_to_csv(results)
+        assert csv_path is not None
+        assert Path(csv_path).name == "1YCR_results.csv"
+class TestCsvDownloadOutput:
+    """Formats CSV downloads for the Gradio file component."""
+    def test_hides_component_for_empty_dataframe(self):
+        update = viewers._csv_download_output(pd.DataFrame())
+        assert update["visible"] is False
+        assert update["value"] is None
+    def test_shows_named_csv_for_results_dataframe(self):
+        df = pd.DataFrame(
+            {
+                "Sample": ["1YCR_sample0"],
+                "Sequence": ["ACDE"],
+                "Energy (U)": [-1.0],
+            }
+        )
+        update = viewers._csv_download_output(df)
+        assert update["visible"] is True
+        assert Path(update["value"]).name == "1YCR_results.csv"
+class TestFormatResultsDisplay:
+    """Formats the on-screen results table without changing the raw dataframe."""
+    def test_formats_last_four_numeric_columns(self):
+        df = pd.DataFrame(
+            {
+                "Sample": ["1YCR_sample0"],
+                "Sequence": ["ACDE"],
+                "Energy (U)": [-1.2345],
+                "sc_ca_rmsd": [1.0],
+                "avg_ca_plddt": [88.888],
+                "tmalign_score": [0.12345],
+            }
+        )
+        styler = viewers._format_results_display(df)
+        html = styler.to_html()
+        assert "-1.23" in html
+        assert ">1<" in html
+        assert "88.89" in html
+        assert "0.12" in html
+# ---------------------------------------------------------------------------
+# _format_outputs
+# ---------------------------------------------------------------------------
+class TestFormatOutputs:
+    """Formats caliby output dict into (DataFrame, FASTA, out_pdb_list)."""
+    def test_dataframe_structure(self, sample_outputs_with_out_pdbs):
+        df, _, _ = design._format_outputs(sample_outputs_with_out_pdbs)
+        assert list(df.columns) == ["Sample", "Sequence", "Energy (U)"]
+        assert len(df) == 2
+    def test_sample_names_from_path_stems(self, sample_outputs_with_out_pdbs):
+        df, _, _ = design._format_outputs(sample_outputs_with_out_pdbs)
+        assert list(df["Sample"]) == ["1YCR_sample0", "1YCR_sample1"]
+    def test_fasta_format(self, sample_outputs_with_out_pdbs):
+        _, fasta, _ = design._format_outputs(sample_outputs_with_out_pdbs)
+        lines = fasta.strip().split("\n")
+        assert lines[0] == ">1YCR_sample0"
+        assert lines[1] == "MTEEQWAQ"
+        assert lines[2] == ">1YCR_sample1"
+        assert lines[3] == "VSEQQWAQ"
+    def test_uses_caliby_out_pdb_key(self, sample_outputs):
+        assert "out_pdbs" not in sample_outputs
+        df, fasta, out_pdb_list = design._format_outputs(sample_outputs)
+        assert list(df["Sample"]) == ["1YCR_sample0", "1YCR_sample1"]
+        assert ">1YCR_sample0" in fasta
+        assert out_pdb_list == sample_outputs["out_pdb"]
+# ---------------------------------------------------------------------------
+# _get_best_sc_sample
+# ---------------------------------------------------------------------------
+class TestGetBestScSample:
+    """Picks the sample with the highest tmalign_score."""
+    def test_picks_highest_tmalign_score(self):
+        df = pd.DataFrame(
+            {
+                "Sample": ["1YCR_sample0", "1YCR_sample1", "1YCR_sample2"],
+                "tmalign_score": [0.5, 0.9, 0.7],
+            }
+        )
+        assert viewers._get_best_sc_sample(df) == "1YCR_sample1"
+    def test_falls_back_to_first_when_no_tmalign(self):
+        df = pd.DataFrame({"Sample": ["1YCR_sample0", "1YCR_sample1"]})
+        assert viewers._get_best_sc_sample(df) == "1YCR_sample0"
+    def test_falls_back_to_first_when_all_nan(self):
+        df = pd.DataFrame(
+            {
+                "Sample": ["A_sample0", "A_sample1"],
+                "tmalign_score": [float("nan"), float("nan")],
+            }
+        )
+        assert viewers._get_best_sc_sample(df) == "A_sample0"
+    def test_returns_none_for_empty_df(self):
+        assert viewers._get_best_sc_sample(pd.DataFrame()) is None
+# ---------------------------------------------------------------------------
+# _render_af2_viewer / _render_reference_viewer
+# ---------------------------------------------------------------------------
+# Smallest PDB text shared by the viewer tests below: one ATOM record (CA of ALA 1, chain A,
+# at the origin, occupancy 1.00, B-factor 90.00) followed by END.
+# NOTE(review): 90.00 presumably stands in for a pLDDT value in the B-factor column — confirm.
+_MINIMAL_PDB = "ATOM      1  CA  ALA A   1       0.000   0.000   0.000  1.00 90.00           C\nEND\n"
+class TestRenderAf2Viewer:
+    """Renders AF2 prediction with pLDDT coloring via molview."""
+    def test_returns_html_with_valid_data(self):
+        html = viewers._render_af2_viewer("test_sample0", {"test_sample0": _MINIMAL_PDB})
+        assert "iframe" in html
+    def test_returns_empty_for_missing_sample(self):
+        assert viewers._render_af2_viewer("missing", {"other": _MINIMAL_PDB}) == ""
+    def test_returns_empty_for_none_sample(self):
+        assert viewers._render_af2_viewer(None, {"test": _MINIMAL_PDB}) == ""
+    def test_returns_empty_for_empty_data(self):
+        assert viewers._render_af2_viewer("test", {}) == ""
+class TestRenderReferenceViewer:
+    """Renders original input PDB with chain coloring via molview."""
+    def test_maps_sample_to_input_key(self):
+        html = viewers._render_reference_viewer("1YCR_sample0", {"1YCR": _MINIMAL_PDB})
+        assert "iframe" in html
+    def test_returns_empty_when_input_key_missing(self):
+        assert viewers._render_reference_viewer("1YCR_sample0", {"OTHER": _MINIMAL_PDB}) == ""
+    def test_returns_empty_for_none_sample(self):
+        assert viewers._render_reference_viewer(None, {"1YCR": _MINIMAL_PDB}) == ""
+# ---------------------------------------------------------------------------
+# _update_viewers
+# ---------------------------------------------------------------------------
+class TestUpdateViewers:
+    """Combined handler for overlay toggle."""
+    def test_overlay_off_hides_reference(self):
+        af2_html, ref_update = viewers._update_viewers("s0", {"s0": _MINIMAL_PDB}, {"s": _MINIMAL_PDB}, False)
+        assert "iframe" in af2_html
+        assert ref_update["visible"] is False
+    def test_overlay_on_shows_reference(self):
+        af2_html, ref_update = viewers._update_viewers(
+            "s_sample0", {"s_sample0": _MINIMAL_PDB}, {"s": _MINIMAL_PDB}, True
+        )
+        assert "iframe" in af2_html
+        assert ref_update["visible"] is True
+        assert "iframe" in ref_update["value"]

viewers.py ADDED Viewed

	@@ -0,0 +1,79 @@

+"""Structure viewers and display formatting for the Gradio UI."""
+import re
+import gradio as gr
+import pandas as pd
+from file_utils import _df_to_csv
+def _format_display_number(value) -> str:
+    if pd.isna(value):
+        return ""
+    return f"{float(value):.2f}".rstrip("0").rstrip(".")
+def _file_output(value: str | None) -> dict:
+    return gr.update(value=value, visible=bool(value))
+def _csv_download_output(df: pd.DataFrame | None) -> dict:
+    return _file_output(_df_to_csv(df))
+def _format_results_display(df: pd.DataFrame):
+    numeric_columns = [col for col in df.columns[-4:] if pd.api.types.is_numeric_dtype(df[col])]
+    if not numeric_columns:
+        return df
+    return df.style.format({col: _format_display_number for col in numeric_columns})
+def _get_best_sc_sample(df: pd.DataFrame) -> str | None:
+    if df.empty or "Sample" not in df.columns:
+        return None
+    if "tmalign_score" in df.columns and df["tmalign_score"].notna().any():
+        return str(df.loc[df["tmalign_score"].idxmax(), "Sample"])
+    return str(df.iloc[0]["Sample"])
+def _render_af2_viewer(sample_name: str | None, af2_pdb_data: dict[str, str], color_mode: str = "plddt") -> str:
+    if not sample_name or not af2_pdb_data or sample_name not in af2_pdb_data:
+        return ""
+    import molview as mv
+    v = mv.view(width=840, height=500)
+    v.addModel(af2_pdb_data[sample_name], name=f"AF2: {sample_name}")
+    v.setColorMode(color_mode)
+    v.setBackgroundColor("#000000")
+    return v._repr_html_()
+def _render_reference_viewer(sample_name: str | None, input_pdb_data: dict[str, str], color_mode: str = "chain") -> str:
+    if not sample_name or not input_pdb_data:
+        return ""
+    input_key = re.sub(r"_sample\d+$", "", sample_name)
+    if input_key not in input_pdb_data:
+        return ""
+    import molview as mv
+    v = mv.view(width=840, height=500)
+    v.addModel(input_pdb_data[input_key], name=f"Reference: {input_key}")
+    v.setColorMode(color_mode)
+    v.setBackgroundColor("#000000")
+    return v._repr_html_()
+def _update_viewers(
+    best_sample: str,
+    af2_pdb_data: dict[str, str],
+    input_pdb_data: dict[str, str],
+    show_overlay: bool,
+    color_mode: str = "plddt",
+    ref_color_mode: str = "chain",
+):
+    af2_html = _render_af2_viewer(best_sample, af2_pdb_data, color_mode)
+    if show_overlay:
+        ref_html = _render_reference_viewer(best_sample, input_pdb_data, ref_color_mode)
+        return af2_html, gr.update(value=ref_html, visible=True), gr.update(visible=True)
+    return af2_html, gr.update(value="", visible=False), gr.update(visible=False)