Spaces:

Angione-Lab
/

FateFormerExplorer

Running

App Files Files Community

kaveh committed 4 days ago

Commit

acd0d36

1 Parent(s): 34e8e2b

added help and refined

Browse files

Files changed (23) hide show

metabolic_map.svg +0 -0
requirements.txt +1 -1
streamlit_hf/app.py +14 -3
streamlit_hf/home.py +79 -49
streamlit_hf/lib/io.py +7 -38
streamlit_hf/lib/plots.py +51 -14
streamlit_hf/lib/ui.py +20 -2
streamlit_hf/pages/1_Single_Cell_Explorer.py +46 -23
streamlit_hf/pages/feature_insights/1_Global_overview.py +21 -9
streamlit_hf/pages/feature_insights/2_Modality_spotlight.py +34 -37
streamlit_hf/pages/feature_insights/3_Shift_vs_attention.py +32 -16
streamlit_hf/pages/feature_insights/4_Attention_vs_prediction.py +19 -31
streamlit_hf/pages/feature_insights/5_Full_table.py +9 -6
streamlit_hf/pages/flux_analysis/1_Pathway_map.py +40 -41
streamlit_hf/pages/flux_analysis/2_Differential_fate.py +13 -15
streamlit_hf/pages/flux_analysis/3_Reaction_ranking.py +27 -11
streamlit_hf/pages/flux_analysis/4_Model_metadata.py +46 -59
streamlit_hf/pages/flux_analysis/5_Interactive_map.py +10 -9
streamlit_hf/pages/gene_expression/1_Pathway_enrichment.py +17 -35
streamlit_hf/pages/gene_expression/2_Motif_activity.py +20 -26
streamlit_hf/pages/gene_expression/3_Gene_table.py +20 -4
streamlit_hf/pages/gene_expression/4_Motif_table.py +20 -4
streamlit_hf/static/experiment.svg +355 -0

metabolic_map.svg DELETED Viewed

requirements.txt CHANGED Viewed

@@ -1,4 +1,4 @@
-# FateFormerApp — training, precompute, and local Streamlit dev
 torch>=2.1.0
 numpy>=1.24.0
 pandas>=2.0.0

+# FateFormerApp: training, precompute, and local Streamlit dev
 torch>=2.1.0
 numpy>=1.24.0
 pandas>=2.0.0

streamlit_hf/app.py CHANGED Viewed

@@ -1,13 +1,19 @@
 """
 FateFormer Explorer: interactive analysis hub.
-Run from repository root: PYTHONPATH=. streamlit run streamlit_hf/app.py
 """
-from pathlib import Path
-import streamlit as st
 _APP_DIR = Path(__file__).resolve().parent
 _ICON_PATH = _APP_DIR / "static" / "app_icon.svg"
 _page_icon_kw = {"page_icon": str(_ICON_PATH)} if _ICON_PATH.is_file() else {}
@@ -18,6 +24,11 @@ st.set_page_config(
     **_page_icon_kw,
 )
 _home = str(_APP_DIR / "home.py")
 _p1 = str(_APP_DIR / "pages" / "1_Single_Cell_Explorer.py")
 _fi = _APP_DIR / "pages" / "feature_insights"

 """
 FateFormer Explorer: interactive analysis hub.
+Run: streamlit run streamlit_hf/app.py (repo root) or streamlit run app.py (from streamlit_hf/).
 """
+from __future__ import annotations
+import sys
+from pathlib import Path
 _APP_DIR = Path(__file__).resolve().parent
+_REPO_ROOT = _APP_DIR.parent
+if str(_REPO_ROOT) not in sys.path:
+    sys.path.insert(0, str(_REPO_ROOT))
+import streamlit as st
 _ICON_PATH = _APP_DIR / "static" / "app_icon.svg"
 _page_icon_kw = {"page_icon": str(_ICON_PATH)} if _ICON_PATH.is_file() else {}
     **_page_icon_kw,
 )
+# Preload shared UI helpers before page scripts. Streamlit's file watcher can
+# delete watched modules from sys.modules on save; if that happens mid-import,
+# importlib may raise KeyError on the module name. Loading here narrows the race.
+import streamlit_hf.lib.ui as _streamlit_ui_preload  # noqa: F401, E402
 _home = str(_APP_DIR / "home.py")
 _p1 = str(_APP_DIR / "pages" / "1_Single_Cell_Explorer.py")
 _fi = _APP_DIR / "pages" / "feature_insights"

streamlit_hf/home.py CHANGED Viewed

@@ -18,6 +18,11 @@ from streamlit_hf.lib import plots
 from streamlit_hf.lib import ui
 _CACHE = Path(__file__).resolve().parent / "cache"
 _APP_NAME = "FateFormer Explorer"
 _HERO_EMOJI = "\U0001f9ec"  # DNA (matches HF Space card tone)
@@ -27,23 +32,21 @@ _HOME_RANK_TOP_N = 15
 _VALIDATION_ROC_AUC = 0.93
 _UMAP_HOME_TITLE = "Validation latent space (UMAP)"
-_UMAP_HELP_MD = """
-**What this is:** A 2‑D **UMAP** of validation cells in the model’s **shared latent space** (RNA + chromatin + flux combined). Nearby points have **similar multimodal profiles**.
-**How to read it:** Axes are **unitless**—UMAP preserves *local* neighbourhoods, not real physical scales. **Colour** is the **experimental fate** from CellTag‑Multi labels. **Hover** a point for cell-level details.
-**Takeaway:** See whether biological fates form separable groups in the representation the model actually uses.
 """
 _GLOBAL_RANK_HELP_MD = """
-**What this is:** Three linked summaries of **which features** (genes, peaks, or reactions) the analyses rank highest **globally** across modalities.
 **Panels:** **Left / middle** = top features by **latent shift** importance and by **attention** (bars are **min‑max scaled within that panel** so the longest bar is 1). **Right** = **modality mix** (RNA vs ATAC vs Flux) among a pool of **strongest** features by **mean rank** (lower mean rank = higher joint priority).
 **How to read it:** Longer bars mean stronger measured influence for that metric. **Colours** mark **modality**. The donut answers: “Among the most important features in this pool, which data type dominates?”.
-**Takeaway:** Connects **mechanistic probes** (shift) with **what the transformer emphasises** (attention) in one glance.
 """
 _APP_SUBTITLE = (
@@ -51,12 +54,15 @@ _APP_SUBTITLE = (
     "to predict single-cell fate, with interpretable attention and latent-shift rankings across omics layers."
 )
-_BIOLOGY_CONTEXT_MARKDOWN = """
-**At a glance**
-- **Biological setting:** **FateFormer** models **direct reprogramming** from mouse embryonic fibroblasts (**MEFs**) to induced endoderm progenitors (**iEPs**), combining **transcriptome (scRNA-seq)**, **chromatin (scATAC-seq)**, and **genome-scale metabolic flux** so fate is not inferred from RNA alone; epigenetic and metabolic context matter.
-- **Data & labels:** Trained on a **large sparse-modality** atlas (**>150,000** cells); **2,110** early cells carry **CellTag-Multi** clonal fate tags, the same experimental labels used to colour validation cells in **UMAP** views here.
-- **Model design:** A **transformer** learns **shared representations** across modalities, handles **missing modalities** and **scarce fate labels**, and ties early transcription, chromatin accessibility, and metabolic activity to **later lineage outcomes**, going beyond RNA-only views of reprogramming.
 """
@@ -84,6 +90,16 @@ st.markdown(
     unsafe_allow_html=True,
 )
 bundle = io.load_latent_bundle()
 df_features = io.load_df_features()
 samples = io.load_samples_df()
@@ -164,26 +180,33 @@ if bundle is not None and df_features is not None:
     with row1_story:
         st.markdown(_BIOLOGY_CONTEXT_MARKDOWN)
     with row1_umap:
-        ui.plot_caption_with_help(
-            "Each point is a cell · colours = experimental fate labels · validation split",
-            _UMAP_HELP_MD,
-            key="home_umap_help",
-        )
-        fig_u = plots.latent_scatter(
-            plot_umap,
-            "label",
-            title=_UMAP_HOME_TITLE,
-            width=780,
-            height=440,
-            marker_size=5.2,
-            marker_opacity=0.72,
-        )
-        fig_u.update_layout(margin=dict(l=20, r=8, t=52, b=20), title_font_size=15)
-        st.plotly_chart(
-            fig_u,
-            width="stretch",
-            config={"displayModeBar": True, "displaylogo": False, "modeBarButtonsToRemove": ["lasso2d", "select2d"]},
-        )
     ui.plot_caption_with_help(
         "Global shift and attention · top features (min-max scaled within each bar chart) · modality mix donut (top by mean rank).",
@@ -211,22 +234,29 @@ elif bundle is not None:
     with u_story:
         st.markdown(_BIOLOGY_CONTEXT_MARKDOWN)
     with u_map:
-        ui.plot_caption_with_help(
-            "Feature ranking cache unavailable · UMAP only",
-            _UMAP_HELP_MD,
-            key="home_umap_only_help",
-        )
-        fig_u = plots.latent_scatter(
-            plot_umap,
-            "label",
-            title=_UMAP_HOME_TITLE,
-            width=820,
-            height=480,
-            marker_size=5.5,
-            marker_opacity=0.72,
-        )
-        fig_u.update_layout(margin=dict(l=24, r=12, t=52, b=24), title_font_size=15)
-        st.plotly_chart(fig_u, width="stretch", config={"displayModeBar": True, "displaylogo": False})
 elif df_features is not None:
     ui.plot_caption_with_help(
         "Feature ranking overview · latent UMAP unavailable",

 from streamlit_hf.lib import ui
 _CACHE = Path(__file__).resolve().parent / "cache"
+_EXPERIMENT_SVG = Path(__file__).resolve().parent / "static" / "experiment.svg"
+# Display width (px) for the home-page schematic; SVG scales cleanly at fixed width.
+_EXPERIMENT_FIGURE_WIDTH_PX = 380
+_CELLTAG_MULTI_ARTICLE_URL = "https://www.nature.com/articles/s41587-023-01931-4"
 _APP_NAME = "FateFormer Explorer"
 _HERO_EMOJI = "\U0001f9ec"  # DNA (matches HF Space card tone)
 _VALIDATION_ROC_AUC = 0.93
 _UMAP_HOME_TITLE = "Validation latent space (UMAP)"
+_UMAP_HOME_SUBTITLE = "Each point is a cell · colours = experimental fate labels · validation split"
+_UMAP_HOME_SUBTITLE_RANK_MISSING = "Feature ranking cache unavailable · UMAP only"
+_UMAP_HELP_MD = f"""
+**What this is:** A 2‑D **UMAP** of validation **single cells** in the model’s **latent space** (**context vector token representation**), summarised across **5-fold cross-validation**. **2,110** cells are shown.
+**How to read it:** Each point is one cell. **Colour** is **experimental fate** from [**CellTag-Multi**]({_CELLTAG_MULTI_ARTICLE_URL}) clonal labels. **Axes are unitless**: UMAP preserves *local* neighbourhoods, not real physical distances, so **nearby points** tend to have similar characteristics in this representation. **Hover** a point for cell-level details. For more detail (interactive UMAP, filters, and metadata), open **Single-Cell Explorer** using the link below.
 """
 _GLOBAL_RANK_HELP_MD = """
+**What this is:** The **top important fate-predictor markers** for **FateFormer** across its **three modalities** (**RNA** genes, **TF motifs** from chromatin (ATAC), and **flux** reactions), shown as three linked summaries.
 **Panels:** **Left / middle** = top features by **latent shift** importance and by **attention** (bars are **min‑max scaled within that panel** so the longest bar is 1). **Right** = **modality mix** (RNA vs ATAC vs Flux) among a pool of **strongest** features by **mean rank** (lower mean rank = higher joint priority).
 **How to read it:** Longer bars mean stronger measured influence for that metric. **Colours** mark **modality**. The donut answers: “Among the most important features in this pool, which data type dominates?”.
 """
 _APP_SUBTITLE = (
     "to predict single-cell fate, with interpretable attention and latent-shift rankings across omics layers."
 )
+_EXPERIMENTAL_SYSTEM_MD = f"""
+Mouse embryonic fibroblasts (**MEFs**) were reprogrammed toward induced endoderm progenitors (**iEPs**) **in vitro** through *Foxa1* and *HNF4A* induction. This process produces **mixed outcomes**: some cells successfully reach the **iEP fate**, whereas others diverge into **off-target** trajectories and stall in **dead-end states**. Using [**CellTag-Multi**]({_CELLTAG_MULTI_ARTICLE_URL}) clonal barcoding, **early cells** could be linked to their **later fate**, which made it possible to ask a central biological question: which programs in **early-state cells**, coordinated **across transcriptional, chromatin, and metabolic layers**, drive successful reprogramming, which ones push cells toward off-target states, and which of these mechanisms could be targeted to improve reprogramming efficiency?
+"""
+_BIOLOGY_CONTEXT_MARKDOWN = f"""
+**How FateFormer addresses this**
+- **Multimodal view:** FateFormer integrates **scRNA-seq**, **scATAC-seq**, and **genome-scale metabolic flux** to capture regulatory and metabolic signals that are missed by RNA-only analysis.
+- **Grounded in lineage tracing:** The model is trained on a **sparse-modality atlas of more than 150,000 cells**, including **2,110** early cells linked to later outcomes through **CellTag-Multi** clonal barcoding.
+- **Biological insight:** FateFormer learns representations across modalities, handles **missing inputs** and **limited labels**, and using **explainability methods** highlights the transcriptional, chromatin, and metabolic programs associated with reprogramming success or off target failure.
 """
     unsafe_allow_html=True,
 )
+with st.container(border=True):
+    fig_col, text_col = st.columns([0.42, 0.58], gap="large")
+    with fig_col:
+        if _EXPERIMENT_SVG.is_file():
+            st.image(str(_EXPERIMENT_SVG), width=_EXPERIMENT_FIGURE_WIDTH_PX)
+        else:
+            st.caption("Experimental schematic (`static/experiment.svg`) is missing.")
+    with text_col:
+        st.markdown(_EXPERIMENTAL_SYSTEM_MD)
 bundle = io.load_latent_bundle()
 df_features = io.load_df_features()
 samples = io.load_samples_df()
     with row1_story:
         st.markdown(_BIOLOGY_CONTEXT_MARKDOWN)
     with row1_umap:
+        try:
+            _umap_plot_col, _umap_help_col = st.columns([0.94, 0.06], gap="small", vertical_alignment="top")
+        except TypeError:
+            _umap_plot_col, _umap_help_col = st.columns([0.94, 0.06], gap="small")
+        with _umap_plot_col:
+            fig_u = plots.latent_scatter(
+                plot_umap,
+                "label",
+                title=_UMAP_HOME_TITLE,
+                width=780,
+                height=440,
+                marker_size=5.2,
+                marker_opacity=0.72,
+                subtitle=_UMAP_HOME_SUBTITLE,
+            )
+            fig_u.update_layout(margin=dict(l=20, r=8, t=92, b=20), title_font_size=15)
+            st.plotly_chart(
+                fig_u,
+                width="stretch",
+                config={"displayModeBar": True, "displaylogo": False, "modeBarButtonsToRemove": ["lasso2d", "select2d"]},
+            )
+        with _umap_help_col:
+            ui.plot_help_popover(
+                _UMAP_HELP_MD,
+                key="home_umap_help",
+                page_link=("pages/1_Single_Cell_Explorer.py", "Single-Cell Explorer"),
+            )
     ui.plot_caption_with_help(
         "Global shift and attention · top features (min-max scaled within each bar chart) · modality mix donut (top by mean rank).",
     with u_story:
         st.markdown(_BIOLOGY_CONTEXT_MARKDOWN)
     with u_map:
+        try:
+            _umap_plot_col2, _umap_help_col2 = st.columns([0.94, 0.06], gap="small", vertical_alignment="top")
+        except TypeError:
+            _umap_plot_col2, _umap_help_col2 = st.columns([0.94, 0.06], gap="small")
+        with _umap_plot_col2:
+            fig_u = plots.latent_scatter(
+                plot_umap,
+                "label",
+                title=_UMAP_HOME_TITLE,
+                width=820,
+                height=480,
+                marker_size=5.5,
+                marker_opacity=0.72,
+                subtitle=_UMAP_HOME_SUBTITLE_RANK_MISSING,
+            )
+            fig_u.update_layout(margin=dict(l=24, r=12, t=92, b=24), title_font_size=15)
+            st.plotly_chart(fig_u, width="stretch", config={"displayModeBar": True, "displaylogo": False})
+        with _umap_help_col2:
+            ui.plot_help_popover(
+                _UMAP_HELP_MD,
+                key="home_umap_only_help",
+                page_link=("pages/1_Single_Cell_Explorer.py", "Single-Cell Explorer"),
+            )
 elif df_features is not None:
     ui.plot_caption_with_help(
         "Feature ranking overview · latent UMAP unavailable",

streamlit_hf/lib/io.py CHANGED Viewed

@@ -88,13 +88,9 @@ def load_metabolic_model_metadata() -> pd.DataFrame | None:
 def build_metabolic_model_table(
     meta: pd.DataFrame,
-    flux_df: pd.DataFrame,
     supermodule_id: int | None = None,
 ) -> pd.DataFrame:
-    """
-    Static edge list: substrate → product, reaction label, module class, plus DE / model columns when the
-    reaction string matches a row in the flux feature table.
-    """
     need = {"Compound_IN_name", "Compound_OUT_name", "rxnName", "Supermodule_id", "Super.Module.class"}
     if not need.issubset(set(meta.columns)):
         return pd.DataFrame()
@@ -103,36 +99,9 @@ def build_metabolic_model_table(
         m = m[m["Supermodule_id"] == int(supermodule_id)]
     if m.empty:
         return pd.DataFrame()
-    fd = flux_df.copy()
-    fd["_rk"] = fd["feature"].map(normalize_reaction_key)
-    fd = fd.drop_duplicates("_rk", keep="first").set_index("_rk", drop=False)
-    rows: list[dict] = []
-    for _, r in m.iterrows():
-        k = normalize_reaction_key(str(r["rxnName"]))
-        base = {
-            "Supermodule": r.get("Super.Module.class"),
-            "Module_id": r.get("Module_id"),
-            "Substrate": r["Compound_IN_name"],
-            "Product": r["Compound_OUT_name"],
-            "Reaction": r["rxnName"],
-        }
-        if k in fd.index:
-            row = fd.loc[k]
-            if isinstance(row, pd.DataFrame):
-                row = row.iloc[0]
-            base["log_fc"] = row["log_fc"] if "log_fc" in row.index else None
-            base["pval_adj"] = row["pval_adj"] if "pval_adj" in row.index else None
-            base["mean_rank"] = row["mean_rank"] if "mean_rank" in row.index else None
-            base["pathway"] = row["pathway"] if "pathway" in row.index else None
-        else:
-            base["log_fc"] = None
-            base["pval_adj"] = None
-            base["mean_rank"] = None
-            base["pathway"] = None
-        rows.append(base)
-    return pd.DataFrame(rows)
 def _normalize_metabolite_token(name: str) -> str:
@@ -319,7 +288,7 @@ def build_metabolite_map_bundle(
         if smods:
             lines.append(f"Modules: {html.escape(' · '.join(smods[:4]))}")
         if best_importance is not None:
-            lines.append(f"Strongest linked step: #{best_importance}")
         top_rx = sorted(
             uniq_rx,
@@ -350,12 +319,12 @@ def build_metabolite_map_bundle(
         )
         if precursors:
             lines.append(
-                f"<span style='color:#656d76'>Model precursors (substrates in linked steps)</span><br/>"
                 f"{html.escape(', '.join(precursors[:8]))}"
             )
         if products:
             lines.append(
-                f"<span style='color:#656d76'>Model products (downstream in linked steps)</span><br/>"
                 f"{html.escape(', '.join(products[:8]))}"
             )

 def build_metabolic_model_table(
     meta: pd.DataFrame,
     supermodule_id: int | None = None,
 ) -> pd.DataFrame:
+    """Rows from ``metabolic_model_metadata.csv`` (all file columns except a stray ``Unnamed: 0`` index column)."""
     need = {"Compound_IN_name", "Compound_OUT_name", "rxnName", "Supermodule_id", "Super.Module.class"}
     if not need.issubset(set(meta.columns)):
         return pd.DataFrame()
         m = m[m["Supermodule_id"] == int(supermodule_id)]
     if m.empty:
         return pd.DataFrame()
+    if "Unnamed: 0" in m.columns:
+        m = m.drop(columns=["Unnamed: 0"])
+    return m.reset_index(drop=True)
 def _normalize_metabolite_token(name: str) -> str:
         if smods:
             lines.append(f"Modules: {html.escape(' · '.join(smods[:4]))}")
         if best_importance is not None:
+            lines.append(f"Strongest linked reaction: #{best_importance}")
         top_rx = sorted(
             uniq_rx,
         )
         if precursors:
             lines.append(
+                f"<span style='color:#656d76'>Model precursors (substrates in linked reactions)</span><br/>"
                 f"{html.escape(', '.join(precursors[:8]))}"
             )
         if products:
             lines.append(
+                f"<span style='color:#656d76'>Model products (downstream in linked reactions)</span><br/>"
                 f"{html.escape(', '.join(products[:8]))}"
             )

streamlit_hf/lib/plots.py CHANGED Viewed

@@ -98,6 +98,7 @@ def latent_scatter(
     height: int = 520,
     marker_size: float = 5.0,
     marker_opacity: float = 0.78,
 ):
     d = df.copy()
     hover_spec = {
@@ -159,7 +160,8 @@ def latent_scatter(
         width=width,
         height=height,
     )
-    if title:
         common["title"] = title
     if continuous:
         fig = px.scatter(
@@ -178,7 +180,10 @@ def latent_scatter(
     fig.update_traces(
         marker=dict(size=marker_size, opacity=marker_opacity, line=dict(width=0.25, color="rgba(255,255,255,0.4)"))
     )
-    top_margin = 56 if title else 28
     fig.update_layout(
         template="plotly_white",
         font=PLOT_FONT,
@@ -190,7 +195,20 @@ def latent_scatter(
         paper_bgcolor=PAGE_BG,
         plot_bgcolor=PAGE_BG,
     )
-    if not title:
         fig.update_layout(title=None)
     fig.update_xaxes(showticklabels=False, showgrid=True, gridcolor="rgba(0,0,0,0.06)", zeroline=False)
     fig.update_yaxes(showticklabels=False, showgrid=True, gridcolor="rgba(0,0,0,0.06)", zeroline=False)
@@ -198,20 +216,29 @@ def latent_scatter(
 def rank_scatter_shift_vs_attention(df_mod, modality: str, width: int = 420, height: int = 440):
-    """Attention rank on x, shift rank on y, least-squares trend line, discrete point colours."""
     need = ("shift_order_mod", "attention_order_mod")
     if not all(c in df_mod.columns for c in need):
         return go.Figure()
     sub = df_mod.dropna(subset=list(need)).copy()
     if sub.empty:
         return go.Figure()
     x = sub["attention_order_mod"].astype(float).to_numpy()
     y = sub["shift_order_mod"].astype(float).to_numpy()
     fig = px.scatter(
         sub,
         x="attention_order_mod",
         y="shift_order_mod",
-        color="top_10_pct",
         hover_name="feature",
         hover_data={
             "mean_rank": True,
@@ -221,14 +248,16 @@ def rank_scatter_shift_vs_attention(df_mod, modality: str, width: int = 420, hei
         labels={
             "attention_order_mod": "Attention rank",
             "shift_order_mod": "Shift rank",
         },
         width=width,
         height=height,
         color_discrete_map={
-            "both": PALETTE[0],
-            "shift": PALETTE[1],
-            "att": PALETTE[2],
-            "None": "#94a3b8",
         },
     )
     fig.update_traces(marker=dict(size=7, opacity=0.62, line=dict(width=0.5, color="rgba(15,23,42,0.28)")))
@@ -258,7 +287,14 @@ def rank_scatter_shift_vs_attention(df_mod, modality: str, width: int = 420, hei
             font=dict(size=14, family=PLOT_FONT["family"]),
         ),
         margin=dict(l=48, r=20, t=52, b=72),
-        legend=dict(orientation="h", yanchor="top", y=-0.2, xanchor="center", x=0.5),
     )
     return fig
@@ -1094,7 +1130,7 @@ def pathway_enrichment_bubble_panel(
 def pathway_gene_membership_heatmap(
     z: np.ndarray, row_labels: list[str], col_labels: list[str]
 ) -> go.Figure:
-    """Pathway × gene grid; empty cells transparent; Reactome/KEGG as a narrow left row spine."""
     if z.size == 0:
         return go.Figure()
@@ -1113,10 +1149,11 @@ def pathway_gene_membership_heatmap(
     # Discrete codes 0–4 must not use z/4 (3→0.75 landed in the KEGG band). Map to fixed slots.
     _z_plot = {0: 0.04, 1: 0.24, 2: 0.44, 3: 0.64, 4: 0.84}
-    transparent = "rgba(0,0,0,0)"
     colorscale_main = [
-        [0.0, transparent],
-        [0.14, transparent],
         [0.15, "#e69138"],
         [0.33, "#e69138"],
         [0.34, "#7eb6d9"],

     height: int = 520,
     marker_size: float = 5.0,
     marker_opacity: float = 0.78,
+    subtitle: str | None = None,
 ):
     d = df.copy()
     hover_spec = {
         width=width,
         height=height,
     )
+    # Title + subtitle are applied via update_layout when `subtitle` is set (needs Plotly >= 5.23, which added layout.title.subtitle).
+    if title and not subtitle:
         common["title"] = title
     if continuous:
         fig = px.scatter(
     fig.update_traces(
         marker=dict(size=marker_size, opacity=marker_opacity, line=dict(width=0.25, color="rgba(255,255,255,0.4)"))
     )
+    if title and subtitle:
+        top_margin = 88
+    else:
+        top_margin = 56 if title else 28
     fig.update_layout(
         template="plotly_white",
         font=PLOT_FONT,
         paper_bgcolor=PAGE_BG,
         plot_bgcolor=PAGE_BG,
     )
+    if title and subtitle:
+        fig.update_layout(
+            title=dict(
+                text=title,
+                x=0.5,
+                xanchor="center",
+                font=dict(size=16, family=PLOT_FONT["family"]),
+                subtitle=dict(
+                    text=subtitle,
+                    font=dict(size=11, color="#64748b", family=PLOT_FONT["family"]),
+                ),
+            ),
+        )
+    elif not title:
         fig.update_layout(title=None)
     fig.update_xaxes(showticklabels=False, showgrid=True, gridcolor="rgba(0,0,0,0.06)", zeroline=False)
     fig.update_yaxes(showticklabels=False, showgrid=True, gridcolor="rgba(0,0,0,0.06)", zeroline=False)
 def rank_scatter_shift_vs_attention(df_mod, modality: str, width: int = 420, height: int = 440):
+    """Attention rank on x, shift rank on y, least-squares trend, colours by top ~10% within this modality."""
     need = ("shift_order_mod", "attention_order_mod")
     if not all(c in df_mod.columns for c in need):
         return go.Figure()
     sub = df_mod.dropna(subset=list(need)).copy()
     if sub.empty:
         return go.Figure()
+    n = len(sub)
+    top_k = max(1, int(np.ceil(0.1 * n)))
+    s_ok = sub["shift_order_mod"].astype(int) <= top_k
+    a_ok = sub["attention_order_mod"].astype(int) <= top_k
+    sub["_tier_label"] = np.where(
+        s_ok & a_ok,
+        "Both",
+        np.where(s_ok, "Shift", np.where(a_ok, "Attention", "Neither")),
+    )
     x = sub["attention_order_mod"].astype(float).to_numpy()
     y = sub["shift_order_mod"].astype(float).to_numpy()
     fig = px.scatter(
         sub,
         x="attention_order_mod",
         y="shift_order_mod",
+        color="_tier_label",
         hover_name="feature",
         hover_data={
             "mean_rank": True,
         labels={
             "attention_order_mod": "Attention rank",
             "shift_order_mod": "Shift rank",
+            "_tier_label": "Top-10% tier",
         },
+        category_orders={"_tier_label": ["Both", "Shift", "Attention", "Neither"]},
         width=width,
         height=height,
         color_discrete_map={
+            "Both": PALETTE[0],
+            "Shift": PALETTE[1],
+            "Attention": PALETTE[2],
+            "Neither": "#94a3b8",
         },
     )
     fig.update_traces(marker=dict(size=7, opacity=0.62, line=dict(width=0.5, color="rgba(15,23,42,0.28)")))
             font=dict(size=14, family=PLOT_FONT["family"]),
         ),
         margin=dict(l=48, r=20, t=52, b=72),
+        legend=dict(
+            title=dict(text="Among top 10% features?"),
+            orientation="h",
+            yanchor="top",
+            y=-0.2,
+            xanchor="center",
+            x=0.5,
+        ),
     )
     return fig
 def pathway_gene_membership_heatmap(
     z: np.ndarray, row_labels: list[str], col_labels: list[str]
 ) -> go.Figure:
+    """Pathway × gene grid; empty cells use a light tint vs page white; Reactome/KEGG as a narrow left row spine."""
     if z.size == 0:
         return go.Figure()
     # Discrete codes 0–4 must not use z/4 (3→0.75 landed in the KEGG band). Map to fixed slots.
     _z_plot = {0: 0.04, 1: 0.24, 2: 0.44, 3: 0.64, 4: 0.84}
+    # Slight contrast vs PAGE_BG (#fff) so empty (code 0) cells read as a grid, not “missing” paint.
+    _empty_cell = "#f1f5f9"
     colorscale_main = [
+        [0.0, _empty_cell],
+        [0.14, _empty_cell],
         [0.15, "#e69138"],
         [0.33, "#e69138"],
         [0.34, "#7eb6d9"],

streamlit_hf/lib/ui.py CHANGED Viewed

@@ -4,6 +4,12 @@ from __future__ import annotations
 import streamlit as st
 def inject_app_styles() -> None:
     """Panel labels, page background, and shared chrome (all pages)."""
@@ -69,8 +75,17 @@ section[data-testid="stMain"] h1 {
     )
-def plot_help_popover(help_md: str, *, key: str) -> None:
-    """Small help control next to a figure; opens Markdown guidance for biologists."""
     with st.popover(
         " ",
         help="What does this figure show?",
@@ -80,6 +95,9 @@ def plot_help_popover(help_md: str, *, key: str) -> None:
         key=key,
     ):
         st.markdown(help_md)
 def plot_caption_with_help(caption: str, help_md: str, *, key: str) -> None:

 import streamlit as st
+# Feature Insights multipage hub: same title + tagline on every sub-page.
+FEATURE_INSIGHTS_TITLE = "Feature Insights"
+FEATURE_INSIGHTS_CAPTION = (
+    "Latent-shift probes, attention rollout, and combined rankings across RNA, ATAC, and Flux."
+)
 def inject_app_styles() -> None:
     """Panel labels, page background, and shared chrome (all pages)."""
     )
+def plot_help_popover(
+    help_md: str,
+    *,
+    key: str,
+    page_link: tuple[str, str] | None = None,
+) -> None:
+    """Small help control next to a figure; opens Markdown guidance for biologists.
+    If ``page_link`` is ``(path, label)``, a ``st.page_link`` is rendered after the markdown
+    (e.g. ``("pages/1_Single_Cell_Explorer.py", "Single-Cell Explorer")``).
+    """
     with st.popover(
         " ",
         help="What does this figure show?",
         key=key,
     ):
         st.markdown(help_md)
+        if page_link:
+            page_path, page_label = page_link
+            st.page_link(page_path, label=page_label)
 def plot_caption_with_help(caption: str, help_md: str, *, key: str) -> None:

streamlit_hf/pages/1_Single_Cell_Explorer.py CHANGED Viewed

@@ -19,16 +19,24 @@ from streamlit_hf.lib import ui
 ui.inject_app_styles()
-_UMAP_EXPLORER_HELP = """
-**What this is:** The same kind of **2‑D UMAP** as on Home, but you choose **what to colour** (fate label, model prediction, fold, modalities present, etc.) and can **filter** cells.
-**How to read it:** Axes are **unitless** UMAP coordinates. **Colour** follows your **Colour by** menu. **Hover** points for values; **click‑drag a box** on the plot to **select** cells and inspect them in the table below.
-**Takeaway:** Check whether mis‑predictions or batch effects line up in particular regions of latent space.
 """
 st.title("Single-Cell Explorer")
-st.caption("Explore validation cells in 2-D UMAP space: colour and filter to compare fates, predictions, and modalities.")
 bundle = io.load_latent_bundle()
 if bundle is None:
@@ -107,21 +115,31 @@ if plot_df.empty:
     st.stop()
 with right:
-    ui.plot_caption_with_help(
-        "Hover points for details · drag on the plot to select cells",
-        _UMAP_EXPLORER_HELP,
-        key="sc_umap_help",
-    )
-    fig = plots.latent_scatter(
-        plot_df,
-        color_opt,
-        title="Validation latent space (UMAP)",
-        width=900,
-        height=560,
-        marker_size=5.8,
-        marker_opacity=0.74,
-    )
-    st.plotly_chart(fig, width="stretch", on_select="rerun", key="latent_pick")
 st.subheader("Selected points")
 state = st.session_state.get("latent_pick")
@@ -155,12 +173,17 @@ else:
     )
 st.subheader("Inspect by dataset index")
 pick = st.number_input(
     "Dataset index",
-    min_value=int(df["dataset_idx"].min()),
-    max_value=int(df["dataset_idx"].max()),
     value=int(df["dataset_idx"].iloc[0]),
-    help="Index `ind` in your sample table; aligns one validation cell to this row.",
 )
 row = df[df["dataset_idx"] == pick]
 if not row.empty:

 ui.inject_app_styles()
+_CELLTAG_MULTI_ARTICLE_URL = "https://www.nature.com/articles/s41587-023-01931-4"
+_UMAP_EXPLORER_TITLE = "Validation latent space (UMAP)"
+_UMAP_EXPLORER_SUBTITLE = "Hover points for details · drag on the plot to select cells"
+_UMAP_EXPLORER_HELP = f"""
+**What this is:** The same **2‑D UMAP** as on **Home**: validation **single cells** in **FateFormer**’s **latent space** (**context vector token representation**), summarised across **5-fold cross-validation** (**2,110** cells before filters). Here you **choose what to colour** and **filter** the cloud.
+**How to read it:** Each point is one cell. **Colour** comes from **Colour by**: e.g. [**CellTag-Multi**]({_CELLTAG_MULTI_ARTICLE_URL}) **label**, **predicted fate**, **prediction correct / wrong**, **CV fold**, **batch**, which **modalities** are present, or **dominant fate %**. **Axes are unitless** (UMAP preserves *local* neighbourhoods only). **Hover** a point for per-cell fields.
+**Using this page:** Use **Filters** to keep modality combinations, restrict **prediction outcome** (all / correct only / wrong only), choose **CV folds**, and set a **dominant fate %** range. In the plot **toolbar** (top right), pick **Box select** or **Lasso select**, then **drag** on the canvas; the app **reruns** and the **Selected points** table fills with those rows. To inspect **one** cell without a selection, scroll to **Inspect by dataset index**.
 """
 st.title("Single-Cell Explorer")
+st.caption(
+    "This page is an interactive **validation UMAP** in FateFormer latent space: you choose how points are **coloured**, "
+    "apply **filters**, and can **select** cells on the plot to inspect them in a table or by index."
+)
 bundle = io.load_latent_bundle()
 if bundle is None:
     st.stop()
 with right:
+    try:
+        _sc_umap_plot_col, _sc_umap_help_col = st.columns([0.94, 0.06], gap="small", vertical_alignment="top")
+    except TypeError:
+        _sc_umap_plot_col, _sc_umap_help_col = st.columns([0.94, 0.06], gap="small")
+    with _sc_umap_plot_col:
+        fig = plots.latent_scatter(
+            plot_df,
+            color_opt,
+            title=_UMAP_EXPLORER_TITLE,
+            width=900,
+            height=560,
+            marker_size=5.8,
+            marker_opacity=0.74,
+            subtitle=_UMAP_EXPLORER_SUBTITLE,
+        )
+        fig.update_layout(margin=dict(l=20, r=12, t=92, b=20), title_font_size=15)
+        st.plotly_chart(
+            fig,
+            width="stretch",
+            on_select="rerun",
+            key="latent_pick",
+            config={"displayModeBar": True, "displaylogo": False},
+        )
+    with _sc_umap_help_col:
+        ui.plot_help_popover(_UMAP_EXPLORER_HELP, key="sc_umap_help")
 st.subheader("Selected points")
 state = st.session_state.get("latent_pick")
     )
 st.subheader("Inspect by dataset index")
+_didx_min = int(df["dataset_idx"].min())
+_didx_max = int(df["dataset_idx"].max())
 pick = st.number_input(
     "Dataset index",
+    min_value=_didx_min,
+    max_value=_didx_max,
     value=int(df["dataset_idx"].iloc[0]),
+    help=(
+        f"The table below is a one-cell summary for the validation set: choose an index from {_didx_min} to {_didx_max} "
+        "to see fate labels, model prediction, available modalities, and related fields for that cell."
+    ),
 )
 row = df[df["dataset_idx"] == pick]
 if not row.empty:

streamlit_hf/pages/feature_insights/1_Global_overview.py CHANGED Viewed

@@ -1,4 +1,4 @@
-"""Feature Insights — global overview of multimodal feature importance."""
 from __future__ import annotations
@@ -18,18 +18,13 @@ from streamlit_hf.lib import ui
 ui.inject_app_styles()
 _GLOBAL_OVERVIEW_HELP = """
-**What this is:** A **global** snapshot of which **genes, ATAC peaks, or flux reactions** rank highest when **latent shift probes** and **attention rollout** are combined across the whole model.
-**Panels:** **Shift** and **attention** bar charts show the **top‑N** features for each metric (**min‑max scaled within that chart**). The **pie** shows the **RNA / ATAC / Flux** breakdown among a larger pool of **lowest mean‑rank** features (strongest overall joint ranking).
-**How to read it:** **Lower mean rank** = higher priority in the joint ranking. **Colours** encode **modality**. Use the sliders to change how many bars and how large the pie pool is.
-**Takeaway:** See whether interpretability is **RNA‑heavy**, **metabolism‑heavy**, or **balanced** before drilling into modality pages.
 """
-st.title("Feature Insights")
-st.caption("Latent-shift probes, attention rollout, and combined rankings across RNA, ATAC, and Flux.")
 df = io.load_df_features()
 if df is None:
@@ -38,7 +33,14 @@ if df is None:
     )
     st.stop()
 st.subheader("Global overview")
 c1, c2 = st.columns(2)
 with c1:
     top_n_bars = st.slider(
@@ -47,6 +49,11 @@ with c1:
         45,
         20,
         key="t1_topn_bars",
     )
 with c2:
     top_n_pie = st.slider(
@@ -55,6 +62,11 @@ with c2:
         250,
         100,
         key="t1_topn_pie",
     )
 ui.plot_caption_with_help(
     "Global top features by shift vs attention; pie = modality mix among strongest mean-rank pool.",

+"""Feature Insights: global overview of multimodal feature importance."""
 from __future__ import annotations
 ui.inject_app_styles()
 _GLOBAL_OVERVIEW_HELP = """
+**What this is:** The **top important fate-predictor markers** for **FateFormer** across its **three modalities** (**RNA** genes, **TF motifs** from chromatin (ATAC), and **flux** reactions), as a **global** view that combines **latent shift** probes and **attention rollout** over the full model.
+**Panels:** **Shift** and **attention** bar charts show the **top‑N** features for each metric (**min‑max scaled within that chart**, longest bar = 1). The **pie chart** (right) shows **modality mix** (RNA vs ATAC vs Flux) among a pool of **strongest** features by **mean rank** (**lower mean rank** = higher joint priority).
+**How to read it:** **Longer bars** mean stronger measured influence for that metric. **Colours** mark **modality**. Use the **sliders** above to change bar count and pie pool size. The **pie chart** answers: “Among the most important features in this pool, which data type dominates?”.
 """
 df = io.load_df_features()
 if df is None:
     )
     st.stop()
+st.title(ui.FEATURE_INSIGHTS_TITLE)
+st.caption(ui.FEATURE_INSIGHTS_CAPTION)
 st.subheader("Global overview")
+st.caption(
+    "Here, we give a bird's-eye view of which RNA, ATAC, and Flux features matter most: top-N bars for latent shift and "
+    "attention (two explainability methods), plus a pie of modality mix among the strongest features by mean rank "
+    "(sliders change list sizes)."
+)
 c1, c2 = st.columns(2)
 with c1:
     top_n_bars = st.slider(
         45,
         20,
         key="t1_topn_bars",
+        help=(
+            "How many features appear in the left (latent shift) and middle (attention) bar charts: the top N by each "
+            "metric. Each chart is min–max scaled on its own (longest bar = 1). Increase N to list more markers; "
+            "decrease N to focus on the strongest few."
+        ),
     )
 with c2:
     top_n_pie = st.slider(
         250,
         100,
         key="t1_topn_pie",
+        help=(
+            "How many features define the right-hand pie chart: the N strongest by mean rank (lower mean rank = "
+            "stronger joint ranking across shift and attention). A larger pool gives a broader modality mix "
+            "(RNA vs ATAC vs Flux); a smaller pool weights only the very top joint features."
+        ),
     )
 ui.plot_caption_with_help(
     "Global top features by shift vs attention; pie = modality mix among strongest mean-rank pool.",

streamlit_hf/pages/feature_insights/2_Modality_spotlight.py CHANGED Viewed

@@ -1,4 +1,4 @@
-"""Feature Insights — modality spotlight (RNA, ATAC, Flux)."""
 from __future__ import annotations
@@ -17,31 +17,16 @@ from streamlit_hf.lib import ui
 ui.inject_app_styles()
-_HELP_JOINT = """
-**What this is:** Within **{mod}** only, features with the **strongest joint ranking** (combined shift + attention priority).
-**How to read it:** Each row is **one feature**; the **two bars** are **shift** and **attention** scores **rescaled 0–1 within this top‑N list** so they are comparable. **Hover** for the full name.
-**Takeaway:** Highlights markers that are important both to **representations** and to **model focus** in this modality.
-"""
-_HELP_SHIFT = """
-**What this is:** **{mod}** features with highest **latent shift** importance—those whose perturbation **moves the model’s latent state** most.
-**How to read it:** **Longer bar** = larger shift score within this **top‑N** list (compare lengths across features).
-**Takeaway:** Mechanistic “if we nudge this input, the embedding changes a lot.”
 """
-_HELP_ATT = """
-**What this is:** **{mod}** features with highest **attention** importance from rollout—what the **transformer emphasises** when processing cells.
-**How to read it:** **Longer bar** = more average attention mass on that feature (within this top‑N list).
-**Takeaway:** Describes **model behaviour** (what it “looks at”), which can differ from causal shift effects.
-"""
-st.caption("Latent-shift probes, attention rollout, and combined rankings across RNA, ATAC, and Flux.")
 df = io.load_df_features()
 if df is None:
@@ -50,17 +35,36 @@ if df is None:
     )
     st.stop()
-st.subheader("Modality spotlight")
 st.caption(
-    "**Modality spotlight:** three columns (**RNA**, **ATAC**, **Flux**). Each column only shows features "
-    "from that modality so you can compare shift impact, attention, and joint ranking **within** RNA, ATAC, or flux."
 )
-top_n_rank = st.slider("Top N per chart", 10, 55, 20, key="t2_topn")
 st.markdown("##### Joint top markers (by mean rank)")
 st.caption(
-    "The **strongest combined** markers by mean rank (lower mean rank = higher joint shift + attention priority). "
-    "Shift and attention bars are **min-max scaled within this top-N list** (0 to 1) so you can compare them on one axis. "
-    "Hover a bar for the full feature name."
 )
 r1a, r1b, r1c = st.columns(3)
 for col, mod in zip((r1a, r1b, r1c), ("RNA", "ATAC", "Flux")):
@@ -68,13 +72,11 @@ for col, mod in zip((r1a, r1b, r1c), ("RNA", "ATAC", "Flux")):
     if sm.empty:
         continue
     with col:
-        _, _hp = st.columns([1, 0.28])
-        with _hp:
-            ui.plot_help_popover(_HELP_JOINT.format(mod=mod), key=f"t2_joint_{mod}")
         st.plotly_chart(
             plots.joint_shift_attention_top_features(sm, mod, top_n_rank),
             width="stretch",
         )
 st.markdown("##### Shift importance")
 r2a, r2b, r2c = st.columns(3)
 for col, mod in zip((r2a, r2b, r2c), ("RNA", "ATAC", "Flux")):
@@ -84,9 +86,6 @@ for col, mod in zip((r2a, r2b, r2c), ("RNA", "ATAC", "Flux")):
     colc = plots.MODALITY_COLOR.get(mod, plots.PALETTE[0])
     sub = sm.nlargest(top_n_rank, "importance_shift").sort_values("importance_shift", ascending=True)
     with col:
-        _, _hp = st.columns([1, 0.28])
-        with _hp:
-            ui.plot_help_popover(_HELP_SHIFT.format(mod=mod), key=f"t2_shift_{mod}")
         st.plotly_chart(
             plots.rank_bar(
                 sub,
@@ -98,6 +97,7 @@ for col, mod in zip((r2a, r2b, r2c), ("RNA", "ATAC", "Flux")):
             ),
             width="stretch",
         )
 st.markdown("##### Attention importance")
 r3a, r3b, r3c = st.columns(3)
 for col, mod in zip((r3a, r3b, r3c), ("RNA", "ATAC", "Flux")):
@@ -107,9 +107,6 @@ for col, mod in zip((r3a, r3b, r3c), ("RNA", "ATAC", "Flux")):
     colc = plots.MODALITY_COLOR.get(mod, plots.PALETTE[0])
     sub = sm.nlargest(top_n_rank, "importance_att").sort_values("importance_att", ascending=True)
     with col:
-        _, _hp = st.columns([1, 0.28])
-        with _hp:
-            ui.plot_help_popover(_HELP_ATT.format(mod=mod), key=f"t2_att_{mod}")
         st.plotly_chart(
             plots.rank_bar(
                 sub,

+"""Feature Insights: modality spotlight (RNA, ATAC, Flux)."""
 from __future__ import annotations
 ui.inject_app_styles()
+_HELP_PAGE = """
+**Layout:** Three modality columns (**RNA**, **ATAC**, **Flux**). Each column uses only that modality’s features (**genes**, **TF motifs** from chromatin, or **metabolic reactions**).
+**Joint row** (*Joint top markers*): Features ordered by **mean rank** (combined shift + attention; **lower mean rank** = stronger joint priority). Each row is one feature with **two bars** (shift and attention), **min–max scaled within this top‑N list** (0–1) so both are comparable. **Hover** a bar for the full name.
+**Shift row** (*Shift importance*): **Shift-only** top **N** by latent shift score per column. **Longer bar** = larger shift in this list. **Hover** for the full name.
+**Attention row** (*Attention importance*): **Attention-only** top **N** by rollout importance per column. **Longer bar** = more average attention. **Hover** for the full name.
 """
 df = io.load_df_features()
 if df is None:
     )
     st.stop()
+st.title(ui.FEATURE_INSIGHTS_TITLE)
+st.caption(ui.FEATURE_INSIGHTS_CAPTION)
+try:
+    _spot_h_l, _spot_h_r = st.columns([0.94, 0.06], gap="small", vertical_alignment="center")
+except TypeError:
+    _spot_h_l, _spot_h_r = st.columns([0.94, 0.06], gap="small")
+with _spot_h_l:
+    st.subheader("Modality spotlight")
+with _spot_h_r:
+    ui.plot_help_popover(_HELP_PAGE, key="t2_page_help")
 st.caption(
+    "Here, we zoom into one modality at a time (RNA, ATAC, or Flux) to explore top fate predictor markers: for each column "
+    "you see joint top markers, then shift-only and attention-only rankings side by side so within-modality comparisons "
+    "stay fair."
+)
+top_n_rank = st.slider(
+    "Top N per chart",
+    10,
+    55,
+    20,
+    key="t2_topn",
+    help=(
+        "Number of features in each chart on this page: the joint (mean-rank) row, the shift-only row, "
+        "and the attention-only row all use this N within each modality column."
+    ),
 )
 st.markdown("##### Joint top markers (by mean rank)")
 st.caption(
+    "Joint row: strongest by mean rank; shift and attention bars scaled within this top-N list. Hover a bar for the full name."
 )
 r1a, r1b, r1c = st.columns(3)
 for col, mod in zip((r1a, r1b, r1c), ("RNA", "ATAC", "Flux")):
     if sm.empty:
         continue
     with col:
         st.plotly_chart(
             plots.joint_shift_attention_top_features(sm, mod, top_n_rank),
             width="stretch",
         )
 st.markdown("##### Shift importance")
 r2a, r2b, r2c = st.columns(3)
 for col, mod in zip((r2a, r2b, r2c), ("RNA", "ATAC", "Flux")):
     colc = plots.MODALITY_COLOR.get(mod, plots.PALETTE[0])
     sub = sm.nlargest(top_n_rank, "importance_shift").sort_values("importance_shift", ascending=True)
     with col:
         st.plotly_chart(
             plots.rank_bar(
                 sub,
             ),
             width="stretch",
         )
 st.markdown("##### Attention importance")
 r3a, r3b, r3c = st.columns(3)
 for col, mod in zip((r3a, r3b, r3c), ("RNA", "ATAC", "Flux")):
     colc = plots.MODALITY_COLOR.get(mod, plots.PALETTE[0])
     sub = sm.nlargest(top_n_rank, "importance_att").sort_values("importance_att", ascending=True)
     with col:
         st.plotly_chart(
             plots.rank_bar(
                 sub,

streamlit_hf/pages/feature_insights/3_Shift_vs_attention.py CHANGED Viewed

@@ -1,4 +1,4 @@
-"""Feature Insights — shift vs attention rank scatter by modality."""
 from __future__ import annotations
@@ -18,16 +18,24 @@ from streamlit_hf.lib import ui
 ui.inject_app_styles()
-_HELP_SHIFT_VS_ATT = """
-**What this is:** Each **dot** is **one {mod} feature**. **X** = rank by **attention** (1 = strongest in this modality); **Y** = rank by **latent shift** (1 = strongest).
-**How to read it:** Points **on the diagonal** rank similarly for both metrics. The **red dashed line** is a **least‑squares trend**—it summarises whether higher attention rank tends to pair with higher shift rank in this modality.
-**Takeaway:** Features **far from the trend** are interesting: strong in one lens but not the other (e.g. high attention, lower shift, or the reverse).
-"""
-st.title("Feature Insights")
-st.caption("Latent-shift probes, attention rollout, and combined rankings across RNA, ATAC, and Flux.")
 df = io.load_df_features()
@@ -37,11 +45,13 @@ if df is None:
     )
     st.stop()
 st.subheader("Shift vs attention")
 st.caption(
-    "Each point is **one feature** within its modality. **Attention rank** is on the horizontal axis and **shift rank** "
-    "on the vertical axis (1 = strongest in that modality for that metric). Features near the diagonal rank similarly "
-    "for both; the **red dashed line** is a straight-line trend (least-squares fit) through the cloud."
 )
 corr_rows = []
 for mod in ("RNA", "ATAC", "Flux"):
@@ -61,14 +71,20 @@ for mod in ("RNA", "ATAC", "Flux"):
             }
         )
 if corr_rows:
     st.dataframe(pd.DataFrame(corr_rows), hide_index=True, width="stretch")
 rc1, rc2, rc3 = st.columns(3)
 for col, mod in zip((rc1, rc2, rc3), ("RNA", "ATAC", "Flux")):
     with col:
         sub_m = df[df["modality"] == mod]
-        _, _hp = st.columns([1, 0.28])
-        with _hp:
-            ui.plot_help_popover(_HELP_SHIFT_VS_ATT.format(mod=mod), key=f"t3_scatter_{mod}")
         st.plotly_chart(
             plots.rank_scatter_shift_vs_attention(sub_m, mod),
             width="stretch",

+"""Feature Insights: shift vs attention rank scatter by modality."""
 from __future__ import annotations
 ui.inject_app_styles()
+# Native Streamlit tooltips (caption help); plain text reads well in the small ? popover.
+_CORR_TABLE_HELP = (
+    "Per-modality correlation between attention rank and latent-shift rank across features in that modality "
+    "(same features as in the scatters below). Pearson r and Spearman rho measure rank agreement, with p-values. "
+    "# features is how many features in that modality were used for the correlation (one rank pair per feature). "
+    "Higher |r| means stronger agreement in how features are ordered: a feature that ranks high on shift (small rank; 1 = strongest) "
+    "tends to sit in a similar place on attention rank, and the same for weaker features, across that modality."
+)
+_SCATTER_HELP = (
+    "Each dot is one feature in that column: a gene (RNA), TF motif (ATAC), or reaction (Flux). "
+    "X = attention rank (1 = strongest in that modality); Y = latent shift rank (1 = strongest). "
+    "Ranks on both axes show agreement between methods: near the diagonal means similar ranking; "
+    "the dashed trend line is a least-squares fit. Correlation for each modality is in the table above; "
+    "stronger r means closer alignment of shift- and attention-based importance as fate predictors. "
+    "Point colour is whether that feature sits in the top ~10% by shift rank, attention rank, both, or neither, "
+    "using ranks within that modality only (same scale as the axes)."
+)
 df = io.load_df_features()
     )
     st.stop()
+st.title(ui.FEATURE_INSIGHTS_TITLE)
+st.caption(ui.FEATURE_INSIGHTS_CAPTION)
 st.subheader("Shift vs attention")
 st.caption(
+    "Here, we explore how much latent-shift and attention-rollout explanations agree on feature importance within each "
+    "modality. A correlation table quantifies rank agreement; scatter plots pair each feature’s two ranks "
+    "(1 = strongest in that modality)."
 )
 corr_rows = []
 for mod in ("RNA", "ATAC", "Flux"):
             }
         )
 if corr_rows:
+    st.caption(
+        "Rank correlation by modality",
+        help=_CORR_TABLE_HELP,
+    )
     st.dataframe(pd.DataFrame(corr_rows), hide_index=True, width="stretch")
+st.caption(
+    "Rank scatter by modality",
+    help=_SCATTER_HELP,
+)
 rc1, rc2, rc3 = st.columns(3)
 for col, mod in zip((rc1, rc2, rc3), ("RNA", "ATAC", "Flux")):
     with col:
         sub_m = df[df["modality"] == mod]
         st.plotly_chart(
             plots.rank_scatter_shift_vs_attention(sub_m, mod),
             width="stretch",

streamlit_hf/pages/feature_insights/4_Attention_vs_prediction.py CHANGED Viewed

@@ -1,4 +1,4 @@
-"""Feature Insights — attention by predicted cohort."""
 from __future__ import annotations
@@ -17,27 +17,12 @@ from streamlit_hf.lib import ui
 ui.inject_app_styles()
-_HELP_ATT_COHORT_BARS = """
-**What this is:** **Mean attention** (rollout) on each **feature token**, averaged over validation cells and split by **what the model predicted** for those cells.
-**Cohort menu:** **Compare** shows cohorts **side‑by‑side**. **All / dead‑end / reprogramming** restrict the average to that predicted class only.
-**Important:** Uses **predicted** fate, **not** the experimental label—this is **model behaviour**, useful for comparing what the network emphasises when it leans each way.
-**How to read:** **Longer bar** = more cumulative attention on that feature (among the **top‑N** shown). **Hover** for numeric detail.
-"""
-_HELP_ROLLOUT_TABLE = """
-**What this is:** The same **mean rollout vector** as the bars, but as a **sortable table** of the strongest **{mod}** tokens.
-**How to read:** Rows are **ranked** by weight in the selected cohort. **Batch** embedding tokens are omitted from this view.
-**Takeaway:** Lets you **copy names** or scan exact ordering beyond the bar chart.
 """
-st.title("Feature Insights")
-st.caption("Latent-shift probes, attention rollout, and combined rankings across RNA, ATAC, and Flux.")
 df = io.load_df_features()
 att = io.load_attention_summary()
@@ -47,7 +32,20 @@ if df is None:
     )
     st.stop()
-st.subheader("Attention vs prediction")
 cohort_mode = st.selectbox(
     "Cohort view",
     [
@@ -78,9 +76,6 @@ else:
     ac1, ac2, ac3 = st.columns(3)
     for col, mod in zip((ac1, ac2, ac3), ("RNA", "ATAC", "Flux")):
         with col:
-            _, _hp = st.columns([1, 0.28])
-            with _hp:
-                ui.plot_help_popover(_HELP_ATT_COHORT_BARS, key=f"t4_bar_{mod}_{cohort_mode}")
             st.plotly_chart(
                 plots.attention_cohort_view(att["fi_att"], mod, top_n=top_n_att, mode=cohort_mode),
                 width="stretch",
@@ -115,12 +110,5 @@ else:
                 vec = vec_all[sl["start"] : sl["stop"]]
                 names = att["feature_names"][sl["start"] : sl["stop"]]
                 mini = plots.rollout_top_features_table(names, vec, top_n_att)
-                cap1, cap2 = st.columns([0.82, 0.18])
-                with cap1:
-                    st.caption(mod)
-                with cap2:
-                    ui.plot_help_popover(
-                        _HELP_ROLLOUT_TABLE.format(mod=mod),
-                        key=f"t4_roll_{mod}_{roll_cohort}",
-                    )
                 st.dataframe(mini, hide_index=True, width="stretch")

+"""Feature Insights: attention by predicted cohort."""
 from __future__ import annotations
 ui.inject_app_styles()
+_HELP_PAGE = """
+**Bar charts (RNA, ATAC, Flux columns):** **Mean attention** (rollout) on each **feature token**, averaged over validation cells and split by **what the model predicted**. **Compare** shows cohorts **side‑by‑side**; **All / dead‑end / reprogramming** restrict the average to that predicted class. Uses **predicted** fate, **not** experimental labels. **Longer bar** = more cumulative attention among the **top‑N** shown. **Hover** for numeric detail.
+**Rollout tables:** The same **mean rollout vector** as the bars, as a **sortable table** of the strongest tokens **per modality column**. Rows are **ranked** by weight for the cohort you select. **Batch** embedding tokens are omitted; use the tables to **copy names** or scan ordering beyond the bars.
 """
 df = io.load_df_features()
 att = io.load_attention_summary()
     )
     st.stop()
+st.title(ui.FEATURE_INSIGHTS_TITLE)
+st.caption(ui.FEATURE_INSIGHTS_CAPTION)
+try:
+    _att_h_l, _att_h_r = st.columns([0.94, 0.06], gap="small", vertical_alignment="center")
+except TypeError:
+    _att_h_l, _att_h_r = st.columns([0.94, 0.06], gap="small")
+with _att_h_l:
+    st.subheader("Attention vs prediction")
+with _att_h_r:
+    ui.plot_help_popover(_HELP_PAGE, key="t4_page_help")
+st.caption(
+    "Here, we show mean attention over RNA, ATAC, and Flux tokens conditional on what the model predicted (dead-end, "
+    "reprogramming, or all validation cells), to see which features the model focuses on when predicting different fates."
+)
 cohort_mode = st.selectbox(
     "Cohort view",
     [
     ac1, ac2, ac3 = st.columns(3)
     for col, mod in zip((ac1, ac2, ac3), ("RNA", "ATAC", "Flux")):
         with col:
             st.plotly_chart(
                 plots.attention_cohort_view(att["fi_att"], mod, top_n=top_n_att, mode=cohort_mode),
                 width="stretch",
                 vec = vec_all[sl["start"] : sl["stop"]]
                 names = att["feature_names"][sl["start"] : sl["stop"]]
                 mini = plots.rollout_top_features_table(names, vec, top_n_att)
+                st.caption(mod)
                 st.dataframe(mini, hide_index=True, width="stretch")

streamlit_hf/pages/feature_insights/5_Full_table.py CHANGED Viewed

@@ -1,4 +1,4 @@
-"""Feature Insights — full ranked feature table."""
 from __future__ import annotations
@@ -17,16 +17,13 @@ from streamlit_hf.lib import ui
 ui.inject_app_styles()
 _FULL_TABLE_HELP = """
-**What this is:** The **full ranked feature list** (RNA genes, ATAC peaks, flux reactions) with **shift**, **attention**, and **joint** rank columns from the interpretability pipeline.
 **Key columns:** **mean_rank** (lower = stronger overall), **rank_shift** / **rank_att** (global), modality‑internal ranks, and **importance_*** scores. Where available, **pathway** / **module** annotate flux or gene context.
 **How to use:** **Sort** or **search** in the table toolbar; **download CSV** for spreadsheets or supplementary tables.
 """
-st.title("Feature Insights")
-st.caption("Latent-shift probes, attention rollout, and combined rankings across RNA, ATAC, and Flux.")
 df = io.load_df_features()
 if df is None:
@@ -35,7 +32,13 @@ if df is None:
     )
     st.stop()
 st.subheader("Full table")
 scope = st.radio(
     "Table scope",
     ["All modalities", "Single modality"],
@@ -71,7 +74,7 @@ show_cols = [
     if c in tbl.columns
 ]
 ui.plot_caption_with_help(
-    "All rows for the chosen scope, sorted by **mean rank** (lower = stronger joint priority).",
     _FULL_TABLE_HELP,
     key="t5_table_help",
 )

+"""Feature Insights: full ranked feature table."""
 from __future__ import annotations
 ui.inject_app_styles()
 _FULL_TABLE_HELP = """
+**What this is:** The **full FateFormer ranked feature list** (RNA genes, ATAC TF motifs, flux reactions) with **shift**, **attention**, and **joint** rank columns from the interpretability pipeline.
 **Key columns:** **mean_rank** (lower = stronger overall), **rank_shift** / **rank_att** (global), modality‑internal ranks, and **importance_*** scores. Where available, **pathway** / **module** annotate flux or gene context.
 **How to use:** **Sort** or **search** in the table toolbar; **download CSV** for spreadsheets or supplementary tables.
 """
 df = io.load_df_features()
 if df is None:
     )
     st.stop()
+st.title(ui.FEATURE_INSIGHTS_TITLE)
+st.caption(ui.FEATURE_INSIGHTS_CAPTION)
 st.subheader("Full table")
+st.caption(
+    "Here is the complete ranked feature table for the run (RNA genes, ATAC motifs, flux reactions): every shift, "
+    "attention, and joint rank and score the pipeline emitted."
+)
 scope = st.radio(
     "Table scope",
     ["All modalities", "Single modality"],
     if c in tbl.columns
 ]
 ui.plot_caption_with_help(
+    "Full FateFormer list for the chosen scope, sorted by **mean rank** (lower = stronger joint priority).",
     _FULL_TABLE_HELP,
     key="t5_table_help",
 )

streamlit_hf/pages/flux_analysis/1_Pathway_map.py CHANGED Viewed

@@ -1,4 +1,4 @@
-"""Flux Analysis — pathway sunburst and reaction annotation panels."""
 from __future__ import annotations
@@ -17,34 +17,20 @@ from streamlit_hf.lib import ui
 ui.inject_app_styles()
-_HELP_FLUX_SUNBURST = """
-**What this is:** A **hierarchical view** of **metabolic pathways** and the **individual flux reactions** that rank highest by **mean importance** in this model.
-**How to read it:** **Inner rings** = pathway context; **outer segments** = **reactions**. Larger / more central emphasis (depends on layout) highlights **stronger combined ranking** in the results table. Use the slider to include more or fewer reactions.
-**Takeaway:** Quickly see **which pathways dominate** the model’s flux interpretation layer.
-"""
-_HELP_FLUX_ANNOTATION = """
-**What this is:** **Heatmaps** aligned to the **same top reactions** as the sunburst: each row is a **reaction**, columns summarise **pathway membership**, **differential flux** (Log₂ fold change between fate groups), and **statistical significance**.
-**How to read it:** Scan rows for reactions that are both **statistically notable** and **highly ranked** by the model. **Hover** cells for exact values where Plotly provides tooltips.
-**Takeaway:** Links **statistics on measured flux** to **model-derived importance**.
-"""
-_HELP_FLUX_PROFILE = """
-**What this is:** A compact **profile** of **model‑centric metrics** (e.g. joint ranks) for the same **top reactions**, complementary to the heatmaps.
-**How to read it:** Compare **relative bars/scores** across reactions—**longer** usually means **stronger model priority** for that reaction in this summary.
-**Takeaway:** A second lens that tracks **interpretability scores** rather than raw flux alone.
 """
 st.title("Flux Analysis")
 st.caption(
-    "Reaction-level flux: how pathways, statistics, and model rankings line up. "
-    "For global rank bars and shift vs. attention scatter, open **Feature insights**."
 )
 try:
@@ -67,38 +53,51 @@ else:
         _data_msg = "There are no flux reactions in the current results."
         flux = None
-st.subheader("Pathway map")
 if not _data_ok:
     st.error(_data_msg)
 else:
     st.caption(
-        "**Left:** sunburst of the strongest reactions by mean rank, grouped by pathway. **Right:** heatmaps for the "
-        "same reactions: pathway, differential Log₂FC, and statistical significance, aligned row by row. "
-        "Ranked reaction table: **Reaction ranking**. Curated model edges: **Model metadata**."
     )
     try:
         c1, c2 = st.columns([1.05, 0.95], gap="medium", vertical_alignment="top")
     except TypeError:
         c1, c2 = st.columns([1.05, 0.95], gap="medium")
     with c1:
-        n_sb = st.slider("Reactions in sunburst", 25, 90, 52, key="flux_sb_n")
-        _, _hp = st.columns([1, 0.22])
-        with _hp:
-            ui.plot_help_popover(_HELP_FLUX_SUNBURST, key="flux_sb_help")
         st.plotly_chart(plots.flux_pathway_sunburst(flux, max_features=n_sb), width="stretch")
     with c2:
-        top_n_nb = st.slider("Reactions in annotation + profile", 12, 40, 26, key="flux_nb_n")
-        _, _hp = st.columns([1, 0.22])
-        with _hp:
-            ui.plot_help_popover(_HELP_FLUX_ANNOTATION, key="flux_ann_help")
-        st.plotly_chart(
-            plots.flux_reaction_annotation_panel(flux, top_n=top_n_nb, metric="mean_rank"),
-            width="stretch",
         )
-        _, _hp2 = st.columns([1, 0.22])
-        with _hp2:
-            ui.plot_help_popover(_HELP_FLUX_PROFILE, key="flux_prof_help")
         st.plotly_chart(
-            plots.flux_model_metric_profile(flux, top_n=min(top_n_nb, 24), metric="mean_rank"),
             width="stretch",
         )

+"""Flux Analysis: pathway sunburst and reaction annotation panels."""
 from __future__ import annotations
 ui.inject_app_styles()
+_HELP_PATHWAY_MAP = """
+**Layout:** **Left column:** **sunburst**. **Right column:** **Pathway / Log₂FC / significance** (three **heatmap** columns, one **row** per reaction).
+**Sunburst:** **Inner ring** = **pathway**; **outer ring** = **reaction**. Reactions are the top set by **mean_rank** (FateFormer joint rank; **lower** = stronger). **Wedge size** reflects that ranking. **Colour** = per-reaction **log₂ fold change** in inferred flux for **reprogramming** vs **dead-end** samples (experimental labels).
+**Pathway / Log₂FC / significance:** Shows the top **N** reactions by **mean_rank**, one **row** each, where **N** is set by the **Reactions in heatmap** slider. **Columns:** **Pathway** (categorical colour), **Log₂FC** (reprogramming vs dead-end), **−log₁₀ adjusted p** for that contrast. **Hover** for exact values.
+**Sliders:** **Reactions in sunburst** adjusts only the **left** sunburst. **Reactions in heatmap** sets how many top reactions appear in the **right-hand** heatmap.
 """
 st.title("Flux Analysis")
 st.caption(
+    "**Flux Analysis** ties inferred **reaction flux** to **pathways**, **fate contrasts**, **rankings**, and **model** metadata. "
+    "For multimodal **shift**/**attention** summaries, open **Feature Insights**."
 )
 try:
         _data_msg = "There are no flux reactions in the current results."
         flux = None
+try:
+    _pm_h_l, _pm_h_r = st.columns([0.94, 0.06], gap="small", vertical_alignment="center")
+except TypeError:
+    _pm_h_l, _pm_h_r = st.columns([0.94, 0.06], gap="small")
+with _pm_h_l:
+    st.subheader("Pathway map")
+with _pm_h_r:
+    ui.plot_help_popover(_HELP_PATHWAY_MAP, key="flux_pathway_map_help")
 if not _data_ok:
     st.error(_data_msg)
 else:
     st.caption(
+        "Here, we map top FateFormer-ranked flux reactions into pathway context: a sunburst (pathway → reaction) and a "
+        "heatmap of pathway, log₂ fold change, and significance for reprogramming vs dead-end."
     )
     try:
         c1, c2 = st.columns([1.05, 0.95], gap="medium", vertical_alignment="top")
     except TypeError:
         c1, c2 = st.columns([1.05, 0.95], gap="medium")
     with c1:
+        n_sb = st.slider(
+            "Reactions in sunburst",
+            25,
+            90,
+            52,
+            key="flux_sb_n",
+            help=(
+                "How many **top** flux reactions (by **mean rank**) appear in the **sunburst** only. "
+                "Does not change the heatmap; use the other slider for that."
+            ),
+        )
         st.plotly_chart(plots.flux_pathway_sunburst(flux, max_features=n_sb), width="stretch")
     with c2:
+        top_n_nb = st.slider(
+            "Reactions in heatmap",
+            12,
+            40,
+            26,
+            key="flux_nb_n",
+            help=(
+                "How many **top** flux reactions (by **mean rank**) appear as **rows** in the **Pathway / Log₂FC / significance** heatmap."
+            ),
         )
         st.plotly_chart(
+            plots.flux_reaction_annotation_panel(flux, top_n=top_n_nb, metric="mean_rank"),
             width="stretch",
         )

streamlit_hf/pages/flux_analysis/2_Differential_fate.py CHANGED Viewed

@@ -1,4 +1,4 @@
-"""Flux Analysis — differential flux and fate scatter."""
 from __future__ import annotations
@@ -18,25 +18,25 @@ from streamlit_hf.lib import ui
 ui.inject_app_styles()
 _HELP_FLUX_VOLCANO = """
-**What this is:** A **volcano plot** for **reaction‑level flux**: **horizontal axis** = differential activity (**Log₂ fold change** between fate groups); **vertical axis** = **statistical significance** (\u2212log\u2081\u2080 **adjusted p**).
-**How to read it:** Points **far right/left** change most between groups; points **higher up** are more significant. **Colour** encodes the reaction’s **overall mean rank** in the interpretability table. Unreliable points with **no fold change** and **zero** adjusted p‑value are **dropped**.
-**Takeaway:** Highlights reactions that are both **biologically different** and **interpretable** in the model.
 """
 _HELP_FLUX_FATE_SCATTER = """
-**What this is:** Each **point** is a **flux reaction**. **X** = **average flux** in cells called **dead‑end**; **Y** = average in **reprogramming** cells (per the experimental grouping used in the analysis).
-**How to read it:** Points **above the diagonal** are higher in reprogramming; **below** = higher in dead‑end. **Point size** reflects **combined shift + attention** strength; **colour** = **pathway** (minor categories grouped as *Other*).
-**Takeaway:** Links **raw flux behaviour** to **model emphasis** (size) and **pathway context** (colour).
 """
 st.title("Flux Analysis")
 st.caption(
-    "Reaction-level flux: how pathways, statistics, and model rankings line up. "
-    "For global rank bars and shift vs. attention scatter, open **Feature insights**."
 )
 try:
@@ -60,15 +60,13 @@ else:
         flux = None
 st.subheader("Differential & fate")
 if not _data_ok:
     st.error(_data_msg)
 else:
-    st.caption(
-        "**Volcano:** differential Log₂FC versus significance (\u2212log\u2081\u2080 adjusted p); colour shows overall mean rank. "
-        "Points with essentially no fold change and a zero adjusted p-value are removed as unreliable. "
-        "**Scatter:** average measured flux in dead-end versus reprogramming cells; point size reflects combined shift "
-        "and attention strength; colours mark pathway (largest groups shown, others grouped as *Other*)."
-    )
     b1, b2 = st.columns(2)
     with b1:
         _, _hp = st.columns([1, 0.22])

+"""Flux Analysis: differential flux and fate scatter."""
 from __future__ import annotations
 ui.inject_app_styles()
 _HELP_FLUX_VOLCANO = """
+**What this is:** One **point** per **flux reaction**. **X** = **log₂ fold change** in inferred flux between **dead-end**-labeled and **reprogramming**-labeled samples. **Y** = **−log₁₀ adjusted p-value** for that contrast (multiple-testing adjusted in the results table).
+**How to read it:** **Further from zero on X** = stronger shift between cohorts. **Higher on Y** = stronger statistical evidence. **Colour** = **mean rank** (FateFormer joint rank across the feature table; **lower** rank = stronger overall model focus). Reactions with **~zero** fold change and an **adjusted p of exactly zero** are removed as numerical artifacts.
+**Hover** the points for reaction name, pathway, and related fields.
 """
 _HELP_FLUX_FATE_SCATTER = """
+**What this is:** One **point** per **flux reaction**. **X** = **mean flux** across samples labeled **dead-end**; **Y** = **mean flux** across samples labeled **reprogramming** (same per-sample fate labels as elsewhere in this analysis).
+**How to read it:** The **y = x** line would mark equal average flux in both cohorts. **Above** the diagonal, average flux is **higher in reprogramming** than in dead-end for that reaction; **below**, **higher in dead-end**. **Marker size** scales with **√(latent shift importance × attention importance)** (capped for display). **Colour** = **pathway**; smaller pathway groups are merged into **Other**.
+**Hover** for reaction name, **mean rank**, **log₂FC**, and pathway.
 """
 st.title("Flux Analysis")
 st.caption(
+    "**Flux Analysis** ties inferred **reaction flux** to **pathways**, **fate contrasts**, **rankings**, and **model** metadata. "
+    "For multimodal **shift**/**attention** summaries, open **Feature Insights**."
 )
 try:
         flux = None
 st.subheader("Differential & fate")
+st.caption(
+    "Here, we contrast dead-end and reprogramming cells at the reaction level: a volcano of flux log₂FC vs significance, "
+    "and a scatter of mean flux in each cohort with pathway colouring."
+)
 if not _data_ok:
     st.error(_data_msg)
 else:
     b1, b2 = st.columns(2)
     with b1:
         _, _hp = st.columns([1, 0.22])

streamlit_hf/pages/flux_analysis/3_Reaction_ranking.py CHANGED Viewed

@@ -1,4 +1,4 @@
-"""Flux Analysis — ranked reaction table and download."""
 from __future__ import annotations
@@ -17,17 +17,21 @@ from streamlit_hf.lib import ui
 ui.inject_app_styles()
 _HELP_REACTION_TABLE = """
-**What this is:** A **sortable, filterable** version of the **flux reaction** interpretability table (same reactions as elsewhere in Flux Analysis).
-**Columns:** Typically include **mean_rank** (overall priority), **shift** / **attention** ranks and scores, **pathway** / **module**, and **differential statistics** (e.g. Log₂FC, adjusted *p*) where computed.
-**How to use:** **Filter** by name substring or **pathway**, then **download CSV** for plotting or supplementary material.
 """
 st.title("Flux Analysis")
 st.caption(
-    "Reaction-level flux: how pathways, statistics, and model rankings line up. "
-    "For global rank bars and shift vs. attention scatter, open **Feature insights**."
 )
 try:
@@ -51,20 +55,31 @@ else:
         flux = None
 st.subheader("Reaction ranking")
 if not _data_ok:
     st.error(_data_msg)
 else:
-    ui.plot_caption_with_help(
-        "Filter by reaction name or pathway, then inspect or download the ranked flux table.",
-        _HELP_REACTION_TABLE,
-        key="flux_rank_table_help",
     )
-    q = st.text_input("Substring filter (reaction name)", "", key="flux_q")
     pw_f = st.multiselect(
         "Pathway",
         sorted(flux["pathway"].dropna().unique().astype(str)),
         default=[],
         key="flux_pw_f",
     )
     show = flux
     if q.strip():
@@ -101,4 +116,5 @@ else:
         file_name="fateformer_flux_filtered.csv",
         mime="text/csv",
         key="flux_dl",
     )

+"""Flux Analysis: ranked reaction table and download."""
 from __future__ import annotations
 ui.inject_app_styles()
 _HELP_REACTION_TABLE = """
+**What this is:** The **full FateFormer flux reaction table** for this deployment: one **row** per **reaction** in the metabolic layer, with **joint ranking** and cohort flux statistics from the precomputed results.
+**Ranking:** **mean_rank** = combined **shift + attention** priority (**lower** = stronger overall). **rank_shift_in_modal** / **rank_att_in_modal** and **combined_order_mod** are **within-modality** (Flux-only) ranks; **rank_shift** / **rank_att** are **global** across all features. **importance_shift** / **importance_att** are the underlying scores. **top_10_pct** (if present) flags global top-decile membership from the publish step.
+**Flux / cohort columns:** **mean_de** / **mean_re** = **mean inferred flux** in **dead-end** vs **reprogramming** samples. **log_fc** = **log₂** fold change between those cohorts for that reaction. **pval_adj** = **adjusted p-value** for that contrast. **group** summarises direction or contrast label when present.
+**Context:** **pathway** and **module** annotate the reaction in the reconstruction.
+**Use:** Narrow rows with the **substring** and **pathway** controls; use the table’s own **sort** if your Streamlit build exposes it. **Download** saves the **filtered** view as CSV.
 """
 st.title("Flux Analysis")
 st.caption(
+    "**Flux Analysis** ties inferred **reaction flux** to **pathways**, **fate contrasts**, **rankings**, and **model** metadata. "
+    "For multimodal **shift**/**attention** summaries, open **Feature Insights**."
 )
 try:
         flux = None
 st.subheader("Reaction ranking")
+st.caption(
+    "Here is the searchable flux reaction table: every reaction’s FateFormer ranks, cohort flux summaries, and pathway "
+    "context, with filters and CSV download."
+)
 if not _data_ok:
     st.error(_data_msg)
 else:
+    try:
+        _rr_l, _rr_r = st.columns([0.94, 0.06], gap="small", vertical_alignment="center")
+    except TypeError:
+        _rr_l, _rr_r = st.columns([0.94, 0.06], gap="small")
+    with _rr_r:
+        ui.plot_help_popover(_HELP_REACTION_TABLE, key="flux_rank_table_help")
+    q = st.text_input(
+        "Substring filter (reaction name)",
+        "",
+        key="flux_q",
+        help="Keep rows whose **reaction** string contains this text (case-insensitive). Leave empty for no name filter.",
     )
     pw_f = st.multiselect(
         "Pathway",
         sorted(flux["pathway"].dropna().unique().astype(str)),
         default=[],
         key="flux_pw_f",
+        help="Keep rows in any of the selected **pathways**. Leave empty to include all pathways.",
     )
     show = flux
     if q.strip():
         file_name="fateformer_flux_filtered.csv",
         mime="text/csv",
         key="flux_dl",
+        help="CSV of the **current filtered** table (same columns as on screen), sorted by **mean_rank**.",
     )

streamlit_hf/pages/flux_analysis/4_Model_metadata.py CHANGED Viewed

@@ -1,4 +1,4 @@
-"""Flux Analysis — metabolic model metadata merged with flux table."""
 from __future__ import annotations
@@ -16,73 +16,60 @@ from streamlit_hf.lib import ui
 ui.inject_app_styles()
-_HELP_MODEL_META = """
-**What this is:** **Directed edges** from the **genome‑scale metabolic model** (substrate → product reactions), **merged** with this app’s **flux interpretability table** where reaction identifiers match.
-**How to read it:** Each row is a **model step** you can relate to **pathways** and **model modules**. Use **Model scope** to zoom to one **supermodule** or view **all** edges.
-**Takeaway:** Connects **curated biochemistry** (stoichiometry / wiring) to **data‑driven rankings** from FateFormer.
 """
 st.title("Flux Analysis")
 st.caption(
-    "Reaction-level flux: how pathways, statistics, and model rankings line up. "
-    "For global rank bars and shift vs. attention scatter, open **Feature insights**."
 )
-try:
-    df = io.load_df_features()
-except Exception:
-    df = None
-_data_ok = True
-if df is None:
-    _data_ok = False
-    _data_msg = (
-        "Flux and feature data are not loaded in this session. Reload the app after the maintainer has published "
-        "fresh results, or ask them to check the deployment."
-    )
-    flux = None
-    meta = None
-else:
-    flux = df[df["modality"] == "Flux"].copy()
-    if flux.empty:
-        _data_ok = False
-        _data_msg = "There are no flux reactions in the current results."
-        flux = None
-    meta = io.load_metabolic_model_metadata()
 st.subheader("Metabolic model metadata")
-if not _data_ok:
-    st.error(_data_msg)
 else:
-    ui.plot_caption_with_help(
-        "Directed substrate-to-product steps from the reference model, merged with this flux table where reaction names match.",
-        _HELP_MODEL_META,
-        key="flux_model_meta_help",
     )
-    if meta is None or meta.empty:
-        st.warning("Metabolic model metadata is not available in this build.")
-    else:
-        sm_ids = sorted(meta["Supermodule_id"].dropna().unique().astype(int).tolist())
-        graph_labels = ["All modules"]
-        for sid in sm_ids:
-            cls = str(meta.loc[meta["Supermodule_id"] == sid, "Super.Module.class"].iloc[0])
-            graph_labels.append(f"{sid}: {cls}")
-        tix = st.selectbox(
-            "Model scope",
-            range(len(graph_labels)),
-            format_func=lambda i: graph_labels[i],
-            key="flux_model_scope",
-            help="Show every step in the model, or restrict to one functional module.",
-        )
-        supermodule_id = None if tix == 0 else sm_ids[tix - 1]
-        tbl = io.build_metabolic_model_table(meta, flux, supermodule_id=supermodule_id)
-        st.dataframe(tbl, width="stretch", hide_index=True)
-        st.download_button(
-            "Download metabolic model metadata (CSV)",
-            tbl.to_csv(index=False).encode("utf-8"),
-            file_name="fateformer_metabolic_model_edges.csv",
-            mime="text/csv",
-            key="flux_model_dl",
-        )

+"""Flux Analysis: scFEA metabolic model metadata table."""
 from __future__ import annotations
 ui.inject_app_styles()
+_SCFEA_PMC = "https://pmc.ncbi.nlm.nih.gov/articles/PMC8494226/"
+_SCFEA_GITHUB = "https://github.com/changwn/scFEA"
+_HELP_MODEL_META = f"""
+**Source:** This table comes from the **metabolic model metadata** of **scFEA** (single-cell flux estimation), which is used to infer reaction flux from scRNA-seq data. Open access article: [{_SCFEA_PMC}]({_SCFEA_PMC}) (*Genome Research*, 2021). Code and model resources: [{_SCFEA_GITHUB}]({_SCFEA_GITHUB}).
+**What this is:** The **scFEA** metabolic model info used for inferring fluxomic data from scRNA-seq (one row per substrate → product reaction).
 """
 st.title("Flux Analysis")
 st.caption(
+    "**Flux Analysis** ties inferred **reaction flux** to **pathways**, **fate contrasts**, **rankings**, and **model** metadata. "
+    "For multimodal **shift**/**attention** summaries, open **Feature Insights**."
 )
+meta = io.load_metabolic_model_metadata()
 st.subheader("Metabolic model metadata")
+st.caption(
+    f"Here is the scFEA metabolic model metadata used to interpret flux features: modules, compounds, and reaction names. "
+    f"[Paper]({_SCFEA_PMC}), [GitHub]({_SCFEA_GITHUB})."
+)
+if meta is None or meta.empty:
+    st.error("Metabolic model metadata is not available in this build.")
 else:
+    try:
+        _mm_l, _mm_r = st.columns([0.94, 0.06], gap="small", vertical_alignment="center")
+    except TypeError:
+        _mm_l, _mm_r = st.columns([0.94, 0.06], gap="small")
+    with _mm_r:
+        ui.plot_help_popover(_HELP_MODEL_META, key="flux_model_meta_help")
+    sm_ids = sorted(meta["Supermodule_id"].dropna().unique().astype(int).tolist())
+    graph_labels = ["All modules"]
+    for sid in sm_ids:
+        cls = str(meta.loc[meta["Supermodule_id"] == sid, "Super.Module.class"].iloc[0])
+        graph_labels.append(f"{sid}: {cls}")
+    tix = st.selectbox(
+        "Model scope",
+        range(len(graph_labels)),
+        format_func=lambda i: graph_labels[i],
+        key="flux_model_scope",
+        help=(
+            "**All modules:** every edge row in the metadata CSV. **Named supermodule:** only edges with that "
+            "**Supermodule_id** (class label shown in the menu)."
+        ),
+    )
+    supermodule_id = None if tix == 0 else sm_ids[tix - 1]
+    tbl = io.build_metabolic_model_table(meta, supermodule_id=supermodule_id)
+    st.dataframe(tbl, width="stretch", hide_index=True)
+    st.download_button(
+        "Download metabolic model metadata (CSV)",
+        tbl.to_csv(index=False).encode("utf-8"),
+        file_name="fateformer_metabolic_model_edges.csv",
+        mime="text/csv",
+        key="flux_model_dl",
+        help="CSV export of the table above for the current **Model scope**.",
     )

streamlit_hf/pages/flux_analysis/5_Interactive_map.py CHANGED Viewed

@@ -1,4 +1,4 @@
-"""Flux Analysis — metabolic map with searchable side panel."""
 from __future__ import annotations
@@ -18,18 +18,18 @@ from streamlit_hf.lib import ui
 ui.inject_app_styles()
-_HELP_MET_MAP = """
-**What this is:** An **interactive schematic** of the metabolic map: **nodes/labels** are **metabolites** linked to the reconstruction. The **sidebar list** ranks metabolites by the **strongest associated flux reaction** in this deployment (**#1** = top rank).
-**How to use:** **Search** the list (every word must match somewhere in that row). **Hover** metabolite labels on the map for a short **tooltip**. **Pan** (drag background) and **zoom** (scroll or **+ / −**). **Esc** clears search.
-**Takeaway:** A **navigation** layer to relate **pathway geography** to **model-ranked reactions**, not a quantitative flux balance diagram.
 """
 st.title("Flux Analysis")
 st.caption(
-    "Reaction-level flux: how pathways, statistics, and model rankings line up. "
-    "For global rank bars and shift vs. attention scatter, open **Feature insights**."
 )
@@ -210,7 +210,7 @@ function renderMetList(q){
     if(n++>=cap) break;
     const div=document.createElement('div');
     div.className='met-item'+(listHighlightKey===mrow.key?' hl':'');
-    const rk=mrow.importance_rank!=null?`<strong>#${mrow.importance_rank}</strong>`:'<span>—</span>';
     div.innerHTML=`<span class="nm">${escapeHtml(mrow.name)}</span><span class="rk">${rk}<br/><span style="opacity:.85">${mrow.n_reactions} rxn</span></span>`;
     div.addEventListener('mouseenter',ev=>{
       document.querySelectorAll('.met-item').forEach(x=>x.classList.remove('hl'));
@@ -314,8 +314,9 @@ init();
 st.subheader("Metabolic map")
 ui.plot_caption_with_help(
-    "Browse metabolites tied to the reconstruction and flux layer. The number is the rank of the strongest linked step (1 = top).",
     _HELP_MET_MAP,
     key="flux_map_help",
 )

+"""Flux Analysis: metabolic map with searchable side panel."""
 from __future__ import annotations
 ui.inject_app_styles()
+_NAR_FATEFORMER_URL = "https://academic.oup.com/nar/article/51/W1/W180/7175334"
+_HELP_MET_MAP = f"""
+**Figure (paper):** Network model of key metabolic pathways linked to fate outcomes identified by the model. Important pathways and reactions are mapped onto the **scFLUX** metabolic network schema. **Arrow colour** shows the **log₂ fold change** in **scFEA**-inferred flux between **reprogramming** and **dead-end** cells: **red** = higher flux in reprogramming, **blue** = higher in dead-end. **Black** arrows = no corresponding scFEA entry or no measurable flux difference. **Triple-star** markers in the figure denote **p_adj < 0.001** (two-sample *t*-test with Benjamini–Hochberg correction). Full article: [{_NAR_FATEFORMER_URL}]({_NAR_FATEFORMER_URL})
+**In this explorer:** The same schematic is **interactive**: **metabolites** on the map link to the reconstruction. The **sidebar** ranks metabolites by the **strongest associated flux reaction** in this deployment (**#1** = top). **Search** the list (every word must match somewhere in that row). **Hover** labels for a **tooltip**. **Pan** (drag background) and **zoom** (scroll or **+ / −**); **Esc** clears search. Use it as a **navigation** layer between **pathway geography** and **model-ranked reactions**, not a quantitative flux-balance diagram.
 """
 st.title("Flux Analysis")
 st.caption(
+    "**Flux Analysis** ties inferred **reaction flux** to **pathways**, **fate contrasts**, **rankings**, and **model** metadata. "
+    "For multimodal **shift**/**attention** summaries, open **Feature Insights**."
 )
     if(n++>=cap) break;
     const div=document.createElement('div');
     div.className='met-item'+(listHighlightKey===mrow.key?' hl':'');
+    const rk=mrow.importance_rank!=null?`<strong>#${mrow.importance_rank}</strong>`:'<span>-</span>';
     div.innerHTML=`<span class="nm">${escapeHtml(mrow.name)}</span><span class="rk">${rk}<br/><span style="opacity:.85">${mrow.n_reactions} rxn</span></span>`;
     div.addEventListener('mouseenter',ev=>{
       document.querySelectorAll('.met-item').forEach(x=>x.classList.remove('hl'));
 st.subheader("Metabolic map")
+st.caption("This page shows the interactive metabolic map of important pathways and reactions.")
 ui.plot_caption_with_help(
+    "Browse metabolites tied to the reconstruction and flux layer. The number is the rank of the strongest linked reaction (1 = top).",
     _HELP_MET_MAP,
     key="flux_map_help",
 )

streamlit_hf/pages/gene_expression/1_Pathway_enrichment.py CHANGED Viewed

@@ -1,4 +1,4 @@
-"""Gene expression — Reactome / KEGG pathway enrichment."""
 from __future__ import annotations
@@ -18,34 +18,18 @@ from streamlit_hf.lib import ui
 ui.inject_app_styles()
-_HELP_PATH_BUBBLE_DE = """
-**What this is:** **Pathway over‑representation** among genes linked to **dead‑end** cells (Reactome + KEGG merged view). **Significance** is **Benjamini–Hochberg FDR** (*q* < 0.05).
-**How to read it:** Each **bubble** is a pathway; **position** reflects effect size / enrichment strength; **size** often tracks **gene count** or **significance** (see axis labels and hover). Compare to the **reprogramming** panel for fate‑specific patterns.
-**Takeaway:** Highlights **process‑level** themes in the dead‑end transcriptional state.
-"""
-_HELP_PATH_BUBBLE_RE = """
-**What this is:** The same **enrichment style** as dead‑end, but for genes associated with **reprogramming** outcomes.
-**How to read it:** Interpret **bubble position and size** as in the dead‑end panel. Pathways **strong here but not there** (and vice‑versa) are the most **discriminating**.
-**Takeaway:** Complements RNA‑level interpretability with **known pathway databases**.
-"""
-_HELP_PATH_HEAT = """
-**What this is:** A **gene × pathway** **heatmap** of **membership** among **leading** genes from the enrichment results (Reactome / KEGG). **Empty** cells mean no assignment in that slice of the matrix.
-**How to read it:** **Rows** = genes; **columns** = pathways. **Colour intensity** shows presence/strength of membership depending on the encoding (use **hover**).
-**Takeaway:** Moves from **pathway lists** to a **literal gene‑to‑pathway map** for follow‑up.
 """
 st.title("Gene Expression & TF Activity")
 st.caption(
-    "Pathway enrichment (Reactome / KEGG) and a pathway-gene map; chromVAR-style motif deviations and activity by "
-    "fate; sortable gene and motif tables. Use **Feature Insights** for global shift and attention rankings across modalities."
 )
 df = io.load_df_features()
@@ -59,10 +43,17 @@ if rna.empty and atac.empty:
     st.warning("No RNA gene or ATAC motif features are available in the current results.")
     st.stop()
-st.subheader("Gene pathway enrichment")
 st.caption(
-    "Over-representation of Reactome and KEGG pathways (Benjamini-Hochberg *q* < 0.05). "
-    "The lower panel maps leading genes to pathways; empty grid positions are left clear."
 )
 raw = pathway_data.load_de_re_tsv()
 if raw is None:
@@ -76,9 +67,6 @@ else:
     )
     c1, c2 = st.columns(2, gap="medium")
     with c1:
-        _, _hp = st.columns([1, 0.22])
-        with _hp:
-            ui.plot_help_popover(_HELP_PATH_BUBBLE_DE, key="ge_bubble_de_help")
         st.plotly_chart(
             plots.pathway_enrichment_bubble_panel(
                 mde,
@@ -89,9 +77,6 @@ else:
             width="stretch",
         )
     with c2:
-        _, _hp = st.columns([1, 0.22])
-        with _hp:
-            ui.plot_help_popover(_HELP_PATH_BUBBLE_RE, key="ge_bubble_re_help")
         st.plotly_chart(
             plots.pathway_enrichment_bubble_panel(
                 mre,
@@ -106,7 +91,4 @@ else:
         st.info("No pathway-gene matrix could be built from the current enrichment results.")
     else:
         z, ylabs, xlabs = hm
-        _, _hp = st.columns([1, 0.18])
-        with _hp:
-            ui.plot_help_popover(_HELP_PATH_HEAT, key="ge_path_heat_help")
         st.plotly_chart(plots.pathway_gene_membership_heatmap(z, ylabs, xlabs), width="stretch")

+"""Gene expression: Reactome / KEGG pathway enrichment."""
 from __future__ import annotations
 ui.inject_app_styles()
+_HELP_PATHWAY_ENRICHMENT = """
+**Overview:** **Gene pathway enrichment**: Reactome and KEGG **over-representation** from fate-split **RNA marker** lists, then a **pathway × gene** heatmap of the leading hits.
+**Bubble panels (dead-end vs reprogramming):** **Leading genes** are **grouped by fate** (dead-end vs reprogramming); each panel runs enrichment on that gene set. **Horizontal axis** = **gene ratio** (enrichment table). **Circles** = **Reactome** pathways; **squares** = **KEGG** pathways. **Vertical** position orders pathways; **size** reflects **gene count**; **colour** = **−log₁₀** Benjamini–Hochberg *q* (*q* < 0.05). **Hover** for pathway name, library, count, and *q*. **Compare** left and right panels for cohort-specific pathways.
+**Heatmap:** **Rows** = enriched **pathway terms** (Reactome block, then KEGG). **Columns** = **genes** (from the same fate-split marker lists that fed enrichment) plus a **Library** stripe (**Reactome** vs **KEGG** per row). **Colour** encodes **dead-end** vs **reprogramming** membership for that gene–pathway pair (and the library stripe); **hover** for the exact label. **Empty** cells = no link in this matrix slice.
 """
 st.title("Gene Expression & TF Activity")
 st.caption(
+    "**Pathways** (Reactome / KEGG) and pathway–gene views; **ATAC motif** deviation and **TF activity** by fate; "
+    "**gene** and **motif** tables."
 )
 df = io.load_df_features()
     st.warning("No RNA gene or ATAC motif features are available in the current results.")
     st.stop()
+try:
+    _pe_h_l, _pe_h_r = st.columns([0.94, 0.06], gap="small", vertical_alignment="center")
+except TypeError:
+    _pe_h_l, _pe_h_r = st.columns([0.94, 0.06], gap="small")
+with _pe_h_l:
+    st.subheader("Gene pathway enrichment")
+with _pe_h_r:
+    ui.plot_help_popover(_HELP_PATHWAY_ENRICHMENT, key="ge_pathway_page_help")
 st.caption(
+    "Here, we turn fate-split RNA gene markers into Reactome and KEGG over-representation (bubble panels per cohort), "
+    "then lay out a pathway × gene heatmap for the leading hits."
 )
 raw = pathway_data.load_de_re_tsv()
 if raw is None:
     )
     c1, c2 = st.columns(2, gap="medium")
     with c1:
         st.plotly_chart(
             plots.pathway_enrichment_bubble_panel(
                 mde,
             width="stretch",
         )
     with c2:
         st.plotly_chart(
             plots.pathway_enrichment_bubble_panel(
                 mre,
         st.info("No pathway-gene matrix could be built from the current enrichment results.")
     else:
         z, ylabs, xlabs = hm
         st.plotly_chart(plots.pathway_gene_membership_heatmap(z, ylabs, xlabs), width="stretch")

streamlit_hf/pages/gene_expression/2_Motif_activity.py CHANGED Viewed

@@ -1,4 +1,4 @@
-"""Gene expression — TF motif activity (chromVAR-style)."""
 from __future__ import annotations
@@ -17,26 +17,18 @@ from streamlit_hf.lib import ui
 ui.inject_app_styles()
-_HELP_MOTIF_VOLC = """
-**What this is:** A **volcano‑style** summary of **TF motif** differences from the **ATAC** layer (**chromVAR‑like** scores): **X** = change between fate groups (typically **reprogramming − dead‑end**); **Y** = **significance**.
-**How to read it:** **Extreme horizontal** motifs differ most between fates; **higher vertical** motifs are more statistically supported. **Hover** for motif names.
-**Takeaway:** Links **chromatin accessibility** motifs to **fate bias** beyond gene‑level RNA.
-"""
-_HELP_MOTIF_SCATTER = """
-**What this is:** **Mean TF motif activity** (**z‑scored**) in **dead‑end** (**X**) versus **reprogramming** (**Y**) cells.
-**How to read it:** Points **above the diagonal** are more active in reprogramming; **below** favour dead‑end. **Colour / size** follow the same convention as **Feature Insights** motif views—use **hover** for identifiers.
-**Takeaway:** A **direct fate‑vs‑fate** comparison of **regulatory** programmes inferred from accessibility.
 """
 st.title("Gene Expression & TF Activity")
 st.caption(
-    "Pathway enrichment (Reactome / KEGG) and a pathway-gene map; chromVAR-style motif deviations and activity by "
-    "fate; sortable gene and motif tables. Use **Feature Insights** for global shift and attention rankings across modalities."
 )
 df = io.load_df_features()
@@ -50,24 +42,26 @@ if rna.empty and atac.empty:
     st.warning("No RNA gene or ATAC motif features are available in the current results.")
     st.stop()
-st.subheader("Motif activity")
 if atac.empty:
     st.warning("No motif-level ATAC features are available in the current results.")
 else:
-    st.caption(
-        "Left: mean motif score difference (reprogramming − dead-end) versus significance. "
-        "Right: mean activity in each fate; colour and size follow the same encoding as in **Feature Insights**."
-    )
     a1, a2 = st.columns(2, gap="medium")
     with a1:
-        _, _hp = st.columns([1, 0.22])
-        with _hp:
-            ui.plot_help_popover(_HELP_MOTIF_VOLC, key="ge_motif_vol_help")
         st.plotly_chart(plots.motif_chromvar_volcano(atac), width="stretch")
     with a2:
-        _, _hp = st.columns([1, 0.22])
-        with _hp:
-            ui.plot_help_popover(_HELP_MOTIF_SCATTER, key="ge_motif_sc_help")
         st.plotly_chart(
             plots.notebook_style_activity_scatter(
                 atac,

+"""Gene expression: ATAC TF motif deviation and activity."""
 from __future__ import annotations
 ui.inject_app_styles()
+_HELP_MOTIF_ACTIVITY = """
+**Overview:** **ATAC** **TF motif** plots: **differential** activity between fate labels (left), then **per-fate mean** z-scored activity (right). Scores summarize **motif-level** signal from the accessibility layer.
+**Left (volcano):** **X** = **mean difference** in motif activity (**reprogramming − dead-end**). **Y** = **−log₁₀ adjusted p** (or a precomputed log-*p* column when the table provides it). **Colour** = **mean rank** (joint FateFormer rank; **lower** = stronger). **Hover** for motif name, *p*, **mean rank**, and cohort fields when present.
+**Right (scatter):** **X** / **Y** = **mean z-scored** motif activity in **dead-end** vs **reprogramming** cells. The **y = x** line would mark equal average activity; **above** the diagonal means **higher in reprogramming**. **Colour** = **−log₁₀ adjusted p** (red scale; **higher** = more significant). **Hover** for motif, **mean rank**, and **group**.
 """
 st.title("Gene Expression & TF Activity")
 st.caption(
+    "**Pathways** (Reactome / KEGG) and pathway–gene views; **ATAC motif** deviation and **TF activity** by fate; "
+    "**gene** and **motif** tables."
 )
 df = io.load_df_features()
     st.warning("No RNA gene or ATAC motif features are available in the current results.")
     st.stop()
+try:
+    _ma_h_l, _ma_h_r = st.columns([0.94, 0.06], gap="small", vertical_alignment="center")
+except TypeError:
+    _ma_h_l, _ma_h_r = st.columns([0.94, 0.06], gap="small")
+with _ma_h_l:
+    st.subheader("Motif activity")
+with _ma_h_r:
+    ui.plot_help_popover(_HELP_MOTIF_ACTIVITY, key="ge_motif_page_help")
+st.caption(
+    "Here, we summarize ATAC TF motif behaviour: differential shift between dead-end and reprogramming (volcano), then "
+    "per-fate mean z-scored activity in a scatter."
+)
 if atac.empty:
     st.warning("No motif-level ATAC features are available in the current results.")
 else:
     a1, a2 = st.columns(2, gap="medium")
     with a1:
         st.plotly_chart(plots.motif_chromvar_volcano(atac), width="stretch")
     with a2:
         st.plotly_chart(
             plots.notebook_style_activity_scatter(
                 atac,

streamlit_hf/pages/gene_expression/3_Gene_table.py CHANGED Viewed

@@ -1,4 +1,4 @@
-"""Gene expression — searchable gene ranking table."""
 from __future__ import annotations
@@ -17,6 +17,10 @@ from streamlit_hf.lib import ui
 ui.inject_app_styles()
 TABLE_COLS = [
     "mean_rank",
     "feature",
@@ -44,8 +48,8 @@ def _table_cols(show: pd.DataFrame) -> list[str]:
 st.title("Gene Expression & TF Activity")
 st.caption(
-    "Pathway enrichment (Reactome / KEGG) and a pathway-gene map; chromVAR-style motif deviations and activity by "
-    "fate; sortable gene and motif tables. Use **Feature Insights** for global shift and attention rankings across modalities."
 )
 df = io.load_df_features()
@@ -59,7 +63,19 @@ if rna.empty and atac.empty:
     st.warning("No RNA gene or ATAC motif features are available in the current results.")
     st.stop()
-st.subheader("Gene table")
 if rna.empty:
     st.warning("No RNA gene features are available in the current results.")
 else:

+"""Gene expression: searchable gene ranking table."""
 from __future__ import annotations
 ui.inject_app_styles()
+_HELP_GENE_TABLE = """
+**scRNA-seq** genes used as features in this run: **one row per gene**, sorted by **mean rank** (joint importance). Additional columns are **FateFormer** rank and attribution summaries (within RNA and globally), **per-fate** expression (**dead-end** vs **reprogramming**), and **differential** statistics (*p*-values, log fold change, **group**). Search to narrow the list; use **Download** for a CSV copy.
+"""
 TABLE_COLS = [
     "mean_rank",
     "feature",
 st.title("Gene Expression & TF Activity")
 st.caption(
+    "**Pathways** (Reactome / KEGG) and pathway–gene views; **ATAC motif** deviation and **TF activity** by fate; "
+    "**gene** and **motif** tables."
 )
 df = io.load_df_features()
     st.warning("No RNA gene or ATAC motif features are available in the current results.")
     st.stop()
+try:
+    _gt_h_l, _gt_h_r = st.columns([0.94, 0.06], gap="small", vertical_alignment="center")
+except TypeError:
+    _gt_h_l, _gt_h_r = st.columns([0.94, 0.06], gap="small")
+with _gt_h_l:
+    st.subheader("Gene table")
+with _gt_h_r:
+    ui.plot_help_popover(_HELP_GENE_TABLE, key="ge_gene_table_help")
+st.caption(
+    "Here is a searchable table of all scRNA-seq genes in the feature set, with FateFormer ranks and per-fate expression "
+    "and differential statistics that you can sort, filter by name, or download as CSV."
+)
 if rna.empty:
     st.warning("No RNA gene features are available in the current results.")
 else:

streamlit_hf/pages/gene_expression/4_Motif_table.py CHANGED Viewed

@@ -1,4 +1,4 @@
-"""Gene expression — searchable motif / TF table."""
 from __future__ import annotations
@@ -17,6 +17,10 @@ from streamlit_hf.lib import ui
 ui.inject_app_styles()
 TABLE_COLS = [
     "mean_rank",
     "feature",
@@ -44,8 +48,8 @@ def _table_cols(show: pd.DataFrame) -> list[str]:
 st.title("Gene Expression & TF Activity")
 st.caption(
-    "Pathway enrichment (Reactome / KEGG) and a pathway-gene map; chromVAR-style motif deviations and activity by "
-    "fate; sortable gene and motif tables. Use **Feature Insights** for global shift and attention rankings across modalities."
 )
 df = io.load_df_features()
@@ -59,7 +63,19 @@ if rna.empty and atac.empty:
     st.warning("No RNA gene or ATAC motif features are available in the current results.")
     st.stop()
-st.subheader("Motif table")
 if atac.empty:
     st.warning("No motif-level ATAC features are available in the current results.")
 else:

+"""Gene expression: searchable motif / TF table."""
 from __future__ import annotations
 ui.inject_app_styles()
+_HELP_MOTIF_TABLE = """
+**ATAC** motif / TF features used in this run: **one row per feature**, sorted by **mean rank**. Columns include **FateFormer** ranking and attribution, **per-fate** activity summaries, and **differential** statistics. Search to narrow the list; use **Download** for a CSV copy.
+"""
 TABLE_COLS = [
     "mean_rank",
     "feature",
 st.title("Gene Expression & TF Activity")
 st.caption(
+    "**Pathways** (Reactome / KEGG) and pathway–gene views; **ATAC motif** deviation and **TF activity** by fate; "
+    "**gene** and **motif** tables."
 )
 df = io.load_df_features()
     st.warning("No RNA gene or ATAC motif features are available in the current results.")
     st.stop()
+try:
+    _mt_h_l, _mt_h_r = st.columns([0.94, 0.06], gap="small", vertical_alignment="center")
+except TypeError:
+    _mt_h_l, _mt_h_r = st.columns([0.94, 0.06], gap="small")
+with _mt_h_l:
+    st.subheader("Motif table")
+with _mt_h_r:
+    ui.plot_help_popover(_HELP_MOTIF_TABLE, key="ge_motif_table_help")
+st.caption(
+    "Here is a searchable table of all ATAC motif / TF features, each with FateFormer ranks and per-fate activity and "
+    "differential fields that you can sort, filter by name, or download as CSV."
+)
 if atac.empty:
     st.warning("No motif-level ATAC features are available in the current results.")
 else:

streamlit_hf/static/experiment.svg ADDED Viewed