Spaces:
Running
Running
added help and refined
Browse files
- metabolic_map.svg +0 -0
- requirements.txt +1 -1
- streamlit_hf/app.py +14 -3
- streamlit_hf/home.py +79 -49
- streamlit_hf/lib/io.py +7 -38
- streamlit_hf/lib/plots.py +51 -14
- streamlit_hf/lib/ui.py +20 -2
- streamlit_hf/pages/1_Single_Cell_Explorer.py +46 -23
- streamlit_hf/pages/feature_insights/1_Global_overview.py +21 -9
- streamlit_hf/pages/feature_insights/2_Modality_spotlight.py +34 -37
- streamlit_hf/pages/feature_insights/3_Shift_vs_attention.py +32 -16
- streamlit_hf/pages/feature_insights/4_Attention_vs_prediction.py +19 -31
- streamlit_hf/pages/feature_insights/5_Full_table.py +9 -6
- streamlit_hf/pages/flux_analysis/1_Pathway_map.py +40 -41
- streamlit_hf/pages/flux_analysis/2_Differential_fate.py +13 -15
- streamlit_hf/pages/flux_analysis/3_Reaction_ranking.py +27 -11
- streamlit_hf/pages/flux_analysis/4_Model_metadata.py +46 -59
- streamlit_hf/pages/flux_analysis/5_Interactive_map.py +10 -9
- streamlit_hf/pages/gene_expression/1_Pathway_enrichment.py +17 -35
- streamlit_hf/pages/gene_expression/2_Motif_activity.py +20 -26
- streamlit_hf/pages/gene_expression/3_Gene_table.py +20 -4
- streamlit_hf/pages/gene_expression/4_Motif_table.py +20 -4
- streamlit_hf/static/experiment.svg +355 -0
metabolic_map.svg
DELETED
requirements.txt
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
# FateFormerApp
|
| 2 |
torch>=2.1.0
|
| 3 |
numpy>=1.24.0
|
| 4 |
pandas>=2.0.0
|
|
|
|
| 1 |
+
# FateFormerApp: training, precompute, and local Streamlit dev
|
| 2 |
torch>=2.1.0
|
| 3 |
numpy>=1.24.0
|
| 4 |
pandas>=2.0.0
|
streamlit_hf/app.py
CHANGED
|
@@ -1,13 +1,19 @@
|
|
| 1 |
"""
|
| 2 |
FateFormer Explorer: interactive analysis hub.
|
| 3 |
-
Run
|
| 4 |
"""
|
| 5 |
|
| 6 |
-
from
|
| 7 |
|
| 8 |
-
import
|
|
|
|
| 9 |
|
| 10 |
_APP_DIR = Path(__file__).resolve().parent
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
_ICON_PATH = _APP_DIR / "static" / "app_icon.svg"
|
| 12 |
_page_icon_kw = {"page_icon": str(_ICON_PATH)} if _ICON_PATH.is_file() else {}
|
| 13 |
|
|
@@ -18,6 +24,11 @@ st.set_page_config(
|
|
| 18 |
**_page_icon_kw,
|
| 19 |
)
|
| 20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
_home = str(_APP_DIR / "home.py")
|
| 22 |
_p1 = str(_APP_DIR / "pages" / "1_Single_Cell_Explorer.py")
|
| 23 |
_fi = _APP_DIR / "pages" / "feature_insights"
|
|
|
|
| 1 |
"""
|
| 2 |
FateFormer Explorer: interactive analysis hub.
|
| 3 |
+
Run: streamlit run streamlit_hf/app.py (repo root) or streamlit run app.py (from streamlit_hf/).
|
| 4 |
"""
|
| 5 |
|
| 6 |
+
from __future__ import annotations
|
| 7 |
|
| 8 |
+
import sys
|
| 9 |
+
from pathlib import Path
|
| 10 |
|
| 11 |
_APP_DIR = Path(__file__).resolve().parent
|
| 12 |
+
_REPO_ROOT = _APP_DIR.parent
|
| 13 |
+
if str(_REPO_ROOT) not in sys.path:
|
| 14 |
+
sys.path.insert(0, str(_REPO_ROOT))
|
| 15 |
+
|
| 16 |
+
import streamlit as st
|
| 17 |
_ICON_PATH = _APP_DIR / "static" / "app_icon.svg"
|
| 18 |
_page_icon_kw = {"page_icon": str(_ICON_PATH)} if _ICON_PATH.is_file() else {}
|
| 19 |
|
|
|
|
| 24 |
**_page_icon_kw,
|
| 25 |
)
|
| 26 |
|
| 27 |
+
# Preload shared UI helpers before page scripts. Streamlit's file watcher can
|
| 28 |
+
# delete watched modules from sys.modules on save; if that happens mid-import,
|
| 29 |
+
# importlib may raise KeyError on the module name. Loading here narrows the race.
|
| 30 |
+
import streamlit_hf.lib.ui as _streamlit_ui_preload # noqa: F401, E402
|
| 31 |
+
|
| 32 |
_home = str(_APP_DIR / "home.py")
|
| 33 |
_p1 = str(_APP_DIR / "pages" / "1_Single_Cell_Explorer.py")
|
| 34 |
_fi = _APP_DIR / "pages" / "feature_insights"
|
streamlit_hf/home.py
CHANGED
|
@@ -18,6 +18,11 @@ from streamlit_hf.lib import plots
|
|
| 18 |
from streamlit_hf.lib import ui
|
| 19 |
|
| 20 |
_CACHE = Path(__file__).resolve().parent / "cache"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
|
| 22 |
_APP_NAME = "FateFormer Explorer"
|
| 23 |
_HERO_EMOJI = "\U0001f9ec" # DNA (matches HF Space card tone)
|
|
@@ -27,23 +32,21 @@ _HOME_RANK_TOP_N = 15
|
|
| 27 |
_VALIDATION_ROC_AUC = 0.93
|
| 28 |
|
| 29 |
_UMAP_HOME_TITLE = "Validation latent space (UMAP)"
|
|
|
|
|
|
|
| 30 |
|
| 31 |
-
_UMAP_HELP_MD = """
|
| 32 |
-
**What this is:** A 2‑D **UMAP** of validation cells in the model’s **
|
| 33 |
-
|
| 34 |
-
**How to read it:** Axes are **unitless**—UMAP preserves *local* neighbourhoods, not real physical scales. **Colour** is the **experimental fate** from CellTag‑Multi labels. **Hover** a point for cell-level details.
|
| 35 |
|
| 36 |
-
**
|
| 37 |
"""
|
| 38 |
|
| 39 |
_GLOBAL_RANK_HELP_MD = """
|
| 40 |
-
**What this is:**
|
| 41 |
|
| 42 |
**Panels:** **Left / middle** = top features by **latent shift** importance and by **attention** (bars are **min‑max scaled within that panel** so the longest bar is 1). **Right** = **modality mix** (RNA vs ATAC vs Flux) among a pool of **strongest** features by **mean rank** (lower mean rank = higher joint priority).
|
| 43 |
|
| 44 |
**How to read it:** Longer bars mean stronger measured influence for that metric. **Colours** mark **modality**. The donut answers: “Among the most important features in this pool, which data type dominates?”.
|
| 45 |
-
|
| 46 |
-
**Takeaway:** Connects **mechanistic probes** (shift) with **what the transformer emphasises** (attention) in one glance.
|
| 47 |
"""
|
| 48 |
|
| 49 |
_APP_SUBTITLE = (
|
|
@@ -51,12 +54,15 @@ _APP_SUBTITLE = (
|
|
| 51 |
"to predict single-cell fate, with interpretable attention and latent-shift rankings across omics layers."
|
| 52 |
)
|
| 53 |
|
| 54 |
-
|
| 55 |
-
**
|
|
|
|
| 56 |
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
- **
|
|
|
|
|
|
|
| 60 |
"""
|
| 61 |
|
| 62 |
|
|
@@ -84,6 +90,16 @@ st.markdown(
|
|
| 84 |
unsafe_allow_html=True,
|
| 85 |
)
|
| 86 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
bundle = io.load_latent_bundle()
|
| 88 |
df_features = io.load_df_features()
|
| 89 |
samples = io.load_samples_df()
|
|
@@ -164,26 +180,33 @@ if bundle is not None and df_features is not None:
|
|
| 164 |
with row1_story:
|
| 165 |
st.markdown(_BIOLOGY_CONTEXT_MARKDOWN)
|
| 166 |
with row1_umap:
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 187 |
|
| 188 |
ui.plot_caption_with_help(
|
| 189 |
"Global shift and attention · top features (min-max scaled within each bar chart) · modality mix donut (top by mean rank).",
|
|
@@ -211,22 +234,29 @@ elif bundle is not None:
|
|
| 211 |
with u_story:
|
| 212 |
st.markdown(_BIOLOGY_CONTEXT_MARKDOWN)
|
| 213 |
with u_map:
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 230 |
elif df_features is not None:
|
| 231 |
ui.plot_caption_with_help(
|
| 232 |
"Feature ranking overview · latent UMAP unavailable",
|
|
|
|
| 18 |
from streamlit_hf.lib import ui
|
| 19 |
|
| 20 |
_CACHE = Path(__file__).resolve().parent / "cache"
|
| 21 |
+
_EXPERIMENT_SVG = Path(__file__).resolve().parent / "static" / "experiment.svg"
|
| 22 |
+
# Display width (px) for the home-page schematic; SVG scales cleanly at fixed width.
|
| 23 |
+
_EXPERIMENT_FIGURE_WIDTH_PX = 380
|
| 24 |
+
|
| 25 |
+
_CELLTAG_MULTI_ARTICLE_URL = "https://www.nature.com/articles/s41587-023-01931-4"
|
| 26 |
|
| 27 |
_APP_NAME = "FateFormer Explorer"
|
| 28 |
_HERO_EMOJI = "\U0001f9ec" # DNA (matches HF Space card tone)
|
|
|
|
| 32 |
_VALIDATION_ROC_AUC = 0.93
|
| 33 |
|
| 34 |
_UMAP_HOME_TITLE = "Validation latent space (UMAP)"
|
| 35 |
+
_UMAP_HOME_SUBTITLE = "Each point is a cell · colours = experimental fate labels · validation split"
|
| 36 |
+
_UMAP_HOME_SUBTITLE_RANK_MISSING = "Feature ranking cache unavailable · UMAP only"
|
| 37 |
|
| 38 |
+
_UMAP_HELP_MD = f"""
|
| 39 |
+
**What this is:** A 2‑D **UMAP** of validation **single cells** in the model’s **latent space** (**context vector token representation**), summarised across **5-fold cross-validation**. **2,110** cells are shown.
|
|
|
|
|
|
|
| 40 |
|
| 41 |
+
**How to read it:** Each point is one cell. **Colour** is **experimental fate** from [**CellTag-Multi**]({_CELLTAG_MULTI_ARTICLE_URL}) clonal labels. **Axes are unitless**: UMAP preserves *local* neighbourhoods, not real physical distances, so **nearby points** tend to have similar characteristics in this representation. **Hover** a point for cell-level details. For more detail (interactive UMAP, filters, and metadata), open **Single-Cell Explorer** using the link below.
|
| 42 |
"""
|
| 43 |
|
| 44 |
_GLOBAL_RANK_HELP_MD = """
|
| 45 |
+
**What this is:** The **top important fate-predictor markers** for **FateFormer** across its **three modalities** (**RNA** genes, **TF motifs** from chromatin (ATAC), and **flux** reactions), shown as three linked summaries.
|
| 46 |
|
| 47 |
**Panels:** **Left / middle** = top features by **latent shift** importance and by **attention** (bars are **min‑max scaled within that panel** so the longest bar is 1). **Right** = **modality mix** (RNA vs ATAC vs Flux) among a pool of **strongest** features by **mean rank** (lower mean rank = higher joint priority).
|
| 48 |
|
| 49 |
**How to read it:** Longer bars mean stronger measured influence for that metric. **Colours** mark **modality**. The donut answers: “Among the most important features in this pool, which data type dominates?”.
|
|
|
|
|
|
|
| 50 |
"""
|
| 51 |
|
| 52 |
_APP_SUBTITLE = (
|
|
|
|
| 54 |
"to predict single-cell fate, with interpretable attention and latent-shift rankings across omics layers."
|
| 55 |
)
|
| 56 |
|
| 57 |
+
_EXPERIMENTAL_SYSTEM_MD = f"""
|
| 58 |
+
Mouse embryonic fibroblasts (**MEFs**) were reprogrammed toward induced endoderm progenitors (**iEPs**) **in vitro** through *Foxa1* and *HNF4A* induction. This process produces **mixed outcomes**: some cells successfully reach the **iEP fate**, whereas others diverge into **off-target** trajectories and stall in **dead-end states**. Using [**CellTag-Multi**]({_CELLTAG_MULTI_ARTICLE_URL}) clonal barcoding, **early cells** could be linked to their **later fate**, which made it possible to ask a central biological question: which programs in **early-state cells**, coordinated **across transcriptional, chromatin, and metabolic layers**, drive successful reprogramming, which ones push cells toward off-target states, and which of these mechanisms could be targeted to improve reprogramming efficiency?
|
| 59 |
+
"""
|
| 60 |
|
| 61 |
+
_BIOLOGY_CONTEXT_MARKDOWN = f"""
|
| 62 |
+
**How FateFormer addresses this**
|
| 63 |
+
- **Multimodal view:** FateFormer integrates **scRNA-seq**, **scATAC-seq**, and **genome-scale metabolic flux** to capture regulatory and metabolic signals that are missed by RNA-only analysis.
|
| 64 |
+
- **Grounded in lineage tracing:** The model is trained on a **sparse-modality atlas of more than 150,000 cells**, including **2,110** early cells linked to later outcomes through **CellTag-Multi** clonal barcoding.
|
| 65 |
+
- **Biological insight:** FateFormer learns representations across modalities, handles **missing inputs** and **limited labels**, and, using **explainability methods**, highlights the transcriptional, chromatin, and metabolic programs associated with reprogramming success or off-target failure.
|
| 66 |
"""
|
| 67 |
|
| 68 |
|
|
|
|
| 90 |
unsafe_allow_html=True,
|
| 91 |
)
|
| 92 |
|
| 93 |
+
with st.container(border=True):
|
| 94 |
+
fig_col, text_col = st.columns([0.42, 0.58], gap="large")
|
| 95 |
+
with fig_col:
|
| 96 |
+
if _EXPERIMENT_SVG.is_file():
|
| 97 |
+
st.image(str(_EXPERIMENT_SVG), width=_EXPERIMENT_FIGURE_WIDTH_PX)
|
| 98 |
+
else:
|
| 99 |
+
st.caption("Experimental schematic (`static/experiment.svg`) is missing.")
|
| 100 |
+
with text_col:
|
| 101 |
+
st.markdown(_EXPERIMENTAL_SYSTEM_MD)
|
| 102 |
+
|
| 103 |
bundle = io.load_latent_bundle()
|
| 104 |
df_features = io.load_df_features()
|
| 105 |
samples = io.load_samples_df()
|
|
|
|
| 180 |
with row1_story:
|
| 181 |
st.markdown(_BIOLOGY_CONTEXT_MARKDOWN)
|
| 182 |
with row1_umap:
|
| 183 |
+
try:
|
| 184 |
+
_umap_plot_col, _umap_help_col = st.columns([0.94, 0.06], gap="small", vertical_alignment="top")
|
| 185 |
+
except TypeError:
|
| 186 |
+
_umap_plot_col, _umap_help_col = st.columns([0.94, 0.06], gap="small")
|
| 187 |
+
with _umap_plot_col:
|
| 188 |
+
fig_u = plots.latent_scatter(
|
| 189 |
+
plot_umap,
|
| 190 |
+
"label",
|
| 191 |
+
title=_UMAP_HOME_TITLE,
|
| 192 |
+
width=780,
|
| 193 |
+
height=440,
|
| 194 |
+
marker_size=5.2,
|
| 195 |
+
marker_opacity=0.72,
|
| 196 |
+
subtitle=_UMAP_HOME_SUBTITLE,
|
| 197 |
+
)
|
| 198 |
+
fig_u.update_layout(margin=dict(l=20, r=8, t=92, b=20), title_font_size=15)
|
| 199 |
+
st.plotly_chart(
|
| 200 |
+
fig_u,
|
| 201 |
+
width="stretch",
|
| 202 |
+
config={"displayModeBar": True, "displaylogo": False, "modeBarButtonsToRemove": ["lasso2d", "select2d"]},
|
| 203 |
+
)
|
| 204 |
+
with _umap_help_col:
|
| 205 |
+
ui.plot_help_popover(
|
| 206 |
+
_UMAP_HELP_MD,
|
| 207 |
+
key="home_umap_help",
|
| 208 |
+
page_link=("pages/1_Single_Cell_Explorer.py", "Single-Cell Explorer"),
|
| 209 |
+
)
|
| 210 |
|
| 211 |
ui.plot_caption_with_help(
|
| 212 |
"Global shift and attention · top features (min-max scaled within each bar chart) · modality mix donut (top by mean rank).",
|
|
|
|
| 234 |
with u_story:
|
| 235 |
st.markdown(_BIOLOGY_CONTEXT_MARKDOWN)
|
| 236 |
with u_map:
|
| 237 |
+
try:
|
| 238 |
+
_umap_plot_col2, _umap_help_col2 = st.columns([0.94, 0.06], gap="small", vertical_alignment="top")
|
| 239 |
+
except TypeError:
|
| 240 |
+
_umap_plot_col2, _umap_help_col2 = st.columns([0.94, 0.06], gap="small")
|
| 241 |
+
with _umap_plot_col2:
|
| 242 |
+
fig_u = plots.latent_scatter(
|
| 243 |
+
plot_umap,
|
| 244 |
+
"label",
|
| 245 |
+
title=_UMAP_HOME_TITLE,
|
| 246 |
+
width=820,
|
| 247 |
+
height=480,
|
| 248 |
+
marker_size=5.5,
|
| 249 |
+
marker_opacity=0.72,
|
| 250 |
+
subtitle=_UMAP_HOME_SUBTITLE_RANK_MISSING,
|
| 251 |
+
)
|
| 252 |
+
fig_u.update_layout(margin=dict(l=24, r=12, t=92, b=24), title_font_size=15)
|
| 253 |
+
st.plotly_chart(fig_u, width="stretch", config={"displayModeBar": True, "displaylogo": False})
|
| 254 |
+
with _umap_help_col2:
|
| 255 |
+
ui.plot_help_popover(
|
| 256 |
+
_UMAP_HELP_MD,
|
| 257 |
+
key="home_umap_only_help",
|
| 258 |
+
page_link=("pages/1_Single_Cell_Explorer.py", "Single-Cell Explorer"),
|
| 259 |
+
)
|
| 260 |
elif df_features is not None:
|
| 261 |
ui.plot_caption_with_help(
|
| 262 |
"Feature ranking overview · latent UMAP unavailable",
|
streamlit_hf/lib/io.py
CHANGED
|
@@ -88,13 +88,9 @@ def load_metabolic_model_metadata() -> pd.DataFrame | None:
|
|
| 88 |
|
| 89 |
def build_metabolic_model_table(
|
| 90 |
meta: pd.DataFrame,
|
| 91 |
-
flux_df: pd.DataFrame,
|
| 92 |
supermodule_id: int | None = None,
|
| 93 |
) -> pd.DataFrame:
|
| 94 |
-
"""
|
| 95 |
-
Static edge list: substrate → product, reaction label, module class, plus DE / model columns when the
|
| 96 |
-
reaction string matches a row in the flux feature table.
|
| 97 |
-
"""
|
| 98 |
need = {"Compound_IN_name", "Compound_OUT_name", "rxnName", "Supermodule_id", "Super.Module.class"}
|
| 99 |
if not need.issubset(set(meta.columns)):
|
| 100 |
return pd.DataFrame()
|
|
@@ -103,36 +99,9 @@ def build_metabolic_model_table(
|
|
| 103 |
m = m[m["Supermodule_id"] == int(supermodule_id)]
|
| 104 |
if m.empty:
|
| 105 |
return pd.DataFrame()
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
fd = fd.drop_duplicates("_rk", keep="first").set_index("_rk", drop=False)
|
| 110 |
-
|
| 111 |
-
rows: list[dict] = []
|
| 112 |
-
for _, r in m.iterrows():
|
| 113 |
-
k = normalize_reaction_key(str(r["rxnName"]))
|
| 114 |
-
base = {
|
| 115 |
-
"Supermodule": r.get("Super.Module.class"),
|
| 116 |
-
"Module_id": r.get("Module_id"),
|
| 117 |
-
"Substrate": r["Compound_IN_name"],
|
| 118 |
-
"Product": r["Compound_OUT_name"],
|
| 119 |
-
"Reaction": r["rxnName"],
|
| 120 |
-
}
|
| 121 |
-
if k in fd.index:
|
| 122 |
-
row = fd.loc[k]
|
| 123 |
-
if isinstance(row, pd.DataFrame):
|
| 124 |
-
row = row.iloc[0]
|
| 125 |
-
base["log_fc"] = row["log_fc"] if "log_fc" in row.index else None
|
| 126 |
-
base["pval_adj"] = row["pval_adj"] if "pval_adj" in row.index else None
|
| 127 |
-
base["mean_rank"] = row["mean_rank"] if "mean_rank" in row.index else None
|
| 128 |
-
base["pathway"] = row["pathway"] if "pathway" in row.index else None
|
| 129 |
-
else:
|
| 130 |
-
base["log_fc"] = None
|
| 131 |
-
base["pval_adj"] = None
|
| 132 |
-
base["mean_rank"] = None
|
| 133 |
-
base["pathway"] = None
|
| 134 |
-
rows.append(base)
|
| 135 |
-
return pd.DataFrame(rows)
|
| 136 |
|
| 137 |
|
| 138 |
def _normalize_metabolite_token(name: str) -> str:
|
|
@@ -319,7 +288,7 @@ def build_metabolite_map_bundle(
|
|
| 319 |
if smods:
|
| 320 |
lines.append(f"Modules: {html.escape(' · '.join(smods[:4]))}")
|
| 321 |
if best_importance is not None:
|
| 322 |
-
lines.append(f"Strongest linked
|
| 323 |
|
| 324 |
top_rx = sorted(
|
| 325 |
uniq_rx,
|
|
@@ -350,12 +319,12 @@ def build_metabolite_map_bundle(
|
|
| 350 |
)
|
| 351 |
if precursors:
|
| 352 |
lines.append(
|
| 353 |
-
f"<span style='color:#656d76'>Model precursors (substrates in linked
|
| 354 |
f"{html.escape(', '.join(precursors[:8]))}"
|
| 355 |
)
|
| 356 |
if products:
|
| 357 |
lines.append(
|
| 358 |
-
f"<span style='color:#656d76'>Model products (downstream in linked
|
| 359 |
f"{html.escape(', '.join(products[:8]))}"
|
| 360 |
)
|
| 361 |
|
|
|
|
| 88 |
|
| 89 |
def build_metabolic_model_table(
|
| 90 |
meta: pd.DataFrame,
|
|
|
|
| 91 |
supermodule_id: int | None = None,
|
| 92 |
) -> pd.DataFrame:
|
| 93 |
+
"""Rows from ``metabolic_model_metadata.csv`` (all file columns except a stray ``Unnamed: 0`` index column)."""
|
|
|
|
|
|
|
|
|
|
| 94 |
need = {"Compound_IN_name", "Compound_OUT_name", "rxnName", "Supermodule_id", "Super.Module.class"}
|
| 95 |
if not need.issubset(set(meta.columns)):
|
| 96 |
return pd.DataFrame()
|
|
|
|
| 99 |
m = m[m["Supermodule_id"] == int(supermodule_id)]
|
| 100 |
if m.empty:
|
| 101 |
return pd.DataFrame()
|
| 102 |
+
if "Unnamed: 0" in m.columns:
|
| 103 |
+
m = m.drop(columns=["Unnamed: 0"])
|
| 104 |
+
return m.reset_index(drop=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
|
| 106 |
|
| 107 |
def _normalize_metabolite_token(name: str) -> str:
|
|
|
|
| 288 |
if smods:
|
| 289 |
lines.append(f"Modules: {html.escape(' · '.join(smods[:4]))}")
|
| 290 |
if best_importance is not None:
|
| 291 |
+
lines.append(f"Strongest linked reaction: #{best_importance}")
|
| 292 |
|
| 293 |
top_rx = sorted(
|
| 294 |
uniq_rx,
|
|
|
|
| 319 |
)
|
| 320 |
if precursors:
|
| 321 |
lines.append(
|
| 322 |
+
f"<span style='color:#656d76'>Model precursors (substrates in linked reactions)</span><br/>"
|
| 323 |
f"{html.escape(', '.join(precursors[:8]))}"
|
| 324 |
)
|
| 325 |
if products:
|
| 326 |
lines.append(
|
| 327 |
+
f"<span style='color:#656d76'>Model products (downstream in linked reactions)</span><br/>"
|
| 328 |
f"{html.escape(', '.join(products[:8]))}"
|
| 329 |
)
|
| 330 |
|
streamlit_hf/lib/plots.py
CHANGED
|
@@ -98,6 +98,7 @@ def latent_scatter(
|
|
| 98 |
height: int = 520,
|
| 99 |
marker_size: float = 5.0,
|
| 100 |
marker_opacity: float = 0.78,
|
|
|
|
| 101 |
):
|
| 102 |
d = df.copy()
|
| 103 |
hover_spec = {
|
|
@@ -159,7 +160,8 @@ def latent_scatter(
|
|
| 159 |
width=width,
|
| 160 |
height=height,
|
| 161 |
)
|
| 162 |
-
|
|
|
|
| 163 |
common["title"] = title
|
| 164 |
if continuous:
|
| 165 |
fig = px.scatter(
|
|
@@ -178,7 +180,10 @@ def latent_scatter(
|
|
| 178 |
fig.update_traces(
|
| 179 |
marker=dict(size=marker_size, opacity=marker_opacity, line=dict(width=0.25, color="rgba(255,255,255,0.4)"))
|
| 180 |
)
|
| 181 |
-
|
|
|
|
|
|
|
|
|
|
| 182 |
fig.update_layout(
|
| 183 |
template="plotly_white",
|
| 184 |
font=PLOT_FONT,
|
|
@@ -190,7 +195,20 @@ def latent_scatter(
|
|
| 190 |
paper_bgcolor=PAGE_BG,
|
| 191 |
plot_bgcolor=PAGE_BG,
|
| 192 |
)
|
| 193 |
-
if
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 194 |
fig.update_layout(title=None)
|
| 195 |
fig.update_xaxes(showticklabels=False, showgrid=True, gridcolor="rgba(0,0,0,0.06)", zeroline=False)
|
| 196 |
fig.update_yaxes(showticklabels=False, showgrid=True, gridcolor="rgba(0,0,0,0.06)", zeroline=False)
|
|
@@ -198,20 +216,29 @@ def latent_scatter(
|
|
| 198 |
|
| 199 |
|
| 200 |
def rank_scatter_shift_vs_attention(df_mod, modality: str, width: int = 420, height: int = 440):
|
| 201 |
-
"""Attention rank on x, shift rank on y, least-squares trend
|
| 202 |
need = ("shift_order_mod", "attention_order_mod")
|
| 203 |
if not all(c in df_mod.columns for c in need):
|
| 204 |
return go.Figure()
|
| 205 |
sub = df_mod.dropna(subset=list(need)).copy()
|
| 206 |
if sub.empty:
|
| 207 |
return go.Figure()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 208 |
x = sub["attention_order_mod"].astype(float).to_numpy()
|
| 209 |
y = sub["shift_order_mod"].astype(float).to_numpy()
|
| 210 |
fig = px.scatter(
|
| 211 |
sub,
|
| 212 |
x="attention_order_mod",
|
| 213 |
y="shift_order_mod",
|
| 214 |
-
color="
|
| 215 |
hover_name="feature",
|
| 216 |
hover_data={
|
| 217 |
"mean_rank": True,
|
|
@@ -221,14 +248,16 @@ def rank_scatter_shift_vs_attention(df_mod, modality: str, width: int = 420, hei
|
|
| 221 |
labels={
|
| 222 |
"attention_order_mod": "Attention rank",
|
| 223 |
"shift_order_mod": "Shift rank",
|
|
|
|
| 224 |
},
|
|
|
|
| 225 |
width=width,
|
| 226 |
height=height,
|
| 227 |
color_discrete_map={
|
| 228 |
-
"
|
| 229 |
-
"
|
| 230 |
-
"
|
| 231 |
-
"
|
| 232 |
},
|
| 233 |
)
|
| 234 |
fig.update_traces(marker=dict(size=7, opacity=0.62, line=dict(width=0.5, color="rgba(15,23,42,0.28)")))
|
|
@@ -258,7 +287,14 @@ def rank_scatter_shift_vs_attention(df_mod, modality: str, width: int = 420, hei
|
|
| 258 |
font=dict(size=14, family=PLOT_FONT["family"]),
|
| 259 |
),
|
| 260 |
margin=dict(l=48, r=20, t=52, b=72),
|
| 261 |
-
legend=dict(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 262 |
)
|
| 263 |
return fig
|
| 264 |
|
|
@@ -1094,7 +1130,7 @@ def pathway_enrichment_bubble_panel(
|
|
| 1094 |
def pathway_gene_membership_heatmap(
|
| 1095 |
z: np.ndarray, row_labels: list[str], col_labels: list[str]
|
| 1096 |
) -> go.Figure:
|
| 1097 |
-
"""Pathway × gene grid; empty cells
|
| 1098 |
if z.size == 0:
|
| 1099 |
return go.Figure()
|
| 1100 |
|
|
@@ -1113,10 +1149,11 @@ def pathway_gene_membership_heatmap(
|
|
| 1113 |
|
| 1114 |
# Discrete codes 0–4 must not use z/4 (3→0.75 landed in the KEGG band). Map to fixed slots.
|
| 1115 |
_z_plot = {0: 0.04, 1: 0.24, 2: 0.44, 3: 0.64, 4: 0.84}
|
| 1116 |
-
|
|
|
|
| 1117 |
colorscale_main = [
|
| 1118 |
-
[0.0,
|
| 1119 |
-
[0.14,
|
| 1120 |
[0.15, "#e69138"],
|
| 1121 |
[0.33, "#e69138"],
|
| 1122 |
[0.34, "#7eb6d9"],
|
|
|
|
| 98 |
height: int = 520,
|
| 99 |
marker_size: float = 5.0,
|
| 100 |
marker_opacity: float = 0.78,
|
| 101 |
+
subtitle: str | None = None,
|
| 102 |
):
|
| 103 |
d = df.copy()
|
| 104 |
hover_spec = {
|
|
|
|
| 160 |
width=width,
|
| 161 |
height=height,
|
| 162 |
)
|
| 163 |
+
# Title + subtitle are applied via update_layout when `subtitle` is set (needs plotly.py >= 5.23, which added `layout.title.subtitle`).
|
| 164 |
+
if title and not subtitle:
|
| 165 |
common["title"] = title
|
| 166 |
if continuous:
|
| 167 |
fig = px.scatter(
|
|
|
|
| 180 |
fig.update_traces(
|
| 181 |
marker=dict(size=marker_size, opacity=marker_opacity, line=dict(width=0.25, color="rgba(255,255,255,0.4)"))
|
| 182 |
)
|
| 183 |
+
if title and subtitle:
|
| 184 |
+
top_margin = 88
|
| 185 |
+
else:
|
| 186 |
+
top_margin = 56 if title else 28
|
| 187 |
fig.update_layout(
|
| 188 |
template="plotly_white",
|
| 189 |
font=PLOT_FONT,
|
|
|
|
| 195 |
paper_bgcolor=PAGE_BG,
|
| 196 |
plot_bgcolor=PAGE_BG,
|
| 197 |
)
|
| 198 |
+
if title and subtitle:
|
| 199 |
+
fig.update_layout(
|
| 200 |
+
title=dict(
|
| 201 |
+
text=title,
|
| 202 |
+
x=0.5,
|
| 203 |
+
xanchor="center",
|
| 204 |
+
font=dict(size=16, family=PLOT_FONT["family"]),
|
| 205 |
+
subtitle=dict(
|
| 206 |
+
text=subtitle,
|
| 207 |
+
font=dict(size=11, color="#64748b", family=PLOT_FONT["family"]),
|
| 208 |
+
),
|
| 209 |
+
),
|
| 210 |
+
)
|
| 211 |
+
elif not title:
|
| 212 |
fig.update_layout(title=None)
|
| 213 |
fig.update_xaxes(showticklabels=False, showgrid=True, gridcolor="rgba(0,0,0,0.06)", zeroline=False)
|
| 214 |
fig.update_yaxes(showticklabels=False, showgrid=True, gridcolor="rgba(0,0,0,0.06)", zeroline=False)
|
|
|
|
| 216 |
|
| 217 |
|
| 218 |
def rank_scatter_shift_vs_attention(df_mod, modality: str, width: int = 420, height: int = 440):
|
| 219 |
+
"""Attention rank on x, shift rank on y, least-squares trend, colours by top ~10% within this modality."""
|
| 220 |
need = ("shift_order_mod", "attention_order_mod")
|
| 221 |
if not all(c in df_mod.columns for c in need):
|
| 222 |
return go.Figure()
|
| 223 |
sub = df_mod.dropna(subset=list(need)).copy()
|
| 224 |
if sub.empty:
|
| 225 |
return go.Figure()
|
| 226 |
+
n = len(sub)
|
| 227 |
+
top_k = max(1, int(np.ceil(0.1 * n)))
|
| 228 |
+
s_ok = sub["shift_order_mod"].astype(int) <= top_k
|
| 229 |
+
a_ok = sub["attention_order_mod"].astype(int) <= top_k
|
| 230 |
+
sub["_tier_label"] = np.where(
|
| 231 |
+
s_ok & a_ok,
|
| 232 |
+
"Both",
|
| 233 |
+
np.where(s_ok, "Shift", np.where(a_ok, "Attention", "Neither")),
|
| 234 |
+
)
|
| 235 |
x = sub["attention_order_mod"].astype(float).to_numpy()
|
| 236 |
y = sub["shift_order_mod"].astype(float).to_numpy()
|
| 237 |
fig = px.scatter(
|
| 238 |
sub,
|
| 239 |
x="attention_order_mod",
|
| 240 |
y="shift_order_mod",
|
| 241 |
+
color="_tier_label",
|
| 242 |
hover_name="feature",
|
| 243 |
hover_data={
|
| 244 |
"mean_rank": True,
|
|
|
|
| 248 |
labels={
|
| 249 |
"attention_order_mod": "Attention rank",
|
| 250 |
"shift_order_mod": "Shift rank",
|
| 251 |
+
"_tier_label": "Top-10% tier",
|
| 252 |
},
|
| 253 |
+
category_orders={"_tier_label": ["Both", "Shift", "Attention", "Neither"]},
|
| 254 |
width=width,
|
| 255 |
height=height,
|
| 256 |
color_discrete_map={
|
| 257 |
+
"Both": PALETTE[0],
|
| 258 |
+
"Shift": PALETTE[1],
|
| 259 |
+
"Attention": PALETTE[2],
|
| 260 |
+
"Neither": "#94a3b8",
|
| 261 |
},
|
| 262 |
)
|
| 263 |
fig.update_traces(marker=dict(size=7, opacity=0.62, line=dict(width=0.5, color="rgba(15,23,42,0.28)")))
|
|
|
|
| 287 |
font=dict(size=14, family=PLOT_FONT["family"]),
|
| 288 |
),
|
| 289 |
margin=dict(l=48, r=20, t=52, b=72),
|
| 290 |
+
legend=dict(
|
| 291 |
+
title=dict(text="Among top 10% features?"),
|
| 292 |
+
orientation="h",
|
| 293 |
+
yanchor="top",
|
| 294 |
+
y=-0.2,
|
| 295 |
+
xanchor="center",
|
| 296 |
+
x=0.5,
|
| 297 |
+
),
|
| 298 |
)
|
| 299 |
return fig
|
| 300 |
|
|
|
|
| 1130 |
def pathway_gene_membership_heatmap(
|
| 1131 |
z: np.ndarray, row_labels: list[str], col_labels: list[str]
|
| 1132 |
) -> go.Figure:
|
| 1133 |
+
"""Pathway × gene grid; empty cells use a light tint vs page white; Reactome/KEGG as a narrow left row spine."""
|
| 1134 |
if z.size == 0:
|
| 1135 |
return go.Figure()
|
| 1136 |
|
|
|
|
| 1149 |
|
| 1150 |
# Discrete codes 0–4 must not use z/4 (3→0.75 landed in the KEGG band). Map to fixed slots.
|
| 1151 |
_z_plot = {0: 0.04, 1: 0.24, 2: 0.44, 3: 0.64, 4: 0.84}
|
| 1152 |
+
# Slight contrast vs PAGE_BG (#fff) so empty (code 0) cells read as a grid, not “missing” paint.
|
| 1153 |
+
_empty_cell = "#f1f5f9"
|
| 1154 |
colorscale_main = [
|
| 1155 |
+
[0.0, _empty_cell],
|
| 1156 |
+
[0.14, _empty_cell],
|
| 1157 |
[0.15, "#e69138"],
|
| 1158 |
[0.33, "#e69138"],
|
| 1159 |
[0.34, "#7eb6d9"],
|
streamlit_hf/lib/ui.py
CHANGED
|
@@ -4,6 +4,12 @@ from __future__ import annotations
|
|
| 4 |
|
| 5 |
import streamlit as st
|
| 6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
def inject_app_styles() -> None:
|
| 9 |
"""Panel labels, page background, and shared chrome (all pages)."""
|
|
@@ -69,8 +75,17 @@ section[data-testid="stMain"] h1 {
|
|
| 69 |
)
|
| 70 |
|
| 71 |
|
| 72 |
-
def plot_help_popover(
|
| 73 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
with st.popover(
|
| 75 |
" ",
|
| 76 |
help="What does this figure show?",
|
|
@@ -80,6 +95,9 @@ def plot_help_popover(help_md: str, *, key: str) -> None:
|
|
| 80 |
key=key,
|
| 81 |
):
|
| 82 |
st.markdown(help_md)
|
|
|
|
|
|
|
|
|
|
| 83 |
|
| 84 |
|
| 85 |
def plot_caption_with_help(caption: str, help_md: str, *, key: str) -> None:
|
|
|
|
| 4 |
|
| 5 |
import streamlit as st
|
| 6 |
|
| 7 |
+
# Feature Insights multipage hub: same title + tagline on every sub-page.
|
| 8 |
+
FEATURE_INSIGHTS_TITLE = "Feature Insights"
|
| 9 |
+
FEATURE_INSIGHTS_CAPTION = (
|
| 10 |
+
"Latent-shift probes, attention rollout, and combined rankings across RNA, ATAC, and Flux."
|
| 11 |
+
)
|
| 12 |
+
|
| 13 |
|
| 14 |
def inject_app_styles() -> None:
|
| 15 |
"""Panel labels, page background, and shared chrome (all pages)."""
|
|
|
|
| 75 |
)
|
| 76 |
|
| 77 |
|
| 78 |
+
def plot_help_popover(
|
| 79 |
+
help_md: str,
|
| 80 |
+
*,
|
| 81 |
+
key: str,
|
| 82 |
+
page_link: tuple[str, str] | None = None,
|
| 83 |
+
) -> None:
|
| 84 |
+
"""Small help control next to a figure; opens Markdown guidance for biologists.
|
| 85 |
+
|
| 86 |
+
If ``page_link`` is ``(path, label)``, a ``st.page_link`` is rendered after the markdown
|
| 87 |
+
(e.g. ``("pages/1_Single_Cell_Explorer.py", "Single-Cell Explorer")``).
|
| 88 |
+
"""
|
| 89 |
with st.popover(
|
| 90 |
" ",
|
| 91 |
help="What does this figure show?",
|
|
|
|
| 95 |
key=key,
|
| 96 |
):
|
| 97 |
st.markdown(help_md)
|
| 98 |
+
if page_link:
|
| 99 |
+
page_path, page_label = page_link
|
| 100 |
+
st.page_link(page_path, label=page_label)
|
| 101 |
|
| 102 |
|
| 103 |
def plot_caption_with_help(caption: str, help_md: str, *, key: str) -> None:
|
streamlit_hf/pages/1_Single_Cell_Explorer.py
CHANGED
|
@@ -19,16 +19,24 @@ from streamlit_hf.lib import ui
|
|
| 19 |
|
| 20 |
ui.inject_app_styles()
|
| 21 |
|
| 22 |
-
|
| 23 |
-
**What this is:** The same kind of **2‑D UMAP** as on Home, but you choose **what to colour** (fate label, model prediction, fold, modalities present, etc.) and can **filter** cells.
|
| 24 |
|
| 25 |
-
|
|
|
|
| 26 |
|
| 27 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
"""
|
| 29 |
|
| 30 |
st.title("Single-Cell Explorer")
|
| 31 |
-
st.caption(
|
|
|
|
|
|
|
|
|
|
| 32 |
|
| 33 |
bundle = io.load_latent_bundle()
|
| 34 |
if bundle is None:
|
|
@@ -107,21 +115,31 @@ if plot_df.empty:
|
|
| 107 |
st.stop()
|
| 108 |
|
| 109 |
with right:
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 125 |
|
| 126 |
st.subheader("Selected points")
|
| 127 |
state = st.session_state.get("latent_pick")
|
|
@@ -155,12 +173,17 @@ else:
|
|
| 155 |
)
|
| 156 |
|
| 157 |
st.subheader("Inspect by dataset index")
|
|
|
|
|
|
|
| 158 |
pick = st.number_input(
|
| 159 |
"Dataset index",
|
| 160 |
-
min_value=
|
| 161 |
-
max_value=
|
| 162 |
value=int(df["dataset_idx"].iloc[0]),
|
| 163 |
-
help=
|
|
|
|
|
|
|
|
|
|
| 164 |
)
|
| 165 |
row = df[df["dataset_idx"] == pick]
|
| 166 |
if not row.empty:
|
|
|
|
| 19 |
|
| 20 |
ui.inject_app_styles()
|
| 21 |
|
| 22 |
+
_CELLTAG_MULTI_ARTICLE_URL = "https://www.nature.com/articles/s41587-023-01931-4"
|
|
|
|
| 23 |
|
| 24 |
+
_UMAP_EXPLORER_TITLE = "Validation latent space (UMAP)"
|
| 25 |
+
_UMAP_EXPLORER_SUBTITLE = "Hover points for details · drag on the plot to select cells"
|
| 26 |
|
| 27 |
+
_UMAP_EXPLORER_HELP = f"""
|
| 28 |
+
**What this is:** The same **2‑D UMAP** as on **Home**: validation **single cells** in **FateFormer**’s **latent space** (**context vector token representation**), summarised across **5-fold cross-validation** (**2,110** cells before filters). Here you **choose what to colour** and **filter** the cloud.
|
| 29 |
+
|
| 30 |
+
**How to read it:** Each point is one cell. **Colour** comes from **Colour by**: e.g. [**CellTag-Multi**]({_CELLTAG_MULTI_ARTICLE_URL}) **label**, **predicted fate**, **prediction correct / wrong**, **CV fold**, **batch**, which **modalities** are present, or **dominant fate %**. **Axes are unitless** (UMAP preserves *local* neighbourhoods only). **Hover** a point for per-cell fields.
|
| 31 |
+
|
| 32 |
+
**Using this page:** Use **Filters** to keep modality combinations, restrict **prediction outcome** (all / correct only / wrong only), choose **CV folds**, and set a **dominant fate %** range. In the plot **toolbar** (top right), pick **Box select** or **Lasso select**, then **drag** on the canvas; the app **reruns** and the **Selected points** table fills with those rows. To inspect **one** cell without a selection, scroll to **Inspect by dataset index**.
|
| 33 |
"""
|
| 34 |
|
| 35 |
st.title("Single-Cell Explorer")
|
| 36 |
+
st.caption(
|
| 37 |
+
"This page is an interactive **validation UMAP** in FateFormer latent space: you choose how points are **coloured**, "
|
| 38 |
+
"apply **filters**, and can **select** cells on the plot to inspect them in a table or by index."
|
| 39 |
+
)
|
| 40 |
|
| 41 |
bundle = io.load_latent_bundle()
|
| 42 |
if bundle is None:
|
|
|
|
| 115 |
st.stop()
|
| 116 |
|
| 117 |
with right:
|
| 118 |
+
try:
|
| 119 |
+
_sc_umap_plot_col, _sc_umap_help_col = st.columns([0.94, 0.06], gap="small", vertical_alignment="top")
|
| 120 |
+
except TypeError:
|
| 121 |
+
_sc_umap_plot_col, _sc_umap_help_col = st.columns([0.94, 0.06], gap="small")
|
| 122 |
+
with _sc_umap_plot_col:
|
| 123 |
+
fig = plots.latent_scatter(
|
| 124 |
+
plot_df,
|
| 125 |
+
color_opt,
|
| 126 |
+
title=_UMAP_EXPLORER_TITLE,
|
| 127 |
+
width=900,
|
| 128 |
+
height=560,
|
| 129 |
+
marker_size=5.8,
|
| 130 |
+
marker_opacity=0.74,
|
| 131 |
+
subtitle=_UMAP_EXPLORER_SUBTITLE,
|
| 132 |
+
)
|
| 133 |
+
fig.update_layout(margin=dict(l=20, r=12, t=92, b=20), title_font_size=15)
|
| 134 |
+
st.plotly_chart(
|
| 135 |
+
fig,
|
| 136 |
+
width="stretch",
|
| 137 |
+
on_select="rerun",
|
| 138 |
+
key="latent_pick",
|
| 139 |
+
config={"displayModeBar": True, "displaylogo": False},
|
| 140 |
+
)
|
| 141 |
+
with _sc_umap_help_col:
|
| 142 |
+
ui.plot_help_popover(_UMAP_EXPLORER_HELP, key="sc_umap_help")
|
| 143 |
|
| 144 |
st.subheader("Selected points")
|
| 145 |
state = st.session_state.get("latent_pick")
|
|
|
|
| 173 |
)
|
| 174 |
|
| 175 |
st.subheader("Inspect by dataset index")
|
| 176 |
+
_didx_min = int(df["dataset_idx"].min())
|
| 177 |
+
_didx_max = int(df["dataset_idx"].max())
|
| 178 |
pick = st.number_input(
|
| 179 |
"Dataset index",
|
| 180 |
+
min_value=_didx_min,
|
| 181 |
+
max_value=_didx_max,
|
| 182 |
value=int(df["dataset_idx"].iloc[0]),
|
| 183 |
+
help=(
|
| 184 |
+
f"The table below is a one-cell summary for the validation set: choose an index from {_didx_min} to {_didx_max} "
|
| 185 |
+
"to see fate labels, model prediction, available modalities, and related fields for that cell."
|
| 186 |
+
),
|
| 187 |
)
|
| 188 |
row = df[df["dataset_idx"] == pick]
|
| 189 |
if not row.empty:
|
streamlit_hf/pages/feature_insights/1_Global_overview.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
"""Feature Insights
|
| 2 |
|
| 3 |
from __future__ import annotations
|
| 4 |
|
|
@@ -18,18 +18,13 @@ from streamlit_hf.lib import ui
|
|
| 18 |
ui.inject_app_styles()
|
| 19 |
|
| 20 |
_GLOBAL_OVERVIEW_HELP = """
|
| 21 |
-
**What this is:**
|
| 22 |
|
| 23 |
-
**Panels:** **Shift** and **attention** bar charts show the **top‑N** features for each metric (**min‑max scaled within that chart**). The **pie** shows
|
| 24 |
|
| 25 |
-
**How to read it:** **
|
| 26 |
-
|
| 27 |
-
**Takeaway:** See whether interpretability is **RNA‑heavy**, **metabolism‑heavy**, or **balanced** before drilling into modality pages.
|
| 28 |
"""
|
| 29 |
|
| 30 |
-
st.title("Feature Insights")
|
| 31 |
-
st.caption("Latent-shift probes, attention rollout, and combined rankings across RNA, ATAC, and Flux.")
|
| 32 |
-
|
| 33 |
df = io.load_df_features()
|
| 34 |
|
| 35 |
if df is None:
|
|
@@ -38,7 +33,14 @@ if df is None:
|
|
| 38 |
)
|
| 39 |
st.stop()
|
| 40 |
|
|
|
|
|
|
|
| 41 |
st.subheader("Global overview")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
c1, c2 = st.columns(2)
|
| 43 |
with c1:
|
| 44 |
top_n_bars = st.slider(
|
|
@@ -47,6 +49,11 @@ with c1:
|
|
| 47 |
45,
|
| 48 |
20,
|
| 49 |
key="t1_topn_bars",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
)
|
| 51 |
with c2:
|
| 52 |
top_n_pie = st.slider(
|
|
@@ -55,6 +62,11 @@ with c2:
|
|
| 55 |
250,
|
| 56 |
100,
|
| 57 |
key="t1_topn_pie",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
)
|
| 59 |
ui.plot_caption_with_help(
|
| 60 |
"Global top features by shift vs attention; pie = modality mix among strongest mean-rank pool.",
|
|
|
|
| 1 |
+
"""Feature Insights: global overview of multimodal feature importance."""
|
| 2 |
|
| 3 |
from __future__ import annotations
|
| 4 |
|
|
|
|
| 18 |
ui.inject_app_styles()
|
| 19 |
|
| 20 |
_GLOBAL_OVERVIEW_HELP = """
|
| 21 |
+
**What this is:** The **top important fate-predictor markers** for **FateFormer** across its **three modalities** (**RNA** genes, **TF motifs** from chromatin (ATAC), and **flux** reactions), as a **global** view that combines **latent shift** probes and **attention rollout** over the full model.
|
| 22 |
|
| 23 |
+
**Panels:** **Shift** and **attention** bar charts show the **top‑N** features for each metric (**min‑max scaled within that chart**, longest bar = 1). The **pie chart** (right) shows **modality mix** (RNA vs ATAC vs Flux) among a pool of **strongest** features by **mean rank** (**lower mean rank** = higher joint priority).
|
| 24 |
|
| 25 |
+
**How to read it:** **Longer bars** mean stronger measured influence for that metric. **Colours** mark **modality**. Use the **sliders** above to change bar count and pie pool size. The **pie chart** answers: “Among the most important features in this pool, which data type dominates?”
|
|
|
|
|
|
|
| 26 |
"""
|
| 27 |
|
|
|
|
|
|
|
|
|
|
| 28 |
df = io.load_df_features()
|
| 29 |
|
| 30 |
if df is None:
|
|
|
|
| 33 |
)
|
| 34 |
st.stop()
|
| 35 |
|
| 36 |
+
st.title(ui.FEATURE_INSIGHTS_TITLE)
|
| 37 |
+
st.caption(ui.FEATURE_INSIGHTS_CAPTION)
|
| 38 |
st.subheader("Global overview")
|
| 39 |
+
st.caption(
|
| 40 |
+
"Here, we give a bird's-eye view of which RNA, ATAC, and Flux features matter most: top-N bars for latent shift and "
|
| 41 |
+
"attention (two explainability methods), plus a pie of modality mix among the strongest features by mean rank "
|
| 42 |
+
"(sliders change list sizes)."
|
| 43 |
+
)
|
| 44 |
c1, c2 = st.columns(2)
|
| 45 |
with c1:
|
| 46 |
top_n_bars = st.slider(
|
|
|
|
| 49 |
45,
|
| 50 |
20,
|
| 51 |
key="t1_topn_bars",
|
| 52 |
+
help=(
|
| 53 |
+
"How many features appear in the left (latent shift) and middle (attention) bar charts: the top N by each "
|
| 54 |
+
"metric. Each chart is min–max scaled on its own (longest bar = 1). Increase N to list more markers; "
|
| 55 |
+
"decrease N to focus on the strongest few."
|
| 56 |
+
),
|
| 57 |
)
|
| 58 |
with c2:
|
| 59 |
top_n_pie = st.slider(
|
|
|
|
| 62 |
250,
|
| 63 |
100,
|
| 64 |
key="t1_topn_pie",
|
| 65 |
+
help=(
|
| 66 |
+
"How many features define the right-hand pie chart: the N strongest by mean rank (lower mean rank = "
|
| 67 |
+
"stronger joint ranking across shift and attention). A larger pool gives a broader modality mix "
|
| 68 |
+
"(RNA vs ATAC vs Flux); a smaller pool weights only the very top joint features."
|
| 69 |
+
),
|
| 70 |
)
|
| 71 |
ui.plot_caption_with_help(
|
| 72 |
"Global top features by shift vs attention; pie = modality mix among strongest mean-rank pool.",
|
streamlit_hf/pages/feature_insights/2_Modality_spotlight.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
"""Feature Insights
|
| 2 |
|
| 3 |
from __future__ import annotations
|
| 4 |
|
|
@@ -17,31 +17,16 @@ from streamlit_hf.lib import ui
|
|
| 17 |
|
| 18 |
ui.inject_app_styles()
|
| 19 |
|
| 20 |
-
|
| 21 |
-
**
|
| 22 |
|
| 23 |
-
**
|
| 24 |
|
| 25 |
-
**
|
| 26 |
-
"""
|
| 27 |
-
|
| 28 |
-
_HELP_SHIFT = """
|
| 29 |
-
**What this is:** **{mod}** features with highest **latent shift** importance—those whose perturbation **moves the model’s latent state** most.
|
| 30 |
-
|
| 31 |
-
**How to read it:** **Longer bar** = larger shift score within this **top‑N** list (compare lengths across features).
|
| 32 |
|
| 33 |
-
**
|
| 34 |
"""
|
| 35 |
|
| 36 |
-
_HELP_ATT = """
|
| 37 |
-
**What this is:** **{mod}** features with highest **attention** importance from rollout—what the **transformer emphasises** when processing cells.
|
| 38 |
-
|
| 39 |
-
**How to read it:** **Longer bar** = more average attention mass on that feature (within this top‑N list).
|
| 40 |
-
|
| 41 |
-
**Takeaway:** Describes **model behaviour** (what it “looks at”), which can differ from causal shift effects.
|
| 42 |
-
"""
|
| 43 |
-
st.caption("Latent-shift probes, attention rollout, and combined rankings across RNA, ATAC, and Flux.")
|
| 44 |
-
|
| 45 |
df = io.load_df_features()
|
| 46 |
|
| 47 |
if df is None:
|
|
@@ -50,17 +35,36 @@ if df is None:
|
|
| 50 |
)
|
| 51 |
st.stop()
|
| 52 |
|
| 53 |
-
st.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
st.caption(
|
| 55 |
-
"
|
| 56 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
)
|
| 58 |
-
|
| 59 |
st.markdown("##### Joint top markers (by mean rank)")
|
| 60 |
st.caption(
|
| 61 |
-
"
|
| 62 |
-
"Shift and attention bars are **min-max scaled within this top-N list** (0 to 1) so you can compare them on one axis. "
|
| 63 |
-
"Hover a bar for the full feature name."
|
| 64 |
)
|
| 65 |
r1a, r1b, r1c = st.columns(3)
|
| 66 |
for col, mod in zip((r1a, r1b, r1c), ("RNA", "ATAC", "Flux")):
|
|
@@ -68,13 +72,11 @@ for col, mod in zip((r1a, r1b, r1c), ("RNA", "ATAC", "Flux")):
|
|
| 68 |
if sm.empty:
|
| 69 |
continue
|
| 70 |
with col:
|
| 71 |
-
_, _hp = st.columns([1, 0.28])
|
| 72 |
-
with _hp:
|
| 73 |
-
ui.plot_help_popover(_HELP_JOINT.format(mod=mod), key=f"t2_joint_{mod}")
|
| 74 |
st.plotly_chart(
|
| 75 |
plots.joint_shift_attention_top_features(sm, mod, top_n_rank),
|
| 76 |
width="stretch",
|
| 77 |
)
|
|
|
|
| 78 |
st.markdown("##### Shift importance")
|
| 79 |
r2a, r2b, r2c = st.columns(3)
|
| 80 |
for col, mod in zip((r2a, r2b, r2c), ("RNA", "ATAC", "Flux")):
|
|
@@ -84,9 +86,6 @@ for col, mod in zip((r2a, r2b, r2c), ("RNA", "ATAC", "Flux")):
|
|
| 84 |
colc = plots.MODALITY_COLOR.get(mod, plots.PALETTE[0])
|
| 85 |
sub = sm.nlargest(top_n_rank, "importance_shift").sort_values("importance_shift", ascending=True)
|
| 86 |
with col:
|
| 87 |
-
_, _hp = st.columns([1, 0.28])
|
| 88 |
-
with _hp:
|
| 89 |
-
ui.plot_help_popover(_HELP_SHIFT.format(mod=mod), key=f"t2_shift_{mod}")
|
| 90 |
st.plotly_chart(
|
| 91 |
plots.rank_bar(
|
| 92 |
sub,
|
|
@@ -98,6 +97,7 @@ for col, mod in zip((r2a, r2b, r2c), ("RNA", "ATAC", "Flux")):
|
|
| 98 |
),
|
| 99 |
width="stretch",
|
| 100 |
)
|
|
|
|
| 101 |
st.markdown("##### Attention importance")
|
| 102 |
r3a, r3b, r3c = st.columns(3)
|
| 103 |
for col, mod in zip((r3a, r3b, r3c), ("RNA", "ATAC", "Flux")):
|
|
@@ -107,9 +107,6 @@ for col, mod in zip((r3a, r3b, r3c), ("RNA", "ATAC", "Flux")):
|
|
| 107 |
colc = plots.MODALITY_COLOR.get(mod, plots.PALETTE[0])
|
| 108 |
sub = sm.nlargest(top_n_rank, "importance_att").sort_values("importance_att", ascending=True)
|
| 109 |
with col:
|
| 110 |
-
_, _hp = st.columns([1, 0.28])
|
| 111 |
-
with _hp:
|
| 112 |
-
ui.plot_help_popover(_HELP_ATT.format(mod=mod), key=f"t2_att_{mod}")
|
| 113 |
st.plotly_chart(
|
| 114 |
plots.rank_bar(
|
| 115 |
sub,
|
|
|
|
| 1 |
+
"""Feature Insights: modality spotlight (RNA, ATAC, Flux)."""
|
| 2 |
|
| 3 |
from __future__ import annotations
|
| 4 |
|
|
|
|
| 17 |
|
| 18 |
ui.inject_app_styles()
|
| 19 |
|
| 20 |
+
_HELP_PAGE = """
|
| 21 |
+
**Layout:** Three modality columns (**RNA**, **ATAC**, **Flux**). Each column uses only that modality’s features (**genes**, **TF motifs** from chromatin, or **metabolic reactions**).
|
| 22 |
|
| 23 |
+
**Joint row** (*Joint top markers*): Features ordered by **mean rank** (combined shift + attention; **lower mean rank** = stronger joint priority). Each row is one feature with **two bars** (shift and attention), **min–max scaled within this top‑N list** (0–1) so both are comparable. **Hover** a bar for the full name.
|
| 24 |
|
| 25 |
+
**Shift row** (*Shift importance*): **Shift-only** top **N** by latent shift score per column. **Longer bar** = larger shift in this list. **Hover** for the full name.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
|
| 27 |
+
**Attention row** (*Attention importance*): **Attention-only** top **N** by rollout importance per column. **Longer bar** = more average attention. **Hover** for the full name.
|
| 28 |
"""
|
| 29 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
df = io.load_df_features()
|
| 31 |
|
| 32 |
if df is None:
|
|
|
|
| 35 |
)
|
| 36 |
st.stop()
|
| 37 |
|
| 38 |
+
st.title(ui.FEATURE_INSIGHTS_TITLE)
|
| 39 |
+
st.caption(ui.FEATURE_INSIGHTS_CAPTION)
|
| 40 |
+
try:
|
| 41 |
+
_spot_h_l, _spot_h_r = st.columns([0.94, 0.06], gap="small", vertical_alignment="center")
|
| 42 |
+
except TypeError:
|
| 43 |
+
_spot_h_l, _spot_h_r = st.columns([0.94, 0.06], gap="small")
|
| 44 |
+
with _spot_h_l:
|
| 45 |
+
st.subheader("Modality spotlight")
|
| 46 |
+
with _spot_h_r:
|
| 47 |
+
ui.plot_help_popover(_HELP_PAGE, key="t2_page_help")
|
| 48 |
st.caption(
|
| 49 |
+
"Here, we zoom into one modality at a time (RNA, ATAC, or Flux) to explore top fate predictor markers: for each column "
|
| 50 |
+
"you see joint top markers, then shift-only and attention-only rankings side by side so within-modality comparisons "
|
| 51 |
+
"stay fair."
|
| 52 |
+
)
|
| 53 |
+
top_n_rank = st.slider(
|
| 54 |
+
"Top N per chart",
|
| 55 |
+
10,
|
| 56 |
+
55,
|
| 57 |
+
20,
|
| 58 |
+
key="t2_topn",
|
| 59 |
+
help=(
|
| 60 |
+
"Number of features in each chart on this page: the joint (mean-rank) row, the shift-only row, "
|
| 61 |
+
"and the attention-only row all use this N within each modality column."
|
| 62 |
+
),
|
| 63 |
)
|
| 64 |
+
|
| 65 |
st.markdown("##### Joint top markers (by mean rank)")
|
| 66 |
st.caption(
|
| 67 |
+
"Joint row: strongest by mean rank; shift and attention bars scaled within this top-N list. Hover a bar for the full name."
|
|
|
|
|
|
|
| 68 |
)
|
| 69 |
r1a, r1b, r1c = st.columns(3)
|
| 70 |
for col, mod in zip((r1a, r1b, r1c), ("RNA", "ATAC", "Flux")):
|
|
|
|
| 72 |
if sm.empty:
|
| 73 |
continue
|
| 74 |
with col:
|
|
|
|
|
|
|
|
|
|
| 75 |
st.plotly_chart(
|
| 76 |
plots.joint_shift_attention_top_features(sm, mod, top_n_rank),
|
| 77 |
width="stretch",
|
| 78 |
)
|
| 79 |
+
|
| 80 |
st.markdown("##### Shift importance")
|
| 81 |
r2a, r2b, r2c = st.columns(3)
|
| 82 |
for col, mod in zip((r2a, r2b, r2c), ("RNA", "ATAC", "Flux")):
|
|
|
|
| 86 |
colc = plots.MODALITY_COLOR.get(mod, plots.PALETTE[0])
|
| 87 |
sub = sm.nlargest(top_n_rank, "importance_shift").sort_values("importance_shift", ascending=True)
|
| 88 |
with col:
|
|
|
|
|
|
|
|
|
|
| 89 |
st.plotly_chart(
|
| 90 |
plots.rank_bar(
|
| 91 |
sub,
|
|
|
|
| 97 |
),
|
| 98 |
width="stretch",
|
| 99 |
)
|
| 100 |
+
|
| 101 |
st.markdown("##### Attention importance")
|
| 102 |
r3a, r3b, r3c = st.columns(3)
|
| 103 |
for col, mod in zip((r3a, r3b, r3c), ("RNA", "ATAC", "Flux")):
|
|
|
|
| 107 |
colc = plots.MODALITY_COLOR.get(mod, plots.PALETTE[0])
|
| 108 |
sub = sm.nlargest(top_n_rank, "importance_att").sort_values("importance_att", ascending=True)
|
| 109 |
with col:
|
|
|
|
|
|
|
|
|
|
| 110 |
st.plotly_chart(
|
| 111 |
plots.rank_bar(
|
| 112 |
sub,
|
streamlit_hf/pages/feature_insights/3_Shift_vs_attention.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
"""Feature Insights
|
| 2 |
|
| 3 |
from __future__ import annotations
|
| 4 |
|
|
@@ -18,16 +18,24 @@ from streamlit_hf.lib import ui
|
|
| 18 |
|
| 19 |
ui.inject_app_styles()
|
| 20 |
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
""
|
|
|
|
| 28 |
|
| 29 |
-
|
| 30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
|
| 32 |
df = io.load_df_features()
|
| 33 |
|
|
@@ -37,11 +45,13 @@ if df is None:
|
|
| 37 |
)
|
| 38 |
st.stop()
|
| 39 |
|
|
|
|
|
|
|
| 40 |
st.subheader("Shift vs attention")
|
| 41 |
st.caption(
|
| 42 |
-
"
|
| 43 |
-
"
|
| 44 |
-
"
|
| 45 |
)
|
| 46 |
corr_rows = []
|
| 47 |
for mod in ("RNA", "ATAC", "Flux"):
|
|
@@ -61,14 +71,20 @@ for mod in ("RNA", "ATAC", "Flux"):
|
|
| 61 |
}
|
| 62 |
)
|
| 63 |
if corr_rows:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
st.dataframe(pd.DataFrame(corr_rows), hide_index=True, width="stretch")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
rc1, rc2, rc3 = st.columns(3)
|
| 66 |
for col, mod in zip((rc1, rc2, rc3), ("RNA", "ATAC", "Flux")):
|
| 67 |
with col:
|
| 68 |
sub_m = df[df["modality"] == mod]
|
| 69 |
-
_, _hp = st.columns([1, 0.28])
|
| 70 |
-
with _hp:
|
| 71 |
-
ui.plot_help_popover(_HELP_SHIFT_VS_ATT.format(mod=mod), key=f"t3_scatter_{mod}")
|
| 72 |
st.plotly_chart(
|
| 73 |
plots.rank_scatter_shift_vs_attention(sub_m, mod),
|
| 74 |
width="stretch",
|
|
|
|
| 1 |
+
"""Feature Insights: shift vs attention rank scatter by modality."""
|
| 2 |
|
| 3 |
from __future__ import annotations
|
| 4 |
|
|
|
|
| 18 |
|
| 19 |
ui.inject_app_styles()
|
| 20 |
|
| 21 |
+
# Native Streamlit tooltips (caption help); plain text reads well in the small ? popover.
|
| 22 |
+
_CORR_TABLE_HELP = (
|
| 23 |
+
"Per-modality correlation between attention rank and latent-shift rank across features in that modality "
|
| 24 |
+
"(same features as in the scatters below). Pearson r and Spearman rho measure rank agreement, with p-values. "
|
| 25 |
+
"# features is how many features in that modality were used for the correlation (one rank pair per feature). "
|
| 26 |
+
"Higher |r| means stronger agreement in how features are ordered: a feature that ranks high on shift (small rank; 1 = strongest) "
|
| 27 |
+
"tends to sit in a similar place on attention rank, and the same for weaker features, across that modality."
|
| 28 |
+
)
|
| 29 |
|
| 30 |
+
_SCATTER_HELP = (
|
| 31 |
+
"Each dot is one feature in that column: a gene (RNA), TF motif (ATAC), or reaction (Flux). "
|
| 32 |
+
"X = attention rank (1 = strongest in that modality); Y = latent shift rank (1 = strongest). "
|
| 33 |
+
"Ranks on both axes show agreement between methods: near the diagonal means similar ranking; "
|
| 34 |
+
"the dashed trend line is a least-squares fit. Correlation for each modality is in the table above; "
|
| 35 |
+
"stronger r means closer alignment of shift- and attention-based importance as fate predictors. "
|
| 36 |
+
"Point colour is whether that feature sits in the top ~10% by shift rank, attention rank, both, or neither, "
|
| 37 |
+
"using ranks within that modality only (same scale as the axes)."
|
| 38 |
+
)
|
| 39 |
|
| 40 |
df = io.load_df_features()
|
| 41 |
|
|
|
|
| 45 |
)
|
| 46 |
st.stop()
|
| 47 |
|
| 48 |
+
st.title(ui.FEATURE_INSIGHTS_TITLE)
|
| 49 |
+
st.caption(ui.FEATURE_INSIGHTS_CAPTION)
|
| 50 |
st.subheader("Shift vs attention")
|
| 51 |
st.caption(
|
| 52 |
+
"Here, we explore how much latent-shift and attention-rollout explanations agree on feature importance within each "
|
| 53 |
+
"modality. A correlation table quantifies rank agreement; scatter plots pair each feature’s two ranks "
|
| 54 |
+
"(1 = strongest in that modality)."
|
| 55 |
)
|
| 56 |
corr_rows = []
|
| 57 |
for mod in ("RNA", "ATAC", "Flux"):
|
|
|
|
| 71 |
}
|
| 72 |
)
|
| 73 |
if corr_rows:
|
| 74 |
+
st.caption(
|
| 75 |
+
"Rank correlation by modality",
|
| 76 |
+
help=_CORR_TABLE_HELP,
|
| 77 |
+
)
|
| 78 |
st.dataframe(pd.DataFrame(corr_rows), hide_index=True, width="stretch")
|
| 79 |
+
|
| 80 |
+
st.caption(
|
| 81 |
+
"Rank scatter by modality",
|
| 82 |
+
help=_SCATTER_HELP,
|
| 83 |
+
)
|
| 84 |
rc1, rc2, rc3 = st.columns(3)
|
| 85 |
for col, mod in zip((rc1, rc2, rc3), ("RNA", "ATAC", "Flux")):
|
| 86 |
with col:
|
| 87 |
sub_m = df[df["modality"] == mod]
|
|
|
|
|
|
|
|
|
|
| 88 |
st.plotly_chart(
|
| 89 |
plots.rank_scatter_shift_vs_attention(sub_m, mod),
|
| 90 |
width="stretch",
|
streamlit_hf/pages/feature_insights/4_Attention_vs_prediction.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
"""Feature Insights
|
| 2 |
|
| 3 |
from __future__ import annotations
|
| 4 |
|
|
@@ -17,27 +17,12 @@ from streamlit_hf.lib import ui
|
|
| 17 |
|
| 18 |
ui.inject_app_styles()
|
| 19 |
|
| 20 |
-
|
| 21 |
-
**
|
| 22 |
|
| 23 |
-
**
|
| 24 |
-
|
| 25 |
-
**Important:** Uses **predicted** fate, **not** the experimental label—this is **model behaviour**, useful for comparing what the network emphasises when it leans each way.
|
| 26 |
-
|
| 27 |
-
**How to read:** **Longer bar** = more cumulative attention on that feature (among the **top‑N** shown). **Hover** for numeric detail.
|
| 28 |
-
"""
|
| 29 |
-
|
| 30 |
-
_HELP_ROLLOUT_TABLE = """
|
| 31 |
-
**What this is:** The same **mean rollout vector** as the bars, but as a **sortable table** of the strongest **{mod}** tokens.
|
| 32 |
-
|
| 33 |
-
**How to read:** Rows are **ranked** by weight in the selected cohort. **Batch** embedding tokens are omitted from this view.
|
| 34 |
-
|
| 35 |
-
**Takeaway:** Lets you **copy names** or scan exact ordering beyond the bar chart.
|
| 36 |
"""
|
| 37 |
|
| 38 |
-
st.title("Feature Insights")
|
| 39 |
-
st.caption("Latent-shift probes, attention rollout, and combined rankings across RNA, ATAC, and Flux.")
|
| 40 |
-
|
| 41 |
df = io.load_df_features()
|
| 42 |
att = io.load_attention_summary()
|
| 43 |
|
|
@@ -47,7 +32,20 @@ if df is None:
|
|
| 47 |
)
|
| 48 |
st.stop()
|
| 49 |
|
| 50 |
-
st.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
cohort_mode = st.selectbox(
|
| 52 |
"Cohort view",
|
| 53 |
[
|
|
@@ -78,9 +76,6 @@ else:
|
|
| 78 |
ac1, ac2, ac3 = st.columns(3)
|
| 79 |
for col, mod in zip((ac1, ac2, ac3), ("RNA", "ATAC", "Flux")):
|
| 80 |
with col:
|
| 81 |
-
_, _hp = st.columns([1, 0.28])
|
| 82 |
-
with _hp:
|
| 83 |
-
ui.plot_help_popover(_HELP_ATT_COHORT_BARS, key=f"t4_bar_{mod}_{cohort_mode}")
|
| 84 |
st.plotly_chart(
|
| 85 |
plots.attention_cohort_view(att["fi_att"], mod, top_n=top_n_att, mode=cohort_mode),
|
| 86 |
width="stretch",
|
|
@@ -115,12 +110,5 @@ else:
|
|
| 115 |
vec = vec_all[sl["start"] : sl["stop"]]
|
| 116 |
names = att["feature_names"][sl["start"] : sl["stop"]]
|
| 117 |
mini = plots.rollout_top_features_table(names, vec, top_n_att)
|
| 118 |
-
|
| 119 |
-
with cap1:
|
| 120 |
-
st.caption(mod)
|
| 121 |
-
with cap2:
|
| 122 |
-
ui.plot_help_popover(
|
| 123 |
-
_HELP_ROLLOUT_TABLE.format(mod=mod),
|
| 124 |
-
key=f"t4_roll_{mod}_{roll_cohort}",
|
| 125 |
-
)
|
| 126 |
st.dataframe(mini, hide_index=True, width="stretch")
|
|
|
|
| 1 |
+
"""Feature Insights: attention by predicted cohort."""
|
| 2 |
|
| 3 |
from __future__ import annotations
|
| 4 |
|
|
|
|
| 17 |
|
| 18 |
ui.inject_app_styles()
|
| 19 |
|
| 20 |
+
_HELP_PAGE = """
|
| 21 |
+
**Bar charts (RNA, ATAC, Flux columns):** **Mean attention** (rollout) on each **feature token**, averaged over validation cells and split by **what the model predicted**. **Compare** shows cohorts **side‑by‑side**; **All / dead‑end / reprogramming** restrict the average to that predicted class. Uses **predicted** fate, **not** experimental labels. **Longer bar** = more cumulative attention among the **top‑N** shown. **Hover** for numeric detail.
|
| 22 |
|
| 23 |
+
**Rollout tables:** The same **mean rollout vector** as the bars, as a **sortable table** of the strongest tokens **per modality column**. Rows are **ranked** by weight for the cohort you select. **Batch** embedding tokens are omitted; use the tables to **copy names** or scan ordering beyond the bars.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
"""
|
| 25 |
|
|
|
|
|
|
|
|
|
|
| 26 |
df = io.load_df_features()
|
| 27 |
att = io.load_attention_summary()
|
| 28 |
|
|
|
|
| 32 |
)
|
| 33 |
st.stop()
|
| 34 |
|
| 35 |
+
st.title(ui.FEATURE_INSIGHTS_TITLE)
|
| 36 |
+
st.caption(ui.FEATURE_INSIGHTS_CAPTION)
|
| 37 |
+
try:
|
| 38 |
+
_att_h_l, _att_h_r = st.columns([0.94, 0.06], gap="small", vertical_alignment="center")
|
| 39 |
+
except TypeError:
|
| 40 |
+
_att_h_l, _att_h_r = st.columns([0.94, 0.06], gap="small")
|
| 41 |
+
with _att_h_l:
|
| 42 |
+
st.subheader("Attention vs prediction")
|
| 43 |
+
with _att_h_r:
|
| 44 |
+
ui.plot_help_popover(_HELP_PAGE, key="t4_page_help")
|
| 45 |
+
st.caption(
|
| 46 |
+
"Here, we show mean attention over RNA, ATAC, and Flux tokens conditional on what the model predicted (dead-end, "
|
| 47 |
+
"reprogramming, or all validation cells), to see which features the model focuses on when predicting different fates."
|
| 48 |
+
)
|
| 49 |
cohort_mode = st.selectbox(
|
| 50 |
"Cohort view",
|
| 51 |
[
|
|
|
|
| 76 |
ac1, ac2, ac3 = st.columns(3)
|
| 77 |
for col, mod in zip((ac1, ac2, ac3), ("RNA", "ATAC", "Flux")):
|
| 78 |
with col:
|
|
|
|
|
|
|
|
|
|
| 79 |
st.plotly_chart(
|
| 80 |
plots.attention_cohort_view(att["fi_att"], mod, top_n=top_n_att, mode=cohort_mode),
|
| 81 |
width="stretch",
|
|
|
|
| 110 |
vec = vec_all[sl["start"] : sl["stop"]]
|
| 111 |
names = att["feature_names"][sl["start"] : sl["stop"]]
|
| 112 |
mini = plots.rollout_top_features_table(names, vec, top_n_att)
|
| 113 |
+
st.caption(mod)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
st.dataframe(mini, hide_index=True, width="stretch")
|
streamlit_hf/pages/feature_insights/5_Full_table.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
"""Feature Insights
|
| 2 |
|
| 3 |
from __future__ import annotations
|
| 4 |
|
|
@@ -17,16 +17,13 @@ from streamlit_hf.lib import ui
|
|
| 17 |
ui.inject_app_styles()
|
| 18 |
|
| 19 |
_FULL_TABLE_HELP = """
|
| 20 |
-
**What this is:** The **full ranked feature list** (RNA genes, ATAC
|
| 21 |
|
| 22 |
**Key columns:** **mean_rank** (lower = stronger overall), **rank_shift** / **rank_att** (global), modality‑internal ranks, and **importance_*** scores. Where available, **pathway** / **module** annotate flux or gene context.
|
| 23 |
|
| 24 |
**How to use:** **Sort** or **search** in the table toolbar; **download CSV** for spreadsheets or supplementary tables.
|
| 25 |
"""
|
| 26 |
|
| 27 |
-
st.title("Feature Insights")
|
| 28 |
-
st.caption("Latent-shift probes, attention rollout, and combined rankings across RNA, ATAC, and Flux.")
|
| 29 |
-
|
| 30 |
df = io.load_df_features()
|
| 31 |
|
| 32 |
if df is None:
|
|
@@ -35,7 +32,13 @@ if df is None:
|
|
| 35 |
)
|
| 36 |
st.stop()
|
| 37 |
|
|
|
|
|
|
|
| 38 |
st.subheader("Full table")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
scope = st.radio(
|
| 40 |
"Table scope",
|
| 41 |
["All modalities", "Single modality"],
|
|
@@ -71,7 +74,7 @@ show_cols = [
|
|
| 71 |
if c in tbl.columns
|
| 72 |
]
|
| 73 |
ui.plot_caption_with_help(
|
| 74 |
-
"
|
| 75 |
_FULL_TABLE_HELP,
|
| 76 |
key="t5_table_help",
|
| 77 |
)
|
|
|
|
| 1 |
+
"""Feature Insights: full ranked feature table."""
|
| 2 |
|
| 3 |
from __future__ import annotations
|
| 4 |
|
|
|
|
| 17 |
ui.inject_app_styles()
|
| 18 |
|
| 19 |
_FULL_TABLE_HELP = """
|
| 20 |
+
**What this is:** The **full FateFormer ranked feature list** (RNA genes, ATAC TF motifs, flux reactions) with **shift**, **attention**, and **joint** rank columns from the interpretability pipeline.
|
| 21 |
|
| 22 |
**Key columns:** **mean_rank** (lower = stronger overall), **rank_shift** / **rank_att** (global), modality‑internal ranks, and **importance_*** scores. Where available, **pathway** / **module** annotate flux or gene context.
|
| 23 |
|
| 24 |
**How to use:** **Sort** or **search** in the table toolbar; **download CSV** for spreadsheets or supplementary tables.
|
| 25 |
"""
|
| 26 |
|
|
|
|
|
|
|
|
|
|
| 27 |
df = io.load_df_features()
|
| 28 |
|
| 29 |
if df is None:
|
|
|
|
| 32 |
)
|
| 33 |
st.stop()
|
| 34 |
|
| 35 |
+
st.title(ui.FEATURE_INSIGHTS_TITLE)
|
| 36 |
+
st.caption(ui.FEATURE_INSIGHTS_CAPTION)
|
| 37 |
st.subheader("Full table")
|
| 38 |
+
st.caption(
|
| 39 |
+
"Here is the complete ranked feature table for the run (RNA genes, ATAC motifs, flux reactions): every shift, "
|
| 40 |
+
"attention, and joint rank and score the pipeline emitted."
|
| 41 |
+
)
|
| 42 |
scope = st.radio(
|
| 43 |
"Table scope",
|
| 44 |
["All modalities", "Single modality"],
|
|
|
|
| 74 |
if c in tbl.columns
|
| 75 |
]
|
| 76 |
ui.plot_caption_with_help(
|
| 77 |
+
"Full FateFormer list for the chosen scope, sorted by **mean rank** (lower = stronger joint priority).",
|
| 78 |
_FULL_TABLE_HELP,
|
| 79 |
key="t5_table_help",
|
| 80 |
)
|
streamlit_hf/pages/flux_analysis/1_Pathway_map.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
"""Flux Analysis
|
| 2 |
|
| 3 |
from __future__ import annotations
|
| 4 |
|
|
@@ -17,34 +17,20 @@ from streamlit_hf.lib import ui
|
|
| 17 |
|
| 18 |
ui.inject_app_styles()
|
| 19 |
|
| 20 |
-
|
| 21 |
-
**
|
| 22 |
|
| 23 |
-
**
|
| 24 |
|
| 25 |
-
**
|
| 26 |
-
"""
|
| 27 |
-
|
| 28 |
-
_HELP_FLUX_ANNOTATION = """
|
| 29 |
-
**What this is:** **Heatmaps** aligned to the **same top reactions** as the sunburst: each row is a **reaction**, columns summarise **pathway membership**, **differential flux** (Log₂ fold change between fate groups), and **statistical significance**.
|
| 30 |
-
|
| 31 |
-
**How to read it:** Scan rows for reactions that are both **statistically notable** and **highly ranked** by the model. **Hover** cells for exact values where Plotly provides tooltips.
|
| 32 |
-
|
| 33 |
-
**Takeaway:** Links **statistics on measured flux** to **model-derived importance**.
|
| 34 |
-
"""
|
| 35 |
-
|
| 36 |
-
_HELP_FLUX_PROFILE = """
|
| 37 |
-
**What this is:** A compact **profile** of **model‑centric metrics** (e.g. joint ranks) for the same **top reactions**, complementary to the heatmaps.
|
| 38 |
-
|
| 39 |
-
**How to read it:** Compare **relative bars/scores** across reactions—**longer** usually means **stronger model priority** for that reaction in this summary.
|
| 40 |
|
| 41 |
-
**
|
| 42 |
"""
|
| 43 |
|
| 44 |
st.title("Flux Analysis")
|
| 45 |
st.caption(
|
| 46 |
-
"
|
| 47 |
-
"For
|
| 48 |
)
|
| 49 |
|
| 50 |
try:
|
|
@@ -67,38 +53,51 @@ else:
|
|
| 67 |
_data_msg = "There are no flux reactions in the current results."
|
| 68 |
flux = None
|
| 69 |
|
| 70 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
if not _data_ok:
|
| 72 |
st.error(_data_msg)
|
| 73 |
else:
|
| 74 |
st.caption(
|
| 75 |
-
"
|
| 76 |
-
"
|
| 77 |
-
"Ranked reaction table: **Reaction ranking**. Curated model edges: **Model metadata**."
|
| 78 |
)
|
| 79 |
try:
|
| 80 |
c1, c2 = st.columns([1.05, 0.95], gap="medium", vertical_alignment="top")
|
| 81 |
except TypeError:
|
| 82 |
c1, c2 = st.columns([1.05, 0.95], gap="medium")
|
| 83 |
with c1:
|
| 84 |
-
n_sb = st.slider(
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
st.plotly_chart(plots.flux_pathway_sunburst(flux, max_features=n_sb), width="stretch")
|
| 89 |
with c2:
|
| 90 |
-
top_n_nb = st.slider(
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
|
|
|
|
|
|
| 97 |
)
|
| 98 |
-
_, _hp2 = st.columns([1, 0.22])
|
| 99 |
-
with _hp2:
|
| 100 |
-
ui.plot_help_popover(_HELP_FLUX_PROFILE, key="flux_prof_help")
|
| 101 |
st.plotly_chart(
|
| 102 |
-
plots.
|
| 103 |
width="stretch",
|
| 104 |
)
|
|
|
|
| 1 |
+
"""Flux Analysis: pathway sunburst and reaction annotation panels."""
|
| 2 |
|
| 3 |
from __future__ import annotations
|
| 4 |
|
|
|
|
| 17 |
|
| 18 |
ui.inject_app_styles()
|
| 19 |
|
| 20 |
+
_HELP_PATHWAY_MAP = """
|
| 21 |
+
**Layout:** **Left column:** **sunburst**. **Right column:** **Pathway / Log₂FC / significance** (three **heatmap** columns, one **row** per reaction).
|
| 22 |
|
| 23 |
+
**Sunburst:** **Inner ring** = **pathway**; **outer ring** = **reaction**. Reactions are the top set by **mean_rank** (FateFormer joint rank; **lower** = stronger). **Wedge size** reflects that ranking. **Colour** = per-reaction **log₂ fold change** in inferred flux for **reprogramming** vs **dead-end** samples (experimental labels).
|
| 24 |
|
| 25 |
+
**Pathway / Log₂FC / significance:** Same top-**N** reactions as the **Reactions in heatmap** slider (**N** rows). **Columns:** **Pathway** (categorical colour), **Log₂FC** (reprogramming vs dead-end), **−log₁₀ adjusted p** for that contrast. **Hover** for exact values.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
|
| 27 |
+
**Sliders:** **Reactions in sunburst** adjusts only the **left** sunburst. **Reactions in heatmap** sets how many top reactions appear in the **right-hand** heatmap.
|
| 28 |
"""
|
| 29 |
|
| 30 |
st.title("Flux Analysis")
|
| 31 |
st.caption(
|
| 32 |
+
"**Flux Analysis** ties inferred **reaction flux** to **pathways**, **fate contrasts**, **rankings**, and **model** metadata. "
|
| 33 |
+
"For multimodal **shift**/**attention** summaries, open **Feature Insights**."
|
| 34 |
)
|
| 35 |
|
| 36 |
try:
|
|
|
|
| 53 |
_data_msg = "There are no flux reactions in the current results."
|
| 54 |
flux = None
|
| 55 |
|
| 56 |
+
try:
|
| 57 |
+
_pm_h_l, _pm_h_r = st.columns([0.94, 0.06], gap="small", vertical_alignment="center")
|
| 58 |
+
except TypeError:
|
| 59 |
+
_pm_h_l, _pm_h_r = st.columns([0.94, 0.06], gap="small")
|
| 60 |
+
with _pm_h_l:
|
| 61 |
+
st.subheader("Pathway map")
|
| 62 |
+
with _pm_h_r:
|
| 63 |
+
ui.plot_help_popover(_HELP_PATHWAY_MAP, key="flux_pathway_map_help")
|
| 64 |
+
|
| 65 |
if not _data_ok:
|
| 66 |
st.error(_data_msg)
|
| 67 |
else:
|
| 68 |
st.caption(
|
| 69 |
+
"Here, we map top FateFormer-ranked flux reactions into pathway context: a sunburst (pathway → reaction) and a "
|
| 70 |
+
"heatmap of pathway, log₂ fold change, and significance for reprogramming vs dead-end."
|
|
|
|
| 71 |
)
|
| 72 |
try:
|
| 73 |
c1, c2 = st.columns([1.05, 0.95], gap="medium", vertical_alignment="top")
|
| 74 |
except TypeError:
|
| 75 |
c1, c2 = st.columns([1.05, 0.95], gap="medium")
|
| 76 |
with c1:
|
| 77 |
+
n_sb = st.slider(
|
| 78 |
+
"Reactions in sunburst",
|
| 79 |
+
25,
|
| 80 |
+
90,
|
| 81 |
+
52,
|
| 82 |
+
key="flux_sb_n",
|
| 83 |
+
help=(
|
| 84 |
+
"How many **top** flux reactions (by **mean rank**) appear in the **sunburst** only. "
|
| 85 |
+
"Does not change the heatmap; use the other slider for that."
|
| 86 |
+
),
|
| 87 |
+
)
|
| 88 |
st.plotly_chart(plots.flux_pathway_sunburst(flux, max_features=n_sb), width="stretch")
|
| 89 |
with c2:
|
| 90 |
+
top_n_nb = st.slider(
|
| 91 |
+
"Reactions in heatmap",
|
| 92 |
+
12,
|
| 93 |
+
40,
|
| 94 |
+
26,
|
| 95 |
+
key="flux_nb_n",
|
| 96 |
+
help=(
|
| 97 |
+
"How many **top** flux reactions (by **mean rank**) appear as **rows** in the **Pathway / Log₂FC / significance** heatmap."
|
| 98 |
+
),
|
| 99 |
)
|
|
|
|
|
|
|
|
|
|
| 100 |
st.plotly_chart(
|
| 101 |
+
plots.flux_reaction_annotation_panel(flux, top_n=top_n_nb, metric="mean_rank"),
|
| 102 |
width="stretch",
|
| 103 |
)
|
streamlit_hf/pages/flux_analysis/2_Differential_fate.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
"""Flux Analysis
|
| 2 |
|
| 3 |
from __future__ import annotations
|
| 4 |
|
|
@@ -18,25 +18,25 @@ from streamlit_hf.lib import ui
|
|
| 18 |
ui.inject_app_styles()
|
| 19 |
|
| 20 |
_HELP_FLUX_VOLCANO = """
|
| 21 |
-
**What this is:**
|
| 22 |
|
| 23 |
-
**How to read it:**
|
| 24 |
|
| 25 |
-
**
|
| 26 |
"""
|
| 27 |
|
| 28 |
_HELP_FLUX_FATE_SCATTER = """
|
| 29 |
-
**What this is:**
|
| 30 |
|
| 31 |
-
**How to read it:**
|
| 32 |
|
| 33 |
-
**
|
| 34 |
"""
|
| 35 |
|
| 36 |
st.title("Flux Analysis")
|
| 37 |
st.caption(
|
| 38 |
-
"
|
| 39 |
-
"For
|
| 40 |
)
|
| 41 |
|
| 42 |
try:
|
|
@@ -60,15 +60,13 @@ else:
|
|
| 60 |
flux = None
|
| 61 |
|
| 62 |
st.subheader("Differential & fate")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
if not _data_ok:
|
| 64 |
st.error(_data_msg)
|
| 65 |
else:
|
| 66 |
-
st.caption(
|
| 67 |
-
"**Volcano:** differential Log₂FC versus significance (\u2212log\u2081\u2080 adjusted p); colour shows overall mean rank. "
|
| 68 |
-
"Points with essentially no fold change and a zero adjusted p-value are removed as unreliable. "
|
| 69 |
-
"**Scatter:** average measured flux in dead-end versus reprogramming cells; point size reflects combined shift "
|
| 70 |
-
"and attention strength; colours mark pathway (largest groups shown, others grouped as *Other*)."
|
| 71 |
-
)
|
| 72 |
b1, b2 = st.columns(2)
|
| 73 |
with b1:
|
| 74 |
_, _hp = st.columns([1, 0.22])
|
|
|
|
| 1 |
+
"""Flux Analysis: differential flux and fate scatter."""
|
| 2 |
|
| 3 |
from __future__ import annotations
|
| 4 |
|
|
|
|
| 18 |
ui.inject_app_styles()
|
| 19 |
|
| 20 |
_HELP_FLUX_VOLCANO = """
|
| 21 |
+
**What this is:** One **point** per **flux reaction**. **X** = **log₂ fold change** in inferred flux between **dead-end**-labeled and **reprogramming**-labeled samples. **Y** = **−log₁₀ adjusted p-value** for that contrast (multiple-testing adjusted in the results table).
|
| 22 |
|
| 23 |
+
**How to read it:** **Further from zero on X** = stronger shift between cohorts. **Higher on Y** = stronger statistical evidence. **Colour** = **mean rank** (FateFormer joint rank across the feature table; **lower** rank = stronger overall model focus). Reactions with **~zero** fold change and an **adjusted p of exactly zero** are removed as numerical artifacts.
|
| 24 |
|
| 25 |
+
**Hover** the points for reaction name, pathway, and related fields.
|
| 26 |
"""
|
| 27 |
|
| 28 |
_HELP_FLUX_FATE_SCATTER = """
|
| 29 |
+
**What this is:** One **point** per **flux reaction**. **X** = **mean flux** across samples labeled **dead-end**; **Y** = **mean flux** across samples labeled **reprogramming** (same per-sample fate labels as elsewhere in this analysis).
|
| 30 |
|
| 31 |
+
**How to read it:** The **y = x** line would mark equal average flux in both cohorts. **Above** the diagonal, average flux is **higher in reprogramming** than in dead-end for that reaction; **below**, **higher in dead-end**. **Marker size** scales with **√(latent shift importance × attention importance)** (capped for display). **Colour** = **pathway**; smaller pathway groups are merged into **Other**.
|
| 32 |
|
| 33 |
+
**Hover** for reaction name, **mean rank**, **log₂FC**, and pathway.
|
| 34 |
"""
|
| 35 |
|
| 36 |
st.title("Flux Analysis")
|
| 37 |
st.caption(
|
| 38 |
+
"**Flux Analysis** ties inferred **reaction flux** to **pathways**, **fate contrasts**, **rankings**, and **model** metadata. "
|
| 39 |
+
"For multimodal **shift**/**attention** summaries, open **Feature Insights**."
|
| 40 |
)
|
| 41 |
|
| 42 |
try:
|
|
|
|
| 60 |
flux = None
|
| 61 |
|
| 62 |
st.subheader("Differential & fate")
|
| 63 |
+
st.caption(
|
| 64 |
+
"Here, we contrast dead-end and reprogramming cells at the reaction level: a volcano of flux log₂FC vs significance, "
|
| 65 |
+
"and a scatter of mean flux in each cohort with pathway colouring."
|
| 66 |
+
)
|
| 67 |
if not _data_ok:
|
| 68 |
st.error(_data_msg)
|
| 69 |
else:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
b1, b2 = st.columns(2)
|
| 71 |
with b1:
|
| 72 |
_, _hp = st.columns([1, 0.22])
|
streamlit_hf/pages/flux_analysis/3_Reaction_ranking.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
"""Flux Analysis
|
| 2 |
|
| 3 |
from __future__ import annotations
|
| 4 |
|
|
@@ -17,17 +17,21 @@ from streamlit_hf.lib import ui
|
|
| 17 |
ui.inject_app_styles()
|
| 18 |
|
| 19 |
_HELP_REACTION_TABLE = """
|
| 20 |
-
**What this is:**
|
| 21 |
|
| 22 |
-
**
|
| 23 |
|
| 24 |
-
**
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
"""
|
| 26 |
|
| 27 |
st.title("Flux Analysis")
|
| 28 |
st.caption(
|
| 29 |
-
"
|
| 30 |
-
"For
|
| 31 |
)
|
| 32 |
|
| 33 |
try:
|
|
@@ -51,20 +55,31 @@ else:
|
|
| 51 |
flux = None
|
| 52 |
|
| 53 |
st.subheader("Reaction ranking")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
if not _data_ok:
|
| 55 |
st.error(_data_msg)
|
| 56 |
else:
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
)
|
| 62 |
-
q = st.text_input("Substring filter (reaction name)", "", key="flux_q")
|
| 63 |
pw_f = st.multiselect(
|
| 64 |
"Pathway",
|
| 65 |
sorted(flux["pathway"].dropna().unique().astype(str)),
|
| 66 |
default=[],
|
| 67 |
key="flux_pw_f",
|
|
|
|
| 68 |
)
|
| 69 |
show = flux
|
| 70 |
if q.strip():
|
|
@@ -101,4 +116,5 @@ else:
|
|
| 101 |
file_name="fateformer_flux_filtered.csv",
|
| 102 |
mime="text/csv",
|
| 103 |
key="flux_dl",
|
|
|
|
| 104 |
)
|
|
|
|
| 1 |
+
"""Flux Analysis: ranked reaction table and download."""
|
| 2 |
|
| 3 |
from __future__ import annotations
|
| 4 |
|
|
|
|
| 17 |
ui.inject_app_styles()
|
| 18 |
|
| 19 |
_HELP_REACTION_TABLE = """
|
| 20 |
+
**What this is:** The **full FateFormer flux reaction table** for this deployment: one **row** per **reaction** in the metabolic layer, with **joint ranking** and cohort flux statistics from the precomputed results.
|
| 21 |
|
| 22 |
+
**Ranking:** **mean_rank** = combined **shift + attention** priority (**lower** = stronger overall). **rank_shift_in_modal** / **rank_att_in_modal** and **combined_order_mod** are **within-modality** (Flux-only) ranks; **rank_shift** / **rank_att** are **global** across all features. **importance_shift** / **importance_att** are the underlying scores. **top_10_pct** (if present) flags global top-decile membership from the publish step.
|
| 23 |
|
| 24 |
+
**Flux / cohort columns:** **mean_de** / **mean_re** = **mean inferred flux** in **dead-end** vs **reprogramming** samples. **log_fc** = **log₂** fold change between those cohorts for that reaction. **pval_adj** = **adjusted p-value** for that contrast. **group** summarises direction or contrast label when present.
|
| 25 |
+
|
| 26 |
+
**Context:** **pathway** and **module** annotate the reaction in the reconstruction.
|
| 27 |
+
|
| 28 |
+
**Use:** Narrow rows with the **substring** and **pathway** controls; use the table’s own **sort** if your Streamlit build exposes it. **Download** saves the **filtered** view as CSV.
|
| 29 |
"""
|
| 30 |
|
| 31 |
st.title("Flux Analysis")
|
| 32 |
st.caption(
|
| 33 |
+
"**Flux Analysis** ties inferred **reaction flux** to **pathways**, **fate contrasts**, **rankings**, and **model** metadata. "
|
| 34 |
+
"For multimodal **shift**/**attention** summaries, open **Feature Insights**."
|
| 35 |
)
|
| 36 |
|
| 37 |
try:
|
|
|
|
| 55 |
flux = None
|
| 56 |
|
| 57 |
st.subheader("Reaction ranking")
|
| 58 |
+
st.caption(
|
| 59 |
+
"Here is the searchable flux reaction table: every reaction’s FateFormer ranks, cohort flux summaries, and pathway "
|
| 60 |
+
"context, with filters and CSV download."
|
| 61 |
+
)
|
| 62 |
if not _data_ok:
|
| 63 |
st.error(_data_msg)
|
| 64 |
else:
|
| 65 |
+
try:
|
| 66 |
+
_rr_l, _rr_r = st.columns([0.94, 0.06], gap="small", vertical_alignment="center")
|
| 67 |
+
except TypeError:
|
| 68 |
+
_rr_l, _rr_r = st.columns([0.94, 0.06], gap="small")
|
| 69 |
+
with _rr_r:
|
| 70 |
+
ui.plot_help_popover(_HELP_REACTION_TABLE, key="flux_rank_table_help")
|
| 71 |
+
q = st.text_input(
|
| 72 |
+
"Substring filter (reaction name)",
|
| 73 |
+
"",
|
| 74 |
+
key="flux_q",
|
| 75 |
+
help="Keep rows whose **reaction** string contains this text (case-insensitive). Leave empty for no name filter.",
|
| 76 |
)
|
|
|
|
| 77 |
pw_f = st.multiselect(
|
| 78 |
"Pathway",
|
| 79 |
sorted(flux["pathway"].dropna().unique().astype(str)),
|
| 80 |
default=[],
|
| 81 |
key="flux_pw_f",
|
| 82 |
+
help="Keep rows in any of the selected **pathways**. Leave empty to include all pathways.",
|
| 83 |
)
|
| 84 |
show = flux
|
| 85 |
if q.strip():
|
|
|
|
| 116 |
file_name="fateformer_flux_filtered.csv",
|
| 117 |
mime="text/csv",
|
| 118 |
key="flux_dl",
|
| 119 |
+
help="CSV of the **current filtered** table (same columns as on screen), sorted by **mean_rank**.",
|
| 120 |
)
|
streamlit_hf/pages/flux_analysis/4_Model_metadata.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
"""Flux Analysis
|
| 2 |
|
| 3 |
from __future__ import annotations
|
| 4 |
|
|
@@ -16,73 +16,60 @@ from streamlit_hf.lib import ui
|
|
| 16 |
|
| 17 |
ui.inject_app_styles()
|
| 18 |
|
| 19 |
-
|
| 20 |
-
|
| 21 |
|
| 22 |
-
|
|
|
|
| 23 |
|
| 24 |
-
**
|
| 25 |
"""
|
| 26 |
|
| 27 |
st.title("Flux Analysis")
|
| 28 |
st.caption(
|
| 29 |
-
"
|
| 30 |
-
"For
|
| 31 |
)
|
| 32 |
|
| 33 |
-
|
| 34 |
-
df = io.load_df_features()
|
| 35 |
-
except Exception:
|
| 36 |
-
df = None
|
| 37 |
-
|
| 38 |
-
_data_ok = True
|
| 39 |
-
if df is None:
|
| 40 |
-
_data_ok = False
|
| 41 |
-
_data_msg = (
|
| 42 |
-
"Flux and feature data are not loaded in this session. Reload the app after the maintainer has published "
|
| 43 |
-
"fresh results, or ask them to check the deployment."
|
| 44 |
-
)
|
| 45 |
-
flux = None
|
| 46 |
-
meta = None
|
| 47 |
-
else:
|
| 48 |
-
flux = df[df["modality"] == "Flux"].copy()
|
| 49 |
-
if flux.empty:
|
| 50 |
-
_data_ok = False
|
| 51 |
-
_data_msg = "There are no flux reactions in the current results."
|
| 52 |
-
flux = None
|
| 53 |
-
meta = io.load_metabolic_model_metadata()
|
| 54 |
|
| 55 |
st.subheader("Metabolic model metadata")
|
| 56 |
-
|
| 57 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
else:
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
)
|
| 64 |
-
if meta is None or meta.empty:
|
| 65 |
-
st.warning("Metabolic model metadata is not available in this build.")
|
| 66 |
-
else:
|
| 67 |
-
sm_ids = sorted(meta["Supermodule_id"].dropna().unique().astype(int).tolist())
|
| 68 |
-
graph_labels = ["All modules"]
|
| 69 |
-
for sid in sm_ids:
|
| 70 |
-
cls = str(meta.loc[meta["Supermodule_id"] == sid, "Super.Module.class"].iloc[0])
|
| 71 |
-
graph_labels.append(f"{sid}: {cls}")
|
| 72 |
-
tix = st.selectbox(
|
| 73 |
-
"Model scope",
|
| 74 |
-
range(len(graph_labels)),
|
| 75 |
-
format_func=lambda i: graph_labels[i],
|
| 76 |
-
key="flux_model_scope",
|
| 77 |
-
help="Show every step in the model, or restrict to one functional module.",
|
| 78 |
-
)
|
| 79 |
-
supermodule_id = None if tix == 0 else sm_ids[tix - 1]
|
| 80 |
-
tbl = io.build_metabolic_model_table(meta, flux, supermodule_id=supermodule_id)
|
| 81 |
-
st.dataframe(tbl, width="stretch", hide_index=True)
|
| 82 |
-
st.download_button(
|
| 83 |
-
"Download metabolic model metadata (CSV)",
|
| 84 |
-
tbl.to_csv(index=False).encode("utf-8"),
|
| 85 |
-
file_name="fateformer_metabolic_model_edges.csv",
|
| 86 |
-
mime="text/csv",
|
| 87 |
-
key="flux_model_dl",
|
| 88 |
-
)
|
|
|
|
| 1 |
+
"""Flux Analysis: scFEA metabolic model metadata table."""
|
| 2 |
|
| 3 |
from __future__ import annotations
|
| 4 |
|
|
|
|
| 16 |
|
| 17 |
ui.inject_app_styles()
|
| 18 |
|
| 19 |
+
_SCFEA_PMC = "https://pmc.ncbi.nlm.nih.gov/articles/PMC8494226/"
|
| 20 |
+
_SCFEA_GITHUB = "https://github.com/changwn/scFEA"
|
| 21 |
|
| 22 |
+
_HELP_MODEL_META = f"""
|
| 23 |
+
**Source:** The **metabolic model metadata** from **scFEA** (single-cell flux estimation from scRNA-seq) that is used for inferring flux reactions from scRNA-seq data. Open access article: [{_SCFEA_PMC}]({_SCFEA_PMC}) (*Genome Research*, 2021). Code and model resources: [{_SCFEA_GITHUB}]({_SCFEA_GITHUB}).
|
| 24 |
|
| 25 |
+
**What this is:** The **scFEA** metabolic model info used for inferring fluxomic data from scRNA-seq (one row per substrate → product reaction).
|
| 26 |
"""
|
| 27 |
|
| 28 |
st.title("Flux Analysis")
|
| 29 |
st.caption(
|
| 30 |
+
"**Flux Analysis** ties inferred **reaction flux** to **pathways**, **fate contrasts**, **rankings**, and **model** metadata. "
|
| 31 |
+
"For multimodal **shift**/**attention** summaries, open **Feature Insights**."
|
| 32 |
)
|
| 33 |
|
| 34 |
+
meta = io.load_metabolic_model_metadata()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
|
| 36 |
st.subheader("Metabolic model metadata")
|
| 37 |
+
st.caption(
|
| 38 |
+
f"Here is the scFEA metabolic model metadata used to interpret flux features: modules, compounds, and reaction names. "
|
| 39 |
+
f"[Paper]({_SCFEA_PMC}), [GitHub]({_SCFEA_GITHUB})."
|
| 40 |
+
)
|
| 41 |
+
if meta is None or meta.empty:
|
| 42 |
+
st.error("Metabolic model metadata is not available in this build.")
|
| 43 |
else:
|
| 44 |
+
try:
|
| 45 |
+
_mm_l, _mm_r = st.columns([0.94, 0.06], gap="small", vertical_alignment="center")
|
| 46 |
+
except TypeError:
|
| 47 |
+
_mm_l, _mm_r = st.columns([0.94, 0.06], gap="small")
|
| 48 |
+
with _mm_r:
|
| 49 |
+
ui.plot_help_popover(_HELP_MODEL_META, key="flux_model_meta_help")
|
| 50 |
+
sm_ids = sorted(meta["Supermodule_id"].dropna().unique().astype(int).tolist())
|
| 51 |
+
graph_labels = ["All modules"]
|
| 52 |
+
for sid in sm_ids:
|
| 53 |
+
cls = str(meta.loc[meta["Supermodule_id"] == sid, "Super.Module.class"].iloc[0])
|
| 54 |
+
graph_labels.append(f"{sid}: {cls}")
|
| 55 |
+
tix = st.selectbox(
|
| 56 |
+
"Model scope",
|
| 57 |
+
range(len(graph_labels)),
|
| 58 |
+
format_func=lambda i: graph_labels[i],
|
| 59 |
+
key="flux_model_scope",
|
| 60 |
+
help=(
|
| 61 |
+
"**All modules:** every edge row in the metadata CSV. **Named supermodule:** only edges with that "
|
| 62 |
+
"**Supermodule_id** (class label shown in the menu)."
|
| 63 |
+
),
|
| 64 |
+
)
|
| 65 |
+
supermodule_id = None if tix == 0 else sm_ids[tix - 1]
|
| 66 |
+
tbl = io.build_metabolic_model_table(meta, supermodule_id=supermodule_id)
|
| 67 |
+
st.dataframe(tbl, width="stretch", hide_index=True)
|
| 68 |
+
st.download_button(
|
| 69 |
+
"Download metabolic model metadata (CSV)",
|
| 70 |
+
tbl.to_csv(index=False).encode("utf-8"),
|
| 71 |
+
file_name="fateformer_metabolic_model_edges.csv",
|
| 72 |
+
mime="text/csv",
|
| 73 |
+
key="flux_model_dl",
|
| 74 |
+
help="CSV export of the table above for the current **Model scope**.",
|
| 75 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
streamlit_hf/pages/flux_analysis/5_Interactive_map.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
"""Flux Analysis
|
| 2 |
|
| 3 |
from __future__ import annotations
|
| 4 |
|
|
@@ -18,18 +18,18 @@ from streamlit_hf.lib import ui
|
|
| 18 |
|
| 19 |
ui.inject_app_styles()
|
| 20 |
|
| 21 |
-
|
| 22 |
-
**What this is:** An **interactive schematic** of the metabolic map: **nodes/labels** are **metabolites** linked to the reconstruction. The **sidebar list** ranks metabolites by the **strongest associated flux reaction** in this deployment (**#1** = top rank).
|
| 23 |
|
| 24 |
-
|
|
|
|
| 25 |
|
| 26 |
-
**
|
| 27 |
"""
|
| 28 |
|
| 29 |
st.title("Flux Analysis")
|
| 30 |
st.caption(
|
| 31 |
-
"
|
| 32 |
-
"For
|
| 33 |
)
|
| 34 |
|
| 35 |
|
|
@@ -210,7 +210,7 @@ function renderMetList(q){
|
|
| 210 |
if(n++>=cap) break;
|
| 211 |
const div=document.createElement('div');
|
| 212 |
div.className='met-item'+(listHighlightKey===mrow.key?' hl':'');
|
| 213 |
-
const rk=mrow.importance_rank!=null?`<strong>#${mrow.importance_rank}</strong>`:'<span>
|
| 214 |
div.innerHTML=`<span class="nm">${escapeHtml(mrow.name)}</span><span class="rk">${rk}<br/><span style="opacity:.85">${mrow.n_reactions} rxn</span></span>`;
|
| 215 |
div.addEventListener('mouseenter',ev=>{
|
| 216 |
document.querySelectorAll('.met-item').forEach(x=>x.classList.remove('hl'));
|
|
@@ -314,8 +314,9 @@ init();
|
|
| 314 |
|
| 315 |
|
| 316 |
st.subheader("Metabolic map")
|
|
|
|
| 317 |
ui.plot_caption_with_help(
|
| 318 |
-
"Browse metabolites tied to the reconstruction and flux layer. The number is the rank of the strongest linked
|
| 319 |
_HELP_MET_MAP,
|
| 320 |
key="flux_map_help",
|
| 321 |
)
|
|
|
|
| 1 |
+
"""Flux Analysis: metabolic map with searchable side panel."""
|
| 2 |
|
| 3 |
from __future__ import annotations
|
| 4 |
|
|
|
|
| 18 |
|
| 19 |
ui.inject_app_styles()
|
| 20 |
|
| 21 |
+
_NAR_FATEFORMER_URL = "https://academic.oup.com/nar/article/51/W1/W180/7175334"
|
|
|
|
| 22 |
|
| 23 |
+
_HELP_MET_MAP = f"""
|
| 24 |
+
**Figure (paper):** Network model of key metabolic pathways linked to fate outcomes identified by the model. Important pathways and reactions are mapped onto the **scFLUX** metabolic network schema. **Arrow colour** shows the **log₂ fold change** in **scFEA**-inferred flux between **reprogramming** and **dead-end** cells: **red** = higher flux in reprogramming, **blue** = higher in dead-end. **Black** arrows = no corresponding scFEA entry or no measurable flux difference. **Triple-star** markers in the figure denote **p_adj < 0.001** (two-sample *t*-test with Benjamini–Hochberg correction). Full article: [{_NAR_FATEFORMER_URL}]({_NAR_FATEFORMER_URL})
|
| 25 |
|
| 26 |
+
**In this explorer:** The same schematic is **interactive**: **metabolites** on the map link to the reconstruction. The **sidebar** ranks metabolites by the **strongest associated flux reaction** in this deployment (**#1** = top). **Search** the list (every word must match somewhere in that row). **Hover** labels for a **tooltip**. **Pan** (drag background) and **zoom** (scroll or **+ / −**); **Esc** clears search. Use it as a **navigation** layer between **pathway geography** and **model-ranked reactions**, not a quantitative flux-balance diagram.
|
| 27 |
"""
|
| 28 |
|
| 29 |
st.title("Flux Analysis")
|
| 30 |
st.caption(
|
| 31 |
+
"**Flux Analysis** ties inferred **reaction flux** to **pathways**, **fate contrasts**, **rankings**, and **model** metadata. "
|
| 32 |
+
"For multimodal **shift**/**attention** summaries, open **Feature Insights**."
|
| 33 |
)
|
| 34 |
|
| 35 |
|
|
|
|
| 210 |
if(n++>=cap) break;
|
| 211 |
const div=document.createElement('div');
|
| 212 |
div.className='met-item'+(listHighlightKey===mrow.key?' hl':'');
|
| 213 |
+
const rk=mrow.importance_rank!=null?`<strong>#${mrow.importance_rank}</strong>`:'<span>-</span>';
|
| 214 |
div.innerHTML=`<span class="nm">${escapeHtml(mrow.name)}</span><span class="rk">${rk}<br/><span style="opacity:.85">${mrow.n_reactions} rxn</span></span>`;
|
| 215 |
div.addEventListener('mouseenter',ev=>{
|
| 216 |
document.querySelectorAll('.met-item').forEach(x=>x.classList.remove('hl'));
|
|
|
|
| 314 |
|
| 315 |
|
| 316 |
st.subheader("Metabolic map")
|
| 317 |
+
st.caption("This page shows the interactive metabolic map of important pathways and reactions.")
|
| 318 |
ui.plot_caption_with_help(
|
| 319 |
+
"Browse metabolites tied to the reconstruction and flux layer. The number is the rank of the strongest linked reaction (1 = top).",
|
| 320 |
_HELP_MET_MAP,
|
| 321 |
key="flux_map_help",
|
| 322 |
)
|
streamlit_hf/pages/gene_expression/1_Pathway_enrichment.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
"""Gene expression
|
| 2 |
|
| 3 |
from __future__ import annotations
|
| 4 |
|
|
@@ -18,34 +18,18 @@ from streamlit_hf.lib import ui
|
|
| 18 |
|
| 19 |
ui.inject_app_styles()
|
| 20 |
|
| 21 |
-
|
| 22 |
-
**
|
| 23 |
|
| 24 |
-
**
|
| 25 |
|
| 26 |
-
**
|
| 27 |
-
"""
|
| 28 |
-
|
| 29 |
-
_HELP_PATH_BUBBLE_RE = """
|
| 30 |
-
**What this is:** The same **enrichment style** as dead‑end, but for genes associated with **reprogramming** outcomes.
|
| 31 |
-
|
| 32 |
-
**How to read it:** Interpret **bubble position and size** as in the dead‑end panel. Pathways **strong here but not there** (and vice‑versa) are the most **discriminating**.
|
| 33 |
-
|
| 34 |
-
**Takeaway:** Complements RNA‑level interpretability with **known pathway databases**.
|
| 35 |
-
"""
|
| 36 |
-
|
| 37 |
-
_HELP_PATH_HEAT = """
|
| 38 |
-
**What this is:** A **gene × pathway** **heatmap** of **membership** among **leading** genes from the enrichment results (Reactome / KEGG). **Empty** cells mean no assignment in that slice of the matrix.
|
| 39 |
-
|
| 40 |
-
**How to read it:** **Rows** = genes; **columns** = pathways. **Colour intensity** shows presence/strength of membership depending on the encoding (use **hover**).
|
| 41 |
-
|
| 42 |
-
**Takeaway:** Moves from **pathway lists** to a **literal gene‑to‑pathway map** for follow‑up.
|
| 43 |
"""
|
| 44 |
|
| 45 |
st.title("Gene Expression & TF Activity")
|
| 46 |
st.caption(
|
| 47 |
-
"
|
| 48 |
-
"
|
| 49 |
)
|
| 50 |
|
| 51 |
df = io.load_df_features()
|
|
@@ -59,10 +43,17 @@ if rna.empty and atac.empty:
|
|
| 59 |
st.warning("No RNA gene or ATAC motif features are available in the current results.")
|
| 60 |
st.stop()
|
| 61 |
|
| 62 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
st.caption(
|
| 64 |
-
"
|
| 65 |
-
"
|
| 66 |
)
|
| 67 |
raw = pathway_data.load_de_re_tsv()
|
| 68 |
if raw is None:
|
|
@@ -76,9 +67,6 @@ else:
|
|
| 76 |
)
|
| 77 |
c1, c2 = st.columns(2, gap="medium")
|
| 78 |
with c1:
|
| 79 |
-
_, _hp = st.columns([1, 0.22])
|
| 80 |
-
with _hp:
|
| 81 |
-
ui.plot_help_popover(_HELP_PATH_BUBBLE_DE, key="ge_bubble_de_help")
|
| 82 |
st.plotly_chart(
|
| 83 |
plots.pathway_enrichment_bubble_panel(
|
| 84 |
mde,
|
|
@@ -89,9 +77,6 @@ else:
|
|
| 89 |
width="stretch",
|
| 90 |
)
|
| 91 |
with c2:
|
| 92 |
-
_, _hp = st.columns([1, 0.22])
|
| 93 |
-
with _hp:
|
| 94 |
-
ui.plot_help_popover(_HELP_PATH_BUBBLE_RE, key="ge_bubble_re_help")
|
| 95 |
st.plotly_chart(
|
| 96 |
plots.pathway_enrichment_bubble_panel(
|
| 97 |
mre,
|
|
@@ -106,7 +91,4 @@ else:
|
|
| 106 |
st.info("No pathway-gene matrix could be built from the current enrichment results.")
|
| 107 |
else:
|
| 108 |
z, ylabs, xlabs = hm
|
| 109 |
-
_, _hp = st.columns([1, 0.18])
|
| 110 |
-
with _hp:
|
| 111 |
-
ui.plot_help_popover(_HELP_PATH_HEAT, key="ge_path_heat_help")
|
| 112 |
st.plotly_chart(plots.pathway_gene_membership_heatmap(z, ylabs, xlabs), width="stretch")
|
|
|
|
| 1 |
+
"""Gene expression: Reactome / KEGG pathway enrichment."""
|
| 2 |
|
| 3 |
from __future__ import annotations
|
| 4 |
|
|
|
|
| 18 |
|
| 19 |
ui.inject_app_styles()
|
| 20 |
|
| 21 |
+
_HELP_PATHWAY_ENRICHMENT = """
|
| 22 |
+
**Overview:** **Gene pathway enrichment**: Reactome and KEGG **over-representation** from fate-split **RNA marker** lists, then a **pathway × gene** heatmap of the leading hits.
|
| 23 |
|
| 24 |
+
**Bubble panels (dead-end vs reprogramming):** **Leading genes** are **grouped by fate** (dead-end vs reprogramming); each panel runs enrichment on that gene set. **Horizontal axis** = **gene ratio** (enrichment table). **Circles** = **Reactome** pathways; **squares** = **KEGG** pathways. **Vertical** position orders pathways; **size** reflects **gene count**; **colour** = **−log₁₀** Benjamini *q* (*q* < 0.05). **Hover** for pathway name, library, count, and *q*. **Compare** left and right panels for cohort-specific pathways.
|
| 25 |
|
| 26 |
+
**Heatmap:** **Rows** = enriched **pathway terms** (Reactome block, then KEGG). **Columns** = **genes** (from the same fate-split marker lists that fed enrichment) plus a **Library** stripe (**Reactome** vs **KEGG** per row). **Colour** encodes **dead-end** vs **reprogramming** membership for that gene–pathway pair (and the library stripe); **hover** for the exact label. **Empty** cells = no link in this matrix slice.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
"""
|
| 28 |
|
| 29 |
st.title("Gene Expression & TF Activity")
|
| 30 |
st.caption(
|
| 31 |
+
"**Pathways** (Reactome / KEGG) and pathway–gene views; **ATAC motif** deviation and **TF activity** by fate; "
|
| 32 |
+
"**gene** and **motif** tables."
|
| 33 |
)
|
| 34 |
|
| 35 |
df = io.load_df_features()
|
|
|
|
| 43 |
st.warning("No RNA gene or ATAC motif features are available in the current results.")
|
| 44 |
st.stop()
|
| 45 |
|
| 46 |
+
try:
|
| 47 |
+
_pe_h_l, _pe_h_r = st.columns([0.94, 0.06], gap="small", vertical_alignment="center")
|
| 48 |
+
except TypeError:
|
| 49 |
+
_pe_h_l, _pe_h_r = st.columns([0.94, 0.06], gap="small")
|
| 50 |
+
with _pe_h_l:
|
| 51 |
+
st.subheader("Gene pathway enrichment")
|
| 52 |
+
with _pe_h_r:
|
| 53 |
+
ui.plot_help_popover(_HELP_PATHWAY_ENRICHMENT, key="ge_pathway_page_help")
|
| 54 |
st.caption(
|
| 55 |
+
"Here, we turn fate-split RNA gene markers into Reactome and KEGG over-representation (bubble panels per cohort), "
|
| 56 |
+
"then lay out a pathway × gene heatmap for the leading hits."
|
| 57 |
)
|
| 58 |
raw = pathway_data.load_de_re_tsv()
|
| 59 |
if raw is None:
|
|
|
|
| 67 |
)
|
| 68 |
c1, c2 = st.columns(2, gap="medium")
|
| 69 |
with c1:
|
|
|
|
|
|
|
|
|
|
| 70 |
st.plotly_chart(
|
| 71 |
plots.pathway_enrichment_bubble_panel(
|
| 72 |
mde,
|
|
|
|
| 77 |
width="stretch",
|
| 78 |
)
|
| 79 |
with c2:
|
|
|
|
|
|
|
|
|
|
| 80 |
st.plotly_chart(
|
| 81 |
plots.pathway_enrichment_bubble_panel(
|
| 82 |
mre,
|
|
|
|
| 91 |
st.info("No pathway-gene matrix could be built from the current enrichment results.")
|
| 92 |
else:
|
| 93 |
z, ylabs, xlabs = hm
|
|
|
|
|
|
|
|
|
|
| 94 |
st.plotly_chart(plots.pathway_gene_membership_heatmap(z, ylabs, xlabs), width="stretch")
|
streamlit_hf/pages/gene_expression/2_Motif_activity.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
"""Gene expression
|
| 2 |
|
| 3 |
from __future__ import annotations
|
| 4 |
|
|
@@ -17,26 +17,18 @@ from streamlit_hf.lib import ui
|
|
| 17 |
|
| 18 |
ui.inject_app_styles()
|
| 19 |
|
| 20 |
-
|
| 21 |
-
**
|
| 22 |
|
| 23 |
-
**
|
| 24 |
|
| 25 |
-
**
|
| 26 |
-
"""
|
| 27 |
-
|
| 28 |
-
_HELP_MOTIF_SCATTER = """
|
| 29 |
-
**What this is:** **Mean TF motif activity** (**z‑scored**) in **dead‑end** (**X**) versus **reprogramming** (**Y**) cells.
|
| 30 |
-
|
| 31 |
-
**How to read it:** Points **above the diagonal** are more active in reprogramming; **below** favour dead‑end. **Colour / size** follow the same convention as **Feature Insights** motif views—use **hover** for identifiers.
|
| 32 |
-
|
| 33 |
-
**Takeaway:** A **direct fate‑vs‑fate** comparison of **regulatory** programmes inferred from accessibility.
|
| 34 |
"""
|
| 35 |
|
| 36 |
st.title("Gene Expression & TF Activity")
|
| 37 |
st.caption(
|
| 38 |
-
"
|
| 39 |
-
"
|
| 40 |
)
|
| 41 |
|
| 42 |
df = io.load_df_features()
|
|
@@ -50,24 +42,26 @@ if rna.empty and atac.empty:
|
|
| 50 |
st.warning("No RNA gene or ATAC motif features are available in the current results.")
|
| 51 |
st.stop()
|
| 52 |
|
| 53 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
if atac.empty:
|
| 55 |
st.warning("No motif-level ATAC features are available in the current results.")
|
| 56 |
else:
|
| 57 |
-
st.caption(
|
| 58 |
-
"Left: mean motif score difference (reprogramming − dead-end) versus significance. "
|
| 59 |
-
"Right: mean activity in each fate; colour and size follow the same encoding as in **Feature Insights**."
|
| 60 |
-
)
|
| 61 |
a1, a2 = st.columns(2, gap="medium")
|
| 62 |
with a1:
|
| 63 |
-
_, _hp = st.columns([1, 0.22])
|
| 64 |
-
with _hp:
|
| 65 |
-
ui.plot_help_popover(_HELP_MOTIF_VOLC, key="ge_motif_vol_help")
|
| 66 |
st.plotly_chart(plots.motif_chromvar_volcano(atac), width="stretch")
|
| 67 |
with a2:
|
| 68 |
-
_, _hp = st.columns([1, 0.22])
|
| 69 |
-
with _hp:
|
| 70 |
-
ui.plot_help_popover(_HELP_MOTIF_SCATTER, key="ge_motif_sc_help")
|
| 71 |
st.plotly_chart(
|
| 72 |
plots.notebook_style_activity_scatter(
|
| 73 |
atac,
|
|
|
|
| 1 |
+
"""Gene expression: ATAC TF motif deviation and activity."""
|
| 2 |
|
| 3 |
from __future__ import annotations
|
| 4 |
|
|
|
|
| 17 |
|
| 18 |
ui.inject_app_styles()
|
| 19 |
|
| 20 |
+
_HELP_MOTIF_ACTIVITY = """
|
| 21 |
+
**Overview:** **ATAC** **TF motif** plots: **differential** activity between fate labels (left), then **per-fate mean** z-scored activity (right). Scores summarize **motif-level** signal from the accessibility layer.
|
| 22 |
|
| 23 |
+
**Left (volcano):** **X** = **mean difference** in motif activity (**reprogramming − dead-end**). **Y** = **−log₁₀ adjusted p** (or a precomputed log-*p* column when the table provides it). **Colour** = **mean rank** (joint FateFormer rank; **lower** = stronger). **Hover** for motif name, *p*, **mean rank**, and cohort fields when present.
|
| 24 |
|
| 25 |
+
**Right (scatter):** **X** / **Y** = **mean z-scored** motif activity in **dead-end** vs **reprogramming** cells. The **y = x** line would mark equal average activity; **above** the diagonal means **higher in reprogramming**. **Colour** = **−log₁₀ adjusted p** (red scale; **higher** = more significant). **Hover** for motif, **mean rank**, and **group**.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
"""
|
| 27 |
|
| 28 |
st.title("Gene Expression & TF Activity")
|
| 29 |
st.caption(
|
| 30 |
+
"**Pathways** (Reactome / KEGG) and pathway–gene views; **ATAC motif** deviation and **TF activity** by fate; "
|
| 31 |
+
"**gene** and **motif** tables."
|
| 32 |
)
|
| 33 |
|
| 34 |
df = io.load_df_features()
|
|
|
|
| 42 |
st.warning("No RNA gene or ATAC motif features are available in the current results.")
|
| 43 |
st.stop()
|
| 44 |
|
| 45 |
+
try:
|
| 46 |
+
_ma_h_l, _ma_h_r = st.columns([0.94, 0.06], gap="small", vertical_alignment="center")
|
| 47 |
+
except TypeError:
|
| 48 |
+
_ma_h_l, _ma_h_r = st.columns([0.94, 0.06], gap="small")
|
| 49 |
+
with _ma_h_l:
|
| 50 |
+
st.subheader("Motif activity")
|
| 51 |
+
with _ma_h_r:
|
| 52 |
+
ui.plot_help_popover(_HELP_MOTIF_ACTIVITY, key="ge_motif_page_help")
|
| 53 |
+
st.caption(
|
| 54 |
+
"Here, we summarize ATAC TF motif behaviour: differential shift between dead-end and reprogramming (volcano), then "
|
| 55 |
+
"per-fate mean z-scored activity in a scatter."
|
| 56 |
+
)
|
| 57 |
+
|
| 58 |
if atac.empty:
|
| 59 |
st.warning("No motif-level ATAC features are available in the current results.")
|
| 60 |
else:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
a1, a2 = st.columns(2, gap="medium")
|
| 62 |
with a1:
|
|
|
|
|
|
|
|
|
|
| 63 |
st.plotly_chart(plots.motif_chromvar_volcano(atac), width="stretch")
|
| 64 |
with a2:
|
|
|
|
|
|
|
|
|
|
| 65 |
st.plotly_chart(
|
| 66 |
plots.notebook_style_activity_scatter(
|
| 67 |
atac,
|
streamlit_hf/pages/gene_expression/3_Gene_table.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
"""Gene expression
|
| 2 |
|
| 3 |
from __future__ import annotations
|
| 4 |
|
|
@@ -17,6 +17,10 @@ from streamlit_hf.lib import ui
|
|
| 17 |
|
| 18 |
ui.inject_app_styles()
|
| 19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
TABLE_COLS = [
|
| 21 |
"mean_rank",
|
| 22 |
"feature",
|
|
@@ -44,8 +48,8 @@ def _table_cols(show: pd.DataFrame) -> list[str]:
|
|
| 44 |
|
| 45 |
st.title("Gene Expression & TF Activity")
|
| 46 |
st.caption(
|
| 47 |
-
"
|
| 48 |
-
"
|
| 49 |
)
|
| 50 |
|
| 51 |
df = io.load_df_features()
|
|
@@ -59,7 +63,19 @@ if rna.empty and atac.empty:
|
|
| 59 |
st.warning("No RNA gene or ATAC motif features are available in the current results.")
|
| 60 |
st.stop()
|
| 61 |
|
| 62 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
if rna.empty:
|
| 64 |
st.warning("No RNA gene features are available in the current results.")
|
| 65 |
else:
|
|
|
|
| 1 |
+
"""Gene expression: searchable gene ranking table."""
|
| 2 |
|
| 3 |
from __future__ import annotations
|
| 4 |
|
|
|
|
| 17 |
|
| 18 |
ui.inject_app_styles()
|
| 19 |
|
| 20 |
+
_HELP_GENE_TABLE = """
|
| 21 |
+
**scRNA-seq** genes used as features in this run: **one row per gene**, sorted by **mean rank** (joint importance). Additional columns are **FateFormer** rank and attribution summaries (within RNA and globally), **per-fate** expression (**dead-end** vs **reprogramming**), and **differential** statistics (*p*-values, log fold change, **group**). Search to narrow the list; use **Download** for a CSV copy.
|
| 22 |
+
"""
|
| 23 |
+
|
| 24 |
TABLE_COLS = [
|
| 25 |
"mean_rank",
|
| 26 |
"feature",
|
|
|
|
| 48 |
|
| 49 |
st.title("Gene Expression & TF Activity")
|
| 50 |
st.caption(
|
| 51 |
+
"**Pathways** (Reactome / KEGG) and pathway–gene views; **ATAC motif** deviation and **TF activity** by fate; "
|
| 52 |
+
"**gene** and **motif** tables."
|
| 53 |
)
|
| 54 |
|
| 55 |
df = io.load_df_features()
|
|
|
|
| 63 |
st.warning("No RNA gene or ATAC motif features are available in the current results.")
|
| 64 |
st.stop()
|
| 65 |
|
| 66 |
+
try:
|
| 67 |
+
_gt_h_l, _gt_h_r = st.columns([0.94, 0.06], gap="small", vertical_alignment="center")
|
| 68 |
+
except TypeError:
|
| 69 |
+
_gt_h_l, _gt_h_r = st.columns([0.94, 0.06], gap="small")
|
| 70 |
+
with _gt_h_l:
|
| 71 |
+
st.subheader("Gene table")
|
| 72 |
+
with _gt_h_r:
|
| 73 |
+
ui.plot_help_popover(_HELP_GENE_TABLE, key="ge_gene_table_help")
|
| 74 |
+
st.caption(
|
| 75 |
+
"Here is a searchable table of all scRNA-seq genes in the feature set, with FateFormer ranks and per-fate expression "
|
| 76 |
+
"and differential statistics that you can sort, filter by name, or download CSV."
|
| 77 |
+
)
|
| 78 |
+
|
| 79 |
if rna.empty:
|
| 80 |
st.warning("No RNA gene features are available in the current results.")
|
| 81 |
else:
|
streamlit_hf/pages/gene_expression/4_Motif_table.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
"""Gene expression
|
| 2 |
|
| 3 |
from __future__ import annotations
|
| 4 |
|
|
@@ -17,6 +17,10 @@ from streamlit_hf.lib import ui
|
|
| 17 |
|
| 18 |
ui.inject_app_styles()
|
| 19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
TABLE_COLS = [
|
| 21 |
"mean_rank",
|
| 22 |
"feature",
|
|
@@ -44,8 +48,8 @@ def _table_cols(show: pd.DataFrame) -> list[str]:
|
|
| 44 |
|
| 45 |
st.title("Gene Expression & TF Activity")
|
| 46 |
st.caption(
|
| 47 |
-
"
|
| 48 |
-
"
|
| 49 |
)
|
| 50 |
|
| 51 |
df = io.load_df_features()
|
|
@@ -59,7 +63,19 @@ if rna.empty and atac.empty:
|
|
| 59 |
st.warning("No RNA gene or ATAC motif features are available in the current results.")
|
| 60 |
st.stop()
|
| 61 |
|
| 62 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
if atac.empty:
|
| 64 |
st.warning("No motif-level ATAC features are available in the current results.")
|
| 65 |
else:
|
|
|
|
| 1 |
+
"""Gene expression: searchable motif / TF table."""
|
| 2 |
|
| 3 |
from __future__ import annotations
|
| 4 |
|
|
|
|
| 17 |
|
| 18 |
ui.inject_app_styles()
|
| 19 |
|
| 20 |
+
_HELP_MOTIF_TABLE = """
|
| 21 |
+
**ATAC** motif / TF features used in this run: **one row per feature**, sorted by **mean rank**. Columns include **FateFormer** ranking and attribution, **per-fate** activity summaries, and **differential** statistics. Search to narrow the list; use **Download** for a CSV copy.
|
| 22 |
+
"""
|
| 23 |
+
|
| 24 |
TABLE_COLS = [
|
| 25 |
"mean_rank",
|
| 26 |
"feature",
|
|
|
|
| 48 |
|
| 49 |
st.title("Gene Expression & TF Activity")
|
| 50 |
st.caption(
|
| 51 |
+
"**Pathways** (Reactome / KEGG) and pathway–gene views; **ATAC motif** deviation and **TF activity** by fate; "
|
| 52 |
+
"**gene** and **motif** tables."
|
| 53 |
)
|
| 54 |
|
| 55 |
df = io.load_df_features()
|
|
|
|
| 63 |
st.warning("No RNA gene or ATAC motif features are available in the current results.")
|
| 64 |
st.stop()
|
| 65 |
|
| 66 |
+
try:
|
| 67 |
+
_mt_h_l, _mt_h_r = st.columns([0.94, 0.06], gap="small", vertical_alignment="center")
|
| 68 |
+
except TypeError:
|
| 69 |
+
_mt_h_l, _mt_h_r = st.columns([0.94, 0.06], gap="small")
|
| 70 |
+
with _mt_h_l:
|
| 71 |
+
st.subheader("Motif table")
|
| 72 |
+
with _mt_h_r:
|
| 73 |
+
ui.plot_help_popover(_HELP_MOTIF_TABLE, key="ge_motif_table_help")
|
| 74 |
+
st.caption(
|
| 75 |
+
"Here is a searchable table of all ATAC motif / TF features, each with FateFormer ranks and per-fate activity and "
|
| 76 |
+
"differential fields that you can sort, filter by name, or download CSV."
|
| 77 |
+
)
|
| 78 |
+
|
| 79 |
if atac.empty:
|
| 80 |
st.warning("No motif-level ATAC features are available in the current results.")
|
| 81 |
else:
|
streamlit_hf/static/experiment.svg
ADDED
|
|