kaveh committed on
Commit
acd0d36
·
1 Parent(s): 34e8e2b

added help and refined

Browse files
metabolic_map.svg DELETED
requirements.txt CHANGED
@@ -1,4 +1,4 @@
1
- # FateFormerApp training, precompute, and local Streamlit dev
2
  torch>=2.1.0
3
  numpy>=1.24.0
4
  pandas>=2.0.0
 
1
+ # FateFormerApp: training, precompute, and local Streamlit dev
2
  torch>=2.1.0
3
  numpy>=1.24.0
4
  pandas>=2.0.0
streamlit_hf/app.py CHANGED
@@ -1,13 +1,19 @@
1
  """
2
  FateFormer Explorer: interactive analysis hub.
3
- Run from repository root: PYTHONPATH=. streamlit run streamlit_hf/app.py
4
  """
5
 
6
- from pathlib import Path
7
 
8
- import streamlit as st
 
9
 
10
  _APP_DIR = Path(__file__).resolve().parent
 
 
 
 
 
11
  _ICON_PATH = _APP_DIR / "static" / "app_icon.svg"
12
  _page_icon_kw = {"page_icon": str(_ICON_PATH)} if _ICON_PATH.is_file() else {}
13
 
@@ -18,6 +24,11 @@ st.set_page_config(
18
  **_page_icon_kw,
19
  )
20
 
 
 
 
 
 
21
  _home = str(_APP_DIR / "home.py")
22
  _p1 = str(_APP_DIR / "pages" / "1_Single_Cell_Explorer.py")
23
  _fi = _APP_DIR / "pages" / "feature_insights"
 
1
  """
2
  FateFormer Explorer: interactive analysis hub.
3
+ Run: streamlit run streamlit_hf/app.py (repo root) or streamlit run app.py (from streamlit_hf/).
4
  """
5
 
6
+ from __future__ import annotations
7
 
8
+ import sys
9
+ from pathlib import Path
10
 
11
  _APP_DIR = Path(__file__).resolve().parent
12
+ _REPO_ROOT = _APP_DIR.parent
13
+ if str(_REPO_ROOT) not in sys.path:
14
+ sys.path.insert(0, str(_REPO_ROOT))
15
+
16
+ import streamlit as st
17
  _ICON_PATH = _APP_DIR / "static" / "app_icon.svg"
18
  _page_icon_kw = {"page_icon": str(_ICON_PATH)} if _ICON_PATH.is_file() else {}
19
 
 
24
  **_page_icon_kw,
25
  )
26
 
27
+ # Preload shared UI helpers before page scripts. Streamlit's file watcher can
28
+ # delete watched modules from sys.modules on save; if that happens mid-import,
29
+ # importlib may raise KeyError on the module name. Loading here narrows the race.
30
+ import streamlit_hf.lib.ui as _streamlit_ui_preload # noqa: F401, E402
31
+
32
  _home = str(_APP_DIR / "home.py")
33
  _p1 = str(_APP_DIR / "pages" / "1_Single_Cell_Explorer.py")
34
  _fi = _APP_DIR / "pages" / "feature_insights"
streamlit_hf/home.py CHANGED
@@ -18,6 +18,11 @@ from streamlit_hf.lib import plots
18
  from streamlit_hf.lib import ui
19
 
20
  _CACHE = Path(__file__).resolve().parent / "cache"
 
 
 
 
 
21
 
22
  _APP_NAME = "FateFormer Explorer"
23
  _HERO_EMOJI = "\U0001f9ec" # DNA (matches HF Space card tone)
@@ -27,23 +32,21 @@ _HOME_RANK_TOP_N = 15
27
  _VALIDATION_ROC_AUC = 0.93
28
 
29
  _UMAP_HOME_TITLE = "Validation latent space (UMAP)"
 
 
30
 
31
- _UMAP_HELP_MD = """
32
- **What this is:** A 2‑D **UMAP** of validation cells in the model’s **shared latent space** (RNA + chromatin + flux combined). Nearby points have **similar multimodal profiles**.
33
-
34
- **How to read it:** Axes are **unitless**—UMAP preserves *local* neighbourhoods, not real physical scales. **Colour** is the **experimental fate** from CellTag‑Multi labels. **Hover** a point for cell-level details.
35
 
36
- **Takeaway:** See whether biological fates form separable groups in the representation the model actually uses.
37
  """
38
 
39
  _GLOBAL_RANK_HELP_MD = """
40
- **What this is:** Three linked summaries of **which features** (genes, peaks, or reactions) the analyses rank highest **globally** across modalities.
41
 
42
  **Panels:** **Left / middle** = top features by **latent shift** importance and by **attention** (bars are **min‑max scaled within that panel** so the longest bar is 1). **Right** = **modality mix** (RNA vs ATAC vs Flux) among a pool of **strongest** features by **mean rank** (lower mean rank = higher joint priority).
43
 
44
  **How to read it:** Longer bars mean stronger measured influence for that metric. **Colours** mark **modality**. The donut answers: “Among the most important features in this pool, which data type dominates?”.
45
-
46
- **Takeaway:** Connects **mechanistic probes** (shift) with **what the transformer emphasises** (attention) in one glance.
47
  """
48
 
49
  _APP_SUBTITLE = (
@@ -51,12 +54,15 @@ _APP_SUBTITLE = (
51
  "to predict single-cell fate, with interpretable attention and latent-shift rankings across omics layers."
52
  )
53
 
54
- _BIOLOGY_CONTEXT_MARKDOWN = """
55
- **At a glance**
 
56
 
57
- - **Biological setting:** **FateFormer** models **direct reprogramming** from mouse embryonic fibroblasts (**MEFs**) to induced endoderm progenitors (**iEPs**), combining **transcriptome (scRNA-seq)**, **chromatin (scATAC-seq)**, and **genome-scale metabolic flux** so fate is not inferred from RNA alone; epigenetic and metabolic context matter.
58
- - **Data & labels:** Trained on a **large sparse-modality** atlas (**>150,000** cells); **2,110** early cells carry **CellTag-Multi** clonal fate tags, the same experimental labels used to colour validation cells in **UMAP** views here.
59
- - **Model design:** A **transformer** learns **shared representations** across modalities, handles **missing modalities** and **scarce fate labels**, and ties early transcription, chromatin accessibility, and metabolic activity to **later lineage outcomes**, going beyond RNA-only views of reprogramming.
 
 
60
  """
61
 
62
 
@@ -84,6 +90,16 @@ st.markdown(
84
  unsafe_allow_html=True,
85
  )
86
 
 
 
 
 
 
 
 
 
 
 
87
  bundle = io.load_latent_bundle()
88
  df_features = io.load_df_features()
89
  samples = io.load_samples_df()
@@ -164,26 +180,33 @@ if bundle is not None and df_features is not None:
164
  with row1_story:
165
  st.markdown(_BIOLOGY_CONTEXT_MARKDOWN)
166
  with row1_umap:
167
- ui.plot_caption_with_help(
168
- "Each point is a cell · colours = experimental fate labels · validation split",
169
- _UMAP_HELP_MD,
170
- key="home_umap_help",
171
- )
172
- fig_u = plots.latent_scatter(
173
- plot_umap,
174
- "label",
175
- title=_UMAP_HOME_TITLE,
176
- width=780,
177
- height=440,
178
- marker_size=5.2,
179
- marker_opacity=0.72,
180
- )
181
- fig_u.update_layout(margin=dict(l=20, r=8, t=52, b=20), title_font_size=15)
182
- st.plotly_chart(
183
- fig_u,
184
- width="stretch",
185
- config={"displayModeBar": True, "displaylogo": False, "modeBarButtonsToRemove": ["lasso2d", "select2d"]},
186
- )
 
 
 
 
 
 
 
187
 
188
  ui.plot_caption_with_help(
189
  "Global shift and attention · top features (min-max scaled within each bar chart) · modality mix donut (top by mean rank).",
@@ -211,22 +234,29 @@ elif bundle is not None:
211
  with u_story:
212
  st.markdown(_BIOLOGY_CONTEXT_MARKDOWN)
213
  with u_map:
214
- ui.plot_caption_with_help(
215
- "Feature ranking cache unavailable · UMAP only",
216
- _UMAP_HELP_MD,
217
- key="home_umap_only_help",
218
- )
219
- fig_u = plots.latent_scatter(
220
- plot_umap,
221
- "label",
222
- title=_UMAP_HOME_TITLE,
223
- width=820,
224
- height=480,
225
- marker_size=5.5,
226
- marker_opacity=0.72,
227
- )
228
- fig_u.update_layout(margin=dict(l=24, r=12, t=52, b=24), title_font_size=15)
229
- st.plotly_chart(fig_u, width="stretch", config={"displayModeBar": True, "displaylogo": False})
 
 
 
 
 
 
 
230
  elif df_features is not None:
231
  ui.plot_caption_with_help(
232
  "Feature ranking overview · latent UMAP unavailable",
 
18
  from streamlit_hf.lib import ui
19
 
20
  _CACHE = Path(__file__).resolve().parent / "cache"
21
+ _EXPERIMENT_SVG = Path(__file__).resolve().parent / "static" / "experiment.svg"
22
+ # Display width (px) for the home-page schematic; SVG scales cleanly at fixed width.
23
+ _EXPERIMENT_FIGURE_WIDTH_PX = 380
24
+
25
+ _CELLTAG_MULTI_ARTICLE_URL = "https://www.nature.com/articles/s41587-023-01931-4"
26
 
27
  _APP_NAME = "FateFormer Explorer"
28
  _HERO_EMOJI = "\U0001f9ec" # DNA (matches HF Space card tone)
 
32
  _VALIDATION_ROC_AUC = 0.93
33
 
34
  _UMAP_HOME_TITLE = "Validation latent space (UMAP)"
35
+ _UMAP_HOME_SUBTITLE = "Each point is a cell · colours = experimental fate labels · validation split"
36
+ _UMAP_HOME_SUBTITLE_RANK_MISSING = "Feature ranking cache unavailable · UMAP only"
37
 
38
+ _UMAP_HELP_MD = f"""
39
+ **What this is:** A 2‑D **UMAP** of validation **single cells** in the model’s **latent space** (**context vector token representation**), summarised across **5-fold cross-validation**. **2,110** cells are shown.
 
 
40
 
41
+ **How to read it:** Each point is one cell. **Colour** is **experimental fate** from [**CellTag-Multi**]({_CELLTAG_MULTI_ARTICLE_URL}) clonal labels. **Axes are unitless**: UMAP preserves *local* neighbourhoods, not real physical distances, so **nearby points** tend to have similar characteristics in this representation. **Hover** a point for cell-level details. For more detail (interactive UMAP, filters, and metadata), open **Single-Cell Explorer** using the link below.
42
  """
43
 
44
  _GLOBAL_RANK_HELP_MD = """
45
+ **What this is:** The **top important fate-predictor markers** for **FateFormer** across its **three modalities** (**RNA** genes, **TF motifs** from chromatin (ATAC), and **flux** reactions), shown as three linked summaries.
46
 
47
  **Panels:** **Left / middle** = top features by **latent shift** importance and by **attention** (bars are **min‑max scaled within that panel** so the longest bar is 1). **Right** = **modality mix** (RNA vs ATAC vs Flux) among a pool of **strongest** features by **mean rank** (lower mean rank = higher joint priority).
48
 
49
  **How to read it:** Longer bars mean stronger measured influence for that metric. **Colours** mark **modality**. The donut answers: “Among the most important features in this pool, which data type dominates?”.
 
 
50
  """
51
 
52
  _APP_SUBTITLE = (
 
54
  "to predict single-cell fate, with interpretable attention and latent-shift rankings across omics layers."
55
  )
56
 
57
+ _EXPERIMENTAL_SYSTEM_MD = f"""
58
+ Mouse embryonic fibroblasts (**MEFs**) were reprogrammed toward induced endoderm progenitors (**iEPs**) **in vitro** through *Foxa1* and *HNF4A* induction. This process produces **mixed outcomes**: some cells successfully reach the **iEP fate**, whereas others diverge into **off-target** trajectories and stall in **dead-end states**. Using [**CellTag-Multi**]({_CELLTAG_MULTI_ARTICLE_URL}) clonal barcoding, **early cells** could be linked to their **later fate**, which made it possible to ask a central biological question: which programs in **early-state cells**, coordinated **across transcriptional, chromatin, and metabolic layers**, drive successful reprogramming, which ones push cells toward off-target states, and which of these mechanisms could be targeted to improve reprogramming efficiency?
59
+ """
60
 
61
+ _BIOLOGY_CONTEXT_MARKDOWN = f"""
62
+ **How FateFormer addresses this**
63
+ - **Multimodal view:** FateFormer integrates **scRNA-seq**, **scATAC-seq**, and **genome-scale metabolic flux** to capture regulatory and metabolic signals that are missed by RNA-only analysis.
64
+ - **Grounded in lineage tracing:** The model is trained on a **sparse-modality atlas of more than 150,000 cells**, including **2,110** early cells linked to later outcomes through **CellTag-Multi** clonal barcoding.
65
+ - **Biological insight:** FateFormer learns representations across modalities, handles **missing inputs** and **limited labels**, and, using **explainability methods**, highlights the transcriptional, chromatin, and metabolic programs associated with reprogramming success or off-target failure.
66
  """
67
 
68
 
 
90
  unsafe_allow_html=True,
91
  )
92
 
93
+ with st.container(border=True):
94
+ fig_col, text_col = st.columns([0.42, 0.58], gap="large")
95
+ with fig_col:
96
+ if _EXPERIMENT_SVG.is_file():
97
+ st.image(str(_EXPERIMENT_SVG), width=_EXPERIMENT_FIGURE_WIDTH_PX)
98
+ else:
99
+ st.caption("Experimental schematic (`static/experiment.svg`) is missing.")
100
+ with text_col:
101
+ st.markdown(_EXPERIMENTAL_SYSTEM_MD)
102
+
103
  bundle = io.load_latent_bundle()
104
  df_features = io.load_df_features()
105
  samples = io.load_samples_df()
 
180
  with row1_story:
181
  st.markdown(_BIOLOGY_CONTEXT_MARKDOWN)
182
  with row1_umap:
183
+ try:
184
+ _umap_plot_col, _umap_help_col = st.columns([0.94, 0.06], gap="small", vertical_alignment="top")
185
+ except TypeError:
186
+ _umap_plot_col, _umap_help_col = st.columns([0.94, 0.06], gap="small")
187
+ with _umap_plot_col:
188
+ fig_u = plots.latent_scatter(
189
+ plot_umap,
190
+ "label",
191
+ title=_UMAP_HOME_TITLE,
192
+ width=780,
193
+ height=440,
194
+ marker_size=5.2,
195
+ marker_opacity=0.72,
196
+ subtitle=_UMAP_HOME_SUBTITLE,
197
+ )
198
+ fig_u.update_layout(margin=dict(l=20, r=8, t=92, b=20), title_font_size=15)
199
+ st.plotly_chart(
200
+ fig_u,
201
+ width="stretch",
202
+ config={"displayModeBar": True, "displaylogo": False, "modeBarButtonsToRemove": ["lasso2d", "select2d"]},
203
+ )
204
+ with _umap_help_col:
205
+ ui.plot_help_popover(
206
+ _UMAP_HELP_MD,
207
+ key="home_umap_help",
208
+ page_link=("pages/1_Single_Cell_Explorer.py", "Single-Cell Explorer"),
209
+ )
210
 
211
  ui.plot_caption_with_help(
212
  "Global shift and attention · top features (min-max scaled within each bar chart) · modality mix donut (top by mean rank).",
 
234
  with u_story:
235
  st.markdown(_BIOLOGY_CONTEXT_MARKDOWN)
236
  with u_map:
237
+ try:
238
+ _umap_plot_col2, _umap_help_col2 = st.columns([0.94, 0.06], gap="small", vertical_alignment="top")
239
+ except TypeError:
240
+ _umap_plot_col2, _umap_help_col2 = st.columns([0.94, 0.06], gap="small")
241
+ with _umap_plot_col2:
242
+ fig_u = plots.latent_scatter(
243
+ plot_umap,
244
+ "label",
245
+ title=_UMAP_HOME_TITLE,
246
+ width=820,
247
+ height=480,
248
+ marker_size=5.5,
249
+ marker_opacity=0.72,
250
+ subtitle=_UMAP_HOME_SUBTITLE_RANK_MISSING,
251
+ )
252
+ fig_u.update_layout(margin=dict(l=24, r=12, t=92, b=24), title_font_size=15)
253
+ st.plotly_chart(fig_u, width="stretch", config={"displayModeBar": True, "displaylogo": False})
254
+ with _umap_help_col2:
255
+ ui.plot_help_popover(
256
+ _UMAP_HELP_MD,
257
+ key="home_umap_only_help",
258
+ page_link=("pages/1_Single_Cell_Explorer.py", "Single-Cell Explorer"),
259
+ )
260
  elif df_features is not None:
261
  ui.plot_caption_with_help(
262
  "Feature ranking overview · latent UMAP unavailable",
streamlit_hf/lib/io.py CHANGED
@@ -88,13 +88,9 @@ def load_metabolic_model_metadata() -> pd.DataFrame | None:
88
 
89
  def build_metabolic_model_table(
90
  meta: pd.DataFrame,
91
- flux_df: pd.DataFrame,
92
  supermodule_id: int | None = None,
93
  ) -> pd.DataFrame:
94
- """
95
- Static edge list: substrate → product, reaction label, module class, plus DE / model columns when the
96
- reaction string matches a row in the flux feature table.
97
- """
98
  need = {"Compound_IN_name", "Compound_OUT_name", "rxnName", "Supermodule_id", "Super.Module.class"}
99
  if not need.issubset(set(meta.columns)):
100
  return pd.DataFrame()
@@ -103,36 +99,9 @@ def build_metabolic_model_table(
103
  m = m[m["Supermodule_id"] == int(supermodule_id)]
104
  if m.empty:
105
  return pd.DataFrame()
106
-
107
- fd = flux_df.copy()
108
- fd["_rk"] = fd["feature"].map(normalize_reaction_key)
109
- fd = fd.drop_duplicates("_rk", keep="first").set_index("_rk", drop=False)
110
-
111
- rows: list[dict] = []
112
- for _, r in m.iterrows():
113
- k = normalize_reaction_key(str(r["rxnName"]))
114
- base = {
115
- "Supermodule": r.get("Super.Module.class"),
116
- "Module_id": r.get("Module_id"),
117
- "Substrate": r["Compound_IN_name"],
118
- "Product": r["Compound_OUT_name"],
119
- "Reaction": r["rxnName"],
120
- }
121
- if k in fd.index:
122
- row = fd.loc[k]
123
- if isinstance(row, pd.DataFrame):
124
- row = row.iloc[0]
125
- base["log_fc"] = row["log_fc"] if "log_fc" in row.index else None
126
- base["pval_adj"] = row["pval_adj"] if "pval_adj" in row.index else None
127
- base["mean_rank"] = row["mean_rank"] if "mean_rank" in row.index else None
128
- base["pathway"] = row["pathway"] if "pathway" in row.index else None
129
- else:
130
- base["log_fc"] = None
131
- base["pval_adj"] = None
132
- base["mean_rank"] = None
133
- base["pathway"] = None
134
- rows.append(base)
135
- return pd.DataFrame(rows)
136
 
137
 
138
  def _normalize_metabolite_token(name: str) -> str:
@@ -319,7 +288,7 @@ def build_metabolite_map_bundle(
319
  if smods:
320
  lines.append(f"Modules: {html.escape(' · '.join(smods[:4]))}")
321
  if best_importance is not None:
322
- lines.append(f"Strongest linked step: #{best_importance}")
323
 
324
  top_rx = sorted(
325
  uniq_rx,
@@ -350,12 +319,12 @@ def build_metabolite_map_bundle(
350
  )
351
  if precursors:
352
  lines.append(
353
- f"<span style='color:#656d76'>Model precursors (substrates in linked steps)</span><br/>"
354
  f"{html.escape(', '.join(precursors[:8]))}"
355
  )
356
  if products:
357
  lines.append(
358
- f"<span style='color:#656d76'>Model products (downstream in linked steps)</span><br/>"
359
  f"{html.escape(', '.join(products[:8]))}"
360
  )
361
 
 
88
 
89
  def build_metabolic_model_table(
90
  meta: pd.DataFrame,
 
91
  supermodule_id: int | None = None,
92
  ) -> pd.DataFrame:
93
+ """Rows from ``metabolic_model_metadata.csv`` (all file columns except a stray ``Unnamed: 0`` index column)."""
 
 
 
94
  need = {"Compound_IN_name", "Compound_OUT_name", "rxnName", "Supermodule_id", "Super.Module.class"}
95
  if not need.issubset(set(meta.columns)):
96
  return pd.DataFrame()
 
99
  m = m[m["Supermodule_id"] == int(supermodule_id)]
100
  if m.empty:
101
  return pd.DataFrame()
102
+ if "Unnamed: 0" in m.columns:
103
+ m = m.drop(columns=["Unnamed: 0"])
104
+ return m.reset_index(drop=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
 
106
 
107
  def _normalize_metabolite_token(name: str) -> str:
 
288
  if smods:
289
  lines.append(f"Modules: {html.escape(' · '.join(smods[:4]))}")
290
  if best_importance is not None:
291
+ lines.append(f"Strongest linked reaction: #{best_importance}")
292
 
293
  top_rx = sorted(
294
  uniq_rx,
 
319
  )
320
  if precursors:
321
  lines.append(
322
+ f"<span style='color:#656d76'>Model precursors (substrates in linked reactions)</span><br/>"
323
  f"{html.escape(', '.join(precursors[:8]))}"
324
  )
325
  if products:
326
  lines.append(
327
+ f"<span style='color:#656d76'>Model products (downstream in linked reactions)</span><br/>"
328
  f"{html.escape(', '.join(products[:8]))}"
329
  )
330
 
streamlit_hf/lib/plots.py CHANGED
@@ -98,6 +98,7 @@ def latent_scatter(
98
  height: int = 520,
99
  marker_size: float = 5.0,
100
  marker_opacity: float = 0.78,
 
101
  ):
102
  d = df.copy()
103
  hover_spec = {
@@ -159,7 +160,8 @@ def latent_scatter(
159
  width=width,
160
  height=height,
161
  )
162
- if title:
 
163
  common["title"] = title
164
  if continuous:
165
  fig = px.scatter(
@@ -178,7 +180,10 @@ def latent_scatter(
178
  fig.update_traces(
179
  marker=dict(size=marker_size, opacity=marker_opacity, line=dict(width=0.25, color="rgba(255,255,255,0.4)"))
180
  )
181
- top_margin = 56 if title else 28
 
 
 
182
  fig.update_layout(
183
  template="plotly_white",
184
  font=PLOT_FONT,
@@ -190,7 +195,20 @@ def latent_scatter(
190
  paper_bgcolor=PAGE_BG,
191
  plot_bgcolor=PAGE_BG,
192
  )
193
- if not title:
 
 
 
 
 
 
 
 
 
 
 
 
 
194
  fig.update_layout(title=None)
195
  fig.update_xaxes(showticklabels=False, showgrid=True, gridcolor="rgba(0,0,0,0.06)", zeroline=False)
196
  fig.update_yaxes(showticklabels=False, showgrid=True, gridcolor="rgba(0,0,0,0.06)", zeroline=False)
@@ -198,20 +216,29 @@ def latent_scatter(
198
 
199
 
200
  def rank_scatter_shift_vs_attention(df_mod, modality: str, width: int = 420, height: int = 440):
201
- """Attention rank on x, shift rank on y, least-squares trend line, discrete point colours."""
202
  need = ("shift_order_mod", "attention_order_mod")
203
  if not all(c in df_mod.columns for c in need):
204
  return go.Figure()
205
  sub = df_mod.dropna(subset=list(need)).copy()
206
  if sub.empty:
207
  return go.Figure()
 
 
 
 
 
 
 
 
 
208
  x = sub["attention_order_mod"].astype(float).to_numpy()
209
  y = sub["shift_order_mod"].astype(float).to_numpy()
210
  fig = px.scatter(
211
  sub,
212
  x="attention_order_mod",
213
  y="shift_order_mod",
214
- color="top_10_pct",
215
  hover_name="feature",
216
  hover_data={
217
  "mean_rank": True,
@@ -221,14 +248,16 @@ def rank_scatter_shift_vs_attention(df_mod, modality: str, width: int = 420, hei
221
  labels={
222
  "attention_order_mod": "Attention rank",
223
  "shift_order_mod": "Shift rank",
 
224
  },
 
225
  width=width,
226
  height=height,
227
  color_discrete_map={
228
- "both": PALETTE[0],
229
- "shift": PALETTE[1],
230
- "att": PALETTE[2],
231
- "None": "#94a3b8",
232
  },
233
  )
234
  fig.update_traces(marker=dict(size=7, opacity=0.62, line=dict(width=0.5, color="rgba(15,23,42,0.28)")))
@@ -258,7 +287,14 @@ def rank_scatter_shift_vs_attention(df_mod, modality: str, width: int = 420, hei
258
  font=dict(size=14, family=PLOT_FONT["family"]),
259
  ),
260
  margin=dict(l=48, r=20, t=52, b=72),
261
- legend=dict(orientation="h", yanchor="top", y=-0.2, xanchor="center", x=0.5),
 
 
 
 
 
 
 
262
  )
263
  return fig
264
 
@@ -1094,7 +1130,7 @@ def pathway_enrichment_bubble_panel(
1094
  def pathway_gene_membership_heatmap(
1095
  z: np.ndarray, row_labels: list[str], col_labels: list[str]
1096
  ) -> go.Figure:
1097
- """Pathway × gene grid; empty cells transparent; Reactome/KEGG as a narrow left row spine."""
1098
  if z.size == 0:
1099
  return go.Figure()
1100
 
@@ -1113,10 +1149,11 @@ def pathway_gene_membership_heatmap(
1113
 
1114
  # Discrete codes 0–4 must not use z/4 (3→0.75 landed in the KEGG band). Map to fixed slots.
1115
  _z_plot = {0: 0.04, 1: 0.24, 2: 0.44, 3: 0.64, 4: 0.84}
1116
- transparent = "rgba(0,0,0,0)"
 
1117
  colorscale_main = [
1118
- [0.0, transparent],
1119
- [0.14, transparent],
1120
  [0.15, "#e69138"],
1121
  [0.33, "#e69138"],
1122
  [0.34, "#7eb6d9"],
 
98
  height: int = 520,
99
  marker_size: float = 5.0,
100
  marker_opacity: float = 0.78,
101
+ subtitle: str | None = None,
102
  ):
103
  d = df.copy()
104
  hover_spec = {
 
160
  width=width,
161
  height=height,
162
  )
163
+ # Title + subtitle are applied via update_layout when `subtitle` is set (needs Plotly 5.23+, which introduced layout.title.subtitle).
164
+ if title and not subtitle:
165
  common["title"] = title
166
  if continuous:
167
  fig = px.scatter(
 
180
  fig.update_traces(
181
  marker=dict(size=marker_size, opacity=marker_opacity, line=dict(width=0.25, color="rgba(255,255,255,0.4)"))
182
  )
183
+ if title and subtitle:
184
+ top_margin = 88
185
+ else:
186
+ top_margin = 56 if title else 28
187
  fig.update_layout(
188
  template="plotly_white",
189
  font=PLOT_FONT,
 
195
  paper_bgcolor=PAGE_BG,
196
  plot_bgcolor=PAGE_BG,
197
  )
198
+ if title and subtitle:
199
+ fig.update_layout(
200
+ title=dict(
201
+ text=title,
202
+ x=0.5,
203
+ xanchor="center",
204
+ font=dict(size=16, family=PLOT_FONT["family"]),
205
+ subtitle=dict(
206
+ text=subtitle,
207
+ font=dict(size=11, color="#64748b", family=PLOT_FONT["family"]),
208
+ ),
209
+ ),
210
+ )
211
+ elif not title:
212
  fig.update_layout(title=None)
213
  fig.update_xaxes(showticklabels=False, showgrid=True, gridcolor="rgba(0,0,0,0.06)", zeroline=False)
214
  fig.update_yaxes(showticklabels=False, showgrid=True, gridcolor="rgba(0,0,0,0.06)", zeroline=False)
 
216
 
217
 
218
  def rank_scatter_shift_vs_attention(df_mod, modality: str, width: int = 420, height: int = 440):
219
+ """Attention rank on x, shift rank on y, least-squares trend, colours by top ~10% within this modality."""
220
  need = ("shift_order_mod", "attention_order_mod")
221
  if not all(c in df_mod.columns for c in need):
222
  return go.Figure()
223
  sub = df_mod.dropna(subset=list(need)).copy()
224
  if sub.empty:
225
  return go.Figure()
226
+ n = len(sub)
227
+ top_k = max(1, int(np.ceil(0.1 * n)))
228
+ s_ok = sub["shift_order_mod"].astype(int) <= top_k
229
+ a_ok = sub["attention_order_mod"].astype(int) <= top_k
230
+ sub["_tier_label"] = np.where(
231
+ s_ok & a_ok,
232
+ "Both",
233
+ np.where(s_ok, "Shift", np.where(a_ok, "Attention", "Neither")),
234
+ )
235
  x = sub["attention_order_mod"].astype(float).to_numpy()
236
  y = sub["shift_order_mod"].astype(float).to_numpy()
237
  fig = px.scatter(
238
  sub,
239
  x="attention_order_mod",
240
  y="shift_order_mod",
241
+ color="_tier_label",
242
  hover_name="feature",
243
  hover_data={
244
  "mean_rank": True,
 
248
  labels={
249
  "attention_order_mod": "Attention rank",
250
  "shift_order_mod": "Shift rank",
251
+ "_tier_label": "Top-10% tier",
252
  },
253
+ category_orders={"_tier_label": ["Both", "Shift", "Attention", "Neither"]},
254
  width=width,
255
  height=height,
256
  color_discrete_map={
257
+ "Both": PALETTE[0],
258
+ "Shift": PALETTE[1],
259
+ "Attention": PALETTE[2],
260
+ "Neither": "#94a3b8",
261
  },
262
  )
263
  fig.update_traces(marker=dict(size=7, opacity=0.62, line=dict(width=0.5, color="rgba(15,23,42,0.28)")))
 
287
  font=dict(size=14, family=PLOT_FONT["family"]),
288
  ),
289
  margin=dict(l=48, r=20, t=52, b=72),
290
+ legend=dict(
291
+ title=dict(text="Among top 10% features?"),
292
+ orientation="h",
293
+ yanchor="top",
294
+ y=-0.2,
295
+ xanchor="center",
296
+ x=0.5,
297
+ ),
298
  )
299
  return fig
300
 
 
1130
  def pathway_gene_membership_heatmap(
1131
  z: np.ndarray, row_labels: list[str], col_labels: list[str]
1132
  ) -> go.Figure:
1133
+ """Pathway × gene grid; empty cells use a light tint vs page white; Reactome/KEGG as a narrow left row spine."""
1134
  if z.size == 0:
1135
  return go.Figure()
1136
 
 
1149
 
1150
  # Discrete codes 0–4 must not use z/4 (3→0.75 landed in the KEGG band). Map to fixed slots.
1151
  _z_plot = {0: 0.04, 1: 0.24, 2: 0.44, 3: 0.64, 4: 0.84}
1152
+ # Slight contrast vs PAGE_BG (#fff) so empty (code 0) cells read as a grid, not “missing” paint.
1153
+ _empty_cell = "#f1f5f9"
1154
  colorscale_main = [
1155
+ [0.0, _empty_cell],
1156
+ [0.14, _empty_cell],
1157
  [0.15, "#e69138"],
1158
  [0.33, "#e69138"],
1159
  [0.34, "#7eb6d9"],
streamlit_hf/lib/ui.py CHANGED
@@ -4,6 +4,12 @@ from __future__ import annotations
4
 
5
  import streamlit as st
6
 
 
 
 
 
 
 
7
 
8
  def inject_app_styles() -> None:
9
  """Panel labels, page background, and shared chrome (all pages)."""
@@ -69,8 +75,17 @@ section[data-testid="stMain"] h1 {
69
  )
70
 
71
 
72
- def plot_help_popover(help_md: str, *, key: str) -> None:
73
- """Small help control next to a figure; opens Markdown guidance for biologists."""
 
 
 
 
 
 
 
 
 
74
  with st.popover(
75
  " ",
76
  help="What does this figure show?",
@@ -80,6 +95,9 @@ def plot_help_popover(help_md: str, *, key: str) -> None:
80
  key=key,
81
  ):
82
  st.markdown(help_md)
 
 
 
83
 
84
 
85
  def plot_caption_with_help(caption: str, help_md: str, *, key: str) -> None:
 
4
 
5
  import streamlit as st
6
 
7
+ # Feature Insights multipage hub: same title + tagline on every sub-page.
8
+ FEATURE_INSIGHTS_TITLE = "Feature Insights"
9
+ FEATURE_INSIGHTS_CAPTION = (
10
+ "Latent-shift probes, attention rollout, and combined rankings across RNA, ATAC, and Flux."
11
+ )
12
+
13
 
14
  def inject_app_styles() -> None:
15
  """Panel labels, page background, and shared chrome (all pages)."""
 
75
  )
76
 
77
 
78
+ def plot_help_popover(
79
+ help_md: str,
80
+ *,
81
+ key: str,
82
+ page_link: tuple[str, str] | None = None,
83
+ ) -> None:
84
+ """Small help control next to a figure; opens Markdown guidance for biologists.
85
+
86
+ If ``page_link`` is ``(path, label)``, a ``st.page_link`` is rendered after the markdown
87
+ (e.g. ``("pages/1_Single_Cell_Explorer.py", "Single-Cell Explorer")``).
88
+ """
89
  with st.popover(
90
  " ",
91
  help="What does this figure show?",
 
95
  key=key,
96
  ):
97
  st.markdown(help_md)
98
+ if page_link:
99
+ page_path, page_label = page_link
100
+ st.page_link(page_path, label=page_label)
101
 
102
 
103
  def plot_caption_with_help(caption: str, help_md: str, *, key: str) -> None:
streamlit_hf/pages/1_Single_Cell_Explorer.py CHANGED
@@ -19,16 +19,24 @@ from streamlit_hf.lib import ui
19
 
20
  ui.inject_app_styles()
21
 
22
- _UMAP_EXPLORER_HELP = """
23
- **What this is:** The same kind of **2‑D UMAP** as on Home, but you choose **what to colour** (fate label, model prediction, fold, modalities present, etc.) and can **filter** cells.
24
 
25
- **How to read it:** Axes are **unitless** UMAP coordinates. **Colour** follows your **Colour by** menu. **Hover** points for values; **click‑drag a box** on the plot to **select** cells and inspect them in the table below.
 
26
 
27
- **Takeaway:** Check whether mis‑predictions or batch effects line up in particular regions of latent space.
 
 
 
 
 
28
  """
29
 
30
  st.title("Single-Cell Explorer")
31
- st.caption("Explore validation cells in 2-D UMAP space: colour and filter to compare fates, predictions, and modalities.")
 
 
 
32
 
33
  bundle = io.load_latent_bundle()
34
  if bundle is None:
@@ -107,21 +115,31 @@ if plot_df.empty:
107
  st.stop()
108
 
109
  with right:
110
- ui.plot_caption_with_help(
111
- "Hover points for details · drag on the plot to select cells",
112
- _UMAP_EXPLORER_HELP,
113
- key="sc_umap_help",
114
- )
115
- fig = plots.latent_scatter(
116
- plot_df,
117
- color_opt,
118
- title="Validation latent space (UMAP)",
119
- width=900,
120
- height=560,
121
- marker_size=5.8,
122
- marker_opacity=0.74,
123
- )
124
- st.plotly_chart(fig, width="stretch", on_select="rerun", key="latent_pick")
 
 
 
 
 
 
 
 
 
 
125
 
126
  st.subheader("Selected points")
127
  state = st.session_state.get("latent_pick")
@@ -155,12 +173,17 @@ else:
155
  )
156
 
157
  st.subheader("Inspect by dataset index")
 
 
158
  pick = st.number_input(
159
  "Dataset index",
160
- min_value=int(df["dataset_idx"].min()),
161
- max_value=int(df["dataset_idx"].max()),
162
  value=int(df["dataset_idx"].iloc[0]),
163
- help="Index `ind` in your sample table; aligns one validation cell to this row.",
 
 
 
164
  )
165
  row = df[df["dataset_idx"] == pick]
166
  if not row.empty:
 
19
 
20
  ui.inject_app_styles()
21
 
22
+ _CELLTAG_MULTI_ARTICLE_URL = "https://www.nature.com/articles/s41587-023-01931-4"
 
23
 
24
+ _UMAP_EXPLORER_TITLE = "Validation latent space (UMAP)"
25
+ _UMAP_EXPLORER_SUBTITLE = "Hover points for details · drag on the plot to select cells"
26
 
27
+ _UMAP_EXPLORER_HELP = f"""
28
+ **What this is:** The same **2‑D UMAP** as on **Home**: validation **single cells** in **FateFormer**’s **latent space** (**context vector token representation**), summarised across **5-fold cross-validation** (**2,110** cells before filters). Here you **choose what to colour** and **filter** the cloud.
29
+
30
+ **How to read it:** Each point is one cell. **Colour** comes from **Colour by**: e.g. [**CellTag-Multi**]({_CELLTAG_MULTI_ARTICLE_URL}) **label**, **predicted fate**, **prediction correct / wrong**, **CV fold**, **batch**, which **modalities** are present, or **dominant fate %**. **Axes are unitless** (UMAP preserves *local* neighbourhoods only). **Hover** a point for per-cell fields.
31
+
32
+ **Using this page:** Use **Filters** to keep modality combinations, restrict **prediction outcome** (all / correct only / wrong only), choose **CV folds**, and set a **dominant fate %** range. In the plot **toolbar** (top right), pick **Box select** or **Lasso select**, then **drag** on the canvas; the app **reruns** and the **Selected points** table fills with those rows. To inspect **one** cell without a selection, scroll to **Inspect by dataset index**.
33
  """
34
 
35
  st.title("Single-Cell Explorer")
36
+ st.caption(
37
+ "This page is an interactive **validation UMAP** in FateFormer latent space: you choose how points are **coloured**, "
38
+ "apply **filters**, and can **select** cells on the plot to inspect them in a table or by index."
39
+ )
40
 
41
  bundle = io.load_latent_bundle()
42
  if bundle is None:
 
115
  st.stop()
116
 
117
  with right:
118
+ try:
119
+ _sc_umap_plot_col, _sc_umap_help_col = st.columns([0.94, 0.06], gap="small", vertical_alignment="top")
120
+ except TypeError:
121
+ _sc_umap_plot_col, _sc_umap_help_col = st.columns([0.94, 0.06], gap="small")
122
+ with _sc_umap_plot_col:
123
+ fig = plots.latent_scatter(
124
+ plot_df,
125
+ color_opt,
126
+ title=_UMAP_EXPLORER_TITLE,
127
+ width=900,
128
+ height=560,
129
+ marker_size=5.8,
130
+ marker_opacity=0.74,
131
+ subtitle=_UMAP_EXPLORER_SUBTITLE,
132
+ )
133
+ fig.update_layout(margin=dict(l=20, r=12, t=92, b=20), title_font_size=15)
134
+ st.plotly_chart(
135
+ fig,
136
+ width="stretch",
137
+ on_select="rerun",
138
+ key="latent_pick",
139
+ config={"displayModeBar": True, "displaylogo": False},
140
+ )
141
+ with _sc_umap_help_col:
142
+ ui.plot_help_popover(_UMAP_EXPLORER_HELP, key="sc_umap_help")
143
 
144
  st.subheader("Selected points")
145
  state = st.session_state.get("latent_pick")
 
173
  )
174
 
175
  st.subheader("Inspect by dataset index")
176
+ _didx_min = int(df["dataset_idx"].min())
177
+ _didx_max = int(df["dataset_idx"].max())
178
  pick = st.number_input(
179
  "Dataset index",
180
+ min_value=_didx_min,
181
+ max_value=_didx_max,
182
  value=int(df["dataset_idx"].iloc[0]),
183
+ help=(
184
+ f"The table below is a one-cell summary for the validation set: choose an index from {_didx_min} to {_didx_max} "
185
+ "to see fate labels, model prediction, available modalities, and related fields for that cell."
186
+ ),
187
  )
188
  row = df[df["dataset_idx"] == pick]
189
  if not row.empty:
streamlit_hf/pages/feature_insights/1_Global_overview.py CHANGED
@@ -1,4 +1,4 @@
1
- """Feature Insights global overview of multimodal feature importance."""
2
 
3
  from __future__ import annotations
4
 
@@ -18,18 +18,13 @@ from streamlit_hf.lib import ui
18
  ui.inject_app_styles()
19
 
20
  _GLOBAL_OVERVIEW_HELP = """
21
- **What this is:** A **global** snapshot of which **genes, ATAC peaks, or flux reactions** rank highest when **latent shift probes** and **attention rollout** are combined across the whole model.
22
 
23
- **Panels:** **Shift** and **attention** bar charts show the **top‑N** features for each metric (**min‑max scaled within that chart**). The **pie** shows the **RNA / ATAC / Flux** breakdown among a larger pool of **lowest mean‑rank** features (strongest overall joint ranking).
24
 
25
- **How to read it:** **Lower mean rank** = higher priority in the joint ranking. **Colours** encode **modality**. Use the sliders to change how many bars and how large the pie pool is.
26
-
27
- **Takeaway:** See whether interpretability is **RNA‑heavy**, **metabolism‑heavy**, or **balanced** before drilling into modality pages.
28
  """
29
 
30
- st.title("Feature Insights")
31
- st.caption("Latent-shift probes, attention rollout, and combined rankings across RNA, ATAC, and Flux.")
32
-
33
  df = io.load_df_features()
34
 
35
  if df is None:
@@ -38,7 +33,14 @@ if df is None:
38
  )
39
  st.stop()
40
 
 
 
41
  st.subheader("Global overview")
 
 
 
 
 
42
  c1, c2 = st.columns(2)
43
  with c1:
44
  top_n_bars = st.slider(
@@ -47,6 +49,11 @@ with c1:
47
  45,
48
  20,
49
  key="t1_topn_bars",
 
 
 
 
 
50
  )
51
  with c2:
52
  top_n_pie = st.slider(
@@ -55,6 +62,11 @@ with c2:
55
  250,
56
  100,
57
  key="t1_topn_pie",
 
 
 
 
 
58
  )
59
  ui.plot_caption_with_help(
60
  "Global top features by shift vs attention; pie = modality mix among strongest mean-rank pool.",
 
1
+ """Feature Insights: global overview of multimodal feature importance."""
2
 
3
  from __future__ import annotations
4
 
 
18
  ui.inject_app_styles()
19
 
20
  _GLOBAL_OVERVIEW_HELP = """
21
+ **What this is:** The **top important fate-predictor markers** for **FateFormer** across its **three modalities** (**RNA** genes, **TF motifs** from chromatin (ATAC), and **flux** reactions), as a **global** view that combines **latent shift** probes and **attention rollout** over the full model.
22
 
23
+ **Panels:** **Shift** and **attention** bar charts show the **top‑N** features for each metric (**min‑max scaled within that chart**, longest bar = 1). The **pie chart** (right) shows **modality mix** (RNA vs ATAC vs Flux) among a pool of **strongest** features by **mean rank** (**lower mean rank** = higher joint priority).
24
 
25
+ **How to read it:** **Longer bars** mean stronger measured influence for that metric. **Colours** mark **modality**. Use the **sliders** above to change bar count and pie pool size. The **pie chart** answers: “Among the most important features in this pool, which data type dominates?”.
 
 
26
  """
27
 
 
 
 
28
  df = io.load_df_features()
29
 
30
  if df is None:
 
33
  )
34
  st.stop()
35
 
36
+ st.title(ui.FEATURE_INSIGHTS_TITLE)
37
+ st.caption(ui.FEATURE_INSIGHTS_CAPTION)
38
  st.subheader("Global overview")
39
+ st.caption(
40
+ "Here, we give a bird's-eye view of which RNA, ATAC, and Flux features matter most: top-N bars for latent shift and "
41
+ "attention (two explainability methods), plus a pie of modality mix among the strongest features by mean rank "
42
+ "(sliders change list sizes)."
43
+ )
44
  c1, c2 = st.columns(2)
45
  with c1:
46
  top_n_bars = st.slider(
 
49
  45,
50
  20,
51
  key="t1_topn_bars",
52
+ help=(
53
+ "How many features appear in the left (latent shift) and middle (attention) bar charts: the top N by each "
54
+ "metric. Each chart is min–max scaled on its own (longest bar = 1). Increase N to list more markers; "
55
+ "decrease N to focus on the strongest few."
56
+ ),
57
  )
58
  with c2:
59
  top_n_pie = st.slider(
 
62
  250,
63
  100,
64
  key="t1_topn_pie",
65
+ help=(
66
+ "How many features define the right-hand pie chart: the N strongest by mean rank (lower mean rank = "
67
+ "stronger joint ranking across shift and attention). A larger pool gives a broader modality mix "
68
+ "(RNA vs ATAC vs Flux); a smaller pool weights only the very top joint features."
69
+ ),
70
  )
71
  ui.plot_caption_with_help(
72
  "Global top features by shift vs attention; pie = modality mix among strongest mean-rank pool.",
streamlit_hf/pages/feature_insights/2_Modality_spotlight.py CHANGED
@@ -1,4 +1,4 @@
1
- """Feature Insights modality spotlight (RNA, ATAC, Flux)."""
2
 
3
  from __future__ import annotations
4
 
@@ -17,31 +17,16 @@ from streamlit_hf.lib import ui
17
 
18
  ui.inject_app_styles()
19
 
20
- _HELP_JOINT = """
21
- **What this is:** Within **{mod}** only, features with the **strongest joint ranking** (combined shift + attention priority).
22
 
23
- **How to read it:** Each row is **one feature**; the **two bars** are **shift** and **attention** scores **rescaled 0–1 within this top‑N list** so they are comparable. **Hover** for the full name.
24
 
25
- **Takeaway:** Highlights markers that are important both to **representations** and to **model focus** in this modality.
26
- """
27
-
28
- _HELP_SHIFT = """
29
- **What this is:** **{mod}** features with highest **latent shift** importance—those whose perturbation **moves the model’s latent state** most.
30
-
31
- **How to read it:** **Longer bar** = larger shift score within this **top‑N** list (compare lengths across features).
32
 
33
- **Takeaway:** Mechanistic “if we nudge this input, the embedding changes a lot.”
34
  """
35
 
36
- _HELP_ATT = """
37
- **What this is:** **{mod}** features with highest **attention** importance from rollout—what the **transformer emphasises** when processing cells.
38
-
39
- **How to read it:** **Longer bar** = more average attention mass on that feature (within this top‑N list).
40
-
41
- **Takeaway:** Describes **model behaviour** (what it “looks at”), which can differ from causal shift effects.
42
- """
43
- st.caption("Latent-shift probes, attention rollout, and combined rankings across RNA, ATAC, and Flux.")
44
-
45
  df = io.load_df_features()
46
 
47
  if df is None:
@@ -50,17 +35,36 @@ if df is None:
50
  )
51
  st.stop()
52
 
53
- st.subheader("Modality spotlight")
 
 
 
 
 
 
 
 
 
54
  st.caption(
55
- "**Modality spotlight:** three columns (**RNA**, **ATAC**, **Flux**). Each column only shows features "
56
- "from that modality so you can compare shift impact, attention, and joint ranking **within** RNA, ATAC, or flux."
 
 
 
 
 
 
 
 
 
 
 
 
57
  )
58
- top_n_rank = st.slider("Top N per chart", 10, 55, 20, key="t2_topn")
59
  st.markdown("##### Joint top markers (by mean rank)")
60
  st.caption(
61
- "The **strongest combined** markers by mean rank (lower mean rank = higher joint shift + attention priority). "
62
- "Shift and attention bars are **min-max scaled within this top-N list** (0 to 1) so you can compare them on one axis. "
63
- "Hover a bar for the full feature name."
64
  )
65
  r1a, r1b, r1c = st.columns(3)
66
  for col, mod in zip((r1a, r1b, r1c), ("RNA", "ATAC", "Flux")):
@@ -68,13 +72,11 @@ for col, mod in zip((r1a, r1b, r1c), ("RNA", "ATAC", "Flux")):
68
  if sm.empty:
69
  continue
70
  with col:
71
- _, _hp = st.columns([1, 0.28])
72
- with _hp:
73
- ui.plot_help_popover(_HELP_JOINT.format(mod=mod), key=f"t2_joint_{mod}")
74
  st.plotly_chart(
75
  plots.joint_shift_attention_top_features(sm, mod, top_n_rank),
76
  width="stretch",
77
  )
 
78
  st.markdown("##### Shift importance")
79
  r2a, r2b, r2c = st.columns(3)
80
  for col, mod in zip((r2a, r2b, r2c), ("RNA", "ATAC", "Flux")):
@@ -84,9 +86,6 @@ for col, mod in zip((r2a, r2b, r2c), ("RNA", "ATAC", "Flux")):
84
  colc = plots.MODALITY_COLOR.get(mod, plots.PALETTE[0])
85
  sub = sm.nlargest(top_n_rank, "importance_shift").sort_values("importance_shift", ascending=True)
86
  with col:
87
- _, _hp = st.columns([1, 0.28])
88
- with _hp:
89
- ui.plot_help_popover(_HELP_SHIFT.format(mod=mod), key=f"t2_shift_{mod}")
90
  st.plotly_chart(
91
  plots.rank_bar(
92
  sub,
@@ -98,6 +97,7 @@ for col, mod in zip((r2a, r2b, r2c), ("RNA", "ATAC", "Flux")):
98
  ),
99
  width="stretch",
100
  )
 
101
  st.markdown("##### Attention importance")
102
  r3a, r3b, r3c = st.columns(3)
103
  for col, mod in zip((r3a, r3b, r3c), ("RNA", "ATAC", "Flux")):
@@ -107,9 +107,6 @@ for col, mod in zip((r3a, r3b, r3c), ("RNA", "ATAC", "Flux")):
107
  colc = plots.MODALITY_COLOR.get(mod, plots.PALETTE[0])
108
  sub = sm.nlargest(top_n_rank, "importance_att").sort_values("importance_att", ascending=True)
109
  with col:
110
- _, _hp = st.columns([1, 0.28])
111
- with _hp:
112
- ui.plot_help_popover(_HELP_ATT.format(mod=mod), key=f"t2_att_{mod}")
113
  st.plotly_chart(
114
  plots.rank_bar(
115
  sub,
 
1
+ """Feature Insights: modality spotlight (RNA, ATAC, Flux)."""
2
 
3
  from __future__ import annotations
4
 
 
17
 
18
  ui.inject_app_styles()
19
 
20
+ _HELP_PAGE = """
21
+ **Layout:** Three modality columns (**RNA**, **ATAC**, **Flux**). Each column uses only that modality’s features (**genes**, **TF motifs** from chromatin, or **metabolic reactions**).
22
 
23
+ **Joint row** (*Joint top markers*): Features ordered by **mean rank** (combined shift + attention; **lower mean rank** = stronger joint priority). Each row is one feature with **two bars** (shift and attention), **min‑max scaled within this top‑N list** (0–1) so both are comparable. **Hover** a bar for the full name.
24
 
25
+ **Shift row** (*Shift importance*): **Shift-only** top **N** by latent shift score per column. **Longer bar** = larger shift in this list. **Hover** for the full name.
 
 
 
 
 
 
26
 
27
+ **Attention row** (*Attention importance*): **Attention-only** top **N** by rollout importance per column. **Longer bar** = more average attention. **Hover** for the full name.
28
  """
29
 
 
 
 
 
 
 
 
 
 
30
  df = io.load_df_features()
31
 
32
  if df is None:
 
35
  )
36
  st.stop()
37
 
38
+ st.title(ui.FEATURE_INSIGHTS_TITLE)
39
+ st.caption(ui.FEATURE_INSIGHTS_CAPTION)
40
+ try:
41
+ _spot_h_l, _spot_h_r = st.columns([0.94, 0.06], gap="small", vertical_alignment="center")
42
+ except TypeError:
43
+ _spot_h_l, _spot_h_r = st.columns([0.94, 0.06], gap="small")
44
+ with _spot_h_l:
45
+ st.subheader("Modality spotlight")
46
+ with _spot_h_r:
47
+ ui.plot_help_popover(_HELP_PAGE, key="t2_page_help")
48
  st.caption(
49
+ "Here, we zoom into one modality at a time (RNA, ATAC, or Flux) to explore top fate predictor markers: for each column "
50
+ "you see joint top markers, then shift-only and attention-only rankings side by side so within-modality comparisons "
51
+ "stay fair."
52
+ )
53
+ top_n_rank = st.slider(
54
+ "Top N per chart",
55
+ 10,
56
+ 55,
57
+ 20,
58
+ key="t2_topn",
59
+ help=(
60
+ "Number of features in each chart on this page: the joint (mean-rank) row, the shift-only row, "
61
+ "and the attention-only row all use this N within each modality column."
62
+ ),
63
  )
64
+
65
  st.markdown("##### Joint top markers (by mean rank)")
66
  st.caption(
67
+ "Joint row: strongest by mean rank; shift and attention bars scaled within this top-N list. Hover a bar for the full name."
 
 
68
  )
69
  r1a, r1b, r1c = st.columns(3)
70
  for col, mod in zip((r1a, r1b, r1c), ("RNA", "ATAC", "Flux")):
 
72
  if sm.empty:
73
  continue
74
  with col:
 
 
 
75
  st.plotly_chart(
76
  plots.joint_shift_attention_top_features(sm, mod, top_n_rank),
77
  width="stretch",
78
  )
79
+
80
  st.markdown("##### Shift importance")
81
  r2a, r2b, r2c = st.columns(3)
82
  for col, mod in zip((r2a, r2b, r2c), ("RNA", "ATAC", "Flux")):
 
86
  colc = plots.MODALITY_COLOR.get(mod, plots.PALETTE[0])
87
  sub = sm.nlargest(top_n_rank, "importance_shift").sort_values("importance_shift", ascending=True)
88
  with col:
 
 
 
89
  st.plotly_chart(
90
  plots.rank_bar(
91
  sub,
 
97
  ),
98
  width="stretch",
99
  )
100
+
101
  st.markdown("##### Attention importance")
102
  r3a, r3b, r3c = st.columns(3)
103
  for col, mod in zip((r3a, r3b, r3c), ("RNA", "ATAC", "Flux")):
 
107
  colc = plots.MODALITY_COLOR.get(mod, plots.PALETTE[0])
108
  sub = sm.nlargest(top_n_rank, "importance_att").sort_values("importance_att", ascending=True)
109
  with col:
 
 
 
110
  st.plotly_chart(
111
  plots.rank_bar(
112
  sub,
streamlit_hf/pages/feature_insights/3_Shift_vs_attention.py CHANGED
@@ -1,4 +1,4 @@
1
- """Feature Insights shift vs attention rank scatter by modality."""
2
 
3
  from __future__ import annotations
4
 
@@ -18,16 +18,24 @@ from streamlit_hf.lib import ui
18
 
19
  ui.inject_app_styles()
20
 
21
- _HELP_SHIFT_VS_ATT = """
22
- **What this is:** Each **dot** is **one {mod} feature**. **X** = rank by **attention** (1 = strongest in this modality); **Y** = rank by **latent shift** (1 = strongest).
23
-
24
- **How to read it:** Points **on the diagonal** rank similarly for both metrics. The **red dashed line** is a **least‑squares trend**—it summarises whether higher attention rank tends to pair with higher shift rank in this modality.
25
-
26
- **Takeaway:** Features **far from the trend** are interesting: strong in one lens but not the other (e.g. high attention, lower shift, or the reverse).
27
- """
 
28
 
29
- st.title("Feature Insights")
30
- st.caption("Latent-shift probes, attention rollout, and combined rankings across RNA, ATAC, and Flux.")
 
 
 
 
 
 
 
31
 
32
  df = io.load_df_features()
33
 
@@ -37,11 +45,13 @@ if df is None:
37
  )
38
  st.stop()
39
 
 
 
40
  st.subheader("Shift vs attention")
41
  st.caption(
42
- "Each point is **one feature** within its modality. **Attention rank** is on the horizontal axis and **shift rank** "
43
- "on the vertical axis (1 = strongest in that modality for that metric). Features near the diagonal rank similarly "
44
- "for both; the **red dashed line** is a straight-line trend (least-squares fit) through the cloud."
45
  )
46
  corr_rows = []
47
  for mod in ("RNA", "ATAC", "Flux"):
@@ -61,14 +71,20 @@ for mod in ("RNA", "ATAC", "Flux"):
61
  }
62
  )
63
  if corr_rows:
 
 
 
 
64
  st.dataframe(pd.DataFrame(corr_rows), hide_index=True, width="stretch")
 
 
 
 
 
65
  rc1, rc2, rc3 = st.columns(3)
66
  for col, mod in zip((rc1, rc2, rc3), ("RNA", "ATAC", "Flux")):
67
  with col:
68
  sub_m = df[df["modality"] == mod]
69
- _, _hp = st.columns([1, 0.28])
70
- with _hp:
71
- ui.plot_help_popover(_HELP_SHIFT_VS_ATT.format(mod=mod), key=f"t3_scatter_{mod}")
72
  st.plotly_chart(
73
  plots.rank_scatter_shift_vs_attention(sub_m, mod),
74
  width="stretch",
 
1
+ """Feature Insights: shift vs attention rank scatter by modality."""
2
 
3
  from __future__ import annotations
4
 
 
18
 
19
  ui.inject_app_styles()
20
 
21
+ # Native Streamlit tooltips (caption help); plain text reads well in the small ? popover.
22
+ _CORR_TABLE_HELP = (
23
+ "Per-modality correlation between attention rank and latent-shift rank across features in that modality "
24
+ "(same features as in the scatters below). Pearson r and Spearman rho measure rank agreement, with p-values. "
25
+ "# features is how many features in that modality were used for the correlation (one rank pair per feature). "
26
+ "Higher |r| means stronger agreement in how features are ordered: a feature that ranks high on shift (small rank; 1 = strongest) "
27
+ "tends to sit in a similar place on attention rank, and the same for weaker features, across that modality."
28
+ )
29
 
30
+ _SCATTER_HELP = (
31
+ "Each dot is one feature in that column: a gene (RNA), TF motif (ATAC), or reaction (Flux). "
32
+ "X = attention rank (1 = strongest in that modality); Y = latent shift rank (1 = strongest). "
33
+ "Ranks on both axes show agreement between methods: near the diagonal means similar ranking; "
34
+ "the dashed trend line is a least-squares fit. Correlation for each modality is in the table above; "
35
+ "stronger r means closer alignment of shift- and attention-based importance as fate predictors. "
36
+ "Point colour is whether that feature sits in the top ~10% by shift rank, attention rank, both, or neither, "
37
+ "using ranks within that modality only (same scale as the axes)."
38
+ )
39
 
40
  df = io.load_df_features()
41
 
 
45
  )
46
  st.stop()
47
 
48
+ st.title(ui.FEATURE_INSIGHTS_TITLE)
49
+ st.caption(ui.FEATURE_INSIGHTS_CAPTION)
50
  st.subheader("Shift vs attention")
51
  st.caption(
52
+ "Here, we explore how much latent-shift and attention-rollout explanations agree on feature importance within each "
53
+ "modality. A correlation table quantifies rank agreement; scatter plots pair each feature’s two ranks "
54
+ "(1 = strongest in that modality)."
55
  )
56
  corr_rows = []
57
  for mod in ("RNA", "ATAC", "Flux"):
 
71
  }
72
  )
73
  if corr_rows:
74
+ st.caption(
75
+ "Rank correlation by modality",
76
+ help=_CORR_TABLE_HELP,
77
+ )
78
  st.dataframe(pd.DataFrame(corr_rows), hide_index=True, width="stretch")
79
+
80
+ st.caption(
81
+ "Rank scatter by modality",
82
+ help=_SCATTER_HELP,
83
+ )
84
  rc1, rc2, rc3 = st.columns(3)
85
  for col, mod in zip((rc1, rc2, rc3), ("RNA", "ATAC", "Flux")):
86
  with col:
87
  sub_m = df[df["modality"] == mod]
 
 
 
88
  st.plotly_chart(
89
  plots.rank_scatter_shift_vs_attention(sub_m, mod),
90
  width="stretch",
streamlit_hf/pages/feature_insights/4_Attention_vs_prediction.py CHANGED
@@ -1,4 +1,4 @@
1
- """Feature Insights attention by predicted cohort."""
2
 
3
  from __future__ import annotations
4
 
@@ -17,27 +17,12 @@ from streamlit_hf.lib import ui
17
 
18
  ui.inject_app_styles()
19
 
20
- _HELP_ATT_COHORT_BARS = """
21
- **What this is:** **Mean attention** (rollout) on each **feature token**, averaged over validation cells and split by **what the model predicted** for those cells.
22
 
23
- **Cohort menu:** **Compare** shows cohorts **side‑by‑side**. **All / dead‑end / reprogramming** restrict the average to that predicted class only.
24
-
25
- **Important:** Uses **predicted** fate, **not** the experimental label—this is **model behaviour**, useful for comparing what the network emphasises when it leans each way.
26
-
27
- **How to read:** **Longer bar** = more cumulative attention on that feature (among the **top‑N** shown). **Hover** for numeric detail.
28
- """
29
-
30
- _HELP_ROLLOUT_TABLE = """
31
- **What this is:** The same **mean rollout vector** as the bars, but as a **sortable table** of the strongest **{mod}** tokens.
32
-
33
- **How to read:** Rows are **ranked** by weight in the selected cohort. **Batch** embedding tokens are omitted from this view.
34
-
35
- **Takeaway:** Lets you **copy names** or scan exact ordering beyond the bar chart.
36
  """
37
 
38
- st.title("Feature Insights")
39
- st.caption("Latent-shift probes, attention rollout, and combined rankings across RNA, ATAC, and Flux.")
40
-
41
  df = io.load_df_features()
42
  att = io.load_attention_summary()
43
 
@@ -47,7 +32,20 @@ if df is None:
47
  )
48
  st.stop()
49
 
50
- st.subheader("Attention vs prediction")
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  cohort_mode = st.selectbox(
52
  "Cohort view",
53
  [
@@ -78,9 +76,6 @@ else:
78
  ac1, ac2, ac3 = st.columns(3)
79
  for col, mod in zip((ac1, ac2, ac3), ("RNA", "ATAC", "Flux")):
80
  with col:
81
- _, _hp = st.columns([1, 0.28])
82
- with _hp:
83
- ui.plot_help_popover(_HELP_ATT_COHORT_BARS, key=f"t4_bar_{mod}_{cohort_mode}")
84
  st.plotly_chart(
85
  plots.attention_cohort_view(att["fi_att"], mod, top_n=top_n_att, mode=cohort_mode),
86
  width="stretch",
@@ -115,12 +110,5 @@ else:
115
  vec = vec_all[sl["start"] : sl["stop"]]
116
  names = att["feature_names"][sl["start"] : sl["stop"]]
117
  mini = plots.rollout_top_features_table(names, vec, top_n_att)
118
- cap1, cap2 = st.columns([0.82, 0.18])
119
- with cap1:
120
- st.caption(mod)
121
- with cap2:
122
- ui.plot_help_popover(
123
- _HELP_ROLLOUT_TABLE.format(mod=mod),
124
- key=f"t4_roll_{mod}_{roll_cohort}",
125
- )
126
  st.dataframe(mini, hide_index=True, width="stretch")
 
1
+ """Feature Insights: attention by predicted cohort."""
2
 
3
  from __future__ import annotations
4
 
 
17
 
18
  ui.inject_app_styles()
19
 
20
+ _HELP_PAGE = """
21
+ **Bar charts (RNA, ATAC, Flux columns):** **Mean attention** (rollout) on each **feature token**, averaged over validation cells and split by **what the model predicted**. **Compare** shows cohorts **side‑by‑side**; **All / dead‑end / reprogramming** restrict the average to that predicted class. Uses **predicted** fate, **not** experimental labels. **Longer bar** = more cumulative attention among the **top‑N** shown. **Hover** for numeric detail.
22
 
23
+ **Rollout tables:** The same **mean rollout vector** as the bars, as a **sortable table** of the strongest tokens **per modality column**. Rows are **ranked** by weight for the cohort you select. **Batch** embedding tokens are omitted; use the tables to **copy names** or scan ordering beyond the bars.
 
 
 
 
 
 
 
 
 
 
 
 
24
  """
25
 
 
 
 
26
  df = io.load_df_features()
27
  att = io.load_attention_summary()
28
 
 
32
  )
33
  st.stop()
34
 
35
+ st.title(ui.FEATURE_INSIGHTS_TITLE)
36
+ st.caption(ui.FEATURE_INSIGHTS_CAPTION)
37
+ try:
38
+ _att_h_l, _att_h_r = st.columns([0.94, 0.06], gap="small", vertical_alignment="center")
39
+ except TypeError:
40
+ _att_h_l, _att_h_r = st.columns([0.94, 0.06], gap="small")
41
+ with _att_h_l:
42
+ st.subheader("Attention vs prediction")
43
+ with _att_h_r:
44
+ ui.plot_help_popover(_HELP_PAGE, key="t4_page_help")
45
+ st.caption(
46
+ "Here, we show mean attention over RNA, ATAC, and Flux tokens conditional on what the model predicted (dead-end, "
47
+ "reprogramming, or all validation cells), to see which features the model focuses on to predict different fates."
48
+ )
49
  cohort_mode = st.selectbox(
50
  "Cohort view",
51
  [
 
76
  ac1, ac2, ac3 = st.columns(3)
77
  for col, mod in zip((ac1, ac2, ac3), ("RNA", "ATAC", "Flux")):
78
  with col:
 
 
 
79
  st.plotly_chart(
80
  plots.attention_cohort_view(att["fi_att"], mod, top_n=top_n_att, mode=cohort_mode),
81
  width="stretch",
 
110
  vec = vec_all[sl["start"] : sl["stop"]]
111
  names = att["feature_names"][sl["start"] : sl["stop"]]
112
  mini = plots.rollout_top_features_table(names, vec, top_n_att)
113
+ st.caption(mod)
 
 
 
 
 
 
 
114
  st.dataframe(mini, hide_index=True, width="stretch")
streamlit_hf/pages/feature_insights/5_Full_table.py CHANGED
@@ -1,4 +1,4 @@
1
- """Feature Insights full ranked feature table."""
2
 
3
  from __future__ import annotations
4
 
@@ -17,16 +17,13 @@ from streamlit_hf.lib import ui
17
  ui.inject_app_styles()
18
 
19
  _FULL_TABLE_HELP = """
20
- **What this is:** The **full ranked feature list** (RNA genes, ATAC peaks, flux reactions) with **shift**, **attention**, and **joint** rank columns from the interpretability pipeline.
21
 
22
  **Key columns:** **mean_rank** (lower = stronger overall), **rank_shift** / **rank_att** (global), modality‑internal ranks, and **importance_*** scores. Where available, **pathway** / **module** annotate flux or gene context.
23
 
24
  **How to use:** **Sort** or **search** in the table toolbar; **download CSV** for spreadsheets or supplementary tables.
25
  """
26
 
27
- st.title("Feature Insights")
28
- st.caption("Latent-shift probes, attention rollout, and combined rankings across RNA, ATAC, and Flux.")
29
-
30
  df = io.load_df_features()
31
 
32
  if df is None:
@@ -35,7 +32,13 @@ if df is None:
35
  )
36
  st.stop()
37
 
 
 
38
  st.subheader("Full table")
 
 
 
 
39
  scope = st.radio(
40
  "Table scope",
41
  ["All modalities", "Single modality"],
@@ -71,7 +74,7 @@ show_cols = [
71
  if c in tbl.columns
72
  ]
73
  ui.plot_caption_with_help(
74
- "All rows for the chosen scope, sorted by **mean rank** (lower = stronger joint priority).",
75
  _FULL_TABLE_HELP,
76
  key="t5_table_help",
77
  )
 
1
+ """Feature Insights: full ranked feature table."""
2
 
3
  from __future__ import annotations
4
 
 
17
  ui.inject_app_styles()
18
 
19
  _FULL_TABLE_HELP = """
20
+ **What this is:** The **full FateFormer ranked feature list** (RNA genes, ATAC TF motifs, flux reactions) with **shift**, **attention**, and **joint** rank columns from the interpretability pipeline.
21
 
22
  **Key columns:** **mean_rank** (lower = stronger overall), **rank_shift** / **rank_att** (global), modality‑internal ranks, and **importance_*** scores. Where available, **pathway** / **module** annotate flux or gene context.
23
 
24
  **How to use:** **Sort** or **search** in the table toolbar; **download CSV** for spreadsheets or supplementary tables.
25
  """
26
 
 
 
 
27
  df = io.load_df_features()
28
 
29
  if df is None:
 
32
  )
33
  st.stop()
34
 
35
+ st.title(ui.FEATURE_INSIGHTS_TITLE)
36
+ st.caption(ui.FEATURE_INSIGHTS_CAPTION)
37
  st.subheader("Full table")
38
+ st.caption(
39
+ "Here is the complete ranked feature table for the run (RNA genes, ATAC motifs, flux reactions): every shift, "
40
+ "attention, and joint rank and score the pipeline emitted."
41
+ )
42
  scope = st.radio(
43
  "Table scope",
44
  ["All modalities", "Single modality"],
 
74
  if c in tbl.columns
75
  ]
76
  ui.plot_caption_with_help(
77
+ "Full FateFormer list for the chosen scope, sorted by **mean rank** (lower = stronger joint priority).",
78
  _FULL_TABLE_HELP,
79
  key="t5_table_help",
80
  )
streamlit_hf/pages/flux_analysis/1_Pathway_map.py CHANGED
@@ -1,4 +1,4 @@
1
- """Flux Analysis pathway sunburst and reaction annotation panels."""
2
 
3
  from __future__ import annotations
4
 
@@ -17,34 +17,20 @@ from streamlit_hf.lib import ui
17
 
18
  ui.inject_app_styles()
19
 
20
- _HELP_FLUX_SUNBURST = """
21
- **What this is:** A **hierarchical view** of **metabolic pathways** and the **individual flux reactions** that rank highest by **mean importance** in this model.
22
 
23
- **How to read it:** **Inner rings** = pathway context; **outer segments** = **reactions**. Larger / more central emphasis (depends on layout) highlights **stronger combined ranking** in the results table. Use the slider to include more or fewer reactions.
24
 
25
- **Takeaway:** Quickly see **which pathways dominate** the model’s flux interpretation layer.
26
- """
27
-
28
- _HELP_FLUX_ANNOTATION = """
29
- **What this is:** **Heatmaps** aligned to the **same top reactions** as the sunburst: each row is a **reaction**, columns summarise **pathway membership**, **differential flux** (Log₂ fold change between fate groups), and **statistical significance**.
30
-
31
- **How to read it:** Scan rows for reactions that are both **statistically notable** and **highly ranked** by the model. **Hover** cells for exact values where Plotly provides tooltips.
32
-
33
- **Takeaway:** Links **statistics on measured flux** to **model-derived importance**.
34
- """
35
-
36
- _HELP_FLUX_PROFILE = """
37
- **What this is:** A compact **profile** of **model‑centric metrics** (e.g. joint ranks) for the same **top reactions**, complementary to the heatmaps.
38
-
39
- **How to read it:** Compare **relative bars/scores** across reactions—**longer** usually means **stronger model priority** for that reaction in this summary.
40
 
41
- **Takeaway:** A second lens that tracks **interpretability scores** rather than raw flux alone.
42
  """
43
 
44
  st.title("Flux Analysis")
45
  st.caption(
46
- "Reaction-level flux: how pathways, statistics, and model rankings line up. "
47
- "For global rank bars and shift vs. attention scatter, open **Feature insights**."
48
  )
49
 
50
  try:
@@ -67,38 +53,51 @@ else:
67
  _data_msg = "There are no flux reactions in the current results."
68
  flux = None
69
 
70
- st.subheader("Pathway map")
 
 
 
 
 
 
 
 
71
  if not _data_ok:
72
  st.error(_data_msg)
73
  else:
74
  st.caption(
75
- "**Left:** sunburst of the strongest reactions by mean rank, grouped by pathway. **Right:** heatmaps for the "
76
- "same reactions: pathway, differential LogFC, and statistical significance, aligned row by row. "
77
- "Ranked reaction table: **Reaction ranking**. Curated model edges: **Model metadata**."
78
  )
79
  try:
80
  c1, c2 = st.columns([1.05, 0.95], gap="medium", vertical_alignment="top")
81
  except TypeError:
82
  c1, c2 = st.columns([1.05, 0.95], gap="medium")
83
  with c1:
84
- n_sb = st.slider("Reactions in sunburst", 25, 90, 52, key="flux_sb_n")
85
- _, _hp = st.columns([1, 0.22])
86
- with _hp:
87
- ui.plot_help_popover(_HELP_FLUX_SUNBURST, key="flux_sb_help")
 
 
 
 
 
 
 
88
  st.plotly_chart(plots.flux_pathway_sunburst(flux, max_features=n_sb), width="stretch")
89
  with c2:
90
- top_n_nb = st.slider("Reactions in annotation + profile", 12, 40, 26, key="flux_nb_n")
91
- _, _hp = st.columns([1, 0.22])
92
- with _hp:
93
- ui.plot_help_popover(_HELP_FLUX_ANNOTATION, key="flux_ann_help")
94
- st.plotly_chart(
95
- plots.flux_reaction_annotation_panel(flux, top_n=top_n_nb, metric="mean_rank"),
96
- width="stretch",
 
 
97
  )
98
- _, _hp2 = st.columns([1, 0.22])
99
- with _hp2:
100
- ui.plot_help_popover(_HELP_FLUX_PROFILE, key="flux_prof_help")
101
  st.plotly_chart(
102
- plots.flux_model_metric_profile(flux, top_n=min(top_n_nb, 24), metric="mean_rank"),
103
  width="stretch",
104
  )
 
1
+ """Flux Analysis: pathway sunburst and reaction annotation panels."""
2
 
3
  from __future__ import annotations
4
 
 
17
 
18
  ui.inject_app_styles()
19
 
20
+ _HELP_PATHWAY_MAP = """
21
+ **Layout:** **Left column:** **sunburst**. **Right column:** **Pathway / Log₂FC / significance** (three **heatmap** columns, one **row** per reaction).
22
 
23
+ **Sunburst:** **Inner ring** = **pathway**; **outer ring** = **reaction**. Reactions are the top set by **mean_rank** (FateFormer joint rank; **lower** = stronger). **Wedge size** reflects that ranking. **Colour** = per-reaction **log₂ fold change** in inferred flux for **reprogramming** vs **dead-end** samples (experimental labels).
24
 
25
+ **Pathway / Log₂FC / significance:** Same top-**N** reactions as the **Reactions in heatmap** slider (**N** rows). **Columns:** **Pathway** (categorical colour), **Log₂FC** (reprogramming vs dead-end), **−log₁₀ adjusted p** for that contrast. **Hover** for exact values.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
+ **Sliders:** **Reactions in sunburst** adjusts only the **left** sunburst. **Reactions in heatmap** sets how many top reactions appear in the **right-hand** heatmap.
28
  """
29
 
30
  st.title("Flux Analysis")
31
  st.caption(
32
+ "**Flux Analysis** ties inferred **reaction flux** to **pathways**, **fate contrasts**, **rankings**, and **model** metadata. "
33
+ "For multimodal **shift**/**attention** summaries, open **Feature Insights**."
34
  )
35
 
36
  try:
 
53
  _data_msg = "There are no flux reactions in the current results."
54
  flux = None
55
 
56
+ try:
57
+ _pm_h_l, _pm_h_r = st.columns([0.94, 0.06], gap="small", vertical_alignment="center")
58
+ except TypeError:
59
+ _pm_h_l, _pm_h_r = st.columns([0.94, 0.06], gap="small")
60
+ with _pm_h_l:
61
+ st.subheader("Pathway map")
62
+ with _pm_h_r:
63
+ ui.plot_help_popover(_HELP_PATHWAY_MAP, key="flux_pathway_map_help")
64
+
65
  if not _data_ok:
66
  st.error(_data_msg)
67
  else:
68
  st.caption(
69
+ "Here, we map top FateFormer-ranked flux reactions into pathway context: a sunburst (pathway → reaction) and a "
70
+ "heatmap of pathway, log fold change, and significance for reprogramming vs dead-end."
 
71
  )
72
  try:
73
  c1, c2 = st.columns([1.05, 0.95], gap="medium", vertical_alignment="top")
74
  except TypeError:
75
  c1, c2 = st.columns([1.05, 0.95], gap="medium")
76
  with c1:
77
+ n_sb = st.slider(
78
+ "Reactions in sunburst",
79
+ 25,
80
+ 90,
81
+ 52,
82
+ key="flux_sb_n",
83
+ help=(
84
+ "How many **top** flux reactions (by **mean rank**) appear in the **sunburst** only. "
85
+ "Does not change the heatmap; use the other slider for that."
86
+ ),
87
+ )
88
  st.plotly_chart(plots.flux_pathway_sunburst(flux, max_features=n_sb), width="stretch")
89
  with c2:
90
+ top_n_nb = st.slider(
91
+ "Reactions in heatmap",
92
+ 12,
93
+ 40,
94
+ 26,
95
+ key="flux_nb_n",
96
+ help=(
97
+ "How many **top** flux reactions (by **mean rank**) appear as **rows** in the **Pathway / Log₂FC / significance** heatmap."
98
+ ),
99
  )
 
 
 
100
  st.plotly_chart(
101
+ plots.flux_reaction_annotation_panel(flux, top_n=top_n_nb, metric="mean_rank"),
102
  width="stretch",
103
  )
streamlit_hf/pages/flux_analysis/2_Differential_fate.py CHANGED
@@ -1,4 +1,4 @@
1
- """Flux Analysis differential flux and fate scatter."""
2
 
3
  from __future__ import annotations
4
 
@@ -18,25 +18,25 @@ from streamlit_hf.lib import ui
18
  ui.inject_app_styles()
19
 
20
  _HELP_FLUX_VOLCANO = """
21
- **What this is:** A **volcano plot** for **reaction‑level flux**: **horizontal axis** = differential activity (**Log₂ fold change** between fate groups); **vertical axis** = **statistical significance** (\u2212log\u2081\u2080 **adjusted p**).
22
 
23
- **How to read it:** Points **far right/left** change most between groups; points **higher up** are more significant. **Colour** encodes the reaction’s **overall mean rank** in the interpretability table. Unreliable points with **no fold change** and **zero** adjusted p‑value are **dropped**.
24
 
25
- **Takeaway:** Highlights reactions that are both **biologically different** and **interpretable** in the model.
26
  """
27
 
28
  _HELP_FLUX_FATE_SCATTER = """
29
- **What this is:** Each **point** is a **flux reaction**. **X** = **average flux** in cells called **deadend**; **Y** = average in **reprogramming** cells (per the experimental grouping used in the analysis).
30
 
31
- **How to read it:** Points **above the diagonal** are higher in reprogramming; **below** = higher in deadend. **Point size** reflects **combined shift + attention** strength; **colour** = **pathway** (minor categories grouped as *Other*).
32
 
33
- **Takeaway:** Links **raw flux behaviour** to **model emphasis** (size) and **pathway context** (colour).
34
  """
35
 
36
  st.title("Flux Analysis")
37
  st.caption(
38
- "Reaction-level flux: how pathways, statistics, and model rankings line up. "
39
- "For global rank bars and shift vs. attention scatter, open **Feature insights**."
40
  )
41
 
42
  try:
@@ -60,15 +60,13 @@ else:
60
  flux = None
61
 
62
  st.subheader("Differential & fate")
 
 
 
 
63
  if not _data_ok:
64
  st.error(_data_msg)
65
  else:
66
- st.caption(
67
- "**Volcano:** differential Log₂FC versus significance (\u2212log\u2081\u2080 adjusted p); colour shows overall mean rank. "
68
- "Points with essentially no fold change and a zero adjusted p-value are removed as unreliable. "
69
- "**Scatter:** average measured flux in dead-end versus reprogramming cells; point size reflects combined shift "
70
- "and attention strength; colours mark pathway (largest groups shown, others grouped as *Other*)."
71
- )
72
  b1, b2 = st.columns(2)
73
  with b1:
74
  _, _hp = st.columns([1, 0.22])
 
1
+ """Flux Analysis: differential flux and fate scatter."""
2
 
3
  from __future__ import annotations
4
 
 
18
  ui.inject_app_styles()
19
 
20
  _HELP_FLUX_VOLCANO = """
21
+ **What this is:** One **point** per **flux reaction**. **X** = **log₂ fold change** in inferred flux between **dead-end**-labeled and **reprogramming**-labeled samples. **Y** = **−log₁₀ adjusted p-value** for that contrast (multiple-testing adjusted in the results table).
22
 
23
+ **How to read it:** **Further from zero on X** = stronger shift between cohorts. **Higher on Y** = stronger statistical evidence. **Colour** = **mean rank** (FateFormer joint rank across the feature table; **lower** rank = stronger overall model focus). Reactions with **~zero** fold change and an **adjusted p of exactly zero** are removed as numerical artifacts.
24
 
25
+ **Hover** the points for reaction name, pathway, and related fields.
26
  """
27
 
28
  _HELP_FLUX_FATE_SCATTER = """
29
+ **What this is:** One **point** per **flux reaction**. **X** = **mean flux** across samples labeled **dead-end**; **Y** = **mean flux** across samples labeled **reprogramming** (same per-sample fate labels as elsewhere in this analysis).
30
 
31
+ **How to read it:** The **y = x** line would mark equal average flux in both cohorts. **Above** the diagonal, average flux is **higher in reprogramming** than in dead-end for that reaction; **below**, **higher in dead-end**. **Marker size** scales with **√(latent shift importance × attention importance)** (capped for display). **Colour** = **pathway**; smaller pathway groups are merged into **Other**.
32
 
33
+ **Hover** for reaction name, **mean rank**, **log₂FC**, and pathway.
34
  """
35
 
36
  st.title("Flux Analysis")
37
  st.caption(
38
+ "**Flux Analysis** ties inferred **reaction flux** to **pathways**, **fate contrasts**, **rankings**, and **model** metadata. "
39
+ "For multimodal **shift**/**attention** summaries, open **Feature Insights**."
40
  )
41
 
42
  try:
 
60
  flux = None
61
 
62
  st.subheader("Differential & fate")
63
+ st.caption(
64
+ "Here, we contrast dead-end and reprogramming cells at the reaction level: a volcano of flux log₂FC vs significance, "
65
+ "and a scatter of mean flux in each cohort with pathway colouring."
66
+ )
67
  if not _data_ok:
68
  st.error(_data_msg)
69
  else:
 
 
 
 
 
 
70
  b1, b2 = st.columns(2)
71
  with b1:
72
  _, _hp = st.columns([1, 0.22])
streamlit_hf/pages/flux_analysis/3_Reaction_ranking.py CHANGED
@@ -1,4 +1,4 @@
1
- """Flux Analysis ranked reaction table and download."""
2
 
3
  from __future__ import annotations
4
 
@@ -17,17 +17,21 @@ from streamlit_hf.lib import ui
17
  ui.inject_app_styles()
18
 
19
  _HELP_REACTION_TABLE = """
20
- **What this is:** A **sortable, filterable** version of the **flux reaction** interpretability table (same reactions as elsewhere in Flux Analysis).
21
 
22
- **Columns:** Typically include **mean_rank** (overall priority), **shift** / **attention** ranks and scores, **pathway** / **module**, and **differential statistics** (e.g. Log₂FC, adjusted *p*) where computed.
23
 
24
- **How to use:** **Filter** by name substring or **pathway**, then **download CSV** for plotting or supplementary material.
 
 
 
 
25
  """
26
 
27
  st.title("Flux Analysis")
28
  st.caption(
29
- "Reaction-level flux: how pathways, statistics, and model rankings line up. "
30
- "For global rank bars and shift vs. attention scatter, open **Feature insights**."
31
  )
32
 
33
  try:
@@ -51,20 +55,31 @@ else:
51
  flux = None
52
 
53
  st.subheader("Reaction ranking")
 
 
 
 
54
  if not _data_ok:
55
  st.error(_data_msg)
56
  else:
57
- ui.plot_caption_with_help(
58
- "Filter by reaction name or pathway, then inspect or download the ranked flux table.",
59
- _HELP_REACTION_TABLE,
60
- key="flux_rank_table_help",
 
 
 
 
 
 
 
61
  )
62
- q = st.text_input("Substring filter (reaction name)", "", key="flux_q")
63
  pw_f = st.multiselect(
64
  "Pathway",
65
  sorted(flux["pathway"].dropna().unique().astype(str)),
66
  default=[],
67
  key="flux_pw_f",
 
68
  )
69
  show = flux
70
  if q.strip():
@@ -101,4 +116,5 @@ else:
101
  file_name="fateformer_flux_filtered.csv",
102
  mime="text/csv",
103
  key="flux_dl",
 
104
  )
 
1
+ """Flux Analysis: ranked reaction table and download."""
2
 
3
  from __future__ import annotations
4
 
 
17
  ui.inject_app_styles()
18
 
19
  _HELP_REACTION_TABLE = """
20
+ **What this is:** The **full FateFormer flux reaction table** for this deployment: one **row** per **reaction** in the metabolic layer, with **joint ranking** and cohort flux statistics from the precomputed results.
21
 
22
+ **Ranking:** **mean_rank** = combined **shift + attention** priority (**lower** = stronger overall). **rank_shift_in_modal** / **rank_att_in_modal** and **combined_order_mod** are **within-modality** (Flux-only) ranks; **rank_shift** / **rank_att** are **global** across all features. **importance_shift** / **importance_att** are the underlying scores. **top_10_pct** (if present) flags global top-decile membership from the publish step.
23
 
24
+ **Flux / cohort columns:** **mean_de** / **mean_re** = **mean inferred flux** in **dead-end** vs **reprogramming** samples. **log_fc** = **log₂** fold change between those cohorts for that reaction. **pval_adj** = **adjusted p-value** for that contrast. **group** summarises direction or contrast label when present.
25
+
26
+ **Context:** **pathway** and **module** annotate the reaction in the reconstruction.
27
+
28
+ **Use:** Narrow rows with the **substring** and **pathway** controls; use the table’s own **sort** if your Streamlit build exposes it. **Download** saves the **filtered** view as CSV.
29
  """
30
 
31
  st.title("Flux Analysis")
32
  st.caption(
33
+ "**Flux Analysis** ties inferred **reaction flux** to **pathways**, **fate contrasts**, **rankings**, and **model** metadata. "
34
+ "For multimodal **shift**/**attention** summaries, open **Feature Insights**."
35
  )
36
 
37
  try:
 
55
  flux = None
56
 
57
  st.subheader("Reaction ranking")
58
+ st.caption(
59
+ "Here is the searchable flux reaction table: every reaction’s FateFormer ranks, cohort flux summaries, and pathway "
60
+ "context, with filters and CSV download."
61
+ )
62
  if not _data_ok:
63
  st.error(_data_msg)
64
  else:
65
+ try:
66
+ _rr_l, _rr_r = st.columns([0.94, 0.06], gap="small", vertical_alignment="center")
67
+ except TypeError:
68
+ _rr_l, _rr_r = st.columns([0.94, 0.06], gap="small")
69
+ with _rr_r:
70
+ ui.plot_help_popover(_HELP_REACTION_TABLE, key="flux_rank_table_help")
71
+ q = st.text_input(
72
+ "Substring filter (reaction name)",
73
+ "",
74
+ key="flux_q",
75
+ help="Keep rows whose **reaction** string contains this text (case-insensitive). Leave empty for no name filter.",
76
  )
 
77
  pw_f = st.multiselect(
78
  "Pathway",
79
  sorted(flux["pathway"].dropna().unique().astype(str)),
80
  default=[],
81
  key="flux_pw_f",
82
+ help="Keep rows in any of the selected **pathways**. Leave empty to include all pathways.",
83
  )
84
  show = flux
85
  if q.strip():
 
116
  file_name="fateformer_flux_filtered.csv",
117
  mime="text/csv",
118
  key="flux_dl",
119
+ help="CSV of the **current filtered** table (same columns as on screen), sorted by **mean_rank**.",
120
  )
streamlit_hf/pages/flux_analysis/4_Model_metadata.py CHANGED
@@ -1,4 +1,4 @@
1
- """Flux Analysis metabolic model metadata merged with flux table."""
2
 
3
  from __future__ import annotations
4
 
@@ -16,73 +16,60 @@ from streamlit_hf.lib import ui
16
 
17
  ui.inject_app_styles()
18
 
19
- _HELP_MODEL_META = """
20
- **What this is:** **Directed edges** from the **genome‑scale metabolic model** (substrate → product reactions), **merged** with this app’s **flux interpretability table** where reaction identifiers match.
21
 
22
- **How to read it:** Each row is a **model step** you can relate to **pathways** and **model modules**. Use **Model scope** to zoom to one **supermodule** or view **all** edges.
 
23
 
24
- **Takeaway:** Connects **curated biochemistry** (stoichiometry / wiring) to **data‑driven rankings** from FateFormer.
25
  """
26
 
27
  st.title("Flux Analysis")
28
  st.caption(
29
- "Reaction-level flux: how pathways, statistics, and model rankings line up. "
30
- "For global rank bars and shift vs. attention scatter, open **Feature insights**."
31
  )
32
 
33
- try:
34
- df = io.load_df_features()
35
- except Exception:
36
- df = None
37
-
38
- _data_ok = True
39
- if df is None:
40
- _data_ok = False
41
- _data_msg = (
42
- "Flux and feature data are not loaded in this session. Reload the app after the maintainer has published "
43
- "fresh results, or ask them to check the deployment."
44
- )
45
- flux = None
46
- meta = None
47
- else:
48
- flux = df[df["modality"] == "Flux"].copy()
49
- if flux.empty:
50
- _data_ok = False
51
- _data_msg = "There are no flux reactions in the current results."
52
- flux = None
53
- meta = io.load_metabolic_model_metadata()
54
 
55
  st.subheader("Metabolic model metadata")
56
- if not _data_ok:
57
- st.error(_data_msg)
 
 
 
 
58
  else:
59
- ui.plot_caption_with_help(
60
- "Directed substrate-to-product steps from the reference model, merged with this flux table where reaction names match.",
61
- _HELP_MODEL_META,
62
- key="flux_model_meta_help",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  )
64
- if meta is None or meta.empty:
65
- st.warning("Metabolic model metadata is not available in this build.")
66
- else:
67
- sm_ids = sorted(meta["Supermodule_id"].dropna().unique().astype(int).tolist())
68
- graph_labels = ["All modules"]
69
- for sid in sm_ids:
70
- cls = str(meta.loc[meta["Supermodule_id"] == sid, "Super.Module.class"].iloc[0])
71
- graph_labels.append(f"{sid}: {cls}")
72
- tix = st.selectbox(
73
- "Model scope",
74
- range(len(graph_labels)),
75
- format_func=lambda i: graph_labels[i],
76
- key="flux_model_scope",
77
- help="Show every step in the model, or restrict to one functional module.",
78
- )
79
- supermodule_id = None if tix == 0 else sm_ids[tix - 1]
80
- tbl = io.build_metabolic_model_table(meta, flux, supermodule_id=supermodule_id)
81
- st.dataframe(tbl, width="stretch", hide_index=True)
82
- st.download_button(
83
- "Download metabolic model metadata (CSV)",
84
- tbl.to_csv(index=False).encode("utf-8"),
85
- file_name="fateformer_metabolic_model_edges.csv",
86
- mime="text/csv",
87
- key="flux_model_dl",
88
- )
 
1
+ """Flux Analysis: scFEA metabolic model metadata table."""
2
 
3
  from __future__ import annotations
4
 
 
16
 
17
  ui.inject_app_styles()
18
 
19
+ _SCFEA_PMC = "https://pmc.ncbi.nlm.nih.gov/articles/PMC8494226/"
20
+ _SCFEA_GITHUB = "https://github.com/changwn/scFEA"
21
 
22
+ _HELP_MODEL_META = f"""
23
+ **Source:** The **metabolic model metadata** from **scFEA** (single-cell flux estimation from scRNA-seq) that is used for inferring flux reactions from scRNA-seq data. Open access article: [{_SCFEA_PMC}]({_SCFEA_PMC}) (*Genome Research*, 2021). Code and model resources: [{_SCFEA_GITHUB}]({_SCFEA_GITHUB}).
24
 
25
+ **What this is:** The **scFEA** metabolic model info used for inferring fluxomic data from scRNA-seq (one row per substrate → product reaction).
26
  """
27
 
28
  st.title("Flux Analysis")
29
  st.caption(
30
+ "**Flux Analysis** ties inferred **reaction flux** to **pathways**, **fate contrasts**, **rankings**, and **model** metadata. "
31
+ "For multimodal **shift**/**attention** summaries, open **Feature Insights**."
32
  )
33
 
34
+ meta = io.load_metabolic_model_metadata()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
  st.subheader("Metabolic model metadata")
37
+ st.caption(
38
+ f"Here is the scFEA metabolic model metadata used to interpret flux features: modules, compounds, and reaction names. "
39
+ f"[Paper]({_SCFEA_PMC}), [GitHub]({_SCFEA_GITHUB})."
40
+ )
41
+ if meta is None or meta.empty:
42
+ st.error("Metabolic model metadata is not available in this build.")
43
  else:
44
+ try:
45
+ _mm_l, _mm_r = st.columns([0.94, 0.06], gap="small", vertical_alignment="center")
46
+ except TypeError:
47
+ _mm_l, _mm_r = st.columns([0.94, 0.06], gap="small")
48
+ with _mm_r:
49
+ ui.plot_help_popover(_HELP_MODEL_META, key="flux_model_meta_help")
50
+ sm_ids = sorted(meta["Supermodule_id"].dropna().unique().astype(int).tolist())
51
+ graph_labels = ["All modules"]
52
+ for sid in sm_ids:
53
+ cls = str(meta.loc[meta["Supermodule_id"] == sid, "Super.Module.class"].iloc[0])
54
+ graph_labels.append(f"{sid}: {cls}")
55
+ tix = st.selectbox(
56
+ "Model scope",
57
+ range(len(graph_labels)),
58
+ format_func=lambda i: graph_labels[i],
59
+ key="flux_model_scope",
60
+ help=(
61
+ "**All modules:** every edge row in the metadata CSV. **Named supermodule:** only edges with that "
62
+ "**Supermodule_id** (class label shown in the menu)."
63
+ ),
64
+ )
65
+ supermodule_id = None if tix == 0 else sm_ids[tix - 1]
66
+ tbl = io.build_metabolic_model_table(meta, supermodule_id=supermodule_id)
67
+ st.dataframe(tbl, width="stretch", hide_index=True)
68
+ st.download_button(
69
+ "Download metabolic model metadata (CSV)",
70
+ tbl.to_csv(index=False).encode("utf-8"),
71
+ file_name="fateformer_metabolic_model_edges.csv",
72
+ mime="text/csv",
73
+ key="flux_model_dl",
74
+ help="CSV export of the table above for the current **Model scope**.",
75
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
streamlit_hf/pages/flux_analysis/5_Interactive_map.py CHANGED
@@ -1,4 +1,4 @@
1
- """Flux Analysis metabolic map with searchable side panel."""
2
 
3
  from __future__ import annotations
4
 
@@ -18,18 +18,18 @@ from streamlit_hf.lib import ui
18
 
19
  ui.inject_app_styles()
20
 
21
- _HELP_MET_MAP = """
22
- **What this is:** An **interactive schematic** of the metabolic map: **nodes/labels** are **metabolites** linked to the reconstruction. The **sidebar list** ranks metabolites by the **strongest associated flux reaction** in this deployment (**#1** = top rank).
23
 
24
- **How to use:** **Search** the list (every word must match somewhere in that row). **Hover** metabolite labels on the map for a short **tooltip**. **Pan** (drag background) and **zoom** (scroll or **+ / −**). **Esc** clears search.
 
25
 
26
- **Takeaway:** A **navigation** layer to relate **pathway geography** to **model-ranked reactions**, not a quantitative flux balance diagram.
27
  """
28
 
29
  st.title("Flux Analysis")
30
  st.caption(
31
- "Reaction-level flux: how pathways, statistics, and model rankings line up. "
32
- "For global rank bars and shift vs. attention scatter, open **Feature insights**."
33
  )
34
 
35
 
@@ -210,7 +210,7 @@ function renderMetList(q){
210
  if(n++>=cap) break;
211
  const div=document.createElement('div');
212
  div.className='met-item'+(listHighlightKey===mrow.key?' hl':'');
213
- const rk=mrow.importance_rank!=null?`<strong>#${mrow.importance_rank}</strong>`:'<span></span>';
214
  div.innerHTML=`<span class="nm">${escapeHtml(mrow.name)}</span><span class="rk">${rk}<br/><span style="opacity:.85">${mrow.n_reactions} rxn</span></span>`;
215
  div.addEventListener('mouseenter',ev=>{
216
  document.querySelectorAll('.met-item').forEach(x=>x.classList.remove('hl'));
@@ -314,8 +314,9 @@ init();
314
 
315
 
316
  st.subheader("Metabolic map")
 
317
  ui.plot_caption_with_help(
318
- "Browse metabolites tied to the reconstruction and flux layer. The number is the rank of the strongest linked step (1 = top).",
319
  _HELP_MET_MAP,
320
  key="flux_map_help",
321
  )
 
1
+ """Flux Analysis: metabolic map with searchable side panel."""
2
 
3
  from __future__ import annotations
4
 
 
18
 
19
  ui.inject_app_styles()
20
 
21
+ _NAR_FATEFORMER_URL = "https://academic.oup.com/nar/article/51/W1/W180/7175334"
 
22
 
23
+ _HELP_MET_MAP = f"""
24
+ **Figure (paper):** Network model of key metabolic pathways linked to fate outcomes identified by the model. Important pathways and reactions are mapped onto the **scFLUX** metabolic network schema. **Arrow colour** shows the **log₂ fold change** in **scFEA**-inferred flux between **reprogramming** and **dead-end** cells: **red** = higher flux in reprogramming, **blue** = higher in dead-end. **Black** arrows = no corresponding scFEA entry or no measurable flux difference. **Triple-star** markers in the figure denote **p_adj < 0.001** (two-sample *t*-test with Benjamini–Hochberg correction). Full article: [{_NAR_FATEFORMER_URL}]({_NAR_FATEFORMER_URL})
25
 
26
+ **In this explorer:** The same schematic is **interactive**: **metabolites** on the map link to the reconstruction. The **sidebar** ranks metabolites by the **strongest associated flux reaction** in this deployment (**#1** = top). **Search** the list (every word must match somewhere in that row). **Hover** labels for a **tooltip**. **Pan** (drag background) and **zoom** (scroll or **+ / −**); **Esc** clears search. Use it as a **navigation** layer between **pathway geography** and **model-ranked reactions**, not a quantitative flux-balance diagram.
27
  """
28
 
29
  st.title("Flux Analysis")
30
  st.caption(
31
+ "**Flux Analysis** ties inferred **reaction flux** to **pathways**, **fate contrasts**, **rankings**, and **model** metadata. "
32
+ "For multimodal **shift**/**attention** summaries, open **Feature Insights**."
33
  )
34
 
35
 
 
210
  if(n++>=cap) break;
211
  const div=document.createElement('div');
212
  div.className='met-item'+(listHighlightKey===mrow.key?' hl':'');
213
+ const rk=mrow.importance_rank!=null?`<strong>#${mrow.importance_rank}</strong>`:'<span>-</span>';
214
  div.innerHTML=`<span class="nm">${escapeHtml(mrow.name)}</span><span class="rk">${rk}<br/><span style="opacity:.85">${mrow.n_reactions} rxn</span></span>`;
215
  div.addEventListener('mouseenter',ev=>{
216
  document.querySelectorAll('.met-item').forEach(x=>x.classList.remove('hl'));
 
314
 
315
 
316
  st.subheader("Metabolic map")
317
+ st.caption("This page shows the interactive metabolic map of important pathways and reactions.")
318
  ui.plot_caption_with_help(
319
+ "Browse metabolites tied to the reconstruction and flux layer. The number is the rank of the strongest linked reaction (1 = top).",
320
  _HELP_MET_MAP,
321
  key="flux_map_help",
322
  )
streamlit_hf/pages/gene_expression/1_Pathway_enrichment.py CHANGED
@@ -1,4 +1,4 @@
1
- """Gene expression Reactome / KEGG pathway enrichment."""
2
 
3
  from __future__ import annotations
4
 
@@ -18,34 +18,18 @@ from streamlit_hf.lib import ui
18
 
19
  ui.inject_app_styles()
20
 
21
- _HELP_PATH_BUBBLE_DE = """
22
- **What this is:** **Pathway over‑representation** among genes linked to **dead‑end** cells (Reactome + KEGG merged view). **Significance** is **Benjamini–Hochberg FDR** (*q* < 0.05).
23
 
24
- **How to read it:** Each **bubble** is a pathway; **position** reflects effect size / enrichment strength; **size** often tracks **gene count** or **significance** (see axis labels and hover). Compare to the **reprogramming** panel for fate‑specific patterns.
25
 
26
- **Takeaway:** Highlights **process‑level** themes in the deadend transcriptional state.
27
- """
28
-
29
- _HELP_PATH_BUBBLE_RE = """
30
- **What this is:** The same **enrichment style** as dead‑end, but for genes associated with **reprogramming** outcomes.
31
-
32
- **How to read it:** Interpret **bubble position and size** as in the dead‑end panel. Pathways **strong here but not there** (and vice‑versa) are the most **discriminating**.
33
-
34
- **Takeaway:** Complements RNA‑level interpretability with **known pathway databases**.
35
- """
36
-
37
- _HELP_PATH_HEAT = """
38
- **What this is:** A **gene × pathway** **heatmap** of **membership** among **leading** genes from the enrichment results (Reactome / KEGG). **Empty** cells mean no assignment in that slice of the matrix.
39
-
40
- **How to read it:** **Rows** = genes; **columns** = pathways. **Colour intensity** shows presence/strength of membership depending on the encoding (use **hover**).
41
-
42
- **Takeaway:** Moves from **pathway lists** to a **literal gene‑to‑pathway map** for follow‑up.
43
  """
44
 
45
  st.title("Gene Expression & TF Activity")
46
  st.caption(
47
- "Pathway enrichment (Reactome / KEGG) and a pathway-gene map; chromVAR-style motif deviations and activity by "
48
- "fate; sortable gene and motif tables. Use **Feature Insights** for global shift and attention rankings across modalities."
49
  )
50
 
51
  df = io.load_df_features()
@@ -59,10 +43,17 @@ if rna.empty and atac.empty:
59
  st.warning("No RNA gene or ATAC motif features are available in the current results.")
60
  st.stop()
61
 
62
- st.subheader("Gene pathway enrichment")
 
 
 
 
 
 
 
63
  st.caption(
64
- "Over-representation of Reactome and KEGG pathways (Benjamini-Hochberg *q* < 0.05). "
65
- "The lower panel maps leading genes to pathways; empty grid positions are left clear."
66
  )
67
  raw = pathway_data.load_de_re_tsv()
68
  if raw is None:
@@ -76,9 +67,6 @@ else:
76
  )
77
  c1, c2 = st.columns(2, gap="medium")
78
  with c1:
79
- _, _hp = st.columns([1, 0.22])
80
- with _hp:
81
- ui.plot_help_popover(_HELP_PATH_BUBBLE_DE, key="ge_bubble_de_help")
82
  st.plotly_chart(
83
  plots.pathway_enrichment_bubble_panel(
84
  mde,
@@ -89,9 +77,6 @@ else:
89
  width="stretch",
90
  )
91
  with c2:
92
- _, _hp = st.columns([1, 0.22])
93
- with _hp:
94
- ui.plot_help_popover(_HELP_PATH_BUBBLE_RE, key="ge_bubble_re_help")
95
  st.plotly_chart(
96
  plots.pathway_enrichment_bubble_panel(
97
  mre,
@@ -106,7 +91,4 @@ else:
106
  st.info("No pathway-gene matrix could be built from the current enrichment results.")
107
  else:
108
  z, ylabs, xlabs = hm
109
- _, _hp = st.columns([1, 0.18])
110
- with _hp:
111
- ui.plot_help_popover(_HELP_PATH_HEAT, key="ge_path_heat_help")
112
  st.plotly_chart(plots.pathway_gene_membership_heatmap(z, ylabs, xlabs), width="stretch")
 
1
+ """Gene expression: Reactome / KEGG pathway enrichment."""
2
 
3
  from __future__ import annotations
4
 
 
18
 
19
  ui.inject_app_styles()
20
 
21
+ _HELP_PATHWAY_ENRICHMENT = """
22
+ **Overview:** **Gene pathway enrichment**: Reactome and KEGG **over-representation** from fate-split **RNA marker** lists, then a **pathway × gene** heatmap of the leading hits.
23
 
24
+ **Bubble panels (dead-end vs reprogramming):** **Leading genes** are **grouped by fate** (dead-end vs reprogramming); each panel runs enrichment on that gene set. **Horizontal axis** = **gene ratio** (enrichment table). **Circles** = **Reactome** pathways; **squares** = **KEGG** pathways. **Vertical** position orders pathways; **size** reflects **gene count**; **colour** = **−log₁₀** Benjamini *q* (*q* < 0.05). **Hover** for pathway name, library, count, and *q*. **Compare** left and right panels for cohort-specific pathways.
25
 
26
+ **Heatmap:** **Rows** = enriched **pathway terms** (Reactome block, then KEGG). **Columns** = **genes** (from the same fate-split marker lists that fed enrichment) plus a **Library** stripe (**Reactome** vs **KEGG** per row). **Colour** encodes **dead-end** vs **reprogramming** membership for that gene–pathway pair (and the library stripe); **hover** for the exact label. **Empty** cells = no link in this matrix slice.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  """
28
 
29
  st.title("Gene Expression & TF Activity")
30
  st.caption(
31
+ "**Pathways** (Reactome / KEGG) and pathway-gene views; **ATAC motif** deviation and **TF activity** by fate; "
32
+ "**gene** and **motif** tables."
33
  )
34
 
35
  df = io.load_df_features()
 
43
  st.warning("No RNA gene or ATAC motif features are available in the current results.")
44
  st.stop()
45
 
46
+ try:
47
+ _pe_h_l, _pe_h_r = st.columns([0.94, 0.06], gap="small", vertical_alignment="center")
48
+ except TypeError:
49
+ _pe_h_l, _pe_h_r = st.columns([0.94, 0.06], gap="small")
50
+ with _pe_h_l:
51
+ st.subheader("Gene pathway enrichment")
52
+ with _pe_h_r:
53
+ ui.plot_help_popover(_HELP_PATHWAY_ENRICHMENT, key="ge_pathway_page_help")
54
  st.caption(
55
+ "Here, we turn fate-split RNA gene markers into Reactome and KEGG over-representation (bubble panels per cohort), "
56
+ "then lay out a pathway × gene heatmap for the leading hits."
57
  )
58
  raw = pathway_data.load_de_re_tsv()
59
  if raw is None:
 
67
  )
68
  c1, c2 = st.columns(2, gap="medium")
69
  with c1:
 
 
 
70
  st.plotly_chart(
71
  plots.pathway_enrichment_bubble_panel(
72
  mde,
 
77
  width="stretch",
78
  )
79
  with c2:
 
 
 
80
  st.plotly_chart(
81
  plots.pathway_enrichment_bubble_panel(
82
  mre,
 
91
  st.info("No pathway-gene matrix could be built from the current enrichment results.")
92
  else:
93
  z, ylabs, xlabs = hm
 
 
 
94
  st.plotly_chart(plots.pathway_gene_membership_heatmap(z, ylabs, xlabs), width="stretch")
streamlit_hf/pages/gene_expression/2_Motif_activity.py CHANGED
@@ -1,4 +1,4 @@
1
- """Gene expression TF motif activity (chromVAR-style)."""
2
 
3
  from __future__ import annotations
4
 
@@ -17,26 +17,18 @@ from streamlit_hf.lib import ui
17
 
18
  ui.inject_app_styles()
19
 
20
- _HELP_MOTIF_VOLC = """
21
- **What this is:** A **volcano‑style** summary of **TF motif** differences from the **ATAC** layer (**chromVAR‑like** scores): **X** = change between fate groups (typically **reprogramming − dead‑end**); **Y** = **significance**.
22
 
23
- **How to read it:** **Extreme horizontal** motifs differ most between fates; **higher vertical** motifs are more statistically supported. **Hover** for motif names.
24
 
25
- **Takeaway:** Links **chromatin accessibility** motifs to **fate bias** beyond gene‑level RNA.
26
- """
27
-
28
- _HELP_MOTIF_SCATTER = """
29
- **What this is:** **Mean TF motif activity** (**z‑scored**) in **dead‑end** (**X**) versus **reprogramming** (**Y**) cells.
30
-
31
- **How to read it:** Points **above the diagonal** are more active in reprogramming; **below** favour dead‑end. **Colour / size** follow the same convention as **Feature Insights** motif views—use **hover** for identifiers.
32
-
33
- **Takeaway:** A **direct fate‑vs‑fate** comparison of **regulatory** programmes inferred from accessibility.
34
  """
35
 
36
  st.title("Gene Expression & TF Activity")
37
  st.caption(
38
- "Pathway enrichment (Reactome / KEGG) and a pathway-gene map; chromVAR-style motif deviations and activity by "
39
- "fate; sortable gene and motif tables. Use **Feature Insights** for global shift and attention rankings across modalities."
40
  )
41
 
42
  df = io.load_df_features()
@@ -50,24 +42,26 @@ if rna.empty and atac.empty:
50
  st.warning("No RNA gene or ATAC motif features are available in the current results.")
51
  st.stop()
52
 
53
- st.subheader("Motif activity")
 
 
 
 
 
 
 
 
 
 
 
 
54
  if atac.empty:
55
  st.warning("No motif-level ATAC features are available in the current results.")
56
  else:
57
- st.caption(
58
- "Left: mean motif score difference (reprogramming − dead-end) versus significance. "
59
- "Right: mean activity in each fate; colour and size follow the same encoding as in **Feature Insights**."
60
- )
61
  a1, a2 = st.columns(2, gap="medium")
62
  with a1:
63
- _, _hp = st.columns([1, 0.22])
64
- with _hp:
65
- ui.plot_help_popover(_HELP_MOTIF_VOLC, key="ge_motif_vol_help")
66
  st.plotly_chart(plots.motif_chromvar_volcano(atac), width="stretch")
67
  with a2:
68
- _, _hp = st.columns([1, 0.22])
69
- with _hp:
70
- ui.plot_help_popover(_HELP_MOTIF_SCATTER, key="ge_motif_sc_help")
71
  st.plotly_chart(
72
  plots.notebook_style_activity_scatter(
73
  atac,
 
1
+ """Gene expression: ATAC TF motif deviation and activity."""
2
 
3
  from __future__ import annotations
4
 
 
17
 
18
  ui.inject_app_styles()
19
 
20
+ _HELP_MOTIF_ACTIVITY = """
21
+ **Overview:** **ATAC** **TF motif** plots: **differential** activity between fate labels (left), then **per-fate mean** z-scored activity (right). Scores summarize **motif-level** signal from the accessibility layer.
22
 
23
+ **Left (volcano):** **X** = **mean difference** in motif activity (**reprogramming − dead-end**). **Y** = **−log₁₀ adjusted p** (or a precomputed log-*p* column when the table provides it). **Colour** = **mean rank** (joint FateFormer rank; **lower** = stronger). **Hover** for motif name, *p*, **mean rank**, and cohort fields when present.
24
 
25
+ **Right (scatter):** **X** / **Y** = **mean z-scored** motif activity in **dead-end** vs **reprogramming** cells. The **y = x** line would mark equal average activity; **above** the diagonal means **higher in reprogramming**. **Colour** = **−log₁₀ adjusted p** (red scale; **higher** = more significant). **Hover** for motif, **mean rank**, and **group**.
 
 
 
 
 
 
 
 
26
  """
27
 
28
  st.title("Gene Expression & TF Activity")
29
  st.caption(
30
+ "**Pathways** (Reactome / KEGG) and pathway-gene views; **ATAC motif** deviation and **TF activity** by fate; "
31
+ "**gene** and **motif** tables."
32
  )
33
 
34
  df = io.load_df_features()
 
42
  st.warning("No RNA gene or ATAC motif features are available in the current results.")
43
  st.stop()
44
 
45
+ try:
46
+ _ma_h_l, _ma_h_r = st.columns([0.94, 0.06], gap="small", vertical_alignment="center")
47
+ except TypeError:
48
+ _ma_h_l, _ma_h_r = st.columns([0.94, 0.06], gap="small")
49
+ with _ma_h_l:
50
+ st.subheader("Motif activity")
51
+ with _ma_h_r:
52
+ ui.plot_help_popover(_HELP_MOTIF_ACTIVITY, key="ge_motif_page_help")
53
+ st.caption(
54
+ "Here, we summarize ATAC TF motif behaviour: differential shift between dead-end and reprogramming (volcano), then "
55
+ "per-fate mean z-scored activity in a scatter."
56
+ )
57
+
58
  if atac.empty:
59
  st.warning("No motif-level ATAC features are available in the current results.")
60
  else:
 
 
 
 
61
  a1, a2 = st.columns(2, gap="medium")
62
  with a1:
 
 
 
63
  st.plotly_chart(plots.motif_chromvar_volcano(atac), width="stretch")
64
  with a2:
 
 
 
65
  st.plotly_chart(
66
  plots.notebook_style_activity_scatter(
67
  atac,
streamlit_hf/pages/gene_expression/3_Gene_table.py CHANGED
@@ -1,4 +1,4 @@
1
- """Gene expression searchable gene ranking table."""
2
 
3
  from __future__ import annotations
4
 
@@ -17,6 +17,10 @@ from streamlit_hf.lib import ui
17
 
18
  ui.inject_app_styles()
19
 
 
 
 
 
20
  TABLE_COLS = [
21
  "mean_rank",
22
  "feature",
@@ -44,8 +48,8 @@ def _table_cols(show: pd.DataFrame) -> list[str]:
44
 
45
  st.title("Gene Expression & TF Activity")
46
  st.caption(
47
- "Pathway enrichment (Reactome / KEGG) and a pathway-gene map; chromVAR-style motif deviations and activity by "
48
- "fate; sortable gene and motif tables. Use **Feature Insights** for global shift and attention rankings across modalities."
49
  )
50
 
51
  df = io.load_df_features()
@@ -59,7 +63,19 @@ if rna.empty and atac.empty:
59
  st.warning("No RNA gene or ATAC motif features are available in the current results.")
60
  st.stop()
61
 
62
- st.subheader("Gene table")
 
 
 
 
 
 
 
 
 
 
 
 
63
  if rna.empty:
64
  st.warning("No RNA gene features are available in the current results.")
65
  else:
 
1
+ """Gene expression: searchable gene ranking table."""
2
 
3
  from __future__ import annotations
4
 
 
17
 
18
  ui.inject_app_styles()
19
 
20
+ _HELP_GENE_TABLE = """
21
+ **scRNA-seq** genes used as features in this run: **one row per gene**, sorted by **mean rank** (joint importance). Additional columns are **FateFormer** rank and attribution summaries (within RNA and globally), **per-fate** expression (**dead-end** vs **reprogramming**), and **differential** statistics (*p*-values, log fold change, **group**). Search to narrow the list; use **Download** for a CSV copy.
22
+ """
23
+
24
  TABLE_COLS = [
25
  "mean_rank",
26
  "feature",
 
48
 
49
  st.title("Gene Expression & TF Activity")
50
  st.caption(
51
+ "**Pathways** (Reactome / KEGG) and pathway-gene views; **ATAC motif** deviation and **TF activity** by fate; "
52
+ "**gene** and **motif** tables."
53
  )
54
 
55
  df = io.load_df_features()
 
63
  st.warning("No RNA gene or ATAC motif features are available in the current results.")
64
  st.stop()
65
 
66
+ try:
67
+ _gt_h_l, _gt_h_r = st.columns([0.94, 0.06], gap="small", vertical_alignment="center")
68
+ except TypeError:
69
+ _gt_h_l, _gt_h_r = st.columns([0.94, 0.06], gap="small")
70
+ with _gt_h_l:
71
+ st.subheader("Gene table")
72
+ with _gt_h_r:
73
+ ui.plot_help_popover(_HELP_GENE_TABLE, key="ge_gene_table_help")
74
+ st.caption(
75
+ "Here is a searchable table of all scRNA-seq genes in the feature set, with FateFormer ranks and per-fate expression "
76
+ "and differential statistics that you can sort, filter by name, or download CSV."
77
+ )
78
+
79
  if rna.empty:
80
  st.warning("No RNA gene features are available in the current results.")
81
  else:
streamlit_hf/pages/gene_expression/4_Motif_table.py CHANGED
@@ -1,4 +1,4 @@
1
- """Gene expression searchable motif / TF table."""
2
 
3
  from __future__ import annotations
4
 
@@ -17,6 +17,10 @@ from streamlit_hf.lib import ui
17
 
18
  ui.inject_app_styles()
19
 
 
 
 
 
20
  TABLE_COLS = [
21
  "mean_rank",
22
  "feature",
@@ -44,8 +48,8 @@ def _table_cols(show: pd.DataFrame) -> list[str]:
44
 
45
  st.title("Gene Expression & TF Activity")
46
  st.caption(
47
- "Pathway enrichment (Reactome / KEGG) and a pathway-gene map; chromVAR-style motif deviations and activity by "
48
- "fate; sortable gene and motif tables. Use **Feature Insights** for global shift and attention rankings across modalities."
49
  )
50
 
51
  df = io.load_df_features()
@@ -59,7 +63,19 @@ if rna.empty and atac.empty:
59
  st.warning("No RNA gene or ATAC motif features are available in the current results.")
60
  st.stop()
61
 
62
- st.subheader("Motif table")
 
 
 
 
 
 
 
 
 
 
 
 
63
  if atac.empty:
64
  st.warning("No motif-level ATAC features are available in the current results.")
65
  else:
 
1
+ """Gene expression: searchable motif / TF table."""
2
 
3
  from __future__ import annotations
4
 
 
17
 
18
  ui.inject_app_styles()
19
 
20
+ _HELP_MOTIF_TABLE = """
21
+ **ATAC** motif / TF features used in this run: **one row per feature**, sorted by **mean rank**. Columns include **FateFormer** ranking and attribution, **per-fate** activity summaries, and **differential** statistics. Search to narrow the list; use **Download** for a CSV copy.
22
+ """
23
+
24
  TABLE_COLS = [
25
  "mean_rank",
26
  "feature",
 
48
 
49
  st.title("Gene Expression & TF Activity")
50
  st.caption(
51
+ "**Pathways** (Reactome / KEGG) and pathway-gene views; **ATAC motif** deviation and **TF activity** by fate; "
52
+ "**gene** and **motif** tables."
53
  )
54
 
55
  df = io.load_df_features()
 
63
  st.warning("No RNA gene or ATAC motif features are available in the current results.")
64
  st.stop()
65
 
66
+ try:
67
+ _mt_h_l, _mt_h_r = st.columns([0.94, 0.06], gap="small", vertical_alignment="center")
68
+ except TypeError:
69
+ _mt_h_l, _mt_h_r = st.columns([0.94, 0.06], gap="small")
70
+ with _mt_h_l:
71
+ st.subheader("Motif table")
72
+ with _mt_h_r:
73
+ ui.plot_help_popover(_HELP_MOTIF_TABLE, key="ge_motif_table_help")
74
+ st.caption(
75
+ "Here is a searchable table of all ATAC motif / TF features, each with FateFormer ranks and per-fate activity and "
76
+ "differential fields that you can sort, filter by name, or download CSV."
77
+ )
78
+
79
  if atac.empty:
80
  st.warning("No motif-level ATAC features are available in the current results.")
81
  else:
streamlit_hf/static/experiment.svg ADDED