m0ksh committed on
Commit
68a01ab
·
verified ·
1 Parent(s): cccf8bd

Sync from GitHub (preserve manual model files)

Browse files
StreamlitApp/StreamlitApp.py CHANGED
@@ -1,3 +1,5 @@
 
 
1
  import streamlit as st
2
  import pandas as pd
3
  import numpy as np
@@ -8,7 +10,7 @@ import plotly.express as px
8
  import html as _html
9
  from sklearn.manifold import TSNE
10
 
11
- # modular imports
12
  from utils.predict import load_model, predict_amp, encode_sequence
13
  from utils.analyze import aa_composition, compute_properties
14
  from utils.optimize import optimize_sequence
@@ -60,15 +62,15 @@ def _try_copy_to_clipboard(text: str) -> None:
60
  except Exception:
61
  pass
62
 
63
- # APP CONFIG
64
  st.set_page_config(page_title="AMP Predictor", layout="wide")
65
 
66
- # App title
67
  st.title("PeptideAI")
68
  st.write("Antimicrobial Peptide Predictor and Optimizer")
69
  st.divider()
70
 
71
- # SESSION STATE KEYS (one-time init)
72
  if "predictions" not in st.session_state:
73
  st.session_state.predictions = [] # list of dicts
74
  if "predict_ran" not in st.session_state:
@@ -83,6 +85,8 @@ if "optimize_input" not in st.session_state:
83
  st.session_state.optimize_input = "" # last optimize input
84
  if "optimize_output" not in st.session_state:
85
  st.session_state.optimize_output = None # (orig_seq, orig_conf, improved_seq, improved_conf, history)
 
 
86
  if "visualize_sequences" not in st.session_state:
87
  st.session_state.visualize_sequences = None
88
  if "visualize_df" not in st.session_state:
@@ -90,7 +94,7 @@ if "visualize_df" not in st.session_state:
90
  if "visualize_peptide_input" not in st.session_state:
91
  st.session_state.visualize_peptide_input = ""
92
 
93
- # SIDEBAR: navigation + global clear
94
  st.sidebar.header("Navigation")
95
  page = st.sidebar.radio(
96
  "Go to",
@@ -98,15 +102,14 @@ page = st.sidebar.radio(
98
  "Predict",
99
  "Analyze",
100
  "Optimize",
101
- "Visualize Peptide",
102
- "Visualize t-SNE",
103
  "About",
104
  ],
105
  )
106
 
107
  if st.sidebar.button("Clear All Fields"):
108
-
109
- # clear only our known keys
110
  keys = [
111
  "predictions",
112
  "predict_ran",
@@ -115,6 +118,7 @@ if st.sidebar.button("Clear All Fields"):
115
  "analyze_output",
116
  "optimize_input",
117
  "optimize_output",
 
118
  "visualize_sequences",
119
  "visualize_df",
120
  "visualize_peptide_input",
@@ -123,8 +127,7 @@ if st.sidebar.button("Clear All Fields"):
123
  if k in st.session_state:
124
  del st.session_state[k]
125
  st.sidebar.success("Cleared app state.")
126
- # Streamlit renamed `experimental_rerun()` -> `rerun()` in newer versions.
127
- # Use a version-safe call so Spaces don't fail with AttributeError.
128
  rerun_fn = getattr(st, "rerun", None) or getattr(st, "experimental_rerun", None)
129
  if rerun_fn is not None:
130
  rerun_fn()
@@ -132,10 +135,22 @@ if st.sidebar.button("Clear All Fields"):
132
  st.stop()
133
 
134
 
135
- # Load model once
136
  model = load_model()
137
 
138
- # PREDICT PAGE
 
 
 
 
 
 
 
 
 
 
 
 
139
  if page == "Predict":
140
  st.header("AMP Predictor")
141
 
@@ -156,7 +171,7 @@ if page == "Predict":
156
  )
157
  uploaded_file = st.file_uploader("Or upload a FASTA/text file", type=["txt", "fasta"])
158
 
159
- # Sequence length warnings (preview only; does not run model).
160
  preview_sequences = [s.strip() for s in (seq_input or "").splitlines() if s.strip()]
161
  if preview_sequences:
162
  short_cnt = sum(1 for s in preview_sequences if len(s) < 8)
@@ -170,7 +185,7 @@ if page == "Predict":
170
 
171
  if run:
172
 
173
- # Gather sequences
174
  sequences = []
175
  if seq_input:
176
  sequences += [s.strip() for s in seq_input.splitlines() if s.strip()]
@@ -196,12 +211,12 @@ if page == "Predict":
196
  progress.progress((i + 1) / max(1, len(sequences)), text=f"Predicted {i + 1}/{len(sequences)}")
197
  progress.progress(1.0)
198
 
199
- # Persist new predictions and mark that we ran
200
  st.session_state.predictions = results
201
  st.session_state.predict_ran = True
202
  st.success("Prediction complete.")
203
 
204
- # If user hasn't just run predictions, show the last saved results (if any)
205
  if st.session_state.predictions and not (run and st.session_state.predict_ran is False):
206
  st.divider()
207
 
@@ -228,27 +243,29 @@ if page == "Predict":
228
  st.write(f"Reason: {top_candidate['Reason']}")
229
 
230
  st.divider()
231
- # Keep the original dataframe for full overview/download compatibility.
232
  st.dataframe(pd.DataFrame(st.session_state.predictions), use_container_width=True)
233
  csv = pd.DataFrame(st.session_state.predictions).to_csv(index=False)
234
  st.download_button("Download predictions as CSV", csv, "predictions.csv", "text/csv")
235
 
236
- # ANALYZE PAGE
237
  elif page == "Analyze":
238
  st.header("Peptide Analyzer")
239
 
240
- # show the last saved analyze output if user navigated back
241
- last_seq = st.session_state.analyze_input
242
- seq = st.text_input(
243
- "Enter a peptide sequence to analyze:",
244
- value=last_seq,
245
- )
 
 
246
 
247
  warn = sequence_length_warning(seq)
248
  if warn:
249
  st.caption(f"Warning: {warn}")
250
 
251
- # only run analysis when input changed from last saved input
252
  if seq and seq != st.session_state.get("analyze_input", ""):
253
  with st.spinner("Running analysis..."):
254
  label, conf = predict_amp(seq, model)
@@ -258,11 +275,11 @@ elif page == "Analyze":
258
  comp = aa_composition(seq)
259
  props = compute_properties(seq)
260
 
261
- # normalize property key names if necessary
262
  net_charge = props.get("Net Charge (approx.)",
263
  props.get("Net charge", props.get("NetCharge", 0)))
264
 
265
- # build analysis summary (same rules as before)
266
  length = props.get("Length", len(seq))
267
  hydro = props.get("Hydrophobic Fraction", props.get("Hydrophobic", 0))
268
  charge = net_charge
@@ -294,11 +311,11 @@ elif page == "Analyze":
294
  if comp.get("C", 0) + comp.get("W", 0) >= 2:
295
  analysis.append("Multiple cysteine/tryptophan residues may improve activity.")
296
 
297
- # Save to session state
298
  st.session_state.analyze_input = seq
299
  st.session_state.analyze_output = (label, conf, conf_display, comp, props, analysis)
300
 
301
- # If we have stored output, display it
302
  if st.session_state.analyze_output:
303
  label, conf, conf_display, comp, props, analysis = st.session_state.analyze_output
304
 
@@ -306,7 +323,7 @@ elif page == "Analyze":
306
  display_conf = round(conf * 100, 1) if label == "AMP" else round((1 - conf) * 100, 1)
307
  st.write(f"Prediction: **{label}** with **{display_conf}%** confidence")
308
 
309
- # Sequence health check badge
310
  hydro = props.get("Hydrophobic Fraction", 0)
311
  charge = props.get("Net Charge (approx.)", props.get("Net charge", 0))
312
  health_label, color = sequence_health_label(float(conf), float(charge), float(hydro))
@@ -321,7 +338,7 @@ elif page == "Analyze":
321
 
322
  st.subheader("Physicochemical Properties and Favorability")
323
 
324
- # pull properties safely
325
  length = props.get("Length", len(st.session_state.analyze_input))
326
  hydro = props.get("Hydrophobic Fraction", 0)
327
  charge = props.get("Net Charge (approx.)", props.get("Net charge", 0))
@@ -345,7 +362,7 @@ elif page == "Analyze":
345
  "font-size:12px; font-weight:700; cursor:help;\">(i)</span>"
346
  )
347
 
348
- # Render the favorability table with working inline tooltips.
349
  hydro_label = f"Hydrophobic Fraction{_info_icon('Fraction of residues that prefer non-aqueous environments')}"
350
  charge_label = f"Net Charge{_info_icon('Positive charge helps peptides bind bacterial membranes')}"
351
  table_html = (
@@ -357,7 +374,7 @@ elif page == "Analyze":
357
  "left:50%;"
358
  "top:125%;"
359
  "transform:translateX(-50%);"
360
- "max-width:860px;"
361
  "white-space:normal;"
362
  "padding:8px 10px;"
363
  "background:rgba(30,30,30,0.95);"
@@ -400,7 +417,7 @@ elif page == "Analyze":
400
  angles = np.linspace(0, 2 * np.pi, len(categories), endpoint=False).tolist()
401
  angles += angles[:1]
402
 
403
- # Adjusted figsize for better vertical space
404
  fig, ax = plt.subplots(figsize=(2.8, 3.2), subplot_kw=dict(polar=True))
405
  fig.patch.set_facecolor("white")
406
  ax.fill_between(angles, ideal_min, ideal_max, color='#457a00', alpha=0.15, label="Ideal AMP range")
@@ -436,12 +453,12 @@ elif page == "Analyze":
436
  st.caption("Run analysis with a sequence to compare against known AMPs.")
437
 
438
  st.divider()
439
- # Analysis Summary
440
  st.subheader("Analysis Summary")
441
  for line in analysis:
442
  st.write(f"- {line}")
443
 
444
- # Export analysis report
445
  st.divider()
446
  st.subheader("Export Analysis Report")
447
  export_format = st.radio("Format", ["CSV", "TXT"], horizontal=True)
@@ -485,25 +502,24 @@ elif page == "Analyze":
485
  mime="text/plain",
486
  )
487
 
488
- # OPTIMIZE PAGE
489
  elif page == "Optimize":
490
  st.header("Peptide Optimizer")
491
 
492
- # Form: Enter in the text field submits the form (same as clicking Run Optimization).
493
- with st.form("optimize_form", clear_on_submit=False):
494
  seq = st.text_input(
495
  "Enter a peptide sequence to optimize:",
496
- value=st.session_state.get("optimize_input", ""),
497
  )
498
- submitted = st.form_submit_button("Run Optimization")
499
 
500
  warn_opt = sequence_length_warning(seq) if seq else None
501
  if warn_opt:
502
  st.caption(f"Warning: {warn_opt}")
503
 
504
- if submitted and seq and str(seq).strip():
 
505
  seq = str(seq).strip()
506
- st.session_state.optimize_input = seq
507
  progress = st.progress(0.0, text="Optimizing...")
508
  with st.spinner("Optimizing sequence..."):
509
  improved_seq, improved_conf, history = optimize_sequence(seq, model)
@@ -512,7 +528,7 @@ elif page == "Optimize":
512
  progress.progress(1.0, text="Optimization complete")
513
  st.success("Optimization finished.")
514
 
515
- # If there is saved output show it
516
  if st.session_state.optimize_output:
517
  orig_seq, orig_conf, improved_seq, improved_conf, history = st.session_state.optimize_output
518
  summary = optimization_summary(orig_seq, orig_conf, improved_seq, improved_conf)
@@ -536,7 +552,7 @@ elif page == "Optimize":
536
  )
537
 
538
  st.divider()
539
- # Mutation Heatmap
540
  st.subheader("Mutation Heatmap (Changed Residues Highlighted)")
541
  st.markdown(mutation_heatmap_html(orig_seq, improved_seq), unsafe_allow_html=True)
542
  with st.expander("Mutation Details (table)"):
@@ -555,7 +571,7 @@ elif page == "Optimize":
555
  st.subheader("Mutation Steps")
556
  st.dataframe(df_steps, use_container_width=True)
557
 
558
- # Confidence improvement plot
559
  step_nums = df_steps["Step"].tolist()
560
  conf_values = df_steps["New Confidence (%)"].tolist()
561
  df_graph = pd.DataFrame({"Step": step_nums, "Confidence (%)": conf_values})
@@ -563,32 +579,14 @@ elif page == "Optimize":
563
  fig.update_layout(yaxis=dict(range=[0, 100]), title="Confidence Improvement Over Steps")
564
  st.plotly_chart(fig, use_container_width=True)
565
 
566
- # VISUALIZE PEPTIDE PAGE
567
- elif page == "Visualize Peptide":
568
  st.header("Peptide Visualizer")
569
- # Tighter legend expanders (summary row + scrollable body)
570
- st.markdown(
571
- """
572
- <style>
573
- div[data-testid="stExpander"] details > summary {
574
- padding-top: 0.3rem !important;
575
- padding-bottom: 0.3rem !important;
576
- min-height: 2rem !important;
577
- }
578
- div[data-testid="stExpander"] details div[data-testid="stMarkdownContainer"] {
579
- max-height: 6.5rem;
580
- overflow-y: auto;
581
- }
582
- </style>
583
- """,
584
- unsafe_allow_html=True,
585
- )
586
-
587
- st.text_input(
588
- "Enter a peptide sequence to visualize:",
589
- key="visualize_peptide_input",
590
- placeholder="Paste or type a one-letter amino-acid sequence",
591
- )
592
 
593
  seq_viz = (st.session_state.get("visualize_peptide_input") or "").strip()
594
  clean_viz = "".join(c for c in seq_viz.upper() if not c.isspace())
@@ -634,23 +632,23 @@ elif page == "Visualize Peptide":
634
  with st.expander("Map · legend", expanded=False):
635
  st.markdown(COMPACT_MAP_LEGEND)
636
 
637
- # VISUALIZE t-SNE PAGE
638
- elif page == "Visualize t-SNE":
639
  st.header("t-SNE Visualizer")
640
  st.write("Upload peptide sequences (FASTA or plain list) to embed sequences and explore clusters with t-SNE.")
641
 
642
  uploaded_file = st.file_uploader("Upload FASTA or text file", type=["txt", "fasta"])
643
 
644
- # If file uploaded, set session sequences (replacing previous)
645
  if uploaded_file:
646
  text = uploaded_file.read().decode("utf-8")
647
  sequences = [l.strip() for l in text.splitlines() if not l.startswith(">") and l.strip()]
648
  st.session_state.visualize_sequences = sequences
649
 
650
- # Clear any previous df so we recompute
651
  st.session_state.visualize_df = None
652
 
653
- # If we have sequences stored, compute embeddings and t-SNE if no df present
654
  if st.session_state.visualize_sequences and st.session_state.visualize_df is None:
655
  sequences = st.session_state.visualize_sequences
656
  if len(sequences) < 2:
@@ -660,7 +658,7 @@ elif page == "Visualize t-SNE":
660
  with st.spinner("Generating embedding..."):
661
  embeddings_list, labels, confs, lengths, hydros, charges = [], [], [], [], [], []
662
 
663
- # Use model internals for embeddings; keep same approach as your module
664
  embedding_extractor = torch.nn.Sequential(*list(model.layers)[:-1])
665
 
666
  for i, s in enumerate(sequences):
@@ -693,7 +691,7 @@ elif page == "Visualize t-SNE":
693
  st.session_state.visualize_df = df
694
  progress.progress(1.0, text="Embedding ready")
695
 
696
- # If we have a t-SNE dataframe, show plot and sidebar filters
697
  if st.session_state.visualize_df is not None:
698
  df = st.session_state.visualize_df
699
  st.subheader("t-SNE plot")
@@ -729,7 +727,7 @@ elif page == "Visualize t-SNE":
729
  • Coloring by properties reveals biochemical trends.
730
  """)
731
 
732
- # ABOUT PAGE
733
  elif page == "About":
734
  st.header("About the Project")
735
  st.markdown("""
@@ -739,8 +737,8 @@ It uses a trained neural network to estimate whether a peptide is likely to be a
739
  - **AMP Predictor**: batch predictions from multi-line or FASTA input, length warnings, persisted results, top-candidate highlight, and CSV export.
740
  - **Peptide Analyzer**: single-sequence numerical and textual analysis — AMP prediction, composition, physicochemical table + radar, similarity to known AMPs, and report export.
741
  - **Peptide Optimizer**: guided sequence optimization with Enter-to-run input, mutation heatmap, step table, and confidence-vs-step trend.
742
- - **Peptide Visualizer**: single-sequence 3D approximation + detailed helical wheel + functional region map with consistent residue coloring and concise legend dropdowns.
743
- - **t-SNE Visualizer**: upload many sequences, embed with the model, run t-SNE, and explore clusters with filters and hover metadata.
744
  - **About**: this overview and disclaimer.
745
 
746
  **Disclaimer:** Predictions are model-based heuristics and are **not** a substitute for wet-lab validation or regulatory use.
 
1
+ """Main Streamlit entrypoint wiring Predict, Analyze, Optimize, Visualize, and t-SNE pages."""
2
+
3
  import streamlit as st
4
  import pandas as pd
5
  import numpy as np
 
10
  import html as _html
11
  from sklearn.manifold import TSNE
12
 
13
+ # Page features are implemented in utils so this file stays orchestration-focused.
14
  from utils.predict import load_model, predict_amp, encode_sequence
15
  from utils.analyze import aa_composition, compute_properties
16
  from utils.optimize import optimize_sequence
 
62
  except Exception:
63
  pass
64
 
65
+ # Configure global app layout once before rendering widgets.
66
  st.set_page_config(page_title="AMP Predictor", layout="wide")
67
 
68
+ # Global title shown above all pages.
69
  st.title("PeptideAI")
70
  st.write("Antimicrobial Peptide Predictor and Optimizer")
71
  st.divider()
72
 
73
+ # Initialize session keys so navigation keeps user state across pages.
74
  if "predictions" not in st.session_state:
75
  st.session_state.predictions = [] # list of dicts
76
  if "predict_ran" not in st.session_state:
 
85
  st.session_state.optimize_input = "" # last optimize input
86
  if "optimize_output" not in st.session_state:
87
  st.session_state.optimize_output = None # (orig_seq, orig_conf, improved_seq, improved_conf, history)
88
+ if "optimize_last_ran_input" not in st.session_state:
89
+ st.session_state.optimize_last_ran_input = ""
90
  if "visualize_sequences" not in st.session_state:
91
  st.session_state.visualize_sequences = None
92
  if "visualize_df" not in st.session_state:
 
94
  if "visualize_peptide_input" not in st.session_state:
95
  st.session_state.visualize_peptide_input = ""
96
 
97
+ # Sidebar route selector drives top-level page rendering.
98
  st.sidebar.header("Navigation")
99
  page = st.sidebar.radio(
100
  "Go to",
 
102
  "Predict",
103
  "Analyze",
104
  "Optimize",
105
+ "Visualize",
106
+ "t-SNE",
107
  "About",
108
  ],
109
  )
110
 
111
  if st.sidebar.button("Clear All Fields"):
112
+ # Reset only app-owned state keys, then rerun to refresh all widgets.
 
113
  keys = [
114
  "predictions",
115
  "predict_ran",
 
118
  "analyze_output",
119
  "optimize_input",
120
  "optimize_output",
121
+ "optimize_last_ran_input",
122
  "visualize_sequences",
123
  "visualize_df",
124
  "visualize_peptide_input",
 
127
  if k in st.session_state:
128
  del st.session_state[k]
129
  st.sidebar.success("Cleared app state.")
130
+ # Support both old and new Streamlit rerun APIs.
 
131
  rerun_fn = getattr(st, "rerun", None) or getattr(st, "experimental_rerun", None)
132
  if rerun_fn is not None:
133
  rerun_fn()
 
135
  st.stop()
136
 
137
 
138
+ # Load the model before rendering any page; reuse across reruns is assumed to be handled inside load_model() — confirm.
139
  model = load_model()
140
 
141
+ # Shared style tweak keeps expander spacing consistent across pages.
142
+ st.markdown(
143
+ """<style>
144
+ div[data-testid="stExpander"] details > summary {
145
+ padding-top: 0.3rem !important;
146
+ padding-bottom: 0.3rem !important;
147
+ min-height: 2rem !important;
148
+ }
149
+ </style>""",
150
+ unsafe_allow_html=True,
151
+ )
152
+
153
+ # Predict page: batch inference from text area and optional upload.
154
  if page == "Predict":
155
  st.header("AMP Predictor")
156
 
 
171
  )
172
  uploaded_file = st.file_uploader("Or upload a FASTA/text file", type=["txt", "fasta"])
173
 
174
+ # Show quick length guidance before running the model.
175
  preview_sequences = [s.strip() for s in (seq_input or "").splitlines() if s.strip()]
176
  if preview_sequences:
177
  short_cnt = sum(1 for s in preview_sequences if len(s) < 8)
 
185
 
186
  if run:
187
 
188
+ # Merge direct text input and uploaded FASTA/plain-text entries.
189
  sequences = []
190
  if seq_input:
191
  sequences += [s.strip() for s in seq_input.splitlines() if s.strip()]
 
211
  progress.progress((i + 1) / max(1, len(sequences)), text=f"Predicted {i + 1}/{len(sequences)}")
212
  progress.progress(1.0)
213
 
214
+ # Persist results so users can switch pages without losing output.
215
  st.session_state.predictions = results
216
  st.session_state.predict_ran = True
217
  st.success("Prediction complete.")
218
 
219
+ # Show the latest saved prediction set, if any, so results persist across page navigation.
220
  if st.session_state.predictions and not (run and st.session_state.predict_ran is False):
221
  st.divider()
222
 
 
243
  st.write(f"Reason: {top_candidate['Reason']}")
244
 
245
  st.divider()
246
+ # Full table + CSV export preserve the complete prediction batch.
247
  st.dataframe(pd.DataFrame(st.session_state.predictions), use_container_width=True)
248
  csv = pd.DataFrame(st.session_state.predictions).to_csv(index=False)
249
  st.download_button("Download predictions as CSV", csv, "predictions.csv", "text/csv")
250
 
251
+ # Analyze page: single-sequence diagnostics and report export.
252
  elif page == "Analyze":
253
  st.header("Peptide Analyzer")
254
 
255
+ # Wrap the input in a bordered container to match the boxed input on the Optimize page.
256
+ with st.container(border=True):
257
+ # Seed input with previous analyzed sequence for quick iteration.
258
+ last_seq = st.session_state.analyze_input
259
+ seq = st.text_input(
260
+ "Enter a peptide sequence to analyze:",
261
+ value=last_seq,
262
+ )
263
 
264
  warn = sequence_length_warning(seq)
265
  if warn:
266
  st.caption(f"Warning: {warn}")
267
 
268
+ # Recompute only when sequence changes to avoid redundant work on reruns.
269
  if seq and seq != st.session_state.get("analyze_input", ""):
270
  with st.spinner("Running analysis..."):
271
  label, conf = predict_amp(seq, model)
 
275
  comp = aa_composition(seq)
276
  props = compute_properties(seq)
277
 
278
+ # Normalize property key variants returned by helper functions.
279
  net_charge = props.get("Net Charge (approx.)",
280
  props.get("Net charge", props.get("NetCharge", 0)))
281
 
282
+ # Build short, user-facing interpretation bullets.
283
  length = props.get("Length", len(seq))
284
  hydro = props.get("Hydrophobic Fraction", props.get("Hydrophobic", 0))
285
  charge = net_charge
 
311
  if comp.get("C", 0) + comp.get("W", 0) >= 2:
312
  analysis.append("Multiple cysteine/tryptophan residues may improve activity.")
313
 
314
+ # Save computed payload for display + report exports below.
315
  st.session_state.analyze_input = seq
316
  st.session_state.analyze_output = (label, conf, conf_display, comp, props, analysis)
317
 
318
+ # Render last computed analysis block.
319
  if st.session_state.analyze_output:
320
  label, conf, conf_display, comp, props, analysis = st.session_state.analyze_output
321
 
 
323
  display_conf = round(conf * 100, 1) if label == "AMP" else round((1 - conf) * 100, 1)
324
  st.write(f"Prediction: **{label}** with **{display_conf}%** confidence")
325
 
326
+ # Health badge blends model confidence with simple chemistry heuristics.
327
  hydro = props.get("Hydrophobic Fraction", 0)
328
  charge = props.get("Net Charge (approx.)", props.get("Net charge", 0))
329
  health_label, color = sequence_health_label(float(conf), float(charge), float(hydro))
 
338
 
339
  st.subheader("Physicochemical Properties and Favorability")
340
 
341
+ # Pull fields defensively in case key names vary.
342
  length = props.get("Length", len(st.session_state.analyze_input))
343
  hydro = props.get("Hydrophobic Fraction", 0)
344
  charge = props.get("Net Charge (approx.)", props.get("Net charge", 0))
 
362
  "font-size:12px; font-weight:700; cursor:help;\">(i)</span>"
363
  )
364
 
365
+ # Use HTML table for custom inline "(i)" tooltips.
366
  hydro_label = f"Hydrophobic Fraction{_info_icon('Fraction of residues that prefer non-aqueous environments')}"
367
  charge_label = f"Net Charge{_info_icon('Positive charge helps peptides bind bacterial membranes')}"
368
  table_html = (
 
374
  "left:50%;"
375
  "top:125%;"
376
  "transform:translateX(-50%);"
377
+ "max-width:1080px;"
378
  "white-space:normal;"
379
  "padding:8px 10px;"
380
  "background:rgba(30,30,30,0.95);"
 
417
  angles = np.linspace(0, 2 * np.pi, len(categories), endpoint=False).tolist()
418
  angles += angles[:1]
419
 
420
+ # Compact radar chart compares sequence values against an "ideal AMP" band.
421
  fig, ax = plt.subplots(figsize=(2.8, 3.2), subplot_kw=dict(polar=True))
422
  fig.patch.set_facecolor("white")
423
  ax.fill_between(angles, ideal_min, ideal_max, color='#457a00', alpha=0.15, label="Ideal AMP range")
 
453
  st.caption("Run analysis with a sequence to compare against known AMPs.")
454
 
455
  st.divider()
456
+ # Summarize key findings as plain-language bullets.
457
  st.subheader("Analysis Summary")
458
  for line in analysis:
459
  st.write(f"- {line}")
460
 
461
+ # Export section packages current analysis in CSV or TXT format.
462
  st.divider()
463
  st.subheader("Export Analysis Report")
464
  export_format = st.radio("Format", ["CSV", "TXT"], horizontal=True)
 
502
  mime="text/plain",
503
  )
504
 
505
+ # Optimize page: greedy mutation search with per-step diagnostics.
506
  elif page == "Optimize":
507
  st.header("Peptide Optimizer")
508
 
509
+ with st.container(border=True):
 
510
  seq = st.text_input(
511
  "Enter a peptide sequence to optimize:",
512
+ key="optimize_input",
513
  )
 
514
 
515
  warn_opt = sequence_length_warning(seq) if seq else None
516
  if warn_opt:
517
  st.caption(f"Warning: {warn_opt}")
518
 
519
+ # Re-run optimization when the entered sequence changes.
520
+ if seq and str(seq).strip() and str(seq).strip() != st.session_state.get("optimize_last_ran_input", ""):
521
  seq = str(seq).strip()
522
+ st.session_state.optimize_last_ran_input = seq
523
  progress = st.progress(0.0, text="Optimizing...")
524
  with st.spinner("Optimizing sequence..."):
525
  improved_seq, improved_conf, history = optimize_sequence(seq, model)
 
528
  progress.progress(1.0, text="Optimization complete")
529
  st.success("Optimization finished.")
530
 
531
+ # Render latest optimization artifacts from session state.
532
  if st.session_state.optimize_output:
533
  orig_seq, orig_conf, improved_seq, improved_conf, history = st.session_state.optimize_output
534
  summary = optimization_summary(orig_seq, orig_conf, improved_seq, improved_conf)
 
552
  )
553
 
554
  st.divider()
555
+ # Heatmap + table make residue-level edits easy to inspect.
556
  st.subheader("Mutation Heatmap (Changed Residues Highlighted)")
557
  st.markdown(mutation_heatmap_html(orig_seq, improved_seq), unsafe_allow_html=True)
558
  with st.expander("Mutation Details (table)"):
 
571
  st.subheader("Mutation Steps")
572
  st.dataframe(df_steps, use_container_width=True)
573
 
574
+ # Trend line shows confidence gain over accepted mutation steps.
575
  step_nums = df_steps["Step"].tolist()
576
  conf_values = df_steps["New Confidence (%)"].tolist()
577
  df_graph = pd.DataFrame({"Step": step_nums, "Confidence (%)": conf_values})
 
579
  fig.update_layout(yaxis=dict(range=[0, 100]), title="Confidence Improvement Over Steps")
580
  st.plotly_chart(fig, use_container_width=True)
581
 
582
+ # Visualize page: structural/sequence interpretation for one peptide.
583
+ elif page == "Visualize":
584
  st.header("Peptide Visualizer")
585
+ with st.container(border=True):
586
+ st.text_input(
587
+ "Enter a peptide sequence to visualize:",
588
+ key="visualize_peptide_input",
589
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
590
 
591
  seq_viz = (st.session_state.get("visualize_peptide_input") or "").strip()
592
  clean_viz = "".join(c for c in seq_viz.upper() if not c.isspace())
 
632
  with st.expander("Map · legend", expanded=False):
633
  st.markdown(COMPACT_MAP_LEGEND)
634
 
635
+ # t-SNE page: embedding projection for multi-sequence exploration.
636
+ elif page == "t-SNE":
637
  st.header("t-SNE Visualizer")
638
  st.write("Upload peptide sequences (FASTA or plain list) to embed sequences and explore clusters with t-SNE.")
639
 
640
  uploaded_file = st.file_uploader("Upload FASTA or text file", type=["txt", "fasta"])
641
 
642
+ # Parse upload and replace previous sequence set.
643
  if uploaded_file:
644
  text = uploaded_file.read().decode("utf-8")
645
  sequences = [l.strip() for l in text.splitlines() if not l.startswith(">") and l.strip()]
646
  st.session_state.visualize_sequences = sequences
647
 
648
+ # Invalidate previous embedding projection after new upload.
649
  st.session_state.visualize_df = None
650
 
651
+ # Compute embeddings once and cache the projected dataframe in session.
652
  if st.session_state.visualize_sequences and st.session_state.visualize_df is None:
653
  sequences = st.session_state.visualize_sequences
654
  if len(sequences) < 2:
 
658
  with st.spinner("Generating embedding..."):
659
  embeddings_list, labels, confs, lengths, hydros, charges = [], [], [], [], [], []
660
 
661
+ # Use penultimate model representation as embedding features.
662
  embedding_extractor = torch.nn.Sequential(*list(model.layers)[:-1])
663
 
664
  for i, s in enumerate(sequences):
 
691
  st.session_state.visualize_df = df
692
  progress.progress(1.0, text="Embedding ready")
693
 
694
+ # Render interactive scatter + filters once a projected dataframe exists.
695
  if st.session_state.visualize_df is not None:
696
  df = st.session_state.visualize_df
697
  st.subheader("t-SNE plot")
 
727
  • Coloring by properties reveals biochemical trends.
728
  """)
729
 
730
+ # About page: quick orientation + disclaimer for new users.
731
  elif page == "About":
732
  st.header("About the Project")
733
  st.markdown("""
 
737
  - **AMP Predictor**: batch predictions from multi-line or FASTA input, length warnings, persisted results, top-candidate highlight, and CSV export.
738
  - **Peptide Analyzer**: single-sequence numerical and textual analysis — AMP prediction, composition, physicochemical table + radar, similarity to known AMPs, and report export.
739
  - **Peptide Optimizer**: guided sequence optimization with Enter-to-run input, mutation heatmap, step table, and confidence-vs-step trend.
740
+ - **Visualize**: single-sequence 3D approximation + detailed helical wheel + functional region map with consistent residue coloring and clear legend dropdowns.
741
+ - **t-SNE**: upload many sequences, embed with the model, run t-SNE, and explore clusters with filters and hover metadata.
742
  - **About**: this overview and disclaimer.
743
 
744
  **Disclaimer:** Predictions are model-based heuristics and are **not** a substitute for wet-lab validation or regulatory use.
StreamlitApp/utils/analyze.py CHANGED
@@ -1,15 +1,16 @@
 
 
1
  from collections import Counter
2
 
3
  def aa_composition(sequence):
 
4
  amino_acids = list("ACDEFGHIKLMNPQRSTVWY")
5
  counts = Counter(sequence)
6
  total = len(sequence)
7
  return {aa: counts.get(aa, 0) / total for aa in amino_acids}
8
 
9
- # Compute sequence properties
10
  def compute_properties(sequence):
11
-
12
- # Property calculations
13
  aa_weights = {'A': 89.1, 'R': 174.2, 'N': 132.1, 'D': 133.1, 'C': 121.2,
14
  'E': 147.1, 'Q': 146.2, 'G': 75.1, 'H': 155.2, 'I': 131.2,
15
  'L': 131.2, 'K': 146.2, 'M': 149.2, 'F': 165.2, 'P': 115.1,
 
1
+ """Sequence composition and physicochemical property helpers."""
2
+
3
  from collections import Counter
4
 
5
  def aa_composition(sequence):
6
+ """Return normalized frequencies for the 20 canonical amino acids."""
7
  amino_acids = list("ACDEFGHIKLMNPQRSTVWY")
8
  counts = Counter(sequence)
9
  total = len(sequence)
10
  return {aa: counts.get(aa, 0) / total for aa in amino_acids}
11
 
 
12
  def compute_properties(sequence):
13
+ """Compute simple length, mass, hydrophobicity, and net-charge signals."""
 
14
  aa_weights = {'A': 89.1, 'R': 174.2, 'N': 132.1, 'D': 133.1, 'C': 121.2,
15
  'E': 147.1, 'Q': 146.2, 'G': 75.1, 'H': 155.2, 'I': 131.2,
16
  'L': 131.2, 'K': 146.2, 'M': 149.2, 'F': 165.2, 'P': 115.1,
StreamlitApp/utils/optimize.py CHANGED
@@ -1,13 +1,16 @@
 
 
1
  import random
2
  from utils.predict import predict_amp
3
 
 
4
  HYDROPHOBIC = set("AILMFWVPG")
5
  HYDROPHILIC = set("STNQYCH")
6
  POSITIVE = set("KRH")
7
  NEGATIVE = set("DE")
8
 
9
- # Function to mutate a residue based on simple heuristics
10
  def mutate_residue(residue):
 
11
  if residue in POSITIVE:
12
  return residue, "Retained strong positive residue"
13
  elif residue in NEGATIVE:
@@ -19,7 +22,6 @@ def mutate_residue(residue):
19
  else:
20
  return random.choice(list(HYDROPHOBIC)), "Adjusted physicochemical profile"
21
 
22
- # Sequence optimization function
23
  def optimize_sequence(seq, model, max_rounds=20, confidence_threshold=0.001):
24
  """
25
  Iteratively optimize sequence to increase AMP probability.
@@ -30,7 +32,7 @@ def optimize_sequence(seq, model, max_rounds=20, confidence_threshold=0.001):
30
  best_conf = conf
31
  history = [(current_seq, conf, "-", "-", "-", "Original sequence")]
32
 
33
- # Optimization loop
34
  for _ in range(max_rounds):
35
  best_mutation = None
36
  best_mutation_conf = best_conf
@@ -53,7 +55,7 @@ def optimize_sequence(seq, model, max_rounds=20, confidence_threshold=0.001):
53
  history.append((current_seq, best_conf, change, old_res, new_res, reason))
54
  else:
55
 
56
- # No further improvement, stop
57
  break
58
 
59
  return current_seq, best_conf, history
 
1
+ """Heuristic mutation search used by the Optimize page."""
2
+
3
  import random
4
  from utils.predict import predict_amp
5
 
6
+ # Residue groups used to propose chemistry-aware substitutions.
7
  HYDROPHOBIC = set("AILMFWVPG")
8
  HYDROPHILIC = set("STNQYCH")
9
  POSITIVE = set("KRH")
10
  NEGATIVE = set("DE")
11
 
 
12
  def mutate_residue(residue):
13
+ """Return a candidate replacement residue and rationale."""
14
  if residue in POSITIVE:
15
  return residue, "Retained strong positive residue"
16
  elif residue in NEGATIVE:
 
22
  else:
23
  return random.choice(list(HYDROPHOBIC)), "Adjusted physicochemical profile"
24
 
 
25
  def optimize_sequence(seq, model, max_rounds=20, confidence_threshold=0.001):
26
  """
27
  Iteratively optimize sequence to increase AMP probability.
 
32
  best_conf = conf
33
  history = [(current_seq, conf, "-", "-", "-", "Original sequence")]
34
 
35
+ # Greedy loop: keep only the best confidence-improving mutation each round.
36
  for _ in range(max_rounds):
37
  best_mutation = None
38
  best_mutation_conf = best_conf
 
55
  history.append((current_seq, best_conf, change, old_res, new_res, reason))
56
  else:
57
 
58
+ # Stop when no mutation clears the minimum improvement threshold.
59
  break
60
 
61
  return current_seq, best_conf, history
StreamlitApp/utils/peptide_extras.py CHANGED
@@ -121,6 +121,7 @@ def sequence_similarity(seq1: str, seq2: str) -> float:
121
 
122
 
123
  def find_most_similar(sequence: str) -> Tuple[Optional[str], float]:
 
124
  if not sequence or not KNOWN_AMPS:
125
  return None, 0.0
126
  seq = "".join(c for c in sequence.upper() if not c.isspace())
@@ -204,6 +205,7 @@ def plot_helical_wheel(sequence: str, figsize: Tuple[float, float] = (6.2, 6.2))
204
  import matplotlib.pyplot as plt
205
  from matplotlib import patheffects as pe
206
 
 
207
  clean = "".join(c for c in (sequence or "").upper() if not c.isspace())
208
  n = len(clean)
209
  fig, ax = plt.subplots(figsize=figsize, subplot_kw={"projection": "polar"})
@@ -247,6 +249,7 @@ def plot_helical_wheel(sequence: str, figsize: Tuple[float, float] = (6.2, 6.2))
247
  zorder=2,
248
  )
249
 
 
250
  colors = [residue_color_mpl(aa) for aa in clean]
251
  ax.scatter(
252
  angles_rad,
@@ -303,6 +306,7 @@ def build_importance_map_html(sequence: str) -> str:
303
  """Build HTML for residue importance highlighting (escape non-AA safely)."""
304
  import html as html_mod
305
 
 
306
  parts: List[str] = []
307
  for ch in sequence:
308
  if ch.isspace():
@@ -379,6 +383,7 @@ def render_3d_structure(
379
  """
380
  import streamlit.components.v1 as components
381
 
 
382
  clean = "".join(c for c in (sequence or "").upper() if not c.isspace())
383
  if not clean:
384
  return False
 
121
 
122
 
123
  def find_most_similar(sequence: str) -> Tuple[Optional[str], float]:
124
+ """Return the closest known AMP and simple position-match similarity score."""
125
  if not sequence or not KNOWN_AMPS:
126
  return None, 0.0
127
  seq = "".join(c for c in sequence.upper() if not c.isspace())
 
205
  import matplotlib.pyplot as plt
206
  from matplotlib import patheffects as pe
207
 
208
+ # Normalize user input to whitespace-free uppercase sequence.
209
  clean = "".join(c for c in (sequence or "").upper() if not c.isspace())
210
  n = len(clean)
211
  fig, ax = plt.subplots(figsize=figsize, subplot_kw={"projection": "polar"})
 
249
  zorder=2,
250
  )
251
 
252
+ # Draw residue nodes after spokes/connectors so labels stay readable.
253
  colors = [residue_color_mpl(aa) for aa in clean]
254
  ax.scatter(
255
  angles_rad,
 
306
  """Build HTML for residue importance highlighting (escape non-AA safely)."""
307
  import html as html_mod
308
 
309
+ # Emit one colored <span> per residue for inline sequence highlighting.
310
  parts: List[str] = []
311
  for ch in sequence:
312
  if ch.isspace():
 
383
  """
384
  import streamlit.components.v1 as components
385
 
386
+ # Input sanitization keeps renderer stable across pasted FASTA/text snippets.
387
  clean = "".join(c for c in (sequence or "").upper() if not c.isspace())
388
  if not clean:
389
  return False
StreamlitApp/utils/predict.py CHANGED
@@ -1,10 +1,12 @@
 
 
1
  import pathlib
2
  import numpy as np
3
  import torch
4
  import streamlit as st
5
  from torch import nn
6
 
7
- # Model Definition
8
  class FastMLP(nn.Module):
9
  def __init__(self, input_dim=1024):
10
  super(FastMLP, self).__init__()
@@ -20,9 +22,9 @@ class FastMLP(nn.Module):
20
  def forward(self, x):
21
  return self.layers(x)
22
 
23
- # Model Loader
24
  @st.cache_resource
25
  def load_model():
 
26
  # Always resolve relative to the StreamlitApp folder, not the process CWD.
27
  streamlitapp_dir = pathlib.Path(__file__).resolve().parent.parent
28
  repo_root = streamlitapp_dir.parent
@@ -42,13 +44,12 @@ def load_model():
42
  f"- {streamlitapp_dir / 'models' / 'ampMLModel.pt'}\n"
43
  )
44
 
45
- # Build model and load weights
46
  model = FastMLP(input_dim=1024)
47
  model.load_state_dict(torch.load(str(model_path), map_location="cpu"))
48
  model.eval()
49
  return model
50
 
51
- # Sequence Encoder
52
  def encode_sequence(seq, max_len=51):
53
  """
54
  Converts amino acid sequence to flattened one-hot vector
@@ -57,19 +58,19 @@ def encode_sequence(seq, max_len=51):
57
  amino_acids = "ACDEFGHIKLMNPQRSTVWY"
58
  aa_to_idx = {aa: i for i, aa in enumerate(amino_acids)}
59
 
60
- one_hot = np.zeros((max_len, len(amino_acids))) # max_len x 20
 
61
  for i, aa in enumerate(seq[:max_len]):
62
  if aa in aa_to_idx:
63
  one_hot[i, aa_to_idx[aa]] = 1
64
 
65
- flat = one_hot.flatten() # length = max_len*20 = 1020
66
 
67
  if len(flat) < 1024:
68
  flat = np.pad(flat, (0, 1024 - len(flat)))
69
 
70
  return flat
71
 
72
- # Prediction Function
73
  def predict_amp(sequence, model):
74
  """
75
  Takes an amino acid sequence string and the loaded model,
@@ -77,6 +78,7 @@ def predict_amp(sequence, model):
77
  """
78
  x = torch.tensor(encode_sequence(sequence), dtype=torch.float32).unsqueeze(0)
79
 
 
80
  with torch.no_grad():
81
  logits = model(x)
82
  prob = torch.sigmoid(logits).item()
 
1
+ """Model loading, sequence encoding, and AMP inference helpers."""
2
+
3
  import pathlib
4
  import numpy as np
5
  import torch
6
  import streamlit as st
7
  from torch import nn
8
 
9
+ # Lightweight MLP used for AMP binary classification.
10
  class FastMLP(nn.Module):
11
  def __init__(self, input_dim=1024):
12
  super(FastMLP, self).__init__()
 
22
  def forward(self, x):
23
  return self.layers(x)
24
 
 
25
  @st.cache_resource
26
  def load_model():
27
+ """Load model weights once per Streamlit process."""
28
  # Always resolve relative to the StreamlitApp folder, not the process CWD.
29
  streamlitapp_dir = pathlib.Path(__file__).resolve().parent.parent
30
  repo_root = streamlitapp_dir.parent
 
44
  f"- {streamlitapp_dir / 'models' / 'ampMLModel.pt'}\n"
45
  )
46
 
47
+ # Instantiate architecture and hydrate weights from disk.
48
  model = FastMLP(input_dim=1024)
49
  model.load_state_dict(torch.load(str(model_path), map_location="cpu"))
50
  model.eval()
51
  return model
52
 
 
53
  def encode_sequence(seq, max_len=51):
54
  """
55
  Converts amino acid sequence to flattened one-hot vector
 
58
  amino_acids = "ACDEFGHIKLMNPQRSTVWY"
59
  aa_to_idx = {aa: i for i, aa in enumerate(amino_acids)}
60
 
61
+ # Encode each residue as a one-hot row, then flatten to vector features.
62
+ one_hot = np.zeros((max_len, len(amino_acids)))
63
  for i, aa in enumerate(seq[:max_len]):
64
  if aa in aa_to_idx:
65
  one_hot[i, aa_to_idx[aa]] = 1
66
 
67
+ flat = one_hot.flatten()
68
 
69
  if len(flat) < 1024:
70
  flat = np.pad(flat, (0, 1024 - len(flat)))
71
 
72
  return flat
73
 
 
74
  def predict_amp(sequence, model):
75
  """
76
  Takes an amino acid sequence string and the loaded model,
 
78
  """
79
  x = torch.tensor(encode_sequence(sequence), dtype=torch.float32).unsqueeze(0)
80
 
81
+ # Sigmoid(logit) gives AMP probability in [0, 1].
82
  with torch.no_grad():
83
  logits = model(x)
84
  prob = torch.sigmoid(logits).item()
StreamlitApp/utils/rateLimit.py CHANGED
@@ -1,9 +1,10 @@
 
 
1
  import time
2
  from collections import deque
3
 
4
  class RateLimiter:
5
-
6
- #Sliding-window rate limiter per instance
7
  def __init__(self, max_calls: int, period_seconds: float):
8
  self.max_calls = max_calls
9
  self.period = period_seconds
@@ -12,7 +13,7 @@ class RateLimiter:
12
  def allow(self) -> bool:
13
  now = time.time()
14
 
15
- # Drop entries older than window
16
  while self.calls and self.calls[0] <= now - self.period:
17
  self.calls.popleft()
18
  if len(self.calls) < self.max_calls:
@@ -21,8 +22,7 @@ class RateLimiter:
21
  return False
22
 
23
  def time_until_next(self) -> float:
24
-
25
- # Seconds until next slot is available (0 if already available)
26
  now = time.time()
27
  if len(self.calls) < self.max_calls:
28
  return 0.0
 
1
+ """Simple in-memory sliding-window rate limiter."""
2
+
3
  import time
4
  from collections import deque
5
 
6
  class RateLimiter:
7
+ # Each instance tracks call timestamps for one caller/key.
 
8
  def __init__(self, max_calls: int, period_seconds: float):
9
  self.max_calls = max_calls
10
  self.period = period_seconds
 
13
  def allow(self) -> bool:
14
  now = time.time()
15
 
16
+ # Drop timestamps outside the active window.
17
  while self.calls and self.calls[0] <= now - self.period:
18
  self.calls.popleft()
19
  if len(self.calls) < self.max_calls:
 
22
  return False
23
 
24
  def time_until_next(self) -> float:
25
+ # Return wait time before another call is allowed (seconds).
 
26
  now = time.time()
27
  if len(self.calls) < self.max_calls:
28
  return 0.0
StreamlitApp/utils/ui_helpers.py CHANGED
@@ -1,3 +1,5 @@
 
 
1
  import html as _html
2
  from typing import Dict, List, Tuple, Optional
3
 
@@ -20,7 +22,7 @@ def predicted_confidence(row: Dict) -> Optional[float]:
20
  return None
21
  if pred == "AMP":
22
  return p_amp
23
- # Non-AMP probability is (1 - AMP probability)
24
  return 1.0 - p_amp
25
 
26
 
@@ -47,7 +49,7 @@ def choose_top_candidate(predictions: List[Dict]) -> Optional[Dict]:
47
  if not predictions:
48
  return None
49
 
50
- # Prefer AMP predictions; otherwise pick highest confidence overall.
51
  amp_rows = [r for r in predictions if r.get("Prediction") == "AMP"]
52
  rows = amp_rows if amp_rows else predictions
53
 
@@ -90,7 +92,7 @@ def mutation_heatmap_html(original: str, final: str) -> str:
90
  fin = final or ""
91
  max_len = max(len(orig), len(fin))
92
 
93
- # Display in a fixed-width monospace container.
94
  out: List[str] = [
95
  "<div style='font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, \"Liberation Mono\", monospace; white-space: pre-wrap;'>"
96
  ]
@@ -142,7 +144,7 @@ def optimization_summary(orig_seq: str, orig_conf: float, final_seq: str, final_
142
  orig_seq = orig_seq or ""
143
  final_seq = final_seq or ""
144
 
145
- # Compute properties only if non-empty.
146
  props_orig = compute_properties(orig_seq) if orig_seq else {}
147
  props_final = compute_properties(final_seq) if final_seq else {}
148
 
@@ -198,8 +200,7 @@ def sequence_health_label(conf_prob: float, charge: float, hydro_fraction: float
198
  """
199
  Returns: (label, color_css)
200
  """
201
- # If the model is *extremely* confident, treat it as strong regardless
202
- # of charge/hydrophobicity heuristics (prevents "moderate" at ~99%).
203
  if conf_prob >= 0.9:
204
  return "Strong AMP candidate", "#2ca02c"
205
  if conf_prob > 0.75 and charge >= 2 and 0.3 <= hydro_fraction <= 0.6:
 
1
+ """UI-facing formatting and summary helpers shared across pages."""
2
+
3
  import html as _html
4
  from typing import Dict, List, Tuple, Optional
5
 
 
22
  return None
23
  if pred == "AMP":
24
  return p_amp
25
+ # Convert AMP probability into confidence for the predicted class.
26
  return 1.0 - p_amp
27
 
28
 
 
49
  if not predictions:
50
  return None
51
 
52
+ # Prefer AMP rows first, then fall back to highest-confidence overall row.
53
  amp_rows = [r for r in predictions if r.get("Prediction") == "AMP"]
54
  rows = amp_rows if amp_rows else predictions
55
 
 
92
  fin = final or ""
93
  max_len = max(len(orig), len(fin))
94
 
95
+ # Use monospace layout so per-position residue changes align visually.
96
  out: List[str] = [
97
  "<div style='font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, \"Liberation Mono\", monospace; white-space: pre-wrap;'>"
98
  ]
 
144
  orig_seq = orig_seq or ""
145
  final_seq = final_seq or ""
146
 
147
+ # Property deltas drive the compact "what changed" summary panel.
148
  props_orig = compute_properties(orig_seq) if orig_seq else {}
149
  props_final = compute_properties(final_seq) if final_seq else {}
150
 
 
200
  """
201
  Returns: (label, color_css)
202
  """
203
+ # Very high model confidence is treated as strong even outside ideal property ranges.
 
204
  if conf_prob >= 0.9:
205
  return "Strong AMP candidate", "#2ca02c"
206
  if conf_prob > 0.75 and charge >= 2 and 0.3 <= hydro_fraction <= 0.6:
StreamlitApp/utils/visualize.py CHANGED
@@ -1,3 +1,5 @@
 
 
1
  import pandas as pd
2
  import matplotlib.pyplot as plt
3
  from sklearn.manifold import TSNE
@@ -6,14 +8,15 @@ import torch
6
  import numpy as np
7
  from utils.predict import encode_sequence
8
 
9
- # t-SNE Visualization
10
  def tsne_visualization(sequences, model):
 
11
  st.info("Generating embeddings... this may take a moment.")
12
  embeddings = []
13
  for seq in sequences:
14
  x = torch.tensor(encode_sequence(seq), dtype=torch.float32).unsqueeze(0)
15
  with torch.no_grad():
16
- emb = model.layers[0](x) # Grab first layer embedding
 
17
  embeddings.append(emb.numpy().flatten())
18
 
19
  embeddings = np.vstack(embeddings)
 
1
+ """Legacy t-SNE helper retained for ad-hoc embedding previews."""
2
+
3
  import pandas as pd
4
  import matplotlib.pyplot as plt
5
  from sklearn.manifold import TSNE
 
8
  import numpy as np
9
  from utils.predict import encode_sequence
10
 
 
11
  def tsne_visualization(sequences, model):
12
+ """Project model embeddings into 2D and render a quick scatter plot."""
13
  st.info("Generating embeddings... this may take a moment.")
14
  embeddings = []
15
  for seq in sequences:
16
  x = torch.tensor(encode_sequence(seq), dtype=torch.float32).unsqueeze(0)
17
  with torch.no_grad():
18
+ # Use an early hidden layer as a compact learned representation.
19
+ emb = model.layers[0](x)
20
  embeddings.append(emb.numpy().flatten())
21
 
22
  embeddings = np.vstack(embeddings)