Sync from GitHub (preserve manual model files)
Browse files
- StreamlitApp/StreamlitApp.py +80 -82
- StreamlitApp/utils/analyze.py +4 -3
- StreamlitApp/utils/optimize.py +6 -4
- StreamlitApp/utils/peptide_extras.py +5 -0
- StreamlitApp/utils/predict.py +9 -7
- StreamlitApp/utils/rateLimit.py +5 -5
- StreamlitApp/utils/ui_helpers.py +7 -6
- StreamlitApp/utils/visualize.py +5 -2
StreamlitApp/StreamlitApp.py
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
|
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
import pandas as pd
|
| 3 |
import numpy as np
|
|
@@ -8,7 +10,7 @@ import plotly.express as px
|
|
| 8 |
import html as _html
|
| 9 |
from sklearn.manifold import TSNE
|
| 10 |
|
| 11 |
-
#
|
| 12 |
from utils.predict import load_model, predict_amp, encode_sequence
|
| 13 |
from utils.analyze import aa_composition, compute_properties
|
| 14 |
from utils.optimize import optimize_sequence
|
|
@@ -60,15 +62,15 @@ def _try_copy_to_clipboard(text: str) -> None:
|
|
| 60 |
except Exception:
|
| 61 |
pass
|
| 62 |
|
| 63 |
-
#
|
| 64 |
st.set_page_config(page_title="AMP Predictor", layout="wide")
|
| 65 |
|
| 66 |
-
#
|
| 67 |
st.title("PeptideAI")
|
| 68 |
st.write("Antimicrobial Peptide Predictor and Optimizer")
|
| 69 |
st.divider()
|
| 70 |
|
| 71 |
-
#
|
| 72 |
if "predictions" not in st.session_state:
|
| 73 |
st.session_state.predictions = [] # list of dicts
|
| 74 |
if "predict_ran" not in st.session_state:
|
|
@@ -83,6 +85,8 @@ if "optimize_input" not in st.session_state:
|
|
| 83 |
st.session_state.optimize_input = "" # last optimize input
|
| 84 |
if "optimize_output" not in st.session_state:
|
| 85 |
st.session_state.optimize_output = None # (orig_seq, orig_conf, improved_seq, improved_conf, history)
|
|
|
|
|
|
|
| 86 |
if "visualize_sequences" not in st.session_state:
|
| 87 |
st.session_state.visualize_sequences = None
|
| 88 |
if "visualize_df" not in st.session_state:
|
|
@@ -90,7 +94,7 @@ if "visualize_df" not in st.session_state:
|
|
| 90 |
if "visualize_peptide_input" not in st.session_state:
|
| 91 |
st.session_state.visualize_peptide_input = ""
|
| 92 |
|
| 93 |
-
#
|
| 94 |
st.sidebar.header("Navigation")
|
| 95 |
page = st.sidebar.radio(
|
| 96 |
"Go to",
|
|
@@ -98,15 +102,14 @@ page = st.sidebar.radio(
|
|
| 98 |
"Predict",
|
| 99 |
"Analyze",
|
| 100 |
"Optimize",
|
| 101 |
-
"Visualize
|
| 102 |
-
"
|
| 103 |
"About",
|
| 104 |
],
|
| 105 |
)
|
| 106 |
|
| 107 |
if st.sidebar.button("Clear All Fields"):
|
| 108 |
-
|
| 109 |
-
# clear only our known keys
|
| 110 |
keys = [
|
| 111 |
"predictions",
|
| 112 |
"predict_ran",
|
|
@@ -115,6 +118,7 @@ if st.sidebar.button("Clear All Fields"):
|
|
| 115 |
"analyze_output",
|
| 116 |
"optimize_input",
|
| 117 |
"optimize_output",
|
|
|
|
| 118 |
"visualize_sequences",
|
| 119 |
"visualize_df",
|
| 120 |
"visualize_peptide_input",
|
|
@@ -123,8 +127,7 @@ if st.sidebar.button("Clear All Fields"):
|
|
| 123 |
if k in st.session_state:
|
| 124 |
del st.session_state[k]
|
| 125 |
st.sidebar.success("Cleared app state.")
|
| 126 |
-
#
|
| 127 |
-
# Use a version-safe call so Spaces don't fail with AttributeError.
|
| 128 |
rerun_fn = getattr(st, "rerun", None) or getattr(st, "experimental_rerun", None)
|
| 129 |
if rerun_fn is not None:
|
| 130 |
rerun_fn()
|
|
@@ -132,10 +135,22 @@ if st.sidebar.button("Clear All Fields"):
|
|
| 132 |
st.stop()
|
| 133 |
|
| 134 |
|
| 135 |
-
#
|
| 136 |
model = load_model()
|
| 137 |
|
| 138 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 139 |
if page == "Predict":
|
| 140 |
st.header("AMP Predictor")
|
| 141 |
|
|
@@ -156,7 +171,7 @@ if page == "Predict":
|
|
| 156 |
)
|
| 157 |
uploaded_file = st.file_uploader("Or upload a FASTA/text file", type=["txt", "fasta"])
|
| 158 |
|
| 159 |
-
#
|
| 160 |
preview_sequences = [s.strip() for s in (seq_input or "").splitlines() if s.strip()]
|
| 161 |
if preview_sequences:
|
| 162 |
short_cnt = sum(1 for s in preview_sequences if len(s) < 8)
|
|
@@ -170,7 +185,7 @@ if page == "Predict":
|
|
| 170 |
|
| 171 |
if run:
|
| 172 |
|
| 173 |
-
#
|
| 174 |
sequences = []
|
| 175 |
if seq_input:
|
| 176 |
sequences += [s.strip() for s in seq_input.splitlines() if s.strip()]
|
|
@@ -196,12 +211,12 @@ if page == "Predict":
|
|
| 196 |
progress.progress((i + 1) / max(1, len(sequences)), text=f"Predicted {i + 1}/{len(sequences)}")
|
| 197 |
progress.progress(1.0)
|
| 198 |
|
| 199 |
-
# Persist
|
| 200 |
st.session_state.predictions = results
|
| 201 |
st.session_state.predict_ran = True
|
| 202 |
st.success("Prediction complete.")
|
| 203 |
|
| 204 |
-
#
|
| 205 |
if st.session_state.predictions and not (run and st.session_state.predict_ran is False):
|
| 206 |
st.divider()
|
| 207 |
|
|
@@ -228,27 +243,29 @@ if page == "Predict":
|
|
| 228 |
st.write(f"Reason: {top_candidate['Reason']}")
|
| 229 |
|
| 230 |
st.divider()
|
| 231 |
-
#
|
| 232 |
st.dataframe(pd.DataFrame(st.session_state.predictions), use_container_width=True)
|
| 233 |
csv = pd.DataFrame(st.session_state.predictions).to_csv(index=False)
|
| 234 |
st.download_button("Download predictions as CSV", csv, "predictions.csv", "text/csv")
|
| 235 |
|
| 236 |
-
#
|
| 237 |
elif page == "Analyze":
|
| 238 |
st.header("Peptide Analyzer")
|
| 239 |
|
| 240 |
-
#
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
|
|
|
|
|
|
| 246 |
|
| 247 |
warn = sequence_length_warning(seq)
|
| 248 |
if warn:
|
| 249 |
st.caption(f"Warning: {warn}")
|
| 250 |
|
| 251 |
-
# only
|
| 252 |
if seq and seq != st.session_state.get("analyze_input", ""):
|
| 253 |
with st.spinner("Running analysis..."):
|
| 254 |
label, conf = predict_amp(seq, model)
|
|
@@ -258,11 +275,11 @@ elif page == "Analyze":
|
|
| 258 |
comp = aa_composition(seq)
|
| 259 |
props = compute_properties(seq)
|
| 260 |
|
| 261 |
-
#
|
| 262 |
net_charge = props.get("Net Charge (approx.)",
|
| 263 |
props.get("Net charge", props.get("NetCharge", 0)))
|
| 264 |
|
| 265 |
-
#
|
| 266 |
length = props.get("Length", len(seq))
|
| 267 |
hydro = props.get("Hydrophobic Fraction", props.get("Hydrophobic", 0))
|
| 268 |
charge = net_charge
|
|
@@ -294,11 +311,11 @@ elif page == "Analyze":
|
|
| 294 |
if comp.get("C", 0) + comp.get("W", 0) >= 2:
|
| 295 |
analysis.append("Multiple cysteine/tryptophan residues may improve activity.")
|
| 296 |
|
| 297 |
-
# Save
|
| 298 |
st.session_state.analyze_input = seq
|
| 299 |
st.session_state.analyze_output = (label, conf, conf_display, comp, props, analysis)
|
| 300 |
|
| 301 |
-
#
|
| 302 |
if st.session_state.analyze_output:
|
| 303 |
label, conf, conf_display, comp, props, analysis = st.session_state.analyze_output
|
| 304 |
|
|
@@ -306,7 +323,7 @@ elif page == "Analyze":
|
|
| 306 |
display_conf = round(conf * 100, 1) if label == "AMP" else round((1 - conf) * 100, 1)
|
| 307 |
st.write(f"Prediction: **{label}** with **{display_conf}%** confidence")
|
| 308 |
|
| 309 |
-
#
|
| 310 |
hydro = props.get("Hydrophobic Fraction", 0)
|
| 311 |
charge = props.get("Net Charge (approx.)", props.get("Net charge", 0))
|
| 312 |
health_label, color = sequence_health_label(float(conf), float(charge), float(hydro))
|
|
@@ -321,7 +338,7 @@ elif page == "Analyze":
|
|
| 321 |
|
| 322 |
st.subheader("Physicochemical Properties and Favorability")
|
| 323 |
|
| 324 |
-
#
|
| 325 |
length = props.get("Length", len(st.session_state.analyze_input))
|
| 326 |
hydro = props.get("Hydrophobic Fraction", 0)
|
| 327 |
charge = props.get("Net Charge (approx.)", props.get("Net charge", 0))
|
|
@@ -345,7 +362,7 @@ elif page == "Analyze":
|
|
| 345 |
"font-size:12px; font-weight:700; cursor:help;\">(i)</span>"
|
| 346 |
)
|
| 347 |
|
| 348 |
-
#
|
| 349 |
hydro_label = f"Hydrophobic Fraction{_info_icon('Fraction of residues that prefer non-aqueous environments')}"
|
| 350 |
charge_label = f"Net Charge{_info_icon('Positive charge helps peptides bind bacterial membranes')}"
|
| 351 |
table_html = (
|
|
@@ -357,7 +374,7 @@ elif page == "Analyze":
|
|
| 357 |
"left:50%;"
|
| 358 |
"top:125%;"
|
| 359 |
"transform:translateX(-50%);"
|
| 360 |
-
"max-width:
|
| 361 |
"white-space:normal;"
|
| 362 |
"padding:8px 10px;"
|
| 363 |
"background:rgba(30,30,30,0.95);"
|
|
@@ -400,7 +417,7 @@ elif page == "Analyze":
|
|
| 400 |
angles = np.linspace(0, 2 * np.pi, len(categories), endpoint=False).tolist()
|
| 401 |
angles += angles[:1]
|
| 402 |
|
| 403 |
-
#
|
| 404 |
fig, ax = plt.subplots(figsize=(2.8, 3.2), subplot_kw=dict(polar=True))
|
| 405 |
fig.patch.set_facecolor("white")
|
| 406 |
ax.fill_between(angles, ideal_min, ideal_max, color='#457a00', alpha=0.15, label="Ideal AMP range")
|
|
@@ -436,12 +453,12 @@ elif page == "Analyze":
|
|
| 436 |
st.caption("Run analysis with a sequence to compare against known AMPs.")
|
| 437 |
|
| 438 |
st.divider()
|
| 439 |
-
#
|
| 440 |
st.subheader("Analysis Summary")
|
| 441 |
for line in analysis:
|
| 442 |
st.write(f"- {line}")
|
| 443 |
|
| 444 |
-
# Export analysis
|
| 445 |
st.divider()
|
| 446 |
st.subheader("Export Analysis Report")
|
| 447 |
export_format = st.radio("Format", ["CSV", "TXT"], horizontal=True)
|
|
@@ -485,25 +502,24 @@ elif page == "Analyze":
|
|
| 485 |
mime="text/plain",
|
| 486 |
)
|
| 487 |
|
| 488 |
-
#
|
| 489 |
elif page == "Optimize":
|
| 490 |
st.header("Peptide Optimizer")
|
| 491 |
|
| 492 |
-
|
| 493 |
-
with st.form("optimize_form", clear_on_submit=False):
|
| 494 |
seq = st.text_input(
|
| 495 |
"Enter a peptide sequence to optimize:",
|
| 496 |
-
|
| 497 |
)
|
| 498 |
-
submitted = st.form_submit_button("Run Optimization")
|
| 499 |
|
| 500 |
warn_opt = sequence_length_warning(seq) if seq else None
|
| 501 |
if warn_opt:
|
| 502 |
st.caption(f"Warning: {warn_opt}")
|
| 503 |
|
| 504 |
-
|
|
|
|
| 505 |
seq = str(seq).strip()
|
| 506 |
-
st.session_state.
|
| 507 |
progress = st.progress(0.0, text="Optimizing...")
|
| 508 |
with st.spinner("Optimizing sequence..."):
|
| 509 |
improved_seq, improved_conf, history = optimize_sequence(seq, model)
|
|
@@ -512,7 +528,7 @@ elif page == "Optimize":
|
|
| 512 |
progress.progress(1.0, text="Optimization complete")
|
| 513 |
st.success("Optimization finished.")
|
| 514 |
|
| 515 |
-
#
|
| 516 |
if st.session_state.optimize_output:
|
| 517 |
orig_seq, orig_conf, improved_seq, improved_conf, history = st.session_state.optimize_output
|
| 518 |
summary = optimization_summary(orig_seq, orig_conf, improved_seq, improved_conf)
|
|
@@ -536,7 +552,7 @@ elif page == "Optimize":
|
|
| 536 |
)
|
| 537 |
|
| 538 |
st.divider()
|
| 539 |
-
#
|
| 540 |
st.subheader("Mutation Heatmap (Changed Residues Highlighted)")
|
| 541 |
st.markdown(mutation_heatmap_html(orig_seq, improved_seq), unsafe_allow_html=True)
|
| 542 |
with st.expander("Mutation Details (table)"):
|
|
@@ -555,7 +571,7 @@ elif page == "Optimize":
|
|
| 555 |
st.subheader("Mutation Steps")
|
| 556 |
st.dataframe(df_steps, use_container_width=True)
|
| 557 |
|
| 558 |
-
#
|
| 559 |
step_nums = df_steps["Step"].tolist()
|
| 560 |
conf_values = df_steps["New Confidence (%)"].tolist()
|
| 561 |
df_graph = pd.DataFrame({"Step": step_nums, "Confidence (%)": conf_values})
|
|
@@ -563,32 +579,14 @@ elif page == "Optimize":
|
|
| 563 |
fig.update_layout(yaxis=dict(range=[0, 100]), title="Confidence Improvement Over Steps")
|
| 564 |
st.plotly_chart(fig, use_container_width=True)
|
| 565 |
|
| 566 |
-
#
|
| 567 |
-
elif page == "Visualize
|
| 568 |
st.header("Peptide Visualizer")
|
| 569 |
-
|
| 570 |
-
|
| 571 |
-
|
| 572 |
-
|
| 573 |
-
|
| 574 |
-
padding-top: 0.3rem !important;
|
| 575 |
-
padding-bottom: 0.3rem !important;
|
| 576 |
-
min-height: 2rem !important;
|
| 577 |
-
}
|
| 578 |
-
div[data-testid="stExpander"] details div[data-testid="stMarkdownContainer"] {
|
| 579 |
-
max-height: 6.5rem;
|
| 580 |
-
overflow-y: auto;
|
| 581 |
-
}
|
| 582 |
-
</style>
|
| 583 |
-
""",
|
| 584 |
-
unsafe_allow_html=True,
|
| 585 |
-
)
|
| 586 |
-
|
| 587 |
-
st.text_input(
|
| 588 |
-
"Enter a peptide sequence to visualize:",
|
| 589 |
-
key="visualize_peptide_input",
|
| 590 |
-
placeholder="Paste or type a one-letter amino-acid sequence",
|
| 591 |
-
)
|
| 592 |
|
| 593 |
seq_viz = (st.session_state.get("visualize_peptide_input") or "").strip()
|
| 594 |
clean_viz = "".join(c for c in seq_viz.upper() if not c.isspace())
|
|
@@ -634,23 +632,23 @@ elif page == "Visualize Peptide":
|
|
| 634 |
with st.expander("Map · legend", expanded=False):
|
| 635 |
st.markdown(COMPACT_MAP_LEGEND)
|
| 636 |
|
| 637 |
-
#
|
| 638 |
-
elif page == "
|
| 639 |
st.header("t-SNE Visualizer")
|
| 640 |
st.write("Upload peptide sequences (FASTA or plain list) to embed sequences and explore clusters with t-SNE.")
|
| 641 |
|
| 642 |
uploaded_file = st.file_uploader("Upload FASTA or text file", type=["txt", "fasta"])
|
| 643 |
|
| 644 |
-
#
|
| 645 |
if uploaded_file:
|
| 646 |
text = uploaded_file.read().decode("utf-8")
|
| 647 |
sequences = [l.strip() for l in text.splitlines() if not l.startswith(">") and l.strip()]
|
| 648 |
st.session_state.visualize_sequences = sequences
|
| 649 |
|
| 650 |
-
#
|
| 651 |
st.session_state.visualize_df = None
|
| 652 |
|
| 653 |
-
#
|
| 654 |
if st.session_state.visualize_sequences and st.session_state.visualize_df is None:
|
| 655 |
sequences = st.session_state.visualize_sequences
|
| 656 |
if len(sequences) < 2:
|
|
@@ -660,7 +658,7 @@ elif page == "Visualize t-SNE":
|
|
| 660 |
with st.spinner("Generating embedding..."):
|
| 661 |
embeddings_list, labels, confs, lengths, hydros, charges = [], [], [], [], [], []
|
| 662 |
|
| 663 |
-
# Use model
|
| 664 |
embedding_extractor = torch.nn.Sequential(*list(model.layers)[:-1])
|
| 665 |
|
| 666 |
for i, s in enumerate(sequences):
|
|
@@ -693,7 +691,7 @@ elif page == "Visualize t-SNE":
|
|
| 693 |
st.session_state.visualize_df = df
|
| 694 |
progress.progress(1.0, text="Embedding ready")
|
| 695 |
|
| 696 |
-
#
|
| 697 |
if st.session_state.visualize_df is not None:
|
| 698 |
df = st.session_state.visualize_df
|
| 699 |
st.subheader("t-SNE plot")
|
|
@@ -729,7 +727,7 @@ elif page == "Visualize t-SNE":
|
|
| 729 |
• Coloring by properties reveals biochemical trends.
|
| 730 |
""")
|
| 731 |
|
| 732 |
-
#
|
| 733 |
elif page == "About":
|
| 734 |
st.header("About the Project")
|
| 735 |
st.markdown("""
|
|
@@ -739,8 +737,8 @@ It uses a trained neural network to estimate whether a peptide is likely to be a
|
|
| 739 |
- **AMP Predictor**: batch predictions from multi-line or FASTA input, length warnings, persisted results, top-candidate highlight, and CSV export.
|
| 740 |
- **Peptide Analyzer**: single-sequence numerical and textual analysis — AMP prediction, composition, physicochemical table + radar, similarity to known AMPs, and report export.
|
| 741 |
- **Peptide Optimizer**: guided sequence optimization with Enter-to-run input, mutation heatmap, step table, and confidence-vs-step trend.
|
| 742 |
-
- **
|
| 743 |
-
- **t-SNE
|
| 744 |
- **About**: this overview and disclaimer.
|
| 745 |
|
| 746 |
**Disclaimer:** Predictions are model-based heuristics and are **not** a substitute for wet-lab validation or regulatory use.
|
|
|
|
| 1 |
+
"""Main Streamlit entrypoint wiring Predict, Analyze, Optimize, Visualize, and t-SNE pages."""
|
| 2 |
+
|
| 3 |
import streamlit as st
|
| 4 |
import pandas as pd
|
| 5 |
import numpy as np
|
|
|
|
| 10 |
import html as _html
|
| 11 |
from sklearn.manifold import TSNE
|
| 12 |
|
| 13 |
+
# Page features are implemented in utils so this file stays orchestration-focused.
|
| 14 |
from utils.predict import load_model, predict_amp, encode_sequence
|
| 15 |
from utils.analyze import aa_composition, compute_properties
|
| 16 |
from utils.optimize import optimize_sequence
|
|
|
|
| 62 |
except Exception:
|
| 63 |
pass
|
| 64 |
|
| 65 |
+
# Configure global app layout once before rendering widgets.
|
| 66 |
st.set_page_config(page_title="AMP Predictor", layout="wide")
|
| 67 |
|
| 68 |
+
# Global title shown above all pages.
|
| 69 |
st.title("PeptideAI")
|
| 70 |
st.write("Antimicrobial Peptide Predictor and Optimizer")
|
| 71 |
st.divider()
|
| 72 |
|
| 73 |
+
# Initialize session keys so navigation keeps user state across pages.
|
| 74 |
if "predictions" not in st.session_state:
|
| 75 |
st.session_state.predictions = [] # list of dicts
|
| 76 |
if "predict_ran" not in st.session_state:
|
|
|
|
| 85 |
st.session_state.optimize_input = "" # last optimize input
|
| 86 |
if "optimize_output" not in st.session_state:
|
| 87 |
st.session_state.optimize_output = None # (orig_seq, orig_conf, improved_seq, improved_conf, history)
|
| 88 |
+
if "optimize_last_ran_input" not in st.session_state:
|
| 89 |
+
st.session_state.optimize_last_ran_input = ""
|
| 90 |
if "visualize_sequences" not in st.session_state:
|
| 91 |
st.session_state.visualize_sequences = None
|
| 92 |
if "visualize_df" not in st.session_state:
|
|
|
|
| 94 |
if "visualize_peptide_input" not in st.session_state:
|
| 95 |
st.session_state.visualize_peptide_input = ""
|
| 96 |
|
| 97 |
+
# Sidebar route selector drives top-level page rendering.
|
| 98 |
st.sidebar.header("Navigation")
|
| 99 |
page = st.sidebar.radio(
|
| 100 |
"Go to",
|
|
|
|
| 102 |
"Predict",
|
| 103 |
"Analyze",
|
| 104 |
"Optimize",
|
| 105 |
+
"Visualize",
|
| 106 |
+
"t-SNE",
|
| 107 |
"About",
|
| 108 |
],
|
| 109 |
)
|
| 110 |
|
| 111 |
if st.sidebar.button("Clear All Fields"):
|
| 112 |
+
# Reset only app-owned state keys, then rerun to refresh all widgets.
|
|
|
|
| 113 |
keys = [
|
| 114 |
"predictions",
|
| 115 |
"predict_ran",
|
|
|
|
| 118 |
"analyze_output",
|
| 119 |
"optimize_input",
|
| 120 |
"optimize_output",
|
| 121 |
+
"optimize_last_ran_input",
|
| 122 |
"visualize_sequences",
|
| 123 |
"visualize_df",
|
| 124 |
"visualize_peptide_input",
|
|
|
|
| 127 |
if k in st.session_state:
|
| 128 |
del st.session_state[k]
|
| 129 |
st.sidebar.success("Cleared app state.")
|
| 130 |
+
# Support both old and new Streamlit rerun APIs.
|
|
|
|
| 131 |
rerun_fn = getattr(st, "rerun", None) or getattr(st, "experimental_rerun", None)
|
| 132 |
if rerun_fn is not None:
|
| 133 |
rerun_fn()
|
|
|
|
| 135 |
st.stop()
|
| 136 |
|
| 137 |
|
| 138 |
+
# Cache model weights once per server process for fast repeated inference.
|
| 139 |
model = load_model()
|
| 140 |
|
| 141 |
+
# Shared style tweak keeps expander spacing consistent across pages.
|
| 142 |
+
st.markdown(
|
| 143 |
+
"""<style>
|
| 144 |
+
div[data-testid="stExpander"] details > summary {
|
| 145 |
+
padding-top: 0.3rem !important;
|
| 146 |
+
padding-bottom: 0.3rem !important;
|
| 147 |
+
min-height: 2rem !important;
|
| 148 |
+
}
|
| 149 |
+
</style>""",
|
| 150 |
+
unsafe_allow_html=True,
|
| 151 |
+
)
|
| 152 |
+
|
| 153 |
+
# Predict page: batch inference from text area and optional upload.
|
| 154 |
if page == "Predict":
|
| 155 |
st.header("AMP Predictor")
|
| 156 |
|
|
|
|
| 171 |
)
|
| 172 |
uploaded_file = st.file_uploader("Or upload a FASTA/text file", type=["txt", "fasta"])
|
| 173 |
|
| 174 |
+
# Show quick length guidance before running the model.
|
| 175 |
preview_sequences = [s.strip() for s in (seq_input or "").splitlines() if s.strip()]
|
| 176 |
if preview_sequences:
|
| 177 |
short_cnt = sum(1 for s in preview_sequences if len(s) < 8)
|
|
|
|
| 185 |
|
| 186 |
if run:
|
| 187 |
|
| 188 |
+
# Merge direct text input and uploaded FASTA/plain-text entries.
|
| 189 |
sequences = []
|
| 190 |
if seq_input:
|
| 191 |
sequences += [s.strip() for s in seq_input.splitlines() if s.strip()]
|
|
|
|
| 211 |
progress.progress((i + 1) / max(1, len(sequences)), text=f"Predicted {i + 1}/{len(sequences)}")
|
| 212 |
progress.progress(1.0)
|
| 213 |
|
| 214 |
+
# Persist results so users can switch pages without losing output.
|
| 215 |
st.session_state.predictions = results
|
| 216 |
st.session_state.predict_ran = True
|
| 217 |
st.success("Prediction complete.")
|
| 218 |
|
| 219 |
+
# Always show latest saved prediction set for continuity across navigation.
|
| 220 |
if st.session_state.predictions and not (run and st.session_state.predict_ran is False):
|
| 221 |
st.divider()
|
| 222 |
|
|
|
|
| 243 |
st.write(f"Reason: {top_candidate['Reason']}")
|
| 244 |
|
| 245 |
st.divider()
|
| 246 |
+
# Full table + CSV export preserve the complete prediction batch.
|
| 247 |
st.dataframe(pd.DataFrame(st.session_state.predictions), use_container_width=True)
|
| 248 |
csv = pd.DataFrame(st.session_state.predictions).to_csv(index=False)
|
| 249 |
st.download_button("Download predictions as CSV", csv, "predictions.csv", "text/csv")
|
| 250 |
|
| 251 |
+
# Analyze page: single-sequence diagnostics and report export.
|
| 252 |
elif page == "Analyze":
|
| 253 |
st.header("Peptide Analyzer")
|
| 254 |
|
| 255 |
+
# Match optimizer-like boxed input style for consistent UI spacing.
|
| 256 |
+
with st.container(border=True):
|
| 257 |
+
# Seed input with previous analyzed sequence for quick iteration.
|
| 258 |
+
last_seq = st.session_state.analyze_input
|
| 259 |
+
seq = st.text_input(
|
| 260 |
+
"Enter a peptide sequence to analyze:",
|
| 261 |
+
value=last_seq,
|
| 262 |
+
)
|
| 263 |
|
| 264 |
warn = sequence_length_warning(seq)
|
| 265 |
if warn:
|
| 266 |
st.caption(f"Warning: {warn}")
|
| 267 |
|
| 268 |
+
# Recompute only when sequence changes to avoid redundant work on reruns.
|
| 269 |
if seq and seq != st.session_state.get("analyze_input", ""):
|
| 270 |
with st.spinner("Running analysis..."):
|
| 271 |
label, conf = predict_amp(seq, model)
|
|
|
|
| 275 |
comp = aa_composition(seq)
|
| 276 |
props = compute_properties(seq)
|
| 277 |
|
| 278 |
+
# Normalize property key variants returned by helper functions.
|
| 279 |
net_charge = props.get("Net Charge (approx.)",
|
| 280 |
props.get("Net charge", props.get("NetCharge", 0)))
|
| 281 |
|
| 282 |
+
# Build short, user-facing interpretation bullets.
|
| 283 |
length = props.get("Length", len(seq))
|
| 284 |
hydro = props.get("Hydrophobic Fraction", props.get("Hydrophobic", 0))
|
| 285 |
charge = net_charge
|
|
|
|
| 311 |
if comp.get("C", 0) + comp.get("W", 0) >= 2:
|
| 312 |
analysis.append("Multiple cysteine/tryptophan residues may improve activity.")
|
| 313 |
|
| 314 |
+
# Save computed payload for display + report exports below.
|
| 315 |
st.session_state.analyze_input = seq
|
| 316 |
st.session_state.analyze_output = (label, conf, conf_display, comp, props, analysis)
|
| 317 |
|
| 318 |
+
# Render last computed analysis block.
|
| 319 |
if st.session_state.analyze_output:
|
| 320 |
label, conf, conf_display, comp, props, analysis = st.session_state.analyze_output
|
| 321 |
|
|
|
|
| 323 |
display_conf = round(conf * 100, 1) if label == "AMP" else round((1 - conf) * 100, 1)
|
| 324 |
st.write(f"Prediction: **{label}** with **{display_conf}%** confidence")
|
| 325 |
|
| 326 |
+
# Health badge blends model confidence with simple chemistry heuristics.
|
| 327 |
hydro = props.get("Hydrophobic Fraction", 0)
|
| 328 |
charge = props.get("Net Charge (approx.)", props.get("Net charge", 0))
|
| 329 |
health_label, color = sequence_health_label(float(conf), float(charge), float(hydro))
|
|
|
|
| 338 |
|
| 339 |
st.subheader("Physicochemical Properties and Favorability")
|
| 340 |
|
| 341 |
+
# Pull fields defensively in case key names vary.
|
| 342 |
length = props.get("Length", len(st.session_state.analyze_input))
|
| 343 |
hydro = props.get("Hydrophobic Fraction", 0)
|
| 344 |
charge = props.get("Net Charge (approx.)", props.get("Net charge", 0))
|
|
|
|
| 362 |
"font-size:12px; font-weight:700; cursor:help;\">(i)</span>"
|
| 363 |
)
|
| 364 |
|
| 365 |
+
# Use HTML table for custom inline "(i)" tooltips.
|
| 366 |
hydro_label = f"Hydrophobic Fraction{_info_icon('Fraction of residues that prefer non-aqueous environments')}"
|
| 367 |
charge_label = f"Net Charge{_info_icon('Positive charge helps peptides bind bacterial membranes')}"
|
| 368 |
table_html = (
|
|
|
|
| 374 |
"left:50%;"
|
| 375 |
"top:125%;"
|
| 376 |
"transform:translateX(-50%);"
|
| 377 |
+
"max-width:1080px;"
|
| 378 |
"white-space:normal;"
|
| 379 |
"padding:8px 10px;"
|
| 380 |
"background:rgba(30,30,30,0.95);"
|
|
|
|
| 417 |
angles = np.linspace(0, 2 * np.pi, len(categories), endpoint=False).tolist()
|
| 418 |
angles += angles[:1]
|
| 419 |
|
| 420 |
+
# Compact radar chart compares sequence values against an "ideal AMP" band.
|
| 421 |
fig, ax = plt.subplots(figsize=(2.8, 3.2), subplot_kw=dict(polar=True))
|
| 422 |
fig.patch.set_facecolor("white")
|
| 423 |
ax.fill_between(angles, ideal_min, ideal_max, color='#457a00', alpha=0.15, label="Ideal AMP range")
|
|
|
|
| 453 |
st.caption("Run analysis with a sequence to compare against known AMPs.")
|
| 454 |
|
| 455 |
st.divider()
|
| 456 |
+
# Summarize key findings as plain-language bullets.
|
| 457 |
st.subheader("Analysis Summary")
|
| 458 |
for line in analysis:
|
| 459 |
st.write(f"- {line}")
|
| 460 |
|
| 461 |
+
# Export section packages current analysis in CSV or TXT format.
|
| 462 |
st.divider()
|
| 463 |
st.subheader("Export Analysis Report")
|
| 464 |
export_format = st.radio("Format", ["CSV", "TXT"], horizontal=True)
|
|
|
|
| 502 |
mime="text/plain",
|
| 503 |
)
|
| 504 |
|
| 505 |
+
# Optimize page: greedy mutation search with per-step diagnostics.
|
| 506 |
elif page == "Optimize":
|
| 507 |
st.header("Peptide Optimizer")
|
| 508 |
|
| 509 |
+
with st.container(border=True):
|
|
|
|
| 510 |
seq = st.text_input(
|
| 511 |
"Enter a peptide sequence to optimize:",
|
| 512 |
+
key="optimize_input",
|
| 513 |
)
|
|
|
|
| 514 |
|
| 515 |
warn_opt = sequence_length_warning(seq) if seq else None
|
| 516 |
if warn_opt:
|
| 517 |
st.caption(f"Warning: {warn_opt}")
|
| 518 |
|
| 519 |
+
# Re-run optimization when the entered sequence changes.
|
| 520 |
+
if seq and str(seq).strip() and str(seq).strip() != st.session_state.get("optimize_last_ran_input", ""):
|
| 521 |
seq = str(seq).strip()
|
| 522 |
+
st.session_state.optimize_last_ran_input = seq
|
| 523 |
progress = st.progress(0.0, text="Optimizing...")
|
| 524 |
with st.spinner("Optimizing sequence..."):
|
| 525 |
improved_seq, improved_conf, history = optimize_sequence(seq, model)
|
|
|
|
| 528 |
progress.progress(1.0, text="Optimization complete")
|
| 529 |
st.success("Optimization finished.")
|
| 530 |
|
| 531 |
+
# Render latest optimization artifacts from session state.
|
| 532 |
if st.session_state.optimize_output:
|
| 533 |
orig_seq, orig_conf, improved_seq, improved_conf, history = st.session_state.optimize_output
|
| 534 |
summary = optimization_summary(orig_seq, orig_conf, improved_seq, improved_conf)
|
|
|
|
| 552 |
)
|
| 553 |
|
| 554 |
st.divider()
|
| 555 |
+
# Heatmap + table make residue-level edits easy to inspect.
|
| 556 |
st.subheader("Mutation Heatmap (Changed Residues Highlighted)")
|
| 557 |
st.markdown(mutation_heatmap_html(orig_seq, improved_seq), unsafe_allow_html=True)
|
| 558 |
with st.expander("Mutation Details (table)"):
|
|
|
|
| 571 |
st.subheader("Mutation Steps")
|
| 572 |
st.dataframe(df_steps, use_container_width=True)
|
| 573 |
|
| 574 |
+
# Trend line shows confidence gain over accepted mutation steps.
|
| 575 |
step_nums = df_steps["Step"].tolist()
|
| 576 |
conf_values = df_steps["New Confidence (%)"].tolist()
|
| 577 |
df_graph = pd.DataFrame({"Step": step_nums, "Confidence (%)": conf_values})
|
|
|
|
| 579 |
fig.update_layout(yaxis=dict(range=[0, 100]), title="Confidence Improvement Over Steps")
|
| 580 |
st.plotly_chart(fig, use_container_width=True)
|
| 581 |
|
| 582 |
+
# Visualize page: structural/sequence interpretation for one peptide.
|
| 583 |
+
elif page == "Visualize":
|
| 584 |
st.header("Peptide Visualizer")
|
| 585 |
+
with st.container(border=True):
|
| 586 |
+
st.text_input(
|
| 587 |
+
"Enter a peptide sequence to visualize:",
|
| 588 |
+
key="visualize_peptide_input",
|
| 589 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 590 |
|
| 591 |
seq_viz = (st.session_state.get("visualize_peptide_input") or "").strip()
|
| 592 |
clean_viz = "".join(c for c in seq_viz.upper() if not c.isspace())
|
|
|
|
| 632 |
with st.expander("Map · legend", expanded=False):
|
| 633 |
st.markdown(COMPACT_MAP_LEGEND)
|
| 634 |
|
| 635 |
+
# t-SNE page: embedding projection for multi-sequence exploration.
|
| 636 |
+
elif page == "t-SNE":
|
| 637 |
st.header("t-SNE Visualizer")
|
| 638 |
st.write("Upload peptide sequences (FASTA or plain list) to embed sequences and explore clusters with t-SNE.")
|
| 639 |
|
| 640 |
uploaded_file = st.file_uploader("Upload FASTA or text file", type=["txt", "fasta"])
|
| 641 |
|
| 642 |
+
# Parse upload and replace previous sequence set.
|
| 643 |
if uploaded_file:
|
| 644 |
text = uploaded_file.read().decode("utf-8")
|
| 645 |
sequences = [l.strip() for l in text.splitlines() if not l.startswith(">") and l.strip()]
|
| 646 |
st.session_state.visualize_sequences = sequences
|
| 647 |
|
| 648 |
+
# Invalidate previous embedding projection after new upload.
|
| 649 |
st.session_state.visualize_df = None
|
| 650 |
|
| 651 |
+
# Compute embeddings once and cache the projected dataframe in session.
|
| 652 |
if st.session_state.visualize_sequences and st.session_state.visualize_df is None:
|
| 653 |
sequences = st.session_state.visualize_sequences
|
| 654 |
if len(sequences) < 2:
|
|
|
|
| 658 |
with st.spinner("Generating embedding..."):
|
| 659 |
embeddings_list, labels, confs, lengths, hydros, charges = [], [], [], [], [], []
|
| 660 |
|
| 661 |
+
# Use penultimate model representation as embedding features.
|
| 662 |
embedding_extractor = torch.nn.Sequential(*list(model.layers)[:-1])
|
| 663 |
|
| 664 |
for i, s in enumerate(sequences):
|
|
|
|
| 691 |
st.session_state.visualize_df = df
|
| 692 |
progress.progress(1.0, text="Embedding ready")
|
| 693 |
|
| 694 |
+
# Render interactive scatter + filters once a projected dataframe exists.
|
| 695 |
if st.session_state.visualize_df is not None:
|
| 696 |
df = st.session_state.visualize_df
|
| 697 |
st.subheader("t-SNE plot")
|
|
|
|
| 727 |
• Coloring by properties reveals biochemical trends.
|
| 728 |
""")
|
| 729 |
|
| 730 |
+
# About page: quick orientation + disclaimer for new users.
|
| 731 |
elif page == "About":
|
| 732 |
st.header("About the Project")
|
| 733 |
st.markdown("""
|
|
|
|
| 737 |
- **AMP Predictor**: batch predictions from multi-line or FASTA input, length warnings, persisted results, top-candidate highlight, and CSV export.
|
| 738 |
- **Peptide Analyzer**: single-sequence numerical and textual analysis — AMP prediction, composition, physicochemical table + radar, similarity to known AMPs, and report export.
|
| 739 |
- **Peptide Optimizer**: guided sequence optimization with Enter-to-run input, mutation heatmap, step table, and confidence-vs-step trend.
|
| 740 |
+
- **Visualize**: single-sequence 3D approximation + detailed helical wheel + functional region map with consistent residue coloring and clear legend dropdowns.
|
| 741 |
+
- **t-SNE**: upload many sequences, embed with the model, run t-SNE, and explore clusters with filters and hover metadata.
|
| 742 |
- **About**: this overview and disclaimer.
|
| 743 |
|
| 744 |
**Disclaimer:** Predictions are model-based heuristics and are **not** a substitute for wet-lab validation or regulatory use.
|
StreamlitApp/utils/analyze.py
CHANGED
|
@@ -1,15 +1,16 @@
|
|
|
|
|
|
|
|
| 1 |
from collections import Counter
|
| 2 |
|
| 3 |
def aa_composition(sequence):
    """Return the fraction of ``sequence`` made up of each canonical amino acid.

    Args:
        sequence: Peptide as a string of one-letter residue codes. Matching
            is case-sensitive; characters outside the 20 canonical codes
            still count toward the total length but get no entry of their
            own.

    Returns:
        dict mapping each of the 20 canonical one-letter codes to its
        frequency in ``sequence`` (count / len(sequence)). Every value is
        0.0 for an empty sequence.
    """
    amino_acids = "ACDEFGHIKLMNPQRSTVWY"
    total = len(sequence)
    if total == 0:
        # An empty peptide has no composition; report zeros instead of
        # raising ZeroDivisionError on the division below.
        return dict.fromkeys(amino_acids, 0.0)
    counts = Counter(sequence)
    return {aa: counts.get(aa, 0) / total for aa in amino_acids}
|
| 8 |
|
| 9 |
-
# Compute sequence properties
|
| 10 |
def compute_properties(sequence):
|
| 11 |
-
|
| 12 |
-
# Property calculations
|
| 13 |
aa_weights = {'A': 89.1, 'R': 174.2, 'N': 132.1, 'D': 133.1, 'C': 121.2,
|
| 14 |
'E': 147.1, 'Q': 146.2, 'G': 75.1, 'H': 155.2, 'I': 131.2,
|
| 15 |
'L': 131.2, 'K': 146.2, 'M': 149.2, 'F': 165.2, 'P': 115.1,
|
|
|
|
| 1 |
+
"""Sequence composition and physicochemical property helpers."""
|
| 2 |
+
|
| 3 |
from collections import Counter
|
| 4 |
|
| 5 |
def aa_composition(sequence):
    """Return normalized frequencies for the 20 canonical amino acids.

    Args:
        sequence: Peptide sequence as a string of one-letter residue codes.
            Matching is case-sensitive and done against uppercase codes.

    Returns:
        Dict mapping each of the 20 canonical residues to its fraction of
        the full sequence length. Non-canonical characters still count
        toward the length, so the fractions may sum to less than 1. An
        empty (or None) sequence yields 0.0 for every residue.
    """
    amino_acids = "ACDEFGHIKLMNPQRSTVWY"

    # Guard the divisor: an empty input used to raise ZeroDivisionError.
    if not sequence:
        return {aa: 0.0 for aa in amino_acids}

    counts = Counter(sequence)
    total = len(sequence)
    return {aa: counts.get(aa, 0) / total for aa in amino_acids}
|
| 11 |
|
|
|
|
| 12 |
def compute_properties(sequence):
|
| 13 |
+
"""Compute simple length, mass, hydrophobicity, and net-charge signals."""
|
|
|
|
| 14 |
aa_weights = {'A': 89.1, 'R': 174.2, 'N': 132.1, 'D': 133.1, 'C': 121.2,
|
| 15 |
'E': 147.1, 'Q': 146.2, 'G': 75.1, 'H': 155.2, 'I': 131.2,
|
| 16 |
'L': 131.2, 'K': 146.2, 'M': 149.2, 'F': 165.2, 'P': 115.1,
|
StreamlitApp/utils/optimize.py
CHANGED
|
@@ -1,13 +1,16 @@
|
|
|
|
|
|
|
|
| 1 |
import random
|
| 2 |
from utils.predict import predict_amp
|
| 3 |
|
|
|
|
| 4 |
HYDROPHOBIC = set("AILMFWVPG")
|
| 5 |
HYDROPHILIC = set("STNQYCH")
|
| 6 |
POSITIVE = set("KRH")
|
| 7 |
NEGATIVE = set("DE")
|
| 8 |
|
| 9 |
-
# Function to mutate a residue based on simple heuristics
|
| 10 |
def mutate_residue(residue):
|
|
|
|
| 11 |
if residue in POSITIVE:
|
| 12 |
return residue, "Retained strong positive residue"
|
| 13 |
elif residue in NEGATIVE:
|
|
@@ -19,7 +22,6 @@ def mutate_residue(residue):
|
|
| 19 |
else:
|
| 20 |
return random.choice(list(HYDROPHOBIC)), "Adjusted physicochemical profile"
|
| 21 |
|
| 22 |
-
# Sequence optimization function
|
| 23 |
def optimize_sequence(seq, model, max_rounds=20, confidence_threshold=0.001):
|
| 24 |
"""
|
| 25 |
Iteratively optimize sequence to increase AMP probability.
|
|
@@ -30,7 +32,7 @@ def optimize_sequence(seq, model, max_rounds=20, confidence_threshold=0.001):
|
|
| 30 |
best_conf = conf
|
| 31 |
history = [(current_seq, conf, "-", "-", "-", "Original sequence")]
|
| 32 |
|
| 33 |
-
#
|
| 34 |
for _ in range(max_rounds):
|
| 35 |
best_mutation = None
|
| 36 |
best_mutation_conf = best_conf
|
|
@@ -53,7 +55,7 @@ def optimize_sequence(seq, model, max_rounds=20, confidence_threshold=0.001):
|
|
| 53 |
history.append((current_seq, best_conf, change, old_res, new_res, reason))
|
| 54 |
else:
|
| 55 |
|
| 56 |
-
#
|
| 57 |
break
|
| 58 |
|
| 59 |
return current_seq, best_conf, history
|
|
|
|
| 1 |
+
"""Heuristic mutation search used by the Optimize page."""
|
| 2 |
+
|
| 3 |
import random
|
| 4 |
from utils.predict import predict_amp
|
| 5 |
|
| 6 |
+
# Residue groups used to propose chemistry-aware substitutions.
|
| 7 |
HYDROPHOBIC = set("AILMFWVPG")
|
| 8 |
HYDROPHILIC = set("STNQYCH")
|
| 9 |
POSITIVE = set("KRH")
|
| 10 |
NEGATIVE = set("DE")
|
| 11 |
|
|
|
|
| 12 |
def mutate_residue(residue):
|
| 13 |
+
"""Return a candidate replacement residue and rationale."""
|
| 14 |
if residue in POSITIVE:
|
| 15 |
return residue, "Retained strong positive residue"
|
| 16 |
elif residue in NEGATIVE:
|
|
|
|
| 22 |
else:
|
| 23 |
return random.choice(list(HYDROPHOBIC)), "Adjusted physicochemical profile"
|
| 24 |
|
|
|
|
| 25 |
def optimize_sequence(seq, model, max_rounds=20, confidence_threshold=0.001):
|
| 26 |
"""
|
| 27 |
Iteratively optimize sequence to increase AMP probability.
|
|
|
|
| 32 |
best_conf = conf
|
| 33 |
history = [(current_seq, conf, "-", "-", "-", "Original sequence")]
|
| 34 |
|
| 35 |
+
# Greedy loop: keep only the best confidence-improving mutation each round.
|
| 36 |
for _ in range(max_rounds):
|
| 37 |
best_mutation = None
|
| 38 |
best_mutation_conf = best_conf
|
|
|
|
| 55 |
history.append((current_seq, best_conf, change, old_res, new_res, reason))
|
| 56 |
else:
|
| 57 |
|
| 58 |
+
# Stop when no mutation clears the minimum improvement threshold.
|
| 59 |
break
|
| 60 |
|
| 61 |
return current_seq, best_conf, history
|
StreamlitApp/utils/peptide_extras.py
CHANGED
|
@@ -121,6 +121,7 @@ def sequence_similarity(seq1: str, seq2: str) -> float:
|
|
| 121 |
|
| 122 |
|
| 123 |
def find_most_similar(sequence: str) -> Tuple[Optional[str], float]:
|
|
|
|
| 124 |
if not sequence or not KNOWN_AMPS:
|
| 125 |
return None, 0.0
|
| 126 |
seq = "".join(c for c in sequence.upper() if not c.isspace())
|
|
@@ -204,6 +205,7 @@ def plot_helical_wheel(sequence: str, figsize: Tuple[float, float] = (6.2, 6.2))
|
|
| 204 |
import matplotlib.pyplot as plt
|
| 205 |
from matplotlib import patheffects as pe
|
| 206 |
|
|
|
|
| 207 |
clean = "".join(c for c in (sequence or "").upper() if not c.isspace())
|
| 208 |
n = len(clean)
|
| 209 |
fig, ax = plt.subplots(figsize=figsize, subplot_kw={"projection": "polar"})
|
|
@@ -247,6 +249,7 @@ def plot_helical_wheel(sequence: str, figsize: Tuple[float, float] = (6.2, 6.2))
|
|
| 247 |
zorder=2,
|
| 248 |
)
|
| 249 |
|
|
|
|
| 250 |
colors = [residue_color_mpl(aa) for aa in clean]
|
| 251 |
ax.scatter(
|
| 252 |
angles_rad,
|
|
@@ -303,6 +306,7 @@ def build_importance_map_html(sequence: str) -> str:
|
|
| 303 |
"""Build HTML for residue importance highlighting (escape non-AA safely)."""
|
| 304 |
import html as html_mod
|
| 305 |
|
|
|
|
| 306 |
parts: List[str] = []
|
| 307 |
for ch in sequence:
|
| 308 |
if ch.isspace():
|
|
@@ -379,6 +383,7 @@ def render_3d_structure(
|
|
| 379 |
"""
|
| 380 |
import streamlit.components.v1 as components
|
| 381 |
|
|
|
|
| 382 |
clean = "".join(c for c in (sequence or "").upper() if not c.isspace())
|
| 383 |
if not clean:
|
| 384 |
return False
|
|
|
|
| 121 |
|
| 122 |
|
| 123 |
def find_most_similar(sequence: str) -> Tuple[Optional[str], float]:
|
| 124 |
+
"""Return the closest known AMP and simple position-match similarity score."""
|
| 125 |
if not sequence or not KNOWN_AMPS:
|
| 126 |
return None, 0.0
|
| 127 |
seq = "".join(c for c in sequence.upper() if not c.isspace())
|
|
|
|
| 205 |
import matplotlib.pyplot as plt
|
| 206 |
from matplotlib import patheffects as pe
|
| 207 |
|
| 208 |
+
# Normalize user input to whitespace-free uppercase sequence.
|
| 209 |
clean = "".join(c for c in (sequence or "").upper() if not c.isspace())
|
| 210 |
n = len(clean)
|
| 211 |
fig, ax = plt.subplots(figsize=figsize, subplot_kw={"projection": "polar"})
|
|
|
|
| 249 |
zorder=2,
|
| 250 |
)
|
| 251 |
|
| 252 |
+
# Draw residue nodes after spokes/connectors so labels stay readable.
|
| 253 |
colors = [residue_color_mpl(aa) for aa in clean]
|
| 254 |
ax.scatter(
|
| 255 |
angles_rad,
|
|
|
|
| 306 |
"""Build HTML for residue importance highlighting (escape non-AA safely)."""
|
| 307 |
import html as html_mod
|
| 308 |
|
| 309 |
+
# Emit one colored <span> per residue for inline sequence highlighting.
|
| 310 |
parts: List[str] = []
|
| 311 |
for ch in sequence:
|
| 312 |
if ch.isspace():
|
|
|
|
| 383 |
"""
|
| 384 |
import streamlit.components.v1 as components
|
| 385 |
|
| 386 |
+
# Input sanitization keeps renderer stable across pasted FASTA/text snippets.
|
| 387 |
clean = "".join(c for c in (sequence or "").upper() if not c.isspace())
|
| 388 |
if not clean:
|
| 389 |
return False
|
StreamlitApp/utils/predict.py
CHANGED
|
@@ -1,10 +1,12 @@
|
|
|
|
|
|
|
|
| 1 |
import pathlib
|
| 2 |
import numpy as np
|
| 3 |
import torch
|
| 4 |
import streamlit as st
|
| 5 |
from torch import nn
|
| 6 |
|
| 7 |
-
#
|
| 8 |
class FastMLP(nn.Module):
|
| 9 |
def __init__(self, input_dim=1024):
|
| 10 |
super(FastMLP, self).__init__()
|
|
@@ -20,9 +22,9 @@ class FastMLP(nn.Module):
|
|
| 20 |
def forward(self, x):
|
| 21 |
return self.layers(x)
|
| 22 |
|
| 23 |
-
# Model Loader
|
| 24 |
@st.cache_resource
|
| 25 |
def load_model():
|
|
|
|
| 26 |
# Always resolve relative to the StreamlitApp folder, not the process CWD.
|
| 27 |
streamlitapp_dir = pathlib.Path(__file__).resolve().parent.parent
|
| 28 |
repo_root = streamlitapp_dir.parent
|
|
@@ -42,13 +44,12 @@ def load_model():
|
|
| 42 |
f"- {streamlitapp_dir / 'models' / 'ampMLModel.pt'}\n"
|
| 43 |
)
|
| 44 |
|
| 45 |
-
#
|
| 46 |
model = FastMLP(input_dim=1024)
|
| 47 |
model.load_state_dict(torch.load(str(model_path), map_location="cpu"))
|
| 48 |
model.eval()
|
| 49 |
return model
|
| 50 |
|
| 51 |
-
# Sequence Encoder
|
| 52 |
def encode_sequence(seq, max_len=51):
|
| 53 |
"""
|
| 54 |
Converts amino acid sequence to flattened one-hot vector
|
|
@@ -57,19 +58,19 @@ def encode_sequence(seq, max_len=51):
|
|
| 57 |
amino_acids = "ACDEFGHIKLMNPQRSTVWY"
|
| 58 |
aa_to_idx = {aa: i for i, aa in enumerate(amino_acids)}
|
| 59 |
|
| 60 |
-
|
|
|
|
| 61 |
for i, aa in enumerate(seq[:max_len]):
|
| 62 |
if aa in aa_to_idx:
|
| 63 |
one_hot[i, aa_to_idx[aa]] = 1
|
| 64 |
|
| 65 |
-
flat = one_hot.flatten()
|
| 66 |
|
| 67 |
if len(flat) < 1024:
|
| 68 |
flat = np.pad(flat, (0, 1024 - len(flat)))
|
| 69 |
|
| 70 |
return flat
|
| 71 |
|
| 72 |
-
# Prediction Function
|
| 73 |
def predict_amp(sequence, model):
|
| 74 |
"""
|
| 75 |
Takes an amino acid sequence string and the loaded model,
|
|
@@ -77,6 +78,7 @@ def predict_amp(sequence, model):
|
|
| 77 |
"""
|
| 78 |
x = torch.tensor(encode_sequence(sequence), dtype=torch.float32).unsqueeze(0)
|
| 79 |
|
|
|
|
| 80 |
with torch.no_grad():
|
| 81 |
logits = model(x)
|
| 82 |
prob = torch.sigmoid(logits).item()
|
|
|
|
| 1 |
+
"""Model loading, sequence encoding, and AMP inference helpers."""
|
| 2 |
+
|
| 3 |
import pathlib
|
| 4 |
import numpy as np
|
| 5 |
import torch
|
| 6 |
import streamlit as st
|
| 7 |
from torch import nn
|
| 8 |
|
| 9 |
+
# Lightweight MLP used for AMP binary classification.
|
| 10 |
class FastMLP(nn.Module):
|
| 11 |
def __init__(self, input_dim=1024):
|
| 12 |
super(FastMLP, self).__init__()
|
|
|
|
| 22 |
def forward(self, x):
|
| 23 |
return self.layers(x)
|
| 24 |
|
|
|
|
| 25 |
@st.cache_resource
|
| 26 |
def load_model():
|
| 27 |
+
"""Load model weights once per Streamlit process."""
|
| 28 |
# Always resolve relative to the StreamlitApp folder, not the process CWD.
|
| 29 |
streamlitapp_dir = pathlib.Path(__file__).resolve().parent.parent
|
| 30 |
repo_root = streamlitapp_dir.parent
|
|
|
|
| 44 |
f"- {streamlitapp_dir / 'models' / 'ampMLModel.pt'}\n"
|
| 45 |
)
|
| 46 |
|
| 47 |
+
# Instantiate architecture and hydrate weights from disk.
|
| 48 |
model = FastMLP(input_dim=1024)
|
| 49 |
model.load_state_dict(torch.load(str(model_path), map_location="cpu"))
|
| 50 |
model.eval()
|
| 51 |
return model
|
| 52 |
|
|
|
|
| 53 |
def encode_sequence(seq, max_len=51):
|
| 54 |
"""
|
| 55 |
Converts amino acid sequence to flattened one-hot vector
|
|
|
|
| 58 |
amino_acids = "ACDEFGHIKLMNPQRSTVWY"
|
| 59 |
aa_to_idx = {aa: i for i, aa in enumerate(amino_acids)}
|
| 60 |
|
| 61 |
+
# Encode each residue as a one-hot row, then flatten to vector features.
|
| 62 |
+
one_hot = np.zeros((max_len, len(amino_acids)))
|
| 63 |
for i, aa in enumerate(seq[:max_len]):
|
| 64 |
if aa in aa_to_idx:
|
| 65 |
one_hot[i, aa_to_idx[aa]] = 1
|
| 66 |
|
| 67 |
+
flat = one_hot.flatten()
|
| 68 |
|
| 69 |
if len(flat) < 1024:
|
| 70 |
flat = np.pad(flat, (0, 1024 - len(flat)))
|
| 71 |
|
| 72 |
return flat
|
| 73 |
|
|
|
|
| 74 |
def predict_amp(sequence, model):
|
| 75 |
"""
|
| 76 |
Takes an amino acid sequence string and the loaded model,
|
|
|
|
| 78 |
"""
|
| 79 |
x = torch.tensor(encode_sequence(sequence), dtype=torch.float32).unsqueeze(0)
|
| 80 |
|
| 81 |
+
# Sigmoid(logit) gives AMP probability in [0, 1].
|
| 82 |
with torch.no_grad():
|
| 83 |
logits = model(x)
|
| 84 |
prob = torch.sigmoid(logits).item()
|
StreamlitApp/utils/rateLimit.py
CHANGED
|
@@ -1,9 +1,10 @@
|
|
|
|
|
|
|
|
| 1 |
import time
|
| 2 |
from collections import deque
|
| 3 |
|
| 4 |
class RateLimiter:
|
| 5 |
-
|
| 6 |
-
#Sliding-window rate limiter per instance
|
| 7 |
def __init__(self, max_calls: int, period_seconds: float):
|
| 8 |
self.max_calls = max_calls
|
| 9 |
self.period = period_seconds
|
|
@@ -12,7 +13,7 @@ class RateLimiter:
|
|
| 12 |
def allow(self) -> bool:
|
| 13 |
now = time.time()
|
| 14 |
|
| 15 |
-
# Drop
|
| 16 |
while self.calls and self.calls[0] <= now - self.period:
|
| 17 |
self.calls.popleft()
|
| 18 |
if len(self.calls) < self.max_calls:
|
|
@@ -21,8 +22,7 @@ class RateLimiter:
|
|
| 21 |
return False
|
| 22 |
|
| 23 |
def time_until_next(self) -> float:
|
| 24 |
-
|
| 25 |
-
# Seconds until next slot is available (0 if already available)
|
| 26 |
now = time.time()
|
| 27 |
if len(self.calls) < self.max_calls:
|
| 28 |
return 0.0
|
|
|
|
| 1 |
+
"""Simple in-memory sliding-window rate limiter."""
|
| 2 |
+
|
| 3 |
import time
|
| 4 |
from collections import deque
|
| 5 |
|
| 6 |
class RateLimiter:
|
| 7 |
+
# Each instance tracks call timestamps for one caller/key.
|
|
|
|
| 8 |
def __init__(self, max_calls: int, period_seconds: float):
|
| 9 |
self.max_calls = max_calls
|
| 10 |
self.period = period_seconds
|
|
|
|
| 13 |
def allow(self) -> bool:
|
| 14 |
now = time.time()
|
| 15 |
|
| 16 |
+
# Drop timestamps outside the active window.
|
| 17 |
while self.calls and self.calls[0] <= now - self.period:
|
| 18 |
self.calls.popleft()
|
| 19 |
if len(self.calls) < self.max_calls:
|
|
|
|
| 22 |
return False
|
| 23 |
|
| 24 |
def time_until_next(self) -> float:
|
| 25 |
+
# Return wait time before another call is allowed (seconds).
|
|
|
|
| 26 |
now = time.time()
|
| 27 |
if len(self.calls) < self.max_calls:
|
| 28 |
return 0.0
|
StreamlitApp/utils/ui_helpers.py
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
|
|
|
|
|
| 1 |
import html as _html
|
| 2 |
from typing import Dict, List, Tuple, Optional
|
| 3 |
|
|
@@ -20,7 +22,7 @@ def predicted_confidence(row: Dict) -> Optional[float]:
|
|
| 20 |
return None
|
| 21 |
if pred == "AMP":
|
| 22 |
return p_amp
|
| 23 |
-
#
|
| 24 |
return 1.0 - p_amp
|
| 25 |
|
| 26 |
|
|
@@ -47,7 +49,7 @@ def choose_top_candidate(predictions: List[Dict]) -> Optional[Dict]:
|
|
| 47 |
if not predictions:
|
| 48 |
return None
|
| 49 |
|
| 50 |
-
# Prefer AMP
|
| 51 |
amp_rows = [r for r in predictions if r.get("Prediction") == "AMP"]
|
| 52 |
rows = amp_rows if amp_rows else predictions
|
| 53 |
|
|
@@ -90,7 +92,7 @@ def mutation_heatmap_html(original: str, final: str) -> str:
|
|
| 90 |
fin = final or ""
|
| 91 |
max_len = max(len(orig), len(fin))
|
| 92 |
|
| 93 |
-
#
|
| 94 |
out: List[str] = [
|
| 95 |
"<div style='font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, \"Liberation Mono\", monospace; white-space: pre-wrap;'>"
|
| 96 |
]
|
|
@@ -142,7 +144,7 @@ def optimization_summary(orig_seq: str, orig_conf: float, final_seq: str, final_
|
|
| 142 |
orig_seq = orig_seq or ""
|
| 143 |
final_seq = final_seq or ""
|
| 144 |
|
| 145 |
-
#
|
| 146 |
props_orig = compute_properties(orig_seq) if orig_seq else {}
|
| 147 |
props_final = compute_properties(final_seq) if final_seq else {}
|
| 148 |
|
|
@@ -198,8 +200,7 @@ def sequence_health_label(conf_prob: float, charge: float, hydro_fraction: float
|
|
| 198 |
"""
|
| 199 |
Returns: (label, color_css)
|
| 200 |
"""
|
| 201 |
-
#
|
| 202 |
-
# of charge/hydrophobicity heuristics (prevents "moderate" at ~99%).
|
| 203 |
if conf_prob >= 0.9:
|
| 204 |
return "Strong AMP candidate", "#2ca02c"
|
| 205 |
if conf_prob > 0.75 and charge >= 2 and 0.3 <= hydro_fraction <= 0.6:
|
|
|
|
| 1 |
+
"""UI-facing formatting and summary helpers shared across pages."""
|
| 2 |
+
|
| 3 |
import html as _html
|
| 4 |
from typing import Dict, List, Tuple, Optional
|
| 5 |
|
|
|
|
| 22 |
return None
|
| 23 |
if pred == "AMP":
|
| 24 |
return p_amp
|
| 25 |
+
# Convert AMP probability into confidence for the predicted class.
|
| 26 |
return 1.0 - p_amp
|
| 27 |
|
| 28 |
|
|
|
|
| 49 |
if not predictions:
|
| 50 |
return None
|
| 51 |
|
| 52 |
+
# Prefer AMP rows first, then fall back to highest-confidence overall row.
|
| 53 |
amp_rows = [r for r in predictions if r.get("Prediction") == "AMP"]
|
| 54 |
rows = amp_rows if amp_rows else predictions
|
| 55 |
|
|
|
|
| 92 |
fin = final or ""
|
| 93 |
max_len = max(len(orig), len(fin))
|
| 94 |
|
| 95 |
+
# Use monospace layout so per-position residue changes align visually.
|
| 96 |
out: List[str] = [
|
| 97 |
"<div style='font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, \"Liberation Mono\", monospace; white-space: pre-wrap;'>"
|
| 98 |
]
|
|
|
|
| 144 |
orig_seq = orig_seq or ""
|
| 145 |
final_seq = final_seq or ""
|
| 146 |
|
| 147 |
+
# Property deltas drive the compact "what changed" summary panel.
|
| 148 |
props_orig = compute_properties(orig_seq) if orig_seq else {}
|
| 149 |
props_final = compute_properties(final_seq) if final_seq else {}
|
| 150 |
|
|
|
|
| 200 |
"""
|
| 201 |
Returns: (label, color_css)
|
| 202 |
"""
|
| 203 |
+
# Very high model confidence is treated as strong even outside ideal property ranges.
|
|
|
|
| 204 |
if conf_prob >= 0.9:
|
| 205 |
return "Strong AMP candidate", "#2ca02c"
|
| 206 |
if conf_prob > 0.75 and charge >= 2 and 0.3 <= hydro_fraction <= 0.6:
|
StreamlitApp/utils/visualize.py
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
|
|
|
|
|
| 1 |
import pandas as pd
|
| 2 |
import matplotlib.pyplot as plt
|
| 3 |
from sklearn.manifold import TSNE
|
|
@@ -6,14 +8,15 @@ import torch
|
|
| 6 |
import numpy as np
|
| 7 |
from utils.predict import encode_sequence
|
| 8 |
|
| 9 |
-
# t-SNE Visualization
|
| 10 |
def tsne_visualization(sequences, model):
|
|
|
|
| 11 |
st.info("Generating embeddings... this may take a moment.")
|
| 12 |
embeddings = []
|
| 13 |
for seq in sequences:
|
| 14 |
x = torch.tensor(encode_sequence(seq), dtype=torch.float32).unsqueeze(0)
|
| 15 |
with torch.no_grad():
|
| 16 |
-
|
|
|
|
| 17 |
embeddings.append(emb.numpy().flatten())
|
| 18 |
|
| 19 |
embeddings = np.vstack(embeddings)
|
|
|
|
| 1 |
+
"""Legacy t-SNE helper retained for ad-hoc embedding previews."""
|
| 2 |
+
|
| 3 |
import pandas as pd
|
| 4 |
import matplotlib.pyplot as plt
|
| 5 |
from sklearn.manifold import TSNE
|
|
|
|
| 8 |
import numpy as np
|
| 9 |
from utils.predict import encode_sequence
|
| 10 |
|
|
|
|
| 11 |
def tsne_visualization(sequences, model):
|
| 12 |
+
"""Project model embeddings into 2D and render a quick scatter plot."""
|
| 13 |
st.info("Generating embeddings... this may take a moment.")
|
| 14 |
embeddings = []
|
| 15 |
for seq in sequences:
|
| 16 |
x = torch.tensor(encode_sequence(seq), dtype=torch.float32).unsqueeze(0)
|
| 17 |
with torch.no_grad():
|
| 18 |
+
# Use an early hidden layer as a compact learned representation.
|
| 19 |
+
emb = model.layers[0](x)
|
| 20 |
embeddings.append(emb.numpy().flatten())
|
| 21 |
|
| 22 |
embeddings = np.vstack(embeddings)
|