Spaces:

m0ksh
/

PeptideAI

Sleeping

App Files Files Community

m0ksh committed on Mar 23

Commit

68a01ab

verified ·

1 Parent(s): cccf8bd

Sync from GitHub (preserve manual model files)

Browse files

Files changed (8) hide show

StreamlitApp/StreamlitApp.py +80 -82
StreamlitApp/utils/analyze.py +4 -3
StreamlitApp/utils/optimize.py +6 -4
StreamlitApp/utils/peptide_extras.py +5 -0
StreamlitApp/utils/predict.py +9 -7
StreamlitApp/utils/rateLimit.py +5 -5
StreamlitApp/utils/ui_helpers.py +7 -6
StreamlitApp/utils/visualize.py +5 -2

StreamlitApp/StreamlitApp.py CHANGED Viewed

@@ -1,3 +1,5 @@
 import streamlit as st
 import pandas as pd
 import numpy as np
@@ -8,7 +10,7 @@ import plotly.express as px
 import html as _html
 from sklearn.manifold import TSNE
-# modular imports
 from utils.predict import load_model, predict_amp, encode_sequence
 from utils.analyze import aa_composition, compute_properties
 from utils.optimize import optimize_sequence
@@ -60,15 +62,15 @@ def _try_copy_to_clipboard(text: str) -> None:
         except Exception:
             pass
-# APP CONFIG
 st.set_page_config(page_title="AMP Predictor", layout="wide")
-# App title
 st.title("PeptideAI")
 st.write("Antimicrobial Peptide Predictor and Optimizer")
 st.divider()
-# SESSION STATE KEYS (one-time init)
 if "predictions" not in st.session_state:
     st.session_state.predictions = []               # list of dicts
 if "predict_ran" not in st.session_state:
@@ -83,6 +85,8 @@ if "optimize_input" not in st.session_state:
     st.session_state.optimize_input = ""           # last optimize input
 if "optimize_output" not in st.session_state:
     st.session_state.optimize_output = None       # (orig_seq, orig_conf, improved_seq, improved_conf, history)
 if "visualize_sequences" not in st.session_state:
     st.session_state.visualize_sequences = None
 if "visualize_df" not in st.session_state:
@@ -90,7 +94,7 @@ if "visualize_df" not in st.session_state:
 if "visualize_peptide_input" not in st.session_state:
     st.session_state.visualize_peptide_input = ""
-# SIDEBAR: navigation + global clear
 st.sidebar.header("Navigation")
 page = st.sidebar.radio(
     "Go to",
@@ -98,15 +102,14 @@ page = st.sidebar.radio(
         "Predict",
         "Analyze",
         "Optimize",
-        "Visualize Peptide",
-        "Visualize t-SNE",
         "About",
     ],
 )
 if st.sidebar.button("Clear All Fields"):
-    # clear only our known keys
     keys = [
         "predictions",
         "predict_ran",
@@ -115,6 +118,7 @@ if st.sidebar.button("Clear All Fields"):
         "analyze_output",
         "optimize_input",
         "optimize_output",
         "visualize_sequences",
         "visualize_df",
         "visualize_peptide_input",
@@ -123,8 +127,7 @@ if st.sidebar.button("Clear All Fields"):
         if k in st.session_state:
             del st.session_state[k]
     st.sidebar.success("Cleared app state.")
-    # Streamlit renamed `experimental_rerun()` -> `rerun()` in newer versions.
-    # Use a version-safe call so Spaces don't fail with AttributeError.
     rerun_fn = getattr(st, "rerun", None) or getattr(st, "experimental_rerun", None)
     if rerun_fn is not None:
         rerun_fn()
@@ -132,10 +135,22 @@ if st.sidebar.button("Clear All Fields"):
         st.stop()
-# Load model once
 model = load_model()
-#  PREDICT PAGE
 if page == "Predict":
     st.header("AMP Predictor")
@@ -156,7 +171,7 @@ if page == "Predict":
     )
     uploaded_file = st.file_uploader("Or upload a FASTA/text file", type=["txt", "fasta"])
-    # Sequence length warnings (preview only; does not run model).
     preview_sequences = [s.strip() for s in (seq_input or "").splitlines() if s.strip()]
     if preview_sequences:
         short_cnt = sum(1 for s in preview_sequences if len(s) < 8)
@@ -170,7 +185,7 @@ if page == "Predict":
     if run:
-        # Gather sequences
         sequences = []
         if seq_input:
             sequences += [s.strip() for s in seq_input.splitlines() if s.strip()]
@@ -196,12 +211,12 @@ if page == "Predict":
                     progress.progress((i + 1) / max(1, len(sequences)), text=f"Predicted {i + 1}/{len(sequences)}")
             progress.progress(1.0)
-            # Persist new predictions and mark that we ran
             st.session_state.predictions = results
             st.session_state.predict_ran = True
             st.success("Prediction complete.")
-    # If user hasn't just run predictions, show the last saved results (if any)
     if st.session_state.predictions and not (run and st.session_state.predict_ran is False):
         st.divider()
@@ -228,27 +243,29 @@ if page == "Predict":
                 st.write(f"Reason: {top_candidate['Reason']}")
         st.divider()
-        # Keep the original dataframe for full overview/download compatibility.
         st.dataframe(pd.DataFrame(st.session_state.predictions), use_container_width=True)
         csv = pd.DataFrame(st.session_state.predictions).to_csv(index=False)
         st.download_button("Download predictions as CSV", csv, "predictions.csv", "text/csv")
-#  ANALYZE PAGE
 elif page == "Analyze":
     st.header("Peptide Analyzer")
-    # show the last saved analyze output if user navigated back
-    last_seq = st.session_state.analyze_input
-    seq = st.text_input(
-        "Enter a peptide sequence to analyze:",
-        value=last_seq,
-    )
     warn = sequence_length_warning(seq)
     if warn:
         st.caption(f"Warning: {warn}")
-    # only run analysis when input changed from last saved input
     if seq and seq != st.session_state.get("analyze_input", ""):
         with st.spinner("Running analysis..."):
             label, conf = predict_amp(seq, model)
@@ -258,11 +275,11 @@ elif page == "Analyze":
             comp = aa_composition(seq)
             props = compute_properties(seq)
-            # normalize property key names if necessary
             net_charge = props.get("Net Charge (approx.)",
                                    props.get("Net charge", props.get("NetCharge", 0)))
-            # build analysis summary (same rules as before)
             length = props.get("Length", len(seq))
             hydro = props.get("Hydrophobic Fraction", props.get("Hydrophobic", 0))
             charge = net_charge
@@ -294,11 +311,11 @@ elif page == "Analyze":
             if comp.get("C", 0) + comp.get("W", 0) >= 2:
                 analysis.append("Multiple cysteine/tryptophan residues may improve activity.")
-            # Save to session state
             st.session_state.analyze_input = seq
             st.session_state.analyze_output = (label, conf, conf_display, comp, props, analysis)
-    # If we have stored output, display it
     if st.session_state.analyze_output:
         label, conf, conf_display, comp, props, analysis = st.session_state.analyze_output
@@ -306,7 +323,7 @@ elif page == "Analyze":
         display_conf = round(conf * 100, 1) if label == "AMP" else round((1 - conf) * 100, 1)
         st.write(f"Prediction: **{label}** with **{display_conf}%** confidence")
-        # Sequence health check badge
         hydro = props.get("Hydrophobic Fraction", 0)
         charge = props.get("Net Charge (approx.)", props.get("Net charge", 0))
         health_label, color = sequence_health_label(float(conf), float(charge), float(hydro))
@@ -321,7 +338,7 @@ elif page == "Analyze":
         st.subheader("Physicochemical Properties and Favorability")
-        # pull properties safely
         length = props.get("Length", len(st.session_state.analyze_input))
         hydro = props.get("Hydrophobic Fraction", 0)
         charge = props.get("Net Charge (approx.)", props.get("Net charge", 0))
@@ -345,7 +362,7 @@ elif page == "Analyze":
                 "font-size:12px; font-weight:700; cursor:help;\">(i)</span>"
             )
-        # Render the favorability table with working inline tooltips.
         hydro_label = f"Hydrophobic Fraction{_info_icon('Fraction of residues that prefer non-aqueous environments')}"
         charge_label = f"Net Charge{_info_icon('Positive charge helps peptides bind bacterial membranes')}"
         table_html = (
@@ -357,7 +374,7 @@ elif page == "Analyze":
             "left:50%;"
             "top:125%;"
             "transform:translateX(-50%);"
-            "max-width:860px;"
             "white-space:normal;"
             "padding:8px 10px;"
             "background:rgba(30,30,30,0.95);"
@@ -400,7 +417,7 @@ elif page == "Analyze":
         angles = np.linspace(0, 2 * np.pi, len(categories), endpoint=False).tolist()
         angles += angles[:1]
-        # Adjusted figsize for better vertical space
         fig, ax = plt.subplots(figsize=(2.8, 3.2), subplot_kw=dict(polar=True))
         fig.patch.set_facecolor("white")
         ax.fill_between(angles, ideal_min, ideal_max, color='#457a00', alpha=0.15, label="Ideal AMP range")
@@ -436,12 +453,12 @@ elif page == "Analyze":
             st.caption("Run analysis with a sequence to compare against known AMPs.")
         st.divider()
-        # Analysis Summary
         st.subheader("Analysis Summary")
         for line in analysis:
             st.write(f"- {line}")
-        # Export analysis report
         st.divider()
         st.subheader("Export Analysis Report")
         export_format = st.radio("Format", ["CSV", "TXT"], horizontal=True)
@@ -485,25 +502,24 @@ elif page == "Analyze":
                 mime="text/plain",
             )
-#  OPTIMIZE PAGE
 elif page == "Optimize":
     st.header("Peptide Optimizer")
-    # Form: Enter in the text field submits the form (same as clicking Run Optimization).
-    with st.form("optimize_form", clear_on_submit=False):
         seq = st.text_input(
             "Enter a peptide sequence to optimize:",
-            value=st.session_state.get("optimize_input", ""),
         )
-        submitted = st.form_submit_button("Run Optimization")
     warn_opt = sequence_length_warning(seq) if seq else None
     if warn_opt:
         st.caption(f"Warning: {warn_opt}")
-    if submitted and seq and str(seq).strip():
         seq = str(seq).strip()
-        st.session_state.optimize_input = seq
         progress = st.progress(0.0, text="Optimizing...")
         with st.spinner("Optimizing sequence..."):
             improved_seq, improved_conf, history = optimize_sequence(seq, model)
@@ -512,7 +528,7 @@ elif page == "Optimize":
         progress.progress(1.0, text="Optimization complete")
         st.success("Optimization finished.")
-    # If there is saved output show it
     if st.session_state.optimize_output:
         orig_seq, orig_conf, improved_seq, improved_conf, history = st.session_state.optimize_output
         summary = optimization_summary(orig_seq, orig_conf, improved_seq, improved_conf)
@@ -536,7 +552,7 @@ elif page == "Optimize":
             )
         st.divider()
-        # Mutation Heatmap
         st.subheader("Mutation Heatmap (Changed Residues Highlighted)")
         st.markdown(mutation_heatmap_html(orig_seq, improved_seq), unsafe_allow_html=True)
         with st.expander("Mutation Details (table)"):
@@ -555,7 +571,7 @@ elif page == "Optimize":
             st.subheader("Mutation Steps")
             st.dataframe(df_steps, use_container_width=True)
-            # Confidence improvement plot
             step_nums = df_steps["Step"].tolist()
             conf_values = df_steps["New Confidence (%)"].tolist()
             df_graph = pd.DataFrame({"Step": step_nums, "Confidence (%)": conf_values})
@@ -563,32 +579,14 @@ elif page == "Optimize":
             fig.update_layout(yaxis=dict(range=[0, 100]), title="Confidence Improvement Over Steps")
             st.plotly_chart(fig, use_container_width=True)
-#  VISUALIZE PEPTIDE PAGE
-elif page == "Visualize Peptide":
     st.header("Peptide Visualizer")
-    # Tighter legend expanders (summary row + scrollable body)
-    st.markdown(
-        """
-        <style>
-        div[data-testid="stExpander"] details > summary {
-            padding-top: 0.3rem !important;
-            padding-bottom: 0.3rem !important;
-            min-height: 2rem !important;
-        }
-        div[data-testid="stExpander"] details div[data-testid="stMarkdownContainer"] {
-            max-height: 6.5rem;
-            overflow-y: auto;
-        }
-        </style>
-        """,
-        unsafe_allow_html=True,
-    )
-    st.text_input(
-        "Enter a peptide sequence to visualize:",
-        key="visualize_peptide_input",
-        placeholder="Paste or type a one-letter amino-acid sequence",
-    )
     seq_viz = (st.session_state.get("visualize_peptide_input") or "").strip()
     clean_viz = "".join(c for c in seq_viz.upper() if not c.isspace())
@@ -634,23 +632,23 @@ elif page == "Visualize Peptide":
             with st.expander("Map · legend", expanded=False):
                 st.markdown(COMPACT_MAP_LEGEND)
-#  VISUALIZE t-SNE PAGE
-elif page == "Visualize t-SNE":
     st.header("t-SNE Visualizer")
     st.write("Upload peptide sequences (FASTA or plain list) to embed sequences and explore clusters with t-SNE.")
     uploaded_file = st.file_uploader("Upload FASTA or text file", type=["txt", "fasta"])
-    # If file uploaded, set session sequences (replacing previous)
     if uploaded_file:
         text = uploaded_file.read().decode("utf-8")
         sequences = [l.strip() for l in text.splitlines() if not l.startswith(">") and l.strip()]
         st.session_state.visualize_sequences = sequences
-        # Clear any previous df so we recompute
         st.session_state.visualize_df = None
-    # If we have sequences stored, compute embeddings and t-SNE if no df present
     if st.session_state.visualize_sequences and st.session_state.visualize_df is None:
         sequences = st.session_state.visualize_sequences
         if len(sequences) < 2:
@@ -660,7 +658,7 @@ elif page == "Visualize t-SNE":
             with st.spinner("Generating embedding..."):
                 embeddings_list, labels, confs, lengths, hydros, charges = [], [], [], [], [], []
-                # Use model internals for embeddings; keep same approach as your module
                 embedding_extractor = torch.nn.Sequential(*list(model.layers)[:-1])
                 for i, s in enumerate(sequences):
@@ -693,7 +691,7 @@ elif page == "Visualize t-SNE":
                 st.session_state.visualize_df = df
                 progress.progress(1.0, text="Embedding ready")
-    # If we have a t-SNE dataframe, show plot and sidebar filters
     if st.session_state.visualize_df is not None:
         df = st.session_state.visualize_df
         st.subheader("t-SNE plot")
@@ -729,7 +727,7 @@ elif page == "Visualize t-SNE":
 • Coloring by properties reveals biochemical trends.
 """)
-#  ABOUT PAGE
 elif page == "About":
     st.header("About the Project")
     st.markdown("""
@@ -739,8 +737,8 @@ It uses a trained neural network to estimate whether a peptide is likely to be a
 - **AMP Predictor**: batch predictions from multi-line or FASTA input, length warnings, persisted results, top-candidate highlight, and CSV export.
 - **Peptide Analyzer**: single-sequence numerical and textual analysis — AMP prediction, composition, physicochemical table + radar, similarity to known AMPs, and report export.
 - **Peptide Optimizer**: guided sequence optimization with Enter-to-run input, mutation heatmap, step table, and confidence-vs-step trend.
-- **Peptide Visualizer**: single-sequence 3D approximation + detailed helical wheel + functional region map with consistent residue coloring and concise legend dropdowns.
-- **t-SNE Visualizer**: upload many sequences, embed with the model, run t-SNE, and explore clusters with filters and hover metadata.
 - **About**: this overview and disclaimer.
 **Disclaimer:** Predictions are model-based heuristics and are **not** a substitute for wet-lab validation or regulatory use.

+"""Main Streamlit entrypoint wiring Predict, Analyze, Optimize, Visualize, and t-SNE pages."""
 import streamlit as st
 import pandas as pd
 import numpy as np
 import html as _html
 from sklearn.manifold import TSNE
+# Page features are implemented in utils so this file stays orchestration-focused.
 from utils.predict import load_model, predict_amp, encode_sequence
 from utils.analyze import aa_composition, compute_properties
 from utils.optimize import optimize_sequence
         except Exception:
             pass
+# Configure global app layout once before rendering widgets.
 st.set_page_config(page_title="AMP Predictor", layout="wide")
+# Global title shown above all pages.
 st.title("PeptideAI")
 st.write("Antimicrobial Peptide Predictor and Optimizer")
 st.divider()
+# Initialize session keys so navigation keeps user state across pages.
 if "predictions" not in st.session_state:
     st.session_state.predictions = []               # list of dicts
 if "predict_ran" not in st.session_state:
     st.session_state.optimize_input = ""           # last optimize input
 if "optimize_output" not in st.session_state:
     st.session_state.optimize_output = None       # (orig_seq, orig_conf, improved_seq, improved_conf, history)
+if "optimize_last_ran_input" not in st.session_state:
+    st.session_state.optimize_last_ran_input = ""
 if "visualize_sequences" not in st.session_state:
     st.session_state.visualize_sequences = None
 if "visualize_df" not in st.session_state:
 if "visualize_peptide_input" not in st.session_state:
     st.session_state.visualize_peptide_input = ""
+# Sidebar route selector drives top-level page rendering.
 st.sidebar.header("Navigation")
 page = st.sidebar.radio(
     "Go to",
         "Predict",
         "Analyze",
         "Optimize",
+        "Visualize",
+        "t-SNE",
         "About",
     ],
 )
 if st.sidebar.button("Clear All Fields"):
+    # Reset only app-owned state keys, then rerun to refresh all widgets.
     keys = [
         "predictions",
         "predict_ran",
         "analyze_output",
         "optimize_input",
         "optimize_output",
+        "optimize_last_ran_input",
         "visualize_sequences",
         "visualize_df",
         "visualize_peptide_input",
         if k in st.session_state:
             del st.session_state[k]
     st.sidebar.success("Cleared app state.")
+    # Support both old and new Streamlit rerun APIs.
     rerun_fn = getattr(st, "rerun", None) or getattr(st, "experimental_rerun", None)
     if rerun_fn is not None:
         rerun_fn()
         st.stop()
+# Cache model weights once per server process for fast repeated inference.
 model = load_model()
+# Shared style tweak keeps expander spacing consistent across pages.
+st.markdown(
+    """<style>
+    div[data-testid="stExpander"] details > summary {
+        padding-top: 0.3rem !important;
+        padding-bottom: 0.3rem !important;
+        min-height: 2rem !important;
+    }
+    </style>""",
+    unsafe_allow_html=True,
+)
+# Predict page: batch inference from text area and optional upload.
 if page == "Predict":
     st.header("AMP Predictor")
     )
     uploaded_file = st.file_uploader("Or upload a FASTA/text file", type=["txt", "fasta"])
+    # Show quick length guidance before running the model.
     preview_sequences = [s.strip() for s in (seq_input or "").splitlines() if s.strip()]
     if preview_sequences:
         short_cnt = sum(1 for s in preview_sequences if len(s) < 8)
     if run:
+        # Merge direct text input and uploaded FASTA/plain-text entries.
         sequences = []
         if seq_input:
             sequences += [s.strip() for s in seq_input.splitlines() if s.strip()]
                     progress.progress((i + 1) / max(1, len(sequences)), text=f"Predicted {i + 1}/{len(sequences)}")
             progress.progress(1.0)
+            # Persist results so users can switch pages without losing output.
             st.session_state.predictions = results
             st.session_state.predict_ran = True
             st.success("Prediction complete.")
+    # Always show latest saved prediction set for continuity across navigation.
     if st.session_state.predictions and not (run and st.session_state.predict_ran is False):
         st.divider()
                 st.write(f"Reason: {top_candidate['Reason']}")
         st.divider()
+        # Full table + CSV export preserve the complete prediction batch.
         st.dataframe(pd.DataFrame(st.session_state.predictions), use_container_width=True)
         csv = pd.DataFrame(st.session_state.predictions).to_csv(index=False)
         st.download_button("Download predictions as CSV", csv, "predictions.csv", "text/csv")
+# Analyze page: single-sequence diagnostics and report export.
 elif page == "Analyze":
     st.header("Peptide Analyzer")
+    # Match optimizer-like boxed input style for consistent UI spacing.
+    with st.container(border=True):
+        # Seed input with previous analyzed sequence for quick iteration.
+        last_seq = st.session_state.analyze_input
+        seq = st.text_input(
+            "Enter a peptide sequence to analyze:",
+            value=last_seq,
+        )
     warn = sequence_length_warning(seq)
     if warn:
         st.caption(f"Warning: {warn}")
+    # Recompute only when sequence changes to avoid redundant work on reruns.
     if seq and seq != st.session_state.get("analyze_input", ""):
         with st.spinner("Running analysis..."):
             label, conf = predict_amp(seq, model)
             comp = aa_composition(seq)
             props = compute_properties(seq)
+            # Normalize property key variants returned by helper functions.
             net_charge = props.get("Net Charge (approx.)",
                                    props.get("Net charge", props.get("NetCharge", 0)))
+            # Build short, user-facing interpretation bullets.
             length = props.get("Length", len(seq))
             hydro = props.get("Hydrophobic Fraction", props.get("Hydrophobic", 0))
             charge = net_charge
             if comp.get("C", 0) + comp.get("W", 0) >= 2:
                 analysis.append("Multiple cysteine/tryptophan residues may improve activity.")
+            # Save computed payload for display + report exports below.
             st.session_state.analyze_input = seq
             st.session_state.analyze_output = (label, conf, conf_display, comp, props, analysis)
+    # Render last computed analysis block.
     if st.session_state.analyze_output:
         label, conf, conf_display, comp, props, analysis = st.session_state.analyze_output
         display_conf = round(conf * 100, 1) if label == "AMP" else round((1 - conf) * 100, 1)
         st.write(f"Prediction: **{label}** with **{display_conf}%** confidence")
+        # Health badge blends model confidence with simple chemistry heuristics.
         hydro = props.get("Hydrophobic Fraction", 0)
         charge = props.get("Net Charge (approx.)", props.get("Net charge", 0))
         health_label, color = sequence_health_label(float(conf), float(charge), float(hydro))
         st.subheader("Physicochemical Properties and Favorability")
+        # Pull fields defensively in case key names vary.
         length = props.get("Length", len(st.session_state.analyze_input))
         hydro = props.get("Hydrophobic Fraction", 0)
         charge = props.get("Net Charge (approx.)", props.get("Net charge", 0))
                 "font-size:12px; font-weight:700; cursor:help;\">(i)</span>"
             )
+        # Use HTML table for custom inline "(i)" tooltips.
         hydro_label = f"Hydrophobic Fraction{_info_icon('Fraction of residues that prefer non-aqueous environments')}"
         charge_label = f"Net Charge{_info_icon('Positive charge helps peptides bind bacterial membranes')}"
         table_html = (
             "left:50%;"
             "top:125%;"
             "transform:translateX(-50%);"
+            "max-width:1080px;"
             "white-space:normal;"
             "padding:8px 10px;"
             "background:rgba(30,30,30,0.95);"
         angles = np.linspace(0, 2 * np.pi, len(categories), endpoint=False).tolist()
         angles += angles[:1]
+        # Compact radar chart compares sequence values against an "ideal AMP" band.
         fig, ax = plt.subplots(figsize=(2.8, 3.2), subplot_kw=dict(polar=True))
         fig.patch.set_facecolor("white")
         ax.fill_between(angles, ideal_min, ideal_max, color='#457a00', alpha=0.15, label="Ideal AMP range")
             st.caption("Run analysis with a sequence to compare against known AMPs.")
         st.divider()
+        # Summarize key findings as plain-language bullets.
         st.subheader("Analysis Summary")
         for line in analysis:
             st.write(f"- {line}")
+        # Export section packages current analysis in CSV or TXT format.
         st.divider()
         st.subheader("Export Analysis Report")
         export_format = st.radio("Format", ["CSV", "TXT"], horizontal=True)
                 mime="text/plain",
             )
+# Optimize page: greedy mutation search with per-step diagnostics.
 elif page == "Optimize":
     st.header("Peptide Optimizer")
+    with st.container(border=True):
         seq = st.text_input(
             "Enter a peptide sequence to optimize:",
+            key="optimize_input",
         )
     warn_opt = sequence_length_warning(seq) if seq else None
     if warn_opt:
         st.caption(f"Warning: {warn_opt}")
+    # Re-run optimization when the entered sequence changes.
+    if seq and str(seq).strip() and str(seq).strip() != st.session_state.get("optimize_last_ran_input", ""):
         seq = str(seq).strip()
+        st.session_state.optimize_last_ran_input = seq
         progress = st.progress(0.0, text="Optimizing...")
         with st.spinner("Optimizing sequence..."):
             improved_seq, improved_conf, history = optimize_sequence(seq, model)
         progress.progress(1.0, text="Optimization complete")
         st.success("Optimization finished.")
+    # Render latest optimization artifacts from session state.
     if st.session_state.optimize_output:
         orig_seq, orig_conf, improved_seq, improved_conf, history = st.session_state.optimize_output
         summary = optimization_summary(orig_seq, orig_conf, improved_seq, improved_conf)
             )
         st.divider()
+        # Heatmap + table make residue-level edits easy to inspect.
         st.subheader("Mutation Heatmap (Changed Residues Highlighted)")
         st.markdown(mutation_heatmap_html(orig_seq, improved_seq), unsafe_allow_html=True)
         with st.expander("Mutation Details (table)"):
             st.subheader("Mutation Steps")
             st.dataframe(df_steps, use_container_width=True)
+            # Trend line shows confidence gain over accepted mutation steps.
             step_nums = df_steps["Step"].tolist()
             conf_values = df_steps["New Confidence (%)"].tolist()
             df_graph = pd.DataFrame({"Step": step_nums, "Confidence (%)": conf_values})
             fig.update_layout(yaxis=dict(range=[0, 100]), title="Confidence Improvement Over Steps")
             st.plotly_chart(fig, use_container_width=True)
+# Visualize page: structural/sequence interpretation for one peptide.
+elif page == "Visualize":
     st.header("Peptide Visualizer")
+    with st.container(border=True):
+        st.text_input(
+            "Enter a peptide sequence to visualize:",
+            key="visualize_peptide_input",
+        )
     seq_viz = (st.session_state.get("visualize_peptide_input") or "").strip()
     clean_viz = "".join(c for c in seq_viz.upper() if not c.isspace())
             with st.expander("Map · legend", expanded=False):
                 st.markdown(COMPACT_MAP_LEGEND)
+# t-SNE page: embedding projection for multi-sequence exploration.
+elif page == "t-SNE":
     st.header("t-SNE Visualizer")
     st.write("Upload peptide sequences (FASTA or plain list) to embed sequences and explore clusters with t-SNE.")
     uploaded_file = st.file_uploader("Upload FASTA or text file", type=["txt", "fasta"])
+    # Parse upload and replace previous sequence set.
     if uploaded_file:
         text = uploaded_file.read().decode("utf-8")
         sequences = [l.strip() for l in text.splitlines() if not l.startswith(">") and l.strip()]
         st.session_state.visualize_sequences = sequences
+        # Invalidate previous embedding projection after new upload.
         st.session_state.visualize_df = None
+    # Compute embeddings once and cache the projected dataframe in session.
     if st.session_state.visualize_sequences and st.session_state.visualize_df is None:
         sequences = st.session_state.visualize_sequences
         if len(sequences) < 2:
             with st.spinner("Generating embedding..."):
                 embeddings_list, labels, confs, lengths, hydros, charges = [], [], [], [], [], []
+                # Use penultimate model representation as embedding features.
                 embedding_extractor = torch.nn.Sequential(*list(model.layers)[:-1])
                 for i, s in enumerate(sequences):
                 st.session_state.visualize_df = df
                 progress.progress(1.0, text="Embedding ready")
+    # Render interactive scatter + filters once a projected dataframe exists.
     if st.session_state.visualize_df is not None:
         df = st.session_state.visualize_df
         st.subheader("t-SNE plot")
 • Coloring by properties reveals biochemical trends.
 """)
+# About page: quick orientation + disclaimer for new users.
 elif page == "About":
     st.header("About the Project")
     st.markdown("""
 - **AMP Predictor**: batch predictions from multi-line or FASTA input, length warnings, persisted results, top-candidate highlight, and CSV export.
 - **Peptide Analyzer**: single-sequence numerical and textual analysis — AMP prediction, composition, physicochemical table + radar, similarity to known AMPs, and report export.
 - **Peptide Optimizer**: guided sequence optimization with Enter-to-run input, mutation heatmap, step table, and confidence-vs-step trend.
+- **Visualize**: single-sequence 3D approximation + detailed helical wheel + functional region map with consistent residue coloring and clear legend dropdowns.
+- **t-SNE**: upload many sequences, embed with the model, run t-SNE, and explore clusters with filters and hover metadata.
 - **About**: this overview and disclaimer.
 **Disclaimer:** Predictions are model-based heuristics and are **not** a substitute for wet-lab validation or regulatory use.

StreamlitApp/utils/analyze.py CHANGED Viewed

@@ -1,15 +1,16 @@
 from collections import Counter
 def aa_composition(sequence):
     amino_acids = list("ACDEFGHIKLMNPQRSTVWY")
     counts = Counter(sequence)
     total = len(sequence)
     return {aa: counts.get(aa, 0) / total for aa in amino_acids}
-# Compute sequence properties
 def compute_properties(sequence):
-    # Property calculations
     aa_weights = {'A': 89.1, 'R': 174.2, 'N': 132.1, 'D': 133.1, 'C': 121.2,
                   'E': 147.1, 'Q': 146.2, 'G': 75.1, 'H': 155.2, 'I': 131.2,
                   'L': 131.2, 'K': 146.2, 'M': 149.2, 'F': 165.2, 'P': 115.1,

+"""Sequence composition and physicochemical property helpers."""
 from collections import Counter
 def aa_composition(sequence):
     """Return normalized frequencies for the 20 canonical amino acids.

     Args:
         sequence: Peptide sequence as a string of one-letter residue codes.
             Matching is case-sensitive; only uppercase canonical codes are
             counted.

     Returns:
         Dict mapping each of the 20 canonical residues to its fraction of
         the total sequence length. Characters outside that alphabet still
         count toward the length, so the fractions may sum to less than 1.
         An empty sequence yields 0.0 for every residue.
     """
     amino_acids = "ACDEFGHIKLMNPQRSTVWY"
     total = len(sequence)
     if total == 0:
         # Guard against ZeroDivisionError when an empty sequence is passed.
         return {aa: 0.0 for aa in amino_acids}
     counts = Counter(sequence)
     return {aa: counts[aa] / total for aa in amino_acids}
 def compute_properties(sequence):
+    """Compute simple length, mass, hydrophobicity, and net-charge signals."""
     aa_weights = {'A': 89.1, 'R': 174.2, 'N': 132.1, 'D': 133.1, 'C': 121.2,
                   'E': 147.1, 'Q': 146.2, 'G': 75.1, 'H': 155.2, 'I': 131.2,
                   'L': 131.2, 'K': 146.2, 'M': 149.2, 'F': 165.2, 'P': 115.1,

StreamlitApp/utils/optimize.py CHANGED Viewed

@@ -1,13 +1,16 @@
 import random
 from utils.predict import predict_amp
 HYDROPHOBIC = set("AILMFWVPG")
 HYDROPHILIC = set("STNQYCH")
 POSITIVE = set("KRH")
 NEGATIVE = set("DE")
-# Function to mutate a residue based on simple heuristics
 def mutate_residue(residue):
     if residue in POSITIVE:
         return residue, "Retained strong positive residue"
     elif residue in NEGATIVE:
@@ -19,7 +22,6 @@ def mutate_residue(residue):
     else:
         return random.choice(list(HYDROPHOBIC)), "Adjusted physicochemical profile"
-# Sequence optimization function
 def optimize_sequence(seq, model, max_rounds=20, confidence_threshold=0.001):
     """
     Iteratively optimize sequence to increase AMP probability.
@@ -30,7 +32,7 @@ def optimize_sequence(seq, model, max_rounds=20, confidence_threshold=0.001):
     best_conf = conf
     history = [(current_seq, conf, "-", "-", "-", "Original sequence")]
-    # Optimization loop
     for _ in range(max_rounds):
         best_mutation = None
         best_mutation_conf = best_conf
@@ -53,7 +55,7 @@ def optimize_sequence(seq, model, max_rounds=20, confidence_threshold=0.001):
             history.append((current_seq, best_conf, change, old_res, new_res, reason))
         else:
-            # No further improvement, stop
             break
     return current_seq, best_conf, history

+"""Heuristic mutation search used by the Optimize page."""
 import random
 from utils.predict import predict_amp
+# Residue groups used to propose chemistry-aware substitutions.
 HYDROPHOBIC = set("AILMFWVPG")
 HYDROPHILIC = set("STNQYCH")
 POSITIVE = set("KRH")
 NEGATIVE = set("DE")
 def mutate_residue(residue):
+    """Return a candidate replacement residue and rationale."""
     if residue in POSITIVE:
         return residue, "Retained strong positive residue"
     elif residue in NEGATIVE:
     else:
         return random.choice(list(HYDROPHOBIC)), "Adjusted physicochemical profile"
 def optimize_sequence(seq, model, max_rounds=20, confidence_threshold=0.001):
     """
     Iteratively optimize sequence to increase AMP probability.
     best_conf = conf
     history = [(current_seq, conf, "-", "-", "-", "Original sequence")]
+    # Greedy loop: keep only the best confidence-improving mutation each round.
     for _ in range(max_rounds):
         best_mutation = None
         best_mutation_conf = best_conf
             history.append((current_seq, best_conf, change, old_res, new_res, reason))
         else:
+            # Stop when no mutation clears the minimum improvement threshold.
             break
     return current_seq, best_conf, history

StreamlitApp/utils/peptide_extras.py CHANGED Viewed

@@ -121,6 +121,7 @@ def sequence_similarity(seq1: str, seq2: str) -> float:
 def find_most_similar(sequence: str) -> Tuple[Optional[str], float]:
     if not sequence or not KNOWN_AMPS:
         return None, 0.0
     seq = "".join(c for c in sequence.upper() if not c.isspace())
@@ -204,6 +205,7 @@ def plot_helical_wheel(sequence: str, figsize: Tuple[float, float] = (6.2, 6.2))
     import matplotlib.pyplot as plt
     from matplotlib import patheffects as pe
     clean = "".join(c for c in (sequence or "").upper() if not c.isspace())
     n = len(clean)
     fig, ax = plt.subplots(figsize=figsize, subplot_kw={"projection": "polar"})
@@ -247,6 +249,7 @@ def plot_helical_wheel(sequence: str, figsize: Tuple[float, float] = (6.2, 6.2))
             zorder=2,
         )
     colors = [residue_color_mpl(aa) for aa in clean]
     ax.scatter(
         angles_rad,
@@ -303,6 +306,7 @@ def build_importance_map_html(sequence: str) -> str:
     """Build HTML for residue importance highlighting (escape non-AA safely)."""
     import html as html_mod
     parts: List[str] = []
     for ch in sequence:
         if ch.isspace():
@@ -379,6 +383,7 @@ def render_3d_structure(
     """
     import streamlit.components.v1 as components
     clean = "".join(c for c in (sequence or "").upper() if not c.isspace())
     if not clean:
         return False

 def find_most_similar(sequence: str) -> Tuple[Optional[str], float]:
+    """Return the closest known AMP and simple position-match similarity score."""
     if not sequence or not KNOWN_AMPS:
         return None, 0.0
     seq = "".join(c for c in sequence.upper() if not c.isspace())
     import matplotlib.pyplot as plt
     from matplotlib import patheffects as pe
+    # Normalize user input to whitespace-free uppercase sequence.
     clean = "".join(c for c in (sequence or "").upper() if not c.isspace())
     n = len(clean)
     fig, ax = plt.subplots(figsize=figsize, subplot_kw={"projection": "polar"})
             zorder=2,
         )
+    # Draw residue nodes after spokes/connectors so labels stay readable.
     colors = [residue_color_mpl(aa) for aa in clean]
     ax.scatter(
         angles_rad,
     """Build HTML for residue importance highlighting (escape non-AA safely)."""
     import html as html_mod
+    # Emit one colored <span> per residue for inline sequence highlighting.
     parts: List[str] = []
     for ch in sequence:
         if ch.isspace():
     """
     import streamlit.components.v1 as components
+    # Input sanitization keeps renderer stable across pasted FASTA/text snippets.
     clean = "".join(c for c in (sequence or "").upper() if not c.isspace())
     if not clean:
         return False

StreamlitApp/utils/predict.py CHANGED Viewed

@@ -1,10 +1,12 @@
 import pathlib
 import numpy as np
 import torch
 import streamlit as st
 from torch import nn
-# Model Definition
 class FastMLP(nn.Module):
     def __init__(self, input_dim=1024):
         super(FastMLP, self).__init__()
@@ -20,9 +22,9 @@ class FastMLP(nn.Module):
     def forward(self, x):
         return self.layers(x)
-# Model Loader
 @st.cache_resource
 def load_model():
     # Always resolve relative to the StreamlitApp folder, not the process CWD.
     streamlitapp_dir = pathlib.Path(__file__).resolve().parent.parent
     repo_root = streamlitapp_dir.parent
@@ -42,13 +44,12 @@ def load_model():
             f"- {streamlitapp_dir / 'models' / 'ampMLModel.pt'}\n"
         )
-    # Build model and load weights
     model = FastMLP(input_dim=1024)
     model.load_state_dict(torch.load(str(model_path), map_location="cpu"))
     model.eval()
     return model
-# Sequence Encoder
 def encode_sequence(seq, max_len=51):
     """
     Converts amino acid sequence to flattened one-hot vector
@@ -57,19 +58,19 @@ def encode_sequence(seq, max_len=51):
     amino_acids = "ACDEFGHIKLMNPQRSTVWY"
     aa_to_idx = {aa: i for i, aa in enumerate(amino_acids)}
-    one_hot = np.zeros((max_len, len(amino_acids)))  # max_len x 20
     for i, aa in enumerate(seq[:max_len]):
         if aa in aa_to_idx:
             one_hot[i, aa_to_idx[aa]] = 1
-    flat = one_hot.flatten()  # length = max_len*20 = 1020
     if len(flat) < 1024:
         flat = np.pad(flat, (0, 1024 - len(flat)))
     return flat
-# Prediction Function
 def predict_amp(sequence, model):
     """
     Takes an amino acid sequence string and the loaded model,
@@ -77,6 +78,7 @@ def predict_amp(sequence, model):
     """
     x = torch.tensor(encode_sequence(sequence), dtype=torch.float32).unsqueeze(0)
     with torch.no_grad():
         logits = model(x)
         prob = torch.sigmoid(logits).item()

+"""Model loading, sequence encoding, and AMP inference helpers."""
 import pathlib
 import numpy as np
 import torch
 import streamlit as st
 from torch import nn
+# Lightweight MLP used for AMP binary classification.
 class FastMLP(nn.Module):
     def __init__(self, input_dim=1024):
         super(FastMLP, self).__init__()
     def forward(self, x):
         return self.layers(x)
 @st.cache_resource
 def load_model():
+    """Load model weights once per Streamlit process."""
     # Always resolve relative to the StreamlitApp folder, not the process CWD.
     streamlitapp_dir = pathlib.Path(__file__).resolve().parent.parent
     repo_root = streamlitapp_dir.parent
             f"- {streamlitapp_dir / 'models' / 'ampMLModel.pt'}\n"
         )
+    # Instantiate architecture and hydrate weights from disk.
     model = FastMLP(input_dim=1024)
     model.load_state_dict(torch.load(str(model_path), map_location="cpu"))
     model.eval()
     return model
 def encode_sequence(seq, max_len=51):
     """
     Converts amino acid sequence to flattened one-hot vector
     amino_acids = "ACDEFGHIKLMNPQRSTVWY"
     aa_to_idx = {aa: i for i, aa in enumerate(amino_acids)}
+    # Encode each residue as a one-hot row, then flatten to vector features.
+    one_hot = np.zeros((max_len, len(amino_acids)))
     for i, aa in enumerate(seq[:max_len]):
         if aa in aa_to_idx:
             one_hot[i, aa_to_idx[aa]] = 1
+    flat = one_hot.flatten()
     if len(flat) < 1024:
         flat = np.pad(flat, (0, 1024 - len(flat)))
     return flat
 def predict_amp(sequence, model):
     """
     Takes an amino acid sequence string and the loaded model,
     """
     x = torch.tensor(encode_sequence(sequence), dtype=torch.float32).unsqueeze(0)
+    # Sigmoid(logit) gives AMP probability in [0, 1].
     with torch.no_grad():
         logits = model(x)
         prob = torch.sigmoid(logits).item()

StreamlitApp/utils/rateLimit.py CHANGED Viewed

@@ -1,9 +1,10 @@
 import time
 from collections import deque
 class RateLimiter:
-    #Sliding-window rate limiter per instance
     def __init__(self, max_calls: int, period_seconds: float):
         self.max_calls = max_calls
         self.period = period_seconds
@@ -12,7 +13,7 @@ class RateLimiter:
     def allow(self) -> bool:
         now = time.time()
-        # Drop entries older than window
         while self.calls and self.calls[0] <= now - self.period:
             self.calls.popleft()
         if len(self.calls) < self.max_calls:
@@ -21,8 +22,7 @@ class RateLimiter:
         return False
     def time_until_next(self) -> float:
-        # Seconds until next slot is available (0 if already available)
         now = time.time()
         if len(self.calls) < self.max_calls:
             return 0.0

+"""Simple in-memory sliding-window rate limiter."""
 import time
 from collections import deque
 class RateLimiter:
+    # Each instance tracks call timestamps for one caller/key.
     def __init__(self, max_calls: int, period_seconds: float):
         self.max_calls = max_calls
         self.period = period_seconds
     def allow(self) -> bool:
         now = time.time()
+        # Drop timestamps outside the active window.
         while self.calls and self.calls[0] <= now - self.period:
             self.calls.popleft()
         if len(self.calls) < self.max_calls:
         return False
     def time_until_next(self) -> float:
+        # Return wait time before another call is allowed (seconds).
         now = time.time()
         if len(self.calls) < self.max_calls:
             return 0.0

StreamlitApp/utils/ui_helpers.py CHANGED Viewed

@@ -1,3 +1,5 @@
 import html as _html
 from typing import Dict, List, Tuple, Optional
@@ -20,7 +22,7 @@ def predicted_confidence(row: Dict) -> Optional[float]:
         return None
     if pred == "AMP":
         return p_amp
-    # Non-AMP probability is (1 - AMP probability)
     return 1.0 - p_amp
@@ -47,7 +49,7 @@ def choose_top_candidate(predictions: List[Dict]) -> Optional[Dict]:
     if not predictions:
         return None
-    # Prefer AMP predictions; otherwise pick highest confidence overall.
     amp_rows = [r for r in predictions if r.get("Prediction") == "AMP"]
     rows = amp_rows if amp_rows else predictions
@@ -90,7 +92,7 @@ def mutation_heatmap_html(original: str, final: str) -> str:
     fin = final or ""
     max_len = max(len(orig), len(fin))
-    # Display in a fixed-width monospace container.
     out: List[str] = [
         "<div style='font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, \"Liberation Mono\", monospace; white-space: pre-wrap;'>"
     ]
@@ -142,7 +144,7 @@ def optimization_summary(orig_seq: str, orig_conf: float, final_seq: str, final_
     orig_seq = orig_seq or ""
     final_seq = final_seq or ""
-    # Compute properties only if non-empty.
     props_orig = compute_properties(orig_seq) if orig_seq else {}
     props_final = compute_properties(final_seq) if final_seq else {}
@@ -198,8 +200,7 @@ def sequence_health_label(conf_prob: float, charge: float, hydro_fraction: float
     """
     Returns: (label, color_css)
     """
-    # If the model is *extremely* confident, treat it as strong regardless
-    # of charge/hydrophobicity heuristics (prevents "moderate" at ~99%).
     if conf_prob >= 0.9:
         return "Strong AMP candidate", "#2ca02c"
     if conf_prob > 0.75 and charge >= 2 and 0.3 <= hydro_fraction <= 0.6:

+"""UI-facing formatting and summary helpers shared across pages."""
 import html as _html
 from typing import Dict, List, Tuple, Optional
         return None
     if pred == "AMP":
         return p_amp
+    # Convert AMP probability into confidence for the predicted class.
     return 1.0 - p_amp
     if not predictions:
         return None
+    # Prefer AMP rows first, then fall back to highest-confidence overall row.
     amp_rows = [r for r in predictions if r.get("Prediction") == "AMP"]
     rows = amp_rows if amp_rows else predictions
     fin = final or ""
     max_len = max(len(orig), len(fin))
+    # Use monospace layout so per-position residue changes align visually.
     out: List[str] = [
         "<div style='font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, \"Liberation Mono\", monospace; white-space: pre-wrap;'>"
     ]
     orig_seq = orig_seq or ""
     final_seq = final_seq or ""
+    # Property deltas drive the compact "what changed" summary panel.
     props_orig = compute_properties(orig_seq) if orig_seq else {}
     props_final = compute_properties(final_seq) if final_seq else {}
     """
     Returns: (label, color_css)
     """
+    # Very high model confidence is treated as strong even outside ideal property ranges.
     if conf_prob >= 0.9:
         return "Strong AMP candidate", "#2ca02c"
     if conf_prob > 0.75 and charge >= 2 and 0.3 <= hydro_fraction <= 0.6:

StreamlitApp/utils/visualize.py CHANGED Viewed

@@ -1,3 +1,5 @@
 import pandas as pd
 import matplotlib.pyplot as plt
 from sklearn.manifold import TSNE
@@ -6,14 +8,15 @@ import torch
 import numpy as np
 from utils.predict import encode_sequence
-# t-SNE Visualization
 def tsne_visualization(sequences, model):
     st.info("Generating embeddings... this may take a moment.")
     embeddings = []
     for seq in sequences:
         x = torch.tensor(encode_sequence(seq), dtype=torch.float32).unsqueeze(0)
         with torch.no_grad():
-            emb = model.layers[0](x)  # Grab first layer embedding
         embeddings.append(emb.numpy().flatten())
     embeddings = np.vstack(embeddings)

+"""Legacy t-SNE helper retained for ad-hoc embedding previews."""
 import pandas as pd
 import matplotlib.pyplot as plt
 from sklearn.manifold import TSNE
 import numpy as np
 from utils.predict import encode_sequence
 def tsne_visualization(sequences, model):
+    """Project model embeddings into 2D and render a quick scatter plot."""
     st.info("Generating embeddings... this may take a moment.")
     embeddings = []
     for seq in sequences:
         x = torch.tensor(encode_sequence(seq), dtype=torch.float32).unsqueeze(0)
         with torch.no_grad():
+            # Use an early hidden layer as a compact learned representation.
+            emb = model.layers[0](x)
         embeddings.append(emb.numpy().flatten())
     embeddings = np.vstack(embeddings)