Sbhat2026 committed on
Commit
6497d29
·
1 Parent(s): 524ce0a

fix: add client+server input validation and GO hierarchy filtering

Browse files
Files changed (2) hide show
  1. server.py +234 -40
  2. static/interface.html +193 -64
server.py CHANGED
@@ -9,6 +9,7 @@ import torch.nn as nn
9
  import pandas as pd
10
  import joblib
11
  import json
 
12
  import os
13
  import re
14
  import time
@@ -30,10 +31,20 @@ esm_model = None
30
  batch_converter = None
31
  mlb = None
32
  go_map = {}
 
 
33
  mf_indices = None
34
  thresholds = {}
35
  NUM_LABELS = 0
36
 
 
 
 
 
 
 
 
 
37
 
38
  def _download_with_retry(fname):
39
  from huggingface_hub import hf_hub_download
@@ -96,12 +107,141 @@ def load_thresholds():
96
  return {}
97
 
98
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  @asynccontextmanager
100
  async def lifespan(app: FastAPI):
101
  global model, esm_model, batch_converter
102
- global mlb, go_map, mf_indices, thresholds, NUM_LABELS
103
 
104
- # Step 1: download missing files (network is ready here, unlike module load time)
105
  ensure_model_files()
106
 
107
  # Step 2: GO name map
@@ -116,29 +256,59 @@ async def lifespan(app: FastAPI):
116
  NUM_LABELS = len(mlb.classes_)
117
  print(f"MLB loaded: {NUM_LABELS} labels")
118
 
119
- # Step 4: MF-only whitelist (mlb is now defined)
120
- mf_go_ids = {
121
- go_id for go_id, name in go_map.items()
122
- if name != go_id and not name.startswith("GO:")
123
- }
124
- if mf_go_ids:
125
- mf_indices = {i for i, go_id in enumerate(mlb.classes_) if go_id in mf_go_ids}
126
- print(f"MF-only filter: {len(mf_indices)} labels active")
127
  else:
128
- mf_indices = None
129
- print("MF filter not applied (go_names.json absent or empty)")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
 
131
- # Step 5: per-label thresholds
132
  thresholds = load_thresholds()
133
 
134
- # Step 6: classifier
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
  class RecoveredBaselineModel(nn.Module):
136
- def __init__(self, input_dim=320, hidden_dim=1024, output_dim=NUM_LABELS, dropout=0.2):
 
137
  super().__init__()
138
- self.fc1 = nn.Linear(input_dim, hidden_dim)
139
- self.proj = nn.Linear(input_dim, hidden_dim)
140
- self.fc2 = nn.Linear(hidden_dim, hidden_dim)
141
- self.out = nn.Linear(hidden_dim, output_dim)
142
  self.relu = nn.ReLU()
143
  self.drop = nn.Dropout(dropout)
144
 
@@ -149,26 +319,33 @@ async def lifespan(app: FastAPI):
149
  h = self.drop(h)
150
  return self.out(h)
151
 
152
- device = torch.device("cpu")
153
- _model = RecoveredBaselineModel().to(device)
154
- ckpt = torch.load(os.path.join(BASE_DIR, "baseline_res.pth"), map_location=device)
155
- sd = ckpt["model"] if isinstance(ckpt, dict) and "model" in ckpt else ckpt
156
- _model.load_state_dict(sd)
 
 
 
 
 
 
 
 
 
 
157
  _model.eval()
158
  model = _model
159
  print("Classifier loaded OK")
160
 
161
- # Step 7: ESM-2
162
- # THIS was the actual source of the curl error — esm.pretrained.esm2_t6_8M_UR50D()
163
- # internally runs curl/wget to download weights from huggingface.co at import time.
164
- # Being inside lifespan means it runs AFTER the container network stack is ready.
165
  import esm as esm_lib
166
  _esm_model, alphabet = esm_lib.pretrained.esm2_t6_8M_UR50D()
167
  esm_model = _esm_model.to(device).eval()
168
  batch_converter = alphabet.get_batch_converter()
169
  print("ESM-2 loaded OK")
170
 
171
- yield # app is live
172
 
173
  print("Shutting down.")
174
 
@@ -207,14 +384,23 @@ def parse_sequences(text):
207
 
208
  @app.post("/predict")
209
  async def predict(request: ProteinRequest):
210
- entries = parse_sequences(request.sequence)
211
- results = []
212
- device = torch.device("cpu")
 
213
 
214
  for name, sequence in entries:
 
 
 
 
 
 
 
215
  if len(sequence) > 2500:
216
  results.append({"name": name, "error": "Sequence too long (max 2500 aa)"})
217
  continue
 
218
  try:
219
  _, _, tokens = batch_converter([("p", sequence)])
220
  with torch.no_grad():
@@ -224,23 +410,31 @@ async def predict(request: ProteinRequest):
224
  if prob.dim() == 0:
225
  prob = prob.unsqueeze(0)
226
 
227
- preds = []
228
- active = mf_indices if mf_indices else range(len(mlb.classes_))
229
- for i in active:
230
  pv = float(prob[i])
231
  if pv >= float(thresholds.get(str(i), 0.5)):
232
  go_id = mlb.classes_[i]
233
- preds.append({
234
  "go_id": go_id,
235
  "name": go_map.get(go_id, go_id),
236
- "prob": round(pv, 3),
237
  })
238
- preds = sorted(preds, key=lambda x: x["prob"], reverse=True)[:12]
 
 
 
 
 
 
 
239
  results.append({
240
  "name": name,
241
  "sequence_length": len(sequence),
242
- "predictions": preds,
243
- "n_above_threshold": len(preds),
 
244
  })
245
  except Exception as e:
246
  results.append({"name": name, "error": str(e)})
 
9
  import pandas as pd
10
  import joblib
11
  import json
12
+ import math
13
  import os
14
  import re
15
  import time
 
31
  batch_converter = None
32
  mlb = None
33
  go_map = {}
34
+ mf_terms = set() # GO IDs with namespace == molecular_function (from OBO)
35
+ go_parents = {} # GO ID -> set of direct parent GO IDs (MF DAG only)
36
  mf_indices = None
37
  thresholds = {}
38
  NUM_LABELS = 0
39
 
40
+ # Biological complexity filter constants
41
+ MIN_SEQ_LENGTH = 30
42
+ MIN_ENTROPY_BITS = 2.5
43
+ MAX_DOMINANT_FRAC = 0.60
44
+ MIN_DISTINCT_AA = 5
45
+ INVALID_AA = set("BJOUXZ")
46
+ MF_ROOT = "GO:0003674"
47
+
48
 
49
  def _download_with_retry(fname):
50
  from huggingface_hub import hf_hub_download
 
107
  return {}
108
 
109
 
110
+ def parse_obo(path):
111
+ """
112
+ Parse go-basic.obo and return:
113
+ mf_terms : set of GO IDs with namespace == molecular_function
114
+ go_parents : dict mapping each MF GO ID -> set of direct parent GO IDs
115
+ (only is_a and part_of edges, restricted to MF namespace)
116
+ """
117
+ ns_map = {}
118
+ par_map = {}
119
+ cur_id = None
120
+ cur_ns = None
121
+ cur_par = set()
122
+ in_term = False
123
+
124
+ def flush():
125
+ nonlocal cur_id, cur_ns, cur_par
126
+ if cur_id and cur_ns:
127
+ ns_map[cur_id] = cur_ns
128
+ par_map[cur_id] = cur_par
129
+ cur_id = None
130
+ cur_ns = None
131
+ cur_par = set()
132
+
133
+ with open(path, "r", encoding="utf-8") as fh:
134
+ for raw in fh:
135
+ line = raw.strip()
136
+ if line == "[Term]":
137
+ flush()
138
+ in_term = True
139
+ continue
140
+ if line.startswith("[") and line != "[Term]":
141
+ flush()
142
+ in_term = False
143
+ continue
144
+ if not in_term:
145
+ continue
146
+ if line.startswith("id:"):
147
+ cur_id = line.split("id:", 1)[1].strip().split()[0]
148
+ elif line.startswith("namespace:"):
149
+ cur_ns = line.split("namespace:", 1)[1].strip()
150
+ elif line.startswith("is_obsolete:") and "true" in line:
151
+ cur_id = None
152
+ elif line.startswith("is_a:"):
153
+ parent = line.split("is_a:", 1)[1].strip().split()[0]
154
+ cur_par.add(parent)
155
+ elif line.startswith("relationship:"):
156
+ parts = line.split("relationship:", 1)[1].strip().split()
157
+ if len(parts) >= 2 and parts[0] == "part_of":
158
+ cur_par.add(parts[1])
159
+ flush()
160
+
161
+ mf = {gid for gid, n in ns_map.items() if n == "molecular_function"}
162
+ go_parents_mf = {gid: (parents & mf) for gid, parents in par_map.items() if gid in mf}
163
+ n_edges = sum(len(v) for v in go_parents_mf.values())
164
+ print(f"OBO parsed: {len(mf)} MF terms, {n_edges} parent edges")
165
+ return mf, go_parents_mf
166
+
167
+
168
+ def apply_hierarchy_filter(preds, go_parents_map):
169
+ """
170
+ Split predictions into (visible, suppressed).
171
+ A prediction is suppressed when it has at least one direct MF parent
172
+ but none of those parents appear in the predicted set.
173
+ The MF root and terms with no MF parents are always visible.
174
+ """
175
+ if not go_parents_map:
176
+ return preds, []
177
+
178
+ predicted_ids = {p["go_id"] for p in preds}
179
+ visible = []
180
+ suppressed = []
181
+
182
+ for pred in preds:
183
+ gid = pred["go_id"]
184
+ parents = go_parents_map.get(gid, set())
185
+ if gid == MF_ROOT or not parents:
186
+ visible.append(pred)
187
+ elif parents & predicted_ids:
188
+ visible.append(pred)
189
+ else:
190
+ suppressed.append(pred)
191
+
192
+ return visible, suppressed
193
+
194
+
195
+ def sequence_entropy(seq):
196
+ seq_upper = seq.upper()
197
+ counts = {}
198
+ for aa in seq_upper:
199
+ counts[aa] = counts.get(aa, 0) + 1
200
+ n = len(seq_upper)
201
+ return -sum((c / n) * math.log2(c / n) for c in counts.values())
202
+
203
+
204
+ def validate_sequence(name, seq):
205
+ """Returns an error string if the sequence should be rejected, else None."""
206
+ if len(seq) < MIN_SEQ_LENGTH:
207
+ return (f"'{name}' is too short ({len(seq)} aa β€” minimum {MIN_SEQ_LENGTH} aa). "
208
+ f"Sequences this short are unlikely to fold into a stable domain.")
209
+
210
+ bad = sorted({c.upper() for c in seq if c.upper() in INVALID_AA})
211
+ if bad:
212
+ return (f"'{name}' contains invalid amino acid character(s): "
213
+ f"{', '.join(bad)}. These ambiguity codes are not accepted.")
214
+
215
+ counts = {}
216
+ for aa in seq.upper():
217
+ counts[aa] = counts.get(aa, 0) + 1
218
+
219
+ if len(counts) < MIN_DISTINCT_AA:
220
+ return (f"'{name}' uses only {len(counts)} distinct residue type(s). "
221
+ f"Real proteins require at least {MIN_DISTINCT_AA}.")
222
+
223
+ dominant_frac = max(counts.values()) / len(seq)
224
+ if dominant_frac > MAX_DOMINANT_FRAC:
225
+ dominant_aa = max(counts, key=counts.get)
226
+ return (f"'{name}' is dominated by a single residue "
227
+ f"({dominant_aa} = {dominant_frac:.0%}). "
228
+ f"Low-complexity sequences produce unreliable embeddings.")
229
+
230
+ H = sequence_entropy(seq)
231
+ if H < MIN_ENTROPY_BITS:
232
+ return (f"'{name}' has very low sequence complexity "
233
+ f"(Shannon entropy {H:.2f} bits, minimum {MIN_ENTROPY_BITS:.1f} bits). "
234
+ f"Repetitive or artificially constructed sequences are not accepted.")
235
+
236
+ return None
237
+
238
+
239
  @asynccontextmanager
240
  async def lifespan(app: FastAPI):
241
  global model, esm_model, batch_converter
242
+ global mlb, go_map, mf_terms, go_parents, mf_indices, thresholds, NUM_LABELS
243
 
244
+ # Step 1: download missing files
245
  ensure_model_files()
246
 
247
  # Step 2: GO name map
 
256
  NUM_LABELS = len(mlb.classes_)
257
  print(f"MLB loaded: {NUM_LABELS} labels")
258
 
259
+ # Step 4: OBO — parse MF namespace and parent DAG
260
+ obo_path = os.path.join(BASE_DIR, "go-basic.obo")
261
+ if os.path.exists(obo_path):
262
+ mf_terms, go_parents = parse_obo(obo_path)
263
+ mf_in_mlb = sum(1 for gid in mlb.classes_ if gid in mf_terms)
264
+ print(f"OBO cross-check: {mf_in_mlb}/{NUM_LABELS} MLB labels are MF namespace")
 
 
265
  else:
266
+ print("WARNING: go-basic.obo not found β€” hierarchy filtering disabled. "
267
+ "Download from https://current.geneontology.org/ontology/go-basic.obo "
268
+ "and place it alongside server.py.")
269
+
270
+ # Step 5: MF-only whitelist — OBO namespace is authoritative, CSV is fallback
271
+ if mf_terms:
272
+ mf_indices = [i for i, gid in enumerate(mlb.classes_) if gid in mf_terms]
273
+ print(f"MF whitelist (OBO): {len(mf_indices)} active indices")
274
+ else:
275
+ mf_go_ids = {
276
+ go_id for go_id, name in go_map.items()
277
+ if name and name != go_id and not name.startswith("GO:")
278
+ }
279
+ mf_indices = [i for i, gid in enumerate(mlb.classes_) if gid in mf_go_ids] or list(range(NUM_LABELS))
280
+ print(f"MF whitelist (CSV fallback): {len(mf_indices)} active indices")
281
+
282
+ app.state.mf_indices = mf_indices
283
 
284
+ # Step 6: per-label thresholds
285
  thresholds = load_thresholds()
286
 
287
+ # Step 7: classifier — auto-detect architecture from checkpoint keys
288
+ class ResidualMLP(nn.Module):
289
+ """Matches General_Pipeline.ipynb — two skip-connection blocks."""
290
+ def __init__(self, in_dim=320, out_dim=NUM_LABELS, hidden=1024, dropout=0.2):
291
+ super().__init__()
292
+ self.fc_in = nn.Linear(in_dim, hidden)
293
+ self.block1 = nn.Sequential(nn.ReLU(), nn.Dropout(dropout), nn.Linear(hidden, hidden))
294
+ self.block2 = nn.Sequential(nn.ReLU(), nn.Dropout(dropout), nn.Linear(hidden, hidden))
295
+ self.fc_out = nn.Sequential(nn.ReLU(), nn.Dropout(dropout), nn.Linear(hidden, out_dim))
296
+
297
+ def forward(self, x):
298
+ h = self.fc_in(x)
299
+ h = torch.relu(h)
300
+ h = h + self.block1(h)
301
+ h = h + self.block2(h)
302
+ return self.fc_out(h)
303
+
304
  class RecoveredBaselineModel(nn.Module):
305
+ """Earlier server-side architecture — retained for backward compatibility."""
306
+ def __init__(self, in_dim=320, out_dim=NUM_LABELS, hidden=1024, dropout=0.2):
307
  super().__init__()
308
+ self.fc1 = nn.Linear(in_dim, hidden)
309
+ self.proj = nn.Linear(in_dim, hidden)
310
+ self.fc2 = nn.Linear(hidden, hidden)
311
+ self.out = nn.Linear(hidden, out_dim)
312
  self.relu = nn.ReLU()
313
  self.drop = nn.Dropout(dropout)
314
 
 
319
  h = self.drop(h)
320
  return self.out(h)
321
 
322
+ device = torch.device("cpu")
323
+ ckpt = torch.load(os.path.join(BASE_DIR, "baseline_res.pth"), map_location=device)
324
+ sd = ckpt["model"] if isinstance(ckpt, dict) and "model" in ckpt else ckpt
325
+ keys = set(sd.keys())
326
+
327
+ if any(k.startswith("fc_in") for k in keys):
328
+ _model = ResidualMLP().to(device)
329
+ print("Classifier architecture: ResidualMLP (notebook)")
330
+ elif any(k.startswith("fc1") for k in keys):
331
+ _model = RecoveredBaselineModel().to(device)
332
+ print("Classifier architecture: RecoveredBaselineModel (server)")
333
+ else:
334
+ raise RuntimeError(f"Unrecognised checkpoint architecture. First keys: {sorted(keys)[:8]}")
335
+
336
+ _model.load_state_dict(sd, strict=True)
337
  _model.eval()
338
  model = _model
339
  print("Classifier loaded OK")
340
 
341
+ # Step 8: ESM-2
 
 
 
342
  import esm as esm_lib
343
  _esm_model, alphabet = esm_lib.pretrained.esm2_t6_8M_UR50D()
344
  esm_model = _esm_model.to(device).eval()
345
  batch_converter = alphabet.get_batch_converter()
346
  print("ESM-2 loaded OK")
347
 
348
+ yield
349
 
350
  print("Shutting down.")
351
 
 
384
 
385
  @app.post("/predict")
386
  async def predict(request: ProteinRequest):
387
+ entries = parse_sequences(request.sequence)
388
+ results = []
389
+ device = torch.device("cpu")
390
+ mf_idx = app.state.mf_indices
391
 
392
  for name, sequence in entries:
393
+
394
+ # Biological complexity guard — reject before touching ESM-2
395
+ err = validate_sequence(name, sequence)
396
+ if err:
397
+ results.append({"name": name, "error": err})
398
+ continue
399
+
400
  if len(sequence) > 2500:
401
  results.append({"name": name, "error": "Sequence too long (max 2500 aa)"})
402
  continue
403
+
404
  try:
405
  _, _, tokens = batch_converter([("p", sequence)])
406
  with torch.no_grad():
 
410
  if prob.dim() == 0:
411
  prob = prob.unsqueeze(0)
412
 
413
+ # Collect all predictions above per-label threshold (no hard cap)
414
+ raw_preds = []
415
+ for i in mf_idx:
416
  pv = float(prob[i])
417
  if pv >= float(thresholds.get(str(i), 0.5)):
418
  go_id = mlb.classes_[i]
419
+ raw_preds.append({
420
  "go_id": go_id,
421
  "name": go_map.get(go_id, go_id),
422
+ "prob": round(pv, 4),
423
  })
424
+ raw_preds.sort(key=lambda x: x["prob"], reverse=True)
425
+
426
+ # Apply GO hierarchy filter
427
+ visible, suppressed = apply_hierarchy_filter(raw_preds, go_parents)
428
+
429
+ for p in visible: p["prob"] = round(p["prob"], 3)
430
+ for p in suppressed: p["prob"] = round(p["prob"], 3)
431
+
432
  results.append({
433
  "name": name,
434
  "sequence_length": len(sequence),
435
+ "predictions": visible,
436
+ "suppressed": suppressed,
437
+ "n_above_threshold": len(raw_preds),
438
  })
439
  except Exception as e:
440
  results.append({"name": name, "error": str(e)})
static/interface.html CHANGED
@@ -173,6 +173,24 @@
173
  .show-med-inline { font-size: 0.75rem; color: var(--med-text); background: var(--med-bg); border: 1px solid var(--med-border); border-radius: 6px; padding: 4px 12px; cursor: pointer; }
174
  .show-med-inline:hover { opacity: 0.8; }
175
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
176
  /* ── Error card ── */
177
  .error-card { background: var(--low-bg); border-color: var(--low-border); }
178
  .seq-error { font-size: 0.8rem; color: var(--low-text); font-weight: 500; }
@@ -290,7 +308,6 @@
290
 
291
  <div id="status" class="status hidden"></div>
292
 
293
- <!-- Results toolbar (hidden until results exist) -->
294
  <div id="resultsToolbar" class="results-toolbar" style="display:none">
295
  <div class="toolbar-left">
296
  <input class="filter-input" id="termFilter" placeholder="Filter by term name…" oninput="applyFilters()" />
@@ -307,7 +324,6 @@
307
  <div id="results" class="results-area"></div>
308
  <div id="pagination" class="pagination" style="display:none"></div>
309
 
310
- <!-- History -->
311
  <div class="history-panel" id="historyPanel">
312
  <div class="history-header" onclick="toggleHistory()">
313
  <span class="history-title">Recent Predictions</span>
@@ -317,7 +333,6 @@
317
  <div id="historyList" style="display:none"></div>
318
  </div>
319
 
320
- <!-- README -->
321
  <div class="readme-panel">
322
  <div class="readme-header" onclick="toggleReadme()">
323
  <span class="readme-title">About & How to Use</span>
@@ -341,7 +356,7 @@
341
  <li>Multiple sequences can be submitted at once — separate each with a FASTA header line.</li>
342
  <li>The model accepts sequences up to 2500 amino acids. Longer sequences will be rejected.</li>
343
  <li>Click <strong>Predict Functions</strong> or press <strong>⌘ + Enter</strong>.</li>
344
- <li>Results are sorted by confidence. High confidence (≥80%) predictions are most reliable.</li>
345
  </ul>
346
  </div>
347
  <hr class="readme-divider"/>
@@ -352,14 +367,14 @@
352
  <hr class="readme-divider"/>
353
  <div class="readme-section">
354
  <h3>Understanding GO Terms</h3>
355
- <p>Each prediction shows a GO term ID (e.g. GO:0004672) and its name. Clicking the GO ID opens <a href="https://amigo.geneontology.org" target="_blank" rel="noopener">AmiGO</a>, the Gene Ontology browser, where you can explore the term's definition, its place in the GO hierarchy, and which proteins are annotated with it. For broader context and ontology navigation, <a href="https://www.geneontology.org" target="_blank" rel="noopener">geneontology.org</a> provides the full ontology and annotation data.</p>
356
  </div>
357
  <hr class="readme-divider"/>
358
  <div class="readme-section">
359
  <h3>Confidence Levels</h3>
360
  <ul>
361
- <li><strong>High (≥80%)</strong> — strong prediction, consistent with the training distribution.</li>
362
- <li><strong>Medium (55–80%)</strong> — moderate confidence, hidden by default to reduce noise. Toggle with the toolbar button.</li>
363
  <li><strong>Low (&lt;55%)</strong> — uncertain, hidden by default. Use cautiously as supplementary signal only.</li>
364
  </ul>
365
  </div>
@@ -380,7 +395,7 @@
380
  </div>
381
  </div>
382
 
383
- </div><!-- end .layout -->
384
 
385
  <script>
386
  const ta = document.getElementById('sequenceInput');
@@ -396,16 +411,17 @@
396
  if (savedTheme) applyTheme(savedTheme === 'dark');
397
  else applyTheme(window.matchMedia('(prefers-color-scheme: dark)').matches);
398
 
399
- // ── Char counter ──────────────────────────────────────────────────────────
400
  ta.addEventListener('input', () => {
401
- const seq = ta.value.replace(/>/g,'').replace(/[^A-Za-z]/g,'');
402
  cc.textContent = seq.length ? `${seq.length.toLocaleString()} aa` : '0 aa';
403
  });
404
  ta.addEventListener('keydown', e => { if (e.key === 'Enter' && e.metaKey) predict(); });
405
 
406
  // ── Helpers ───────────────────────────────────────────────────────────────
 
407
  function confClass(p) { return p >= 0.75 ? 'conf-high' : p >= 0.55 ? 'conf-med' : 'conf-low'; }
408
- function confLabel(p) { return p >= 0.80 ? 'High' : p >= 0.60 ? 'Medium' : 'Low'; }
409
  function probBar(p) {
410
  return `<div class="prob-bar-wrap"><div class="prob-bar-fill ${confClass(p)}" style="width:${Math.round(p*100)}%"></div></div>`;
411
  }
@@ -413,21 +429,110 @@
413
  return String(s).replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;').replace(/"/g,'&quot;');
414
  }
415
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
416
  // ── State ─────────────────────────────────────────────────────────────────
417
- let allResults = []; // full results from last predict
418
- let showMed = true; // medium confidence visible
419
- let showLow = false; // low confidence hidden by default
420
- let currentPage = 1;
421
- const PAGE_SIZE = 10;
422
 
423
  // ── Predict ───────────────────────────────────────────────────────────────
424
  async function predict() {
425
- const seq = ta.value.trim();
426
- if (!seq) { shake(); return; }
427
 
428
  const btn = document.getElementById('predictBtn');
429
  const status = document.getElementById('status');
430
 
 
 
 
 
 
 
 
 
 
 
 
 
 
431
  btn.disabled = true;
432
  btn.querySelector('.btn-label').textContent = 'Running…';
433
  status.className = 'status visible';
@@ -440,7 +545,7 @@
440
  const res = await fetch('/predict', {
441
  method: 'POST',
442
  headers: { 'Content-Type': 'application/json' },
443
- body: JSON.stringify({ sequence: seq }),
444
  });
445
  if (!res.ok) throw new Error(`Server error ${res.status}`);
446
  const data = await res.json();
@@ -450,7 +555,7 @@
450
  renderPage();
451
  if (allResults.length > 0) {
452
  document.getElementById('resultsToolbar').style.display = 'flex';
453
- addToHistory(allResults, seq);
454
  }
455
  } catch(e) {
456
  status.className = 'status visible';
@@ -470,8 +575,8 @@
470
  }
471
 
472
  function renderPagination() {
473
- const total = Math.ceil(allResults.length / PAGE_SIZE);
474
- const pag = document.getElementById('pagination');
475
  if (total <= 1) { pag.style.display = 'none'; return; }
476
  pag.style.display = 'flex';
477
  let html = `<button class="page-btn" onclick="goPage(${currentPage-1})" ${currentPage===1?'disabled':''}>β€Ή Prev</button>`;
@@ -492,6 +597,23 @@
492
  }
493
 
494
  // ── Render results ────────────────────────────────────────────────────────
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
495
  function renderResults(results, offset) {
496
  const container = document.getElementById('results');
497
  if (!results || results.length === 0) {
@@ -510,30 +632,22 @@
510
  </div>
511
  </div>`;
512
  }
513
- const preds = r.predictions || [];
514
- const seqName = r.name || `Sequence ${globalIdx+1}`;
515
- const highCount = preds.filter(p => p.prob >= 0.75).length;
516
- const medCount = preds.filter(p => p.prob >= 0.55 && p.prob < 0.75).length;
517
- const lowCount = preds.filter(p => p.prob < 0.55).length;
 
 
518
 
519
  const predHTML = preds.length === 0
520
  ? '<p class="no-preds">No functions predicted above confidence threshold.</p>'
521
  : preds.map(p => {
522
- const cc2 = confClass(p.prob);
523
- const isMed = cc2 === 'conf-med';
524
- const isLow = cc2 === 'conf-low';
525
- const hiddenClass = (isMed && !showMed) ? ' hidden-med' : (isLow && !showLow) ? ' hidden-low' : '';
526
- return `<div class="pred-row ${cc2}${hiddenClass}" data-name="${escHtml(p.name.toLowerCase())}" data-goid="${escHtml(p.go_id)}">
527
- <div class="pred-main">
528
- <span class="pred-name">${escHtml(p.name)}</span>
529
- <span class="pred-goid"><a href="https://amigo.geneontology.org/amigo/term/${p.go_id}" target="_blank" rel="noopener">${escHtml(p.go_id)}</a></span>
530
- </div>
531
- <div class="pred-right">
532
- <span class="pred-conf-label">${confLabel(p.prob)}</span>
533
- <span class="pred-prob">${(p.prob*100).toFixed(1)}%</span>
534
- ${probBar(p.prob)}
535
- </div>
536
- </div>`;
537
  }).join('');
538
 
539
  const medCollapseRow = medCount > 0
@@ -542,6 +656,16 @@
542
  </div>`
543
  : '';
544
 
 
 
 
 
 
 
 
 
 
 
545
  return `
546
  <div class="result-card" style="animation-delay:${idx*60}ms" data-idx="${globalIdx}">
547
  <div class="result-header">
@@ -553,28 +677,36 @@
553
  <span class="stat-chip">${highCount} high</span>
554
  ${medCount ? `<span class="stat-chip" style="color:var(--med-text);background:var(--med-bg)">${medCount} med</span>` : ''}
555
  ${lowCount ? `<span class="stat-chip muted">${lowCount} low</span>` : ''}
 
556
  </div>
557
  </div>
558
  <div class="pred-list">${predHTML}${medCollapseRow}</div>
 
559
  </div>`;
560
  }).join('');
561
 
562
  applyFilters();
563
  }
564
 
 
 
 
 
 
 
565
  // ── Filters ───────────────────────────────────────────────────────────────
566
  function applyFilters() {
567
  const query = (document.getElementById('termFilter').value || '').toLowerCase().trim();
568
  document.querySelectorAll('.pred-row').forEach(row => {
569
- const name = row.dataset.name || '';
570
- const goid = row.dataset.goid || '';
571
  const isMed = row.classList.contains('conf-med');
572
  const isLow = row.classList.contains('conf-low');
573
- const hiddenByConf = (isMed && !showMed) || (isLow && !showLow);
574
  const hiddenByFilter = query && !name.includes(query) && !goid.includes(query);
575
- row.classList.toggle('hidden-med', isMed && !showMed);
576
- row.classList.toggle('hidden-low', isLow && !showLow);
577
- row.classList.toggle('filtered-out', !hiddenByConf && hiddenByFilter);
578
  });
579
  }
580
 
@@ -611,7 +743,6 @@
611
  if (r.error) continue;
612
  const goList = (r.predictions || []).map(p => `${p.go_id}|${p.name}|${(p.prob*100).toFixed(1)}%`).join('; ');
613
  lines.push(`>${r.name} [${r.sequence_length} aa] GO:MF=${goList}`);
614
- // sequence not returned by server — note this in the file
615
  lines.push('; sequence not included (submit FASTA input to preserve sequence)');
616
  }
617
  triggerDownload(lines.join('\n'), 'protfunc_results.fasta', 'text/plain');
@@ -634,9 +765,7 @@
634
  catch { return []; }
635
  }
636
 
637
- function saveHistory(h) {
638
- localStorage.setItem(HISTORY_KEY, JSON.stringify(h.slice(0, HISTORY_MAX)));
639
- }
640
 
641
  function addToHistory(results, inputSeq) {
642
  const h = loadHistory();
@@ -648,9 +777,9 @@
648
  }
649
 
650
  function renderHistory() {
651
- const h = loadHistory();
652
- const meta = document.getElementById('historyMeta');
653
- const list = document.getElementById('historyList');
654
  meta.textContent = `${h.length} / ${HISTORY_MAX}`;
655
  if (h.length === 0) {
656
  list.innerHTML = '<div class="history-empty">No predictions yet.</div>';
@@ -658,7 +787,7 @@
658
  }
659
  list.innerHTML = `<div class="history-list">` +
660
  h.map((entry, i) => {
661
- const date = new Date(entry.ts).toLocaleString(undefined, { month:'short', day:'numeric', hour:'2-digit', minute:'2-digit' });
662
  const label = entry.n > 1 ? `${entry.name} +${entry.n-1} more` : entry.name;
663
  return `<div class="history-item" onclick="restoreHistory(${i})">
664
  <span class="history-item-name">${escHtml(label)}</span>
@@ -674,18 +803,18 @@
674
  allResults = h[i].results;
675
  currentPage = 1;
676
  ta.value = h[i].inputSeq || '';
677
- const seq = ta.value.replace(/>/g,'').replace(/[^A-Za-z]/g,'');
678
  cc.textContent = seq.length ? `${seq.length.toLocaleString()} aa` : '0 aa';
679
  document.getElementById('resultsToolbar').style.display = 'flex';
680
  renderPage();
681
  }
682
 
683
  function toggleHistory() {
684
- const list = document.getElementById('historyList');
685
- const toggle = document.getElementById('historyToggle');
686
- const visible = list.style.display !== 'none';
687
- list.style.display = visible ? 'none' : 'block';
688
- toggle.textContent = visible ? 'β–Ύ Show' : 'β–΄ Hide';
689
  }
690
 
691
  // ── README ────────────────────────────────────────────────────────────────
@@ -708,7 +837,7 @@ MASLHPPSFAYMRDGRNLSLAESVPAEIMHMVDPYWYQWPPLEPMWFGIIGFVIAILGTMSLAGNFIVMYIFTSSKGLRT
708
  const seq = DEMOS[key];
709
  if (!seq) return;
710
  ta.value = seq;
711
- const aa = seq.replace(/>/g,'').replace(/[^A-Za-z]/g,'');
712
  cc.textContent = aa.length ? `${aa.length.toLocaleString()} aa` : '0 aa';
713
  ta.focus();
714
  }
@@ -723,4 +852,4 @@ MASLHPPSFAYMRDGRNLSLAESVPAEIMHMVDPYWYQWPPLEPMWFGIIGFVIAILGTMSLAGNFIVMYIFTSSKGLRT
723
  renderHistory();
724
  </script>
725
  </body>
726
- </html>
 
173
  .show-med-inline { font-size: 0.75rem; color: var(--med-text); background: var(--med-bg); border: 1px solid var(--med-border); border-radius: 6px; padding: 4px 12px; cursor: pointer; }
174
  .show-med-inline:hover { opacity: 0.8; }
175
 
176
+ /* ── Suppressed predictions panel ── */
177
+ .suppressed-toggle {
178
+ display: flex; align-items: center; gap: 8px; width: 100%;
179
+ padding: 10px 16px; border: none; border-top: 1px dashed var(--border-strong);
180
+ background: transparent; color: var(--text-muted); font-size: 0.78rem;
181
+ font-weight: 500; cursor: pointer; text-align: left;
182
+ transition: color 0.15s, background 0.15s; letter-spacing: 0.01em;
183
+ }
184
+ .suppressed-toggle:hover { color: var(--text-2); background: var(--surface-2); }
185
+ .suppressed-toggle .toggle-icon { font-size: 0.65rem; transition: transform 0.2s ease; flex-shrink: 0; }
186
+ .suppressed-toggle[aria-expanded="true"] .toggle-icon { transform: rotate(90deg); }
187
+ .suppressed-tooltip { margin-left: auto; font-size: 0.68rem; color: var(--text-muted); font-style: italic; font-weight: 400; }
188
+ .suppressed-list { display: none; flex-direction: column; gap: 4px; padding: 8px 16px 12px; border-top: 1px dashed var(--border); background: var(--surface-2); }
189
+ .suppressed-list.open { display: flex; }
190
+ .pred-row.suppressed { opacity: 0.70; border-style: dashed; }
191
+ .pred-row.suppressed .pred-name { color: var(--text-muted); }
192
+ .suppressed-reason { font-size: 0.68rem; color: var(--text-muted); font-style: italic; margin-top: 1px; }
193
+
194
  /* ── Error card ── */
195
  .error-card { background: var(--low-bg); border-color: var(--low-border); }
196
  .seq-error { font-size: 0.8rem; color: var(--low-text); font-weight: 500; }
 
308
 
309
  <div id="status" class="status hidden"></div>
310
 
 
311
  <div id="resultsToolbar" class="results-toolbar" style="display:none">
312
  <div class="toolbar-left">
313
  <input class="filter-input" id="termFilter" placeholder="Filter by term name…" oninput="applyFilters()" />
 
324
  <div id="results" class="results-area"></div>
325
  <div id="pagination" class="pagination" style="display:none"></div>
326
 
 
327
  <div class="history-panel" id="historyPanel">
328
  <div class="history-header" onclick="toggleHistory()">
329
  <span class="history-title">Recent Predictions</span>
 
333
  <div id="historyList" style="display:none"></div>
334
  </div>
335
 
 
336
  <div class="readme-panel">
337
  <div class="readme-header" onclick="toggleReadme()">
338
  <span class="readme-title">About & How to Use</span>
 
356
  <li>Multiple sequences can be submitted at once — separate each with a FASTA header line.</li>
357
  <li>The model accepts sequences up to 2500 amino acids. Longer sequences will be rejected.</li>
358
  <li>Click <strong>Predict Functions</strong> or press <strong>⌘ + Enter</strong>.</li>
359
+ <li>Results are sorted by confidence. High confidence (≥75%) predictions are most reliable.</li>
360
  </ul>
361
  </div>
362
  <hr class="readme-divider"/>
 
367
  <hr class="readme-divider"/>
368
  <div class="readme-section">
369
  <h3>Understanding GO Terms</h3>
370
+ <p>Each prediction shows a GO term ID (e.g. GO:0004672) and its name. Clicking the GO ID opens <a href="https://amigo.geneontology.org" target="_blank" rel="noopener">AmiGO</a>, the Gene Ontology browser, where you can explore the term's definition, its place in the GO hierarchy, and which proteins are annotated with it.</p>
371
  </div>
372
  <hr class="readme-divider"/>
373
  <div class="readme-section">
374
  <h3>Confidence Levels</h3>
375
  <ul>
376
+ <li><strong>High (≥75%)</strong> — strong prediction, consistent with the training distribution.</li>
377
+ <li><strong>Medium (55–75%)</strong> — moderate confidence, hidden by default to reduce noise. Toggle with the toolbar button.</li>
378
  <li><strong>Low (&lt;55%)</strong> — uncertain, hidden by default. Use cautiously as supplementary signal only.</li>
379
  </ul>
380
  </div>
 
395
  </div>
396
  </div>
397
 
398
+ </div>
399
 
400
  <script>
401
  const ta = document.getElementById('sequenceInput');
 
411
  if (savedTheme) applyTheme(savedTheme === 'dark');
412
  else applyTheme(window.matchMedia('(prefers-color-scheme: dark)').matches);
413
 
414
+ // ── Char counter — strips full FASTA header lines before counting ─────────
415
  ta.addEventListener('input', () => {
416
+ const seq = ta.value.split('\n').filter(l => !l.trimStart().startsWith('>')).join('').replace(/[^A-Za-z]/g, '');
417
  cc.textContent = seq.length ? `${seq.length.toLocaleString()} aa` : '0 aa';
418
  });
419
  ta.addEventListener('keydown', e => { if (e.key === 'Enter' && e.metaKey) predict(); });
420
 
421
  // ── Helpers ───────────────────────────────────────────────────────────────
422
+ // confClass and confLabel use matching thresholds (0.75 / 0.55)
423
  function confClass(p) { return p >= 0.75 ? 'conf-high' : p >= 0.55 ? 'conf-med' : 'conf-low'; }
424
+ function confLabel(p) { return p >= 0.75 ? 'High' : p >= 0.55 ? 'Medium' : 'Low'; }
425
  function probBar(p) {
426
  return `<div class="prob-bar-wrap"><div class="prob-bar-fill ${confClass(p)}" style="width:${Math.round(p*100)}%"></div></div>`;
427
  }
 
429
  return String(s).replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;').replace(/"/g,'&quot;');
430
  }
431
 
432
+ // ── Validation ────────────────────────────────────────────────────────────
433
+ const MIN_SEQ_LENGTH = 30;
434
+ const MIN_ENTROPY_BITS = 2.5;
435
+ const MAX_DOMINANT_FRAC = 0.60;
436
+ const MIN_DISTINCT_AA = 5;
437
+ const INVALID_AA_RE = /[BJOUXZ]/gi;
438
+
439
/**
 * Shannon entropy of a sequence's character composition, in bits per character.
 * Case-insensitive: the input is upper-cased before counting.
 *
 * @param {string} seq  Residue string (null/undefined is treated as empty).
 * @returns {number}    Entropy in bits; 0 for empty or single-symbol input.
 */
function sequenceEntropy(seq) {
  const residues = String(seq == null ? '' : seq).toUpperCase();
  const total = residues.length;
  // Nothing to measure; also avoids dividing by zero below.
  if (total === 0) return 0;

  const counts = new Map();
  for (const aa of residues) counts.set(aa, (counts.get(aa) || 0) + 1);

  // Accumulate -p*log2(p) by subtraction so the result is +0 (never -0)
  // when every term is zero.
  let bits = 0;
  for (const count of counts.values()) {
    const p = count / total;
    bits -= p * Math.log2(p);
  }
  return bits;
}
446
+
447
/**
 * Split raw textarea input into FASTA-style records.
 *
 * A line starting with '>' opens a new record and supplies its name; every
 * other non-blank line is appended (whitespace removed) to the current
 * record's residues. Input with no header becomes a single record. Records
 * without a usable name are called "Sequence N".
 *
 * @param {string} raw  Raw user input (null/undefined is treated as empty).
 * @returns {{name: string, residues: string}[]}  Records in input order.
 */
function parseSequences(raw) {
  const text = raw == null ? '' : String(raw);
  const seqs = [];
  let current = null;

  // Accept \n, \r\n and bare \r so pasted text with any line-ending
  // convention is split into real lines.
  for (const line of text.split(/\r\n|\r|\n/)) {
    const trimmed = line.trim();
    if (!trimmed) continue;

    if (trimmed.startsWith('>')) {
      if (current) seqs.push(current);
      current = { name: trimmed.slice(1).trim() || `Sequence ${seqs.length + 1}`, residues: '' };
    } else {
      // Residues before any header belong to an implicit first record.
      if (!current) current = { name: `Sequence ${seqs.length + 1}`, residues: '' };
      current.residues += trimmed.replace(/\s+/g, '');
    }
  }

  if (current) seqs.push(current);
  return seqs;
}
464
+
465
/**
 * Run client-side sanity checks on parsed sequences.
 *
 * Checks, per sequence: non-empty, minimum length, no ambiguity codes
 * (INVALID_AA_RE), minimum number of distinct residues, no single dominant
 * residue, and minimum Shannon entropy. Empty, too-short, too-few-distinct
 * and dominated sequences stop further checks for that sequence; an
 * invalid-character finding does not, so it can be reported together with a
 * complexity problem.
 *
 * @param {{name: string, residues: string}[]} seqs  Records from parseSequences.
 * @returns {string[]}  Error messages (sequence names are HTML-escaped);
 *                      empty when every sequence passes.
 */
function validateSequences(seqs) {
  const errors = [];

  for (const seq of seqs) {
    const label = `"${escHtml(seq.name)}"`;
    const res = seq.residues;

    if (res.length === 0) {
      errors.push(`${label}: sequence is empty.`);
      continue;
    }

    if (res.length < MIN_SEQ_LENGTH) {
      errors.push(`${label}: too short (${res.length} aa — minimum ${MIN_SEQ_LENGTH} aa). Sequences this short are unlikely to fold into a stable domain.`);
      continue;
    }

    // Report each offending letter once, upper-cased.
    const badChars = [...new Set((res.match(INVALID_AA_RE) || []).map(c => c.toUpperCase()))];
    if (badChars.length > 0) {
      errors.push(`${label}: contains invalid amino acid character(s): ${badChars.join(', ')}. Ambiguity codes are not accepted.`);
    }

    // Case-insensitive residue composition.
    const counts = {};
    for (const aa of res.toUpperCase()) counts[aa] = (counts[aa] || 0) + 1;

    const distinct = Object.keys(counts).length;
    if (distinct < MIN_DISTINCT_AA) {
      errors.push(`${label}: only ${distinct} distinct residue type(s). Real proteins require at least ${MIN_DISTINCT_AA}.`);
      continue;
    }

    const maxCount = Math.max(...Object.values(counts));
    const domFrac = maxCount / res.length;
    if (domFrac > MAX_DOMINANT_FRAC) {
      const domAA = Object.keys(counts).find(k => counts[k] === maxCount);
      errors.push(`${label}: dominated by a single residue (${domAA} = ${Math.round(domFrac * 100)}%). Low-complexity sequences produce unreliable embeddings.`);
      continue;
    }

    const H = sequenceEntropy(res);
    if (H < MIN_ENTROPY_BITS) {
      errors.push(`${label}: very low sequence complexity (Shannon entropy ${H.toFixed(2)} bits — minimum ${MIN_ENTROPY_BITS} bits). Repetitive or artificially constructed sequences are not accepted.`);
    }
  }

  return errors;
}
507
+
508
  // ── State ─────────────────────────────────────────────────────────────────
509
+ let allResults = [];
510
+ let showMed = true;
511
+ let showLow = false;
512
+ let currentPage = 1;
513
+ const PAGE_SIZE = 10;
514
 
515
  // ── Predict ───────────────────────────────────────────────────────────────
516
  async function predict() {
517
+ const raw = ta.value.trim();
518
+ if (!raw) { shake(); return; }
519
 
520
  const btn = document.getElementById('predictBtn');
521
  const status = document.getElementById('status');
522
 
523
+ // Client-side validation
524
+ const seqs = parseSequences(raw);
525
+ if (seqs.length === 0) { shake(); return; }
526
+
527
+ const validationErrors = validateSequences(seqs);
528
+ if (validationErrors.length > 0) {
529
+ status.className = 'status visible';
530
+ status.innerHTML = `<span class="err-msg">⚠ ${validationErrors.join('<br>⚠ ')}</span>`;
531
+ document.getElementById('results').innerHTML = '';
532
+ shake();
533
+ return;
534
+ }
535
+
536
  btn.disabled = true;
537
  btn.querySelector('.btn-label').textContent = 'Running…';
538
  status.className = 'status visible';
 
545
  const res = await fetch('/predict', {
546
  method: 'POST',
547
  headers: { 'Content-Type': 'application/json' },
548
+ body: JSON.stringify({ sequence: raw }),
549
  });
550
  if (!res.ok) throw new Error(`Server error ${res.status}`);
551
  const data = await res.json();
 
555
  renderPage();
556
  if (allResults.length > 0) {
557
  document.getElementById('resultsToolbar').style.display = 'flex';
558
+ addToHistory(allResults, raw);
559
  }
560
  } catch(e) {
561
  status.className = 'status visible';
 
575
  }
576
 
577
  function renderPagination() {
578
+ const total = Math.ceil(allResults.length / PAGE_SIZE);
579
+ const pag = document.getElementById('pagination');
580
  if (total <= 1) { pag.style.display = 'none'; return; }
581
  pag.style.display = 'flex';
582
  let html = `<button class="page-btn" onclick="goPage(${currentPage-1})" ${currentPage===1?'disabled':''}>β€Ή Prev</button>`;
 
597
  }
598
 
599
  // ── Render results ────────────────────────────────────────────────────────
600
/**
 * Build the HTML for a single prediction row.
 *
 * @param {{name?: string, go_id?: string, prob: number}} p  One prediction.
 * @param {string} [extraClass]  Extra CSS class for the row; the value
 *        'suppressed' additionally renders the "parent term" explanation.
 * @returns {string}  Row markup with all prediction-derived text escaped.
 */
function predRowHTML(p, extraClass) {
  extraClass = extraClass || '';
  // Fall back to '' so a missing field never renders as the text "undefined".
  const name = String(p.name || '');
  const goId = String(p.go_id || '');
  const tierClass = confClass(p.prob);
  // The GO ID comes from the server response: percent-encode it for the URL
  // and HTML-escape it so it cannot break out of the href attribute.
  const termUrl = `https://amigo.geneontology.org/amigo/term/${escHtml(encodeURI(goId))}`;
  const reason = extraClass === 'suppressed'
    ? '<span class="suppressed-reason">Parent term not predicted above threshold</span>'
    : '';

  return `<div class="pred-row ${tierClass} ${extraClass}" data-name="${escHtml(name.toLowerCase())}" data-goid="${escHtml(goId)}">
    <div class="pred-main">
      <span class="pred-name">${escHtml(name)}</span>
      <span class="pred-goid"><a href="${termUrl}" target="_blank" rel="noopener">${escHtml(goId)}</a></span>
      ${reason}
    </div>
    <div class="pred-right">
      <span class="pred-conf-label">${confLabel(p.prob)}</span>
      <span class="pred-prob">${(p.prob*100).toFixed(1)}%</span>
      ${probBar(p.prob)}
    </div>
  </div>`;
}
616
+
617
  function renderResults(results, offset) {
618
  const container = document.getElementById('results');
619
  if (!results || results.length === 0) {
 
632
  </div>
633
  </div>`;
634
  }
635
+
636
+ const preds = r.predictions || [];
637
+ const suppressed = r.suppressed || [];
638
+ const seqName = r.name || `Sequence ${globalIdx+1}`;
639
+ const highCount = preds.filter(p => p.prob >= 0.75).length;
640
+ const medCount = preds.filter(p => p.prob >= 0.55 && p.prob < 0.75).length;
641
+ const lowCount = preds.filter(p => p.prob < 0.55).length;
642
 
643
  const predHTML = preds.length === 0
644
  ? '<p class="no-preds">No functions predicted above confidence threshold.</p>'
645
  : preds.map(p => {
646
+ const cc2 = confClass(p.prob);
647
+ const isMed = cc2 === 'conf-med';
648
+ const isLow = cc2 === 'conf-low';
649
+ const hiddenCl = (isMed && !showMed) ? ' hidden-med' : (isLow && !showLow) ? ' hidden-low' : '';
650
+ return predRowHTML(p, hiddenCl.trim());
 
 
 
 
 
 
 
 
 
 
651
  }).join('');
652
 
653
  const medCollapseRow = medCount > 0
 
656
  </div>`
657
  : '';
658
 
659
+ const suppressedHTML = suppressed.length === 0 ? '' : `
660
+ <button class="suppressed-toggle" aria-expanded="false" onclick="toggleSuppressed(this)">
661
+ <span class="toggle-icon">β–Ά</span>
662
+ ${suppressed.length} potential function${suppressed.length > 1 ? 's' : ''} hidden
663
+ <span class="suppressed-tooltip">parent term not predicted β€” click to inspect</span>
664
+ </button>
665
+ <div class="suppressed-list">
666
+ ${suppressed.map(p => predRowHTML(p, 'suppressed')).join('')}
667
+ </div>`;
668
+
669
  return `
670
  <div class="result-card" style="animation-delay:${idx*60}ms" data-idx="${globalIdx}">
671
  <div class="result-header">
 
677
  <span class="stat-chip">${highCount} high</span>
678
  ${medCount ? `<span class="stat-chip" style="color:var(--med-text);background:var(--med-bg)">${medCount} med</span>` : ''}
679
  ${lowCount ? `<span class="stat-chip muted">${lowCount} low</span>` : ''}
680
+ ${suppressed.length ? `<span class="stat-chip muted">${suppressed.length} suppressed</span>` : ''}
681
  </div>
682
  </div>
683
  <div class="pred-list">${predHTML}${medCollapseRow}</div>
684
+ ${suppressedHTML}
685
  </div>`;
686
  }).join('');
687
 
688
  applyFilters();
689
  }
690
 
691
/**
 * Expand or collapse the suppressed-predictions panel that follows `btn`.
 * Flips the button's aria-expanded attribute and sets the 'open' class on
 * the button's next sibling element to match.
 */
function toggleSuppressed(btn) {
  const willOpen = btn.getAttribute('aria-expanded') !== 'true';
  btn.setAttribute('aria-expanded', willOpen ? 'true' : 'false');
  const panel = btn.nextElementSibling;
  panel.classList.toggle('open', willOpen);
}
696
+
697
  // ── Filters ───────────────────────────────────────────────────────────────
698
  function applyFilters() {
699
  const query = (document.getElementById('termFilter').value || '').toLowerCase().trim();
700
  document.querySelectorAll('.pred-row').forEach(row => {
701
+ const name = row.dataset.name || '';
702
+ const goid = row.dataset.goid || '';
703
  const isMed = row.classList.contains('conf-med');
704
  const isLow = row.classList.contains('conf-low');
705
+ const hiddenByConf = (isMed && !showMed) || (isLow && !showLow);
706
  const hiddenByFilter = query && !name.includes(query) && !goid.includes(query);
707
+ row.classList.toggle('hidden-med', isMed && !showMed);
708
+ row.classList.toggle('hidden-low', isLow && !showLow);
709
+ row.classList.toggle('filtered-out', !hiddenByConf && hiddenByFilter);
710
  });
711
  }
712
 
 
743
  if (r.error) continue;
744
  const goList = (r.predictions || []).map(p => `${p.go_id}|${p.name}|${(p.prob*100).toFixed(1)}%`).join('; ');
745
  lines.push(`>${r.name} [${r.sequence_length} aa] GO:MF=${goList}`);
 
746
  lines.push('; sequence not included (submit FASTA input to preserve sequence)');
747
  }
748
  triggerDownload(lines.join('\n'), 'protfunc_results.fasta', 'text/plain');
 
765
  catch { return []; }
766
  }
767
 
768
/**
 * Persist the newest HISTORY_MAX history entries to localStorage.
 * Best-effort: storage may be full or unavailable (e.g. private browsing),
 * and a failure to save must not surface as a failed prediction.
 */
function saveHistory(h) {
  try {
    localStorage.setItem(HISTORY_KEY, JSON.stringify(h.slice(0, HISTORY_MAX)));
  } catch (err) {
    console.warn('Could not save prediction history:', err);
  }
}
 
 
769
 
770
  function addToHistory(results, inputSeq) {
771
  const h = loadHistory();
 
777
  }
778
 
779
  function renderHistory() {
780
+ const h = loadHistory();
781
+ const meta = document.getElementById('historyMeta');
782
+ const list = document.getElementById('historyList');
783
  meta.textContent = `${h.length} / ${HISTORY_MAX}`;
784
  if (h.length === 0) {
785
  list.innerHTML = '<div class="history-empty">No predictions yet.</div>';
 
787
  }
788
  list.innerHTML = `<div class="history-list">` +
789
  h.map((entry, i) => {
790
+ const date = new Date(entry.ts).toLocaleString(undefined, { month:'short', day:'numeric', hour:'2-digit', minute:'2-digit' });
791
  const label = entry.n > 1 ? `${entry.name} +${entry.n-1} more` : entry.name;
792
  return `<div class="history-item" onclick="restoreHistory(${i})">
793
  <span class="history-item-name">${escHtml(label)}</span>
 
803
  allResults = h[i].results;
804
  currentPage = 1;
805
  ta.value = h[i].inputSeq || '';
806
+ const seq = ta.value.split('\n').filter(l => !l.trimStart().startsWith('>')).join('').replace(/[^A-Za-z]/g, '');
807
  cc.textContent = seq.length ? `${seq.length.toLocaleString()} aa` : '0 aa';
808
  document.getElementById('resultsToolbar').style.display = 'flex';
809
  renderPage();
810
  }
811
 
812
  function toggleHistory() {
813
+ const list = document.getElementById('historyList');
814
+ const toggle = document.getElementById('historyToggle');
815
+ const vis = list.style.display !== 'none';
816
+ list.style.display = vis ? 'none' : 'block';
817
+ toggle.textContent = vis ? 'β–Ύ Show' : 'β–΄ Hide';
818
  }
819
 
820
  // ── README ────────────────────────────────────────────────────────────────
 
837
  const seq = DEMOS[key];
838
  if (!seq) return;
839
  ta.value = seq;
840
+ const aa = seq.split('\n').filter(l => !l.trimStart().startsWith('>')).join('').replace(/[^A-Za-z]/g, '');
841
  cc.textContent = aa.length ? `${aa.length.toLocaleString()} aa` : '0 aa';
842
  ta.focus();
843
  }
 
852
  renderHistory();
853
  </script>
854
  </body>
855
+ </html>