jebin511 committed on
Commit
12d3f02
·
verified ·
1 Parent(s): 40dcb6c

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +55 -0
  2. mutation_model.joblib +3 -0
  3. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ import gradio as gr
3
+ import joblib
4
+ import numpy as np
5
+ from collections import Counter
6
+ from typing import List
7
+
8
+ # helper: k-mer extraction / vectorize (k=3)
9
def kmer_counts(seq: str, k: int = 3) -> Counter:
    """Count every overlapping k-mer in a sequence.

    The sequence is upper-cased and all whitespace (leading, trailing and
    embedded, e.g. line breaks from a multi-line paste) is removed before
    the sliding window is applied, so whitespace never ends up inside a
    k-mer.

    Args:
        seq: Raw sequence text.
        k: Window length; must be at least 1.

    Returns:
        A Counter mapping each k-mer to its number of occurrences. It is
        empty when the cleaned sequence is shorter than ``k``.

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """
    if k < 1:
        # k == 0 would count the empty string len(seq) + 1 times and a
        # negative k would produce meaningless slices.
        raise ValueError(f"k must be a positive integer, got {k}")
    # str.split() with no argument splits on any whitespace run, so joining
    # the pieces drops every whitespace character, not just the outer ones.
    cleaned = "".join(seq.split()).upper()
    # range() is empty when the sequence is shorter than k, which yields an
    # empty Counter without a separate length check.
    return Counter(cleaned[i:i + k] for i in range(len(cleaned) - k + 1))
17
+
18
def vectorize_single(seq: str, vocab: List[str], k=3):
    """Turn one sequence into a single-row k-mer count matrix.

    Args:
        seq: Sequence text to vectorize.
        vocab: Ordered list of k-mers; column ``j`` of the result holds the
            count of ``vocab[j]`` in ``seq``.
        k: k-mer length passed through to ``kmer_counts``.

    Returns:
        A float NumPy array of shape ``(1, len(vocab))``. K-mers from the
        vocabulary that do not occur in ``seq`` get a count of 0.
    """
    observed = kmer_counts(seq, k)
    # One row, in vocabulary order; absent k-mers default to zero.
    row = [observed.get(kmer, 0) for kmer in vocab]
    return np.array([row], dtype=float)
24
+
25
# Load the (model, vocab) pair at import time. The file mutation_model.joblib
# must be uploaded next to this script; the path is resolved against the
# current working directory.
# NOTE(review): joblib.load unpickles the file, so only load artifacts from a
# trusted source.
model, vocab = joblib.load("mutation_model.joblib")
27
+
28
def predict_sequence(sequence: str):
    """Classify one sequence with the module-level model.

    Args:
        sequence: Sequence text as entered by the user.

    Returns:
        ``{"error": ...}`` when the input is empty or shorter than three
        characters after stripping. Otherwise a dict with the original
        ``sequence``, the prediction coerced to ``bool`` under
        ``mutation_detected``, and ``confidence`` (the largest value returned
        by ``predict_proba``, or ``None`` when the model has no such method).
    """
    too_short = not sequence or len(sequence.strip()) < 3
    if too_short:
        return {"error":"sequence too short"}

    features = vectorize_single(sequence, vocab=vocab, k=3)
    label = model.predict(features)[0]

    # Not every estimator offers probability estimates.
    if hasattr(model, "predict_proba"):
        confidence = float(model.predict_proba(features).max())
    else:
        confidence = None

    result = {
        "sequence": sequence,
        "mutation_detected": bool(label),
        "confidence": confidence,
    }
    return result
39
+
40
+ # Gradio UI
41
with gr.Blocks() as demo:
    # Components are rendered in the order they are created here.
    gr.Markdown("# DNA Mutation Detector (Quick Space)")
    seq_in = gr.Textbox(
        label="DNA sequence",
        placeholder="ATGCGTACGTTAGC...",
    )
    btn = gr.Button("Analyze")
    out = gr.JSON()
    # Clicking the button runs the prediction and shows the dict as JSON.
    btn.click(
        fn=predict_sequence,
        inputs=[seq_in],
        outputs=[out],
    )
47
+
48
# Programmatic convenience wrapper around predict_sequence for callers that
# hold a dict payload. Defining this function does not by itself register an
# HTTP route; nothing in this file wires it to the Gradio app.
# NOTE(review): the original comment stated that Gradio provides /api/predict
# automatically -- confirm against the installed Gradio version.
def api_predict(payload: dict):
    """Run a prediction for a dict payload.

    Args:
        payload: Mapping expected to carry the sequence text under the
            ``"sequence"`` key. ``None`` or an empty mapping is treated as a
            missing sequence instead of raising.

    Returns:
        Whatever ``predict_sequence`` returns for the extracted sequence
        (an empty string when the key is absent).
    """
    # ``payload or {}`` keeps a None payload from failing on ``.get``.
    seq = (payload or {}).get("sequence", "")
    return predict_sequence(seq)
53
+
54
# Start the Gradio app only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    demo.launch()
mutation_model.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cecba933e452e5a139a2f7f7cb85b35976128c42f20e48e26013f3cabfc56d75
3
+ size 305695
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ gradio>=3.0
2
+ joblib
3
+ numpy
4
+ scikit-learn