File size: 2,378 Bytes
715f504
12d3f02
 
 
 
 
e1678bf
 
 
 
12d3f02
 
 
 
 
 
e1678bf
12d3f02
 
 
 
e1678bf
12d3f02
e1678bf
 
 
 
 
 
 
 
 
 
 
 
12d3f02
e1678bf
12d3f02
e1678bf
12d3f02
 
e1678bf
 
12d3f02
 
 
e1678bf
12d3f02
 
 
e1678bf
12d3f02
 
e1678bf
 
 
 
 
 
715f504
e1678bf
 
 
 
 
 
 
 
 
 
 
 
 
 
12d3f02
e1678bf
12d3f02
 
 
 
 
715f504
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# file: app.py
import gradio as gr
import joblib
import numpy as np
from collections import Counter
from typing import List
import os

# --- Helper Functions ---
# The four DNA nucleotide letters. NOTE(review): nothing in the visible code
# references this constant — confirm whether it is still needed.
BASES = ['A', 'T', 'C', 'G']

def kmer_counts(seq: str, k=3):
    """Count every overlapping substring of length ``k`` in ``seq``.

    The sequence is stripped of surrounding whitespace and upper-cased
    before counting, so the keys of the result are always upper-case.

    Args:
        seq: Input sequence text.
        k: Window length (defaults to 3).

    Returns:
        A ``Counter`` mapping each k-mer to its number of occurrences.
        It is empty when the cleaned sequence is shorter than ``k``.
    """
    cleaned = seq.strip().upper()
    window_total = len(cleaned) - k + 1
    # No complete window fits: nothing to count.
    if window_total <= 0:
        return Counter()
    return Counter(cleaned[start:start + k] for start in range(window_total))

def vectorize_single(seq: str, vocab: List[str], k=3):
    """Turn one sequence into a single-row k-mer count matrix.

    Args:
        seq: Input sequence text; it is tokenised by ``kmer_counts``.
        vocab: Ordered k-mers; column ``j`` of the result holds the count
            of ``vocab[j]`` in ``seq``.
        k: Window length forwarded to ``kmer_counts`` (defaults to 3).

    Returns:
        A float NumPy array of shape ``(1, len(vocab))``. K-mers that do
        not occur in ``seq`` contribute 0; k-mers of ``seq`` that are not
        in ``vocab`` are ignored.
    """
    features = np.zeros((1, len(vocab)), dtype=float)
    tallies = kmer_counts(seq, k)
    # Fill the only row in vocabulary order.
    features[0, :] = [tallies.get(term, 0) for term in vocab]
    return features

# --- Load Model ---
# Relative path of the serialized model artifact; being relative, it is
# resolved against the process's current working directory.
MODEL_PATH = "mutation_model.joblib"

# Fail fast at import time with an actionable message instead of letting
# joblib raise a less specific error further down.
if not os.path.exists(MODEL_PATH):
    raise FileNotFoundError(
        f"⚠️ Model file '{MODEL_PATH}' not found. "
        "Please upload 'mutation_model.joblib' along with this app."
    )

# The artifact is unpacked into exactly two objects: the estimator and the
# k-mer vocabulary that defines the feature-column order.
# NOTE(review): joblib.load is pickle-based and can execute arbitrary code
# while loading — only ship/load model files from a trusted source.
model, vocab = joblib.load(MODEL_PATH)

# --- Prediction Logic ---
def predict_sequence(sequence: str):
    """Classify a DNA sequence with the loaded model and report the result.

    All whitespace (including line breaks from the multi-line textbox) is
    removed before validation and feature extraction, so only actual
    characters count towards the three-base minimum and no k-mer ever
    spans a space or newline.

    Args:
        sequence: Raw user input. Anything that is not a string, or that
            contains fewer than three non-whitespace characters, is
            rejected.

    Returns:
        dict: ``{"error": <message>}`` for rejected input. Otherwise a dict
        with ``"sequence"`` (the input exactly as received),
        ``"mutation_detected"`` (``bool`` of the predicted label) and
        ``"confidence"`` (highest class probability rounded to 3 decimals,
        or ``"N/A"`` when the model has no ``predict_proba``).
    """
    invalid_input = {"error": "Please enter a valid DNA sequence (≥3 bases)."}

    if not isinstance(sequence, str):
        return invalid_input

    # Drop every whitespace character, not just leading/trailing ones:
    # "A T" holds only two bases and must not pass the length check.
    cleaned = "".join(sequence.split())
    if len(cleaned) < 3:
        return invalid_input

    X = vectorize_single(cleaned, vocab=vocab, k=3)
    # NOTE(review): bool(pred) assumes the model emits a 0/1-style label;
    # any non-empty string label would always be truthy — confirm.
    pred = model.predict(X)[0]
    prob = float(model.predict_proba(X).max()) if hasattr(model, "predict_proba") else None

    return {
        "sequence": sequence,
        "mutation_detected": bool(pred),
        # Compare against None explicitly so the probability is never
        # dropped just because it happens to be falsy.
        "confidence": round(prob, 3) if prob is not None else "N/A",
    }

# --- Gradio Interface ---
# Build the UI declaratively: components created inside the `with` block
# are attached to `demo`, which is launched from the __main__ guard.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # Page header (title and one-line description) written as inline HTML.
    gr.Markdown(
        """
        <h1 style="text-align:center;">🧬 DNA Mutation Analyzer</h1>
        <p style="text-align:center;">
        Enter a DNA sequence to check for mutations using the ML model.
        </p>
        """
    )

    with gr.Row():
        # Two-line text input holding the sequence to analyse.
        seq_input = gr.Textbox(
            label="DNA Sequence",
            placeholder="Enter sequence like ATGCGTACGTTAGC...",
            lines=2,
        )
    analyze_btn = gr.Button("🔍 Analyze Sequence")
    # The dict returned by predict_sequence is shown in this JSON component.
    result = gr.JSON(label="Analysis Result")

    # Clicking the button passes the textbox value to predict_sequence and
    # displays its return value in `result`.
    analyze_btn.click(fn=predict_sequence, inputs=seq_input, outputs=result)

# --- Helper for programmatic access (dict payload in, result dict out); not wired to any Gradio event in the code shown here ---
def api_predict(payload: dict):
    """Run a prediction for a dict-shaped request.

    Args:
        payload: Mapping that may carry the sequence under the
            ``"sequence"`` key; a missing key is treated as an empty
            string.

    Returns:
        Whatever ``predict_sequence`` returns for the extracted sequence.
    """
    return predict_sequence(payload.get("sequence", ""))

if __name__ == "__main__":
    # Start the Gradio app only when this file is executed as a script.
    # share=True requests a public link in addition to the local server.
    # NOTE(review): ssr_mode=False presumably turns off Gradio's server-side
    # rendering; confirm the installed Gradio version accepts this keyword.
    demo.launch(share=True, ssr_mode=False)