Spaces:

Rthur2003
/

crowncode-backend

Sleeping

App Files Files Community

Rthur2003 committed on Apr 16

Commit

4e0bd69

1 Parent(s): d20e581

feat: add local demo for AURIS AI music detection with Gradio interface

Browse files

Files changed (1) hide show

local_demo.py +488 -0

local_demo.py ADDED Viewed

	@@ -0,0 +1,488 @@

+"""
+AURIS Local Demo — AI Music Detection
+Gradio arayüzü ile eğitilmiş modeli doğrudan test et.
+Backend'e gerek yok, model local'de çalışır.
+Çalıştır:
+    python local_demo.py
+"""
+import io
+import json
+import pickle
+import time
+from pathlib import Path
+import gradio as gr
+import numpy as np
+# ── Model yükleme ──────────────────────────────────────────────────
+MODELS_DIR = Path(__file__).parent / "models"
+FIGURES_DIR = Path(__file__).parent.parent / "docs" / "academic" / "figures"
+with open(MODELS_DIR / "auris_classifier_v1.pkl", "rb") as f:
+    model = pickle.load(f)
+with open(MODELS_DIR / "feature_scaler_v1.pkl", "rb") as f:
+    scaler = pickle.load(f)
+with open(MODELS_DIR / "feature_columns_v1.json", "r") as f:
+    feature_cols = json.load(f)
+with open(MODELS_DIR / "training_results.json", "r") as f:
+    training_results = json.load(f)
+best_model_name = training_results.get("_best_model", "Gradient Boosting")
+n_features = training_results.get("_n_features", 47)
+importance = training_results.get("_feature_importance", {})
+top_features = sorted(importance.items(), key=lambda x: x[1], reverse=True)[:10]
+print(f"Model: {best_model_name} | Features: {n_features}")
+print(f"Figures: {FIGURES_DIR}")
+# ── Feature extraction (simplified — same as training pipeline) ────
+def extract_features_from_audio(audio_path: str) -> dict:
+    """Extract 47 features from audio file using librosa."""
+    import librosa
+    from scipy import stats as sp_stats
+    y, sr = librosa.load(audio_path, sr=22050, mono=True, duration=60.0)
+    duration_sec = len(y) / sr
+    # RMS energy
+    rms = librosa.feature.rms(y=y, hop_length=512)[0]
+    rms_mean = float(np.mean(rms))
+    rms_std = float(np.std(rms))
+    rms_dynamic_range = float(np.max(rms) - np.min(rms))
+    # Spectral features
+    cent = librosa.feature.spectral_centroid(y=y, sr=sr, hop_length=512)[0]
+    flat = librosa.feature.spectral_flatness(y=y, hop_length=512)[0]
+    bw = librosa.feature.spectral_bandwidth(y=y, sr=sr, hop_length=512)[0]
+    rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr, hop_length=512)[0]
+    contrast = librosa.feature.spectral_contrast(y=y, sr=sr, hop_length=512)
+    # MFCCs
+    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13, hop_length=512)
+    mfcc_delta = librosa.feature.delta(mfcc)
+    mfcc_delta2 = librosa.feature.delta(mfcc, order=2)
+    # Zero crossing
+    zcr = librosa.feature.zero_crossing_rate(y, hop_length=512)[0]
+    # Tempo
+    tempo, beats = librosa.beat.beat_track(y=y, sr=sr, hop_length=512)
+    tempo_val = float(np.atleast_1d(tempo)[0])
+    beat_times = librosa.frames_to_time(beats, sr=sr, hop_length=512)
+    if len(beat_times) > 1:
+        ibi = np.diff(beat_times)
+        tempo_stability = float(np.std(ibi))
+        tempo_cv = float(np.std(ibi) / np.mean(ibi)) if np.mean(ibi) > 0 else 0.0
+    else:
+        tempo_stability = 0.0
+        tempo_cv = 0.0
+    # Chroma
+    chroma = librosa.feature.chroma_stft(y=y, sr=sr, hop_length=512)
+    chroma_std = float(np.mean(np.std(chroma, axis=1)))
+    chroma_entropy = float(-np.sum(
+        np.mean(chroma, axis=1) * np.log2(np.mean(chroma, axis=1) + 1e-10)
+    ))
+    chroma_diff = np.diff(chroma, axis=1)
+    chroma_transition_rate = float(np.mean(np.abs(chroma_diff)))
+    # Tonnetz
+    tonnetz = librosa.feature.tonnetz(y=y, sr=sr)
+    tonnetz_std = float(np.mean(np.std(tonnetz, axis=1)))
+    # Harmonic ratio
+    y_harm, y_perc = librosa.effects.hpss(y)
+    harm_energy = float(np.sum(y_harm ** 2))
+    perc_energy = float(np.sum(y_perc ** 2))
+    total_energy = harm_energy + perc_energy + 1e-10
+    harmonic_ratio = harm_energy / total_energy
+    # Mel features
+    mel = librosa.feature.melspectrogram(y=y, sr=sr, hop_length=512)
+    mel_db = librosa.power_to_db(mel)
+    mel_flatness = float(np.mean(librosa.feature.spectral_flatness(S=mel)))
+    # Onset
+    onset_env = librosa.onset.onset_strength(y=y, sr=sr, hop_length=512)
+    # Pitch
+    pitches, magnitudes = librosa.piptrack(y=y, sr=sr, hop_length=512)
+    pitch_vals = []
+    for t in range(pitches.shape[1]):
+        idx = magnitudes[:, t].argmax()
+        p = pitches[idx, t]
+        if p > 50:
+            pitch_vals.append(p)
+    pitch_mean_hz = float(np.mean(pitch_vals)) if pitch_vals else 0.0
+    if len(pitch_vals) > 1 and pitch_mean_hz > 0:
+        cents = 1200 * np.log2(np.array(pitch_vals) / pitch_mean_hz + 1e-10)
+        pitch_std_cents = float(np.std(cents))
+    else:
+        pitch_std_cents = 0.0
+    # Heuristic scores (same sigmoid as training)
+    def _sigmoid(x, center=0.5, steepness=6.0):
+        return 1.0 / (1.0 + np.exp(-steepness * (x - center)))
+    spectral_regularity = float(_sigmoid(1.0 - float(np.std(flat)), 0.5, 4))
+    temporal_patterns = float(_sigmoid(1.0 - tempo_cv, 0.6, 5) if tempo_cv > 0 else 0.5)
+    harmonic_structure = float(_sigmoid(harmonic_ratio, 0.5, 4))
+    # Build feature dict matching feature_columns_v1.json order
+    feats = {
+        "rms_energy": rms_mean,
+        "rms_std": rms_std,
+        "spectral_centroid_mean": float(np.mean(cent)),
+        "spectral_centroid_std": float(np.std(cent)),
+        "spectral_flatness_mean": float(np.mean(flat)),
+        "spectral_flatness_std": float(np.std(flat)),
+        "spectral_bandwidth_mean": float(np.mean(bw)),
+        "spectral_bandwidth_std": float(np.std(bw)),
+        "spectral_rolloff_mean": float(np.mean(rolloff)),
+        "spectral_rolloff_std": float(np.std(rolloff)),
+        "spectral_contrast_mean": float(np.mean(contrast)),
+        "spectral_contrast_std": float(np.std(contrast)),
+        "mfcc_variance": float(np.mean(np.var(mfcc, axis=1))),
+        "mfcc_delta_var": float(np.mean(np.var(mfcc_delta, axis=1))),
+        "mfcc_delta2_var": float(np.mean(np.var(mfcc_delta2, axis=1))),
+        "zero_crossing_rate": float(np.mean(zcr)),
+        "zero_crossing_std": float(np.std(zcr)),
+        "tempo_bpm": tempo_val,
+        "tempo_stability": tempo_stability,
+        "tempo_cv": tempo_cv,
+        "beat_count": float(len(beats)),
+        "rms_dynamic_range": rms_dynamic_range,
+        "chroma_std": chroma_std,
+        "chroma_entropy": chroma_entropy,
+        "chroma_transition_rate": chroma_transition_rate,
+        "tonnetz_std": tonnetz_std,
+        "harmonic_ratio": harmonic_ratio,
+        "mel_flatness": mel_flatness,
+        "onset_strength_mean": float(np.mean(onset_env)),
+        "onset_strength_std": float(np.std(onset_env)),
+        "pitch_mean_hz": pitch_mean_hz,
+        "pitch_std_cents": pitch_std_cents,
+        "spectral_regularity": spectral_regularity,
+        "temporal_patterns": temporal_patterns,
+        "harmonic_structure": harmonic_structure,
+        "vocal_confidence": 0.0,
+        "vocal_ai_score": 0.0,
+        "vocal_energy_ratio": 0.0,
+        "vocal_harmonic_ratio": 0.0,
+        "vocal_texture_score": 0.0,
+        "has_vocals": 0.0,
+        "pitch_stability_score": float(_sigmoid(1.0 - min(pitch_std_cents / 200, 1.0), 0.5, 4)),
+        "vibrato_rate_hz": 0.0,
+        "vibrato_extent_cents": 0.0,
+        "vibrato_regularity_score": 0.0,
+        "formant_consistency_score": 0.0,
+        "breath_pattern_score": float(_sigmoid(rms_dynamic_range, 0.3, 5)),
+    }
+    return feats, duration_sec
+# ── Prediction ──────────────────────────────────────────────────
+def predict(audio_file):
+    """Run AURIS model on uploaded audio."""
+    if audio_file is None:
+        return (
+            "Dosya yükleyin / Upload a file",
+            None, None, None, None, None
+        )
+    t0 = time.time()
+    # Handle Gradio audio input (can be tuple or path)
+    if isinstance(audio_file, tuple):
+        audio_path = audio_file[0] if isinstance(audio_file[0], str) else None
+        if audio_path is None:
+            return ("Geçersiz dosya", None, None, None, None, None)
+    else:
+        audio_path = audio_file
+    try:
+        feats, duration = extract_features_from_audio(audio_path)
+    except Exception as e:
+        return (f"Hata: {e}", None, None, None, None, None)
+    # Build feature vector in correct column order
+    X = np.array([[feats.get(col, 0.0) for col in feature_cols]], dtype=np.float32)
+    X = np.nan_to_num(X, nan=0.0, posinf=1.0, neginf=-1.0)
+    X_scaled = scaler.transform(X)
+    elapsed = time.time() - t0
+    # Get probability
+    prob = model.predict_proba(X_scaled)[0]
+    ai_prob = float(prob[1])
+    human_prob = float(prob[0])
+    is_ai = ai_prob > 0.5
+    # Verdict
+    if ai_prob > 0.8:
+        verdict = f"AI Üretimi Müzik Tespit Edildi — %{ai_prob*100:.1f} güven"
+        color = "#a64b3c"
+    elif ai_prob > 0.5:
+        verdict = f"Muhtemelen AI Üretimi — %{ai_prob*100:.1f} güven"
+        color = "#c99347"
+    elif ai_prob > 0.3:
+        verdict = f"Muhtemelen İnsan Yapımı — %{human_prob*100:.1f} güven"
+        color = "#c99347"
+    else:
+        verdict = f"İnsan Yapımı Müzik — %{human_prob*100:.1f} güven"
+        color = "#7fb069"
+    # Feature scores display
+    sr_pct = feats["spectral_regularity"] * 100
+    tp_pct = feats["temporal_patterns"] * 100
+    hs_pct = feats["harmonic_structure"] * 100
+    details_md = f"""
+## Sonuç / Result
+| | |
+|---|---|
+| **Karar** | {'AI Üretimi' if is_ai else 'İnsan Yapımı'} |
+| **AI Olasılığı** | %{ai_prob*100:.1f} |
+| **İnsan Olasılığı** | %{human_prob*100:.1f} |
+| **Model** | {best_model_name} |
+| **Süre** | {duration:.1f}s |
+| **İşlem Süresi** | {elapsed:.2f}s |
+## Ses Özellik Analizi
+| Özellik | Skor | Yorum |
+|---------|------|-------|
+| Spektral Düzenlilik | %{sr_pct:.0f} | {'AI benzeri düzenlilik' if sr_pct > 60 else 'Doğal varyasyon'} |
+| Zamansal Örüntüler | %{tp_pct:.0f} | {'Metronomik hassasiyet' if tp_pct > 60 else 'Doğal zamanlama'} |
+| Harmonik Yapı | %{hs_pct:.0f} | {'Tahmin edilebilir paternler' if hs_pct > 60 else 'Organik harmonik yapı'} |
+## En Önemli 10 Özellik (Bu Dosya İçin)
+| Özellik | Değer | Global Önem |
+|---------|-------|-------------|
+"""
+    for fname, imp in top_features:
+        val = feats.get(fname, 0.0)
+        details_md += f"| {fname} | {val:.4f} | {imp:.4f} |\n"
+    # Gauge plot
+    import matplotlib
+    matplotlib.use("Agg")
+    import matplotlib.pyplot as plt
+    import matplotlib.patches as mpatches
+    fig, ax = plt.subplots(figsize=(6, 3), subplot_kw={"projection": "polar"})
+    fig.patch.set_facecolor("#1a1207")
+    theta = np.linspace(np.pi, 0, 100)
+    r = np.ones(100)
+    # Background arc
+    ax.plot(theta, r, color="#3d2817", linewidth=20, alpha=0.3)
+    # Score arc
+    score_end = int(ai_prob * 100)
+    if score_end > 0:
+        c = "#7fb069" if ai_prob < 0.4 else "#c99347" if ai_prob < 0.7 else "#a64b3c"
+        ax.plot(theta[:score_end], r[:score_end], color=c, linewidth=20)
+    # Needle
+    needle_angle = np.pi - ai_prob * np.pi
+    ax.plot([needle_angle, needle_angle], [0, 0.85], color="#faf6ed", linewidth=2)
+    ax.scatter([needle_angle], [0.85], color="#faf6ed", s=30, zorder=5)
+    ax.set_ylim(0, 1.2)
+    ax.set_yticklabels([])
+    ax.set_xticklabels([])
+    ax.spines["polar"].set_visible(False)
+    ax.grid(False)
+    ax.text(0, -0.3, f"%{ai_prob*100:.0f}", ha="center", va="center",
+            fontsize=28, fontweight="bold", color="#faf6ed",
+            transform=ax.transAxes)
+    ax.text(0, -0.45, "AI Olasılığı", ha="center", va="center",
+            fontsize=10, color="#c99347", transform=ax.transAxes)
+    plt.tight_layout()
+    gauge_path = str(Path(__file__).parent / "_gauge_temp.png")
+    plt.savefig(gauge_path, dpi=100, bbox_inches="tight",
+                facecolor="#1a1207", edgecolor="none")
+    plt.close()
+    # Feature bars plot
+    fig2, ax2 = plt.subplots(figsize=(6, 2.5))
+    fig2.patch.set_facecolor("#1a1207")
+    ax2.set_facecolor("#1a1207")
+    bars_data = [
+        ("Spektral Düzenlilik", sr_pct),
+        ("Zamansal Örüntüler", tp_pct),
+        ("Harmonik Yapı", hs_pct),
+    ]
+    y_pos = np.arange(len(bars_data))
+    vals = [v for _, v in bars_data]
+    colors = ["#c99347" if v > 60 else "#7fb069" for v in vals]
+    ax2.barh(y_pos, vals, color=colors, edgecolor="#3d2817", height=0.6)
+    ax2.set_yticks(y_pos)
+    ax2.set_yticklabels([n for n, _ in bars_data], color="#faf6ed", fontsize=10)
+    ax2.set_xlim(0, 100)
+    ax2.set_xlabel("Skor (%)", color="#c99347")
+    ax2.tick_params(colors="#c99347")
+    ax2.spines["top"].set_visible(False)
+    ax2.spines["right"].set_visible(False)
+    ax2.spines["bottom"].set_color("#3d2817")
+    ax2.spines["left"].set_color("#3d2817")
+    for i, v in enumerate(vals):
+        ax2.text(v + 1, i, f"%{v:.0f}", va="center", color="#faf6ed", fontsize=10)
+    plt.tight_layout()
+    bars_path = str(Path(__file__).parent / "_bars_temp.png")
+    plt.savefig(bars_path, dpi=100, bbox_inches="tight",
+                facecolor="#1a1207", edgecolor="none")
+    plt.close()
+    return verdict, gauge_path, bars_path, details_md
+# ── Figures gallery ─────────────────────────────────────────────
+def get_figure_paths():
+    """Get all academic figure paths."""
+    if FIGURES_DIR.exists():
+        return sorted(str(p) for p in FIGURES_DIR.glob("*.png"))
+    return []
+# ── Gradio UI ───────────────────────────────────────────────────
+AURIS_CSS = """
+.gradio-container {
+    background: linear-gradient(135deg, #1a1207 0%, #2a1f10 50%, #1a1207 100%) !important;
+    font-family: 'Segoe UI', sans-serif;
+}
+.dark { background: #1a1207 !important; }
+h1, h2, h3 { color: #c99347 !important; }
+p, span, label { color: #faf6ed !important; }
+.gr-button-primary {
+    background: linear-gradient(135deg, #c99347, #e7c77a) !important;
+    color: #1a1207 !important;
+    border: none !important;
+    font-weight: bold !important;
+}
+footer { display: none !important; }
+"""
+HEADER_MD = """
+# AURIS — AI Music Detection System
+**Yapay Zeka Müzik Tespit Platformu**
+Model: **{model}** | Özellikler: **{n_feat}** | Veri: **{n_samples}** örnek | AUC: **{auc}**
+""".format(
+    model=best_model_name,
+    n_feat=n_features,
+    n_samples=training_results.get("_n_samples", "?"),
+    auc=training_results.get(best_model_name, {}).get("roc_auc", "?"),
+)
+ALL_MODELS_MD = "## Tüm Model Sonuçları\n\n| Model | Accuracy | F1 | ROC-AUC | Süre |\n|-------|----------|-----|---------|------|\n"
+for name, data in sorted(
+    ((k, v) for k, v in training_results.items()
+     if not k.startswith("_") and isinstance(v, dict)),
+    key=lambda x: -x[1].get("roc_auc", 0),
+):
+    ALL_MODELS_MD += (
+        f"| {name} | {data.get('accuracy', 0):.4f} | "
+        f"{data.get('f1', 0):.4f} | {data.get('roc_auc', 0):.4f} | "
+        f"{data.get('train_time_sec', 0):.1f}s |\n"
+    )
+with gr.Blocks(css=AURIS_CSS, title="AURIS — AI Music Detection", theme=gr.themes.Base()) as demo:  # page layout: header, three tabs, footer
+    gr.Markdown(HEADER_MD)  # header with the model summary line
+    with gr.Tabs():
+        # ── Tab 1: Analysis ──
+        with gr.Tab("Analiz / Analysis"):
+            with gr.Row():
+                with gr.Column(scale=1):  # left column: upload + analyze button
+                    audio_input = gr.Audio(
+                        label="Audio Dosyası Yükle",
+                        type="filepath",  # predict() receives the upload as a file path
+                    )
+                    analyze_btn = gr.Button(
+                        "Analiz Et / Analyze",
+                        variant="primary",
+                        size="lg",
+                    )
+                with gr.Column(scale=1):  # right column: verdict text and the two plots
+                    verdict_text = gr.Textbox(
+                        label="Sonuç / Verdict",
+                        interactive=False,
+                        lines=2,
+                    )
+                    gauge_img = gr.Image(
+                        label="AI Olasılığı",
+                        type="filepath",
+                        height=200,
+                    )
+                    bars_img = gr.Image(
+                        label="Özellik Skorları",
+                        type="filepath",
+                        height=180,
+                    )
+            details_output = gr.Markdown(label="Detaylar")  # full-width result tables below the row
+            analyze_btn.click(
+                fn=predict,
+                inputs=[audio_input],
+                outputs=[verdict_text, gauge_img, bars_img, details_output],  # order must match predict()'s return tuple
+            )
+        # ── Tab 2: Model Comparison ──
+        with gr.Tab("Model Karşılaştırması"):
+            gr.Markdown(ALL_MODELS_MD)  # static table built at import time
+        # ── Tab 3: Academic Figures ──
+        with gr.Tab("Akademik Görseller"):
+            gr.Markdown("## Eğitim ve Değerlendirme Görselleri")
+            figure_paths = get_figure_paths()  # evaluated once, while the UI is being built
+            if figure_paths:
+                gr.Gallery(
+                    value=figure_paths,
+                    label="Figures",
+                    columns=3,
+                    height="auto",
+                    object_fit="contain",
+                )
+            else:  # figures directory missing or contains no PNG files
+                gr.Markdown("*Görseller bulunamadı.*")
+    gr.Markdown(  # footer shown under the tabs
+        "---\n"
+        "*AURIS v1 — Düzce Üniversitesi Bilgisayar Mühendisliği Bitirme Projesi*\n\n"
+        "*Hasan Arthur Altuntaş — 2026*"
+    )
+if __name__ == "__main__":  # start the demo server only when run as a script
+    demo.launch(
+        server_name="0.0.0.0",  # NOTE(review): listens on all network interfaces, not only localhost — confirm this is intended for a local demo
+        server_port=7861,
+        share=False,  # no public Gradio share link
+        inbrowser=True,  # open the UI in the default browser on launch
+    )