Spaces:
Sleeping
Sleeping
jefffffff9 Claude Sonnet 4.6 committed on
Commit ·
96cdb10
1
Parent(s): 6f4d8d0
Fix: replace YouTube download with audio upload (HF Spaces blocks outbound HTTP)
Browse files
HF cpu-basic tier has no outbound internet access, so yt-dlp can't reach
YouTube. Replaced with an upload widget — user converts video to MP3 locally
(ytmp3.cc / cobalt.tools), uploads here with transcription.
Also removed yt-dlp from requirements.txt.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
- app.py +64 -97
- requirements.txt +0 -3
app.py
CHANGED
|
@@ -499,88 +499,50 @@ def _load_phrase_additions_from_hub() -> None:
|
|
| 499 |
threading.Thread(target=_load_phrase_additions_from_hub, daemon=True).start()
|
| 500 |
|
| 501 |
|
| 502 |
-
def
|
| 503 |
-
"""
|
| 504 |
-
url = url.strip()
|
| 505 |
transcript = transcript.strip()
|
| 506 |
-
if
|
| 507 |
-
return "⚠️ Please
|
| 508 |
if not transcript:
|
| 509 |
-
return "⚠️ Please type the transcription
|
| 510 |
-
|
| 511 |
-
try:
|
| 512 |
-
import yt_dlp # noqa: F401
|
| 513 |
-
except ImportError:
|
| 514 |
-
return "⚠️ yt-dlp is not installed. Add 'yt-dlp' to requirements.txt and redeploy."
|
| 515 |
|
| 516 |
lang = SUPPORTED_LANGUAGES.get(lang_label, "bam")
|
|
|
|
|
|
|
|
|
|
| 517 |
|
| 518 |
-
|
| 519 |
-
|
| 520 |
-
with tempfile.TemporaryDirectory() as tmp:
|
| 521 |
-
out_template = os.path.join(tmp, "audio.%(ext)s")
|
| 522 |
-
ydl_opts = {
|
| 523 |
-
"format": "bestaudio/best",
|
| 524 |
-
"outtmpl": out_template,
|
| 525 |
-
"quiet": True,
|
| 526 |
-
"no_warnings": True,
|
| 527 |
-
"max_filesize": 50 * 1024 * 1024, # 50 MB cap
|
| 528 |
-
"postprocessors": [{
|
| 529 |
-
"key": "FFmpegExtractAudio",
|
| 530 |
-
"preferredcodec": "wav",
|
| 531 |
-
"preferredquality": "16",
|
| 532 |
-
}],
|
| 533 |
-
}
|
| 534 |
-
import yt_dlp
|
| 535 |
-
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
| 536 |
-
info = ydl.extract_info(url, download=True)
|
| 537 |
-
title = info.get("title", "unknown")
|
| 538 |
-
duration = info.get("duration", 0)
|
| 539 |
-
|
| 540 |
-
if duration and duration > 600:
|
| 541 |
-
return "⚠️ Video is longer than 10 minutes. Use a shorter clip or timestamp range."
|
| 542 |
-
|
| 543 |
-
# Find the downloaded file
|
| 544 |
-
wav_file = os.path.join(tmp, "audio.wav")
|
| 545 |
-
if not os.path.exists(wav_file):
|
| 546 |
-
# Try any audio file
|
| 547 |
-
for fname in os.listdir(tmp):
|
| 548 |
-
if fname.startswith("audio."):
|
| 549 |
-
wav_file = os.path.join(tmp, fname)
|
| 550 |
-
break
|
| 551 |
-
|
| 552 |
-
if not os.path.exists(wav_file):
|
| 553 |
-
return "❌ Audio download failed. The video may be unavailable or geo-restricted."
|
| 554 |
-
|
| 555 |
-
# Upload to HF Hub feedback dataset
|
| 556 |
-
timestamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
|
| 557 |
-
audio_repo_path = f"youtube_audio/{lang}/{timestamp}.wav"
|
| 558 |
-
meta_repo_path = f"youtube_audio/{lang}/{timestamp}.txt"
|
| 559 |
-
|
| 560 |
-
if _hf_api is not None and FEEDBACK_REPO_ID:
|
| 561 |
-
import io
|
| 562 |
-
_hf_api.upload_file(
|
| 563 |
-
path_or_fileobj=wav_file,
|
| 564 |
-
path_in_repo=audio_repo_path,
|
| 565 |
-
repo_id=FEEDBACK_REPO_ID,
|
| 566 |
-
repo_type="dataset",
|
| 567 |
-
)
|
| 568 |
-
meta = f"title: {title}\nurl: {url}\nlanguage: {lang}\ntranscription: {transcript}\n"
|
| 569 |
-
_hf_api.upload_file(
|
| 570 |
-
path_or_fileobj=io.BytesIO(meta.encode()),
|
| 571 |
-
path_in_repo=meta_repo_path,
|
| 572 |
-
repo_id=FEEDBACK_REPO_ID,
|
| 573 |
-
repo_type="dataset",
|
| 574 |
-
)
|
| 575 |
-
return (f"✅ Saved to training dataset!\n"
|
| 576 |
-
f"Title: {title} ({duration}s)\n"
|
| 577 |
-
f"Audio: {audio_repo_path}\n"
|
| 578 |
-
f"The transcription is saved alongside it.")
|
| 579 |
-
else:
|
| 580 |
-
return "⚠️ HF_TOKEN not set — audio downloaded but could not be saved to Hub."
|
| 581 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 582 |
except Exception as exc:
|
| 583 |
-
return f"❌
|
| 584 |
|
| 585 |
|
| 586 |
# ── Main ask handler ──────────────────────────────────────────────────────────
|
|
@@ -797,37 +759,42 @@ def build_ui() -> gr.Blocks:
|
|
| 797 |
kb_import_btn = gr.Button("➕ Add to Knowledge Base", variant="primary")
|
| 798 |
kb_status = gr.Textbox(label="Status", interactive=False, lines=3)
|
| 799 |
|
| 800 |
-
# ── Right:
|
| 801 |
with gr.Column():
|
| 802 |
gr.Markdown(
|
| 803 |
-
"### 🎬
|
| 804 |
-
"
|
| 805 |
-
"
|
| 806 |
-
"
|
| 807 |
-
"
|
| 808 |
-
"-
|
| 809 |
-
"
|
| 810 |
-
"
|
| 811 |
-
"-
|
| 812 |
-
"
|
| 813 |
-
"the
|
| 814 |
)
|
| 815 |
yt_lang = gr.Dropdown(
|
| 816 |
choices=["Bambara (bam)", "Fula (ful)"],
|
| 817 |
value="Bambara (bam)",
|
| 818 |
-
label="Language spoken in the
|
| 819 |
)
|
| 820 |
-
|
| 821 |
-
|
| 822 |
-
|
|
|
|
| 823 |
)
|
| 824 |
yt_transcript = gr.Textbox(
|
| 825 |
lines=5,
|
| 826 |
-
placeholder="Type what is said in the
|
| 827 |
-
"
|
| 828 |
-
label="Transcription — what is said in this
|
|
|
|
|
|
|
|
|
|
|
|
|
| 829 |
)
|
| 830 |
-
yt_btn = gr.Button("
|
| 831 |
yt_status = gr.Textbox(label="Status", interactive=False, lines=4)
|
| 832 |
|
| 833 |
kb_import_btn.click(
|
|
@@ -836,8 +803,8 @@ def build_ui() -> gr.Blocks:
|
|
| 836 |
outputs=[kb_status],
|
| 837 |
)
|
| 838 |
yt_btn.click(
|
| 839 |
-
fn=
|
| 840 |
-
inputs=[yt_lang,
|
| 841 |
outputs=[yt_status],
|
| 842 |
)
|
| 843 |
|
|
|
|
| 499 |
threading.Thread(target=_load_phrase_additions_from_hub, daemon=True).start()
|
| 500 |
|
| 501 |
|
| 502 |
+
def _save_audio_for_training(lang_label: str, audio_path: str | None, transcript: str, source_note: str) -> str:
|
| 503 |
+
"""Save an uploaded audio file + transcription as a training sample to HF Hub."""
|
|
|
|
| 504 |
transcript = transcript.strip()
|
| 505 |
+
if audio_path is None:
|
| 506 |
+
return "⚠️ Please upload an audio file first."
|
| 507 |
if not transcript:
|
| 508 |
+
return "⚠️ Please type the transcription — what is said in this audio."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 509 |
|
| 510 |
lang = SUPPORTED_LANGUAGES.get(lang_label, "bam")
|
| 511 |
+
timestamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
|
| 512 |
+
audio_repo_path = f"training_audio/{lang}/{timestamp}.wav"
|
| 513 |
+
meta_repo_path = f"training_audio/{lang}/{timestamp}.txt"
|
| 514 |
|
| 515 |
+
if _hf_api is None or not FEEDBACK_REPO_ID:
|
| 516 |
+
return "⚠️ HF_TOKEN not set — file saved locally only, not uploaded to Hub."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 517 |
|
| 518 |
+
try:
|
| 519 |
+
import io
|
| 520 |
+
_hf_api.upload_file(
|
| 521 |
+
path_or_fileobj=audio_path,
|
| 522 |
+
path_in_repo=audio_repo_path,
|
| 523 |
+
repo_id=FEEDBACK_REPO_ID,
|
| 524 |
+
repo_type="dataset",
|
| 525 |
+
)
|
| 526 |
+
meta = (
|
| 527 |
+
f"language: {lang}\n"
|
| 528 |
+
f"transcription: {transcript}\n"
|
| 529 |
+
f"source: {source_note.strip() or 'uploaded'}\n"
|
| 530 |
+
f"timestamp: {timestamp}\n"
|
| 531 |
+
)
|
| 532 |
+
_hf_api.upload_file(
|
| 533 |
+
path_or_fileobj=io.BytesIO(meta.encode()),
|
| 534 |
+
path_in_repo=meta_repo_path,
|
| 535 |
+
repo_id=FEEDBACK_REPO_ID,
|
| 536 |
+
repo_type="dataset",
|
| 537 |
+
)
|
| 538 |
+
return (
|
| 539 |
+
f"✅ Saved to training dataset!\n"
|
| 540 |
+
f"Audio: {audio_repo_path}\n"
|
| 541 |
+
f"Transcription: {transcript[:80]}{'…' if len(transcript) > 80 else ''}\n"
|
| 542 |
+
f"Run the training notebook on Kaggle to include this in the next model update."
|
| 543 |
+
)
|
| 544 |
except Exception as exc:
|
| 545 |
+
return f"❌ Upload failed: {exc}"
|
| 546 |
|
| 547 |
|
| 548 |
# ── Main ask handler ──────────────────────────────────────────────────────────
|
|
|
|
| 759 |
kb_import_btn = gr.Button("➕ Add to Knowledge Base", variant="primary")
|
| 760 |
kb_status = gr.Textbox(label="Status", interactive=False, lines=3)
|
| 761 |
|
| 762 |
+
# ── Right: audio upload for training ─────────────────────
|
| 763 |
with gr.Column():
|
| 764 |
gr.Markdown(
|
| 765 |
+
"### 🎬 Add audio from YouTube (or anywhere)\n"
|
| 766 |
+
"HuggingFace Spaces cannot download YouTube directly, "
|
| 767 |
+
"so convert the video to audio first on your computer:\n\n"
|
| 768 |
+
"**Free online converters:**\n"
|
| 769 |
+
"- [ytmp3.cc](https://ytmp3.cc) — paste YouTube URL → download MP3\n"
|
| 770 |
+
"- [cobalt.tools](https://cobalt.tools) — paste any video URL → download audio\n"
|
| 771 |
+
"- [y2mate.com](https://y2mate.com) — paste YouTube URL → download MP3\n\n"
|
| 772 |
+
"**Good YouTube search terms:**\n"
|
| 773 |
+
"- Bambara: *'Bamanankan conversation'*, *'Bambara leçon'*, *'donsomana'*\n"
|
| 774 |
+
"- Fula: *'Fulfulde leçon'*, *'Pular conversation'*, *'Fula radio'*\n\n"
|
| 775 |
+
"Then upload the MP3/WAV file below with its transcription."
|
| 776 |
)
|
| 777 |
yt_lang = gr.Dropdown(
|
| 778 |
choices=["Bambara (bam)", "Fula (ful)"],
|
| 779 |
value="Bambara (bam)",
|
| 780 |
+
label="Language spoken in the audio",
|
| 781 |
)
|
| 782 |
+
yt_audio = gr.Audio(
|
| 783 |
+
sources=["upload"],
|
| 784 |
+
type="filepath",
|
| 785 |
+
label="Upload audio file (MP3 or WAV)",
|
| 786 |
)
|
| 787 |
yt_transcript = gr.Textbox(
|
| 788 |
lines=5,
|
| 789 |
+
placeholder="Type what is said in the audio (as much as you can).\n"
|
| 790 |
+
"Example:\nJam waali. No mbadda. Mi woni ɗoo wallude ma.",
|
| 791 |
+
label="Transcription — what is said in this audio",
|
| 792 |
+
)
|
| 793 |
+
yt_source = gr.Textbox(
|
| 794 |
+
placeholder="e.g. YouTube: Bambara lesson by Moussa Kouyaté",
|
| 795 |
+
label="Source (optional — for your records)",
|
| 796 |
)
|
| 797 |
+
yt_btn = gr.Button("💾 Save Audio for Training", variant="secondary")
|
| 798 |
yt_status = gr.Textbox(label="Status", interactive=False, lines=4)
|
| 799 |
|
| 800 |
kb_import_btn.click(
|
|
|
|
| 803 |
outputs=[kb_status],
|
| 804 |
)
|
| 805 |
yt_btn.click(
|
| 806 |
+
fn=_save_audio_for_training,
|
| 807 |
+
inputs=[yt_lang, yt_audio, yt_transcript, yt_source],
|
| 808 |
outputs=[yt_status],
|
| 809 |
)
|
| 810 |
|
requirements.txt
CHANGED
|
@@ -51,6 +51,3 @@ scipy==1.15.2
|
|
| 51 |
|
| 52 |
# Phrase matching (fuzzy match for Whisper mis-transcriptions of Bambara/Fula)
|
| 53 |
rapidfuzz==3.13.0
|
| 54 |
-
|
| 55 |
-
# YouTube audio download for training data collection
|
| 56 |
-
yt-dlp==2025.3.31
|
|
|
|
| 51 |
|
| 52 |
# Phrase matching (fuzzy match for Whisper mis-transcriptions of Bambara/Fula)
|
| 53 |
rapidfuzz==3.13.0
|
|
|
|
|
|
|
|
|