Spaces:

MataStrategy
/

ground-zero

Sleeping

jefffffff9 Claude Sonnet 4.6 committed on Apr 7

Commit

96cdb10

1 Parent(s): 6f4d8d0

Fix: replace YouTube download with audio upload (HF Spaces blocks outbound HTTP)

The HF cpu-basic tier has no outbound internet access, so yt-dlp can't reach
YouTube. Replaced the download with an upload widget — the user converts the
video to MP3 themselves (e.g. ytmp3.cc / cobalt.tools) and uploads it here with its transcription.

Also removed yt-dlp from requirements.txt.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (2) hide show

app.py +64 -97
requirements.txt +0 -3

app.py CHANGED Viewed

@@ -499,88 +499,50 @@ def _load_phrase_additions_from_hub() -> None:
 threading.Thread(target=_load_phrase_additions_from_hub, daemon=True).start()
-def _download_youtube_for_training(lang_label: str, url: str, transcript: str) -> str:
-    """Download YouTube audio and save as a training sample to HF Hub."""
-    url = url.strip()
     transcript = transcript.strip()
-    if not url:
-        return "⚠️ Please enter a YouTube URL."
     if not transcript:
-        return "⚠️ Please type the transcription (what is said in the video)."
-    try:
-        import yt_dlp  # noqa: F401
-    except ImportError:
-        return "⚠️ yt-dlp is not installed. Add 'yt-dlp' to requirements.txt and redeploy."
     lang = SUPPORTED_LANGUAGES.get(lang_label, "bam")
-    try:
-        import tempfile, os
-        with tempfile.TemporaryDirectory() as tmp:
-            out_template = os.path.join(tmp, "audio.%(ext)s")
-            ydl_opts = {
-                "format": "bestaudio/best",
-                "outtmpl": out_template,
-                "quiet": True,
-                "no_warnings": True,
-                "max_filesize": 50 * 1024 * 1024,  # 50 MB cap
-                "postprocessors": [{
-                    "key": "FFmpegExtractAudio",
-                    "preferredcodec": "wav",
-                    "preferredquality": "16",
-                }],
-            }
-            import yt_dlp
-            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-                info = ydl.extract_info(url, download=True)
-                title = info.get("title", "unknown")
-                duration = info.get("duration", 0)
-            if duration and duration > 600:
-                return "⚠️ Video is longer than 10 minutes. Use a shorter clip or timestamp range."
-            # Find the downloaded file
-            wav_file = os.path.join(tmp, "audio.wav")
-            if not os.path.exists(wav_file):
-                # Try any audio file
-                for fname in os.listdir(tmp):
-                    if fname.startswith("audio."):
-                        wav_file = os.path.join(tmp, fname)
-                        break
-            if not os.path.exists(wav_file):
-                return "❌ Audio download failed. The video may be unavailable or geo-restricted."
-            # Upload to HF Hub feedback dataset
-            timestamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
-            audio_repo_path = f"youtube_audio/{lang}/{timestamp}.wav"
-            meta_repo_path  = f"youtube_audio/{lang}/{timestamp}.txt"
-            if _hf_api is not None and FEEDBACK_REPO_ID:
-                import io
-                _hf_api.upload_file(
-                    path_or_fileobj=wav_file,
-                    path_in_repo=audio_repo_path,
-                    repo_id=FEEDBACK_REPO_ID,
-                    repo_type="dataset",
-                )
-                meta = f"title: {title}\nurl: {url}\nlanguage: {lang}\ntranscription: {transcript}\n"
-                _hf_api.upload_file(
-                    path_or_fileobj=io.BytesIO(meta.encode()),
-                    path_in_repo=meta_repo_path,
-                    repo_id=FEEDBACK_REPO_ID,
-                    repo_type="dataset",
-                )
-                return (f"✅ Saved to training dataset!\n"
-                        f"Title: {title} ({duration}s)\n"
-                        f"Audio: {audio_repo_path}\n"
-                        f"The transcription is saved alongside it.")
-            else:
-                return "⚠️ HF_TOKEN not set — audio downloaded but could not be saved to Hub."
     except Exception as exc:
-        return f"❌ Download failed: {exc}"
 # ── Main ask handler ──────────────────────────────────────────────────────────
@@ -797,37 +759,42 @@ def build_ui() -> gr.Blocks:
                         kb_import_btn = gr.Button("➕ Add to Knowledge Base", variant="primary")
                         kb_status = gr.Textbox(label="Status", interactive=False, lines=3)
-                    # ── Right: YouTube import ─────────────────────────────────
                     with gr.Column():
                         gr.Markdown(
-                            "### 🎬 Import audio from YouTube\n"
-                            "Paste a link to a Bambara or Fula YouTube video "
-                            "(lesson, conversation, song, news, etc.). "
-                            "The audio is downloaded and saved to the training dataset.\n\n"
-                            "**Tips for finding good videos:**\n"
-                            "- Search YouTube for **'Bambara conversation'**, **'Bamanankan'**, "
-                            "**'Pular leçon'**, **'Fulfulde'**\n"
-                            "- Language learning channels and radio recordings work best\n"
-                            "- Videos under 10 minutes are preferred\n\n"
-                            "After saving, run the training notebook on Kaggle/Colab to fine-tune "
-                            "the speech model with your new audio."
                         )
                         yt_lang = gr.Dropdown(
                             choices=["Bambara (bam)", "Fula (ful)"],
                             value="Bambara (bam)",
-                            label="Language spoken in the video",
                         )
-                        yt_url = gr.Textbox(
-                            placeholder="https://www.youtube.com/watch?v=...",
-                            label="YouTube URL",
                         )
                         yt_transcript = gr.Textbox(
                             lines=5,
-                            placeholder="Type what is said in the video (as much as you can). "
-                                        "This transcription will be used to train the speech model.",
-                            label="Transcription — what is said in this video",
                         )
-                        yt_btn = gr.Button("⬇️ Download & Save for Training", variant="secondary")
                         yt_status = gr.Textbox(label="Status", interactive=False, lines=4)
                 kb_import_btn.click(
@@ -836,8 +803,8 @@ def build_ui() -> gr.Blocks:
                     outputs=[kb_status],
                 )
                 yt_btn.click(
-                    fn=_download_youtube_for_training,
-                    inputs=[yt_lang, yt_url, yt_transcript],
                     outputs=[yt_status],
                 )

 threading.Thread(target=_load_phrase_additions_from_hub, daemon=True).start()
+def _save_audio_for_training(lang_label: str, audio_path: str | None, transcript: str, source_note: str) -> str:
+    """Save an uploaded audio file + transcription as a training sample to HF Hub."""
     transcript = transcript.strip()
+    if audio_path is None:
+        return "⚠️ Please upload an audio file first."
     if not transcript:
+        return "⚠️ Please type the transcription — what is said in this audio."
     lang = SUPPORTED_LANGUAGES.get(lang_label, "bam")
+    timestamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
+    audio_repo_path = f"training_audio/{lang}/{timestamp}.wav"
+    meta_repo_path  = f"training_audio/{lang}/{timestamp}.txt"
+    if _hf_api is None or not FEEDBACK_REPO_ID:
+        return "⚠️ HF_TOKEN not set — file saved locally only, not uploaded to Hub."
+    try:
+        import io
+        _hf_api.upload_file(
+            path_or_fileobj=audio_path,
+            path_in_repo=audio_repo_path,
+            repo_id=FEEDBACK_REPO_ID,
+            repo_type="dataset",
+        )
+        meta = (
+            f"language: {lang}\n"
+            f"transcription: {transcript}\n"
+            f"source: {source_note.strip() or 'uploaded'}\n"
+            f"timestamp: {timestamp}\n"
+        )
+        _hf_api.upload_file(
+            path_or_fileobj=io.BytesIO(meta.encode()),
+            path_in_repo=meta_repo_path,
+            repo_id=FEEDBACK_REPO_ID,
+            repo_type="dataset",
+        )
+        return (
+            f"✅ Saved to training dataset!\n"
+            f"Audio: {audio_repo_path}\n"
+            f"Transcription: {transcript[:80]}{'…' if len(transcript) > 80 else ''}\n"
+            f"Run the training notebook on Kaggle to include this in the next model update."
+        )
     except Exception as exc:
+        return f"❌ Upload failed: {exc}"
 # ── Main ask handler ──────────────────────────────────────────────────────────
                         kb_import_btn = gr.Button("➕ Add to Knowledge Base", variant="primary")
                         kb_status = gr.Textbox(label="Status", interactive=False, lines=3)
+                    # ── Right: audio upload for training ─────────────────────
                     with gr.Column():
                         gr.Markdown(
+                            "### 🎬 Add audio from YouTube (or anywhere)\n"
+                            "HuggingFace Spaces cannot download YouTube directly, "
+                            "so convert the video to audio first on your computer:\n\n"
+                            "**Free online converters:**\n"
+                            "- [ytmp3.cc](https://ytmp3.cc) — paste YouTube URL → download MP3\n"
+                            "- [cobalt.tools](https://cobalt.tools) — paste any video URL → download audio\n"
+                            "- [y2mate.com](https://y2mate.com) — paste YouTube URL → download MP3\n\n"
+                            "**Good YouTube search terms:**\n"
+                            "- Bambara: *'Bamanankan conversation'*, *'Bambara leçon'*, *'donsomana'*\n"
+                            "- Fula: *'Fulfulde leçon'*, *'Pular conversation'*, *'Fula radio'*\n\n"
+                            "Then upload the MP3/WAV file below with its transcription."
                         )
                         yt_lang = gr.Dropdown(
                             choices=["Bambara (bam)", "Fula (ful)"],
                             value="Bambara (bam)",
+                            label="Language spoken in the audio",
                         )
+                        yt_audio = gr.Audio(
+                            sources=["upload"],
+                            type="filepath",
+                            label="Upload audio file (MP3 or WAV)",
                         )
                         yt_transcript = gr.Textbox(
                             lines=5,
+                            placeholder="Type what is said in the audio (as much as you can).\n"
+                                        "Example:\nJam waali. No mbadda. Mi woni ɗoo wallude ma.",
+                            label="Transcription — what is said in this audio",
+                        )
+                        yt_source = gr.Textbox(
+                            placeholder="e.g. YouTube: Bambara lesson by Moussa Kouyaté",
+                            label="Source (optional — for your records)",
                         )
+                        yt_btn = gr.Button("💾 Save Audio for Training", variant="secondary")
                         yt_status = gr.Textbox(label="Status", interactive=False, lines=4)
                 kb_import_btn.click(
                     outputs=[kb_status],
                 )
                 yt_btn.click(
+                    fn=_save_audio_for_training,
+                    inputs=[yt_lang, yt_audio, yt_transcript, yt_source],
                     outputs=[yt_status],
                 )

requirements.txt CHANGED Viewed

@@ -51,6 +51,3 @@ scipy==1.15.2
 # Phrase matching (fuzzy match for Whisper mis-transcriptions of Bambara/Fula)
 rapidfuzz==3.13.0
-# YouTube audio download for training data collection
-yt-dlp==2025.3.31


51
52	# Phrase matching (fuzzy match for Whisper mis-transcriptions of Bambara/Fula)
53	rapidfuzz==3.13.0