mustango-API-2

Sleeping

App Files Files Community

multimodalart HF Staff committed on May 25

Commit

b7bca64

verified ·

1 Parent(s): d3ab0d1

[Admin maintenance] Migrate to ZeroGPU

Browse files

Files changed (2) hide show

app.py +24 -6
requirements.txt +7 -9

app.py CHANGED Viewed

@@ -1,5 +1,7 @@
 import gradio as gr
 import json
 import torch
 import wavio
 import numpy as np
@@ -15,7 +17,6 @@ sys.path.insert(0, "diffusers/src")
 from diffusers import DDPMScheduler
 from models import MusicAudioDiffusion
 from gradio import Markdown
-import spaces
 # Automatic device detection
@@ -139,16 +140,26 @@ class MusicFeaturePredictor:
             num_return_sequences=1,
         )
-        generated_chords = self.chords_tokenizer.decode(
             generated_chords[0],
-            skip_special_tokens=True,
             clean_up_tokenization_spaces=True,
-        ).split(" n ")
         predicted_chords, predicted_chords_times = [], []
-        for item in generated_chords:
             c, ct = item.split(" at ")
-            predicted_chords.append(c)
             predicted_chords_times.append(float(ct))
         return predicted_beats, predicted_chords, predicted_chords_times
@@ -240,6 +251,13 @@ class Mustango:
             return wave[0]
 # Initialize Mustango
 mustango = Mustango(device="cpu")
 mustango.vae.to(device_type)

+import spaces
 import gradio as gr
 import json
+import re
 import torch
 import wavio
 import numpy as np
 from diffusers import DDPMScheduler
 from models import MusicAudioDiffusion
 from gradio import Markdown
 # Automatic device detection
             num_return_sequences=1,
         )
+        # Mustango's chord T5 was finetuned with '\n' between chord entries.
+        # SentencePiece T5 has no '\n' in its vocab, so the trainer encoded each
+        # separator as <unk> (id 2) and the model learned to emit <unk>. With
+        # skip_special_tokens=True, <unk> is silently stripped, collapsing every
+        # entry onto one line and breaking the ' at ' parse. Decode with specials
+        # preserved and split on the separator the model emits: '<unk>' or
+        # '<unk>n' (an extra 'n' token sometimes follows the <unk>), and on the
+        # legacy ' n ' literal that the torch-2.0.1 reference Space produced.
+        decoded = self.chords_tokenizer.decode(
             generated_chords[0],
+            skip_special_tokens=False,
             clean_up_tokenization_spaces=True,
+        )
+        decoded = decoded.replace("<pad>", "").replace("</s>", "").strip()
+        items = [p.strip() for p in re.split(r"\s*<unk>\s*n?\s+|\s+n\s+", decoded) if p.strip()]
         predicted_chords, predicted_chords_times = [], []
+        for item in items:
             c, ct = item.split(" at ")
+            predicted_chords.append(c.strip())
             predicted_chords_times.append(float(ct))
         return predicted_beats, predicted_chords, predicted_chords_times
             return wave[0]
+# Disable TF32 / nondeterministic cuDNN kernels so chord/beats T5 sampling
+# stays as close as possible to the torch 2.0.1 reference Space's output.
+torch.backends.cuda.matmul.allow_tf32 = False
+torch.backends.cudnn.allow_tf32 = False
+torch.backends.cudnn.deterministic = True
+torch.backends.cudnn.benchmark = False
 # Initialize Mustango
 mustango = Mustango(device="cpu")
 mustango.vae.to(device_type)

requirements.txt CHANGED Viewed

@@ -1,6 +1,6 @@
-torch==2.0.1
-torchaudio==2.0.2
-torchvision==0.15.2
 transformers==4.31.0
 accelerate==0.21.0
 datasets==2.1.0
@@ -10,10 +10,10 @@ huggingface_hub>=0.33.5
 importlib_metadata==6.3.0
 librosa==0.9.2
 matplotlib==3.5.2
-numpy==1.23.0
 omegaconf==2.3.0
 packaging==23.1
-pandas==1.4.1
 progressbar33==2.4
 protobuf==3.20.*
 resampy==0.4.2
@@ -23,10 +23,8 @@ scikit_image==0.19.3
 scikit_learn==1.2.2
 scipy==1.8.0
 soundfile==0.12.1
-#ssr_eval==0.0.6
 torchlibrosa==0.1.0
 tqdm==4.63.1
-wandb==0.12.14
-#ipython==8.12.0
 wavio==0.0.7
-hf-xet==1.1.8

+torch==2.8.0
+torchaudio==2.8.0
+torchvision==0.23.0
 transformers==4.31.0
 accelerate==0.21.0
 datasets==2.1.0
 importlib_metadata==6.3.0
 librosa==0.9.2
 matplotlib==3.5.2
+numpy<2
 omegaconf==2.3.0
 packaging==23.1
+pandas
 progressbar33==2.4
 protobuf==3.20.*
 resampy==0.4.2
 scikit_learn==1.2.2
 scipy==1.8.0
 soundfile==0.12.1
 torchlibrosa==0.1.0
 tqdm==4.63.1
 wavio==0.0.7
+hf-xet==1.1.8
+diffusers==0.18.2