twangodev committed on
Commit
1df078a
·
verified ·
1 Parent(s): 58090b5

feat: add DAC codec implementation and codec registry

Browse files
Files changed (6) hide show
  1. app.py +125 -0
  2. compare_codec/__init__.py +49 -0
  3. compare_codec/dac.py +81 -0
  4. pyproject.toml +17 -0
  5. requirements.txt +431 -0
  6. uv.lock +0 -0
app.py ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Codecomp — upload audio, pick codecs, hear reconstructions side by side."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import time
6
+ from pathlib import Path
7
+
8
+ import gradio as gr
9
+
10
+ from compare_codec import CodecConfig, get_all
11
+
12
+ MAX_DURATION_S = 30.0
13
+
14
+
15
def _codec_choices() -> list[str]:
    """Build the selectable labels, one per (codec, config) combination.

    Every label has the form ``"<codec name> — <config name>"``. Labels follow
    the iteration order of the registry mapping, then the order in which each
    codec lists its own configs.
    """
    return [
        f"{codec.name} — {cfg.name}"
        for codec in get_all().values()
        for cfg in codec.configs()
    ]
22
+
23
+
24
def _resolve_selection(label: str) -> tuple[str, CodecConfig, int]:
    """Map a display label back to (codec_name, config, output_sample_rate).

    The label is split at the first ``" — "`` separator into a codec name and
    a config name. The sample rate comes from the config's ``"sample_rate"``
    param when it has one, otherwise from the codec's own ``sample_rate``.

    Args:
        label: A label of the form ``"<codec name> — <config name>"``.

    Returns:
        ``(codec_name, config, sample_rate)`` for the matching configuration.

    Raises:
        ValueError: If the label has no separator, names a codec that is not
            registered, or names a config the codec does not offer.
    """
    codec_name, separator, cfg_name = label.partition(" — ")
    if not separator:
        raise ValueError(f"Malformed selection label: {label}")

    # Use .get() so an unregistered codec surfaces as the same exception type
    # as an unknown config instead of leaking a bare KeyError.
    codec = get_all().get(codec_name)
    if codec is None:
        raise ValueError(f"Unknown codec: {label}")

    for cfg in codec.configs():
        if cfg.name == cfg_name:
            sr = cfg.params.get("sample_rate", codec.sample_rate)
            return codec_name, cfg, sr
    raise ValueError(f"Unknown config: {label}")
33
+
34
+
35
def compare(audio_path: str | None, selected: list[str]) -> list[dict]:
    """Round-trip the input file through every selected codec configuration.

    Args:
        audio_path: Path of the input audio file, or ``None`` when no audio
            was provided.
        selected: Display labels of the chosen (codec, config) pairs.

    Returns:
        One dict per selected label, in the same order, with the keys
        ``"label"``, ``"audio"`` (a ``(sample_rate, samples)`` tuple) and
        ``"time"`` (seconds spent inside ``encode_decode``). The list is empty
        when there is no input file or nothing is selected.
    """
    if not selected or audio_path is None:
        return []

    outcomes: list[dict] = []
    for label in selected:
        codec_name, cfg, sr = _resolve_selection(label)
        codec = get_all()[codec_name]

        # Time only the codec round-trip itself.
        started = time.perf_counter()
        samples = codec.encode_decode(Path(audio_path), cfg)
        seconds = time.perf_counter() - started

        # The duration cap is applied to the reconstructed output only.
        limit = int(MAX_DURATION_S * sr)
        if len(samples) > limit:
            samples = samples[:limit]

        outcomes.append(
            {
                "label": label,
                "audio": (sr, samples),
                "time": seconds,
            }
        )
    return outcomes
54
+
55
+
56
def build_ui() -> gr.Blocks:
    """Assemble the Gradio Blocks app and wire up the Compare button.

    One hidden result slot (group, caption, audio player) is created up front
    for every available codec configuration. The click handler reveals and
    fills one slot per result and hides and clears all remaining slots, so it
    always returns exactly three updates per slot.
    """
    labels = _codec_choices()
    slot_count = len(labels)

    with gr.Blocks(title="Codecomp") as demo:
        gr.Markdown(
            "# Codecomp\n\n"
            "Upload audio, select one or more codec configurations, "
            "and listen to the reconstructions side by side."
        )

        with gr.Row():
            # Left column: input audio, configuration picker and run button.
            with gr.Column(scale=1):
                audio_in = gr.Audio(
                    sources=["upload", "microphone"],
                    type="filepath",
                    label="Input audio",
                )
                codec_select = gr.CheckboxGroup(
                    choices=labels,
                    label="Codec configurations",
                )
                run_btn = gr.Button("Compare", variant="primary")

            # Right column: pre-built, initially hidden result slots.
            with gr.Column(scale=2):
                slots: list[tuple[gr.Group, gr.Markdown, gr.Audio]] = []
                for number in range(1, slot_count + 1):
                    with gr.Group(visible=False) as group:
                        caption = gr.Markdown()
                        player = gr.Audio(
                            label=f"Result {number}",
                            type="numpy",
                            interactive=False,
                        )
                    slots.append((group, caption, player))

        def on_compare(audio_path: str | None, selected: list[str]) -> list:
            """Return (group, caption, player) updates for every slot, in order."""
            results = compare(audio_path, selected)
            updates: list = []

            # Slots that receive a result become visible and get their content.
            for result in results[:slot_count]:
                updates += [
                    gr.update(visible=True),
                    gr.update(value=f"**{result['label']}** — {result['time']:.2f}s"),
                    gr.update(value=result["audio"], label=result["label"]),
                ]

            # Every remaining slot is hidden and cleared.
            for _ in range(slot_count - len(results)):
                updates += [
                    gr.update(visible=False),
                    gr.update(value=""),
                    gr.update(value=None),
                ]
            return updates

        # Flatten the slots into the same group/caption/player order that
        # on_compare uses for its updates.
        all_outputs: list[gr.Component] = [
            component for slot in slots for component in slot
        ]

        run_btn.click(
            on_compare,
            inputs=[audio_in, codec_select],
            outputs=all_outputs,
        )

    return demo
120
+
121
+
122
+ demo = build_ui()
123
+
124
+ if __name__ == "__main__":
125
+ demo.launch()
compare_codec/__init__.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Codec registry — protocol, config dataclass, and discovery."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+ from pathlib import Path
7
+ from typing import Protocol, runtime_checkable
8
+
9
+ import numpy as np
10
+
11
+
12
@dataclass(frozen=True)
class CodecConfig:
    """A single configuration offered by a codec (e.g. '44kHz / 9 quantizers').

    Instances are frozen, so their attributes cannot be reassigned. Equality
    compares both ``name`` and ``params``; hashing uses ``name`` only, because
    ``params`` is a dict and cannot be hashed.
    """

    # Human-readable name of this configuration.
    name: str
    # Codec-specific settings. Excluded from the generated __hash__: hashing a
    # dict raises TypeError, which would make every instance unhashable.
    params: dict = field(default_factory=dict, hash=False)
18
+
19
+
20
@runtime_checkable
class AudioCodec(Protocol):
    """Interface every codec must implement.

    The protocol is structural: any object providing these members conforms.
    Because it is ``runtime_checkable``, ``isinstance`` checks verify only that
    the members exist, not their signatures or return types.
    """

    @property
    def name(self) -> str:
        """Codec name; the registry stores the codec under this key."""
        ...

    @property
    def sample_rate(self) -> int:
        """Default output sample rate in Hz.

        Applies when a config does not carry its own ``"sample_rate"`` param.
        """
        ...

    def configs(self) -> list[CodecConfig]:
        """Return the configurations this codec offers."""
        ...

    def encode_decode(self, audio_path: Path, config: CodecConfig) -> np.ndarray:
        """Round-trip: raw file in -> mono float32 numpy array out.

        The output is at the rate given by ``config.params["sample_rate"]``
        when the config declares one, and at ``self.sample_rate`` otherwise.
        """
        ...
35
+
36
+
37
# Module-level registry mapping each codec's ``name`` to the codec instance.
_REGISTRY: dict[str, AudioCodec] = {}


def register(codec: AudioCodec) -> None:
    """Store ``codec`` in the registry under ``codec.name``.

    Registering another codec with the same name replaces the earlier entry.
    """
    _REGISTRY.update({codec.name: codec})


def get_all() -> dict[str, AudioCodec]:
    """Return a shallow copy of the registry.

    Mutating the returned dict does not affect the registry itself.
    """
    return _REGISTRY.copy()
46
+
47
+
48
+ # Import codec modules so they self-register on startup.
49
+ from compare_codec import dac as _dac # noqa: E402, F401
compare_codec/dac.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """DAC (Descript Audio Codec) — wraps the descript-audio-codec package."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+
7
+ import numpy as np
8
+ import torch
9
+
10
+ from compare_codec import CodecConfig, register
11
+
12
+
13
class DACCodec:
    """DAC codec with lazy model loading.

    A model is downloaded and loaded the first time a configuration of its
    type is used, then cached on the instance for later calls.
    """

    def __init__(self) -> None:
        # Loaded models keyed by DAC model type (e.g. "44khz"); filled lazily.
        self._models: dict[str, object] = {}

    @property
    def name(self) -> str:
        """Name under which this codec is registered."""
        return "DAC"

    @property
    def sample_rate(self) -> int:
        """Default sample rate in Hz; each config also carries its own rate."""
        return 44_100

    def configs(self) -> list[CodecConfig]:
        """Return every (model type, quantizer count) combination offered.

        Each config's params hold ``model_type``, ``n_quantizers`` and the
        model's ``sample_rate``.
        """
        # NOTE(review): all three variants are capped at 9 quantizers here;
        # confirm against the codebook counts of the 24 kHz and 16 kHz models.
        variants = [
            ("44khz", 44_100, 9),
            ("24khz", 24_000, 9),
            ("16khz", 16_000, 9),
        ]
        return [
            CodecConfig(
                name=f"{model_type} / {nq} quantizers",
                params={
                    "model_type": model_type,
                    "n_quantizers": nq,
                    "sample_rate": sr,
                },
            )
            for model_type, sr, max_nq in variants
            for nq in (max_nq, 6, 4, 2)
        ]

    def _get_model(self, model_type: str) -> object:
        """Return the cached model for ``model_type``, loading it on first use."""
        if model_type not in self._models:
            # Imported lazily so the dac package is only needed once a model
            # is actually requested.
            import dac as _dac

            model_path = _dac.utils.download(model_type=model_type)
            model = _dac.DAC.load(model_path)
            model.eval()
            self._models[model_type] = model
        return self._models[model_type]

    @torch.no_grad()
    def encode_decode(self, audio_path: Path, config: CodecConfig) -> np.ndarray:
        """Encode the file with DAC and decode it back to a waveform.

        The input is downmixed to mono and resampled to the config's sample
        rate before encoding with the requested number of quantizers.

        Args:
            audio_path: Path of the audio file to round-trip.
            config: One of the configs from ``configs()``; its params must
                contain ``model_type``, ``n_quantizers`` and ``sample_rate``.

        Returns:
            The reconstructed samples as a numpy array with the batch and
            channel dimensions squeezed away, trimmed to the length of the
            (resampled) input.

        Raises:
            KeyError: If ``config.params`` lacks one of the required keys.
        """
        from audiotools import AudioSignal

        model_type: str = config.params["model_type"]
        n_quantizers: int = config.params["n_quantizers"]
        target_sr: int = config.params["sample_rate"]

        model = self._get_model(model_type)

        signal = AudioSignal(str(audio_path))
        # Average the channel dimension so the model always sees mono input.
        if signal.audio_data.shape[1] > 1:
            signal.audio_data = signal.audio_data.mean(dim=1, keepdim=True)
        if signal.sample_rate != target_sr:
            signal = signal.resample(target_sr)

        signal = signal.to(model.device)
        # Remember the true length: preprocess right-pads the input to a
        # multiple of the model's hop length, and that padding would otherwise
        # survive into the reconstruction.
        n_samples = signal.audio_data.shape[-1]
        x = model.preprocess(signal.audio_data, signal.sample_rate)
        z, *_ = model.encode(x, n_quantizers=n_quantizers)
        y = model.decode(z)[..., :n_samples]

        return y.squeeze(0).squeeze(0).cpu().numpy()
79
+
80
+
81
+ register(DACCodec())
pyproject.toml ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "compare-codec"
3
+ version = "0.1.0"
4
+ description = "Compare audio codecs side by side in a web UI"
5
+ readme = "README.md"
6
+ license = "Apache-2.0"
7
+ requires-python = ">=3.12"
8
+ dependencies = [
9
+ "descript-audio-codec>=1.0.0",
10
+ "gradio",
11
+ "numpy",
12
+ ]
13
+
14
+ [dependency-groups]
15
+ dev = [
16
+ "ruff>=0.15.10",
17
+ ]
requirements.txt ADDED
@@ -0,0 +1,431 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ --extra-index-url https://download.pytorch.org/whl/cpu
2
+
3
+ # This file was autogenerated by uv via the following command:
4
+ # uv export --format requirements-txt --no-hashes --no-emit-project
5
+ absl-py==2.4.0
6
+ # via tensorboard
7
+ annotated-doc==0.0.4
8
+ # via
9
+ # fastapi
10
+ # typer
11
+ annotated-types==0.7.0
12
+ # via pydantic
13
+ anyio==4.13.0
14
+ # via
15
+ # gradio
16
+ # httpx
17
+ # starlette
18
+ argbind==0.3.9
19
+ # via
20
+ # descript-audio-codec
21
+ # descript-audiotools
22
+ asttokens==3.0.1
23
+ # via stack-data
24
+ audioop-lts==0.2.2 ; python_full_version >= '3.13'
25
+ # via
26
+ # gradio
27
+ # standard-aifc
28
+ # standard-sunau
29
+ audioread==3.1.0
30
+ # via librosa
31
+ brotli==1.2.0
32
+ # via gradio
33
+ certifi==2026.2.25
34
+ # via
35
+ # httpcore
36
+ # httpx
37
+ # requests
38
+ cffi==2.0.0
39
+ # via soundfile
40
+ charset-normalizer==3.4.7
41
+ # via requests
42
+ click==8.3.2
43
+ # via
44
+ # typer
45
+ # uvicorn
46
+ colorama==0.4.6 ; sys_platform == 'win32'
47
+ # via
48
+ # click
49
+ # ipython
50
+ # tqdm
51
+ contourpy==1.3.3
52
+ # via matplotlib
53
+ cuda-bindings==13.2.0 ; sys_platform == 'linux'
54
+ # via torch
55
+ cuda-pathfinder==1.5.2 ; sys_platform == 'linux'
56
+ # via cuda-bindings
57
+ cuda-toolkit==13.0.2 ; sys_platform == 'linux'
58
+ # via torch
59
+ cycler==0.12.1
60
+ # via matplotlib
61
+ decorator==5.2.1
62
+ # via
63
+ # ipython
64
+ # librosa
65
+ descript-audio-codec==1.0.0
66
+ # via codec-arena
67
+ descript-audiotools==0.7.2
68
+ # via descript-audio-codec
69
+ docstring-parser==0.17.0
70
+ # via argbind
71
+ einops==0.8.2
72
+ # via descript-audio-codec
73
+ executing==2.2.1
74
+ # via stack-data
75
+ fastapi==0.135.3
76
+ # via gradio
77
+ ffmpy==1.0.0
78
+ # via descript-audiotools
79
+ filelock==3.25.2
80
+ # via
81
+ # huggingface-hub
82
+ # torch
83
+ fire==0.7.1
84
+ # via randomname
85
+ flatten-dict==0.4.2
86
+ # via descript-audiotools
87
+ fonttools==4.62.1
88
+ # via matplotlib
89
+ fsspec==2026.3.0
90
+ # via
91
+ # gradio-client
92
+ # huggingface-hub
93
+ # torch
94
+ gradio==6.12.0
95
+ # via codec-arena
96
+ gradio-client==2.4.1
97
+ # via
98
+ # gradio
99
+ # hf-gradio
100
+ groovy==0.1.2
101
+ # via gradio
102
+ grpcio==1.80.0
103
+ # via tensorboard
104
+ h11==0.16.0
105
+ # via
106
+ # httpcore
107
+ # uvicorn
108
+ hf-gradio==0.3.0
109
+ # via gradio
110
+ hf-xet==1.4.3 ; platform_machine == 'AMD64' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'
111
+ # via huggingface-hub
112
+ httpcore==1.0.9
113
+ # via httpx
114
+ httpx==0.28.1
115
+ # via
116
+ # gradio
117
+ # gradio-client
118
+ # huggingface-hub
119
+ # safehttpx
120
+ huggingface-hub==1.10.1
121
+ # via
122
+ # gradio
123
+ # gradio-client
124
+ idna==3.11
125
+ # via
126
+ # anyio
127
+ # httpx
128
+ # requests
129
+ importlib-resources==7.1.0
130
+ # via descript-audiotools
131
+ ipython==9.12.0
132
+ # via descript-audiotools
133
+ ipython-pygments-lexers==1.1.1
134
+ # via ipython
135
+ jedi==0.19.2
136
+ # via ipython
137
+ jinja2==3.1.6
138
+ # via
139
+ # gradio
140
+ # torch
141
+ joblib==1.5.3
142
+ # via
143
+ # librosa
144
+ # scikit-learn
145
+ julius==0.2.7
146
+ # via descript-audiotools
147
+ kiwisolver==1.5.0
148
+ # via matplotlib
149
+ lazy-loader==0.5
150
+ # via librosa
151
+ librosa==0.11.0
152
+ # via descript-audiotools
153
+ llvmlite==0.47.0
154
+ # via numba
155
+ markdown==3.10.2
156
+ # via tensorboard
157
+ markdown-it-py==4.0.0
158
+ # via rich
159
+ markdown2==2.5.5
160
+ # via descript-audiotools
161
+ markupsafe==3.0.3
162
+ # via
163
+ # gradio
164
+ # jinja2
165
+ # werkzeug
166
+ matplotlib==3.10.8
167
+ # via descript-audiotools
168
+ matplotlib-inline==0.2.1
169
+ # via ipython
170
+ mdurl==0.1.2
171
+ # via markdown-it-py
172
+ mpmath==1.3.0
173
+ # via sympy
174
+ msgpack==1.1.2
175
+ # via librosa
176
+ networkx==3.6.1
177
+ # via torch
178
+ numba==0.65.0
179
+ # via librosa
180
+ numpy==2.4.4
181
+ # via
182
+ # codec-arena
183
+ # contourpy
184
+ # descript-audio-codec
185
+ # descript-audiotools
186
+ # gradio
187
+ # librosa
188
+ # matplotlib
189
+ # numba
190
+ # pandas
191
+ # pyloudnorm
192
+ # pystoi
193
+ # scikit-learn
194
+ # scipy
195
+ # soundfile
196
+ # soxr
197
+ # tensorboard
198
+ # torch-stoi
199
+ nvidia-cublas==13.1.0.3 ; sys_platform == 'linux'
200
+ # via
201
+ # cuda-toolkit
202
+ # nvidia-cudnn-cu13
203
+ # nvidia-cusolver
204
+ nvidia-cuda-cupti==13.0.85 ; sys_platform == 'linux'
205
+ # via cuda-toolkit
206
+ nvidia-cuda-nvrtc==13.0.88 ; sys_platform == 'linux'
207
+ # via cuda-toolkit
208
+ nvidia-cuda-runtime==13.0.96 ; sys_platform == 'linux'
209
+ # via cuda-toolkit
210
+ nvidia-cudnn-cu13==9.19.0.56 ; sys_platform == 'linux'
211
+ # via torch
212
+ nvidia-cufft==12.0.0.61 ; sys_platform == 'linux'
213
+ # via cuda-toolkit
214
+ nvidia-cufile==1.15.1.6 ; sys_platform == 'linux'
215
+ # via cuda-toolkit
216
+ nvidia-curand==10.4.0.35 ; sys_platform == 'linux'
217
+ # via cuda-toolkit
218
+ nvidia-cusolver==12.0.4.66 ; sys_platform == 'linux'
219
+ # via cuda-toolkit
220
+ nvidia-cusparse==12.6.3.3 ; sys_platform == 'linux'
221
+ # via
222
+ # cuda-toolkit
223
+ # nvidia-cusolver
224
+ nvidia-cusparselt-cu13==0.8.0 ; sys_platform == 'linux'
225
+ # via torch
226
+ nvidia-nccl-cu13==2.28.9 ; sys_platform == 'linux'
227
+ # via torch
228
+ nvidia-nvjitlink==13.0.88 ; sys_platform == 'linux'
229
+ # via
230
+ # cuda-toolkit
231
+ # nvidia-cufft
232
+ # nvidia-cusolver
233
+ # nvidia-cusparse
234
+ nvidia-nvshmem-cu13==3.4.5 ; sys_platform == 'linux'
235
+ # via torch
236
+ nvidia-nvtx==13.0.85 ; sys_platform == 'linux'
237
+ # via cuda-toolkit
238
+ orjson==3.11.8
239
+ # via gradio
240
+ packaging==26.0
241
+ # via
242
+ # gradio
243
+ # gradio-client
244
+ # huggingface-hub
245
+ # lazy-loader
246
+ # matplotlib
247
+ # pooch
248
+ # tensorboard
249
+ pandas==3.0.2
250
+ # via gradio
251
+ parso==0.8.6
252
+ # via jedi
253
+ pexpect==4.9.0 ; sys_platform != 'emscripten' and sys_platform != 'win32'
254
+ # via ipython
255
+ pillow==12.2.0
256
+ # via
257
+ # gradio
258
+ # matplotlib
259
+ # tensorboard
260
+ platformdirs==4.9.6
261
+ # via pooch
262
+ pooch==1.9.0
263
+ # via librosa
264
+ prompt-toolkit==3.0.52
265
+ # via ipython
266
+ protobuf==3.19.6
267
+ # via
268
+ # descript-audiotools
269
+ # tensorboard
270
+ ptyprocess==0.7.0 ; sys_platform != 'emscripten' and sys_platform != 'win32'
271
+ # via pexpect
272
+ pure-eval==0.2.3
273
+ # via stack-data
274
+ pycparser==3.0 ; implementation_name != 'PyPy'
275
+ # via cffi
276
+ pydantic==2.12.5
277
+ # via
278
+ # fastapi
279
+ # gradio
280
+ pydantic-core==2.41.5
281
+ # via pydantic
282
+ pydub==0.25.1
283
+ # via gradio
284
+ pygments==2.20.0
285
+ # via
286
+ # ipython
287
+ # ipython-pygments-lexers
288
+ # rich
289
+ pyloudnorm==0.2.0
290
+ # via descript-audiotools
291
+ pyparsing==3.3.2
292
+ # via matplotlib
293
+ pystoi==0.4.1
294
+ # via
295
+ # descript-audiotools
296
+ # torch-stoi
297
+ python-dateutil==2.9.0.post0
298
+ # via
299
+ # matplotlib
300
+ # pandas
301
+ python-multipart==0.0.26
302
+ # via gradio
303
+ pytz==2026.1.post1
304
+ # via gradio
305
+ pyyaml==6.0.3
306
+ # via
307
+ # argbind
308
+ # gradio
309
+ # huggingface-hub
310
+ randomname==0.2.1
311
+ # via descript-audiotools
312
+ requests==2.33.1
313
+ # via pooch
314
+ rich==14.3.4
315
+ # via
316
+ # descript-audiotools
317
+ # typer
318
+ safehttpx==0.1.7
319
+ # via gradio
320
+ scikit-learn==1.8.0
321
+ # via librosa
322
+ scipy==1.17.1
323
+ # via
324
+ # descript-audiotools
325
+ # librosa
326
+ # pyloudnorm
327
+ # pystoi
328
+ # scikit-learn
329
+ semantic-version==2.10.0
330
+ # via gradio
331
+ setuptools==81.0.0
332
+ # via
333
+ # tensorboard
334
+ # torch
335
+ shellingham==1.5.4
336
+ # via typer
337
+ six==1.17.0
338
+ # via
339
+ # flatten-dict
340
+ # python-dateutil
341
+ soundfile==0.13.1
342
+ # via
343
+ # descript-audiotools
344
+ # librosa
345
+ soxr==1.0.0
346
+ # via librosa
347
+ stack-data==0.6.3
348
+ # via ipython
349
+ standard-aifc==3.13.0 ; python_full_version >= '3.13'
350
+ # via
351
+ # audioread
352
+ # librosa
353
+ standard-chunk==3.13.0 ; python_full_version >= '3.13'
354
+ # via standard-aifc
355
+ standard-sunau==3.13.0 ; python_full_version >= '3.13'
356
+ # via
357
+ # audioread
358
+ # librosa
359
+ starlette==1.0.0
360
+ # via
361
+ # fastapi
362
+ # gradio
363
+ sympy==1.14.0
364
+ # via torch
365
+ tensorboard==2.20.0
366
+ # via descript-audiotools
367
+ tensorboard-data-server==0.7.2
368
+ # via tensorboard
369
+ termcolor==3.3.0
370
+ # via fire
371
+ threadpoolctl==3.6.0
372
+ # via scikit-learn
373
+ tomlkit==0.14.0
374
+ # via gradio
375
+ torch==2.11.0
376
+ # via
377
+ # descript-audio-codec
378
+ # descript-audiotools
379
+ # julius
380
+ # torch-stoi
381
+ torch-stoi==0.2.3
382
+ # via descript-audiotools
383
+ torchaudio==2.11.0
384
+ # via
385
+ # descript-audio-codec
386
+ # descript-audiotools
387
+ # torch-stoi
388
+ tqdm==4.67.3
389
+ # via
390
+ # descript-audio-codec
391
+ # descript-audiotools
392
+ # huggingface-hub
393
+ traitlets==5.14.3
394
+ # via
395
+ # ipython
396
+ # matplotlib-inline
397
+ triton==3.6.0 ; sys_platform == 'linux'
398
+ # via torch
399
+ typer==0.24.1
400
+ # via
401
+ # gradio
402
+ # hf-gradio
403
+ # huggingface-hub
404
+ typing-extensions==4.15.0
405
+ # via
406
+ # anyio
407
+ # fastapi
408
+ # gradio
409
+ # gradio-client
410
+ # grpcio
411
+ # huggingface-hub
412
+ # librosa
413
+ # pydantic
414
+ # pydantic-core
415
+ # starlette
416
+ # torch
417
+ # typing-inspection
418
+ typing-inspection==0.4.2
419
+ # via
420
+ # fastapi
421
+ # pydantic
422
+ tzdata==2026.1 ; sys_platform == 'emscripten' or sys_platform == 'win32'
423
+ # via pandas
424
+ urllib3==2.6.3
425
+ # via requests
426
+ uvicorn==0.44.0
427
+ # via gradio
428
+ wcwidth==0.6.0
429
+ # via prompt-toolkit
430
+ werkzeug==3.1.8
431
+ # via tensorboard
uv.lock ADDED
The diff for this file is too large to render. See raw diff