Spaces:
Sleeping
Sleeping
| """DAC (Descript Audio Codec) — wraps the descript-audio-codec package.""" | |
| from __future__ import annotations | |
| from pathlib import Path | |
| import numpy as np | |
| import torch | |
| from compare_codec import CodecConfig, register | |
# Choose the torch backend once at import time, in priority order:
# CUDA if available, otherwise Apple MPS if available, otherwise the CPU.
if torch.cuda.is_available():
    _device = torch.device("cuda")
elif torch.backends.mps.is_available():
    _device = torch.device("mps")
else:
    _device = torch.device("cpu")
class DACCodec:
    """DAC codec adapter with lazy, per-model-type model loading.

    Models are downloaded and loaded the first time a given ``model_type`` is
    requested and are then cached on the instance for reuse.
    """

    def __init__(self) -> None:
        # Cache of loaded models keyed by model type ("44khz", "24khz", ...).
        self._models: dict[str, object] = {}

    def name(self) -> str:
        """Return the display name of this codec."""
        return "DAC"

    def sample_rate(self) -> int:
        """Return the sample rate (in Hz) reported for this codec."""
        return 44_100

    def configs(self) -> list[CodecConfig]:
        """Return one config per (model type, quantizer count) combination.

        For every model type the quantizer counts ``max_nq``, 6, 4 and 2 are
        offered, in that order. Each config's ``params`` carries the
        ``model_type``, ``n_quantizers`` and ``sample_rate`` that
        :meth:`encode_decode` reads back.
        """
        configs = []
        for model_type, sr, max_nq in [
            ("44khz", 44_100, 9),
            ("24khz", 24_000, 9),
            ("16khz", 16_000, 9),
        ]:
            for nq in (max_nq, 6, 4, 2):
                configs.append(
                    CodecConfig(
                        name=f"{model_type} / {nq} quantizers",
                        params={
                            "model_type": model_type,
                            "n_quantizers": nq,
                            "sample_rate": sr,
                        },
                    )
                )
        return configs

    def _get_model(self, model_type: str) -> object:
        """Return the model for *model_type*, loading it on first use.

        On a cache miss the weights are fetched via ``dac.utils.download``,
        loaded with ``dac.DAC.load``, switched to eval mode, moved to the
        module-level ``_device`` and stored in the instance cache.
        """
        if model_type not in self._models:
            # Imported lazily so the dependency is only needed once a model
            # is actually requested.
            import dac as _dac

            model_path = _dac.utils.download(model_type=model_type)
            model = _dac.DAC.load(model_path)
            model.eval().to(_device)
            self._models[model_type] = model
        return self._models[model_type]

    def encode_decode(self, audio_path: Path, config: CodecConfig) -> np.ndarray:
        """Round-trip an audio file through DAC and return the decoded audio.

        Args:
            audio_path: Path of the audio file to load.
            config: A config as produced by :meth:`configs`; its ``params``
                must provide ``model_type``, ``n_quantizers`` and
                ``sample_rate``.

        Returns:
            The decoded tensor, with its first two dimensions squeezed when
            they have size 1, converted to a NumPy array on the CPU.

        NOTE(review): the input is resampled to ``params["sample_rate"]``
        before encoding, so for the 24 kHz / 16 kHz configs the result is
        presumably not at the 44.1 kHz reported by :meth:`sample_rate` --
        confirm the caller accounts for this.
        """
        from audiotools import AudioSignal

        model_type: str = config.params["model_type"]
        n_quantizers: int = config.params["n_quantizers"]
        target_sr: int = config.params["sample_rate"]

        model = self._get_model(model_type)

        signal = AudioSignal(str(audio_path))
        # Collapse dim 1 (assumed to be the channel axis -- TODO confirm) to a
        # single channel by averaging.
        if signal.audio_data.shape[1] > 1:
            signal.audio_data = signal.audio_data.mean(dim=1, keepdim=True)
        if signal.sample_rate != target_sr:
            signal = signal.resample(target_sr)
        signal = signal.to(model.device)

        # Inference only: without no_grad the decoded tensor carries autograd
        # history, and Tensor.numpy() raises on tensors that require grad.
        with torch.no_grad():
            x = model.preprocess(signal.audio_data, signal.sample_rate)
            z, _codes, _latents, _, _ = model.encode(x, n_quantizers=n_quantizers)
            y = model.decode(z)

        return y.squeeze(0).squeeze(0).cpu().numpy()
# Import-time side effect: hand a DACCodec instance to compare_codec.register.
register(DACCodec())