Feature Extraction
Transformers
Safetensors
English
usad2
automatic-speech-recognition
audio-classification
audio
speech
music
custom_code
Instructions to use MIT-SLS/USAD2-Large with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use MIT-SLS/USAD2-Large with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("feature-extraction", model="MIT-SLS/USAD2-Large", trust_remote_code=True)
```

```python
# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("MIT-SLS/USAD2-Large", trust_remote_code=True, dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
File size: 1,536 Bytes
8710021 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 | from typing import List, Tuple
import torch
from transformers import PreTrainedModel
from .configuration_usad2 import Usad2Config
from .usad_model import UsadModel
class Usad2Model(PreTrainedModel):
    """``PreTrainedModel`` wrapper that delegates to an inner ``UsadModel``.

    The wrapped network is stored on ``self.model``. ``forward`` and the
    audio helpers pass straight through to it, while the read-only
    properties surface values taken from the config or from the module's
    parameters.
    """

    # Class-level settings read by the ``PreTrainedModel`` machinery.
    config_class = Usad2Config
    base_model_prefix = "model"
    main_input_name = "wavs"

    def __init__(self, config: Usad2Config):
        """Initialise the base class and build the wrapped ``UsadModel``."""
        super().__init__(config)
        wrapped = UsadModel(config)
        self.model = wrapped

    def forward(self, *args, **kwargs):
        """Call the wrapped model with the given arguments, unchanged."""
        inner = self.model
        return inner(*args, **kwargs)

    @property
    def sample_rate(self) -> int:
        """Fixed audio sample rate reported by this model, in Hz."""
        return 16_000  # Hz

    @property
    def encoder_frame_rate(self) -> int:
        """Encoder output frame rate in Hz.

        Computed as ``100 / config.conv_subsample_rate`` and passed through
        the built-in ``round``.
        """
        # NOTE(review): 100 is presumably the input feature frame rate in
        # Hz before convolutional subsampling; confirm against UsadModel.
        frames_per_second = 100 / self.config.conv_subsample_rate
        return round(frames_per_second)  # Hz

    @property
    def mel_dim(self) -> int:
        """Value of ``config.input_dim``."""
        cfg = self.config
        return cfg.input_dim

    @property
    def encoder_dim(self) -> int:
        """Value of ``config.encoder_dim``."""
        cfg = self.config
        return cfg.encoder_dim

    @property
    def num_layers(self) -> int:
        """Value of ``config.num_layers``."""
        cfg = self.config
        return cfg.num_layers

    @property
    def device(self) -> torch.device:
        """Device of the first parameter yielded by ``self.parameters()``."""
        first_param = next(self.parameters())
        return first_param.device

    @property
    def dtype(self) -> torch.dtype:
        """Dtype of the first parameter yielded by ``self.parameters()``."""
        first_param = next(self.parameters())
        return first_param.dtype

    def set_audio_chunk_size(self, seconds: float = 30.0) -> None:
        """Forward the chunk size (``seconds``) to the wrapped model."""
        inner = self.model
        inner.set_audio_chunk_size(seconds)

    def load_audio(self, audio_path: str) -> torch.Tensor:
        """Load one audio file through the wrapped model's ``load_audio``."""
        inner = self.model
        return inner.load_audio(audio_path)

    def load_audio_batch(self, audio_paths: List[str]) -> Tuple[torch.Tensor, torch.Tensor]:
        """Load several audio files via the wrapped model's ``load_audio_batch``.

        Returns the wrapped model's result unchanged (annotated as a pair of
        tensors).
        """
        # NOTE(review): the pair is presumably (padded waveforms, lengths);
        # confirm against UsadModel.load_audio_batch.
        inner = self.model
        return inner.load_audio_batch(audio_paths)
|