# File size: 557 Bytes (ffb2413)
import sys, torch, torchaudio
from transformers import AutoModel, AutoProcessor
# Transcribe every audio file named on the command line and print one
# "<path>\t<decoded text>" line per file to stdout.

REPO = "."  # model and processor are loaded from the current directory
DEV = "cuda" if torch.cuda.is_available() else "cpu"

# NOTE: trust_remote_code=True executes Python code shipped alongside the
# checkpoint; only point REPO at a directory you trust.
model = AutoModel.from_pretrained(REPO, trust_remote_code=True).to(DEV).eval()
proc = AutoProcessor.from_pretrained(REPO, trust_remote_code=True)

# Sampling rate the processor advertises, if it exposes one. Looked up
# defensively because the processor class comes from remote code; when no
# rate is found, audio is passed through at its native rate as before.
TARGET_SR = getattr(getattr(proc, "feature_extractor", proc), "sampling_rate", None)

for path in sys.argv[1:]:
    wav, sr = torchaudio.load(path)
    # Down-mix multi-channel audio to mono by averaging over dim 0
    # (torchaudio.load is channels-first by default); a single-channel
    # file just has that leading dimension dropped.
    wav = wav.mean(0) if wav.shape[0] > 1 else wav.squeeze(0)
    # Feature extractors typically reject or mis-handle audio whose rate
    # differs from the one they were configured with, so resample first.
    if TARGET_SR and sr != TARGET_SR:
        wav = torchaudio.functional.resample(wav, sr, TARGET_SR)
        sr = TARGET_SR
    inputs = proc(wav, sampling_rate=sr, return_tensors="pt").to(DEV)
    # Inference only: make sure no autograd graph is built even if the
    # remote-code model overrides generate() without disabling gradients.
    with torch.no_grad():
        generated = model.generate(**inputs)
    print(f"{path}\t{proc.batch_decode(generated)[0]}")
|