Instructions to use BasketTechologies/bark with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use BasketTechologies/bark with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-to-speech", model="BasketTechologies/bark")
```

```python
# Load model directly
from transformers import AutoProcessor, AutoModelForTextToWaveform

processor = AutoProcessor.from_pretrained("BasketTechologies/bark")
model = AutoModelForTextToWaveform.from_pretrained("BasketTechologies/bark")
```
- Notebooks
- Google Colab
- Kaggle
| from typing import Dict, List, Any | |
| from transformers import AutoProcessor, AutoModel | |
| import scipy.io.wavfile # Assuming WAV output format | |
class EndpointHandler:
    """Callable request handler that synthesizes speech from a text prompt.

    The handler loads the ``suno/bark`` processor and model once at
    construction time and, per request, returns the generated waveform as raw
    bytes together with the sampling rate needed to play it back.
    """

    # Bark's documented output rate; used only when the loaded model does not
    # expose ``generation_config.sample_rate``.
    _FALLBACK_SAMPLING_RATE = 24000

    def __init__(self, path=""):
        """Load the processor and model.

        Args:
            path: Not used by this handler; the checkpoint name is hard-coded
                to ``suno/bark``.
        """
        self.processor = AutoProcessor.from_pretrained("suno/bark")
        self.model = AutoModel.from_pretrained("suno/bark")

    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """Generate audio for the prompt stored under ``data["inputs"]``.

        Args:
            data: Request payload. Must contain a non-empty ``"inputs"`` entry
                holding the text to synthesize.

        Returns:
            On success, ``{"audio": <bytes>, "sampling_rate": <int>}`` where
            ``audio`` is the ``tobytes()`` dump of the first generated
            waveform. On any failure, ``{"error": <message>}``; exceptions are
            never propagated to the caller.
        """
        try:
            text_prompt = data.get("inputs")
            if not text_prompt:
                raise ValueError("Missing required 'inputs' field in request data.")

            inputs = self.processor(text=[text_prompt], return_tensors="pt")
            speech_values = self.model.generate(**inputs, do_sample=True)

            # detach() + cpu() make the conversion safe when the output tensor
            # lives on an accelerator or is still attached to the autograd
            # graph; a bare .numpy() raises in both situations.
            audio_data = speech_values[0].detach().cpu().numpy()

            # Take the rate from the model rather than hard-coding it, so the
            # reported value always matches the generated samples.
            generation_config = getattr(self.model, "generation_config", None)
            sampling_rate = getattr(
                generation_config, "sample_rate", self._FALLBACK_SAMPLING_RATE
            )

            # NOTE(review): these are raw sample bytes with no header; the
            # client presumably needs to know the dtype (likely float32) to
            # decode them, and the serving layer must be able to serialize
            # ``bytes`` - confirm both.
            audio_bytes = audio_data.tobytes()
            return {"audio": audio_bytes, "sampling_rate": sampling_rate}
        except Exception as e:
            # Top-level request boundary: report the failure in the response
            # instead of letting the exception escape.
            return {"error": str(e)}