Spaces:

Rthur2003
/

crowncode-backend

Running

App Files Files Community

crowncode-backend/app/services/audio_processor.py

Rthur2003

Initial commit: CrownCode Backend for Hugging Face Spaces

7ac6163 5 months ago

raw

history blame contribute delete

2.51 kB

	"""
	Audio processing service for data augmentation and manipulation.
	"""

	import io
	import logging
	import numpy as np
	import librosa
	import soundfile as sf
	import scipy.signal
	from fastapi import UploadFile

	from app.schemas import AudioAugmentationOptions

	# Module-level logger, named after this module via __name__.
	logger = logging.getLogger(__name__)

	def process_audio(file_bytes: bytes, options: AudioAugmentationOptions) -> io.BytesIO:
	    """Apply the requested augmentations to an audio clip and return it as WAV.

	    The steps run in a fixed order, each only when its flag on ``options``
	    is truthy: trim silence, random pitch shift, random speed change,
	    additive noise, bass boost. Pitch, speed and noise draw from NumPy's
	    global random state, so the output is not reproducible unless the
	    caller seeds that state.

	    Args:
	        file_bytes: Encoded audio file contents, decoded via ``librosa.load``.
	        options: Flags read here: ``trim_silence``, ``pitch_shift``,
	            ``speed_change``, ``add_noise`` and ``bass_boost``.

	    Returns:
	        A ``BytesIO`` holding the processed audio in WAV format, rewound to
	        position 0 so it can be read immediately.

	    Raises:
	        ValueError: If loading, any augmentation step, or encoding fails.
	            The original exception is attached as ``__cause__``.
	    """
	    try:
	        # sr=None asks librosa to keep the file's own sample rate instead of
	        # resampling to its default.
	        y, sr = librosa.load(io.BytesIO(file_bytes), sr=None)

	        # 1. Trim leading/trailing silence (threshold: 20 dB below peak).
	        if options.trim_silence:
	            y, _ = librosa.effects.trim(y, top_db=20)
	            logger.info("Applied trim_silence")

	        # 2. Pitch shift by a random amount in [-2, 2) semitones.
	        if options.pitch_shift:
	            n_steps = np.random.uniform(-2, 2)
	            y = librosa.effects.pitch_shift(y, sr=sr, n_steps=n_steps)
	            logger.info("Applied pitch_shift: %.2f", n_steps)

	        # 3. Speed change by a random factor in [0.9, 1.1).
	        if options.speed_change:
	            rate = np.random.uniform(0.9, 1.1)
	            y = librosa.effects.time_stretch(y, rate=rate)
	            logger.info("Applied speed_change: %.2f", rate)

	        # 4. Add Gaussian noise scaled to 0.5% of the signal's peak.
	        if options.add_noise:
	            noise_amp = 0.005 * np.max(np.abs(y))
	            # Size the noise from y.shape rather than len(y) so it matches
	            # the signal element-for-element whatever its dimensionality.
	            y = y + noise_amp * np.random.normal(size=y.shape)
	            logger.info("Applied add_noise")

	        # 5. Bass boost. This is not a true low-shelf filter: it isolates
	        # content below 200 Hz with a 10th-order Butterworth low-pass and
	        # mixes half of that back into the original signal.
	        if options.bass_boost:
	            sos = scipy.signal.butter(10, 200, 'lp', fs=sr, output='sos')
	            low_end = scipy.signal.sosfilt(sos, y)
	            y = y + (low_end * 0.5)
	            # Normalize so the added low end does not clip.
	            y = librosa.util.normalize(y)
	            logger.info("Applied bass_boost")

	        # Encode to an in-memory WAV and rewind for the caller.
	        out_buffer = io.BytesIO()
	        sf.write(out_buffer, y, sr, format='WAV')
	        out_buffer.seek(0)

	        return out_buffer

	    except Exception as e:
	        # logger.exception records the traceback, like error(exc_info=True).
	        logger.exception("Error processing audio: %s", e)
	        # Chain explicitly so callers can inspect the root cause.
	        raise ValueError(f"Audio processing failed: {e}") from e