| | |
| | from functools import partial |
| | from pathlib import Path |
| | import argparse |
| | import librosa |
| | import librosa.display |
| | import numpy as np |
| | import matplotlib.pyplot as plt |
| | import soundfile as sf |
| | import scipy.signal as sig |
| | import psola |
| |
|
| |
|
# Number of semitones (MIDI note-number steps) in one octave.
SEMITONES_IN_OCTAVE = 12
| |
|
| |
|
def degrees_from(scale: str):
    """Return the pitch classes (degrees) of the given scale.

    `scale` is passed unchanged to `librosa.key_to_degrees`. The returned
    array holds those degrees followed by one extra entry: the first degree
    shifted up by one octave.
    """
    scale_degrees = librosa.key_to_degrees(scale)
    # Repeat the first degree an octave higher so that a nearest-degree
    # search also has a candidate above the degrees of a single octave.
    octave_up = scale_degrees[0] + SEMITONES_IN_OCTAVE
    return np.append(scale_degrees, octave_up)
| |
|
| |
|
def closest_pitch(f0):
    """Round the given pitch values to the nearest MIDI note numbers.

    Args:
        f0: Pitch in Hz, either a scalar or an array. NaN entries are
            preserved as NaN in the result.

    Returns:
        The frequency in Hz of the nearest MIDI note for every entry of
        `f0`, as returned by `librosa.midi_to_hz`.
    """
    rounded_midi = np.around(librosa.hz_to_midi(f0))
    # Keep NaN entries as NaN. np.where is used instead of in-place masked
    # assignment so that scalar input (which is not item-assignable) works
    # the same way as array input.
    rounded_midi = np.where(np.isnan(f0), np.nan, rounded_midi)
    return librosa.midi_to_hz(rounded_midi)
| |
|
| |
|
def closest_pitch_from_scale(f0, scale):
    """Return the pitch closest to f0 that belongs to the given scale.

    Args:
        f0: A single pitch value in Hz. NaN is returned unchanged.
        scale: Key name understood by `librosa.key_to_degrees`.

    Returns:
        The frequency in Hz of the nearest note whose pitch class is a
        degree of `scale`, or NaN when `f0` is NaN.
    """
    if np.isnan(f0):
        return np.nan
    degrees = degrees_from(scale)
    midi_note = librosa.hz_to_midi(f0)
    # Pitch class of the input, as a (possibly fractional) value in [0, 12).
    degree = midi_note % SEMITONES_IN_OCTAVE
    # Signed distance to every scale degree measured around the octave
    # circle, i.e. folded into [-6, 6). Pitch classes are cyclic, so a
    # degree of 11.8 is 0.2 below pitch class 0, not 11.8 above it; a plain
    # difference would miss that neighbour for keys whose degree list does
    # not start at pitch class 0.
    half_octave = SEMITONES_IN_OCTAVE / 2
    offsets = (degree - degrees + half_octave) % SEMITONES_IN_OCTAVE - half_octave
    nearest = np.argmin(np.abs(offsets))
    # Shift the input by its signed distance to the nearest degree.
    return librosa.midi_to_hz(midi_note - offsets[nearest])
| |
|
| |
|
def aclosest_pitch_from_scale(f0, scale):
    """Snap each pitch of the f0 array onto the given scale and smooth the result.

    Every entry is corrected on its own with `closest_pitch_from_scale`; the
    corrected track is then passed through an 11-sample median filter.
    Wherever the filtered track is NaN, the unfiltered corrected value is
    used instead.
    """
    snapped = np.zeros_like(f0)
    for idx, pitch in enumerate(f0):
        snapped[idx] = closest_pitch_from_scale(pitch, scale)

    smoothed = sig.medfilt(snapped, kernel_size=11)

    # Fall back to the unsmoothed value at every position the filter left as NaN.
    return np.where(np.isnan(smoothed), snapped, smoothed)
| |
|
| |
|
def _plot_pitch_correction(audio, sr, f0, corrected_f0, frame_length, hop_length, fmin, fmax):
    """Save a log-frequency spectrogram with both pitch tracks to 'pitch_correction.png'."""
    stft = librosa.stft(audio, n_fft=frame_length, hop_length=hop_length)
    time_points = librosa.times_like(stft, sr=sr, hop_length=hop_length)
    log_stft = librosa.amplitude_to_db(np.abs(stft), ref=np.max)
    fig, ax = plt.subplots()
    try:
        img = librosa.display.specshow(log_stft, x_axis='time', y_axis='log', ax=ax, sr=sr,
                                       hop_length=hop_length, fmin=fmin, fmax=fmax)
        fig.colorbar(img, ax=ax, format="%+2.f dB")
        ax.plot(time_points, f0, label='original pitch', color='cyan', linewidth=2)
        ax.plot(time_points, corrected_f0, label='corrected pitch', color='orange', linewidth=1)
        ax.legend(loc='upper right')
        # Label the spectrogram axes explicitly rather than relying on
        # pyplot's notion of the "current" axes.
        ax.set_ylabel('Frequency [Hz]')
        ax.set_xlabel('Time [M:SS]')
        fig.savefig('pitch_correction.png', dpi=300, bbox_inches='tight')
    finally:
        # Release the figure even if plotting fails; otherwise every call
        # with plot=True leaves an open figure behind.
        plt.close(fig)


def autotune(audio, sr, correction_function, plot=False):
    """Pitch-correct `audio` and return the re-synthesized signal.

    Args:
        audio: Audio samples passed to `librosa.pyin` and `psola.vocode`
            (main() passes a single channel).
        sr: Sampling rate of `audio`.
        correction_function: Callable that takes the tracked pitch array and
            returns the target pitch array.
        plot: If true, write a spectrogram with the original and corrected
            pitch tracks to 'pitch_correction.png' in the working directory.

    Returns:
        The output of `psola.vocode` for the corrected pitch track.
    """
    # Analysis settings shared by pitch tracking and plotting.
    frame_length = 2048
    hop_length = frame_length // 4
    fmin = librosa.note_to_hz('C2')
    fmax = librosa.note_to_hz('C7')

    # Track the pitch; only the f0 estimate is used, the voicing outputs are ignored.
    f0, _voiced_flag, _voiced_probabilities = librosa.pyin(audio,
                                                           frame_length=frame_length,
                                                           hop_length=hop_length,
                                                           sr=sr,
                                                           fmin=fmin,
                                                           fmax=fmax)

    # Compute the target pitch track.
    corrected_f0 = correction_function(f0)

    if plot:
        _plot_pitch_correction(audio, sr, f0, corrected_f0,
                               frame_length, hop_length, fmin, fmax)

    # Re-synthesize the audio at the corrected pitch.
    return psola.vocode(audio, sample_rate=int(sr), target_pitch=corrected_f0, fmin=fmin, fmax=fmax)
| |
|
| |
|
def main():
    """Command-line entry point: pitch-correct a vocals file and save the result.

    The corrected audio is written next to the input file, with
    '_pitch_corrected' appended to the file stem and the same suffix.
    Exits with an argparse usage error when the "scale" correction method is
    selected without --scale.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument('vocals_file')
    ap.add_argument('--plot', '-p', action='store_true', default=False,
                    help='if set, will produce a plot of the results')
    ap.add_argument('--correction-method', '-c', choices=['closest', 'scale'], default='closest')
    ap.add_argument('--scale', '-s', type=str, help='see librosa.key_to_degrees;'
                                                    ' used only for the "scale" correction'
                                                    ' method')
    args = ap.parse_args()

    # Fail early with a clear usage message instead of passing scale=None
    # down to the scale lookup after the audio has already been loaded.
    if args.correction_method == 'scale' and args.scale is None:
        ap.error('--scale is required when --correction-method is "scale"')

    filepath = Path(args.vocals_file)

    # Load at the file's native sampling rate, keeping all channels.
    y, sr = librosa.load(str(filepath), sr=None, mono=False)

    # Only the first channel is processed when the file has several.
    if y.ndim > 1:
        y = y[0, :]

    # Pick the pitch-correction strategy.
    if args.correction_method == 'closest':
        correction_function = closest_pitch
    else:
        correction_function = partial(aclosest_pitch_from_scale, scale=args.scale)

    pitch_corrected_y = autotune(y, sr, correction_function, args.plot)

    # Write the result beside the input file.
    filepath = filepath.parent / (filepath.stem + '_pitch_corrected' + filepath.suffix)
    sf.write(str(filepath), pitch_corrected_y, sr)


if __name__ == '__main__':
    main()
| |
|