| | import gradio as gr |
| | import numpy as np |
| | from scipy.io.wavfile import read |
| | import matplotlib.pyplot as plt |
| | import torch |
| | import math |
| | import yaml |
| | import json |
| | import pyloudnorm as pyln |
| | from hydra.utils import instantiate |
| | from soxr import resample |
| | from functools import partial, reduce |
| | from torchcomp import coef2ms, ms2coef |
| | from copy import deepcopy |
| |
|
| | from modules.utils import vec2statedict, get_chunks |
| | from modules.fx import clip_delay_eq_Q |
| | from plot_utils import get_log_mags_from_eq |
| |
|
| |
|
def chain_functions(*functions):
    """Compose ``functions`` left to right into a single callable.

    The returned callable feeds its positional arguments to the first
    function. Between stages, a tuple result is unpacked into positional
    arguments for the next function; any other result is passed as a single
    argument. With no functions at all, the argument tuple is returned as-is.
    """

    def composed(*initial_args):
        result = initial_args
        for func in functions:
            if isinstance(result, tuple):
                result = func(*result)
            else:
                result = func(result)
        return result

    return composed
| |
|
| |
|
# Markdown shown at the top of the demo page (title and introductory text).
title_md = "# Vocal Effects Generator"
description_md = """
This is a demo of the paper [DiffVox: A Differentiable Model for Capturing and Analysing Professional Effects Distributions](https://arxiv.org/abs/2504.14735), accepted at DAFx 2025.
In this demo, you can upload a raw vocal audio file (in mono) and use our model to apply professional-quality vocal processing by tweaking generated effects settings to enhance your vocals!

The effects consist of series of EQ, compressor, delay, and reverb.
The generator is a PCA model derived from 365 vocal effects presets fitted with the same effects chain.
This interface allows you to control the principal components (PCs) of the generator, randomise them, and render the audio.

To give you some idea, we empirically found that the first PC controls the amount of reverb and the second PC controls the amount of brightness.
Note that adding these PCs together does not necessarily mean that their effects are additive in the final audio.
We found sometimes the effects of least important PCs are more perceptible.
Try to play around with the sliders and buttons and see what you can come up with!

> **_Note:_** To upload your own audio, click X on the top right corner of the input audio block.
"""
| |
|
# Range of every principal-component slider; also used to clip random draws.
SLIDER_MAX = 3
SLIDER_MIN = -3
# Number of leading PCs that get a dedicated slider in the UI.
NUMBER_OF_PCS = 4
# NOTE(review): not referenced anywhere in the visible code — confirm whether
# random sampling was meant to be scaled by this value.
TEMPERATURE = 0.7
# Locations of the effects-chain config and the fitted preset statistics.
CONFIG_PATH = "presets/rt_config.yaml"
PCA_PARAM_FILE = "presets/internal/gaussian.npz"
INFO_PATH = "presets/internal/info.json"
MASK_PATH = "presets/internal/feature_mask.npy"
PRESET_PATH = "presets/internal/raw_params.npy"
TRAIN_INDEX_PATH = "presets/internal/train_index.npy"
# Audio file preloaded into the input widget.
EXAMPLE_PATH = "eleanor_erased.wav"
| |
|
| |
|
# Read the "model" section of the YAML config and instantiate the effects
# chain once at import time; every request works on a deep copy of it.
with open(CONFIG_PATH) as fp:
    fx_config = yaml.safe_load(fp)["model"]

global_fx = instantiate(fx_config)
global_fx.eval()
| |
|
# Preset table: keep only the rows listed in the train index and the columns
# selected by the feature mask.
raw_params = torch.from_numpy(np.load(PRESET_PATH))
train_index = torch.from_numpy(np.load(TRAIN_INDEX_PATH))
feature_mask = torch.from_numpy(np.load(MASK_PATH))
presets = raw_params[train_index][:, feature_mask].contiguous()

# Eigendecomposition of the stored covariance. `eigh` returns eigenvalues in
# ascending order, so both outputs are flipped to put the largest first.
pca_params = np.load(PCA_PARAM_FILE)
mean = pca_params["mean"]
cov = pca_params["cov"]
eigvals, eigvecs = np.linalg.eigh(cov)
eigvals = np.flip(eigvals, axis=0)
eigvecs = np.flip(eigvecs, axis=1)
# `.copy()` materialises the flipped (negative-stride) views before handing
# them to torch.
eigsqrt = torch.from_numpy(eigvals.copy()).float().sqrt()
U = torch.from_numpy(eigvecs.copy()).float()
mean = torch.from_numpy(mean).float()
| |
|
| | |
| | |
| |
|
# Metadata describing how the flat parameter vector maps onto state-dict keys.
with open(INFO_PATH) as f:
    info = json.load(f)

param_keys = info["params_keys"]
# Empty shapes are replaced by [1] so every entry has an explicit shape.
original_shapes = list(
    map(lambda lst: lst if len(lst) else [1], info["params_original_shapes"])
)

# All but the last value returned by `get_chunks` are forwarded to
# `vec2statedict` as keyword arguments, after the keys and shapes themselves.
# NOTE(review): presumably `get_chunks` returns four values so the five names
# below line up one-to-one; `zip` would silently truncate otherwise — confirm.
*vec2dict_args, _ = get_chunks(param_keys, original_shapes)
vec2dict_args = [param_keys, original_shapes] + vec2dict_args
vec2dict = partial(
    vec2statedict,
    **dict(
        zip(
            [
                "keys",
                "original_shapes",
                "selected_chunks",
                "position",
                "U_matrix_shape",
            ],
            vec2dict_args,
        )
    ),
)
# Start the shared model from the mean preset.
global_fx.load_state_dict(vec2dict(mean), strict=False)
| |
|
| |
|
# Loudness meter for 44.1 kHz audio, shared by every call to `inference`.
meter = pyln.Meter(44100)
| |
|
| |
|
@torch.no_grad()
def z2x(z):
    """Map a latent PC vector ``z`` to a parameter vector.

    Each component is scaled by the square root of its eigenvalue, projected
    through the eigenvector matrix and shifted by the mean.
    """
    # Also closes every open matplotlib figure before new plots are drawn.
    plt.close("all")
    scaled = z * eigsqrt
    return U @ scaled + mean
| |
|
| |
|
@torch.no_grad()
def fx2x(fx):
    """Flatten the state dict of ``fx`` into a masked parameter vector.

    Entries are concatenated in ``param_keys`` order and then filtered with
    ``feature_mask``.
    """
    # Also closes every open matplotlib figure before new plots are drawn.
    plt.close("all")
    params = fx.state_dict()
    pieces = []
    for key in param_keys:
        pieces.append(params[key].flatten())
    return torch.cat(pieces)[feature_mask]
| |
|
| |
|
@torch.no_grad()
def x2z(x):
    """Project a parameter vector ``x`` back to latent PC coordinates.

    Inverse of ``z2x``: centre by the mean, rotate with ``U.T`` and divide by
    the per-component scale.
    """
    centred = x - mean
    projected = U.T @ centred
    return projected / eigsqrt
| |
|
| |
|
@torch.no_grad()
def inference(audio, ratio, fx):
    """Run ``fx`` on an audio clip and return the mixed, direct and wet signals.

    Args:
        audio: ``(sample_rate, samples)`` pair with ``samples`` a NumPy array.
        ratio: dry/wet balance in [0, 1]; 0.5 reproduces ``direct + wet``.
        fx: callable effects chain returning a ``(direct, wet)`` tensor pair.

    Returns:
        Three ``(44100, int16 array)`` pairs: rendered mix, direct signal and
        wet signal.
    """
    sr, y = audio
    if sr != 44100:
        y = resample(y, sr, 44100)
    if y.dtype.kind != "f":
        # NOTE(review): assumes integer input is 16-bit PCM — confirm.
        y = y / 32768.0

    if y.ndim == 1:
        y = y[:, None]
    loudness = meter.integrated_loudness(y)
    # Silent input yields a non-finite loudness; normalising with it would
    # turn the whole signal into NaN/inf, so leave such input untouched.
    if np.isfinite(loudness):
        y = pyln.normalize.loudness(y, loudness, -18.0)

    y = torch.from_numpy(y).float().T.unsqueeze(0)
    if y.shape[1] != 1:
        # Down-mix multi-channel input to mono.
        y = y.mean(dim=1, keepdim=True)

    direct, wet = fx(y)
    direct = direct.squeeze(0).T.numpy()
    wet = wet.squeeze(0).T.numpy()

    # Scale both stems together so that their plain sum peaks at 1.
    peak = np.max(np.abs(direct + wet))
    if peak > 1:
        direct = direct / peak
        wet = wet / peak

    angle = ratio * math.pi * 0.5
    rendered = math.sqrt(2) * (math.cos(angle) * direct + math.sin(angle) * wet)

    def to_int16(signal):
        # Clip before casting: a sample of exactly +1.0, or a mix at a ratio
        # other than 0.5, can exceed the int16 range and would wrap around.
        return np.clip(signal * 32768, -32768, 32767).astype(np.int16)

    return (
        (44100, to_int16(rendered)),
        (44100, to_int16(direct)),
        (44100, to_int16(wet)),
    )
| |
|
| |
|
def get_important_pcs(n=10, **kwargs):
    """Create ``n`` PC sliders labelled "PC 1" .. "PC n".

    Extra keyword arguments are forwarded to every ``gr.Slider``.
    """
    sliders = []
    for index in range(1, n + 1):
        slider = gr.Slider(
            minimum=SLIDER_MIN, maximum=SLIDER_MAX, label=f"PC {index}", **kwargs
        )
        sliders.append(slider)
    return sliders
| |
|
| |
|
def model2json(fx):
    """Collect the settings of every effect in ``fx`` into a nested dict.

    The first seven modules are reported under "Direct" together with the
    panner of ``fx[7]``; the delay, the FDN reverb and the cross-send level
    of ``fx[7]`` are reported under "Sends".
    """
    fx_names = ["PK1", "PK2", "LS", "HS", "LP", "HP", "DRC"]
    sends = fx[7]

    direct = {name: module.toJSON() for name, module in zip(fx_names, fx)}
    direct = direct | {"Panner": sends.pan.toJSON()}

    delay = sends.effects[0]
    delay_json = delay.toJSON() | {"LP": delay.eq.toJSON()}

    fdn = sends.effects[1]
    tone_peq = {name: band.toJSON() for name, band in zip(fx_names[:4], fdn.eq)}
    fdn_json = fdn.toJSON() | {"Tone correction PEQ": tone_peq}

    spatial_fx = {
        "DLY": delay_json,
        "FDN": fdn_json,
        "Cross Send (dB)": sends.params.sends_0.log10().mul(20).item(),
    }
    return {"Direct": direct, "Sends": spatial_fx}
| |
|
| |
|
@torch.no_grad()
def plot_eq(fx):
    """Plot the frequency response of the first six modules of ``fx``.

    The summed response is drawn as a solid black line; each individual
    filter is drawn faintly with a shaded area towards 0 dB.
    """
    fig, ax = plt.subplots(figsize=(6, 4), constrained_layout=True)
    freqs, band_mags = get_log_mags_from_eq(fx[:6])
    total = sum(band_mags)
    ax.plot(freqs, total, color="black", linestyle="-")
    for band in band_mags:
        ax.plot(freqs, band, "k-", alpha=0.3)
        ax.fill_between(freqs, band, 0, facecolor="gray", edgecolor="none", alpha=0.1)
    ax.set_xlabel("Frequency (Hz)")
    ax.set_ylabel("Magnitude (dB)")
    ax.set_xlim(20, 20000)
    ax.set_ylim(-40, 20)
    ax.set_xscale("log")
    ax.grid()
    return fig
| |
|
| |
|
@torch.no_grad()
def plot_comp(fx):
    """Plot the static input/output curve of the dynamics module ``fx[6]``.

    The black line applies the compressor above its threshold, then the
    expander below its threshold, then the make-up gain. The red line is the
    identity for reference.
    """
    fig, ax = plt.subplots(figsize=(6, 5), constrained_layout=True)
    params = fx[6].params
    cmp_th = params.cmp_th.item()
    exp_th = params.exp_th.item()
    cmp_ratio = params.cmp_ratio.item()
    exp_ratio = params.exp_ratio.item()
    make_up = params.make_up.item()

    comp_in = np.linspace(-80, 0, 100)

    # Compression stage: reduce the level above the compressor threshold.
    compressed = comp_in - (comp_in - cmp_th) * (cmp_ratio - 1) / cmp_ratio
    comp_curve = np.where(comp_in > cmp_th, compressed, comp_in)

    # Expansion stage: push the level further down below the expander threshold.
    expanded = comp_curve - (exp_th - comp_curve) / exp_ratio
    comp_out = np.where(comp_curve < exp_th, expanded, comp_curve) + make_up

    ax.plot(comp_in, comp_out, c="black", linestyle="-")
    ax.plot(comp_in, comp_in, c="r", alpha=0.5)
    ax.set_xlabel("Input Level (dB)")
    ax.set_ylabel("Output Level (dB)")
    ax.set_xlim(-80, 0)
    ax.set_ylim(-80, 0)
    ax.grid()
    return fig
| |
|
| |
|
@torch.no_grad()
def plot_delay(fx):
    """Plot the frequency response of the delay and of its first nine repeats.

    Each repeat passes through the delay's filter once more and is attenuated
    by the feedback gain. Later repeats are drawn more transparently, fading
    faster for longer delay times.
    """
    fig, ax = plt.subplots(figsize=(6, 4), constrained_layout=True)
    delay = fx[7].effects[0]
    freqs, band_mags = get_log_mags_from_eq([delay.eq])
    gain_db = delay.params.gain.log10().item() * 20
    delay_seconds = delay.params.delay.item() / 1000
    filter_db = sum(band_mags)
    ax.plot(freqs, filter_db + gain_db, color="black", linestyle="-")

    feedback_db = delay.params.feedback.log10().item() * 20
    for repeat in range(1, 10):
        repeat_db = filter_db * (repeat + 1) + feedback_db * repeat + gain_db
        opacity = max(0, (10 - repeat * delay_seconds * 4) / 10)
        ax.plot(freqs, repeat_db, c="black", alpha=opacity, linestyle="-")

    ax.set_xscale("log")
    ax.set_xlim(20, 20000)
    ax.set_ylim(-80, 0)
    ax.set_xlabel("Frequency (Hz)")
    ax.set_ylabel("Magnitude (dB)")
    ax.grid()
    return fig
| |
|
| |
|
@torch.no_grad()
def plot_reverb(fx):
    """Plot the tone-correction EQ response of the reverb in ``fx[7]``.

    The summed EQ response is offset by the product of the norms of the
    reverb's ``b`` and ``c`` parameters, expressed in dB.
    """
    fig, ax = plt.subplots(figsize=(6, 4), constrained_layout=True)
    fdn = fx[7].effects[1]
    freqs, band_mags = get_log_mags_from_eq(fdn.eq)

    io_gain = fdn.params.c.norm() * fdn.params.b.norm()
    io_gain_db = torch.log10(io_gain).item() * 20

    response = sum(band_mags) + io_gain_db
    ax.plot(freqs, response, color="black", linestyle="-")

    ax.set_xlabel("Frequency (Hz)")
    ax.set_ylabel("Magnitude (dB)")
    ax.set_xlim(20, 20000)
    ax.set_ylim(-40, 20)
    ax.set_xscale("log")
    ax.grid()
    return fig
| |
|
| |
|
@torch.no_grad()
def plot_t60(fx):
    """Plot the reverb decay time against frequency.

    The per-bin attenuation ``gamma`` is converted to dB per sample using the
    shortest delay line, then to the time needed for a 60 dB decay at
    44.1 kHz. Bins are spread linearly from 0 to 22050 Hz.
    """
    fig, ax = plt.subplots(figsize=(6, 4), constrained_layout=True)
    fdn = fx[7].effects[1]
    gamma = fdn.params.gamma.squeeze().numpy()
    delays = fdn.delays.numpy()
    freqs = np.linspace(0, 22050, gamma.size)
    # The small epsilon keeps log10 finite when gamma is zero.
    decay_db_per_sample = 20 * np.log10(gamma + 1e-10) / np.min(delays)
    t60 = -60 / decay_db_per_sample / 44100
    ax.plot(freqs, t60, color="black", linestyle="-")
    ax.set_xlabel("Frequency (Hz)")
    ax.set_ylabel("T60 (s)")
    ax.set_xlim(20, 20000)
    ax.set_ylim(0, 9)
    ax.set_xscale("log")
    ax.grid()
    return fig
| |
|
| |
|
@torch.no_grad()
def update_param(m, attr_name, value):
    """Write ``value`` into attribute ``attr_name`` of ``m``.

    An attribute that is exactly a ``torch.nn.Parameter`` is overwritten in
    place. Anything else is replaced by a new tensor: 0-dimensional when the
    current attribute is 0-dimensional, otherwise a one-element 1-D tensor.
    """
    current = getattr(m, attr_name)
    if type(current) is torch.nn.Parameter:
        current.data.copy_(value)
        return
    # Keep the rank of the existing attribute when building the replacement.
    payload = value if current.ndim == 0 else [value]
    setattr(m, attr_name, torch.tensor(payload))
| |
|
| |
|
@torch.no_grad()
def update_atrt(comp, attr_name, value):
    """Store a time in milliseconds on ``comp`` as a coefficient at 44.1 kHz."""
    coefficient = ms2coef(torch.tensor(value), 44100)
    setattr(comp, attr_name, coefficient)
| |
|
| |
|
def vec2fx(x):
    """Build a fresh effects chain configured from parameter vector ``x``.

    The shared ``global_fx`` is deep-copied so it is never modified, loaded
    with the state dict derived from ``x``, and then ``clip_delay_eq_Q`` is
    applied to every submodule with ``Q=0.707``.
    """
    fx = deepcopy(global_fx)
    state = vec2dict(x)
    fx.load_state_dict(state, strict=False)
    clip_q = partial(clip_delay_eq_Q, Q=0.707)
    fx.apply(clip_q)
    return fx
| |
|
| |
|
def get_last_attribute(m, attr_name):
    """Resolve a dotted attribute path down to its final owner.

    Returns ``(owner, last_name)`` where ``owner`` is reached by following
    every dotted component of ``attr_name`` except the last. A name without
    dots returns ``(m, attr_name)`` unchanged.
    """
    *parents, last = attr_name.split(".")
    owner = m
    for name in parents:
        owner = getattr(owner, name)
    return owner, last
| |
|
| |
|
with gr.Blocks() as demo:
    # Per-session state: the latent PC vector and the matching parameter vector.
    z = gr.State(torch.zeros_like(mean))
    fx_params = gr.State(mean)
    # Effects chain at the mean preset; used below only to seed the initial
    # widget values and plots.
    fx = vec2fx(fx_params.value)
    sr, y = read(EXAMPLE_PATH)

    # Widget factories sharing common keyword arguments.
    default_pc_slider = partial(
        gr.Slider, minimum=SLIDER_MIN, maximum=SLIDER_MAX, interactive=True, value=0
    )
    default_audio_block = partial(gr.Audio, type="numpy", loop=True)
    default_freq_slider = partial(gr.Slider, label="Frequency (Hz)", interactive=True)
    default_gain_slider = partial(gr.Slider, label="Gain (dB)", interactive=True)
    default_q_slider = partial(gr.Slider, label="Q", interactive=True)

    gr.Markdown(
        title_md,
        elem_id="title",
    )
    with gr.Row():
        gr.Markdown(
            description_md,
            elem_id="description",
        )
        gr.Image("diffvox_diagram.png", elem_id="diagram")

    # Left column: input audio and PC controls. Right column: rendered audio.
    with gr.Row():
        with gr.Column():
            audio_input = default_audio_block(
                sources="upload", label="Input Audio", value=(sr, y)
            )
            with gr.Row():
                random_button = gr.Button(
                    f"Randomise PCs",
                    elem_id="randomise-button",
                )
                reset_button = gr.Button(
                    "Reset",
                    elem_id="reset-button",
                )
                render_button = gr.Button(
                    "Run", elem_id="render-button", variant="primary"
                )
            with gr.Row():
                s1 = default_pc_slider(label="PC 1")
                s2 = default_pc_slider(label="PC 2")

            with gr.Row():
                s3 = default_pc_slider(label="PC 3")
                s4 = default_pc_slider(label="PC 4")

            sliders = [s1, s2, s3, s4]

            with gr.Row():
                with gr.Column():
                    # Any PC beyond the first NUMBER_OF_PCS is edited through
                    # one shared slider, selected by this 1-based dropdown.
                    extra_pc_dropdown = gr.Dropdown(
                        list(range(NUMBER_OF_PCS + 1, mean.numel() + 1)),
                        label=f"PC > {NUMBER_OF_PCS}",
                        info="Select which extra PC to adjust",
                        interactive=True,
                    )
                    extra_slider = default_pc_slider(label="Extra PC")

                # Presets are listed 1-based; "none" maps to the mean preset.
                preset_dropdown = gr.Dropdown(
                    ["none"] + list(range(1, presets.shape[0] + 1)),
                    value="none",
                    label=f"Select Preset (1-{presets.shape[0]})",
                    info="Select a preset to load (this will override the current settings)",
                    interactive=True,
                )

        with gr.Column():
            audio_output = default_audio_block(label="Output Audio", interactive=False)
            dry_wet_ratio = gr.Slider(
                minimum=0,
                maximum=1,
                value=0.5,
                label="Dry/Wet Ratio",
                interactive=True,
            )
            direct_output = default_audio_block(label="Direct Audio", interactive=False)
            wet_output = default_audio_block(label="Wet Audio", interactive=False)

    # Parametric EQ: one column of sliders per filter, seeded from `fx`.
    _ = gr.Markdown("## Parametric EQ")
    peq_plot = gr.Plot(plot_eq(fx), label="PEQ Frequency Response", elem_id="peq-plot")
    with gr.Row():
        with gr.Column(min_width=160):
            _ = gr.Markdown("High Pass")
            hp = fx[5]
            hp_freq = default_freq_slider(
                minimum=16, maximum=5300, value=hp.params.freq.item()
            )
            hp_q = default_q_slider(minimum=0.5, maximum=10, value=hp.params.Q.item())

        with gr.Column(min_width=160):
            _ = gr.Markdown("Low Shelf")
            ls = fx[2]
            ls_freq = default_freq_slider(
                minimum=30, maximum=200, value=ls.params.freq.item()
            )
            ls_gain = default_gain_slider(
                minimum=-12, maximum=12, value=ls.params.gain.item()
            )

        with gr.Column(min_width=160):
            _ = gr.Markdown("Peak filter 1")
            pk1 = fx[0]
            pk1_freq = default_freq_slider(
                minimum=33, maximum=5400, value=pk1.params.freq.item()
            )
            pk1_gain = default_gain_slider(
                minimum=-12, maximum=12, value=pk1.params.gain.item()
            )
            pk1_q = default_q_slider(minimum=0.2, maximum=20, value=pk1.params.Q.item())
        with gr.Column(min_width=160):
            _ = gr.Markdown("Peak filter 2")
            pk2 = fx[1]
            pk2_freq = default_freq_slider(
                minimum=200, maximum=17500, value=pk2.params.freq.item()
            )
            pk2_gain = default_gain_slider(
                minimum=-12, maximum=12, value=pk2.params.gain.item()
            )
            pk2_q = default_q_slider(minimum=0.2, maximum=20, value=pk2.params.Q.item())

        with gr.Column(min_width=160):
            _ = gr.Markdown("High Shelf")
            hs = fx[3]
            hs_freq = default_freq_slider(
                minimum=750, maximum=8300, value=hs.params.freq.item()
            )
            hs_gain = default_gain_slider(
                minimum=-12, maximum=12, value=hs.params.gain.item()
            )
        with gr.Column(min_width=160):
            _ = gr.Markdown("Low Pass")
            lp = fx[4]
            lp_freq = default_freq_slider(
                minimum=200, maximum=18000, value=lp.params.freq.item()
            )
            lp_q = default_q_slider(minimum=0.5, maximum=10, value=lp.params.Q.item())

    # Dynamics: sliders on the left, static curve plot on the right.
    _ = gr.Markdown("## Compressor and Expander")
    with gr.Row():
        with gr.Column():
            comp = fx[6]
            cmp_th = gr.Slider(
                minimum=-60,
                maximum=0,
                value=comp.params.cmp_th.item(),
                interactive=True,
                label="Threshold (dB)",
            )
            cmp_ratio = gr.Slider(
                minimum=1,
                maximum=20,
                value=comp.params.cmp_ratio.item(),
                interactive=True,
                label="Comp. Ratio",
            )
            make_up = gr.Slider(
                minimum=-12,
                maximum=12,
                value=comp.params.make_up.item(),
                interactive=True,
                label="Make Up (dB)",
            )
            # Attack/release are stored as coefficients; shown in milliseconds.
            attack_time = gr.Slider(
                minimum=0.1,
                maximum=100,
                value=coef2ms(comp.params.at, 44100).item(),
                interactive=True,
                label="Attack Time (ms)",
            )
            release_time = gr.Slider(
                minimum=50,
                maximum=1000,
                value=coef2ms(comp.params.rt, 44100).item(),
                interactive=True,
                label="Release Time (ms)",
            )
            exp_ratio = gr.Slider(
                minimum=0,
                maximum=1,
                value=comp.params.exp_ratio.item(),
                interactive=True,
                label="Exp. Ratio",
            )
            exp_th = gr.Slider(
                minimum=-80,
                maximum=0,
                value=comp.params.exp_th.item(),
                interactive=True,
                label="Exp. Threshold (dB)",
            )
            avg_coef = gr.Slider(
                minimum=0,
                maximum=1,
                value=comp.params.avg_coef.item(),
                interactive=True,
                label="RMS Averaging Coefficient",
            )
        with gr.Column():
            comp_plot = gr.Plot(
                plot_comp(fx), label="Compressor Curve", elem_id="comp-plot"
            )

    # Delay: linear gains are shown in dB, pans are mapped from [0, 1] to
    # [-100, 100].
    _ = gr.Markdown("## Ping-Pong Delay")
    with gr.Row():
        with gr.Column():
            delay = fx[7].effects[0]
            delay_time = gr.Slider(
                minimum=100,
                maximum=1000,
                value=delay.params.delay.item(),
                interactive=True,
                label="Delay Time (ms)",
            )
            feedback = gr.Slider(
                minimum=0,
                maximum=1,
                value=delay.params.feedback.item(),
                interactive=True,
                label="Feedback",
            )
            delay_gain = gr.Slider(
                minimum=-80,
                maximum=0,
                value=delay.params.gain.log10().item() * 20,
                interactive=True,
                label="Gain (dB)",
            )
            odd_pan = gr.Slider(
                minimum=-100,
                maximum=100,
                value=delay.odd_pan.params.pan.item() * 200 - 100,
                interactive=True,
                label="Odd Delay Pan",
            )
            even_pan = gr.Slider(
                minimum=-100,
                maximum=100,
                value=delay.even_pan.params.pan.item() * 200 - 100,
                interactive=True,
                label="Even Delay Pan",
            )
            delay_lp_freq = gr.Slider(
                minimum=200,
                maximum=16000,
                value=delay.eq.params.freq.item(),
                interactive=True,
                label="Low Pass Frequency (Hz)",
            )
            reverb_send = gr.Slider(
                minimum=-80,
                maximum=0,
                value=fx[7].params.sends_0.log10().item() * 20,
                interactive=True,
                label="Reverb Send (dB)",
            )
        with gr.Column():
            delay_plot = gr.Plot(
                plot_delay(fx), label="Delay Frequency Response", elem_id="delay-plot"
            )

    # Reverb has no sliders of its own; only its plots are shown.
    with gr.Row():
        reverb_plot = gr.Plot(
            plot_reverb(fx),
            label="Reverb Tone Correction PEQ",
            elem_id="reverb-plot",
            min_width=160,
        )
        t60_plot = gr.Plot(
            plot_t60(fx), label="Reverb T60", elem_id="t60-plot", min_width=160
        )

    with gr.Row():
        json_output = gr.JSON(
            model2json(fx), label="Effect Settings", max_height=800, open=True
        )
| |
|
    # Values for the four PC sliders plus the extra slider; `i` is the 1-based
    # index of the currently selected extra PC.
    update_pc = lambda z, i: z[:NUMBER_OF_PCS].tolist() + [z[i - 1].item()]
    update_pc_outputs = sliders + [extra_slider]

    # The three PEQ lists below are parallel: slider -> attribute name -> index
    # of the owning filter in `fx`.
    peq_sliders = [
        pk1_freq,
        pk1_gain,
        pk1_q,
        pk2_freq,
        pk2_gain,
        pk2_q,
        ls_freq,
        ls_gain,
        hs_freq,
        hs_gain,
        lp_freq,
        lp_q,
        hp_freq,
        hp_q,
    ]
    peq_attr_names = (
        ["freq", "gain", "Q"] * 2 + ["freq", "gain"] * 2 + ["freq", "Q"] * 2
    )
    peq_indices = [0] * 3 + [1] * 3 + [2] * 2 + [3] * 2 + [4] * 2 + [5] * 2

    # Parallel lists for the dynamics module: slider, setter, attribute name and
    # whether a change requires redrawing the compressor curve.
    cmp_sliders = [
        cmp_th,
        cmp_ratio,
        make_up,
        exp_ratio,
        exp_th,
        avg_coef,
        attack_time,
        release_time,
    ]
    cmp_update_funcs = [update_param] * 6 + [update_atrt] * 2
    cmp_attr_names = [
        "cmp_th",
        "cmp_ratio",
        "make_up",
        "exp_ratio",
        "exp_th",
        "avg_coef",
        "at",
        "rt",
    ]
    cmp_update_plot_flag = [True] * 5 + [False] * 3

    # Parallel lists for the delay. The gain setter converts dB to linear and
    # the pan setters map [-100, 100] back to [0, 1].
    delay_sliders = [delay_time, feedback, delay_lp_freq, delay_gain, odd_pan, even_pan]
    delay_update_funcs = (
        [update_param] * 3
        + [lambda m, a, v: update_param(m, a, 10 ** (v / 20))]
        + [lambda m, a, v: update_param(m, a, (v + 100) / 200)] * 2
    )
    delay_attr_names = [
        "params.delay",
        "params.feedback",
        "eq.params.freq",
        "params.gain",
        "odd_pan.params.pan",
        "even_pan.params.pan",
    ]
    delay_update_plot_flag = [True] * 4 + [False] * 2

    # Flat slider list passed to every handler, and the size of each group so
    # the values can be split apart again.
    all_effect_sliders = peq_sliders + cmp_sliders + delay_sliders + [reverb_send]
    split_sizes = [len(peq_sliders), len(cmp_sliders), len(delay_sliders), 1]
| |
|
| | def assign_fx_params(fx, *args): |
| | peq_sliders, cmp_sliders, delay_sliders = ( |
| | args[: split_sizes[0]], |
| | args[split_sizes[0] : sum(split_sizes[:2])], |
| | args[sum(split_sizes[:2]) : -1], |
| | ) |
| | reverb_send_slider = args[-1] |
| | for idx, s, attr_name in zip(peq_indices, peq_sliders, peq_attr_names): |
| | update_param(fx[idx].params, attr_name, s) |
| |
|
| | for f, s, attr_name in zip(cmp_update_funcs, cmp_sliders, cmp_attr_names): |
| | f(fx[6].params, attr_name, s) |
| |
|
| | for f, s, attr_name in zip(delay_update_funcs, delay_sliders, delay_attr_names): |
| | m, name = get_last_attribute(fx[7].effects[0], attr_name) |
| | f(m, name, s) |
| |
|
| | update_param(fx[7].params, "sends_0", 10 ** (reverb_send_slider / 20)) |
| |
|
| | return fx |
| |
|
    # Apply `fs` one after another starting from `init`, prepending each new
    # result: returns (f_n(...), ..., f_1(init), init).
    accum_func_results = lambda init, *fs: reduce(
        lambda x, f: (f(x[0]), *x), fs, (init,)
    )

    # (x, *slider values) -> (z, x, fx): rebuild the chain from x, overwrite it
    # with the slider values, then derive the new x and z from the result.
    x2z_common_steps = chain_functions(
        lambda x, *all_s: assign_fx_params(vec2fx(x), *all_s),
        lambda fx: accum_func_results(fx, fx2x, x2z),
    )
| |
|
    # Every effect slider recomputes (z, x) from the full slider set, refreshes
    # the JSON view and the PC sliders, and redraws its own plot when needed.
    for s in peq_sliders:
        s.input(
            chain_functions(
                lambda x, i, *args: x2z_common_steps(x, *args) + (i,),
                lambda z, x, fx, extra_pc_idx: [z, x]
                + [model2json(fx), plot_eq(fx)]
                + update_pc(z, extra_pc_idx),
            ),
            inputs=[fx_params, extra_pc_dropdown] + all_effect_sliders,
            outputs=[z, fx_params, json_output, peq_plot] + update_pc_outputs,
        )

    for s, update_plot in zip(cmp_sliders, cmp_update_plot_flag):
        s.input(
            chain_functions(
                lambda x, i, *args: x2z_common_steps(x, *args) + (i,),
                # `update_plot` is bound as a default so each handler keeps its
                # own flag rather than the loop's final value.
                lambda z, x, fx, e_pc_i, update_plot=update_plot: [z, x]
                + [model2json(fx)]
                + ([plot_comp(fx)] if update_plot else [])
                + update_pc(z, e_pc_i),
            ),
            inputs=[fx_params, extra_pc_dropdown] + all_effect_sliders,
            outputs=[z, fx_params, json_output]
            + ([comp_plot] if update_plot else [])
            + update_pc_outputs,
        )

    for s, update_plot in zip(delay_sliders, delay_update_plot_flag):
        s.input(
            chain_functions(
                lambda x, i, *args: x2z_common_steps(x, *args) + (i,),
                lambda z, x, fx, e_pc_i, update_plot=update_plot: (
                    [z, x]
                    + [model2json(fx)]
                    + ([plot_delay(fx)] if update_plot else [])
                    + update_pc(z, e_pc_i)
                ),
            ),
            inputs=[fx_params, extra_pc_dropdown] + all_effect_sliders,
            outputs=[z, fx_params]
            + [json_output]
            + ([delay_plot] if update_plot else [])
            + update_pc_outputs,
        )

    # The reverb send has no plot of its own.
    reverb_send.input(
        chain_functions(
            lambda x, i, *args: x2z_common_steps(x, *args) + (i,),
            lambda z, x, fx, e_pc_i: [z, x] + [model2json(fx)] + update_pc(z, e_pc_i),
        ),
        inputs=[fx_params, extra_pc_dropdown] + all_effect_sliders,
        outputs=[z, fx_params, json_output] + update_pc_outputs,
    )
| |
|
    # "Run": rebuild the chain from the stored parameters, apply the current
    # slider values on top, then process the input audio.
    render_button.click(
        chain_functions(
            lambda audio, ratio, x, *all_s: (
                audio,
                ratio,
                assign_fx_params(vec2fx(x), *all_s),
            ),
            inference,
        ),
        inputs=[
            audio_input,
            dry_wet_ratio,
            fx_params,
        ]
        + all_effect_sliders,
        outputs=[
            audio_output,
            direct_output,
            wet_output,
        ],
    )
| |
|
| | update_fx = lambda fx: [ |
| | fx[0].params.freq.item(), |
| | fx[0].params.gain.item(), |
| | fx[0].params.Q.item(), |
| | fx[1].params.freq.item(), |
| | fx[1].params.gain.item(), |
| | fx[1].params.Q.item(), |
| | fx[2].params.freq.item(), |
| | fx[2].params.gain.item(), |
| | fx[3].params.freq.item(), |
| | fx[3].params.gain.item(), |
| | fx[4].params.freq.item(), |
| | fx[4].params.Q.item(), |
| | fx[5].params.freq.item(), |
| | fx[5].params.Q.item(), |
| | fx[6].params.cmp_th.item(), |
| | fx[6].params.cmp_ratio.item(), |
| | fx[6].params.make_up.item(), |
| | fx[6].params.exp_th.item(), |
| | fx[6].params.exp_ratio.item(), |
| | coef2ms(fx[6].params.at, 44100).item(), |
| | coef2ms(fx[6].params.rt, 44100).item(), |
| | fx[7].effects[0].params.delay.item(), |
| | fx[7].effects[0].params.feedback.item(), |
| | fx[7].effects[0].params.gain.log10().item() * 20, |
| | fx[7].effects[0].eq.params.freq.item(), |
| | fx[7].effects[0].odd_pan.params.pan.item() * 200 - 100, |
| | fx[7].effects[0].even_pan.params.pan.item() * 200 - 100, |
| | fx[7].params.sends_0.log10().item() * 20, |
| | ] |
| | update_fx_outputs = [ |
| | pk1_freq, |
| | pk1_gain, |
| | pk1_q, |
| | pk2_freq, |
| | pk2_gain, |
| | pk2_q, |
| | ls_freq, |
| | ls_gain, |
| | hs_freq, |
| | hs_gain, |
| | lp_freq, |
| | lp_q, |
| | hp_freq, |
| | hp_q, |
| | cmp_th, |
| | cmp_ratio, |
| | make_up, |
| | exp_th, |
| | exp_ratio, |
| | attack_time, |
| | release_time, |
| | delay_time, |
| | feedback, |
| | delay_gain, |
| | delay_lp_freq, |
| | odd_pan, |
| | even_pan, |
| | reverb_send, |
| | ] |
    # Redraw every plot for the given chain, in `update_plots_outputs` order.
    update_plots = lambda fx: [
        plot_eq(fx),
        plot_comp(fx),
        plot_delay(fx),
        plot_reverb(fx),
        plot_t60(fx),
    ]
    update_plots_outputs = [
        peq_plot,
        comp_plot,
        delay_plot,
        reverb_plot,
        t60_plot,
    ]

    # Full refresh: PC sliders, effect sliders, plots and the JSON view, in the
    # same order as `update_all_outputs`.
    update_all = (
        lambda z, fx, i: update_pc(z, i)
        + update_fx(fx)
        + update_plots(fx)
        + [model2json(fx)]
    )
    update_all_outputs = (
        update_pc_outputs + update_fx_outputs + update_plots_outputs + [json_output]
    )
| |
|
    # z -> (z, x, fx): map the latent vector to parameters and build the chain.
    # `accum_func_results` yields (fx, x, z), which the second step reorders.
    z2x_common_steps = chain_functions(
        lambda z: accum_func_results(z, z2x, vec2fx),
        lambda fx, x, z: (z, x, fx),
    )
    # Randomise: draw a standard-normal z clipped to the slider range, then
    # refresh every widget.
    random_button.click(
        chain_functions(
            lambda i: (
                *z2x_common_steps(torch.randn_like(mean).clip(SLIDER_MIN, SLIDER_MAX)),
                i,
            ),
            lambda z, x, fx, i: [z, x] + update_all(z, fx, i),
        ),
        inputs=extra_pc_dropdown,
        outputs=[z, fx_params] + update_all_outputs,
    )
    # Reset: go back to z = 0, i.e. the mean preset.
    reset_button.click(
        chain_functions(
            lambda: z2x_common_steps(torch.zeros_like(mean)),
            lambda z, x, fx: [z, x] + update_all(z, fx, NUMBER_OF_PCS),
        ),
        outputs=[z, fx_params] + update_all_outputs,
    )
| |
|
    def update_z(z, s, i):
        """Set component ``i`` (0-based) of ``z`` to ``s`` in place and return ``z``."""
        z[i] = s
        return z

    # Moving a PC slider updates z, regenerates the chain and refreshes the
    # effect sliders, plots and JSON view.
    for i, slider in enumerate(sliders):
        slider.input(
            chain_functions(
                # `i=i` binds the loop index per handler.
                lambda z, s, i=i: update_z(z, s, i),
                z2x_common_steps,
                lambda z, x, fx: [z, x, model2json(fx)]
                + update_fx(fx)
                + update_plots(fx),
            ),
            inputs=[z, slider],
            outputs=[z, fx_params, json_output]
            + update_fx_outputs
            + update_plots_outputs,
        )
    # The extra slider edits whichever PC the dropdown selects (1-based).
    extra_slider.input(
        chain_functions(
            lambda z, s, i: update_z(z, s, i - 1),
            z2x_common_steps,
            lambda z, x, fx: [z, x, model2json(fx)] + update_fx(fx) + update_plots(fx),
        ),
        inputs=[z, extra_slider, extra_pc_dropdown],
        outputs=[z, fx_params, json_output] + update_fx_outputs + update_plots_outputs,
    )
| |
|
    # Selecting a different extra PC shows that component's current value.
    extra_pc_dropdown.input(
        lambda z, i: z[i - 1].item(),
        inputs=[z, extra_pc_dropdown],
        outputs=extra_slider,
    )

    # Loading a preset replaces x ("none" means the mean), derives z from it and
    # refreshes every widget.
    preset_dropdown.input(
        chain_functions(
            lambda i, _: (mean if i == "none" else presets[i - 1], _),
            lambda x, i: (x2z(x), x, vec2fx(x), i),
            lambda z, x, fx, i: [z, x] + update_all(z, fx, i),
        ),
        inputs=[preset_dropdown, extra_pc_dropdown],
        outputs=[z, fx_params] + update_all_outputs,
    )
| |
|
| | dry_wet_ratio.input( |
| | chain_functions( |
| | lambda _, *args: (_, *map(lambda x: x[1] / 32768, args)), |
| | lambda ratio, d, w: math.sqrt(2) |
| | * ( |
| | math.cos(ratio * math.pi * 0.5) * d |
| | + math.sin(ratio * math.pi * 0.5) * w |
| | ), |
| | lambda x: (44100, (x * 32768).astype(np.int16)), |
| | ), |
| | inputs=[dry_wet_ratio, direct_output, wet_output], |
| | outputs=[audio_output], |
| | ) |
| |
|
# Start the Gradio server once the interface has been fully defined.
demo.launch()
| |
|