import pandas as pd

# Per-speaker metadata keyed by speaker label. Every entry carries the same
# three fields: "Gender", "Severity" and "Dataset". The single UA-Speech
# speaker is keyed "F02 (UA)"; all other speakers belong to Torgo.
SPEAKER_META = {
    "F01": {"Gender": "Female", "Severity": "Severe", "Dataset": "Torgo"},
    "F03": {"Gender": "Female", "Severity": "Mild", "Dataset": "Torgo"},
    "F04": {"Gender": "Female", "Severity": "Mild", "Dataset": "Torgo"},
    "M01": {"Gender": "Male", "Severity": "Moderate", "Dataset": "Torgo"},
    "M02": {"Gender": "Male", "Severity": "Mild", "Dataset": "Torgo"},
    "M03": {"Gender": "Male", "Severity": "Mild", "Dataset": "Torgo"},
    "M04": {"Gender": "Male", "Severity": "Moderate", "Dataset": "Torgo"},
    "M05": {"Gender": "Male", "Severity": "Severe", "Dataset": "Torgo"},
    "F02 (UA)": {"Gender": "Female", "Severity": "Severe", "Dataset": "UA-Speech"},
}

# Labels shared by the model-comparison tables. The model order here is the
# order in which rows are emitted for every (step, metric) pair.
_MODEL_LABELS = ("Gemma DSR", "Whisper Baseline", "Wav2Vec Baseline")
_ACCURACY_LABEL = "Accuracy (%)"
_WER_LABEL = "WER"


def _comparison_frame(
    steps,
    acc_dsr,
    wer_dsr,
    acc_whisper,
    acc_wav2vec,
    wer_whisper,
    wer_wav2vec,
) -> pd.DataFrame:
    """Build a long-format model-comparison table.

    For every step, six rows are emitted in a fixed order: the three
    accuracy rows (Gemma DSR, Whisper, Wav2Vec) followed by the three WER
    rows in the same model order. The baseline values are scalars and are
    repeated unchanged at every step.

    Args:
        steps: Checkpoint steps, one per Gemma DSR measurement.
        acc_dsr: Gemma DSR accuracy values, aligned with ``steps``.
        wer_dsr: Gemma DSR WER values, aligned with ``steps``.
        acc_whisper: Constant Whisper baseline accuracy.
        acc_wav2vec: Constant Wav2Vec baseline accuracy.
        wer_whisper: Constant Whisper baseline WER.
        wer_wav2vec: Constant Wav2Vec baseline WER.

    Returns:
        DataFrame with columns ``Step``, ``Value``, ``Model``, ``Metric``.

    Raises:
        ValueError: If ``steps``, ``acc_dsr`` and ``wer_dsr`` differ in length.
    """
    if not len(steps) == len(acc_dsr) == len(wer_dsr):
        raise ValueError("steps, acc_dsr and wer_dsr must have the same length")

    rows = []
    for step, acc, wer in zip(steps, acc_dsr, wer_dsr):
        per_metric = (
            (_ACCURACY_LABEL, (acc, acc_whisper, acc_wav2vec)),
            (_WER_LABEL, (wer, wer_whisper, wer_wav2vec)),
        )
        for metric, values in per_metric:
            for model, value in zip(_MODEL_LABELS, values):
                rows.append(
                    {"Step": step, "Value": value, "Model": model, "Metric": metric}
                )
    return pd.DataFrame(rows)


def get_loss_data() -> pd.DataFrame:
    """Return the training/validation loss curve in long format.

    Each recorded step yields two consecutive rows: first the
    ``"Training Loss"`` row, then the ``"Validation Loss"`` row.

    Returns:
        DataFrame with columns ``Step``, ``Loss``, ``Metric``.
    """
    # (step, training loss, validation loss)
    raw_data = [
        (50, 0.7128, 1.0397),
        (100, 0.5804, 0.8406),
        (150, 0.4600, 0.6205),
        (200, 0.4260, 0.6141),
        (250, 0.3605, 0.5091),
        (300, 0.3405, 0.5198),
        (350, 0.2775, 0.5297),
        (400, 0.2349, 0.5460),
        (450, 0.2527, 0.5389),
        (500, 0.2365, 0.4874),
        (550, 0.2489, 0.4931),
        (600, 0.2234, 0.4765),
        (650, 0.1876, 0.4866),
        (700, 0.1309, 0.5421),
        (750, 0.1675, 0.5201),
        (800, 0.1952, 0.5205),
        (850, 0.1826, 0.5152),
        (900, 0.1767, 0.5452),
        (950, 0.1579, 0.5313),
    ]
    rows = []
    for step, train_loss, val_loss in raw_data:
        rows.append({"Step": step, "Loss": train_loss, "Metric": "Training Loss"})
        rows.append({"Step": step, "Loss": val_loss, "Metric": "Validation Loss"})
    return pd.DataFrame(rows)


def get_loso_f01_data() -> pd.DataFrame:
    """Return the per-checkpoint comparison table for the LOSO F01 evaluation.

    NOTE(review): "LOSO" presumably means leave-one-speaker-out with speaker
    F01 held out -- confirm against the experiment notes.

    Returns:
        DataFrame with columns ``Step``, ``Value``, ``Model``, ``Metric``;
        six rows per step (accuracy then WER, for each of the three models).
    """
    return _comparison_frame(
        steps=[100, 250, 500, 650, 700, 750, 850, 950],
        acc_dsr=[14.54, 22.47, 25.55, 26.87, 29.52, 28.19, 26.87, 28.19],
        wer_dsr=[0.88, 0.66, 0.63, 0.58, 0.57, 0.57, 0.58, 0.58],
        acc_whisper=4.85,
        acc_wav2vec=7.05,
        wer_whisper=0.99,
        wer_wav2vec=0.87,
    )


def get_zeroshot_ua_data() -> pd.DataFrame:
    """Return the per-checkpoint comparison table for the zero-shot UA evaluation.

    NOTE(review): "UA" presumably refers to the UA-Speech dataset listed in
    ``SPEAKER_META`` -- confirm.

    Returns:
        DataFrame with columns ``Step``, ``Value``, ``Model``, ``Metric``;
        six rows per step (accuracy then WER, for each of the three models).
    """
    return _comparison_frame(
        steps=[100, 250, 500, 650, 700, 750, 850, 950],
        acc_dsr=[1.45, 3.62, 2.90, 2.90, 1.45, 2.90, 2.17, 2.90],
        wer_dsr=[1.57, 1.49, 1.54, 1.24, 1.36, 1.57, 1.37, 1.36],
        acc_whisper=3.62,
        acc_wav2vec=2.17,
        wer_whisper=1.97,
        wer_wav2vec=2.11,
    )


def get_arbitration_table() -> pd.DataFrame:
    """Return the per-checkpoint arbitration summary table.

    The cells are pre-formatted display strings of the form
    ``"<percent>% (<hits>/<total>)"``; no numeric parsing is done here.

    Returns:
        DataFrame with columns ``Checkpoint``, ``Whisper Retention (n=11)``
        and ``Pure Correction (n=205)``, one row per checkpoint.
    """
    data = [
        ["C100", "81.82% (9/11)", "8.29% (17/205)"],
        ["C250", "81.82% (9/11)", "17.07% (35/205)"],
        ["C500", "81.82% (9/11)", "20.98% (43/205)"],
        ["C650", "81.82% (9/11)", "21.46% (44/205)"],
        ["C700", "72.73% (8/11)", "24.88% (51/205)"],
        ["C750", "90.91% (10/11)", "22.44% (46/205)"],
        ["C850", "90.91% (10/11)", "20.98% (43/205)"],
        ["C950", "90.91% (10/11)", "22.44% (46/205)"],
    ]
    columns = ["Checkpoint", "Whisper Retention (n=11)", "Pure Correction (n=205)"]
    return pd.DataFrame(data, columns=columns)