Spaces:
Sleeping
Sleeping
| import pandas as pd | |
# Per-speaker metadata keyed by speaker ID. Every entry carries the same three
# fields ("Gender", "Severity", "Dataset"). The UA-Speech speaker is keyed as
# "F02 (UA)", i.e. the dataset tag is part of the key itself.
SPEAKER_META = {
    speaker_id: {"Gender": gender, "Severity": severity, "Dataset": dataset}
    for speaker_id, gender, severity, dataset in (
        ("F01", "Female", "Severe", "Torgo"),
        ("F03", "Female", "Mild", "Torgo"),
        ("F04", "Female", "Mild", "Torgo"),
        ("M01", "Male", "Moderate", "Torgo"),
        ("M02", "Male", "Mild", "Torgo"),
        ("M03", "Male", "Mild", "Torgo"),
        ("M04", "Male", "Moderate", "Torgo"),
        ("M05", "Male", "Severe", "Torgo"),
        ("F02 (UA)", "Female", "Severe", "UA-Speech"),
    )
}
def get_loss_data():
    """Return the recorded loss curve as a long-format DataFrame.

    The frame has the columns ``Step``, ``Loss`` and ``Metric``. Each recorded
    step contributes two consecutive rows: first the "Training Loss" value,
    then the "Validation Loss" value.
    """
    # Each entry is (step, training loss, validation loss).
    history = [
        (50, 0.7128, 1.0397), (100, 0.5804, 0.8406), (150, 0.4600, 0.6205),
        (200, 0.4260, 0.6141), (250, 0.3605, 0.5091), (300, 0.3405, 0.5198),
        (350, 0.2775, 0.5297), (400, 0.2349, 0.5460), (450, 0.2527, 0.5389),
        (500, 0.2365, 0.4874), (550, 0.2489, 0.4931), (600, 0.2234, 0.4765),
        (650, 0.1876, 0.4866), (700, 0.1309, 0.5421), (750, 0.1675, 0.5201),
        (800, 0.1952, 0.5205), (850, 0.1826, 0.5152), (900, 0.1767, 0.5452),
        (950, 0.1579, 0.5313),
    ]

    # Unpivot: one record per (step, metric) pair, training before validation.
    records = [
        {"Step": step, "Loss": loss, "Metric": label}
        for step, train_loss, val_loss in history
        for label, loss in (
            ("Training Loss", train_loss),
            ("Validation Loss", val_loss),
        )
    ]
    return pd.DataFrame(records)
def get_loso_f01_data():
    """Return per-checkpoint accuracy and WER rows for the F01 evaluation.

    The result is a long-format DataFrame with the columns ``Step``, ``Value``,
    ``Model`` and ``Metric``. For every checkpoint step there are six rows:
    the three "Accuracy (%)" rows (Gemma DSR, Whisper Baseline, Wav2Vec
    Baseline) followed by the three "WER" rows in the same model order.
    The two baselines use a single fixed value repeated at every step.
    """
    checkpoints = [100, 250, 500, 650, 700, 750, 850, 950]

    # Gemma DSR readings, aligned one-to-one with `checkpoints`.
    dsr_accuracy = [14.54, 22.47, 25.55, 26.87, 29.52, 28.19, 26.87, 28.19]
    dsr_wer = [0.88, 0.66, 0.63, 0.58, 0.57, 0.57, 0.58, 0.58]

    # Baseline readings are constants; dict order fixes the row order.
    baseline_accuracy = {"Whisper Baseline": 4.85, "Wav2Vec Baseline": 7.05}
    baseline_wer = {"Whisper Baseline": 0.99, "Wav2Vec Baseline": 0.87}

    records = []
    for step, accuracy, wer in zip(checkpoints, dsr_accuracy, dsr_wer):
        per_metric = (
            ("Accuracy (%)", accuracy, baseline_accuracy),
            ("WER", wer, baseline_wer),
        )
        for metric, dsr_value, baselines in per_metric:
            records.append(
                {"Step": step, "Value": dsr_value, "Model": "Gemma DSR", "Metric": metric}
            )
            records.extend(
                {"Step": step, "Value": value, "Model": model, "Metric": metric}
                for model, value in baselines.items()
            )
    return pd.DataFrame(records)
def get_zeroshot_ua_data():
    """Return per-checkpoint accuracy and WER rows for the zero-shot UA data.

    The result is a long-format DataFrame with the columns ``Step``, ``Value``,
    ``Model`` and ``Metric``. Each checkpoint step yields six rows: accuracy
    for Gemma DSR, Whisper Baseline and Wav2Vec Baseline, then WER for the
    same three models. Baseline values are constant across steps.
    """
    checkpoints = [100, 250, 500, 650, 700, 750, 850, 950]

    # Gemma DSR readings, aligned one-to-one with `checkpoints`.
    gemma_accuracy = [1.45, 3.62, 2.90, 2.90, 1.45, 2.90, 2.17, 2.90]
    gemma_wer = [1.57, 1.49, 1.54, 1.24, 1.36, 1.57, 1.37, 1.36]

    # Fixed baseline readings, reused unchanged at every step.
    whisper_accuracy, wav2vec_accuracy = 3.62, 2.17
    whisper_wer, wav2vec_wer = 1.97, 2.11

    records = []
    for step, accuracy, wer in zip(checkpoints, gemma_accuracy, gemma_wer):
        # Order here defines the row order within each step.
        readings = (
            (accuracy, "Gemma DSR", "Accuracy (%)"),
            (whisper_accuracy, "Whisper Baseline", "Accuracy (%)"),
            (wav2vec_accuracy, "Wav2Vec Baseline", "Accuracy (%)"),
            (wer, "Gemma DSR", "WER"),
            (whisper_wer, "Whisper Baseline", "WER"),
            (wav2vec_wer, "Wav2Vec Baseline", "WER"),
        )
        records.extend(
            {"Step": step, "Value": value, "Model": model, "Metric": metric}
            for value, model, metric in readings
        )
    return pd.DataFrame(records)
def get_arbitration_table():
    """Return the per-checkpoint arbitration summary as a DataFrame.

    Each row holds a checkpoint label plus two preformatted strings of the
    form "<percent>% (<count>/<total>)", one for the "Whisper Retention (n=11)"
    column and one for the "Pure Correction (n=205)" column.
    """
    headers = ["Checkpoint", "Whisper Retention (n=11)", "Pure Correction (n=205)"]

    # (checkpoint, whisper retention, pure correction)
    summary_rows = [
        ("C100", "81.82% (9/11)", "8.29% (17/205)"),
        ("C250", "81.82% (9/11)", "17.07% (35/205)"),
        ("C500", "81.82% (9/11)", "20.98% (43/205)"),
        ("C650", "81.82% (9/11)", "21.46% (44/205)"),
        ("C700", "72.73% (8/11)", "24.88% (51/205)"),
        ("C750", "90.91% (10/11)", "22.44% (46/205)"),
        ("C850", "90.91% (10/11)", "20.98% (43/205)"),
        ("C950", "90.91% (10/11)", "22.44% (46/205)"),
    ]
    return pd.DataFrame(summary_rows, columns=headers)