# Torgo-DSR-Lab / stats_data.py
# st192011's picture
# Update stats_data.py
# 538577c verified
import pandas as pd
# 1. Speaker metadata, keyed by speaker ID. The F02 entry is labelled
#    "F02 (UA)" and is the only one whose Dataset is "UA-Speech".
SPEAKER_META = {
    speaker_id: {"Gender": gender, "Severity": severity, "Dataset": dataset}
    for speaker_id, gender, severity, dataset in (
        ("F01", "Female", "Severe", "Torgo"),
        ("F03", "Female", "Mild", "Torgo"),
        ("F04", "Female", "Mild", "Torgo"),
        ("M01", "Male", "Moderate", "Torgo"),
        ("M02", "Male", "Mild", "Torgo"),
        ("M03", "Male", "Mild", "Torgo"),
        ("M04", "Male", "Moderate", "Torgo"),
        ("M05", "Male", "Severe", "Torgo"),
        ("F02 (UA)", "Female", "Severe", "UA-Speech"),
    )
}
def get_loss_data():
    """Return the recorded loss curves as a long-format DataFrame.

    Returns:
        pandas.DataFrame with columns ``Step``, ``Loss`` and ``Metric``.
        Every recorded step contributes two consecutive rows: first the
        "Training Loss" value, then the "Validation Loss" value.
    """
    # (step, training loss, validation loss), recorded every 50 steps.
    loss_history = [
        (50, 0.7128, 1.0397), (100, 0.5804, 0.8406), (150, 0.4600, 0.6205),
        (200, 0.4260, 0.6141), (250, 0.3605, 0.5091), (300, 0.3405, 0.5198),
        (350, 0.2775, 0.5297), (400, 0.2349, 0.5460), (450, 0.2527, 0.5389),
        (500, 0.2365, 0.4874), (550, 0.2489, 0.4931), (600, 0.2234, 0.4765),
        (650, 0.1876, 0.4866), (700, 0.1309, 0.5421), (750, 0.1675, 0.5201),
        (800, 0.1952, 0.5205), (850, 0.1826, 0.5152), (900, 0.1767, 0.5452),
        (950, 0.1579, 0.5313),
    ]
    # One row per (step, metric) pair; the inner loop fixes the
    # training-before-validation ordering within each step.
    rows = [
        {"Step": step, "Loss": loss, "Metric": metric}
        for step, train_loss, val_loss in loss_history
        for metric, loss in (
            ("Training Loss", train_loss),
            ("Validation Loss", val_loss),
        )
    ]
    return pd.DataFrame(rows)
def _build_comparison_frame(steps, dsr_series, baselines):
    """Assemble a long-format frame of Gemma DSR values against baselines.

    Args:
        steps: Checkpoint steps, one per Gemma DSR measurement.
        dsr_series: Mapping of metric label to the per-step "Gemma DSR"
            values, positionally aligned with ``steps``.
        baselines: Mapping of metric label to ``{model label: value}``.
            Each baseline value is a single number that is repeated
            unchanged for every step.

    Returns:
        pandas.DataFrame with columns ``Step``, ``Value``, ``Model`` and
        ``Metric``. Rows are grouped by step; within a step, metrics appear
        in ``dsr_series`` order, and within a metric the "Gemma DSR" row
        precedes the baselines in their mapping order.

    Raises:
        ValueError: If a Gemma DSR series does not have exactly one value
            per step.
    """
    # Fail loudly on misaligned data instead of truncating or hitting an
    # IndexError halfway through the loop.
    for metric, values in dsr_series.items():
        if len(values) != len(steps):
            raise ValueError(
                f"{metric!r} has {len(values)} values for {len(steps)} steps"
            )

    rows = []
    for idx, step in enumerate(steps):
        for metric, values in dsr_series.items():
            rows.append(
                {"Step": step, "Value": values[idx], "Model": "Gemma DSR", "Metric": metric}
            )
            rows.extend(
                {"Step": step, "Value": value, "Model": model, "Metric": metric}
                for model, value in baselines[metric].items()
            )
    return pd.DataFrame(rows)


def get_loso_f01_data():
    """Return accuracy and WER per checkpoint step for the LOSO F01 results.

    NOTE(review): "LOSO F01" presumably means leave-one-speaker-out with
    speaker F01 held out; confirm against the experiment write-up.

    Returns:
        pandas.DataFrame with columns ``Step``, ``Value``, ``Model`` and
        ``Metric``: six rows per step ("Accuracy (%)" then "WER", each for
        "Gemma DSR", "Whisper Baseline" and "Wav2Vec Baseline"). The two
        baselines carry the same value at every step.
    """
    return _build_comparison_frame(
        steps=[100, 250, 500, 650, 700, 750, 850, 950],
        dsr_series={
            "Accuracy (%)": [14.54, 22.47, 25.55, 26.87, 29.52, 28.19, 26.87, 28.19],
            "WER": [0.88, 0.66, 0.63, 0.58, 0.57, 0.57, 0.58, 0.58],
        },
        baselines={
            "Accuracy (%)": {"Whisper Baseline": 4.85, "Wav2Vec Baseline": 7.05},
            "WER": {"Whisper Baseline": 0.99, "Wav2Vec Baseline": 0.87},
        },
    )


def get_zeroshot_ua_data():
    """Return accuracy and WER per checkpoint step for the zero-shot UA results.

    NOTE(review): "UA" presumably refers to the UA-Speech dataset; confirm.

    Returns:
        pandas.DataFrame with columns ``Step``, ``Value``, ``Model`` and
        ``Metric``: six rows per step ("Accuracy (%)" then "WER", each for
        "Gemma DSR", "Whisper Baseline" and "Wav2Vec Baseline"). The two
        baselines carry the same value at every step.
    """
    return _build_comparison_frame(
        steps=[100, 250, 500, 650, 700, 750, 850, 950],
        dsr_series={
            "Accuracy (%)": [1.45, 3.62, 2.90, 2.90, 1.45, 2.90, 2.17, 2.90],
            "WER": [1.57, 1.49, 1.54, 1.24, 1.36, 1.57, 1.37, 1.36],
        },
        baselines={
            "Accuracy (%)": {"Whisper Baseline": 3.62, "Wav2Vec Baseline": 2.17},
            "WER": {"Whisper Baseline": 1.97, "Wav2Vec Baseline": 2.11},
        },
    )
def get_arbitration_table():
    """Return the per-checkpoint arbitration summary as a display table.

    Each cell is rendered as ``"<percent>% (<hits>/<total>)"`` with the
    percentage derived from the raw counts, so the percentage, the fraction
    and the ``n=`` in the column header cannot disagree.

    Returns:
        pandas.DataFrame of strings with columns ``Checkpoint``,
        ``Whisper Retention (n=11)`` and ``Pure Correction (n=205)``, one
        row per checkpoint.
    """
    retention_total = 11
    correction_total = 205
    # (checkpoint label, retention hits out of 11, correction hits out of 205)
    counts = [
        ("C100", 9, 17),
        ("C250", 9, 35),
        ("C500", 9, 43),
        ("C650", 9, 44),
        ("C700", 8, 51),
        ("C750", 10, 46),
        ("C850", 10, 43),
        ("C950", 10, 46),
    ]

    def _share(hits, total):
        # ".2%" scales by 100 and appends "%": 9/11 -> "81.82%".
        return f"{hits / total:.2%} ({hits}/{total})"

    table = [
        [checkpoint, _share(kept, retention_total), _share(fixed, correction_total)]
        for checkpoint, kept, fixed in counts
    ]
    return pd.DataFrame(
        table,
        columns=[
            "Checkpoint",
            f"Whisper Retention (n={retention_total})",
            f"Pure Correction (n={correction_total})",
        ],
    )