File size: 4,645 Bytes
9c60174
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#!/usr/bin/env python3
"""Draft bar chart replacement for tab:main_results.

Single panel, k=30 only (k=20 is incomplete for agentic/structured methods).
Each method group has 3 bars (Qwen3.5 / Kimi / GPT-5.5). FullR@30 values are
kept in ROWS, but no FullR@30 marker is drawn yet.
"""

import matplotlib.pyplot as plt
import numpy as np
from matplotlib.patches import Patch

# One tuple per method, in the order the methods appear in the table (k=30 column).
# Tuple layout:
#   (label, category, FullR@30 or None when not applicable, QW3.5, Kimi, G5.5)
# NOTE(review): every score below is a multiple of 0.2 except the Kimi value
# 11.9 for "LME QE (sum.)" -- confirm it against the source table.
ROWS = [
    # category "long"
    ("Full context", "long", None, 2.4, 0.0, 8.0),
    ("200K subsample", "long", None, 12.4, 13.0, 12.0),
    # category "retr"
    ("Stella V5", "retr", 36.6, 11.8, 8.2, 15.0),
    ("GTE 7B", "retr", 54.6, 14.6, 15.4, 9.8),
    ("Keyword + RR", "retr", 18.0, 7.8, 4.8, 6.8),
    ("LME QE (sum.)", "retr", 27.0, 11.8, 11.9, 12.6),
    ("LME QE (facts)", "retr", 30.0, 14.6, 11.6, 15.6),
    # category "agent"
    ("MemoChat", "agent", 14.6, 8.6, 5.4, 15.4),
    ("ReadAgent", "agent", 13.8, 21.0, 16.8, 26.0),
    ("RAPTOR (L=1)", "agent", 22.6, 23.2, 18.0, 25.0),
    ("RAPTOR (L=3)", "agent", 23.2, 24.0, 19.6, 26.2),
]

# Background tint for each method category.
CATEGORY_COLORS = dict(
    long="#cfd8dc",   # cool grey background
    retr="#dbeeff",   # cool blue background
    agent="#fde7d3",  # warm orange background
)

# Display name for each category key.
CATEGORY_LABEL = dict(
    long="Long-Context Prompting",
    retr="Retrieval-Based",
    agent="Agentic / Structured Memory",
)

# Colour for each model, keyed by the short model tag.
MODEL_COLORS = dict(
    [
        ("QW3.5", "#4c72b0"),
        ("Kimi", "#55a868"),
        ("G5.5", "#c44e52"),
    ]
)


def _category_runs(categories):
    """Collapse adjacent equal categories into (first, last, category) index runs."""
    runs = []
    for idx, cat in enumerate(categories):
        if runs and runs[-1][2] == cat:
            # Same category as the previous row: extend the open run.
            runs[-1] = (runs[-1][0], idx, cat)
        else:
            runs.append((idx, idx, cat))
    return runs


def main():
    """Render the k=30 grouped bar chart from ROWS and save it as PDF and PNG.

    Draws one group per row of ROWS with three bars per group (one per model),
    a tinted background span behind each run of consecutive rows that share a
    category, a numeric label above every bar, and a single legend row above
    the axes. Writes ``main_results_bar.pdf`` and ``main_results_bar.png`` to
    the current working directory and prints both paths.

    The FullR@30 column (index 2 of each row) is not plotted.
    """
    labels = [r[0] for r in ROWS]
    categories = [r[1] for r in ROWS]

    n = len(ROWS)
    x = np.arange(n)
    bar_w = 0.28

    # One entry per model:
    #   (MODEL_COLORS key, display name, scores, slot within the group).
    # The slot is multiplied by bar_w to place the bar left of, on, or right of
    # the group's tick. Bars and legend are both built from this list so the
    # model names cannot drift apart.
    series = [
        ("QW3.5", "Qwen3.5-397B", np.array([r[3] for r in ROWS]), -1),
        ("Kimi", "Kimi-K2.6", np.array([r[4] for r in ROWS]), 0),
        ("G5.5", "GPT-5.5", np.array([r[5] for r in ROWS]), 1),
    ]

    fig, ax = plt.subplots(figsize=(9.0, 2.6))

    # Category background shading, one span per run of same-category rows.
    for first, last, cat in _category_runs(categories):
        ax.axvspan(first - 0.5, last + 0.5, color=CATEGORY_COLORS[cat], alpha=0.55, zorder=0)

    # Bars, each with its numeric value printed just above it.
    for key, name, scores, slot in series:
        color = MODEL_COLORS[key]
        centers = x + slot * bar_w
        ax.bar(centers, scores, bar_w, label=name, color=color, zorder=2)
        for cx, v in zip(centers, scores):
            ax.text(cx, v + 0.3, f"{v:.1f}", ha="center", va="bottom",
                    fontsize=5.5, color=color)

    ax.set_xticks(x)
    ax.set_xticklabels(labels, rotation=25, ha="right", fontsize=7.5)
    ax.tick_params(axis="y", labelsize=7.5)
    ax.set_ylabel("Strict Accuracy (%) @ $k$=30", fontsize=8.5)
    ax.set_ylim(0, 30)  # fixed range; bars above 30 would be cut off
    ax.set_xlim(-0.55, n - 0.45)
    ax.grid(axis="y", linestyle=":", alpha=0.5, zorder=1)
    ax.set_axisbelow(True)
    for spine in ("top", "right"):
        ax.spines[spine].set_visible(False)

    # Top legend: model swatches followed by category swatches.
    legend_handles = [
        Patch(color=MODEL_COLORS[key], label=name) for key, name, _, _ in series
    ]
    legend_handles += [
        Patch(facecolor=CATEGORY_COLORS[cat], edgecolor="none", label=CATEGORY_LABEL[cat])
        for cat in ("long", "retr", "agent")
    ]
    ax.legend(handles=legend_handles, ncol=6, loc="lower left",
              frameon=False, fontsize=6.8, bbox_to_anchor=(-0.01, 1.0),
              handlelength=1.2, handletextpad=0.4, columnspacing=1.2,
              borderaxespad=0.1)

    fig.tight_layout(pad=0.3)
    out_pdf = "main_results_bar.pdf"
    out_png = out_pdf.replace(".pdf", ".png")
    try:
        fig.savefig(out_pdf, bbox_inches="tight")
        fig.savefig(out_png, dpi=180, bbox_inches="tight")
    finally:
        # Release the figure even if saving fails, so repeated calls to main()
        # do not accumulate open figures in pyplot's registry.
        plt.close(fig)
    print(f"wrote {out_pdf}\nwrote {out_png}")


# Generate the chart only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()