#!/usr/bin/env python3
"""Draft bar chart replacement for tab:main_results.

Single panel, k=30 only (k=20 is incomplete for agentic/structured methods).
Each method group has 3 bars (Qwen3.5 / Kimi / GPT-5.5), drawn on top of a
background band that marks the method's category.

The FullR@30 column is carried in ROWS for reference but is not drawn.
NOTE(review): FullR values reach 54.6 while the y-axis is fixed to 0-30, so
adding a FullR marker would need a wider range or a second axis -- confirm
the intended design before plotting it.
"""

import matplotlib.pyplot as plt
import numpy as np
from matplotlib.patches import Patch

# Methods listed in the order they appear in the table, k=30 column.
# (label, category, FullR@30 (None if not applicable), QW3.5, Kimi, G5.5)
ROWS = [
    ("Full context", "long", None, 2.4, 0.0, 8.0),
    ("200K subsample", "long", None, 12.4, 13.0, 12.0),
    ("Stella V5", "retr", 36.6, 11.8, 8.2, 15.0),
    ("GTE 7B", "retr", 54.6, 14.6, 15.4, 9.8),
    ("Keyword + RR", "retr", 18.0, 7.8, 4.8, 6.8),
    ("LME QE (sum.)", "retr", 27.0, 11.8, 11.9, 12.6),
    ("LME QE (facts)", "retr", 30.0, 14.6, 11.6, 15.6),
    ("MemoChat", "agent", 14.6, 8.6, 5.4, 15.4),
    ("ReadAgent", "agent", 13.8, 21.0, 16.8, 26.0),
    ("RAPTOR (L=1)", "agent", 22.6, 23.2, 18.0, 25.0),
    ("RAPTOR (L=3)", "agent", 23.2, 24.0, 19.6, 26.2),
]

CATEGORY_COLORS = {
    "long": "#cfd8dc",   # cool grey background
    "retr": "#dbeeff",   # cool blue background
    "agent": "#fde7d3",  # warm orange background
}

CATEGORY_LABEL = {
    "long": "Long-Context Prompting",
    "retr": "Retrieval-Based",
    "agent": "Agentic / Structured Memory",
}

MODEL_COLORS = {
    "QW3.5": "#4c72b0",
    "Kimi": "#55a868",
    "G5.5": "#c44e52",
}


def _category_runs(categories):
    """Return (first_index, last_index, category) for each run of equal items.

    Indices are inclusive. An empty input yields an empty list.
    """
    runs = []
    start = 0
    total = len(categories)
    for i in range(1, total + 1):
        # Close the current run at the end of the list or on a category change.
        if i == total or categories[i] != categories[start]:
            runs.append((start, i - 1, categories[start]))
            start = i
    return runs


def main() -> None:
    """Render the grouped bar chart and write it as PDF and PNG.

    Writes ``main_results_bar.pdf`` and ``main_results_bar.png`` to the
    current working directory and prints the two file names.
    """
    labels = [row[0] for row in ROWS]
    categories = [row[1] for row in ROWS]

    n = len(ROWS)
    x = np.arange(n)
    bar_w = 0.28

    # One entry per model: (MODEL_COLORS key, legend label, ROWS column,
    # horizontal offset of the bar inside its method group).
    series = [
        ("QW3.5", "Qwen3.5-397B", 3, -bar_w),
        ("Kimi", "Kimi-K2.6", 4, 0.0),
        ("G5.5", "GPT-5.5", 5, bar_w),
    ]

    fig, ax = plt.subplots(figsize=(9.0, 2.6))

    # Category background shading: one band per contiguous run of methods
    # that share a category, padded by half a group on each side.
    for first, last, cat in _category_runs(categories):
        ax.axvspan(first - 0.5, last + 0.5, color=CATEGORY_COLORS[cat],
                   alpha=0.55, zorder=0)

    # Bars, each with its numeric value printed just above it.
    for key, legend_label, column, offset in series:
        color = MODEL_COLORS[key]
        values = np.array([row[column] for row in ROWS])
        positions = x + offset
        ax.bar(positions, values, bar_w, label=legend_label, color=color,
               zorder=2)
        for xi, v in zip(positions, values):
            ax.text(xi, v + 0.3, f"{v:.1f}", ha="center", va="bottom",
                    fontsize=5.5, color=color)

    ax.set_xticks(x)
    ax.set_xticklabels(labels, rotation=25, ha="right", fontsize=7.5)
    ax.tick_params(axis="y", labelsize=7.5)
    ax.set_ylabel("Strict Accuracy (%) @ $k$=30", fontsize=8.5)
    ax.set_ylim(0, 30)
    ax.set_xlim(-0.55, n - 0.45)
    ax.grid(axis="y", linestyle=":", alpha=0.5, zorder=1)
    ax.set_axisbelow(True)
    for spine in ("top", "right"):
        ax.spines[spine].set_visible(False)

    # Top legend: model swatches followed by category swatches.
    legend_handles = [
        Patch(color=MODEL_COLORS[key], label=legend_label)
        for key, legend_label, _, _ in series
    ]
    legend_handles += [
        Patch(facecolor=CATEGORY_COLORS[cat], edgecolor="none",
              label=CATEGORY_LABEL[cat])
        for cat in ("long", "retr", "agent")
    ]
    ax.legend(handles=legend_handles, ncol=6, loc="lower left",
              frameon=False, fontsize=6.8, bbox_to_anchor=(-0.01, 1.0),
              handlelength=1.2, handletextpad=0.4, columnspacing=1.2,
              borderaxespad=0.1)

    fig.tight_layout(pad=0.3)
    out_pdf = "main_results_bar.pdf"
    out_png = out_pdf.replace(".pdf", ".png")
    fig.savefig(out_pdf, bbox_inches="tight")
    fig.savefig(out_png, dpi=180, bbox_inches="tight")
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)
    print(f"wrote {out_pdf}\nwrote {out_png}")


if __name__ == "__main__":
    main()