Spaces:

qpluslab
/

OpenRA-Bench

Running

App Files Files Community

OpenRA-Bench / tests /test_app.py

yxc20098

Add HF identity verification and anonymous submission support

6f326d5 3 months ago

raw

history blame contribute delete

46.9 kB

	"""Tests for the Gradio leaderboard app."""

	import json
	import sys
	from pathlib import Path
	from unittest.mock import patch

	import pandas as pd
	import pytest

	sys.path.insert(0, str(Path(__file__).parent.parent))

	from app import (
	AGENT_TYPE_COLORS,
	DISPLAY_COLUMNS,
	GAMES_JSONL,
	MAX_SUBMITS_PER_HOUR,
	MIN_GAMES_FOR_LEADERBOARD,
	VALID_OPPONENTS,
	_aggregate_agent_games,
	_build_response,
	_check_rate_limit,
	_load_raw_games,
	_process_identity,
	_rebuild_leaderboard,
	_safe_agent_link,
	_safe_replay_link,
	_sanitize_csv_value,
	_save_raw_game,
	_single_game_row,
	_submit_times,
	_verified_badge,
	_verify_hf_token,
	add_type_badges,
	build_app,
	filter_leaderboard,
	handle_upload,
	handle_api_submit,
	handle_api_submit_with_replay,
	load_data,
	validate_submission,
	)


	class TestLoadData:
	    """Checks on the DataFrame returned by ``load_data``."""

	    def test_returns_dataframe(self):
	        """The loader yields a pandas DataFrame."""
	        assert isinstance(load_data(), pd.DataFrame)

	    def test_has_display_columns(self):
	        """Every name in ``DISPLAY_COLUMNS`` is a column of the frame."""
	        board = load_data()
	        for col in DISPLAY_COLUMNS:
	            assert col in board.columns, f"Missing column: {col}"

	    def test_has_rank_column(self):
	        """A non-empty board has rank 1 in its first row."""
	        board = load_data()
	        if len(board) == 0:
	            # Nothing to check against an empty leaderboard.
	            return
	        assert board["Rank"].iloc[0] == 1

	    def test_sorted_by_score_descending(self):
	        """With two or more rows, ``Score`` is in descending order."""
	        board = load_data()
	        if len(board) <= 1:
	            # Ordering is only meaningful with at least two rows.
	            return
	        scores = board["Score"].tolist()
	        assert scores == sorted(scores, reverse=True)

	    def test_handles_missing_file(self):
	        """A non-existent data path produces an empty DataFrame."""
	        missing = Path("/nonexistent/data.csv")
	        with patch("app.DATA_PATH", missing):
	            board = load_data()
	        assert isinstance(board, pd.DataFrame)
	        assert len(board) == 0


	class TestBadges:
	    """Checks on the markup ``add_type_badges`` writes into ``Type``."""

	    @staticmethod
	    def _badge_for(agent_type):
	        """Return the rendered ``Type`` cell of a one-row frame."""
	        frame = pd.DataFrame({"Type": [agent_type]})
	        return add_type_badges(frame)["Type"].iloc[0]

	    def test_scripted_badge_has_gold(self):
	        """The Scripted badge contains the colour ``#ffcd75``."""
	        assert "#ffcd75" in self._badge_for("Scripted")

	    def test_llm_badge_has_blue(self):
	        """The LLM badge contains the colour ``#7497db``."""
	        assert "#7497db" in self._badge_for("LLM")

	    def test_rl_badge_has_gray(self):
	        """The RL badge contains the colour ``#75809c``."""
	        assert "#75809c" in self._badge_for("RL")

	    def test_all_types_have_colors(self):
	        """Each of the three agent types is a key of ``AGENT_TYPE_COLORS``."""
	        for agent_type in ("Scripted", "LLM", "RL"):
	            assert agent_type in AGENT_TYPE_COLORS


	class TestFilter:
	    """Checks on ``filter_leaderboard`` search and opponent filtering."""

	    def test_returns_dataframe(self):
	        """An unfiltered query returns a DataFrame."""
	        assert isinstance(filter_leaderboard("", [], "All"), pd.DataFrame)

	    def test_search_filters_by_name(self):
	        """Every returned ``Agent`` cell contains the search term."""
	        hits = filter_leaderboard("qwen", [], "All")
	        if len(hits) == 0:
	            return
	        for agent_cell in hits["Agent"]:
	            assert "qwen" in str(agent_cell).lower()

	    def test_opponent_filter(self):
	        """Filtering on Beginner returns only Beginner rows."""
	        beginner_rows = filter_leaderboard("", [], "Beginner")
	        if len(beginner_rows) == 0:
	            return
	        assert all(beginner_rows["Opponent"] == "Beginner")

	    def test_opponent_filter_hard(self):
	        """Filtering on Hard still returns a DataFrame."""
	        hard_rows = filter_leaderboard("", [], "Hard")
	        # The result may be empty if no Hard entries exist.
	        assert isinstance(hard_rows, pd.DataFrame)


	class TestBuildApp:
	    """Smoke test for app construction."""

	    def test_builds_without_error(self):
	        """``build_app`` runs to completion and returns a non-None object."""
	        built = build_app()
	        assert built is not None


	class TestValidateSubmission:
	    """Checks on ``validate_submission`` for required fields and enums."""

	    def _valid_data(self):
	        """Return a fresh submission dict that is expected to validate."""
	        return dict(
	            agent_name="TestBot",
	            agent_type="LLM",
	            opponent="Beginner",
	            result="loss",
	            ticks=27000,
	            kills_cost=1000,
	            deaths_cost=2900,
	            assets_value=9050,
	        )

	    def test_valid_submission(self):
	        """A complete submission validates with an empty error string."""
	        ok, message = validate_submission(self._valid_data())
	        assert ok
	        assert message == ""

	    def test_missing_field(self):
	        """A submission with only ``agent_name`` is rejected."""
	        ok, message = validate_submission({"agent_name": "Bot"})
	        assert not ok
	        assert "Missing required field" in message

	    def test_invalid_opponent(self):
	        """An opponent outside the accepted set is rejected."""
	        payload = {**self._valid_data(), "opponent": "Brutal"}
	        ok, message = validate_submission(payload)
	        assert not ok
	        assert "Invalid opponent" in message

	    def test_invalid_agent_type(self):
	        """An agent type outside the accepted set is rejected."""
	        payload = {**self._valid_data(), "agent_type": "MCTS"}
	        ok, message = validate_submission(payload)
	        assert not ok
	        assert "Invalid agent_type" in message

	    def test_all_opponents_accepted(self):
	        """Every entry of ``VALID_OPPONENTS`` validates."""
	        for opp in VALID_OPPONENTS:
	            payload = {**self._valid_data(), "opponent": opp}
	            ok, _message = validate_submission(payload)
	            assert ok, f"Opponent '{opp}' should be valid"

	    def test_all_agent_types_accepted(self):
	        """Scripted, LLM and RL all validate as agent types."""
	        for at in ("Scripted", "LLM", "RL"):
	            payload = {**self._valid_data(), "agent_type": at}
	            ok, _message = validate_submission(payload)
	            assert ok, f"Agent type '{at}' should be valid"


	class TestApiSubmit:
	    """Checks on the JSON API submission handler ``handle_api_submit``."""

	    @staticmethod
	    def _payload(**extra):
	        """Build a winning TestBot submission, with ``extra`` keys appended."""
	        payload = {
	            "agent_name": "TestBot",
	            "agent_type": "LLM",
	            "opponent": "Easy",
	            "result": "win",
	            "win": True,
	            "ticks": 5000,
	            "kills_cost": 3000,
	            "deaths_cost": 1000,
	            "assets_value": 8000,
	        }
	        payload.update(extra)
	        return payload

	    def test_valid_json(self, tmp_path):
	        """A valid submission without a token is acknowledged with OK."""
	        body = json.dumps(self._payload())
	        games_path = tmp_path / "games.jsonl"
	        data_path = tmp_path / "results.csv"
	        _submit_times.clear()
	        with patch("app.GAMES_JSONL", games_path), \
	                patch("app.DATA_PATH", data_path), \
	                patch("app.SUBMISSIONS_DIR", tmp_path):
	            response = handle_api_submit(body)
	        assert "OK" in response
	        assert "TestBot" in response

	    def test_valid_json_with_hf_token(self, tmp_path):
	        """A token-bearing submission stores the username, not the token."""
	        body = json.dumps(self._payload(hf_token="hf_test"))
	        games_path = tmp_path / "games.jsonl"
	        data_path = tmp_path / "results.csv"
	        _submit_times.clear()
	        with patch("app.GAMES_JSONL", games_path), \
	                patch("app.DATA_PATH", data_path), \
	                patch("app.SUBMISSIONS_DIR", tmp_path), \
	                patch("app._verify_hf_token", return_value=("testuser", "")):
	            response = handle_api_submit(body)
	        assert "OK" in response
	        assert "TestBot" in response
	        assert "1/5" in response

	        # The raw token must not be written to disk; only the username is.
	        stored = json.loads(games_path.read_text().strip())
	        assert "hf_token" not in stored
	        assert stored["hf_username"] == "testuser"

	    def test_invalid_json(self):
	        """A non-JSON body is reported as invalid JSON."""
	        _submit_times.clear()
	        response = handle_api_submit("not json")
	        assert "Invalid JSON" in response

	    def test_missing_fields(self):
	        """A body missing required fields yields a validation error."""
	        _submit_times.clear()
	        body = json.dumps({"agent_name": "Bot"})
	        response = handle_api_submit(body)
	        assert "Validation error" in response


	class TestDisplayColumns:
	    """Checks on the ``DISPLAY_COLUMNS`` configuration."""

	    def test_replay_in_display_columns(self):
	        """``Replay`` is one of the displayed columns."""
	        has_replay = "Replay" in DISPLAY_COLUMNS
	        assert has_replay

	    def test_display_columns_count(self):
	        """Exactly 15 columns are displayed."""
	        column_total = len(DISPLAY_COLUMNS)
	        assert column_total == 15


	class TestAgentUrl:
	    """Test agent URL hyperlink rendering."""

	    # Header row shared by the CSV fixtures written in this class.
	    _CSV_HEADER = (
	        "agent_name,agent_type,opponent,games,win_rate,score,"
	        "avg_kills,avg_deaths,kd_ratio,avg_economy,avg_game_length,"
	        "timestamp,replay_url,agent_url\n"
	    )

	    def _load_from_csv(self, tmp_path, row):
	        """Write the header plus ``row`` to a CSV and load it via ``load_data``.

	        The file lives under pytest's ``tmp_path`` so it is cleaned up by
	        pytest even when an assertion in the calling test fails.
	        """
	        csv_path = tmp_path / "results.csv"
	        csv_path.write_text(self._CSV_HEADER + row)
	        with patch("app.DATA_PATH", csv_path):
	            return load_data()

	    def test_agent_url_in_submission(self, tmp_path):
	        """A submission carrying ``agent_url`` is accepted by the API handler."""
	        data = {
	            "agent_name": "DeathBot",
	            "agent_type": "RL",
	            "agent_url": "https://github.com/user/deathbot",
	            "opponent": "Normal",
	            "result": "win",
	            "ticks": 5000,
	            "kills_cost": 3000,
	            "deaths_cost": 1000,
	            "assets_value": 8000,
	        }
	        games_path = tmp_path / "games.jsonl"
	        data_path = tmp_path / "results.csv"
	        _submit_times.clear()
	        with patch("app.GAMES_JSONL", games_path), \
	                patch("app.DATA_PATH", data_path), \
	                patch("app.SUBMISSIONS_DIR", tmp_path):
	            result = handle_api_submit(json.dumps(data))
	        assert "OK" in result
	        assert "DeathBot" in result

	    def test_agent_url_renders_link(self, tmp_path):
	        """When agent_url is set, Agent column should be a hyperlink."""
	        df = self._load_from_csv(
	            tmp_path,
	            "DeathBot,RL,Normal,10,50.0,60.0,"
	            "2000,1500,1.33,9000,15000,"
	            "2026-02-26,,https://github.com/user/deathbot\n",
	        )
	        assert '<a href="https://github.com/user/deathbot"' in df["Agent"].iloc[0]

	    def test_no_url_renders_plain_name(self, tmp_path):
	        """When agent_url is empty, Agent column is plain text."""
	        df = self._load_from_csv(
	            tmp_path,
	            "PlainBot,LLM,Easy,5,20.0,30.0,"
	            "1000,2000,0.5,5000,10000,"
	            "2026-02-26,,\n",
	        )
	        assert df["Agent"].iloc[0] == "PlainBot"


	class TestReplayColumn:
	    """Test replay download link rendering."""

	    def test_replay_link_rendered(self, tmp_path):
	        """Replay column shows download link when replay_url is set."""
	        csv_content = (
	            "agent_name,agent_type,opponent,games,win_rate,score,"
	            "avg_kills,avg_deaths,kd_ratio,avg_economy,avg_game_length,"
	            "timestamp,replay_url,agent_url\n"
	            "TestBot,LLM,Easy,1,0.0,18.0,"
	            "1000,2000,0.5,5000,10000,"
	            "2026-02-26,replay-test-123.orarep,\n"
	        )
	        # Use pytest's tmp_path so the fixture file is removed even when an
	        # assertion below fails (a manual unlink after the asserts is not).
	        csv_path = tmp_path / "results.csv"
	        csv_path.write_text(csv_content)
	        with patch("app.DATA_PATH", csv_path):
	            df = load_data()
	        replay_cell = df["Replay"].iloc[0]
	        assert "/replays/replay-test-123.orarep" in replay_cell
	        assert "download" in replay_cell

	    def test_empty_replay_no_link(self):
	        """Replay column is empty when no replay_url."""
	        df = load_data()
	        if len(df) > 0:
	            # The default test data has no replay
	            replay_val = df["Replay"].iloc[0]
	            assert replay_val == "" or not str(replay_val).strip()


	class TestXssPrevention:
	    """Test that user input is HTML-escaped to prevent XSS."""

	    def test_javascript_url_blocked(self):
	        """javascript: URLs should NOT produce a clickable link."""
	        result = _safe_agent_link("Bot", "javascript:alert(1)")
	        assert "javascript:" not in result
	        assert "Bot" in result

	    def test_data_url_blocked(self):
	        """data: URLs should not appear in the rendered output."""
	        result = _safe_agent_link("Bot", "data:text/html,<script>alert(1)</script>")
	        assert "data:" not in result

	    def test_html_in_name_escaped(self):
	        """Markup in the agent name is emitted as entities, not raw tags."""
	        result = _safe_agent_link('<script>alert("xss")</script>', "")
	        assert "<script>" not in result
	        # The escaped form must be present; asserting the raw tag here would
	        # contradict the line above and make the test unsatisfiable.
	        assert "&lt;script&gt;" in result

	    def test_quote_injection_in_url_escaped(self):
	        """A quote in the URL must not break out of the href attribute."""
	        result = _safe_agent_link("Bot", 'https://ok.com" onclick="alert(1)')
	        # Either the payload is dropped entirely or its quote is escaped.
	        # NOTE(review): assumes quotes are escaped as `&quot;` (html.escape
	        # convention) -- confirm against _safe_agent_link.
	        assert "onclick" not in result or "&quot;" in result

	    def test_valid_https_url_works(self):
	        """A plain https URL renders as an anchor with rel="noopener"."""
	        result = _safe_agent_link("Bot", "https://github.com/user/repo")
	        assert '<a href="https://github.com/user/repo"' in result
	        assert 'rel="noopener"' in result

	    def test_replay_link_sanitized(self):
	        """Markup in a replay filename does not reach the output as a tag."""
	        result = _safe_replay_link('"><script>alert(1)</script>.orarep')
	        assert "<script>" not in result

	    def test_replay_path_traversal_stripped(self):
	        """Path traversal characters (/) are stripped from replay filenames."""
	        result = _safe_replay_link("replay/../../../etc/passwd")
	        # The href after /replays/ should have no slashes (traversal stripped)
	        href_part = result.split('href="')[1].split('"')[0]
	        filename = href_part.replace("/replays/", "")
	        assert "/" not in filename


	class TestInputValidation:
	    """Checks on type, length and URL-scheme rules in ``validate_submission``."""

	    def _valid_data(self):
	        """Return a fresh submission dict that is expected to validate."""
	        return dict(
	            agent_name="TestBot",
	            agent_type="LLM",
	            opponent="Beginner",
	            result="loss",
	            ticks=27000,
	            kills_cost=1000,
	            deaths_cost=2900,
	            assets_value=9050,
	        )

	    def _validate_with(self, **overrides):
	        """Validate the baseline submission with ``overrides`` applied."""
	        payload = self._valid_data()
	        payload.update(overrides)
	        return validate_submission(payload)

	    def test_string_ticks_rejected(self):
	        """A non-numeric string for ``ticks`` is rejected."""
	        ok, message = self._validate_with(ticks="not a number")
	        assert not ok
	        assert "must be a number" in message

	    def test_dict_kills_rejected(self):
	        """A dict for ``kills_cost`` is rejected."""
	        ok, _message = self._validate_with(kills_cost={"nested": True})
	        assert not ok

	    def test_long_agent_name_rejected(self):
	        """A 101-character agent name is rejected."""
	        ok, message = self._validate_with(agent_name="A" * 101)
	        assert not ok
	        assert "100 characters" in message

	    def test_javascript_agent_url_rejected(self):
	        """A ``javascript:`` agent URL is rejected."""
	        ok, message = self._validate_with(agent_url="javascript:alert(1)")
	        assert not ok
	        assert "HTTP(S)" in message

	    def test_valid_agent_url_accepted(self):
	        """An https agent URL validates."""
	        ok, _message = self._validate_with(agent_url="https://github.com/user/repo")
	        assert ok

	    def test_empty_agent_url_accepted(self):
	        """An empty agent URL validates."""
	        ok, _message = self._validate_with(agent_url="")
	        assert ok

	    def test_long_agent_url_rejected(self):
	        """An agent URL with 500 extra path characters is rejected."""
	        oversized = "https://example.com/" + "a" * 500
	        ok, message = self._validate_with(agent_url=oversized)
	        assert not ok
	        assert "500 characters" in message


	class TestCsvSanitization:
	    """Test CSV injection prevention."""

	    def test_formula_trigger_stripped(self):
	        """A leading ``=`` is removed from a formula-style payload."""
	        # Raw strings: "\|" is not a valid escape sequence in a normal
	        # literal and triggers a SyntaxWarning on recent Python versions.
	        payload = r"=cmd\|'/c calc'!A0"
	        assert _sanitize_csv_value(payload) == r"cmd\|'/c calc'!A0"

	    def test_plus_trigger_stripped(self):
	        """A leading ``+`` is removed."""
	        assert _sanitize_csv_value("+cmd") == "cmd"

	    def test_at_trigger_stripped(self):
	        """A leading ``@`` is removed."""
	        assert _sanitize_csv_value("@SUM(A1)") == "SUM(A1)"

	    def test_newlines_replaced(self):
	        """CR and LF characters are each replaced by a single space."""
	        assert _sanitize_csv_value("line1\nline2\rline3") == "line1 line2 line3"

	    def test_normal_string_unchanged(self):
	        """A string without a trigger prefix passes through unchanged."""
	        assert _sanitize_csv_value("DeathBot-9000") == "DeathBot-9000"

	    def test_numbers_unchanged(self):
	        """Numeric values pass through unchanged."""
	        assert _sanitize_csv_value(42) == 42
	        assert _sanitize_csv_value(3.14) == 3.14


	class TestRateLimiting:
	    """Checks on the per-key submission limiter ``_check_rate_limit``."""

	    def test_rate_limit_allows_normal_usage(self):
	        """The first call for a fresh key is allowed."""
	        _submit_times.clear()
	        permitted, _message = _check_rate_limit("test_normal")
	        assert permitted

	    def test_rate_limit_blocks_after_max(self):
	        """The call after ``MAX_SUBMITS_PER_HOUR`` allowed calls is refused."""
	        _submit_times.clear()
	        key = "test_flood"
	        for _attempt in range(MAX_SUBMITS_PER_HOUR):
	            permitted, _message = _check_rate_limit(key)
	            assert permitted
	        permitted, message = _check_rate_limit(key)
	        assert not permitted
	        assert "Rate limit" in message

	    def test_rate_limit_resets_after_expiry(self):
	        """Timestamps 3601 seconds old do not count against the limit."""
	        import time as _time
	        _submit_times.clear()
	        key = "test_expiry"
	        # Pre-fill the key with a full quota of stale timestamps.
	        stale = _time.time() - 3601
	        _submit_times[key] = [stale] * MAX_SUBMITS_PER_HOUR
	        permitted, _message = _check_rate_limit(key)
	        assert permitted


	class TestSearchSafety:
	    """Checks that a malformed search pattern does not crash filtering."""

	    def test_invalid_regex_falls_back(self):
	        """An unbalanced regex still yields a DataFrame instead of raising."""
	        malformed = "[invalid(regex"
	        outcome = filter_leaderboard(malformed, [], "All")
	        assert isinstance(outcome, pd.DataFrame)


	# ── New aggregation tests ────────────────────────────────────────────────────


	class TestSaveRawGame:
	    """Checks on where ``_save_raw_game`` writes game records."""

	    def test_appends_to_jsonl(self, tmp_path):
	        """Two saves produce two JSON lines, in call order."""
	        games_path = tmp_path / "games.jsonl"
	        with patch("app.GAMES_JSONL", games_path), \
	                patch("app.SUBMISSIONS_DIR", tmp_path):
	            _save_raw_game({"agent_name": "Bot1", "result": "win"})
	            _save_raw_game({"agent_name": "Bot2", "result": "lose"})

	        records = [
	            json.loads(line)
	            for line in games_path.read_text().strip().splitlines()
	        ]
	        assert len(records) == 2
	        assert records[0]["agent_name"] == "Bot1"
	        assert records[1]["agent_name"] == "Bot2"

	    def test_also_writes_results_jsonl(self, tmp_path):
	        """A save also writes the record to ``results.jsonl`` in the submissions dir."""
	        games_path = tmp_path / "games.jsonl"
	        with patch("app.GAMES_JSONL", games_path), \
	                patch("app.SUBMISSIONS_DIR", tmp_path):
	            _save_raw_game({"agent_name": "Bot1", "result": "win"})

	        mirror_path = tmp_path / "results.jsonl"
	        assert mirror_path.exists()
	        mirror_lines = mirror_path.read_text().strip().splitlines()
	        assert len(mirror_lines) == 1
	        assert json.loads(mirror_lines[0])["agent_name"] == "Bot1"


	class TestLoadRawGames:
	    """Checks on how ``_load_raw_games`` reads the games JSONL file."""

	    @staticmethod
	    def _load_from(games_path):
	        """Call ``_load_raw_games`` with ``GAMES_JSONL`` pointed at ``games_path``."""
	        with patch("app.GAMES_JSONL", games_path):
	            return _load_raw_games()

	    def test_returns_empty_when_no_file(self, tmp_path):
	        """A missing file yields an empty list."""
	        assert self._load_from(tmp_path / "games.jsonl") == []

	    def test_loads_multiple_games(self, tmp_path):
	        """Each JSON line becomes one record, in file order."""
	        games_path = tmp_path / "games.jsonl"
	        games_path.write_text("".join([
	            '{"agent_name": "A"}\n',
	            '{"agent_name": "B"}\n',
	        ]))
	        loaded = self._load_from(games_path)
	        assert len(loaded) == 2
	        assert loaded[0]["agent_name"] == "A"

	    def test_skips_invalid_json_lines(self, tmp_path):
	        """A line that is not valid JSON is dropped."""
	        games_path = tmp_path / "games.jsonl"
	        games_path.write_text("".join([
	            '{"agent_name": "A"}\n',
	            'not valid json\n',
	            '{"agent_name": "B"}\n',
	        ]))
	        loaded = self._load_from(games_path)
	        assert len(loaded) == 2

	    def test_skips_blank_lines(self, tmp_path):
	        """Empty and whitespace-only lines are dropped."""
	        games_path = tmp_path / "games.jsonl"
	        games_path.write_text("".join([
	            '{"agent_name": "A"}\n',
	            '\n',
	            ' \n',
	            '{"agent_name": "B"}\n',
	        ]))
	        loaded = self._load_from(games_path)
	        assert len(loaded) == 2


	class TestAggregation:
	    """Checks on ``_aggregate_agent_games`` grouping and statistics."""

	    def _make_game(self, agent="Bot", opponent="Normal", win=True, kills=1000,
	                   deaths=500, assets=5000, ticks=2000, hf_username="testuser"):
	        """Build one raw RL game record dated 2026-03-02."""
	        outcome = "win" if win else "lose"
	        return dict(
	            agent_name=agent,
	            agent_type="RL",
	            opponent=opponent,
	            result=outcome,
	            win=win,
	            kills_cost=kills,
	            deaths_cost=deaths,
	            assets_value=assets,
	            ticks=ticks,
	            timestamp="2026-03-02",
	            hf_username=hf_username,
	        )

	    @staticmethod
	    def _aggregate(games, agent="Bot", agent_type="RL", opponent="Normal",
	                   hf_username="testuser"):
	        """Call ``_aggregate_agent_games`` with the defaults used by most tests."""
	        return _aggregate_agent_games(
	            agent, agent_type, opponent, games, hf_username=hf_username
	        )

	    def test_below_threshold_returns_none(self):
	        """One game short of the minimum yields a count but no row."""
	        short_by_one = MIN_GAMES_FOR_LEADERBOARD - 1
	        games = [self._make_game() for _ in range(short_by_one)]
	        count, agg = self._aggregate(games)
	        assert count == MIN_GAMES_FOR_LEADERBOARD - 1
	        assert agg is None

	    def test_at_threshold_returns_row(self):
	        """Exactly the minimum number of wins yields a 100% win-rate row."""
	        games = [self._make_game() for _ in range(MIN_GAMES_FOR_LEADERBOARD)]
	        count, agg = self._aggregate(games)
	        assert count == MIN_GAMES_FOR_LEADERBOARD
	        assert agg is not None
	        assert agg["games"] == MIN_GAMES_FOR_LEADERBOARD
	        assert agg["win_rate"] == 100.0

	    def test_applies_difficulty_multiplier(self):
	        """Identical games score higher against Hard than against Normal."""
	        normal_games = [self._make_game(opponent="Normal") for _ in range(5)]
	        hard_games = [self._make_game(opponent="Hard") for _ in range(5)]
	        _, normal_row = self._aggregate(normal_games, opponent="Normal")
	        _, hard_row = self._aggregate(hard_games, opponent="Hard")
	        assert hard_row["score"] > normal_row["score"]

	    def test_filters_by_agent_and_opponent(self):
	        """Counts are kept separate per agent name and per opponent."""
	        games = []
	        games += [self._make_game(agent="Bot1", opponent="Normal") for _ in range(5)]
	        games += [self._make_game(agent="Bot2", opponent="Normal") for _ in range(3)]
	        games += [self._make_game(agent="Bot1", opponent="Hard") for _ in range(2)]
	        count1, agg1 = self._aggregate(games, agent="Bot1", opponent="Normal")
	        count2, _ = self._aggregate(games, agent="Bot2", opponent="Normal")
	        count3, _ = self._aggregate(games, agent="Bot1", opponent="Hard")
	        assert count1 == 5
	        assert agg1 is not None
	        assert count2 == 3  # Below threshold
	        assert count3 == 2  # Below threshold

	    def test_mixed_results_aggregated(self):
	        """Three wins out of five give 60% and kills are averaged."""
	        outcomes = [
	            (True, 5000, 1000),
	            (True, 3000, 2000),
	            (False, 1000, 4000),
	            (True, 4000, 1500),
	            (False, 2000, 3000),
	        ]
	        games = [
	            self._make_game(win=won, kills=kills, deaths=deaths)
	            for won, kills, deaths in outcomes
	        ]
	        count, agg = self._aggregate(games)
	        assert count == 5
	        assert agg["win_rate"] == 60.0
	        assert agg["avg_kills"] == 3000  # (5000+3000+1000+4000+2000)/5
	        assert agg["games"] == 5

	    def test_kd_ratio_computed(self):
	        """2000 kills against 1000 deaths per game gives a ratio of 2.0."""
	        games = [self._make_game(kills=2000, deaths=1000) for _ in range(5)]
	        _, agg = self._aggregate(games)
	        assert agg["kd_ratio"] == 2.0

	    def test_zero_deaths_kd_ratio(self):
	        """With zero deaths the ratio equals the total kills."""
	        games = [self._make_game(kills=1000, deaths=0) for _ in range(5)]
	        _, agg = self._aggregate(games)
	        assert agg["kd_ratio"] == 5000.0  # 5000 total kills / max(0, 1)

	    def test_aggregation_includes_metadata(self):
	        """URLs from the last game, difficulty and identity reach the row."""
	        games = [self._make_game() for _ in range(5)]
	        games[-1].update(
	            agent_url="https://github.com/user/bot",
	            replay_url="replay-bot.orarep",
	        )
	        _, agg = self._aggregate(games)
	        assert agg["agent_url"] == "https://github.com/user/bot"
	        assert agg["replay_url"] == "replay-bot.orarep"
	        assert agg["difficulty"] == "Normal"
	        assert agg["verified"] is True
	        assert agg["hf_username"] == "testuser"

	    def test_filters_by_agent_type(self):
	        """Different agent_types for the same name should not mix."""
	        # _make_game always produces agent_type="RL".
	        games = [self._make_game(agent="Bot") for _ in range(5)]
	        # Query with a non-matching agent_type.
	        count, agg = self._aggregate(games, agent_type="LLM")
	        assert count == 0
	        assert agg is None

	    def test_win_field_fallback_to_result(self):
	        """When 'win' field is missing, fall back to result=='win'."""
	        games = []
	        for _ in range(5):
	            game = self._make_game()
	            game.pop("win")
	            game["result"] = "win"
	            games.append(game)
	        _, agg = self._aggregate(games)
	        assert agg["win_rate"] == 100.0

	    def test_anonymous_not_aggregated(self):
	        """Games without hf_username should never be aggregated."""
	        games = [self._make_game(hf_username="") for _ in range(10)]
	        count, agg = self._aggregate(games, hf_username="")
	        assert count == 0
	        assert agg is None

	    def test_different_hf_users_not_mixed(self):
	        """Games from different HF users should not be mixed."""
	        games = []
	        games += [self._make_game(hf_username="user_a") for _ in range(5)]
	        games += [self._make_game(hf_username="user_b") for _ in range(5)]
	        count_a, agg_a = self._aggregate(games, hf_username="user_a")
	        count_b, agg_b = self._aggregate(games, hf_username="user_b")
	        assert count_a == 5
	        assert count_b == 5
	        assert agg_a is not None
	        assert agg_b is not None


	class TestRebuildLeaderboard:
	    """Checks on the CSV that ``_rebuild_leaderboard`` writes from raw games."""

	    @staticmethod
	    def _game(agent_name, *, agent_type="RL", opponent="Normal", win=True,
	              kills_cost=1000, deaths_cost=500, assets_value=5000,
	              ticks=2000, hf_username=""):
	        """Build one raw game record dated 2026-03-02."""
	        return {
	            "agent_name": agent_name, "agent_type": agent_type, "opponent": opponent,
	            "result": "win" if win else "lose", "win": win, "kills_cost": kills_cost,
	            "deaths_cost": deaths_cost, "assets_value": assets_value, "ticks": ticks,
	            "timestamp": "2026-03-02", "hf_username": hf_username,
	        }

	    @staticmethod
	    def _verified_rows(frame):
	        """Return the rows whose ``verified`` column compares equal to True."""
	        # Element-wise comparison on the Series, not an identity check.
	        return frame[frame["verified"] == True]

	    def test_filters_below_threshold(self, tmp_path):
	        """Only the agent with five verified games gets an aggregated row."""
	        data_path = tmp_path / "results.csv"
	        games_path = tmp_path / "games.jsonl"

	        with patch("app.DATA_PATH", data_path), \
	                patch("app.GAMES_JSONL", games_path), \
	                patch("app.SUBMISSIONS_DIR", tmp_path):
	            # Bot1: 3 verified games (below threshold).
	            for _ in range(3):
	                _save_raw_game(self._game("Bot1", hf_username="user1"))
	            # Bot2: 5 verified games (at threshold).
	            for _ in range(5):
	                _save_raw_game(self._game(
	                    "Bot2", kills_cost=2000, deaths_cost=1000,
	                    assets_value=8000, ticks=1500, hf_username="user2",
	                ))

	            _rebuild_leaderboard()

	        board = pd.read_csv(data_path)
	        # Only Bot2 is aggregated; Bot1 has just 3 verified games.
	        verified_rows = self._verified_rows(board)
	        assert len(verified_rows) == 1
	        top = verified_rows.iloc[0]
	        assert top["agent_name"] == "Bot2"
	        assert top["games"] == 5

	    def test_rebuild_sorts_by_score(self, tmp_path):
	        """The stronger agent appears before the weaker one."""
	        data_path = tmp_path / "results.csv"
	        games_path = tmp_path / "games.jsonl"

	        with patch("app.DATA_PATH", data_path), \
	                patch("app.GAMES_JSONL", games_path), \
	                patch("app.SUBMISSIONS_DIR", tmp_path):
	            # Agent with worse stats, saved first.
	            for _ in range(5):
	                _save_raw_game(self._game(
	                    "WeakBot", win=False, kills_cost=100, deaths_cost=5000,
	                    assets_value=1000, ticks=5000, hf_username="user1",
	                ))
	            # Agent with better stats, saved second.
	            for _ in range(5):
	                _save_raw_game(self._game(
	                    "StrongBot", kills_cost=5000, deaths_cost=500,
	                    assets_value=20000, ticks=1000, hf_username="user2",
	                ))

	            _rebuild_leaderboard()

	        board = pd.read_csv(data_path)
	        verified = self._verified_rows(board)
	        assert len(verified) == 2
	        assert verified.iloc[0]["agent_name"] == "StrongBot"
	        assert verified.iloc[1]["agent_name"] == "WeakBot"

	    def test_rebuild_no_games_preserves_csv(self, tmp_path):
	        """With no games file, an existing CSV is left in place."""
	        data_path = tmp_path / "results.csv"
	        games_path = tmp_path / "games.jsonl"

	        # Seed an existing CSV; games.jsonl is deliberately not created.
	        data_path.write_text("agent_name,score\nOldBot,50\n")

	        with patch("app.DATA_PATH", data_path), \
	                patch("app.GAMES_JSONL", games_path):
	            _rebuild_leaderboard()

	        # The seeded row must still be there (no games.jsonl = no rebuild).
	        assert "OldBot" in data_path.read_text()

	    def test_rebuild_no_qualifying_agents_with_anonymous(self, tmp_path):
	        """Anonymous-only games still produce rows (unverified)."""
	        data_path = tmp_path / "results.csv"
	        games_path = tmp_path / "games.jsonl"

	        with patch("app.DATA_PATH", data_path), \
	                patch("app.GAMES_JSONL", games_path), \
	                patch("app.SUBMISSIONS_DIR", tmp_path):
	            for _ in range(2):
	                _save_raw_game(self._game("Bot1", hf_username=""))

	            _rebuild_leaderboard()

	        board = pd.read_csv(data_path)
	        assert len(board) == 2  # 2 anonymous individual rows
	        assert all(board["verified"] == False)

	    def test_rebuild_includes_difficulty_column(self, tmp_path):
	        """The CSV has a ``difficulty`` column carrying the opponent level."""
	        data_path = tmp_path / "results.csv"
	        games_path = tmp_path / "games.jsonl"

	        with patch("app.DATA_PATH", data_path), \
	                patch("app.GAMES_JSONL", games_path), \
	                patch("app.SUBMISSIONS_DIR", tmp_path):
	            for _ in range(5):
	                _save_raw_game(self._game(
	                    "Bot1", opponent="Hard", hf_username="user1",
	                ))
	            _rebuild_leaderboard()

	        board = pd.read_csv(data_path)
	        verified = self._verified_rows(board)
	        assert "difficulty" in board.columns
	        assert verified.iloc[0]["difficulty"] == "Hard"

	    def test_rebuild_includes_verified_column(self, tmp_path):
	        """Verified and anonymous submissions land in separate row groups."""
	        data_path = tmp_path / "results.csv"
	        games_path = tmp_path / "games.jsonl"

	        with patch("app.DATA_PATH", data_path), \
	                patch("app.GAMES_JSONL", games_path), \
	                patch("app.SUBMISSIONS_DIR", tmp_path):
	            # Five verified games.
	            for _ in range(5):
	                _save_raw_game(self._game("VerifiedBot", hf_username="hfuser"))
	            # One anonymous game.
	            _save_raw_game(self._game(
	                "AnonBot", agent_type="LLM", kills_cost=2000, hf_username="",
	            ))
	            _rebuild_leaderboard()

	        board = pd.read_csv(data_path)
	        assert "verified" in board.columns
	        assert "hf_username" in board.columns
	        verified_rows = self._verified_rows(board)
	        anon_rows = board[board["verified"] == False]
	        assert len(verified_rows) == 1
	        assert verified_rows.iloc[0]["agent_name"] == "VerifiedBot"
	        assert len(anon_rows) == 1
	        assert anon_rows.iloc[0]["agent_name"] == "AnonBot"


	class TestFriendlyMessages:
	    """Checks on the wording of ``handle_api_submit`` responses."""

	    def _mock_hf_verify(self, username="testuser"):
	        """Return a patcher that makes _verify_hf_token return the given username."""
	        return patch("app._verify_hf_token", return_value=(username, ""))

	    @staticmethod
	    def _submission(agent_name="TestBot", hf_token="hf_test"):
	        """Return a JSON body for a winning RL game; a falsy token is omitted."""
	        fields = {
	            "agent_name": agent_name, "agent_type": "RL", "opponent": "Normal",
	            "result": "win", "ticks": 2000, "kills_cost": 1000,
	            "deaths_cost": 500, "assets_value": 5000,
	        }
	        if hf_token:
	            fields["hf_token"] = hf_token
	        return json.dumps(fields)

	    def test_below_threshold_message(self, tmp_path):
	        """The first verified game reports 1/5 and four games remaining."""
	        games_path = tmp_path / "games.jsonl"
	        data_path = tmp_path / "results.csv"

	        _submit_times.clear()
	        with patch("app.GAMES_JSONL", games_path), \
	                patch("app.DATA_PATH", data_path), \
	                patch("app.SUBMISSIONS_DIR", tmp_path), \
	                self._mock_hf_verify():
	            reply = handle_api_submit(self._submission())

	        assert "1/5" in reply
	        assert "Play 4 more" in reply
	        assert "leaderboard" in reply

	    def test_at_threshold_message(self, tmp_path):
	        """The fifth verified game reports an updated score."""
	        games_path = tmp_path / "games.jsonl"
	        data_path = tmp_path / "results.csv"

	        _submit_times.clear()
	        with patch("app.GAMES_JSONL", games_path), \
	                patch("app.DATA_PATH", data_path), \
	                patch("app.SUBMISSIONS_DIR", tmp_path), \
	                self._mock_hf_verify():
	            body = self._submission()
	            # Four warm-up submissions, then the fifth whose reply is checked.
	            for _ in range(4):
	                handle_api_submit(body)
	            reply = handle_api_submit(body)

	        assert "updated" in reply
	        assert "5 games" in reply
	        assert "score" in reply

	    def test_singular_remaining_game(self, tmp_path):
	        """When only 1 game remains, message should say 'game' not 'games'."""
	        games_path = tmp_path / "games.jsonl"
	        data_path = tmp_path / "results.csv"

	        _submit_times.clear()
	        with patch("app.GAMES_JSONL", games_path), \
	                patch("app.DATA_PATH", data_path), \
	                patch("app.SUBMISSIONS_DIR", tmp_path), \
	                self._mock_hf_verify():
	            body = self._submission()
	            # Three warm-up submissions, then the fourth whose reply is checked.
	            for _ in range(3):
	                handle_api_submit(body)
	            reply = handle_api_submit(body)

	        assert "4/5" in reply
	        assert "Play 1 more game " in reply  # No 's' — singular

	    def test_progress_increments(self, tmp_path):
	        """Each submission should increment the game count."""
	        games_path = tmp_path / "games.jsonl"
	        data_path = tmp_path / "results.csv"

	        _submit_times.clear()
	        with patch("app.GAMES_JSONL", games_path), \
	                patch("app.DATA_PATH", data_path), \
	                patch("app.SUBMISSIONS_DIR", tmp_path), \
	                self._mock_hf_verify():
	            body = self._submission()
	            replies = [handle_api_submit(body) for _ in range(3)]

	        assert "1/5" in replies[0]
	        assert "2/5" in replies[1]
	        assert "3/5" in replies[2]

	    def test_anonymous_message(self, tmp_path):
	        """No hf_token → anonymous message."""
	        games_path = tmp_path / "games.jsonl"
	        data_path = tmp_path / "results.csv"

	        _submit_times.clear()
	        with patch("app.GAMES_JSONL", games_path), \
	                patch("app.DATA_PATH", data_path), \
	                patch("app.SUBMISSIONS_DIR", tmp_path):
	            reply = handle_api_submit(
	                self._submission(agent_name="AnonBot", hf_token="")
	            )

	        assert "anonymous" in reply.lower()
	        assert "AnonBot" in reply
	        assert "HF token" in reply


	class TestHandleUploadAggregation:
	    """Checks on the messages returned by the UI handler ``handle_upload``."""

	    class _UploadedFile:
	        """Minimal stand-in for an uploaded file: exposes only ``name``."""

	        def __init__(self, p):
	            self.name = str(p)

	    def _valid_data(self, hf_token="hf_test"):
	        """Return a winning UploadBot submission; a falsy token is omitted."""
	        payload = dict(
	            agent_name="UploadBot",
	            agent_type="LLM",
	            opponent="Easy",
	            result="win",
	            ticks=3000,
	            kills_cost=2000,
	            deaths_cost=800,
	            assets_value=7000,
	        )
	        if hf_token:
	            payload["hf_token"] = hf_token
	        return payload

	    def _mock_hf_verify(self, username="testuser"):
	        """Return a patcher that makes ``_verify_hf_token`` yield ``username``."""
	        return patch("app._verify_hf_token", return_value=(username, ""))

	    def _write_upload(self, tmp_path, payload):
	        """Write ``payload`` as JSON under ``tmp_path`` and wrap it for upload."""
	        json_path = tmp_path / "upload.json"
	        json_path.write_text(json.dumps(payload))
	        return self._UploadedFile(json_path)

	    def test_upload_below_threshold_message(self, tmp_path):
	        """Upload handler should show progress message below threshold."""
	        games_path = tmp_path / "games.jsonl"
	        data_path = tmp_path / "results.csv"
	        upload = self._write_upload(tmp_path, self._valid_data())

	        _submit_times.clear()
	        with patch("app.GAMES_JSONL", games_path), \
	                patch("app.DATA_PATH", data_path), \
	                patch("app.SUBMISSIONS_DIR", tmp_path), \
	                self._mock_hf_verify():
	            msg, _board = handle_upload(upload, None)

	        assert "1/5" in msg
	        assert "UploadBot" in msg
	        assert "leaderboard" in msg

	    def test_upload_at_threshold_message(self, tmp_path):
	        """Upload handler should show score when threshold reached."""
	        games_path = tmp_path / "games.jsonl"
	        data_path = tmp_path / "results.csv"
	        payload = self._valid_data()

	        # Pre-seed 4 verified games: token removed, username attached.
	        for _ in range(4):
	            seeded = {
	                key: value for key, value in payload.items()
	                if key != "hf_token"
	            }
	            seeded["hf_username"] = "testuser"
	            with open(games_path, "a") as handle:
	                handle.write(json.dumps(seeded) + "\n")

	        upload = self._write_upload(tmp_path, payload)

	        _submit_times.clear()
	        with patch("app.GAMES_JSONL", games_path), \
	                patch("app.DATA_PATH", data_path), \
	                patch("app.SUBMISSIONS_DIR", tmp_path), \
	                self._mock_hf_verify():
	            msg, _board = handle_upload(upload, None)

	        assert "updated" in msg
	        assert "5 games" in msg
	        assert "score" in msg

	    def test_upload_anonymous_message(self, tmp_path):
	        """Upload without HF token should show anonymous message."""
	        games_path = tmp_path / "games.jsonl"
	        data_path = tmp_path / "results.csv"
	        upload = self._write_upload(tmp_path, self._valid_data(hf_token=""))

	        _submit_times.clear()
	        with patch("app.GAMES_JSONL", games_path), \
	                patch("app.DATA_PATH", data_path), \
	                patch("app.SUBMISSIONS_DIR", tmp_path):
	            msg, _board = handle_upload(upload, None)

	        assert "anonymous" in msg.lower()
	        assert "UploadBot" in msg


	# ── HF Identity & Verification Tests ────────────────────────────────────────


	class TestVerifyHfToken:
	    """Test HF token verification via ``_verify_hf_token``.

	    ``_verify_hf_token`` is expected to return a ``(username, error)``
	    pair.  Where a Hub lookup is needed, ``huggingface_hub.HfApi`` is
	    patched so that constructing it returns a local stub object.
	    """

	    def test_empty_token_returns_empty(self):
	        """An empty token gives an empty username and a "no token" error."""
	        username, err = _verify_hf_token("")
	        assert username == ""
	        assert "no token" in err

	    def test_none_token_returns_empty(self):
	        """``None`` as token gives an empty username."""
	        username, err = _verify_hf_token(None)
	        assert username == ""

	    def test_valid_token_returns_username(self):
	        """The ``name`` reported by ``whoami`` is returned with no error."""

	        class MockHfApi:
	            def whoami(self, token):
	                return {"name": "alice"}

	        with patch("huggingface_hub.HfApi", return_value=MockHfApi()):
	            username, err = _verify_hf_token("hf_valid_token")
	        assert username == "alice"
	        assert err == ""

	    def test_invalid_token_returns_error(self):
	        """An exception from ``whoami`` gives an "invalid token" error."""

	        class MockHfApi:
	            def whoami(self, token):
	                # Stand-in for the failure raised when the Hub rejects a token.
	                raise Exception("401 Unauthorized")

	        with patch("huggingface_hub.HfApi", return_value=MockHfApi()):
	            username, err = _verify_hf_token("hf_bad_token")
	        assert username == ""
	        assert "invalid token" in err


	class TestProcessIdentity:
	    """Checks for ``_process_identity``.

	    The helper receives the submission dict, returns a
	    ``(username, warning)`` pair and records ``hf_username`` on the dict.
	    """

	    def test_with_valid_token(self):
	        """A verified token yields the username and is removed from the dict."""
	        submission = {"agent_name": "Bot", "hf_token": "hf_test"}
	        verifier = patch("app._verify_hf_token", return_value=("alice", ""))
	        with verifier:
	            who, note = _process_identity(submission)
	        assert who == "alice"
	        assert note == ""
	        assert submission["hf_username"] == "alice"
	        # The raw token must not stay on the submission.
	        assert "hf_token" not in submission

	    def test_with_invalid_token(self):
	        """A rejected token yields no username plus a warning text."""
	        submission = {"agent_name": "Bot", "hf_token": "hf_bad"}
	        verifier = patch("app._verify_hf_token", return_value=("", "invalid"))
	        with verifier:
	            who, note = _process_identity(submission)
	        assert who == ""
	        # Either wording is accepted for the warning.
	        assert "anonymous" in note.lower() or "failed" in note.lower()
	        assert submission["hf_username"] == ""

	    def test_without_token(self):
	        """With no token at all: empty username, empty warning."""
	        submission = {"agent_name": "Bot"}
	        who, note = _process_identity(submission)
	        assert who == ""
	        assert note == ""
	        assert submission["hf_username"] == ""


	class TestVerifiedBadge:
	    """Rendering checks for ``_verified_badge`` with boolean and string flags."""

	    def test_verified_badge_green(self):
	        """``True`` renders the "Verified" label with colour ``#4caf50``."""
	        html = _verified_badge(True)
	        assert "#4caf50" in html
	        assert "Verified" in html

	    def test_unverified_badge_orange(self):
	        """``False`` renders the "Unverified" label with colour ``#ff9800``."""
	        html = _verified_badge(False)
	        assert "#ff9800" in html
	        assert "Unverified" in html

	    def test_string_true(self):
	        """The string ``"True"`` is treated as verified."""
	        html = _verified_badge("True")
	        assert "Verified" in html
	        assert "Unverified" not in html

	    def test_string_false(self):
	        """The string ``"False"`` is treated as unverified."""
	        html = _verified_badge("False")
	        assert "Unverified" in html


	class TestSingleGameRow:
	    """Checks for ``_single_game_row`` on game records with an empty ``hf_username``."""

	    def test_creates_unverified_row(self):
	        """A single won game gives an unverified row: 1 game, 100% wins, positive score."""
	        record = dict(
	            agent_name="AnonBot",
	            agent_type="LLM",
	            opponent="Normal",
	            result="win",
	            win=True,
	            kills_cost=3000,
	            deaths_cost=1000,
	            assets_value=8000,
	            ticks=2000,
	            timestamp="2026-03-02T10:00:00Z",
	            hf_username="",
	        )
	        built = _single_game_row(record)
	        assert built["agent_name"] == "AnonBot"
	        assert built["verified"] is False
	        assert built["hf_username"] == ""
	        assert built["games"] == 1
	        assert built["win_rate"] == 100.0
	        assert built["score"] > 0

	    def test_loss_row(self):
	        """A single lost game gives a 0% win rate and carries the opponent as difficulty."""
	        record = dict(
	            agent_name="Bot",
	            agent_type="RL",
	            opponent="Hard",
	            result="lose",
	            win=False,
	            kills_cost=500,
	            deaths_cost=3000,
	            assets_value=2000,
	            ticks=5000,
	            timestamp="2026-03-02",
	            hf_username="",
	        )
	        built = _single_game_row(record)
	        assert built["win_rate"] == 0.0
	        assert built["difficulty"] == "Hard"


	class TestSaveRawGameStripsToken:
	    """Ensure ``_save_raw_game`` never writes ``hf_token`` to the games file."""

	    def test_token_stripped(self, tmp_path):
	        """The saved record keeps ``hf_username`` and has no ``hf_token`` key."""
	        target = tmp_path / "games.jsonl"
	        record = {
	            "agent_name": "Bot",
	            "hf_token": "hf_secret_token_123",
	            "hf_username": "alice",
	        }

	        with patch("app.GAMES_JSONL", target):
	            with patch("app.SUBMISSIONS_DIR", tmp_path):
	                _save_raw_game(record)

	        # The file is parsed as one JSON document after trimming whitespace.
	        written = target.read_text().strip()
	        stored = json.loads(written)
	        assert "hf_token" not in stored
	        assert stored["hf_username"] == "alice"


	class TestBuildResponse:
	"""Test response message builder."""

	def test_anonymous_response(self):
	    """Empty username and empty warning produce the anonymous message."""
	    reply = _build_response("Bot", "RL", "Normal", "", "")
	    lowered = reply.lower()
	    assert "anonymous" in lowered
	    assert "HF token" in reply

	def test_anonymous_with_warning(self):
	    """A supplied warning text is echoed in the anonymous message."""
	    reply = _build_response("Bot", "RL", "Normal", "", "Token failed.")
	    assert "Token failed" in reply
	    lowered = reply.lower()
	    assert "anonymous" in lowered

	def test_verified_below_threshold(self, tmp_path):
	    """With two stored games for ``alice`` the message reports 2/5 and 3 more to play."""
	    games_path = tmp_path / "games.jsonl"

	    # Pre-seed 2 games: the same record, one JSON line each.
	    seeded = {
	        "agent_name": "Bot", "agent_type": "RL", "opponent": "Normal",
	        "hf_username": "alice", "win": True, "kills_cost": 1000,
	        "deaths_cost": 500, "assets_value": 5000, "ticks": 2000,
	    }
	    row = json.dumps(seeded) + "\n"
	    with open(games_path, "a") as f:
	        f.writelines([row, row])

	    with patch("app.GAMES_JSONL", games_path):
	        msg = _build_response("Bot", "RL", "Normal", "alice", "")
	    assert "2/5" in msg
	    assert "Play 3 more" in msg