"""
RTB Bidding Algorithm Comparison Framework
===========================================
Runs all bidding algorithms on first-price auction simulations
and produces comprehensive comparison results.
Algorithms:
- DualOGD: Lagrangian dual + online gradient descent (Wang et al. 2023)
- TwoSidedDual: Budget cap + spend floor (k% minimum)
- ValueShading: Value shading for first-price
- RLB: MDP-based reinforcement learning (Cai et al. 2017)
- Linear: Proportional bidding baseline
- Threshold: Fixed-bid-if-pCTR baseline
"""
import sys
import os
import json
import time
import numpy as np
import pandas as pd
from datasets import load_dataset
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
# Make the repo root importable so the `src.*` imports below resolve
# (three dirname() calls: src/benchmark/ -> src/ -> repo root)
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))


def load_and_prepare_data(max_rows=100000):
    """Load Criteo_x4 and prepare features + labels."""
    print("=" * 70)
    print("LOADING CRITEO DATA")
    print("=" * 70)
    ds = load_dataset("reczoo/Criteo_x4", split="train", streaming=True)
    rows = []
    for i, row in enumerate(ds):
        if i >= max_rows:
            break
        rows.append(row)
    df = pd.DataFrame(rows)
    print(f"Loaded {len(df)} rows, CTR: {df['Label'].mean():.4f}")

    # Feature columns
    dense_cols = [f'I{i}' for i in range(1, 14)]
    sparse_cols = [f'C{i}' for i in range(1, 27)]

    # Handle missing values
    for col in dense_cols:
        df[col] = df[col].fillna(df[col].median())
    for col in sparse_cols:
        df[col] = df[col].fillna("MISSING")

    # Encode sparse categoricals as integer ids
    for col in sparse_cols:
        le = LabelEncoder()
        df[col] = le.fit_transform(df[col].astype(str))

    # Standardize dense features
    scaler = StandardScaler()
    dense_data = scaler.fit_transform(df[dense_cols].values)
    for i, col in enumerate(dense_cols):
        df[col] = dense_data[:, i]

    # Standardize the encoded sparse ids as well
    sparse_data = df[sparse_cols].values.astype(np.float32)
    sparse_data = (sparse_data - sparse_data.mean(axis=0)) / (sparse_data.std(axis=0) + 1e-8)
    for i, col in enumerate(sparse_cols):
        df[col] = sparse_data[:, i]

    feature_cols = dense_cols + sparse_cols
    X = df[feature_cols].values.astype(np.float32)
    y = df['Label'].values.astype(np.float32)

    # Train/test split
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42
    )
    return X_train, X_test, y_train, y_test, df, feature_cols
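

# Minimal synthetic stand-in for load_and_prepare_data() (an illustrative
# sketch, not part of the benchmark): handy for smoke-testing the pipeline
# without downloading Criteo. It assumes only that downstream code needs the
# four arrays; 39 features mirror Criteo's 13 dense + 26 sparse columns, and
# the 3% positive rate is an arbitrary low CTR.
def _make_synthetic_split(n=1000, seed=0):
    rng = np.random.default_rng(seed)
    X = rng.normal(size=(n, 39)).astype(np.float32)
    y = (rng.random(n) < 0.03).astype(np.float32)
    return train_test_split(X, y, test_size=0.3, random_state=seed)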


def train_ctr_model(X_train, y_train):
    """Train a CTR prediction model (logistic regression baseline)."""
    print("\n" + "=" * 70)
    print("TRAINING CTR MODEL")
    print("=" * 70)
    model = LogisticRegression(max_iter=500, C=0.1, random_state=42)
    model.fit(X_train, y_train)
    train_auc = roc_auc_score_safe(y_train, model.predict_proba(X_train)[:, 1])
    print(f"Train AUC: {train_auc:.4f}")
    return model


def roc_auc_score_safe(y_true, y_pred):
    """AUC that degrades gracefully when only one class is present."""
    from sklearn.metrics import roc_auc_score
    if len(np.unique(y_true)) < 2:
        return 0.5
    return roc_auc_score(y_true, y_pred)
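

# Illustrative sketch of the dual/OGD bid rule named in the module docstring
# (an assumption about the general technique, not the imported DualOGDBidder):
# shade value by a dual price lambda and take projected gradient steps on
# lambda against the per-round budget rho = B / T. Names and eta are
# hypothetical.
def _dual_ogd_bid_sketch(value, lam, last_spend, rho, eta=0.01):
    bid = value / (1.0 + lam)                        # dual-shaded bid
    lam = max(0.0, lam + eta * (last_spend - rho))   # raise lambda when overspending
    return bid, lam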


def run_benchmark(
    X_test, y_test, ctr_model,
    budget=5000.0,
    T=10000,
    value_per_click=50.0,
    k=0.8,  # minimum spend fraction for TwoSidedDual
    n_runs=3,
    seed=42,
):
    """Run all bidding algorithms on the same auctions and compare."""
    print("\n" + "=" * 70)
    print("RUNNING BIDDING BENCHMARK")
    print("=" * 70)
    print(f"Budget: {budget}, T: {T}, Value/Click: {value_per_click}")
    print(f"Minimum spend: {k*100:.0f}%, Runs: {n_runs}")

    from src.benchmark.auction_simulator import FirstPriceAuctionSimulator
    from src.algorithms.dual_ogd import DualOGDBidder, TwoSidedDualBidder
    from src.algorithms.baselines import LinearBidder, ThresholdBidder, ValueShadingBidder, RLBBidder

    # Get CTR predictions
    pctr_test = ctr_model.predict_proba(X_test)[:, 1]
    print(f"pCTR range: [{pctr_test.min():.4f}, {pctr_test.max():.4f}]")
    print(f"pCTR mean: {pctr_test.mean():.4f}")

    all_results = {}
    for run in range(n_runs):
        run_seed = seed + run
        print(f"\n--- Run {run + 1}/{n_runs} (seed={run_seed}) ---")

        # Create a fresh simulator for each run
        sim = FirstPriceAuctionSimulator(
            features=X_test[:T],
            pctr_true=pctr_test[:T],
            click_labels=y_test[:T],
            value_per_click=value_per_click,
            market_price_config={
                'base_mean': 20.0,
                'ctr_correlation': 10.0,
                'noise_std': 0.6,
            },
            seed=run_seed,
        )

        # Define algorithms (fresh instances so state does not leak across runs)
        algorithms = {
            'DualOGD': DualOGDBidder(budget, T, value_per_click),
            'TwoSidedDual': TwoSidedDualBidder(budget, T, value_per_click, k=k),
            'ValueShading': ValueShadingBidder(budget, T, value_per_click),
            'RLB': RLBBidder(budget, T, value_per_click),
            'Linear': LinearBidder(20.0, float(pctr_test.mean())),
            'Threshold': ThresholdBidder(0.3, 30.0),
        }

        # Reset budgets defensively (constructors should already set these)
        for algo in algorithms.values():
            if hasattr(algo, 'B'):
                algo.B = budget
                algo.remaining_budget = budget

        # Run all algorithms on the same auction sequence
        run_results = sim.run_comparison(algorithms)
        for name, results in run_results.items():
            if name not in all_results:
                all_results[name] = []
            all_results[name].append(results)

    return all_results, pctr_test
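

# Interface assumption documented for clarity: sim.run_comparison() is expected
# to return {algorithm_name: results_dict} where results_dict carries
# 'total_clicks' and, optionally, 'cpc', 'budget_used_frac', and 'win_rate';
# aggregate_results() below reads exactly these keys and uses .get() defaults
# for the optional ones.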


def aggregate_results(all_results):
    """Aggregate results across runs and print a summary table."""
    print("\n" + "=" * 70)
    print("AGGREGATED RESULTS")
    print("=" * 70)

    aggregated = {}
    for name, runs in all_results.items():
        clicks = [r['total_clicks'] for r in runs]
        cpc = [r.get('cpc', 0) for r in runs]
        budget_used = [r.get('budget_used_frac', 0) for r in runs]
        win_rate = [r.get('win_rate', 0) for r in runs]
        aggregated[name] = {
            'clicks_mean': np.mean(clicks),
            'clicks_std': np.std(clicks),
            'cpc_mean': np.mean(cpc),
            'cpc_std': np.std(cpc),
            'budget_used_mean': np.mean(budget_used),
            'budget_used_std': np.std(budget_used),
            'win_rate_mean': np.mean(win_rate),
            'win_rate_std': np.std(win_rate),
        }

    # Print table, sorted by mean clicks (descending); separator matches the
    # 62-character header width
    print(f"\n{'Algorithm':<18} {'Clicks':>10} {'CPC':>10} {'Budget%':>10} {'WinRate':>10}")
    print("-" * 62)
    sorted_algos = sorted(aggregated.items(), key=lambda x: x[1]['clicks_mean'], reverse=True)
    for name, stats in sorted_algos:
        print(f"{name:<18} {stats['clicks_mean']:>8.0f}±{stats['clicks_std']:.0f} "
              f"{stats['cpc_mean']:>8.2f} {stats['budget_used_mean']:>8.1%} "
              f"{stats['win_rate_mean']:>8.1%}")
    return aggregated
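

# Illustrative sketch of the two-sided dual idea behind TwoSidedDualBidder
# (hedged: this is the textbook Lagrangian treatment of a budget cap plus a
# k% spend floor, not necessarily the imported implementation). lam_hi prices
# the cap, lam_lo the floor, and the bid is value shaded by the net dual
# price; all names and eta are hypothetical.
def _two_sided_dual_bid_sketch(value, lam_hi, lam_lo, last_spend, rho, k, eta=0.01):
    bid = value / max(1.0 + lam_hi - lam_lo, 1e-6)            # floor raises bids, cap lowers them
    lam_hi = max(0.0, lam_hi + eta * (last_spend - rho))      # tighten when overspending
    lam_lo = max(0.0, lam_lo + eta * (k * rho - last_spend))  # tighten when underspending
    return bid, lam_hi, lam_lo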


def main():
    import argparse
    parser = argparse.ArgumentParser(description='RTB Bidding Benchmark')
    parser.add_argument('--max_rows', type=int, default=100000)
    parser.add_argument('--budget', type=float, default=5000.0)
    parser.add_argument('--T', type=int, default=10000)
    parser.add_argument('--vpc', type=float, default=50.0)
    parser.add_argument('--k', type=float, default=0.8)
    parser.add_argument('--n_runs', type=int, default=3)
    parser.add_argument('--output', type=str, default='/app/results/benchmark_results.json')
    parser.add_argument('--seed', type=int, default=42)
    args = parser.parse_args()

    # Load data
    X_train, X_test, y_train, y_test, df, feature_cols = load_and_prepare_data(
        max_rows=args.max_rows
    )

    # Train CTR model
    ctr_model = train_ctr_model(X_train, y_train)

    # Run benchmark
    all_results, pctr_test = run_benchmark(
        X_test, y_test, ctr_model,
        budget=args.budget,
        T=args.T,
        value_per_click=args.vpc,
        k=args.k,
        n_runs=args.n_runs,
        seed=args.seed,
    )

    # Aggregate
    aggregated = aggregate_results(all_results)

    # Save results; fall back to the current directory if --output has no parent
    os.makedirs(os.path.dirname(args.output) or '.', exist_ok=True)
    output = {
        'config': {
            'max_rows': args.max_rows,
            'budget': args.budget,
            'T': args.T,
            'value_per_click': args.vpc,
            'k': args.k,
            'n_runs': args.n_runs,
            'seed': args.seed,
        },
        # np scalars are not JSON-serializable, so cast them to float
        'aggregated': {k: {kk: float(vv) if isinstance(vv, (np.floating, np.integer)) else vv
                           for kk, vv in v.items()}
                       for k, v in aggregated.items()},
        'raw_runs': {k: [{kk: float(vv) if isinstance(vv, (np.floating, np.integer)) else vv
                          for kk, vv in r.items()}
                         for r in runs]
                     for k, runs in all_results.items()},
    }
    with open(args.output, 'w') as f:
        json.dump(output, f, indent=2)
    print(f"\nResults saved to {args.output}")


if __name__ == '__main__':
    main()