"""
Bidding Algorithm Baselines for First-Price Auctions
Includes:
1. LinearBidder — bid proportional to pCTR
2. ThresholdBidder — fixed bid when pCTR exceeds a threshold
3. ValueShadingBidder — value shading for first-price auctions (bid = v / (1 + λ))
4. RLBBidder — simplified MDP-based RL bidding (Cai et al., WSDM 2017)
"""
import numpy as np
from collections import deque
class LinearBidder:
"""Simple linear bidding: bid proportional to pCTR."""
def __init__(self, base_bid, avg_pctr, name="Linear"):
self.base_bid = base_bid
self.avg_pctr = avg_pctr
self.name = name
self.total_spent = 0.0
self.remaining_budget = float('inf')
self.total_wins = 0
self.t = 0
def bid(self, pctr, features=None):
self.t += 1
if self.remaining_budget <= 0:
return 0.0
bid = self.base_bid * (pctr / max(self.avg_pctr, 1e-6))
return min(bid, self.remaining_budget)
def update(self, won, cost, pctr, d_t=None):
if won:
self.total_spent += cost
self.remaining_budget -= cost
self.total_wins += 1
def set_budget(self, budget):
self.remaining_budget = budget
def get_stats(self):
return {
'name': self.name,
'spent': float(self.total_spent),
'remaining': float(self.remaining_budget),
'wins': self.total_wins,
't': self.t,
}
class ThresholdBidder:
"""Threshold bidding: fixed bid if pCTR exceeds threshold, else skip."""
def __init__(self, threshold, bid_value, name="Threshold"):
self.threshold = threshold
self.bid_value = bid_value
self.name = name
self.total_spent = 0.0
self.remaining_budget = float('inf')
self.total_wins = 0
self.t = 0
def bid(self, pctr, features=None):
self.t += 1
if self.remaining_budget < self.bid_value:
return 0.0
return self.bid_value if pctr > self.threshold else 0.0
def update(self, won, cost, pctr, d_t=None):
if won:
self.total_spent += cost
self.remaining_budget -= cost
self.total_wins += 1
def set_budget(self, budget):
self.remaining_budget = budget
def get_stats(self):
return {
'name': self.name,
'spent': float(self.total_spent),
'remaining': float(self.remaining_budget),
'wins': self.total_wins,
't': self.t,
}
class ValueShadingBidder:
"""
Value shading for first-price auctions.
bid = v / (1 + λ) where λ is estimated from historical outcomes.
Unlike second-price auctions where you bid your true value,
in first-price auctions you shade your bid below value.
"""
def __init__(self, budget, T, value_per_click, name="ValueShading"):
self.B = budget
self.T = T
self.rho = budget / T
self.vpc = value_per_click
self.name = name
# Shading factor λ
self.lambd = 0.0
        self.epsilon = 1.0 / np.sqrt(T)  # dual step size, decays with horizon length T
self.total_spent = 0.0
self.remaining_budget = budget
self.total_wins = 0
self.t = 0
self.competing_bids = []
def bid(self, pctr, features=None):
self.t += 1
v = pctr * self.vpc
if self.remaining_budget <= 0:
return 0.0
        # Shade below value: bid ≈ v / (1 + λ), with a small extra margin of 0.1
        if len(self.competing_bids) > 0:
            avg_competing = np.mean(self.competing_bids)
            shade_factor = 1.0 / (1.0 + self.lambd + 0.1)
            bid = v * shade_factor
            # Keep the bid in a sensible band: at least half the average observed
            # competing bid, and never above 90% of value
            bid = np.clip(bid, avg_competing * 0.5, v * 0.9)
        else:
            bid = v * 0.5  # No competition data yet: start with conservative shading
return min(bid, self.remaining_budget)
def update(self, won, cost, pctr, d_t=None):
if won:
self.total_spent += cost
self.remaining_budget -= cost
self.total_wins += 1
        if d_t is not None:
            self.competing_bids.append(d_t)
        # Projected dual update of the pacing multiplier: λ rises when this round's
        # spend exceeds the per-round budget ρ = B / T (forcing heavier shading) and
        # decays toward 0 when the campaign is under-spending
        cost_feedback = cost if won else 0.0
        self.lambd = max(0.0, self.lambd - self.epsilon * (self.rho - cost_feedback))
def get_stats(self):
return {
'name': self.name,
'lambda': float(self.lambd),
'spent': float(self.total_spent),
'remaining': float(self.remaining_budget),
'wins': self.total_wins,
't': self.t,
}
class RLBBidder:
"""
Simplified RLB (Reinforcement Learning for Bidding).
Based on: Cai et al. "Real-Time Bidding by Reinforcement Learning" (WSDM 2017)
arXiv: 1701.02490
Uses a simplified MDP with discretized state space:
State = (budget_bucket, pCTR_bucket)
Action = bid multiplier
Maintains a Q-table updated via temporal difference learning.
"""
def __init__(
self,
budget,
T,
value_per_click,
n_budget_buckets=10,
n_pctr_buckets=5,
n_bid_multipliers=10,
learning_rate=0.1,
discount=0.95,
exploration_rate=0.1,
name="RLB"
):
self.B = budget
self.T = T
self.vpc = value_per_click
self.name = name
self.n_budget = n_budget_buckets
self.n_pctr = n_pctr_buckets
self.n_actions = n_bid_multipliers
# Bid multipliers: 0.1x to 2.0x of value
self.bid_multipliers = np.linspace(0.1, 2.0, n_bid_multipliers)
# Q-table: (budget_bucket, pctr_bucket, action)
self.Q = np.zeros((n_budget_buckets, n_pctr_buckets, n_bid_multipliers))
self.lr = learning_rate
self.gamma = discount
self.epsilon_greedy = exploration_rate
self.total_spent = 0.0
self.remaining_budget = budget
self.total_wins = 0
self.t = 0
# For TD learning
self.last_state = None
self.last_action = None
def _get_state(self, pctr):
"""Discretize state: (budget_ratio_bucket, pctr_bucket)."""
budget_ratio = self.remaining_budget / max(self.B, 1)
budget_bucket = min(int(budget_ratio * self.n_budget), self.n_budget - 1)
        # Assumes pCTR spans roughly [0, 1); with typical display pCTRs (<< 1)
        # most impressions land in the lowest bucket
        pctr_bucket = min(int(pctr * self.n_pctr), self.n_pctr - 1)
return (budget_bucket, pctr_bucket)
def bid(self, pctr, features=None):
self.t += 1
if self.remaining_budget <= 0:
return 0.0
state = self._get_state(pctr)
v = pctr * self.vpc
# ε-greedy action selection
if np.random.random() < self.epsilon_greedy:
action = np.random.randint(self.n_actions)
else:
action = np.argmax(self.Q[state[0], state[1], :])
self.last_state = state
self.last_action = action
bid = min(v * self.bid_multipliers[action], self.remaining_budget)
return bid
def update(self, won, cost, pctr, d_t=None):
if won:
self.total_spent += cost
self.remaining_budget -= cost
self.total_wins += 1
        # One-step Q-learning (TD) update: the reward for a won impression is its
        # expected click value pCTR * value_per_click, and 0 otherwise
        if self.last_state is not None:
            reward = (pctr * self.vpc) if won else 0.0
            new_state = self._get_state(pctr)
old_q = self.Q[self.last_state[0], self.last_state[1], self.last_action]
max_future_q = np.max(self.Q[new_state[0], new_state[1], :])
new_q = old_q + self.lr * (reward + self.gamma * max_future_q - old_q)
self.Q[self.last_state[0], self.last_state[1], self.last_action] = new_q
def get_stats(self):
return {
'name': self.name,
'spent': float(self.total_spent),
'remaining': float(self.remaining_budget),
'wins': self.total_wins,
't': self.t,
'q_table_mean': float(np.mean(self.Q)),
}
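

if __name__ == "__main__":
    # Minimal smoke test / usage sketch. The constants and the auction model below
    # are illustrative assumptions, not the project's evaluation pipeline: every
    # bidder faces the same stream of T first-price auctions with a Beta-distributed
    # pCTR and a lognormal highest competing bid, paying its own bid on a win.
    T, budget, vpc = 10_000, 500.0, 10.0

    bidders = [
        LinearBidder(base_bid=0.5, avg_pctr=0.05),
        ThresholdBidder(threshold=0.05, bid_value=0.5),
        ValueShadingBidder(budget=budget, T=T, value_per_click=vpc),
        RLBBidder(budget=budget, T=T, value_per_click=vpc),
    ]
    bidders[0].set_budget(budget)
    bidders[1].set_budget(budget)

    for bidder in bidders:
        rng = np.random.default_rng(0)  # identical auction stream for every bidder
        for _ in range(T):
            pctr = float(rng.beta(2.0, 38.0))            # mean pCTR ≈ 0.05
            competing = float(rng.lognormal(-1.0, 0.5))  # highest competing bid
            b = bidder.bid(pctr)
            won = b > competing
            cost = b if won else 0.0                     # first-price: pay your own bid
            bidder.update(won, cost, pctr, d_t=competing)
        print(bidder.get_stats())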