""" Bidding Algorithm Baselines for First-Price Auctions Includes: 1. LinearBidder — proportional bidding based on pCTR 2. ThresholdBidder — fixed bid if pCTR above threshold 3. ValueShadingBidder — value shading for first-price (bid = v/(1+λ)) 4. RLBBidder — simplified MDP-based RL bidding (Cai et al. 2017) """ import numpy as np from collections import deque class LinearBidder: """Simple linear bidding: bid proportional to pCTR.""" def __init__(self, base_bid, avg_pctr, name="Linear"): self.base_bid = base_bid self.avg_pctr = avg_pctr self.name = name self.total_spent = 0.0 self.remaining_budget = float('inf') self.total_wins = 0 self.t = 0 def bid(self, pctr, features=None): self.t += 1 if self.remaining_budget <= 0: return 0.0 bid = self.base_bid * (pctr / max(self.avg_pctr, 1e-6)) return min(bid, self.remaining_budget) def update(self, won, cost, pctr, d_t=None): if won: self.total_spent += cost self.remaining_budget -= cost self.total_wins += 1 def set_budget(self, budget): self.remaining_budget = budget def get_stats(self): return { 'name': self.name, 'spent': float(self.total_spent), 'remaining': float(self.remaining_budget), 'wins': self.total_wins, 't': self.t, } class ThresholdBidder: """Threshold bidding: fixed bid if pCTR exceeds threshold, else skip.""" def __init__(self, threshold, bid_value, name="Threshold"): self.threshold = threshold self.bid_value = bid_value self.name = name self.total_spent = 0.0 self.remaining_budget = float('inf') self.total_wins = 0 self.t = 0 def bid(self, pctr, features=None): self.t += 1 if self.remaining_budget < self.bid_value: return 0.0 return self.bid_value if pctr > self.threshold else 0.0 def update(self, won, cost, pctr, d_t=None): if won: self.total_spent += cost self.remaining_budget -= cost self.total_wins += 1 def set_budget(self, budget): self.remaining_budget = budget def get_stats(self): return { 'name': self.name, 'spent': float(self.total_spent), 'remaining': float(self.remaining_budget), 'wins': self.total_wins, 't': self.t, } class ValueShadingBidder: """ Value shading for first-price auctions. bid = v / (1 + λ) where λ is estimated from historical outcomes. Unlike second-price auctions where you bid your true value, in first-price auctions you shade your bid below value. """ def __init__(self, budget, T, value_per_click, name="ValueShading"): self.B = budget self.T = T self.rho = budget / T self.vpc = value_per_click self.name = name # Shading factor λ self.lambd = 0.0 self.epsilon = 1.0 / np.sqrt(T) self.total_spent = 0.0 self.remaining_budget = budget self.total_wins = 0 self.t = 0 self.competing_bids = [] def bid(self, pctr, features=None): self.t += 1 v = pctr * self.vpc if self.remaining_budget <= 0: return 0.0 # Shade: bid below value based on competition if len(self.competing_bids) > 0: avg_competing = np.mean(self.competing_bids) shade_factor = 1.0 / (1.0 + self.lambd + 0.1) bid = v * shade_factor # Clamp to competing bid range bid = np.clip(bid, avg_competing * 0.5, v * 0.9) else: bid = v * 0.5 # Initial exploration return min(bid, self.remaining_budget) def update(self, won, cost, pctr, d_t=None): if won: self.total_spent += cost self.remaining_budget -= cost self.total_wins += 1 if d_t is not None: self.competing_bids.append(d_t) cost_feedback = cost if won else 0.0 self.lambd = max(0.0, self.lambd - self.epsilon * (self.rho - cost_feedback)) def get_stats(self): return { 'name': self.name, 'lambda': float(self.lambd), 'spent': float(self.total_spent), 'remaining': float(self.remaining_budget), 'wins': self.total_wins, 't': self.t, } class RLBBidder: """ Simplified RLB (Reinforcement Learning for Bidding). Based on: Cai et al. "Real-Time Bidding by Reinforcement Learning" (WSDM 2017) arXiv: 1701.02490 Uses a simplified MDP with discretized state space: State = (budget_bucket, pCTR_bucket) Action = bid multiplier Maintains a Q-table updated via temporal difference learning. """ def __init__( self, budget, T, value_per_click, n_budget_buckets=10, n_pctr_buckets=5, n_bid_multipliers=10, learning_rate=0.1, discount=0.95, exploration_rate=0.1, name="RLB" ): self.B = budget self.T = T self.vpc = value_per_click self.name = name self.n_budget = n_budget_buckets self.n_pctr = n_pctr_buckets self.n_actions = n_bid_multipliers # Bid multipliers: 0.1x to 2.0x of value self.bid_multipliers = np.linspace(0.1, 2.0, n_bid_multipliers) # Q-table: (budget_bucket, pctr_bucket, action) self.Q = np.zeros((n_budget_buckets, n_pctr_buckets, n_bid_multipliers)) self.lr = learning_rate self.gamma = discount self.epsilon_greedy = exploration_rate self.total_spent = 0.0 self.remaining_budget = budget self.total_wins = 0 self.t = 0 # For TD learning self.last_state = None self.last_action = None def _get_state(self, pctr): """Discretize state: (budget_ratio_bucket, pctr_bucket).""" budget_ratio = self.remaining_budget / max(self.B, 1) budget_bucket = min(int(budget_ratio * self.n_budget), self.n_budget - 1) pctr_bucket = min(int(pctr * self.n_pctr), self.n_pctr - 1) return (budget_bucket, pctr_bucket) def bid(self, pctr, features=None): self.t += 1 if self.remaining_budget <= 0: return 0.0 state = self._get_state(pctr) v = pctr * self.vpc # ε-greedy action selection if np.random.random() < self.epsilon_greedy: action = np.random.randint(self.n_actions) else: action = np.argmax(self.Q[state[0], state[1], :]) self.last_state = state self.last_action = action bid = min(v * self.bid_multipliers[action], self.remaining_budget) return bid def update(self, won, cost, pctr, d_t=None): if won: self.total_spent += cost self.remaining_budget -= cost self.total_wins += 1 # TD update if self.last_state is not None: reward = (pctr * self.vpc) if won else 0.0 new_state = self._get_state(pctr) # Q-learning update old_q = self.Q[self.last_state[0], self.last_state[1], self.last_action] max_future_q = np.max(self.Q[new_state[0], new_state[1], :]) new_q = old_q + self.lr * (reward + self.gamma * max_future_q - old_q) self.Q[self.last_state[0], self.last_state[1], self.last_action] = new_q def get_stats(self): return { 'name': self.name, 'spent': float(self.total_spent), 'remaining': float(self.remaining_budget), 'wins': self.total_wins, 't': self.t, 'q_table_mean': float(np.mean(self.Q)), }