| """ |
| Bidding Algorithm Baselines for First-Price Auctions |
| |
| Includes: |
| 1. LinearBidder — proportional bidding based on pCTR |
| 2. ThresholdBidder — fixed bid if pCTR above threshold |
| 3. ValueShadingBidder — value shading for first-price (bid = v/(1+λ)) |
| 4. RLBBidder — simplified MDP-based RL bidding (Cai et al. 2017) |
| """ |
| import numpy as np |
| from collections import deque |
|
|
|
|
class LinearBidder:
    """Linear bidding baseline: scale a base bid by relative pCTR.

    The bid is ``base_bid * pctr / avg_pctr``, so impressions with an
    above-average predicted CTR get proportionally larger bids.  The
    budget is unlimited until ``set_budget`` is called.
    """

    def __init__(self, base_bid, avg_pctr, name="Linear"):
        self.base_bid = base_bid
        self.avg_pctr = avg_pctr
        self.name = name
        # Running counters maintained by bid()/update().
        self.total_spent = 0.0
        self.remaining_budget = float('inf')
        self.total_wins = 0
        self.t = 0

    def bid(self, pctr, features=None):
        """Return the bid for an impression with predicted CTR ``pctr``."""
        self.t += 1
        if self.remaining_budget > 0:
            # Guard against a zero / near-zero average pCTR in the divisor.
            ratio = pctr / max(self.avg_pctr, 1e-6)
            return min(self.base_bid * ratio, self.remaining_budget)
        return 0.0

    def update(self, won, cost, pctr, d_t=None):
        """Record the auction outcome; only won auctions cost money."""
        if not won:
            return
        self.total_spent += cost
        self.remaining_budget -= cost
        self.total_wins += 1

    def set_budget(self, budget):
        """Replace the (initially unlimited) remaining budget."""
        self.remaining_budget = budget

    def get_stats(self):
        """Return a summary dict of spend and win counters."""
        return dict(
            name=self.name,
            spent=float(self.total_spent),
            remaining=float(self.remaining_budget),
            wins=self.total_wins,
            t=self.t,
        )
|
|
|
|
class ThresholdBidder:
    """Threshold bidding baseline.

    Bids a fixed amount whenever the predicted CTR exceeds a threshold
    and the remaining budget still covers the full bid; otherwise skips
    the auction (bids 0).
    """

    def __init__(self, threshold, bid_value, name="Threshold"):
        self.threshold = threshold
        self.bid_value = bid_value
        self.name = name
        # Running counters maintained by bid()/update().
        self.total_spent = 0.0
        self.remaining_budget = float('inf')
        self.total_wins = 0
        self.t = 0

    def bid(self, pctr, features=None):
        """Return ``bid_value`` if affordable and ``pctr`` clears the threshold."""
        self.t += 1
        affordable = self.remaining_budget >= self.bid_value
        if affordable and pctr > self.threshold:
            return self.bid_value
        return 0.0

    def update(self, won, cost, pctr, d_t=None):
        """Record the auction outcome; only won auctions cost money."""
        if not won:
            return
        self.total_spent += cost
        self.remaining_budget -= cost
        self.total_wins += 1

    def set_budget(self, budget):
        """Replace the (initially unlimited) remaining budget."""
        self.remaining_budget = budget

    def get_stats(self):
        """Return a summary dict of spend and win counters."""
        return dict(
            name=self.name,
            spent=float(self.total_spent),
            remaining=float(self.remaining_budget),
            wins=self.total_wins,
            t=self.t,
        )
|
|
|
|
class ValueShadingBidder:
    """
    Value shading for first-price auctions.

    In a second-price auction the dominant strategy is to bid the true
    value v = pCTR * value_per_click.  In a first-price auction the
    winner pays its own bid, so the bid is shaded below value:

        bid = v / (1 + lambda + 0.1)

    where lambda is a pacing multiplier adapted online from spend
    feedback (a projected dual subgradient step with step size
    1/sqrt(T)): overspending relative to the per-auction target
    rho = budget / T pushes lambda up (more shading), underspending
    pulls it back toward 0.

    Parameters
    ----------
    budget : float
        Total budget for the episode.
    T : int
        Expected number of auctions; used for rho and the step size.
        Guarded with max(T, 1) so T <= 0 cannot divide by zero.
    value_per_click : float
        Monetary value of a single click.
    name : str
        Label reported by get_stats().
    """

    def __init__(self, budget, T, value_per_click, name="ValueShading"):
        self.B = budget
        self.T = T
        horizon = max(T, 1)  # keep rho/epsilon finite for T <= 0
        self.rho = budget / horizon  # target spend per auction
        self.vpc = value_per_click
        self.name = name

        # Dual (pacing) multiplier and its subgradient step size.
        self.lambd = 0.0
        self.epsilon = 1.0 / np.sqrt(horizon)

        self.total_spent = 0.0
        self.remaining_budget = budget
        self.total_wins = 0
        self.t = 0
        # Observed competing bids, kept for inspection.  A running
        # sum/count makes the mean O(1) per bid instead of the previous
        # O(n) np.mean over the whole history (O(n^2) per episode).
        self.competing_bids = []
        self._d_sum = 0.0
        self._d_count = 0

    def bid(self, pctr, features=None):
        """Return a shaded bid for an impression with predicted CTR ``pctr``."""
        self.t += 1
        v = pctr * self.vpc  # expected impression value

        if self.remaining_budget <= 0:
            return 0.0

        if self._d_count > 0:
            # O(1) running mean of observed competing bids.
            avg_competing = self._d_sum / self._d_count
            shade_factor = 1.0 / (1.0 + self.lambd + 0.1)
            bid = v * shade_factor
            # Keep the bid in a sane band: at least half the typical
            # competing bid, at most 90% of value (never bid full value
            # in a first-price auction).
            bid = np.clip(bid, avg_competing * 0.5, v * 0.9)
        else:
            # No market feedback yet: conservatively bid half of value.
            bid = v * 0.5

        return min(bid, self.remaining_budget)

    def update(self, won, cost, pctr, d_t=None):
        """Record an auction outcome and take a dual step on lambda.

        ``d_t``, when provided, is the highest competing bid observed
        for this auction (available in first-price logs).
        """
        if won:
            self.total_spent += cost
            self.remaining_budget -= cost
            self.total_wins += 1

        if d_t is not None:
            self.competing_bids.append(d_t)
            self._d_sum += d_t
            self._d_count += 1

        # Projected dual subgradient step: cost above rho raises lambda,
        # cost below rho lowers it; projection keeps lambda >= 0.
        cost_feedback = cost if won else 0.0
        self.lambd = max(0.0, self.lambd - self.epsilon * (self.rho - cost_feedback))

    def set_budget(self, budget):
        """Reset the remaining budget (consistent with the other bidder APIs)."""
        self.remaining_budget = budget

    def get_stats(self):
        """Return spend/win counters plus the current pacing multiplier."""
        return {
            'name': self.name,
            'lambda': float(self.lambd),
            'spent': float(self.total_spent),
            'remaining': float(self.remaining_budget),
            'wins': self.total_wins,
            't': self.t,
        }
|
|
|
|
class RLBBidder:
    """
    Simplified RLB (Reinforcement Learning to Bid).

    Based on: Cai et al. "Real-Time Bidding by Reinforcement Learning"
    (WSDM 2017), arXiv: 1701.02490.

    The bidding episode is modeled as an MDP over a discretized state
    space:

        state  = (remaining-budget bucket, pCTR bucket)
        action = index into a fixed grid of bid multipliers
        bid    = pctr * value_per_click * multiplier[action]

    A Q-table is learned online with epsilon-greedy exploration and a
    one-step temporal-difference (Q-learning) update.

    Parameters
    ----------
    budget : float
        Total budget for the episode.
    T : int
        Expected number of auctions (kept for API symmetry; not used
        directly by the simplified update).
    value_per_click : float
        Monetary value of a single click.
    n_budget_buckets, n_pctr_buckets, n_bid_multipliers : int
        Discretization granularity of the state and action spaces.
    learning_rate, discount, exploration_rate : float
        Q-learning step size, discount factor gamma, and epsilon for
        epsilon-greedy action selection.
    name : str
        Label reported by get_stats().
    """

    def __init__(
        self,
        budget,
        T,
        value_per_click,
        n_budget_buckets=10,
        n_pctr_buckets=5,
        n_bid_multipliers=10,
        learning_rate=0.1,
        discount=0.95,
        exploration_rate=0.1,
        name="RLB"
    ):
        self.B = budget
        self.T = T
        self.vpc = value_per_click
        self.name = name

        self.n_budget = n_budget_buckets
        self.n_pctr = n_pctr_buckets
        self.n_actions = n_bid_multipliers

        # Action space: evenly spaced multipliers on the impression value.
        self.bid_multipliers = np.linspace(0.1, 2.0, n_bid_multipliers)

        # Q[budget_bucket, pctr_bucket, action], zero-initialized.
        self.Q = np.zeros((n_budget_buckets, n_pctr_buckets, n_bid_multipliers))

        self.lr = learning_rate
        self.gamma = discount
        self.epsilon_greedy = exploration_rate

        self.total_spent = 0.0
        self.remaining_budget = budget
        self.total_wins = 0
        self.t = 0

        # (state, action) of the most recent bid, consumed by update().
        self.last_state = None
        self.last_action = None

    def _get_state(self, pctr):
        """Discretize (budget ratio, pctr) into clamped bucket indices.

        Previously ``max(self.B, 1)`` distorted the ratio for budgets
        below 1, and out-of-range inputs (pctr < 0, negative budget
        drift) could yield negative indices that silently wrap around
        the Q-table; both buckets are now clamped into valid range.
        """
        budget_ratio = self.remaining_budget / max(self.B, 1e-12)
        budget_bucket = min(int(budget_ratio * self.n_budget), self.n_budget - 1)
        budget_bucket = max(budget_bucket, 0)
        pctr_bucket = min(int(pctr * self.n_pctr), self.n_pctr - 1)
        pctr_bucket = max(pctr_bucket, 0)
        return (budget_bucket, pctr_bucket)

    def bid(self, pctr, features=None):
        """Choose an action epsilon-greedily and return the resulting bid."""
        self.t += 1

        if self.remaining_budget <= 0:
            return 0.0

        state = self._get_state(pctr)
        v = pctr * self.vpc

        # Epsilon-greedy action selection over the Q-table row.
        if np.random.random() < self.epsilon_greedy:
            action = np.random.randint(self.n_actions)
        else:
            action = int(np.argmax(self.Q[state[0], state[1], :]))

        # Remember the transition start for the TD update in update().
        self.last_state = state
        self.last_action = action

        # Never bid more than what is left in the budget.
        return min(v * self.bid_multipliers[action], self.remaining_budget)

    def update(self, won, cost, pctr, d_t=None):
        """Book-keep the outcome and apply a one-step Q-learning update.

        Reward is the expected click value when the auction was won and
        zero otherwise.  The "next state" is approximated by the state
        after the budget change, re-using the same pctr.
        """
        if won:
            self.total_spent += cost
            self.remaining_budget -= cost
            self.total_wins += 1

        if self.last_state is not None:
            reward = (pctr * self.vpc) if won else 0.0
            new_state = self._get_state(pctr)

            # Standard Q-learning temporal-difference update.
            s0, s1 = self.last_state
            old_q = self.Q[s0, s1, self.last_action]
            max_future_q = np.max(self.Q[new_state[0], new_state[1], :])
            self.Q[s0, s1, self.last_action] = old_q + self.lr * (
                reward + self.gamma * max_future_q - old_q
            )

    def set_budget(self, budget):
        """Reset the remaining budget (consistent with the other bidder APIs)."""
        self.remaining_budget = budget

    def get_stats(self):
        """Return spend/win counters plus the mean Q-value as a learning signal."""
        return {
            'name': self.name,
            'spent': float(self.total_spent),
            'remaining': float(self.remaining_budget),
            'wins': self.total_wins,
            't': self.t,
            'q_table_mean': float(np.mean(self.Q)),
        }
|
|