DouDou committed on
Commit
2aa06ec
·
verified ·
1 Parent(s): fe8e53f

Upload data3/estimate_budget.sh with huggingface_hub

Browse files
Files changed (1) hide show
  1. data3/estimate_budget.sh +95 -0
data3/estimate_budget.sh ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/bin/bash
#
# Quick estimate of how many samples can be generated for different OpenAI API
# budgets, comparing standard pricing against Batch API pricing.
#
# Usage:    ./estimate_budget.sh
# Requires: python3 3.6+ (f-strings and 1_000_000-style literals are used for
#           the floating-point arithmetic that bash cannot do natively).
# Exit:     0 on success, non-zero if python3 is missing or a calculation fails.

set -euo pipefail

# Average token counts taken from the test run.
# NOTE(review): with these averages and the standard prices below, ONE sample
# costs about $0.001819, which is the figure the banner reports as the cost of
# TWO samples. Confirm whether these are per-sample averages or 2-sample totals.
readonly AVG_INPUT_TOKENS=1917
readonly AVG_OUTPUT_TOKENS=2552

# Standard API pricing (gpt-4o-mini), USD per 1M tokens.
readonly STANDARD_INPUT_PRICE=0.15
readonly STANDARD_OUTPUT_PRICE=0.60

# Batch API pricing (50% off), USD per 1M tokens.
readonly BATCH_INPUT_PRICE=0.075
readonly BATCH_OUTPUT_PRICE=0.30

# Budgets (USD) listed in the comparison table.
readonly BUDGETS=(1 5 10 20 50 100)

# Budget (USD) used for the detailed "recommended configuration" section.
readonly RECOMMENDED_BUDGET=10

#######################################
# Print a message to stderr and abort the script.
# Arguments: $* - message text
#######################################
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

#######################################
# Compute how many samples each budget buys under both pricing schemes.
# All budgets are handled by a single python3 process; values are passed as
# arguments rather than spliced into the Python source.
# Globals:   AVG_*_TOKENS, STANDARD_*_PRICE, BATCH_*_PRICE (read)
# Arguments: $@ - budgets in USD
# Outputs:   one line per budget: "<budget> <standard_count> <batch_count>"
# Returns:   python3's exit status
#######################################
budget_table_rows() {
  python3 - \
    "$AVG_INPUT_TOKENS" "$AVG_OUTPUT_TOKENS" \
    "$STANDARD_INPUT_PRICE" "$STANDARD_OUTPUT_PRICE" \
    "$BATCH_INPUT_PRICE" "$BATCH_OUTPUT_PRICE" \
    "$@" <<'PY'
import sys

avg_input, avg_output = int(sys.argv[1]), int(sys.argv[2])
std_in, std_out, batch_in, batch_out = (float(a) for a in sys.argv[3:7])


def samples(budget, input_price, output_price):
    # Prices are per 1M tokens; truncate because only whole samples count.
    avg_cost_per_sample = (avg_input * input_price / 1_000_000) + (avg_output * output_price / 1_000_000)
    return int(budget / avg_cost_per_sample)


for raw in sys.argv[7:]:
    budget = float(raw)
    print(raw, samples(budget, std_in, std_out), samples(budget, batch_in, batch_out))
PY
}

#######################################
# Print the detailed Batch-vs-standard breakdown for a single budget.
# Globals:   AVG_*_TOKENS, STANDARD_*_PRICE, BATCH_*_PRICE (read)
# Arguments: $1 - budget in USD
# Outputs:   the formatted report on stdout
# Returns:   python3's exit status
#######################################
print_recommendation() {
  python3 - \
    "$1" \
    "$AVG_INPUT_TOKENS" "$AVG_OUTPUT_TOKENS" \
    "$STANDARD_INPUT_PRICE" "$STANDARD_OUTPUT_PRICE" \
    "$BATCH_INPUT_PRICE" "$BATCH_OUTPUT_PRICE" <<'PY'
import sys

budget = float(sys.argv[1])
# Token averages stay integers so the token totals print without a ".0".
avg_input = int(sys.argv[2])
avg_output = int(sys.argv[3])
std_in, std_out, batch_in, batch_out = (float(a) for a in sys.argv[4:8])

# Batch API
batch_input_price = batch_in / 1_000_000
batch_output_price = batch_out / 1_000_000
batch_cost_per_sample = (avg_input * batch_input_price) + (avg_output * batch_output_price)
batch_samples = int(budget / batch_cost_per_sample)

# Standard API
std_input_price = std_in / 1_000_000
std_output_price = std_out / 1_000_000
std_cost_per_sample = (avg_input * std_input_price) + (avg_output * std_output_price)
std_samples = int(budget / std_cost_per_sample)

extra_samples = batch_samples - std_samples
# Guard the percentage: a budget below one standard sample gives std_samples == 0.
extra_pct = f'{extra_samples / std_samples * 100:.1f}%' if std_samples else 'n/a'

print(f'使用 Batch API:')
print(f' - 可生成样本数: {batch_samples:,}')
print(f' - 每样本成本: ${batch_cost_per_sample:.6f}')
print(f' - 总输入tokens: {batch_samples * avg_input:,}')
print(f' - 总输出tokens: {batch_samples * avg_output:,}')
print(f'')
print(f'使用标准 API:')
print(f' - 可生成样本数: {std_samples:,}')
print(f' - 每样本成本: ${std_cost_per_sample:.6f}')
print(f'')
print(f'💰 节省:')
print(f' - 多生成样本: {extra_samples:,} ({extra_pct})')
# The savings line hard-codes the 50% Batch discount; it is not derived from the prices.
print(f' - 节省金额: ${budget * 0.5:.2f} (50%)')
PY
}

#######################################
# Entry point: banner, comparison table, detailed recommendation, usage tips.
#######################################
main() {
  # Fail fast instead of printing a table built from empty values.
  command -v python3 >/dev/null 2>&1 || die "❌ 未找到 python3，无法进行估算"

  echo "💰 OpenAI API 预算估算工具"
  echo "=========================================="
  echo ""
  echo "基于您的测试结果:"
  echo " - 2个样本成本: \$0.001819"
  echo " - 平均每样本: \$0.0009095"
  echo ""

  echo "📊 不同预算对比:"
  echo "=========================================="
  printf "%-15s %-15s %-15s %-15s\n" "预算" "标准API" "Batch API" "节省"
  echo "----------------------------------------"

  # Capture first so a python3 failure aborts the script (a process
  # substitution would hide the exit status).
  local rows
  rows=$(budget_table_rows "${BUDGETS[@]}") || die "❌ 预算估算失败"

  local budget standard_count batch_count savings
  while read -r budget standard_count batch_count; do
    savings=$((batch_count - standard_count))
    printf "%-15s %-15s %-15s %-15s\n" "\$$budget" "$standard_count" "$batch_count" "+$savings"
  done <<<"$rows"

  echo ""
  echo "🎯 推荐配置 (基于 \$${RECOMMENDED_BUDGET} 预算):"
  echo "=========================================="

  # Detailed figures for the recommended budget.
  print_recommendation "$RECOMMENDED_BUDGET" || die "❌ 预算估算失败"

  echo ""
  echo "📝 使用建议:"
  echo "=========================================="
  echo "1. 先小规模测试 (100-1000 样本)"
  echo "2. 确认质量后再大规模生成"
  echo "3. 使用 --min-score 90+ 保证高质量"
  echo "4. Batch API 处理时间: 通常几小时内完成"
  echo ""
  echo "💡 运行估算命令:"
  echo " python3 generate_problems_batch.py estimate --num-requests 20000"
}

main "$@"