DouDou committed on
Upload data3/estimate_budget.sh with huggingface_hub
Browse files- data3/estimate_budget.sh +95 -0
data3/estimate_budget.sh
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
# Quickly estimate how much data can be generated for different budgets.

# Banner, followed by the cost figures the script presents as coming from
# the user's test results. One printf argument per output line; the empty
# arguments produce the blank separator lines.
printf '%s\n' \
  '💰 OpenAI API 预算估算工具' \
  '==========================================' \
  '' \
  '基于您的测试结果:' \
  ' - 2个样本成本: $0.001819' \
  ' - 平均每样本: $0.0009095' \
  ''
|
| 11 |
+
|
| 12 |
+
# Average token counts per sample, computed from the test results.
readonly AVG_INPUT_TOKENS=1917
readonly AVG_OUTPUT_TOKENS=2552

# Standard API pricing (gpt-4o-mini), per 1M tokens.
readonly STANDARD_INPUT_PRICE=0.15
readonly STANDARD_OUTPUT_PRICE=0.60

# Batch API pricing (50% off), per 1M tokens.
readonly BATCH_INPUT_PRICE=0.075
readonly BATCH_OUTPUT_PRICE=0.30

#######################################
# Print how many whole samples a budget can pay for.
# Arguments:
#   $1 - budget
#   $2 - average input tokens per sample
#   $3 - average output tokens per sample
#   $4 - input price per 1M tokens
#   $5 - output price per 1M tokens
# Outputs:
#   The sample count, truncated to an integer, on stdout.
#   A diagnostic on stderr when the arguments are unusable.
# Returns:
#   0 on success; non-zero when the argument count is wrong, a value is not
#   numeric, or the resulting per-sample cost is not positive.
#######################################
samples_for_budget() {
  # Values are handed over as argv instead of being pasted into the Python
  # source text, so a malformed value yields an error rather than being
  # executed as code.
  python3 -c '
import sys

if len(sys.argv) != 6:
    sys.exit("samples_for_budget: expected 5 arguments")
try:
    budget, in_tokens, out_tokens, in_price, out_price = map(float, sys.argv[1:])
except ValueError as exc:
    sys.exit(f"samples_for_budget: {exc}")

cost_per_sample = (in_tokens * in_price / 1_000_000) + (out_tokens * out_price / 1_000_000)
if cost_per_sample <= 0:
    sys.exit("samples_for_budget: per-sample cost must be positive")
print(int(budget / cost_per_sample))
' "$@"
}

echo "📊 不同预算对比:"
echo "=========================================="
printf '%-15s %-15s %-15s %-15s\n' "预算" "标准API" "Batch API" "节省"
echo "----------------------------------------"

for BUDGET in 1 5 10 20 50 100; do
  # Samples affordable at standard API pricing. Abort on failure instead of
  # printing a row built from an empty count.
  STANDARD_COUNT=$(samples_for_budget "$BUDGET" \
    "$AVG_INPUT_TOKENS" "$AVG_OUTPUT_TOKENS" \
    "$STANDARD_INPUT_PRICE" "$STANDARD_OUTPUT_PRICE") || exit 1

  # Samples affordable at Batch API pricing.
  BATCH_COUNT=$(samples_for_budget "$BUDGET" \
    "$AVG_INPUT_TOKENS" "$AVG_OUTPUT_TOKENS" \
    "$BATCH_INPUT_PRICE" "$BATCH_OUTPUT_PRICE") || exit 1

  # Extra samples obtained from the same budget by using the Batch API.
  SAVINGS=$((BATCH_COUNT - STANDARD_COUNT))

  printf '%-15s %-15s %-15s %-15s\n' "\$$BUDGET" "$STANDARD_COUNT" "$BATCH_COUNT" "+$SAVINGS"
done
|
| 48 |
+
|
| 49 |
+
echo ""
echo "🎯 推荐配置 (基于 \$10 预算):"
echo "=========================================="

#######################################
# Print a detailed Batch API vs. standard API breakdown for one budget.
# Arguments:
#   $1 - budget
#   $2 - average input tokens per sample
#   $3 - average output tokens per sample
#   $4 - Batch API input price per 1M tokens
#   $5 - Batch API output price per 1M tokens
#   $6 - standard API input price per 1M tokens
#   $7 - standard API output price per 1M tokens
# Outputs:
#   The report on stdout; a diagnostic on stderr when arguments are unusable.
# Returns:
#   0 on success; non-zero when the argument count is wrong, a value is not
#   numeric, or a per-sample cost is not positive.
#######################################
print_budget_breakdown() {
  # The program is single-quoted and receives its inputs through argv, so
  # the shell performs no expansion inside it and no dollar escaping is
  # needed in the Python text.
  python3 -c '
import sys


def number(text):
    # Keep integers as int so thousands-separated totals print without ".0".
    try:
        return int(text)
    except ValueError:
        return float(text)


if len(sys.argv) != 8:
    sys.exit("print_budget_breakdown: expected 7 arguments")
try:
    budget = float(sys.argv[1])
    avg_input = number(sys.argv[2])
    avg_output = number(sys.argv[3])
    batch_input_price, batch_output_price, std_input_price, std_output_price = (
        float(price) / 1_000_000 for price in sys.argv[4:8]
    )
except ValueError as exc:
    sys.exit(f"print_budget_breakdown: {exc}")

# Batch API
batch_cost_per_sample = (avg_input * batch_input_price) + (avg_output * batch_output_price)
# Standard API
std_cost_per_sample = (avg_input * std_input_price) + (avg_output * std_output_price)
if batch_cost_per_sample <= 0 or std_cost_per_sample <= 0:
    sys.exit("print_budget_breakdown: per-sample cost must be positive")

batch_samples = int(budget / batch_cost_per_sample)
std_samples = int(budget / std_cost_per_sample)
extra_samples = batch_samples - std_samples
# A budget below one standard-price sample gives std_samples == 0; report
# n/a instead of dividing by zero.
extra_pct = f"{extra_samples / std_samples * 100:.1f}%" if std_samples else "n/a"

print("使用 Batch API:")
print(f" - 可生成样本数: {batch_samples:,}")
print(f" - 每样本成本: ${batch_cost_per_sample:.6f}")
print(f" - 总输入tokens: {batch_samples * avg_input:,}")
print(f" - 总输出tokens: {batch_samples * avg_output:,}")
print("")
print("使用标准 API:")
print(f" - 可生成样本数: {std_samples:,}")
print(f" - 每样本成本: ${std_cost_per_sample:.6f}")
print("")
print("💰 节省:")
print(f" - 多生成样本: {extra_samples:,} ({extra_pct})")
print(f" - 节省金额: ${budget * 0.5:.2f} (50%)")
' "$@"
}

# Detailed figures for the $10 budget named in the heading above.
print_budget_breakdown 10.0 \
  "$AVG_INPUT_TOKENS" "$AVG_OUTPUT_TOKENS" \
  "$BATCH_INPUT_PRICE" "$BATCH_OUTPUT_PRICE" \
  "$STANDARD_INPUT_PRICE" "$STANDARD_OUTPUT_PRICE"
|
| 85 |
+
|
| 86 |
+
# Closing advice and the follow-up estimate command. The here-doc delimiter
# is quoted so the text is emitted literally, with no shell expansion.
cat <<'EOF'

📝 使用建议:
==========================================
1. 先小规模测试 (100-1000 样本)
2. 确认质量后再大规模生成
3. 使用 --min-score 90+ 保证高质量
4. Batch API 处理时间: 通常几小时内完成

💡 运行估算命令:
 python3 generate_problems_batch.py estimate --num-requests 20000
EOF
|