Hanrui / test /start_server.sh
Lekr0's picture
Add files using upload-large-folder tool
7c50656 verified
#!/bin/bash
# Step 2: Launch an SGLang server with STANDALONE speculative decoding.
#
# The base model is passed as --model-path (the target) and the merged
# model directory as --speculative-draft-model-path (the draft).
#
# Usage:
#   bash start_server.sh      # tensor-parallel size 2 (default)
#   bash start_server.sh 8    # use tp=8
#
# Arguments:
#   $1 - tensor-parallel size forwarded to --tp-size; must be a positive
#        integer. Defaults to 2 when omitted or empty.
#
# Exit status:
#   1 - the merged draft model directory does not exist
#   2 - the tensor-parallel size argument is not a positive integer
#   otherwise the exit status of the sglang.launch_server process
set -euo pipefail

readonly TP="${1:-2}"
readonly BASE_MODEL=/workspace/models/Qwen3-8B
readonly MERGED=/workspace/hanrui/syxin_old/Specforge/outputs/qwen3-8b-sft-32gpu-v2-merged
readonly INTRANET_IP=10.1.1.72
readonly PORT=30000
readonly PYTHON_BIN=/workspace/miniconda3/envs/sglang/bin/python3

# Reject a malformed tp size up front instead of letting it reach the
# launcher (or be word-split into extra arguments).
if [[ ! "$TP" =~ ^[1-9][0-9]*$ ]]; then
  printf '%s\n' "[ERROR] tp size must be a positive integer, got: $TP" >&2
  exit 2
fi

# The draft model is a local directory; fail early with a hint on how to
# produce it when it is missing.
if [[ ! -d "$MERGED" ]]; then
  printf '%s\n' \
    "[ERROR] Merged model not found: $MERGED" \
    " Run: conda activate sglang && python3 merge_lora.py" >&2
  exit 1
fi

printf '%s\n' \
  "============================================" \
  " SGLang STANDALONE Speculative Decoding" \
  " target : $BASE_MODEL" \
  " draft : $MERGED" \
  " host : $INTRANET_IP:$PORT" \
  " tp : $TP" \
  "============================================"

# Build the command as an array so every value is passed as exactly one
# argument, regardless of spaces or glob characters in it.
launch_cmd=(
  "$PYTHON_BIN" -m sglang.launch_server
  --model-path "$BASE_MODEL"
  --speculative-algorithm STANDALONE
  --speculative-draft-model-path "$MERGED"
  --speculative-num-steps 4
  --speculative-eagle-topk 1
  --speculative-num-draft-tokens 4
  --tp-size "$TP"
  --mem-fraction-static 0.30
  --trust-remote-code
  --host "$INTRANET_IP"
  --port "$PORT"
  --dtype bfloat16
)

"${launch_cmd[@]}"