# Hanrui / test /start_server_dflash.sh
# Lekr0's picture
# Add files using upload-large-folder tool
# 7c50656 verified
#!/bin/bash
# Launch an SGLang server with DFLASH speculative decoding.
#
# Usage:
#   bash start_server_dflash.sh      # tensor-parallel size defaults to 2
#   bash start_server_dflash.sh 4    # use tp=4
#
# Arguments:
#   $1 - value passed to --tp-size (positive integer, default 2)
#
# The model paths, bind address, port and Python interpreter are fixed
# constants defined below; edit them here to retarget the launch.
#
# Exit status:
#   2 if the tp argument is not a positive integer; otherwise the exit
#   status of the server process (this script is replaced by it via exec).
set -euo pipefail

TP=${1:-2}
readonly TP
readonly BASE_MODEL=/workspace/models/Qwen3-8B
readonly DRAFT_MODEL=/workspace/models/Qwen3-8B-DFlash-b16
readonly INTRANET_IP=10.1.1.22
readonly PORT=30000
readonly PYTHON_BIN=/workspace/miniconda3/envs/dflash/bin/python

# Fail fast on a malformed tp value instead of waiting for the Python
# process to start and reject it.
if [[ ! "$TP" =~ ^[1-9][0-9]*$ ]]; then
  printf 'error: tp size must be a positive integer, got: %s\n' "$TP" >&2
  exit 2
fi

echo "============================================"
echo " SGLang DFLASH Speculative Decoding"
echo " target : $BASE_MODEL"
echo " draft : $DRAFT_MODEL"
echo " host : $INTRANET_IP:$PORT"
echo " tp : $TP"
echo "============================================"

# NOTE(review): SGLang environment switch; judging by its name it permits
# overriding the context length with a longer one -- confirm against the
# SGLang documentation before relying on it.
export SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN=1

# Build the argument list as an array so every value reaches the server as
# exactly one word, with no word-splitting or glob expansion.
server_args=(
  --model-path "$BASE_MODEL"
  --speculative-algorithm DFLASH
  --speculative-draft-model-path "$DRAFT_MODEL"
  --tp-size "$TP"
  --dtype bfloat16
  --attention-backend fa3
  --mem-fraction-static 0.30
  --trust-remote-code
  --host "$INTRANET_IP"
  --port "$PORT"
)

# exec replaces this shell with the server so that signals (e.g. SIGTERM
# from a supervisor) are delivered to the server itself rather than to an
# intermediate bash process.
exec "$PYTHON_BIN" -m sglang.launch_server "${server_args[@]}"