FasterDFlash
/

Hanrui

Model card Files Files and versions

Hanrui / test /start_server.sh

Lekr0's picture

Add files using upload-large-folder tool

7c50656 verified 22 days ago

history blame contribute delete

1.35 kB

	#!/bin/bash
	# Step 2: Launch SGLang server with STANDALONE speculative decoding.
	#
	# Usage:
	#   bash start_server.sh      # tp defaults to 2
	#   bash start_server.sh 8    # use tp=8
	#
	# Arguments:
	#   $1  Optional tp size (positive integer, default 2); forwarded to the
	#       server's --tp-size option.

	# Strict mode: stop on a failing command, on use of an unset variable,
	# and on a failure in any stage of a pipeline.
	set -euo pipefail

	TP=${1:-2}

	# Reject a malformed tp up front so a typo produces a clear message here
	# instead of an argument-parsing error from the server launcher.
	if ! [[ "$TP" =~ ^[1-9][0-9]*$ ]]; then
		echo "[ERROR] tp must be a positive integer, got: $TP" >&2
		exit 2
	fi

	# Model passed as --model-path (shown as "target" in the startup banner).
	BASE_MODEL=/workspace/models/Qwen3-8B
	# Model passed as --speculative-draft-model-path (shown as "draft"); the
	# script refuses to start when this directory is missing.
	MERGED=/workspace/hanrui/syxin_old/Specforge/outputs/qwen3-8b-sft-32gpu-v2-merged
	# Address and port handed to the server's --host / --port options.
	INTRANET_IP=10.1.1.72
	PORT=30000

	# Precondition: the merged draft model must already exist as a directory.
	# Diagnostics go to stderr so they stay visible (and separable) when the
	# script's stdout is redirected to a log file.
	if [[ ! -d "$MERGED" ]]; then
		echo "[ERROR] Merged model not found: $MERGED" >&2
		echo " Run: conda activate sglang && python3 merge_lora.py" >&2
		exit 1
	fi

	# Startup summary: echo the effective settings before the server starts,
	# framed by a rule line above and below.
	banner_rule="============================================"
	printf '%s\n' \
		"$banner_rule" \
		" SGLang STANDALONE Speculative Decoding" \
		" target : $BASE_MODEL" \
		" draft : $MERGED" \
		" host : $INTRANET_IP:$PORT" \
		" tp : $TP" \
		"$banner_rule"

	# Start the SGLang server using the interpreter from the "sglang" conda
	# environment. Arguments are collected in an array and every expansion is
	# quoted, so a value containing whitespace or glob characters reaches the
	# server as a single, unmodified argument.
	launch_args=(
		--model-path "$BASE_MODEL"
		--speculative-algorithm STANDALONE
		--speculative-draft-model-path "$MERGED"
		--speculative-num-steps 4
		--speculative-eagle-topk 1
		--speculative-num-draft-tokens 4
		--tp-size "$TP"
		--mem-fraction-static 0.30
		--trust-remote-code
		--host "$INTRANET_IP"
		--port "$PORT"
		--dtype bfloat16
	)

	# exec replaces this shell with the server process, so signals sent to the
	# script's PID are delivered to the server itself and its exit status
	# becomes the script's exit status.
	exec /workspace/miniconda3/envs/sglang/bin/python3 -m sglang.launch_server \
		"${launch_args[@]}"