FasterDFlash
/

Hanrui

Model card Files Files and versions

Hanrui / test /start_server_dflash.sh

Lekr0's picture

Add files using upload-large-folder tool

7c50656 verified 22 days ago

history blame contribute delete

1.12 kB

	#!/bin/bash
	# Launch SGLang server with DFLASH speculative decoding.
	#
	# Usage:
	#   bash start_server_dflash.sh      # tensor-parallel size defaults to 2
	#   bash start_server_dflash.sh 4    # use tp=4
	#
	# Arguments:
	#   $1 - optional tensor-parallel size (positive integer, default 2)
	#
	# Exit status:
	#   2 if the tensor-parallel size is not a positive integer; otherwise the
	#   exit status of the sglang.launch_server process.

	# Abort on command failure, on use of an unset variable, and on a failure
	# in any stage of a pipeline.
	set -euo pipefail

	# Tensor-parallel size: first positional argument, or 2 when omitted/empty.
	TP=${1:-2}

	# Reject a malformed size here so a typo fails fast with a clear message
	# instead of surfacing later inside the Python launcher. The 10# prefix
	# forces base-10 so values with leading zeros are not read as octal.
	if ! [[ "$TP" =~ ^[0-9]+$ ]] || (( 10#$TP < 1 )); then
	  echo "error: tensor-parallel size must be a positive integer, got '$TP'" >&2
	  exit 2
	fi

	# Fixed deployment settings for this host.
	readonly BASE_MODEL=/workspace/models/Qwen3-8B
	readonly DRAFT_MODEL=/workspace/models/Qwen3-8B-DFlash-b16
	readonly INTRANET_IP=10.1.1.22
	readonly PORT=30000

	# Print the effective configuration before starting the server.
	echo "============================================"
	echo " SGLang DFLASH Speculative Decoding"
	echo " target : $BASE_MODEL"
	echo " draft : $DRAFT_MODEL"
	echo " host : $INTRANET_IP:$PORT"
	echo " tp : $TP"
	echo "============================================"

	# NOTE(review): presumably lets SGLang accept a context length longer than
	# the one declared by the model config - confirm against the SGLang docs.
	export SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN=1

	# Every expansion is double-quoted so each value reaches the launcher as
	# exactly one argument, with no word-splitting or glob expansion.
	/workspace/miniconda3/envs/dflash/bin/python -m sglang.launch_server \
	  --model-path "$BASE_MODEL" \
	  --speculative-algorithm DFLASH \
	  --speculative-draft-model-path "$DRAFT_MODEL" \
	  --tp-size "$TP" \
	  --dtype bfloat16 \
	  --attention-backend fa3 \
	  --mem-fraction-static 0.30 \
	  --trust-remote-code \
	  --host "$INTRANET_IP" \
	  --port "$PORT"