# Provenance (from Hugging Face): uploaded as data3/run.sh via huggingface_hub
# by user DouDou, commit a8b22df (verified).
#!/usr/bin/env bash
# Launch a quantized Qwen3 model with vLLM, tensor-parallel across two GPUs,
# with YARN rope scaling (4x over a 32768-token base context).
# Requires: vllm installed; at least two CUDA devices.
set -euo pipefail

# Restrict vLLM to the first two GPUs. Must be exported — a bare assignment
# on its own line is not inherited by the child `vllm` process.
export CUDA_VISIBLE_DEVICES=0,1

# Alternative models (uncomment one and comment out the active line below):
# vllm serve Qwen/Qwen3-0.6B-GPTQ-Int8 --tensor-parallel-size 2 --dtype auto --no-enable-chunked-prefill --rope-scaling '{"rope_type":"yarn","factor":4.0,"original_max_position_embeddings":32768}' --quantization gptq --served-model-name Qwen3
# vllm serve Qwen/Qwen3-30B-A3B-GPTQ-Int4 --tensor-parallel-size 2 --dtype auto --no-enable-chunked-prefill --rope-scaling '{"rope_type":"yarn","factor":4.0,"original_max_position_embeddings":32768}' --quantization gptq --served-model-name Qwen3
# vllm serve Qwen/Qwen3-14B-MLX-8bit --tensor-parallel-size 2 --dtype auto --no-enable-chunked-prefill --rope-scaling '{"rope_type":"yarn","factor":4.0,"original_max_position_embeddings":32768}' --served-model-name Qwen3
# vllm serve Qwen/Qwen3-1.7B-GPTQ-Int8 --tensor-parallel-size 2 --dtype auto --no-enable-chunked-prefill --rope-scaling '{"rope_type":"yarn","factor":4.0,"original_max_position_embeddings":32768}' --quantization gptq --served-model-name Qwen3

# Active model: 32B AWQ-quantized, exposed under the served name "Qwen3".
vllm serve Qwen/Qwen3-32B-AWQ --tensor-parallel-size 2 --dtype auto --no-enable-chunked-prefill --rope-scaling '{"rope_type":"yarn","factor":4.0,"original_max_position_embeddings":32768}' --quantization awq --served-model-name Qwen3