DouDou committed on
Upload data3/run.sh with huggingface_hub
Browse files- data3/run.sh +12 -0
data3/run.sh
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#!/usr/bin/env bash
# run.sh — launch a vLLM OpenAI-compatible server for Qwen3 across two GPUs.
#
# All variants use tensor parallelism of 2 and YaRN rope scaling (factor 4.0,
# original context 32768) to extend the usable context window.
# Uncomment exactly one `vllm serve` line; the 32B AWQ model is the default.
set -euo pipefail

# Must be exported (a bare `VAR=...` on its own line stays local to the shell
# and would never reach the vllm process), restricting it to GPUs 0 and 1.
export CUDA_VISIBLE_DEVICES=0,1

# vllm serve Qwen/Qwen3-0.6B-GPTQ-Int8 --tensor-parallel-size 2 --dtype auto --no-enable-chunked-prefill --rope-scaling '{"rope_type":"yarn","factor":4.0,"original_max_position_embeddings":32768}' --quantization gptq --served-model-name Qwen3

# vllm serve Qwen/Qwen3-30B-A3B-GPTQ-Int4 --tensor-parallel-size 2 --dtype auto --no-enable-chunked-prefill --rope-scaling '{"rope_type":"yarn","factor":4.0,"original_max_position_embeddings":32768}' --quantization gptq --served-model-name Qwen3

# vllm serve Qwen/Qwen3-14B-MLX-8bit --tensor-parallel-size 2 --dtype auto --no-enable-chunked-prefill --rope-scaling '{"rope_type":"yarn","factor":4.0,"original_max_position_embeddings":32768}' --served-model-name Qwen3

# vllm serve Qwen/Qwen3-1.7B-GPTQ-Int8 --tensor-parallel-size 2 --dtype auto --no-enable-chunked-prefill --rope-scaling '{"rope_type":"yarn","factor":4.0,"original_max_position_embeddings":32768}' --quantization gptq --served-model-name Qwen3

vllm serve Qwen/Qwen3-32B-AWQ --tensor-parallel-size 2 --dtype auto --no-enable-chunked-prefill --rope-scaling '{"rope_type":"yarn","factor":4.0,"original_max_position_embeddings":32768}' --quantization awq --served-model-name Qwen3