"""Launch a vLLM OpenAI-compatible server for google/gemma-3n-E4B-it.

Configuration is read from the environment (optionally populated from a
.env file via python-dotenv):

  HF_TOKEN   -- required; Hugging Face token used for the model download.
  VLLM_PORT  -- optional; server port as a string (default "8000").
"""
import os
import subprocess
import sys

# Model served by the vLLM OpenAI-compatible API server.
MODEL = "google/gemma-3n-E4B-it"


def build_command(model: str, port: str) -> list[str]:
    """Return the argv list that launches the vLLM OpenAI API server.

    NOTE: the HF token is deliberately NOT placed on the command line.
    The api_server entrypoint has no ``--token`` flag (the original
    ``--token`` argument was invalid), and argv is visible to other
    users via ``ps``. The token is supplied through the ``HF_TOKEN``
    environment variable instead (see ``main``).
    """
    return [
        sys.executable, "-m", "vllm.entrypoints.openai.api_server",
        "--model", model,
        "--port", port,
        "--host", "0.0.0.0",
    ]


def main() -> int:
    """Validate the environment, launch the server, and return its exit code."""
    # Local import so the module can be imported without python-dotenv
    # installed; .env support only matters when run as a script.
    from dotenv import load_dotenv
    load_dotenv()

    port = os.environ.get("VLLM_PORT", "8000")
    hf_token = os.environ.get("HF_TOKEN")
    if not hf_token:
        print("[ERROR] Please set the HF_TOKEN environment variable for model download.")
        return 1

    print(f"[INFO] Launching vLLM server for {MODEL} on port {port}...")
    # Pass the token through the child's environment so vLLM /
    # huggingface_hub pick it up for the gated-model download.
    env = {**os.environ, "HF_TOKEN": hf_token}
    completed = subprocess.run(build_command(MODEL, port), env=env)
    # Propagate the server's exit status instead of discarding it.
    return completed.returncode


if __name__ == "__main__":
    sys.exit(main())