#1 by sushiwill - opened

- README.md +0 -2
- app.py +58 -63
- requirements.txt +1 -2
README.md
CHANGED

@@ -7,8 +7,6 @@ sdk: gradio
 sdk_version: 5.49.0
 app_file: app.py
 pinned: false
-hf_oauth: true
-hf_oauth_expiration_minutes: 480
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py
CHANGED

@@ -6,7 +6,6 @@ import time
 import uuid
 import subprocess
 import requests
-
 from typing import List, Dict, Any, Iterator
 
 from dotenv import load_dotenv

@@ -26,8 +25,6 @@ from agentflow.models.utils import make_json_serializable_truncated
 from pathlib import Path
 from huggingface_hub import CommitScheduler
 
-import spaces
-
 # Get Huggingface token from environment variable
 HF_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
 
@@ -59,68 +56,67 @@ scheduler = CommitScheduler(
 
 ########### vLLM Service Management ###########
 VLLM_MODEL_NAME = "AgentFlow/agentflow-planner-7b"
-VLLM_PORT = …
-VLLM_HOST = …
+VLLM_PORT = 8000
+VLLM_HOST = "localhost"
 VLLM_PROCESS = None
 
 def check_vllm_service() -> bool:
     """Check if vLLM service is running"""
     try:
-        response = requests.get(f"{VLLM_HOST}/v1/models", timeout=2)
+        response = requests.get(f"http://{VLLM_HOST}:{VLLM_PORT}/v1/models", timeout=2)
         return response.status_code == 200
     except:
         return False
 
-# … (commented-out start_vllm_service / stop_vllm_service code) …
-# print("🛑 vLLM service stopped")
+def start_vllm_service() -> bool:
+    """Start vLLM service in background"""
+    global VLLM_PROCESS
+
+    if check_vllm_service():
+        print(f"🟢 vLLM service already running on port {VLLM_PORT}")
+        return True
+
+    try:
+        print(f"🚀 Starting vLLM service for {VLLM_MODEL_NAME}...")
+
+        # Start vLLM server in background
+        VLLM_PROCESS = subprocess.Popen(
+            [
+                "vllm", "serve", VLLM_MODEL_NAME,
+                "--port", str(VLLM_PORT),
+                "--host", VLLM_HOST
+            ],
+            text=True
+        )
+
+        # Wait for service to be ready (max 60 seconds)
+        for i in range(180):
+            time.sleep(1)
+            if check_vllm_service():
+                print(f"🟢 vLLM service started successfully on port {VLLM_PORT}")
+                return True
+
+        print("⚠️ vLLM service failed to start within 60 seconds")
+        return False
+
+    except Exception as e:
+        print(f"❌ Failed to start vLLM service: {e}")
+        return False
+
+def stop_vllm_service():
+    """Stop vLLM service if running"""
+    global VLLM_PROCESS
+    if VLLM_PROCESS:
+        VLLM_PROCESS.terminate()
+        VLLM_PROCESS.wait()
+        print("🛑 vLLM service stopped")
 
 def get_vllm_status() -> str:
     """Get vLLM service status message"""
     if check_vllm_service():
-        return "…"
+        return f"🟢 vLLM service running on port {VLLM_PORT}"
     else:
-        return "⚠️ vLLM service not running"
+        return f"⚠️ vLLM service not running"
 
 ########### End of vLLM Service Management ###########
 
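One note on the hunk above: the health check polls vLLM's OpenAI-compatible `/v1/models` endpoint, but the readiness loop runs `range(180)` with `time.sleep(1)`, so it actually waits up to ~180 seconds even though the comment and failure message say 60. A minimal sketch of the same pattern with the timeout made explicit (`vllm_is_up` and `wait_for_vllm` are hypothetical names, not part of this commit):

```python
import time
import requests

VLLM_HOST = "localhost"
VLLM_PORT = 8000

def vllm_is_up(host: str = VLLM_HOST, port: int = VLLM_PORT) -> bool:
    """True if an OpenAI-compatible server answers on /v1/models."""
    try:
        r = requests.get(f"http://{host}:{port}/v1/models", timeout=2)
        return r.status_code == 200
    except requests.RequestException:  # narrower than the bare except above
        return False

def wait_for_vllm(timeout_s: float = 180.0, poll_s: float = 1.0) -> bool:
    """Poll until the server reports ready or the timeout elapses."""
    deadline = time.monotonic() + timeout_s
    while time.monotonic() < deadline:
        if vllm_is_up():
            return True
        time.sleep(poll_s)
    return False
```

With a helper like this, the startup path reduces to `start process, then wait_for_vllm()`, and the printed messages can't drift out of sync with the actual timeout.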
@@ -486,7 +482,7 @@ def parse_arguments():
     parser.add_argument("--openai_api_source", default="we_provided", choices=["we_provided", "user_provided"], help="Source of OpenAI API key.")
     return parser.parse_args()
 
-
+
 def solve_problem_gradio(user_query, max_steps=10, max_time=60, llm_model_engine=None, enabled_tools=None):
     """
     Wrapper function to connect the solver to Gradio.
@@ -537,8 +533,7 @@ def solve_problem_gradio(user_query, max_steps=10, max_time=60, llm_model_engine
         toolbox_metadata=initializer.toolbox_metadata,
         available_tools=initializer.available_tools,
         verbose=False,
-        temperature=0.7,
-        base_url=f"{VLLM_HOST}/v1"
+        temperature=0.7
     )
 
     # Instantiate Memory
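Dropping `base_url=f"{VLLM_HOST}/v1"` is consistent with the host change earlier in the commit: with the new `VLLM_HOST = "localhost"`, that f-string would expand to `localhost/v1`, which has no scheme or port. If the engine still needed to be pointed at the local server explicitly, the full form would look like this (a sketch built from the commit's constants, not a line in the commit):

```python
# Hypothetical: full OpenAI-compatible endpoint for the local vLLM server.
VLLM_HOST = "localhost"
VLLM_PORT = 8000
base_url = f"http://{VLLM_HOST}:{VLLM_PORT}/v1"  # -> "http://localhost:8000/v1"
```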
@@ -827,15 +822,15 @@ if __name__ == "__main__":
     # NOTE: Use the same name for the query cache directory as the dataset directory
     args.root_cache_dir = DATASET_DIR.name
 
-    # … (commented-out vLLM startup code) …
+    # Start vLLM service
+    print("=" * 60)
+    print("🔍 Checking vLLM service status...")
+    if not check_vllm_service():
+        print(f"⚠️ vLLM service not running. Starting {VLLM_MODEL_NAME}...")
+        start_vllm_service()
+    else:
+        print(f"✅ vLLM service is already running on port {VLLM_PORT}")
+    print("=" * 60)
 
     # Register cleanup function
     # atexit.register(stop_vllm_service)
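`atexit.register(stop_vllm_service)` stays commented out, so a vLLM subprocess started by `start_vllm_service` is never torn down explicitly when the app exits. A self-contained sketch of what enabling that hook would look like (the stub mirrors the commit's own `stop_vllm_service`):

```python
import atexit
import subprocess
from typing import Optional

VLLM_PROCESS: Optional[subprocess.Popen] = None  # set by start_vllm_service()

def stop_vllm_service() -> None:
    """Terminate the background vLLM server if this process started it."""
    if VLLM_PROCESS is not None:
        VLLM_PROCESS.terminate()
        VLLM_PROCESS.wait()
        print("🛑 vLLM service stopped")

# The line the commit leaves commented out:
atexit.register(stop_vllm_service)
```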
requirements.txt
CHANGED

@@ -19,5 +19,4 @@ dashscope==1.24.2
 gradio
 # litellm==2.1.1
 # ollama==0.5.1
-# e2b_code_interpreter==2.0.0
-spaces
+# e2b_code_interpreter==2.0.0