#1 by sushiwill - opened

- README.md +0 -2
- app.py +58 -63
- requirements.txt +1 -2
README.md
CHANGED

@@ -7,8 +7,6 @@ sdk: gradio
 sdk_version: 5.49.0
 app_file: app.py
 pinned: false
-hf_oauth: true
-hf_oauth_expiration_minutes: 480
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py
CHANGED

@@ -6,7 +6,6 @@ import time
 import uuid
 import subprocess
 import requests
-
 from typing import List, Dict, Any, Iterator
 
 from dotenv import load_dotenv

@@ -26,8 +25,6 @@ from agentflow.models.utils import make_json_serializable_truncated
 from pathlib import Path
 from huggingface_hub import CommitScheduler
 
-import spaces
-
 # Get Huggingface token from environment variable
 HF_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
 
@@ -59,68 +56,67 @@ scheduler = CommitScheduler(
 
 ########### vLLM Service Management ###########
 VLLM_MODEL_NAME = "AgentFlow/agentflow-planner-7b"
-VLLM_PORT = …
-VLLM_HOST = …
+VLLM_PORT = 8000
+VLLM_HOST = "localhost"
 VLLM_PROCESS = None
 
 def check_vllm_service() -> bool:
     """Check if vLLM service is running"""
     try:
-        response = requests.get(f"{VLLM_HOST}/v1/models", timeout=2)
+        response = requests.get(f"http://{VLLM_HOST}:{VLLM_PORT}/v1/models", timeout=2)
         return response.status_code == 200
     except:
         return False
 
-# … (commented-out start_vllm_service / stop_vllm_service code) …
-# print("🛑 vLLM service stopped")
+def start_vllm_service() -> bool:
+    """Start vLLM service in background"""
+    global VLLM_PROCESS
+
+    if check_vllm_service():
+        print(f"🟢 vLLM service already running on port {VLLM_PORT}")
+        return True
+
+    try:
+        print(f"🚀 Starting vLLM service for {VLLM_MODEL_NAME}...")
+
+        # Start vLLM server in background
+        VLLM_PROCESS = subprocess.Popen(
+            [
+                "vllm", "serve", VLLM_MODEL_NAME,
+                "--port", str(VLLM_PORT),
+                "--host", VLLM_HOST
+            ],
+            text=True
+        )
+
+        # Wait for service to be ready (max 60 seconds)
+        for i in range(180):
+            time.sleep(1)
+            if check_vllm_service():
+                print(f"🟢 vLLM service started successfully on port {VLLM_PORT}")
+                return True
+
+        print("⚠️ vLLM service failed to start within 60 seconds")
+        return False
+
+    except Exception as e:
+        print(f"❌ Failed to start vLLM service: {e}")
+        return False
+
+def stop_vllm_service():
+    """Stop vLLM service if running"""
+    global VLLM_PROCESS
+    if VLLM_PROCESS:
+        VLLM_PROCESS.terminate()
+        VLLM_PROCESS.wait()
+        print("🛑 vLLM service stopped")
 
 def get_vllm_status() -> str:
     """Get vLLM service status message"""
     if check_vllm_service():
-        return "…"
+        return f"🟢 vLLM service running on port {VLLM_PORT}"
     else:
-        return "⚠️ vLLM service not running"
+        return f"⚠️ vLLM service not running"
 
 ########### End of vLLM Service Management ###########
 
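One note on the hunk above: the health check polls vLLM's OpenAI-compatible `/v1/models` endpoint, but the readiness loop runs `range(180)` with `time.sleep(1)`, so it actually waits up to ~180 seconds even though the comment and failure message say 60. A minimal sketch of the same pattern with the timeout made explicit (`vllm_is_up` and `wait_for_vllm` are hypothetical names, not part of this commit):

```python
import time
import requests

VLLM_HOST = "localhost"
VLLM_PORT = 8000

def vllm_is_up(host: str = VLLM_HOST, port: int = VLLM_PORT) -> bool:
    """True if an OpenAI-compatible server answers on /v1/models."""
    try:
        r = requests.get(f"http://{host}:{port}/v1/models", timeout=2)
        return r.status_code == 200
    except requests.RequestException:  # narrower than the bare except above
        return False

def wait_for_vllm(timeout_s: float = 180.0, poll_s: float = 1.0) -> bool:
    """Poll until the server reports ready or the timeout elapses."""
    deadline = time.monotonic() + timeout_s
    while time.monotonic() < deadline:
        if vllm_is_up():
            return True
        time.sleep(poll_s)
    return False
```

With a helper like this, the startup path reduces to `start process, then wait_for_vllm()`, and the printed messages can't drift out of sync with the actual timeout.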
@@ -486,7 +482,7 @@ def parse_arguments():
     parser.add_argument("--openai_api_source", default="we_provided", choices=["we_provided", "user_provided"], help="Source of OpenAI API key.")
     return parser.parse_args()
 
-
+
 def solve_problem_gradio(user_query, max_steps=10, max_time=60, llm_model_engine=None, enabled_tools=None):
     """
     Wrapper function to connect the solver to Gradio.
@@ -537,8 +533,7 @@ def solve_problem_gradio(user_query, max_steps=10, max_time=60, llm_model_engine
         toolbox_metadata=initializer.toolbox_metadata,
         available_tools=initializer.available_tools,
         verbose=False,
-        temperature=0.7,
-        base_url=f"{VLLM_HOST}/v1"
+        temperature=0.7
     )
 
     # Instantiate Memory
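Dropping `base_url=f"{VLLM_HOST}/v1"` is consistent with the host change earlier in the commit: with the new `VLLM_HOST = "localhost"`, that f-string would expand to `localhost/v1`, which has no scheme or port. If the engine still needed to be pointed at the local server explicitly, the full form would look like this (a sketch built from the commit's constants, not a line in the commit):

```python
# Hypothetical: full OpenAI-compatible endpoint for the local vLLM server.
VLLM_HOST = "localhost"
VLLM_PORT = 8000
base_url = f"http://{VLLM_HOST}:{VLLM_PORT}/v1"  # -> "http://localhost:8000/v1"
```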
@@ -827,15 +822,15 @@ if __name__ == "__main__":
     # NOTE: Use the same name for the query cache directory as the dataset directory
     args.root_cache_dir = DATASET_DIR.name
 
-    # … (commented-out vLLM startup code) …
+    # Start vLLM service
+    print("=" * 60)
+    print("🔍 Checking vLLM service status...")
+    if not check_vllm_service():
+        print(f"⚠️ vLLM service not running. Starting {VLLM_MODEL_NAME}...")
+        start_vllm_service()
+    else:
+        print(f"✅ vLLM service is already running on port {VLLM_PORT}")
+    print("=" * 60)
 
     # Register cleanup function
     # atexit.register(stop_vllm_service)
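`atexit.register(stop_vllm_service)` stays commented out, so a vLLM subprocess started by `start_vllm_service` is never torn down explicitly when the app exits. A self-contained sketch of what enabling that hook would look like (the stub mirrors the commit's own `stop_vllm_service`):

```python
import atexit
import subprocess
from typing import Optional

VLLM_PROCESS: Optional[subprocess.Popen] = None  # set by start_vllm_service()

def stop_vllm_service() -> None:
    """Terminate the background vLLM server if this process started it."""
    if VLLM_PROCESS is not None:
        VLLM_PROCESS.terminate()
        VLLM_PROCESS.wait()
        print("🛑 vLLM service stopped")

# The line the commit leaves commented out:
atexit.register(stop_vllm_service)
```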
requirements.txt
CHANGED

@@ -19,5 +19,4 @@ dashscope==1.24.2
 gradio
 # litellm==2.1.1
 # ollama==0.5.1
-# e2b_code_interpreter==2.0.0
-spaces
+# e2b_code_interpreter==2.0.0