Spaces:
Running on Zero
Upload app.py
app.py CHANGED
@@ -66,62 +66,61 @@ VLLM_PROCESS = None
 def check_vllm_service() -> bool:
     """Check if vLLM service is running"""
     try:
-
-        response = requests.get(f"http://{VLLM_HOST}/v1/models", timeout=2)
+        response = requests.get(f"{VLLM_HOST}/v1/models", timeout=2)
         return response.status_code == 200
     except:
         return False
 
-def start_vllm_service() -> bool:
-    """Start vLLM service in background"""
-    global VLLM_PROCESS
-
-    if check_vllm_service():
-        print(f"🟢 vLLM service already running on port {VLLM_PORT}")
-        return True
-
-    try:
-        print(f"🚀 Starting vLLM service for {VLLM_MODEL_NAME}...")
-
-        # Start vLLM server in background
-        VLLM_PROCESS = subprocess.Popen(
-            [
-                "vllm", "serve", VLLM_MODEL_NAME,
-                "--host", VLLM_HOST,
-                "--tensor-parallel-size", "1",
-                "--gpu-memory-utilization", "0.95"
-            ],
-            text=True
-        )
-
-        # Wait for service to be ready (max 180 seconds)
-        for i in range(180):
-            time.sleep(1)
-            if check_vllm_service():
-                print(f"🟢 vLLM service started successfully on port {VLLM_PORT}")
-                return True
-
-        print("⚠️ vLLM service failed to start within 180 seconds")
-        return False
-
-    except Exception as e:
-        print(f"❌ Failed to start vLLM service: {e}")
-        return False
-
-def stop_vllm_service():
-    """Stop vLLM service if running"""
-    global VLLM_PROCESS
-    if VLLM_PROCESS:
-        VLLM_PROCESS.terminate()
-        VLLM_PROCESS.wait()
-        print("🛑 vLLM service stopped")
+# def start_vllm_service() -> bool:
+#     """Start vLLM service in background"""
+#     global VLLM_PROCESS
+#
+#     if check_vllm_service():
+#         print(f"🟢 vLLM service already running on port {VLLM_PORT}")
+#         return True
+#
+#     try:
+#         print(f"🚀 Starting vLLM service for {VLLM_MODEL_NAME}...")
+#
+#         # Start vLLM server in background
+#         VLLM_PROCESS = subprocess.Popen(
+#             [
+#                 "vllm", "serve", VLLM_MODEL_NAME,
+#                 "--host", VLLM_HOST,
+#                 "--tensor-parallel-size", "1",
+#                 "--gpu-memory-utilization", "0.95"
+#             ],
+#             text=True
+#         )
+#
+#         # Wait for service to be ready (max 180 seconds)
+#         for i in range(180):
+#             time.sleep(1)
+#             if check_vllm_service():
+#                 print(f"🟢 vLLM service started successfully on port {VLLM_PORT}")
+#                 return True
+#
+#         print("⚠️ vLLM service failed to start within 180 seconds")
+#         return False
+#
+#     except Exception as e:
+#         print(f"❌ Failed to start vLLM service: {e}")
+#         return False
+
+# def stop_vllm_service():
+#     """Stop vLLM service if running"""
+#     global VLLM_PROCESS
+#     if VLLM_PROCESS:
+#         VLLM_PROCESS.terminate()
+#         VLLM_PROCESS.wait()
+#         print("🛑 vLLM service stopped")
 
 def get_vllm_status() -> str:
     """Get vLLM service status message"""
     if check_vllm_service():
-        return
+        return "🟢 vLLM service deployed on Lambda"
     else:
-        return
+        return "⚠️ vLLM service not running"
 
 ########### End of vLLM Service Management ###########
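What this hunk does: the health check no longer hard-codes an `http://` prefix, so `VLLM_HOST` is now expected to carry the full scheme and host of the remote endpoint (the Lambda deployment that `get_vllm_status` now reports), and the local start/stop helpers are commented out rather than deleted. A minimal standalone sketch of the same check, with a hypothetical host and the bare `except:` narrowed to `requests.RequestException` as an editorial tightening:

```python
import requests

# Hypothetical endpoint for illustration; the Space defines its own VLLM_HOST,
# which after this commit must include the scheme, e.g. "https://host:8000".
VLLM_HOST = "https://my-lambda-instance.example.com:8000"

def check_vllm_service() -> bool:
    """Return True if the vLLM server answers on its OpenAI-compatible /v1/models route."""
    try:
        response = requests.get(f"{VLLM_HOST}/v1/models", timeout=2)
        return response.status_code == 200
    except requests.RequestException:
        # Connection errors and timeouts both mean "not reachable"
        return False

if __name__ == "__main__":
    print("vLLM reachable" if check_vllm_service() else "vLLM unreachable")
```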
@@ -538,7 +537,8 @@ def solve_problem_gradio(user_query, max_steps=10, max_time=60, llm_model_engine
         toolbox_metadata=initializer.toolbox_metadata,
         available_tools=initializer.available_tools,
         verbose=False,
-        temperature=0.7
+        temperature=0.7,
+        base_url=f"{VLLM_HOST}/v1"
     )
 
     # Instantiate Memory
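The solver construction gains a trailing comma after `temperature=0.7` and a `base_url` pointing at the OpenAI-compatible `/v1` route of the remote vLLM server. Assuming the solver wraps an OpenAI-style client, the equivalent direct call looks roughly like this (endpoint and model name are placeholders):

```python
from openai import OpenAI

# Placeholder endpoint and model; the app passes base_url=f"{VLLM_HOST}/v1".
client = OpenAI(
    base_url="https://my-lambda-instance.example.com:8000/v1",
    api_key="EMPTY",  # vLLM ignores the key unless the server sets --api-key
)

response = client.chat.completions.create(
    model="Qwen/Qwen2.5-7B-Instruct",  # must match the model vLLM is serving
    messages=[{"role": "user", "content": "Say hello."}],
    temperature=0.7,  # same sampling temperature the solver is configured with
)
print(response.choices[0].message.content)
```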
@@ -827,15 +827,15 @@ if __name__ == "__main__":
     # NOTE: Use the same name for the query cache directory as the dataset directory
     args.root_cache_dir = DATASET_DIR.name
 
-    # Start vLLM service
-    print("=" * 60)
-    print("🔍 Checking vLLM service status...")
-    if not check_vllm_service():
-        print(f"⚠️ vLLM service not running. Starting {VLLM_MODEL_NAME}...")
-        start_vllm_service()
-    else:
-        print(f"✅ vLLM service is already running on port {VLLM_PORT}")
-    print("=" * 60)
+    # # Start vLLM service
+    # print("=" * 60)
+    # print("🔍 Checking vLLM service status...")
+    # if not check_vllm_service():
+    #     print(f"⚠️ vLLM service not running. Starting {VLLM_MODEL_NAME}...")
+    #     start_vllm_service()
+    # else:
+    #     print(f"✅ vLLM service is already running on port {VLLM_PORT}")
+    # print("=" * 60)
 
     # Register cleanup function
     # atexit.register(stop_vllm_service)
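The `__main__` startup block is commented out to match, and the `atexit` hook on line 841 stays disabled as before. If local serving were ever re-enabled, the cleanup wiring would look roughly like this sketch (process handling simplified, model name a placeholder):

```python
import atexit
import subprocess
from typing import Optional

PROCESS: Optional[subprocess.Popen] = None

def start_vllm() -> None:
    """Launch a local vLLM server as a child process."""
    global PROCESS
    PROCESS = subprocess.Popen(
        ["vllm", "serve", "Qwen/Qwen2.5-7B-Instruct", "--host", "0.0.0.0"]
    )

def stop_vllm() -> None:
    """Terminate the child and wait, so no orphaned GPU process survives."""
    if PROCESS is not None:
        PROCESS.terminate()
        PROCESS.wait()

# Handlers registered with atexit run at normal interpreter shutdown
atexit.register(stop_vllm)
```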