Spaces:
Running on Zero
Upload app.py
app.py CHANGED
@@ -66,62 +66,61 @@ VLLM_PROCESS = None
 def check_vllm_service() -> bool:
     """Check if vLLM service is running"""
     try:
-
-        response = requests.get(f"http://{VLLM_HOST}/v1/models", timeout=2)
+        response = requests.get(f"{VLLM_HOST}/v1/models", timeout=2)
         return response.status_code == 200
     except:
         return False
 
-def start_vllm_service() -> bool:
-    """Start vLLM service in background"""
-    global VLLM_PROCESS
-
-    if check_vllm_service():
-        print(f"🟢 vLLM service already running on port {VLLM_PORT}")
-        return True
-
-    try:
-        print(f"🚀 Starting vLLM service for {VLLM_MODEL_NAME}...")
-
-        # Start vLLM server in background
-        VLLM_PROCESS = subprocess.Popen(
-            [
-                "vllm", "serve", VLLM_MODEL_NAME,
-                "--host", VLLM_HOST,
-                "--tensor-parallel-size", "1",
-                "--gpu-memory-utilization", "0.95"
-            ],
-            text=True
-        )
-
-        # Wait for service to be ready (max 180 seconds)
-        for i in range(180):
-            time.sleep(1)
-            if check_vllm_service():
-                print(f"🟢 vLLM service started successfully on port {VLLM_PORT}")
-                return True
-
-        print("⚠️ vLLM service failed to start within 180 seconds")
-        return False
-
-    except Exception as e:
-        print(f"❌ Failed to start vLLM service: {e}")
-        return False
-
-def stop_vllm_service():
-    """Stop vLLM service if running"""
-    global VLLM_PROCESS
-    if VLLM_PROCESS:
-        VLLM_PROCESS.terminate()
-        VLLM_PROCESS.wait()
-        print("🛑 vLLM service stopped")
+# def start_vllm_service() -> bool:
+#     """Start vLLM service in background"""
+#     global VLLM_PROCESS
+#
+#     if check_vllm_service():
+#         print(f"🟢 vLLM service already running on port {VLLM_PORT}")
+#         return True
+#
+#     try:
+#         print(f"🚀 Starting vLLM service for {VLLM_MODEL_NAME}...")
+#
+#         # Start vLLM server in background
+#         VLLM_PROCESS = subprocess.Popen(
+#             [
+#                 "vllm", "serve", VLLM_MODEL_NAME,
+#                 "--host", VLLM_HOST,
+#                 "--tensor-parallel-size", "1",
+#                 "--gpu-memory-utilization", "0.95"
+#             ],
+#             text=True
+#         )
+#
+#         # Wait for service to be ready (max 180 seconds)
+#         for i in range(180):
+#             time.sleep(1)
+#             if check_vllm_service():
+#                 print(f"🟢 vLLM service started successfully on port {VLLM_PORT}")
+#                 return True
+#
+#         print("⚠️ vLLM service failed to start within 180 seconds")
+#         return False
+#
+#     except Exception as e:
+#         print(f"❌ Failed to start vLLM service: {e}")
+#         return False
+
+# def stop_vllm_service():
+#     """Stop vLLM service if running"""
+#     global VLLM_PROCESS
+#     if VLLM_PROCESS:
+#         VLLM_PROCESS.terminate()
+#         VLLM_PROCESS.wait()
+#         print("🛑 vLLM service stopped")
 
 def get_vllm_status() -> str:
     """Get vLLM service status message"""
     if check_vllm_service():
-        return
+        return "🟢 vLLM service deployed on Lambda"
     else:
-        return
+        return "⚠️ vLLM service not running"
 
 ########### End of vLLM Service Management ###########
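What this hunk does: the health check no longer hard-codes an `http://` prefix, so `VLLM_HOST` is now expected to carry the full scheme and host of the remote endpoint (the Lambda deployment that `get_vllm_status` now reports), and the local start/stop helpers are commented out rather than deleted. A minimal standalone sketch of the same check, with a hypothetical host and the bare `except:` narrowed to `requests.RequestException` as an editorial tightening:

```python
import requests

# Hypothetical endpoint for illustration; the Space defines its own VLLM_HOST,
# which after this commit must include the scheme, e.g. "https://host:8000".
VLLM_HOST = "https://my-lambda-instance.example.com:8000"

def check_vllm_service() -> bool:
    """Return True if the vLLM server answers on its OpenAI-compatible /v1/models route."""
    try:
        response = requests.get(f"{VLLM_HOST}/v1/models", timeout=2)
        return response.status_code == 200
    except requests.RequestException:
        # Connection errors and timeouts both mean "not reachable"
        return False

if __name__ == "__main__":
    print("vLLM reachable" if check_vllm_service() else "vLLM unreachable")
```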
@@ -538,7 +537,8 @@ def solve_problem_gradio(user_query, max_steps=10, max_time=60, llm_model_engine
         toolbox_metadata=initializer.toolbox_metadata,
         available_tools=initializer.available_tools,
         verbose=False,
-        temperature=0.7
+        temperature=0.7,
+        base_url=f"{VLLM_HOST}/v1"
     )
 
     # Instantiate Memory
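The solver construction gains a trailing comma after `temperature=0.7` and a `base_url` pointing at the OpenAI-compatible `/v1` route of the remote vLLM server. Assuming the solver wraps an OpenAI-style client, the equivalent direct call looks roughly like this (endpoint and model name are placeholders):

```python
from openai import OpenAI

# Placeholder endpoint and model; the app passes base_url=f"{VLLM_HOST}/v1".
client = OpenAI(
    base_url="https://my-lambda-instance.example.com:8000/v1",
    api_key="EMPTY",  # vLLM ignores the key unless the server sets --api-key
)

response = client.chat.completions.create(
    model="Qwen/Qwen2.5-7B-Instruct",  # must match the model vLLM is serving
    messages=[{"role": "user", "content": "Say hello."}],
    temperature=0.7,  # same sampling temperature the solver is configured with
)
print(response.choices[0].message.content)
```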
@@ -827,15 +827,15 @@ if __name__ == "__main__":
     # NOTE: Use the same name for the query cache directory as the dataset directory
     args.root_cache_dir = DATASET_DIR.name
 
-    # Start vLLM service
-    print("=" * 60)
-    print("🔍 Checking vLLM service status...")
-    if not check_vllm_service():
-        print(f"⚠️ vLLM service not running. Starting {VLLM_MODEL_NAME}...")
-        start_vllm_service()
-    else:
-        print(f"✅ vLLM service is already running on port {VLLM_PORT}")
-    print("=" * 60)
+    # # Start vLLM service
+    # print("=" * 60)
+    # print("🔍 Checking vLLM service status...")
+    # if not check_vllm_service():
+    #     print(f"⚠️ vLLM service not running. Starting {VLLM_MODEL_NAME}...")
+    #     start_vllm_service()
+    # else:
+    #     print(f"✅ vLLM service is already running on port {VLLM_PORT}")
+    # print("=" * 60)
 
     # Register cleanup function
     # atexit.register(stop_vllm_service)
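The `__main__` startup block is commented out to match, and the `atexit` hook on line 841 stays disabled as before. If local serving were ever re-enabled, the cleanup wiring would look roughly like this sketch (process handling simplified, model name a placeholder):

```python
import atexit
import subprocess
from typing import Optional

PROCESS: Optional[subprocess.Popen] = None

def start_vllm() -> None:
    """Launch a local vLLM server as a child process."""
    global PROCESS
    PROCESS = subprocess.Popen(
        ["vllm", "serve", "Qwen/Qwen2.5-7B-Instruct", "--host", "0.0.0.0"]
    )

def stop_vllm() -> None:
    """Terminate the child and wait, so no orphaned GPU process survives."""
    if PROCESS is not None:
        PROCESS.terminate()
        PROCESS.wait()

# Handlers registered with atexit run at normal interpreter shutdown
atexit.register(stop_vllm)
```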