IPF committed Β· Commit 2c18099 Β· verified Β· 1 Parent(s): 65ad880

Upload app.py

Files changed (1)
  1. app.py +57 -57
app.py CHANGED
@@ -66,62 +66,61 @@ VLLM_PROCESS = None
 def check_vllm_service() -> bool:
     """Check if vLLM service is running"""
     try:
-        # response = requests.get(f"http://{VLLM_HOST}:{VLLM_PORT}/v1/models", timeout=2)
-        response = requests.get(f"http://{VLLM_HOST}/v1/models", timeout=2)
+        response = requests.get(f"{VLLM_HOST}/v1/models", timeout=2)
         return response.status_code == 200
     except:
         return False
 
-def start_vllm_service() -> bool:
-    """Start vLLM service in background"""
-    global VLLM_PROCESS
-
-    if check_vllm_service():
-        print(f"🟒 vLLM service already running on port {VLLM_PORT}")
-        return True
-
-    try:
-        print(f"πŸš€ Starting vLLM service for {VLLM_MODEL_NAME}...")
-
-        # Start vLLM server in background
-        VLLM_PROCESS = subprocess.Popen(
-            [
-                "vllm", "serve", VLLM_MODEL_NAME,
-                "--host", VLLM_HOST,
-                "--tensor-parallel-size", "1",
-                "--gpu-memory-utilization", "0.95"
-            ],
-            text=True
-        )
-
-        # Wait for service to be ready (max 60 seconds)
-        for i in range(180):
-            time.sleep(1)
-            if check_vllm_service():
-                print(f"🟒 vLLM service started successfully on port {VLLM_PORT}")
-                return True
-
-        print("⚠️ vLLM service failed to start within 60 seconds")
-        return False
-
-    except Exception as e:
-        print(f"❌ Failed to start vLLM service: {e}")
-        return False
-
-def stop_vllm_service():
-    """Stop vLLM service if running"""
-    global VLLM_PROCESS
-    if VLLM_PROCESS:
-        VLLM_PROCESS.terminate()
-        VLLM_PROCESS.wait()
-        print("πŸ›‘ vLLM service stopped")
+# def start_vllm_service() -> bool:
+#     """Start vLLM service in background"""
+#     global VLLM_PROCESS
+#
+#     if check_vllm_service():
+#         print(f"🟒 vLLM service already running on port {VLLM_PORT}")
+#         return True
+#
+#     try:
+#         print(f"πŸš€ Starting vLLM service for {VLLM_MODEL_NAME}...")
+#
+#         # Start vLLM server in background
+#         VLLM_PROCESS = subprocess.Popen(
+#             [
+#                 "vllm", "serve", VLLM_MODEL_NAME,
+#                 "--host", VLLM_HOST,
+#                 "--tensor-parallel-size", "1",
+#                 "--gpu-memory-utilization", "0.95"
+#             ],
+#             text=True
+#         )
+#
+#         # Wait for service to be ready (max 60 seconds)
+#         for i in range(180):
+#             time.sleep(1)
+#             if check_vllm_service():
+#                 print(f"🟒 vLLM service started successfully on port {VLLM_PORT}")
+#                 return True
+#
+#         print("⚠️ vLLM service failed to start within 60 seconds")
+#         return False
+#
+#     except Exception as e:
+#         print(f"❌ Failed to start vLLM service: {e}")
+#         return False
+
+# def stop_vllm_service():
+#     """Stop vLLM service if running"""
+#     global VLLM_PROCESS
+#     if VLLM_PROCESS:
+#         VLLM_PROCESS.terminate()
+#         VLLM_PROCESS.wait()
+#         print("πŸ›‘ vLLM service stopped")
 
 def get_vllm_status() -> str:
     """Get vLLM service status message"""
     if check_vllm_service():
-        return f"🟒 vLLM service running on port {VLLM_PORT}"
+        return "🟒 vLLM service deployed on Lambda"
     else:
-        return f"⚠️ vLLM service not running"
+        return "⚠️ vLLM service not running"
 
 ########### End of vLLM Service Management ###########
 
@@ -538,7 +537,8 @@ def solve_problem_gradio(user_query, max_steps=10, max_time=60, llm_model_engine
         toolbox_metadata=initializer.toolbox_metadata,
         available_tools=initializer.available_tools,
         verbose=False,
-        temperature=0.7
+        temperature=0.7,
+        base_url=f"{VLLM_HOST}/v1"
     )
 
     # Instantiate Memory
@@ -827,15 +827,15 @@ if __name__ == "__main__":
     # NOTE: Use the same name for the query cache directory as the dataset directory
     args.root_cache_dir = DATASET_DIR.name
 
-    # Start vLLM service
-    print("=" * 60)
-    print("πŸ” Checking vLLM service status...")
-    if not check_vllm_service():
-        print(f"⚠️ vLLM service not running. Starting {VLLM_MODEL_NAME}...")
-        start_vllm_service()
-    else:
-        print(f"βœ… vLLM service is already running on port {VLLM_PORT}")
-    print("=" * 60)
+    # # Start vLLM service
+    # print("=" * 60)
+    # print("πŸ” Checking vLLM service status...")
+    # if not check_vllm_service():
+    #     print(f"⚠️ vLLM service not running. Starting {VLLM_MODEL_NAME}...")
+    #     start_vllm_service()
+    # else:
+    #     print(f"βœ… vLLM service is already running on port {VLLM_PORT}")
+    # print("=" * 60)
 
     # Register cleanup function
     # atexit.register(stop_vllm_service)
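Net effect of the commit: app.py no longer launches and supervises a local `vllm serve` subprocess; it assumes an already-deployed, OpenAI-compatible vLLM endpoint (per the new status string, hosted on Lambda) and treats VLLM_HOST as a full base URL, scheme included, which is why the hard-coded "http://" prefix was dropped from the health check and base_url=f"{VLLM_HOST}/v1" was passed to the solver. A minimal sketch of that client-side pattern, assuming the standard OpenAI-compatible API that vLLM exposes; the host URL and model id below are illustrative placeholders, not values from this repo, and the direct OpenAI-SDK call merely stands in for whatever the solver's engine does with base_url:

    import requests
    from openai import OpenAI

    # Placeholder values -- not taken from this repo.
    VLLM_HOST = "https://vllm.example.com"   # full base URL, scheme included
    VLLM_MODEL_NAME = "my-org/my-model"      # whatever model the endpoint serves

    def check_vllm_service() -> bool:
        """Probe the OpenAI-compatible /v1/models route that vLLM exposes."""
        try:
            response = requests.get(f"{VLLM_HOST}/v1/models", timeout=2)
            return response.status_code == 200
        except requests.RequestException:
            return False

    if check_vllm_service():
        # vLLM accepts any key when auth is not configured; "EMPTY" is conventional.
        client = OpenAI(base_url=f"{VLLM_HOST}/v1", api_key="EMPTY")
        reply = client.chat.completions.create(
            model=VLLM_MODEL_NAME,
            messages=[{"role": "user", "content": "ping"}],
            temperature=0.7,
        )
        print(reply.choices[0].message.content)
    else:
        print("⚠️ vLLM service not running")

One consequence visible in the diff: with start_vllm_service(), stop_vllm_service(), and the atexit hook all commented out rather than deleted, check_vllm_service() is now only called from get_vllm_status(), and nothing in app.py restarts the endpoint if the remote deployment goes down, as expected for an externally managed service.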