gustajunq committed on
Commit
7eaaeb1
·
verified ·
1 Parent(s): 464d17b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -106
app.py CHANGED
@@ -1,119 +1,35 @@
 
1
  import subprocess
2
- import time
3
- import requests
4
- import gradio as gr
5
-
6
  from huggingface_hub import hf_hub_download
7
 
8
- # =========================
9
- # CONFIG
10
- # =========================
11
-
12
  MODEL_REPO = "gustajunq/OpenFable-4B-GGUF"
13
  MODEL_FILE = "openfable-4b-q4_k_m.gguf"
14
-
15
  MODEL_ALIAS = "openfable"
16
- LLAMA_PORT = 8000
17
-
18
- # =========================
19
- # DOWNLOAD MODEL
20
- # =========================
21
 
22
- print("Baixando GGUF...")
23
 
24
  model_path = hf_hub_download(
25
  repo_id=MODEL_REPO,
26
  filename=MODEL_FILE,
27
  )
28
 
29
- print("Modelo baixado:", model_path)
30
-
31
- # =========================
32
- # START SERVER
33
- # =========================
34
-
35
- server = subprocess.Popen(
36
- [
37
- "python",
38
- "-m",
39
- "llama_cpp.server",
40
- "--model",
41
- model_path,
42
- "--model_alias",
43
- MODEL_ALIAS,
44
- "--host",
45
- "0.0.0.0",
46
- "--port",
47
- str(LLAMA_PORT),
48
- ]
49
- )
50
-
51
- # =========================
52
- # WAIT FOR READY
53
- # =========================
54
-
55
- def wait_for_server(timeout=300):
56
- start = time.time()
57
-
58
- while time.time() - start < timeout:
59
- try:
60
- r = requests.get(
61
- f"http://127.0.0.1:{LLAMA_PORT}/v1/models",
62
- timeout=5,
63
- )
64
-
65
- if r.status_code == 200:
66
- return True
67
-
68
- except Exception:
69
- pass
70
-
71
- time.sleep(2)
72
-
73
- return False
74
-
75
- ONLINE = wait_for_server()
76
-
77
- # =========================
78
- # UI
79
- # =========================
80
-
81
- def get_status():
82
- try:
83
- r = requests.get(
84
- f"http://127.0.0.1:{LLAMA_PORT}/v1/models",
85
- timeout=10,
86
- )
87
-
88
- return r.json()
89
-
90
- except Exception as e:
91
- return {"error": str(e)}
92
-
93
- with gr.Blocks() as demo:
94
- gr.Markdown("# OpenFable API")
95
-
96
- if ONLINE:
97
- gr.Markdown(
98
- f"""
99
- ✅ API online
100
-
101
- Modelo: `{MODEL_ALIAS}`
102
-
103
- Endpoint:
104
-
105
- `/v1/chat/completions`
106
- """
107
- )
108
- else:
109
- gr.Markdown("❌ Servidor não iniciou.")
110
-
111
- btn = gr.Button("Status")
112
- output = gr.JSON()
113
-
114
- btn.click(
115
- get_status,
116
- outputs=output,
117
- )
118
-
119
- demo.launch(server_name="0.0.0.0")
 
1
+ import os
2
  import subprocess
 
 
 
 
3
  from huggingface_hub import hf_hub_download
4
 
 
 
 
 
5
  MODEL_REPO = "gustajunq/OpenFable-4B-GGUF"
6
  MODEL_FILE = "openfable-4b-q4_k_m.gguf"
 
7
  MODEL_ALIAS = "openfable"
 
 
 
 
 
8
 
9
+ print("Baixando modelo...")
10
 
11
  model_path = hf_hub_download(
12
  repo_id=MODEL_REPO,
13
  filename=MODEL_FILE,
14
  )
15
 
16
+ print("Modelo:", model_path)
17
+
18
+ cmd = [
19
+ "python",
20
+ "-m",
21
+ "llama_cpp.server",
22
+ "--model",
23
+ model_path,
24
+ "--model_alias",
25
+ MODEL_ALIAS,
26
+ "--host",
27
+ "0.0.0.0",
28
+ "--port",
29
+ "7860",
30
+ ]
31
+
32
+ print("Iniciando servidor...")
33
+ print(" ".join(cmd))
34
+
35
+ subprocess.run(cmd)