NOT-OMEGA committed on
Commit
9ca9aea
·
verified ·
1 Parent(s): 1a9b340

Update app_gradio.py

Browse files
Files changed (1) hide show
  1. app_gradio.py +61 -40
app_gradio.py CHANGED
@@ -1,6 +1,11 @@
1
  """
2
  Log Classification System — HuggingFace Spaces
3
  Ultra-Modern 3D UI | Optimized for Gradio 6.0 & HF Free Tier
 
 
 
 
 
4
  """
5
  from __future__ import annotations
6
  import io
@@ -21,11 +26,11 @@ SOURCES = [
21
  ]
22
 
23
  def get_tier_icon(tier_name: str) -> str:
24
- if "Regex" in tier_name: return "🟒"
25
- if "BERT" in tier_name: return "πŸ”΅"
26
- if "Cache Hit" in tier_name: return "⚑"
27
- if "fallback" in tier_name: return "🟠"
28
- if "LLM" in tier_name: return "🟑"
29
  return "βšͺ"
30
 
31
  EXAMPLE_LOGS = [
@@ -88,75 +93,91 @@ def classify_single(source: str, log_message: str):
88
  return "β€”", "β€”", "β€”", "β€”"
89
  if not _model_ready:
90
  return "⏳ Loading...", "Warming up", "β€”", "β€”"
91
-
92
  t0 = time.perf_counter()
93
  try:
94
  result = classify_log(source, log_message)
95
  latency = (time.perf_counter() - t0) * 1000
96
  icon = get_tier_icon(result["tier"])
97
  return (
98
- result["label"],
99
- f"{icon} {result['tier']}",
100
- f"{result['confidence']:.1%}" if result["confidence"] else "N/A",
101
- f"{latency:.4f} ms"
102
  )
103
  except Exception as e:
104
  return f"Error: {str(e)}", "Fail", "β€”", "β€”"
105
 
 
106
  def classify_batch(file, progress=gr.Progress(track_tqdm=True)):
107
- if file is None: return None, "⚠️ Please upload a CSV file."
108
-
 
109
  progress(0, desc="πŸš€ Initializing Engine...")
110
  t0 = time.perf_counter()
111
-
112
  try:
113
- # FIX: Generate a unique output path per user to prevent data bleeding
114
  unique_id = uuid.uuid4().hex
115
  safe_output_path = f"/tmp/classified_output_{unique_id}.csv"
116
-
117
  output_path, df = classify_csv(file.name, safe_output_path)
118
  total_time_sec = time.perf_counter() - t0
119
-
120
  progress(0.9, desc="πŸ“Š Calculating Metrics...")
121
-
122
- total = len(df)
123
  label_counts = df["predicted_label"].value_counts().to_dict()
124
- tier_counts = df["tier_used"].value_counts().to_dict()
125
-
126
  tier_lines = []
127
  for tier, count in tier_counts.items():
128
  tier_df = df[df["tier_used"] == tier]
129
- lats = tier_df["latency_ms"].dropna()
130
- icon = get_tier_icon(tier)
131
- pct = count / total
132
-
133
  if "BERT" in tier:
134
- total_ms = lats.sum()
135
- tier_lines.append(f" {icon} {tier}: Batch Latency {total_ms:.1f} ms (Over {count} logs)")
 
 
 
 
 
 
 
 
136
  elif "Regex" in tier:
137
  p50 = np.percentile(lats, 50) if not lats.empty else 0
138
- tier_lines.append(f" {icon} {tier}: < 0.1 ms (p50: {p50:.4f} ms) | {count} logs ({pct:.0%})")
 
 
139
  else:
140
  p50 = np.percentile(lats, 50) if not lats.empty else 0
141
  p95 = np.percentile(lats, 95) if not lats.empty else 0
142
  p99 = np.percentile(lats, 99) if not lats.empty else 0
143
- tier_lines.append(f" {icon} {tier}: {count} logs ({pct:.0%}) | p50={p50:.1f}ms p95={p95:.1f}ms p99={p99:.1f}ms")
144
-
 
 
 
145
  tier_lines_str = "\n".join(tier_lines)
146
- label_lines = "\n".join([f" β€’ {k}: {v}" for k, v in label_counts.items()])
147
-
148
  stats = (
149
  f"βœ… Classified {total} logs in {total_time_sec:.2f} s\n\n"
150
  f"πŸ“Š Performance by Tier:\n{tier_lines_str}\n\n"
151
  f"🏷️ Label distribution:\n{label_lines}"
152
  )
153
-
154
  progress(1.0, desc="βœ… Success")
155
  return output_path, stats
156
 
157
  except Exception as e:
158
  return None, f"❌ System Error: {str(e)}"
159
 
 
160
  # ── Theme & Layout ──────────────────────────────────────────
161
  THEME = gr.themes.Base(
162
  primary_hue="blue",
@@ -175,14 +196,14 @@ with gr.Blocks(title="Log AI Engine") as demo:
175
  src_in = gr.Dropdown(choices=SOURCES, value="ModernCRM", label="SOURCE")
176
  with gr.Column(scale=3):
177
  msg_in = gr.Textbox(label="LOG MESSAGE", placeholder="Paste raw log string...", lines=3)
178
-
179
  run_btn = gr.Button("β–Ά CLASSIFY LOG", variant="primary")
180
-
181
  with gr.Row():
182
- lbl_out = gr.Textbox(label="PREDICTED LABEL")
183
  tier_out = gr.Textbox(label="TIER USED")
184
  conf_out = gr.Textbox(label="CONFIDENCE")
185
- lat_out = gr.Textbox(label="LATENCY")
186
 
187
  run_btn.click(classify_single, [src_in, msg_in], [lbl_out, tier_out, conf_out, lat_out])
188
  gr.Examples(examples=EXAMPLE_LOGS, inputs=[src_in, msg_in])
@@ -190,12 +211,12 @@ with gr.Blocks(title="Log AI Engine") as demo:
190
  with gr.Tab("πŸ“¦ BATCH PROCESSING"):
191
  with gr.Row():
192
  with gr.Column():
193
- csv_in = gr.File(label="UPLOAD CSV", file_types=[".csv"])
194
  batch_btn = gr.Button("β–Ά START BATCH PROCESS", variant="primary")
195
  with gr.Column():
196
- csv_out = gr.File(label="DOWNLOAD CLASSIFIED DATA")
197
  stats_out = gr.Textbox(label="PIPELINE ANALYTICS", lines=16, elem_classes="output-stats")
198
-
199
  batch_btn.click(classify_batch, inputs=[csv_in], outputs=[csv_out, stats_out])
200
 
201
  demo.queue(default_concurrency_limit=2).launch(
@@ -203,4 +224,4 @@ demo.queue(default_concurrency_limit=2).launch(
203
  server_port=7860,
204
  theme=THEME,
205
  css=CUSTOM_CSS
206
- )
 
1
  """
2
  Log Classification System — HuggingFace Spaces
3
  Ultra-Modern 3D UI | Optimized for Gradio 6.0 & HF Free Tier
4
+
5
+ Bug fixes vs previous version:
6
+ - BERT latency display: no longer shows cumulative sum (was showing 2,962,635 ms).
7
+ Now shows real per-log wall-clock latency from classify.py fix.
8
+ - Added bert_wall_ms tracking in stats display so batch total is visible clearly.
9
  """
10
  from __future__ import annotations
11
  import io
 
26
  ]
27
 
28
  def get_tier_icon(tier_name: str) -> str:
29
+ if "Regex" in tier_name: return "🟒"
30
+ if "BERT" in tier_name: return "πŸ”΅"
31
+ if "Cache Hit" in tier_name: return "⚑"
32
+ if "fallback" in tier_name: return "🟠"
33
+ if "LLM" in tier_name: return "🟑"
34
  return "βšͺ"
35
 
36
  EXAMPLE_LOGS = [
 
93
  return "β€”", "β€”", "β€”", "β€”"
94
  if not _model_ready:
95
  return "⏳ Loading...", "Warming up", "β€”", "β€”"
96
+
97
  t0 = time.perf_counter()
98
  try:
99
  result = classify_log(source, log_message)
100
  latency = (time.perf_counter() - t0) * 1000
101
  icon = get_tier_icon(result["tier"])
102
  return (
103
+ result["label"],
104
+ f"{icon} {result['tier']}",
105
+ f"{result['confidence']:.1%}" if result["confidence"] else "N/A",
106
+ f"{latency:.4f} ms"
107
  )
108
  except Exception as e:
109
  return f"Error: {str(e)}", "Fail", "β€”", "β€”"
110
 
111
+
112
  def classify_batch(file, progress=gr.Progress(track_tqdm=True)):
113
+ if file is None:
114
+ return None, "⚠️ Please upload a CSV file."
115
+
116
  progress(0, desc="πŸš€ Initializing Engine...")
117
  t0 = time.perf_counter()
118
+
119
  try:
120
+ # Generate a unique output path per user to prevent data bleeding
121
  unique_id = uuid.uuid4().hex
122
  safe_output_path = f"/tmp/classified_output_{unique_id}.csv"
123
+
124
  output_path, df = classify_csv(file.name, safe_output_path)
125
  total_time_sec = time.perf_counter() - t0
126
+
127
  progress(0.9, desc="πŸ“Š Calculating Metrics...")
128
+
129
+ total = len(df)
130
  label_counts = df["predicted_label"].value_counts().to_dict()
131
+ tier_counts = df["tier_used"].value_counts().to_dict()
132
+
133
  tier_lines = []
134
  for tier, count in tier_counts.items():
135
  tier_df = df[df["tier_used"] == tier]
136
+ lats = tier_df["latency_ms"].dropna()
137
+ icon = get_tier_icon(tier)
138
+ pct = count / total
139
+
140
  if "BERT" in tier:
141
+ # BUG FIX: latency_ms now holds true per-log wall-clock time.
142
+ # Show per-log p50 AND reconstructed batch total for clarity.
143
+ p50 = np.percentile(lats, 50) if not lats.empty else 0
144
+ # Each stored value is already per-log wall time (total_wall/n),
145
+ # so multiplying by count reconstructs actual batch wall time.
146
+ batch_ms = p50 * count
147
+ tier_lines.append(
148
+ f" {icon} {tier}: p50={p50:.2f} ms/log | "
149
+ f"Batch total ~{batch_ms/1000:.1f} s (Over {count} logs)"
150
+ )
151
  elif "Regex" in tier:
152
  p50 = np.percentile(lats, 50) if not lats.empty else 0
153
+ tier_lines.append(
154
+ f" {icon} {tier}: < 0.1 ms (p50: {p50:.4f} ms) | {count} logs ({pct:.0%})"
155
+ )
156
  else:
157
  p50 = np.percentile(lats, 50) if not lats.empty else 0
158
  p95 = np.percentile(lats, 95) if not lats.empty else 0
159
  p99 = np.percentile(lats, 99) if not lats.empty else 0
160
+ tier_lines.append(
161
+ f" {icon} {tier}: {count} logs ({pct:.0%}) | "
162
+ f"p50={p50:.1f}ms p95={p95:.1f}ms p99={p99:.1f}ms"
163
+ )
164
+
165
  tier_lines_str = "\n".join(tier_lines)
166
+ label_lines = "\n".join([f" β€’ {k}: {v}" for k, v in label_counts.items()])
167
+
168
  stats = (
169
  f"βœ… Classified {total} logs in {total_time_sec:.2f} s\n\n"
170
  f"πŸ“Š Performance by Tier:\n{tier_lines_str}\n\n"
171
  f"🏷️ Label distribution:\n{label_lines}"
172
  )
173
+
174
  progress(1.0, desc="βœ… Success")
175
  return output_path, stats
176
 
177
  except Exception as e:
178
  return None, f"❌ System Error: {str(e)}"
179
 
180
+
181
  # ── Theme & Layout ──────────────────────────────────────────
182
  THEME = gr.themes.Base(
183
  primary_hue="blue",
 
196
  src_in = gr.Dropdown(choices=SOURCES, value="ModernCRM", label="SOURCE")
197
  with gr.Column(scale=3):
198
  msg_in = gr.Textbox(label="LOG MESSAGE", placeholder="Paste raw log string...", lines=3)
199
+
200
  run_btn = gr.Button("β–Ά CLASSIFY LOG", variant="primary")
201
+
202
  with gr.Row():
203
+ lbl_out = gr.Textbox(label="PREDICTED LABEL")
204
  tier_out = gr.Textbox(label="TIER USED")
205
  conf_out = gr.Textbox(label="CONFIDENCE")
206
+ lat_out = gr.Textbox(label="LATENCY")
207
 
208
  run_btn.click(classify_single, [src_in, msg_in], [lbl_out, tier_out, conf_out, lat_out])
209
  gr.Examples(examples=EXAMPLE_LOGS, inputs=[src_in, msg_in])
 
211
  with gr.Tab("πŸ“¦ BATCH PROCESSING"):
212
  with gr.Row():
213
  with gr.Column():
214
+ csv_in = gr.File(label="UPLOAD CSV", file_types=[".csv"])
215
  batch_btn = gr.Button("β–Ά START BATCH PROCESS", variant="primary")
216
  with gr.Column():
217
+ csv_out = gr.File(label="DOWNLOAD CLASSIFIED DATA")
218
  stats_out = gr.Textbox(label="PIPELINE ANALYTICS", lines=16, elem_classes="output-stats")
219
+
220
  batch_btn.click(classify_batch, inputs=[csv_in], outputs=[csv_out, stats_out])
221
 
222
  demo.queue(default_concurrency_limit=2).launch(
 
224
  server_port=7860,
225
  theme=THEME,
226
  css=CUSTOM_CSS
227
+ )