NOT-OMEGA committed on
Commit
5ef292a
·
verified ·
1 Parent(s): b90444b

Update processor_llm.py

Browse files
Files changed (1) hide show
  1. processor_llm.py +8 -41
processor_llm.py CHANGED
@@ -1,17 +1,8 @@
1
  """
2
  processor_llm.py — Tier 3: LLM-based Classifier
3
- Used for:
4
- - LegacyCRM logs (Workflow Error, Deprecation Warning)
5
- - BERT fallback when confidence < threshold
6
- Production hardening in V3:
7
- - Timeout (configurable, default 5s)
8
- - Retry with exponential backoff (max 2 retries)
9
- - Explicit failure modes: returns "Unclassified" on all error paths
10
- - Token budget enforcement (max_tokens=15)
11
  """
12
  from __future__ import annotations
13
  import os
14
- import re
15
  import time
16
  import logging
17
 
@@ -27,8 +18,8 @@ VALID_CATEGORIES = ["Workflow Error", "Deprecation Warning"]
27
 
28
  # Retry / timeout config
29
  MAX_RETRIES = 2
30
- RETRY_DELAY_SEC = 1.0 # doubles on each retry (exponential backoff)
31
- REQUEST_TIMEOUT = 5 # seconds β€” fail fast, do not hang pipeline
32
 
33
  SYSTEM_PROMPT = (
34
  "You are an enterprise log classifier. "
@@ -67,17 +58,14 @@ def _build_messages(log_msg: str) -> list[dict]:
67
  {"role": "user", "content": user_content},
68
  ]
69
 
70
-
71
  # ── Normalize raw LLM output ─────────────────────────────────────────────────
72
  def _normalize(raw: str) -> str:
73
- """Map raw LLM output to a valid category or 'Unclassified'."""
74
  raw = raw.strip().strip('"').strip("'")
75
  for cat in VALID_CATEGORIES:
76
  if cat.lower() in raw.lower():
77
  return cat
78
  return "Unclassified"
79
 
80
-
81
  # ── Main classify function ────────────────────────────────────────────────────
82
  def classify_with_llm(log_msg: str) -> str:
83
  """
@@ -86,7 +74,6 @@ def classify_with_llm(log_msg: str) -> str:
86
  - Retry with exponential backoff (MAX_RETRIES attempts)
87
  - Explicit fallback to "Unclassified" on all error paths
88
  """
89
- # ── Inference with retry ─────────────────────────────────────────────────
90
  if not HF_TOKEN:
91
  logger.warning("[LLM] HF_TOKEN not set β€” returning Unclassified")
92
  return "Unclassified"
@@ -95,9 +82,8 @@ def classify_with_llm(log_msg: str) -> str:
95
 
96
  client = InferenceClient(token=HF_TOKEN, timeout=REQUEST_TIMEOUT)
97
  delay = RETRY_DELAY_SEC
98
- last_err: Optional[Exception] = None
99
 
100
- for attempt in range(1, MAX_RETRIES + 2): # +2: initial + MAX_RETRIES
101
  try:
102
  response = client.chat.completions.create(
103
  model=LLM_MODEL,
@@ -112,38 +98,19 @@ def classify_with_llm(log_msg: str) -> str:
112
  return label
113
 
114
  except Exception as e:
115
- # 🚨 JUGAD: Agar credits khatam hain (402), toh turant fallback do
116
- # Isse UI hang nahi hoga aur retry ka wait nahi karna padega
117
  if "402" in str(e) or "credits" in str(e).lower():
118
- logger.error(f"[LLM] Credits Finished (402). Returning Fallback Label.")
119
- return "Escalated: Manual Review Required (API Limit)"
120
 
121
- last_err = e
122
  if attempt <= MAX_RETRIES:
123
  logger.warning(f"[LLM] Attempt {attempt} failed ({e}), retrying in {delay:.1f}s…")
124
  time.sleep(delay)
125
- delay *= 2 # exponential backoff
126
  else:
127
  logger.error(f"[LLM] All attempts failed. Last error: {e}")
128
 
129
  return "Unclassified"
130
 
131
-
132
- # ── Batch classify (serial β€” LLM is already rate-limited) ────────────────────
133
  def classify_batch_llm(log_msgs: list[str]) -> list[str]:
134
- """Classify multiple logs through LLM. Each call is sequential to respect rate limits."""
135
- return [classify_with_llm(msg) for msg in log_msgs]
136
-
137
-
138
- # ── CLI test ─────────────────────────────────────────────────────────────────
139
- if __name__ == "__main__":
140
- logging.basicConfig(level=logging.INFO)
141
-
142
- test_logs = [
143
- "Case escalation for ticket ID 7324 failed because the assigned support agent is no longer active.",
144
- "The 'ReportGenerator' module will be retired in version 4.0. Migrate to 'AdvancedAnalyticsSuite'.",
145
- "System reboot initiated by user 12345.", # should be Unclassified
146
- ]
147
- for log in test_logs:
148
- result = classify_with_llm(log)
149
- print(f"{result:25s} | {log[:80]}")
 
1
  """
2
  processor_llm.py — Tier 3: LLM-based Classifier
 
 
 
 
 
 
 
 
3
  """
4
  from __future__ import annotations
5
  import os
 
6
  import time
7
  import logging
8
 
 
18
 
19
  # Retry / timeout config
20
  MAX_RETRIES = 2
21
+ RETRY_DELAY_SEC = 1.0
22
+ REQUEST_TIMEOUT = 5
23
 
24
  SYSTEM_PROMPT = (
25
  "You are an enterprise log classifier. "
 
58
  {"role": "user", "content": user_content},
59
  ]
60
 
 
61
  # ── Normalize raw LLM output ─────────────────────────────────────────────────
62
  def _normalize(raw: str) -> str:
 
63
  raw = raw.strip().strip('"').strip("'")
64
  for cat in VALID_CATEGORIES:
65
  if cat.lower() in raw.lower():
66
  return cat
67
  return "Unclassified"
68
 
 
69
  # ── Main classify function ────────────────────────────────────────────────────
70
  def classify_with_llm(log_msg: str) -> str:
71
  """
 
74
  - Retry with exponential backoff (MAX_RETRIES attempts)
75
  - Explicit fallback to "Unclassified" on all error paths
76
  """
 
77
  if not HF_TOKEN:
78
  logger.warning("[LLM] HF_TOKEN not set β€” returning Unclassified")
79
  return "Unclassified"
 
82
 
83
  client = InferenceClient(token=HF_TOKEN, timeout=REQUEST_TIMEOUT)
84
  delay = RETRY_DELAY_SEC
 
85
 
86
+ for attempt in range(1, MAX_RETRIES + 2):
87
  try:
88
  response = client.chat.completions.create(
89
  model=LLM_MODEL,
 
98
  return label
99
 
100
  except Exception as e:
101
+ # FIXED: Return standard "Unclassified" so we don't pollute the CSV
 
102
  if "402" in str(e) or "credits" in str(e).lower():
103
+ logger.error(f"[LLM] Credits Finished (402). Returning Unclassified.")
104
+ return "Unclassified"
105
 
 
106
  if attempt <= MAX_RETRIES:
107
  logger.warning(f"[LLM] Attempt {attempt} failed ({e}), retrying in {delay:.1f}s…")
108
  time.sleep(delay)
109
+ delay *= 2
110
  else:
111
  logger.error(f"[LLM] All attempts failed. Last error: {e}")
112
 
113
  return "Unclassified"
114
 
 
 
115
  def classify_batch_llm(log_msgs: list[str]) -> list[str]:
116
+ return [classify_with_llm(msg) for msg in log_msgs]