NguyenDinhHieu committed on
Commit
63a8d84
·
verified ·
1 Parent(s): 4b06903

Update appv2.py

Browse files
Files changed (1) hide show
  1. appv2.py +140 -152
appv2.py CHANGED
@@ -1,153 +1,141 @@
1
- from __future__ import annotations
2
-
3
- from langchain_core.output_parsers import StrOutputParser
4
- from langchain_core.prompts import PromptTemplate
5
- from langchain_community.llms import LlamaCpp
6
-
7
- import ast
8
- import atexit
9
- import os
10
- import re
11
- import sys
12
-
13
- FENCE_RE = re.compile(r"```(?:python)?\s*([\s\S]*?)\s*```", flags=re.IGNORECASE)
14
- TRAILING_PARENS_RE = re.compile(r"\)\)\s*$", flags=re.MULTILINE)
15
-
16
- # Install (Python env):
17
- # - pip install langchain langchain-community
18
- # - pip install llama-cpp-python
19
- # - pip install gpt4all (optional: if using LLM_BACKEND=gpt4all) $env:LLM_BACKEND='gpt4all'
20
-
21
-
22
- def _force_utf8_stdio() -> None:
23
- try:
24
- if hasattr(sys.stdout, "reconfigure"):
25
- sys.stdout.reconfigure(encoding="utf-8")
26
- if hasattr(sys.stderr, "reconfigure"):
27
- sys.stderr.reconfigure(encoding="utf-8")
28
- except Exception:
29
- pass
30
-
31
- # =====================
32
- # Config
33
- # =====================
34
- MODEL_FILE = "Cube-Python_v2.gguf"
35
- N_CTX = 4096
36
- TEMPERATURE = 0.1
37
- N_GPU_LAYERS = -1 # llama.cpp: -1 = try push all to GPU, set 0 to force CPU
38
-
39
- LLM_BACKEND = os.getenv("LLM_BACKEND", "llamacpp").strip().lower()
40
- MAX_FIX_ATTEMPTS = 2
41
-
42
- def load_llm():
43
- base_path = os.path.dirname(os.path.abspath(__file__))
44
- model_path = os.path.join(base_path, MODEL_FILE)
45
-
46
- if not os.path.exists(model_path):
47
- raise FileNotFoundError(f"Không tìm thấy file model tại: {model_path}")
48
-
49
- if LLM_BACKEND in {"gpt4all", "gpt4allcpp"}:
50
- try:
51
- from langchain_community.llms import GPT4All
52
- except Exception as e:
53
- raise RuntimeError(
54
- "Chưa cài GPT4All cho LangChain. Cài bằng:\n"
55
- " pip install gpt4all langchain-community\n"
56
- f"Chi tiết: {e}"
57
- )
58
-
59
- return GPT4All(model=model_path, temp=TEMPERATURE, verbose=False)
60
-
61
- return LlamaCpp(
62
- model_path=model_path,
63
- n_gpu_layers=N_GPU_LAYERS,
64
- n_ctx=N_CTX,
65
- temperature=TEMPERATURE,
66
- verbose=False, # Tắt log rác
67
- )
68
-
69
- def close_llm_safely(llm):
70
- try:
71
- client = getattr(llm, "client", None)
72
- close = getattr(client, "close", None)
73
- if callable(close):
74
- close()
75
- except Exception:
76
- pass
77
-
78
- def extract_python_code(text: str) -> str:
79
- if not text:
80
- return ""
81
-
82
- m = FENCE_RE.search(text)
83
- if m:
84
- return m.group(1).strip()
85
-
86
- return text.strip()
87
-
88
- def _syntax_error_message(code: str) -> str | None:
89
- try:
90
- ast.parse(code)
91
- return None
92
- except SyntaxError:
93
- # Re-parse to get rich info (cheap vs model inference, and avoids duplicate logic).
94
- try:
95
- ast.parse(code)
96
- return None
97
- except SyntaxError as e:
98
- line = (e.text or "").strip()
99
- where = f"line {e.lineno}, col {e.offset}" if e.lineno and e.offset else "unknown location"
100
- return f"{e.msg} ({where}). Offending line: {line}"
101
-
102
-
103
- def is_valid_python(code: str) -> bool:
104
- return _syntax_error_message(code) is None
105
-
106
-
107
- def generate_code(chain, question: str) -> str:
108
- raw = chain.invoke({"question": question})
109
- code = extract_python_code(raw)
110
-
111
- for _ in range(MAX_FIX_ATTEMPTS):
112
- err = _syntax_error_message(code)
113
- if err is None:
114
- return code
115
-
116
- raw = chain.invoke(
117
- {
118
- "question": (
119
- "Output trước bị sai pháp Python.\n"
120
- f"Lỗi: {err}\n\n"
121
- f"Output trước:\n{raw}\n\n"
122
- "Hãy trả lại code Python ĐÚNG cú pháp, chỉ code, không markdown."
123
- )
124
- }
125
- )
126
- code = extract_python_code(raw)
127
-
128
- code2 = TRAILING_PARENS_RE.sub(")", code)
129
- return code2 if is_valid_python(code2) else code
130
-
131
- template = """[INST] Bạn là một trợ lý AI chuyên nghiệp về lập trình Python.
132
- Hãy viết code Python chất lượng cao để giải quyết yêu cầu sau.
133
- Chỉ trả lời bằng code Python thuần (KHÔNG markdown, KHÔNG giải thích).
134
- Yêu cầu: {question} [/INST]"""
135
-
136
- prompt = PromptTemplate(input_variables=["question"], template=template)
137
-
138
- _force_utf8_stdio()
139
- llm = load_llm()
140
- atexit.register(close_llm_safely, llm)
141
- chain = prompt | llm | StrOutputParser()
142
-
143
- question = '''
144
- Write a Python program that extracts all email addresses from a given text.
145
- Input:
146
- A text: "Contact us at support@nlp.com or info@textprocessing.ai for more details."
147
- Desired Output:
148
- ['support@nlp.com', 'info@textprocessing.ai']'''
149
-
150
- try:
151
- print(generate_code(chain, question))
152
- finally:
153
  close_llm_safely(llm)
 
1
+ from __future__ import annotations
2
+
3
+ from langchain_core.output_parsers import StrOutputParser
4
+ from langchain_core.prompts import PromptTemplate
5
+
6
+ import ast
7
+ import atexit
8
+ import os
9
+ import re
10
+ import sys
11
+
12
# Matches the first markdown code fence (optionally tagged ```python);
# group(1) is the fence's inner text. Non-greedy so only one fence is taken.
FENCE_RE = re.compile(r"```(?:python)?\s*([\s\S]*?)\s*```", flags=re.IGNORECASE)
# Matches a doubled closing paren at end of a line (MULTILINE), a common
# LLM artifact that generate_code() collapses to a single ")".
TRAILING_PARENS_RE = re.compile(r"\)\)\s*$", flags=re.MULTILINE)

# Install (Python env):
# - pip install langchain langchain-community
# - pip install gpt4all
19
+
20
+ def _force_utf8_stdio() -> None:
21
+ try:
22
+ if hasattr(sys.stdout, "reconfigure"):
23
+ sys.stdout.reconfigure(encoding="utf-8")
24
+ if hasattr(sys.stderr, "reconfigure"):
25
+ sys.stderr.reconfigure(encoding="utf-8")
26
+ except Exception:
27
+ pass
28
+
29
# =====================
# Config
# =====================
MODEL_FILE = "Cube-Python_v2.gguf"  # GGUF model expected next to this script
N_CTX = 4096  # context window size (tokens)
TEMPERATURE = 0.1  # low temperature -> near-deterministic code output
N_GPU_LAYERS = -1  # llama.cpp: -1 = try push all to GPU, set 0 to force CPU
# NOTE(review): N_CTX and N_GPU_LAYERS are not referenced by the GPT4All-only
# load_llm() below — confirm whether they are leftovers from the llama.cpp path.

# Maximum number of LLM round-trips used to repair syntactically invalid output.
MAX_FIX_ATTEMPTS = 2
39
def load_llm():
    """Load the local GGUF model through LangChain's GPT4All wrapper.

    Raises:
        FileNotFoundError: if the model file is not next to this script.
        RuntimeError: if the GPT4All LangChain integration is not installed.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    model_path = os.path.join(script_dir, MODEL_FILE)

    if not os.path.exists(model_path):
        raise FileNotFoundError(f"Không tìm thấy file model tại: {model_path}")

    # Import lazily so a missing optional dependency yields an actionable error.
    try:
        from langchain_community.llms import GPT4All
    except Exception as e:
        msg = (
            "Chưa cài GPT4All cho LangChain. Cài bằng:\n"
            " pip install gpt4all langchain-community\n"
            f"Chi tiết: {e}"
        )
        raise RuntimeError(msg)

    return GPT4All(model=model_path, temp=TEMPERATURE, verbose=False)
56
+
57
def close_llm_safely(llm):
    """Close the wrapped native client of *llm*, swallowing all errors.

    Looks for a callable ``close`` on ``llm.client`` (the underlying
    backend object) and invokes it when present; a missing attribute or a
    failing close is ignored so shutdown never raises.
    """
    try:
        closer = getattr(getattr(llm, "client", None), "close", None)
        if callable(closer):
            closer()
    except Exception:
        pass
65
+
66
def extract_python_code(text: str) -> str:
    """Pull Python source out of an LLM reply.

    Returns the contents of the first markdown code fence when one is
    present, otherwise the whole reply; the result is always stripped.
    Empty or falsy input yields "".
    """
    if not text:
        return ""
    match = FENCE_RE.search(text)
    return match.group(1).strip() if match else text.strip()
75
+
76
+ def _syntax_error_message(code: str) -> str | None:
77
+ try:
78
+ ast.parse(code)
79
+ return None
80
+ except SyntaxError:
81
+ # Re-parse to get rich info (cheap vs model inference, and avoids duplicate logic).
82
+ try:
83
+ ast.parse(code)
84
+ return None
85
+ except SyntaxError as e:
86
+ line = (e.text or "").strip()
87
+ where = f"line {e.lineno}, col {e.offset}" if e.lineno and e.offset else "unknown location"
88
+ return f"{e.msg} ({where}). Offending line: {line}"
89
+
90
+
91
def is_valid_python(code: str) -> bool:
    """Report whether *code* parses as syntactically valid Python."""
    err = _syntax_error_message(code)
    return err is None
93
+
94
+
95
def generate_code(chain, question: str) -> str:
    """Generate Python code for *question*, retrying on syntax errors.

    Invokes *chain* once, extracts code from the reply, then — for up to
    ``MAX_FIX_ATTEMPTS`` rounds — feeds any syntax error back to the model
    asking for a corrected version. As a last resort a common LLM artifact
    (a doubled ``))`` at end of line) is collapsed, and that variant is
    returned only if it actually parses; otherwise the last extraction is
    returned as-is.

    Fix: the repair prompt read "sai pháp Python" — a typo dropping the
    word "cú" ("sai cú pháp" = "wrong syntax"); restored so the model is
    told correctly what is wrong.
    """
    raw = chain.invoke({"question": question})
    code = extract_python_code(raw)

    for _ in range(MAX_FIX_ATTEMPTS):
        err = _syntax_error_message(code)
        if err is None:
            return code

        # Feed the full previous reply (not just the extracted code) back,
        # so the model sees exactly what it produced.
        raw = chain.invoke(
            {
                "question": (
                    "Output trước bị sai cú pháp Python.\n"
                    f"Lỗi: {err}\n\n"
                    f"Output trước:\n{raw}\n\n"
                    "Hãy trả lại code Python ĐÚNG cú pháp, chỉ code, không markdown."
                )
            }
        )
        code = extract_python_code(raw)

    # Heuristic last-chance repair: collapse a trailing "))" to ")".
    code2 = TRAILING_PARENS_RE.sub(")", code)
    return code2 if is_valid_python(code2) else code
118
+
119
# System prompt (Vietnamese): "You are a professional AI assistant for Python
# programming. Write high-quality Python code for the request below. Reply
# with plain Python code only (NO markdown, NO explanation)."
# NOTE(review): this revision read "Bạn một trợ AI" — the words "là"/"lý"
# were dropped (the previous revision had "Bạn là một trợ lý AI"), leaving
# ungrammatical Vietnamese; the original wording is restored.
template = """[INST] Bạn là một trợ lý AI chuyên nghiệp về lập trình Python.
Hãy viết code Python chất lượng cao để giải quyết yêu cầu sau.
Chỉ trả lời bằng code Python thuần (KHÔNG markdown, KHÔNG giải thích).
Yêu cầu: {question} [/INST]"""

prompt = PromptTemplate(input_variables=["question"], template=template)
125
+
126
# --- Script entry: wire everything up and run one demo generation. ---
_force_utf8_stdio()
llm = load_llm()
# Ensure the native model handle is released even on abnormal interpreter exit.
atexit.register(close_llm_safely, llm)
chain = prompt | llm | StrOutputParser()

# Demo request: extract email addresses from a text.
question = '''
Write a Python program that extracts all email addresses from a given text.
Input:
A text: "Contact us at support@nlp.com or info@textprocessing.ai for more details."
Desired Output:
['support@nlp.com', 'info@textprocessing.ai']'''

try:
    print(generate_code(chain, question))
finally:
    # Explicit close on the normal path; the atexit hook covers the rest
    # (close_llm_safely is idempotent/error-swallowing, so double close is safe).
    close_llm_safely(llm)