# Cube-Python-1.0 / appv2.py
# (Hugging Face page residue commented out so the file is valid Python:
#  author avatar caption, commit message "Update appv2.py", commit 63a8d84 verified)
from __future__ import annotations
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
import ast
import atexit
import os
import re
import sys
# Matches the first ``` fenced block (optionally tagged "python", case-insensitive)
# and captures its body; used to strip markdown from model output.
FENCE_RE = re.compile(r"```(?:python)?\s*([\s\S]*?)\s*```", flags=re.IGNORECASE)
# Matches a doubled closing paren at end of line — a common LLM artifact;
# used as a last-resort repair in generate_code().
TRAILING_PARENS_RE = re.compile(r"\)\)\s*$", flags=re.MULTILINE)
# Install (Python env):
# - pip install langchain langchain-community
# - pip install gpt4all
def _force_utf8_stdio() -> None:
try:
if hasattr(sys.stdout, "reconfigure"):
sys.stdout.reconfigure(encoding="utf-8")
if hasattr(sys.stderr, "reconfigure"):
sys.stderr.reconfigure(encoding="utf-8")
except Exception:
pass
# =====================
# Config
# =====================
MODEL_FILE = "Cube-Python_v2.gguf"  # GGUF model expected next to this script
N_CTX = 4096  # context window (tokens); NOTE(review): never passed to GPT4All in load_llm — confirm intended
TEMPERATURE = 0.1  # low temperature for deterministic code generation
N_GPU_LAYERS = -1 # llama.cpp: -1 = try push all to GPU, set 0 to force CPU
# NOTE(review): N_GPU_LAYERS is also unused by load_llm — confirm whether the
# GPT4All wrapper should receive it (e.g. via a device/n_gpu_layers kwarg).
MAX_FIX_ATTEMPTS = 2  # how many times generate_code() asks the model to fix syntax
def load_llm():
    """Locate the GGUF model next to this script and wrap it in a LangChain GPT4All LLM.

    Raises:
        FileNotFoundError: when the model file is missing.
        RuntimeError: when langchain-community / gpt4all are not installed.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    model_path = os.path.join(script_dir, MODEL_FILE)
    if not os.path.exists(model_path):
        raise FileNotFoundError(f"Không tìm thấy file model tại: {model_path}")
    # Import lazily so a missing optional dependency produces a helpful message
    # instead of an ImportError at module load time.
    try:
        from langchain_community.llms import GPT4All
    except Exception as e:
        raise RuntimeError(
            "Chưa cài GPT4All cho LangChain. Cài bằng:\n"
            " pip install gpt4all langchain-community\n"
            f"Chi tiết: {e}"
        )
    return GPT4All(model=model_path, temp=TEMPERATURE, verbose=False)
def close_llm_safely(llm):
    """Call llm.client.close() when it exists and is callable; swallow any error.

    Safe to call multiple times and with llm=None (used both in the script's
    finally block and as an atexit hook).
    """
    try:
        closer = getattr(getattr(llm, "client", None), "close", None)
        if callable(closer):
            closer()
    except Exception:
        pass
def extract_python_code(text: str) -> str:
    """Return the body of the first markdown code fence in *text*.

    Falls back to the whole stripped text when no fence is present, and to ""
    for empty/None input.
    """
    if not text:
        return ""
    # Same pattern as the module-level FENCE_RE: optional "python" tag,
    # case-insensitive, non-greedy body capture.
    match = re.search(r"```(?:python)?\s*([\s\S]*?)\s*```", text, flags=re.IGNORECASE)
    return match.group(1).strip() if match else text.strip()
def _syntax_error_message(code: str) -> str | None:
try:
ast.parse(code)
return None
except SyntaxError:
# Re-parse to get rich info (cheap vs model inference, and avoids duplicate logic).
try:
ast.parse(code)
return None
except SyntaxError as e:
line = (e.text or "").strip()
where = f"line {e.lineno}, col {e.offset}" if e.lineno and e.offset else "unknown location"
return f"{e.msg} ({where}). Offending line: {line}"
def is_valid_python(code: str) -> bool:
    """True when *code* parses as syntactically valid Python."""
    err = _syntax_error_message(code)
    return err is None
def generate_code(chain, question: str) -> str:
    """Ask *chain* for Python code, retrying up to MAX_FIX_ATTEMPTS on syntax errors.

    Each retry feeds the syntax-error message and the previous raw output back
    to the model. If all attempts fail, a last-resort heuristic collapses a
    doubled trailing ")" and keeps the repair only when it actually parses.
    """
    raw = chain.invoke({"question": question})
    code = extract_python_code(raw)
    attempt = 0
    while attempt < MAX_FIX_ATTEMPTS:
        err = _syntax_error_message(code)
        if err is None:
            return code
        retry_prompt = (
            "Output trước bị sai cú pháp Python.\n"
            f"Lỗi: {err}\n\n"
            f"Output trước:\n{raw}\n\n"
            "Hãy trả lại code Python ĐÚNG cú pháp, chỉ code, không markdown."
        )
        raw = chain.invoke({"question": retry_prompt})
        code = extract_python_code(raw)
        attempt += 1
    # NOTE: the final regeneration is deliberately not re-checked above; the
    # paren heuristic below is the last chance, mirroring the original flow.
    repaired = TRAILING_PARENS_RE.sub(")", code)
    return repaired if is_valid_python(repaired) else code
# Prompt template: Mistral-style [INST] wrapper instructing the model to answer
# with raw Python only (no markdown, no explanation).
template = """[INST] Bạn là một trợ lý AI chuyên nghiệp về lập trình Python.
Hãy viết code Python chất lượng cao để giải quyết yêu cầu sau.
Chỉ trả lời bằng code Python thuần (KHÔNG markdown, KHÔNG giải thích).
Yêu cầu: {question} [/INST]"""
prompt = PromptTemplate(input_variables=["question"], template=template)

# Script entry: configure stdio encoding, load the local GGUF model, wire the
# LCEL chain, run one demo query, and print the generated code.
_force_utf8_stdio()
llm = load_llm()
# Safety net: release the model even if the interpreter exits abnormally.
atexit.register(close_llm_safely, llm)
chain = prompt | llm | StrOutputParser()

# Demo request fed verbatim to the model (runtime string — do not translate).
question = '''
Write a Python program that extracts all email addresses from a given text.
Input:
A text: "Contact us at support@nlp.com or info@textprocessing.ai for more details."
Desired Output:
['support@nlp.com', 'info@textprocessing.ai']'''
try:
    print(generate_code(chain, question))
finally:
    # close_llm_safely is best-effort/idempotent, so the atexit hook firing
    # again afterwards is harmless.
    close_llm_safely(llm)