saadkhi committed
Commit bb16527 · verified
1 Parent(s): 60e496e

Update app.py

Files changed (1)
  1. app.py +31 -58
app.py CHANGED
@@ -1,102 +1,75 @@
-# CPU SAFE HuggingFace Space (2026 stable)
-
 import warnings
 warnings.filterwarnings("ignore")
 
 import torch
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM
-from peft import PeftModel
-from transformers import AutoConfig
 
-# reduce CPU overload on free tier
 torch.set_num_threads(1)
 
-# ─────────────────────────
-# Config
-# ─────────────────────────
-BASE_MODEL = "unsloth/Phi-3-mini-4k-instruct-bnb-4bit"
-LORA_PATH = "saadkhi/SQL_Chat_finetuned_model"
-
-MAX_NEW_TOKENS = 180
+# ─────────────────────
+# MODEL
+# ─────────────────────
+BASE_MODEL = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
 
 print("Loading model...")
 
-# ─────────────────────────
-# Load base model
-# ─────────────────────────
-
-# Load config
-config = AutoConfig.from_pretrained(BASE_MODEL, trust_remote_code=True)
-
-# 🔴 IMPORTANT FIX
-# Replace quantization config with empty dict (NOT None)
-config.quantization_config = {}
-
 model = AutoModelForCausalLM.from_pretrained(
     BASE_MODEL,
-    config=config,
     device_map="cpu",
     torch_dtype=torch.float32,
-    trust_remote_code=True,
     low_cpu_mem_usage=True,
 )
 
-print("Loading LoRA...")
-model = PeftModel.from_pretrained(model, LORA_PATH)
-
-print("Merging LoRA...")
-model = model.merge_and_unload()
-
 tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
 
 model.eval()
 print("Model ready")
 
-
-# ─────────────────────────
-# Inference
-# ─────────────────────────
+# ─────────────────────
+# GENERATION
+# ─────────────────────
 def generate_sql(question):
-    if not question:
-        return "Enter a SQL question."
+    if not question.strip():
+        return "Enter SQL question."
 
-    messages = [{"role": "user", "content": question}]
+    prompt = f"""
+You are a SQL expert.
+Convert the user request into SQL query only.
 
-    input_ids = tokenizer.apply_chat_template(
-        messages,
-        tokenize=True,
-        add_generation_prompt=True,
-        return_tensors="pt",
-    )
+User: {question}
+SQL:
+"""
+
+    inputs = tokenizer(prompt, return_tensors="pt")
 
     with torch.no_grad():
         output = model.generate(
-            input_ids,
-            max_new_tokens=MAX_NEW_TOKENS,
-            temperature=0,
+            **inputs,
+            max_new_tokens=120,
+            temperature=0.2,
            do_sample=False,
             pad_token_id=tokenizer.eos_token_id,
         )
 
     text = tokenizer.decode(output[0], skip_special_tokens=True)
 
-    # clean artifacts
-    for t in ["<|assistant|>", "<|user|>", "<|end|>"]:
-        text = text.replace(t, "")
-
-    return text.strip()
+    return text.split("SQL:")[-1].strip()
 
-# ─────────────────────────
+# ─────────────────────
 # UI
-# ─────────────────────────
+# ─────────────────────
 demo = gr.Interface(
     fn=generate_sql,
     inputs=gr.Textbox(lines=3, label="SQL Question"),
     outputs=gr.Textbox(lines=8, label="Generated SQL"),
-    title="SQL Chat – Phi-3 mini",
-    description="Free CPU Space. First response may take ~90s",
-    cache_examples=False,
+    title="SQL Generator (Portfolio Demo)",
+    description="Fast CPU model for portfolio demo.",
+    examples=[
+        ["Find duplicate emails in users table"],
+        ["Top 5 highest paid employees"],
+        ["Orders per customer last month"],
+    ],
 )
 
-demo.launch()
+demo.launch(server_name="0.0.0.0")
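
A note on the decoding flags in the new generate call: with do_sample=False, transformers runs greedy search and the temperature=0.2 argument has no effect (recent releases log a warning about unused sampling parameters). A minimal sketch of the two self-consistent variants, reusing the model, tokenizer, and inputs names from the script above:

    # Greedy decoding: deterministic; temperature is unused and can be dropped.
    output = model.generate(
        **inputs,
        max_new_tokens=120,
        do_sample=False,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Low-temperature sampling: temperature only takes effect when do_sample=True.
    output = model.generate(
        **inputs,
        max_new_tokens=120,
        do_sample=True,
        temperature=0.2,
        pad_token_id=tokenizer.eos_token_id,
    )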
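
Because output[0] contains the prompt tokens followed by the completion, the text.split("SQL:")[-1] cleanup relies on the prompt ending with "SQL:"; if the model emits another "SQL:" marker inside its answer, everything before the last occurrence is discarded. A more robust sketch, under the same variable names, slices off the prompt by token count before decoding:

    prompt_len = inputs["input_ids"].shape[1]
    completion = tokenizer.decode(
        output[0][prompt_len:],  # keep only the newly generated tokens
        skip_special_tokens=True,
    ).strip()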
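
TinyLlama/TinyLlama-1.1B-Chat-v1.0 also publishes a chat template in its tokenizer config, so the raw f-string prompt could be swapped for tokenizer.apply_chat_template, which inserts the special tokens the chat model was tuned on. A hedged sketch; the system/user message split below is an illustrative assumption, not part of this commit:

    # Assumed prompt split; any equivalent instruction wording would do.
    messages = [
        {"role": "system", "content": "You are a SQL expert. Reply with a SQL query only."},
        {"role": "user", "content": question},
    ]
    input_ids = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,  # append the assistant header so generation starts there
        return_tensors="pt",
    )
    with torch.no_grad():
        output = model.generate(
            input_ids,
            max_new_tokens=120,
            do_sample=False,
            pad_token_id=tokenizer.eos_token_id,
        )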