saadkhi committed
Commit a93df3d · verified · 1 Parent(s): b612c20

Update app.py

Files changed (1): app.py +7 -1
app.py CHANGED

@@ -7,6 +7,7 @@ import torch
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM
 from peft import PeftModel
+from transformers import AutoConfig

 # reduce CPU overload on free tier
 torch.set_num_threads(1)
@@ -24,8 +25,14 @@ print("Loading model...")
 # ─────────────────────────
 # Load base model
 # ─────────────────────────
+
+# load config first and REMOVE quantization
+config = AutoConfig.from_pretrained(BASE_MODEL, trust_remote_code=True)
+config.quantization_config = None  # 🔴 important fix
+
 model = AutoModelForCausalLM.from_pretrained(
     BASE_MODEL,
+    config=config,
     device_map="cpu",
     torch_dtype=torch.float32,
     trust_remote_code=True,
@@ -39,7 +46,6 @@ print("Merging LoRA...")
 model = model.merge_and_unload()

 tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
-
 model.eval()
 print("Model ready")
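
Since the diff shows only three hunks, the resulting load path is easiest to read in one piece. The sketch below reconstructs it under stated assumptions: BASE_MODEL is defined earlier in app.py and its actual value is not visible here, and the LoRA adapter is presumably attached between the second and third hunks via PeftModel.from_pretrained, so the ADAPTER_ID name is hypothetical. The point of the fix itself is that the base checkpoint's config apparently ships with a quantization_config (e.g. a bitsandbytes setup), which cannot be honored on a CPU-only free-tier Space, so it is cleared before the weights are materialized.

import torch
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

BASE_MODEL = "some-org/base-model"    # placeholder; the real id is set earlier in app.py
ADAPTER_ID = "some-org/lora-adapter"  # hypothetical; the adapter load is not shown in the diff

# Clear the quantization section before the weights are instantiated:
# a quantized (e.g. bitsandbytes) load would fail on a CPU-only Space.
config = AutoConfig.from_pretrained(BASE_MODEL, trust_remote_code=True)
config.quantization_config = None

model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    config=config,
    device_map="cpu",
    torch_dtype=torch.float32,
    trust_remote_code=True,
)

# Attach the LoRA adapter and fold its weights into the base model so
# inference no longer depends on peft at runtime.
model = PeftModel.from_pretrained(model, ADAPTER_ID)
model = model.merge_and_unload()

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
model.eval()
print("Model ready")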