import os

import torch
from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer
# Load the merged model in fp16. AWQ quantizes from full/half-precision
# weights, so do not pre-quantize with bitsandbytes (no load_in_4bit here).
model_path = "/Users/walidsobhi/.openclaw/workspace/stack-2.9-training/output/stack-2.9-merged"
merged_model = AutoAWQForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.float16,
    device_map="auto",
)

# The tokenizer is needed for the calibration pass during quantization
tokenizer = AutoTokenizer.from_pretrained(model_path)
# AWQ quantization settings: 4-bit weights with zero-point quantization and
# group size 128 (the values AutoAWQ's examples commonly use)
quant_config = {
    "zero_point": True,
    "q_group_size": 128,
    "w_bit": 4,
    "version": "GEMM",
}
# Run activation-aware quantization; AutoAWQ calibrates on a small
# built-in dataset by default
merged_model.quantize(tokenizer, quant_config=quant_config)
# Save the quantized weights, with the tokenizer alongside them
output_dir = "/Users/walidsobhi/.openclaw/workspace/stack-2.9-training/output/stack-2.9-awq"
os.makedirs(output_dir, exist_ok=True)
merged_model.save_quantized(output_dir)
tokenizer.save_pretrained(output_dir)

print("Successfully applied AWQ quantization")
print(f"Quantized model saved to: {output_dir}")
print(f"Quantized model has {merged_model.model.num_parameters()} parameters")