"""Apply AWQ 4-bit quantization to a merged fine-tuned model and save the result.

Flat pipeline: load the merged (LoRA-merged) checkpoint in fp16, build an AWQ
config, quantize, and write the quantized model to disk.
"""
import os

import torch
from transformers import AutoModelForCausalLM
# NOTE(review): `AWQ4BitConfig` / `prepare_model` do not match the public
# AutoAWQ API (`awq.AutoAWQForCausalLM` + `model.quantize(...)`) — confirm
# which `awq` package this script targets before running.
from awq import AWQ4BitConfig, prepare_model

# Hard-coded paths kept as module-level constants so they are easy to audit/change.
MERGED_MODEL_DIR = "/Users/walidsobhi/.openclaw/workspace/stack-2.9-training/output/stack-2.9-merged"
OUTPUT_DIR = "/Users/walidsobhi/.openclaw/workspace/stack-2.9-training/output/stack-2.9-awq"


def main() -> None:
    """Load the merged model, AWQ-quantize it, and save to OUTPUT_DIR."""
    # Load the merged model in half precision.
    # BUG FIX: the original also passed load_in_4bit=True, which loads
    # bitsandbytes-quantized weights. AWQ must be applied to full/half
    # precision weights — quantizing an already-4bit model is incorrect,
    # and it conflicts with torch_dtype=torch.float16 at load time.
    merged_model = AutoModelForCausalLM.from_pretrained(
        MERGED_MODEL_DIR,
        torch_dtype=torch.float16,
        device_map="auto",
    )

    # AWQ quantization settings.
    # NOTE(review): mixing bitsandbytes (`bnb_*`) options into an AWQ config
    # is unusual — AWQ and bnb are separate quantization backends; verify
    # these keys are actually consumed by AWQ4BitConfig.
    awq_config = AWQ4BitConfig(
        num_groups=32,
        min_coeff=0.01,
        max_coeff=1.0,
        bnb_config={
            "bnb_4bit": True,
            "bnb_use_double_quant": True,
            "bnb_use_mixed_qembedding": True,
        },
    )

    # Apply AWQ quantization to the fp16 weights.
    quantized_model = prepare_model(merged_model, awq_config)

    # Persist the quantized model.
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    quantized_model.save_pretrained(OUTPUT_DIR)

    print("Successfully applied AWQ quantization")
    print(f"Quantized model saved to: {OUTPUT_DIR}")
    print(f"Quantized model has {quantized_model.num_parameters()} parameters")


if __name__ == "__main__":
    main()