# File size: 2,537 Bytes
# cf17729
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# (removed: line-number gutter artifact from a web scrape of this file)
# import torch
# from transformers import T5ForConditionalGeneration, T5Tokenizer
# from peft import LoraConfig, get_peft_model, TaskType

# device = "mps" if torch.backends.mps.is_available() else "cpu"

# MODEL_PATH = "../outputs/model"   # your supervised trained model

# print("Loading base model...")
# model = T5ForConditionalGeneration.from_pretrained(MODEL_PATH).to(device)

# tokenizer = T5Tokenizer.from_pretrained("t5-small")

# # ---------------- LoRA CONFIG ----------------
# lora_config = LoraConfig(
#     r=8,                       # rank (small brain attachment)
#     lora_alpha=16,
#     target_modules=["q", "v"], # attention matrices only
#     lora_dropout=0.05,
#     bias="none",
#     task_type=TaskType.SEQ_2_SEQ_LM
# )

# print("Attaching LoRA adapters...")
# model = get_peft_model(model, lora_config)

# model.print_trainable_parameters()

# print("READY ✔ LoRA model loaded")

# ---------------- TASK 5: quantization + LoRA ----------------
import torch
from transformers import T5ForConditionalGeneration, T5Tokenizer
from peft import LoraConfig, get_peft_model, TaskType

# ---------------- DEVICE SETUP ----------------
# Prefer the Apple-silicon GPU (MPS backend) when present; otherwise run on CPU.
if torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

# Path to the supervised fine-tuned checkpoint produced by the earlier task.
MODEL_PATH = "../outputs/model"

# ---------------- LOAD TOKENIZER ----------------
# The tokenizer comes from the base "t5-small" model, not the fine-tuned
# checkpoint directory (fine-tuning did not change the vocabulary).
tokenizer = T5Tokenizer.from_pretrained("t5-small")

# ---------------- LOAD MODEL WITH QUANTIZATION ----------------
def load_model(quantization=None):
    """Load the fine-tuned T5 model, optionally quantized.

    Parameters
    ----------
    quantization : str or None
        None  -> full-precision (fp32) load, moved to `device`.
        "int8" -> 8-bit quantized load via bitsandbytes (CUDA only).
        "int4" -> 4-bit quantized load via bitsandbytes (CUDA only).

    Returns
    -------
    T5ForConditionalGeneration
        The loaded model.

    Raises
    ------
    ValueError
        If `quantization` is not one of None, "int8", "int4" — previously an
        unrecognized value (e.g. a typo like "int-8") silently fell back to
        fp32, which masked misconfiguration.
    """
    print(f"Loading model with quantization = {quantization}")

    # NOTE(review): the bare `load_in_8bit` / `load_in_4bit` kwargs are
    # deprecated in recent transformers releases in favor of
    # `quantization_config=BitsAndBytesConfig(...)`; kept as-is here to avoid
    # changing the runtime dependency surface. bitsandbytes also requires
    # CUDA, so these paths will not work on the MPS/CPU device above —
    # TODO confirm target hardware.
    if quantization == "int8":
        model = T5ForConditionalGeneration.from_pretrained(
            MODEL_PATH,
            load_in_8bit=True,
            device_map="auto"
        )

    elif quantization == "int4":
        model = T5ForConditionalGeneration.from_pretrained(
            MODEL_PATH,
            load_in_4bit=True,
            device_map="auto"
        )

    elif quantization is None:  # fp32: plain load, explicit device placement
        model = T5ForConditionalGeneration.from_pretrained(MODEL_PATH).to(device)

    else:
        raise ValueError(
            f"Unknown quantization mode: {quantization!r} "
            "(expected None, 'int8', or 'int4')"
        )

    return model


# 👉 CHANGE THIS VALUE TO TEST
QUANTIZATION = "int8"   # options: None, "int8", "int4"

model = load_model(QUANTIZATION)


# ---------------- LoRA CONFIG ----------------
# Adapter hyperparameters: low-rank (r=8) updates on the attention query and
# value projections only, with a small dropout and no bias training.
_lora_settings = dict(
    r=8,
    lora_alpha=16,
    target_modules=["q", "v"],
    lora_dropout=0.05,
    bias="none",
    task_type=TaskType.SEQ_2_SEQ_LM,
)
lora_config = LoraConfig(**_lora_settings)

print("Attaching LoRA adapters...")
# Wrap the base model so only the LoRA parameters are trainable.
model = get_peft_model(model, lora_config)

# Report trainable vs. total parameter counts (LoRA should be a tiny fraction).
model.print_trainable_parameters()

print("READY ✔ LoRA + Quantized model loaded")