File size: 3,559 Bytes
a62de9c
 
 
 
5717486
031045b
 
 
a62de9c
 
 
 
 
 
 
 
031045b
5717486
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
031045b
a62de9c
5717486
 
031045b
5717486
a62de9c
 
5717486
 
a62de9c
5717486
031045b
a62de9c
 
5717486
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# ============================================================
# SIMPLE EXAMPLE: How to Use Your Trained Model
# ============================================================

```python
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

# Step 1: Load the model and tokenizer from the local directory
# (This assumes you ran Cell 18 earlier to save the model)
# Both the model weights and the tokenizer are read from the same path, so
# the two always come from the same saved checkpoint.
model_path = "optimized-bert-model"
model = AutoModelForSequenceClassification.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Step 2: Put model in evaluation mode
# Everything below only runs inference; no training step follows.
model.eval()

# Step 3: Test on simple examples using a helper function

def predict_paraphrase(sentence1, sentence2):
    """Classify one sentence pair with the module-level ``model``.

    The two sentences are tokenized together as a single paired input
    (truncated to at most 128 tokens) and passed through ``model`` with
    gradient tracking disabled.

    Returns:
        A ``(prediction, confidence)`` tuple. ``prediction`` is the index of
        the largest logit; ``confidence`` is the largest softmax probability
        in the first row of the logits.
    """
    encoded = tokenizer(
        sentence1,
        sentence2,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=128,
    )
    # Inference only, so no autograd graph is recorded.
    with torch.no_grad():
        scores = model(**encoded).logits
        predicted_label = scores.argmax(dim=1).item()
        top_probability = scores.softmax(dim=1)[0].max().item()
    return predicted_label, top_probability

def display_result(example_idx, sentence1, sentence2):
    """Print a formatted paraphrase report for one sentence pair.

    Runs ``predict_paraphrase`` on the pair and writes a banner, both
    sentences, a YES/NO verdict (YES when the prediction equals 1) and the
    confidence to four decimal places, followed by a blank line.
    """
    prediction, confidence = predict_paraphrase(sentence1, sentence2)

    rule = "=" * 60
    if prediction == 1:
        verdict = "YES (paraphrases)"
    else:
        verdict = "NO (not paraphrases)"

    report = [
        rule,
        f"EXAMPLE {example_idx} - Are these paraphrases?",
        rule,
        f"Sentence 1: {sentence1}",
        f"Sentence 2: {sentence2}",
        f"Prediction: {verdict}",
        f"Confidence: {confidence:.4f}",
        "",  # trailing blank line separating consecutive reports
    ]
    for line in report:
        print(line)

# Example 1: a pair that is expected to be reported as paraphrases
sentence1_1, sentence2_1 = (
    "The cat is sleeping on the mat",
    "The cat is napping on the mat",
)
display_result(1, sentence1_1, sentence2_1)

# Example 2: a pair that is expected to be reported as NOT paraphrases
sentence1_2, sentence2_2 = (
    "The dog is barking loudly",
    "I love eating pizza",
)
display_result(2, sentence1_2, sentence2_2)

# Closing rule under the last report
print("=" * 60)

# -----------------------
# Try your own examples!
# -----------------------
# Uncomment and edit the sentences below to test your own custom examples:
# user_sentence1 = "Your first sentence here."
# user_sentence2 = "Your second sentence here."
# display_result("USER", user_sentence1, user_sentence2)
```

# ------------------------------------------------------------
# How to call/use this model:
# ------------------------------------------------------------
# 1. Make sure you have the saved model files in the directory 'optimized-bert-model'
# 2. Run this script in your Python environment (with 'transformers' and 'torch' installed)
# 3. Change the example sentences inside the code block above to your own inputs to test paraphrase detection
# 4. The script prints whether the sentences are paraphrases and gives a confidence score

# Sample Output:
# ============================================================
# EXAMPLE 1 - Are these paraphrases?
# ============================================================
# Sentence 1: The cat is sleeping on the mat
# Sentence 2: The cat is napping on the mat
# Prediction: YES (paraphrases)
# Confidence: 0.9998
#
# ============================================================
# EXAMPLE 2 - Are these paraphrases?
# ============================================================
# Sentence 1: The dog is barking loudly
# Sentence 2: I love eating pizza
# Prediction: NO (not paraphrases)
# Confidence: 0.9584
#
# ============================================================