File size: 3,060 Bytes
4d1cb0c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a239938
 
 
 
 
4d1cb0c
a239938
 
 
 
 
 
 
4d1cb0c
 
a239938
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4d1cb0c
a239938
 
4d1cb0c
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# ==============================================================================
# PROJECT: DEPRESSION-DETECTION-USING-TWEETS
# AUTHORS: AMEY THAKUR & MEGA SATISH
# GITHUB (AMEY): https://github.com/Amey-Thakur
# GITHUB (MEGA): https://github.com/msatmod
# REPOSITORY: https://github.com/Amey-Thakur/DEPRESSION-DETECTION-USING-TWEETS
# RELEASE DATE: June 5, 2022
# LICENSE: MIT License
# DESCRIPTION: Utility module for tweet analysis predictions.
# ==============================================================================

import sys
import pickle
import warnings
import numpy as np
import pandas as pd
import spacy
import en_core_web_lg
# Configure sys.path to permit localized module discovery within the core directory
sys.path.append('./core')

import clean_utilities as CU

# Suppression of non-critical runtime warnings to maintain a clean console log
warnings.filterwarnings("ignore")

def tweet_prediction(tweet: str) -> int:
    """
    Placeholder definition of the tweet classification entry point.

    NOTE(review): this definition consists of a docstring only, so calling it
    would return None rather than the annotated int. The name
    `tweet_prediction` is bound a second time by a complete definition further
    down in this module, and that later binding is the one callers see once
    the module has finished importing. This stub looks like a leftover and is
    a candidate for removal -- confirm before deleting.

    Args:
        tweet (str): The tweet text from the user (unused by this stub).

    Returns:
        None: there are no statements after this docstring.
    """
# ------------------------------------------------------------------------------
# Import-time setup of the heavy inference resources.
# Both objects below are created once, when this module is first imported, and
# are then reused by every prediction call instead of being rebuilt per request.
# If either resource cannot be loaded, the error is printed and the process
# exits with status 1.
# ------------------------------------------------------------------------------

def _abort_startup(message):
    """Print `message` and terminate the interpreter with exit status 1."""
    print(message)
    sys.exit(1)


# Resource 1: the spaCy language pipeline shipped as the en_core_web_lg package
try:
    nlp_engine = en_core_web_lg.load()
except Exception as e:
    _abort_startup(f"Error loading spaCy model: {e}")

# Resource 2: the pickled SVM classifier, read from a path relative to the
# current working directory.
# NOTE: unpickling can execute code embedded in the file, so this model file
# must only ever come from a trusted source.
model_path = "./assets/models/model_svm1.pkl"
try:
    with open(model_path, 'rb') as model_file:
        classifier = pickle.load(model_file)
except Exception as e:
    _abort_startup(f"Error loading SVM model from {model_path}: {e}")

def tweet_prediction(tweet: str) -> int:
    """
    Classify a single tweet as depressive (1) or non-depressive (0).

    The process:
        1. Clean the text with `CU.tweets_cleaner`.
        2. Run the cleaned text through the module-level spaCy engine and
           average the token vectors into one fixed-length embedding.
        3. Pass that embedding to the module-level classifier.

    If the cleaned text produces no tokens at all, an all-zero embedding is
    classified instead. Averaging an empty set of vectors would otherwise
    yield NaN and hand the classifier a feature row made entirely of NaN.

    Args:
        tweet (str): The tweet text from the user.

    Returns:
        int: 1 for Depressive, 0 for Non-depressive.
    """
    # Width of the feature row handed to the classifier; this is the same 300
    # the embedding has always been broadcast against.
    embedding_size = 300

    # Step 1: Clean the text
    cleaned_text = CU.tweets_cleaner(tweet)

    # Step 2: Compute the centroid of the token embeddings
    # The mean vector over all tokens represents the tweet's semantic context.
    # The module-level 'nlp_engine' is reused here, avoiding reload overhead.
    token_vectors = [token.vector for token in nlp_engine(cleaned_text)]
    if token_vectors:
        centroid = np.array(token_vectors).mean(axis=0)
    else:
        # No tokens survived cleaning: the mean of an empty array is NaN, so
        # fall back to a neutral all-zero embedding of the expected width.
        centroid = np.zeros(embedding_size)

    # Multiplying by a ones-vector leaves the values unchanged; it promotes the
    # result to np.ones' default float64 and raises if the centroid cannot be
    # broadcast to the expected width.
    vector = centroid * np.ones(embedding_size)
    semantic_vectors = np.array([vector])  # one row: a batch holding one sample

    # Step 3: Perform binary classification with the module-level 'classifier'
    prediction_result = classifier.predict(semantic_vectors)

    return int(prediction_result[0])