# ==============================================================================
# PROJECT: DEPRESSION-DETECTION-USING-TWEETS
# AUTHORS: AMEY THAKUR & MEGA SATISH
# GITHUB (AMEY): https://github.com/Amey-Thakur
# GITHUB (MEGA): https://github.com/msatmod
# REPOSITORY: https://github.com/Amey-Thakur/DEPRESSION-DETECTION-USING-TWEETS
# RELEASE DATE: June 5, 2022
# LICENSE: MIT License
# DESCRIPTION: Utility module for tweet analysis predictions.
# ==============================================================================
import sys
import pickle
import warnings
import numpy as np
import pandas as pd
import spacy
import en_core_web_lg
# Configure sys.path to permit localized module discovery within the core directory
sys.path.append('./core')
import clean_utilities as CU
# Suppression of non-critical runtime warnings to maintain a clean console log
warnings.filterwarnings("ignore")
# Global initialization of heavy resources to optimize runtime performance.
# Both objects are created exactly once, when this module is imported, and are
# then read as module-level globals by `tweet_prediction` (defined below), so
# individual prediction requests never pay the loading cost again.

# 1. Load spaCy NLP engine
# A failure here is unrecoverable for this module, so report it and terminate
# the process with a non-zero exit status.
try:
    nlp_engine = en_core_web_lg.load()
except Exception as e:
    print(f"Error loading spaCy model: {e}")
    sys.exit(1)

# 2. Load pre-trained SVM Classifier
# SECURITY NOTE: unpickling executes arbitrary code embedded in the file.
# Only ever point `model_path` at a model artifact shipped with (and trusted
# by) this project; never at user-supplied or downloaded data.
# The path is relative to the current working directory of the process.
model_path = "./assets/models/model_svm1.pkl"
try:
    with open(model_path, 'rb') as model_file:
        classifier = pickle.load(model_file)
except Exception as e:
    print(f"Error loading SVM model from {model_path}: {e}")
    sys.exit(1)
def tweet_prediction(tweet: str) -> int:
    """
    Classify a tweet as depressive (1) or non-depressive (0).

    The process:
    1. Clean the text using the clean_utilities module.
    2. Embed the cleaned text as the centroid (mean) of its spaCy token
       vectors, using the pre-loaded module-level ``nlp_engine``.
    3. Feed that single feature row to the pre-loaded module-level
       ``classifier`` and return its prediction.

    If cleaning leaves no tokens at all, a zero vector is used as the
    embedding so the classifier still receives finite input.

    Args:
        tweet (str): The tweet text from the user.

    Returns:
        int: 1 for Depressive, 0 for Non-depressive.
    """
    # Feature width passed to the classifier; the original code hard-coded
    # 300 for the same purpose.
    embedding_dim = 300

    # Step 1: Clean the text
    cleaned_text = CU.tweets_cleaner(tweet)

    # Step 2: Compute centroid word embeddings
    # The mean vector of all tokens represents the tweet's semantic context.
    token_vectors = [token.vector for token in nlp_engine(cleaned_text)]
    if token_vectors:
        centroid = np.mean(np.array(token_vectors), axis=0)
    else:
        # Averaging an empty array yields NaN, which would be handed to the
        # classifier as 300 NaN features. Use a neutral zero vector instead.
        centroid = np.zeros(embedding_dim)

    # The classifier expects a 2-D array: one row per sample. Casting to
    # float64 keeps the dtype the feature row has always had.
    semantic_vectors = np.asarray(centroid, dtype=np.float64).reshape(1, -1)

    # Step 3: Perform binary classification
    prediction_result = classifier.predict(semantic_vectors)
    return int(prediction_result[0])
|