File size: 2,362 Bytes
# ==============================================================================
# PROJECT: DEPRESSION-DETECTION-USING-TWEETS
# AUTHORS: AMEY THAKUR & MEGA SATISH
# GITHUB (AMEY): https://github.com/Amey-Thakur
# GITHUB (MEGA): https://github.com/msatmod
# REPOSITORY: https://github.com/Amey-Thakur/DEPRESSION-DETECTION-USING-TWEETS
# RELEASE DATE: June 5, 2022
# LICENSE: MIT License
# DESCRIPTION: Utility module for tweet analysis predictions.
# ==============================================================================
import sys
import pickle
import warnings
import numpy as np
import pandas as pd
import spacy
import en_core_web_lg
# Make the project's local helper modules importable: './core' is appended to
# the module search path so the `clean_utilities` import below can be resolved.
# NOTE(review): the entry is a relative path, so it presumably depends on the
# process being started from the project root -- confirm against the launcher.
sys.path.append('./core')
# Text-cleaning helpers; this module calls `CU.tweets_cleaner` on each tweet.
import clean_utilities as CU
# Install a blanket "ignore" filter so warnings raised after this point are not
# printed to the console. This also hides NumPy RuntimeWarnings, so numerical
# problems have to be guarded against explicitly in the code.
warnings.filterwarnings("ignore")
# Length of the feature vector handed to the classifier (the same 300 the
# embedding step has always broadcast against).
_EMBEDDING_DIM = 300

# Location of the pickled classifier, relative to the working directory.
_MODEL_PATH = "./assets/models/model_svm1.pkl"

# Lazily populated caches: both artifacts are costly to load, so they are read
# once on first use and then reused by every later prediction.
_NLP_ENGINE = None
_CLASSIFIER = None


def _get_nlp_engine():
    """Return the spaCy pipeline, loading it on the first call only."""
    global _NLP_ENGINE
    if _NLP_ENGINE is None:
        _NLP_ENGINE = en_core_web_lg.load()
    return _NLP_ENGINE


def _get_classifier():
    """Return the pickled classifier, reading it from disk on the first call only."""
    global _CLASSIFIER
    if _CLASSIFIER is None:
        # NOTE: unpickling can execute arbitrary code; the model file must
        # only ever come from a trusted source.
        with open(_MODEL_PATH, 'rb') as model_file:
            _CLASSIFIER = pickle.load(model_file)
    return _CLASSIFIER


def _centroid_vector(doc) -> np.ndarray:
    """Return the mean of the token vectors in ``doc`` as one feature vector."""
    token_vectors = [token.vector for token in doc]
    if not token_vectors:
        # Nothing survived cleaning/tokenisation. Averaging an empty array
        # would give NaN (silently, since warnings are filtered module-wide)
        # and poison the classifier input, so use a neutral zero vector.
        return np.zeros(_EMBEDDING_DIM)
    # Multiplying by a ones vector keeps the original numerics: np.ones is
    # float64, so the product is promoted, and a vector whose length is not
    # _EMBEDDING_DIM fails to broadcast instead of passing through unnoticed.
    return np.array(token_vectors).mean(axis=0) * np.ones(_EMBEDDING_DIM)


def tweet_prediction(tweet: str) -> int:
    """
    Classify a tweet as depressive (1) or non-depressive (0).

    The process:
    1. Clean the text with ``CU.tweets_cleaner``.
    2. Run the cleaned text through the spaCy pipeline and average the token
       vectors into a single centroid vector.
    3. Feed that vector to the pickled classifier stored at ``_MODEL_PATH``.

    The spaCy pipeline and the classifier are loaded on the first call and
    cached for subsequent calls.

    Args:
        tweet (str): The tweet text from the user.

    Returns:
        int: The first element of the classifier's prediction, cast to ``int``
        (1 for Depressive, 0 for Non-depressive). A tweet that yields no
        tokens after cleaning is classified from an all-zero vector.

    Raises:
        OSError: If the model file cannot be opened.
    """
    # Step 1: Clean the text
    cleaned_tweet = CU.tweets_cleaner(tweet)

    # Step 2: Convert the text to a centroid embedding using spaCy
    nlp_engine = _get_nlp_engine()
    # The classifier receives a 2-D batch, here holding a single sample.
    semantic_vectors = np.array([_centroid_vector(nlp_engine(cleaned_tweet))])

    # Step 3: Perform binary classification with the pre-trained model
    classifier = _get_classifier()
    prediction_result = classifier.predict(semantic_vectors)
    return int(prediction_result[0])
|