# Source: Hugging Face Space "ameythakur/Depression-Detection-Using-Tweets" (status: Running)
# File size: 3,060 Bytes

# ==============================================================================
# PROJECT: DEPRESSION-DETECTION-USING-TWEETS
# AUTHORS: AMEY THAKUR & MEGA SATISH
# GITHUB (AMEY): https://github.com/Amey-Thakur
# GITHUB (MEGA): https://github.com/msatmod
# REPOSITORY: https://github.com/Amey-Thakur/DEPRESSION-DETECTION-USING-TWEETS
# RELEASE DATE: June 5, 2022
# LICENSE: MIT License
# DESCRIPTION: Utility module for tweet analysis predictions.
# ==============================================================================

import sys
import pickle
import warnings
import numpy as np
import pandas as pd
import spacy
import en_core_web_lg
# Configure sys.path to permit localized module discovery within the core directory
sys.path.append('./core')

import clean_utilities as CU

# Suppression of non-critical runtime warnings to maintain a clean console log
warnings.filterwarnings("ignore")

def tweet_prediction(tweet: str) -> int:
    """
    Placeholder for the tweet classifier entry point.

    This definition carries no executable statements, so invoking it yields
    None. The module rebinds the name ``tweet_prediction`` further down, after
    the spaCy engine and the classifier have been loaded; that later
    definition is the one importers of this module end up with.

    Intended contract (fulfilled by the later definition):
        1. Clean the text using the clean_utilities module.
        2. Convert the text to numbers using spaCy.
        3. Use the trained SVM model to make a prediction.

    Args:
        tweet (str): The tweet text from the user.

    Returns:
        int: 1 for Depressive, 0 for Non-depressive, per the later
            definition. This placeholder itself returns None.
    """
# Global initialization of heavy resources to optimize runtime performance.
# Loading these once at import time means individual prediction requests do
# not pay the model-loading cost.

# 1. Load spaCy NLP engine
try:
    nlp_engine = en_core_web_lg.load()
except Exception as e:
    # Diagnostics go to stderr so stdout stays clean. Predictions are
    # impossible without the language model, so abort with a non-zero status.
    print(f"Error loading spaCy model: {e}", file=sys.stderr)
    sys.exit(1)

# 2. Load pre-trained SVM Classifier
# NOTE(security): pickle.load can execute arbitrary code embedded in the file.
# model_path must only ever point at a trusted, project-shipped artifact.
model_path = "./assets/models/model_svm1.pkl"
try:
    with open(model_path, 'rb') as model_file:
        classifier = pickle.load(model_file)
except Exception as e:
    # Same policy as above: report on stderr, then stop the process.
    print(f"Error loading SVM model from {model_path}: {e}", file=sys.stderr)
    sys.exit(1)

def tweet_prediction(tweet: str) -> int:
    """
    Classify a tweet as depressive (1) or non-depressive (0).

    The process:
        1. Clean the text using the clean_utilities module.
        2. Embed the cleaned text as the mean of its spaCy token vectors,
           using the pre-loaded spaCy engine.
        3. Ask the pre-loaded classifier for a prediction.

    If cleaning leaves no tokens at all, a zero vector is classified instead
    of the NaN vector that averaging an empty token list would produce.

    Args:
        tweet (str): The tweet text from the user.

    Returns:
        int: The first element of the classifier's prediction converted to
            int; 1 for Depressive, 0 for Non-depressive.
    """
    # Width of the feature vector handed to the classifier.
    # Presumably the en_core_web_lg vector size - TODO confirm.
    embedding_dim = 300

    # Step 1: Clean the text
    cleaned_text = CU.tweets_cleaner(tweet)

    # Step 2: Compute the centroid of the token embeddings.
    # Module-level 'nlp_engine' is used here, avoiding reload overhead.
    token_vectors = [token.vector for token in nlp_engine(cleaned_text)]
    if token_vectors:
        # Average first, then widen to float64, so the features match what
        # the previous "mean * np.ones(300)" expression produced.
        centroid = np.array(token_vectors).mean(axis=0).astype(np.float64)
    else:
        # The mean of zero vectors is NaN, which is not a usable feature
        # vector, so fall back to a neutral all-zero embedding.
        centroid = np.zeros(embedding_dim, dtype=np.float64)

    # The classifier expects a 2-D batch; wrap the single sample as one row.
    semantic_vectors = centroid.reshape(1, -1)

    # Step 3: Perform binary classification with the module-level 'classifier'
    prediction_result = classifier.predict(semantic_vectors)

    return int(prediction_result[0])