Spaces:

ameythakur
/

Depression-Detection-Using-Tweets

Running

File size: 2,362 Bytes

c061ce5

# ==============================================================================
# PROJECT: DEPRESSION-DETECTION-USING-TWEETS
# AUTHORS: AMEY THAKUR & MEGA SATISH
# GITHUB (AMEY): https://github.com/Amey-Thakur
# GITHUB (MEGA): https://github.com/msatmod
# REPOSITORY: https://github.com/Amey-Thakur/DEPRESSION-DETECTION-USING-TWEETS
# RELEASE DATE: June 5, 2022
# LICENSE: MIT License
# DESCRIPTION: Utility module for tweet analysis predictions.
# ==============================================================================

import sys
import pickle
import warnings
import numpy as np
import pandas as pd
import spacy
import en_core_web_lg
# Configure sys.path to permit localized module discovery within the core directory
sys.path.append('./core')

import clean_utilities as CU

# Suppress ALL Python warnings process-wide (not only non-critical ones) to keep the console log clean
warnings.filterwarnings("ignore")

# Width of the feature vector handed to the classifier (one value per
# embedding dimension of the word vectors).
_EMBEDDING_DIM = 300

# Pickled classifier artifact; the path is resolved against the current
# working directory.
_MODEL_PATH = "./assets/models/model_svm1.pkl"

# Lazily filled caches: both artifacts are expensive to load, so they are
# loaded on first use and reused for every later prediction in this process.
_nlp_engine = None
_classifier = None


def _get_nlp_engine():
    """Return the spaCy pipeline, loading it only on the first call."""
    global _nlp_engine
    if _nlp_engine is None:
        _nlp_engine = en_core_web_lg.load()
    return _nlp_engine


def _get_classifier():
    """Return the unpickled classifier, reading it from disk only on the first call."""
    global _classifier
    if _classifier is None:
        # SECURITY NOTE: pickle.load can execute arbitrary code. The model file
        # must be a trusted, locally shipped artifact, never user-supplied data.
        with open(_MODEL_PATH, 'rb') as model_file:
            _classifier = pickle.load(model_file)
    return _classifier


def _centroid_embedding(doc) -> np.ndarray:
    """Return the mean of the token vectors of *doc* as a float64 vector.

    A document without tokens has no vectors to average; the plain mean would
    be NaN. A zero vector of length ``_EMBEDDING_DIM`` is returned instead so
    the classifier always receives finite input.
    """
    token_vectors = [token.vector for token in doc]
    if not token_vectors:
        return np.zeros(_EMBEDDING_DIM)
    # Cast to float64 to keep the dtype the classifier was previously given.
    return np.asarray(token_vectors).mean(axis=0).astype(np.float64)


def tweet_prediction(tweet: str) -> int:
    """
    Classify a tweet as depressive (1) or non-depressive (0).

    The process:
        1. Clean the text with ``CU.tweets_cleaner``.
        2. Run the cleaned text through the spaCy pipeline and average the
           token vectors into a single centroid embedding.
        3. Feed that embedding to the pickled classifier and return its label.

    The spaCy pipeline and the classifier are loaded on the first call and
    cached for the lifetime of the process, so later calls skip the load cost.

    Args:
        tweet (str): The tweet text from the user.

    Returns:
        int: 1 for Depressive, 0 for Non-depressive.

    Raises:
        OSError: If the model file cannot be opened on first use.
    """
    # Step 1: Clean the text
    cleaned_tweet = CU.tweets_cleaner(tweet)

    # Step 2: Embed the cleaned text as the centroid of its token vectors.
    # The classifier expects a 2-D batch, hence the single-row wrapper.
    document = _get_nlp_engine()(cleaned_tweet)
    semantic_vectors = np.array([_centroid_embedding(document)])

    # Step 3: Perform binary classification with the cached model
    prediction_result = _get_classifier().predict(semantic_vectors)

    return int(prediction_result[0])