# ============================================================================== # PROJECT: DEPRESSION-DETECTION-USING-TWEETS # AUTHORS: AMEY THAKUR & MEGA SATISH # GITHUB (AMEY): https://github.com/Amey-Thakur # GITHUB (MEGA): https://github.com/msatmod # REPOSITORY: https://github.com/Amey-Thakur/DEPRESSION-DETECTION-USING-TWEETS # RELEASE DATE: June 5, 2022 # LICENSE: MIT License # DESCRIPTION: Utility module for tweet analysis predictions. # ============================================================================== import sys import pickle import warnings import numpy as np import pandas as pd import spacy import en_core_web_lg # Configure sys.path to permit localized module discovery within the core directory sys.path.append('./core') import clean_utilities as CU # Suppression of non-critical runtime warnings to maintain a clean console log warnings.filterwarnings("ignore") def tweet_prediction(tweet: str) -> int: """ Takes a tweet and returns whether it's classified as depressive (1) or not (0). The process: 1. Clean the text using our utility module. 2. Convert text to numbers using spaCy. 3. Use the trained SVM model to make a prediction. Args: tweet (str): The tweet text from the user. Returns: int: 1 for Depressive, 0 for Non-depressive. """ # Step 1: Clean the text processed_tweet = tweet cleaned_input = [] cleaned_input.append(CU.tweets_cleaner(processed_tweet)) # Step 2: Convert text to numbers using spaCy nlp_engine = en_core_web_lg.load() # Step 3: Compute centroid word embeddings # We calculate the mean vector of all tokens to represent the tweet's semantic context semantic_vectors = np.array([ np.array([token.vector for token in nlp_engine(s)]).mean(axis=0) * np.ones((300)) for s in cleaned_input ]) # Step 4: Load the pre-trained Support Vector Machine (SVM) model artifact # The SVM was selected for its robust performance in high-dimensional text classification model_path = "./assets/models/model_svm1.pkl" with open(model_path, 'rb') as model_file: classifier = pickle.load(model_file) # Step 5: Perform binary classification prediction_result = classifier.predict(semantic_vectors) return int(prediction_result[0])