# NOTE(review): The three lines below are scraping residue from the hosting
# page (user avatar caption, repo name, commit hash) — not Python. They are
# commented out so the file actually parses; safe to delete entirely.
# ameythakur's picture
# DEPRESSION-DETECTION
# 4d1cb0c verified
# ==============================================================================
# PROJECT: DEPRESSION-DETECTION-USING-TWEETS
# AUTHORS: AMEY THAKUR & MEGA SATISH
# GITHUB (AMEY): https://github.com/Amey-Thakur
# GITHUB (MEGA): https://github.com/msatmod
# REPOSITORY: https://github.com/Amey-Thakur/DEPRESSION-DETECTION-USING-TWEETS
# RELEASE DATE: June 5, 2022
# LICENSE: MIT License
# DESCRIPTION: Utility for predicting depression levels in tweets using SVM.
# ==============================================================================
import argparse
import pickle
import warnings
import numpy as np
import pandas as pd
import spacy
import en_core_web_lg
import clean_utilities as CU
# Suppression of non-critical runtime warnings to maintain output integrity
warnings.filterwarnings("ignore")
def main():
    """
    Main entry point for the prediction utility.

    End-to-end inference pipeline:
        1. Argument parsing: input text file and model selection.
        2. Text preprocessing: normalization via clean_utilities.
        3. Feature extraction: centroid word-embedding vector via spaCy
           ('en_core_web_lg', 300-dimensional vectors).
        4. Classification: binary prediction with a pre-trained SVM loaded
           from ../assets/models/model_svm1.pkl.

    Reads CLI arguments; prints results to stdout. Returns None.
    """
    # Initialize the CLI argument parser with a descriptive header
    parser = argparse.ArgumentParser(
        description="Twitter Depression Detection: Machine Learning Inference Utility"
    )
    # Positional argument for the target tweet content (text file)
    parser.add_argument(
        'filename',
        help="Path to the text file containing the tweet for classification"
    )
    # Positional argument for the classification model type
    parser.add_argument(
        'model',
        help="Target model architecture (currently optimized for 'SVM')"
    )
    args = parser.parse_args()

    # argparse guarantees positionals are present, so the original
    # `args.filename is not None` check was always true; only the model
    # name needs validation. Guard clause avoids the deep nesting.
    if args.model != "SVM":
        print("Usage Error: Please provide an input file and specify 'SVM' as the target model.")
        return

    print(f"Loading input source: {args.filename}")
    try:
        # Step 1: Data acquisition
        with open(args.filename, 'r', encoding='utf-8') as file:
            raw_test_tweet = file.read()
        print(f"Captured Content: \"{raw_test_tweet}\"")

        # Step 2: Linguistic preprocessing — normalize the raw text into a
        # tokenizable form via the project's cleaning helper.
        print("Executing linguistic cleaning pipeline...")
        cleaned_input = [CU.tweets_cleaner(raw_test_tweet)]
        print(f"Normalized Form: {cleaned_input}")

        # Step 3: Feature-space transformation — one centroid embedding per
        # cleaned text.
        print("Transforming text to 300-dimensional semantic vectors...")
        nlp_engine = en_core_web_lg.load()
        feature_rows = []
        for sentence in cleaned_input:
            token_vectors = [token.vector for token in nlp_engine(sentence)]
            if token_vectors:
                # Centroid of the token embeddings. (The original code also
                # multiplied by np.ones((300)) — a shape-preserving no-op,
                # dropped here.)
                feature_rows.append(np.mean(token_vectors, axis=0))
            else:
                # BUG FIX: input that cleans down to zero tokens previously
                # crashed on mean() of an empty array; fall back to the
                # zero vector so inference still runs.
                feature_rows.append(np.zeros(300))
        semantic_features = np.array(feature_rows)

        # Step 4: Model artifact loading.
        # SECURITY NOTE(review): pickle.load executes arbitrary code from
        # the file — only ever load trusted model artifacts here.
        model_artifact_path = "../assets/models/model_svm1.pkl"
        with open(model_artifact_path, 'rb') as model_file:
            classifier = pickle.load(model_file)

        # Step 5: Algorithmic inference — SVM decision for the feature row
        print("Performing binary classification...")
        prediction_bin = classifier.predict(semantic_features)

        # Step 6: Result interpretation and user communication
        # (label convention per this script: 1 == depressive, else not)
        if prediction_bin[0] == 1:
            print("\n>>> CLASSIFICATION RESULT: The analyzed content exhibits depressive characteristics.")
        else:
            print("\n>>> CLASSIFICATION RESULT: The analyzed content is classified as non-depressive.")
    except FileNotFoundError:
        print(f"Error: The input file {args.filename} could not be located.")
    except Exception as e:
        # Broad catch retained deliberately: this is the top-level CLI
        # boundary — report the failure instead of a raw traceback.
        print(f"An error occurred during the inference process: {e}")
# Script entry point: run the inference pipeline only when executed
# directly, not when imported as a module.
if __name__ == '__main__':
    main()