| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | import argparse |
| | import pickle |
| | import warnings |
| | import numpy as np |
| | import pandas as pd |
| | import spacy |
| | import en_core_web_lg |
| | import clean_utilities as CU |
| |
|
| | |
# Silence third-party warnings (e.g. spaCy / sklearn version-mismatch noise)
# so the CLI output stays readable for end users.
warnings.filterwarnings("ignore")
| |
|
def main():
    """
    Main entry point for the prediction utility.

    This script encapsulates the end-to-end inference pipeline:
    1. Argument Parsing: Captures input text file and model selection.
    2. Text Preprocessing: Normalization via clean_utilities.
    3. Feature Extraction: Generating centroid embeddings via spaCy.
    4. Classification: Binary sentiment analysis via pre-trained SVM.

    Reads the tweet text from the file named on the command line, prints
    progress and the final classification to stdout. Errors (missing file,
    any failure inside the pipeline) are reported to stdout rather than
    raised, since this is a terminal-facing utility.
    """
    parser = argparse.ArgumentParser(
        description="Twitter Depression Detection: Machine Learning Inference Utility"
    )
    parser.add_argument(
        'filename',
        help="Path to the text file containing the tweet for classification"
    )
    parser.add_argument(
        'model',
        help="Target model architecture (currently optimized for 'SVM')"
    )
    args = parser.parse_args()

    # argparse guarantees both positionals are present, so `args.filename`
    # can never be None (the original guard also tested that, redundantly).
    # Only the model selection actually needs validating.
    if args.model != "SVM":
        print("Usage Error: Please provide an input file and specify 'SVM' as the target model.")
        return

    print(f"Loading input source: {args.filename}")
    try:
        with open(args.filename, 'r', encoding='utf-8') as file:
            raw_test_tweet = file.read()
        print(f"Captured Content: \"{raw_test_tweet}\"")

        print("Executing linguistic cleaning pipeline...")
        cleaned_input = [CU.tweets_cleaner(raw_test_tweet)]
        print(f"Normalized Form: {cleaned_input}")

        print("Transforming text to 300-dimensional semantic vectors...")
        nlp_engine = en_core_web_lg.load()

        # Centroid embedding: the mean of the per-token 300-d vectors.
        # Fix 1: the original multiplied the mean by np.ones((300)), a
        # mathematical no-op that has been removed.
        # Fix 2: a tweet that cleaning reduces to zero tokens would make
        # mean() over an empty array emit NaNs; fall back to a zero vector
        # so the classifier still receives well-formed input.
        def _centroid(text):
            token_vectors = [token.vector for token in nlp_engine(text)]
            if not token_vectors:
                return np.zeros(300)
            return np.mean(token_vectors, axis=0)

        semantic_features = np.array([_centroid(s) for s in cleaned_input])

        # SECURITY NOTE: pickle.load executes arbitrary code embedded in the
        # artifact — only load model files from a trusted source.
        model_artifact_path = "../assets/models/model_svm1.pkl"
        with open(model_artifact_path, 'rb') as model_file:
            classifier = pickle.load(model_file)

        print("Performing binary classification...")
        prediction_bin = classifier.predict(semantic_features)

        # Label convention per the branch below: 1 == depressive.
        is_depressive = prediction_bin[0]
        if is_depressive == 1:
            print("\n>>> CLASSIFICATION RESULT: The analyzed content exhibits depressive characteristics.")
        else:
            print("\n>>> CLASSIFICATION RESULT: The analyzed content is classified as non-depressive.")

    except FileNotFoundError:
        print(f"Error: The input file {args.filename} could not be located.")
    except Exception as e:
        # Top-level CLI boundary: surface the failure to the user instead of
        # a traceback. (Broad catch is deliberate here.)
        print(f"An error occurred during the inference process: {e}")
| |
|
# Standard entry guard: run the inference pipeline only when this file is
# executed as a script, not when imported as a module.
if __name__ == '__main__':
    main()
| |
|
| |
|
| |
|