Spaces:

ameythakur
/

Depression-Detection-Using-Tweets

Running

File size: 2,804 Bytes

c061ce5

# ==============================================================================
# PROJECT: DEPRESSION-DETECTION-USING-TWEETS
# AUTHORS: AMEY THAKUR & MEGA SATISH
# GITHUB (AMEY): https://github.com/Amey-Thakur
# GITHUB (MEGA): https://github.com/msatmod
# REPOSITORY: https://github.com/Amey-Thakur/DEPRESSION-DETECTION-USING-TWEETS
# RELEASE DATE: June 5, 2022
# LICENSE: MIT License
# DESCRIPTION: Utility for cleaning raw tweet data for analysis.
# ==============================================================================

import argparse
import warnings
import clean_utilities as CU

# Silence warnings process-wide so the CLI output stays uncluttered.
# The "ignore" action is installed with no category/module restriction, so it
# applies to every warning category, not only to non-critical ones.
# NOTE: this runs after the imports above, so warnings emitted while
# `clean_utilities` is being imported are not covered by this filter.
warnings.filterwarnings("ignore")

def main():
    """
    Primary execution routine for the tweet cleaning utility.

    Reads the whole file named by the positional ``filename`` command-line
    argument (decoded as UTF-8), passes its contents to
    ``CU.tweets_cleaner``, and writes the returned text to
    ``clean_tweet.txt`` (a relative path, resolved against the current
    working directory), overwriting any existing file of that name.

    Failures are reported by printing a message instead of raising: a
    ``FileNotFoundError`` yields a "not discovered" message, and any other
    exception yields a generic message that includes the exception text.
    In both cases the function then returns normally.
    """
    # Configuration of the command-line argument parser
    parser = argparse.ArgumentParser(
        description="Twitter Depression Detection: Text Cleaning Utility"
    )

    # Mandatory positional argument holding the input file path
    parser.add_argument(
        'filename',
        help="Path to the raw text file containing the tweet to be sanitized"
    )

    # argparse prints a usage error and exits by itself when the required
    # positional argument is missing, so `args.filename` is always populated
    # here and no separate "is not None" check is needed.
    args = parser.parse_args()

    print(f"Targeting file for preprocessing: {args.filename}")

    try:
        # Keep the input handle open only for as long as the read takes
        with open(args.filename, 'r', encoding='utf-8') as file:
            raw_tweet = file.read()

        # Delegate the actual cleaning to the project's clean_utilities module
        # (per the original notes: contraction expansion, tokenization and
        # lemmatization -- implemented outside this file, not verified here)
        print("Linguistic cleaning in progress...")
        sanitized_tweet = CU.tweets_cleaner(raw_tweet)

        # Persist the sanitized result to local storage
        with open('clean_tweet.txt', 'w', encoding='utf-8') as output_file:
            output_file.write(sanitized_tweet)

        # Announce success only once the write has actually completed, so a
        # failed write is never preceded by a "complete" message
        print("Sanitization complete. Persistence target: clean_tweet.txt")

    except FileNotFoundError:
        print(f"Error: The specified file '{args.filename}' was not discovered.")
    except Exception as e:
        # Top-level boundary of the CLI: report the problem instead of
        # surfacing a traceback to the user
        print(f"An unexpected analytical error occurred: {e}")

# Run the command-line routine only when this file is executed as a script;
# importing the module does not call main().
if __name__ == '__main__':
    main()