{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "name": "modeling.ipynb", "provenance": [] }, "kernelspec": { "display_name": "Python 3", "name": "python3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "EnBV1jN248Ug", "outputId": "3424dd27-36c7-429e-b088-2267a6750bf3" }, "source": [ "from google.colab import drive\n", "drive.mount('/content/drive')" ], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Mounted at /content/drive\n" ] } ] }, { "cell_type": "code", "metadata": { "id": "ceDOBFoAPvNl" }, "source": [ "#!pip install -qqq h5py" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "HvyXFF0GHrHq", "outputId": "d694def4-662f-4682-86ed-1126420651fc" }, "source": [ "#!pip install --upgrade -qqq gensim" ], "execution_count": null, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[K |████████████████████████████████| 24.1 MB 1.8 MB/s \n", "\u001b[?25h" ] } ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Acq0aPH0-ZoR", "outputId": "68aa21cc-0c63-42dc-ba79-e7897a0ae04d" }, "source": [ "!python -m spacy download en_core_web_lg" ], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Collecting en_core_web_lg==2.2.5\n", " Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-2.2.5/en_core_web_lg-2.2.5.tar.gz (827.9 MB)\n", "\u001b[K |████████████████████████████████| 827.9 MB 1.3 MB/s \n", "\u001b[?25hRequirement already satisfied: spacy>=2.2.2 in /usr/local/lib/python3.7/dist-packages (from en_core_web_lg==2.2.5) (2.2.4)\n", "Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->en_core_web_lg==2.2.5) (2.0.6)\n", 
"Requirement already satisfied: numpy>=1.15.0 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->en_core_web_lg==2.2.5) (1.19.5)\n", "Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->en_core_web_lg==2.2.5) (1.0.6)\n", "Requirement already satisfied: wasabi<1.1.0,>=0.4.0 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->en_core_web_lg==2.2.5) (0.8.2)\n", "Requirement already satisfied: plac<1.2.0,>=0.9.6 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->en_core_web_lg==2.2.5) (1.1.3)\n", "Requirement already satisfied: requests<3.0.0,>=2.13.0 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->en_core_web_lg==2.2.5) (2.23.0)\n", "Requirement already satisfied: blis<0.5.0,>=0.4.0 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->en_core_web_lg==2.2.5) (0.4.1)\n", "Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->en_core_web_lg==2.2.5) (3.0.6)\n", "Requirement already satisfied: thinc==7.4.0 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->en_core_web_lg==2.2.5) (7.4.0)\n", "Requirement already satisfied: srsly<1.1.0,>=1.0.2 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->en_core_web_lg==2.2.5) (1.0.5)\n", "Requirement already satisfied: catalogue<1.1.0,>=0.0.7 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->en_core_web_lg==2.2.5) (1.0.0)\n", "Requirement already satisfied: setuptools in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->en_core_web_lg==2.2.5) (57.4.0)\n", "Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->en_core_web_lg==2.2.5) (4.62.3)\n", "Requirement already satisfied: importlib-metadata>=0.20 in /usr/local/lib/python3.7/dist-packages (from catalogue<1.1.0,>=0.0.7->spacy>=2.2.2->en_core_web_lg==2.2.5) (4.8.2)\n", "Requirement already satisfied: 
zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata>=0.20->catalogue<1.1.0,>=0.0.7->spacy>=2.2.2->en_core_web_lg==2.2.5) (3.6.0)\n", "Requirement already satisfied: typing-extensions>=3.6.4 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata>=0.20->catalogue<1.1.0,>=0.0.7->spacy>=2.2.2->en_core_web_lg==2.2.5) (3.10.0.2)\n", "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy>=2.2.2->en_core_web_lg==2.2.5) (3.0.4)\n", "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy>=2.2.2->en_core_web_lg==2.2.5) (2.10)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy>=2.2.2->en_core_web_lg==2.2.5) (2021.10.8)\n", "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy>=2.2.2->en_core_web_lg==2.2.5) (1.24.3)\n", "Building wheels for collected packages: en-core-web-lg\n", " Building wheel for en-core-web-lg (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", " Created wheel for en-core-web-lg: filename=en_core_web_lg-2.2.5-py3-none-any.whl size=829180942 sha256=2b245d4db35432e69601f09c954c3f03ad99442f4c272e38e79ff0cda6e18570\n", " Stored in directory: /tmp/pip-ephem-wheel-cache-ll8e18vr/wheels/11/95/ba/2c36cc368c0bd339b44a791c2c1881a1fb714b78c29a4cb8f5\n", "Successfully built en-core-web-lg\n", "Installing collected packages: en-core-web-lg\n", "Successfully installed en-core-web-lg-2.2.5\n", "\u001b[38;5;2m✔ Download and installation successful\u001b[0m\n", "You can now load the model via spacy.load('en_core_web_lg')\n" ] } ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 801 }, "id": "sBYe-DXP_Huq", "outputId": "fde81c27-16c0-42d4-b623-1c12984c6923" }, "source": [ "## Pin spaCy for this notebook; %pip installs into the environment of the running kernel.\n", "## NOTE(review): the en_core_web_lg 2.2.5 model downloaded in the previous cell declares spacy>=2.2.2,\n", "## so pip reports a dependency conflict for this 2.2.0 pin (see the saved output) - confirm the pin is intended.\n", "%pip install -U SpaCy==2.2.0" ], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Collecting SpaCy==2.2.0\n", " Downloading spacy-2.2.0-cp37-cp37m-manylinux1_x86_64.whl (10.2 MB)\n", "\u001b[K |████████████████████████████████| 10.2 MB 14.1 MB/s \n", "\u001b[?25hCollecting thinc<7.2.0,>=7.1.1\n", " Downloading thinc-7.1.1-cp37-cp37m-manylinux1_x86_64.whl (2.1 MB)\n", "\u001b[K |████████████████████████████████| 2.1 MB 69.4 MB/s \n", "\u001b[?25hRequirement already satisfied: blis<0.5.0,>=0.4.0 in /usr/local/lib/python3.7/dist-packages (from SpaCy==2.2.0) (0.4.1)\n", "Collecting plac<1.0.0,>=0.9.6\n", " Downloading plac-0.9.6-py2.py3-none-any.whl (20 kB)\n", "Requirement already satisfied: numpy>=1.15.0 in /usr/local/lib/python3.7/dist-packages (from SpaCy==2.2.0) (1.19.5)\n", "Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from SpaCy==2.2.0) (3.0.6)\n", "Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from SpaCy==2.2.0) (2.0.6)\n", "Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /usr/local/lib/python3.7/dist-packages (from 
SpaCy==2.2.0) (1.0.6)\n", "Requirement already satisfied: wasabi<1.1.0,>=0.2.0 in /usr/local/lib/python3.7/dist-packages (from SpaCy==2.2.0) (0.8.2)\n", "Requirement already satisfied: requests<3.0.0,>=2.13.0 in /usr/local/lib/python3.7/dist-packages (from SpaCy==2.2.0) (2.23.0)\n", "Requirement already satisfied: srsly<1.1.0,>=0.1.0 in /usr/local/lib/python3.7/dist-packages (from SpaCy==2.2.0) (1.0.5)\n", "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->SpaCy==2.2.0) (2.10)\n", "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->SpaCy==2.2.0) (1.24.3)\n", "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->SpaCy==2.2.0) (3.0.4)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->SpaCy==2.2.0) (2021.10.8)\n", "Requirement already satisfied: tqdm<5.0.0,>=4.10.0 in /usr/local/lib/python3.7/dist-packages (from thinc<7.2.0,>=7.1.1->SpaCy==2.2.0) (4.62.3)\n", "Installing collected packages: plac, thinc, SpaCy\n", " Attempting uninstall: plac\n", " Found existing installation: plac 1.1.3\n", " Uninstalling plac-1.1.3:\n", " Successfully uninstalled plac-1.1.3\n", " Attempting uninstall: thinc\n", " Found existing installation: thinc 7.4.0\n", " Uninstalling thinc-7.4.0:\n", " Successfully uninstalled thinc-7.4.0\n", " Attempting uninstall: SpaCy\n", " Found existing installation: spacy 2.2.4\n", " Uninstalling spacy-2.2.4:\n", " Successfully uninstalled spacy-2.2.4\n", "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", "en-core-web-sm 2.2.5 requires spacy>=2.2.2, but you have spacy 2.2.0 which is incompatible.\n", "en-core-web-lg 2.2.5 requires spacy>=2.2.2, but you have spacy 2.2.0 which is incompatible.\u001b[0m\n", "Successfully installed SpaCy-2.2.0 plac-0.9.6 thinc-7.1.1\n" ] }, { "output_type": "display_data", "data": { "application/vnd.colab-display-data+json": { "pip_warning": { "packages": [ "plac", "plac_core", "plac_ext", "spacy", "thinc" ] } } }, "metadata": {} } ] }, { "cell_type": "code", "metadata": { "id": "KF1Wqxyj5IqV" }, "source": [ "## Import required libraries (single import cell for the whole notebook)\n", "\n", "## warnings: silence every warning category for the rest of the session\n", "import warnings\n", "warnings.filterwarnings(\"ignore\")\n", "\n", "## for data\n", "import numpy as np\n", "import pandas as pd\n", "\n", "## for plotting\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "\n", "## TF-IDF\n", "from sklearn.feature_extraction.text import TfidfVectorizer\n", "\n", "## T-SNE\n", "from yellowbrick.text import TSNEVisualizer\n", "from sklearn import manifold\n", "\n", "## Train-Test Split\n", "from sklearn.model_selection import train_test_split\n", "\n", "## Feature selection\n", "from sklearn import feature_selection\n", "\n", "## libraries for classification\n", "from sklearn.pipeline import Pipeline\n", "import sklearn.metrics as skm\n", "from sklearn.metrics import confusion_matrix, accuracy_score\n", "from sklearn.linear_model import LogisticRegression\n", "from sklearn.neighbors import KNeighborsClassifier\n", "from sklearn.svm import SVC\n", "from sklearn.tree import DecisionTreeClassifier\n", "from sklearn.neural_network import MLPClassifier\n", "from sklearn.ensemble import RandomForestClassifier\n", "\n", "## for saving model\n", "import pickle\n", "\n", "## for explainer\n", "#from lime import lime_text\n", "\n", "## detokenization\n", "from nltk.tokenize.treebank import TreebankWordDetokenizer\n", "\n", "## for word embedding with gensim\n", 
"import gensim\n", "import gensim.downloader as gensim_api\n", "from gensim.models import KeyedVectors, Word2Vec\n", "from keras.preprocessing.text import Tokenizer\n", "from keras.preprocessing.sequence import pad_sequences\n", "\n", "## for word embedding with Spacy\n", "import spacy\n", "import en_core_web_lg\n", "\n", "## for deep learning\n", "## NOTE(review): both standalone `keras` and `tensorflow.keras` are imported below;\n", "## confirm they resolve to the same Keras build before mixing their objects.\n", "import tensorflow as tf\n", "import keras\n", "from keras.models import Model, Sequential, load_model, model_from_json\n", "from keras.callbacks import EarlyStopping, ModelCheckpoint\n", "from keras.layers import Activation, Conv1D, Dense, Dropout, Embedding, Input, Lambda, LSTM, MaxPooling1D\n", "from tensorflow.keras import models, layers, preprocessing as kprocessing\n", "from tensorflow.keras import backend as K\n", "\n", "## for bert language model\n", "#import transformers" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "qpKZPTJFf3ny" }, "source": [ "## Loading the dataset:" ] }, { "cell_type": "code", "metadata": { "id": "aX_vtWbl5Yg2" }, "source": [ "## Tab-separated, UTF-8 dataset file under the /content/drive mount point\n", "DATA_PATH = \"/content/drive/MyDrive/NLP/Depression_Detection/data_cleaning/processed_data/processed_data.csv\"\n", "\n", "df_all = pd.read_csv(DATA_PATH, sep='\\t', encoding='utf-8')" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 414 }, "id": "MK9pdUZL3qiE", "outputId": "b755aeae-2e8f-41f0-ad0a-1bde478b8a7b" }, "source": [ "df_all" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
| \n", " | text | \n", "label | \n", "clean_text | \n", "
|---|---|---|---|
| 0 | \n", "Open discussion. Between the Transfer Portal a... | \n", "1 | \n", "open discussion transfer portal nil become obs... | \n", "
| 1 | \n", "Plenty of things are changing in my life and t... | \n", "1 | \n", "plenty thing changing life life around one thi... | \n", "
| 2 | \n", "I feel a little hopeless. Anyone else? #hopele... | \n", "1 | \n", "feel little hopeless anyone else | \n", "
| 3 | \n", "Which is more healthy? Hope, or hopelessness? ... | \n", "1 | \n", "healthy hope hopelessness | \n", "
| 4 | \n", "So someone tell me how do I get over #HOPELESS... | \n", "1 | \n", "someone tell get live world surrounded people ... | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "
| 38904 | \n", "@andreaj27 well, cheers. i have a glass of wat... | \n", "0 | \n", "well cheer glass water handy drink cheer | \n", "
| 38905 | \n", "@AndreaJo84 Is that right? You're over in Palm... | \n", "0 | \n", "right palm spring guess see local news still w... | \n", "
| 38906 | \n", "@AndreaKobayashi It's the best cafe in Asakusa... | \n", "0 | \n", "best cafe asakusa tokyo small serf best coffee | \n", "
| 38907 | \n", "@AndreaKoeln heeey! tonight is the night! no s... | \n", "0 | \n", "heeey tonight night sleeping like night loll | \n", "
| 38908 | \n", "@AmandaFClark it is when you add the hammock | \n", "0 | \n", "add hammock | \n", "
38909 rows × 3 columns
\n", "