File size: 2,015 Bytes
4d1cb0c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# -*- coding: utf-8 -*-
"""Data_Gathering_Twint.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1zV6s2FhvQCmyNh0uyknfm47WATAOihbU
"""

# Mount Google Drive; the scraping cells below write their output under
# /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

# Clone the upstream twint sources into the current working directory.
!git clone https://github.com/twintproject/twint.git

# Switch into the checkout so the following shell commands run from there
# (presumably the clone landed in /content, Colab's usual working directory).
import os
os.chdir("/content/twint")

# NOTE(review): this redirects the current environment's package list into
# requirements.txt in the working directory, replacing any file of that name
# shipped with the checkout -- confirm this is intended.
!pip freeze > requirements.txt

# Install twint from the local checkout.
!pip install .

# Install/upgrade twint from the master branch of the cyxv fork.
# NOTE(review): together with the local install above and the PyPI install
# below, twint is installed three times from three sources; whichever version
# pip leaves in place is what `import twint` will load -- confirm which one is
# actually wanted and drop the others.
!pip install -U git+https://github.com/cyxv/twint.git@master

# nest_asyncio is imported and applied further down, before twint is used.
!pip install nest_asyncio

# Install twint from PyPI.
!pip3 install twint

# Import required libraries
import nest_asyncio
nest_asyncio.apply()
import pandas as pd
import twint
import pandas as pd
import re

# Scrape up to 1000 English, non-retweet tweets for each depression-related
# hashtag and store them as CSV on the mounted Drive. Every hashtag writes to
# the same output path.

depress_tags = [
    "#depressed",
    "#anxiety",
    "#depression",
    "#suicide",
    # The comma after this entry was missing, which silently fused it with the
    # next literal into the single bogus tag "#mentalhealth#loneliness".
    "#mentalhealth",
    "#loneliness",
    "#hopelessness",
    "#itsokaynottobeokay",
]

# Not used by this cell; kept because later notebook cells may refer to it.
content = {}
for tag in depress_tags:
    print(tag)
    c = twint.Config()

    c.Format = "Tweet id: {id} | Tweet: {tweet}"
    c.Search = tag
    c.Limit = 1000
    # NOTE(review): twint's documentation describes Year as "filter tweets
    # before the specified year", so this may not restrict results to 2019
    # alone -- confirm; Since/Until give an explicit date window if needed.
    c.Year = 2019
    c.Lang = "en"
    c.Store_csv = True
    c.Store_Object = True
    c.Output = "/content/drive/MyDrive/NLP/Depression_Detection/depressive_en_2019.csv"
    c.Hide_output = True
    c.Stats = True
    c.Lowercase = True
    c.Filter_retweets = True
    twint.run.Search(c)

# Scrape up to 1000 non-retweet tweets for each of a smaller set of
# depression-related hashtags and store them as JSON on the mounted Drive.
# Every hashtag writes to the same output path. Unlike the 2019 run, no
# language filter is set here.

depress_tags = ["#depressed", "#depression", "#loneliness", "#hopelessness"]

# Not used by this cell; kept because later notebook cells may refer to it.
content = {}
for tag in depress_tags:
    print(tag)
    c = twint.Config()

    c.Format = "Tweet id: {id} | Tweet: {tweet}"
    c.Search = tag
    c.Limit = 1000
    # NOTE(review): twint's documentation describes Year as "filter tweets
    # before the specified year", so this may not restrict results to 2020
    # alone -- confirm; Since/Until give an explicit date window if needed.
    c.Year = 2020
    # Only JSON storage is enabled. The original also set Store_csv = True,
    # which conflicts with the .json output path (twint appears to prefer CSV
    # when both flags are set, so the ".json" file would hold CSV rows --
    # verify against the installed twint version).
    c.Store_json = True
    c.Output = "/content/drive/MyDrive/NLP/Depression_Detection/dataset_depression.json"
    c.Hide_output = True
    c.Stats = True
    c.Lowercase = True
    c.Filter_retweets = True
    twint.run.Search(c)