| |
| """Data_Gathering_Twint.ipynb |
| |
| Automatically generated by Colaboratory. |
| |
| Original file is located at |
| https://colab.research.google.com/drive/1zV6s2FhvQCmyNh0uyknfm47WATAOihbU |
| """ |
|
|
# Mount Google Drive at /content/drive (Colab-only API). The scraping code
# further down writes its output files under /content/drive/MyDrive/.
| from google.colab import drive |
| drive.mount('/content/drive') |
|
|
# Environment setup: fetch and install twint plus nest_asyncio.
# Clone the upstream twint sources into the current working directory
# (the chdir below assumes that directory is /content).
| !git clone https://github.com/twintproject/twint.git |
|
|
# Work from inside the clone so that `pip install .` below targets it.
| import os |
| os.chdir("/content/twint") |
|
|
# Dump the currently installed packages to requirements.txt in the cwd.
# NOTE(review): the cwd is the twint clone, so this presumably overwrites any
# requirements.txt shipped with the repository -- confirm that is intended.
| !pip freeze > requirements.txt |
|
|
# Install the package found in the current directory (the clone).
| !pip install . |
|
|
# Install/upgrade twint from the master branch of the cyxv fork.
# NOTE(review): this targets the same package as the step above, so it
# presumably replaces that install -- confirm which build is meant to be used.
| !pip install -U git+https://github.com/cyxv/twint.git@master |
|
|
# nest_asyncio is imported and applied right after this setup section.
| !pip install nest_asyncio |
|
|
# NOTE(review): third twint install, this time by package name; likely
# redundant after the two installs above -- confirm before removing.
| !pip3 install twint |
|
|
| |
| import nest_asyncio |
| nest_asyncio.apply() |
| import pandas as pd |
| import twint |
| import pandas as pd |
| import re |
|
|
| |
|
|
# Collect English tweets for a set of depression-related hashtags and append
# them to a single CSV file on the mounted Google Drive.
#
# Each hashtag is searched separately with a limit of 1000 tweets per search;
# every search writes to the same output file.
depress_tags = [
    "#depressed",
    "#anxiety",
    "#depression",
    "#suicide",
    # The comma after "#mentalhealth" was missing, which made Python join it
    # with the next literal into the single term "#mentalhealth#loneliness".
    "#mentalhealth",
    "#loneliness",
    "#hopelessness",
    "#itsokaynottobeokay",
]

# Not populated anywhere in the visible code; kept because later notebook
# cells may still reference it.
content = {}

for tag in depress_tags:
    print(tag)  # progress marker: twint's own per-tweet output is hidden below

    c = twint.Config()
    c.Format = "Tweet id: {id} | Tweet: {tweet}"
    c.Search = tag
    c.Limit = 1000
    # NOTE(review): twint documents Year as "tweets before the specified
    # year", so this may select tweets up to the end of 2018 rather than
    # tweets from 2019 -- confirm against the intended dataset.
    c.Year = 2019
    c.Lang = "en"
    c.Store_csv = True
    # Was spelled `Store_Object`, which is not a twint option and was
    # therefore silently ignored; the documented name is `Store_object`.
    c.Store_object = True
    c.Output = "/content/drive/MyDrive/NLP/Depression_Detection/depressive_en_2019.csv"
    c.Hide_output = True
    c.Stats = True
    c.Lowercase = True
    c.Filter_retweets = True

    twint.run.Search(c)
|
|
| |
|
|
# Collect tweets for a smaller set of depression-related hashtags and append
# them to a single JSON file on the mounted Google Drive.
#
# Each hashtag is searched separately with a limit of 1000 tweets per search;
# every search writes to the same output file. No language filter is set here.
depress_tags = ["#depressed", "#depression", "#loneliness", "#hopelessness"]

# Not populated anywhere in the visible code; kept because later notebook
# cells may still reference it.
content = {}

for tag in depress_tags:
    print(tag)  # progress marker: twint's own per-tweet output is hidden below

    c = twint.Config()
    c.Format = "Tweet id: {id} | Tweet: {tweet}"
    c.Search = tag
    c.Limit = 1000
    # NOTE(review): twint documents Year as "tweets before the specified
    # year", so this may select tweets up to the end of 2019 rather than
    # tweets from 2020 -- confirm against the intended dataset.
    c.Year = 2020
    # Only the JSON writer is enabled. The original also set Store_csv = True;
    # twint's output code appears to check the CSV flag first, which would put
    # CSV rows into the ".json" file -- verify before relying on the format.
    c.Store_json = True
    c.Output = "/content/drive/MyDrive/NLP/Depression_Detection/dataset_depression.json"
    c.Hide_output = True
    c.Stats = True
    c.Lowercase = True
    c.Filter_retweets = True

    twint.run.Search(c)