| |
| |
|
|
| import pandas as pd |
| import spacy |
|
|
| |
# Load the spaCy "en_core_web_trf" pipeline once, at import time; the resulting
# `nlp` object is the one remove_names() uses to tag entities.
nlp = spacy.load(name="en_core_web_trf")
|
|
|
|
| |
def remove_names(text):
    """Strip the names of people from a text.

    The text is run through the module-level spaCy pipeline ``nlp``; every
    token whose entity type is ``PERSON`` is discarded. The surviving tokens
    are joined back together with single spaces, so punctuation ends up
    separated from the word before it (see the example).

    :param text: the text from which names will be removed.
    :return: the remaining tokens joined by single spaces.

    >>> remove_names('My name is John Connor, leader of the rebellion.')
    'My name is , leader of the rebellion .'
    """
    kept_tokens = []
    for token in nlp(text):
        # Skip anything the NER component labelled as part of a person name.
        if token.ent_type_ == "PERSON":
            continue
        kept_tokens.append(token.text)
    return " ".join(kept_tokens)
|
|
|
|
| |
def main():
    """Build the name-free movies dataset and write it to disk.

    Steps, in order:

    1. Read ``../../data/raw/0_inicial/movies.csv`` and print its columns.
    2. Drop the ``Unnamed: 0``, ``Genre``, ``Wiki Page`` and ``title`` columns.
    3. Add ``plot_sin_nombres`` by applying ``remove_names`` to every value
       of ``Plot``, then drop ``Plot`` itself.
    4. Write the result to ``../../data/processed/movies_clean.csv``.

    Both paths are relative, so they resolve against the current working
    directory of the process.

    :raises FileNotFoundError: if the input CSV does not exist.
    :raises KeyError: if any of the expected columns is missing.
    """
    movies = pd.read_csv('../../data/raw/0_inicial/movies.csv')
    print(movies.columns)

    # Remove the columns this script does not carry over to the output.
    movies = movies.drop(columns=['Unnamed: 0', 'Genre', 'Wiki Page', 'title'])

    # Replace the raw plot with a version that has person names removed.
    movies['plot_sin_nombres'] = movies['Plot'].apply(remove_names)
    movies = movies.drop(columns='Plot')

    # NOTE(review): to_csv() writes the index as an unnamed first column by
    # default, which is presumably where 'Unnamed: 0' in the input came from.
    # Kept as-is so the output format does not change; consider index=False
    # once downstream readers are confirmed not to rely on that column.
    movies.to_csv('../../data/processed/movies_clean.csv')


# Run the pipeline only when executed as a script, so importing this module
# (e.g. to reuse remove_names) does not read or write any files.
if __name__ == '__main__':
    main()
|
|