| |
| import functools as ft |
| import numpy as np |
| import pandas as pd |
|
|
| |
# Root folder for the study data. Replace the placeholder with a real
# location; the trailing slash matters because paths are built by plain
# string concatenation.
file_path = "<YOUR_DATA_PATH>/"
# Sub-folder (under file_path) from which the input CSV files are read.
input_file_path = f"{file_path}data_for_model_e_columns/"
|
|
|
|
def read_data(file):
    """
    Load a CSV data source into a dataframe
    --------
    :param file: string filename of the CSV file to load
    :return: dataframe holding the file contents, parsed with the pandas defaults
    """
    return pd.read_csv(file)
|
|
|
|
def format_data(exacerbations_data, admissions_data, onboard, IDs,
                study_end_date='2021-08-31'):
    """
    Remove unnecessary columns from dataframes,
    merge onboarding, admissions, and exacerbations dataframes,
    convert datetime columns to datetime format,
    filter to include only the study IDs listed in IDs,
    and create new column showing date of death for those who died during the study
    --------
    :param exacerbations_data: dataframe containing exacerbations data
        (columns 'Study_ID' and 'first_exacerbation' are used)
    :param admissions_data: dataframe containing admissions data
        (columns 'Study_ID' and 'admitted_1' are used)
    :param onboard: dataframe containing onboarding dates
        (must provide 'Study_ID', 'OB_date' and 'censor')
    :param IDs: dataframe containing RECEIVER and scale up 1 study IDs
        (must provide 'Study_ID')
    :param study_end_date: censor date that marks the end of the study;
        a row whose 'censor' differs from it gets its censor date as 'DOD',
        a row whose 'censor' equals it gets a missing 'DOD'
    :return: formatted dataframe with one extra datetime column 'DOD'
    """
    # Keep only the key and the single event-date column from each source.
    admissions = admissions_data[['Study_ID', 'admitted_1']]
    exacerbations = exacerbations_data[['Study_ID', 'first_exacerbation']]

    # Outer joins keep an ID that appears in any one of the three sources.
    df_combined = ft.reduce(
        lambda left, right: pd.merge(left, right, on='Study_ID', how='outer'),
        [onboard, exacerbations, admissions],
    )
    # The left join onto the ID list is what restricts the result to those IDs.
    data = pd.merge(IDs, df_combined, on='Study_ID', how='left')

    for column in ('first_exacerbation', 'admitted_1', 'OB_date', 'censor'):
        data[column] = pd.to_datetime(data[column])

    # Keep the censor date only where it is not the study end date, and drop
    # any time-of-day component so DOD is a pure calendar date.
    study_end = pd.Timestamp(study_end_date)
    data['DOD'] = data['censor'].where(data['censor'] != study_end).dt.normalize()
    return data
|
|
|
|
def time_to_events(data, output_path=None):
    """
    Calculate time to first event (exacerbation, admission, or death) and first admission or death
    for each study ID and save the summary dataframe
    --------
    :param data: dataframe containing admissions data, exacerbations data, and onboarding dates
        (columns 'admitted_1', 'first_exacerbation', 'DOD' and 'OB_date' are read);
        the new columns are added to this dataframe in place
    :param output_path: path or writable buffer passed to DataFrame.to_csv;
        when None the summary is written to file_path + 'Time_to_first_event.csv'
    :return: dataframe with additional columns showing number of days until first event and number of days
        to first admission/ death
    """
    # Row-wise minimum gives the earliest date per study ID; missing dates are
    # skipped by default, so a row only stays empty when every date is missing.
    data['first_event'] = pd.to_datetime(
        data[["admitted_1", "first_exacerbation", "DOD"]].min(axis=1)
    )
    data['first_admission_or_death'] = pd.to_datetime(
        data[["admitted_1", "DOD"]].min(axis=1)
    )

    # Whole days elapsed between onboarding and each event.
    onboarding = data['OB_date']
    data['days_to_first_event'] = (data['first_event'] - onboarding).dt.days
    data['days_to_first_admission_death'] = (
        data['first_admission_or_death'] - onboarding
    ).dt.days

    # Resolve the default lazily so the module-level folder is only needed
    # when the caller does not say where to write.
    if output_path is None:
        output_path = file_path + 'Time_to_first_event.csv'
    data.to_csv(output_path)
    return data
|
|
|
|
def main():
    """
    Run the whole pipeline: read the four input CSV files,
    combine them into one formatted dataframe,
    and calculate and save the time-to-event summary
    --------
    :return: None
    """
    # Listed in the order they are read: exacerbations, admissions, study IDs,
    # onboarding dates.
    input_names = (
        "First_exacerbation_data.csv",
        "admissions_data_up_to_31082021.csv",
        "RC_SU1_IDs.csv",
        "onboarding_dates.csv",
    )
    exacerbations, admissions, study_ids, onboarding = (
        read_data(input_file_path + name) for name in input_names
    )

    # format_data takes onboarding before the ID list.
    combined = format_data(exacerbations, admissions, onboarding, study_ids)

    time_to_events(combined)
|
|
|
|
# Run the pipeline only when this file is executed as a script, so that
# importing the module to reuse its functions does not read or write files.
if __name__ == "__main__":
    main()