| |
| import pandas as pd |
| import numpy as np |
|
|
| |
# Root directory for all inputs and outputs; replace the placeholder before running.
file_path = '<YOUR_DATA_PATH>/'
# Sub-directory of the root that holds the input CSV files.
input_file_path = f'{file_path}data_for_model_e_columns/'
|
|
|
|
def read_data(file):
    """
    Load a CSV data source into a dataframe
    --------
    :param file: filename of the CSV to load; handed unchanged to pandas.read_csv
    :return: dataframe with the parsed contents of the file
    """
    return pd.read_csv(file)
|
|
|
|
def format_data(onboard, IDs, censor_date='2021-08-31'):
    """
    Convert datetime columns to datetime format, keep only the rows for the IDs of interest,
    and add Date of death column
    --------
    :param onboard: dataframe containing onboarding dates; needs 'Study_ID', 'OB_date' and
        'censor' columns. The caller's dataframe is not modified.
    :param IDs: dataframe containing IDs of interest; needs a 'Study_ID' column
    :param censor_date: censor value meaning "no date of death recorded"
        (presumably the end of the study follow-up - TODO confirm with the data owner)
    :return: new dataframe, IDs left-joined to onboard on 'Study_ID', with 'OB_date' and
        'censor' as datetimes and a datetime 'DOD' column (NaT where censor equals
        censor_date or where the ID has no onboarding row)
    """
    # Parse the date columns on a copy so the dataframe passed in is left untouched.
    parsed = onboard.assign(
        OB_date=pd.to_datetime(onboard['OB_date']),
        censor=pd.to_datetime(onboard['censor']),
    )

    # Left join: every row of IDs is kept, even without a matching onboarding record.
    merged = pd.merge(IDs, parsed, on="Study_ID", how="left")

    # DOD is the calendar day of 'censor' (time of day dropped) unless the censor
    # timestamp is exactly the cut-off, in which case it is left missing (NaT).
    censor = merged['censor']
    merged['DOD'] = censor.dt.normalize().where(censor != pd.Timestamp(censor_date))
    return merged
|
|
|
|
def calculate_suvival(onboard, date_of_death, OB_date, output_path=None):
    """
    Calculate days from onboarding to date of death for those who died over the course of the RECEIVER study
    and save the dataframe
    --------
    :param onboard: dataframe containing onboarding and date of death data; a 'days'
        column is added to it in place
    :param date_of_death: name of the datetime column showing date of death
    :param OB_date: name of the datetime column showing onboarding date
    :param output_path: destination of the CSV file; when omitted the file is written as
        'Time_to_death_for_cohorts.csv' under the module-level file_path
    :return: None
    """
    # Whole days between the two dates; rows with a missing date get a missing value.
    onboard['days'] = (onboard[date_of_death] - onboard[OB_date]).dt.days

    if output_path is None:
        output_path = file_path + 'Time_to_death_for_cohorts.csv'
    onboard.to_csv(output_path)
|
|
|
|
def main():
    """
    Run the pipeline: read the onboarding dates and the IDs of interest, format them,
    then compute and save the days from onboarding to date of death
    """
    onboarding_csv = input_file_path + "onboarding_dates.csv"
    cohort_ids_csv = input_file_path + "RC_SU1_IDs.csv"

    # Onboarding dates are read first, then the ID list, and both go straight to formatting.
    cohort = format_data(read_data(onboarding_csv), read_data(cohort_ids_csv))

    # Adds the 'days' column and writes the result to disk.
    calculate_suvival(cohort, 'DOD', 'OB_date')
|
|
| |
# Run the pipeline only when this file is executed as a script, so that importing
# the module (e.g. to reuse its functions) does not trigger file reads and writes.
if __name__ == "__main__":
    main()