| |
| from numpy import isnan |
| import pandas as pd |
|
|
| |
# Root data directory (placeholder to be replaced by the user) and the
# sub-directory that holds the model input files.
file_path = '<YOUR_DATA_PATH>/'
input_file_path = f'{file_path}data_for_model_e_columns/'
|
|
|
|
def read_data(file):
    """
    Load a CSV data source into a dataframe
    --------
    :param file: string filename (anything accepted by pandas.read_csv)
    :return: dataframe with the parsed contents
    """
    return pd.read_csv(file)
|
|
|
|
def GOLD_grade(data):
    """
    Calculate GOLD grade for COPD classification using FEV1%
    --------
    :param data: a single row (Series or dict-like) holding a 'FEV1%' value
    :return: 'GOLD 1' (FEV1% >= 80), 'GOLD 2' (50 to < 80),
        'GOLD 3' (30 to < 50), 'GOLD 4' (< 30), or NaN when FEV1% is missing
    """
    fev1_pct = data['FEV1%']

    # A missing value fails every comparison below, so without this guard it
    # would silently fall through to the most severe grade ('GOLD 4').
    if pd.isna(fev1_pct):
        return float('nan')

    # Thresholds are checked from highest to lowest, so each branch only
    # needs its lower bound.
    if fev1_pct >= 80:
        return 'GOLD 1'
    if fev1_pct >= 50:
        return 'GOLD 2'
    if fev1_pct >= 30:
        return 'GOLD 3'
    return 'GOLD 4'
|
|
|
|
def GOLD_group(data):
    """
    Calculate GOLD group from admissions data, exacerbations data, and CAT data
    --------
    :param data: a single row (Series or dict-like) holding 'CAT_baseline',
        'Prior_Ad' and 'exac_prev_year' values
    :return: 'GOLD group A'..'GOLD group D', or NaN when CAT_baseline is
        missing

    A row is high risk when Prior_Ad > 0 or exac_prev_year > 1, and high
    symptom when CAT_baseline >= 10:
        high risk + high symptom -> D
        high risk + low symptom  -> C
        low risk  + high symptom -> B
        low risk  + low symptom  -> A
    """
    cat_score = data['CAT_baseline']

    # Without a CAT score the symptom axis is unknown; returning NaN avoids
    # silently labelling such rows as 'GOLD group A'.
    if pd.isna(cat_score):
        return float('nan')

    # The risk criteria are combined first and only then paired with the CAT
    # threshold. Written inline as `cat & admitted | exacerbated`, `&` binds
    # tighter than `|`, so frequent exacerbators were always placed in group D.
    # Comparisons against NaN are False, so missing admission/exacerbation
    # data counts as "no event", matching the previous NaN handling.
    high_risk = bool(data['Prior_Ad'] > 0) or bool(data['exac_prev_year'] > 1)
    high_symptoms = bool(cat_score >= 10)

    if high_risk:
        return 'GOLD group D' if high_symptoms else 'GOLD group C'
    return 'GOLD group B' if high_symptoms else 'GOLD group A'
| |
|
|
def apply_if_else(data, condition):
    """
    Evaluate a row-wise rule on every row of a dataframe
    --------
    :param data: dataframe
    :param condition: callable that receives one row and returns a value
    :return: result of data.apply(condition, axis=1), one entry per row
    """
    per_row_result = data.apply(condition, axis=1)
    return per_row_result
|
|
|
|
def main():
    """
    Derive GOLD grade and GOLD group for the cohort and save them to CSV
    --------
    Reads Cohort_characteristics_data_RC_SU.csv from input_file_path, keeps
    the ID plus the columns used for GOLD classification, adds the
    'GOLD grade' and 'GOLD group' columns, and writes the result to
    file_path + 'GOLD_data.csv'.
    :return: None
    """
    RC_SU1_characteristics_file = input_file_path + "Cohort_characteristics_data_RC_SU.csv"
    RC_SU1_characteristics_data = read_data(RC_SU1_characteristics_file)

    # Take an explicit copy: adding columns to a bare column selection
    # triggers pandas' SettingWithCopyWarning.
    GOLD_data = RC_SU1_characteristics_data[
        ['ID', 'FEV1%', 'CAT_baseline', 'Prior_Ad', 'exac_prev_year']
    ].copy()

    # Classify every row with the two row-wise rules.
    GOLD_data['GOLD grade'] = apply_if_else(GOLD_data, GOLD_grade)
    GOLD_data['GOLD group'] = apply_if_else(GOLD_data, GOLD_group)

    # to_csv is called with its defaults, so the index is written as well.
    GOLD_data.to_csv(file_path + 'GOLD_data.csv')
| |
|
|
# Run the pipeline. NOTE(review): this call is not guarded by
# `if __name__ == "__main__":`, so it also runs when the module is imported.
main()