| """ |
| Map GOLD standard COPD groupings from REC/SUP IDs to SafeHavenIDs. |
| -------- |
| NB: Data contained within 'RC_SU1_spirometry_data.csv' has been created |
| from data within the teams space. |
| """ |
| import pandas as pd |
|
|
|
|
| |
# Base directory; the input data directory below is built from it
file_path = '<YOUR_DATA_PATH>/copd.model-e/'
input_file_path = f'{file_path}training/src/data/'

# Pickle file the mapped spirometry data is written to
output_file_path = (
    '<YOUR_DATA_PATH>/Model_E_Extracts/rec_sup_spirometry_data.pkl'
)
|
|
|
|
def read_data(file):
    """
    Load a CSV data source into a dataframe
    --------
    :param file: string filename, passed straight to pandas.read_csv
    :return: dataframe holding the parsed file contents
    """
    return pd.read_csv(file)
|
|
|
|
def calc_gold_grade(data):
    """
    Derive the GOLD grade for COPD classification from a FEV1% value
    --------
    :param data: row (or other mapping) exposing a 'FEV1%' entry
    :return: 'GOLD 1' (>= 80), 'GOLD 2' (>= 50 and < 80),
        'GOLD 3' (>= 30 and < 50), 'GOLD 4' (< 30), or '' when the value
        satisfies none of the comparisons (e.g. NaN)
    """
    percent_predicted = data['FEV1%']

    # Lower bounds in descending order: the first one met decides the grade
    lower_bounds = ((80, 'GOLD 1'), (50, 'GOLD 2'), (30, 'GOLD 3'))
    for bound, grade in lower_bounds:
        if percent_predicted >= bound:
            return grade

    # Anything below the lowest bound is GOLD 4; values that fail every
    # comparison fall through to the empty string
    if percent_predicted < 30:
        return 'GOLD 4'
    return ''
|
|
|
|
def add_SH_mappings_for_RC_and_SU1(RC_IDs, SU1_IDs, spirometry_data):
    """
    Attach SH ID mappings to the RC and SU1 spirometry data
    --------
    :param RC_IDs: dataframe containing RECEIVER - SH ID mappings, keyed by
        an 'RNo' column
    :param SU1_IDs: dataframe containing SU1 - SH ID mappings, keyed by a
        'Study_Number' column
    :param spirometry_data: spirometry data for RC and SU1 with 'StudyId'
        and 'FEV1%' columns
    :return: spirometry rows that have no missing values after the join,
        with 'FEV1%' and 'SafeHavenID' cast to int32

    NOTE(review): dropna() is called without a subset, so a row with a
    missing value in any column is removed, not only rows without a
    matching ID - confirm this is intended.
    """
    # Put both lookups on a shared 'StudyId' key and stack them
    id_lookup = pd.concat(
        [
            RC_IDs.rename(columns={'RNo': 'StudyId'}),
            SU1_IDs.rename(columns={'Study_Number': 'StudyId'}),
        ],
        ignore_index=True,
    )

    # Left join keeps every spirometry row; unmatched rows get NaN IDs
    joined = spirometry_data.merge(id_lookup, on="StudyId", how="left")
    complete_rows = joined.dropna()

    return complete_rows.astype({'FEV1%': 'int32', 'SafeHavenID': 'int32'})
|
|
|
|
def main():
    """
    Build and save the RC/SU1 spirometry data with SH ID mappings
    --------
    Reads the spirometry CSV, adds a 'GOLD grade' column, joins the two
    ID lookup files and pickles the result to output_file_path.
    """
    # Load spirometry readings, discarding rows with any missing value
    spirometry = read_data(
        input_file_path + "RC_SU1_spirometry_data.csv").dropna()

    # Grade every row from its FEV1% value
    gold_grades = spirometry.apply(calc_gold_grade, axis=1)
    spirometry['GOLD grade'] = gold_grades

    # Load the ID lookup files for both services
    receiver_lookup = read_data(
        "<YOUR_DATA_PATH>/EXAMPLE_STUDY_DATA/Cohort3Rand.csv")
    scaleup_lookup = read_data(
        "<YOUR_DATA_PATH>/SU_IDs/Scale_Up_lookup.csv")

    # Attach the SH IDs and persist the mapped data
    add_SH_mappings_for_RC_and_SU1(
        receiver_lookup, scaleup_lookup, spirometry,
    ).to_pickle(output_file_path)
|
|
|
|
# Run the pipeline. NOTE(review): this call is unguarded, so it also runs
# whenever the module is imported - confirm whether an
# `if __name__ == "__main__":` guard is wanted.
main()
|
|