| """ |
| Script for preprocessing pharmacy data |
| -------- |
| Process pharmacy data and track inhaler prescriptions and rescue meds |
| """ |
| import json |
| import pandas as pd |
| from datetime import date |
| from dateutil.relativedelta import relativedelta |
| from utils.common import (add_hist_adm_presc, correct_column_names, |
| first_patient_appearance) |
| from utils.presc_common import initialize_presc_data, track_medication |
|
|
|
|
def add_inhaler_mappings(df):
    """
    Load inhaler prescription mappings and track where they appear in the data
    --------
    For every inhaler type in mappings/inhaler_mapping.json a 0/1 column named
    '<type>_inhaler' is added, set to 1 where PI_Approved_Name matches any of
    the patterns listed for that type. The 'LABA-LAMA-ICS' and 'Ignore'
    columns are dropped again before returning.
    :param df: dataframe with a PI_Approved_Name text column (the indicator
        columns are added to this dataframe in place)
    :return: dataframe with column added for each retained inhaler type
    """
    print('Mapping inhaler prescriptions')

    # Mapping file layout: {inhaler type: [name patterns]}
    with open('mappings/inhaler_mapping.json') as json_file:
        inhaler_mapping = json.load(json_file)

    approved_name = df['PI_Approved_Name']
    for inhaler_type, patterns in inhaler_mapping.items():
        # The patterns are OR-ed into one regular expression. na=False makes
        # rows with a missing name count as "no match"; without it they come
        # back as NaN, which cannot be cast to int.
        df[inhaler_type + '_inhaler'] = approved_name.str.contains(
            '|'.join(patterns), na=False).astype(int)

    # These two mapping groups are not kept as output columns
    return df.drop(columns=['LABA-LAMA-ICS_inhaler', 'Ignore_inhaler'])
|
|
|
|
| def add_eoy_column(df, dt_col, eoy_date): |
| """ |
| Add EOY relative to user-specified end date |
| -------- |
| :param df: dataframe |
| :param dt_col: date column in dataframe |
| :param eoy_date: EOY date from config |
| :return: updated df with EOY column added |
| """ |
| |
| df = df.reset_index(drop=True) |
|
|
| |
| end_date = pd.to_datetime(eoy_date) |
| end_month = end_date.month |
| end_day = end_date.day |
|
|
| |
| df['eoy'] = [date(y, end_month, end_day) for y in df[dt_col].dt.year] |
|
|
| |
| eoy_index = df.columns[df.columns == 'eoy'] |
| adm_vs_eoy = df[dt_col] > df.eoy |
| row_index = df.index[adm_vs_eoy] |
| df.loc[row_index, eoy_index] = df[adm_vs_eoy].eoy + relativedelta(years=1) |
| df['eoy'] = pd.to_datetime(df.eoy) |
|
|
| return df |
|
|
|
|
def calc_presc_per_year(df):
    """
    Reduce data to 1 row per patient per year
    --------
    Rows are grouped by (SafeHavenID, eoy). The "to date" style columns keep
    the last (non-null) entry of each group, while the per-prescription
    indicator columns are summed and renamed with a '_per_year' suffix.
    :param df: dataframe to be reduced
    :return: reduced dataframe indexed by (SafeHavenID, eoy)
    """
    print('Reducing to 1 row per year')

    per_patient_year = df.groupby(['SafeHavenID', 'eoy'])

    # Columns where only the final value within the year is kept
    snapshot_cols = ['presc_to_date', 'days_since_rescue', 'rescue_to_date',
                     'anxiety_depression_presc_to_date', 'rescue_date']
    snapshot = per_patient_year[snapshot_cols].last()

    # Columns that are totalled over the year
    count_cols = ['SALBUTAMOL', 'SABA_inhaler', 'LABA_inhaler',
                  'LAMA_inhaler', 'SAMA_inhaler', 'ICS_inhaler',
                  'LABA-ICS_inhaler', 'LAMA +LABA-ICS_inhaler',
                  'SABA + SAMA_inhaler', 'MCS_inhaler', 'rescue_meds',
                  'presc', 'anxiety_depression_presc']
    yearly_totals = per_patient_year[count_cols].sum().add_suffix('_per_year')

    return snapshot.join(yearly_totals)
|
|
|
|
def main():
    """
    Run the pharmacy preprocessing pipeline end to end
    --------
    Reads paths and the EOY date from ../../../config.json, processes the
    Pharmacy_Cohort3R.csv extract and pickles both the row-level table
    ('validation_presc_proc.pkl') and the per-year table ('presc_proc.pkl')
    under the configured model data path.
    """
    # Load user configuration
    with open('../../../config.json') as config_file:
        settings = json.load(config_file)
    out_dir = settings['model_data_path']

    # Read the raw pharmacy extract
    pharmacy = initialize_presc_data(
        settings['extract_data_path'] + 'Pharmacy_Cohort3R.csv')

    # Return value is not used here; presumably this records each patient's
    # first prescription date under out_dir -- confirm in utils.common
    first_patient_appearance(pharmacy, 'PRESC_DATE', 'presc', out_dir)

    # Flag inhaler types, then let the shared helper track medication
    pharmacy = add_inhaler_mappings(pharmacy)
    pharmacy = track_medication(pharmacy)

    # Raw name/code columns are not needed once the flags exist
    pharmacy = pharmacy.drop(
        columns=['PI_Approved_Name', 'PI_BNF_Item_Code', 'code'])

    # Attach the EOY each prescription falls under
    pharmacy = add_eoy_column(pharmacy, 'PRESC_DATE', settings['date'])

    # One prescription per row, so yearly sums become prescription counts
    pharmacy['presc'] = 1

    # Per-patient history features from the shared helper
    by_patient = pharmacy.groupby('SafeHavenID')
    pharmacy = by_patient.apply(add_hist_adm_presc, 'presc', 'PRESC_DATE')
    pharmacy = pharmacy.reset_index(drop=True)

    # Save the row-level data
    pharmacy.to_pickle(out_dir + 'validation_presc_proc.pkl')

    # Reduce to one row per patient per year, tidy column names and save
    yearly = calc_presc_per_year(pharmacy)
    yearly.columns = correct_column_names(yearly.columns, 'presc')
    yearly.to_pickle(out_dir + 'presc_proc.pkl')
|
|
|
|
# Only run the pipeline when executed as a script, so importing this module
# (e.g. to reuse add_eoy_column) does not read or write any files
if __name__ == '__main__':
    main()
|
|