| """ |
| Admission processing utilities |
| """ |
| import json |
| import numpy as np |
| from utils.common import track_event |
|
|
|
|
def update_null_stay(df):
    """
    Calculate length of stay based on ADM/DISDATE for null STAY values
    --------
    :param df: pandas dataframe to be updated; must contain the columns
        STAY, ADMDATE and DISDATE. The dataframe is modified in place
    :return: updated dataframe (the same object that was passed in)
    """
    missing = df['STAY'].isnull()

    if missing.any():
        # Select the affected rows with a boolean mask so the fill works for
        # any number of null rows and for any index labels.
        elapsed = df.loc[missing, 'DISDATE'] - df.loc[missing, 'ADMDATE']

        # Whole days between admission and discharge, stored as float.
        # .values assigns by position, so index alignment is not involved.
        df.loc[missing, 'STAY'] = elapsed.apply(
            lambda delta: float(delta.days)).values

    return df
|
|
|
|
def calculate_total_stay(df):
    """
    Merge consecutive admissions where a row's ADMDATE equals the previous
    row's DISDATE (patient transferred between departments) into a single
    admission
    --------
    :param df: pandas dataframe to be updated; modified in place (index is
        reset, merged rows are removed)
    :return: updated dataframe
    """
    df.reset_index(inplace=True, drop=True)

    # Temporary flag: True where this row starts on the previous row's
    # discharge date
    df['transfer'] = df['ADMDATE'].eq(df['DISDATE'].shift())

    superseded = []
    for pos, current in df.iloc[1:].iterrows():
        if current['transfer'] is not True:
            continue

        # Read the previous row from df (not from the iteration snapshot) so
        # chains of transfers carry forward the accumulated values
        previous = df.iloc[pos - 1]
        df.loc[pos, 'ADMDATE'] = previous['ADMDATE']
        df.loc[pos, 'STAY'] = current['STAY'] + previous['STAY']
        superseded.append(pos - 1)

    # Remove the rows that were folded into their successor
    df.drop(superseded, inplace=True)

    # Remove the temporary flag column
    df.drop('transfer', axis=1, inplace=True)

    return df
|
|
|
|
def convert_ethgrp_desc(eth):
    """
    Map an ETHGRP description onto a broad ethnic group label
    --------
    :param eth: str ethnic group description in the style of SMR01 data
    :return: string ethnicity, one of "White", "Mixed", "Asian", "Black",
        "Other", "Refused" or "Unknown"
    """
    # "White" takes precedence over every other group; a description that
    # merely starts with "British" is also counted as White
    white_keywords = ("White", "Irish", "Welsh", "English")
    if any(word in eth for word in white_keywords) or eth.startswith("British"):
        return "White"

    # Remaining groups, checked in order; matching is case-sensitive
    ordered_groups = (
        ("Mixed", ("mixed",)),
        ("Asian", ("Asian", "Pakistani", "Indian", "Bangladeshi", "Chinese")),
        ("Black", ("Black", "Caribbean", "African")),
        ("Other", ("Arab", "other ethnic")),
        ("Refused", ("Refused",)),
    )
    for label, keywords in ordered_groups:
        if any(word in eth for word in keywords):
            return label

    return "Unknown"
|
|
|
|
def mode_ethnicity(v, eth_col):
    """
    Select the most commonly occurring ethnicity for each patient in groupby
    --------
    :param v: pandas patient dataframe to be updated (modified in place)
    :param eth_col: str ethnicity column
    :return: updated subset of data with common ethnicity per ID; returned
        unchanged when the column holds no non-missing value
    """
    eth = v[eth_col]
    n = eth.nunique()

    # na=False makes missing entries count as "no match". Without it the
    # result holds NaN, which is truthy, so a missing value would be treated
    # as if an 'Unknown'/'Refused' entry were present.
    is_unk = eth.str.contains('Unknown', na=False).astype(bool)
    is_ref = eth.str.contains('Refused', na=False).astype(bool)
    any_unk = bool(is_unk.any())
    any_ref = bool(is_ref.any())

    # Ignore 'Unknown'/'Refused' only when at least one other distinct value
    # would remain after removing them
    if any_unk and any_ref and n > 2:
        eth = eth[~is_unk & ~is_ref]
    elif any_unk and n > 1:
        eth = eth[~is_unk]
    elif any_ref and n > 1:
        eth = eth[~is_ref]

    # mode() skips missing values and returns its results sorted, so ties
    # resolve to the first value in sort order
    modes = eth.mode()
    if modes.empty:
        # Nothing but missing values: no ethnicity to propagate
        return v

    v[eth_col] = modes.values[0]

    return v
|
|
|
|
def search_diag(df, typ):
    """
    Search diagnosis columns for descriptions indicative of copd or resp events
    --------
    :param df: dataframe to search; must contain the columns DIAG1Desc to
        DIAG6Desc. A new column named typ + '_event' is added in place
    :param typ: 'copd', 'resp' or 'anxiety_depression'; must be a key of
        mappings/diag_copd_resp_desc.json, otherwise KeyError is raised
    :return: dataframe with column added tracking specific type of admission
    """
    # Columns to check
    diag_cols = ['DIAG1Desc', 'DIAG2Desc', 'DIAG3Desc', 'DIAG4Desc',
                 'DIAG5Desc', 'DIAG6Desc']

    # Load the description mapping; the context manager closes the file
    # handle as soon as it has been parsed
    with open('mappings/diag_copd_resp_desc.json') as mapping_file:
        copd_resp_desc = json.load(mapping_file)

    # Select the descriptions relevant to the requested event type
    desc = copd_resp_desc[typ]

    # Flag passed to track_event; True only for 'copd'
    # NOTE(review): track_event is defined in utils.common; confirm there
    # how this flag changes the matching
    single = typ == 'copd'

    # Apply track_event to each diagnosis column, then mark a row with 1 if
    # any of its diagnosis columns matched, 0 otherwise
    df[typ + '_event'] = df[diag_cols].apply(
        lambda x: track_event(x, desc, single)).any(axis=1).astype(int)

    return df
|
|