| """ |
| Utility functions common across admission processing |
| (admissions/comorbidities/gples) |
| """ |
| import pandas as pd |
| from utils.common import read_data |
| from utils.adm_processing import (update_null_stay, calculate_total_stay, |
| search_diag) |
|
|
|
|
def initialize_adm_data(adm_file):
    """
    Read the admission extract and prepare it for downstream processing
    --------
    :param adm_file: admission data file name
    :return: de-duplicated admission dataframe with ADMDATE and DISDATE
        converted to datetimes
    """
    print('Loading admission data')

    # Column name -> type requested from read_data. Insertion order is the
    # order in which the names and types are handed over.
    schema = {
        'SafeHavenID': 'int',
        'ETHGRP': 'object',
        'ADMDATE': 'object',
        'DISDATE': 'object',
        'STAY': 'int',
        'DIAG1Desc': 'str',
        'DIAG2Desc': 'str',
        'DIAG3Desc': 'str',
        'DIAG4Desc': 'str',
        'DIAG5Desc': 'str',
        'DIAG6Desc': 'str',
    }
    admissions = read_data(adm_file, list(schema), list(schema.values()))

    # Discard rows that are duplicated across all columns
    admissions = admissions.drop_duplicates()

    # The date columns are requested as plain objects, so parse them here
    for date_col in ('ADMDATE', 'DISDATE'):
        admissions[date_col] = pd.to_datetime(admissions[date_col])

    return admissions
|
|
|
|
def correct_stays(df):
    """
    Fill in missing STAY values, then merge transfer admissions so each
    admission occurrence appears once
    --------
    :param df: admission dataframe to be corrected
    :return: admission dataframe with null stays filled and transfers
        combined, carrying a fresh positional index
    """
    print('Correcting stays')

    # Null STAY values are filled by the shared helper before any grouping
    stays_filled = update_null_stay(df)

    # Order each patient's admissions chronologically so the per-patient
    # helper sees them in sequence
    ordered = stays_filled.sort_values(
        by=['SafeHavenID', 'ADMDATE', 'DISDATE'])
    per_patient = ordered.groupby('SafeHavenID')
    combined = per_patient.apply(calculate_total_stay)

    # Drop the index left behind by the groupby/apply step
    return combined.reset_index(drop=True)
|
|
|
|
def track_copd_resp(df):
    """
    Flag COPD and/or respiratory admissions
    --------
    :param df: admission dataframe to be updated
    :return: updated dataframe with events tracked
    """
    print('Tracking events')

    def _strip_text(column):
        # Only object-dtype columns are stripped; others pass through as-is
        if column.dtype == 'object':
            return column.str.strip()
        return column

    # Remove leading/trailing whitespace before the diagnosis search
    tracked = df.apply(_strip_text)

    # Run the diagnosis search for COPD first, then respiratory events
    for event in ('copd', 'resp'):
        tracked = search_diag(tracked, event)

    return tracked