| """ |
| Admission reduction utilities |
| """ |
| import pandas as pd |
| from datetime import date |
|
|
|
|
def fill_missing_years(df):
    """
    Add zero-admission placeholder rows for years missing from the dataset
    --------
    Rows are ordered by 'ADMDATE'. Every calendar year lying strictly between
    the 'eoy' year of the first row and the 'eoy' year of the last row, and
    not already present in 'eoy', receives one new row. The new row reuses
    the month/day of the first row's 'eoy' for both 'ADMDATE' and 'eoy',
    copies 'SafeHavenID' and 'eth_grp' from the first row, and sets all
    count/stay columns to 0.

    NOTE(review): presumably called once per patient (e.g. from a groupby),
    since the first row's 'SafeHavenID' is used for every added row -
    confirm against the caller.
    NOTE: if the first row's 'eoy' falls on 29 Feb, building the date for a
    non-leap missing year raises ValueError.

    :param df: dataframe to be updated; must contain 'ADMDATE', a datetime
        'eoy' column, 'SafeHavenID' and 'eth_grp'
    :return: dataframe with missing years added, sorted by 'ADMDATE'
        (an empty input is returned unchanged)
    """
    # Nothing to anchor on or fill in; avoids IndexError on .iloc[0] below
    if df.empty:
        return df

    df = df.sort_values('ADMDATE')
    eoy_years = df.eoy.dt.year.tolist()

    # Set membership keeps the gap search linear in the number of rows
    present_years = set(eoy_years)
    missing_years = [
        year
        for year in range(eoy_years[0] + 1, eoy_years[-1])
        if year not in present_years
    ]
    if not missing_years:
        return df

    # Placeholder rows share the month/day of the first row's end-of-year date
    anchor_month = df.eoy.dt.month.iloc[0]
    anchor_day = df.eoy.dt.day.iloc[0]
    filler_dates = pd.to_datetime(
        [date(year, anchor_month, anchor_day) for year in missing_years]
    )

    # Scalars broadcast against the date index: one row per missing year
    filler_rows = pd.DataFrame({
        'SafeHavenID': df.SafeHavenID.iloc[0],
        'eth_grp': df.eth_grp.iloc[0],
        'ADMDATE': filler_dates,
        'STAY': 0,
        'copd_event': 0,
        'resp_event': 0,
        'eoy': filler_dates,
        'adm': 0,
        'anxiety_depression_event': 0,
    })
    return pd.concat([df, filler_rows]).sort_values('ADMDATE')
|
|
|
|
def calc_adm_per_year(df):
    """
    Collapse admission data to one row per patient per year
    --------
    Rows are grouped on ('SafeHavenID', 'eoy') and three summaries are
    joined side by side on that index:
      * the last value in each group (pandas GroupBy.last) of the running
        "to date" / "days since" / date columns and 'eth_grp'
      * the mean of 'STAY', exposed as 'mean_los'
      * per-group sums of the event flags and 'STAY', exposed as the
        '*_per_year' columns and 'total_hosp_days'

    :param df: dataframe to be reduced
    :return: reduced dataframe indexed by ('SafeHavenID', 'eoy')
    """
    per_patient_year = df.groupby(['SafeHavenID', 'eoy'])

    # Values carried over from the final row of each patient-year
    carried_cols = [
        'eth_grp', 'days_since_copd', 'days_since_resp', 'days_since_adm',
        'adm_to_date', 'copd_to_date', 'resp_to_date',
        'anxiety_depression_to_date', 'copd_date', 'resp_date', 'adm_date',
    ]
    year_end_values = per_patient_year[carried_cols].last()

    # Average length of stay within the year
    mean_stay = per_patient_year[['STAY']].mean().rename(
        columns={'STAY': 'mean_los'}
    )

    # Yearly totals, renamed from source column to output column
    totals_rename = {
        'adm': 'adm_per_year',
        'copd_event': 'copd_per_year',
        'resp_event': 'resp_per_year',
        'anxiety_depression_event': 'anxiety_depression_per_year',
        'STAY': 'total_hosp_days',
    }
    yearly_totals = per_patient_year[list(totals_rename)].sum().rename(
        columns=totals_rename
    )

    return year_end_values.join(mean_stay).join(yearly_totals)